├── .github └── workflows │ └── test.yml ├── .gitignore ├── .readthedocs.yaml ├── Makefile ├── Manifest.in ├── README.md ├── docs ├── Makefile ├── make.bat └── source │ ├── api.md │ ├── changelog.md │ ├── concepts.md │ ├── conf.py │ ├── examples.md │ ├── index.md │ ├── install.md │ ├── log_config.md │ ├── testing.md │ └── usage.md ├── imc ├── __init__.py ├── cli.py ├── data_models │ ├── __init__.py │ ├── project.py │ ├── roi.py │ └── sample.py ├── defaults.py ├── demo │ ├── __init__.py │ ├── generate_data.py │ └── get_demo_data.py ├── exceptions.py ├── graphics.py ├── interactive_volume_viewer.py ├── logo.png ├── ops │ ├── __init__.py │ ├── adjacency.py │ ├── clustering.py │ ├── community.py │ ├── compensation.py │ ├── domain.py │ ├── mixture.py │ ├── quant.py │ └── signal.py ├── py.typed ├── scripts │ ├── __init__.py │ ├── illustrate.py │ ├── inspect_ilastik_model.py │ ├── inspect_mcds.py │ ├── phenotype.py │ ├── predict.py │ ├── prepare.py │ ├── process.py │ ├── quantify.py │ ├── segment_stacks.py │ └── view.py ├── segmentation.py ├── tests │ ├── __init__.py │ ├── _test_layers.py │ ├── conftest.py │ ├── test_full_analysis.py │ ├── test_graphics.py │ ├── test_obj_creation.py │ └── test_serialization.py ├── types.py └── utils.py ├── noxfile.py ├── pyproject.toml └── requirements ├── requirements.cellpose.txt ├── requirements.deepcell.txt ├── requirements.dev.txt ├── requirements.doc.txt ├── requirements.stardist.txt └── requirements.txt /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Test imc package 5 | 6 | on: 7 | push: 8 | branches: [ main ] 9 | pull_request: 10 | branches: [ main ] 11 | 12 | jobs: 13 | linux: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 3.8 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: 3.8 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install wheel pytest 25 | pip install .[deepcell,astir] 26 | - name: Test with pytest 27 | run: | 28 | # Test package 29 | python -m pytest imc/ 30 | 31 | # Run pipeline 32 | mkdir -p imctest 33 | cd imctest 34 | imc process https://zenodo.org/record/5018260/files/COVID19_brain_Patient03_ROI3_COVID19_olfactorybulb.txt?download=1 35 | 36 | # List output files 37 | ls -l processed/ 38 | ls -l processed/${SAMPLE} 39 | ls -l results/phenotyping 40 | - name: Cache resources 41 | id: cache-resources 42 | uses: actions/cache@v2 43 | with: 44 | path: /home/$USER/.imc 45 | key: imc-resources-linux 46 | 47 | osx: 48 | runs-on: macos-10.14 49 | steps: 50 | - uses: actions/checkout@v2 51 | - name: Set up Python 3.8 52 | uses: actions/setup-python@v2 53 | with: 54 | python-version: 3.8 55 | - name: Install dependencies 56 | run: | 57 | python -m pip install --upgrade pip 58 | pip install wheel pytest 59 | pip install .[deepcell,astir] 60 | - name: Test with pytest 61 | run: | 62 | # Test package 63 | python -m pytest imc/ 64 | 65 | # Run example processing pipeline 66 | mkdir -p imctest 67 | cd imctest 68 | imc process https://zenodo.org/record/5018260/files/COVID19_brain_Patient03_ROI3_COVID19_olfactorybulb.txt?download=1 69 | 70 | # List output files 71 | ls -l processed/ 72 | ls -l processed/${SAMPLE} 73 | ls -l 
results/phenotyping 74 | - name: Cache resources 75 | id: cache-resources 76 | uses: actions/cache@v2 77 | with: 78 | path: /home/$USER/.imc 79 | key: imc-resources-osx 80 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # project specific 2 | data 3 | submission 4 | processed 5 | _models 6 | results 7 | 8 | *.tiff 9 | *.csv 10 | 11 | 12 | # ignore test files 13 | .tox 14 | _version.py 15 | pytest.log 16 | .coverage* 17 | 18 | # Build-related stuff 19 | build/ 20 | dist/ 21 | *.egg-info 22 | 23 | 24 | # toy/experimental files 25 | *.txt 26 | # *.csv 27 | *.tsv 28 | *.pkl 29 | *.pickle 30 | *.svg 31 | *.png 32 | *.jpg 33 | *.jpeg 34 | 35 | # ignore mypy 36 | .mypy* 37 | 38 | # ignore eggs 39 | .eggs/ 40 | 41 | # ignore built docs 42 | doc/build/* 43 | 44 | # generic ignore list: 45 | *.lst 46 | 47 | # Compiled source 48 | *.com 49 | *.class 50 | *.dll 51 | *.exe 52 | *.o 53 | *.so 54 | *.pyc 55 | 56 | # Packages 57 | # it's better to unpack these files and commit the raw source 58 | # git has its own built in compression methods 59 | *.7z 60 | *.dmg 61 | *.gz 62 | *.iso 63 | *.jar 64 | *.rar 65 | *.tar 66 | *.zip 67 | 68 | # Logs and databases 69 | *.log 70 | *.sql 71 | *.sqlite 72 | 73 | # OS generated files 74 | .DS_Store 75 | .DS_Store? 76 | ._* 77 | .Spotlight-V100 78 | .Trashes 79 | ehthumbs.db 80 | Thumbs.db 81 | 82 | # Sublime files 83 | *.sublime-* 84 | 85 | # Gedit temporary files 86 | *~ 87 | 88 | # libreoffice lock files: 89 | .~lock* 90 | 91 | # IDE-specific items 92 | .idea/ 93 | 94 | # pytest-related 95 | .cache/ 96 | .coverage* 97 | coverage.xml 98 | 99 | # Reserved files for comparison 100 | *RESERVE* 101 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-20.04 11 | tools: 12 | python: "3.9" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/source/conf.py 17 | 18 | # If using Sphinx, optionally build your docs in additional formats such as PDF 19 | # formats: 20 | # - pdf 21 | 22 | # Optionally declare the Python requirements required to build your docs 23 | python: 24 | system_packages: true 25 | install: 26 | - method: pip 27 | path: . 28 | extra_requirements: 29 | - doc 30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DEFAULT_GOAL := all 2 | 3 | 4 | NAME=$(shell basename `pwd`) 5 | DOCS_DIR="docs" 6 | 7 | 8 | help: ## Display help and quit 9 | @echo Makefile for the $(NAME) package. 
10 | @echo Available commands: 11 | @grep -E '^[0-9a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \ 12 | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m\ 13 | %s\n", $$1, $$2}' 14 | 15 | all: install test ## Install the package and run tests 16 | 17 | clean_build: 18 | rm -rf build/ 19 | 20 | clean_dist: 21 | rm -rf dist/ 22 | 23 | clean_eggs: 24 | rm -rf *.egg-info 25 | 26 | clean_mypy: 27 | rm -rf .mypy_cache/ 28 | 29 | clean_docs: 30 | rm -rf docs/build/* 31 | 32 | clean_tests: 33 | rm -rf /tmp/pytest* 34 | 35 | clean: clean_dist clean_eggs clean_build clean_mypy clean_docs ## Remove build, mypy cache, tests and docs 36 | 37 | _install: 38 | # python setup.py sdist 39 | # python -m pip wheel --no-index --no-deps --wheel-dir dist dist/*.tar.gz 40 | # python -m pip install dist/*-py3-none-any.whl --user --upgrade 41 | python -m pip install . 42 | 43 | install: ## Install the package 44 | ${MAKE} clean 45 | ${MAKE} _install 46 | ${MAKE} clean 47 | 48 | docs: ## Build the documentation 49 | ${MAKE} -C $(DOCS_DIR) html 50 | xdg-open $(DOCS_DIR)/build/html/index.html 51 | 52 | 53 | lint: 54 | -flake8 --count --ignore E501,F401,F841,W503,E402,E203,E266,E722 --exclude tests/ imc/ 55 | 56 | test: lint ## Run the tests 57 | python -m pytest -m "not slow" $(NAME)/ 58 | 59 | backup_time: 60 | echo "Last backup: " `date` >> _backup_time 61 | chmod 700 _backup_time 62 | 63 | _sync: 64 | rsync --copy-links --progress -r \ 65 | . afr4001@pascal.med.cornell.edu:projects/$(NAME) 66 | 67 | sync: _sync backup_time ## [dev] Sync data/code to SCU server 68 | 69 | build: test 70 | python setup.py sdist bdist_wheel 71 | 72 | pypitest: build 73 | twine \ 74 | upload \ 75 | -r pypitest dist/* 76 | 77 | pypi: build 78 | twine \ 79 | upload \ 80 | dist/* 81 | 82 | .PHONY : clean_build clean_dist clean_eggs clean_mypy clean_docs clean_tests \ 83 | clean _install install clean_docs docs test backup_time _sync sync \ 84 | build pypitest pypi 85 | -------------------------------------------------------------------------------- /Manifest.in: -------------------------------------------------------------------------------- 1 | include AUTHORS.md 2 | include CONTRIBUTING.md 3 | include CHANGELOG.md 4 | include LICENSE 5 | include README.md 6 | 7 | recursive-include requirements * 8 | recursive-include tests * 9 | recursive-include docs *.md *.rst conf.py Makefile make.bat 10 | recursive-exclude * __pycache__ 11 | recursive-exclude * *.py[co] 12 | 13 | global-include *.typed 14 | 15 | include logo.png 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 
5 | # Imaging mass cytometry
6 | 
7 | A package for processing and analysis of imaging mass cytometry (IMC) data.
8 | 
9 | It implements image- and channel-wise quality control, quantification of cell
10 | intensity and morphology, cell type discovery through clustering, automated
11 | cell type labeling, community and super-community finding, and differential
12 | comparisons between sample groups, in addition to many handy visualization tools.
13 | Above all, it is a tool for using IMC data at scale.
14 | 
15 | Development is still underway, so use at your own risk.
16 | 
17 | 
18 | ## Requirements and installation
19 | 
20 | Requires `Python >= 3.9`. `imc` uses a `pyproject.toml` configuration only, so you'll need an up-to-date version of `pip` before installing. System packages such as `gcc` and `g++` may also need to be installed, e.g. with `sudo apt install gcc g++` or your distribution's equivalent. We also highly recommend installing the package in a `conda` environment to avoid dependency issues.
21 | 
22 | To install the latest development version:
23 | ```bash
24 | git clone https://github.com/ElementoLab/imc.git
25 | cd imc
26 | make install
27 | ```
28 | 
29 | Install from [PyPI](https://pypi.org/project/imc/) with [`pip`](https://pip.pypa.io/) or with [poetry](https://python-poetry.org/):
30 | ```bash
31 | pip install imc
32 | # or
33 | poetry add imc
34 | ```
35 | 
36 | ## Quick start
37 | 
38 | Install the package from [PyPI](https://pypi.org/project/imc/) with the extra packages required for all steps:
39 | ```bash
40 | pip install imc[extra]
41 | # or
42 | poetry add imc[extra]
43 | ```
44 | 
45 | ### Use case 1 (pipeline processing)
46 | 
47 | #### Example: Lung sample processing from MCD to single-cell h5ad
48 | 
49 | One-line IMC data processing:
50 | ```bash
51 | # Run pipeline in one step with remote MCD file
52 | MCD_URL=https://zenodo.org/record/4110560/files/data/20200612_FLU_1923/20200612_FLU_1923.mcd
53 | imc process $MCD_URL
54 | ```
55 | `imc` also accepts TXT or TIFF files as input, either local or remote:
56 | ```bash
57 | # Run pipeline in one step with remote TXT file
58 | TXT_URL=https://zenodo.org/record/5018260/files/COVID19_brain_Patient03_ROI3_COVID19_olfactorybulb.txt?download=1
59 | imc process $TXT_URL
60 | ```
61 | Several files, in any mix of MCD, TIFF, or TXT, can be given to `imc process`
62 | at once (see the sketch below); the `--help` option lists all options.
63 | 
64 | `imc` is nonetheless very modular and allows the user to run any of the steps separately as well.
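For instance, a single invocation can process several local files of mixed formats (the file names below are hypothetical placeholders):
```bash
# Any combination of MCD, TIFF, and TXT inputs in one call
imc process data/sample_A.mcd data/sample_B.mcd data/extra_roi.txt
```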
65 | 
66 | The one-line `imc process` invocation is equivalent to running the following steps individually:
67 | ```bash
68 | MCD_URL=https://zenodo.org/record/4110560/files/data/20200612_FLU_1923/20200612_FLU_1923.mcd
69 | SAMPLE=20200612_FLU_1923
70 | 
71 | mkdir -p data && wget -O data/${SAMPLE}.mcd $MCD_URL
72 | 
73 | ## output description of acquired data
74 | imc inspect data/${SAMPLE}.mcd
75 | 
76 | ## convert MCD to TIFFs and auxiliary files
77 | imc prepare \
78 |     --ilastik \
79 |     --n-crops 0 \
80 |     --ilastik-compartment nuclear \
81 |     data/${SAMPLE}.mcd
82 | 
83 | ## For each TIFF file, output prediction of mask probabilities and segment them
84 | TIFFS=processed/${SAMPLE}/tiffs/${SAMPLE}*_full.tiff
85 | 
86 | ## Output pixel probabilities of nucleus, membrane and background using ilastik
87 | imc predict $TIFFS
88 | 
89 | ## Segment cell instances with DeepCell
90 | imc segment \
91 |     --from-probabilities \
92 |     --model deepcell \
93 |     --compartment both $TIFFS
94 | 
95 | ## Quantify channel intensity and morphology for each single cell in every image
96 | imc quantify $TIFFS
97 | ```
98 | 
99 | Once all MCD files of the project have been processed, create a concatenated AnnData object containing all cells in the project:
100 | 
101 | ```python
102 | from glob import glob
103 | import anndata
104 | 
105 | files = sorted(glob('processed/*.h5ad'))
106 | adatas = [anndata.read(f) for f in files]
107 | adata = anndata.concat(adatas)
108 | adata.write('processed/quant.h5ad')
109 | ```
110 | 
111 | To perform batch correction and cell clustering:
112 | ```bash
113 | ## Phenotype cells into clusters
114 | imc phenotype processed/quant.h5ad
115 | ```
116 | 
117 | There are many customization options for each step. Run `imc --help` or `imc <command> --help` to see them all.
118 | 
119 | `imc` also includes a lightweight interactive image viewer:
120 | ```bash
121 | imc view $TIFFS
122 | ```
123 | 
124 | There is also an interface to the more full-fledged `napari` image viewer:
125 | ```bash
126 | imc view --napari data/${SAMPLE}.mcd  # view MCD file
127 | napari $TIFFS  # view TIFF files directly with napari (requires napari)
128 | ```
129 | 
130 | A quick example of further downstream analysis of the single-cell data in an IPython/Jupyter session:
131 | ```python
132 | import scanpy as sc
133 | a = sc.read('processed/quant.h5ad')
134 | sc.pp.log1p(a)
135 | sc.pp.pca(a)
136 | sc.pp.neighbors(a)
137 | sc.tl.umap(a)
138 | sc.pl.umap(a, color=a.var.index)
139 | ```
140 | 
141 | ### Use case 2 (API usage)
142 | 
143 | #### Demo data (synthetic)
144 | ```python
145 | >>> from imc.demo import generate_project
146 | >>> prj = generate_project(n_samples=2, rois_per_sample=3, shape=(8, 8))
147 | >>> prj
148 | Project 'project' with 2 samples and 6 ROIs in total.
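>>> # quick sanity check, continuing the session above (2 samples x 3 ROIs):
>>> len(prj.rois)
6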
149 | 150 | >>> prj.samples # type: List[IMCSample] 151 | [Sample 'test_sample_01' with 3 ROIs, 152 | Sample 'test_sample_02' with 3 ROIs] 153 | 154 | >>> prj.rois # type: List[ROI] 155 | [Region 1 of sample 'test_sample_01', 156 | Region 2 of sample 'test_sample_01', 157 | Region 3 of sample 'test_sample_01', 158 | Region 1 of sample 'test_sample_02', 159 | Region 2 of sample 'test_sample_02', 160 | Region 3 of sample 'test_sample_02'] 161 | 162 | >>> prj.samples[0].rois # type: List[ROI] 163 | [Region 1 of sample 'test_sample_01', 164 | Region 2 of sample 'test_sample_01', 165 | Region 3 of sample 'test_sample_01'] 166 | 167 | >>> roi = prj.rois[0] # Let's assign one ROI to explore it 168 | >>> roi.channel_labels # type: pandas.Series; `channel_names`, `channel_metals` also available 169 | 0 Ch01(Ch01) 170 | 1 Ch02(Ch02) 171 | 2 Ch03(Ch03) 172 | Name: channel, dtype: object 173 | 174 | >>> roi.mask # type: numpy.ndarray 175 | array([[0, 0, 0, 0, 0, 0, 0, 0], 176 | [0, 0, 0, 0, 0, 0, 0, 0], 177 | [0, 0, 0, 0, 0, 0, 1, 0], 178 | [0, 0, 0, 0, 0, 0, 0, 0], 179 | [0, 2, 0, 0, 0, 3, 0, 0], 180 | [0, 0, 0, 0, 0, 0, 0, 0], 181 | [0, 0, 4, 0, 0, 0, 0, 0], 182 | [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32) 183 | 184 | >>> roi.stack.shape # roi.stack -> type: numpy.ndarray 185 | (3, 8, 8) 186 | 187 | >>> # QC 188 | >>> prj.channel_correlation() 189 | >>> prj.channel_summary() 190 | 191 | >>> # Cell type discovery 192 | >>> prj.cluster_cells() 193 | >>> prj.find_communities() 194 | 195 | ``` 196 | #### Demo data (real) 197 | ```python 198 | >>> import imc.demo 199 | >>> imc.demo.datasets 200 | ['jackson_2019_short', 'jackson_2019_short_joint'] 201 | 202 | >>> prj = imc.demo.get_dataset('jackson_2019_short') 203 | >>> prj # type: Project 204 | Project 'jackson_2019_short' with 4 samples and 4 ROIs in total. 205 | 206 | >>> prj.samples # type: List[IMCSample] 207 | [Sample 'BaselTMA_SP41_15.475kx12.665ky_10000x8500_5_20170905_90_88_X11Y5_242_a0' with 1 ROI, 208 | Sample 'BaselTMA_SP41_25.475kx12.665ky_8000x8500_3_20170905_90_88_X11Y5_235_a0' with 1 ROI, 209 | Sample 'BaselTMA_SP41_33.475kx12.66ky_8500x8500_2_20170905_24_61_X3Y4_207_a0' with 1 ROI, 210 | Sample 'BaselTMA_SP41_33.475kx12.66ky_8500x8500_2_20170905_33_61_X4Y4_215_a0' with 1 ROI] 211 | 212 | >>> prj.samples[0].channel_labels # type: pandas.Series 213 | chanel 214 | 0 Ar80(Ar80) 215 | 1 Ru96(Ru96) 216 | 2 Ru98(Ru98) 217 | 3 Ru99(Ru99) 218 | 4 Ru100(Ru100) 219 | 5 Ru101(Ru101) 220 | 6 Ru102(Ru102) 221 | 7 Ru104(Ru104) 222 | 8 HistoneH3(In113) 223 | 9 EMPTY(Xe126) 224 | 10 EMPTY(I127) 225 | 11 HistoneH3(La139) 226 | ... 227 | 42 vWF-CD31(Yb172) 228 | 43 mTOR(Yb173) 229 | 44 Cytokeratin7(Yb174) 230 | 45 PanCytokeratin-KeratinEpithelial(Lu175) 231 | 46 CleavedPARP-CleavedCaspase3(Yb176) 232 | 47 DNA1(Ir191) 233 | 48 DNA2(Ir193) 234 | 49 EMPTY(Pb206) 235 | 50 EMPTY(Pb207) 236 | 51 EMPTY(Pb208) 237 | Name: BaselTMA_SP41_15.475kx12.665ky_10000x8500_5_20170905_90_88_X11Y5_242_a0, dtype: object 238 | >>> prj.plot_channels(['DNA2', 'Ki67', "Cytokeratin7"]) 239 |
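>>> # A hedged way to save the figure above, assuming `plot_channels` returns a matplotlib Figure:
>>> fig = prj.plot_channels(['DNA2', 'Ki67', 'Cytokeratin7'])
>>> fig.savefig('channels.svg', bbox_inches='tight')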
240 | ```
241 | 
242 | #### Your own data
243 | 
244 | The best way is to provide a CSV file with one row per sample, or one row per ROI.
245 | That will ensure additional sample/ROI metadata is passed to the objects and used later in analysis.
246 | Pass the path to the CSV file to the `Project` object constructor:
247 | 
248 | ```python
249 | from imc import Project
250 | 
251 | prj = Project()  # will search current directory for Samples/ROIs
252 | 
253 | prj = Project(processed_dir="processed")  # will search `processed` for Samples/ROIs
254 | 
255 | prj = Project("path/to/sample/annotation.csv", processed_dir="processed")
256 | # ^^ will use metadata from CSV and use the files in `processed`.
257 | ```
258 | 
259 | However, if a CSV file is not given, `Project` will search the current directory or the
260 | argument of `processed_dir` for IMCSamples and ROIs.
261 | 
262 | The `processed_dir` directory can be structured in two ways:
263 | 1. One directory per sample.
264 |    - Inside there is a directory `"tiffs"` which contains the stack `"*_full.tiff"`, channel labels
265 |      `"*_full.csv"` and optionally a segmentation `"*_full_mask.tiff"`.
266 | 
267 | 2. All samples in the same directory `processed_dir`.
268 |    - Inside the one directory there are stack `"*_full.tiff"`, channel label `"*_full.csv"` and
269 |      optionally segmentation `"*_full_mask.tiff"` files.
270 | 
271 | The default is option 1. For option 2, pass `subfolder_per_sample=False`:
272 | 
273 | ```python
274 | prj = Project(subfolder_per_sample=False)
275 | ```
276 | 
277 | The expected files are produced by common preprocessing pipelines such as
278 | [imcpipeline](https://github.com/elementolab/imcpipeline) or [imcyto](https://nf-co.re/imcyto).
279 | 
280 | 
281 | ## Documentation
282 | 
283 | Documentation is for now mostly a skeleton but will be expanded soon:
284 | 
285 | ```bash
286 | make docs
287 | ```
288 | 
289 | ## Testing
290 | 
291 | Tests are still very limited, but you can run them this way:
292 | 
293 | ```bash
294 | pip install pytest  # install testing package
295 | python -m pytest --pyargs imc
296 | ```
297 | 
298 | For data processing, running the lung example above should confirm everything is working smoothly.
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS    ?=
7 | SPHINXBUILD   ?= sphinx-build
8 | SOURCEDIR     = source
9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
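# In practice this means e.g. `make html` or `make linkcheck` are forwarded
# verbatim to sphinx-build's make-mode.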
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 | 
3 | pushd %~dp0
4 | 
5 | REM Command file for Sphinx documentation
6 | 
7 | if "%SPHINXBUILD%" == "" (
8 | 	set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
--------------------------------------------------------------------------------
/docs/source/api.md:
--------------------------------------------------------------------------------
1 | # API
2 | 
3 | The great flexibility of `imc` comes from the ability to compose workflows using the API.
4 | 
5 | It provides a rich but abstract `Project` object (`imc.data_models.project.Project`, re-exported as `imc.Project`) and implements various modules building on it depending on the data type.
6 | 
7 | In addition, the `imc.ops` subpackage contains several analysis-independent methods and the `imc.utils` module provides low-level functions of general use.
8 | 
9 | ## imc.data_models.project
10 | ```{eval-rst}
11 | .. automodule:: imc.data_models.project
12 |    :members:
13 | ```
14 | 
15 | ## imc.data_models.sample
16 | ```{eval-rst}
17 | .. automodule:: imc.data_models.sample
18 |    :members:
19 | ```
20 | 
21 | ## imc.data_models.roi
22 | ```{eval-rst}
23 | .. automodule:: imc.data_models.roi
24 |    :members:
25 | ```
26 | 
27 | ## imc.ops
28 | ### imc.ops.signal
29 | ```{eval-rst}
30 | .. automodule:: imc.ops.signal
31 |    :members:
32 | ```
33 | ### imc.ops.compensation
34 | ```{eval-rst}
35 | .. automodule:: imc.ops.compensation
36 |    :members:
37 | ```
38 | ### imc.ops.mixture
39 | ```{eval-rst}
40 | .. automodule:: imc.ops.mixture
41 |    :members:
42 | ```
43 | ### imc.ops.domain
44 | ```{eval-rst}
45 | .. automodule:: imc.ops.domain
46 |    :members:
47 | ```
48 | ### imc.ops.quant
49 | ```{eval-rst}
50 | .. automodule:: imc.ops.quant
51 |    :members:
52 | ```
53 | ### imc.ops.clustering
54 | ```{eval-rst}
55 | .. automodule:: imc.ops.clustering
56 |    :members:
57 | ```
58 | ### imc.ops.adjacency
59 | ```{eval-rst}
60 | .. automodule:: imc.ops.adjacency
61 |    :members:
62 | ```
63 | ### imc.ops.community
64 | ```{eval-rst}
65 | .. automodule:: imc.ops.community
66 |    :members:
67 | ```
68 | ## imc.graphics
69 | ```{eval-rst}
70 | .. automodule:: imc.graphics
71 |    :members:
72 | ```
73 | 
74 | ## imc.utils
75 | ```{eval-rst}
76 | .. automodule:: imc.utils
77 |    :members:
78 | ```
79 | 
80 | ## imc.types
81 | ```{eval-rst}
82 | .. automodule:: imc.types
83 |    :members:
84 | ```
--------------------------------------------------------------------------------
/docs/source/changelog.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | 
3 | All notable changes to this project will be documented in this file.
4 | 
5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7 | 
8 | ## [Unreleased]
9 | ### Added
10 | -
11 | ### Changed
12 | -
13 | ### Removed
14 | -
15 | 
16 | ## [0.0.12] - 2021-07-19
17 | ### Added
18 | - functions to handle multi-cell masks (topological domains)
19 | - napari + napari_imc to view MCD files
20 | ### Changed
21 | - fix support of OSX in ilastik segmentation
22 | - centralized package data under `.imc`
23 | 
24 | ## [0.0.11] - 2021-07-01
25 | ### Added
26 | - Command `imc process`.
27 | 
28 | ## [0.0.10] - 2021-07-01
29 | ### Added
30 | - CI on GitHub Actions
31 | - add more CLI commands
32 | ### Changed
33 | - centralized package data under `.imc`
34 | - fix packaging
35 | 
36 | ## [0.0.8] - 2021-06-01
37 | ### Added
38 | - add `pyproject.toml`
39 | - support subcellular mask quantification
40 | ### Changed
41 | - rasterized linecollection plots by default
42 | 
43 | ## [0.0.7] - 2021-04-26
44 | ### Added
45 | - initial support for subcellular mask quantification
46 | - DeepCell postprocessing to match nuclear and cellular masks
47 | - function to plot and extract panorama images matching ROIs
48 | - Cellpose as segmentation method
49 | - add CLI command for segmentation
50 | ### Changed
51 | - rasterized linecollection plots by default
52 | 
53 | ## [0.0.6] - 2020-12-16
54 | ### Added
55 | - segmentation module
56 | - mask layers to support alternative segmentations
57 | ### Changed
58 | - rasterized linecollection plots by default
59 | ### Removed
60 | - graphics code that was abstracted to the `seaborn_extensions` module
61 | 
62 | 
63 | ## [0.0.5] - 2020-12-07
64 | ### Added
65 | - segmentation module
66 | - mask layers to support alternative segmentations
67 | ### Changed
68 | - export panoramas by default
69 | - support ome-tiff
70 | - upgrade to `imctools==2.1.0`
71 | 
72 | ## [0.0.4] - 2020-10-07
73 | 
74 | 
75 | ## [0.0.3] - 2020-06-17
76 | ### Changed
77 | - Patch `pathlib.Path` to support path building with `+` (operator overload)
78 | 
79 | ## [0.0.2] - 2020-06-15
80 | ### Added
81 | - Many features
82 | 
83 | 
84 | ## [0.0.1] - 2020-04-14
85 | ### Added
86 | - Project, Sample and ROI modules/objects
87 | 
88 | [Unreleased]: https://github.com/ElementoLab/imc/compare/0.0.12...HEAD
89 | [0.0.12]: https://github.com/ElementoLab/imc/compare/0.0.11...v0.0.12
90 | [0.0.11]: https://github.com/ElementoLab/imc/compare/0.0.10...v0.0.11
91 | [0.0.10]: https://github.com/ElementoLab/imc/compare/0.0.9...v0.0.10
92 | [0.0.9]: https://github.com/ElementoLab/imc/compare/0.0.8...v0.0.9
93 | [0.0.8]: https://github.com/ElementoLab/imc/compare/0.0.7...v0.0.8
94 | [0.0.7]: https://github.com/ElementoLab/imc/compare/0.0.6...v0.0.7
95 | [0.0.6]: https://github.com/ElementoLab/imc/compare/0.0.5...v0.0.6
96 | [0.0.5]: https://github.com/ElementoLab/imc/compare/0.0.4...v0.0.5
97 | [0.0.4]: https://github.com/ElementoLab/imc/compare/0.0.3...v0.0.4
98 | [0.0.3]: https://github.com/ElementoLab/imc/compare/0.0.2...v0.0.3
99 | [0.0.2]: https://github.com/ElementoLab/imc/compare/0.0.1...v0.0.2
100 | [0.0.1]: https://github.com/ElementoLab/imc/releases/tag/v0.0.1
-------------------------------------------------------------------------------- /docs/source/concepts.md: -------------------------------------------------------------------------------- 1 | # Concepts 2 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import sphinx_rtd_theme 5 | 6 | # If extensions (or modules to document with autodoc) are in another directory, 7 | # add these directories to sys.path here. If the directory is relative to the 8 | # documentation root, use os.path.abspath to make it absolute, like shown here. 9 | sys.path.insert(0, os.path.abspath("../../")) 10 | 11 | 12 | # Configuration file for the Sphinx documentation builder. 13 | # 14 | # This file only contains a selection of the most common options. For a full 15 | # list see the documentation: 16 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 17 | 18 | # -- Path setup -------------------------------------------------------------- 19 | 20 | # If extensions (or modules to document with autodoc) are in another directory, 21 | # add these directories to sys.path here. If the directory is relative to the 22 | # documentation root, use os.path.abspath to make it absolute, like shown here. 23 | # 24 | # import os 25 | # import sys 26 | # sys.path.insert(0, os.path.abspath('.')) 27 | 28 | 29 | # -- Project information ----------------------------------------------------- 30 | 31 | project = "imc" 32 | copyright = "2021, Andre Rendeiro" 33 | author = "Andre Rendeiro" 34 | 35 | 36 | # -- General configuration --------------------------------------------------- 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | "sphinx.ext.autodoc", 43 | "sphinx.ext.autosummary", 44 | "sphinx.ext.intersphinx", 45 | "sphinx.ext.todo", 46 | "sphinx.ext.coverage", 47 | "sphinx.ext.viewcode", 48 | # "numpydoc", # numpy-style docs 49 | "sphinx.ext.napoleon", # numpy-style docs 50 | "sphinx_issues", 51 | "myst_parser", # to use markdown 52 | "sphinxarg.ext", # for CLI parsing of arguments 53 | "sphinx_autodoc_typehints" # <- this would be handy when whole codebase has typehinting 54 | # "sphinxcontrib.jupyter", <- this could be useful to make jupyter NBs 55 | ] 56 | autodoc_typehints = "signature" # https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#confval-autodoc_typehints 57 | 58 | # Add any paths that contain templates here, relative to this directory. 59 | templates_path = ["_templates"] 60 | 61 | # List of patterns, relative to source directory, that match files and 62 | # directories to ignore when looking for source files. 63 | # This pattern also affects html_static_path and html_extra_path. 64 | exclude_patterns = [] 65 | 66 | 67 | # -- Options for type of input ----------------------------------------------- 68 | source_suffix = { 69 | ".rst": "restructuredtext", 70 | ".txt": "markdown", 71 | ".md": "markdown", 72 | } 73 | 74 | # -- Options for HTML output ------------------------------------------------- 75 | 76 | # The theme to use for HTML and HTML Help pages. See the documentation for 77 | # a list of builtin themes. 
78 | 79 | # html_theme = "alabaster" 80 | html_theme = "sphinx_rtd_theme" 81 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 82 | # html_theme = "sphinx_material" 83 | # html_theme_options = { 84 | # "color_primary": "#ff4500", 85 | # } 86 | 87 | # Add any paths that contain custom static files (such as style sheets) here, 88 | # relative to this directory. They are copied after the builtin static files, 89 | # so a file named "default.css" will overwrite the builtin "default.css". 90 | html_static_path = ["_static"] 91 | 92 | issues_github_path = "ElementoLab/imc" 93 | 94 | napoleon_numpy_docstring = True 95 | napoleon_google_docstring = False 96 | napoleon_use_param = False 97 | napoleon_use_ivar = True 98 | 99 | # Example configuration for intersphinx: refer to the Python standard library. 100 | intersphinx_mapping = { 101 | "python": ("http://docs.python.org/3", None), 102 | "urllib3": ("http://urllib3.readthedocs.org/en/latest", None), 103 | "numpy": ("http://docs.scipy.org/doc/numpy/", None), 104 | "scipy": ("https://docs.scipy.org/doc/scipy-1.3.0/reference/", None), 105 | "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), 106 | } 107 | -------------------------------------------------------------------------------- /docs/source/examples.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | -------------------------------------------------------------------------------- /docs/source/index.md: -------------------------------------------------------------------------------- 1 | # Welcome 2 | 3 | `imc` is a Python library for the analysis of imaging mass cytometry data. 4 | 5 | Head to the [installation](/install) to see installation instructions, to 6 | [usage](/usage) for quick use, or have a look at the catalogue of available 7 | functions in the [API](/api). 8 | 9 | 10 | ```{admonition} imc is still in development! 11 | This means things may change in the future, use at your own risk. 12 | ``` 13 | 14 | ## Contents 15 | 16 | ```{toctree} 17 | --- 18 | maxdepth: 1 19 | --- 20 | install.md 21 | usage.md 22 | examples.md 23 | concepts.md 24 | log_config.md 25 | api.md 26 | testing.md 27 | changelog.md 28 | ``` 29 | 30 | ## Links 31 | 32 | - Documentation: [http://imc.readthedocs.io/](http://imc.readthedocs.io/) 33 | - Issues and source code: [https://github.com/ElementoLab/imc](https://github.com/ElementoLab/imc) 34 | -------------------------------------------------------------------------------- /docs/source/install.md: -------------------------------------------------------------------------------- 1 | # Install 2 | -------------------------------------------------------------------------------- /docs/source/log_config.md: -------------------------------------------------------------------------------- 1 | # Logging and configuration 2 | -------------------------------------------------------------------------------- /docs/source/testing.md: -------------------------------------------------------------------------------- 1 | # Testing 2 | -------------------------------------------------------------------------------- /docs/source/usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | -------------------------------------------------------------------------------- /imc/__init__.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python
2 | 
3 | # allow forward references in type annotations to classes not yet defined
4 | from __future__ import annotations
5 | import os
6 | import sys
7 | import logging
8 | from functools import partialmethod
9 | from pathlib import Path as _Path
10 | 
11 | from outdated import warn_if_outdated
12 | from joblib import Memory
13 | import matplotlib
14 | import matplotlib.pyplot as plt
15 | import seaborn as _sns
16 | 
17 | try:
18 |     # Even though there is no "imc/_version" file in the repository,
19 |     # it should be generated by
20 |     # setuptools_scm when building the package
21 |     from imc._version import version
22 | 
23 |     __version__ = version
24 | except ImportError:
25 |     from setuptools_scm import get_version as _get_version
26 | 
27 |     version = __version__ = _get_version(root="..", relative_to=__file__)
28 | 
29 | 
30 | warn_if_outdated("imc", __version__)
31 | 
32 | plt.rcParams["svg.fonttype"] = "none"
33 | plt.rcParams["font.family"] = "Arial"
34 | plt.rcParams["font.sans-serif"] = ["Arial"]
35 | plt.rcParams["text.usetex"] = False
36 | 
37 | import scanpy as _sc
38 | 
39 | _sc.settings.n_jobs = -1
40 | 
41 | 
42 | def setup_logger(name: str = "imc", level: int = logging.INFO) -> logging.Logger:
43 |     """Set up the logger for the package."""
44 |     logger = logging.getLogger(name)
45 |     logger.setLevel(level)
46 | 
47 |     handler = logging.StreamHandler(sys.stdout)
48 |     handler.setLevel(level)
49 |     formatter = logging.Formatter("%(asctime)s - %(message)s")
50 |     handler.setFormatter(formatter)
51 |     logger.addHandler(handler)
52 |     return logger
53 | 
54 | 
55 | LOGGER = setup_logger()
56 | 
57 | # Set up joblib memory cache
58 | _Path.mkdir = partialmethod(_Path.mkdir, exist_ok=True, parents=True)
59 | JOBLIB_CACHE_DIR = _Path("~/.imc").expanduser()
60 | JOBLIB_CACHE_DIR.mkdir()
61 | MEMORY = Memory(location=JOBLIB_CACHE_DIR, verbose=0)
62 | 
63 | # Decorate seaborn clustermap
64 | # _sns.clustermap = colorbar_decorator(_sns.clustermap)
65 | 
66 | 
67 | from imc.data_models.project import Project
68 | from imc.data_models.sample import IMCSample
69 | from imc.data_models.roi import ROI
70 | 
--------------------------------------------------------------------------------
/imc/cli.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | """
4 | Command-line interface for the `imc` package:
5 | parses the chosen subcommand and dispatches to the corresponding script.
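Available subcommands: process, inspect, prepare, predict, segment, quantify, phenotype, illustrate and view.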
6 | """ 7 | 8 | import sys 9 | import argparse 10 | from argparse import RawTextHelpFormatter 11 | import typing as tp 12 | 13 | from imc._version import version 14 | from imc.scripts.process import main as process 15 | from imc.scripts.inspect_mcds import main as inspect 16 | from imc.scripts.prepare import main as prepare 17 | from imc.scripts.predict import main as predict 18 | from imc.scripts.segment_stacks import main as segment 19 | from imc.scripts.quantify import main as quantify 20 | from imc.scripts.phenotype import main as phenotype 21 | from imc.scripts.illustrate import main as illustrate 22 | from imc.scripts.view import main as view 23 | 24 | cli_config: tp.Dict[str, tp.Any] 25 | from imc.scripts import cli_config 26 | 27 | 28 | def main(cli: tp.Sequence[str] = None) -> int: 29 | parser = get_args() 30 | parser.add_argument("-v", "--version", action="version", version=version) 31 | main_args, cmd_args = parser.parse_known_args(cli) 32 | 33 | if main_args.command not in cli_config["subcommands"]: 34 | raise ValueError(f"Command '{main_args.command}' not known!") 35 | return eval(main_args.command)(cmd_args) 36 | 37 | 38 | def get_args() -> argparse.ArgumentParser: 39 | parser = argparse.ArgumentParser(**cli_config["main"], formatter_class=RawTextHelpFormatter) # type: ignore[index] 40 | 41 | subparsers = parser.add_subparsers(dest="command", required=True) 42 | 43 | for cmd in cli_config["subcommands"]: 44 | subparsers.add_parser(cmd, add_help=False) 45 | return parser 46 | 47 | 48 | if __name__ == "__main__": 49 | try: 50 | sys.exit(main()) 51 | except KeyboardInterrupt: 52 | sys.exit(1) 53 | -------------------------------------------------------------------------------- /imc/data_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ElementoLab/imc/9725b3ab72f2273cb4a702964fa8518c2f189e9c/imc/data_models/__init__.py -------------------------------------------------------------------------------- /imc/defaults.py: -------------------------------------------------------------------------------- 1 | from imc.types import Path 2 | 3 | # project 4 | DEFAULT_PROJECT_NAME = "project" 5 | DEFAULT_SAMPLE_NAME_ATTRIBUTE = "sample_name" 6 | DEFAULT_SAMPLE_GROUPING_ATTRIBUTEs = [DEFAULT_SAMPLE_NAME_ATTRIBUTE] 7 | DEFAULT_TOGGLE_ATTRIBUTE = "toggle" 8 | DEFAULT_PROCESSED_DIR_NAME = Path("processed") 9 | DEFAULT_RESULTS_DIR_NAME = Path("results") 10 | DEFAULT_PRJ_SINGLE_CELL_DIR = Path("single_cell") 11 | DEFAULT_ROI_NAME_ATTRIBUTE = "roi_name" 12 | DEFAULT_ROI_NUMBER_ATTRIBUTE = "roi_number" 13 | 14 | # # processed directory structure 15 | SUBFOLDERS_PER_SAMPLE = True 16 | ROI_STACKS_DIR = Path("tiffs") 17 | ROI_MASKS_DIR = Path("tiffs") 18 | ROI_UNCERTAINTY_DIR = Path("uncertainty") 19 | ROI_SINGLE_CELL_DIR = Path("single_cell") 20 | 21 | # sample 22 | DEFAULT_SAMPLE_NAME = "sample" 23 | DEFAULT_ROI_NAME_ATTRIBUTE = "roi_name" 24 | DEFAULT_ROI_NUMBER_ATTRIBUTE = "roi_number" 25 | DEFAULT_TOGGLE_ATTRIBUTE = "toggle" 26 | 27 | # roi 28 | SUBFOLDERS_PER_SAMPLE = True 29 | DEFAULT_ROI_NAME = "roi" 30 | ROI_STACKS_DIR = Path("tiffs") 31 | ROI_MASKS_DIR = Path("tiffs") 32 | ROI_UNCERTAINTY_DIR = Path("uncertainty") 33 | ROI_SINGLE_CELL_DIR = Path("single_cell") 34 | 35 | # graphics 36 | FIG_KWS = dict(dpi=300, bbox_inches="tight") 37 | -------------------------------------------------------------------------------- /imc/demo/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .generate_data import generate_project 2 | from .get_demo_data import DATASETS as _DATASETS, get_dataset 3 | 4 | datasets = list(_DATASETS.keys()) 5 | -------------------------------------------------------------------------------- /imc/demo/generate_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from typing import Tuple, List, Dict, Union 4 | import tempfile 5 | 6 | import numpy as np 7 | import scipy.ndimage as ndi 8 | import matplotlib.pyplot as plt 9 | import tifffile 10 | import pandas as pd 11 | import skimage 12 | 13 | from imc import Project 14 | from imc.types import Array, Figure, Path 15 | from imc.utils import filter_kwargs_by_callable as filter_kws 16 | 17 | 18 | def generate_mask( 19 | shape: Tuple[int, int] = (8, 8), 20 | seeding_density: float = 0.1, 21 | # widths: int = None, 22 | # connectivity: float = None 23 | ) -> Array: 24 | mask = np.zeros(shape, dtype=bool) 25 | # Cells are placed in an effective mask area which is not touching borders 26 | eff_mask = mask[1:-1, 1:-1] 27 | centroids = np.random.choice( 28 | np.arange(eff_mask.size), 29 | int(np.ceil(eff_mask.size * seeding_density)), 30 | replace=False, 31 | ) 32 | eff_mask.flat[centroids] = True # type: ignore 33 | mask[1:-1, 1:-1] = eff_mask 34 | return ndi.label(mask, structure=np.zeros((3, 3)))[0] 35 | 36 | 37 | def generate_disk_masks( 38 | shape: Tuple[int, int] = (128, 128), 39 | seeding_density: float = 0.1, 40 | disk_diameter: int = 10, 41 | ): 42 | mask = np.zeros(shape, dtype=bool) 43 | 44 | area = np.multiply(*mask.shape) 45 | n = int(np.ceil(mask.size * seeding_density) * (disk_diameter**2 / area)) 46 | centroids = np.random.choice(np.arange(mask.size), n, replace=False) 47 | 48 | r = disk_diameter // 2 49 | disk = skimage.morphology.disk(r) 50 | x = centroids // shape[0] 51 | y = centroids % shape[1] 52 | for i in range(n): 53 | s = mask[x[i] - r : x[i] + r + 1, y[i] - r : y[i] + r + 1].shape 54 | mask[x[i] - r : x[i] + r + 1, y[i] - r : y[i] + r + 1] = disk[: s[0], : s[1]] 55 | return ndi.label(mask)[0] 56 | 57 | 58 | def generate_stack( 59 | mask: Array, 60 | n_channels: int = 3, 61 | channel_coeffs: Array = None, 62 | channel_std: Array = None, 63 | n_cell_types: int = 2, 64 | cell_type_coeffs: Array = None, 65 | cell_type_std: Array = None, 66 | ) -> Array: 67 | # partition cells into cell types 68 | n_cells = (mask > 0).sum() 69 | cells = np.arange(mask.size)[mask.flat > 0] 70 | assigned_cells = np.array([], dtype=int) 71 | ct_cells = dict() 72 | for i in range(n_cell_types): 73 | available_cells = [c for c in cells if c not in assigned_cells] 74 | ct_cells[i] = np.random.choice( 75 | available_cells, 76 | int(np.floor(n_cells / n_cell_types)), 77 | replace=False, 78 | ) 79 | assigned_cells = np.append(assigned_cells, ct_cells[i]) 80 | ct_cells[i] = np.append(ct_cells[i], cells[~np.isin(cells, assigned_cells)]) 81 | assert sum([len(x) for x in ct_cells.values()]) == n_cells 82 | 83 | # assign intensity values 84 | stack = np.zeros((n_channels,) + mask.shape, dtype=float) 85 | std_sd = 0.1 86 | if channel_coeffs is None: 87 | channel_coeffs = np.random.choice(np.linspace(-5, 5), n_channels) 88 | if channel_std is None: 89 | channel_std = np.abs(channel_coeffs) * std_sd 90 | if cell_type_coeffs is None: 91 | cell_type_coeffs = np.random.choice(np.linspace(-5, 5), n_cell_types) 92 | if cell_type_std is None: 93 | cell_type_std = 
np.abs(cell_type_coeffs) * std_sd
94 |     # means = intercept + np.dot(
95 |     means = np.dot(
96 |         channel_coeffs.reshape((-1, n_channels)).T,
97 |         cell_type_coeffs.reshape((-1, n_cell_types)),
98 |     )
99 |     intercept = np.abs(means.min()) * 2
100 |     means += intercept
101 |     stds = channel_std.reshape((-1, n_channels)).T + cell_type_std.reshape(
102 |         (-1, n_cell_types)
103 |     )
104 | 
105 |     for cell_type in range(n_cell_types):
106 |         n = ct_cells[cell_type].size  # number of cells of the current cell type
107 |         for channel in range(n_channels):
108 |             stack[channel].flat[ct_cells[cell_type]] = np.random.normal(
109 |                 means[channel, cell_type], stds[channel, cell_type], n
110 |             )
111 | 
112 |     # make sure array is non-negative
113 |     if stack.min() < 0:
114 |         stack[stack == 0] = stack.min()
115 |         stack += abs(stack.min())
116 |     return stack
117 | 
118 | 
119 | def write_tiff(array: Array, output_file: Path) -> None:
120 |     fr = tifffile.TiffWriter(output_file)
121 |     fr.write(array)
122 |     fr.close()
123 | 
124 | 
125 | def write_roi_to_disk(mask: Array, stack: Array, output_prefix: Path) -> None:
126 |     # mask
127 |     write_tiff(mask, output_prefix + "_full_mask.tiff")
128 |     # stack
129 |     write_tiff(stack, output_prefix + "_full.tiff")
130 |     # channel_labels
131 |     labels = [str(c).zfill(2) for c in range(1, stack.shape[0] + 1)]
132 |     channel_labels = pd.Series([f"Ch{c}(Ch{c})" for c in labels], name="channel")
133 |     channel_labels.to_csv(output_prefix + "_full.csv")
134 | 
135 | 
136 | def visualize_roi(mask: Array, stack: Array) -> Figure:
137 |     fig, axes = plt.subplots(1, 5, figsize=(4 * 5, 4))
138 |     axes[0].set_title("Mask")
139 |     axes[0].imshow(mask, cmap="binary_r")
140 |     axes[1].set_title("RGB signal")
141 |     axes[1].imshow(np.moveaxis(stack, 0, -1) / stack.max())
142 |     for i, (ax, cmap) in enumerate(zip(axes[2:], ["Reds", "Greens", "Blues"])):
143 |         ax.set_title(f"Channel {i}")
144 |         ax.imshow(stack[i] / stack.max(), cmap=cmap)
145 |     return fig
146 | 
147 | 
148 | def generate_project(
149 |     name: str = None,
150 |     n_samples: int = 3,
151 |     rois_per_sample: int = 3,
152 |     root_dir: Path = None,
153 |     sample_names: List[str] = None,
154 |     return_object: bool = True,
155 |     visualize: bool = False,
156 |     **kwargs,
157 | ) -> Union[Project, Path]:
158 |     if name is None:
159 |         name = "test_project"
160 |     if root_dir is None:
161 |         root_dir = Path(tempfile.mkdtemp())
162 |     else:
163 |         root_dir = Path(root_dir)
164 |         root_dir.mkdir(exist_ok=True)
165 |     meta_dir = root_dir / "metadata"
166 |     meta_dir.mkdir(exist_ok=True)
167 |     processed_dir = root_dir / "processed"
168 |     processed_dir.mkdir(exist_ok=True)
169 | 
170 |     if sample_names is None:
171 |         sample_names = ["test_sample_" + str(i).zfill(2) for i in range(1, n_samples + 1)]
172 |     _meta: Dict[str, Dict[str, Union[str, int]]] = dict()
173 |     for sample in sample_names:
174 |         tiffs_dir = processed_dir / sample / "tiffs"
175 |         tiffs_dir.mkdir(exist_ok=True, parents=True)
176 |         for roi in range(1, rois_per_sample + 1):
177 |             roi_name = f"{sample}-{str(roi).zfill(2)}"
178 |             output_prefix = tiffs_dir / roi_name
179 |             mask = generate_mask(**filter_kws(kwargs, generate_mask))
180 |             stack = generate_stack(mask, **filter_kws(kwargs, generate_stack))
181 |             if visualize:
182 |                 visualize_roi(mask, stack)
183 |             write_roi_to_disk(mask, stack, output_prefix)
184 |             _meta[roi_name] = {"roi_number": roi, "sample_name": sample}
185 | 
186 |     # write metadata
187 |     meta = pd.DataFrame(_meta).T
188 |     meta.index.name = "roi_name"
189 |     meta.to_csv(meta_dir / "samples.csv")
190 |     return (
191 |         Project(
192 |             metadata=meta_dir / 
"samples.csv", 193 | processed_dir=processed_dir, 194 | results_dir=processed_dir.parent / "results", 195 | ) 196 | if return_object 197 | else root_dir 198 | ) 199 | -------------------------------------------------------------------------------- /imc/demo/get_demo_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import typing as tp 4 | import shutil 5 | import urllib.request as request 6 | from contextlib import closing 7 | import tarfile 8 | import tempfile 9 | import zipfile 10 | import re 11 | 12 | import requests 13 | from urlpath import URL 14 | import tifffile 15 | import numpy as np 16 | import pandas as pd 17 | 18 | from imc.types import Path 19 | from imc import Project 20 | 21 | 22 | DATASET_DB_PATH = Path("~").expanduser() / ".imc" / "demo_datasets" 23 | DATASETS = { 24 | "jackson_2019_short": "https://wcm.box.com/shared/static/eq1m5j972cf3b5jqoe2vdju3bg9e0r5n", 25 | "jackson_2019_short_joint": "https://wcm.box.com/shared/static/b8nxku3ywvenghxvvm4wki9znxwbenzb", 26 | "schwabenland_2021_full": "https://zenodo.org/record/5018260/files/COVID19_brain_all_patients_singletiffs_and_cellmasks.zip?download=1", 27 | } 28 | 29 | 30 | def _download_file(url: str, output_path: Path, chunk_size=1024) -> None: 31 | """ 32 | Download a file and write to disk in chunks (not in memory). 33 | 34 | Parameters 35 | ---------- 36 | url : :obj:`str` 37 | URL to download from. 38 | output_path : :obj:`str` 39 | Path to file as output. 40 | chunk_size : :obj:`int` 41 | Size in bytes of chunk to write to disk at a time. 42 | """ 43 | if url.startswith("ftp://"): 44 | with closing(request.urlopen(url)) as r: 45 | with open(output_path, "wb") as f: 46 | shutil.copyfileobj(r, f) 47 | else: 48 | response = requests.get(url, stream=True) 49 | with open(output_path, "wb") as outfile: 50 | outfile.writelines(response.iter_content(chunk_size=chunk_size)) 51 | 52 | 53 | def _decompress_tar_file(path: Path, output_root: Path = None) -> None: 54 | """Decompress a tar.xz file.""" 55 | with tarfile.open(path) as f: 56 | f.extractall(path.parent if output_root is None else output_root) 57 | 58 | 59 | def get_dataset(dataset_name: str, output_dir: Path = None) -> Project: 60 | DATASET_DB_PATH.mkdir() 61 | 62 | if dataset_name == "schwabenland_2021": 63 | return get_schwabenland_2021_data(output_dir) 64 | dataset_file = DATASET_DB_PATH / dataset_name + ".tar.gz" 65 | 66 | if output_dir is None: 67 | output_dir = Path(tempfile.TemporaryDirectory().name) 68 | 69 | if not dataset_file.exists(): 70 | _download_file(DATASETS[dataset_name], dataset_file) 71 | _decompress_tar_file(dataset_file, output_dir) 72 | return Project( 73 | name=dataset_name, 74 | processed_dir=output_dir / dataset_name / "processed", 75 | subfolder_per_sample="joint" not in dataset_name, 76 | ) 77 | 78 | 79 | def get_schwabenland_2021_data(output_dir: Path = None) -> Project: 80 | dataset_name = "schwabenland_2021" 81 | zip_file_url = ( 82 | "https://zenodo.org/record/5018260/files/" 83 | "COVID19_brain_all_patients_singletiffs_and_cellmasks.zip" 84 | "?download=1" 85 | ) 86 | 87 | if output_dir is None: 88 | output_dir = Path(tempfile.TemporaryDirectory().name).mkdir() 89 | 90 | zip_file = output_dir / dataset_name + "_imc_data.zip" 91 | 92 | if not zip_file.exists(): 93 | _download_file(zip_file_url, zip_file) 94 | with zipfile.ZipFile(zip_file) as zf: 95 | zf.extractall(output_dir) 96 | zip_file.unlink() 97 | 98 | for dir_ in filter(lambda x: x.is_dir(), 
output_dir.iterdir()): 99 | name = dir_.name 100 | _stack = list() 101 | _channel_names = list() 102 | for file in dir_.iterdir(): 103 | if "_mask.tiff" in file.as_posix(): 104 | mask = tifffile.imread(file) 105 | continue 106 | _stack.append(tifffile.imread(file)) 107 | _channel_names.append(file.stem) 108 | stack = np.asarray(_stack) 109 | channel_names = pd.Series(_channel_names) 110 | annotation = ( 111 | channel_names.str.split("_") 112 | .apply(pd.Series) 113 | .set_index(channel_names) 114 | .rename(columns={0: "marker", 1: "metal"}) 115 | ) 116 | annotation["mass"] = annotation["metal"].str.extract(r"(\d+)")[0].astype(int) 117 | stack = stack[annotation["mass"].rank().astype(int) - 1] 118 | annotation = annotation.sort_values("mass") 119 | annotation.index = annotation.index.str.replace("_", "(") + ")" 120 | labels = annotation.index.to_series().reset_index(drop=True).rename("channel") 121 | 122 | if "ROI" not in name: 123 | roi_number = "1" 124 | else: 125 | roi_number = re.findall(r"_ROI(\d)_", name)[0] 126 | name = re.sub(r"_ROI(\d)", "", name) 127 | 128 | od = (output_dir / "processed" / name / "tiffs").mkdir() 129 | output_prefix = od / name + f"-{roi_number}_full" 130 | tifffile.imwrite(output_prefix + ".tiff", stack) 131 | tifffile.imwrite(output_prefix + "_mask.tiff", mask) 132 | labels.to_csv(output_prefix + ".csv") 133 | 134 | shutil.rmtree(dir_) 135 | 136 | return Project(name=dataset_name, processed_dir=output_dir / "processed") 137 | 138 | 139 | def get_phillips_2021(output_dir: Path = None) -> Project: 140 | """ 141 | doi:10.3389/fimmu.2021.687673 142 | """ 143 | if output_dir is None: 144 | output_dir = Path(tempfile.TemporaryDirectory().name).mkdir() 145 | 146 | (output_dir / "processed").mkdir() 147 | 148 | dataset_name = "phillips_2021" 149 | base_url = URL("https://immunoatlas.org") 150 | group_id = "NOLN" 151 | project_id = "210614-2" 152 | cases = [f"NOLN2100{i}" for i in range(2, 10)] 153 | rois = ["A01"] 154 | markers = [ 155 | "DNA (Hoechst)", 156 | "T-bet", 157 | "GATA3", 158 | "FoxP3", 159 | "CD56", 160 | "TCR-γ/δ", 161 | "Tim-3", 162 | "CD30", 163 | "CCR6", 164 | "PD-L1", 165 | "TCR-β", 166 | "CD4", 167 | "CD2", 168 | "CD5", 169 | "Ki-67", 170 | "CD25", 171 | "CD134", 172 | "α-SMA", 173 | "CD20", 174 | "LAG3", 175 | "MUC-1/EMA", 176 | "CD11c", 177 | "PD-1", 178 | "Vimentin", 179 | "CD16", 180 | "IDO-1", 181 | "CD15", 182 | "EGFR", 183 | "VISTA", 184 | "Granzyme B", 185 | "CD206", 186 | "ICOS", 187 | "CD69", 188 | "CD45RA", 189 | "CD57", 190 | "CD3", 191 | "HLA-DR", 192 | "CD8", 193 | "BCL-2", 194 | "β-catenin", 195 | "CD7", 196 | "CD1a", 197 | "CD45RO", 198 | "CCR4/CD194", 199 | "CD163", 200 | "CD11b", 201 | "CD34", 202 | "Cytokeratin", 203 | "CD38", 204 | "CD68", 205 | "CD31", 206 | "Collagen IV", 207 | "CD138", 208 | "Podoplanin", 209 | "CD45", 210 | "MMP-9", 211 | "MCT", 212 | "CLA/CD162", 213 | "DNA (DRAQ5)", 214 | ] 215 | 216 | for case in cases: 217 | for roi in rois: 218 | print(case, roi) 219 | url = base_url / group_id / project_id / case / roi / f"{case}_{roi}.tif" 220 | roi = roi.replace("A", "") 221 | od = (output_dir / "processed" / case / "tiffs").mkdir() 222 | f = od / f"{case}-{roi}_full.tiff" 223 | if f.exists(): 224 | continue 225 | # Somehow the _download_file failed a few times 226 | _download_file(url.as_posix(), f) 227 | # resp = url.get() 228 | # with open(f, "wb") as handle: 229 | # handle.write(resp.content) 230 | pd.Series(markers, name="channel").to_csv(f.replace_(".tiff", ".csv")) 231 | 232 | return Project(name=dataset_name, 
processed_dir=output_dir / "processed") 233 | 234 | 235 | def get_allam_2021_data(output_dir: Path = None) -> Project: 236 | if output_dir is None: 237 | output_dir = Path(tempfile.TemporaryDirectory().name).mkdir() 238 | 239 | base_url = URL("https://raw.githubusercontent.com/coskunlab/SpatialViz/main/data") 240 | samples = [ 241 | y[0] + str(y[1]) for code in ["DT", "NT"] for y in zip([code] * 6, range(1, 7)) 242 | ] 243 | markers = [ 244 | "CD20", 245 | "CD3", 246 | "CD4", 247 | "CD45RO", 248 | "CD68", 249 | "CD8a", 250 | "Col1", 251 | "DNA1", 252 | "DNA2", 253 | "Ecadherin", 254 | "FoxP3", 255 | "GranzymeB", 256 | "Histone3", 257 | "Ki67", 258 | "PD1", 259 | "PDL1", 260 | "Pankeratin", 261 | "SMA", 262 | "Vimentin", 263 | ] 264 | 265 | for sample in samples: 266 | mask_url = base_url / "cell_masks" / f"{sample}_cell_Mask.tiff" 267 | for marker in markers: 268 | channel_url = base_url / "raw" / sample / f"{sample}_{marker}.tiff" 269 | -------------------------------------------------------------------------------- /imc/exceptions.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from imc.types import GenericType 3 | 4 | 5 | class AttributeNotSetError(Exception): 6 | pass 7 | 8 | 9 | def cast(arg: Optional[GenericType]) -> GenericType: 10 | """Remove `Optional` from `T`.""" 11 | if arg is None: 12 | raise AttributeNotSetError("Attribute cannot be None!") 13 | return arg 14 | -------------------------------------------------------------------------------- /imc/interactive_volume_viewer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | An example program to display a volumetric image from the command line. 
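Scroll through the slices with the 'w' (up) and 's' (down) keys.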
5 | """ 6 | 7 | import sys 8 | import typing as tp 9 | from urlpath import URL 10 | from functools import partial 11 | 12 | import imageio 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | from tqdm import tqdm 16 | 17 | from imc.types import Array, Axis, Figure, Path # https://github.com/ElementoLab/imc 18 | 19 | 20 | def multi_slice_viewer( 21 | volume: Array, up_key: str = "w", down_key: str = "s", **kwargs 22 | ) -> Figure: 23 | remove_keymap_conflicts({up_key, down_key}) 24 | print(f"Press '{up_key}' and '{down_key}' for scrolling through image channels.") 25 | 26 | fig, ax = plt.subplots() 27 | ax.volume = volume 28 | ax.index = volume.shape[0] // 2 29 | ax.imshow(volume[ax.index], **kwargs) 30 | fig.canvas.mpl_connect( 31 | "key_press_event", partial(process_key, up_key=up_key, down_key=down_key) 32 | ) 33 | return fig 34 | 35 | 36 | def remove_keymap_conflicts(new_keys_set: tp.Set) -> None: 37 | for prop in plt.rcParams: 38 | if prop.startswith("keymap."): 39 | keys = plt.rcParams[prop] 40 | remove_list = set(keys) & new_keys_set 41 | for key in remove_list: 42 | keys.remove(key) 43 | 44 | 45 | def process_key(event, up_key: str = "w", down_key: str = "s") -> None: 46 | fig = event.canvas.figure 47 | ax = fig.axes[0] 48 | if event.key == up_key: 49 | previous_slice(ax) 50 | elif event.key == down_key: 51 | next_slice(ax) 52 | fig.canvas.draw() 53 | 54 | 55 | def previous_slice(ax: Axis) -> None: 56 | """Go to the previous slice.""" 57 | volume = ax.volume 58 | ax.index = (ax.index - 1) % volume.shape[0] # wrap around using % 59 | ax.images[0].set_array(volume[ax.index]) 60 | 61 | 62 | def next_slice(ax: Axis) -> None: 63 | """Go to the next slice.""" 64 | volume = ax.volume 65 | ax.index = (ax.index + 1) % volume.shape[0] 66 | ax.images[0].set_array(volume[ax.index]) 67 | 68 | 69 | def get_volume() -> Array: 70 | base_url = URL("https://prod-images-static.radiopaedia.org/images/") 71 | start_n = 53734044 72 | length = 137 73 | 74 | imgs = list() 75 | for i in tqdm(range(length)): 76 | url = base_url / f"{start_n + i}/{i + 1}_gallery.jpeg" 77 | resp = url.get() 78 | c = resp.content 79 | imgs.append(imageio.read(c, format="jpeg").get_data(0)) 80 | img = np.asarray(imgs) 81 | return img 82 | 83 | 84 | def main() -> int: 85 | """ 86 | Run 87 | """ 88 | img_file = Path("/tmp/volumetric_image.npz") 89 | if not img_file.exists(): 90 | print("Downloading volumetric image.") 91 | img = get_volume() 92 | np.savez_compressed(img_file, img) 93 | else: 94 | img = np.load(img_file)["arr_0"] 95 | 96 | _ = multi_slice_viewer(img) 97 | print("Displaying volume.") 98 | print("Press 'w' for up and 's' for down.") 99 | plt.show(block=True) 100 | print("Done.") 101 | return 0 102 | 103 | 104 | if __name__ == "__main__" and "get_ipython" not in locals(): 105 | try: 106 | sys.exit(main()) 107 | except KeyboardInterrupt: 108 | sys.exit(1) 109 | -------------------------------------------------------------------------------- /imc/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ElementoLab/imc/9725b3ab72f2273cb4a702964fa8518c2f189e9c/imc/logo.png -------------------------------------------------------------------------------- /imc/ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ElementoLab/imc/9725b3ab72f2273cb4a702964fa8518c2f189e9c/imc/ops/__init__.py -------------------------------------------------------------------------------- 
--------------------------------------------------------------------------------
/imc/ops/adjacency.py:
--------------------------------------------------------------------------------
"""
Functions for single-cell adjacency.
"""

import typing as tp

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import scipy.ndimage as ndi
from skimage import exposure
from skimage import graph
import networkx as nx

import imc.data_models.roi as _roi
from imc.types import DataFrame, Series, Path

FIG_KWS = dict(bbox_inches="tight", dpi=300)
MAX_BETWEEN_CELL_DIST = 4


def get_adjacency_graph(
    roi: _roi.ROI,
    output_prefix: Path = None,
    max_dist: int = MAX_BETWEEN_CELL_DIST,
) -> nx.Graph:
    """
    Derive a spatial representation of cells in image using a graph.

    Parameters
    ----------
    roi: imc.ROI
        ROI object to derive graph for.

    output_prefix: Path
        Prefix to output file with graph.
        Defaults to sample root dir / 'single_cell'.

    max_dist: int
        Maximum distance to consider physical interaction between cells (graph edges).

    Returns
    -------
    networkx.Graph
        Adjacency graph for cells in ROI.
    """
    import pickle

    clusters = roi.clusters
    if clusters is None:
        print("ROI does not have assigned clusters.")

    output_prefix = Path(output_prefix or (roi.single_cell_dir / roi.name + "."))
    if not output_prefix.endswith("."):
        output_prefix += "."
    output_prefix.parent.mkdir()

    mask = roi.cell_mask

    # align mask with cell type assignment (this is only to remove border cells)
    if clusters is not None:
        mask[~np.isin(mask, roi.clusters.index)] = 0

    # Get the closest cell of each background point dependent on `max_dist`
    # # first measure the distance of each background point to the closest cell
    background = mask == 0
    d = ndi.distance_transform_edt(
        background, return_distances=True, return_indices=False
    )

    background = background & (d <= max_dist)
    i, j = ndi.distance_transform_edt(
        background, return_distances=False, return_indices=True
    )
    mask = mask[i, j]

    # Simply use mean of channels as distance
    stack = roi.stack
    if hasattr(roi, "channel_exclude"):
        stack = stack[~roi.channel_exclude]
    image_mean = np.asarray([exposure.equalize_hist(x) for x in stack]).mean(0)
    image_mean = (image_mean - image_mean.min()) / (
        np.percentile(image_mean, 98) - image_mean.min()
    )

    # Construct adjacency graph based on cell distances
    g = graph.rag_mean_color(image_mean, mask, connectivity=2, mode="distance")
    # g = skimage.future.graph.RAG(mask, connectivity=2)
    # remove background node (unfortunately it can't be masked beforehand)
    if 0 in g.nodes:
        g.remove_node(0)

    fig, ax = plt.subplots(1, 1)
    i = (image_mean * 255).astype("uint8")
    i = np.moveaxis(np.asarray([i, i, i]), 0, -1)
    lc = graph.show_rag(
        mask.astype("uint32"),
        g,
        i,
        ax=ax,
        img_cmap="viridis",
        edge_cmap="Reds",
        edge_width=1,
    )
    ax.axis("off")
    fig.colorbar(lc, fraction=0.03, ax=ax)
    ax.get_children()[0].set_rasterized(True)
    ax.get_children()[-2].set_rasterized(True)
    fig.savefig(output_prefix + "neighbor_graph.svg", **FIG_KWS)
    plt.close(fig)
    # add cluster label attribute
    if clusters is not None:
        nx.set_node_attributes(g, roi.clusters.to_dict(), name="cluster")
        nx.set_node_attributes(g, roi.clusters.index.to_series().to_dict(), name="obj_id")

    # save graph
    with open(output_prefix + "neighbor_graph.gpickle", "wb") as f:
        pickle.dump(g, f)
    return g
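# Illustrative sketch, not repository code: deriving and reloading the graph for
# one ROI; `prj` is a hypothetical imc Project and file naming follows the code above.
#
#     import pickle
#     roi = prj.rois[0]
#     g = get_adjacency_graph(roi, max_dist=4)
#     with open(roi.single_cell_dir / roi.name + ".neighbor_graph.gpickle", "rb") as f:
#         g2 = pickle.load(f)
#     assert g.number_of_nodes() == g2.number_of_nodes()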
def measure_cell_type_adjacency(
    roi: _roi.ROI,
    method: str = "random",
    adjacency_graph: nx.Graph = None,
    n_iterations: int = 100,
    inf_replace_method: str = "min",
    output_prefix: Path = None,
    plot: bool = True,
    save: bool = True,
) -> DataFrame:
    """
    Derive an aggregated measure of adjacency between cell types for one ROI.

    Parameters
    ----------
    roi: imc.ROI
        ROI object to derive graph for.

    method: str
        Method to normalize interactions by.
        - 'random': generate empirical background of expected interactions based on cell type abundance by randomization (permutation of cell type identities).
        - 'pharmacoscopy': method with analytical solution from Vladimer et al (10.1038/nchembio.2360). Not recommended for small images.
        Default is 'random'.

    adjacency_graph: networkx.Graph
        Adjacency graph per cell for ROI.
        Defaults to the `ROI.adjacency_graph` attribute.

    n_iterations: int
        Number of permutations to run when `method` == 'random'.
        Defaults to 100.

    inf_replace_method: str
        If `method` == 'pharmacoscopy', how to handle cases where interactions are not observed.

    output_prefix: Path
        Prefix to output file with graph.
        Defaults to sample root dir / 'single_cell'.

    plot: bool
        Whether to plot visualizations.
        Default is `True`.

    save: bool
        Whether to save output to disk.
        Default is `True`.

    Returns
    -------
    pandas.DataFrame
        DataFrame of cell type interactions normalized by `method`.
    """
    output_prefix = output_prefix or (
        roi.sample.root_dir / "single_cell" / roi.name + "."
    )
    if not output_prefix.endswith("."):
        output_prefix += "."

    cluster_counts = roi.clusters.value_counts()

    if adjacency_graph is None:
        adjacency_graph = roi.adjacency_graph

    import warnings  # Networkx warns that the output of nx.linalg.attrmatrix.attr_matrix will be an array instead of a matrix

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=FutureWarning)
        adj, order = nx.linalg.attrmatrix.attr_matrix(
            adjacency_graph, node_attr="cluster"
        )
    order = pd.Series(order).astype(
        roi.clusters.dtype
    )  # passing dtype at instantiation gives warning
    freqs = pd.DataFrame(adj, order, order).sort_index(axis=0).sort_index(axis=1)
    if save:
        freqs.to_csv(output_prefix + "cluster_adjacency_graph.frequencies.csv")

    if method == "random":
        norm_freqs = correct_interaction_background_random(
            roi, freqs, "cluster", n_iterations, save, output_prefix
        )
    elif method == "pharmacoscopy":
        norm_freqs = correct_interaction_background_pharmacoscopy(
            freqs, cluster_counts, roi.clusters.shape[0], inf_replace_method
        )
    else:
        raise ValueError("`method` must be one of 'random' or 'pharmacoscopy'.")
    if save:
        norm_freqs.to_csv(output_prefix + "cluster_adjacency_graph.norm_over_random.csv")

    if not plot:
        return norm_freqs
    v = norm_freqs.values.std() * 2
    fig, axes = plt.subplots(1, 2, sharey=True, figsize=(4 * 2, 4))
    kws = dict(cmap="RdBu_r", center=0, square=True, xticklabels=True, yticklabels=True)
    sns.heatmap(norm_freqs, robust=True, ax=axes[0], **kws)
    kws2 = dict(vmin=-v, vmax=v, cbar_kws=dict(label="Log odds interaction"))
    sns.heatmap(norm_freqs, ax=axes[1], **kws, **kws2)
    fig.savefig(
        output_prefix + "cluster_adjacency_graph.norm_over_random.heatmap.svg",
        **FIG_KWS,
    )
    plt.close(fig)
    del kws["square"]
    try:
        grid = sns.clustermap(norm_freqs, **kws, **kws2)
        grid.savefig(
            output_prefix + "cluster_adjacency_graph.norm_over_random.clustermap.svg",
            **FIG_KWS,
        )
        plt.close(grid.fig)
    except FloatingPointError:
        pass
    return norm_freqs

def correct_interaction_background_random(
    roi: _roi.ROI,
    freqs: DataFrame,
    attribute,
    n_iterations: int,
    save: bool,
    output_prefix: tp.Union[str, Path],
):
    values = {
        x: roi.adjacency_graph.nodes[x][attribute] for x in roi.adjacency_graph.nodes
    }
    shuffled_freqs = list()
    for _ in tqdm(range(n_iterations)):
        g2 = roi.adjacency_graph.copy()
        shuffled_attr = pd.Series(values).sample(frac=1)
        shuffled_attr.index = values
        nx.set_node_attributes(g2, shuffled_attr.to_dict(), name=attribute)
        import warnings

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            rf, rl = nx.linalg.attrmatrix.attr_matrix(g2, node_attr=attribute)
        rl = pd.Series(rl, dtype=roi.clusters.dtype)
        shuffled_freqs.append(
            pd.DataFrame(rf, index=rl, columns=rl).sort_index(axis=0).sort_index(axis=1)
        )
    shuffled_freq = pd.concat(shuffled_freqs)
    if save:
        shuffled_freq.to_csv(
            output_prefix
            + f"cluster_adjacency_graph.random_frequencies.all_iterations_{n_iterations}.csv"
        )
    shuffled_freq = shuffled_freq.groupby(level=0).sum().sort_index(axis=1)
    if save:
        shuffled_freq.to_csv(
            output_prefix + "cluster_adjacency_graph.random_frequencies.csv"
        )

    fl = np.log1p((freqs / freqs.values.sum()) * 1e6)
    sl = np.log1p((shuffled_freq / shuffled_freq.values.sum()) * 1e6)
    # make sure both contain all edges/nodes
    fl = fl.reindex(sl.index, axis=0).reindex(sl.index, axis=1).fillna(0)
    sl = sl.reindex(fl.index, axis=0).reindex(fl.index, axis=1).fillna(0)
    return fl - sl


def correct_interaction_background_pharmacoscopy(
    frequency_matrix: DataFrame,
    cluster_counts: Series,
    total_cells: int,
    inf_replace_method: tp.Optional[str] = "min_symmetric",
):
    c = np.log(total_cells)
    fa = np.log(frequency_matrix.sum().sum()) - c
    norms = pd.DataFrame()
    for ct1 in frequency_matrix.index:
        for ct2 in frequency_matrix.columns:
            with np.errstate(divide="ignore", invalid="ignore"):
                o = np.log(frequency_matrix.loc[ct1, ct2]) - np.log(
                    frequency_matrix.loc[ct1].sum()
                )
            if o == 0:
                norms.loc[ct1, ct2] = 0.0
                continue
            f1 = np.log(cluster_counts.loc[ct1]) - c
            f2 = np.log(cluster_counts.loc[ct2]) - c

            norms.loc[ct1, ct2] = o - (f1 + f2 + fa)
    if inf_replace_method is None:
        return norms

    # ways to replace -inf (cell types with no touching events):
    # 1. replace with the lowest non-inf value (de-emphasizes lack of touching)
    if inf_replace_method == "min":
        norm_freqs = norms.replace(-np.inf, norms[norms != (-np.inf)].min().min())
    # 2. replace with the negative of the highest value
    if inf_replace_method == "max":
        norm_freqs = norms.replace(-np.inf, -norms.max().max())
    # 3. one of the above, then make symmetric with X @ X.T and Z-score
    if inf_replace_method == "min_symmetric":
        norm_freqs = norms.replace(-np.inf, norms[norms != (-np.inf)].min().min())
        norm_freqs = norm_freqs @ norm_freqs.T
        norm_freqs = (norm_freqs - norm_freqs.values.mean()) / norm_freqs.values.std()
    if inf_replace_method == "max_symmetric":
        norm_freqs = norms.replace(-np.inf, norms[norms != (-np.inf)].max().max())
        norm_freqs = norm_freqs @ norm_freqs.T
        norm_freqs = (norm_freqs - norm_freqs.values.mean()) / norm_freqs.values.std()
    return norm_freqs
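# Illustrative sketch, not repository code: normalized cell type interactions for
# all ROIs of a hypothetical Project `prj`, concatenated for downstream use.
#
#     import pandas as pd
#     norm = pd.concat(
#         measure_cell_type_adjacency(roi, method="random", n_iterations=100).assign(roi=roi.name)
#         for roi in prj.rois
#     )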
3 | """ 4 | 5 | import typing as tp 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import matplotlib.pyplot as plt 10 | from tqdm import tqdm 11 | 12 | from anndata import AnnData 13 | import scanpy as sc 14 | 15 | from imc.types import DataFrame, Path 16 | from imc.graphics import rasterize_scanpy 17 | 18 | 19 | FIG_KWS = dict(bbox_inches="tight", dpi=300) 20 | sc.settings.n_jobs = -1 21 | 22 | 23 | DEFAULT_CELL_TYPE_REFERENCE = ( 24 | "https://gist.github.com/afrendeiro/4aa133c2fcb5eb0152957b11ec753b74/raw", 25 | Path(".imc.cell_type_reference.yaml"), 26 | ) 27 | 28 | 29 | def anndata_to_cluster_means( 30 | ann: AnnData, cluster_label: str, raw: bool = False 31 | ) -> DataFrame: 32 | means = dict() 33 | obj = ann if not raw else ann.raw 34 | for cluster in ann.obs[cluster_label].unique(): 35 | clust = ann.obs[cluster_label] == cluster 36 | means[cluster] = obj[clust, :].X.mean(0) 37 | mean_expr = pd.DataFrame(means, index=obj.var.index).sort_index(axis=1) 38 | mean_expr.columns.name = "cluster" 39 | return mean_expr 40 | 41 | 42 | def phenotyping( 43 | a: tp.Union[AnnData, Path], 44 | channels_include: tp.Sequence[str] = None, 45 | channels_exclude: tp.Sequence[str] = None, 46 | filter_cells: bool = True, 47 | z_score: bool = True, 48 | z_score_per: str = "roi", 49 | z_score_cap: float = 3.0, 50 | remove_batch: bool = True, 51 | batch_variable: str = "sample", 52 | dim_res_algos: tp.Sequence[str] = ("umap",), 53 | clustering_method: str = "leiden", 54 | clustering_resolutions: tp.Sequence[float] = (1.0,), 55 | ) -> AnnData: 56 | import anndata 57 | 58 | if "pymde" in dim_res_algos: 59 | import pymde 60 | if clustering_method == "parc": 61 | from parc import PARC 62 | 63 | # Checks 64 | reason = f"Can only Z-score values per 'roi' or 'sample'. '{z_score_per}' is not supported." 65 | assert z_score_per in ["sample", "roi"], reason 66 | reason = f"Clustering method '{clustering_method}' is not supported." 67 | assert clustering_method in ["leiden", "parc"] 68 | reason = "Can only use 'pca', 'umap', 'diffmap', or 'pymde' in `dim_res_algos`." 69 | assert all(x in ["pca", "umap", "diffmap", "pymde"] for x in dim_res_algos), reason 70 | 71 | if isinstance(a, Path): 72 | print(f"Reading h5ad file: '{a}'.") 73 | a = sc.read(a) 74 | 75 | if remove_batch: 76 | if a.obs[batch_variable].nunique() <= 1: 77 | print( 78 | "Batch correction not possible as only one batch detected. " 79 | "Check `batch_variable` keyord argument." 80 | ) 81 | remove_batch = False 82 | 83 | if "sample" not in a.obs.columns: 84 | a.obs["sample"] = a.obs["roi"].str.extract(r"(.*)-\d+")[0].fillna("") 85 | if a.raw is None: 86 | a.raw = a 87 | 88 | # Add morphological variables to obs 89 | sel = a.var.index.str.contains(r"\(") 90 | v = a.var.index[~sel] 91 | for col in v: 92 | a.obs[col] = a[:, col].X.tolist() 93 | a = a[:, sel] 94 | 95 | # Filter out channels 96 | if channels_exclude is not None: 97 | a = a[:, ~a.var.index.isin(channels_exclude)] 98 | if channels_include is not None: 99 | a = a[:, channels_include] 100 | a = a.copy() 101 | 102 | # # reduce DNA chanels to one, and move to obs 103 | dnas = a.var.index[a.var.index.str.contains(r"DNA\d")] 104 | a.obs["DNA"] = a[:, dnas].X.mean(1) 105 | a = a[:, ~a.var.index.isin(dnas)] 106 | 107 | # Filter out cells 108 | if filter_cells: 109 | if "solidity" not in a.obs.columns: 110 | print( 111 | "Could not filter cells based on solidity likely because morphological quantification was not performed!" 
        else:
            exclude = a.obs["solidity"] == 1
            p = (exclude).sum() / a.shape[0] * 100
            print(f"Filtered out {exclude.sum()} cells ({p:.2f} %)")
            a = a[~exclude, :].copy()  # apply the filter announced above

    # Scaling/Normalization
    print("Performing data scaling/normalization.")
    sc.pp.log1p(a)
    if z_score:
        _ads = list()
        for group in a.obs[z_score_per].unique():
            a2 = a[a.obs[z_score_per] == group, :].copy()
            sc.pp.scale(a2, max_value=z_score_cap)
            a2.X[a2.X < -z_score_cap] = -z_score_cap
            # print(a2.X.min(), a2.X.max())
            _ads.append(a2)
        a = anndata.concat(_ads)
        sc.pp.scale(a)
    if remove_batch:
        sc.pp.combat(a, batch_variable)
        sc.pp.scale(a)

    # Dimensionality reduction
    print("Performing dimensionality reduction.")
    sc.pp.pca(a)
    if remove_batch:
        sc.external.pp.bbknn(a, batch_key=batch_variable)
    else:
        sc.pp.neighbors(a)
    if "umap" in dim_res_algos:
        sc.tl.umap(a, gamma=25)
    if "diffmap" in dim_res_algos:
        sc.tl.diffmap(a)
    if "pymde" in dim_res_algos:
        a.obsm["X_pymde"] = pymde.preserve_neighbors(a.X, embedding_dim=2).embed().numpy()
        a.obsm["X_pymde2"] = (
            pymde.preserve_neighbors(
                a.X,
                embedding_dim=2,
                attractive_penalty=pymde.penalties.Quadratic,
                repulsive_penalty=None,
            )
            .embed()
            .numpy()
        )

    # Clustering
    print("Performing clustering.")
    if clustering_method == "leiden":
        for res in clustering_resolutions:
            sc.tl.leiden(a, resolution=res, key_added=f"cluster_{res}")
            a.obs[f"cluster_{res}"] = pd.Categorical(
                a.obs[f"cluster_{res}"].astype(int) + 1
            )
    elif clustering_method == "parc":
        for res in clustering_resolutions:
            p = PARC(
                a.X,
                neighbor_graph=a.obsp["connectivities"],
                random_seed=42,
                resolution_parameter=res,
            )
            p.run_PARC()
            a.obs[f"cluster_{res}"] = pd.Categorical(pd.Series(p.labels) + 1)

    print("Finished phenotyping.")
    return a
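# Illustrative sketch, not repository code: a typical call, assuming a previously
# quantified single-cell matrix saved as an h5ad file (paths and channel names
# are hypothetical).
#
#     a = phenotyping(
#         Path("processed/quantification.h5ad"),
#         channels_exclude=["80ArAr(ArAr80)", "190BCKG(BCKG190)"],
#         clustering_resolutions=(0.5, 1.0),
#     )
#     a.write("processed/quantification.phenotyped.h5ad")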

def plot_phenotyping(
    a: tp.Union[AnnData, Path],
    output_prefix: Path,
    tech_channels: tp.Sequence[str] = None,
    dim_res_algos: tp.Sequence[str] = ("umap",),
    clustering_resolutions: tp.Sequence[float] = None,
):
    from matplotlib.backends.backend_pdf import PdfPages
    from imc.graphics import add_centroids
    from seaborn_extensions import clustermap

    # Read in
    if isinstance(a, Path):
        print(f"Reading h5ad file: '{a}'.")
        a = sc.read(a)
    a = a[a.obs.sample(frac=1).index]

    # Checks
    if output_prefix.is_dir():
        output_prefix = output_prefix / "phenotypes."
    if not output_prefix.endswith("."):
        output_prefix += "."
    output_prefix.parent.mkdir()

    if "sample" not in a.obs.columns:
        a.obs["sample"] = a.obs["roi"].str.extract(r"(.*)-\d+")[0].fillna("")

    if tech_channels is None:
        tech_channels = [
            "DNA",
            "eccentricity",
            "solidity",
            "area",
            "perimeter",
            "major_axis_length",
        ]
        tech_channels = [c for c in tech_channels if c in a.obs.columns]

    if clustering_resolutions is None:
        clustering_resolutions = (
            a.obs.columns[a.obs.columns.str.contains("cluster_")]
            .str.extract(r"cluster_(.*)$")[0]
            .astype(float)
        )

    # Plot projections
    non_tech_channels = a.var.index[~a.var.index.isin(tech_channels)].tolist()
    vmax = (
        [None]
        + np.percentile(a.raw[:, non_tech_channels].X, 95, axis=0).tolist()
        + np.percentile(a.obs[tech_channels], 95, axis=0).tolist()
        # + [None]
        + ([None] * len(clustering_resolutions))
    )
    color = (
        ["sample"]
        + non_tech_channels
        + tech_channels
        # + ["topological_domain"]
        + [f"cluster_{res}" for res in clustering_resolutions]
    )
    for algo in tqdm(dim_res_algos):
        f = output_prefix + f"{algo}.pdf"
        with PdfPages(f) as pdf:
            for i, col in enumerate(color):
                fig = sc.pl.embedding(
                    a,
                    basis=algo,
                    color=col,
                    show=False,
                    vmax=vmax[i],
                    use_raw=True,
                ).figure
                rasterize_scanpy(fig)
                if i >= len(color) - len(clustering_resolutions):
                    res = clustering_resolutions[i - len(color)]
                    add_centroids(a, res=res, ax=fig.axes[0], algo=algo)
                plt.figure(fig)
                pdf.savefig(**FIG_KWS)
                plt.close(fig)

        # Plot ROIs separately
        f = output_prefix + f"{algo}.sample_roi.pdf"
        projf = getattr(sc.pl, algo)
        fig = projf(a, color=["sample", "roi"], show=False)[0].figure
        rasterize_scanpy(fig)
        fig.savefig(f, **FIG_KWS)
        plt.close(fig)

    # Plot average phenotypes
    for res in tqdm(clustering_resolutions):
        df = a.to_df()[non_tech_channels].join(a.obs[tech_channels])

        # Drop variables with no variance
        v = df.var()
        if (v == 0).any():
            df = df.drop(v.index[v == 0], axis=1)

        cluster_means = df.groupby(a.obs[f"cluster_{res}"].values).mean()

        cell_counts = a.obs[f"cluster_{res}"].value_counts().rename("Cells per cluster")

        cell_percs = ((cell_counts / cell_counts.sum()) * 100).rename("Cells (%)")

        op = output_prefix + f"cluster_means.{res}_res."
        kws = dict(
            row_colors=cell_percs.to_frame().join(cell_counts),
            figsize=(10, 6 * res),
        )
        grid = clustermap(cluster_means, **kws)
        grid.savefig(op + "abs.svg")
        plt.close(grid.fig)

        grid = clustermap(cluster_means, **kws, config="z")
        grid.savefig(op + "zscore.svg")
        plt.close(grid.fig)

        # To plot topological domains:
        # df = (a.obs[args.sc_topo.columns.drop(["domain", "topological_domain"])]).replace(
        #     {"False": False, "True": True, "nan": np.nan}
        # )
        # topo_means = df.groupby(a.obs[f"cluster_{res}"].values).mean()
        # topo_means = topo_means.loc[:, topo_means.sum() > 0]

        # g = clustermap(
        #     topo_means.loc[cluster_means.index[grid.dendrogram_row.reordered_ind]],
        #     figsize=(3, 6 * res),
        #     config="z",
        #     row_cluster=False,
        #     cmap="PuOr_r",
        # )
        # g.savefig(op + "abs.topologic.svg")

        # g = clustermap(
        #     topo_means.loc[cluster_means.index[grid.dendrogram_row.reordered_ind]],
        #     figsize=(3, 6 * res),
        #     config="z",
        #     row_cluster=False,
        #     cmap="PuOr_r",
        # )
        # g.savefig(op + "zscore.topologic.svg")

        # grid = clustermap(cluster_means, **kws, config="z", row_cluster=False)
        # grid.savefig(op + "zscore.sorted.svg")
        # g = clustermap(
        #     topo_means,
        #     figsize=(3, 6 * res),
        #     config="z",
        #     row_cluster=False,
        #     cmap="PuOr_r",
        # )
        # g.savefig(op + "zscore.sorted.topologic.svg")
        # plt.close("all")


def predict_cell_types_from_reference(
    quant: tp.Union[AnnData, DataFrame, Path],
    output_prefix: Path,
    covariates: DataFrame,
    method: str = "astir",
    astir_reference: Path = None,
    astir_parameters: tp.Dict[str, tp.Any] = {},
):
    import anndata
    import yaml
    from imc.utils import download_file

    # Get dataframe with expression
    if isinstance(quant, Path):
        if quant.endswith("csv") or quant.endswith("csv.gz"):
            quant = pd.read_csv(quant, index_col=0)
        elif quant.endswith(".h5ad"):
            quant = anndata.read(quant)
    elif isinstance(quant, anndata.AnnData):
        quant = quant.to_df()

    # Remove metal label from column names
    quant.columns = quant.columns.str.extract(r"(.*)\(.*")[0].fillna(
        quant.columns.to_series().reset_index(drop=True)
    )

    if method != "astir":
        raise NotImplementedError("Only the `astir` method is currently supported.")

    # Prepare reference dictionary
    if astir_reference is not None:
        reference = yaml.safe_load(astir_reference.open())
    else:
        # if not DEFAULT_CELL_TYPE_REFERENCE[1].exists():
        download_file(DEFAULT_CELL_TYPE_REFERENCE[0], DEFAULT_CELL_TYPE_REFERENCE[1])
        ref = yaml.safe_load(DEFAULT_CELL_TYPE_REFERENCE[1].open())
        reference = dict()
        reference["cell_types"] = unroll_reference_dict(ref["cell_types"], False)
        reference["cell_states"] = unroll_reference_dict(ref["cell_states"], False)
    reference = filter_reference_based_on_available_markers(reference, quant.columns)

    res = astir(
        input_expr=quant,
        marker_dict=reference,
        design=covariates,
        output_prefix=output_prefix,
        **astir_parameters,
    )
    return res
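# Illustrative sketch, not repository code: predicting cell types with the default
# reference; `quant_df` (cells x channels) and `design_df` (cells x covariates)
# are hypothetical DataFrames.
#
#     res = predict_cell_types_from_reference(
#         quant_df,
#         output_prefix=Path("results/astir."),
#         covariates=design_df,
#     )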

def astir(
    input_expr: DataFrame,
    marker_dict: tp.Dict[str, tp.List[str]],
    design: DataFrame,
    output_prefix: Path,
    batch_size: int = None,
    max_epochs: int = 200,
    learning_rate: float = 2e-3,
    initial_epochs: int = 3,
    device: str = "cpu",
    plot: bool = True,
):
    from astir import Astir
    import torch

    if output_prefix.is_dir():
        output_prefix = output_prefix / "astir."
    output_prefix.parent.mkdir()

    ast = Astir(input_expr, marker_dict, design)
    ast._device = torch.device(device)
    if batch_size is None:
        batch_size = ast.get_type_dataset().get_exprs_df().shape[0] // 100

    params = dict(
        max_epochs=max_epochs,
        batch_size=batch_size,
        learning_rate=learning_rate,
        n_init_epochs=initial_epochs,
    )
    res = pd.DataFrame(index=input_expr.index)
    if "cell_types" in marker_dict:
        ast.fit_type(**params)
        _t = ast.get_celltypes()
        res = res.join(_t)
        _tp = ast.get_celltype_probabilities()
        _tp.columns = _tp.columns + "_probability"
        res = res.join(_tp)
        if plot:
            fig, ax = plt.subplots(1, 1, figsize=(4, 2))
            ax.plot(ast.get_type_losses(), label="loss")
            ax.legend()
            ax.set(xlabel="Epochs", ylabel="Loss")
            fig.savefig(output_prefix + "cell_type.loss.svg", **FIG_KWS)
            plt.close(fig)
    if "cell_states" in marker_dict:
        ast.fit_state(**params)
        _s = ast.get_cellstates()
        res = res.join(_s)
        if plot:
            fig, ax = plt.subplots(1, 1, figsize=(4, 2))
            ax.plot(ast.get_state_losses(), label="loss")
            ax.legend()
            ax.set(xlabel="Epochs", ylabel="Loss")
            fig.savefig(output_prefix + "cell_state.loss.svg", **FIG_KWS)
            plt.close(fig)
    ast.save_models(output_prefix + "fitted_model.hdf5")
    return res


def unroll_reference_dict(
    x: tp.Dict,
    name_with_predecessors: bool = True,
    max_depth: int = -1,
    _cur_depth: int = 0,
    _predecessors: tp.List[str] = [],
) -> tp.Dict:
    from copy import deepcopy

    x = deepcopy(x)
    new = dict()
    for k, v in x.items():
        if "markers" in v:
            name = " - ".join(_predecessors + [k]) if name_with_predecessors else k
            if v["markers"] != [None]:
                new[name] = v["markers"]
            v.pop("markers")
        if (
            isinstance(v, dict)
            and (len(v) > 0)
            and ((_cur_depth < max_depth) or max_depth == -1)
        ):
            new.update(
                unroll_reference_dict(
                    v,
                    name_with_predecessors=name_with_predecessors,
                    max_depth=max_depth,
                    _cur_depth=_cur_depth + 1,
                    _predecessors=_predecessors + [k],
                )
            )
    return new


def filter_reference_based_on_available_markers(
    x: tp.Dict, markers: tp.Sequence[str]
) -> tp.Dict:
    def _filter(x2):
        inter = dict()
        for k, v in x2.items():
            n = list(filter(lambda i: i in markers, v))
            if n:
                inter[k] = n
        return inter

    new = dict()
    new["cell_types"] = _filter(x["cell_types"])
    new["cell_states"] = _filter(x["cell_states"])
    return new
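# Illustrative sketch, not repository code: what `unroll_reference_dict` does to a
# small, made-up nested reference (marker names are hypothetical).
#
#     ref = {"T cell": {"markers": ["CD3"], "CD4 T": {"markers": ["CD4"]}}}
#     unroll_reference_dict(ref)
#     # -> {'T cell': ['CD3'], 'T cell - CD4 T': ['CD4']}
#     unroll_reference_dict(ref, name_with_predecessors=False)
#     # -> {'T cell': ['CD3'], 'CD4 T': ['CD4']}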
3 | """ 4 | 5 | import typing as tp 6 | from collections import Counter 7 | 8 | import numpy as np 9 | import pandas as pd 10 | import matplotlib.pyplot as plt 11 | import seaborn as sns 12 | from tqdm import tqdm 13 | import parmap 14 | from anndata import AnnData 15 | import scanpy as sc 16 | import community 17 | 18 | import imc.data_models.roi as _roi 19 | from imc.exceptions import cast 20 | from imc.types import Series, Path 21 | from imc.graphics import add_legend 22 | 23 | 24 | FIG_KWS = dict(bbox_inches="tight", dpi=300) 25 | 26 | DEFAULT_SINGLE_CELL_RESOLUTION = 1.0 27 | MAX_BETWEEN_CELL_DIST = 4 28 | DEFAULT_COMMUNITY_RESOLUTION = 0.005 29 | DEFAULT_SUPERCOMMUNITY_RESOLUTION = 0.5 30 | # DEFAULT_SUPER_COMMUNITY_NUMBER = 12 31 | 32 | 33 | def find_communities( 34 | roi: _roi.ROI, 35 | community_resolution: float = DEFAULT_COMMUNITY_RESOLUTION, 36 | plot: bool = True, 37 | ) -> tp.Tuple[Series, tp.Tuple]: 38 | # def networkx_to_igraph(graph): 39 | # import igraph as ig 40 | # g = ig.Graph(edges=list(graph.edges)) 41 | # # If the original graph has non-consecutive integer labels, 42 | # # igraph will create a node for the non existing vertexes. 43 | # # These can simply be removed from the graph. 44 | # nodes = pd.Series(list(graph.nodes)) 45 | # vertexes = pd.Series(range(len(g.vs))) 46 | # g.delete_vertices(vertexes[~vertexes.isin(nodes)].values) 47 | # return g 48 | 49 | def get_community_members(partition: tp.Dict) -> tp.Dict: 50 | counts = Counter(partition) 51 | # {com: members} 52 | comms: tp.Dict[int, set] = dict() 53 | for com in counts.keys(): 54 | comms[com] = set() 55 | for n, com in partition.items(): 56 | comms[com].add(n) 57 | return comms 58 | 59 | def get_community_cell_type_composition(roi: _roi.ROI, partition: Series): 60 | cts = dict() 61 | for com, members in get_community_members(partition).items(): 62 | # cts[f"{roi.sample.name} - {roi.roi_number} - {com}"] = \ 63 | cts[com] = roi.clusters.loc[members].value_counts() 64 | return ( 65 | pd.DataFrame(cts) 66 | .fillna(0) 67 | .rename_axis(index="cell_type", columns="community") 68 | .astype(int) 69 | ) 70 | 71 | # Community finding in graph (overclustering) 72 | roi_output_prefix = roi.sample.root_dir / "single_cell" / (roi.name + ".communities.") 73 | 74 | # TODO: use leiden instead of louvain 75 | # g = networkx_to_igraph(roi.adjacency_graph) 76 | # p = partitions[roi] = pd.Series( 77 | # la.find_partition( 78 | # g, la.RBConfigurationVertexPartition, 79 | # resolution_parameter=community_resolution).membership, 80 | # name="community", index=roi.adjacency_graph.nodes).sort_index() 81 | partition = pd.Series( 82 | community.best_partition( 83 | roi.adjacency_graph, resolution=community_resolution 84 | ), # , weight="expr_weight") 85 | name="community", 86 | ).sort_index() 87 | n = partition.value_counts().shape[0] 88 | tqdm.write(f"Found {n} communities for ROI {roi}.") 89 | partition += 1 90 | partition.to_csv(roi_output_prefix + "graph_partition.csv") 91 | comps = ( 92 | get_community_cell_type_composition(roi, partition) 93 | .T.assign(sample=roi.sample.name, roi=roi.name) 94 | .set_index(["sample", "roi"], append=True) 95 | ) 96 | comps.index = comps.index.reorder_levels(["sample", "roi", "community"]) 97 | 98 | if plot: 99 | # get cell type counts per community 100 | comps_s = comps.reset_index(level=["sample", "roi"], drop=True) 101 | percent = (comps_s.T / comps_s.sum(1)) * 100 102 | grid = sns.clustermap( 103 | percent, metric="correlation", cbar_kws=dict(label="% of cell type") 104 | ) 105 | 
        grid.savefig(roi_output_prefix + "cell_type_composition.svg", **FIG_KWS)
        grid = sns.clustermap(
            percent,
            z_score=1,
            cmap="RdBu_r",
            center=0,
            metric="correlation",
            cbar_kws=dict(label="% of cell type (Z-score)"),
        )
        grid.savefig(roi_output_prefix + "cell_type_composition.zscore.svg", **FIG_KWS)
    return partition, comps


def cluster_communities(
    rois: tp.Sequence[_roi.ROI],
    output_prefix: Path = None,
    supercommunity_resolution: float = DEFAULT_SUPERCOMMUNITY_RESOLUTION,
) -> Series:
    from scipy.cluster.hierarchy import fcluster

    output_prefix = output_prefix or (
        rois[0].prj.processed_dir / "single_cell" / (rois[0].prj.name + ".communities.")
    )
    output_prefix = cast(output_prefix)

    res = parmap.map(find_communities, rois)
    partitions = {k: v[0] for k, v in zip(rois, res)}
    composition = pd.concat([v[1] for v in res]).fillna(0).astype(int).sort_index()
    composition.to_csv(output_prefix + ".all_communities.cell_type_composition.csv")

    print(f"Found {composition.shape[0]} communities across all ROIs.")

    composition = pd.read_csv(
        output_prefix + ".all_communities.cell_type_composition.csv",
        index_col=[0, 1, 2],
    )

    # Cluster communities by leiden clustering based on cell type composition
    a = AnnData(composition)
    sc.pp.log1p(a)
    sc.pp.neighbors(a)
    sc.tl.leiden(a, resolution=supercommunity_resolution, key_added="supercommunity")
    n_scomms = len(a.obs["supercommunity"].unique())
    print(f"Found {n_scomms} supercommunities.")
    # Make supercommunities 1-based (to distinguish from masks where 0 == background)
    a.obs["supercommunity"] = pd.Categorical(a.obs["supercommunity"].astype(int) + 1)
    sc.tl.umap(a)
    sc.pp.pca(a)

    # DataFrame(cell vs [celltype, community, supercommunity])
    _assignments = list()
    for roi in rois:
        # {cell: cell type}
        if roi.clusters.dtype == "int" and roi.clusters.min() == 0:
            c1 = (
                roi.clusters + 1
            )  # TODO: this +1 should be removed when clustering is re-run with the new implementation
        else:
            c1 = roi.clusters
        # {cell: community}
        c2 = pd.Series(partitions[roi], name="community").rename_axis(index="obj_id")
        scomm = a.obs.loc[(roi.sample.name, roi.name), "supercommunity"].astype(int)
        assert c2.value_counts().shape[0] == scomm.shape[0]
        c3 = c2.replace(scomm.to_dict()).rename("supercommunity")
        assert c3.max() <= n_scomms
        assert c1.shape == c2.shape == c3.shape
        assert (c1.index == c2.index).all()
        assert (c2.index == c3.index).all()
        c = c1.to_frame().join(c2).join(c3)
        assert roi.clusters.shape[0] == c.shape[0]
        c["sample"] = roi.sample.name
        c["roi"] = roi.roi_number
        _assignments.append(c)
    assignments = pd.concat(_assignments).set_index(["sample", "roi"], append=True)
    assignments.index = assignments.index.reorder_levels(["sample", "roi", "obj_id"])

    # Further merge supercommunities if distant by less than X% of composition
    # TODO: revise supercommunity merging
    max_supercommunity_difference = 10.0
    comp = assignments.assign(count=1).pivot_table(
        index="supercommunity",
        columns="cluster",
        values="count",
        aggfunc=sum,
        fill_value=0,
    )

    perc = (comp.T / comp.sum(1)).T * 100
    diffs = pd.DataFrame(
        np.sqrt(abs(perc.values - perc.values[:, None]).sum(axis=2)),
        index=perc.index,
        columns=perc.index,
    )
    grid = sns.clustermap(diffs)
    repl = pd.Series(
        dict(
            zip(
                grid.data.columns,
                fcluster(
                    grid.dendrogram_col.linkage,
                    t=max_supercommunity_difference,
                    criterion="distance",
                ),
            )
        )
    ).sort_index()

    comp.index = comp.index.to_series().replace(repl)
    comp = comp.groupby(level=0).sum()

    assignments["supercommunity"] = assignments["supercommunity"].replace(repl)

    # make sure supercommunity labels are numbered by the abundance of their cell types
    s = assignments["supercommunity"].value_counts().sort_values(ascending=False)
    assignments["supercommunity"] = assignments["supercommunity"].replace(
        dict(zip(s.index, np.arange(1, len(s) + 1)))
    )

    # save final assignments
    assignments.to_csv(output_prefix + "cell_type.community.supercommunities.csv")

    # Visualize
    # # visualize initial communities in clustermap, PCA or UMAP
    perc = (composition.T / composition.sum(1)).T * 100
    grid = sns.clustermap(perc, metric="correlation", rasterized=True)
    grid.savefig(
        output_prefix
        + "communities.cell_type_composition.leiden_clustering.clustermap_viz.svg",
        **FIG_KWS,
    )
    grid = sns.clustermap(
        np.log1p(composition),
        row_linkage=grid.dendrogram_row.linkage,
        col_linkage=grid.dendrogram_col.linkage,
        metric="correlation",
        row_colors=plt.get_cmap("tab20")(a.obs["supercommunity"].astype(int)),
        rasterized=True,
    )
    grid.savefig(
        output_prefix
        + "communities.cell_type_composition.leiden_clustering.clustermap_viz.counts.svg",
        **FIG_KWS,
    )
    for method in ["pca", "umap"]:
        fig = getattr(sc.pl, method)(
            a,
            color=["supercommunity"] + a.var.index.tolist(),
            return_fig=True,
            show=False,
        )
        fig.savefig(
            output_prefix
            + f"communities.cell_type_composition.leiden_clustering.{method}_viz.svg",
            **FIG_KWS,
        )

    # # visualize the reduction of supercommunities based on the difference threshold
    grid = sns.clustermap(
        diffs,
        col_colors=plt.get_cmap("tab20")(repl.values),
        row_colors=plt.get_cmap("tab20")(repl.values),
        cbar_kws=dict(label="Sqrt(Sum(diff))"),
    )
    grid.savefig(
        output_prefix + "supercommunities.reduction_by_diff.clustermap.svg",
        **FIG_KWS,
    )

    # assignments = pd.read_csv(output_prefix + "cell_type.community.supercommunities.csv", index_col=[0, 1, 2])
    # # cell type vs {community, supercommunity}
    for var_ in ["community", "supercommunity"]:
        supercts = assignments.assign(count=1).pivot_table(
            index="cluster",
            columns=var_,
            values="count",
            aggfunc=sum,
            fill_value=0,
        )
        perc_supercts = (supercts / supercts.sum()) * 100

        grid = sns.clustermap(
            perc_supercts,
            metric="correlation",
            rasterized=True,
            cbar_kws=dict(label="% of supercommunity"),
        )
        grid.savefig(output_prefix + f"{var_}.cell_type_composition.svg", **FIG_KWS)
f"{var_}.cell_type_composition.zscore.svg", 303 | **FIG_KWS, 304 | ) 305 | 306 | leg_kws = dict(bbox_to_anchor=(0, -0.05)) 307 | 308 | vars_ = ["cluster", "community", "supercommunity"] 309 | n = len(rois) 310 | m = len(vars_) 311 | patches: tp.Dict[str, tp.List] = dict() 312 | fig, axes = plt.subplots( 313 | n, m, figsize=(4 * m, 4 * n), squeeze=False, sharex="row", sharey="row" 314 | ) 315 | for i, roi in enumerate(rois): 316 | for j, var_ in enumerate(vars_): 317 | if i == 0: 318 | patches[var_] = list() 319 | p = roi.plot_cell_types( 320 | ax=axes[i, j, np.newaxis, np.newaxis], 321 | cell_type_assignments=assignments.loc[ 322 | (roi.sample.name, roi.roi_number), var_ 323 | ], 324 | palette="nipy_spectral", 325 | ) 326 | patches[var_] += p 327 | for j, var_ in enumerate(vars_): 328 | if var_ == "community": 329 | continue 330 | add_legend(patches[var_], axes[-1, j], **leg_kws) # label="Super community", 331 | _z = zip( 332 | axes[0].squeeze(), 333 | ["Cell types", "Communities", "Super communities"], 334 | ) 335 | for axs, lab in _z: 336 | axs.set_title(lab) 337 | # TODO: limit rasterization to main image 338 | for axs in axes.flat: 339 | axs.set_rasterized(True) 340 | fig.savefig(output_prefix + "communities_supercommunities.all_rois.svg", **FIG_KWS) 341 | 342 | return assignments["supercommunity"] 343 | -------------------------------------------------------------------------------- /imc/ops/compensation.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | """ 4 | Functions for compensation of imaging mass cytometry data. 5 | """ 6 | 7 | from functools import partial 8 | import typing as tp 9 | 10 | import numpy as np 11 | import pandas as pd 12 | from scipy.optimize import nnls 13 | import parmap 14 | 15 | from imc import ROI 16 | from imc.types import Array, DataFrame 17 | 18 | 19 | def stack_to_flat_array(stack: Array) -> Array: 20 | return stack.reshape((stack.shape[0], -1)).T 21 | 22 | 23 | def _get_cytospill_spillover_matrix( 24 | array: DataFrame, subsample_frac: float = None, subsample_n: int = None 25 | ) -> Array: 26 | """ 27 | The columns of array must be metal labels (e.g. Nd142Di)! 28 | 29 | Requires the Github version of CytoSpill installed from a local clone, 30 | not through devtools pointing to the Github repo - not sure why. 

--------------------------------------------------------------------------------
/imc/ops/compensation.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python

"""
Functions for compensation of imaging mass cytometry data.
"""

from functools import partial
import typing as tp

import numpy as np
import pandas as pd
from scipy.optimize import nnls
import parmap

from imc import ROI
from imc.types import Array, DataFrame


def stack_to_flat_array(stack: Array) -> Array:
    return stack.reshape((stack.shape[0], -1)).T


def _get_cytospill_spillover_matrix(
    array: DataFrame, subsample_frac: float = None, subsample_n: int = None
) -> Array:
    """
    The columns of array must be metal labels (e.g. Nd142Di)!

    Requires the Github version of CytoSpill installed from a local clone,
    not through devtools pointing to the Github repo - not sure why.

    $ git clone https://github.com/KChen-lab/CytoSpill.git
    $ R CMD INSTALL CytoSpill/
    """
    from rpy2.robjects import numpy2ri, pandas2ri
    from rpy2.robjects.packages import importr

    numpy2ri.activate()
    pandas2ri.activate()

    cytospill = importr("CytoSpill")

    if subsample_frac is not None:
        subsample_n = int(array.shape[0] * subsample_frac)

    kwargs = dict()
    if subsample_n is not None:
        kwargs["n"] = subsample_n

    spillover_matrix, thresholds = cytospill.GetSpillMat(
        data=array,
        cols=np.arange(array.shape[1]),
        threshold=0.1,
        flexrep=5,
        neighbor=2,
        **kwargs,
    )
    # spillover_matrix = pd.DataFrame(spillover_matrix, index=df.columns, columns=df.columns)
    return spillover_matrix


def _get_correlation_spillover_matrix(array: Array, k=60) -> Array:
    return k ** np.corrcoef(array.T) / k


def get_spillover_matrix(array: Array, method: str = "cytospill", **kwargs) -> Array:
    """Estimate a spillover matrix with the chosen `method`."""
    if method == "cytospill":
        return _get_cytospill_spillover_matrix(array, **kwargs)
    if method == "correlation":
        return _get_correlation_spillover_matrix(array)
    raise ValueError("`method` must be one of 'cytospill' or 'correlation'.")


def compensate_array(
    flat_array: Array, spillover_matrix: Array, original_shape: tp.Tuple[int, int, int]
) -> Array:
    new_shape = original_shape[1:] + (original_shape[0],)
    _nnls = partial(nnls, spillover_matrix)
    res = parmap.map(_nnls, flat_array)
    comp = np.asarray([x[0] for x in res])
    return np.moveaxis(
        (comp).reshape(new_shape),
        -1,
        0,
    )


def compensate_image_stack(roi: ROI, normalize: bool = True) -> Array:
    from imc.segmentation import normalize as _normf

    stack = roi.stack
    if roi.channel_exclude is not None:
        if roi.channel_exclude.any():
            stack = stack[~roi.channel_exclude]
    if normalize:
        stack = _normf(stack)
    flat_array = stack_to_flat_array(stack)

    labels = roi.channel_labels[~roi.channel_exclude.values]
    metals = labels.str.extract(r".*\((.*)\)")[0] + "Di"
    df = pd.DataFrame(flat_array, columns=metals)  # .iloc[:, 4:-4]
    spill = get_spillover_matrix(df, subsample_n=2000)
    comp_stack = compensate_array(flat_array, spill, roi.stack.shape)
    return comp_stack
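# Illustrative sketch, not repository code: the correlation-based method needs no
# R installation; the data here is a synthetic flattened stack (pixels x channels).
#
#     import numpy as np
#     pixels = np.random.rand(1000, 5)
#     spill = get_spillover_matrix(pixels, method="correlation")
#     comp = compensate_array(pixels, spill, (5, 25, 40))  # back to (channels, h, w)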

--------------------------------------------------------------------------------
/imc/ops/domain.py:
--------------------------------------------------------------------------------
"""
Functions for image annotations.

"""

import os
import json
import typing as tp
from collections import Counter

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

import imc.data_models.roi as _roi
from imc.types import DataFrame, Array, Path


def label_domains(
    rois: tp.Sequence[_roi.ROI],
    output_dir: Path,
    export: bool = True,
    domains: tp.Sequence[str] = ["T", "S", "A", "L", "V", "E"],
    **kwargs,
) -> None:
    """
    Draw shapes outlining topological domains in tissue.
    This step is done manually using the `labelme` program.

    $ labelme --autosave --labels metadata/labelme_labels.txt
    """
    if export:
        export_images_for_topological_labeling(rois, output_dir, **kwargs)

    labels_f = (output_dir).mkdir() / "labelme_labels.txt"
    with open(labels_f, "w") as handle:
        handle.write("\n".join(domains))
    os.system(f"labelme --autosave --labels {labels_f} {output_dir}")


def export_images_for_topological_labeling(
    rois: tp.Sequence[_roi.ROI],
    output_dir: Path,
    channels: tp.Sequence[str] = ["mean"],
    overwrite: bool = False,
) -> None:
    """
    Export images (JPEG) for labeling with `labelme`.
    """
    for roi in tqdm(rois):
        f = output_dir / roi.name + ".jpg"
        if not overwrite and f.exists():
            continue
        array = roi._get_channels(channels, minmax=True, equalize=True)[1].squeeze()
        if array.ndim > 2:
            array = np.moveaxis(array, 0, -1)
        matplotlib.image.imsave(f, array)


def collect_domains(
    input_dir: Path, rois: tp.Sequence[_roi.ROI] = None, output_file: Path = None
) -> tp.Dict[str, tp.Dict]:
    if rois is not None:
        roi_names = [r.name for r in rois]

    filenames = list(input_dir.glob("*.json"))
    if rois is not None:
        filenames = [f for f in filenames if f.stem in roi_names]

    topo_annots = dict()
    for filename in tqdm(filenames):
        annot_f = filename.replace_(".jpg", ".json")
        if not annot_f.exists():
            continue
        with open(annot_f, "r") as handle:
            annot = json.load(handle)
        if annot["shapes"]:
            topo_annots[filename.stem] = annot["shapes"]
    if output_file is not None:
        with open(output_file, "w") as handle:
            json.dump(topo_annots, handle, indent=4)
    return topo_annots
99 | """ 100 | from imc.utils import polygon_to_mask 101 | from imc.graphics import legend_without_duplicate_labels 102 | from shapely.geometry import Polygon 103 | 104 | if domain_exclude is None: 105 | domain_exclude = [] 106 | 107 | output_dir.mkdir() 108 | 109 | labels = list(set(geom["label"] for n, j in topo_annots.items() for geom in j)) 110 | label_color = dict(zip(labels, sns.color_palette(cmap_str))) 111 | label_order = dict(zip(labels, range(1, len(labels) + 1))) 112 | cmap = plt.get_cmap(cmap_str)(range(len(labels) + 1)) 113 | cmap = np.vstack([[0, 0, 0, 1], cmap]) 114 | 115 | for roi_name in tqdm(topo_annots): 116 | roi = [r for r in rois if r.name == roi_name][0] 117 | shapes = topo_annots[roi_name] 118 | 119 | # re-order shapes so that largest are first 120 | areas = [ 121 | polygon_to_mask(shape["points"], roi.shape[1:][::-1]).sum() 122 | for shape in shapes 123 | ] 124 | shapes = np.asarray(shapes)[np.argsort(areas)[::-1]].tolist() 125 | 126 | annot_mask = np.zeros(roi.shape[1:]) 127 | for shape in shapes: 128 | if shape["label"] in domain_exclude: 129 | continue 130 | region = polygon_to_mask(shape["points"], roi.shape[1:][::-1]) 131 | annot_mask[region > 0] = label_order[shape["label"]] 132 | 133 | ar = roi.shape[1] / roi.shape[2] 134 | 135 | fig, axes = plt.subplots( 136 | 1, 2, figsize=(2 * 4, 4 * ar), gridspec_kw=dict(wspace=0, hspace=0) 137 | ) 138 | extra_txt = ( 139 | "" 140 | if getattr(roi, "attributes", None) is None 141 | else "; ".join([str(getattr(roi, attr)) for attr in roi.attributes]) 142 | ) 143 | 144 | axes[0].set(title=roi.name + "\n" + extra_txt) 145 | roi.plot_channels(channels, axes=[axes[0]], merged=True) 146 | 147 | shape_types: Counter[str] = Counter() 148 | for shape in shapes: 149 | label: str = shape["label"] 150 | if label in domain_exclude: 151 | continue 152 | shape_types[label] += 1 153 | c = Polygon(shape["points"]).centroid 154 | axes[1].text( 155 | c.x, 156 | c.y, 157 | s=f"{label}{shape_types[label]}", 158 | ha="center", 159 | va="center", 160 | ) 161 | axes[0].plot( 162 | *np.asarray(shape["points"] + [shape["points"][0]]).T, 163 | label=label, 164 | color=cmap[label_order[label]], 165 | ) 166 | 167 | axes[1].imshow( 168 | annot_mask, 169 | cmap=matplotlib.colors.ListedColormap(cmap), 170 | vmax=len(label_color) + 1, 171 | interpolation="none", 172 | ) 173 | axes[1].set(title="Manual annotations") 174 | legend_without_duplicate_labels( 175 | axes[0], title="Domain:", bbox_to_anchor=(-0.1, 1), loc="upper right" 176 | ) 177 | for ax in axes: 178 | ax.axis("off") 179 | fig.savefig( 180 | output_dir / roi.name + ".annotations.pdf", 181 | dpi=300, 182 | bbox_inches="tight", 183 | ) 184 | plt.close(fig) 185 | 186 | cmd = f"""pdftk 187 | {output_dir}/*.annotations.pdf 188 | cat 189 | output 190 | {output_dir}/topological_domain_annotations.pdf""" 191 | os.system(cmd.replace("\n", " ")) 192 | 193 | if cleanup: 194 | files = output_dir.glob("*.annotations.pdf") 195 | for file in files: 196 | file.unlink() 197 | 198 | 199 | def get_domains_per_cell( 200 | topo_annots: tp.Dict[str, tp.Dict], 201 | rois: tp.Sequence[_roi.ROI], 202 | exclude_domains: tp.Sequence[str] = None, 203 | remaining_domain: tp.Union[str, tp.Dict[str, str]] = "background", 204 | resolution: str = "largest", 205 | nest_domains: bool = True, 206 | ) -> DataFrame: 207 | """ 208 | Generate annotation of topological domain each cell is contained in 209 | based on manual annotated masks. 
def get_domains_per_cell(
    topo_annots: tp.Dict[str, tp.Dict],
    rois: tp.Sequence[_roi.ROI],
    exclude_domains: tp.Sequence[str] = None,
    remaining_domain: tp.Union[str, tp.Dict[str, str]] = "background",
    resolution: str = "largest",
    nest_domains: bool = True,
) -> DataFrame:
    """
    Generate annotation of the topological domain each cell is contained in,
    based on manually annotated masks.

    Parameters
    ----------
    topo_annots: dict
        Dictionary of annotations for each ROI.
    rois: list
        List of ROI objects.
    exclude_domains: list[str]
        Domains to ignore.
    remaining_domain: str | dict[str, str]
        Name of domain to fill in for cells that do not fall under any domain annotation.
        If given a string, it will simply use that.
        If given a dict, the filled domain will be the value of the key which exists in the image.
        E.g. Annotating tumor/stroma domains. If an image has only domains of type 'Tumor',
        given `remaining_domain` == {'Tumor': 'Stroma', 'Stroma': 'Tumor'}, the remaining cells
        will be annotated with 'Stroma'. In an image annotated only with 'Stroma' domains,
        remaining cells will be annotated with 'Tumor' domains.
    resolution: str
        If `remaining_domain` is a dict, there may be more than one domain present in the image.
        A resolution method is thus needed to select which domain will be filled for the remaining cells.
        - 'largest' will choose as key of `remaining_domain` the largest annotated domain class.
        - 'unique' will be strict and only fill in if there is a unique domain.
    """
    from imc.utils import polygon_to_mask

    if exclude_domains is None:
        exclude_domains = []

    _full_assigns = list()
    for roi_name, shapes in tqdm(topo_annots.items()):
        roi = [r for r in rois if r.name == roi_name][0]
        mask = roi.mask
        cells = np.unique(mask)[1:]
        td_count: tp.Counter[str] = Counter()
        regions = list()
        _assigns = list()
        for shape in shapes:
            label = shape["label"]
            points = shape["points"]
            if label in exclude_domains:
                continue
            td_count[label] += 1
            points += [points[0]]
            region = polygon_to_mask(points, roi.shape[1:][::-1])
            regions.append(region)
            assign = (
                pd.Series(np.unique(mask[(mask > 0) & region]), name="obj_id")
                .to_frame()
                .assign(
                    roi=roi.name,
                    sample=roi.sample.name,
                    domain_id=f"{label}{td_count[label]}",
                )
            )
            _assigns.append(assign)

        ## if remaining_domain was explicitly annotated, skip
        if isinstance(remaining_domain, str):
            if remaining_domain in td_count:
                print(
                    f"ROI '{roi.name}' has been manually annotated"
                    " with remaining domains."
                )
                _full_assigns += _assigns
                continue

        ## add a domain for cells not annotated
        remain = ~np.asarray(regions).sum(0).astype(bool)
        existing = np.sort(pd.concat(_assigns)["obj_id"].unique())
        remain = remain & (~np.isin(mask, existing))
        if remain.sum() == 0:
            _full_assigns += _assigns
            continue

        if isinstance(remaining_domain, str):
            ### if given a string just make that the domain for unannotated cells
            domain = remaining_domain
            # print(f"ROI '{roi.name}' will be annotated with '{domain}' by default.")

        elif isinstance(remaining_domain, dict):
            ### if given a dict, choose what to label the remaining cells based on the existing domains;
            ### useful when labeling e.g. tumor/stroma, where different images may be labeled with only one of them
            existing_domains = pd.concat(_assigns)["domain_id"].value_counts()
            existing_domains.index = existing_domains.index.str.replace(
                r"\d+", "", regex=True
            )
            repl = set(v for k, v in remaining_domain.items() if k in existing_domains)
            if resolution == "largest":
                domain = remaining_domain[existing_domains.idxmax()]
            elif resolution == "unique":
                if len(repl) == 1:
                    domain = repl.pop()
                else:
                    raise ValueError(
                        "More than one domain was detected and it is"
                        " unclear how to annotate the remaining cells "
                        f"with the mapping: {remaining_domain}"
                    )

        assign = (
            pd.Series(np.unique(mask[remain]), name="obj_id")
            .drop(0, errors="ignore")
            .to_frame()
            .assign(
                roi=roi.name,
                sample=roi.sample.name,
                domain_id=domain + "1",
            )
        )
        _assigns.append(assign)
        _full_assigns += _assigns

    assigns = pd.concat(_full_assigns)
    assigns["topological_domain"] = assigns["domain_id"].str.replace(
        r"\d", "", regex=True
    )

    # reduce duplicated annotations, but for cells annotated with background make that the primary annotation
    id_cols = ["sample", "roi", "obj_id"]
    assigns = (
        assigns.groupby(id_cols).apply(
            lambda x: x
            if (x.shape[0] == 1)
            else x.loc[x["topological_domain"] == remaining_domain, :]
            if (x["topological_domain"] == remaining_domain).any()
            else x
        )
        # .drop(id_cols, axis=1)
        .reset_index(level=-1, drop=True)
    ).set_index(id_cols)

    # If more than one domain per cell:
    if nest_domains:
        # Keep them all
        assigns = assigns.groupby(id_cols)["domain_id"].apply("-".join).to_frame()
        assigns["topological_domain"] = assigns["domain_id"].str.replace(
            r"\d", "", regex=True
        )
    else:
        # make sure there are no cells with more than one domain that is background
        tpc = assigns.groupby(id_cols)["domain_id"].nunique()
        cells = tpc.index
        assert not assigns.loc[cells[tpc > 1]].isin([remaining_domain]).any().any()

        assigns = (
            assigns.reset_index()
            .drop_duplicates(subset=id_cols)
            .set_index(id_cols)
            .sort_index()
        )

    # expand domains
    for domain in assigns["topological_domain"].unique():
        assigns[domain] = assigns["topological_domain"] == domain

    return assigns


@tp.overload
def get_domain_areas(
    topo_annots: tp.Dict[str, tp.Dict],
    rois: tp.Sequence[_roi.ROI],
    per_domain: tp.Literal[False],
) -> tp.Dict[Path, float]:
    ...


@tp.overload
def get_domain_areas(
    topo_annots: tp.Dict[str, tp.Dict],
    rois: tp.Sequence[_roi.ROI],
    per_domain: tp.Literal[True],
) -> DataFrame:
    ...

393 | """ 394 | from shapely.geometry import Polygon 395 | 396 | mpp = 1 # scale 397 | if rois is not None: 398 | roi_names = [r.name for r in rois] 399 | topo_annots = {k: v for k, v in topo_annots.items() if k in roi_names} 400 | 401 | _areas = list() 402 | for roi_name, shapes in tqdm(topo_annots.items()): 403 | count: tp.Counter[str] = Counter() 404 | for shape in shapes: 405 | label = shape["label"] 406 | count[label] += 1 407 | a = Polygon(shape["points"]).area 408 | _areas.append([roi_name, label + str(count[label]), a * mpp]) 409 | 410 | areas = ( 411 | pd.DataFrame(_areas) 412 | .rename(columns={0: "roi", 1: "domain_obj", 2: "area"}) 413 | .set_index("roi") 414 | ) 415 | areas["topological_domain"] = areas["domain_obj"].str.replace(r"\d", "", regex=True) 416 | if not per_domain: 417 | areas = areas.groupby("roi")["area"].sum().to_dict() 418 | 419 | return areas 420 | 421 | 422 | def get_domain_masks( 423 | topo_annots: tp.Dict, 424 | rois: tp.Sequence[_roi.ROI], 425 | exclude_domains: tp.Sequence[str] = None, 426 | fill_remaining: str = None, 427 | per_domain: bool = False, 428 | ) -> Array: 429 | _x = list() 430 | for roi in rois: 431 | x = get_domain_mask( 432 | topo_annots[roi.name], 433 | roi, 434 | exclude_domains=exclude_domains, 435 | fill_remaining=fill_remaining, 436 | per_domain=per_domain, 437 | ) 438 | _x.append(x) 439 | x = np.asarray(_x) 440 | return x 441 | 442 | 443 | def get_domain_mask( 444 | topo_annot: tp.Dict, 445 | roi: _roi.ROI, 446 | exclude_domains: tp.Sequence[str] = None, 447 | fill_remaining: str = None, 448 | per_domain: bool = False, 449 | ) -> Array: 450 | """ """ 451 | import tifffile 452 | from imc.utils import polygon_to_mask 453 | 454 | if exclude_domains is None: 455 | exclude_domains = [] 456 | 457 | _, h, w = roi.shape 458 | masks = list() 459 | region_types = list() 460 | region_names = list() 461 | count: tp.Counter[str] = Counter() 462 | for shape in topo_annot: 463 | shape["points"] += [shape["points"][0]] 464 | region = polygon_to_mask(shape["points"], (w, h)) 465 | label = shape["label"] 466 | count[label] += 1 467 | masks.append(region) 468 | region_types.append(label) 469 | region_names.append(label + str(count[label])) 470 | 471 | for_mask = np.asarray( 472 | [m for ll, m in zip(region_types, masks) if ll not in exclude_domains] 473 | ).sum(0) 474 | if fill_remaining is not None: 475 | masks += [for_mask == 0] 476 | region_types += [fill_remaining] 477 | for_mask[for_mask == 0] = -1 478 | exc_mask = np.asarray( 479 | [m for ll, m in zip(region_types, masks) if ll in exclude_domains] 480 | ).sum(0) 481 | mask: Array = ( 482 | ((for_mask != 0) & ~(exc_mask != 0)) 483 | if isinstance(exc_mask, np.ndarray) 484 | else for_mask 485 | ).astype(bool) 486 | 487 | if per_domain: 488 | nmask = np.empty_like(mask, dtype="object") 489 | for r, ll in zip(masks, region_types): 490 | if ll not in exclude_domains: 491 | nmask[mask & r] = ll 492 | mask = np.ma.masked_array(nmask, mask=nmask == None) 493 | 494 | return mask 495 | -------------------------------------------------------------------------------- /imc/ops/mixture.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for mixtures of signal. 
3 | """ 4 | 5 | import typing as tp 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import matplotlib.pyplot as plt 10 | import seaborn as sns 11 | from tqdm import tqdm 12 | 13 | from imc.types import DataFrame, Series, Array 14 | 15 | 16 | @tp.overload 17 | def get_best_mixture_number( 18 | x: Series, 19 | min_mix: int, 20 | max_mix: int, 21 | subsample_if_needed: bool, 22 | n_iters: int, 23 | metrics: tp.Sequence[str], 24 | red_func: str, 25 | return_prediction: tp.Literal[False], 26 | ) -> int: 27 | ... 28 | 29 | 30 | @tp.overload 31 | def get_best_mixture_number( 32 | x: Series, 33 | min_mix: int, 34 | max_mix: int, 35 | subsample_if_needed: bool, 36 | n_iters: int, 37 | metrics: tp.Sequence[str], 38 | red_func: str, 39 | return_prediction: tp.Literal[True], 40 | ) -> tp.Tuple[int, Array]: 41 | ... 42 | 43 | 44 | def get_best_mixture_number( 45 | x: Series, 46 | min_mix: int = 2, 47 | max_mix: int = 6, 48 | subsample_if_needed: bool = True, 49 | n_iters: int = 3, 50 | metrics: tp.Sequence[str] = [ 51 | "silhouette_score", 52 | "calinski_harabasz_score", 53 | "davies_bouldin_score", 54 | ], 55 | red_func: str = "mean", 56 | return_prediction: bool = False, 57 | ) -> tp.Union[int, tp.Tuple[int, Array]]: 58 | from sklearn.mixture import GaussianMixture 59 | import sklearn.metrics 60 | 61 | def get_means(num: Series, pred: tp.Union[Series, Array]) -> Series: 62 | return num.groupby(pred).mean().sort_values() 63 | 64 | def replace_pred(x: Series, y: tp.Union[Series, Array]) -> Series: 65 | means = get_means(x, y) 66 | repl = dict(zip(means.index, range(len(means)))) 67 | y2 = pd.Series(y, index=x.index).replace(repl) 68 | new_means = get_means(x, y2.values) 69 | assert all(new_means.index == range(len(new_means))) 70 | return y2 71 | 72 | xx = x.sample(n=10_000) if subsample_if_needed and x.shape[0] > 10_000 else x 73 | 74 | if isinstance(xx, pd.Series): 75 | xx = xx.values.reshape((-1, 1)) 76 | 77 | mi = range(min_mix, max_mix) 78 | mixes = pd.DataFrame(index=metrics, columns=mi) 79 | for i in tqdm(mi): 80 | mix = GaussianMixture(i) 81 | # mix.fit_predict(x) 82 | for f in metrics: 83 | func = getattr(sklearn.metrics, "davies_bouldin_score") 84 | mixes.loc[f, i] = np.mean( 85 | [func(xx, mix.fit_predict(xx)) for _ in range(n_iters)] 86 | ) 87 | # mixes[i] = np.mean([silhouette_score(x, mix.fit_predict(x)) for _ in range(iters)]) 88 | mixes.loc["davies_bouldin_score"] = 1 / mixes.loc["davies_bouldin_score"] 89 | 90 | # return best 91 | # return np.argmax(mixes.values()) + min_mix # type: ignore 92 | best = mixes.columns[int(getattr(np, red_func)(mixes.apply(np.argmax, 1)))] 93 | if not return_prediction: 94 | return best # type: ignore 95 | 96 | # now train with full data 97 | mix = GaussianMixture(best) 98 | return (best, replace_pred(x, mix.fit_predict(x.values.reshape((-1, 1))))) 99 | 100 | 101 | def get_threshold_from_gaussian_mixture( 102 | x: Series, y: Series = None, n_components: int = 2 103 | ) -> Array: 104 | def get_means(num: Series, pred: tp.Union[Series, Array]) -> Series: 105 | return num.groupby(pred).mean().sort_values() 106 | 107 | def replace_pred(x: Series, y: tp.Union[Series, Array]) -> Series: 108 | means = get_means(x, y) 109 | repl = dict(zip(means.index, range(len(means)))) 110 | y2 = pd.Series(y, index=x.index).replace(repl) 111 | new_means = get_means(x, y2.values) 112 | assert all(new_means.index == range(len(new_means))) 113 | return y2 114 | 115 | x = x.sort_values() 116 | 117 | if y is None: 118 | from sklearn.mixture import GaussianMixture # type: 
101 | def get_threshold_from_gaussian_mixture(
102 |     x: Series, y: Series = None, n_components: int = 2
103 | ) -> Array:
104 |     def get_means(num: Series, pred: tp.Union[Series, Array]) -> Series:
105 |         return num.groupby(pred).mean().sort_values()
106 | 
107 |     def replace_pred(x: Series, y: tp.Union[Series, Array]) -> Series:
108 |         means = get_means(x, y)
109 |         repl = dict(zip(means.index, range(len(means))))
110 |         y2 = pd.Series(y, index=x.index).replace(repl)
111 |         new_means = get_means(x, y2.values)
112 |         assert all(new_means.index == range(len(new_means)))
113 |         return y2
114 | 
115 |     x = x.sort_values()
116 | 
117 |     if y is None:
118 |         from sklearn.mixture import GaussianMixture  # type: ignore
119 | 
120 |         mix = GaussianMixture(n_components=n_components)
121 |         xx = x.values.reshape((-1, 1))
122 |         y = mix.fit_predict(xx)
123 |     else:
124 |         y = y.reindex(x.index).values
125 |     y = replace_pred(x, y).values
126 |     thresh = x.loc[((y[:-1] < y[1:])).tolist() + [False]].reset_index(drop=True)
127 |     assert len(thresh) == (n_components - 1)
128 |     return thresh
129 | 
130 | 
131 | def get_probability_of_gaussian_mixture(
132 |     x: Series, n_components: int = 2, population=-1
133 | ) -> Series:
134 |     from sklearn.mixture import GaussianMixture  # type: ignore
135 | 
136 |     x = x.sort_values()
137 |     mix = GaussianMixture(n_components=n_components)
138 |     xx = x.values.reshape((-1, 1))
139 |     mix.fit(xx)
140 |     means = pd.Series(mix.means_.squeeze()).sort_values()
141 |     # order components by mean
142 |     p = mix.predict_proba(xx)[:, means.index]
143 |     # take requested population
144 |     p = p[:, population]
145 |     return pd.Series(p, index=x.index).sort_index()
146 | 
147 | 
148 | 
149 | def fit_gaussian_mixture(
150 |     x: tp.Union[Series, DataFrame], n_mixtures: tp.Union[int, tp.List[int]] = None
151 | ) -> tp.Union[Series, DataFrame]:
152 |     # TODO: parallelize
153 |     from sklearn.mixture import GaussianMixture
154 | 
155 |     if isinstance(x, pd.Series):
156 |         x = x.to_frame()
157 |     if isinstance(n_mixtures, int):
158 |         n_mixtures = [n_mixtures] * x.shape[1]
159 |     expr_thresh = x.astype(int)
160 | 
161 |     def get_means(num, pred):
162 |         return num.groupby(pred).mean().sort_values()
163 | 
164 |     def replace_pred(x, y):
165 |         means = get_means(x, y)
166 |         repl = dict(zip(range(len(means)), means.index))
167 |         y2 = y.replace(repl)
168 |         new_means = get_means(x, y2)
169 |         assert all(new_means.index == range(len(new_means)))
170 |         return y2
171 | 
172 |     for i, ch in enumerate(x.columns):
173 |         if n_mixtures is None:
174 |             # choose components per channel (previously the whole frame was passed)
175 |             n_best = get_best_mixture_number(x.loc[:, ch], return_prediction=False)  # type: ignore[call-overload]
176 |             mix = GaussianMixture(n_best)
177 |         else:
178 |             mix = GaussianMixture(n_mixtures[i])
179 |         _x = x.loc[:, ch]
180 |         x2 = _x.values.reshape((-1, 1))
181 |         mix.fit(x2)
182 |         y = pd.Series(mix.predict(x2), index=x.index, name="class")
183 |         expr_thresh[ch] = replace_pred(_x, y)
184 |     return expr_thresh.squeeze()
185 | 
186 | 
187 | def get_population(
188 |     ser: Series, population: int = -1, plot=False, ax=None, **kwargs
189 | ) -> pd.Index:
190 |     if population == -1:
191 |         operator = np.greater_equal
192 |     elif population == 0:
193 |         operator = np.less_equal
194 |     else:
195 |         raise ValueError("Chosen population must be '0' (lowest) or '-1' (highest).")
196 | 
197 |     # Reset the index if it is not monotonic, so positional selection below is safe
198 |     if not ser.index.is_monotonic:
199 |         ser = ser.reset_index(drop=True)
200 | 
201 |     # Work only in positive space
202 |     xx = ser  # + abs(ser.min())
203 |     done = False
204 |     while not done:
205 |         try:
206 |             n, y = get_best_mixture_number(xx, return_prediction=True, **kwargs)
207 |         except ValueError:  # "Number of labels is 1. 
Valid values are 2 to n_samples - 1 (inclusive)" 207 | continue 208 | done = True 209 | print(f"Chosen mixture of {n} distributions.") 210 | done = False 211 | while not done: 212 | try: 213 | thresh = get_threshold_from_gaussian_mixture(xx, n_components=n) 214 | except AssertionError: 215 | continue 216 | done = True 217 | 218 | sel = operator(xx, thresh.iloc[population]).values 219 | 220 | if plot: 221 | ax = plt.gca() if ax is None else ax 222 | sns.distplot(xx, kde=False, ax=ax) 223 | sns.distplot(xx.loc[sel], kde=False, ax=ax) 224 | [ax.axvline(q, linestyle="--", color="grey") for q in thresh] 225 | ax = None 226 | return sel 227 | -------------------------------------------------------------------------------- /imc/ops/quant.py: -------------------------------------------------------------------------------- 1 | """ 2 | Operations of signal quantification. 3 | """ 4 | 5 | from __future__ import annotations 6 | import typing as tp 7 | 8 | import numpy as np 9 | import pandas as pd 10 | import parmap 11 | 12 | import skimage.measure 13 | from skimage.segmentation import clear_border 14 | 15 | from imc.data_models import roi as _roi 16 | from imc.types import DataFrame, Array, Path 17 | from imc.utils import read_image_from_file, minmax_scale 18 | 19 | 20 | def quantify_cell_intensity( 21 | stack: tp.Union[Array, Path], 22 | mask: tp.Union[Array, Path], 23 | red_func: str = "mean", 24 | border_objs: bool = False, 25 | equalize: bool = True, 26 | scale: bool = False, 27 | channel_include: Array = None, 28 | channel_exclude: Array = None, 29 | ) -> DataFrame: 30 | """ 31 | Measure the intensity of each channel in each cell 32 | 33 | Parameters 34 | ---------- 35 | stack: tp.Union[Array, Path] 36 | Image to quantify. 37 | mask: tp.Union[Array, Path] 38 | Mask to quantify. 39 | red_func: str 40 | Function to reduce pixels to object borders. Defaults to 'mean'. 41 | border_objs: bool 42 | Whether to quantify objects touching image border. Defaults to False. 43 | channel_include: :class:`~np.ndarray` 44 | Boolean array for channels to include. 45 | channel_exclude: :class:`~np.ndarray` 46 | Boolean array for channels to exclude. 
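    equalize: bool
        Whether to clip each channel at its 98th percentile before
        quantification (a saturation step, despite the name). Defaults to True.
    scale: bool
        Whether to min-max scale each channel. Defaults to False.

    Example (illustrative; the file paths are hypothetical):

        >>> df = quantify_cell_intensity(
        ...     Path("tiffs/roi1_full.tiff"),
        ...     Path("tiffs/roi1_full_mask.tiff"),
        ...     red_func="mean",
        ... )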
47 | """ 48 | from skimage.exposure import equalize_hist as eq 49 | 50 | if isinstance(stack, Path): 51 | stack = read_image_from_file(stack) 52 | if isinstance(mask, Path): 53 | mask = read_image_from_file(mask) 54 | if not border_objs: 55 | mask = clear_border(mask) 56 | 57 | if equalize: 58 | # stack = np.asarray([eq(x) for x in stack]) 59 | _stack = list() 60 | for x in stack: 61 | p = np.percentile(x, 98) 62 | x[x > p] = p 63 | _stack.append(x) 64 | stack = np.asarray(_stack) 65 | if scale: 66 | stack = np.asarray([minmax_scale(x) for x in stack]) 67 | 68 | cells = [c for c in np.unique(mask) if c != 0] 69 | n_channels = stack.shape[0] 70 | 71 | if channel_include is None: 72 | channel_include = np.asarray([True] * n_channels) 73 | if channel_exclude is None: 74 | channel_exclude = np.asarray([False] * n_channels) 75 | 76 | res = np.zeros((len(cells), n_channels), dtype=int if red_func == "sum" else float) 77 | for channel in np.arange(stack.shape[0])[channel_include & ~channel_exclude]: 78 | res[:, channel] = [ 79 | getattr(x.intensity_image[x.image], red_func)() 80 | for x in skimage.measure.regionprops(mask, stack[channel]) 81 | ] 82 | return pd.DataFrame(res, index=cells).rename_axis(index="obj_id") 83 | 84 | 85 | def quantify_cell_morphology( 86 | mask: tp.Union[Array, Path], 87 | attributes: tp.Sequence[str] = [ 88 | "area", 89 | "perimeter", 90 | "minor_axis_length", 91 | "major_axis_length", 92 | # In some images I get ValueError for 'minor_axis_length' 93 | # just like https://github.com/scikit-image/scikit-image/issues/2625 94 | # 'orientation', # should be ~random for non-optical imaging, so I'm not including it 95 | "eccentricity", 96 | "solidity", 97 | "centroid", 98 | ], 99 | border_objs: bool = False, 100 | ) -> DataFrame: 101 | if isinstance(mask, Path): 102 | mask = read_image_from_file(mask) 103 | if not border_objs: 104 | mask = clear_border(mask) 105 | 106 | morph = ( 107 | pd.DataFrame( 108 | skimage.measure.regionprops_table(mask, properties=attributes), 109 | index=[c for c in np.unique(mask) if c != 0], 110 | ) 111 | .rename_axis(index="obj_id") 112 | .rename(columns={"centroid-0": "X_centroid", "centroid-1": "Y_centroid"}) 113 | ) 114 | if ("minor_axis_length" in attributes) and ("major_axis_length" in attributes): 115 | morph["ratio_axis_length"] = ( 116 | morph["major_axis_length"] / morph["minor_axis_length"] 117 | ) 118 | return morph 119 | 120 | 121 | def _quantify_cell_intensity__roi(roi: _roi.ROI, **kwargs) -> DataFrame: 122 | assignment = dict(roi=roi.name) 123 | if roi.sample is not None: 124 | assignment["sample"] = roi.sample.name 125 | return roi.quantify_cell_intensity(**kwargs).assign(**assignment) 126 | 127 | 128 | def _quantify_cell_morphology__roi(roi: _roi.ROI, **kwargs) -> DataFrame: 129 | assignment = dict(roi=roi.name) 130 | if roi.sample is not None: 131 | assignment["sample"] = roi.sample.name 132 | return roi.quantify_cell_morphology(**kwargs).assign(**assignment) 133 | 134 | 135 | def _correlate_channels__roi(roi: _roi.ROI, labels: str = "channel_names") -> DataFrame: 136 | xcorr = np.corrcoef(roi.stack.reshape((roi.channel_number, -1))) 137 | np.fill_diagonal(xcorr, 0) 138 | labs = getattr(roi, labels) 139 | return pd.DataFrame(xcorr, index=labs, columns=labs) 140 | 141 | 142 | # def _get_adjacency_graph__roi(roi: _roi.ROI, **kwargs) -> DataFrame: 143 | # output_prefix = roi.sample.root_dir / "single_cell" / roi.name 144 | # return get_adjacency_graph(roi.stack, roi.mask, roi.clusters, output_prefix, **kwargs) 145 | 146 | 147 | def 
quantify_cell_intensity_rois( 148 | rois: tp.Sequence[_roi.ROI], 149 | **kwargs, 150 | ) -> DataFrame: 151 | """ 152 | Measure the intensity of each channel in each single cell. 153 | """ 154 | return pd.concat( 155 | parmap.map(_quantify_cell_intensity__roi, rois, pm_pbar=True, **kwargs) 156 | ).rename_axis(index="obj_id") 157 | 158 | 159 | def quantify_cell_morphology_rois( 160 | rois: tp.Sequence[_roi.ROI], 161 | **kwargs, 162 | ) -> DataFrame: 163 | """ 164 | Measure the shape parameters of each single cell. 165 | """ 166 | return pd.concat( 167 | parmap.map(_quantify_cell_morphology__roi, rois, pm_pbar=True, **kwargs) 168 | ).rename_axis(index="obj_id") 169 | 170 | 171 | def quantify_cells_rois( 172 | rois: tp.Sequence[_roi.ROI], 173 | layers: tp.Sequence[str], 174 | intensity: bool = True, 175 | intensity_kwargs: tp.Dict[str, tp.Any] = {}, 176 | morphology: bool = True, 177 | morphology_kwargs: tp.Dict[str, tp.Any] = {}, 178 | ) -> DataFrame: 179 | """ 180 | Measure the intensity of each channel in each single cell. 181 | """ 182 | quants = list() 183 | if intensity: 184 | quants.append( 185 | quantify_cell_intensity_rois(rois=rois, layers=layers, **intensity_kwargs) 186 | ) 187 | if morphology: 188 | quants.append( 189 | quantify_cell_morphology_rois(rois=rois, layers=layers, **morphology_kwargs) 190 | ) 191 | 192 | return ( 193 | # todo: this will fail if there's different layers in intensity and morphology 194 | pd.concat( 195 | # ignore because a ROI is not obliged to have a Sample 196 | [quants[0].drop(["sample", "roi"], axis=1, errors="ignore"), quants[1]], 197 | axis=1, 198 | ) 199 | if len(quants) > 1 200 | else quants[0] 201 | ).rename_axis(index="obj_id") 202 | -------------------------------------------------------------------------------- /imc/ops/signal.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for handling signal intensity in images. 
3 | """ 4 | 5 | import typing as tp 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import matplotlib.patches as mpatches 10 | import matplotlib.pyplot as plt 11 | import seaborn as sns 12 | import parmap 13 | from skimage import exposure 14 | 15 | import imc.data_models.roi as _roi 16 | from imc.exceptions import cast 17 | from imc.types import DataFrame, Series, Array, Path 18 | 19 | FIG_KWS = dict(bbox_inches="tight", dpi=300) 20 | 21 | 22 | # def check_channel_axis_correlation( 23 | # arr: Array, channel_labels: tp.Sequence[str], output_prefix: Path 24 | # ) -> DataFrame: 25 | # # # Plot and regress 26 | # n, m = get_grid_dims(arr.shape[0]) 27 | # fig, axis = plt.subplots( 28 | # m, n, figsize=(n * 4, m * 4), squeeze=False, sharex=True, sharey=True 29 | # ) 30 | 31 | # res = list() 32 | # for channel in range(arr.shape[0]): 33 | # for axs in [0, 1]: 34 | # s = arr[channel].mean(axis=axs) 35 | # order = np.arange(s.shape[0]) 36 | # model = LinearRegression() 37 | # model.fit(order[:, np.newaxis] / max(order), s) 38 | # res.append( 39 | # [ 40 | # channel, 41 | # axs, 42 | # model.coef_[0], 43 | # model.intercept_, 44 | # pearsonr(order, s)[0], 45 | # ] 46 | # ) 47 | 48 | # axis.flatten()[channel].plot(order, s) 49 | # axis.flatten()[channel].set_title( 50 | # f"{channel_labels[channel]}\nr[X] = {res[-2][-1]:.2f}; r[Y] = {res[-1][-1]:.2f}" 51 | # ) 52 | 53 | # axis[int(m / 2), 0].set_ylabel("Mean signal along axis") 54 | # axis[-1, int(n / 2)].set_xlabel("Order along axis") 55 | # c = sns.color_palette("colorblind") 56 | # patches = [ 57 | # mpatches.Patch(color=c[0], label="X"), 58 | # mpatches.Patch(color=c[1], label="Y"), 59 | # ] 60 | # axis[int(m / 2), -1].legend( 61 | # handles=patches, 62 | # bbox_to_anchor=(1.05, 1), 63 | # loc=2, 64 | # borderaxespad=0.0, 65 | # title="Axis", 66 | # ) 67 | # fig.savefig(output_prefix + "channel-axis_correlation.svg", **FIG_KWS) 68 | 69 | # df = pd.DataFrame(res, columns=["channel", "axis", "coef", "intercept", "r"]) 70 | # df["axis_label"] = df["axis"].replace(0, "X_centroid").replace(1, "Y_centroid") 71 | # df["channel_label"] = [x for x in channel_labels for _ in range(2)] 72 | # df["abs_r"] = df["r"].abs() 73 | # df.to_csv(output_prefix + "channel-axis_correlation.csv", index=False) 74 | # return df 75 | 76 | 77 | def fix_signal_axis_dependency( 78 | arr: Array, channel_labels: tp.Sequence[str], res: DataFrame, output_prefix: Path 79 | ) -> Array: 80 | # res = pd.read_csv(pjoin("processed", "case_b", "plots", "qc", roi + "_channel-axis_correlation.csv")) 81 | corr_d = np.empty_like(arr) 82 | for channel in range(arr.shape[0]): 83 | r = res.query(f"channel == {channel}") 84 | x = r.query("axis_label == 'X'")["coef"].squeeze() 85 | xinter = r.query("axis_label == 'X'")["intercept"].squeeze() 86 | y = r.query("axis_label == 'Y'")["coef"].squeeze() 87 | yinter = r.query("axis_label == 'Y'")["intercept"].squeeze() 88 | # to_reg = pd.DataFrame(arr[channel]).reset_index().melt(id_vars='index').rename(columns=dict(index="X", variable="Y")) 89 | 90 | order = np.arange(arr[channel].shape[0]) 91 | dd = arr[channel] 92 | m = np.ones_like(dd) 93 | m = m * (order / max(order) * x) + (xinter) 94 | m = (m.T * (order / max(order) * y)).T + (yinter) 95 | ddfix = (dd - m) + dd.mean() 96 | corr_d[channel] = ddfix 97 | 98 | fig, axis = plt.subplots(1, 7, sharex=True, sharey=False, figsize=(7 * 3, 3 * 1)) 99 | fig.suptitle(channel_labels[channel]) 100 | axis[0].set_title("Original") 101 | axis[0].imshow(dd) 102 | axis[1].set_title("Original, equalized") 103 
| axis[1].imshow(exposure.equalize_hist(dd)) 104 | axis[2].set_title("Bias mask") 105 | axis[2].imshow(m) 106 | axis[3].set_title("Bias removed") 107 | axis[3].imshow(ddfix) 108 | axis[4].set_title("Bias removed, equalized") 109 | axis[4].imshow(exposure.equalize_hist(ddfix)) 110 | axis[5].set_title("Channel bias") 111 | axis[5].plot(order, dd.mean(axis=0), label="Original", alpha=0.5) 112 | axis[5].plot(order, ddfix.mean(axis=0), label="Bias removed", alpha=0.5) 113 | axis[5].set_xlabel("Position along X axis") 114 | axis[5].set_ylabel("Signal along X axis") 115 | axis[5].legend() 116 | axis[6].set_title("Channel bias") 117 | axis[6].plot(order, dd.mean(axis=1), label="Original", alpha=0.5) 118 | axis[6].plot(order, ddfix.mean(axis=1), label="Bias removed", alpha=0.5) 119 | axis[6].set_xlabel("Position along Y axis") 120 | axis[6].set_ylabel("Signal along Y axis") 121 | axis[6].legend() 122 | for ax in axis[:-2]: 123 | ax.axis("off") 124 | fig.savefig( 125 | output_prefix 126 | + f"channel-axis_correlation_removal.{channel_labels[channel]}.demonstration.svg", 127 | **FIG_KWS, 128 | ) 129 | plt.close("all") 130 | return corr_d 131 | 132 | 133 | def channel_stats(roi: _roi.ROI, channels: tp.Sequence[str] = None): 134 | from skimage.restoration import estimate_sigma 135 | from imc.utils import estimate_sigma 136 | 137 | if channels is None: 138 | channels = roi.channel_labels.tolist() 139 | stack = roi._get_channels(channels)[1] 140 | mask = roi.cell_mask == 0 141 | res = dict() 142 | res["wmeans"] = pd.Series(stack.mean(axis=(1, 2)), index=channels) 143 | res["wstds"] = pd.Series(stack.std(axis=(1, 2)), index=channels) 144 | res["cmeans"] = pd.Series( 145 | [np.ma.masked_array(stack[i], mask=mask).mean() for i in range(len(channels))], 146 | index=channels, 147 | ) 148 | res["cstds"] = pd.Series( 149 | [np.ma.masked_array(stack[i], mask=mask).std() for i in range(len(channels))], 150 | index=channels, 151 | ) 152 | res["emeans"] = pd.Series( 153 | [np.ma.masked_array(stack[i], mask=~mask).mean() for i in range(len(channels))], 154 | index=channels, 155 | ) 156 | res["estds"] = pd.Series( 157 | [np.ma.masked_array(stack[i], mask=~mask).std() for i in range(len(channels))], 158 | index=channels, 159 | ) 160 | # res["noises"] = pd.Series([estimate_noise(ch) for ch in stack], index=channels) 161 | res["sigmas"] = pd.Series( 162 | estimate_sigma(np.moveaxis(stack, 0, -1), multichannel=True), index=channels 163 | ) 164 | return res 165 | 166 | 167 | def measure_channel_background( 168 | rois: tp.Sequence[_roi.ROI], plot: bool = True, output_prefix: Path = None 169 | ) -> Series: 170 | from imc.utils import align_channels_by_name 171 | from mpl_toolkits.axes_grid1 import make_axes_locatable 172 | 173 | if plot: 174 | assert ( 175 | output_prefix is not None 176 | ), "If `plot` is True, `output_prefix` must be given." 
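    # NOTE: the "qv2" quantities below are computed as sqrt(std / mean), not the
    # textbook squared coefficient of variation (std / mean) ** 2; both are
    # monotonic in std/mean, so relative channel rankings are unaffected.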
177 | 
178 |     _channels = pd.DataFrame(
179 |         {r.name: r.channel_labels[~r.channel_exclude.values] for r in rois}
180 |     )
181 |     channels = align_channels_by_name(_channels).dropna().iloc[:, 0].tolist()
182 |     roi_names = [r.name for r in rois]
183 | 
184 |     res = parmap.map(channel_stats, rois, channels=channels, pm_pbar=True)
185 | 
186 |     wmeans = pd.DataFrame((x["wmeans"] for x in res), index=roi_names).T
187 |     wstds = pd.DataFrame((x["wstds"] for x in res), index=roi_names).T
188 |     wqv2s = np.sqrt(wstds / wmeans)
189 |     cmeans = pd.DataFrame((x["cmeans"] for x in res), index=roi_names).T
190 |     cstds = pd.DataFrame((x["cstds"] for x in res), index=roi_names).T
191 |     cqv2s = np.sqrt(cstds / cmeans)
192 |     emeans = pd.DataFrame((x["emeans"] for x in res), index=roi_names).T
193 |     estds = pd.DataFrame((x["estds"] for x in res), index=roi_names).T
194 |     eqv2s = np.sqrt(estds / emeans)
195 |     fore_backg: DataFrame = np.log(cmeans / emeans)
196 |     # fore_backg_disp = np.log1p(((cmeans / emeans) / (cmeans + emeans))).mean(1)
197 |     # "noises" is not collected here: channel_stats above has its per-channel
198 |     # noise estimate commented out, so only the sigma estimates are available
199 |     sigmas = pd.DataFrame((x["sigmas"] for x in res), index=roi_names).T
200 | 
201 |     # Join all metrics
202 |     metrics = (
203 |         wmeans.mean(1)
204 |         .to_frame(name="image_mean")
205 |         .join(wstds.mean(1).rename("image_std"))
206 |         .join(wqv2s.mean(1).rename("image_qv2"))
207 |         .join(cmeans.mean(1).rename("cell_mean"))
208 |         .join(cstds.mean(1).rename("cell_std"))
209 |         .join(cqv2s.mean(1).rename("cell_qv2"))
210 |         .join(emeans.mean(1).rename("extra_mean"))
211 |         .join(estds.mean(1).rename("extra_std"))
212 |         .join(eqv2s.mean(1).rename("extra_qv2"))
213 |         .join(fore_backg.mean(1).rename("fore_backg"))
214 |         # .join(noises.mean(1).rename("noise"))  # would raise KeyError; see note above
215 |         .join(sigmas.mean(1).rename("sigma"))
216 |     ).rename_axis(index="channel")
217 |     metrics_std = (metrics - metrics.min()) / (metrics.max() - metrics.min())
218 | 
219 |     if not plot:
220 |         # Invert QV2
221 |         sel = metrics_std.columns.str.contains("_qv2")
222 |         metrics_std.loc[:, sel] = 1 - metrics_std.loc[:, sel]
223 |         # TODO: better decision on which metrics matter
224 |         return metrics_std.mean(1)
225 | 
226 |     output_prefix = cast(output_prefix)
227 |     if not output_prefix.endswith("."):
228 |         output_prefix += "."
228 | 229 | metrics.to_csv(output_prefix + "channel_background_noise_measurements.csv") 230 | metrics = pd.read_csv( 231 | output_prefix + "channel_background_noise_measurements.csv", index_col=0 232 | ) 233 | 234 | # Plot 235 | fig, axes = plt.subplots(2, 3, figsize=(3 * 4.1, 2 * 4), sharex="col") 236 | axes[0, 0].set_title("Whole image") 237 | axes[0, 1].set_title("Cells") 238 | axes[0, 2].set_title("Extracellular") 239 | for i, (means, stds, qv2s) in enumerate( 240 | [(wmeans, wstds, wqv2s), (cmeans, cstds, cqv2s), (emeans, estds, eqv2s)] 241 | ): 242 | # plot mean vs variance 243 | mean = means.mean(1) 244 | std = stds.mean(1) ** 2 245 | qv2 = qv2s.mean(1) 246 | fb = fore_backg.mean(1) 247 | 248 | axes[0, i].set_xlabel("Mean") 249 | axes[0, i].set_ylabel("Variance") 250 | pts = axes[0, i].scatter(mean, std, c=fb) 251 | if i == 2: 252 | div = make_axes_locatable(axes[0, i]) 253 | cax = div.append_axes("right", size="5%", pad=0.05) 254 | fig.colorbar(pts, cax=cax) 255 | 256 | for channel in means.index: 257 | lab = "left" if np.random.rand() > 0.5 else "right" 258 | axes[0, i].text( 259 | mean.loc[channel], std.loc[channel], channel, ha=lab, fontsize=4 260 | ) 261 | v = max(mean.max().max(), std.max().max()) 262 | axes[0, i].plot((0, v), (0, v), linestyle="--", color="grey") 263 | axes[0, i].loglog() 264 | 265 | # plot mean vs qv2 266 | axes[1, i].set_xlabel("Mean") 267 | axes[1, i].set_ylabel("Squared coefficient of variation") 268 | axes[1, i].scatter(mean, qv2, c=fb) 269 | for channel in means.index: 270 | lab = "left" if np.random.rand() > 0.5 else "right" 271 | axes[1, i].text( 272 | mean.loc[channel], qv2.loc[channel], channel, ha=lab, fontsize=4 273 | ) 274 | axes[1, i].axhline(1, linestyle="--", color="grey") 275 | axes[1, i].set_xscale("log") 276 | # if qv2.min() > 0.01: 277 | # axes[1, i].set_yscale("log") 278 | fig.savefig(output_prefix + "channel_mean_variation_noise.svg", **FIG_KWS) 279 | 280 | fig, axes = plt.subplots(1, 2, figsize=(2 * 6.2, 4)) 281 | p = fore_backg.mean(1).sort_values() 282 | r1 = p.rank() 283 | r2 = p.abs().rank() 284 | axes[0].scatter(r1, p) 285 | axes[1].scatter(r2, p.abs()) 286 | for i in p.index: 287 | axes[0].text(r1.loc[i], p.loc[i], s=i, rotation=90, ha="center", va="bottom") 288 | axes[1].text( 289 | r2.loc[i], p.abs().loc[i], s=i, rotation=90, ha="center", va="bottom" 290 | ) 291 | axes[1].set_yscale("log") 292 | axes[0].set_xlabel("Channel rank") 293 | axes[1].set_xlabel("Channel rank") 294 | axes[0].set_ylabel("Cellular/extracellular difference") 295 | axes[1].set_ylabel("Cellular/extracellular difference (abs)") 296 | axes[0].axhline(0, linestyle="--", color="grey") 297 | axes[1].axhline(0, linestyle="--", color="grey") 298 | fig.savefig( 299 | output_prefix + "channel_foreground_background_diff.rankplot.svg", 300 | **FIG_KWS, 301 | ) 302 | 303 | grid = sns.clustermap( 304 | metrics_std, 305 | xticklabels=True, 306 | yticklabels=True, 307 | metric="correlation", 308 | cbar_kws=dict(label="Variable (min-max)"), 309 | ) 310 | grid.fig.savefig( 311 | output_prefix + "channel_mean_variation_noise.clustermap.svg", **FIG_KWS 312 | ) 313 | 314 | # Invert QV2 315 | sel = metrics_std.columns.str.contains("_qv2") 316 | metrics_std.loc[:, sel] = 1 - metrics_std.loc[:, sel] 317 | # TODO: better decision on which metrics matter 318 | return metrics_std.mean(1) 319 | -------------------------------------------------------------------------------- /imc/py.typed: -------------------------------------------------------------------------------- 1 | # Marker 
file for PEP 561. This package uses inline types. 2 | -------------------------------------------------------------------------------- /imc/scripts/illustrate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Illustrate IMC data. 5 | """ 6 | 7 | import sys 8 | import typing as tp 9 | 10 | from tqdm import tqdm 11 | import matplotlib.pyplot as plt 12 | import scanpy as sc 13 | 14 | from imc import Project 15 | from imc.scripts import build_cli, find_tiffs, find_h5ad 16 | 17 | figkws = dict(dpi=300, bbox_inches="tight") 18 | 19 | 20 | def main(cli: tp.Sequence[str] = None) -> int: 21 | parser = build_cli("illustrate") 22 | args = parser.parse_args(cli) 23 | 24 | if args.tiffs is None: 25 | args.tiffs = find_tiffs() 26 | if len(args.tiffs) == 0: 27 | raise ValueError("Input files were not provided and could not be found!") 28 | 29 | if args.h5ad is None: 30 | args.h5ad = find_h5ad() 31 | if args.h5ad is None: 32 | if args.clusters: 33 | print( 34 | "No h5ad file was provided and it could not be found. " 35 | "Not illustrating clusters." 36 | ) 37 | args.clusters = False 38 | if args.cell_types: 39 | print( 40 | "No h5ad file was provided and it could not be found. " 41 | "Not illustrating cell types." 42 | ) 43 | args.cell_types = False 44 | 45 | print("Starting illustration step!") 46 | 47 | args.channels_include = ( 48 | args.channels_include.split(",") if args.channels_include is not None else None 49 | ) 50 | args.channels_exclude = args.channels_exclude.split(",") 51 | args.output_dir.mkdir() 52 | 53 | prj = Project.from_stacks(args.tiffs) 54 | if args.stacks: 55 | dir_ = (args.output_dir / "stacks").mkdir() 56 | print(f"Plotting full image stacks in directory '{dir_}'.") 57 | for roi in tqdm(prj.rois): 58 | f = dir_ / roi.name + ".full_stack.pdf" 59 | if f.exists() and not args.overwrite: 60 | continue 61 | fig = roi.plot_channels() 62 | fig.savefig(f, **figkws) 63 | plt.close(fig) 64 | 65 | if args.channels: 66 | dir_ = (args.output_dir / "channels").mkdir() 67 | print(f"Plotting channels for all images jointly in directory '{dir_}'.") 68 | for ch in tqdm(prj.rois[0].channel_labels): 69 | f = dir_ / ch + ".rois.pdf" 70 | if f.exists() and not args.overwrite: 71 | continue 72 | fig = prj.plot_channels([ch]) 73 | fig.savefig(f, **figkws) 74 | plt.close(fig) 75 | 76 | id_cols = ["sample", "roi", "obj_id"] 77 | if args.clusters: 78 | dir_ = (args.output_dir / "clusters").mkdir() 79 | print(f"Plotting cluster illustrations in directory '{dir_}'.") 80 | 81 | a = sc.read(args.h5ad) 82 | clusters = a.obs.columns[a.obs.columns.str.contains("cluster_")] 83 | for cluster in tqdm(clusters): 84 | f = dir_ / f"clustering_illustrations.{cluster}.pdf" 85 | if f.exists() and not args.overwrite: 86 | continue 87 | # TODO: plot markers next to clusters, or overlay 88 | prj.set_clusters(a.obs.set_index(id_cols)[cluster].rename("cluster")) 89 | fig = prj.plot_cell_types() 90 | for ax in fig.axes[1:]: 91 | ax.legend_.set_visible(False) 92 | fig.savefig(f, **figkws) 93 | plt.close(fig) 94 | 95 | if args.cell_types: 96 | dir_ = (args.output_dir / "cell_type").mkdir() 97 | print(f"Plotting cell_type illustrations in directory '{dir_}'.") 98 | 99 | a = sc.read(args.h5ad) 100 | cts = a.obs.columns[a.obs.columns.str.contains("cluster_")].intersection( 101 | a.obs.columns[a.obs.columns.str.contains("_label")] 102 | ) 103 | for ct in tqdm(cts): 104 | f = dir_ / f"cell_type_illustrations.{ct}.pdf" 105 | if f.exists() and not args.overwrite: 
106 | continue 107 | # TODO: plot markers next to cell types, or overlay 108 | prj.set_clusters(a.obs.set_index(id_cols)[ct].rename("cluster")) 109 | fig = prj.plot_cell_types() 110 | for ax in fig.axes[1:]: 111 | ax.legend_.set_visible(False) 112 | fig.savefig(f, **figkws) 113 | plt.close(fig) 114 | 115 | print("Finished illustration step.") 116 | return 0 117 | 118 | 119 | if __name__ == "__main__": 120 | try: 121 | sys.exit(main()) 122 | except KeyboardInterrupt: 123 | sys.exit(1) 124 | -------------------------------------------------------------------------------- /imc/scripts/inspect_ilastik_model.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | import argparse 4 | import sys 5 | import typing as tp 6 | 7 | import h5py 8 | import numpy as np 9 | import pandas as pd 10 | import matplotlib 11 | import matplotlib.pyplot as plt 12 | import seaborn as sns 13 | 14 | from imc.types import Path, Array 15 | from imc.graphics import get_grid_dims 16 | 17 | 18 | matplotlib.rcParams["svg.fonttype"] = "none" 19 | FIG_KWS = dict(dpi=300, bbox_inches="tight") 20 | 21 | 22 | cli = ["_models/utuc-imc/utuc-imc.ilp"] 23 | 24 | 25 | def main(cli: tp.List[str] = None) -> int: 26 | args = parse_arguments().parse_args(cli) 27 | 28 | inspect_ilastik_model(args.model_path) 29 | 30 | if args.plot: 31 | plot_training_data(args.model_path, args.channels_to_plot) 32 | 33 | if args.extract: 34 | extract_training_data(args.model_path, args.labels_output_file) 35 | 36 | if args.convert: 37 | convert_model_data( 38 | args.model_path, 39 | args.converted_model_output, 40 | args.channels_to_retain, 41 | ) 42 | 43 | return 0 44 | 45 | 46 | def parse_arguments() -> argparse.ArgumentParser: 47 | parser = argparse.ArgumentParser() 48 | 49 | # Extract 50 | parser.add_argument( 51 | "-e", 52 | "--extract", 53 | dest="extract", 54 | action="store_true", 55 | help="Whether to extract training labels from ilastik file into numpy array.", 56 | ) 57 | parser.add_argument( 58 | "--labels-output", 59 | dest="labels_output_file", 60 | default=None, 61 | type=Path, 62 | help="Path to file storing numpy array with training labels." 63 | " If not given will be same as model with different suffix.", 64 | ) 65 | 66 | # Plot 67 | parser.add_argument( 68 | "-p", 69 | "--plot", 70 | dest="plot", 71 | action="store_true", 72 | help="Whether training set examples should be plotted.", 73 | ) 74 | parser.add_argument( 75 | "--channels-to-plot", 76 | dest="channels_to_plot", 77 | choices=["mean", "last"], 78 | default="mean", 79 | help="Which channels to plot. 
One of 'mean' or 'last'.", 80 | ) 81 | 82 | # Convert 83 | parser.add_argument( 84 | "-c", 85 | "--convert", 86 | dest="convert", 87 | action="store_true", 88 | help="Whether to convert ilastik model to new file by changing the input channels.", 89 | ) 90 | parser.add_argument( 91 | "--keep-channels", 92 | dest="channels_to_retain", 93 | nargs="+", 94 | type=int, 95 | help="Channel numbers to retain in new model.", 96 | ) 97 | parser.add_argument( 98 | "--converted-model-output", 99 | dest="converted_model_output", 100 | type=Path, 101 | help="Path to new model output file.", 102 | ) 103 | parser.add_argument(dest="model_path", type=Path) 104 | 105 | return parser 106 | 107 | 108 | def inspect_ilastik_model(model_path: Path) -> None: 109 | print(f"Ilastik model '{model_path}'.") 110 | 111 | f = h5py.File(model_path.as_posix(), mode="r") 112 | 113 | # Input files 114 | # f['Input Data']['infos']['lane0000']['Raw Data']['filePath'][()].decode() 115 | n_input = len(f["Input Data"]["infos"]) 116 | training_files = [ 117 | f["Input Data"]["infos"]["lane" + str(x).zfill(4)]["Raw Data"]["filePath"][ 118 | () 119 | ].decode() 120 | for x in range(n_input) 121 | ] 122 | 123 | print(f"Model was trained with {n_input} files.") 124 | 125 | # Feature matrix 126 | fv = f["FeatureSelections"]["SelectionMatrix"][()] # values 127 | fx = f["FeatureSelections"]["FeatureIds"][()] # x = derivative 128 | fy = f["FeatureSelections"]["Scales"][()] # y = sigma 129 | feature_matrix = pd.DataFrame( 130 | fv, 131 | index=pd.Series(fx, name="Feature").str.decode("utf8"), 132 | columns=pd.Series(fy, name="Sigma"), 133 | ) 134 | used = feature_matrix.values.sum() 135 | total = np.multiply(*feature_matrix.shape) 136 | print(f"{used}/{total} of the possible feature combinations used.") 137 | print("Here is the feature matrix:") 138 | print(feature_matrix, "\n") 139 | 140 | # Pixel classification 141 | # labels = [x.decode() for x in f['PixelClassification']['LabelNames'][()]] 142 | # 3 labels (3 classes?) 
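    # (HDF5 layout recap: each f["PixelClassification"]["LabelSets"]["labelsNNN"]
    # group holds one "blockNNNN" dataset per annotated region, whose
    # "blockSlice" attribute gives its coordinates in the training image --
    # see plot_training_data and extract_training_data below.)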
143 | # 35 blocks (35 inputs) 144 | # values, shape=(x, y, 1) 145 | annots = [len(x) for x in f["PixelClassification"]["LabelSets"].values()] 146 | filled_annots = [x for x in annots if x != 0] 147 | print(f"{len(filled_annots)}/{n_input} of the input files were labeled.") 148 | 149 | f.close() 150 | 151 | 152 | def plot_training_data( 153 | model_path: Path, 154 | channels_to_plot: tp.Union[tp.Literal["mean"], tp.Literal["last"]] = "mean", 155 | ) -> None: 156 | from imc.segmentation import normalize 157 | 158 | f = h5py.File(model_path.as_posix(), mode="r") 159 | n_input = len(f["Input Data"]["infos"]) 160 | annots = [len(x) for x in f["PixelClassification"]["LabelSets"].values()] 161 | training_files = [ 162 | f["Input Data"]["infos"]["lane" + str(x).zfill(4)]["Raw Data"]["filePath"][ 163 | () 164 | ].decode() 165 | for x in range(n_input) 166 | ] 167 | 168 | # Plot labels on top of sum of channels 169 | n, m = get_grid_dims(len(annots)) 170 | fig, axes = plt.subplots( 171 | m, n, figsize=(n * 3, m * 3), gridspec_kw=dict(wspace=0, hspace=0.05) 172 | ) 173 | axes = axes.ravel() 174 | 175 | # get colormap depending on what channels are being plotted 176 | if channels_to_plot == "mean": 177 | cmap = matplotlib.colors.ListedColormap( 178 | np.asarray(sns.color_palette("tab10"))[np.asarray([-1, 1, 3])] 179 | ) 180 | else: 181 | cmap = matplotlib.colors.ListedColormap( 182 | np.asarray(sns.color_palette("tab10"))[np.asarray([-4, -6, 3])] 183 | ) 184 | 185 | # plot 186 | for i in range(n_input): 187 | if training_files[i].startswith("Input Data"): 188 | train_arr = f[training_files[i]] 189 | else: 190 | train_file = model_path.parent / training_files[i].replace( 191 | "/stacked_channels", "" 192 | ) 193 | train_arr = h5py.File(train_file, mode="r")["stacked_channels"] 194 | 195 | train_arr = train_arr[()] 196 | train_arr[pd.isnull(train_arr)] = 0 197 | 198 | if channels_to_plot == "mean": 199 | train_arr = normalize(train_arr).mean(-1) 200 | else: 201 | train_arr = normalize(train_arr[..., -1]) 202 | training_file_shape = train_arr.shape 203 | 204 | axes[i].imshow(train_arr, rasterized=True) # , cmap='inferno') 205 | # axes[i].set_title(image) 206 | axes[i].axis("off") 207 | 208 | # Now for each block, get coordinates and plot 209 | label_arr = np.zeros(training_file_shape, dtype=float) 210 | # label_arr = scipy.sparse.lil_matrix(training_file_shape) 211 | b = f["PixelClassification"]["LabelSets"]["labels" + str(i).zfill((3))] 212 | for j, label in enumerate(b): 213 | # get start-end coordinates within training image 214 | d = b["block" + str(j).zfill(4)] 215 | pos = dict(d.attrs)["blockSlice"].replace("[", "").replace("]", "").split(",") 216 | xs, ys, zs = [(int(x.split(":")[0]), int(x.split(":")[1])) for x in pos] 217 | arr = d[()].squeeze() 218 | # now fill the image with the labeled pixels 219 | label_arr[slice(*xs), slice(*ys)] = arr 220 | label_arr = np.ma.masked_array(label_arr, label_arr == 0) 221 | axes[i].imshow(label_arr, cmap=cmap, vmin=1, vmax=3, rasterized=True) 222 | fig.savefig( 223 | model_path.replace_(".ilp", f".training_data.{channels_to_plot}.pdf"), 224 | bbox_inches="tight", 225 | dpi=300, 226 | ) 227 | 228 | f.close() 229 | 230 | 231 | def extract_training_data( 232 | model_path: Path, output_path: Path = None 233 | ) -> tp.Tuple[Array, Array]: 234 | # Extract training labels for preservation independent of model 235 | 236 | if output_path is None: 237 | output_path = model_path.replace_(".ilp", ".training_data.npz") 238 | 239 | fi = h5py.File(model_path.as_posix(), 
mode="r") 240 | 241 | n_input = len(fi["Input Data"]["infos"]) 242 | training_files = [ 243 | fi["Input Data"]["infos"]["lane" + str(x).zfill(4)]["Raw Data"]["filePath"][ 244 | () 245 | ].decode() 246 | for x in range(n_input) 247 | ] 248 | 249 | # Extract arrays 250 | _signals = list() 251 | _labels = list() 252 | for i, file in enumerate(training_files): 253 | if file.startswith("Input Data"): 254 | train_arr = fi[file] 255 | else: 256 | train_file = model_path.parent / file.replace("/stacked_channels", "") 257 | train_arr = h5py.File(train_file, mode="r")["stacked_channels"] 258 | shape = train_arr.shape[:-1] 259 | 260 | # Now for each block, get coordinates and assemble 261 | label_arr = np.zeros(shape, dtype=float) 262 | b = fi["PixelClassification"]["LabelSets"]["labels" + str(i).zfill((3))] 263 | for j, _ in enumerate(b): 264 | # get start-end coordinates within training image 265 | d = b["block" + str(j).zfill(4)] 266 | pos = dict(d.attrs)["blockSlice"].replace("[", "").replace("]", "").split(",") 267 | xs, ys, _ = [(int(x.split(":")[0]), int(x.split(":")[1])) for x in pos] 268 | arr = d[()].squeeze() 269 | # now fill the image with the labeled pixels 270 | label_arr[slice(*xs), slice(*ys)] = arr 271 | 272 | _signals.append(train_arr[()]) 273 | _labels.append(label_arr) 274 | fi.close() 275 | 276 | # Save as numpy array 277 | signals = np.asarray(_signals) 278 | labels = np.asarray(_labels) 279 | np.savez_compressed(output_path, x=signals, y=labels) 280 | return (signals, labels) 281 | 282 | 283 | def convert_model_data( 284 | input_model_path: Path, 285 | output_model_path: Path, 286 | channels_to_retain: tp.List[int] = [-1], 287 | ) -> None: 288 | # For now this will assume all files were copied into H5 model 289 | # TODO: implement copying of h5 files with suffix if referenced to disk paths 290 | 291 | # After this, model should be reloaded in ilastik, 292 | # change one pixel in the training data and re-train 293 | 294 | if output_model_path is None: 295 | output_model_path = input_model_path.replace_(".ilp", ".converted.ilp") 296 | 297 | with open(output_model_path, "wb") as handle: 298 | handle.write(open(input_model_path, "rb").read()) 299 | 300 | f = h5py.File(output_model_path.as_posix(), mode="r+") 301 | 302 | shape = [v.shape for k, v in f["Input Data"]["local_data"].items()][0] 303 | print(f"Current shape of input data: {shape}") 304 | 305 | # Change shape of input data 306 | for k, v in f["Input Data"]["local_data"].items(): 307 | del f["Input Data"]["local_data"][k] 308 | from imc.segmentation import normalize 309 | 310 | f["Input Data"]["local_data"][k] = normalize(v[()][..., channels_to_retain]) 311 | 312 | shape = [v.shape for k, v in f["Input Data"]["local_data"].items()][0] 313 | print(f"Current shape of input data: {shape}") 314 | 315 | f.close() 316 | 317 | 318 | if __name__ == "__main__": 319 | try: 320 | sys.exit(main()) 321 | except KeyboardInterrupt: 322 | sys.exit(1) 323 | -------------------------------------------------------------------------------- /imc/scripts/inspect_mcds.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Inspect MCD files, reporting on their basic statistics, saving 5 | metadata as YAML files, and panel information as CSV files. 
6 | """ 7 | 8 | import sys 9 | import yaml 10 | import argparse 11 | from collections import OrderedDict 12 | import typing as tp 13 | 14 | import pandas as pd 15 | 16 | from imctools.io.mcd.mcdparser import McdParser 17 | 18 | from imc.types import Path, DataFrame, Args 19 | from imc.utils import cleanup_channel_names, build_channel_name 20 | from imc.scripts import build_cli, find_mcds 21 | 22 | 23 | def main(cli: tp.Sequence[str] = None) -> int: 24 | parser = build_cli("inspect") 25 | args = parser.parse_args(cli) 26 | if len(args.mcd_files) == 0: 27 | args.mcd_files = find_mcds() 28 | if len(args.mcd_files) == 0: 29 | print("MCD files were not provided and could not be found!") 30 | return 1 31 | 32 | fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.mcd_files]) 33 | print(f"Starting inspection step for {len(args.mcd_files)} MCD files:{fs}!") 34 | 35 | # Inspect each MCD 36 | metas = dict() 37 | _chs = list() 38 | for mcd_file in args.mcd_files: 39 | print(f"\tAnalyzing '{mcd_file}':") 40 | meta, ch = inspect_mcd(mcd_file, args) 41 | metas[mcd_file.as_posix()] = meta 42 | _chs.append(ch.assign(mcd_file=mcd_file)) 43 | print(f"\tFinished with '{mcd_file}'!") 44 | 45 | # Dump joint metadata 46 | if not args.no_write: 47 | yaml.dump( 48 | encode(metas), 49 | open(args.output_prefix + ".all_mcds.yaml", "w"), 50 | indent=4, 51 | default_flow_style=False, 52 | sort_keys=False, 53 | ) 54 | 55 | # Save joint panel info 56 | # join panels and reorder columns 57 | channels = pd.concat(_chs) 58 | channels = channels.reset_index().reindex( 59 | ["mcd_file", "channel"] + ch.columns.tolist(), axis=1 60 | ) 61 | # check if more than one panel present 62 | n_panels = channels.groupby("mcd_file")["channel"].sum().nunique() 63 | if n_panels == 1: 64 | print("All MCD files use same panel.") 65 | else: 66 | print(f"MCD files use different panels, {n_panels} in total.") 67 | 68 | if not args.no_write: 69 | channels.to_csv(args.output_prefix + ".all_mcds.channel_labels.csv", index=False) 70 | 71 | print("Finished inspect step!") 72 | return 0 73 | 74 | 75 | def inspect_mcd(mcd_file: Path, args: Args) -> tp.Tuple[DataFrame, DataFrame]: 76 | cols = [ 77 | "Target", 78 | "Metal_Tag", 79 | "Atom", 80 | "full", 81 | "ilastik", 82 | ] 83 | exclude_channels = ["EMPTY", "190BCKG", "80Ar", "89Y", "127I", "124Xe"] 84 | 85 | mcd = McdParser(mcd_file) 86 | session = mcd.session 87 | 88 | # get channel labels 89 | ac_ids = session.acquisition_ids 90 | labels = pd.DataFrame( 91 | { 92 | # ac_id: pd.Series(cleanup_channel_names( 93 | # session.acquisitions[ac_id].channel_labels 94 | # ).values, index=session.acquisitions[ac_id].channel_masses) 95 | ac_id: cleanup_channel_names(session.acquisitions[ac_id].channel_labels) 96 | for ac_id in ac_ids 97 | } 98 | ) 99 | # the below fails if ROIs have different lengths of metals 100 | # metals = pd.DataFrame( 101 | # {ac_id: session.acquisitions[ac_id].channel_names for ac_id in ac_ids} 102 | # ) 103 | metals = pd.DataFrame( 104 | [ 105 | pd.Series(session.acquisitions[ac_id].channel_names, name=ac_id) 106 | for ac_id in ac_ids 107 | ] 108 | ).T 109 | if metals.isnull().any().any(): 110 | print( 111 | "Some ROIs have less metals than the others. " 112 | "Keeping only ROIs with most metals." 
113 | ) 114 | metals = metals.loc[:, ~metals.isnull().any()] 115 | 116 | labels = labels.reindex(metals.columns, axis=1) 117 | 118 | channel_names = labels.replace({None: ""}) + "(" + metals + ")" 119 | 120 | same_channels = bool( 121 | channel_names.nunique(1).replace(0, 1).all() 122 | ) # np.bool is not serializable 123 | 124 | if same_channels: 125 | print("\t * All ROIs have the same markers/metals.") 126 | ch = channel_names.iloc[:, 0].rename("channel") 127 | ids = ch.str.extract(r"(?P.*)\((?P.*)\)") 128 | ids.index = ch 129 | 130 | annot = pd.DataFrame(ids, columns=cols) 131 | annot["Atom"] = annot["Metal_Tag"].str.extract(r"(\d+)")[0] 132 | annot["full"] = (~annot.index.str.contains("|".join(exclude_channels))).astype( 133 | int 134 | ) 135 | annot["ilastik"] = ( 136 | annot.index.str.contains("DNA") | annot.index.str.startswith("CD") 137 | ).astype(int) 138 | if not args.no_write: 139 | annot.to_csv(mcd_file.replace_(".mcd", ".channel_labels.csv")) 140 | else: 141 | annot = pd.DataFrame(columns=cols) 142 | print("\t * ROIs have different markers/metals.") 143 | 144 | # Save some metadata 145 | meta = session.get_csv_dict() 146 | meta["n_slides"] = len(session.slides) 147 | print(f"\t * Contains {meta['n_slides']} slides.") 148 | meta["n_panoramas"] = len(session.panoramas) 149 | print(f"\t * Contains {meta['n_panoramas']} panoramas.") 150 | meta["n_ROIs"] = len(session.acquisition_ids) 151 | print(f"\t * Contains {meta['n_ROIs']} ROIs.") 152 | meta["ROI_numbers"] = session.acquisition_ids 153 | meta["all_ROIs_same_channels"] = same_channels 154 | meta["consensus_channels"] = ( 155 | channel_names.iloc[:, 0].to_dict() if same_channels else None 156 | ) 157 | meta["panoramas"] = {p: v.get_csv_dict() for p, v in session.panoramas.items()} 158 | meta["acquisitions"] = { 159 | a: ac.get_csv_dict() for a, ac in session.acquisitions.items() 160 | } 161 | meta.update(session.metadata) 162 | if not args.no_write: 163 | yaml.dump( 164 | encode(meta), 165 | open(mcd_file.replace_(".mcd", ".session_metadata.yaml"), "w"), 166 | indent=4, 167 | default_flow_style=False, 168 | sort_keys=False, 169 | ) 170 | 171 | mcd.close() 172 | return meta, annot 173 | 174 | 175 | def encode(obj: tp.Any) -> tp.Any: 176 | """ 177 | For serializing to JSON or YAML with no special Python object references. 178 | 179 | Not fit for roundtrip! 180 | """ 181 | if isinstance(obj, bool): 182 | return str(obj).lower() 183 | if isinstance(obj, (list, tuple)): 184 | return [encode(item) for item in obj] 185 | if isinstance(obj, (dict, OrderedDict)): 186 | return {encode(key): encode(value) for key, value in obj.items()} 187 | return obj 188 | 189 | 190 | if __name__ == "__main__": 191 | try: 192 | sys.exit(main()) 193 | except KeyboardInterrupt: 194 | sys.exit(1) 195 | -------------------------------------------------------------------------------- /imc/scripts/phenotype.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Phenotype cells. 
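Reads a quantified single-cell matrix (h5ad), clusters cells, and optionally
matches expression against reference cell types
(see `predict_cell_types_from_reference` usage below).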
5 | """ 6 | 7 | import sys 8 | import typing as tp 9 | 10 | import pandas as pd 11 | 12 | from imc.ops.clustering import ( 13 | phenotyping, 14 | # plot_phenotyping, 15 | predict_cell_types_from_reference, 16 | ) 17 | from imc.scripts import build_cli 18 | from imc.utils import filter_kwargs_by_callable 19 | 20 | 21 | def main(cli: tp.Sequence[str] = None) -> int: 22 | parser = build_cli("phenotype") 23 | args = parser.parse_args(cli) 24 | print("Starting phenotyping step!") 25 | 26 | args.channels_include = ( 27 | args.channels_include.split(",") if args.channels_include is not None else None 28 | ) 29 | args.channels_exclude = args.channels_exclude.split(",") 30 | args.dim_res_algos = args.dim_res_algos.split(",") 31 | args.clustering_resolutions = list(map(float, args.clustering_resolutions.split(","))) 32 | args.output_dir.mkdir() 33 | 34 | if args.compute: 35 | print(f"Phenotyping quantified cells in '{args.a}'.") 36 | pkwargs = filter_kwargs_by_callable(args.__dict__, phenotyping) 37 | a = phenotyping(**pkwargs) 38 | a.write(args.output_dir / "processed.h5ad") 39 | # Save for project: 40 | # prj.get_input_filename("cell_cluster_assignments") 41 | 42 | # Cell type identity 43 | # TODO: connect options to CLI 44 | print("Matching expression to reference cell types.") 45 | df = a.raw.to_adata().to_df()[a.var.index[~a.var.index.str.contains("EMPTY")]] 46 | df = df.loc[:, df.var() > 0] 47 | cov = pd.get_dummies(a.obs[args.batch_variable]) 48 | preds = predict_cell_types_from_reference(df, args.output_dir, covariates=cov) 49 | a.obs = a.obs.join(preds) 50 | a.write(args.output_dir / "processed.h5ad") 51 | 52 | # grid = clustermap(a.to_df().groupby(a.obs['cell_type']).mean()) 53 | # grid = clustermap(a.obs.corr(), cmap='RdBu_r', center=0) 54 | 55 | # if args.plot: 56 | # print(f"Plotting phenotypes in directory '{args.output_dir}'.") 57 | # output_prefix = args.output_dir / "phenotypes." 58 | # if args.compute: 59 | # args.a = a 60 | # pkwargs = filter_kwargs_by_callable(args.__dict__, plot_phenotyping) 61 | # plot_phenotyping(output_prefix=output_prefix, **pkwargs) 62 | 63 | print("Finished phenotyping step.") 64 | return 0 65 | 66 | 67 | if __name__ == "__main__": 68 | try: 69 | sys.exit(main()) 70 | except KeyboardInterrupt: 71 | sys.exit(1) 72 | -------------------------------------------------------------------------------- /imc/scripts/predict.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Generate probablity maps for each pixel in each image. 
5 | """ 6 | 7 | import sys 8 | import typing as tp 9 | 10 | from imc import ROI 11 | from imc.types import Path 12 | from imc.scripts import build_cli, find_tiffs 13 | from imc.utils import download_file, run_shell_command 14 | 15 | 16 | def main(cli: tp.Sequence[str] = None) -> int: 17 | """Generate probability maps for each ROI using ilastik.""" 18 | parser = build_cli("predict") 19 | args = parser.parse_args(cli) 20 | if not args.tiffs: 21 | args.tiffs = find_tiffs() 22 | if not args.tiffs: 23 | print("Input files were not provided and cannot be found!") 24 | return 1 25 | 26 | fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.tiffs]) 27 | print(f"Starting predict step for {len(args.tiffs)} TIFF files:{fs}!") 28 | 29 | # Prepare ROI objects 30 | rois = list() 31 | for tiff in args.tiffs: 32 | roi = ROI.from_stack(tiff) 33 | out = roi.get_input_filename("probabilities") 34 | if not args.overwrite and out.exists(): 35 | continue 36 | rois.append(roi) 37 | 38 | if not rois: 39 | print("All output predictions exist. Skipping prediction step.") 40 | return 0 41 | 42 | # Get resources 43 | ilastik_sh = get_ilastik(args.lib_dir) 44 | if args.custom_model is None: 45 | model_ilp = get_model(args.models_dir, args.ilastik_model_version) 46 | else: 47 | model_ilp = args.custom_model 48 | 49 | # Predict 50 | print("Starting ilastik pixel classification.") 51 | tiff_files = [roi.get_input_filename("ilastik_input") for roi in rois] 52 | predict_with_ilastik(tiff_files, ilastik_sh, model_ilp, args.quiet) 53 | 54 | for roi in rois: 55 | _in = roi.root_dir / roi.name + "_ilastik_s2_Probabilities.tiff" 56 | if _in.exists(): 57 | _in.rename(roi.get_input_filename("probabilities")) 58 | 59 | if args.cleanup: 60 | for roi in rois: 61 | roi.get_input_filename("ilastik_input").unlink() 62 | 63 | print("Finished predict step!") 64 | return 0 65 | 66 | 67 | def predict_with_ilastik( 68 | tiff_files: tp.Sequence[Path], ilastik_sh: Path, model_ilp: Path, quiet: bool = True 69 | ) -> int: 70 | """ 71 | Use a trained ilastik model to classify pixels in an IMC image. 72 | """ 73 | quiet_arg = "\n --redirect_output /dev/null \\" if quiet else "" 74 | cmd = f"""{ilastik_sh} \\ 75 | --headless \\ 76 | --readonly \\ 77 | --export_source probabilities \\{quiet_arg} 78 | --project {model_ilp} \\ 79 | """ 80 | # Shell expansion of input files won't happen in subprocess call 81 | cmd += " ".join([x.replace_(" ", r"\ ").as_posix() for x in tiff_files]) 82 | return run_shell_command(cmd, quiet=True) 83 | 84 | 85 | def get_ilastik(lib_dir: Path, version: str = "1.3.3post2") -> Path: 86 | """Download ilastik software.""" 87 | import tarfile 88 | 89 | base_url = "https://files.ilastik.org/" 90 | 91 | if sys.platform.startswith("linux"): 92 | _os = "Linux" 93 | file = f"ilastik-{version}-{_os}.tar.bz2" 94 | f = lib_dir / f"ilastik-{version}-{_os}" / "run_ilastik.sh" 95 | elif sys.platform.startswith("darwin"): 96 | _os = "OSX" 97 | file = f"ilastik-{version}-{_os}.tar.bz2" 98 | f = ( 99 | lib_dir 100 | / f"ilastik-{version}-{_os}.app" 101 | / "Contents" 102 | / "ilastik-release" 103 | / "run_ilastik.sh" 104 | ) 105 | else: 106 | raise NotImplementedError( 107 | "ilastik command line use is only available for Linux and MacOS!" 
108 | ) 109 | 110 | if not f.exists(): 111 | lib_dir.mkdir() 112 | print("Downloading ilastik archive.") 113 | download_file(base_url + file, lib_dir / file) 114 | print("Extracting ilastik archive.") 115 | with tarfile.open(lib_dir / file, "r:bz2") as tar: 116 | tar.extractall(lib_dir) 117 | (lib_dir / file).unlink() 118 | return f 119 | 120 | 121 | def get_model(models_dir: Path, version: str = "20210302") -> Path: 122 | """Download pre-trained ilastik model.""" 123 | import tarfile 124 | 125 | versions = { 126 | "20210302": "https://wcm.box.com/shared/static/1q41oshxe76b1uzt1b12etbq3l5dyov4.ilp" 127 | } 128 | 129 | url = versions[version] 130 | file = f"pan_dataset.{version}.ilp" 131 | 132 | f = models_dir / file 133 | if not f.exists(): 134 | models_dir.mkdir() 135 | print("Downloading ilastik model.") 136 | download_file(url, f) 137 | return f 138 | 139 | 140 | if __name__ == "__main__": 141 | try: 142 | sys.exit(main()) 143 | except KeyboardInterrupt: 144 | sys.exit(1) 145 | -------------------------------------------------------------------------------- /imc/scripts/prepare.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Convert MCD files to TIFF and Sample/ROI structure. 5 | """ 6 | 7 | import sys 8 | import typing as tp 9 | 10 | import numpy as np 11 | import tifffile 12 | 13 | from imc import ROI 14 | from imc.scripts import build_cli 15 | from imc.segmentation import prepare_stack 16 | from imc.utils import ( 17 | mcd_to_dir, 18 | plot_panoramas_rois, 19 | stack_to_ilastik_h5, 20 | txt_to_tiff, 21 | filter_kwargs_by_callable, 22 | ) 23 | 24 | 25 | MCD_FILE_ENDINGS = (".mcd", ".MCD") 26 | TIFF_FILE_ENDINGS = (".tiff", ".TIFF", ".tif", ".TIF") 27 | TXT_FILE_ENDINGS = (".txt", ".TXT") 28 | 29 | 30 | def main(cli: tp.Sequence[str] = None) -> int: 31 | parser = build_cli("prepare") 32 | args = parser.parse_args(cli) 33 | 34 | if not args.pannel_csvs: 35 | args.pannel_csvs = [None] * len(args.input_files) 36 | elif len(args.pannel_csvs) == 1: 37 | args.pannel_csvs = args.pannel_csvs * len(args.input_files) 38 | else: 39 | assert len(args.input_files) == len(args.pannel_csvs) 40 | 41 | if (args.sample_names is None) or (len(args.input_files) != len(args.sample_names)): 42 | args.sample_names = [None] * len(args.input_files) 43 | 44 | args.compression = getattr(tifffile.TIFF.COMPRESSION, args.compression) 45 | 46 | mcds = [file for file in args.input_files if file.endswith(MCD_FILE_ENDINGS)] 47 | tiffs = [file for file in args.input_files if file.endswith(TIFF_FILE_ENDINGS)] 48 | txts = [file for file in args.input_files if file.endswith(TXT_FILE_ENDINGS)] 49 | if mcds and (tiffs or txts): 50 | raise ValueError( 51 | "Mixture of MCD and TIFFs/TXTs were given. " 52 | "Not yet supported, please run prepare step for each file type separately." 53 | ) 54 | 55 | if not args.quiet: 56 | ... 
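    # Editorial note: per MCD file, the conversion below reduces to a call of
    # the form mcd_to_dir(mcd_file=..., output_dir=...), with the remaining CLI
    # options filtered down to mcd_to_dir's signature via
    # filter_kwargs_by_callable (see the `sargs` assembly below).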
57 | 58 | fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.input_files]) 59 | print(f"Starting prepare step for {len(args.input_files)} files:{fs}!") 60 | 61 | for mcd_file, pannel_csv, sample_name in zip( 62 | mcds, args.pannel_csvs, args.sample_names 63 | ): 64 | sargs = args.__dict__.copy() 65 | sargs["mcd_file"] = mcd_file 66 | sargs["pannel_csv"] = pannel_csv 67 | sargs["sample_name"] = sample_name 68 | sargs["output_dir"] = args.root_output_dir / mcd_file.stem 69 | sargs = {k: v for k, v in sargs.items() if v is not None} 70 | sargs = filter_kwargs_by_callable(sargs, mcd_to_dir) 71 | 72 | print(f"Started analyzing '{mcd_file}'.") 73 | mcd_to_dir(**sargs) 74 | 75 | # Plot ROI positions on panoramas and slide 76 | plot_panoramas_rois( 77 | yaml_spec=mcd_file.replace_(".mcd", ".session_metadata.yaml"), 78 | output_prefix=args.root_output_dir / mcd_file.stem / mcd_file.stem + ".", 79 | panorama_image_prefix=args.root_output_dir / mcd_file.stem / "Panorama_", 80 | save_roi_arrays=False, 81 | overwrite=args.overwrite, 82 | ) 83 | print(f"Finished with '{mcd_file}'.") 84 | 85 | for txt in txts: 86 | print(f"Preparing TXT file: '{txt}'.") 87 | name = txt.name.replace(".txt", "") 88 | tiff_f = args.root_output_dir / name / "tiffs" / name + "_full.tiff" 89 | tiff_f.parent.mkdir() 90 | txt_to_tiff(txt, tiff_f, write_channel_labels=True) 91 | tiffs.append(tiff_f) 92 | 93 | for tiff in tiffs: 94 | roi = ROI.from_stack(tiff) 95 | ilastik_input = tiff.replace_("_full.tiff", "_ilastik_s2.h5") 96 | if (not ilastik_input.exists()) or args.overwrite: 97 | print(f"Preparing TIFF file: '{tiff}'.") 98 | s = prepare_stack(roi.stack, roi.channel_labels) 99 | _ = stack_to_ilastik_h5(s[np.newaxis, ...], ilastik_input) 100 | 101 | print("Finished prepare step!") 102 | return 0 103 | 104 | 105 | if __name__ == "__main__": 106 | try: 107 | sys.exit(main()) 108 | except KeyboardInterrupt: 109 | sys.exit(1) 110 | -------------------------------------------------------------------------------- /imc/scripts/process.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Process raw IMC files end-to-end. 
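Chains the individual steps (inspect, prepare, predict, segment, quantify,
phenotype) over MCD/TIFF/TXT inputs or URLs, e.g. `imc process data/sample.mcd`
(illustrative file name); step order and per-step defaults are set in
`process_step_order` and `DEFAULT_STEP_ARGS` below.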
5 | """ 6 | 7 | import sys 8 | import typing as tp 9 | import json 10 | from collections import defaultdict 11 | import time 12 | import warnings 13 | 14 | from urlpath import URL 15 | 16 | from imc.types import Path 17 | from imc.scripts import build_cli, find_mcds, find_tiffs 18 | from imc.scripts.inspect_mcds import main as inspect 19 | from imc.scripts.prepare import main as prepare 20 | from imc.scripts.predict import main as predict 21 | from imc.scripts.segment_stacks import main as segment 22 | from imc.scripts.quantify import main as quantify 23 | from imc.scripts.phenotype import main as phenotype 24 | from imc.utils import download_file 25 | 26 | 27 | DATA_DIR = Path("data") 28 | PROCESSED_DIR = Path("processed") 29 | MCD_FILE_ENDINGS = (".mcd", ".MCD") 30 | TIFF_FILE_ENDINGS = (".tiff", ".TIFF", ".tif", ".TIF") 31 | TXT_FILE_ENDINGS = (".txt", ".TXT") 32 | DEFAULT_STEP_ARGS = { 33 | "prepare": ["--ilastik", "--n-crops", "0", "--ilastik-compartment", "nuclear"], 34 | "segment": ["--from-probabilities", "--model", "deepcell", "--compartment", "both"], 35 | } 36 | process_step_order = ["inspect", "prepare", "predict", "segment", "quantify", "phenotype"] 37 | opts = defaultdict(list) 38 | for k, v in DEFAULT_STEP_ARGS.items(): 39 | opts[k] = v 40 | 41 | 42 | def main(cli: tp.Sequence[str] = None) -> int: 43 | parser = build_cli("process") 44 | args = parser.parse_args(cli) 45 | 46 | if args.quiet: 47 | warnings.filterwarnings("ignore") 48 | 49 | if not args.files: 50 | print( 51 | "No input files were given, " 52 | "searching for MCD files under current directory." 53 | ) 54 | args.files = find_mcds() 55 | if not args.files: 56 | print("No MCD files found. Searching for TIFF files.") 57 | args.files = find_tiffs() 58 | if not args.files: 59 | print( 60 | "No input files could be found. Specify them manually: " 61 | "`imc process $FILE`." 
62 |                 )
63 |                 return 1
64 | 
65 |     # If URLs were provided, download the files
66 |     urls = list(map(URL, filter(is_url, args.files)))
67 |     args.files = list(filter(lambda x: not is_url(x), args.files))
68 |     args.files = [Path(x).absolute().resolve() for x in args.files]
69 | 
70 |     missing = [f for f in args.files if not f.exists()]
71 |     if missing:
72 |         fs = "\n\t- ".join(map(str, missing))
73 |         print(f"Could not find the following input files:\n\t- {fs}")
74 |         return 1
75 | 
76 |     for url in urls:
77 |         print(f"Given URL as input, will download '{url.name}'.")
78 |         if url.name.endswith(MCD_FILE_ENDINGS):
79 |             f = DATA_DIR.mkdir() / url.name
80 |         elif url.name.endswith(TIFF_FILE_ENDINGS):
81 |             f = PROCESSED_DIR.mkdir() / url.name
82 |         else:
83 |             f = DATA_DIR.mkdir() / url.name  # TXT files and any other endings
84 |         if not f.exists():
85 |             print(f"Downloading file '{url}' into '{f}'.")
86 |             download_file(url.as_posix(), f)
87 |             print("Completed.")
88 |         args.files.append(f)
89 | 
90 |     # Figure out which steps are going to be done
91 |     if args.steps is None:
92 |         args.steps = process_step_order
93 |     else:
94 |         args.steps = args.steps.split(",")
95 |         assert all(x in process_step_order for x in args.steps)
96 |     if args.start_step is not None:
97 |         args.steps = args.steps[args.steps.index(args.start_step) :]
98 |     if args.stop_step is not None:
99 |         args.steps = args.steps[: args.steps.index(args.stop_step) + 1]
100 | 
101 |     # Load config
102 |     if args.config is not None:
103 |         with open(args.config) as h:
104 |             opts.update(json.load(h))
105 | 
106 |     fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.files])
107 |     print(f"Starting processing of {len(args.files)} files:{fs}!")
108 |     steps_s = "\n\t- ".join(args.steps)
109 |     print(f"Will run the following steps:\n\t- {steps_s}\n")
110 |     time.sleep(1)
111 | 
112 |     mcds = [file for file in args.files if file.endswith(MCD_FILE_ENDINGS)]
113 |     mcds_s = list(map(str, mcds))
114 |     tiffs = [file for file in args.files if file.endswith(TIFF_FILE_ENDINGS)]
115 |     tiffs_s = list(map(str, tiffs))
116 |     txts = [file for file in args.files if file.endswith(TXT_FILE_ENDINGS)]
117 |     txts_s = list(map(str, txts))
118 |     if "inspect" in args.steps and mcds:
119 |         inspect(opts["inspect"] + mcds_s)
120 |     if "prepare" in args.steps:
121 |         prepare(opts["prepare"] + mcds_s + tiffs_s + txts_s)
122 | 
123 |     # Now run the remaining steps for all files
124 |     new_tiffs = list()
125 |     for mcd in mcds:
126 |         new_tiffs += list(
127 |             (PROCESSED_DIR / mcd.stem / "tiffs").glob(f"{mcd.stem}*_full.tiff")
128 |         )
129 |     for txt in txts:
130 |         name = txt.name.replace(".txt", "")
131 |         tiff_f = PROCESSED_DIR / name / "tiffs" / name + "_full.tiff"
132 |         new_tiffs += [tiff_f]
133 |     tiffs = sorted(list(map(str, set(tiffs + new_tiffs))))
134 | 
135 |     s_parser = build_cli("segment")
136 |     s_args = s_parser.parse_args(opts["segment"] + tiffs)
137 |     reason = (
138 |         f"Skipping predict step as segmentation model '{s_args.model}' does not need it."
139 | ) 140 | if "predict" in args.steps: 141 | if s_args.model == "deepcell": 142 | out = predict(opts["predict"] + tiffs) 143 | if out: 144 | return out 145 | else: 146 | print(reason) 147 | if "segment" in args.steps: 148 | segment(opts["segment"] + tiffs) 149 | if "quantify" in args.steps: 150 | quantify(opts["quantify"] + tiffs) 151 | h5ad_f = "processed/quantification.h5ad" 152 | if "phenotype" in args.steps: 153 | phenotype(opts["phenotype"] + [h5ad_f]) 154 | 155 | print("Finished processing!") 156 | return 0 157 | 158 | 159 | def is_url(x: str) -> bool: 160 | from urllib.parse import urlparse 161 | 162 | if isinstance(x, Path): 163 | x = x.as_posix() 164 | 165 | try: 166 | result = urlparse(x) 167 | return all([result.scheme, result.netloc]) 168 | except: 169 | return False 170 | 171 | 172 | if __name__ == "__main__": 173 | try: 174 | sys.exit(main()) 175 | except KeyboardInterrupt: 176 | sys.exit(1) 177 | -------------------------------------------------------------------------------- /imc/scripts/quantify.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Quantify images in stacks. 5 | """ 6 | 7 | import sys 8 | import typing as tp 9 | 10 | import numpy as np 11 | import anndata 12 | 13 | from imc import ROI 14 | from imc.types import Path 15 | from imc.ops.quant import quantify_cells_rois 16 | from imc.scripts import build_cli, find_tiffs 17 | 18 | def main(cli: tp.Sequence[str] = None) -> int: 19 | parser = build_cli("quantify") 20 | args = parser.parse_args(cli) 21 | if not args.tiffs: 22 | args.tiffs = sorted(find_tiffs()) 23 | if not args.tiffs: 24 | print("Input files were not provided and cannot be found!") 25 | return 1 26 | 27 | fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.tiffs]) 28 | print(f"Starting quantification step for {len(args.tiffs)} TIFF files:{fs}!") 29 | 30 | # Prepare ROI objects 31 | rois = list() 32 | for tiff in args.tiffs: 33 | roi = ROI.from_stack(tiff) 34 | roi.set_channel_exclude(args.channel_exclude.split(",")) 35 | rois.append(roi) 36 | 37 | missing = [r.name for r in rois if not r.get_input_filename("stack").exists()] 38 | if missing: 39 | m = "\n\t- ".join(missing) 40 | error = f"Not all stacks exist! Missing:\n\t- {m}" 41 | raise ValueError(error) 42 | missing = [r.name for r in rois if not r.get_input_filename("cell_mask").exists()] 43 | if missing: 44 | m = "\n\t- ".join(missing) 45 | error = f"Not all cell masks exist! 
Missing:\n\t- {m}"
46 |         raise ValueError(error)
47 | 
48 |     quant = quantify_cells_rois(
49 |         rois, args.layers.split(","), morphology=args.morphology
50 |     ).reset_index()
51 | 
52 |     # reorder columns so identifiers and coordinates come first
53 |     ext = ["roi", "obj_id"] + (["X_centroid", "Y_centroid"] if args.morphology else [])
54 |     rem = [x for x in quant.columns if x not in ext]
55 |     quant = quant[ext + rem]
56 | 
57 |     if args.output is None:
58 |         f = Path("processed").mkdir() / "quantification.csv.gz"
59 |     else:
60 |         f = Path(args.output)
61 |     quant.to_csv(f, index=False)
62 |     print(f"Wrote CSV file to '{f.absolute()}'.")
63 | 
64 |     if args.output_h5ad:
65 |         v = len(str(quant["obj_id"].max()))
66 |         idx = quant["roi"] + "-" + quant["obj_id"].astype(str).str.zfill(v)
67 |         quant.index = idx
68 | 
69 |         cols = ["sample", "roi", "obj_id", "X_centroid", "Y_centroid", "layer"]
70 |         cols = [c for c in cols if c in quant.columns]
71 |         ann = anndata.AnnData(
72 |             quant.drop(cols, axis=1, errors="ignore").astype(float), obs=quant[cols]
73 |         )
74 |         if "X_centroid" in ann.obs.columns:
75 |             ann.obsm["spatial"] = ann.obs[["Y_centroid", "X_centroid"]].values
76 |         f = f.replace_(".csv.gz", ".h5ad")
77 |         ann.write(f)
78 |         print(f"Wrote h5ad file to '{f.absolute()}'.")
79 |         ann2 = anndata.read(f)
80 |         assert np.allclose(ann.X, ann2.X)
81 | 
82 |     print("Finished quantification step.")
83 |     return 0
84 | 
85 | 
86 | if __name__ == "__main__":
87 |     try:
88 |         sys.exit(main())
89 |     except KeyboardInterrupt:
90 |         sys.exit(1)
91 | 
--------------------------------------------------------------------------------
/imc/scripts/segment_stacks.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | """
4 | Segment image stacks.
5 | """
6 | 
7 | import sys
8 | import argparse
9 | import typing as tp
10 | from dataclasses import dataclass
11 | 
12 | import numpy as np
13 | import pandas as pd
14 | import tifffile
15 | import matplotlib.pyplot as plt
16 | 
17 | from imc import ROI
18 | from imc.types import Path, Series, Array
19 | from imc.segmentation import segment_roi, plot_image_and_mask
20 | from imc.scripts import build_cli, find_tiffs
21 | 
22 | 
23 | def main(cli: tp.Sequence[str] = None) -> int:
24 |     parser = build_cli("segment")
25 |     args = parser.parse_args(cli)
26 |     if len(args.tiffs) == 0:
27 |         args.tiffs = find_tiffs()
28 |         if len(args.tiffs) == 0:
29 |             print("TIFF files were not provided and could not be found!")
30 |             return 1
31 | 
32 |     fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.tiffs])
33 |     print(f"Starting segmentation step for {len(args.tiffs)} TIFF files:{fs}!")
34 | 
35 |     # Prepare ROI objects
36 |     rois = list()
37 |     for tiff in args.tiffs:
38 |         roi = ROI.from_stack(tiff)
39 |         roi.set_channel_exclude(args.channel_exclude.split(","))
40 |         rois.append(roi)
41 | 
42 |     # Run segmentation
43 |     for roi in rois:
44 |         if args.compartment == "both":
45 |             mask_files = {
46 |                 "cell": roi.get_input_filename("cell_mask"),
47 |                 "nuclei": roi.get_input_filename("nuclei_mask"),
48 |             }
49 |         else:
50 |             mask_files = {
51 |                 args.compartment: roi.get_input_filename(args.compartment + "_mask")
52 |             }
53 |         exists = all(f.exists() for f in mask_files.values())
54 |         if exists and not args.overwrite:
55 |             print(f"Mask for '{roi}' already exists, skipping...")
56 |             continue
57 | 
58 |         print(f"Started segmentation of '{roi}' with shape {roi.stack.shape}.")
59 |         try:
60 |             _ = segment_roi(
61 |                 roi,
62 |                 from_probabilities=args.from_probabilities,
63 |                 model=args.model,
64 | 
compartment=args.compartment, 65 | postprocessing=args.postprocessing, 66 | save=args.save, 67 | overwrite=args.overwrite, 68 | plot_segmentation=args.plot, 69 | verbose=not args.quiet, 70 | ) 71 | except ValueError as e: 72 | print("Error segmenting stack. Perhaps XY shape is not compatible?") 73 | print(e) 74 | continue 75 | print(f"Finished segmentation of '{roi}'.") 76 | 77 | print("Finished segmentation step!") 78 | return 0 79 | 80 | 81 | if __name__ == "__main__": 82 | try: 83 | sys.exit(main()) 84 | except KeyboardInterrupt: 85 | sys.exit(1) 86 | -------------------------------------------------------------------------------- /imc/scripts/view.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | View multiplexed TIFF files interactively. 5 | """ 6 | 7 | import sys 8 | import time 9 | import typing as tp 10 | 11 | import matplotlib.pyplot as plt 12 | 13 | from imc import ROI 14 | from imc.graphics import InteractiveViewer 15 | from imc.scripts import build_cli 16 | 17 | 18 | def main(cli: tp.Sequence[str] = None) -> int: 19 | parser = build_cli("view") 20 | args = parser.parse_args(cli) 21 | if len(args.input_files) == 0: 22 | print("Input files were not provided and could not be found!") 23 | return 1 24 | 25 | kwargs = {} 26 | if args.kwargs is not None: 27 | print(args.kwargs) 28 | params = [x.split("=") for x in args.kwargs.split(",")] 29 | kwargs = {y[0]: y[1] for y in params} 30 | 31 | fs = "\n\t- " + "\n\t- ".join([f.as_posix() for f in args.input_files]) 32 | print(f"Starting viewers for {len(args.input_files)} files: {fs}!") 33 | 34 | if args.napari: 35 | assert all( 36 | f.endswith(".mcd") for f in args.input_files 37 | ), "If using napari input must be MCD files!" 38 | import napari 39 | 40 | viewer = napari.Viewer() 41 | viewer.open(args.input_files) 42 | napari.run() 43 | return 0 44 | 45 | assert all( 46 | f.endswith((".tiff", ".tif")) for f in args.input_files 47 | ), "Input must be TIFF files!" 48 | 49 | # Prepare ROI objects 50 | rois = [ROI.from_stack(tiff) for tiff in args.input_files] 51 | 52 | # Generate viewer instances 53 | viewers = list() 54 | for roi in rois: 55 | view = InteractiveViewer( 56 | roi, 57 | up_key=args.up_key, 58 | down_key=args.down_key, 59 | log_key=args.log_key, 60 | **kwargs, 61 | ) 62 | viewers.append(view) 63 | 64 | print( 65 | f"Press '{args.up_key}' and '{args.down_key}' to scroll through image channels." 66 | + f" '{args.log_key}' to toggle logarithmic transformation." 
67 | ) 68 | time.sleep(2) 69 | for view in viewers: 70 | view.fig.show() 71 | plt.show(block=True) 72 | 73 | print("Terminating!") 74 | return 0 75 | 76 | 77 | if __name__ == "__main__": 78 | try: 79 | sys.exit(main()) 80 | except KeyboardInterrupt: 81 | sys.exit(1) 82 | -------------------------------------------------------------------------------- /imc/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ElementoLab/imc/9725b3ab72f2273cb4a702964fa8518c2f189e9c/imc/tests/__init__.py -------------------------------------------------------------------------------- /imc/tests/_test_layers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | import tifffile 6 | import anndata 7 | import scanpy as sc 8 | import scipy.ndimage as ndi 9 | 10 | from imc import Project 11 | from imc.graphics import random_label_cmap 12 | 13 | layer_names = ["cell", "nuclei", "cytoplasm", "membrane", "extracellular"] 14 | 15 | prj = Project() 16 | 17 | roi = prj.rois[25] 18 | fig, axes = plt.subplots(1, 5, figsize=(5 * 4, 4), sharex=True, sharey=True) 19 | cmap = random_label_cmap() 20 | for i, layer in enumerate(layer_names): 21 | mask = getattr(roi, layer + "_mask") 22 | mask = np.ma.masked_array(mask, mask=mask == 0) 23 | axes[i].imshow(mask, cmap=cmap) 24 | axes[i].set(title=layer) 25 | axes[i].axis("off") 26 | 27 | 28 | prj.rois = prj.rois[25:27] 29 | quant = prj.quantify_cells(layers=layer_names, set_attribute=False) 30 | 31 | 32 | quant = quant.reset_index().melt(id_vars=["roi", "obj_id", "layer"], var_name="channel") 33 | quant = quant.pivot_table( 34 | index=["roi", "obj_id"], columns=["layer", "channel"], values="value" 35 | ) 36 | quant = quant.reset_index() 37 | 38 | X = quant.loc[:, layer_names[0]] 39 | obs = quant[["roi", "obj_id"]] 40 | obs["in_tissue"] = 1 41 | obs["array_row"] = ... 42 | obs["array_col"] = ... 
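   | # Note: in_tissue/array_row/array_col mimic the `obs` columns of a 10x
   | # Visium dataset as read by scanpy; the values above are placeholders.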
43 | # obs.columns = ["roi", "obj_id"]  # disabled: obs now has five columns, renaming to two would fail
44 | layers = quant.loc[:, layer_names[1:]]
45 | 
46 | a = anndata.AnnData(
47 |     X=X.reset_index(drop=True),
48 |     obs=obs,
49 |     layers={l: layers[l] for l in layer_names[1:]},
50 | )
51 | 
52 | a = anndata.AnnData(X=quant.drop(["roi", "obj_id"], axis=1), obs=obs)
53 | 
54 | a.uns["spatial"] = {}  # initialize before filling per-ROI entries below
55 | for roi in prj.rois:
56 |     a.uns["spatial"][roi.name] = {
57 |         "images": {"hires": roi.stack},
58 |         "metadata": {},
59 |         "scalefactors": {
60 |             "spot_diameter_fullres": 89.56665687930325,
61 |             "tissue_hires_scalef": 0.150015,
62 |             "fiducial_diameter_fullres": 144.6845995742591,
63 |             "tissue_lowres_scalef": 0.045004502,
64 |         },
65 |     }
66 | 
67 | 
68 | sc.pp.log1p(a)
69 | sc.pp.scale(a)
70 | sc.pp.pca(a)
71 | 
--------------------------------------------------------------------------------
/imc/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | 
3 | from imc.demo import generate_project
4 | 
5 | 
6 | # # To run manually:
7 | # import tempfile
8 | # tmp_path = tempfile.TemporaryDirectory().name
9 | 
10 | 
11 | @pytest.fixture
12 | def project(tmp_path):
13 |     return generate_project(root_dir=tmp_path)
14 | 
15 | 
16 | @pytest.fixture
17 | def metadata(project):
18 |     return project.sample_metadata
19 | 
20 | 
21 | @pytest.fixture
22 | def project_with_clusters(tmp_path):
23 |     p = generate_project(root_dir=tmp_path)
24 |     p.quantify_cells()
25 |     c = (
26 |         p.quantification.set_index(["sample", "roi"], append=True)
27 |         .rename_axis(["obj_id", "sample", "roi"])
28 |         .reorder_levels([1, 2, 0])
29 |         .assign(cluster=(p.quantification.index % 2))["cluster"]
30 |     )
31 |     p.set_clusters(c, write_to_disk=True)
32 |     return p
33 | 
--------------------------------------------------------------------------------
/imc/tests/test_full_analysis.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | 
3 | 
4 | class TestHighOrderFunctions:
5 |     # @pytest.mark.slow
6 |     @pytest.mark.xfail
7 |     def test_cluster_cells(self, project):
8 |         project.cluster_cells()
9 | 
10 |     @pytest.mark.slow
11 |     def test_measure_adjacency(self, project_with_clusters):
12 |         files = [
13 |             "cluster_adjacency_graph.frequencies.csv",
14 |             "cluster_adjacency_graph.norm_over_random.clustermap.svg",
15 |             "cluster_adjacency_graph.norm_over_random.csv",
16 |             "cluster_adjacency_graph.norm_over_random.heatmap.svg",
17 |             "cluster_adjacency_graph.random_frequencies.all_iterations_100.csv",
18 |             "cluster_adjacency_graph.random_frequencies.csv",
19 |             "neighbor_graph.gpickle",
20 |             "neighbor_graph.svg",
21 |         ]
22 | 
23 |         with project_with_clusters as prj:
24 |             adj = prj.measure_adjacency()
25 |             assert (
26 |                 prj.results_dir / "single_cell" / "project.adjacency_frequencies.csv"
27 |             ).exists()
28 |             assert adj.shape == (36, 5)
29 |             assert ~adj.isnull().any().any()
30 | 
31 |             for roi in prj.rois:
32 |                 prefix = roi.sample.root_dir / "single_cell" / roi.name + "."
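   |                 # `roi.name + "."` works because imc.types.Path overloads
   |                 # `+` for string concatenation (see imc/types.py).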
33 | for file in files: 34 | assert (prefix + file).exists() 35 | -------------------------------------------------------------------------------- /imc/tests/test_graphics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | 5 | from matplotlib.image import AxesImage 6 | from matplotlib.legend import Legend 7 | 8 | 9 | class TestCellTypePlotting: 10 | def test_clusters_labeled_with_numbers(self, project_with_clusters): 11 | p = project_with_clusters 12 | 13 | # # make pattern: "int (1-based) - str" 14 | c = (p.clusters + 1).astype(str) + " - " + (p.clusters + 1).astype(str) 15 | p.set_clusters(c) 16 | 17 | # Plot both clusters 18 | roi = p.rois[0] 19 | fig1 = roi.plot_cell_types() 20 | 21 | # Remove first cluster 22 | c2 = roi.clusters.copy() 23 | for e in c2.index: 24 | c2[e] = roi.clusters.max() 25 | roi.set_clusters(c2) 26 | fig2 = roi.plot_cell_types() 27 | 28 | # Get arrays back from images 29 | a1 = [i for i in fig1.axes[0].get_children() if isinstance(i, AxesImage)] 30 | a1 = [a for a in a1 if len(a.get_array().shape) == 3][0].get_array() 31 | a2 = [i for i in fig2.axes[0].get_children() if isinstance(i, AxesImage)] 32 | a2 = [a for a in a2 if len(a.get_array().shape) == 3][0].get_array() 33 | 34 | # Get legend of second image 35 | l2 = [i for i in fig2.axes[0].get_children() if isinstance(i, Legend)][0] 36 | 37 | # Get color of legend patch (RGBA) 38 | lc = l2.get_patches()[0].get_facecolor()[:-1] 39 | # Get color from array (should be only one besides black) 40 | _t = a2.reshape((8 * 8, 3)) 41 | ac = _t[_t.sum(1) > 0][0] 42 | 43 | assert np.equal(ac, lc).all() 44 | -------------------------------------------------------------------------------- /imc/tests/test_obj_creation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | import pickle 5 | import tempfile 6 | 7 | import pytest 8 | 9 | from imc import Project, IMCSample, ROI 10 | from imc.demo import generate_project 11 | from imc.data_models.project import DEFAULT_PROJECT_NAME 12 | from imc.data_models.sample import DEFAULT_SAMPLE_NAME 13 | from imc.data_models.roi import DEFAULT_ROI_NAME 14 | 15 | 16 | class TestProjectInitialization: 17 | def test_empty_project(self): 18 | p = Project() 19 | assert p.name == DEFAULT_PROJECT_NAME 20 | assert isinstance(p.samples, list) 21 | assert isinstance(p.rois, list) 22 | assert not p.samples 23 | assert not p.rois 24 | 25 | def test_empty_sample(self): 26 | s = IMCSample() 27 | assert s.name == DEFAULT_SAMPLE_NAME 28 | assert isinstance(s.rois, list) 29 | assert not s.rois 30 | 31 | def test_empty_roi(self): 32 | r = ROI() 33 | assert r.name == DEFAULT_ROI_NAME 34 | 35 | def test_creation_without_rois(self, tmp_path): 36 | p = generate_project(root_dir=tmp_path) 37 | p2 = Project(p.metadata[["sample_name"]].drop_duplicates(), processed_dir=p.processed_dir) 38 | assert len(p2.samples) == 3 39 | assert len(p2.rois) == 9 40 | -------------------------------------------------------------------------------- /imc/tests/test_serialization.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from typing import Any 3 | 4 | import parmap 5 | import pandas as pd 6 | 7 | from imc import Project, IMCSample, ROI 8 | from imc.ops.quant import _quantify_cell_intensity__roi 9 | from imc.types import Path 10 | 11 | 12 | def roundtrip(obj: Any, _dir: Path) -> Any: 13 | pickle.dump(obj, open(_dir / 
"file.pkl", "wb")) 14 | return pickle.load(open(_dir / "file.pkl", "rb")) 15 | 16 | 17 | class TestSimpleSerialization: 18 | def test_empty_project(self, tmp_path): 19 | p = Project(name="test_empty_project") 20 | q = roundtrip(p, tmp_path) 21 | assert q.name == "test_empty_project" 22 | # assert p is q 23 | 24 | def test_empty_sample(self, tmp_path): 25 | s = IMCSample(sample_name="test_empty_sample", root_dir=".") 26 | r = roundtrip(s, tmp_path) 27 | assert r.name == "test_empty_sample" 28 | # assert s is r 29 | 30 | def test_empty_roi(self, tmp_path): 31 | r = ROI(name="test_empty_roi", roi_number=1) 32 | s = roundtrip(r, tmp_path) 33 | assert s.name == "test_empty_roi" 34 | # assert r is s 35 | 36 | 37 | def func(roi: ROI) -> int: 38 | return len(roi.shape) 39 | 40 | 41 | class TestParmapSerialization: 42 | def test_simple_parmap(self, project): 43 | 44 | res = parmap.map(func, project.rois) 45 | assert all(x == 3 for x in res) 46 | 47 | def test_quant_parmap_lowlevel(self, project): 48 | 49 | _res = parmap.map(_quantify_cell_intensity__roi, project.rois) 50 | res = pd.concat(_res) 51 | assert not res.empty 52 | assert all( 53 | res.columns == project.rois[0].channel_labels.tolist() + ["roi", "sample"] 54 | ) 55 | 56 | def test_quant_parmap_highlevel(self, project): 57 | res = project.quantify_cell_intensity() 58 | assert not res.empty 59 | assert all( 60 | res.columns == project.rois[0].channel_labels.tolist() + ["roi", "sample"] 61 | ) 62 | -------------------------------------------------------------------------------- /imc/types.py: -------------------------------------------------------------------------------- 1 | """ 2 | Specific types or type aliases used in the library. 3 | """ 4 | 5 | from __future__ import annotations 6 | import os 7 | import typing as tp 8 | import pathlib 9 | import argparse 10 | 11 | import matplotlib 12 | import pandas 13 | import numpy 14 | from anndata import AnnData as _AnnData 15 | 16 | 17 | __all__ = [ 18 | "Path", 19 | "GenericType", 20 | "Args", 21 | "Array", 22 | "MultiIndexSeries", 23 | "Series", 24 | "DataFrame", 25 | "AnnData", 26 | "Figure", 27 | "Axis", 28 | "Patch", 29 | "ColorMap", 30 | ] 31 | 32 | 33 | class Path(pathlib.Path): 34 | """ 35 | A pathlib.Path child class that allows concatenation with strings 36 | by overloading the addition operator. 37 | 38 | In addition, it implements the ``startswith`` and ``endswith`` methods 39 | just like in the base :obj:`str` type. 40 | 41 | The ``replace_`` implementation is meant to be an implementation closer 42 | to the :obj:`str` type. 43 | 44 | Iterating over a directory with ``iterdir`` that does not exists 45 | will return an empty iterator instead of throwing an error. 46 | 47 | Creating a directory with ``mkdir`` allows existing directory and 48 | creates parents by default. 
49 | """ 50 | 51 | _flavour = ( 52 | pathlib._windows_flavour # type: ignore[attr-defined] # pylint: disable=W0212 53 | if os.name == "nt" 54 | else pathlib._posix_flavour # type: ignore[attr-defined] # pylint: disable=W0212 55 | ) 56 | 57 | def __add__(self, string: str) -> Path: 58 | return Path(str(self) + string) 59 | 60 | def startswith(self, string: str) -> bool: 61 | return str(self).startswith(string) 62 | 63 | def endswith(self, string: str) -> bool: 64 | return str(self).endswith(string) 65 | 66 | def replace_(self, patt: str, repl: str) -> Path: 67 | return Path(str(self).replace(patt, repl)) 68 | 69 | def iterdir(self) -> tp.Generator: 70 | if self.exists(): 71 | yield from [Path(x) for x in pathlib.Path(str(self)).iterdir()] 72 | yield from [] 73 | 74 | def unlink(self, missing_ok: bool = True) -> Path: 75 | super().unlink(missing_ok=missing_ok) 76 | return self 77 | 78 | def mkdir(self, mode=0o777, parents: bool = True, exist_ok: bool = True) -> Path: 79 | super().mkdir(mode=mode, parents=parents, exist_ok=exist_ok) 80 | return self 81 | 82 | def glob(self, pattern: str) -> tp.Generator: 83 | # to support ** with symlinks: https://bugs.python.org/issue33428 84 | from glob import glob 85 | 86 | if "**" in pattern: 87 | sep = "/" if self.is_dir() else "" 88 | yield from map( 89 | Path, 90 | glob(self.as_posix() + sep + pattern, recursive=True), 91 | ) 92 | else: 93 | yield from super().glob(pattern) 94 | 95 | 96 | GenericType = tp.TypeVar("GenericType") 97 | 98 | # type aliasing (done with Union to distinguish from other declared variables) 99 | 100 | 101 | # # Args = Union[argparse.Namespace] 102 | # class Args(argparse.Namespace, tp.Mapping[str, tp.Any]): 103 | # pass 104 | 105 | 106 | # # Series = Union[pandas.Series] 107 | # class Series(pandas.Series, tp.Mapping[tp.Any, tp.Any]): 108 | # pass 109 | 110 | 111 | Args = tp.Union[argparse.Namespace, tp.Mapping[str, tp.Any]] 112 | 113 | Array = numpy.ndarray 114 | 115 | MultiIndexSeries = pandas.Series 116 | Series = pandas.Series 117 | DataFrame = pandas.DataFrame 118 | AnnData = _AnnData 119 | 120 | Figure = matplotlib.figure.Figure 121 | Axis = matplotlib.axis.Axis 122 | Patch = matplotlib.patches.Patch 123 | ColorMap = matplotlib.colors.LinearSegmentedColormap 124 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | import nox 2 | 3 | python_versions = [ 4 | "3.8", 5 | "3.9", 6 | "3.10", 7 | ] 8 | 9 | # TODO: annotate these with explanation 10 | ignore_rules = [ 11 | "E501", 12 | "F401", 13 | "F841", 14 | "W503", 15 | "E402", 16 | "E203", 17 | "E266", 18 | "E722", # bare except 19 | ] 20 | 21 | exclude_directories = [ 22 | "tests", 23 | ] 24 | 25 | 26 | @nox.session(python=python_versions) 27 | def lint(session): 28 | session.install("flake8") 29 | session.run( 30 | "flake8", 31 | "--ignore", 32 | ",".join(ignore_rules), 33 | "--exclude", 34 | ",".join(exclude_directories), 35 | "imc/", 36 | ) 37 | 38 | 39 | @nox.session(python=python_versions) 40 | def test(session): 41 | session.install(".[dev]") 42 | session.run("pytest") 43 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # PIP, using PEP621 2 | [project] 3 | name = "imc" 4 | # version = "0.0.19.dev24+g43d6c06" 5 | description = "A framework for IMC data analysis." 
6 | authors = [
7 |     {name = "Andre Rendeiro", email = "afrendeiro@gmail.com"},
8 | ]
9 | # python = "^3.8"
10 | readme = "README.md"
11 | keywords = [
12 |     "computational biology",
13 |     "bioinformatics",
14 |     "imaging mass cytometry",
15 |     "imaging",
16 |     "mass cytometry",
17 |     "mass spectrometry",
18 | ]
19 | classifiers = [
20 |     "Programming Language :: Python :: 3 :: Only",
21 |     "Programming Language :: Python :: 3.8",
22 |     "Programming Language :: Python :: 3.9",
23 |     "Programming Language :: Python :: 3.10",
24 |     "Development Status :: 3 - Alpha",
25 |     "Typing :: Typed",
26 |     "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
27 |     "Topic :: Scientific/Engineering :: Bio-Informatics",
28 | ]
29 | requires-python = ">=3.8"
30 | dependencies = [
31 |     "setuptools_scm",
32 |     "outdated",
33 |     "ordered-set",
34 |     "urlpath",
35 |     "PyYAML",
36 |     "imctools>=2.1.0",
37 |     "joblib",
38 |     "leidenalg",
39 |     "python-louvain",
40 |     "networkx>=3.0.0",
41 |     "pandas>=1.0.1",
42 |     "matplotlib>=3.5.0",
43 |     "scikit-image==0.19.0",
44 |     "seaborn",
45 |     "fastcluster",
46 |     "parmap",
47 |     "scanpy",
48 |     "bbknn",
49 |     "numpy_groupies",
50 |     "tifffile==2022.4.8",
51 |     "seaborn-extensions"
52 | ]
53 | dynamic = ['version']
54 | 
55 | [project.optional-dependencies]
56 | # not yet supported by pip!
57 | extra = [
58 |     "stardist>=0.7.1,<1.0.0",
59 |     "DeepCell>=0.8.3,<1.0.0",
60 |     "cellpose>=0.6.5,<1.0.0",
61 |     "astir>=0.1.4,<1.0.0",
62 | ]
63 | stardist = [
64 |     "stardist>=0.7.1,<1.0.0",
65 | ]
66 | deepcell = [
67 |     "DeepCell>=0.8.3,<1.0.0",
68 | ]
69 | cellpose = [
70 |     "cellpose>=0.6.5,<1.0.0",
71 | ]
72 | astir = [
73 |     "astir>=0.1.4,<1.0.0",
74 | ]
75 | dev = [
76 |     "wheel",
77 |     "ipython",
78 |     "black[d]",
79 |     "mypy>=0.900", # pin to version supporting pyproject.toml
80 |     "pandas-stubs",
81 |     "pylint",
82 |     "flake8",
83 |     "git-lint",
84 |     "pydocstyle",
85 |     "rich",
86 |     # data-science-types
87 |     "PyQt5",
88 | ]
89 | test = [
90 |     "pytest>=6",
91 |     "pytest-cov",
92 | ]
93 | doc = [
94 |     "Sphinx",
95 |     "sphinx-issues",
96 |     "sphinx-rtd-theme",
97 |     "sphinx-argparse",
98 | ]
99 | 
100 | [project.urls]
101 | homepage = "https://github.com/ElementoLab/imc"
102 | repository = "https://github.com/ElementoLab/imc"
103 | documentation = "https://github.com/ElementoLab/imc"
104 | changelog = "https://github.com/ElementoLab/imc/blob/master/docs/source/changelog.md"
105 | 
106 | [project.scripts]
107 | imc = "imc.cli:main"
108 | 
109 | [build-system]
110 | build-backend = "setuptools.build_meta"
111 | requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.0"]
112 | 
113 | # build-backend = "poetry.masonry.api"
114 | # requires = ["poetry>=1.2.0b1", "setuptools>=45", "wheel", "poetry-dynamic-versioning-plugin"]
115 | 
116 | # build-backend = "flit_core.buildapi"
117 | # requires = ["flit_core >=3.2,<4"]
118 | 
119 | [tool.setuptools_scm]
120 | write_to = "imc/_version.py"
121 | write_to_template = 'version = __version__ = "{version}"'
122 | 
123 | # Poetry
124 | [tool.poetry-dynamic-versioning]
125 | enable = true
126 | vcs = "git"
127 | style = "semver"
128 | 
129 | [tool.poetry]
130 | name = "imc"
131 | version = "0.0.0" # waiting on next release of poetry to use dynamic-versioning extension
132 | description = "A package for the analysis of imaging mass cytometry (IMC) data"
133 | authors = ["Andre Rendeiro <afrendeiro@gmail.com>"]
134 | homepage = "https://github.com/ElementoLab/imc"
135 | repository = "https://github.com/ElementoLab/imc"
136 | documentation = "https://github.com/ElementoLab/imc"
137 | 
138 | [tool.poetry.dependencies]
139 | python = "^3.8"
140 | ordered-set = "^4.0.2"
141 | PyYAML = "^5.4.1"
142 | pandas = ">=1.0.1"
143 | tifffile = ">=2022.5.4"
144 | imctools = "^2.1.0"
145 | scikit-image = "^0.20.0"
146 | imagecodecs = "^2020.5.30"
147 | colorama = "^0.4.3"
148 | h5py = "^2.10.0"
149 | anndata = "^0.7.3"
150 | scanpy = "^1.5.1"
151 | leidenalg = "^0.8.1"
152 | python-louvain = "^0.14"
153 | networkx = "^3.0"
154 | parmap = "^1.5.2"
155 | joblib = "^0.15.1"
156 | 
157 | [tool.poetry.dev-dependencies]
158 | ipython = "^7.16.1"
159 | pylint = "^2.5.3"
160 | git-lint = "^0.1.2"
161 | black = {extras = ["d"], version = "^19.10b0"}
162 | mypy = "^0.900"
163 | pytest = "^5.4.3"
164 | Sphinx = "^3.1.1"
165 | sphinx-issues = "^1.2.0"
166 | sphinx-rtd-theme = "^0.5.0"
167 | 
168 | [tool.poetry.extras]
169 | stardist = [
170 |     "stardist",
171 | ]
172 | deepcell = [
173 |     "DeepCell",
174 | ]
175 | cellpose = [
176 |     "cellpose",
177 | ]
178 | astir = [
179 |     "astir",
180 | ]
181 | 
182 | [tool.black]
183 | line-length = 90
184 | target-version = ['py39']
185 | include = '\.pyi?$'
186 | exclude = '''
187 | 
188 | (
189 |   /(
190 |       \.eggs  # exclude a few common directories in the
191 |     | \.git   # root of the project
192 |     | \.hg
193 |     | \.mypy_cache
194 |     | \.tox
195 |     | \.venv
196 |     | _build
197 |     | buck-out
198 |     | build
199 |     | dist
200 |   )/
201 |   | foo.py  # also separately exclude a file named foo.py in
202 |             # the root of the project
203 | )
204 | '''
205 | 
206 | [tool.mypy]
207 | python_version = '3.9'
208 | warn_return_any = true
209 | warn_unused_configs = true
210 | 
211 | # Packages without type annotations in typeshed yet
212 | [[tool.mypy.overrides]]
213 | module = [
214 |     'numpy.*',
215 |     'pandas.*',
216 |     'scipy.*',
217 |     'skimage.*',
218 |     'matplotlib.*',
219 |     'seaborn.*',
220 |     'parmap.*',
221 |     'anndata.*',
222 |     'scanpy.*',
223 |     'pymde.*',
224 |     'umap.*',
225 |     'networkx.*',
226 |     'pingouin.*',
227 |     'tqdm.*',
228 | ]
229 | ignore_missing_imports = true
230 | 
231 | [tool.pytest.ini_options]
232 | minversion = "6.0"
233 | addopts = "-ra -q --strict-markers"
234 | testpaths = [
235 |     "imc/tests"
236 | ]
237 | markers = [
238 |     'slow', # 'marks tests as slow (deselect with "-m 'not slow'")',
239 |     'serial'
240 | ]
241 | 
242 | 
243 | [tool.tox]
244 | legacy_tox_ini = """
245 | [tox]
246 | envlist = py39
247 | 
248 | [testenv]
249 | deps = pytest >= 6, <7
250 | commands = pytest
251 | """
252 | 
--------------------------------------------------------------------------------
/requirements/requirements.cellpose.txt:
--------------------------------------------------------------------------------
1 | cellpose>=0.1.0.1,<1.0.0
2 | 
--------------------------------------------------------------------------------
/requirements/requirements.deepcell.txt:
--------------------------------------------------------------------------------
1 | DeepCell>=0.8.3,<1.0.0
2 | 
--------------------------------------------------------------------------------
/requirements/requirements.dev.txt:
--------------------------------------------------------------------------------
1 | wheel
2 | ipython
3 | black[d]
4 | mypy>=0.900
5 | pandas-stubs
6 | pylint
7 | flake8
8 | git-lint
9 | pydocstyle
10 | rich
11 | pytest>=6
12 | pytest-cov
13 | 
--------------------------------------------------------------------------------
/requirements/requirements.doc.txt:
--------------------------------------------------------------------------------
1 | # data-science-types
2 | Sphinx
3 | sphinx-issues
4 | sphinx-rtd-theme
5 | 
sphinx-argparse 6 | myst_parser 7 | sphinx-autodoc-typehints 8 | -------------------------------------------------------------------------------- /requirements/requirements.stardist.txt: -------------------------------------------------------------------------------- 1 | stardist==0.6.0,<1.0.0 2 | -------------------------------------------------------------------------------- /requirements/requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools_scm 2 | outdated 3 | ordered-set 4 | PyYAML 5 | parmap 6 | tqdm 7 | joblib 8 | numpy 9 | numpy_groupies 10 | scipy>=1.7 11 | pandas>=1.0.1 12 | matplotlib>=3.5 13 | imctools>=2.1.0 14 | tifffile==2022.4.8 15 | scikit-image==0.17.2 16 | leidenalg 17 | python-louvain 18 | networkx 19 | scanpy 20 | bbknn 21 | seaborn-extensions 22 | harmonypy 23 | --------------------------------------------------------------------------------