├── .gitignore ├── .vs ├── ProjectSettings.json ├── STQ │ └── v16 │ │ └── .suo └── slnx.sqlite ├── ChangeLog.md ├── LICENSE ├── README.md ├── assets ├── container-mamba-inception.def ├── container-singularity-bafextract.def ├── container-singularity-deepfocus.def ├── container-singularity-fastqtools.def ├── container-singularity-hovernet-py.def ├── container-singularity-inception-py.def ├── container-singularity-ome.def ├── container-singularity-python.def ├── container-singularity-spaceranger.def ├── container-singularity-stainnet.def ├── container-singularity-staintools.def ├── container-singularity-uni-conch.def ├── container-singularity-velocyto.def ├── container-singularity-vips.def ├── def-mamba-timm.def ├── def-mamba-xenomake.def ├── run_build.sb ├── run_build.sh ├── sample_roi.json ├── samplesheet_demo.csv ├── samplesheet_focus_test.csv ├── samplesheet_st_pancreas_all.csv ├── samplesheet_test.csv ├── samplesheet_test_sj.csv ├── samplesheet_test_sj_short.csv └── samplesheet_two.csv ├── bin ├── StainNetNorm.py ├── StainToolsNorm.py ├── __init__.py ├── extractROI.py ├── mtx_tools.py ├── run-conch.py ├── run-ctranspath.py ├── run-inception-v3-tiles.py ├── run-inception-v3.py ├── run-uni.py └── superpixelation.py ├── check.sh ├── conf ├── README.md ├── analysis-img.config ├── analysis-one.config ├── analysis-pancreas.config ├── analysis-two.config └── containers.config ├── docs ├── BAF_extract_scheme.png ├── Example_CPU_usage.png ├── STQ-imaging.svg ├── Scheme NF2.png ├── Scheme_NF3.png ├── dag-arb.svg ├── dag-one.svg ├── dag-two.svg ├── example ST wsi.png ├── example non-ST wsi.png ├── flow-static.png ├── flow.gif ├── hovernet-tissue-mask.png ├── imaging-clustering.png ├── mones-per-tile.png ├── multiscale-features.png ├── route-map.png └── sub-tiling.png ├── lib ├── __init__.py ├── hovernetConv.py ├── superpixels.py ├── wsiGrid.py └── wsiMask.py ├── main.nf ├── modules └── local │ ├── bafextract.nf │ ├── deconvolution.nf │ ├── focus.nf │ ├── gunzip.nf │ ├── hovernet.nf │ ├── load.nf │ ├── merge.nf │ ├── ome.nf │ ├── postprocessing.nf │ ├── spaceranger.nf │ ├── superpixel.nf │ ├── tasks.nf │ └── velocyto.nf ├── nextflow.config ├── run.sh ├── submit.sb ├── subworkflows ├── imaging.nf ├── sequencing.nf ├── sequencing_single.nf └── xenome_index.nf ├── utils └── AOI.ipynb └── workflows ├── README.md ├── arbitrary_grid.nf ├── one_reference.nf └── two_references.nf /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | slurm-*.out 10 | 11 | .vs/ 12 | 13 | dev/ 14 | 15 | results*/ 16 | .nextflow.log* 17 | .nextflow/ 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | # lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | work*/ 33 | wheels/ 34 | pip-wheel-metadata/ 35 | share/python-wheels/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | MANIFEST 40 | 41 | # PyInstaller 42 | # Usually these files are written by a python script from a template 43 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
44 | *.manifest 45 | *.spec 46 | 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .nox/ 55 | .coverage 56 | .coverage.* 57 | .cache 58 | nosetests.xml 59 | coverage.xml 60 | *.cover 61 | *.py,cover 62 | .hypothesis/ 63 | .pytest_cache/ 64 | 65 | # Translations 66 | *.mo 67 | *.pot 68 | 69 | # Django stuff: 70 | *.log 71 | local_settings.py 72 | db.sqlite3 73 | db.sqlite3-journal 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # IPython 92 | profile_default/ 93 | ipython_config.py 94 | 95 | # pyenv 96 | .python-version 97 | 98 | # pipenv 99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 102 | # install all needed dependencies. 103 | #Pipfile.lock 104 | 105 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 106 | __pypackages__/ 107 | 108 | # Celery stuff 109 | celerybeat-schedule 110 | celerybeat.pid 111 | 112 | # SageMath parsed files 113 | *.sage.py 114 | 115 | # Environments 116 | .env 117 | .venv 118 | env/ 119 | venv/ 120 | ENV/ 121 | env.bak/ 122 | venv.bak/ 123 | 124 | # Spyder project settings 125 | .spyderproject 126 | .spyproject 127 | 128 | # Rope project settings 129 | .ropeproject 130 | 131 | # mkdocs documentation 132 | /site 133 | 134 | # mypy 135 | .mypy_cache/ 136 | .dmypy.json 137 | dmypy.json 138 | 139 | # Pyre type checker 140 | .pyre/ 141 | -------------------------------------------------------------------------------- /.vs/ProjectSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "CurrentProjectSetting": null 3 | } -------------------------------------------------------------------------------- /.vs/STQ/v16/.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/.vs/STQ/v16/.suo -------------------------------------------------------------------------------- /.vs/slnx.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/.vs/slnx.sqlite -------------------------------------------------------------------------------- /ChangeLog.md: -------------------------------------------------------------------------------- 1 | 2 | v0.3.0 3 | + Updated imaging workflow output directory structure. 4 | + Added export of pipeline parameters, metadata items, and additional image outputs. 5 | + Added multiscale feature extraction inspired by SAMPLER work (PMID: 37577691). 6 | + Added experimental option of sub-tiling for use with feature extraction. 7 | + Added slide focus checking. 8 | + Added CTransPath, MoCoV3, UNI, CONCH as an extractors of imaging features. 9 | + Added subworkflow for sampling of tiles. 10 | + Optimized HoVer-Net segmentation steps. Added option for GPU-based segmentation. 11 | + Added postprocessing step to visualize outputs. 
12 | + Added the AOI (automatic object identification) util for preparing ROI JSON for STQ. 13 | 14 | v0.2.0 15 | + This version was referenced in the publication (PMID: 38626768). 16 | + Refactored and optimized codebase. 17 | + Added Xengsort read classification option. 18 | + Added documentation details to improve user experience. 19 | 20 | v0.1.0 21 | + Initial release 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 The Jackson Laboratory 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /assets/container-mamba-inception.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: condaforge/mambaforge@sha256:1461e0a1fa14431128dc95d921655fd6cd0b9147b4ec757c6d99e02776e82b47 3 | #from: condaforge/mambaforge:23.3.1-1 4 | 5 | %environment 6 | export DEBIAN_FRONTEND=noninteractive 7 | 8 | %post 9 | echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 10 | 11 | apt-get update 12 | apt-get install -y dialog apt-utils 13 | apt-get install -y gcc g++ 14 | apt-get install -y openslide-tools 15 | apt-get install -y procps 16 | 17 | /opt/conda/bin/mamba install -y -c conda-forge -c fastai -c anaconda \ 18 | pandas scikit-learn scipy matplotlib scikit-image \ 19 | jupyterlab tifffile imagecodecs stardist \ 20 | openslide-python opencv-python-headless pillow h5py \ 21 | "tensorflow==2.11.0" 22 | 23 | /opt/conda/bin/pip install pysnic 24 | -------------------------------------------------------------------------------- /assets/container-singularity-bafextract.def: -------------------------------------------------------------------------------- 1 | bootstrap: shub 2 | from: jaxreg.jax.org/rit-ci/samtools:1.5 3 | 4 | %post 5 | apk add --update-cache 6 | apk add build-base 7 | apk add g++ 8 | apk add git 9 | apk add bash 10 | apk add procps 11 | 12 | git clone https://github.com/akdess/BAFExtract.git 13 | 14 | cd BAFExtract 15 | 16 | make 17 | 18 | 19 | -------------------------------------------------------------------------------- /assets/container-singularity-deepfocus.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: centos/python-36-centos7:latest 3 | 4 | %labels 
5 | Author Sergii Domanskyi 6 | 7 | %environment 8 | export PYTHONPATH="${PYTHONPATH}:/usr/local/lib64/python3.6/site-packages/:/usr/local/lib/python3.6/site-packages:/usr/lib/python3.6/site-packages" 9 | 10 | %post 11 | set -eu 12 | 13 | yum -y update && \ 14 | yum -y install \ 15 | redhat-lsb-core \ 16 | epel-release \ 17 | 18 | lsb_release -a 19 | 20 | yum-config-manager --enable epel 21 | 22 | yum -y install \ 23 | bzip2 \ 24 | openslide \ 25 | python3-devel.x86_64 \ 26 | wget \ 27 | bash \ 28 | inkscape \ 29 | librsvg2 \ 30 | gcc \ 31 | libssl-dev \ 32 | openssl \ 33 | procps 34 | 35 | python3 -m pip install --upgrade pip==21.3.1 36 | python3 -m pip install grpcio==1.48.2 tflearn==0.3.2 tensorflow==1.13.1 tqdm==4.28.1 numpy==1.15.3 packaging==16.8 pandas==0.23.4 wheel==0.29.0 matplotlib==3.3.4 scikit-image==0.17.2 37 | python3 -m pip install openslide-python 38 | 39 | python3 -m pip list 40 | 41 | git clone https://github.com/sdomanskyi/deepfocus.git 42 | 43 | 44 | %runscript 45 | exec python3 "$@" 46 | 47 | %help 48 | The container is built on CentOS 7.9.2009 49 | Python 3.6.8 50 | -------------------------------------------------------------------------------- /assets/container-singularity-fastqtools.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: continuumio/miniconda3:4.12.0 3 | 4 | %post 5 | apt-get update 6 | apt-get install -y bash 7 | apt-get install -y procps 8 | 9 | /opt/conda/bin/conda install --quiet -y -c bioconda fastq-tools 10 | -------------------------------------------------------------------------------- /assets/container-singularity-hovernet-py.def: -------------------------------------------------------------------------------- 1 | bootstrap: shub 2 | from: jaxreg.jax.org/singlecell/python:3.8 3 | 4 | %post 5 | apt-get update 6 | apt-get install -y gcc 7 | apt-get install -y git 8 | apt-get install -y g++ 9 | apt-get install -y openslide-tools 10 | apt-get install -y python-openslide 11 | apt-get install -y libsm6 libxext6 12 | apt-get install -y libxrender-dev 13 | apt-get install -y procps 14 | 15 | /opt/conda/bin/conda install --quiet -y python=3.6.12 pip=20.3.1 16 | /opt/conda/bin/conda install --quiet -y -c conda-forge pandas tifftools 17 | /opt/conda/bin/pip install gdown openslide-python==1.1.2 docopt==0.6.2 future==0.18.2 imgaug==0.4.0 matplotlib==3.3.0 numpy==1.19.1 opencv-python==4.3.0.36 pandas==1.1.0 pillow==7.2.0 psutil==5.7.3 scikit-image==0.17.2 scikit-learn==0.23.1 scipy==1.5.2 tensorboard==2.3.0 tensorboardx==2.1 termcolor==1.1.0 tqdm==4.48.0 torch==1.6.0 torchvision==0.7.0 18 | 19 | /opt/conda/bin/gdown 1SbSArI3KOOWHxRlxnjchO7_MbWzB4lNR 20 | 21 | git clone https://github.com/sdomanskyi/hover_net.git 22 | -------------------------------------------------------------------------------- /assets/container-singularity-inception-py.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: condaforge/mambaforge@sha256:1461e0a1fa14431128dc95d921655fd6cd0b9147b4ec757c6d99e02776e82b47 3 | #from: condaforge/mambaforge:23.3.1-1 4 | 5 | %environment 6 | export DEBIAN_FRONTEND=noninteractive 7 | 8 | %post 9 | echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 10 | 11 | apt-get update 12 | apt-get install -y dialog apt-utils 13 | apt-get install -y gcc g++ 14 | apt-get install -y openslide-tools 15 | apt-get install -y procps 16 | 17 | /opt/conda/bin/mamba install -y -c conda-forge -c fastai -c anaconda \ 
18 | pandas scikit-learn scipy matplotlib scikit-image \ 19 | jupyterlab tifffile imagecodecs stardist \ 20 | openslide-python opencv-python-headless pillow h5py \ 21 | "tensorflow==2.11.0" 22 | 23 | /opt/conda/bin/pip install pysnic 24 | -------------------------------------------------------------------------------- /assets/container-singularity-ome.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: alpine:3.18.2 3 | 4 | %environment 5 | export PATH="/bftools:$PATH" 6 | 7 | %post 8 | set -eu 9 | 10 | apk update && \ 11 | apk add unzip \ 12 | openjdk17 \ 13 | bash \ 14 | procps 15 | 16 | wget https://downloads.openmicroscopy.org/bio-formats/7.2.0/artifacts/bftools.zip 17 | unzip bftools.zip 18 | rm bftools.zip 19 | -------------------------------------------------------------------------------- /assets/container-singularity-python.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: condaforge/mambaforge:23.3.1-1 3 | 4 | %environment 5 | export DEBIAN_FRONTEND=noninteractive 6 | 7 | %post 8 | echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 9 | 10 | apt-get update 11 | apt-get install -y dialog apt-utils 12 | apt-get install -y procps 13 | 14 | /opt/conda/bin/mamba install --quiet -y -c conda-forge pandas numpy scipy scanpy leidenalg 15 | -------------------------------------------------------------------------------- /assets/container-singularity-spaceranger.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: debian:buster-slim 3 | 4 | %environment 5 | SPACERANGER_HOME="/opt/spaceranger-1.3.1" 6 | SR_EXEC="${SPACERANGER_HOME}/bin" 7 | SR_PY="${SPACERANGER_HOME}/external/anaconda/bin" 8 | 9 | export PATH="${SPACERANGER_HOME}:${SR_EXEC}:${SR_PY}:$PATH" 10 | 11 | %post 12 | set -eu 13 | 14 | apt-get update && \ 15 | apt-get -y upgrade 16 | apt-get -y install wget \ 17 | ca-certificates \ 18 | locales \ 19 | bash \ 20 | procps 21 | rm -rf /var/lib/apt/lists/* 22 | 23 | # locale fix 24 | echo "LC_ALL=en_US.UTF-8" >> /etc/environment 25 | echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen 26 | echo "LANG=en_US.UTF-8" > /etc/locale.conf 27 | locale-gen en_US.UTF-8 28 | 29 | mkdir -p /opt/ && cd /opt/ 30 | 31 | wget --no-check-certificate -nv -O spaceranger-1.3.1.tar.gz "https://singlecell-software.s3-far.jax.org/spaceranger-1.3.1.tar.gz" 32 | 33 | tar -zxf spaceranger-1.3.1.tar.gz && rm spaceranger-1.3.1.tar.gz* 34 | rm -r spaceranger-1.3.1/external/spaceranger_tiny_* 35 | 36 | %runscript 37 | exec spaceranger "$@" 38 | -------------------------------------------------------------------------------- /assets/container-singularity-stainnet.def: -------------------------------------------------------------------------------- 1 | bootstrap: shub 2 | from: jaxreg.jax.org/singlecell/python:3.8 3 | 4 | %post 5 | apt-get update 6 | apt-get install -y procps 7 | 8 | /opt/conda/bin/conda install --quiet -y pip=20.3.1 9 | /opt/conda/bin/pip install tifffile imageio numpy pillow tqdm torch 10 | 11 | wget https://github.com/khtao/StainNet/blob/master/checkpoints/aligned_histopathology_dataset/StainNet-Public_layer3_ch32.pth 12 | wget https://github.com/khtao/StainNet/blob/master/checkpoints/aligned_cytopathology_dataset/StainNet-3x0_best_psnr_layer3_ch32.pth 13 | wget https://github.com/khtao/StainNet/blob/master/checkpoints/camelyon16_dataset/StainNet-Public-centerUni_layer3_ch32.pth 14 | 
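The StainNet checkpoints fetched above are consumed later by bin/StainNetNorm.py. Below is a minimal sketch, not part of the pipeline, of applying one of them to a single tile; the StainNet class mirrors the definition in bin/StainNetNorm.py, the checkpoint and tile filenames are placeholders, and torch.load needs the raw .pth file (e.g. a raw.githubusercontent.com download or a local copy), not the GitHub HTML blob page.

import numpy as np
import torch
import torch.nn as nn
from PIL import Image

# Same 3-layer, kernel-size-1 architecture as in bin/StainNetNorm.py
class StainNet(nn.Module):
    def __init__(self, input_nc=3, output_nc=3, n_layer=3, n_channel=32, kernel_size=1):
        super().__init__()
        layers = [nn.Conv2d(input_nc, n_channel, kernel_size, padding=kernel_size // 2), nn.ReLU(True)]
        for _ in range(n_layer - 2):
            layers += [nn.Conv2d(n_channel, n_channel, kernel_size, padding=kernel_size // 2), nn.ReLU(True)]
        layers += [nn.Conv2d(n_channel, output_nc, kernel_size, padding=kernel_size // 2)]
        self.rgb_trans = nn.Sequential(*layers)

    def forward(self, x):
        return self.rgb_trans(x)

model = StainNet()
model.load_state_dict(torch.load("StainNet-Public_layer3_ch32.pth", map_location="cpu"))  # placeholder path
model.eval()

tile = np.asarray(Image.open("tile.tif"), dtype=np.float32) / 255.0          # placeholder tile, H x W x 3
x = torch.from_numpy(np.ascontiguousarray(tile.transpose(2, 0, 1)))[None]    # 1 x 3 x H x W
x = (x - 0.5) * 2                                                            # scale to [-1, 1] as in StainNetNorm.py
with torch.no_grad():
    out = (model(x) * 0.5 + 0.5).clamp(0, 1)[0].numpy().transpose(1, 2, 0)
Image.fromarray((out * 255).astype(np.uint8)).save("tile_normalized.tiff")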
-------------------------------------------------------------------------------- /assets/container-singularity-staintools.def: -------------------------------------------------------------------------------- 1 | bootstrap: shub 2 | from: jaxreg.jax.org/singlecell/python:3.8 3 | 4 | %post 5 | apt-get update 6 | apt-get install -y gcc 7 | apt-get install -y g++ 8 | apt-get install -y openslide-tools 9 | apt-get install -y python-openslide 10 | apt-get install -y libgl1 11 | apt-get install -y procps 12 | 13 | /opt/conda/bin/conda install --quiet -y -c conda-forge pandas python-spams 14 | /opt/conda/bin/conda install --quiet -y -c numba numba==0.56.4 15 | /opt/conda/bin/conda install --quiet -y pip 16 | /opt/conda/bin/pip install tifffile imagecodecs openslide-python opencv-python-headless staintools 17 | -------------------------------------------------------------------------------- /assets/container-singularity-uni-conch.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: condaforge/mambaforge:23.3.1-1 3 | 4 | %environment 5 | export DEBIAN_FRONTEND=noninteractive 6 | 7 | %post 8 | echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 9 | 10 | apt-get update 11 | apt-get install -y dialog apt-utils 12 | apt-get install -y openslide-tools 13 | apt-get install -y git 14 | apt-get install -y procps 15 | 16 | /opt/conda/bin/mamba install python=3.10 17 | /opt/conda/bin/mamba install -y -c anaconda pip numpy pandas pyarrow scipy 18 | /opt/conda/bin/mamba install -y -c conda-forge pillow openslide-python 19 | 20 | # Clone the repository at SHA of August 2024 21 | git clone https://github.com/mahmoodlab/CONCH.git 22 | cd CONCH 23 | git reset --hard 02d6ac59cc20874bff0f581de258c2b257f69a84 24 | 25 | /opt/conda/bin/pip install --upgrade pip 26 | /opt/conda/bin/pip install -e . 
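    # Note: CONCH model weights are not bundled into this image; bin/run-conch.py
    # loads a checkpoint supplied at runtime via its --model-checkpoint-path argument.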
27 | -------------------------------------------------------------------------------- /assets/container-singularity-velocyto.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: continuumio/miniconda3:4.12.0 3 | 4 | %post 5 | apt-get update 6 | apt-get install -y gcc 7 | apt-get install -y g++ 8 | apt-get install -y bash 9 | apt-get install -y procps 10 | 11 | /opt/conda/bin/conda install --quiet -y pip 12 | 13 | /opt/conda/bin/pip install numpy scipy numba matplotlib scikit-learn h5py loompy pysam Click pandas Cython 14 | /opt/conda/bin/pip install velocyto -------------------------------------------------------------------------------- /assets/container-singularity-vips.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: codechimpio/vips-alpine 3 | 4 | %post 5 | apt-get update 6 | apt-get install -y bash 7 | apt-get install -y procps 8 | -------------------------------------------------------------------------------- /assets/def-mamba-timm.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: condaforge/mambaforge:23.3.1-1 3 | 4 | %environment 5 | export DEBIAN_FRONTEND=noninteractive 6 | 7 | %post 8 | echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 9 | 10 | apt-get update 11 | apt-get install -y dialog apt-utils 12 | apt-get install -y openslide-tools 13 | apt-get install -y git 14 | apt-get install -y procps 15 | 16 | # Clone the repository at SHA of December 2023 17 | git clone https://github.com/Xiyue-Wang/TransPath 18 | cd TransPath 19 | git reset --hard 74673ef15656a6f01e53dde5c06e6964022e3789 20 | 21 | /opt/conda/bin/mamba install -y -c anaconda pip pandas pyarrow scipy 22 | /opt/conda/bin/mamba install -y -c conda-forge pillow openslide-python 23 | 24 | /opt/conda/bin/pip install gdown ml-collections 25 | 26 | # Modified timm package 27 | gdown 1JV7aj9rKqGedXY1TdDfi3dP07022hcgZ 28 | /opt/conda/bin/pip install timm-0.5.4.tar 29 | 30 | # 1. CTransPath: ctranspath.pth - 108MB 31 | gdown 1DoDx_70_TLj98gTf6YTXnu4tFhsFocDX 32 | 33 | # 2. MoCo V3 model: vit_small.pth.tar - 680M 34 | gdown 13d_SHy9t9JCwp_MsU2oOUZ5AvI6tsC-K 35 | 36 | # 3. 
TransPath model: checkpoint.pth - 840MB 37 | gdown 1dhysqcv_Ct_A96qOF8i6COTK3jLb56vx 38 | -------------------------------------------------------------------------------- /assets/def-mamba-xenomake.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: condaforge/mambaforge:23.3.1-1 3 | 4 | %environment 5 | export DEBIAN_FRONTEND=noninteractive 6 | 7 | %post 8 | echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 9 | 10 | apt-get update 11 | apt-get install -y dialog apt-utils 12 | apt-get install -y git 13 | apt-get install -y procps 14 | 15 | /opt/conda/bin/mamba install -y -c bioconda numpy>=1.24 numba>=0.57 seqtk 16 | /opt/conda/bin/mamba install -y -c conda-forge jsonargparse pytest 17 | 18 | git clone https://github.com/Biivy/Xenomake 19 | cd Xenomake 20 | chmod -R +x scripts/ 21 | # Use the repository version at SHA of November 2023 22 | git reset --hard 363b8f5d51daae52ef12a2bd8bb9a12a1aacb4f4 23 | /opt/conda/bin/mamba env update -n base --file environment.yaml 24 | 25 | git clone https://gitlab.com/genomeinformatics/xengsort.git 26 | cd xengsort 27 | # Use the repository version at SHA of December 2023 28 | git reset --hard 62ea5c6419af8ad366212b617133dc1c6c1a8e28 29 | /opt/conda/bin/pip install -e . 30 | xengsort index --help 31 | which xengsort 32 | -------------------------------------------------------------------------------- /assets/run_build.sb: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | #SBATCH -t 02:00:00 3 | #SBATCH -N 1 4 | #SBATCH -n 1 5 | #SBATCH --mem=12GB 6 | #SBATCH -q batch 7 | #SBATCH -p compute 8 | 9 | #singularity run http://s3-far.jax.org/builder/builder $2 $1 10 | singularity build --fakeroot $1 $2 11 | 12 | scontrol show job $SLURM_JOB_ID -------------------------------------------------------------------------------- /assets/run_build.sh: -------------------------------------------------------------------------------- 1 | sbatch run_build.sb /projects/chuang-lab/USERS/domans/containers/container-singularity-python.sif container-singularity-python.def -------------------------------------------------------------------------------- /assets/sample_roi.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": { 3 | "location": 0.33, 4 | "size": 0.33 5 | }, 6 | "1": { 7 | "location": 0.33, 8 | "size": 0.33 9 | } 10 | } -------------------------------------------------------------------------------- /assets/samplesheet_demo.csv: -------------------------------------------------------------------------------- 1 | sample,fastq,image,grid,roifile,mpp 2 | Demo_S1,/projects/rubinstein-lab/USERS/domans/melanoma_PDX/dev/demo_dataset/fastq/,/projects/rubinstein-lab/USERS/domans/melanoma_PDX/dev/demo_dataset/SC2200092.tiff,,,0.22075 3 | -------------------------------------------------------------------------------- /assets/samplesheet_focus_test.csv: -------------------------------------------------------------------------------- 1 | sample,fastq,image,grid,roifile,mpp 2 | WM4237_TE_S1_ST,,/projects/chuang-lab/USERS/domans/melanoma_PDX_ST/additionalHandE/tiff/WM4237_3/WM4237_229_T1_4_21_22_cut_level_0_oid_0.tiff,,,0.2513 3 | -------------------------------------------------------------------------------- /assets/samplesheet_st_pancreas_all.csv: -------------------------------------------------------------------------------- 1 | sample,fastq,image,grid,roifile,mpp 2 | 
JDC_WP_001_s_ST,,/sdata/activities/kappsen-tmc/visium/SC2300284R_JDC-WP-001-s/img/SC2300284R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300284R_JDC-WP-001-s/spaceranger/spatial/,,0.2208187960959237 3 | JDC_WP_001_x_ST,,/sdata/activities/kappsen-tmc/visium/SC2300285R_JDC-WP-001-x/img/SC2300285R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300285R_JDC-WP-001-x/spaceranger/spatial/,,0.2208187960959237 4 | JDC_WP_001_l_ST,,/sdata/activities/kappsen-tmc/visium/SC2300286R_JDC-WP-001-l/img/SC2300286R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300286R_JDC-WP-001-l/spaceranger/spatial/,,0.2208187960959237 5 | JDC_WP_001_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300287R_JDC-WP-001-c/img/SC2300287R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300287R_JDC-WP-001-c/spaceranger/spatial/,,0.2208187960959237 6 | JDC_WP_002_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300294R_JDC-WP-002-c/img/SC2300294R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300294R_JDC-WP-002-c/spaceranger/spatial/,,0.2208187960959237 7 | JDC_WP_002_j_ST,,/sdata/activities/kappsen-tmc/visium/SC2300295R_JDC-WP-002-j/img/SC2300295R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300295R_JDC-WP-002-j/spaceranger/spatial/,,0.2208187960959237 8 | JDC_WP_002_r_ST,,/sdata/activities/kappsen-tmc/visium/SC2300296R_JDC-WP-002-r/img/SC2300296R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300296R_JDC-WP-002-r/spaceranger/spatial/,,0.2208187960959237 9 | JDC_WP_002_v_ST,,/sdata/activities/kappsen-tmc/visium/SC2300297R_JDC-WP-002-v/img/SC2300297R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300297R_JDC-WP-002-v/spaceranger/spatial/,,0.2208187960959237 10 | JDC_WP_004_n_ST,,/sdata/activities/kappsen-tmc/visium/SC2300423_JDC-WP-004-n/img/SC2300423.tiff,/sdata/activities/kappsen-tmc/visium/SC2300423_JDC-WP-004-n/spaceranger/spatial/,,0.2208187960959237 11 | JDC_WP_004_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300424_JDC-WP-004-c/img/SC2300424.tiff,/sdata/activities/kappsen-tmc/visium/SC2300424_JDC-WP-004-c/spaceranger/spatial/,,0.2208187960959237 12 | JDC_WP_005_n_ST,,/sdata/activities/kappsen-tmc/visium/SC2300425_JDC-WP-005-n/img/SC2300425.tiff,/sdata/activities/kappsen-tmc/visium/SC2300425_JDC-WP-005-n/spaceranger/spatial/,,0.2208187960959237 13 | JDC_WP_005_r_ST,,/sdata/activities/kappsen-tmc/visium/SC2300426_JDC-WP-005-r/img/SC2300426.tiff,/sdata/activities/kappsen-tmc/visium/SC2300426_JDC-WP-005-r/spaceranger/spatial/,,0.2208187960959237 14 | JDC_WP_005_j_ST,,/sdata/activities/kappsen-tmc/visium/SC2300427_JDC-WP-005-j/img/SC2300427.tiff,/sdata/activities/kappsen-tmc/visium/SC2300427_JDC-WP-005-j/spaceranger/spatial/,,0.2208187960959237 15 | JDC_WP_005_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300455_JDC-WP-005-c/img/SC2300455.tiff,/sdata/activities/kappsen-tmc/visium/SC2300455_JDC-WP-005-c/spaceranger/spatial/,,0.2208187960959237 16 | JDC_WP_007_o_ST,,/sdata/activities/kappsen-tmc/visium/SC2300462_JDC-WP-007-o/img/SC2300462.tiff,/sdata/activities/kappsen-tmc/visium/SC2300462_JDC-WP-007-o/spaceranger/spatial/,,0.2208187960959237 17 | JDC_WP_007_s_ST,,/sdata/activities/kappsen-tmc/visium/SC2300463_JDC-WP-007-s/img/SC2300463.tiff,/sdata/activities/kappsen-tmc/visium/SC2300463_JDC-WP-007-s/spaceranger/spatial/,,0.2208187960959237 18 | JDC_WP_007_j_ST,,/sdata/activities/kappsen-tmc/visium/SC2300464_JDC-WP-007-j/img/SC2300464.tiff,/sdata/activities/kappsen-tmc/visium/SC2300464_JDC-WP-007-j/spaceranger/spatial/,,0.2208187960959237 19 | 
JDC_WP_009_n_ST,,/sdata/activities/kappsen-tmc/visium/SC2300465_JDC-WP-009-n/img/SC2300465.tiff,/sdata/activities/kappsen-tmc/visium/SC2300465_JDC-WP-009-n/spaceranger/spatial/,,0.2208187960959237 20 | JDC_WP_009_r_ST,,/sdata/activities/kappsen-tmc/visium/SC2300466_JDC-WP-009-r/img/SC2300466.tiff,/sdata/activities/kappsen-tmc/visium/SC2300466_JDC-WP-009-r/spaceranger/spatial/,,0.2208187960959237 21 | JDC_WP_009_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300467_JDC-WP-009-c/img/SC2300467.tiff,/sdata/activities/kappsen-tmc/visium/SC2300467_JDC-WP-009-c/spaceranger/spatial/,,0.2208187960959237 22 | JDC_WP_0010_w_ST,,/sdata/activities/kappsen-tmc/visium/SC2300511_JDC-WP-0010-w/img/SC2300511.tiff,/sdata/activities/kappsen-tmc/visium/SC2300511_JDC-WP-0010-w/spaceranger/spatial/,,0.2208187960959237 23 | JDC_WP_0010_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300514_JDC-WP-0010-c/img/SC2300514.tiff,/sdata/activities/kappsen-tmc/visium/SC2300514_JDC-WP-0010-c/spaceranger/spatial/,,0.2208187960959237 24 | JDC_WP_012_w_ST,,/sdata/activities/kappsen-tmc/visium/SC2300515_JDC-WP-012-w/img/SC2300515.tiff,/sdata/activities/kappsen-tmc/visium/SC2300515_JDC-WP-012-w/spaceranger/spatial/,,0.2208187960959237 25 | JDC_WP_012_ae_ST,,/sdata/activities/kappsen-tmc/visium/SC2300516_JDC-WP-012-ae/img/SC2300516.tiff,/sdata/activities/kappsen-tmc/visium/SC2300516_JDC-WP-012-ae/spaceranger/spatial/,,0.2208187960959237 26 | JDC_WP_012_n_ST,,/sdata/activities/kappsen-tmc/visium/SC2300517_JDC-WP-012-n/img/SC2300517.tiff,/sdata/activities/kappsen-tmc/visium/SC2300517_JDC-WP-012-n/spaceranger/spatial/,,0.2208187960959237 27 | JDC_WP_012_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300518_JDC-WP-012-c/img/SC2300518.tiff,/sdata/activities/kappsen-tmc/visium/SC2300518_JDC-WP-012-c/spaceranger/spatial/,,0.2208187960959237 28 | JDC_WP_004_y_ST,,/sdata/activities/kappsen-tmc/visium/SC2300519_JDC-WP-004-y/img/SC2300519.tiff,/sdata/activities/kappsen-tmc/visium/SC2300519_JDC-WP-004-y/spaceranger/spatial/,,0.2208187960959237 29 | JDC_WP_004_ah_ST,,/sdata/activities/kappsen-tmc/visium/SC2300520_JDC-WP-004-ah/img/SC2300520.tiff,/sdata/activities/kappsen-tmc/visium/SC2300520_JDC-WP-004-ah/spaceranger/spatial/,,0.2208187960959237 30 | JDC_WP_008_r_ST,,/sdata/activities/kappsen-tmc/visium/SC2300627_JDC-WP-008-r/img/SC2300627.tiff,/sdata/activities/kappsen-tmc/visium/SC2300627_JDC-WP-008-r/spaceranger/spatial/,,0.2208187960959237 31 | JDC_WP_008_v_ST,,/sdata/activities/kappsen-tmc/visium/SC2300628_JDC-WP-008-v/img/SC2300628.tiff,/sdata/activities/kappsen-tmc/visium/SC2300628_JDC-WP-008-v/spaceranger/spatial/,,0.2208187960959237 32 | JDC_WP_008_j_ST,,/sdata/activities/kappsen-tmc/visium/SC2300629_JDC-WP-008-j/img/SC2300629.tiff,/sdata/activities/kappsen-tmc/visium/SC2300629_JDC-WP-008-j/spaceranger/spatial/,,0.2208187960959237 33 | JDC_WP_011_w_ST,,/sdata/activities/kappsen-tmc/visium/SC2300631_JDC-WP-011-w/img/SC2300631.tiff,/sdata/activities/kappsen-tmc/visium/SC2300631_JDC-WP-011-w/spaceranger/spatial/,,0.2208187960959237 34 | JDC_WP_011_ac_ST,,/sdata/activities/kappsen-tmc/visium/SC2300632_JDC-WP-011-ac/img/SC2300632.tiff,/sdata/activities/kappsen-tmc/visium/SC2300632_JDC-WP-011-ac/spaceranger/spatial/,,0.2208187960959237 35 | JDC_WP_011_n_ST,,/sdata/activities/kappsen-tmc/visium/SC2300633_JDC-WP-011-n/img/SC2300633.tiff,/sdata/activities/kappsen-tmc/visium/SC2300633_JDC-WP-011-n/spaceranger/spatial/,,0.2208187960959237 36 | 
JDC_WP_011_b_ST,,/sdata/activities/kappsen-tmc/visium/SC2300634_JDC-WP-011-b/img/SC2300634.tiff,/sdata/activities/kappsen-tmc/visium/SC2300634_JDC-WP-011-b/spaceranger/spatial/,,0.2208187960959237 37 | JDC_WP_008_b_ST,,/sdata/activities/kappsen-tmc/visium/SC2300701_JDC-WP-008-b/img/SC2300701.tiff,/sdata/activities/kappsen-tmc/visium/SC2300701_JDC-WP-008-b/spaceranger/spatial/,,0.2208187960959237 38 | JDC_WP_007_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300460_JDC-WP-007-c/img/SC2300460.ome.tiff,/sdata/activities/kappsen-tmc/visium/SC2300460_JDC-WP-007-c/spaceranger/spatial/,,0.1469393661384487 39 | JDC_WP_009_j_ST,,/sdata/activities/kappsen-tmc/visium/SC2300461_JDC-WP-009-j/img/SC2300461.ome.tiff,/sdata/activities/kappsen-tmc/visium/SC2300461_JDC-WP-009-j/spaceranger/spatial/,,0.1469393661384487 40 | JDC_WP_010_ac_ST,,/sdata/activities/kappsen-tmc/visium/SC2300512_JDC-WP-010-ac/img/SC2300512.ome.tiff,/sdata/activities/kappsen-tmc/visium/SC2300512_JDC-WP-010-ac/spaceranger/spatial/,,0.1469393661384487 41 | JDC_WP_010_p_ST,,/sdata/activities/kappsen-tmc/visium/SC2300513_JDC-WP-010-p/img/SC2300513.ome.tiff,/sdata/activities/kappsen-tmc/visium/SC2300513_JDC-WP-010-p/spaceranger/spatial/,,0.1469393661384487 -------------------------------------------------------------------------------- /assets/samplesheet_test.csv: -------------------------------------------------------------------------------- 1 | sample,fastq,image,grid,roifile,mpp 2 | TCGA-AA-A00Z-01A-01-BS1.0,,/projects/chuang-lab/TCGA-COAD/WSI/TCGA-AA-A00Z-01A-01-BS1.47febbeb-d8d0-45fe-b934-30d8992a1737.svs,,/projects/rubinstein-lab/USERS/domans/COAD/tcga_coad_svs_thumbs/TCGA-AA-A00Z-01A-01-BS1.47febbeb-d8d0-45fe-b934-30d8992a1737.oid0.json,0.2485 3 | -------------------------------------------------------------------------------- /assets/samplesheet_test_sj.csv: -------------------------------------------------------------------------------- 1 | sample,fastq,image,grid,roifile,mpp 2 | SJRHB030549_D3-16812.oid1,,/projects/rubinstein-lab/pediSarcoma/stjude/SJRHB030549_D3-16812.svs,,/projects/rubinstein-lab/USERS/domans/pediSarcoma-stjude/thumbs/SJRHB030549_D3-16812.oid1.json,0.25159999999999999 3 | TCGA-AA-A00Z-01A-01-BS1.0,,/projects/chuang-lab/TCGA-COAD/WSI/TCGA-AA-A00Z-01A-01-BS1.47febbeb-d8d0-45fe-b934-30d8992a1737.svs,,/projects/rubinstein-lab/USERS/domans/COAD/tcga_coad_svs_thumbs/TCGA-AA-A00Z-01A-01-BS1.47febbeb-d8d0-45fe-b934-30d8992a1737.oid0.json,0.2485 4 | -------------------------------------------------------------------------------- /assets/samplesheet_test_sj_short.csv: -------------------------------------------------------------------------------- 1 | sample,fastq,image,grid,roifile,mpp 2 | SJRHB030549_D3-16812.oid1,,/projects/rubinstein-lab/USERS/domans/pediSarcoma-stjude/STQ-dev/results-56-112/SJRHB030549_D3-16812.oid1/image.ome.tiff,,,0.25 3 | SJRHB012405_X1-16322.oid0,,/projects/rubinstein-lab/USERS/domans/pediSarcoma-stjude/STQ-dev/results-56-112/SJRHB012405_X1-16322.oid0/image.ome.tiff,,,0.25 4 | -------------------------------------------------------------------------------- /assets/samplesheet_two.csv: -------------------------------------------------------------------------------- 1 | sample,fastq,image,grid,roifile,mpp 2 | WM4237_T1_S1,/projects/chuang-lab/rubinstein/ST_melanomaPDX/data/SC2200324_WM4237-T1-Day14-229/fastq,/projects/chuang-lab/rubinstein/ST_melanomaPDX/data/SC2200324_WM4237-T1-Day14-229/img/SC2200324.tiff,,,0.22075 3 | 
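The samplesheets above all share the columns sample,fastq,image,grid,roifile,mpp, with unused fields left empty depending on the workflow. A minimal sketch, separate from the pipeline itself, of loading one of these files and sanity-checking its fields (the samplesheet path is a placeholder):

import os
import pandas as pd

REQUIRED = ["sample", "fastq", "image", "grid", "roifile", "mpp"]

def load_samplesheet(path):
    df = pd.read_csv(path, dtype=str).fillna("")
    missing = [c for c in REQUIRED if c not in df.columns]
    if missing:
        raise ValueError(f"Samplesheet {path} is missing columns: {missing}")
    for _, row in df.iterrows():
        # image is filled in every sheet above; fastq/grid/roifile may be empty
        if row["image"] and not os.path.exists(row["image"]):
            print(f"WARNING: image not found for sample {row['sample']}: {row['image']}")
        float(row["mpp"])  # microns per pixel must be numeric
    return df

if __name__ == "__main__":
    print(load_samplesheet("assets/samplesheet_demo.csv"))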
-------------------------------------------------------------------------------- /bin/StainNetNorm.py: -------------------------------------------------------------------------------- 1 | # Prepared by Domanskyi 2 | # from https://github.com/khtao/StainNet repository 3 | # Here I use their pretrained net to run image stain normalization 4 | 5 | import os 6 | import argparse 7 | import imageio 8 | import tifffile 9 | import numpy as np 10 | import torch 11 | import torch.nn as nn 12 | from tqdm import tqdm 13 | from PIL import Image 14 | from torch.utils.data import dataset, DataLoader 15 | from glob import glob 16 | 17 | import PIL.Image 18 | PIL.Image.MAX_IMAGE_PIXELS = None 19 | 20 | class StainNet(nn.Module): 21 | def __init__(self, input_nc=3, output_nc=3, n_layer=3, n_channel=32, kernel_size=1): 22 | super(StainNet, self).__init__() 23 | model_list = [] 24 | model_list.append(nn.Conv2d(input_nc, n_channel, kernel_size=kernel_size, bias=True, padding=kernel_size // 2)) 25 | model_list.append(nn.ReLU(True)) 26 | for n in range(n_layer - 2): 27 | model_list.append( 28 | nn.Conv2d(n_channel, n_channel, kernel_size=kernel_size, bias=True, padding=kernel_size // 2)) 29 | model_list.append(nn.ReLU(True)) 30 | model_list.append(nn.Conv2d(n_channel, output_nc, kernel_size=kernel_size, bias=True, padding=kernel_size // 2)) 31 | 32 | self.rgb_trans = nn.Sequential(*model_list) 33 | 34 | def forward(self, x): 35 | return self.rgb_trans(x) 36 | 37 | def list_file_tree(path, file_type="tif"): 38 | if file_type.find("*") < 0: 39 | file_type = "*" + file_type 40 | image_list = glob(os.path.join(path, "*" + file_type), recursive=True) 41 | return image_list 42 | 43 | class SingleImage(dataset.Dataset): 44 | def __init__(self, data_path, transform=None, augment=None): 45 | self.data_path = data_path 46 | self.transform = transform 47 | self.augment = augment 48 | self.image_list = list_file_tree(os.path.join(data_path), "png") 49 | self.image_list += list_file_tree(os.path.join(data_path), "jpg") 50 | self.image_list += list_file_tree(os.path.join(data_path), "tif") 51 | self.image_list += list_file_tree(os.path.join(data_path), "tiff") 52 | self.image_list.sort() 53 | 54 | def __len__(self): 55 | return len(self.image_list) 56 | 57 | def __getitem__(self, item): 58 | img = Image.open(self.image_list[item]) 59 | img = (np.array(img, dtype=np.float32) / 255.0).transpose((2, 0, 1)) 60 | return img 61 | 62 | def process_images(opt, model, s = 4096): 63 | dataset = SingleImage(opt.source_dir) 64 | dataloader = DataLoader(dataset, batch_size=1, num_workers=1, drop_last=False) 65 | file_list = dataset.image_list 66 | num = 0 67 | for imgs in dataloader: 68 | print(imgs.shape) 69 | imgs_corrected = imgs.numpy() 70 | 71 | dims = imgs.shape[2], imgs.shape[3] 72 | r = [np.append(s*np.array(range(0, int(np.floor(dims[i]/s))+1)), [dims[i]]) for i in range(2)] 73 | coords = [] 74 | for i in range(len(r[0])-1): 75 | for j in range(len(r[1])-1): 76 | coords.append((i,j)) 77 | 78 | for i, j in tqdm(coords): 79 | imgs_temp = imgs[:, :, r[0][i]:r[0][i+1], r[1][j]:r[1][j+1]] 80 | print(imgs_temp.shape) 81 | if (imgs_temp.shape[2]!=0) and (imgs_temp.shape[3]!=0): 82 | with torch.no_grad(): 83 | imgs_temp = imgs_temp.cpu() 84 | imgs_temp = (imgs_temp - 0.5) * 2 85 | outputs = (model(imgs_temp) * 0.5 + 0.5).clamp(0, 1).detach().cpu().numpy() 86 | 87 | imgs_corrected[:, :, r[0][i]:r[0][i+1], r[1][j]:r[1][j+1]] = outputs 88 | 89 | for out in imgs_corrected: 90 | file_path = file_list[num] 91 | file_path = 
os.path.join(os.path.join(opt.save_dir), os.path.split(file_path)[1]) 92 | os.makedirs(os.path.split(file_path)[0], exist_ok=True) 93 | print('\n', file_path) 94 | ext = os.path.splitext(file_path)[1] 95 | tifffile.imwrite(file_path[:-len(ext)] + ".tiff", np.array(np.array(Image.fromarray((out * 255).astype(np.uint8).transpose((1, 2, 0))))), bigtiff=True) # v2 96 | #Image.fromarray((out * 255).astype(np.uint8).transpose((1, 2, 0))).save(file_path[:-len(ext)] + ".tiff", compression='raw') # v1 97 | #imageio.imwrite(file_path[:-len(ext)] + ".tiff", (out * 255).astype(np.uint8).transpose((1, 2, 0))) # v0 98 | num += 1 99 | 100 | return 101 | 102 | def run_normalization(opt): 103 | model = StainNet() 104 | model = model.cpu() 105 | checkpoint = torch.load(opt.model_path, map_location=torch.device('cpu')) 106 | model.load_state_dict(checkpoint) 107 | model.eval() 108 | process_images(opt, model) 109 | return 110 | 111 | if __name__ == '__main__': 112 | 113 | # python norm.py --source_dir "input_images/" --save_dir "output_images/" --model_path "StainNet-Public_layer3_ch32.pth" 114 | 115 | parser = argparse.ArgumentParser() 116 | parser.add_argument("--source_dir", type=str, required=True, help="path to source images") 117 | parser.add_argument("--save_dir", type=str, required=True, help="path to save images") 118 | parser.add_argument('--model_path', type=str, required=True, help='models path to load') 119 | args = parser.parse_args() 120 | 121 | run_normalization(args) 122 | -------------------------------------------------------------------------------- /bin/StainToolsNorm.py: -------------------------------------------------------------------------------- 1 | # Prepared by Domanskyi 2 | # The normalization is done by patches 3 | # Some stitching lines may be visible 4 | 5 | import os 6 | import argparse 7 | import tifffile 8 | import staintools 9 | import numpy as np 10 | from tqdm import tqdm 11 | from PIL import Image 12 | 13 | import PIL.Image 14 | PIL.Image.MAX_IMAGE_PIXELS = None 15 | 16 | import spams 17 | 18 | def get_concentrations(I, stain_matrix, regularizer=0.01): 19 | OD = convert_RGB_to_OD(I).reshape((-1, 3)) 20 | return spams.lasso(X=OD.T, D=stain_matrix.T, mode=2, lambda1=regularizer, pos=True).toarray().T 21 | 22 | def convert_RGB_to_OD(I): 23 | mask = (I == 0) 24 | I[mask] = 1 25 | return np.maximum(-1 * np.log(I / 255), 1e-6) 26 | 27 | def convert_OD_to_RGB(OD): 28 | assert OD.min() >= 0, "Negative optical density." 
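        # Inverse of convert_RGB_to_OD above: floor OD at 1e-6, then map back to RGB with I = 255 * exp(-OD).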
29 | OD = np.maximum(OD, 1e-6) 30 | return (255 * np.exp(-1 * OD)).astype(np.uint8) 31 | 32 | class StainNormalizer(staintools.StainNormalizer): 33 | 34 | def __init__(self, method): 35 | super().__init__(method) 36 | 37 | def fit(self, target): 38 | self.stain_matrix_target = self.extractor.get_stain_matrix(target) 39 | target_concentrations = get_concentrations(target, self.stain_matrix_target) 40 | self.maxC_target = np.percentile(target_concentrations, 99, axis=0).reshape((1, 2)) 41 | return 42 | 43 | def estimate(self, I): 44 | stain_matrix_source = self.extractor.get_stain_matrix(I) 45 | print(stain_matrix_source.dtype) 46 | source_concentrations = get_concentrations(I, stain_matrix_source) 47 | maxC_source = np.percentile(source_concentrations, 99, axis=0).reshape((1, 2)) 48 | return stain_matrix_source, maxC_source 49 | 50 | def transform(self, I, stain_matrix_source, maxC_source): 51 | source_concentrations = get_concentrations(I, np.array(stain_matrix_source)) 52 | source_concentrations *= self.maxC_target / maxC_source 53 | tmp = 255 * np.exp(-1 * np.dot(source_concentrations, self.stain_matrix_target)) 54 | return tmp.reshape(I.shape).astype(np.uint8) 55 | 56 | if __name__ == '__main__': 57 | 58 | parser = argparse.ArgumentParser() 59 | parser.add_argument("--referenceImagePath", type=str, required=True, help="path to reference or target image") 60 | parser.add_argument("--inputImagePath", type=str, required=True, help="input image name") 61 | parser.add_argument("--outputImageName", type=str, required=True, help="output image name") 62 | parser.add_argument('--s', type=int, default=4096, help='patch size') 63 | parser.add_argument('--low', type=int, default=100, help='low threshold') 64 | parser.add_argument('--high', type=int, default=200, help='high threshold') 65 | parser.add_argument('--qfraction', type=float, default=0.75, help='quantile of fraction for tissue') 66 | args = parser.parse_args() 67 | 68 | print('s:', args.s) 69 | 70 | target = tifffile.imread(args.referenceImagePath) 71 | if target.shape[0]<=4: 72 | target = np.moveaxis(target, 0, 2) 73 | target = target[:,:,:3] 74 | 75 | max_color = 255 76 | quantile = 0.95 77 | v = max_color - int(np.quantile(target.ravel(), quantile)) 78 | print('Color max shift:', v) 79 | target[(target.astype(int) + v) > max_color] = max_color 80 | target[(target.astype(int) + v) <= max_color] += v 81 | target = target.astype(np.uint8) 82 | target = np.asfortranarray(target) 83 | print(target.shape) 84 | normalizer = StainNormalizer(method='macenko') 85 | normalizer.fit(target) 86 | 87 | img = tifffile.imread(args.inputImagePath) 88 | if img.shape[0]<=4: 89 | img = np.moveaxis(img, 0, 2) 90 | img = img[:,:,:3] 91 | max_color = 255 92 | quantile = 0.95 93 | v = max_color - int(np.quantile(img.ravel(), quantile)) 94 | print('Color max shift:', v) 95 | for i in tqdm(range(img.shape[0])): 96 | wh = np.where((img[i, :, :].astype(int) + v) > max_color) 97 | img[i, wh[0], wh[1]] = max_color 98 | wh = np.where((img[i, :, :].astype(int) + v) <= max_color) 99 | img[i, wh[0], wh[1]] += v 100 | img = np.asfortranarray(img) 101 | 102 | dims = img.shape[0], img.shape[1] 103 | r = [np.append(args.s*np.array(range(0, int(np.floor(dims[i]/args.s))+1)), [dims[i]]) for i in range(2)] 104 | coords = [(i,j) for i in range(len(r[0])-1) for j in range(len(r[1])-1)] 105 | print(coords) 106 | 107 | # Determine representative patch 108 | coordsf = [] 109 | fractions = [] 110 | for i, j in tqdm(coords): 111 | try: 112 | # Get in_tissue flags for patch 113 | v = 
img[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1], :].mean(axis=2) 114 | vc = v.copy() 115 | v[vc < args.low] = 0 116 | v[vc > args.high] = 0 117 | v[(vc >= args.low) & (vc <= args.high)] = 1 118 | f = v.ravel().mean() 119 | print(i, j, f) 120 | if f==f: 121 | coordsf.append((i, j, f)) 122 | fractions.append(f) 123 | except Exception as exception: 124 | print('Exception:', exception) 125 | 126 | def get_ms_cs(fcutoff): 127 | print('fcutoff:', fcutoff) 128 | ms = [] 129 | cs = [] 130 | for i, j, f in tqdm(coordsf): 131 | # Get in_tissue flags for patch 132 | in_tissue = f >= fcutoff 133 | print(i, j, in_tissue, f) 134 | if in_tissue: 135 | try: 136 | m, c = normalizer.estimate(img[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1], :]) 137 | ms.append(m) 138 | cs.append(c) 139 | except Exception as exception: 140 | print('Exception:', exception) 141 | return ms, cs 142 | 143 | #fcutoff = np.quantile(fractions, args.qfraction) 144 | ms, cs = get_ms_cs(0.5) 145 | 146 | if len(ms) == 0: 147 | ms, cs = get_ms_cs(0.25) 148 | 149 | if len(ms) == 0: 150 | ms, cs = get_ms_cs(0.125) 151 | 152 | ms = np.dstack(ms) 153 | msm = np.median(ms, axis=2) 154 | cs = np.vstack(cs) 155 | csm = np.median(cs, axis=0) 156 | print(ms.shape, cs.shape) 157 | closest = np.argsort(np.sqrt((np.array([(ms - msm[:, :, None])[:, :, i].ravel() for i in range(ms.shape[2])])**2).sum(axis=1)))[0] 158 | m, c = ms[:, :, closest], cs[closest, :] 159 | print(m) 160 | print(c) 161 | 162 | # Normalize all patches 163 | for i, j in tqdm(coords): 164 | try: 165 | img[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1], :] = normalizer.transform(img[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1], :], m, c) 166 | except Exception as exception: 167 | print('Exception:', exception) 168 | 169 | tifffile.imwrite(args.outputImageName, img, bigtiff=True) 170 | 171 | exit(0) 172 | -------------------------------------------------------------------------------- /bin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/bin/__init__.py -------------------------------------------------------------------------------- /bin/extractROI.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openslide 3 | import json 4 | import tifffile 5 | from tifffile import TiffFile 6 | import numpy as np 7 | import argparse 8 | 9 | import PIL.Image 10 | PIL.Image.MAX_IMAGE_PIXELS = None 11 | 12 | if __name__ == '__main__': 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument("--fileslide", type=str, required=True, help="") 16 | parser.add_argument("--roifile", type=str, required=True, help="") 17 | parser.add_argument('--wholeside', default=False, action=argparse.BooleanOptionalAction, help="") 18 | parser.add_argument('--sizefile', type=str, default="size.txt", help="") 19 | parser.add_argument('--outfile', type=str, default="outfile.tiff", help="") 20 | parser.add_argument('--extract', type=str, default="False", help="") 21 | args = parser.parse_args() 22 | 23 | fileslide = args.fileslide.replace("\\", "") 24 | 25 | try: 26 | slide = openslide.open_slide(fileslide) 27 | dims0 = slide.dimensions 28 | except Exception as exception: 29 | print(exception) 30 | # If the slide is too large openslide may fail to read 31 | with TiffFile(fileslide) as imgh: 32 | dims0 = imgh.pages[0].tags[256].value, imgh.pages[0].tags[257].value 33 | print(dims0) 34 | 35 | with open(args.roifile, 'r') as tempfile: 36 | info 
= json.load(tempfile) 37 | 38 | icoords = int(dims0[0] * info['0']['location']), int(dims0[1] * info['1']['location']) 39 | size = int(dims0[0] * info['0']['size']), int(dims0[1] * info['1']['size']) 40 | print(dims0, '\t', icoords, '\t', size) 41 | 42 | if args.wholeside: 43 | sizegp = round(dims0[0] * dims0[1] / 10**6) 44 | else: 45 | sizegp = round(size[0] * size[1] / 10**6) 46 | 47 | with open(args.sizefile, 'w') as tempfile: 48 | tempfile.write(str(sizegp)) 49 | 50 | if args.extract=="True": 51 | print('Extracting ROI image') 52 | try: 53 | img = slide.read_region(location=icoords, level=0, size=size).convert('RGB') 54 | tifffile.imwrite(args.outfile, np.array(img), bigtiff=True) 55 | img.close() 56 | except Exception as exception: 57 | print(exception) 58 | # If the slide is too large openslide may fail to read 59 | img = tifffile.imread(fileslide)[icoords[1]:icoords[1]+size[1],icoords[0]:icoords[0]+size[0],:] 60 | tifffile.imwrite(args.outfile, img, bigtiff=True) 61 | del img 62 | 63 | exit(0) 64 | -------------------------------------------------------------------------------- /bin/mtx_tools.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gzip 3 | import numpy as np 4 | import pandas as pd 5 | from scipy.sparse import csr_matrix 6 | from scipy.io import mmwrite 7 | import scanpy as sc 8 | 9 | def gz(fname): 10 | 11 | '''Compress file with gzip and remove source 12 | ''' 13 | 14 | with open(fname) as f_in: 15 | with gzip.open(fname + '.gz', 'wt') as f_out: 16 | f_out.writelines(f_in) 17 | 18 | os.remove(fname) 19 | 20 | return 21 | 22 | def read_sc_from_mtx(outsPath): 23 | 24 | sc_adata = sc.read_mtx(outsPath +'matrix.mtx.gz').T 25 | 26 | df_var = pd.read_csv(outsPath + 'features.tsv.gz', header=None, sep='\t', index_col=0) 27 | df_var.index.name = None 28 | sc_adata.var = df_var 29 | 30 | df_obs = pd.read_csv(outsPath + 'barcodes.tsv.gz', header=None).set_index(0) 31 | df_obs.index.name = None 32 | sc_adata.obs = df_obs 33 | 34 | print(sc_adata.shape) 35 | 36 | return sc_adata 37 | 38 | def read_mtx_combine_and_write_mtx(adata1, adata2, saveDataDir=''): 39 | 40 | if not os.path.exists(saveDataDir): 41 | os.makedirs(saveDataDir) 42 | 43 | df = pd.concat([adata1.to_df(), adata2.to_df()], axis=1).fillna(0).astype(int) 44 | 45 | obs = pd.Series(df.index) 46 | obs.to_csv(saveDataDir + '/barcodes.tsv.gz', sep='\t', index=False, header=False) 47 | 48 | var = pd.concat([adata1.var, adata2.var]).loc[df.columns].reset_index() 49 | var.to_csv(saveDataDir + '/features.tsv.gz', sep='\t', index=False, header=False) 50 | 51 | fname = saveDataDir + '/matrix.mtx' 52 | mmwrite(fname, csr_matrix(df.values.T)) 53 | gz(fname) 54 | 55 | return 56 | -------------------------------------------------------------------------------- /bin/run-conch.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import numpy as np 4 | import openslide 5 | import PIL 6 | import json 7 | from tqdm import tqdm 8 | 9 | import timm 10 | import torch 11 | from torchvision import transforms 12 | import torch.nn as nn 13 | from conch.open_clip_custom import create_model_from_pretrained 14 | import openslide 15 | 16 | import PIL.Image 17 | PIL.Image.MAX_IMAGE_PIXELS = None 18 | 19 | def normalizer(img, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), size=224): 20 | func = transforms.Compose([transforms.Resize(size), 21 | transforms.ToTensor(), 22 | transforms.Normalize(mean=mean, 
std=std)]) 23 | return func(img) 24 | 25 | if __name__ == '__main__': 26 | parser = argparse.ArgumentParser( 27 | description='Compute features of each tile') 28 | parser.add_argument('--wsi-file', dest='wsi_file', action='store', 29 | required=True, 30 | help="""The path to the whole slide image (WSI) in a format readable by openslide (e.g., svs or ndpi).""") 31 | parser.add_argument('--model-checkpoint-path', dest='modelPath', action='store', 32 | required=True, 33 | help="""Path to bin checkpoint.""") 34 | parser.add_argument('--use-conch-normalizer', dest='useCONCHnormalizer', action='store', 35 | required=False, default=False, 36 | help="""Use special normalization for CONCH, otherwise use the default normalizer""") 37 | parser.add_argument('--positions-list-file', dest='positions_list_file', action='store', 38 | required=True, 39 | help="""The positions_list.csv file output by spaceranger that has one row per spot and columns indicating whether the spot is within the tissue and its x and y coordinates in pixels.""") 40 | parser.add_argument('--scalefactors-json-file', dest='scalefactors_json_file', action='store', 41 | required=True, 42 | help="""The scalefactors_json.json file output by spaceranger that defines the spot diameter in spaceranger's full resolution (i.e., the resolution of the file input to spaceranger, which may or may not be wsi_file).""") 43 | parser.add_argument('--output-path', dest='output_path', action='store', 44 | required=True, 45 | help="""Name of _CSV_ file in which to store the feature matrix (rows are tiles, cols are features). 46 | The file will be compressed if it is named *.gz""") 47 | parser.add_argument('--tile-mask', dest='tile_mask', default=None, action='store', required=False) 48 | parser.add_argument('--downsample-expanded', dest='downsample', action='store', default=True, 49 | required=False, 50 | help="""If expansion factor is greater than 1 then downsample the tiles back to the input size""") 51 | parser.add_argument('--expansion-factor', dest='expansion', action='store', 52 | required=True, 53 | help="""Expansion factor, 1 means no expansion""") 54 | parser.add_argument('--subtiling', dest='subtiling', action='store', 55 | required=True, 56 | help="""Do subtiling""") 57 | parser.add_argument('--subcoords-factor', dest='subcoordsf', action='store', 58 | required=True, 59 | help="""Factor for subtiling subtiling""") 60 | parser.add_argument('--subcoords-list', dest='subcoords', action='store', 61 | required=True, 62 | help="""Subtiling coordinates""") 63 | 64 | args = parser.parse_args() 65 | expansion = float(args.expansion) 66 | downsample = args.downsample=='true' 67 | subtiling = args.subtiling=='true' 68 | useCONCHnormalizer = args.useCONCHnormalizer=='true' 69 | 70 | subcoordsf = int(args.subcoordsf) 71 | subcoords = json.loads(args.subcoords) 72 | 73 | if expansion == 1.0: 74 | print('Expansion factor is 1, requested downsampling:', downsample) 75 | downsample = False 76 | else: 77 | if downsample: 78 | expansion = np.ceil(expansion) 79 | print('Expansion factor rounded to next interger:', expansion) 80 | print('Tiles will be expanded and then downsampled') 81 | else: 82 | print('Expansion without downsampling is requested') 83 | 84 | wsi_file = args.wsi_file 85 | positions_list_file = args.positions_list_file 86 | scalefactors_json_file = args.scalefactors_json_file 87 | output_path = args.output_path 88 | # Read in the spaceranger positions list file 89 | pos = pd.read_csv(positions_list_file, header=None) 90 | pos.columns = 
['barcode', 'in_tissue', 'array_row', 'array_col', 'pxl_row_in_fullres', 'pxl_col_in_fullres'] 91 | 92 | if args.tile_mask != 'None': 93 | print('Received tile mask %s' % args.tile_mask) 94 | mask = pd.read_csv(args.tile_mask, index_col=0, header=None) 95 | pos['in_tissue'] = mask.reindex(pos['barcode'].values).values 96 | 97 | # Read the spot diameter at spaceranger's "full resolution" from the scalefactors_json file 98 | # output by spaceranger, i.e., in the resolution of the file passed to spaceranger, which may not 99 | # be the same resolution of wsi_file. 100 | with open(scalefactors_json_file) as f: 101 | scalefactors_tbl = json.load(f) 102 | spot_diameter_fullres = scalefactors_tbl['spot_diameter_fullres'] 103 | 104 | 105 | # scale_factor = ratio of resolution of 'wsi_file' to resolution of "fullres" image input to spaceranger. 106 | # scale_factor = 4 107 | # NB: ideally, this code would accept the full resolution image along with the wsi_file and compare their sizes. 108 | # You would do that with something like (wait ... probably the full resolution image is a png/jpg/etc not openable by openslide) 109 | # full_resolution_slide = openslide.open_slide(full_resolution_file) 110 | # base_magnification = float(full_resolution_slide.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER]) 111 | scale_factor = 1 112 | # Define the spot diameter in the resolution of the wsi_file 113 | spot_diameter_wsi = round(spot_diameter_fullres * scale_factor) 114 | # Translate the pixel coordinates from full resolution to the resolution of the wsi 115 | pos['pxl_row_in_wsi'] = pos.pxl_row_in_fullres * scale_factor 116 | pos['pxl_col_in_wsi'] = pos.pxl_col_in_fullres * scale_factor 117 | # Create the inception v3 model 118 | num_dimensions = 3 119 | 120 | if downsample: 121 | num_rows = num_cols = round(spot_diameter_wsi) 122 | else: 123 | num_rows = num_cols = round(spot_diameter_wsi * expansion) 124 | 125 | # Load pre-trained CONCH model 126 | model, normalizerCONCH = create_model_from_pretrained("conch_ViT-B-16", checkpoint_path=args.modelPath) 127 | model.eval() 128 | 129 | # Use special normalization for CONCH, otherwise use the default normalizer 130 | if useCONCHnormalizer: 131 | normalizer = normalizerCONCH 132 | 133 | num_images = len(pos) 134 | batch_size = int(10**8 / (float(args.expansion) * float(args.expansion) * num_cols * num_rows)) 135 | if subtiling: 136 | batch_size = int(batch_size / 5) 137 | num_batches = int(np.ceil(num_images / batch_size)) 138 | 139 | print('Reading and pocessing tiles:', num_images) 140 | print('Batch size:', batch_size) 141 | print('Number of batches:', num_batches) 142 | 143 | slide = openslide.open_slide(wsi_file) 144 | 145 | w = num_cols 146 | h = num_rows 147 | lvl = 0 148 | features = [] 149 | for ibatch in tqdm(range(num_batches)): 150 | images = [] 151 | for indx in range(batch_size): 152 | try: 153 | cy = pos.loc[indx + ibatch*batch_size, 'pxl_row_in_wsi'] 154 | cx = pos.loc[indx + ibatch*batch_size, 'pxl_col_in_wsi'] 155 | 156 | if pos.loc[indx + ibatch*batch_size, 'in_tissue']: 157 | if downsample: 158 | ew = round(w * expansion) 159 | eh = round(h * expansion) 160 | else: 161 | ew = w 162 | eh = h 163 | 164 | img = np.array(slide.read_region((int(cx - ew / 2), int(cy - eh / 2)), lvl, (int(ew), int(eh))).convert('RGB')) 165 | 166 | if subtiling: 167 | a = int(np.floor(img.shape[0]/subcoordsf)) 168 | b = int(np.floor(img.shape[1]/subcoordsf)) 169 | for i, j in subcoords: 170 | subimg = img[a*(i-1): a*(i+1), b*(i-1): b*(i+1), :] 171 | 
images.append(subimg) 172 | else: 173 | # The downsampling is done to save memory 174 | if downsample: 175 | img = img[::int(expansion), ::int(expansion), :] 176 | assert (img.shape[0], img.shape[1])==(w, h), 'Wrong tile dimensions after downsampling!' 177 | 178 | images.append(img) 179 | 180 | except Exception as exception: 181 | #print(exception) 182 | pass 183 | print('Number of tiles:', len(images)) 184 | 185 | if len(images)>0: 186 | images = torch.cat([normalizer(PIL.Image.fromarray(image))[None, :, :, :] for image in images], 0) 187 | with torch.inference_mode(): 188 | temp_features = model.encode_image(images, proj_contrast=False, normalize=False).cpu().numpy() 189 | 190 | # Average the subtiles, e.g., every 5 subtiles 191 | if subtiling: 192 | df_temp = pd.DataFrame(temp_features) 193 | temp_features = df_temp.groupby(np.arange(len(df_temp.index))//len(subcoords)).mean().values 194 | 195 | features.append(temp_features) 196 | 197 | features = np.vstack(features) 198 | 199 | # Convert the dictionary of features to a dataframe and name its columns featXXX 200 | df_features = pd.DataFrame(features) 201 | df_features.columns = [f'feat_conch_' + str(i) for i in range(df_features.shape[1])] 202 | df_features.index = pos.loc[pos['in_tissue']==1].index 203 | 204 | # Append the spot position information to each row 205 | tbl = pd.concat([pos.loc[pos['in_tissue']==1], df_features], axis=1) 206 | print(tbl) 207 | 208 | # Output the features with spot information 209 | ## This will automatically compress if the file suffix is .gz 210 | 211 | tbl.to_csv(output_path + '.tsv.gz', index=False) 212 | print('Successfully wrote ' + output_path) 213 | 214 | exit(0) 215 | -------------------------------------------------------------------------------- /bin/run-inception-v3-tiles.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import pandas as pd 4 | import numpy as np 5 | import tensorflow as tf 6 | from tqdm import tqdm 7 | import matplotlib.pyplot as plt 8 | 9 | if __name__ == '__main__': 10 | parser = argparse.ArgumentParser(description='Compute Inception V3 features on tiles') 11 | parser.add_argument('--input-path', dest='input_path', action='store', 12 | required=True, 13 | help="""The path to the tiles from a whole slide image (WSI) in a TIF format.""") 14 | parser.add_argument('--output-path', dest='output_path', action='store', 15 | required=True, 16 | help="""Name of CSV file in which to store the feature matrix (rows are tiles, cols are features). 
17 | The file will be compressed if it is named *.gz""") 18 | args = parser.parse_args() 19 | 20 | output_path = args.output_path 21 | input_path = args.input_path 22 | 23 | # Make tile names list 24 | fnames = [fname for fname in os.listdir(input_path) if fname[-len('.tif'):]=='.tif'] 25 | num_images = len(fnames) 26 | print("Number of images:", num_images) 27 | 28 | # Assuming that all tiles have the same shape 29 | # Read the first tile and create a model 30 | tile = plt.imread(input_path + fnames[0]) 31 | 32 | base_model = tf.keras.applications.inception_v3.InceptionV3(include_top=False, weights='imagenet', input_shape=tile.shape) 33 | xi = base_model.output 34 | xi = tf.keras.layers.GlobalAveragePooling2D(data_format=None)(xi) 35 | model = tf.keras.models.Model(inputs=base_model.input, outputs=xi) 36 | 37 | batch_size = int(10**8 / (tile.shape[0] * tile.shape[1])) 38 | num_batches = int(np.ceil(num_images / batch_size)) 39 | 40 | print('Reading and pocessing tiles:', num_images) 41 | print('Batch size:', batch_size) 42 | print('Number of batches:', num_batches) 43 | 44 | features = [] 45 | for ibatch in tqdm(range(num_batches)): 46 | images = [] 47 | for indx in range(batch_size): 48 | try: 49 | images.append(plt.imread(input_path + fnames[indx + ibatch*batch_size])) 50 | except: 51 | pass 52 | 53 | features.append(model.predict(tf.keras.applications.inception_v3.preprocess_input(np.stack(images)), verbose=0)) 54 | features = np.vstack(features) 55 | 56 | df_features = pd.DataFrame(data=features, index=[fname[:-len('.tif')] for fname in fnames], columns=['feat' + str(i) for i in range(features.shape[1])]) 57 | print(df_features) 58 | 59 | if not os.path.exists(os.path.dirname(output_path)): 60 | os.makedirs(os.path.dirname(output_path)) 61 | 62 | df_features.to_csv(output_path) 63 | print('Successfully wrote:' + output_path) 64 | 65 | exit(0) 66 | -------------------------------------------------------------------------------- /bin/run-inception-v3.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import pandas as pd 4 | import numpy as np 5 | import cv2 6 | import tensorflow as tf 7 | import openslide 8 | from pathlib import Path 9 | import itertools 10 | import PIL 11 | import json 12 | from scipy.ndimage import gaussian_filter 13 | from tqdm import tqdm 14 | import time 15 | 16 | import PIL.Image 17 | PIL.Image.MAX_IMAGE_PIXELS = None 18 | 19 | if __name__ == '__main__': 20 | parser = argparse.ArgumentParser( 21 | description='Compute Inception V3 features on tiles that cover a _single_ spatial transcriptomic spot') 22 | parser.add_argument('--wsi-file', dest='wsi_file', action='store', 23 | required=True, 24 | help="""The path to the whole slide image (WSI) in a format readable by openslide (e.g., svs or ndpi).""") 25 | parser.add_argument('--positions-list-file', dest='positions_list_file', action='store', 26 | required=True, 27 | help="""The positions_list.csv file output by spaceranger that has one row per spot and columns indicating whether the spot is within the tissue and its x and y coordinates in pixels.""") 28 | parser.add_argument('--scalefactors-json-file', dest='scalefactors_json_file', action='store', 29 | required=True, 30 | help="""The scalefactors_json.json file output by spaceranger that defines the spot diameter in spaceranger's full resolution (i.e., the resolution of the file input to spaceranger, which may or may not be wsi_file).""") 31 | parser.add_argument('--output-path', 
dest='output_path', action='store', 32 | required=True, 33 | help="""Name of _CSV_ file in which to store the feature matrix (rows are tiles, cols are features). 34 | The file will be compressed if it is named *.gz""") 35 | parser.add_argument('--tile-mask', dest='tile_mask', default=None, action='store', required=False) 36 | parser.add_argument('--downsample-expanded', dest='downsample', action='store', default=True, 37 | required=False, 38 | help="""If expansion factor is greater than 1 then downsample the tiles back to the input size""") 39 | parser.add_argument('--expansion-factor', dest='expansion', action='store', 40 | required=True, 41 | help="""Expansion factor, 1 means no expansion""") 42 | args = parser.parse_args() 43 | expansion = float(args.expansion) 44 | downsample = args.downsample=='true' 45 | 46 | if expansion == 1.0: 47 | print('Expansion factor is 1, requested downsampling:', downsample) 48 | downsample = False 49 | else: 50 | if downsample: 51 | expansion = np.ceil(expansion) 52 | print('Expansion factor rounded to next interger:', expansion) 53 | print('Tiles will be expanded and then downsampled') 54 | else: 55 | print('Expansion without downsampling is requested') 56 | 57 | wsi_file = args.wsi_file 58 | positions_list_file = args.positions_list_file 59 | scalefactors_json_file = args.scalefactors_json_file 60 | output_path = args.output_path 61 | # Read in the spaceranger positions list file 62 | pos = pd.read_csv(positions_list_file, header=None) 63 | pos.columns = ['barcode', 'in_tissue', 'array_row', 'array_col', 'pxl_row_in_fullres', 'pxl_col_in_fullres'] 64 | 65 | if args.tile_mask != 'None': 66 | print('Received tile mask %s' % args.tile_mask) 67 | mask = pd.read_csv(args.tile_mask, index_col=0, header=None) 68 | pos['in_tissue'] = mask.reindex(pos['barcode'].values).values 69 | 70 | # Read the spot diameter at spaceranger's "full resolution" from the scalefactors_json file 71 | # output by spaceranger, i.e., in the resolution of the file passed to spaceranger, which may not 72 | # be the same resolution of wsi_file. 73 | with open(scalefactors_json_file) as f: 74 | scalefactors_tbl = json.load(f) 75 | spot_diameter_fullres = scalefactors_tbl['spot_diameter_fullres'] 76 | 77 | 78 | # scale_factor = ratio of resolution of 'wsi_file' to resolution of "fullres" image input to spaceranger. 79 | # scale_factor = 4 80 | # NB: ideally, this code would accept the full resolution image along with the wsi_file and compare their sizes. 81 | # You would do that with something like (wait ... 
probably the full resolution image is a png/jpg/etc not openable by openslide) 82 | # full_resolution_slide = openslide.open_slide(full_resolution_file) 83 | # base_magnification = float(full_resolution_slide.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER]) 84 | scale_factor = 1 85 | # Define the spot diameter in the resolution of the wsi_file 86 | spot_diameter_wsi = round(spot_diameter_fullres * scale_factor) 87 | # Translate the pixel coordinates from full resolution to the resolution of the wsi 88 | pos['pxl_row_in_wsi'] = pos.pxl_row_in_fullres * scale_factor 89 | pos['pxl_col_in_wsi'] = pos.pxl_col_in_fullres * scale_factor 90 | # Create the inception v3 model 91 | num_dimensions = 3 92 | 93 | if downsample: 94 | num_rows = num_cols = round(spot_diameter_wsi) 95 | else: 96 | num_rows = num_cols = round(spot_diameter_wsi * expansion) 97 | print('Model image size:', num_rows, num_cols) 98 | 99 | base_modeli = tf.keras.applications.inception_v3.InceptionV3(include_top=False, weights='imagenet', 100 | input_shape=(num_rows, num_cols, num_dimensions), 101 | classes=2) 102 | xi = base_modeli.output 103 | xi = tf.keras.layers.GlobalAveragePooling2D(data_format=None)(xi) 104 | model = tf.keras.models.Model(inputs=base_modeli.input, outputs=xi) 105 | 106 | num_images = len(pos) 107 | batch_size = int(10**8 / (float(args.expansion) * float(args.expansion) * num_cols * num_rows)) 108 | num_batches = int(np.ceil(num_images / batch_size)) 109 | 110 | print('Reading and pocessing tiles:', num_images) 111 | print('Batch size:', batch_size) 112 | print('Number of batches:', num_batches) 113 | 114 | slide = openslide.open_slide(wsi_file) 115 | 116 | w = num_cols 117 | h = num_rows 118 | lvl = 0 119 | features = [] 120 | for ibatch in tqdm(range(num_batches)): 121 | sT = time.time() 122 | images = [] 123 | for indx in range(batch_size): 124 | try: 125 | cy = pos.loc[indx + ibatch*batch_size, 'pxl_row_in_wsi'] 126 | cx = pos.loc[indx + ibatch*batch_size, 'pxl_col_in_wsi'] 127 | if pos.loc[indx + ibatch*batch_size, 'in_tissue']: 128 | if downsample: 129 | ew = round(w * expansion) 130 | eh = round(h * expansion) 131 | else: 132 | ew = w 133 | eh = h 134 | 135 | img = np.array(slide.read_region((int(cx - ew / 2), int(cy - eh / 2)), lvl, (int(ew), int(eh))).convert('RGB')) 136 | 137 | if downsample: 138 | img = img[::int(expansion), ::int(expansion), :] 139 | assert (img.shape[0], img.shape[1])==(w, h), 'Wrong tile dimensions after downsampling!' 
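                    # The expanded tile read above is (ew x eh) = expansion times the spot tile; the strided
                    # slice img[::int(expansion), ::int(expansion), :] keeps every int(expansion)-th pixel, so the
                    # tile returns to (w x h), as the assert checks. This widens the field of view per spot while
                    # keeping the Inception V3 input shape fixed; it is a cheap strided reduction, not smoothing.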
140 | 141 | images.append(img) 142 | except Exception as exception: 143 | #print(exception) 144 | pass 145 | print('Block 1:', time.time() - sT) 146 | print('Number of tiles:', len(images)) 147 | 148 | sT = time.time() 149 | if len(images)>0: 150 | features.append(model.predict(tf.keras.applications.inception_v3.preprocess_input(np.stack(images)), verbose=0)) 151 | print('Block 2:', time.time() - sT) 152 | 153 | features = np.vstack(features) 154 | 155 | # Convert the dictionary of features to a dataframe and name its columns featXXX 156 | df_features = pd.DataFrame(features) 157 | df_features.columns = ['feat_InceptionV3_' + str(i) for i in range(df_features.shape[1])] 158 | df_features.index = pos.loc[pos['in_tissue']==1].index 159 | 160 | # Append the spot position information to each row 161 | tbl = pd.concat([pos.loc[pos['in_tissue']==1], df_features], axis=1) 162 | print(tbl) 163 | 164 | # Output the features with spot information 165 | ## This will automatically compress if the file suffix is .gz 166 | 167 | tbl.to_csv(output_path + '.tsv.gz', index=False) 168 | print('Successfully wrote ' + output_path) 169 | 170 | exit(0) 171 | -------------------------------------------------------------------------------- /bin/run-uni.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import numpy as np 4 | import openslide 5 | import PIL 6 | import json 7 | from tqdm import tqdm 8 | 9 | import timm 10 | import torch 11 | from torchvision import transforms 12 | import torch.nn as nn 13 | import openslide 14 | 15 | import PIL.Image 16 | PIL.Image.MAX_IMAGE_PIXELS = None 17 | 18 | def normalizer(img, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), size=224): 19 | func = transforms.Compose([transforms.Resize(size), 20 | transforms.ToTensor(), 21 | transforms.Normalize(mean=mean, std=std)]) 22 | return func(img) 23 | 24 | if __name__ == '__main__': 25 | parser = argparse.ArgumentParser( 26 | description='Compute features of each tile') 27 | parser.add_argument('--wsi-file', dest='wsi_file', action='store', 28 | required=True, 29 | help="""The path to the whole slide image (WSI) in a format readable by openslide (e.g., svs or ndpi).""") 30 | parser.add_argument('--model-checkpoint-path', dest='modelPath', action='store', 31 | required=True, 32 | help="""Path to bin checkpoint.""") 33 | parser.add_argument('--positions-list-file', dest='positions_list_file', action='store', 34 | required=True, 35 | help="""The positions_list.csv file output by spaceranger that has one row per spot and columns indicating whether the spot is within the tissue and its x and y coordinates in pixels.""") 36 | parser.add_argument('--scalefactors-json-file', dest='scalefactors_json_file', action='store', 37 | required=True, 38 | help="""The scalefactors_json.json file output by spaceranger that defines the spot diameter in spaceranger's full resolution (i.e., the resolution of the file input to spaceranger, which may or may not be wsi_file).""") 39 | parser.add_argument('--output-path', dest='output_path', action='store', 40 | required=True, 41 | help="""Name of _CSV_ file in which to store the feature matrix (rows are tiles, cols are features). 
42 | The file will be compressed if it is named *.gz""") 43 | parser.add_argument('--tile-mask', dest='tile_mask', default=None, action='store', required=False) 44 | parser.add_argument('--downsample-expanded', dest='downsample', action='store', default=True, 45 | required=False, 46 | help="""If expansion factor is greater than 1 then downsample the tiles back to the input size""") 47 | parser.add_argument('--expansion-factor', dest='expansion', action='store', 48 | required=True, 49 | help="""Expansion factor, 1 means no expansion""") 50 | parser.add_argument('--subtiling', dest='subtiling', action='store', 51 | required=True, 52 | help="""Do subtiling""") 53 | parser.add_argument('--subcoords-factor', dest='subcoordsf', action='store', 54 | required=True, 55 | help="""Factor for subtiling subtiling""") 56 | parser.add_argument('--subcoords-list', dest='subcoords', action='store', 57 | required=True, 58 | help="""Subtiling coordinates""") 59 | 60 | args = parser.parse_args() 61 | expansion = float(args.expansion) 62 | downsample = args.downsample=='true' 63 | subtiling = args.subtiling=='true' 64 | 65 | subcoordsf = int(args.subcoordsf) 66 | subcoords = json.loads(args.subcoords) 67 | 68 | if expansion == 1.0: 69 | print('Expansion factor is 1, requested downsampling:', downsample) 70 | downsample = False 71 | else: 72 | if downsample: 73 | expansion = np.ceil(expansion) 74 | print('Expansion factor rounded to next interger:', expansion) 75 | print('Tiles will be expanded and then downsampled') 76 | else: 77 | print('Expansion without downsampling is requested') 78 | 79 | wsi_file = args.wsi_file 80 | positions_list_file = args.positions_list_file 81 | scalefactors_json_file = args.scalefactors_json_file 82 | output_path = args.output_path 83 | # Read in the spaceranger positions list file 84 | pos = pd.read_csv(positions_list_file, header=None) 85 | pos.columns = ['barcode', 'in_tissue', 'array_row', 'array_col', 'pxl_row_in_fullres', 'pxl_col_in_fullres'] 86 | 87 | if args.tile_mask != 'None': 88 | print('Received tile mask %s' % args.tile_mask) 89 | mask = pd.read_csv(args.tile_mask, index_col=0, header=None) 90 | pos['in_tissue'] = mask.reindex(pos['barcode'].values).values 91 | 92 | # Read the spot diameter at spaceranger's "full resolution" from the scalefactors_json file 93 | # output by spaceranger, i.e., in the resolution of the file passed to spaceranger, which may not 94 | # be the same resolution of wsi_file. 95 | with open(scalefactors_json_file) as f: 96 | scalefactors_tbl = json.load(f) 97 | spot_diameter_fullres = scalefactors_tbl['spot_diameter_fullres'] 98 | 99 | 100 | # scale_factor = ratio of resolution of 'wsi_file' to resolution of "fullres" image input to spaceranger. 101 | # scale_factor = 4 102 | # NB: ideally, this code would accept the full resolution image along with the wsi_file and compare their sizes. 103 | # You would do that with something like (wait ... 
probably the full resolution image is a png/jpg/etc not openable by openslide)
104 |     # full_resolution_slide = openslide.open_slide(full_resolution_file)
105 |     # base_magnification = float(full_resolution_slide.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER])
106 |     scale_factor = 1
107 |     # Define the spot diameter in the resolution of the wsi_file
108 |     spot_diameter_wsi = round(spot_diameter_fullres * scale_factor)
109 |     # Translate the pixel coordinates from full resolution to the resolution of the wsi
110 |     pos['pxl_row_in_wsi'] = pos.pxl_row_in_fullres * scale_factor
111 |     pos['pxl_col_in_wsi'] = pos.pxl_col_in_fullres * scale_factor
112 |     # Define the tile dimensions from the spot diameter
113 |     num_dimensions = 3
114 | 
115 |     if downsample:
116 |         num_rows = num_cols = round(spot_diameter_wsi)
117 |     else:
118 |         num_rows = num_cols = round(spot_diameter_wsi * expansion)
119 | 
120 |     # Load pre-trained UNI model
121 |     model = timm.create_model("vit_large_patch16_224", img_size=224, patch_size=16, init_values=1e-5, num_classes=0, dynamic_img_size=True)
122 |     model.load_state_dict(torch.load(args.modelPath, map_location="cpu"), strict=True)
123 |     model.eval()
124 | 
125 |     num_images = len(pos)
126 |     batch_size = int(10**8 / (float(args.expansion) * float(args.expansion) * num_cols * num_rows))
127 |     if subtiling:
128 |         batch_size = int(batch_size / 5)
129 |     num_batches = int(np.ceil(num_images / batch_size))
130 | 
131 |     print('Reading and processing tiles:', num_images)
132 |     print('Batch size:', batch_size)
133 |     print('Number of batches:', num_batches)
134 | 
135 |     slide = openslide.open_slide(wsi_file)
136 | 
137 |     w = num_cols
138 |     h = num_rows
139 |     lvl = 0
140 |     features = []
141 |     for ibatch in tqdm(range(num_batches)):
142 |         images = []
143 |         for indx in range(batch_size):
144 |             try:
145 |                 cy = pos.loc[indx + ibatch*batch_size, 'pxl_row_in_wsi']
146 |                 cx = pos.loc[indx + ibatch*batch_size, 'pxl_col_in_wsi']
147 | 
148 |                 if pos.loc[indx + ibatch*batch_size, 'in_tissue']:
149 |                     if downsample:
150 |                         ew = round(w * expansion)
151 |                         eh = round(h * expansion)
152 |                     else:
153 |                         ew = w
154 |                         eh = h
155 | 
156 |                     img = np.array(slide.read_region((int(cx - ew / 2), int(cy - eh / 2)), lvl, (int(ew), int(eh))).convert('RGB'))
157 | 
158 |                     if subtiling:
159 |                         a = int(np.floor(img.shape[0]/subcoordsf))
160 |                         b = int(np.floor(img.shape[1]/subcoordsf))
161 |                         for i, j in subcoords:
162 |                             subimg = img[a*(i-1): a*(i+1), b*(j-1): b*(j+1), :]
163 |                             images.append(subimg)
164 |                     else:
165 |                         # The downsampling is done to save memory
166 |                         if downsample:
167 |                             img = img[::int(expansion), ::int(expansion), :]
168 |                             assert (img.shape[0], img.shape[1])==(w, h), 'Wrong tile dimensions after downsampling!'
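                        # Each collected tile (or sub-tile, when subtiling is enabled) is converted to a PIL image
                        # below, resized (shorter side to 224 px) and ImageNet-normalized by normalizer(), then
                        # embedded in one batch by the UNI ViT-L/16 encoder; with subtiling, the per-sub-tile
                        # embeddings are averaged back to a single feature row per tile after inference.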
169 | 170 | images.append(img) 171 | 172 | except Exception as exception: 173 | #print(exception) 174 | pass 175 | print('Number of tiles:', len(images)) 176 | 177 | if len(images)>0: 178 | images = torch.cat([normalizer(PIL.Image.fromarray(image))[None, :, :, :] for image in images], 0) 179 | with torch.inference_mode(): 180 | temp_features = model(images).cpu().numpy() 181 | 182 | # Average the subtiles, e.g., every 5 subtiles 183 | if subtiling: 184 | df_temp = pd.DataFrame(temp_features) 185 | temp_features = df_temp.groupby(np.arange(len(df_temp.index))//len(subcoords)).mean().values 186 | 187 | features.append(temp_features) 188 | 189 | features = np.vstack(features) 190 | 191 | # Convert the dictionary of features to a dataframe and name its columns featXXX 192 | df_features = pd.DataFrame(features) 193 | df_features.columns = [f'feat_uni_' + str(i) for i in range(df_features.shape[1])] 194 | df_features.index = pos.loc[pos['in_tissue']==1].index 195 | 196 | # Append the spot position information to each row 197 | tbl = pd.concat([pos.loc[pos['in_tissue']==1], df_features], axis=1) 198 | print(tbl) 199 | 200 | # Output the features with spot information 201 | ## This will automatically compress if the file suffix is .gz 202 | 203 | tbl.to_csv(output_path + '.tsv.gz', index=False) 204 | print('Successfully wrote ' + output_path) 205 | 206 | exit(0) 207 | -------------------------------------------------------------------------------- /bin/superpixelation.py: -------------------------------------------------------------------------------- 1 | # Prepared by Domanskyi 2 | # The superpixelation is done by patches 3 | # Each patch plot of superpixels is generated, small superpixels' identifiers are not shown 4 | 5 | import os 6 | import argparse 7 | import tifffile 8 | import numpy as np 9 | from tqdm import tqdm 10 | from PIL import Image 11 | import matplotlib.pyplot as plt 12 | import matplotlib.patheffects as path_effects 13 | from skimage.segmentation import mark_boundaries 14 | 15 | import PIL.Image 16 | PIL.Image.MAX_IMAGE_PIXELS = None 17 | 18 | import skimage 19 | from pysnic.algorithms.snic import snic 20 | 21 | def infere_spx(im_down_patch, target_number_of_segments='auto', pixels_per_segment=10000, compactness=1): 22 | 23 | lab_image = skimage.color.rgb2lab(im_down_patch).tolist() 24 | 25 | if target_number_of_segments == 'auto': 26 | target_number_of_segments = int(im_down_patch.shape[0] * im_down_patch.shape[1] / pixels_per_segment) 27 | 28 | segmentation, _, centroids = snic(lab_image, target_number_of_segments, compactness, update_func=None) 29 | segmentation = np.array(segmentation) 30 | 31 | return segmentation 32 | 33 | def plot_all_spx_nf(im_down, seg, seg_id='', fontcolor='k', fontsize=8, fontweight='demibold', 34 | figsize=(10, 10), boundaries_color=(1, 0, 0), min_size=500, 35 | pe=path_effects.Stroke(linewidth=2, foreground='w')): 36 | 37 | if im_down.shape[0] > im_down.shape[1]: 38 | figsize = figsize[0] * im_down.shape[1] / im_down.shape[0], figsize[1] 39 | else: 40 | figsize = figsize[0], figsize[1] * im_down.shape[0] / im_down.shape[1] 41 | 42 | fig, ax = plt.subplots(figsize=figsize) 43 | ax.imshow(mark_boundaries(im_down, seg, color=boundaries_color)) 44 | 45 | for s in np.unique(seg.ravel()): 46 | wh = np.array(np.where(seg==s)) 47 | if len(wh[0]) >= min_size: 48 | m = wh.mean(axis=1) 49 | params = dict(va='center', ha='center', color=fontcolor, fontsize=fontsize, fontweight=fontweight) 50 | ltext = ax.text(m[1], m[0], s, **params) 51 | 
ltext.set_path_effects([pe, path_effects.Normal()]) 52 | 53 | ax.set_aspect('equal') 54 | ax.axis('off') 55 | fig.tight_layout() 56 | 57 | plt.savefig(f'superpixelation_{seg_id}.png', facecolor='w', dpi=100, pad_inches=0.01) 58 | plt.close(fig) 59 | 60 | return 61 | 62 | if __name__ == '__main__': 63 | 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument("--inputImagePath", type=str, required=True, help="input image name") 66 | parser.add_argument("--segmentationSavePath", type=str, required=True, help="output file name") 67 | parser.add_argument('--s', type=int, default=4096, help='patch size') 68 | parser.add_argument('--compactness', required=True, type=float) 69 | parser.add_argument('--pixelsPerSegment', required=True, type=int) 70 | parser.add_argument('--downsamplingFactor', required=True, type=int) 71 | args = parser.parse_args() 72 | 73 | print('s:', args.s) 74 | 75 | # If the image is in 40x, the downsampling_factor 4 will bring the resolution to 10x 76 | img = np.array(tifffile.imread(args.inputImagePath))[::args.downsamplingFactor, ::args.downsamplingFactor, :3] 77 | print(img.shape) 78 | 79 | ## Save downsampled image 80 | #print('Saving downsampled image') 81 | #tifffile.imwrite(args.outputImagePath, img, bigtiff=True) 82 | #print('Done') 83 | 84 | dims = img.shape[0], img.shape[1] 85 | 86 | # Prepare image patches' coordinates 87 | r = [np.append(args.s*np.array(range(0, int(np.floor(dims[i]/args.s))+1)), [dims[i]]) for i in range(2)] 88 | coords = [(i,j) for i in range(len(r[0])-1) for j in range(len(r[1])-1)] 89 | print(coords) 90 | 91 | segmentation = np.zeros(dims, dtype=np.int32) 92 | for ipatch, (i, j) in enumerate(tqdm(coords)): 93 | try: 94 | seg_patch = infere_spx(img[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1], :], 95 | pixels_per_segment=args.pixelsPerSegment, 96 | compactness=args.compactness) 97 | 98 | # There are less than 1000 superpixels in each patch, less than 1000 patches 99 | # Make each superpixel id unique 100 | seg_patch += ipatch * 10000 101 | 102 | segmentation[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1]] = seg_patch 103 | except Exception as exception: 104 | print('Superpixel ERROR:', exception) 105 | 106 | try: 107 | plot_all_spx_nf(img[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1], :], 108 | segmentation[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1]], 109 | seg_id=ipatch * 10000) 110 | except Exception as exception: 111 | print('Superpixel plot ERROR:', exception) 112 | 113 | # Save segmentation mask 114 | print('\nSegmentation:', segmentation.shape) 115 | with open(args.segmentationSavePath, 'wb') as tempfile: 116 | np.save(tempfile, segmentation) 117 | 118 | exit(0) 119 | -------------------------------------------------------------------------------- /check.sh: -------------------------------------------------------------------------------- 1 | version=(`nextflow -v`) 2 | major=$(echo ${version[2]} | cut -d. -f1) 3 | 4 | if [ "$major" -lt "24" ]; then 5 | read -p "Update nextflow to use the pipeline. Proceed? (y/n): " confirm && [[ $confirm == [yY] ]] && nextflow self-update 6 | fi -------------------------------------------------------------------------------- /conf/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Description of the pipeline parameters 3 | 4 | To find out the default value for each parameter, see `conf/analysis.config`. 
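The parameters described below live in the Nextflow `params` scope of the analysis config chosen for a run. As an illustration only (the file name and values here are hypothetical; the authoritative defaults are the `conf/analysis-*.config` files in this repository), a small user config overriding a few of these options might look like:

```
// my-analysis.config -- illustrative sketch, not a shipped file
params {
    do_splicing_quantification = true      // quantify splicing with velocyto
    do_snv_extract             = false     // skip the BAF extraction sub-workflow
    target_mpp                 = 0.25      // target image resolution, microns per pixel
    stain_normalization        = true      // enable stain/color normalization
}
```

Depending on how the run is launched (see `run.sh` and `nextflow.config`), such a file can be passed to Nextflow with the `-c` option, e.g., `nextflow run main.nf -c my-analysis.config`, in addition to the configs shipped in `conf/`.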
5 | 6 | ### Sequencing analysis parameters 7 | 8 | + **`do_merge_mtx`** Merge graft and host MTX (gene by spot) matrices into one MTX matrix 9 | 10 | + **`do_splicing_quantification`** Run splicing quantification with velocyto. The pipeline also sorts by cell barcodes the BAM file produced by Space Ranger. 11 | 12 | + **`do_snv_extract`** Run the BAF extraction sub-workflow to get bulk-level SNV. 13 | 14 | + **`reference_genome`** Path to the reference genome to use for Space Ranger reads alignment in one-reference analysis route. See https://support.10xgenomics.com/single-cell-gene-expression/software/release-notes/build for Space Ranger requirements of the reference genomes. 15 | 16 | + **`mouse_reference_genome`** Path to the mouse reference genome for Space Ranger reads alignment in two-reference analysis route. 17 | 18 | + **`human_reference_genome`** Path to the human reference genome for Space Ranger reads alignment in two-reference analysis route. 19 | 20 | + **`deconvolution_reference_graft`** Path to a graft (e.g., human) reference genome (e.g., *.fa, *.fna, *.fa.gz, *.fna.gz) to build xenome or xengsort indices. If the indices supplied in `nextflow.config` already exits, then this parameter is ignored. 21 | 22 | + **`deconvolution_reference_host`** Path to a host (e.g., mouse) reference genome (e.g., *.fa, *.fna, *.fa.gz, *.fna.gz) to build xenome or xengsort indices. If the indices supplied in `nextflow.config` already exits, then this parameter is ignored. 23 | 24 | + **`deconvolution_kmer_size`** K-mer size for building xenome or xengsort indices. See https://github.com/data61/gossamer/blob/master/docs/xenome.md for a detailed description. 25 | 26 | + **`deconvolution_indices_path`** Path to save deconvolution indices. 27 | 28 | + **`deconvolution_indices_name`** Name of the indices. 29 | 30 | + **`xengsort_n`** Xengsort-specific parameter. See https://gitlab.com/genomeinformatics/xengsort for details. 31 | 32 | 33 | ##### See https://github.com/akdess/BAFExtract for the description of the following filtering parameters: 34 | 35 | + **`bafextract_minimum_mapping_quality`** 36 | 37 | + **`bafextract_minimum_base_quality`** 38 | 39 | + **`bafextract_min_coverage_per_SNV`** 40 | 41 | + **`bafextract_min_MAF_covg_per_SNV`** 42 | 43 | + **`bafextract_min_MAF`** 44 | 45 | 46 | ### Imaging analysis parameters 47 | 48 | + **`do_img_subworkflow`** Run the imaging sub-workflow to generate imaging and nuclear morphometric features for each spot on the grid. 49 | 50 | + **`short_workflow`** Run short imaging workflow instead of the full imaging workflow. See config for details. 51 | 52 | + **`do_imaging_anndata`** Create an AnnData object (e.g., for use with Scanpy) from the *.csv.gz data file with imaging and nuclear morphometric features 53 | 54 | + **`do_nuclear_sementation`** Perform nuclear segmentation (use either HoVer-Net or StarDist to segment nuclei) of the entire WSI. 55 | 56 | + **`target_mpp`** desired image resolution for scaling the images. Note that specific DL and ML models require full-resolution images, and the supplied pre-trained models are designed for images with a resolution of around 0.25 (mpp). In case a low-magnification image is supplied (e.g., mpp is 0.5) while target_mpp is 0.25, the image is upsampled and will have doubled dimensions. 57 | 58 | + **`tiled_tiff_tile_size`** The TIFF WSI is internally stored in blocks (for memory management). The tile size determines the block size. 
This parameter is not the size of tiles used for feature extraction or segmentation aggregation. The grid parameter `grid_spot_diamter` (in micrometers) and resolution parameter `target_mpp` define the scaled image tile size.
59 | 
60 | + **`thumbnail_downsample_factor`** A factor used to reduce the WSI dimensions to create a low-resolution slide representation.
61 | 
62 | + **`check_focus`** Run the DeepFocus module to assess focus (blurriness) of the whole slide image.
63 | 
64 | + **`deepfocus_model_path`** Path to the DeepFocus checkpoint to use.
65 | 
66 | 
67 | 
68 | + **`stain_normalization`** Whether to do any stain or color normalization.
69 | 
70 | + **`stainnet`** Path to the checkpoint of the StainNet normalization model.
71 | 
72 | + **`macenko_normalization`** If true, then use Macenko stain normalization. If false, use StainNet color normalization. This parameter is ignored if `stain_normalization` is false.
73 | 
74 | + **`stain_reference_image`** Reference image (or a small patch, e.g., 2000 by 2000 pixels) to use with Macenko stain normalization.
75 | 
76 | + **`stain_patch_size`** Macenko stain normalization patch size.
77 | 
78 | 
79 | + **`mask_background_cutoff`** Parameter for detecting image background with HoVer-Net.
80 | 
81 | + **`pixel_mask_threshold_low`** Parameter for detecting tissue pixels on the low-resolution image.
82 | 
83 | + **`pixel_mask_threshold_high`** Parameter for detecting tissue pixels on the low-resolution image.
84 | 
85 | + **`fraction_for_mask`** Fraction of in-tissue pixels required to call a tile in-tissue.
86 | 
87 | 
88 | + **`use_provided_grid`** Whether to use the grid provided in the input sample sheet. If false and no Space Ranger alignment is done, then a new grid of tiles is generated based on the grid parameters.
89 | 
90 | + **`grid_type`** Type of the grid of tiles to generate. It can be hex, square, or random.
91 | 
92 | + **`grid_spot_diamter`** Diameter of the spot (dimension of a tile) in micrometers.
93 | 
94 | + **`grid_spot_horizontal_spacing`** Horizontal center-to-center distance between adjacent spots (or tiles).
95 | 
96 | + **`grid_aspect_correction`** Factor to correct the Visium slide aspect ratio.
97 | 
98 | 
99 | + **`overlap_scale_factor`** Imaging feature extraction parameter. If the factor is 1, features are extracted from a tile of the ST spot dimension.
100 | 
101 | 
102 | + **`hovernet_segmentation`** Do HoVer-Net segmentation. If false, do StarDist segmentation.
103 | 
104 | + **`nuclei_segmentation_dir`** Name of the directory to save segmentation output.
105 | 
106 | + **`hovernet_batch_size`** Parameter of HoVer-Net segmentation. This parameter is ignored when segmentation is done with StarDist.
107 | 
108 | + **`hovernet_num_inference_workers`** Parameter of HoVer-Net segmentation. This parameter is ignored when segmentation is done with StarDist.
109 | 
110 | + **`hovernet_chunk_size`** Parameter of HoVer-Net segmentation. This parameter is ignored when segmentation is done with StarDist.
111 | 
112 | + **`hovernet_tile_size`** Parameter of HoVer-Net segmentation. This parameter is ignored when segmentation is done with StarDist.
113 | 
114 | + **`stardist_model`** Path to the checkpoint of the StarDist model.
115 | 
116 | + **`stardist_block_size`** Size of the image block to run segmentation. Blocks are merged internally at the end of segmentation.
117 | 
118 | + **`stardist_expand_size`** Size of the cytoplasm around the nucleus, in pixels.
119 | 
120 | 
121 | 
122 | + **`hovernet_spot_assignment_factor`** Used for either HoVer-Net or StarDist segmentation postprocessing.
Scaling factor of the boundary limiting the inclusion of nuclei to an ST spot. A value of 1 means the boundary size equals the ST spot size.
123 | 
124 | + **`hovernet_spot_assignment_shape`** Used for either HoVer-Net or StarDist segmentation postprocessing. The shape of the boundary, either square or disk.
125 | 
126 | + **`hovernet_min_cell_type_prob`** Used for either HoVer-Net or StarDist segmentation postprocessing. This filtering parameter is used to remove nuclei assigned with low confidence.
127 | 
128 | 
129 | + **`extract_tile_features`** Extract (generate) imaging features for all tiles.
130 | 
131 | + **`extract_inception_features`** If `extract_tile_features`, then extract Inception V3 features.
132 | 
133 | + **`extract_transpath_features`** If `extract_tile_features`, then extract TransPath features.
134 | 
135 | + **`extract_uni_features`** If `extract_tile_features`, then extract UNI features.
136 | 
137 | + **`extract_conch_features`** If `extract_tile_features`, then extract CONCH features.
138 | 
139 | + **`transpath_features_model`** One of 'CTransPath' or 'MoCoV3'.
140 | 
141 | + **`use_conch_normalizer`** Use the specialized CONCH normalizer instead of the standard normalizer used with UNI and CTransPath.
142 | 
143 | + **`uni_model_checkpoint`** Path to the downloaded UNI checkpoint. Download requires registration: https://huggingface.co/MahmoodLab/UNI/blob/main/pytorch_model.bin.
144 | 
145 | + **`conch_model_checkpoint`** Path to the downloaded CONCH checkpoint. Download requires registration: https://huggingface.co/MahmoodLab/CONCH/blob/main/pytorch_model.bin.
146 | 
147 | 
148 | 
149 | + **`do_superpixels`** Do superpixel segmentation using the SNIC algorithm.
150 | 
151 | + **`export_superpixels_contours`** If true, export superpixel contours in JSON format.
152 | 
153 | + **`superpixel_compactness`** Superpixel compactness parameter; see the SNIC algorithm for details.
154 | 
155 | + **`pixels_per_segment`** Number of pixels per superpixel segment, i.e., superpixel size.
156 | 
157 | + **`superpixel_patch_size`** Superpixel patch size. Warning: patch boundaries are kept flat.
158 | 
159 | + **`superpixel_downsampling_factor`** Downsampling factor applied to the input image before superpixel segmentation.
160 | 
161 | + **`od_block_size`** Block size for OD calculation.
162 | 
163 | + **`expand_nuclei_distance`** Distance in pixels to expand the nuclei mask.
164 | 
165 | 
166 | 
167 | 
168 | + **`export_image`** Export the resized and normalized image in OME-TIFF format.
169 | 
170 | + **`export_image_metadata`** Export input image metadata in OME-XML format.
171 | 
172 | + **`compression`** Compression library to use with OME-TIFF, e.g., 'LZW'.
173 | 
174 | 
175 | + **`downsample_expanded_tile`** Downsample the expanded tile back to the input tile size.
176 | 
177 | + **`expansion_factor`** The tile is read from an expanded area around the tile center.
178 | 
179 | + **`subtiling`** If true, split each tile into subtiles, extract features, and average them across the subtiles.
180 | 
181 | + **`subcoords_factor`** Factor that defines the size of the subtiles.
182 | 
183 | + **`subcoords_list`** Centers of the subtiles within a tile.
184 | 
185 | 
186 | 
187 | + **`do_clustering`** Do dimensionality reduction and clustering. Generate spatial and UMAP plots of imaging feature clusters as well as nuclear morphometric features and classification results.
188 | 
189 | + **`expansion_factor_for_clustering`** Features of the specified expansion factor are used for clustering.
190 | 
191 | + **`suffix_for_clustering`** Features of this type are used for clustering.
192 | 193 | + **`plot_dpi`** DPI (dots per inch) of the figures. 194 | 195 | 196 | 197 | + **`hovernet_device_mode`** GPU or CPU device for use with HoVer-Net. 198 | 199 | + **`ctranspath_device_mode`** GPU or CPU device for use with TransPath inference models. 200 | 201 | 202 | 203 | + **`sample_tiles_subworkflow`** Run a subworkflow where a small number of tiles is saved, along with the HoVer-Net classification data. 204 | 205 | + **`tiles_per_slide`** Number of randomly selected tiles to use in the sampling tiles subworkflow. 206 | 207 | 208 | 209 | + **`do_segmentation_anndata`** DEPRECATED parameter, will be removed in future. 210 | -------------------------------------------------------------------------------- /conf/analysis-img.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | 4 | short_workflow = false 5 | 6 | stain_normalization = true 7 | do_nuclear_segmentation = true 8 | check_focus = true 9 | extract_tile_features = true 10 | sample_tiles_subworkflow = true 11 | do_segmentation_anndata = true 12 | do_imaging_anndata = true 13 | export_image = true 14 | export_image_metadata = true 15 | do_superpixels = false 16 | 17 | // Short workflow skips: ROI extraction, stain normalization, image formatting and resizing, export of image and its metadata 18 | if (params.short_workflow) { 19 | do_nuclear_segmentation = false 20 | check_focus = false 21 | extract_tile_features = true 22 | sample_tiles_subworkflow = false 23 | do_imaging_anndata = true 24 | do_superpixels = false 25 | } 26 | 27 | target_mpp = 0.25 28 | 29 | compression = 'LZW' 30 | tiled_tiff_tile_size = 1024 31 | thumbnail_downsample_factor = 0.05 32 | 33 | mask_background_cutoff = 210.0 34 | 35 | pixel_mask_threshold_low = 100 36 | pixel_mask_threshold_high = 200 37 | 38 | use_provided_grid = true 39 | 40 | grid_type = 'square' // 'hex' 'square' 41 | grid_spot_diamter = 56 // visium=65 // 56 42 | grid_spot_horizontal_spacing = 56 // visium=100 // 112 43 | grid_aspect_correction = 0.95 44 | 45 | fraction_for_mask = 0.1 46 | 47 | do_clustering = true 48 | 49 | downsample_expanded_tile = true 50 | expansion_factor = [1, 2, 3, 4] // [1, 2, 3, 4] if downsampling, then this factor will be rounded up to integer 1.25 -> 2 51 | subtiling = false 52 | subcoords_factor = 4 53 | subcoords_list = '[[1, 1], [3, 1], [2, 2], [1, 3], [3, 3]]' 54 | 55 | if (params.do_clustering || params.do_imaging_anndata) { 56 | expansion_factor_for_clustering = 1 // one of expansion_factor 57 | suffix_for_clustering = 'uni' // 'ctranspath' or 'inception' or 'uni' or 'conch' 58 | plot_dpi = 300 59 | } 60 | 61 | deepfocus_model_path = "/projects/chuang-lab/USERS/domans/dev-focus/retrained-t0t1/" // "/deepfocus/" // "/projects/chuang-lab/USERS/domans/dev-focus/retrained-t0t1/" 62 | 63 | // Request access on HugginFace and download the checkpoints 64 | // UNI checkpoint: https://huggingface.co/MahmoodLab/UNI https://huggingface.co/MahmoodLab/UNI/blob/main/pytorch_model.bin 65 | // CONCH checkpoint: https://huggingface.co/MahmoodLab/CONCH https://huggingface.co/MahmoodLab/CONCH/blob/main/pytorch_model.bin 66 | uni_model_checkpoint = "/projects/chuang-lab/USERS/domans/containers/private/pytorch_model_uni_11_06_2024.bin" 67 | conch_model_checkpoint = "/projects/chuang-lab/USERS/domans/containers/private/pytorch_model_conch_11_06_2024.bin" 68 | use_conch_normalizer = true 69 | 70 | if (params.extract_tile_features) { 71 | // *.tsv.gz features are posted in './features/' 72 | 73 | 
extract_transpath_features = false 74 | extract_mocov3_features = false 75 | extract_inception_features = false 76 | extract_uni_features = true 77 | extract_conch_features = false 78 | } 79 | 80 | if (params.stain_normalization) { 81 | macenko_normalization = true 82 | 83 | if (params.macenko_normalization) { 84 | stain_reference_image = "/projects/chuang-lab/USERS/domans/containers/bronchus.tif" 85 | stain_patch_size = 512 86 | } 87 | else { 88 | stainnet = "/projects/chuang-lab/USERS/domans/containers/StainNet-Public_layer3_ch32.pth" 89 | } 90 | } 91 | 92 | 93 | if (params.sample_tiles_subworkflow) { 94 | hovernet_device_mode = "gpu" 95 | hovernet_batch_size = 1 96 | hovernet_num_inference_workers = 1 97 | hovernet_spot_assignment_factor = 1 98 | hovernet_spot_assignment_shape = 'square' 99 | hovernet_min_cell_type_prob = 0.75 100 | hovernet_chunk_size = 4096 101 | hovernet_tile_size = 1024 102 | } 103 | 104 | 105 | if (params.do_nuclear_segmentation) { 106 | nuclei_segmentation_dir = "nucseg" 107 | 108 | hovernet_segmentation = false 109 | 110 | if (params.hovernet_segmentation) { 111 | hovernet_device_mode = "gpu" 112 | hovernet_spot_assignment_factor = 1 113 | hovernet_spot_assignment_shape = 'square' 114 | hovernet_min_cell_type_prob = 0.75 115 | 116 | if (params.hovernet_device_mode == 'gpu') { 117 | hovernet_batch_size = 32 118 | hovernet_num_inference_workers = 8 119 | hovernet_chunk_size = 10000 120 | hovernet_tile_size = 2048 121 | } 122 | else if (params.hovernet_device_mode == 'cpu') { 123 | hovernet_batch_size = 1 124 | hovernet_num_inference_workers = 1 125 | hovernet_chunk_size = 4096 126 | hovernet_tile_size = 1024 127 | } 128 | } 129 | else { 130 | stardist_model = "/projects/chuang-lab/USERS/domans/containers/stardist-models.v0.1/python_2D_versatile_he/" 131 | stardist_block_size = 4096 132 | stardist_expand_size = 15 133 | } 134 | } 135 | 136 | 137 | if (params.do_superpixels) { 138 | export_superpixels_contours = true 139 | 140 | superpixel_compactness = 1 141 | pixels_per_segment = 10000 142 | superpixel_patch_size = 2048 143 | superpixel_downsampling_factor = 4 144 | 145 | od_block_size = 4096 146 | 147 | expand_nuclei_distance = 15 148 | } 149 | 150 | if (params.sample_tiles_subworkflow) { 151 | tiles_per_slide = 100 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /conf/analysis-one.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | 4 | reference_genome = "/projects/chuang-lab/USERS/domans/reference/refdata-gex-GRCh38-and-mm10-2020-A" 5 | 6 | do_splicing_quantification = true 7 | do_snv_extract = true 8 | do_img_subworkflow = true 9 | 10 | if (params.do_snv_extract) { 11 | bafextract_minimum_mapping_quality = 50 12 | bafextract_minimum_base_quality = 0 13 | bafextract_min_coverage_per_SNV = 20 14 | bafextract_min_MAF_covg_per_SNV = 4 15 | bafextract_min_MAF = 0.1 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /conf/analysis-pancreas.config: -------------------------------------------------------------------------------- 1 | params { 2 | 3 | stain_normalization = true 4 | do_nuclear_sementation = true 5 | do_superpixels = true 6 | export_superpixels_contours = true 7 | check_focus = false 8 | do_imaging_anndata = true 9 | 10 | target_mpp = 0.2208187960959237 11 | thumbnail_downsample_factor = 0.025 12 | 13 | if (params.stain_normalization) { 14 | macenko_normalization = true 15 | 16 | if 
(params.macenko_normalization) { 17 | stain_reference_image = "/sdata/activities/kappsen-tmc/visium/cropped_capture_area/SC2300701_JDC-WP-008-b_patch.tiff" 18 | } 19 | } 20 | 21 | if (params.do_nuclear_sementation) { 22 | hovernet_segmentation = true 23 | 24 | if (params.hovernet_segmentation) { 25 | hovernet_device_mode = "gpu" 26 | 27 | if (params.hovernet_device_mode == 'gpu') { 28 | hovernet_batch_size = 32 29 | hovernet_num_inference_workers = 8 30 | } 31 | else if (params.hovernet_device_mode == 'cpu') { 32 | hovernet_batch_size = 1 33 | hovernet_num_inference_workers = 1 34 | } 35 | } 36 | else { 37 | stardist_block_size = 4096 38 | stardist_expand_size = 15 39 | } 40 | } 41 | 42 | if (params.do_superpixels) { 43 | superpixel_compactness = 1 44 | pixels_per_segment = 10000 45 | superpixel_patch_size = 2048 46 | superpixel_downsampling_factor= 4 47 | 48 | expand_nuclei_distance = 15 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /conf/analysis-two.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | 4 | mouse_reference_genome = "/projects/chuang-lab/USERS/domans/reference/refdata-gex-mm10-2020-A" 5 | human_reference_genome = "/projects/chuang-lab/USERS/domans/reference/refdata-gex-GRCh38-2020-A" 6 | 7 | deconvolution_tool = "xengsort" // "xengsort", "xenome" 8 | 9 | deconvolution_reference_graft = "/projects/chuang-lab/USERS/domans/reference/GCA_009914755.4_T2T-CHM13v2.0_genomic.fna.gz" 10 | deconvolution_reference_host = "/projects/churchill-lab/resource/Custom_Genomes/R84-REL1505/NOD_ShiLtJ/NOD_ShiLtJ.fa" 11 | deconvolution_kmer_size = 35 12 | 13 | if (params.deconvolution_tool == "xenome") { 14 | deconvolution_indices_path = "/projects/chuang-lab/PDXnet/xenome/indices/nod/t2t-k35" 15 | deconvolution_indices_name = "t2t_k35" 16 | } 17 | else if (params.deconvolution_tool == "xengsort") { 18 | deconvolution_indices_path = "/projects/chuang-lab/PDXnet/xengsort/indices/nod/t2t-k25" 19 | deconvolution_indices_name = "t2t_k25" 20 | deconvolution_kmer_size = 25 21 | xengsort_n = "4_500_000_000" 22 | } 23 | 24 | do_splicing_quantification = true 25 | do_snv_extract = true 26 | do_img_subworkflow = true 27 | 28 | if (params.do_snv_extract) { 29 | bafextract_minimum_mapping_quality = 50 30 | bafextract_minimum_base_quality = 0 31 | bafextract_min_coverage_per_SNV = 20 32 | bafextract_min_MAF_covg_per_SNV = 4 33 | bafextract_min_MAF = 0.1 34 | } 35 | 36 | do_merge_mtx = true 37 | } 38 | -------------------------------------------------------------------------------- /conf/containers.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | 4 | container_dir = "/projects/chuang-lab/USERS/domans/containers" 5 | 6 | container_inception = "${params.container_dir}/container-mamba-inception.sif" 7 | container_hovernet = "${params.container_dir}/local/container-singularity-hovernet-py.sif" 8 | container_stainnet = "${params.container_dir}/container-singularity-stainnet.sif" 9 | container_staintools = "${params.container_dir}/container-singularity-staintools.sif" 10 | container_vips = "${params.container_dir}/container-singularity-vips.sif" 11 | 12 | container_uni_conch = "${params.container_dir}/hf-uni-conch.sif" 13 | container_ctranspath = "${params.container_dir}/local/mamba-timm.sif" 14 | container_deepfocus = "${params.container_dir}/deepfocus.sif" 15 | container_ome = "${params.container_dir}/local/ome.sif" 16 | 17 | 
container_xenome = "/projects/compsci/omics_share/meta/containers/quay.io-jaxcompsci-xenome-1.0.1.img" 18 | 19 | container_xengsort = "${params.container_dir}/local/mamba-xenomake.sif" 20 | container_fastqtools = "${params.container_dir}/container-singularity-fastqtools.sif" 21 | container_spaceranger = "${params.container_dir}/container-singularity-spaceranger.sif" 22 | 23 | container_bafextract = "${params.container_dir}/container-singularity-bafextract.sif" 24 | container_samtools = "${params.container_bafextract}" 25 | 26 | container_python = "${params.container_dir}/container-singularity-python.sif" 27 | container_velocyto = "${params.container_dir}/container-singularity-velocyto.sif" 28 | 29 | } 30 | -------------------------------------------------------------------------------- /docs/BAF_extract_scheme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/BAF_extract_scheme.png -------------------------------------------------------------------------------- /docs/Example_CPU_usage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/Example_CPU_usage.png -------------------------------------------------------------------------------- /docs/Scheme NF2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/Scheme NF2.png -------------------------------------------------------------------------------- /docs/Scheme_NF3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/Scheme_NF3.png -------------------------------------------------------------------------------- /docs/example ST wsi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/example ST wsi.png -------------------------------------------------------------------------------- /docs/example non-ST wsi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/example non-ST wsi.png -------------------------------------------------------------------------------- /docs/flow-static.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/flow-static.png -------------------------------------------------------------------------------- /docs/flow.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/flow.gif -------------------------------------------------------------------------------- /docs/hovernet-tissue-mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/hovernet-tissue-mask.png 
-------------------------------------------------------------------------------- /docs/imaging-clustering.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/imaging-clustering.png -------------------------------------------------------------------------------- /docs/mones-per-tile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/mones-per-tile.png -------------------------------------------------------------------------------- /docs/multiscale-features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/multiscale-features.png -------------------------------------------------------------------------------- /docs/route-map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/route-map.png -------------------------------------------------------------------------------- /docs/sub-tiling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/sub-tiling.png -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/lib/__init__.py -------------------------------------------------------------------------------- /lib/superpixels.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import gzip 4 | import numpy as np 5 | import cv2 6 | import matplotlib.pyplot as plt 7 | 8 | def plot_spx_contours(all_contours, figsize=(15, 15)): 9 | fig, ax = plt.subplots(figsize=figsize) 10 | for c in all_contours.keys(): 11 | xp = [] 12 | yp = [] 13 | for sub_contour in all_contours[c]: 14 | x = np.array(sub_contour).T[0].tolist() 15 | y = np.array(sub_contour).T[1].tolist() 16 | xp += x + [x[0], None] 17 | yp += y + [y[0], None] 18 | ax.plot(xp, yp, '-o', ms=0, lw=2, label=c) 19 | if len(all_contours.keys()) < 10: 20 | plt.legend() 21 | ax.set_aspect('equal') 22 | ax.axis('off') 23 | plt.show() 24 | return 25 | 26 | def get_countours_from_mask(superpixelation): 27 | 28 | ''' 29 | get_countours_from_mask(np.array([[0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8], 30 | [0, 0, 2, 2, 2, 2, 2, 2, 2, 8, 8, 8], 31 | [0, 0, 2, 2, 5, 5, 2, 2, 2, 8, 8, 8], 32 | [0, 0, 2, 2, 5, 5, 2, 2, 2, 8, 8, 8], 33 | [0, 0, 2, 2, 2, 2, 2, 2, 2, 8, 8, 8], 34 | [0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 8, 8], 35 | [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 3, 3], 36 | [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 3, 3], 37 | [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 3, 3], 38 | [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 3, 3], 39 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3], 40 | [0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3]])) 41 | ''' 42 | 43 | raw_contours = cv2.findContours(superpixelation + 1, cv2.RETR_FLOODFILL, cv2.CHAIN_APPROX_SIMPLE) 44 | all_contours = dict() 45 | for c in range(len(raw_contours[0])): 46 | cid = str(superpixelation[raw_contours[0][c][0][0][1], 
raw_contours[0][c][0][0][0]]) 47 | if not cid in all_contours.keys(): 48 | all_contours.update({cid: []}) 49 | new_contour = raw_contours[0][c][:, 0, :].tolist() 50 | all_contours.update({cid: all_contours[cid] + [new_contour]}) 51 | 52 | print('Created %s contours:' % len(all_contours)) 53 | 54 | return all_contours 55 | 56 | def save_contours(all_contours, filename='contours.json.gz'): 57 | 58 | with gzip.GzipFile(filename, 'w') as tempfile: 59 | tempfile.write(json.dumps(all_contours).encode('utf-8')) 60 | 61 | return -------------------------------------------------------------------------------- /lib/wsiGrid.py: -------------------------------------------------------------------------------- 1 | 2 | """Written by S.Domanskyi, 2022 3 | 4 | Module designed to generate a grid of centers of tiles from Whole Slide Image (WSI) 5 | Examples of usage below: 6 | 7 | # Load the module 8 | import lib.wsiGrid as wsiGrid 9 | 10 | # List module components 11 | dir(wsiGrid) 12 | 13 | # Get help on all module functions 14 | help(wsiGrid) 15 | 16 | # Generate and plot Visium-like hexagonal grid 17 | slide_dimensions = 2900, 3060 18 | grid, tile_size = wsiGrid.getGrid(*slide_dimensions, grid_type='hex') 19 | wsiGrid.plotGrid(grid, *slide_dimensions, size=tile_size) 20 | 21 | # Generate and plot Slide-seq-like random grid, save plot to current working directory 22 | slide_dimensions = 2900, 3060 23 | grid, tile_size = wsiGrid.getGrid(*slide_dimensions, grid_type='hex') 24 | grid, tile_size = wsiGrid.perturbGrid(*slide_dimensions, grid, tile_size, delta=0.1) 25 | wsiGrid.plotGrid(grid, *slide_dimensions, size=tile_size, savepath='', show=False) 26 | 27 | # Generate and plot square grid, specify magnification and spot diameter 28 | slide_dimensions = 3000, 1100 29 | grid, tile_size = wsiGrid.getGrid(*slide_dimensions, grid_type='square', magnification=20, spot_diamter=55) 30 | wsiGrid.plotGrid(grid, *slide_dimensions, size=tile_size) 31 | 32 | # enerate and plot large Visium-like hexagonal grid, save to file to current working directory 33 | slide_dimensions = 29000, 30600 34 | grid, tile_size = wsiGrid.getGrid(*slide_dimensions, savepath='') 35 | wsiGrid.plotGrid(grid, *slide_dimensions, size=tile_size, show_spot_labels=False, savepath='') 36 | """ 37 | 38 | import os 39 | import json 40 | 41 | from matplotlib import cm 42 | import matplotlib.pyplot as plt 43 | import matplotlib.patheffects as path_effects 44 | from matplotlib.patches import Circle, Rectangle 45 | from matplotlib.collections import PatchCollection 46 | 47 | import pandas as pd 48 | import numpy as np 49 | 50 | def getGrid(x: int, y: int, grid_type: str = 'hex', factor: float = 64/39, magnification: float = 40.0, 51 | resolution: float = 294/65, spot_diamter: float = 65, spot_horizontal_spacing: float = 100, 52 | aspect_correction: float = 0.95, savepath: str = None, sname: str = ''): 53 | 54 | """Generate grid of tile centers 55 | 56 | Parameters: 57 | x: full resolution image width 58 | 59 | y: full resolution image height 60 | 61 | grid_type: ['hex', 'square'] 62 | 63 | factor: Visium Spatial Gene Expression hex grid factor 64 | 65 | magnification: image magnification 66 | 67 | resolution: pixels per micron of sample at 40x magnification 68 | 69 | spot_diamter: spot diameter in microns 70 | 71 | spot_horizontal_spacing: spot horizontal center-to-center distance in microns 72 | 73 | aspect_correction: Visium capture area is not square, even though officially it is 6.5x6.5mm 74 | 75 | savepath: directory to save data files 76 | 77 | 
sname: identifier for saving data files 78 | 79 | Output: 80 | grid: pandas.DataFrame 81 | 82 | tile_size_pixels: tile size 83 | """ 84 | 85 | tile_size_pixels = resolution * spot_diamter * magnification / 40.0 86 | tile_horizontal_spacing_pixels = resolution * spot_horizontal_spacing * magnification / 40.0 87 | if grid_type=='hex': 88 | tile_vertical_spacing_pixels = 0.5 * factor * tile_horizontal_spacing_pixels / aspect_correction 89 | elif grid_type=='square': 90 | tile_vertical_spacing_pixels = tile_horizontal_spacing_pixels 91 | else: 92 | raise NotImplementedError 93 | 94 | if not savepath is None: 95 | if not os.path.exists(savepath): 96 | os.makedirs(savepath) 97 | 98 | info_dict = {'grid_type': grid_type, 'factor': factor, 'magnification': magnification, 99 | 'resolution': resolution, 'aspect_correction': aspect_correction, 100 | 'spot_diamter': spot_diamter, 'spot_horizontal_spacing': spot_horizontal_spacing, 101 | 'spot_diameter_fullres': tile_size_pixels, 'x': x, 'y': y} 102 | with open(savepath + '%s.json' % sname, 'w') as outfile: 103 | outfile.write(json.dumps(info_dict)) 104 | 105 | nx = int(np.ceil(x / tile_horizontal_spacing_pixels)) 106 | ny = int(np.ceil(y / tile_vertical_spacing_pixels)) 107 | 108 | _grid = [['in_tissue', 'array_row', 'array_col', 'pxl_row_in_fullres', 'pxl_col_in_fullres']] 109 | for i in range(nx): 110 | for j in range(ny): 111 | temp_x = tile_size_pixels/2. + i * tile_horizontal_spacing_pixels 112 | if grid_type=='hex': 113 | if j % 2 == 1: 114 | temp_x += 0.5 * tile_horizontal_spacing_pixels 115 | temp_x = int(temp_x) 116 | temp_y = int(tile_size_pixels/2. + j * tile_vertical_spacing_pixels) 117 | if (temp_x + tile_size_pixels/2. <= x) and (temp_y + tile_size_pixels/2. <= y): 118 | _grid.append([1, j, i, temp_y, temp_x]) 119 | 120 | _grid = pd.DataFrame(columns=np.array(_grid[0]), data=np.array(_grid[1:])) 121 | _grid.index = 'tile-' + (_grid.index+1).astype(str).str.pad(8, fillchar='0') 122 | _grid.index.name = 'barcode' 123 | 124 | if not savepath is None: 125 | if not os.path.exists(savepath): 126 | os.makedirs(savepath) 127 | 128 | _grid.to_csv(savepath + '%s.csv' % sname, header=False) 129 | 130 | return _grid, tile_size_pixels 131 | 132 | def perturbGrid(x, y, grid, tile_size_pixels, n_iterations: int = 5, delta: float = 0.5, seed: int = None, dmax: float = 7., verbose: int = 1): 133 | 134 | """Random perturbations of the grid produce grid limilar to Slide-Seq ST technology 135 | 136 | Parameters: 137 | x: full resolution image width 138 | 139 | y: full resolution image height 140 | 141 | grid: produced by function getGrid 142 | 143 | tile_size_pixels: produced by function getGrid 144 | 145 | n_iterations: number of iterations to randomly perturb the drid 146 | 147 | delta: fraction of the tile size that the tile can be displaced along x or y at most in one move 148 | 149 | seed: random seed to have reproducible perturbation 150 | 151 | dmax: max neighbor distance, number of tile sizes away from the spot center 152 | 153 | verbose: set to 0 to suppress print output 154 | 155 | Output: 156 | grid: pandas.DataFrame 157 | 158 | tile_size_pixels: tile size 159 | """ 160 | 161 | if not seed is None: 162 | np.random.seed(seed) 163 | 164 | x_col_ind = np.where(grid.columns=='pxl_col_in_fullres')[0][0] 165 | y_col_ind = np.where(grid.columns=='pxl_row_in_fullres')[0][0] 166 | 167 | if verbose >= 1: 168 | print('\tComuting neigbors of each spot with dmax: %s' % dmax) 169 | se = pd.Series(index=range(len(grid)), dtype='object') 170 | v = 
grid[['pxl_row_in_fullres', 'pxl_col_in_fullres']].values 171 | for _i, (_y, _x) in enumerate(v): 172 | nn = set(np.where((((v.T[0] - _y)**2 + (v.T[1] - _x)**2)**0.5) < (dmax * tile_size_pixels))[0]) 173 | se[_i] = nn.difference({_i}) 174 | 175 | for iter in range(n_iterations): 176 | if verbose >= 1: 177 | print('Iteration: %s' % iter) 178 | for _i in range(len(grid)): 179 | _x = grid.iloc[_i, x_col_ind] 180 | _y = grid.iloc[_i, y_col_ind] 181 | 182 | _p = (np.random.rand(2) - 0.5) * delta * tile_size_pixels 183 | 184 | nviolations = 0 185 | 186 | nn = np.array(list(se[_i])) 187 | if len(nn) > 0: 188 | vx = grid.iloc[nn, x_col_ind].values 189 | vy = grid.iloc[nn, y_col_ind].values 190 | d = ((vy - _y + _p[0])**2 + (vx - _x + _p[1])**2)**0.5 191 | nviolations += len(set(np.where(d < tile_size_pixels)[0])) 192 | 193 | if ((_y - _p[0]) > (y - 0.5 * tile_size_pixels)) or ((_y - _p[0]) < (0.5 * tile_size_pixels)): 194 | nviolations += 1 195 | 196 | if ((_x - _p[1]) > (x - 0.5 * tile_size_pixels)) or ((_x - _p[1]) < (0.5 * tile_size_pixels)): 197 | nviolations += 1 198 | 199 | if nviolations==0: 200 | grid.iloc[_i, y_col_ind] -= _p[0] 201 | grid.iloc[_i, x_col_ind] -= _p[1] 202 | 203 | return grid, tile_size_pixels 204 | 205 | def plotGrid(grid: pd.DataFrame, x: int, y: int, f: float = 3000, object_shape: str = 'spot', size: int = 294, show_spot_labels: bool = True, show: bool = True, savepath: str = None, sname: str = '', verbose: int = 1): 206 | 207 | 208 | """Plot grid of tiles or spots 209 | 210 | Parameters: 211 | x: full resolution image width 212 | 213 | y: full resolution image height 214 | 215 | f: figure scaling factor 216 | 217 | object_shape: ['spot', 'square', compatible object] 218 | 219 | size: tile height and width, or spot diameter 220 | 221 | show_spot_labels: display spot labels 222 | 223 | savepath: directory to save data files 224 | 225 | sname: identifier for saving data files 226 | 227 | verbose: set to 0 to suppress print output 228 | 229 | Output: 230 | None 231 | """ 232 | 233 | fig, ax = plt.subplots(figsize=(x/f, y/f)) 234 | ax.set_xlim(0, x) 235 | ax.set_ylim(0, y) 236 | ax.axis('off') 237 | ax.set_aspect('equal') 238 | ax.set_ylim(ax.get_ylim()[::-1]) 239 | ax.plot([0, 0, x, x, 0], [y, 0, 0, y, y], color='k') 240 | ax.scatter(0, 0, marker='+', s=1000, c='crimson', clip_on=False) 241 | v = grid[['pxl_row_in_fullres', 'pxl_col_in_fullres']].values 242 | 243 | if object_shape == 'spot': 244 | ax.add_collection(PatchCollection([Circle((x1, y1), size/2) for y1, x1 in v], alpha=0.9, color='gray')) 245 | elif object_shape == 'square': 246 | ax.add_collection(PatchCollection([Rectangle((x1-size/2, y1-size/2), size, size) for y1, x1 in v], alpha=0.9, color='gray')) 247 | else: 248 | raise NotImplementedError 249 | 250 | if show_spot_labels: 251 | for i, (y1, x1) in enumerate(v): 252 | ax.text(x1, y1, i, va='center', ha='center') 253 | 254 | fig.tight_layout() 255 | 256 | if not savepath is None: 257 | if not os.path.exists(savepath): 258 | os.makedirs(savepath) 259 | 260 | fig.savefig(savepath + '%s.png' % sname, facecolor='w') 261 | 262 | if show: 263 | plt.show() 264 | else: 265 | plt.close(fig) 266 | 267 | return -------------------------------------------------------------------------------- /lib/wsiMask.py: -------------------------------------------------------------------------------- 1 | 2 | """Written by S.Domanskyi, 2022 3 | 4 | Module designed to generate a mask for a given grid of centers of tiles from Whole Slide Image (WSI). 
5 | Generate updated grid containing mask values, plot mask and low resolution image. 6 | 7 | Examples of usage below: 8 | 9 | # Load the module 10 | import lib.wsiMask as wsiMask 11 | 12 | # List module components 13 | dir(wsiMask) 14 | 15 | # Get help on all module functions 16 | help(wsiMask) 17 | 18 | ## Generate in tissue mask 19 | wsiMask.getInTissueMask(grid_csv='grid_sample.csv', 20 | grid_json='grid_sample.json, 21 | low_res_image='image_sample.tiff', 22 | show=True, savepath='', sname='sample'); 23 | """ 24 | 25 | import os 26 | import json 27 | import pandas as pd 28 | import numpy as np 29 | 30 | from matplotlib import cm 31 | import matplotlib.pyplot as plt 32 | import matplotlib.patheffects as path_effects 33 | from matplotlib.patches import Circle, Rectangle 34 | from matplotlib.collections import PatchCollection 35 | 36 | import cv2 37 | from skimage.transform import resize 38 | from scipy.ndimage import binary_fill_holes 39 | from skimage.draw import disk 40 | 41 | def plotMask(df, width: int = None, height: int = None, size: float = None, 42 | image = None, figdim = 10, object_shape: str = 'spot', spot_alpha: float = 0.4, 43 | savepath: str = None, sname: str = '', show: bool = True): 44 | 45 | """Plot mask as square tiles or disks/spots 46 | 47 | Parameters: 48 | df: grid produced by function getGrid of wsiGrid module 49 | 50 | width: full resolution image width 51 | 52 | height: full resolution image height 53 | 54 | size: value produced by function getGrid 55 | 56 | image: low resolution image 3D array 57 | 58 | figdim: image scale, the bigger the value, the large mask image will be 59 | 60 | object_shape: ['spot', 'square'] shape of patch to plot as mask 61 | 62 | spot_alpha: transparency of the patches 63 | 64 | savepath: directory to save data files 65 | 66 | sname: identifier for saving data files 67 | 68 | show: display the image, needs interactive backend 69 | 70 | Output: 71 | None 72 | """ 73 | 74 | figdim *= max(width, height) / 30000 75 | 76 | fig, ax = plt.subplots(figsize=(figdim, figdim)) 77 | ax.imshow(image, origin='lower', extent=(0, width, 0, height)) 78 | 79 | v = df[['pxl_row_in_fullres', 'pxl_col_in_fullres']].loc[df['in_tissue']==1].values 80 | 81 | if object_shape == 'spot': 82 | ax.add_collection(PatchCollection([Circle((x1, y1), size/2) for y1, x1 in v], alpha=spot_alpha, color='k', edgecolor=None, linewidth=0)) 83 | elif object_shape == 'square': 84 | ax.add_collection(PatchCollection([Rectangle((x1-size/2, y1-size/2), size, size) for y1, x1 in v], alpha=spot_alpha, color='k', edgecolor=None, linewidth=0)) 85 | else: 86 | raise NotImplementedError 87 | 88 | ax.set_ylim(ax.get_ylim()[::-1]) 89 | ax.set_aspect('equal') 90 | ax.axis('off') 91 | ax.plot([0, 0, width, width, 0], [0, height, height, 0, 0], linewidth=0.5, c='k', clip_on=False) 92 | 93 | fig.tight_layout() 94 | 95 | if not savepath is None: 96 | if not os.path.exists(savepath): 97 | os.makedirs(savepath) 98 | 99 | fig.savefig(savepath + '%s.png' % sname, dpi=150, facecolor='w') 100 | 101 | if show: 102 | plt.show() 103 | else: 104 | plt.close(fig) 105 | 106 | return 107 | 108 | def getInTissuePixelMask(low_res_image: str, low: float = 100, high: float = 200, savepath: str = None, sname: str = ''): 109 | 110 | """Plot mask as square tiles or disks/spots 111 | 112 | Parameters: 113 | low_res_image: path to file with low resolution image 114 | 115 | low: low threshold 116 | 117 | high: high threshold 118 | 119 | savepath: directory to save data files 120 | 121 | sname: identifier for 
saving data files 122 | 123 | Output: 124 | Pixel in tissue mask 125 | """ 126 | 127 | v = plt.imread(low_res_image)[:, :, :3].mean(axis=2) 128 | 129 | vc = v.copy() 130 | v[vc < low] = 0 131 | v[vc > high] = 0 132 | v[(vc >= low) & (vc <= high)] = 1 133 | 134 | df = pd.DataFrame(v.T) 135 | 136 | if not savepath is None: 137 | if not os.path.exists(savepath): 138 | os.makedirs(savepath) 139 | 140 | df.to_csv(savepath + '%s.csv' % sname, header=False, index=False) 141 | 142 | return df 143 | 144 | def getInTissueTileMask(pixel_mask_csv: str, grid_csv: str, grid_json: str, low_res_image: str, plot_mask: bool = True, 145 | fraction: float = 0.1, savepath: str = None, sname: str = '', show: bool = False): 146 | 147 | """Plot mask as square tiles or disks/spots 148 | 149 | Parameters: 150 | grid_csv: csv file produced by function getGrid of wsiGrid module 151 | 152 | grid_json: json file produced by function getGrid of wsiGrid module 153 | 154 | low_res_image: path to file with low resolution image 155 | 156 | plot_mask: whether to make a plot with mask and low resolution image 157 | 158 | fraction: fraction of low resolution pixels in tissue to call patch in_tissue 159 | 160 | savepath: directory to save data files 161 | 162 | sname: identifier for saving data files 163 | 164 | show: display the image, needs interactive backend 165 | 166 | Output: 167 | df_grid: grid of centers with updated mask column 168 | """ 169 | 170 | with open(grid_json) as f: 171 | info_dict = json.load(f) 172 | slide_fullres_width = info_dict['x'] 173 | slide_fullres_height = info_dict['y'] 174 | spot_diameter_fullres = info_dict['spot_diameter_fullres'] 175 | 176 | img_RGB_high_res = plt.imread(low_res_image)[:, :, :3] 177 | 178 | scale_factor = 0.5 * (img_RGB_high_res.shape[0] / slide_fullres_height) + 0.5 * (img_RGB_high_res.shape[1] / slide_fullres_width) 179 | 180 | df_grid = pd.read_csv(grid_csv, header=None, index_col=0) 181 | df_grid.columns = ['in_tissue', 'array_row', 'array_col', 'pxl_row_in_fullres', 'pxl_col_in_fullres'] 182 | df_grid.index.name = 'barcode' 183 | 184 | df_pixel_mask = pd.read_csv(pixel_mask_csv, index_col=None, header=None) 185 | 186 | for tile in df_grid.index[:]: 187 | tile_x = int(df_grid.loc[tile]['pxl_col_in_fullres'] * scale_factor) 188 | tile_y = int(df_grid.loc[tile]['pxl_row_in_fullres'] * scale_factor) 189 | tile_half_size = int(spot_diameter_fullres * scale_factor / 2) 190 | in_tissue = int(df_pixel_mask.iloc[tile_x - tile_half_size : tile_x + tile_half_size, 191 | tile_y - tile_half_size : tile_y + tile_half_size].mean().mean() >= fraction) 192 | df_grid.loc[tile, 'in_tissue'] = in_tissue 193 | 194 | if plot_mask: 195 | plotMask(df_grid, width=slide_fullres_width, height=slide_fullres_height, 196 | size=spot_diameter_fullres, image=img_RGB_high_res[:, :, :3], 197 | figdim=10, object_shape='square', savepath=savepath, sname=sname, show=show) 198 | 199 | if not savepath is None: 200 | if not os.path.exists(savepath): 201 | os.makedirs(savepath) 202 | 203 | df_grid['in_tissue'].to_csv(savepath + '%s.csv' % sname, header=False) 204 | 205 | return df_grid 206 | 207 | def makeTissueMaskFromTileMask(gridFile, gridInfoFile, tileMaskFile, squarePatch=False, upSizeFactor=1.5, 208 | downSizeChunkPx=1000, kernelSize=20, savePath='tissue_mask.png'): 209 | 210 | with open(gridInfoFile, 'r') as tempfile: 211 | info = json.loads(tempfile.read()) 212 | s, x, y = int(info['spot_diameter_fullres']), info['x'], info['y'] 213 | print(s, x, y) 214 | 215 | se_mask = pd.read_csv(tileMaskFile, 
index_col=0, header=None)[1].rename(None) 216 | 217 | df_grid = pd.read_csv(gridFile, index_col=0, header=None)[[4, 5]] 218 | df_grid.columns = ['x', 'y'] 219 | df_grid.index.name = None 220 | 221 | df_grid = df_grid.loc[se_mask[se_mask==1].index.values] 222 | 223 | downsampleFactor = int(np.ceil(max(x, y) / downSizeChunkPx)) 224 | 225 | m = np.zeros((x, y), dtype=np.int8)[::downsampleFactor, ::downsampleFactor] 226 | print(m.shape) 227 | 228 | maxxd = int(x / downsampleFactor) 229 | maxyd = int(y / downsampleFactor) 230 | 231 | for ty, tx in df_grid.values: 232 | xd = int(tx / downsampleFactor) 233 | yd = int(ty / downsampleFactor) 234 | radius = int(s * upSizeFactor / downsampleFactor) 235 | if squarePatch: 236 | m[xd-radius: xd+radius, yd-radius: yd+radius] = 1 237 | else: 238 | cc, rr = disk((xd, yd), radius) 239 | cc[cc<0] = 0 240 | cc[cc>maxxd] = maxxd-1 241 | rr[rr<0] = 0 242 | rr[rr>maxyd] = maxyd-1 243 | try: 244 | m[cc, rr] = 1 245 | except: 246 | pass 247 | 248 | m = m.T * 255 249 | m = resize(m, (int(np.round(y/downsampleFactor, 0)), int(np.round(x/downsampleFactor, 0))), order=3) 250 | m[m>=m.max()/2] = 255 251 | m[m [ (it.sample), it ] } ) 20 | .set{ samples } 21 | 22 | EXPORT_PARAMETERS () 23 | EXPORT_SAMPLEINFO ( samples ) 24 | 25 | if ( params.workflow == "arbitrary_grid" ) { 26 | ARB ( samples ) 27 | } 28 | 29 | if ( params.workflow == "one_reference" ) { 30 | ONE ( samples ) 31 | } 32 | 33 | if ( params.workflow == "two_references" ) { 34 | TWO ( samples ) 35 | } 36 | 37 | if ( params.workflow == "deconvolution_indices" ) { 38 | if ( params.deconvolution_tool == "xenome" ) { 39 | if ( !file("${params.deconvolution_indices_path}/${params.deconvolution_indices_name}-both.kmers.low-bits.lwr").exists() ) { 40 | XINDEX ( ) 41 | } 42 | } 43 | else if ( params.deconvolution_tool == "xengsort" ) { 44 | if ( !file("${params.deconvolution_indices_path}/${params.deconvolution_indices_name}-xind.hash").exists() ) { 45 | XINDEX ( ) 46 | } 47 | } 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /modules/local/bafextract.nf: -------------------------------------------------------------------------------- 1 | 2 | process GET_REFERENCE_PILEUP { 3 | 4 | tag "$reference" 5 | label "bafextract" 6 | 7 | input: 8 | path(reference) 9 | 10 | output: 11 | tuple file("sizes.list"), file("pileup/*") 12 | 13 | script: 14 | """ 15 | mkdir pileup 16 | 17 | /BAFExtract/bin/BAFExtract -preprocess_FASTA ${reference}/fasta/genome.fa pileup 18 | 19 | cut -f1,2 ${reference}/fasta/genome.fa.fai > sizes.list 20 | """ 21 | } 22 | 23 | 24 | process GET_PILEUP_OF_BAM { 25 | 26 | tag "$sample_id" 27 | label "bafextract" 28 | 29 | input: 30 | tuple val(sample_id), path(bam) 31 | tuple path(sizes), path(pileup) 32 | 33 | output: 34 | tuple val(sample_id), file("bam_pileup/*") 35 | 36 | script: 37 | """ 38 | mkdir bam_pileup 39 | 40 | if [[ ${bam[0]} == *".bam.bai"* ]]; then 41 | bbam=${bam[1]} 42 | else 43 | bbam=${bam[0]} 44 | fi 45 | 46 | samtools view \${bbam} | /BAFExtract/bin/BAFExtract -generate_compressed_pileup_per_SAM stdin ${sizes} bam_pileup ${params.bafextract_minimum_mapping_quality} ${params.bafextract_minimum_base_quality} 47 | """ 48 | } 49 | 50 | 51 | process GET_SNV_FROM_PILEUP { 52 | 53 | tag "$sample_id" 54 | label "bafextract" 55 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 56 | 57 | input: 58 | tuple val(sample_id), path(bam_pileup) 59 | tuple path(sizes), path(pileup) 60 | 
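    // Final step of the BAFExtract chain in this module: GET_REFERENCE_PILEUP turns the
    // reference FASTA into per-chromosome pileup binaries plus a sizes.list,
    // GET_PILEUP_OF_BAM compresses a pileup from the position-sorted BAM, and this
    // process runs `-get_SNVs_per_pileup` with the params.bafextract_min_* thresholds
    // to write extracted.baf into a per-species output folder.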
val(species) 61 | 62 | output: 63 | tuple val(sample_id), file("${species}/extracted.baf") 64 | 65 | script: 66 | """ 67 | mkdir ref_pileup 68 | 69 | for f in ${pileup} 70 | do 71 | if [[ \${f} == *" "* ]] 72 | then 73 | newf="`cut -d ' ' -f 1 <<< "\$f"`.bin" 74 | cp "\${f}" "ref_pileup/\${newf}" 75 | else 76 | cp "\${f}" "ref_pileup/\${f}" 77 | fi 78 | done 79 | 80 | mkdir bam_pileup 81 | cp ${bam_pileup} bam_pileup 82 | 83 | mkdir "${species}" 84 | 85 | /BAFExtract/bin/BAFExtract -get_SNVs_per_pileup ${sizes} bam_pileup ref_pileup ${params.bafextract_min_coverage_per_SNV} ${params.bafextract_min_MAF_covg_per_SNV} ${params.bafextract_min_MAF} "${species}/extracted.baf" 86 | 87 | rm -R ref_pileup 88 | rm -R bam_pileup 89 | """ 90 | } 91 | -------------------------------------------------------------------------------- /modules/local/deconvolution.nf: -------------------------------------------------------------------------------- 1 | 2 | process XENOME_GENERATE_INDEX { 3 | 4 | tag "${params.deconvolution_indices_name}" 5 | publishDir "${params.deconvolution_indices_path}", pattern: "${params.deconvolution_indices_name}-*", mode: 'copy', overwrite: false 6 | 7 | input: 8 | path host_fasta 9 | path graft_fasta 10 | val kmer_size 11 | 12 | output: 13 | path("${params.deconvolution_indices_name}-*"), emit: indices_path 14 | 15 | script: 16 | """ 17 | mkdir tempw 18 | 19 | /xenome-1.0.1-r/xenome index \ 20 | --kmer-size ${kmer_size} \ 21 | --prefix ${params.deconvolution_indices_name} \ 22 | --tmp-dir tempw \ 23 | --num-threads ${task.cpus} \ 24 | --host "${host_fasta}" \ 25 | --graft "${graft_fasta}" \ 26 | --verbose \ 27 | --max-memory 20 28 | """ 29 | } 30 | 31 | process XENGSORT_GENERATE_INDEX { 32 | 33 | tag "${params.deconvolution_indices_name}" 34 | publishDir "${params.deconvolution_indices_path}", pattern: "${params.deconvolution_indices_name}-xind*", mode: 'copy', overwrite: false 35 | 36 | input: 37 | path host_fasta 38 | path graft_fasta 39 | val kmer_size 40 | 41 | output: 42 | path("${params.deconvolution_indices_name}-xind*"), emit: indices_path 43 | 44 | script: 45 | """ 46 | xengsort index \ 47 | -H "${host_fasta}" \ 48 | -G "${graft_fasta}" \ 49 | -n ${params.xengsort_n} \ 50 | -k ${kmer_size} \ 51 | -W ${task.cpus} \ 52 | --index ${params.deconvolution_indices_name}-xind 53 | """ 54 | } 55 | 56 | process DECONVOLUTION_XENOME { 57 | 58 | tag "$sample_id" 59 | publishDir "${params.outdir}/${sample_id}", pattern: '.command.out', saveAs: { filename -> "xenome.summary.txt" }, mode: 'copy', overwrite: true 60 | 61 | input: 62 | tuple val(sample_id), path(fastq) 63 | path(indices_path) 64 | val(indices_name) 65 | 66 | output: 67 | tuple val(sample_id), file("categorized/unsorted_human_{1,2}.fastq"), emit: human 68 | tuple val(sample_id), file("categorized/unsorted_mouse_{1,2}.fastq"), emit: mouse 69 | path(".command.out"), emit: summary 70 | 71 | script: 72 | """ 73 | mkdir categorized 74 | mkdir tmp 75 | 76 | /xenome-1.0.1-r/xenome classify \ 77 | -T ${task.cpus} \ 78 | -i ${fastq[0]} \ 79 | -i ${fastq[1]} \ 80 | --pairs \ 81 | -P ${indices_path}/${indices_name} \ 82 | --graft-name human \ 83 | --host-name mouse \ 84 | --output-filename-prefix categorized/unsorted \ 85 | --tmp-dir tmp \ 86 | --verbose 87 | """ 88 | } 89 | 90 | process DECONVOLUTION_XENGSORT { 91 | 92 | tag "$sample_id" 93 | publishDir "${params.outdir}/${sample_id}", pattern: '.command.out', saveAs: { filename -> "xengsort.summary.txt" }, mode: 'copy', overwrite: params.overwrite_files_on_publish 94 | 95 | input: 
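    // Note on the paired reads handled below: xengsort classifies only fastq[1]
    // (in 10x spatial libraries R2 carries the cDNA and R1 the barcode plus UMI,
    // an assumption about the library layout rather than something stated here),
    // and the matching R1 mates are then recovered by read ID with `seqtk subseq`,
    // keeping the per-species FASTQ pairs in sync.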
96 | tuple val(sample_id), path(fastq) 97 | path(indices_path) 98 | val(indices_name) 99 | 100 | output: 101 | tuple val(sample_id), file("unsorted_human_{1,2}.fastq"), emit: human 102 | tuple val(sample_id), file("unsorted_mouse_{1,2}.fastq"), emit: mouse 103 | path(".command.out"), emit: summary 104 | 105 | script: 106 | """ 107 | xengsort classify \ 108 | --index "${indices_path}/${params.deconvolution_indices_name}-xind" \ 109 | --fastq ${fastq[1]} \ 110 | --out fastq \ 111 | --threads ${task.cpus} \ 112 | --chunksize 32.0 \ 113 | --compression none 114 | 115 | mv fastq-host.fq unsorted_mouse_2.fastq 116 | mv fastq-graft.fq unsorted_human_2.fastq 117 | 118 | seqtk subseq ${fastq[0]} <(fgrep "@" unsorted_mouse_2.fastq | cut -d ' ' -f1 | cut -d '@' -f2) >> unsorted_mouse_1.fastq 119 | seqtk subseq ${fastq[0]} <(fgrep "@" unsorted_human_2.fastq | cut -d ' ' -f1 | cut -d '@' -f2) >> unsorted_human_1.fastq 120 | """ 121 | } 122 | 123 | process SORT_FASTQ { 124 | 125 | tag "$sample_id" 126 | label "low_process" 127 | 128 | input: 129 | tuple val(sample_id), path(fastq) 130 | 131 | output: 132 | tuple val(sample_id), file("sorted_{1,2}.fastq") 133 | 134 | script: 135 | """ 136 | fastq-sort --id ${fastq[0]} > "sorted_1.fastq" 137 | fastq-sort --id ${fastq[1]} > "sorted_2.fastq" 138 | """ 139 | } 140 | -------------------------------------------------------------------------------- /modules/local/focus.nf: -------------------------------------------------------------------------------- 1 | 2 | process CHECK_FOCUS { 3 | 4 | tag "$sample_id" 5 | label 'process_focus' 6 | maxRetries 3 7 | errorStrategy { task.attempt <= maxRetries ? 'retry' : 'finish' } 8 | memory { 12.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 12.GB } 9 | publishDir "${params.outdir}/${sample_id}/focus", pattern: 'output/*', saveAs: { "${file(it).getFileName()}" }, mode: 'copy', overwrite: params.overwrite_files_on_publish 10 | 11 | input: 12 | tuple val(sample_id), path(image), val(size) 13 | 14 | output: 15 | tuple val(sample_id), file("output/*") 16 | 17 | script: 18 | """ 19 | [ ! 
-d "output" ] && mkdir "output" 20 | 21 | python3 /deepfocus/runDeepFocus.py --checkpoint-dir="${params.deepfocus_model_path}" --wsi-file="${image}" --output-dir="output/" 22 | """ 23 | } 24 | -------------------------------------------------------------------------------- /modules/local/gunzip.nf: -------------------------------------------------------------------------------- 1 | 2 | process GUNZIP { 3 | 4 | tag "$sample_id" 5 | 6 | input: 7 | tuple val(sample_id), path(fastq) 8 | 9 | output: 10 | tuple val(sample_id), file("*R{1,2}*.fastq") 11 | 12 | script: 13 | """ 14 | gzip -d -k ${fastq} 15 | """ 16 | } 17 | 18 | 19 | process GUNZIP_SEPARATE { 20 | 21 | tag "$sample_id" 22 | 23 | input: 24 | tuple val(sample_id), path(fastq) 25 | 26 | output: 27 | tuple val(sample_id), file("*_R1_*.fastq"), emit: R1 28 | tuple val(sample_id), file("*_R2_*.fastq"), emit: R2 29 | 30 | script: 31 | """ 32 | gzip -d -k ${fastq} 33 | """ 34 | } 35 | 36 | 37 | process GUNZIP_FASTA { 38 | 39 | input: 40 | path(fasta) 41 | 42 | output: 43 | file("*{.fa,.fna}") 44 | 45 | script: 46 | """ 47 | gzip -d -k ${fasta} 48 | """ 49 | } 50 | -------------------------------------------------------------------------------- /modules/local/load.nf: -------------------------------------------------------------------------------- 1 | 2 | import groovy.json.JsonOutput 3 | 4 | process LOAD_SAMPLE_INFO { 5 | 6 | tag "$sample_id" 7 | 8 | input: 9 | tuple val(sample_id), val(meta), path(fastq), path(image) 10 | 11 | output: 12 | tuple val(sample_id), file(image), emit: image 13 | tuple val(sample_id), file("${fastq}/*R{1,2}*.fastq*"), emit: fastq 14 | 15 | script: 16 | """ 17 | """ 18 | } 19 | 20 | 21 | process EXPORT_PARAMETERS { 22 | 23 | publishDir "${params.tracedir}", pattern: '{*.json}', mode: 'copy', overwrite: params.overwrite_files_on_publish 24 | 25 | output: 26 | path 'parameters.json' 27 | 28 | script: 29 | "echo '${JsonOutput.toJson(params)}' > parameters.json" 30 | } 31 | 32 | 33 | process EXPORT_SAMPLEINFO { 34 | 35 | publishDir "${params.outdir}/${sample_id}", pattern: '{*.json}', mode: 'copy', overwrite: params.overwrite_files_on_publish 36 | 37 | input: 38 | tuple val(sample_id), val(meta) 39 | 40 | output: 41 | path 'info.json' 42 | 43 | script: 44 | "echo '${JsonOutput.toJson(meta)}' > info.json" 45 | } 46 | -------------------------------------------------------------------------------- /modules/local/merge.nf: -------------------------------------------------------------------------------- 1 | 2 | process CONVERT_SEGMENTATION_DATA { 3 | 4 | tag "$sample_id" 5 | label 'python_low_process' 6 | maxRetries 1 7 | errorStrategy { task.attempt <= maxRetries ? 
'retry' : 'finish' } 8 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 9 | memory { 1.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 3.GB } 10 | 11 | input: 12 | tuple val(sample_id), path(segmentation_csv), val(size) 13 | 14 | output: 15 | tuple val(sample_id), file("segmentation.h5ad") 16 | 17 | script: 18 | """ 19 | #!/usr/bin/env python 20 | 21 | import os 22 | os.environ["NUMBA_CACHE_DIR"] = "./tmp" 23 | 24 | import pandas as pd 25 | import scanpy as sc 26 | 27 | df_temp = pd.read_csv("${segmentation_csv}", index_col=0, header=0).sort_index() 28 | df_temp.index.name = 'id' 29 | df_temp.insert(0, 'original_barcode', df_temp.index.values) 30 | 31 | ad = sc.AnnData(X=df_temp.loc[:, ~df_temp.columns.str.contains('original_barcode')], 32 | obs=df_temp.loc[:, df_temp.columns.str.contains('original_barcode')]) 33 | 34 | ad.write("segmentation.h5ad") 35 | """ 36 | } 37 | 38 | 39 | process CONVERT_CSV_TO_ANNDATA { 40 | 41 | tag "$sample_id" 42 | label "python_low_process" 43 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 44 | 45 | input: 46 | tuple val(sample_id), path(data_csv), val(expansion_factor), val(suffix) 47 | 48 | output: 49 | tuple val(sample_id), file("img.data.${suffix}-${expansion_factor}.h5ad"), val(expansion_factor), val(suffix) 50 | 51 | script: 52 | """ 53 | #!/usr/bin/env python 54 | 55 | import os 56 | os.environ["NUMBA_CACHE_DIR"] = "./tmp" 57 | 58 | import gc 59 | import pandas as pd 60 | import scanpy as sc 61 | 62 | df_temp = pd.read_csv("${data_csv}", index_col=[0,1]).xs(1, level='in_tissue') 63 | df_temp.insert(0, 'original_barcode', df_temp.index.values) 64 | 65 | ad = sc.AnnData(X=df_temp.loc[:, df_temp.columns.str.contains('feat')], 66 | obs=df_temp.loc[:, ~df_temp.columns.str.contains('feat')]) 67 | 68 | df_temp = None 69 | gc.collect() 70 | 71 | ad.write("img.data.${suffix}-${expansion_factor}.h5ad") 72 | """ 73 | } 74 | 75 | 76 | process MERGE_MTX { 77 | 78 | tag "$sample_id" 79 | label "python_low_process" 80 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 81 | 82 | input: 83 | tuple val(sample_id), path('mtx_mouse/*'), path('mtx_human/*') 84 | 85 | output: 86 | tuple val(sample_id), file("raw_feature_bc_matrix/*") 87 | 88 | script: 89 | 90 | """ 91 | #!/usr/bin/env python 92 | 93 | import os 94 | os.environ[ 'NUMBA_CACHE_DIR' ] = './tmp/' 95 | 96 | import sys 97 | sys.path.append("${projectDir}/bin") 98 | from mtx_tools import read_mtx_combine_and_write_mtx as combine 99 | from mtx_tools import read_sc_from_mtx as read 100 | 101 | combine(read('mtx_mouse/'), read('mtx_human/'), saveDataDir='raw_feature_bc_matrix/') 102 | """ 103 | } 104 | 105 | 106 | process RETURN_SEPARATE_MTX { 107 | 108 | tag "$sample_id" 109 | label "python_low_process" 110 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true 111 | 112 | input: 113 | tuple val(sample_id), path('mtx_mouse/*'), path('mtx_human/*') 114 | 115 | output: 116 | tuple val(sample_id), file("raw_feature_bc_matrix_mouse/*"), file("raw_feature_bc_matrix_human/*") 117 | 118 | script: 119 | """ 120 | [ ! -d "raw_feature_bc_matrix_mouse" ] && mkdir raw_feature_bc_matrix_mouse 121 | cp -R mtx_mouse/* raw_feature_bc_matrix_mouse/ 122 | 123 | [ ! 
-d "raw_feature_bc_matrix_human" ] && mkdir raw_feature_bc_matrix_human 124 | cp -R mtx_human/* raw_feature_bc_matrix_human/ 125 | """ 126 | } 127 | 128 | 129 | process RETURN_MTX { 130 | 131 | tag "$sample_id" 132 | label "python_low_process" 133 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 134 | 135 | input: 136 | tuple val(sample_id), path('mtx/*') 137 | 138 | output: 139 | tuple val(sample_id), file("raw_feature_bc_matrix/*") 140 | 141 | script: 142 | """ 143 | [ ! -d "raw_feature_bc_matrix" ] && mkdir raw_feature_bc_matrix 144 | cp -R mtx/* raw_feature_bc_matrix/ 145 | """ 146 | } 147 | -------------------------------------------------------------------------------- /modules/local/ome.nf: -------------------------------------------------------------------------------- 1 | 2 | process CONVERT_TO_PYRAMIDAL_OME { 3 | 4 | tag "$sample_id" 5 | label 'process_ome' 6 | maxRetries 1 7 | errorStrategy { task.attempt <= maxRetries ? 'retry' : 'finish' } 8 | publishDir "${params.outdir}/${sample_id}", pattern: 'image.ome.tiff', mode: 'copy', overwrite: params.overwrite_files_on_publish 9 | 10 | input: 11 | tuple val(sample_id), path(image) 12 | 13 | output: 14 | tuple val(sample_id), file("image.ome.tiff") 15 | 16 | script: 17 | """ 18 | export BF_MAX_MEM=24g 19 | export _JAVA_OPTIONS="-Xmx24g" 20 | bfconvert -version 21 | 22 | bfconvert -noflat -bigtiff -overwrite \ 23 | -pyramid-resolutions 3 -pyramid-scale 4 -tilex ${params.tiled_tiff_tile_size} -tiley ${params.tiled_tiff_tile_size} \ 24 | -compression ${params.compression} "${image}" image.ome.tiff || \ 25 | bfconvert -noflat -bigtiff -overwrite \ 26 | -pyramid-resolutions 2 -pyramid-scale 4 -tilex ${params.tiled_tiff_tile_size} -tiley ${params.tiled_tiff_tile_size} \ 27 | -compression ${params.compression} "${image}" image.ome.tiff || \ 28 | bfconvert -noflat -bigtiff -overwrite \ 29 | -pyramid-resolutions 1 -pyramid-scale 4 -tilex ${params.tiled_tiff_tile_size} -tiley ${params.tiled_tiff_tile_size} \ 30 | -compression ${params.compression} "${image}" image.ome.tiff 31 | """ 32 | } 33 | 34 | 35 | process EXTRACT_IMAGE_METADATA { 36 | 37 | tag "$sample_id" 38 | label 'process_ome' 39 | maxRetries 1 40 | errorStrategy { task.attempt <= maxRetries ? 'retry' : 'finish' } 41 | publishDir "${params.outdir}/${sample_id}", pattern: 'metadata.ome.xml', mode: 'copy', overwrite: params.overwrite_files_on_publish 42 | memory { 4.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 0.GB } 43 | 44 | input: 45 | tuple val(sample_id), path(fileslide), path(roifile), val(mpp), val(size) 46 | 47 | output: 48 | tuple val(sample_id), file("metadata.ome.xml") 49 | 50 | script: 51 | """ 52 | showinf -omexml-only -nopix ${fileslide} >> "metadata.ome.xml" 53 | """ 54 | } -------------------------------------------------------------------------------- /modules/local/postprocessing.nf: -------------------------------------------------------------------------------- 1 | 2 | process DIMRED_CLUSTER_MORPH { 3 | 4 | tag "$sample_id" 5 | label 'python_low_process' 6 | maxRetries 2 7 | errorStrategy { task.attempt <= maxRetries ? 
'retry' : 'finish' } 8 | publishDir "${params.outdir}/${sample_id}/figures", pattern: 'figures/*/*.png', saveAs: { filename -> "${filename.split("/")[filename.split("/").length - 1]}" }, mode: 'copy', overwrite: params.overwrite_files_on_publish 9 | memory { 1.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 3.GB } 10 | 11 | input: 12 | tuple val(sample_id), path(grid_csv), path(grid_json), path(thumb), path(segmentation_csv), path(features_h5ad), val(expansion_factor), val(suffix) 13 | 14 | output: 15 | tuple val(sample_id), file("figures/*/*.png") 16 | 17 | script: 18 | """ 19 | #!/usr/bin/env python 20 | 21 | import os 22 | os.environ["NUMBA_CACHE_DIR"] = "./tmp" 23 | 24 | import sys 25 | import json 26 | import numpy as np 27 | import pandas as pd 28 | import scanpy as sc 29 | import matplotlib.pyplot as plt 30 | 31 | plt.rcParams['figure.dpi'] = ${params.plot_dpi} 32 | 33 | def loadImFeatures(dpath): 34 | df_temp = pd.read_csv(dpath, index_col=[0,1], sep=',').xs(1, level='in_tissue') 35 | df_temp.insert(0, 'original_barcode', df_temp.index.values) 36 | ad = sc.AnnData(X=df_temp.loc[:, df_temp.columns.str.contains('feat')], 37 | obs=df_temp.loc[:, ~df_temp.columns.str.contains('feat')]) 38 | return ad 39 | 40 | def loadAdImage(): 41 | thumbnail = plt.imread("${thumb}") 42 | with open("${grid_json}", 'r') as f: 43 | d = json.load(f) 44 | grid = pd.read_csv("${grid_csv}", index_col=0, header=None) 45 | image = {'library_id': {'images': {'lowres': thumbnail}, 46 | 'metadata': {'chemistry_description': None, 'software_version': None}, 47 | 'scalefactors': {'tissue_lowres_scalef': thumbnail.shape[0]/d['y'], 48 | 'spot_diameter_fullres': d['spot_diameter_fullres']}}}, grid.index.values, grid[[5, 4]].values 49 | return image 50 | 51 | # Load data 52 | ad = sc.read_h5ad("${features_h5ad}") 53 | df_temp = pd.read_csv("${segmentation_csv}", index_col=0, header=0).reindex(ad.obs.index) 54 | df_temp.index.name = 'id' 55 | ad.obs = pd.concat([ad.obs, df_temp], axis=1) 56 | 57 | # Load image 58 | image = loadAdImage() 59 | ad.uns['spatial'] = image[0] 60 | 61 | ad.obsm['spatial'] = pd.DataFrame(index=image[1], data=image[2]).reindex(ad.obs['original_barcode']).values 62 | 63 | # Morphometrics spatial plots 64 | cols1 = [None] + df_temp.columns[df_temp.columns.isin(['average_perimeter_length', 'average_area', 'average_eccentricity', 65 | 'average_orientation', 'average_cell_type_prob'])].values.tolist() 66 | cols2 = [None] + df_temp.columns[df_temp.columns.str.contains('count')].values.tolist() 67 | 68 | c, r = np.ptp(ad.obs['array_row']), np.ptp(ad.obs['array_col']) 69 | f = 5 70 | if r > c: 71 | figsize = f, f * c/r 72 | else: 73 | figsize = f * r/c, f 74 | 75 | if not os.path.exists('figures/show/'): 76 | os.makedirs('figures/show/') 77 | if not os.path.exists('figures/umap/'): 78 | os.makedirs('figures/umap/') 79 | 80 | spot_size = ad.uns['spatial']['library_id']['scalefactors']['spot_diameter_fullres'] 81 | print(spot_size) 82 | 83 | spot_size *= ${params.grid_spot_horizontal_spacing} / ${params.grid_spot_diamter} 84 | print(spot_size) 85 | 86 | plt.rcParams["figure.figsize"] = figsize 87 | sc.pl.spatial(ad, img_key='lowres', color=cols1, spot_size=spot_size, cmap='rainbow', ncols=3, show=False, save='/spatial_plot_morphometric.png'); 88 | sc.pl.spatial(ad, img_key='lowres', color=cols2, spot_size=spot_size, cmap='rainbow', ncols=3, show=False, save='/spatial_plot_classification.png'); 89 | 90 | print(ad.obs) 91 | print(ad) 92 | 93 | # Dimensionality reduction 94 | 
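    # The block below treats the per-tile image features (the "feat" columns in ad.X)
    # like genes in a standard scanpy workflow: keep the 500 most variable features,
    # z-score them, run PCA with up to 30 components, build a kNN graph, embed with
    # UMAP and cluster with Leiden at resolution 0.5. A hypothetical sweep over
    # resolutions, shown for illustration only and not executed by this pipeline:
    #
    #     for res in (0.25, 0.5, 1.0):
    #         sc.tl.leiden(ad, key_added='cluster_res_%s' % res, resolution=res)
    #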
sc.pp.highly_variable_genes(ad, flavor='seurat', n_top_genes=500) 95 | sc.pp.scale(ad) 96 | sc.pp.pca(ad, n_comps=min(30, ad.shape[0]-1), zero_center=False, use_highly_variable=True) 97 | sc.pp.neighbors(ad, use_rep='X_pca') 98 | sc.tl.umap(ad) 99 | 100 | # Clustering 101 | res = 0.5 102 | sc.tl.leiden(ad, key_added='cluster', resolution=res) 103 | plt.rcParams["figure.figsize"] = figsize 104 | sc.pl.spatial(ad, img_key='lowres', color=[None, 'cluster'], spot_size=spot_size, show=False, save='/cluster.png'); 105 | 106 | # UMAP plots 107 | plt.rcParams["figure.figsize"] = (3,3) 108 | sc.pl.umap(ad, color=['cluster'], s=None, show=False, save='/umap_plot_cluster.png'); 109 | sc.pl.umap(ad, color=['cluster'] + cols1, s=None, ncols=3, show=False, save='/umap_plot_morphometric.png'); 110 | sc.pl.umap(ad, color=['cluster'] + cols2, s=None, ncols=3, show=False, save='/umap_plot_classification.png'); 111 | """ 112 | } 113 | 114 | 115 | process DIMRED_CLUSTER { 116 | 117 | tag "$sample_id" 118 | label 'python_low_process' 119 | maxRetries 2 120 | errorStrategy { task.attempt <= maxRetries ? 'retry' : 'finish' } 121 | publishDir "${params.outdir}/${sample_id}/figures", pattern: 'figures/*/*.png', saveAs: { filename -> "${filename.split("/")[filename.split("/").length - 1]}" }, mode: 'copy', overwrite: params.overwrite_files_on_publish 122 | memory { 1.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 3.GB } 123 | 124 | input: 125 | tuple val(sample_id), path(grid_csv), path(grid_json), path(thumb), path(features_h5ad), val(expansion_factor), val(suffix) 126 | 127 | output: 128 | tuple val(sample_id), file("figures/*/*.png") 129 | 130 | script: 131 | """ 132 | #!/usr/bin/env python 133 | 134 | import os 135 | os.environ["NUMBA_CACHE_DIR"] = "./tmp" 136 | 137 | import sys 138 | import json 139 | import numpy as np 140 | import pandas as pd 141 | import scanpy as sc 142 | import matplotlib.pyplot as plt 143 | 144 | plt.rcParams['figure.dpi'] = ${params.plot_dpi} 145 | 146 | def loadImFeatures(dpath): 147 | df_temp = pd.read_csv(dpath, index_col=[0,1], sep=',').xs(1, level='in_tissue') 148 | df_temp.insert(0, 'original_barcode', df_temp.index.values) 149 | ad = sc.AnnData(X=df_temp.loc[:, df_temp.columns.str.contains('feat')], 150 | obs=df_temp.loc[:, ~df_temp.columns.str.contains('feat')]) 151 | return ad 152 | 153 | def loadAdImage(): 154 | thumbnail = plt.imread("${thumb}") 155 | with open("${grid_json}", 'r') as f: 156 | d = json.load(f) 157 | grid = pd.read_csv("${grid_csv}", index_col=0, header=None) 158 | image = {'library_id': {'images': {'lowres': thumbnail}, 159 | 'metadata': {'chemistry_description': None, 'software_version': None}, 160 | 'scalefactors': {'tissue_lowres_scalef': thumbnail.shape[0]/d['y'], 161 | 'spot_diameter_fullres': d['spot_diameter_fullres']}}}, grid.index.values, grid[[5, 4]].values 162 | return image 163 | 164 | # Load data 165 | ad = sc.read_h5ad("${features_h5ad}") 166 | 167 | # Load image 168 | image = loadAdImage() 169 | ad.uns['spatial'] = image[0] 170 | 171 | ad.obsm['spatial'] = pd.DataFrame(index=image[1], data=image[2]).reindex(ad.obs['original_barcode']).values 172 | 173 | c, r = np.ptp(ad.obs['array_row']), np.ptp(ad.obs['array_col']) 174 | f = 5 175 | if r > c: 176 | figsize = f, f * c/r 177 | else: 178 | figsize = f * r/c, f 179 | 180 | if not os.path.exists('figures/show/'): 181 | os.makedirs('figures/show/') 182 | if not os.path.exists('figures/umap/'): 183 | os.makedirs('figures/umap/') 184 | 185 | plt.rcParams["figure.figsize"] 
= figsize 186 | 187 | print(ad.obs) 188 | print(ad) 189 | 190 | spot_size = ad.uns['spatial']['library_id']['scalefactors']['spot_diameter_fullres'] 191 | print(spot_size) 192 | 193 | spot_size *= ${params.grid_spot_horizontal_spacing} / ${params.grid_spot_diamter} 194 | print(spot_size) 195 | 196 | # Dimensionality reduction 197 | sc.pp.highly_variable_genes(ad, flavor='seurat', n_top_genes=500) 198 | sc.pp.scale(ad) 199 | sc.pp.pca(ad, n_comps=min(30, ad.shape[0]-1), zero_center=False, use_highly_variable=True) 200 | sc.pp.neighbors(ad, use_rep='X_pca') 201 | sc.tl.umap(ad) 202 | 203 | # Clustering 204 | res = 0.5 205 | sc.tl.leiden(ad, key_added='cluster', resolution=res) 206 | plt.rcParams["figure.figsize"] = figsize 207 | sc.pl.spatial(ad, img_key='lowres', color=[None, 'cluster'], spot_size=spot_size, show=False, save='/cluster.png'); 208 | 209 | # UMAP plots 210 | plt.rcParams["figure.figsize"] = (3,3) 211 | sc.pl.umap(ad, color=['cluster'], s=None, show=False, save='/umap_plot_cluster.png'); 212 | """ 213 | } 214 | 215 | -------------------------------------------------------------------------------- /modules/local/spaceranger.nf: -------------------------------------------------------------------------------- 1 | 2 | process SPACERANGER { 3 | 4 | tag "$sample_id" 5 | 6 | input: 7 | tuple val(sample_id), path(fastq), path(image) 8 | path(reference) 9 | 10 | output: 11 | tuple val(sample_id), file("sample/outs/spatial/*"), emit: spatial 12 | tuple val(sample_id), file("sample/outs/raw_feature_bc_matrix/*"), emit: mtx 13 | tuple val(sample_id), file("sample/outs/possorted_genome_bam.bam*"), emit: bam 14 | tuple val(sample_id), file("sample/outs/*summary*"), emit: metrics 15 | 16 | script: 17 | String mem = task.memory 18 | String memgb = mem.split(" ")[0] 19 | """ 20 | tempfastqdir="temp" 21 | mkdir \${tempfastqdir} 22 | cp ${fastq[0]} \${tempfastqdir}/sample_S1_L001_R1_001.fastq 23 | cp ${fastq[1]} \${tempfastqdir}/sample_S1_L001_R2_001.fastq 24 | 25 | spaceranger count \ 26 | --id=sample \ 27 | --sample=sample \ 28 | --fastqs="\${tempfastqdir}" \ 29 | --image=${image} \ 30 | --transcriptome=${reference} \ 31 | --unknown-slide \ 32 | --localcores=${task.cpus} \ 33 | --localmem=${memgb} 34 | 35 | rm -R \${tempfastqdir} 36 | """ 37 | } 38 | 39 | process RETURN_SPACERANGER_ALIGNMENT { 40 | 41 | tag "$sample_id" 42 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 43 | 44 | input: 45 | tuple val(sample_id), file("in_mouse/*"), file("in_human/*"), file("in_spatial/*") 46 | 47 | output: 48 | tuple val(sample_id), path("mouse/*"), path("human/*"), path("spatial/*") 49 | 50 | script: 51 | """ 52 | cp -R in_mouse/ mouse/ 53 | cp -R in_human/ human/ 54 | cp -R in_spatial/ spatial/ 55 | """ 56 | } 57 | 58 | 59 | process RETURN_SPACERANGER_ALIGNMENT_SINGLE { 60 | 61 | tag "$sample_id" 62 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 63 | 64 | input: 65 | tuple val(sample_id), file("in_spacerangerqc/*"), file("in_spatial/*") 66 | 67 | output: 68 | tuple val(sample_id), path("spacerangerqc/*"), path("spatial/*") 69 | 70 | script: 71 | """ 72 | cp -R in_spacerangerqc/ spacerangerqc/ 73 | cp -R in_spatial/ spatial/ 74 | """ 75 | } 76 | -------------------------------------------------------------------------------- /modules/local/superpixel.nf: -------------------------------------------------------------------------------- 1 | 2 | process SUPERPIXELATION { 3 | 4 | tag "$sample_id" 5 | 
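    // Runs bin/superpixelation.py on the tiled full-resolution image and writes
    // segmentation.npy, a superpixel label mask stored at a resolution downsampled by
    // params.superpixel_downsampling_factor; the nuclei-assignment process below maps
    // full-resolution centroids into that same downsampled scale before the lookup.
    // pixels_per_segment and superpixel_compactness read as SLIC-style controls of
    // segment size and shape regularity (an interpretation of the parameter names,
    // not of the script internals).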
label 'process_inception' 6 | maxRetries 1 7 | errorStrategy { task.attempt <= maxRetries ? 'retry' : 'finish' } 8 | memory { 6.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 12.GB } 9 | publishDir "${params.outdir}/${sample_id}/superpixels", pattern: "superpixelation_*.png", mode: 'copy', overwrite: params.overwrite_files_on_publish 10 | cpus 1 11 | 12 | input: 13 | tuple val(sample_id), path(image), val(size) 14 | 15 | output: 16 | tuple val(sample_id), file("segmentation.npy"), emit: main 17 | tuple val(sample_id), file("superpixelation_*.png"), emit: images optional true 18 | 19 | script: 20 | """ 21 | python -u "${projectDir}/bin/superpixelation.py" \ 22 | --inputImagePath "${image}" \ 23 | --segmentationSavePath "segmentation.npy" \ 24 | --pixelsPerSegment ${params.pixels_per_segment} \ 25 | --compactness ${params.superpixel_compactness} \ 26 | --s ${params.superpixel_patch_size} \ 27 | --downsamplingFactor ${params.superpixel_downsampling_factor} 28 | """ 29 | } 30 | 31 | 32 | process EXPORT_DOWN_IMAGE_FOR_CONTOURS { 33 | 34 | tag "$sample_id" 35 | label 'process_inception' 36 | maxRetries 1 37 | errorStrategy { task.attempt <= maxRetries ? 'retry' : 'finish' } 38 | memory { 6.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 5.GB } 39 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 40 | cpus 1 41 | 42 | input: 43 | tuple val(sample_id), path(image), val(size) 44 | 45 | output: 46 | tuple val(sample_id), file("im_down.tiff") 47 | 48 | script: 49 | """ 50 | #!/usr/bin/env python 51 | import tifffile 52 | import numpy as np 53 | 54 | # Convert image to numpy array to remove OME TIFF metadata 55 | f = ${params.superpixel_downsampling_factor} 56 | img = np.array(tifffile.imread("${image}"))[::f, ::f, :3] 57 | print(img.shape) 58 | 59 | print('Saving downsampled image', flush=True) 60 | tifffile.imwrite("im_down.tiff", img, bigtiff=True) 61 | """ 62 | } 63 | 64 | 65 | process EXPORT_SUPERPIXELATION_CONTOURS { 66 | 67 | tag "$sample_id" 68 | label 'process_inception' 69 | maxRetries 1 70 | errorStrategy { task.attempt <= maxRetries ? 'retry' : 'finish' } 71 | memory { 6.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 3.GB } 72 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 73 | cpus 1 74 | 75 | input: 76 | tuple val(sample_id), path(superpixelation), val(size) 77 | 78 | output: 79 | tuple val(sample_id), file("superpixelation.json.gz") 80 | 81 | script: 82 | """ 83 | #!/usr/bin/env python 84 | import numpy as np 85 | 86 | import sys 87 | sys.path.append("${projectDir}/lib") 88 | from superpixels import get_countours_from_mask, save_contours 89 | 90 | with open("${superpixelation}", 'rb') as tempfile: 91 | superpixelation = np.load(tempfile) 92 | print('Superpixelation mask shape:', superpixelation.shape) 93 | 94 | print('Computing contours', flush=True) 95 | contours = get_countours_from_mask(superpixelation) 96 | 97 | print('Saving contours', flush=True) 98 | save_contours(contours, filename='superpixelation.json.gz') 99 | """ 100 | } 101 | 102 | 103 | process CALCULATE_CELLS_OD { 104 | 105 | tag "$sample_id" 106 | label 'process_inception' 107 | maxRetries 0 108 | errorStrategy { task.attempt <= maxRetries ? 
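    // Computes per-nucleus H&E optical-density quantities. To bound memory the script
    // walks the full-resolution image in params.od_block_size blocks, calls
    // calculate_H_E_OD_quantities only on blocks that contain nuclei, and then averages
    // rows of nuclei split across block borders (the groupby(level=0).mean() step)
    // before writing qp.csv.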
'retry' : 'finish' } 109 | memory { 3.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 11.GB } 110 | cpus 1 111 | 112 | input: 113 | tuple val(sample_id), path(img), path(nuclei), path(nuc_seg_measures), val(size) 114 | 115 | output: 116 | tuple val(sample_id), path("qp.csv") 117 | 118 | script: 119 | """ 120 | #!/usr/bin/env python 121 | import numpy as np 122 | import tifffile 123 | import pandas as pd 124 | from tqdm import tqdm 125 | 126 | import sys 127 | sys.path.append("${projectDir}/lib") 128 | from hovernetConv import calculate_H_E_OD_quantities 129 | 130 | # Load nuclei mask 131 | with open("${nuclei}", 'rb') as tempfile: 132 | nuclei = np.load(tempfile) 133 | print('Nuclei segmetation mask shape:', nuclei.shape, nuclei.dtype, flush=True) 134 | 135 | # Load nuc_seg_measures 136 | df_nuc_seg_measures = pd.read_csv("${nuc_seg_measures}", index_col=0) 137 | df_nuc_seg_measures.index = df_nuc_seg_measures.index.astype(str) 138 | print(df_nuc_seg_measures, flush=True) 139 | 140 | # Load image 141 | print('Loading full resolution image', flush=True) 142 | img = np.array(tifffile.imread("${img}"))[:, :, :3] 143 | dims = img.shape[0], img.shape[1] 144 | print(dims, flush=True) 145 | 146 | # Prepare image patches coordinates 147 | s = ${params.od_block_size} 148 | r = [np.append(s*np.array(range(0, int(np.floor(dims[i]/s))+1)), [dims[i]]) for i in range(2)] 149 | coords = [(i,j) for i in range(len(r[0])-1) for j in range(len(r[1])-1)] 150 | print(coords, flush=True) 151 | print() 152 | 153 | # Calculate HE OD quantities by patches 154 | dfs = [] 155 | for ipatch, (i, j) in enumerate(tqdm(coords)): 156 | if (r[0][i+1] - r[0][i] > 0) and (r[1][j+1] - r[1][j] > 0): 157 | patch_nuclei = nuclei[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1]] 158 | if (patch_nuclei!=0).any(): 159 | df_OD_temp = calculate_H_E_OD_quantities(img[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1], :], 160 | patch_nuclei, 161 | (r[0][i], r[1][j]), 162 | df_nuc_seg_measures, 163 | expand_nuclei_distance=${params.expand_nuclei_distance}) 164 | dfs.append(df_OD_temp) 165 | 166 | # Merge patches data, average cells fragments due to patching 167 | df_OD = pd.concat(dfs) 168 | print(df_OD.shape) 169 | df_OD = df_OD.groupby(level=0).mean() 170 | print(df_OD, flush=True) 171 | 172 | # Save data 173 | df_OD.to_csv("qp.csv") 174 | """ 175 | } 176 | 177 | 178 | process ASSIGN_NUCLEI_TO_SUPERPIXELS { 179 | 180 | tag "$sample_id" 181 | label 'process_inception' 182 | maxRetries 1 183 | errorStrategy { task.attempt <= maxRetries ? 
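    // Maps each nucleus centroid (full-resolution x, y from qp.csv) into the
    // downsampled superpixel mask to obtain its superpixel label `uspx`; the `ipatch`
    // column appears to recover a patch index encoded in the thousands digits of that
    // label. Each optical-density column is then rescaled so its median over all cells
    // equals 0.18, and `spx_size` records the superpixel area in full-resolution pixels
    // (label counts in the downsampled mask times the downsampling factor squared).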
'retry' : 'finish' } 184 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 185 | memory { 3.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 3.GB } 186 | cpus 1 187 | 188 | input: 189 | tuple val(sample_id), path(superpixelation), path(qp_od), val(size) 190 | 191 | output: 192 | tuple val(sample_id), path("od_per_cell.csv.gz") 193 | 194 | script: 195 | """ 196 | #!/usr/bin/env python 197 | import numpy as np 198 | import pandas as pd 199 | 200 | # Load pre-calculated nuclear and cytoplasmic quantities 201 | df_qp_od = pd.read_csv("${qp_od}", index_col=0) 202 | print(df_qp_od) 203 | 204 | # Load superpixelation mask 205 | with open("${superpixelation}", 'rb') as tempfile: 206 | superpixelation = np.load(tempfile) 207 | print('Superpixelation mask shape:', superpixelation.shape) 208 | 209 | # Assign each cell to a superpixel; superpixelation is in downsampled coordinates 210 | df_qp_od['xd'] = (df_qp_od['x'] / ${params.superpixel_downsampling_factor}).astype(int) 211 | df_qp_od['yd'] = (df_qp_od['y'] / ${params.superpixel_downsampling_factor}).astype(int) 212 | df_qp_od['uspx'] = df_qp_od.apply(lambda se: superpixelation[int(se['yd']), int(se['xd'])], axis=1) 213 | df_qp_od['ipatch'] = df_qp_od['uspx'].apply(lambda v: int((v - v % 1000) / 1000)) 214 | 215 | # Sort dataframe index 216 | df_qp_od.index = df_qp_od.index.astype(int) 217 | df_qp_od = df_qp_od.sort_index() 218 | df_qp_od.index = df_qp_od.index.astype(str) 219 | print(df_qp_od) 220 | 221 | # Normalize quantities within each ipatch identifier 222 | for col in df_qp_od.columns[~df_qp_od.columns.isin(['x', 'y', 'xd', 'yd', 'uspx', 'ipatch'])]: 223 | df_qp_od[col] = df_qp_od[col] * 0.18 / df_qp_od[col].quantile(0.5) 224 | 225 | se_sizes = pd.Series(superpixelation.ravel()).value_counts().sort_index()*(${params.superpixel_downsampling_factor}**2) 226 | df_qp_od['spx_size'] = (se_sizes.loc[df_qp_od['uspx'].values]).values 227 | 228 | # Export data 229 | df_qp_od.to_csv('od_per_cell.csv.gz') 230 | """ 231 | } 232 | -------------------------------------------------------------------------------- /modules/local/velocyto.nf: -------------------------------------------------------------------------------- 1 | 2 | process CELLSORT_BAM { 3 | 4 | tag "$sample_id" 5 | label "samtools" 6 | 7 | input: 8 | tuple val(sample_id), path("bam/*") 9 | 10 | output: 11 | tuple val(sample_id), file("cellsorted_possorted_genome_bam.bam") 12 | 13 | script: 14 | """ 15 | samtools sort bam/possorted_genome_bam.bam -o cellsorted_possorted_genome_bam.bam -t CB -O BAM -@ ${task.cpus} 16 | """ 17 | } 18 | 19 | 20 | process SPLICING_QUANTIFICATION { 21 | 22 | tag "$sample_id" 23 | label "splicing_quantification" 24 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 25 | 26 | input: 27 | tuple val(sample_id), path("sample/outs/*"), path("sample/outs/*"), path("sample/outs/filtered_feature_bc_matrix/*") 28 | path(reference) 29 | val(species) 30 | 31 | output: 32 | tuple val(sample_id), file("${species}/velocyto.loom") 33 | 34 | script: 35 | """ 36 | velocyto run10x sample ${reference}/genes/genes.gtf 37 | 38 | mkdir ${species} 39 | cp "sample/velocyto/sample.loom" "${species}/velocyto.loom" 40 | """ 41 | } 42 | -------------------------------------------------------------------------------- /nextflow.config: -------------------------------------------------------------------------------- 1 | 2 | // Global default params, used in 
configs 3 | params { 4 | 5 | // Default workflow 6 | workflow = "arbitrary_grid" // "two_references" "one_reference" "arbitrary_grid" "deconvolution_indices" 7 | 8 | input = "./assets/samplesheet.csv" 9 | 10 | outdir = "./results" 11 | tracedir = "${params.outdir}/pipeline_info" 12 | 13 | memory_scale_factor = 1.0 14 | 15 | bind = "" 16 | overwrite_files_on_publish = false 17 | 18 | } 19 | 20 | includeConfig 'conf/containers.config' 21 | 22 | 23 | // Default parameters 24 | if (params.workflow == 'one_reference') { 25 | includeConfig 'conf/analysis-one.config' 26 | 27 | if (params.do_img_subworkflow) { 28 | includeConfig 'conf/analysis-img.config' 29 | } 30 | } 31 | 32 | if (params.workflow == 'two_references' || params.workflow == 'deconvolution_indices') { 33 | includeConfig 'conf/analysis-two.config' 34 | 35 | if (params.do_img_subworkflow) { 36 | includeConfig 'conf/analysis-img.config' 37 | } 38 | } 39 | 40 | if (params.workflow == 'arbitrary_grid') { 41 | includeConfig 'conf/analysis-img.config' 42 | } 43 | 44 | process { 45 | resourceLimits = [ cpus: 70, memory: 768.GB, time: 72.h ] 46 | 47 | withName: GUNZIP { 48 | cpus = 1 49 | memory = 20.GB 50 | container = params.container_vips 51 | clusterOptions = '--time=01:00:00' 52 | } 53 | withName: GUNZIP_FASTA { 54 | cpus = 1 55 | memory = 20.GB 56 | container = params.container_vips 57 | clusterOptions = '--time=01:00:00' 58 | } 59 | withName: DECONVOLUTION_XENOME { 60 | cpus = 8 61 | memory = 50.GB 62 | container = params.container_xenome 63 | clusterOptions = '--time=06:00:00' 64 | } 65 | withName: DECONVOLUTION_XENGSORT { 66 | cpus = 8 67 | memory = 50.GB 68 | container = params.container_xengsort 69 | clusterOptions = '--time=06:00:00' 70 | } 71 | withName: XENOME_GENERATE_INDEX { 72 | cpus = 16 73 | memory = 100.GB 74 | container = params.container_xenome 75 | clusterOptions = '--time=06:00:00' 76 | } 77 | withName: XENGSORT_GENERATE_INDEX { 78 | cpus = 32 79 | memory = 50.GB 80 | container = params.container_xengsort 81 | clusterOptions = '--time=01:00:00' 82 | } 83 | withLabel: low_process { 84 | cpus = 1 85 | memory = 8.GB 86 | container = params.container_fastqtools 87 | } 88 | withLabel: samtools { 89 | cpus = 4 90 | memory = 64.GB 91 | container = params.container_samtools 92 | clusterOptions = '--time=03:00:00' 93 | } 94 | withLabel: splicing_quantification { 95 | cpus = 1 96 | memory = 64.GB 97 | container = params.container_velocyto 98 | clusterOptions = '--time=09:00:00' 99 | } 100 | withLabel: python_low_process { 101 | cpus = 1 102 | memory = 12.GB 103 | container = params.container_python 104 | clusterOptions = '--time=01:00:00' 105 | } 106 | withName: SPACERANGER { 107 | cpus = 8 108 | memory = 90.GB 109 | container = params.container_spaceranger 110 | clusterOptions = '--time=24:00:00' 111 | } 112 | withLabel: bafextract { 113 | cpus = 1 114 | memory = 36.GB 115 | container = params.container_bafextract 116 | clusterOptions = '--time=03:00:00' 117 | } 118 | withLabel: vips_process { 119 | cpus = 1 120 | memory = 3.GB 121 | container = params.container_vips 122 | } 123 | withLabel: process_ome { 124 | cpus = 1 125 | memory = 36.GB 126 | container = params.container_ome 127 | } 128 | withLabel: process_estimate_size { 129 | cpus = 1 130 | memory = 2.GB 131 | container = params.container_inception 132 | } 133 | withLabel: process_extract { 134 | cpus = 1 135 | container = params.container_inception 136 | clusterOptions = '--time=03:00:00' 137 | } 138 | withLabel: color_normalization_process { 139 | cpus = 1 140 | 
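    // Every withLabel/withName block follows the same pattern: a container from
    // conf/containers.config plus CPU and, where needed, memory and walltime defaults.
    // These can be overridden without editing this file by passing an extra config at
    // run time, for example a hypothetical my.config supplied with
    // `nextflow run ... -c my.config`:
    //
    //   process {
    //       withLabel: stain_normalization_process {
    //           cpus   = 4
    //           memory = 24.GB
    //       }
    //   }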
container = params.container_stainnet 141 | clusterOptions = '--time=16:00:00' 142 | } 143 | withLabel: stain_normalization_process { 144 | cpus = 1 145 | container = params.container_staintools 146 | clusterOptions = '--time=16:00:00' 147 | } 148 | withLabel: python_process_low { 149 | cpus = 1 150 | container = params.container_inception 151 | clusterOptions = '--time=02:00:00' 152 | } 153 | withLabel: process_focus { 154 | cpus = 16 155 | container = params.container_deepfocus 156 | clusterOptions = '--time=16:00:00' 157 | } 158 | withLabel: process_inception { 159 | cpus = 10 160 | container = params.container_inception 161 | clusterOptions = '--time=16:00:00' 162 | } 163 | withLabel: process_uni { 164 | cpus = 10 165 | container = params.container_uni_conch 166 | clusterOptions = '--time=16:00:00' 167 | } 168 | withLabel: process_conch { 169 | cpus = 10 170 | container = params.container_uni_conch 171 | clusterOptions = '--time=16:00:00' 172 | } 173 | withLabel: process_ctranspath { 174 | cpus = 10 175 | container = params.container_ctranspath 176 | clusterOptions = '--time=16:00:00' 177 | } 178 | withLabel: process_mocov3 { 179 | cpus = 1 180 | container = params.container_ctranspath 181 | resourceLimits = [ cpus: 46, memory: 170.GB, time: 6.h ] 182 | clusterOptions = '--time=03:00:00 -q gpu_inference --gres=gpu:1 --export=ALL' 183 | containerOptions = '--nv' 184 | queue = 'gpu_v100' 185 | } 186 | withLabel: process_stardist { 187 | cpus = 4 188 | container = params.container_inception 189 | clusterOptions = '--time=16:00:00' 190 | } 191 | withLabel: process_hovernet_low { 192 | cpus = 1 193 | memory = 3.GB 194 | container = params.container_hovernet 195 | clusterOptions = '--time=02:00:00' 196 | } 197 | withLabel: process_hovernet { 198 | container = params.container_hovernet 199 | if (params.do_nuclear_segmentation || params.sample_tiles_subworkflow) { 200 | if (params.hovernet_device_mode == 'gpu') { 201 | resourceLimits = [ cpus: 46, memory: 170.GB, time: 6.h ] 202 | cpus = 8 203 | clusterOptions = '--time=06:00:00 -q gpu_inference --gres=gpu:1 --export=ALL' 204 | containerOptions = '--nv' 205 | queue = 'gpu_v100' 206 | } 207 | else if (params.hovernet_device_mode == 'cpu') { 208 | cpus = 10 209 | clusterOptions = '--time=48:00:00' 210 | } 211 | } 212 | } 213 | withLabel: process_post_hovernet { 214 | container = params.container_hovernet 215 | cpus = 16 216 | } 217 | } 218 | 219 | 220 | profiles { 221 | slurm { 222 | executor { 223 | name = "slurm" 224 | submitRateLimit = '100/1s' 225 | queueSize = 100 226 | } 227 | process.queue = "compute" 228 | process.clusterOptions = '-q batch' 229 | process.module = "slurm" 230 | } 231 | singularity { 232 | process.module = 'singularity' 233 | singularity.enabled = true 234 | singularity.envWhitelist = 'CUDA_VISIBLE_DEVICES' 235 | singularity.autoMounts = true 236 | docker.enabled = false 237 | podman.enabled = false 238 | shifter.enabled = false 239 | charliecloud.enabled = false 240 | singularity.runOptions = "-B $projectDir " + params.bind 241 | } 242 | test { 243 | includeConfig 'conf/test.config' 244 | } 245 | } 246 | 247 | 248 | // Export these variables to prevent local Python/R libraries from conflicting with those in the container 249 | // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. 250 | // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. 
Once we have a common agreement on where to keep Julia packages, this is adjustable. 251 | env { 252 | PYTHONNOUSERSITE = 1 253 | R_PROFILE_USER = "/.Rprofile" 254 | R_ENVIRON_USER = "/.Renviron" 255 | JULIA_DEPOT_PATH = "/usr/local/share/julia" 256 | } 257 | 258 | // Capture exit codes from upstream processes when piping 259 | process.shell = ['/bin/bash', '-euo', 'pipefail'] 260 | 261 | def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') 262 | timeline { 263 | enabled = true 264 | file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" 265 | } 266 | report { 267 | enabled = true 268 | file = "${params.tracedir}/execution_report_${trace_timestamp}.html" 269 | } 270 | trace { 271 | enabled = true 272 | file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" 273 | } 274 | dag { 275 | enabled = true 276 | file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" 277 | } 278 | 279 | manifest { 280 | author = 'Sergii Domanskyi' 281 | homePage = 'https://github.com/TheJacksonLaboratory/STQ' 282 | description = 'Spatial Transcriptomics Quantification' 283 | mainScript = 'main.nf' 284 | } 285 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | 2 | workflow="arbitrary_grid" ### "two_references" "one_reference" "arbitrary_grid" "deconvolution_indices" 3 | samplesheet="./assets/samplesheet_test.csv" 4 | workdir="./work" 5 | outdir="./results" 6 | binddir="/projects/" 7 | 8 | #---------------------------------------------------------------------------------------------------- 9 | 10 | ./check.sh 11 | 12 | SLURM_SUBMIT_DIR=`pwd` 13 | 14 | sbatch \ 15 | submit.sb $workflow "$samplesheet" "$workdir" "$outdir" "$binddir" 16 | -------------------------------------------------------------------------------- /submit.sb: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | #SBATCH -p compute 3 | #SBATCH -q batch 4 | #SBATCH -t 2-00:00:00 5 | #SBATCH --mem=56G 6 | #SBATCH --ntasks=1 7 | 8 | cd $SLURM_SUBMIT_DIR 9 | 10 | nextflow run main.nf \ 11 | -w $3 \ 12 | -profile slurm,singularity \ 13 | -resume \ 14 | --input=$2 \ 15 | --outdir=$4 \ 16 | --workflow=$1 \ 17 | --bind="-B $5" -------------------------------------------------------------------------------- /subworkflows/imaging.nf: -------------------------------------------------------------------------------- 1 | 2 | include { LOAD_SAMPLE_INFO; 3 | GET_IMAGE_SIZE; 4 | EXTRACT_ROI; 5 | COLOR_NORMALIZATION; 6 | STAIN_NORMALIZATION; 7 | CONVERT_TO_TILED_TIFF; 8 | RESIZE_IMAGE; 9 | GET_THUMB; 10 | MAKE_TINY_THUMB; 11 | GET_PIXEL_MASK; 12 | TILE_WSI; 13 | GET_TILE_MASK; 14 | GET_TISSUE_MASK; 15 | SELECT_SAVE_TILES; 16 | GET_INCEPTION_FEATURES_TILES; 17 | GET_INCEPTION_FEATURES; 18 | GET_CTRANSPATH_FEATURES; 19 | GET_MOCOV3_FEATURES; 20 | GET_UNI_FEATURES; 21 | GET_CONCH_FEATURES; 22 | } from '../modules/local/tasks' 23 | 24 | include { CHECK_FOCUS; 25 | } from '../modules/local/focus' 26 | 27 | include { SUPERPIXELATION; 28 | EXPORT_DOWN_IMAGE_FOR_CONTOURS; 29 | CALCULATE_CELLS_OD; 30 | ASSIGN_NUCLEI_TO_SUPERPIXELS; 31 | EXPORT_SUPERPIXELATION_CONTOURS; 32 | } from '../modules/local/superpixel' 33 | 34 | include { GET_NUCLEI_MASK_FROM_HOVERNET_JSON; 35 | INFER_HOVERNET_TILES; 36 | GET_NUCLEI_TYPE_COUNTS; 37 | INFER_HOVERNET; 38 | INFER_PREP_HOVERNET; 39 | INFER_STARDIST; 40 | COMPRESS_JSON_FILE; 41 | COMPUTE_SEGMENTATION_DATA; 
42 | GENERATE_PERSPOT_SEGMENTATION_DATA; 43 | } from '../modules/local/hovernet' 44 | 45 | include { CONVERT_TO_PYRAMIDAL_OME; 46 | EXTRACT_IMAGE_METADATA; 47 | } from '../modules/local/ome' 48 | 49 | include { CONVERT_SEGMENTATION_DATA; 50 | CONVERT_CSV_TO_ANNDATA; 51 | } from '../modules/local/merge' 52 | 53 | include { DIMRED_CLUSTER; 54 | DIMRED_CLUSTER_MORPH; 55 | } from '../modules/local/postprocessing' 56 | 57 | workflow IMG { 58 | 59 | take: 60 | samples 61 | 62 | main: 63 | images = samples.map{[it[0], (it[1].image)]} 64 | 65 | LOAD_SAMPLE_INFO ( samples 66 | .join(images) ) 67 | 68 | GET_IMAGE_SIZE ( LOAD_SAMPLE_INFO.out.main ) 69 | 70 | if ( params.short_workflow ) { 71 | GET_THUMB ( LOAD_SAMPLE_INFO.out.image ) 72 | 73 | convertedimage = LOAD_SAMPLE_INFO.out.image 74 | thumbimage = GET_THUMB.out 75 | imagesize = GET_IMAGE_SIZE.out 76 | } 77 | else { 78 | if ( params.export_image_metadata ) { 79 | EXTRACT_IMAGE_METADATA ( LOAD_SAMPLE_INFO.out.main 80 | .join(GET_IMAGE_SIZE.out) ) 81 | } 82 | 83 | EXTRACT_ROI ( LOAD_SAMPLE_INFO.out.main 84 | .join(GET_IMAGE_SIZE.out) ) 85 | 86 | RESIZE_IMAGE ( EXTRACT_ROI.out.image ) 87 | 88 | imageroi = RESIZE_IMAGE.out.full 89 | imagesize = RESIZE_IMAGE.out.size 90 | 91 | if ( params.stain_normalization ) { 92 | if ( params.macenko_normalization ) { 93 | STAIN_NORMALIZATION ( imageroi 94 | .join(imagesize) ) 95 | 96 | normimage = STAIN_NORMALIZATION.out 97 | } 98 | else { 99 | COLOR_NORMALIZATION ( imageroi 100 | .join(imagesize) ) 101 | 102 | normimage = COLOR_NORMALIZATION.out 103 | } 104 | 105 | CONVERT_TO_TILED_TIFF ( normimage ) 106 | } 107 | else 108 | CONVERT_TO_TILED_TIFF ( imageroi ) 109 | 110 | convertedimage = CONVERT_TO_TILED_TIFF.out.full 111 | thumbimage = CONVERT_TO_TILED_TIFF.out.thumb 112 | 113 | if ( params.export_image ) { 114 | CONVERT_TO_PYRAMIDAL_OME ( convertedimage ) 115 | } 116 | } 117 | 118 | MAKE_TINY_THUMB ( thumbimage ) 119 | 120 | if ( params.check_focus ) { 121 | CHECK_FOCUS ( convertedimage 122 | .join(imagesize) ) 123 | } 124 | 125 | if ( params.do_superpixels ) { 126 | SUPERPIXELATION ( convertedimage 127 | .join(imagesize) ) 128 | 129 | if ( params.export_superpixels_contours ) { 130 | EXPORT_DOWN_IMAGE_FOR_CONTOURS ( convertedimage 131 | .join(imagesize) ) 132 | 133 | EXPORT_SUPERPIXELATION_CONTOURS ( SUPERPIXELATION.out.main 134 | .join(imagesize) ) 135 | } 136 | } 137 | 138 | 139 | GET_PIXEL_MASK ( thumbimage 140 | .join(imagesize) ) 141 | 142 | TILE_WSI ( convertedimage 143 | .join(LOAD_SAMPLE_INFO.out.grid) 144 | .join(imagesize) 145 | .join(LOAD_SAMPLE_INFO.out.mpp) ) 146 | 147 | GET_TILE_MASK ( thumbimage 148 | .join(GET_PIXEL_MASK.out) 149 | .join(TILE_WSI.out.grid) 150 | .join(imagesize)) 151 | 152 | 153 | // Tiling sub-workflow for a small number of tiles 154 | if ( params.sample_tiles_subworkflow ) { 155 | SELECT_SAVE_TILES ( convertedimage 156 | .join(TILE_WSI.out.grid) 157 | .join(GET_TILE_MASK.out.mask) ) 158 | 159 | INFER_HOVERNET_TILES ( SELECT_SAVE_TILES.out.tiles ) 160 | 161 | GET_NUCLEI_TYPE_COUNTS ( INFER_HOVERNET_TILES.out.json ) 162 | } 163 | 164 | 165 | if ( params.extract_tile_features ) { 166 | 167 | if (params.extract_transpath_features) { 168 | GET_CTRANSPATH_FEATURES ( convertedimage 169 | .join(GET_TILE_MASK.out.mask) 170 | .join(TILE_WSI.out.grid) 171 | .join(LOAD_SAMPLE_INFO.out.grid) 172 | .join(imagesize) 173 | .combine(Channel.fromList(params.expansion_factor)) ) 174 | 175 | ctranspath_features_out = GET_CTRANSPATH_FEATURES.out 176 | } 177 | 178 | if
(params.extract_mocov3_features) { 179 | GET_MOCOV3_FEATURES ( convertedimage 180 | .join(GET_TILE_MASK.out.mask) 181 | .join(TILE_WSI.out.grid) 182 | .join(LOAD_SAMPLE_INFO.out.grid) 183 | .join(imagesize) 184 | .combine(Channel.fromList(params.expansion_factor)) ) 185 | 186 | mocov3_features_out = GET_MOCOV3_FEATURES.out 187 | } 188 | 189 | if (params.extract_inception_features) { 190 | GET_INCEPTION_FEATURES ( convertedimage 191 | .join(GET_TILE_MASK.out.mask) 192 | .join(TILE_WSI.out.grid) 193 | .join(LOAD_SAMPLE_INFO.out.grid) 194 | .join(imagesize) 195 | .combine(Channel.fromList(params.expansion_factor)) ) 196 | 197 | inception_features_out = GET_INCEPTION_FEATURES.out 198 | } 199 | 200 | if (params.extract_uni_features) { 201 | GET_UNI_FEATURES ( convertedimage 202 | .join(GET_TILE_MASK.out.mask) 203 | .join(TILE_WSI.out.grid) 204 | .join(LOAD_SAMPLE_INFO.out.grid) 205 | .join(imagesize) 206 | .combine(Channel.fromList(params.expansion_factor)) ) 207 | 208 | uni_features_out = GET_UNI_FEATURES.out 209 | } 210 | 211 | if (params.extract_conch_features) { 212 | GET_CONCH_FEATURES ( convertedimage 213 | .join(GET_TILE_MASK.out.mask) 214 | .join(TILE_WSI.out.grid) 215 | .join(LOAD_SAMPLE_INFO.out.grid) 216 | .join(imagesize) 217 | .combine(Channel.fromList(params.expansion_factor)) ) 218 | 219 | conch_features_out = GET_CONCH_FEATURES.out 220 | } 221 | 222 | // Default features 223 | features_out = channel.empty() 224 | 225 | if (params.extract_transpath_features) { 226 | features_out = features_out.concat( ctranspath_features_out ) 227 | } 228 | if (params.extract_mocov3_features) { 229 | features_out = features_out.concat( mocov3_features_out ) 230 | } 231 | if (params.extract_inception_features) { 232 | features_out = features_out.concat( inception_features_out ) 233 | } 234 | if (params.extract_uni_features) { 235 | features_out = features_out.concat( uni_features_out ) 236 | } 237 | if (params.extract_conch_features) { 238 | features_out = features_out.concat( conch_features_out ) 239 | } 240 | 241 | if ( params.do_clustering ) { 242 | if ( params.do_imaging_anndata ) { 243 | CONVERT_CSV_TO_ANNDATA ( features_out 244 | .filter{ it[2]== params.expansion_factor_for_clustering } 245 | .filter{ it[3] == params.suffix_for_clustering } ) 246 | } 247 | } 248 | } 249 | 250 | if ( params.do_nuclear_segmentation ) { 251 | 252 | GET_TISSUE_MASK ( TILE_WSI.out.grid 253 | .join(GET_TILE_MASK.out.mask) 254 | .join(imagesize) ) 255 | 256 | if ( params.hovernet_segmentation ) { 257 | INFER_PREP_HOVERNET ( convertedimage 258 | .join(GET_TISSUE_MASK.out) 259 | .join(imagesize) ) 260 | 261 | INFER_HOVERNET ( convertedimage 262 | .join(GET_TISSUE_MASK.out) 263 | .join(imagesize) 264 | .join(INFER_PREP_HOVERNET.out) ) 265 | 266 | jsonout = INFER_HOVERNET.out.json 267 | 268 | GET_NUCLEI_MASK_FROM_HOVERNET_JSON ( convertedimage 269 | .join(jsonout) 270 | .join(imagesize) ) 271 | 272 | segmaskout = GET_NUCLEI_MASK_FROM_HOVERNET_JSON.out 273 | } 274 | else { 275 | INFER_STARDIST ( convertedimage 276 | .join(GET_TISSUE_MASK.out) 277 | .join(imagesize) ) 278 | 279 | jsonout = INFER_STARDIST.out.json 280 | segmaskout = INFER_STARDIST.out.mask 281 | } 282 | 283 | COMPRESS_JSON_FILE ( jsonout ) 284 | 285 | COMPUTE_SEGMENTATION_DATA ( jsonout 286 | .join(imagesize) ) 287 | 288 | if ( params.do_superpixels ) { 289 | CALCULATE_CELLS_OD ( convertedimage 290 | .join(segmaskout) 291 | .join(COMPUTE_SEGMENTATION_DATA.out) 292 | .join(imagesize) ) 293 | 294 | ASSIGN_NUCLEI_TO_SUPERPIXELS ( SUPERPIXELATION.out.main 295 
| .join(CALCULATE_CELLS_OD.out) 296 | .join(imagesize) ) 297 | } 298 | 299 | GENERATE_PERSPOT_SEGMENTATION_DATA ( TILE_WSI.out.grid 300 | .join(COMPUTE_SEGMENTATION_DATA.out) 301 | .join(imagesize) ) 302 | 303 | if ( params.do_clustering ) { 304 | if ( params.do_imaging_anndata ) { 305 | features_selected_out = CONVERT_CSV_TO_ANNDATA.out 306 | .filter{ it[2]== params.expansion_factor_for_clustering } 307 | .filter{ it[3] == params.suffix_for_clustering } 308 | 309 | DIMRED_CLUSTER_MORPH ( TILE_WSI.out.grid 310 | .join(thumbimage) 311 | .join(GENERATE_PERSPOT_SEGMENTATION_DATA.out.data) 312 | .join(features_selected_out) ) 313 | } 314 | } 315 | } 316 | else { 317 | if ( params.do_clustering ) { 318 | if ( params.do_imaging_anndata ) { 319 | features_selected_out = CONVERT_CSV_TO_ANNDATA.out 320 | .filter{ it[2]== params.expansion_factor_for_clustering } 321 | .filter{ it[3] == params.suffix_for_clustering } 322 | 323 | DIMRED_CLUSTER ( TILE_WSI.out.grid 324 | .join(thumbimage) 325 | .join(features_selected_out) ) 326 | } 327 | } 328 | } 329 | } 330 | -------------------------------------------------------------------------------- /subworkflows/sequencing.nf: -------------------------------------------------------------------------------- 1 | 2 | include { LOAD_SAMPLE_INFO } from '../modules/local/load' 3 | 4 | include { GUNZIP as UNPACK_FASTQ; 5 | } from '../modules/local/gunzip' 6 | 7 | include { DECONVOLUTION_XENOME; 8 | DECONVOLUTION_XENGSORT; 9 | SORT_FASTQ as SORT_FASTQ_MOUSE; 10 | SORT_FASTQ as SORT_FASTQ_HUMAN; 11 | } from '../modules/local/deconvolution' 12 | 13 | include { SPACERANGER as SPACERANGER_MOUSE; 14 | SPACERANGER as SPACERANGER_HUMAN; 15 | RETURN_SPACERANGER_ALIGNMENT; 16 | } from '../modules/local/spaceranger' 17 | 18 | include { GET_REFERENCE_PILEUP as GET_REFERENCE_PILEUP_MOUSE; 19 | GET_REFERENCE_PILEUP as GET_REFERENCE_PILEUP_HUMAN; 20 | GET_PILEUP_OF_BAM as GET_PILEUP_OF_BAM_MOUSE; 21 | GET_PILEUP_OF_BAM as GET_PILEUP_OF_BAM_HUMAN; 22 | GET_SNV_FROM_PILEUP as GET_SNV_FROM_PILEUP_MOUSE; 23 | GET_SNV_FROM_PILEUP as GET_SNV_FROM_PILEUP_HUMAN; 24 | } from '../modules/local/bafextract' 25 | 26 | include { CELLSORT_BAM as CELLSORT_BAM_MOUSE; 27 | CELLSORT_BAM as CELLSORT_BAM_HUMAN; 28 | SPLICING_QUANTIFICATION as SPLICING_QUANTIFICATION_MOUSE; 29 | SPLICING_QUANTIFICATION as SPLICING_QUANTIFICATION_HUMAN; 30 | } from '../modules/local/velocyto' 31 | 32 | include { MERGE_MTX; 33 | RETURN_SEPARATE_MTX; 34 | } from '../modules/local/merge' 35 | 36 | 37 | workflow SEQ { 38 | 39 | take: 40 | samples 41 | 42 | main: 43 | fastqs = samples.map{[it[0], (it[1].fastq)]} 44 | images = samples.map{[it[0], (it[1].image)]} 45 | 46 | LOAD_SAMPLE_INFO ( samples 47 | .join(fastqs) 48 | .join(images) ) 49 | 50 | UNPACK_FASTQ ( LOAD_SAMPLE_INFO.out.fastq ) 51 | 52 | if ( params.deconvolution_tool == "xenome" ) { 53 | DECONVOLUTION_XENOME ( UNPACK_FASTQ.out, 54 | file("${params.deconvolution_indices_path}"), 55 | params.deconvolution_indices_name ) 56 | 57 | deconvolution_human = DECONVOLUTION_XENOME.out.human 58 | deconvolution_mouse = DECONVOLUTION_XENOME.out.mouse 59 | } 60 | else if (params.deconvolution_tool == "xengsort") { 61 | DECONVOLUTION_XENGSORT ( UNPACK_FASTQ.out, 62 | file("${params.deconvolution_indices_path}"), 63 | params.deconvolution_indices_name ) 64 | 65 | deconvolution_human = DECONVOLUTION_XENGSORT.out.human 66 | deconvolution_mouse = DECONVOLUTION_XENGSORT.out.mouse 67 | 68 | } 69 | 70 | 71 | SORT_FASTQ_MOUSE ( deconvolution_mouse ) 72 | 73 | SORT_FASTQ_HUMAN ( 
deconvolution_human ) 74 | 75 | 76 | SPACERANGER_MOUSE ( SORT_FASTQ_MOUSE.out 77 | .join(LOAD_SAMPLE_INFO.out.image), 78 | file("${params.mouse_reference_genome}") ) 79 | 80 | SPACERANGER_HUMAN ( SORT_FASTQ_HUMAN.out 81 | .join(LOAD_SAMPLE_INFO.out.image), 82 | file("${params.human_reference_genome}") ) 83 | 84 | RETURN_SPACERANGER_ALIGNMENT ( SPACERANGER_MOUSE.out.metrics 85 | .join(SPACERANGER_HUMAN.out.metrics) 86 | .join(SPACERANGER_HUMAN.out.spatial) ) 87 | 88 | if ( params.do_snv_extract ) { 89 | GET_REFERENCE_PILEUP_MOUSE ( file("${params.mouse_reference_genome}") ) 90 | 91 | GET_REFERENCE_PILEUP_HUMAN ( file("${params.human_reference_genome}") ) 92 | 93 | 94 | GET_PILEUP_OF_BAM_MOUSE ( SPACERANGER_MOUSE.out.bam, 95 | GET_REFERENCE_PILEUP_MOUSE.out ) 96 | 97 | GET_PILEUP_OF_BAM_HUMAN ( SPACERANGER_HUMAN.out.bam, 98 | GET_REFERENCE_PILEUP_HUMAN.out ) 99 | 100 | 101 | GET_SNV_FROM_PILEUP_MOUSE ( GET_PILEUP_OF_BAM_MOUSE.out, 102 | GET_REFERENCE_PILEUP_MOUSE.out, 103 | "mouse" ) 104 | 105 | GET_SNV_FROM_PILEUP_HUMAN ( GET_PILEUP_OF_BAM_HUMAN.out, 106 | GET_REFERENCE_PILEUP_HUMAN.out, 107 | "human" ) 108 | } 109 | 110 | if ( params.do_splicing_quantification ) { 111 | CELLSORT_BAM_MOUSE ( SPACERANGER_MOUSE.out.bam ) 112 | 113 | CELLSORT_BAM_HUMAN ( SPACERANGER_HUMAN.out.bam ) 114 | 115 | 116 | SPLICING_QUANTIFICATION_MOUSE ( CELLSORT_BAM_MOUSE.out 117 | .join(SPACERANGER_MOUSE.out.bam) 118 | .join(SPACERANGER_MOUSE.out.mtx), 119 | file("${params.mouse_reference_genome}"), 120 | "mouse" ) 121 | 122 | SPLICING_QUANTIFICATION_HUMAN ( CELLSORT_BAM_HUMAN.out 123 | .join(SPACERANGER_HUMAN.out.bam) 124 | .join(SPACERANGER_HUMAN.out.mtx), 125 | file("${params.human_reference_genome}"), 126 | "human" ) 127 | } 128 | 129 | if ( params.do_merge_mtx ) { 130 | MERGE_MTX ( SPACERANGER_MOUSE.out.mtx 131 | .join(SPACERANGER_HUMAN.out.mtx) ) 132 | } 133 | else { 134 | RETURN_SEPARATE_MTX ( SPACERANGER_MOUSE.out.mtx 135 | .join(SPACERANGER_HUMAN.out.mtx) ) 136 | } 137 | 138 | emit: 139 | SPACERANGER_HUMAN.out.spatial 140 | } 141 | -------------------------------------------------------------------------------- /subworkflows/sequencing_single.nf: -------------------------------------------------------------------------------- 1 | 2 | include { LOAD_SAMPLE_INFO; 3 | } from '../modules/local/load' 4 | 5 | include { GUNZIP as UNPACK_FASTQ; 6 | } from '../modules/local/gunzip' 7 | 8 | include { SPACERANGER; 9 | RETURN_SPACERANGER_ALIGNMENT_SINGLE; 10 | } from '../modules/local/spaceranger' 11 | 12 | include { GET_REFERENCE_PILEUP; 13 | GET_PILEUP_OF_BAM; 14 | GET_SNV_FROM_PILEUP; 15 | } from '../modules/local/bafextract' 16 | 17 | include { CELLSORT_BAM; 18 | SPLICING_QUANTIFICATION; 19 | } from '../modules/local/velocyto' 20 | 21 | include { RETURN_MTX; 22 | } from '../modules/local/merge' 23 | 24 | 25 | workflow SEQ { 26 | 27 | take: 28 | samples 29 | 30 | main: 31 | fastqs = samples.map{[it[0], (it[1].fastq)]} 32 | images = samples.map{[it[0], (it[1].image)]} 33 | 34 | LOAD_SAMPLE_INFO ( samples 35 | .join(fastqs) 36 | .join(images) ) 37 | 38 | UNPACK_FASTQ ( LOAD_SAMPLE_INFO.out.fastq ) 39 | 40 | SPACERANGER ( UNPACK_FASTQ.out 41 | .join(LOAD_SAMPLE_INFO.out.image), 42 | file("${params.reference_genome}") ) 43 | 44 | RETURN_SPACERANGER_ALIGNMENT_SINGLE ( SPACERANGER.out.metrics 45 | .join(SPACERANGER.out.spatial) ) 46 | 47 | RETURN_MTX ( SPACERANGER.out.mtx ) 48 | 49 | 50 | if ( params.do_snv_extract ) { 51 | 52 | GET_REFERENCE_PILEUP ( file("${params.reference_genome}") ) 53 | 54 | 55 | GET_PILEUP_OF_BAM 
( SPACERANGER.out.bam, 56 | GET_REFERENCE_PILEUP.out ) 57 | 58 | 59 | GET_SNV_FROM_PILEUP ( GET_PILEUP_OF_BAM.out, 60 | GET_REFERENCE_PILEUP.out, 61 | "baf" ) 62 | 63 | } 64 | 65 | if ( params.do_splicing_quantification ) { 66 | CELLSORT_BAM ( SPACERANGER.out.bam ) 67 | 68 | SPLICING_QUANTIFICATION ( CELLSORT_BAM.out 69 | .join(SPACERANGER.out.bam) 70 | .join(SPACERANGER.out.mtx), 71 | file("${params.reference_genome}"), 72 | "baf" ) 73 | 74 | } 75 | 76 | emit: 77 | SPACERANGER.out.spatial 78 | } 79 | -------------------------------------------------------------------------------- /subworkflows/xenome_index.nf: -------------------------------------------------------------------------------- 1 | 2 | include { GUNZIP_FASTA as UNPACK_HOST; 3 | GUNZIP_FASTA as UNPACK_GRAFT; 4 | } from '../modules/local/gunzip' 5 | 6 | include { XENOME_GENERATE_INDEX; 7 | XENGSORT_GENERATE_INDEX; 8 | } from '../modules/local/deconvolution' 9 | 10 | workflow XINDEX { 11 | 12 | main: 13 | if ( file(params.deconvolution_reference_host).getExtension() == "gz" ) { 14 | UNPACK_HOST ( params.deconvolution_reference_host ) 15 | reference_host = UNPACK_HOST.out 16 | } 17 | else { 18 | reference_host = params.deconvolution_reference_host 19 | } 20 | 21 | if ( file(params.deconvolution_reference_graft).getExtension() == "gz" ) { 22 | UNPACK_GRAFT ( params.deconvolution_reference_graft ) 23 | reference_graft = UNPACK_GRAFT.out 24 | } 25 | else { 26 | reference_graft = params.deconvolution_reference_graft 27 | } 28 | 29 | if ( params.deconvolution_tool == "xenome" ) { 30 | XENOME_GENERATE_INDEX ( reference_host, reference_graft, params.deconvolution_kmer_size ) 31 | 32 | output = XENOME_GENERATE_INDEX.out.indices_path 33 | } 34 | else if ( params.deconvolution_tool == "xengsort" ) { 35 | XENGSORT_GENERATE_INDEX ( reference_host, reference_graft, params.deconvolution_kmer_size ) 36 | 37 | output = XENGSORT_GENERATE_INDEX.out.indices_path 38 | } 39 | 40 | emit: 41 | output 42 | 43 | } 44 | -------------------------------------------------------------------------------- /workflows/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Routes of analysis 3 | 4 |

5 | 6 |

7 | 8 | All three routes of analysis are implemented as Nextflow DSL2 workflows and use the same [samplesheet](../README.md#samplesheet) format. The name of the workflow (one of "two_references", "one_reference", and "arbitrary_grid") is specified when the pipeline is invoked: 9 | 10 | nextflow run main.nf [...] --workflow="two_references" 11 | 12 | JAX users can set the workflow in [run.sh](../run.sh), which passes it, together with the samplesheet and output paths, to [submit.sb](../submit.sb). 13 | -------------------------------------------------------------------------------- /workflows/arbitrary_grid.nf: -------------------------------------------------------------------------------- 1 | 2 | include { IMG } from '../subworkflows/imaging' 3 | 4 | workflow ARB { 5 | 6 | take: 7 | samples 8 | 9 | main: 10 | IMG ( samples 11 | .join(samples.map{[it[0], []]})) 12 | 13 | } 14 | -------------------------------------------------------------------------------- /workflows/one_reference.nf: -------------------------------------------------------------------------------- 1 | 2 | include { SEQ } from '../subworkflows/sequencing_single' 3 | include { IMG } from '../subworkflows/imaging' 4 | 5 | workflow ONE { 6 | 7 | take: 8 | samples 9 | 10 | main: 11 | SEQ ( samples ) 12 | 13 | if ( params.do_img_subworkflow ) { 14 | IMG ( samples 15 | .join(SEQ.out) ) 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /workflows/two_references.nf: -------------------------------------------------------------------------------- 1 | 2 | include { XINDEX } from '../subworkflows/xenome_index' 3 | include { SEQ } from '../subworkflows/sequencing' 4 | include { IMG } from '../subworkflows/imaging' 5 | 6 | workflow TWO { 7 | 8 | take: 9 | samples 10 | 11 | main: 12 | if ( params.workflow == "deconvolution_indices" ) { 13 | if ( params.deconvolution_tool == "xenome" ) { 14 | if ( !file("${params.deconvolution_indices_path}/${params.deconvolution_indices_name}-both.kmers.low-bits.lwr").exists() ) { 15 | XINDEX ( ) 16 | } 17 | } 18 | else if ( params.deconvolution_tool == "xengsort" ) { 19 | if ( !file("${params.deconvolution_indices_path}/${params.deconvolution_indices_name}-xind.hash").exists() ) { 20 | XINDEX ( ) 21 | } 22 | } 23 | } 24 | 25 | SEQ ( samples ) 26 | 27 | if ( params.do_img_subworkflow ) { 28 | IMG ( samples 29 | .join(SEQ.out) ) 30 | } 31 | 32 | } 33 | --------------------------------------------------------------------------------
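For reference, the fully expanded command assembled by `run.sh` and `submit.sb` is sketched below. This is a minimal illustration using the default values from `run.sh` and the `slurm` and `singularity` profiles defined in `nextflow.config`; the samplesheet, work/output directories, and bind path should be adjusted to the local environment.

    # Illustrative launch mirroring the flags used in submit.sb (values are the run.sh defaults)
    nextflow run main.nf \
        -w ./work \
        -profile slurm,singularity \
        -resume \
        --workflow="arbitrary_grid" \
        --input="./assets/samplesheet_test.csv" \
        --outdir="./results" \
        --bind="-B /projects/"

If the `slurm` profile is omitted, Nextflow falls back to its local executor, which can be convenient for small test runs.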