├── .gitignore ├── .vs ├── ProjectSettings.json ├── STQ │ └── v16 │ │ └── .suo └── slnx.sqlite ├── ChangeLog.md ├── LICENSE ├── README.md ├── assets ├── container-mamba-inception.def ├── container-singularity-bafextract.def ├── container-singularity-deepfocus.def ├── container-singularity-fastqtools.def ├── container-singularity-hovernet-py.def ├── container-singularity-inception-py.def ├── container-singularity-ome.def ├── container-singularity-python.def ├── container-singularity-spaceranger.def ├── container-singularity-stainnet.def ├── container-singularity-staintools.def ├── container-singularity-uni-conch.def ├── container-singularity-velocyto.def ├── container-singularity-vips.def ├── def-mamba-timm.def ├── def-mamba-xenomake.def ├── run_build.sb ├── run_build.sh ├── sample_roi.json ├── samplesheet_demo.csv ├── samplesheet_focus_test.csv ├── samplesheet_st_pancreas_all.csv ├── samplesheet_test.csv ├── samplesheet_test_sj.csv ├── samplesheet_test_sj_short.csv └── samplesheet_two.csv ├── bin ├── StainNetNorm.py ├── StainToolsNorm.py ├── __init__.py ├── extractROI.py ├── mtx_tools.py ├── run-conch.py ├── run-ctranspath.py ├── run-inception-v3-tiles.py ├── run-inception-v3.py ├── run-uni.py └── superpixelation.py ├── check.sh ├── conf ├── README.md ├── analysis-img.config ├── analysis-one.config ├── analysis-pancreas.config ├── analysis-two.config └── containers.config ├── docs ├── BAF_extract_scheme.png ├── Example_CPU_usage.png ├── STQ-imaging.svg ├── Scheme NF2.png ├── Scheme_NF3.png ├── dag-arb.svg ├── dag-one.svg ├── dag-two.svg ├── example ST wsi.png ├── example non-ST wsi.png ├── flow-static.png ├── flow.gif ├── hovernet-tissue-mask.png ├── imaging-clustering.png ├── mones-per-tile.png ├── multiscale-features.png ├── route-map.png └── sub-tiling.png ├── lib ├── __init__.py ├── hovernetConv.py ├── superpixels.py ├── wsiGrid.py └── wsiMask.py ├── main.nf ├── modules └── local │ ├── bafextract.nf │ ├── deconvolution.nf │ ├── focus.nf │ ├── gunzip.nf │ ├── hovernet.nf │ ├── load.nf │ ├── merge.nf │ ├── ome.nf │ ├── postprocessing.nf │ ├── spaceranger.nf │ ├── superpixel.nf │ ├── tasks.nf │ └── velocyto.nf ├── nextflow.config ├── run.sh ├── submit.sb ├── subworkflows ├── imaging.nf ├── sequencing.nf ├── sequencing_single.nf └── xenome_index.nf ├── utils └── AOI.ipynb └── workflows ├── README.md ├── arbitrary_grid.nf ├── one_reference.nf └── two_references.nf /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | slurm-*.out 10 | 11 | .vs/ 12 | 13 | dev/ 14 | 15 | results*/ 16 | .nextflow.log* 17 | .nextflow/ 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | # lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | work*/ 33 | wheels/ 34 | pip-wheel-metadata/ 35 | share/python-wheels/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | MANIFEST 40 | 41 | # PyInstaller 42 | # Usually these files are written by a python script from a template 43 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
44 | *.manifest 45 | *.spec 46 | 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .nox/ 55 | .coverage 56 | .coverage.* 57 | .cache 58 | nosetests.xml 59 | coverage.xml 60 | *.cover 61 | *.py,cover 62 | .hypothesis/ 63 | .pytest_cache/ 64 | 65 | # Translations 66 | *.mo 67 | *.pot 68 | 69 | # Django stuff: 70 | *.log 71 | local_settings.py 72 | db.sqlite3 73 | db.sqlite3-journal 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # IPython 92 | profile_default/ 93 | ipython_config.py 94 | 95 | # pyenv 96 | .python-version 97 | 98 | # pipenv 99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 102 | # install all needed dependencies. 103 | #Pipfile.lock 104 | 105 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 106 | __pypackages__/ 107 | 108 | # Celery stuff 109 | celerybeat-schedule 110 | celerybeat.pid 111 | 112 | # SageMath parsed files 113 | *.sage.py 114 | 115 | # Environments 116 | .env 117 | .venv 118 | env/ 119 | venv/ 120 | ENV/ 121 | env.bak/ 122 | venv.bak/ 123 | 124 | # Spyder project settings 125 | .spyderproject 126 | .spyproject 127 | 128 | # Rope project settings 129 | .ropeproject 130 | 131 | # mkdocs documentation 132 | /site 133 | 134 | # mypy 135 | .mypy_cache/ 136 | .dmypy.json 137 | dmypy.json 138 | 139 | # Pyre type checker 140 | .pyre/ 141 | -------------------------------------------------------------------------------- /.vs/ProjectSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "CurrentProjectSetting": null 3 | } -------------------------------------------------------------------------------- /.vs/STQ/v16/.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/.vs/STQ/v16/.suo -------------------------------------------------------------------------------- /.vs/slnx.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/.vs/slnx.sqlite -------------------------------------------------------------------------------- /ChangeLog.md: -------------------------------------------------------------------------------- 1 | 2 | v0.3.0 3 | + Updated imaging workflow output directory structure. 4 | + Added export of pipeline parameters, metadata items, and additional image outputs. 5 | + Added multiscale feature extraction inspired by SAMPLER work (PMID: 37577691). 6 | + Added experimental option of sub-tiling for use with feature extraction. 7 | + Added slide focus checking. 8 | + Added CTransPath, MoCoV3, UNI, CONCH as an extractors of imaging features. 9 | + Added subworkflow for sampling of tiles. 10 | + Optimized HoVer-Net segmentation steps. Added option for GPU-based segmentation. 11 | + Added postprocessing step to visualize outputs. 
12 | + Added the AOI (automatic object identification) util for preparing ROI JSON for STQ. 13 | 14 | v0.2.0 15 | + This version was referenced in the publication (PMID: 38626768). 16 | + Refactored and optimized codebase. 17 | + Added Xengsort read classification option. 18 | + Added documentation details to improve user experience. 19 | 20 | v0.1.0 21 | + Initial release 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 The Jackson Laboratory 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /assets/container-mamba-inception.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: condaforge/mambaforge@sha256:1461e0a1fa14431128dc95d921655fd6cd0b9147b4ec757c6d99e02776e82b47 3 | #from: condaforge/mambaforge:23.3.1-1 4 | 5 | %environment 6 | export DEBIAN_FRONTEND=noninteractive 7 | 8 | %post 9 | echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 10 | 11 | apt-get update 12 | apt-get install -y dialog apt-utils 13 | apt-get install -y gcc g++ 14 | apt-get install -y openslide-tools 15 | apt-get install -y procps 16 | 17 | /opt/conda/bin/mamba install -y -c conda-forge -c fastai -c anaconda \ 18 | pandas scikit-learn scipy matplotlib scikit-image \ 19 | jupyterlab tifffile imagecodecs stardist \ 20 | openslide-python opencv-python-headless pillow h5py \ 21 | "tensorflow==2.11.0" 22 | 23 | /opt/conda/bin/pip install pysnic 24 | -------------------------------------------------------------------------------- /assets/container-singularity-bafextract.def: -------------------------------------------------------------------------------- 1 | bootstrap: shub 2 | from: jaxreg.jax.org/rit-ci/samtools:1.5 3 | 4 | %post 5 | apk add --update-cache 6 | apk add build-base 7 | apk add g++ 8 | apk add git 9 | apk add bash 10 | apk add procps 11 | 12 | git clone https://github.com/akdess/BAFExtract.git 13 | 14 | cd BAFExtract 15 | 16 | make 17 | 18 | 19 | -------------------------------------------------------------------------------- /assets/container-singularity-deepfocus.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: centos/python-36-centos7:latest 3 | 4 | %labels 
5 | Author Sergii Domanskyi 6 | 7 | %environment 8 | export PYTHONPATH="${PYTHONPATH}:/usr/local/lib64/python3.6/site-packages/:/usr/local/lib/python3.6/site-packages:/usr/lib/python3.6/site-packages" 9 | 10 | %post 11 | set -eu 12 | 13 | yum -y update && \ 14 | yum -y install \ 15 | redhat-lsb-core \ 16 | epel-release \ 17 | 18 | lsb_release -a 19 | 20 | yum-config-manager --enable epel 21 | 22 | yum -y install \ 23 | bzip2 \ 24 | openslide \ 25 | python3-devel.x86_64 \ 26 | wget \ 27 | bash \ 28 | inkscape \ 29 | librsvg2 \ 30 | gcc \ 31 | libssl-dev \ 32 | openssl \ 33 | procps 34 | 35 | python3 -m pip install --upgrade pip==21.3.1 36 | python3 -m pip install grpcio==1.48.2 tflearn==0.3.2 tensorflow==1.13.1 tqdm==4.28.1 numpy==1.15.3 packaging==16.8 pandas==0.23.4 wheel==0.29.0 matplotlib==3.3.4 scikit-image==0.17.2 37 | python3 -m pip install openslide-python 38 | 39 | python3 -m pip list 40 | 41 | git clone https://github.com/sdomanskyi/deepfocus.git 42 | 43 | 44 | %runscript 45 | exec python3 "$@" 46 | 47 | %help 48 | The container is built on CentOS 7.9.2009 49 | Python 3.6.8 50 | -------------------------------------------------------------------------------- /assets/container-singularity-fastqtools.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: continuumio/miniconda3:4.12.0 3 | 4 | %post 5 | apt-get update 6 | apt-get install -y bash 7 | apt-get install -y procps 8 | 9 | /opt/conda/bin/conda install --quiet -y -c bioconda fastq-tools 10 | -------------------------------------------------------------------------------- /assets/container-singularity-hovernet-py.def: -------------------------------------------------------------------------------- 1 | bootstrap: shub 2 | from: jaxreg.jax.org/singlecell/python:3.8 3 | 4 | %post 5 | apt-get update 6 | apt-get install -y gcc 7 | apt-get install -y git 8 | apt-get install -y g++ 9 | apt-get install -y openslide-tools 10 | apt-get install -y python-openslide 11 | apt-get install -y libsm6 libxext6 12 | apt-get install -y libxrender-dev 13 | apt-get install -y procps 14 | 15 | /opt/conda/bin/conda install --quiet -y python=3.6.12 pip=20.3.1 16 | /opt/conda/bin/conda install --quiet -y -c conda-forge pandas tifftools 17 | /opt/conda/bin/pip install gdown openslide-python==1.1.2 docopt==0.6.2 future==0.18.2 imgaug==0.4.0 matplotlib==3.3.0 numpy==1.19.1 opencv-python==4.3.0.36 pandas==1.1.0 pillow==7.2.0 psutil==5.7.3 scikit-image==0.17.2 scikit-learn==0.23.1 scipy==1.5.2 tensorboard==2.3.0 tensorboardx==2.1 termcolor==1.1.0 tqdm==4.48.0 torch==1.6.0 torchvision==0.7.0 18 | 19 | /opt/conda/bin/gdown 1SbSArI3KOOWHxRlxnjchO7_MbWzB4lNR 20 | 21 | git clone https://github.com/sdomanskyi/hover_net.git 22 | -------------------------------------------------------------------------------- /assets/container-singularity-inception-py.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: condaforge/mambaforge@sha256:1461e0a1fa14431128dc95d921655fd6cd0b9147b4ec757c6d99e02776e82b47 3 | #from: condaforge/mambaforge:23.3.1-1 4 | 5 | %environment 6 | export DEBIAN_FRONTEND=noninteractive 7 | 8 | %post 9 | echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 10 | 11 | apt-get update 12 | apt-get install -y dialog apt-utils 13 | apt-get install -y gcc g++ 14 | apt-get install -y openslide-tools 15 | apt-get install -y procps 16 | 17 | /opt/conda/bin/mamba install -y -c conda-forge -c fastai -c anaconda \ 
18 | pandas scikit-learn scipy matplotlib scikit-image \ 19 | jupyterlab tifffile imagecodecs stardist \ 20 | openslide-python opencv-python-headless pillow h5py \ 21 | "tensorflow==2.11.0" 22 | 23 | /opt/conda/bin/pip install pysnic 24 | -------------------------------------------------------------------------------- /assets/container-singularity-ome.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: alpine:3.18.2 3 | 4 | %environment 5 | export PATH="/bftools:$PATH" 6 | 7 | %post 8 | set -eu 9 | 10 | apk update && \ 11 | apk add unzip \ 12 | openjdk17 \ 13 | bash \ 14 | procps 15 | 16 | wget https://downloads.openmicroscopy.org/bio-formats/7.2.0/artifacts/bftools.zip 17 | unzip bftools.zip 18 | rm bftools.zip 19 | -------------------------------------------------------------------------------- /assets/container-singularity-python.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: condaforge/mambaforge:23.3.1-1 3 | 4 | %environment 5 | export DEBIAN_FRONTEND=noninteractive 6 | 7 | %post 8 | echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 9 | 10 | apt-get update 11 | apt-get install -y dialog apt-utils 12 | apt-get install -y procps 13 | 14 | /opt/conda/bin/mamba install --quiet -y -c conda-forge pandas numpy scipy scanpy leidenalg 15 | -------------------------------------------------------------------------------- /assets/container-singularity-spaceranger.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: debian:buster-slim 3 | 4 | %environment 5 | SPACERANGER_HOME="/opt/spaceranger-1.3.1" 6 | SR_EXEC="${SPACERANGER_HOME}/bin" 7 | SR_PY="${SPACERANGER_HOME}/external/anaconda/bin" 8 | 9 | export PATH="${SPACERANGER_HOME}:${SR_EXEC}:${SR_PY}:$PATH" 10 | 11 | %post 12 | set -eu 13 | 14 | apt-get update && \ 15 | apt-get -y upgrade 16 | apt-get -y install wget \ 17 | ca-certificates \ 18 | locales \ 19 | bash \ 20 | procps 21 | rm -rf /var/lib/apt/lists/* 22 | 23 | # locale fix 24 | echo "LC_ALL=en_US.UTF-8" >> /etc/environment 25 | echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen 26 | echo "LANG=en_US.UTF-8" > /etc/locale.conf 27 | locale-gen en_US.UTF-8 28 | 29 | mkdir -p /opt/ && cd /opt/ 30 | 31 | wget --no-check-certificate -nv -O spaceranger-1.3.1.tar.gz "https://singlecell-software.s3-far.jax.org/spaceranger-1.3.1.tar.gz" 32 | 33 | tar -zxf spaceranger-1.3.1.tar.gz && rm spaceranger-1.3.1.tar.gz* 34 | rm -r spaceranger-1.3.1/external/spaceranger_tiny_* 35 | 36 | %runscript 37 | exec spaceranger "$@" 38 | -------------------------------------------------------------------------------- /assets/container-singularity-stainnet.def: -------------------------------------------------------------------------------- 1 | bootstrap: shub 2 | from: jaxreg.jax.org/singlecell/python:3.8 3 | 4 | %post 5 | apt-get update 6 | apt-get install -y procps 7 | 8 | /opt/conda/bin/conda install --quiet -y pip=20.3.1 9 | /opt/conda/bin/pip install tifffile imageio numpy pillow tqdm torch 10 | 11 | wget https://github.com/khtao/StainNet/blob/master/checkpoints/aligned_histopathology_dataset/StainNet-Public_layer3_ch32.pth 12 | wget https://github.com/khtao/StainNet/blob/master/checkpoints/aligned_cytopathology_dataset/StainNet-3x0_best_psnr_layer3_ch32.pth 13 | wget https://github.com/khtao/StainNet/blob/master/checkpoints/camelyon16_dataset/StainNet-Public-centerUni_layer3_ch32.pth 14 | 
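The StainNet checkpoints fetched above are consumed later by bin/StainNetNorm.py. Below is a minimal sketch, not part of the pipeline, of applying one of them to a single tile; the StainNet class mirrors the definition in bin/StainNetNorm.py, the checkpoint and tile filenames are placeholders, and torch.load needs the raw .pth file (e.g. a raw.githubusercontent.com download or a local copy), not the GitHub HTML blob page.

import numpy as np
import torch
import torch.nn as nn
from PIL import Image

# Same 3-layer, kernel-size-1 architecture as in bin/StainNetNorm.py
class StainNet(nn.Module):
    def __init__(self, input_nc=3, output_nc=3, n_layer=3, n_channel=32, kernel_size=1):
        super().__init__()
        layers = [nn.Conv2d(input_nc, n_channel, kernel_size, padding=kernel_size // 2), nn.ReLU(True)]
        for _ in range(n_layer - 2):
            layers += [nn.Conv2d(n_channel, n_channel, kernel_size, padding=kernel_size // 2), nn.ReLU(True)]
        layers += [nn.Conv2d(n_channel, output_nc, kernel_size, padding=kernel_size // 2)]
        self.rgb_trans = nn.Sequential(*layers)

    def forward(self, x):
        return self.rgb_trans(x)

model = StainNet()
model.load_state_dict(torch.load("StainNet-Public_layer3_ch32.pth", map_location="cpu"))  # placeholder path
model.eval()

tile = np.asarray(Image.open("tile.tif"), dtype=np.float32) / 255.0          # placeholder tile, H x W x 3
x = torch.from_numpy(np.ascontiguousarray(tile.transpose(2, 0, 1)))[None]    # 1 x 3 x H x W
x = (x - 0.5) * 2                                                            # scale to [-1, 1] as in StainNetNorm.py
with torch.no_grad():
    out = (model(x) * 0.5 + 0.5).clamp(0, 1)[0].numpy().transpose(1, 2, 0)
Image.fromarray((out * 255).astype(np.uint8)).save("tile_normalized.tiff")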
-------------------------------------------------------------------------------- /assets/container-singularity-staintools.def: -------------------------------------------------------------------------------- 1 | bootstrap: shub 2 | from: jaxreg.jax.org/singlecell/python:3.8 3 | 4 | %post 5 | apt-get update 6 | apt-get install -y gcc 7 | apt-get install -y g++ 8 | apt-get install -y openslide-tools 9 | apt-get install -y python-openslide 10 | apt-get install -y libgl1 11 | apt-get install -y procps 12 | 13 | /opt/conda/bin/conda install --quiet -y -c conda-forge pandas python-spams 14 | /opt/conda/bin/conda install --quiet -y -c numba numba==0.56.4 15 | /opt/conda/bin/conda install --quiet -y pip 16 | /opt/conda/bin/pip install tifffile imagecodecs openslide-python opencv-python-headless staintools 17 | -------------------------------------------------------------------------------- /assets/container-singularity-uni-conch.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: condaforge/mambaforge:23.3.1-1 3 | 4 | %environment 5 | export DEBIAN_FRONTEND=noninteractive 6 | 7 | %post 8 | echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 9 | 10 | apt-get update 11 | apt-get install -y dialog apt-utils 12 | apt-get install -y openslide-tools 13 | apt-get install -y git 14 | apt-get install -y procps 15 | 16 | /opt/conda/bin/mamba install python=3.10 17 | /opt/conda/bin/mamba install -y -c anaconda pip numpy pandas pyarrow scipy 18 | /opt/conda/bin/mamba install -y -c conda-forge pillow openslide-python 19 | 20 | # Clone the repository at SHA of August 2024 21 | git clone https://github.com/mahmoodlab/CONCH.git 22 | cd CONCH 23 | git reset --hard 02d6ac59cc20874bff0f581de258c2b257f69a84 24 | 25 | /opt/conda/bin/pip install --upgrade pip 26 | /opt/conda/bin/pip install -e . 
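    # Note: CONCH model weights are not bundled into this image; bin/run-conch.py
    # loads a checkpoint supplied at runtime via its --model-checkpoint-path argument.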
27 | -------------------------------------------------------------------------------- /assets/container-singularity-velocyto.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: continuumio/miniconda3:4.12.0 3 | 4 | %post 5 | apt-get update 6 | apt-get install -y gcc 7 | apt-get install -y g++ 8 | apt-get install -y bash 9 | apt-get install -y procps 10 | 11 | /opt/conda/bin/conda install --quiet -y pip 12 | 13 | /opt/conda/bin/pip install numpy scipy numba matplotlib scikit-learn h5py loompy pysam Click pandas Cython 14 | /opt/conda/bin/pip install velocyto -------------------------------------------------------------------------------- /assets/container-singularity-vips.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: codechimpio/vips-alpine 3 | 4 | %post 5 | apt-get update 6 | apt-get install -y bash 7 | apt-get install -y procps 8 | -------------------------------------------------------------------------------- /assets/def-mamba-timm.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: condaforge/mambaforge:23.3.1-1 3 | 4 | %environment 5 | export DEBIAN_FRONTEND=noninteractive 6 | 7 | %post 8 | echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 9 | 10 | apt-get update 11 | apt-get install -y dialog apt-utils 12 | apt-get install -y openslide-tools 13 | apt-get install -y git 14 | apt-get install -y procps 15 | 16 | # Clone the repository at SHA of December 2023 17 | git clone https://github.com/Xiyue-Wang/TransPath 18 | cd TransPath 19 | git reset --hard 74673ef15656a6f01e53dde5c06e6964022e3789 20 | 21 | /opt/conda/bin/mamba install -y -c anaconda pip pandas pyarrow scipy 22 | /opt/conda/bin/mamba install -y -c conda-forge pillow openslide-python 23 | 24 | /opt/conda/bin/pip install gdown ml-collections 25 | 26 | # Modified timm package 27 | gdown 1JV7aj9rKqGedXY1TdDfi3dP07022hcgZ 28 | /opt/conda/bin/pip install timm-0.5.4.tar 29 | 30 | # 1. CTransPath: ctranspath.pth - 108MB 31 | gdown 1DoDx_70_TLj98gTf6YTXnu4tFhsFocDX 32 | 33 | # 2. MoCo V3 model: vit_small.pth.tar - 680M 34 | gdown 13d_SHy9t9JCwp_MsU2oOUZ5AvI6tsC-K 35 | 36 | # 3. 
TransPath model: checkpoint.pth - 840MB 37 | gdown 1dhysqcv_Ct_A96qOF8i6COTK3jLb56vx 38 | -------------------------------------------------------------------------------- /assets/def-mamba-xenomake.def: -------------------------------------------------------------------------------- 1 | bootstrap: docker 2 | from: condaforge/mambaforge:23.3.1-1 3 | 4 | %environment 5 | export DEBIAN_FRONTEND=noninteractive 6 | 7 | %post 8 | echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 9 | 10 | apt-get update 11 | apt-get install -y dialog apt-utils 12 | apt-get install -y git 13 | apt-get install -y procps 14 | 15 | /opt/conda/bin/mamba install -y -c bioconda numpy>=1.24 numba>=0.57 seqtk 16 | /opt/conda/bin/mamba install -y -c conda-forge jsonargparse pytest 17 | 18 | git clone https://github.com/Biivy/Xenomake 19 | cd Xenomake 20 | chmod -R +x scripts/ 21 | # Use the repository version at SHA of November 2023 22 | git reset --hard 363b8f5d51daae52ef12a2bd8bb9a12a1aacb4f4 23 | /opt/conda/bin/mamba env update -n base --file environment.yaml 24 | 25 | git clone https://gitlab.com/genomeinformatics/xengsort.git 26 | cd xengsort 27 | # Use the repository version at SHA of December 2023 28 | git reset --hard 62ea5c6419af8ad366212b617133dc1c6c1a8e28 29 | /opt/conda/bin/pip install -e . 30 | xengsort index --help 31 | which xengsort 32 | -------------------------------------------------------------------------------- /assets/run_build.sb: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | #SBATCH -t 02:00:00 3 | #SBATCH -N 1 4 | #SBATCH -n 1 5 | #SBATCH --mem=12GB 6 | #SBATCH -q batch 7 | #SBATCH -p compute 8 | 9 | #singularity run http://s3-far.jax.org/builder/builder $2 $1 10 | singularity build --fakeroot $1 $2 11 | 12 | scontrol show job $SLURM_JOB_ID -------------------------------------------------------------------------------- /assets/run_build.sh: -------------------------------------------------------------------------------- 1 | sbatch run_build.sb /projects/chuang-lab/USERS/domans/containers/container-singularity-python.sif container-singularity-python.def -------------------------------------------------------------------------------- /assets/sample_roi.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": { 3 | "location": 0.33, 4 | "size": 0.33 5 | }, 6 | "1": { 7 | "location": 0.33, 8 | "size": 0.33 9 | } 10 | } -------------------------------------------------------------------------------- /assets/samplesheet_demo.csv: -------------------------------------------------------------------------------- 1 | sample,fastq,image,grid,roifile,mpp 2 | Demo_S1,/projects/rubinstein-lab/USERS/domans/melanoma_PDX/dev/demo_dataset/fastq/,/projects/rubinstein-lab/USERS/domans/melanoma_PDX/dev/demo_dataset/SC2200092.tiff,,,0.22075 3 | -------------------------------------------------------------------------------- /assets/samplesheet_focus_test.csv: -------------------------------------------------------------------------------- 1 | sample,fastq,image,grid,roifile,mpp 2 | WM4237_TE_S1_ST,,/projects/chuang-lab/USERS/domans/melanoma_PDX_ST/additionalHandE/tiff/WM4237_3/WM4237_229_T1_4_21_22_cut_level_0_oid_0.tiff,,,0.2513 3 | -------------------------------------------------------------------------------- /assets/samplesheet_st_pancreas_all.csv: -------------------------------------------------------------------------------- 1 | sample,fastq,image,grid,roifile,mpp 2 | 
JDC_WP_001_s_ST,,/sdata/activities/kappsen-tmc/visium/SC2300284R_JDC-WP-001-s/img/SC2300284R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300284R_JDC-WP-001-s/spaceranger/spatial/,,0.2208187960959237 3 | JDC_WP_001_x_ST,,/sdata/activities/kappsen-tmc/visium/SC2300285R_JDC-WP-001-x/img/SC2300285R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300285R_JDC-WP-001-x/spaceranger/spatial/,,0.2208187960959237 4 | JDC_WP_001_l_ST,,/sdata/activities/kappsen-tmc/visium/SC2300286R_JDC-WP-001-l/img/SC2300286R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300286R_JDC-WP-001-l/spaceranger/spatial/,,0.2208187960959237 5 | JDC_WP_001_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300287R_JDC-WP-001-c/img/SC2300287R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300287R_JDC-WP-001-c/spaceranger/spatial/,,0.2208187960959237 6 | JDC_WP_002_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300294R_JDC-WP-002-c/img/SC2300294R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300294R_JDC-WP-002-c/spaceranger/spatial/,,0.2208187960959237 7 | JDC_WP_002_j_ST,,/sdata/activities/kappsen-tmc/visium/SC2300295R_JDC-WP-002-j/img/SC2300295R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300295R_JDC-WP-002-j/spaceranger/spatial/,,0.2208187960959237 8 | JDC_WP_002_r_ST,,/sdata/activities/kappsen-tmc/visium/SC2300296R_JDC-WP-002-r/img/SC2300296R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300296R_JDC-WP-002-r/spaceranger/spatial/,,0.2208187960959237 9 | JDC_WP_002_v_ST,,/sdata/activities/kappsen-tmc/visium/SC2300297R_JDC-WP-002-v/img/SC2300297R.tiff,/sdata/activities/kappsen-tmc/visium/SC2300297R_JDC-WP-002-v/spaceranger/spatial/,,0.2208187960959237 10 | JDC_WP_004_n_ST,,/sdata/activities/kappsen-tmc/visium/SC2300423_JDC-WP-004-n/img/SC2300423.tiff,/sdata/activities/kappsen-tmc/visium/SC2300423_JDC-WP-004-n/spaceranger/spatial/,,0.2208187960959237 11 | JDC_WP_004_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300424_JDC-WP-004-c/img/SC2300424.tiff,/sdata/activities/kappsen-tmc/visium/SC2300424_JDC-WP-004-c/spaceranger/spatial/,,0.2208187960959237 12 | JDC_WP_005_n_ST,,/sdata/activities/kappsen-tmc/visium/SC2300425_JDC-WP-005-n/img/SC2300425.tiff,/sdata/activities/kappsen-tmc/visium/SC2300425_JDC-WP-005-n/spaceranger/spatial/,,0.2208187960959237 13 | JDC_WP_005_r_ST,,/sdata/activities/kappsen-tmc/visium/SC2300426_JDC-WP-005-r/img/SC2300426.tiff,/sdata/activities/kappsen-tmc/visium/SC2300426_JDC-WP-005-r/spaceranger/spatial/,,0.2208187960959237 14 | JDC_WP_005_j_ST,,/sdata/activities/kappsen-tmc/visium/SC2300427_JDC-WP-005-j/img/SC2300427.tiff,/sdata/activities/kappsen-tmc/visium/SC2300427_JDC-WP-005-j/spaceranger/spatial/,,0.2208187960959237 15 | JDC_WP_005_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300455_JDC-WP-005-c/img/SC2300455.tiff,/sdata/activities/kappsen-tmc/visium/SC2300455_JDC-WP-005-c/spaceranger/spatial/,,0.2208187960959237 16 | JDC_WP_007_o_ST,,/sdata/activities/kappsen-tmc/visium/SC2300462_JDC-WP-007-o/img/SC2300462.tiff,/sdata/activities/kappsen-tmc/visium/SC2300462_JDC-WP-007-o/spaceranger/spatial/,,0.2208187960959237 17 | JDC_WP_007_s_ST,,/sdata/activities/kappsen-tmc/visium/SC2300463_JDC-WP-007-s/img/SC2300463.tiff,/sdata/activities/kappsen-tmc/visium/SC2300463_JDC-WP-007-s/spaceranger/spatial/,,0.2208187960959237 18 | JDC_WP_007_j_ST,,/sdata/activities/kappsen-tmc/visium/SC2300464_JDC-WP-007-j/img/SC2300464.tiff,/sdata/activities/kappsen-tmc/visium/SC2300464_JDC-WP-007-j/spaceranger/spatial/,,0.2208187960959237 19 | 
JDC_WP_009_n_ST,,/sdata/activities/kappsen-tmc/visium/SC2300465_JDC-WP-009-n/img/SC2300465.tiff,/sdata/activities/kappsen-tmc/visium/SC2300465_JDC-WP-009-n/spaceranger/spatial/,,0.2208187960959237 20 | JDC_WP_009_r_ST,,/sdata/activities/kappsen-tmc/visium/SC2300466_JDC-WP-009-r/img/SC2300466.tiff,/sdata/activities/kappsen-tmc/visium/SC2300466_JDC-WP-009-r/spaceranger/spatial/,,0.2208187960959237 21 | JDC_WP_009_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300467_JDC-WP-009-c/img/SC2300467.tiff,/sdata/activities/kappsen-tmc/visium/SC2300467_JDC-WP-009-c/spaceranger/spatial/,,0.2208187960959237 22 | JDC_WP_0010_w_ST,,/sdata/activities/kappsen-tmc/visium/SC2300511_JDC-WP-0010-w/img/SC2300511.tiff,/sdata/activities/kappsen-tmc/visium/SC2300511_JDC-WP-0010-w/spaceranger/spatial/,,0.2208187960959237 23 | JDC_WP_0010_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300514_JDC-WP-0010-c/img/SC2300514.tiff,/sdata/activities/kappsen-tmc/visium/SC2300514_JDC-WP-0010-c/spaceranger/spatial/,,0.2208187960959237 24 | JDC_WP_012_w_ST,,/sdata/activities/kappsen-tmc/visium/SC2300515_JDC-WP-012-w/img/SC2300515.tiff,/sdata/activities/kappsen-tmc/visium/SC2300515_JDC-WP-012-w/spaceranger/spatial/,,0.2208187960959237 25 | JDC_WP_012_ae_ST,,/sdata/activities/kappsen-tmc/visium/SC2300516_JDC-WP-012-ae/img/SC2300516.tiff,/sdata/activities/kappsen-tmc/visium/SC2300516_JDC-WP-012-ae/spaceranger/spatial/,,0.2208187960959237 26 | JDC_WP_012_n_ST,,/sdata/activities/kappsen-tmc/visium/SC2300517_JDC-WP-012-n/img/SC2300517.tiff,/sdata/activities/kappsen-tmc/visium/SC2300517_JDC-WP-012-n/spaceranger/spatial/,,0.2208187960959237 27 | JDC_WP_012_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300518_JDC-WP-012-c/img/SC2300518.tiff,/sdata/activities/kappsen-tmc/visium/SC2300518_JDC-WP-012-c/spaceranger/spatial/,,0.2208187960959237 28 | JDC_WP_004_y_ST,,/sdata/activities/kappsen-tmc/visium/SC2300519_JDC-WP-004-y/img/SC2300519.tiff,/sdata/activities/kappsen-tmc/visium/SC2300519_JDC-WP-004-y/spaceranger/spatial/,,0.2208187960959237 29 | JDC_WP_004_ah_ST,,/sdata/activities/kappsen-tmc/visium/SC2300520_JDC-WP-004-ah/img/SC2300520.tiff,/sdata/activities/kappsen-tmc/visium/SC2300520_JDC-WP-004-ah/spaceranger/spatial/,,0.2208187960959237 30 | JDC_WP_008_r_ST,,/sdata/activities/kappsen-tmc/visium/SC2300627_JDC-WP-008-r/img/SC2300627.tiff,/sdata/activities/kappsen-tmc/visium/SC2300627_JDC-WP-008-r/spaceranger/spatial/,,0.2208187960959237 31 | JDC_WP_008_v_ST,,/sdata/activities/kappsen-tmc/visium/SC2300628_JDC-WP-008-v/img/SC2300628.tiff,/sdata/activities/kappsen-tmc/visium/SC2300628_JDC-WP-008-v/spaceranger/spatial/,,0.2208187960959237 32 | JDC_WP_008_j_ST,,/sdata/activities/kappsen-tmc/visium/SC2300629_JDC-WP-008-j/img/SC2300629.tiff,/sdata/activities/kappsen-tmc/visium/SC2300629_JDC-WP-008-j/spaceranger/spatial/,,0.2208187960959237 33 | JDC_WP_011_w_ST,,/sdata/activities/kappsen-tmc/visium/SC2300631_JDC-WP-011-w/img/SC2300631.tiff,/sdata/activities/kappsen-tmc/visium/SC2300631_JDC-WP-011-w/spaceranger/spatial/,,0.2208187960959237 34 | JDC_WP_011_ac_ST,,/sdata/activities/kappsen-tmc/visium/SC2300632_JDC-WP-011-ac/img/SC2300632.tiff,/sdata/activities/kappsen-tmc/visium/SC2300632_JDC-WP-011-ac/spaceranger/spatial/,,0.2208187960959237 35 | JDC_WP_011_n_ST,,/sdata/activities/kappsen-tmc/visium/SC2300633_JDC-WP-011-n/img/SC2300633.tiff,/sdata/activities/kappsen-tmc/visium/SC2300633_JDC-WP-011-n/spaceranger/spatial/,,0.2208187960959237 36 | 
JDC_WP_011_b_ST,,/sdata/activities/kappsen-tmc/visium/SC2300634_JDC-WP-011-b/img/SC2300634.tiff,/sdata/activities/kappsen-tmc/visium/SC2300634_JDC-WP-011-b/spaceranger/spatial/,,0.2208187960959237 37 | JDC_WP_008_b_ST,,/sdata/activities/kappsen-tmc/visium/SC2300701_JDC-WP-008-b/img/SC2300701.tiff,/sdata/activities/kappsen-tmc/visium/SC2300701_JDC-WP-008-b/spaceranger/spatial/,,0.2208187960959237 38 | JDC_WP_007_c_ST,,/sdata/activities/kappsen-tmc/visium/SC2300460_JDC-WP-007-c/img/SC2300460.ome.tiff,/sdata/activities/kappsen-tmc/visium/SC2300460_JDC-WP-007-c/spaceranger/spatial/,,0.1469393661384487 39 | JDC_WP_009_j_ST,,/sdata/activities/kappsen-tmc/visium/SC2300461_JDC-WP-009-j/img/SC2300461.ome.tiff,/sdata/activities/kappsen-tmc/visium/SC2300461_JDC-WP-009-j/spaceranger/spatial/,,0.1469393661384487 40 | JDC_WP_010_ac_ST,,/sdata/activities/kappsen-tmc/visium/SC2300512_JDC-WP-010-ac/img/SC2300512.ome.tiff,/sdata/activities/kappsen-tmc/visium/SC2300512_JDC-WP-010-ac/spaceranger/spatial/,,0.1469393661384487 41 | JDC_WP_010_p_ST,,/sdata/activities/kappsen-tmc/visium/SC2300513_JDC-WP-010-p/img/SC2300513.ome.tiff,/sdata/activities/kappsen-tmc/visium/SC2300513_JDC-WP-010-p/spaceranger/spatial/,,0.1469393661384487 -------------------------------------------------------------------------------- /assets/samplesheet_test.csv: -------------------------------------------------------------------------------- 1 | sample,fastq,image,grid,roifile,mpp 2 | TCGA-AA-A00Z-01A-01-BS1.0,,/projects/chuang-lab/TCGA-COAD/WSI/TCGA-AA-A00Z-01A-01-BS1.47febbeb-d8d0-45fe-b934-30d8992a1737.svs,,/projects/rubinstein-lab/USERS/domans/COAD/tcga_coad_svs_thumbs/TCGA-AA-A00Z-01A-01-BS1.47febbeb-d8d0-45fe-b934-30d8992a1737.oid0.json,0.2485 3 | -------------------------------------------------------------------------------- /assets/samplesheet_test_sj.csv: -------------------------------------------------------------------------------- 1 | sample,fastq,image,grid,roifile,mpp 2 | SJRHB030549_D3-16812.oid1,,/projects/rubinstein-lab/pediSarcoma/stjude/SJRHB030549_D3-16812.svs,,/projects/rubinstein-lab/USERS/domans/pediSarcoma-stjude/thumbs/SJRHB030549_D3-16812.oid1.json,0.25159999999999999 3 | TCGA-AA-A00Z-01A-01-BS1.0,,/projects/chuang-lab/TCGA-COAD/WSI/TCGA-AA-A00Z-01A-01-BS1.47febbeb-d8d0-45fe-b934-30d8992a1737.svs,,/projects/rubinstein-lab/USERS/domans/COAD/tcga_coad_svs_thumbs/TCGA-AA-A00Z-01A-01-BS1.47febbeb-d8d0-45fe-b934-30d8992a1737.oid0.json,0.2485 4 | -------------------------------------------------------------------------------- /assets/samplesheet_test_sj_short.csv: -------------------------------------------------------------------------------- 1 | sample,fastq,image,grid,roifile,mpp 2 | SJRHB030549_D3-16812.oid1,,/projects/rubinstein-lab/USERS/domans/pediSarcoma-stjude/STQ-dev/results-56-112/SJRHB030549_D3-16812.oid1/image.ome.tiff,,,0.25 3 | SJRHB012405_X1-16322.oid0,,/projects/rubinstein-lab/USERS/domans/pediSarcoma-stjude/STQ-dev/results-56-112/SJRHB012405_X1-16322.oid0/image.ome.tiff,,,0.25 4 | -------------------------------------------------------------------------------- /assets/samplesheet_two.csv: -------------------------------------------------------------------------------- 1 | sample,fastq,image,grid,roifile,mpp 2 | WM4237_T1_S1,/projects/chuang-lab/rubinstein/ST_melanomaPDX/data/SC2200324_WM4237-T1-Day14-229/fastq,/projects/chuang-lab/rubinstein/ST_melanomaPDX/data/SC2200324_WM4237-T1-Day14-229/img/SC2200324.tiff,,,0.22075 3 | 
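The samplesheets above all share the columns sample,fastq,image,grid,roifile,mpp, with unused fields left empty depending on the workflow. A minimal sketch, separate from the pipeline itself, of loading one of these files and sanity-checking its fields (the samplesheet path is a placeholder):

import os
import pandas as pd

REQUIRED = ["sample", "fastq", "image", "grid", "roifile", "mpp"]

def load_samplesheet(path):
    df = pd.read_csv(path, dtype=str).fillna("")
    missing = [c for c in REQUIRED if c not in df.columns]
    if missing:
        raise ValueError(f"Samplesheet {path} is missing columns: {missing}")
    for _, row in df.iterrows():
        # image is filled in every sheet above; fastq/grid/roifile may be empty
        if row["image"] and not os.path.exists(row["image"]):
            print(f"WARNING: image not found for sample {row['sample']}: {row['image']}")
        float(row["mpp"])  # microns per pixel must be numeric
    return df

if __name__ == "__main__":
    print(load_samplesheet("assets/samplesheet_demo.csv"))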
-------------------------------------------------------------------------------- /bin/StainNetNorm.py: -------------------------------------------------------------------------------- 1 | # Prepared by Domanskyi 2 | # from https://github.com/khtao/StainNet repository 3 | # Here I use their pretrained net to run image stain normalization 4 | 5 | import os 6 | import argparse 7 | import imageio 8 | import tifffile 9 | import numpy as np 10 | import torch 11 | import torch.nn as nn 12 | from tqdm import tqdm 13 | from PIL import Image 14 | from torch.utils.data import dataset, DataLoader 15 | from glob import glob 16 | 17 | import PIL.Image 18 | PIL.Image.MAX_IMAGE_PIXELS = None 19 | 20 | class StainNet(nn.Module): 21 | def __init__(self, input_nc=3, output_nc=3, n_layer=3, n_channel=32, kernel_size=1): 22 | super(StainNet, self).__init__() 23 | model_list = [] 24 | model_list.append(nn.Conv2d(input_nc, n_channel, kernel_size=kernel_size, bias=True, padding=kernel_size // 2)) 25 | model_list.append(nn.ReLU(True)) 26 | for n in range(n_layer - 2): 27 | model_list.append( 28 | nn.Conv2d(n_channel, n_channel, kernel_size=kernel_size, bias=True, padding=kernel_size // 2)) 29 | model_list.append(nn.ReLU(True)) 30 | model_list.append(nn.Conv2d(n_channel, output_nc, kernel_size=kernel_size, bias=True, padding=kernel_size // 2)) 31 | 32 | self.rgb_trans = nn.Sequential(*model_list) 33 | 34 | def forward(self, x): 35 | return self.rgb_trans(x) 36 | 37 | def list_file_tree(path, file_type="tif"): 38 | if file_type.find("*") < 0: 39 | file_type = "*" + file_type 40 | image_list = glob(os.path.join(path, "*" + file_type), recursive=True) 41 | return image_list 42 | 43 | class SingleImage(dataset.Dataset): 44 | def __init__(self, data_path, transform=None, augment=None): 45 | self.data_path = data_path 46 | self.transform = transform 47 | self.augment = augment 48 | self.image_list = list_file_tree(os.path.join(data_path), "png") 49 | self.image_list += list_file_tree(os.path.join(data_path), "jpg") 50 | self.image_list += list_file_tree(os.path.join(data_path), "tif") 51 | self.image_list += list_file_tree(os.path.join(data_path), "tiff") 52 | self.image_list.sort() 53 | 54 | def __len__(self): 55 | return len(self.image_list) 56 | 57 | def __getitem__(self, item): 58 | img = Image.open(self.image_list[item]) 59 | img = (np.array(img, dtype=np.float32) / 255.0).transpose((2, 0, 1)) 60 | return img 61 | 62 | def process_images(opt, model, s = 4096): 63 | dataset = SingleImage(opt.source_dir) 64 | dataloader = DataLoader(dataset, batch_size=1, num_workers=1, drop_last=False) 65 | file_list = dataset.image_list 66 | num = 0 67 | for imgs in dataloader: 68 | print(imgs.shape) 69 | imgs_corrected = imgs.numpy() 70 | 71 | dims = imgs.shape[2], imgs.shape[3] 72 | r = [np.append(s*np.array(range(0, int(np.floor(dims[i]/s))+1)), [dims[i]]) for i in range(2)] 73 | coords = [] 74 | for i in range(len(r[0])-1): 75 | for j in range(len(r[1])-1): 76 | coords.append((i,j)) 77 | 78 | for i, j in tqdm(coords): 79 | imgs_temp = imgs[:, :, r[0][i]:r[0][i+1], r[1][j]:r[1][j+1]] 80 | print(imgs_temp.shape) 81 | if (imgs_temp.shape[2]!=0) and (imgs_temp.shape[3]!=0): 82 | with torch.no_grad(): 83 | imgs_temp = imgs_temp.cpu() 84 | imgs_temp = (imgs_temp - 0.5) * 2 85 | outputs = (model(imgs_temp) * 0.5 + 0.5).clamp(0, 1).detach().cpu().numpy() 86 | 87 | imgs_corrected[:, :, r[0][i]:r[0][i+1], r[1][j]:r[1][j+1]] = outputs 88 | 89 | for out in imgs_corrected: 90 | file_path = file_list[num] 91 | file_path = 
os.path.join(os.path.join(opt.save_dir), os.path.split(file_path)[1]) 92 | os.makedirs(os.path.split(file_path)[0], exist_ok=True) 93 | print('\n', file_path) 94 | ext = os.path.splitext(file_path)[1] 95 | tifffile.imwrite(file_path[:-len(ext)] + ".tiff", np.array(np.array(Image.fromarray((out * 255).astype(np.uint8).transpose((1, 2, 0))))), bigtiff=True) # v2 96 | #Image.fromarray((out * 255).astype(np.uint8).transpose((1, 2, 0))).save(file_path[:-len(ext)] + ".tiff", compression='raw') # v1 97 | #imageio.imwrite(file_path[:-len(ext)] + ".tiff", (out * 255).astype(np.uint8).transpose((1, 2, 0))) # v0 98 | num += 1 99 | 100 | return 101 | 102 | def run_normalization(opt): 103 | model = StainNet() 104 | model = model.cpu() 105 | checkpoint = torch.load(opt.model_path, map_location=torch.device('cpu')) 106 | model.load_state_dict(checkpoint) 107 | model.eval() 108 | process_images(opt, model) 109 | return 110 | 111 | if __name__ == '__main__': 112 | 113 | # python norm.py --source_dir "input_images/" --save_dir "output_images/" --model_path "StainNet-Public_layer3_ch32.pth" 114 | 115 | parser = argparse.ArgumentParser() 116 | parser.add_argument("--source_dir", type=str, required=True, help="path to source images") 117 | parser.add_argument("--save_dir", type=str, required=True, help="path to save images") 118 | parser.add_argument('--model_path', type=str, required=True, help='models path to load') 119 | args = parser.parse_args() 120 | 121 | run_normalization(args) 122 | -------------------------------------------------------------------------------- /bin/StainToolsNorm.py: -------------------------------------------------------------------------------- 1 | # Prepared by Domanskyi 2 | # The normalization is done by patches 3 | # Some stitching lines may be visible 4 | 5 | import os 6 | import argparse 7 | import tifffile 8 | import staintools 9 | import numpy as np 10 | from tqdm import tqdm 11 | from PIL import Image 12 | 13 | import PIL.Image 14 | PIL.Image.MAX_IMAGE_PIXELS = None 15 | 16 | import spams 17 | 18 | def get_concentrations(I, stain_matrix, regularizer=0.01): 19 | OD = convert_RGB_to_OD(I).reshape((-1, 3)) 20 | return spams.lasso(X=OD.T, D=stain_matrix.T, mode=2, lambda1=regularizer, pos=True).toarray().T 21 | 22 | def convert_RGB_to_OD(I): 23 | mask = (I == 0) 24 | I[mask] = 1 25 | return np.maximum(-1 * np.log(I / 255), 1e-6) 26 | 27 | def convert_OD_to_RGB(OD): 28 | assert OD.min() >= 0, "Negative optical density." 
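        # Inverse of convert_RGB_to_OD above: floor OD at 1e-6, then map back to RGB with I = 255 * exp(-OD).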
29 | OD = np.maximum(OD, 1e-6) 30 | return (255 * np.exp(-1 * OD)).astype(np.uint8) 31 | 32 | class StainNormalizer(staintools.StainNormalizer): 33 | 34 | def __init__(self, method): 35 | super().__init__(method) 36 | 37 | def fit(self, target): 38 | self.stain_matrix_target = self.extractor.get_stain_matrix(target) 39 | target_concentrations = get_concentrations(target, self.stain_matrix_target) 40 | self.maxC_target = np.percentile(target_concentrations, 99, axis=0).reshape((1, 2)) 41 | return 42 | 43 | def estimate(self, I): 44 | stain_matrix_source = self.extractor.get_stain_matrix(I) 45 | print(stain_matrix_source.dtype) 46 | source_concentrations = get_concentrations(I, stain_matrix_source) 47 | maxC_source = np.percentile(source_concentrations, 99, axis=0).reshape((1, 2)) 48 | return stain_matrix_source, maxC_source 49 | 50 | def transform(self, I, stain_matrix_source, maxC_source): 51 | source_concentrations = get_concentrations(I, np.array(stain_matrix_source)) 52 | source_concentrations *= self.maxC_target / maxC_source 53 | tmp = 255 * np.exp(-1 * np.dot(source_concentrations, self.stain_matrix_target)) 54 | return tmp.reshape(I.shape).astype(np.uint8) 55 | 56 | if __name__ == '__main__': 57 | 58 | parser = argparse.ArgumentParser() 59 | parser.add_argument("--referenceImagePath", type=str, required=True, help="path to reference or target image") 60 | parser.add_argument("--inputImagePath", type=str, required=True, help="input image name") 61 | parser.add_argument("--outputImageName", type=str, required=True, help="output image name") 62 | parser.add_argument('--s', type=int, default=4096, help='patch size') 63 | parser.add_argument('--low', type=int, default=100, help='low threshold') 64 | parser.add_argument('--high', type=int, default=200, help='high threshold') 65 | parser.add_argument('--qfraction', type=float, default=0.75, help='quantile of fraction for tissue') 66 | args = parser.parse_args() 67 | 68 | print('s:', args.s) 69 | 70 | target = tifffile.imread(args.referenceImagePath) 71 | if target.shape[0]<=4: 72 | target = np.moveaxis(target, 0, 2) 73 | target = target[:,:,:3] 74 | 75 | max_color = 255 76 | quantile = 0.95 77 | v = max_color - int(np.quantile(target.ravel(), quantile)) 78 | print('Color max shift:', v) 79 | target[(target.astype(int) + v) > max_color] = max_color 80 | target[(target.astype(int) + v) <= max_color] += v 81 | target = target.astype(np.uint8) 82 | target = np.asfortranarray(target) 83 | print(target.shape) 84 | normalizer = StainNormalizer(method='macenko') 85 | normalizer.fit(target) 86 | 87 | img = tifffile.imread(args.inputImagePath) 88 | if img.shape[0]<=4: 89 | img = np.moveaxis(img, 0, 2) 90 | img = img[:,:,:3] 91 | max_color = 255 92 | quantile = 0.95 93 | v = max_color - int(np.quantile(img.ravel(), quantile)) 94 | print('Color max shift:', v) 95 | for i in tqdm(range(img.shape[0])): 96 | wh = np.where((img[i, :, :].astype(int) + v) > max_color) 97 | img[i, wh[0], wh[1]] = max_color 98 | wh = np.where((img[i, :, :].astype(int) + v) <= max_color) 99 | img[i, wh[0], wh[1]] += v 100 | img = np.asfortranarray(img) 101 | 102 | dims = img.shape[0], img.shape[1] 103 | r = [np.append(args.s*np.array(range(0, int(np.floor(dims[i]/args.s))+1)), [dims[i]]) for i in range(2)] 104 | coords = [(i,j) for i in range(len(r[0])-1) for j in range(len(r[1])-1)] 105 | print(coords) 106 | 107 | # Determine representative patch 108 | coordsf = [] 109 | fractions = [] 110 | for i, j in tqdm(coords): 111 | try: 112 | # Get in_tissue flags for patch 113 | v = 
img[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1], :].mean(axis=2) 114 | vc = v.copy() 115 | v[vc < args.low] = 0 116 | v[vc > args.high] = 0 117 | v[(vc >= args.low) & (vc <= args.high)] = 1 118 | f = v.ravel().mean() 119 | print(i, j, f) 120 | if f==f: 121 | coordsf.append((i, j, f)) 122 | fractions.append(f) 123 | except Exception as exception: 124 | print('Exception:', exception) 125 | 126 | def get_ms_cs(fcutoff): 127 | print('fcutoff:', fcutoff) 128 | ms = [] 129 | cs = [] 130 | for i, j, f in tqdm(coordsf): 131 | # Get in_tissue flags for patch 132 | in_tissue = f >= fcutoff 133 | print(i, j, in_tissue, f) 134 | if in_tissue: 135 | try: 136 | m, c = normalizer.estimate(img[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1], :]) 137 | ms.append(m) 138 | cs.append(c) 139 | except Exception as exception: 140 | print('Exception:', exception) 141 | return ms, cs 142 | 143 | #fcutoff = np.quantile(fractions, args.qfraction) 144 | ms, cs = get_ms_cs(0.5) 145 | 146 | if len(ms) == 0: 147 | ms, cs = get_ms_cs(0.25) 148 | 149 | if len(ms) == 0: 150 | ms, cs = get_ms_cs(0.125) 151 | 152 | ms = np.dstack(ms) 153 | msm = np.median(ms, axis=2) 154 | cs = np.vstack(cs) 155 | csm = np.median(cs, axis=0) 156 | print(ms.shape, cs.shape) 157 | closest = np.argsort(np.sqrt((np.array([(ms - msm[:, :, None])[:, :, i].ravel() for i in range(ms.shape[2])])**2).sum(axis=1)))[0] 158 | m, c = ms[:, :, closest], cs[closest, :] 159 | print(m) 160 | print(c) 161 | 162 | # Normalize all patches 163 | for i, j in tqdm(coords): 164 | try: 165 | img[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1], :] = normalizer.transform(img[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1], :], m, c) 166 | except Exception as exception: 167 | print('Exception:', exception) 168 | 169 | tifffile.imwrite(args.outputImageName, img, bigtiff=True) 170 | 171 | exit(0) 172 | -------------------------------------------------------------------------------- /bin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/bin/__init__.py -------------------------------------------------------------------------------- /bin/extractROI.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openslide 3 | import json 4 | import tifffile 5 | from tifffile import TiffFile 6 | import numpy as np 7 | import argparse 8 | 9 | import PIL.Image 10 | PIL.Image.MAX_IMAGE_PIXELS = None 11 | 12 | if __name__ == '__main__': 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument("--fileslide", type=str, required=True, help="") 16 | parser.add_argument("--roifile", type=str, required=True, help="") 17 | parser.add_argument('--wholeside', default=False, action=argparse.BooleanOptionalAction, help="") 18 | parser.add_argument('--sizefile', type=str, default="size.txt", help="") 19 | parser.add_argument('--outfile', type=str, default="outfile.tiff", help="") 20 | parser.add_argument('--extract', type=str, default="False", help="") 21 | args = parser.parse_args() 22 | 23 | fileslide = args.fileslide.replace("\\", "") 24 | 25 | try: 26 | slide = openslide.open_slide(fileslide) 27 | dims0 = slide.dimensions 28 | except Exception as exception: 29 | print(exception) 30 | # If the slide is too large openslide may fail to read 31 | with TiffFile(fileslide) as imgh: 32 | dims0 = imgh.pages[0].tags[256].value, imgh.pages[0].tags[257].value 33 | print(dims0) 34 | 35 | with open(args.roifile, 'r') as tempfile: 36 | info 
= json.load(tempfile) 37 | 38 | icoords = int(dims0[0] * info['0']['location']), int(dims0[1] * info['1']['location']) 39 | size = int(dims0[0] * info['0']['size']), int(dims0[1] * info['1']['size']) 40 | print(dims0, '\t', icoords, '\t', size) 41 | 42 | if args.wholeside: 43 | sizegp = round(dims0[0] * dims0[1] / 10**6) 44 | else: 45 | sizegp = round(size[0] * size[1] / 10**6) 46 | 47 | with open(args.sizefile, 'w') as tempfile: 48 | tempfile.write(str(sizegp)) 49 | 50 | if args.extract=="True": 51 | print('Extracting ROI image') 52 | try: 53 | img = slide.read_region(location=icoords, level=0, size=size).convert('RGB') 54 | tifffile.imwrite(args.outfile, np.array(img), bigtiff=True) 55 | img.close() 56 | except Exception as exception: 57 | print(exception) 58 | # If the slide is too large openslide may fail to read 59 | img = tifffile.imread(fileslide)[icoords[1]:icoords[1]+size[1],icoords[0]:icoords[0]+size[0],:] 60 | tifffile.imwrite(args.outfile, img, bigtiff=True) 61 | del img 62 | 63 | exit(0) 64 | -------------------------------------------------------------------------------- /bin/mtx_tools.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gzip 3 | import numpy as np 4 | import pandas as pd 5 | from scipy.sparse import csr_matrix 6 | from scipy.io import mmwrite 7 | import scanpy as sc 8 | 9 | def gz(fname): 10 | 11 | '''Compress file with gzip and remove source 12 | ''' 13 | 14 | with open(fname) as f_in: 15 | with gzip.open(fname + '.gz', 'wt') as f_out: 16 | f_out.writelines(f_in) 17 | 18 | os.remove(fname) 19 | 20 | return 21 | 22 | def read_sc_from_mtx(outsPath): 23 | 24 | sc_adata = sc.read_mtx(outsPath +'matrix.mtx.gz').T 25 | 26 | df_var = pd.read_csv(outsPath + 'features.tsv.gz', header=None, sep='\t', index_col=0) 27 | df_var.index.name = None 28 | sc_adata.var = df_var 29 | 30 | df_obs = pd.read_csv(outsPath + 'barcodes.tsv.gz', header=None).set_index(0) 31 | df_obs.index.name = None 32 | sc_adata.obs = df_obs 33 | 34 | print(sc_adata.shape) 35 | 36 | return sc_adata 37 | 38 | def read_mtx_combine_and_write_mtx(adata1, adata2, saveDataDir=''): 39 | 40 | if not os.path.exists(saveDataDir): 41 | os.makedirs(saveDataDir) 42 | 43 | df = pd.concat([adata1.to_df(), adata2.to_df()], axis=1).fillna(0).astype(int) 44 | 45 | obs = pd.Series(df.index) 46 | obs.to_csv(saveDataDir + '/barcodes.tsv.gz', sep='\t', index=False, header=False) 47 | 48 | var = pd.concat([adata1.var, adata2.var]).loc[df.columns].reset_index() 49 | var.to_csv(saveDataDir + '/features.tsv.gz', sep='\t', index=False, header=False) 50 | 51 | fname = saveDataDir + '/matrix.mtx' 52 | mmwrite(fname, csr_matrix(df.values.T)) 53 | gz(fname) 54 | 55 | return 56 | -------------------------------------------------------------------------------- /bin/run-conch.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import numpy as np 4 | import openslide 5 | import PIL 6 | import json 7 | from tqdm import tqdm 8 | 9 | import timm 10 | import torch 11 | from torchvision import transforms 12 | import torch.nn as nn 13 | from conch.open_clip_custom import create_model_from_pretrained 14 | import openslide 15 | 16 | import PIL.Image 17 | PIL.Image.MAX_IMAGE_PIXELS = None 18 | 19 | def normalizer(img, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), size=224): 20 | func = transforms.Compose([transforms.Resize(size), 21 | transforms.ToTensor(), 22 | transforms.Normalize(mean=mean, 
std=std)]) 23 | return func(img) 24 | 25 | if __name__ == '__main__': 26 | parser = argparse.ArgumentParser( 27 | description='Compute features of each tile') 28 | parser.add_argument('--wsi-file', dest='wsi_file', action='store', 29 | required=True, 30 | help="""The path to the whole slide image (WSI) in a format readable by openslide (e.g., svs or ndpi).""") 31 | parser.add_argument('--model-checkpoint-path', dest='modelPath', action='store', 32 | required=True, 33 | help="""Path to bin checkpoint.""") 34 | parser.add_argument('--use-conch-normalizer', dest='useCONCHnormalizer', action='store', 35 | required=False, default=False, 36 | help="""Use special normalization for CONCH, otherwise use the default normalizer""") 37 | parser.add_argument('--positions-list-file', dest='positions_list_file', action='store', 38 | required=True, 39 | help="""The positions_list.csv file output by spaceranger that has one row per spot and columns indicating whether the spot is within the tissue and its x and y coordinates in pixels.""") 40 | parser.add_argument('--scalefactors-json-file', dest='scalefactors_json_file', action='store', 41 | required=True, 42 | help="""The scalefactors_json.json file output by spaceranger that defines the spot diameter in spaceranger's full resolution (i.e., the resolution of the file input to spaceranger, which may or may not be wsi_file).""") 43 | parser.add_argument('--output-path', dest='output_path', action='store', 44 | required=True, 45 | help="""Name of _CSV_ file in which to store the feature matrix (rows are tiles, cols are features). 46 | The file will be compressed if it is named *.gz""") 47 | parser.add_argument('--tile-mask', dest='tile_mask', default=None, action='store', required=False) 48 | parser.add_argument('--downsample-expanded', dest='downsample', action='store', default=True, 49 | required=False, 50 | help="""If expansion factor is greater than 1 then downsample the tiles back to the input size""") 51 | parser.add_argument('--expansion-factor', dest='expansion', action='store', 52 | required=True, 53 | help="""Expansion factor, 1 means no expansion""") 54 | parser.add_argument('--subtiling', dest='subtiling', action='store', 55 | required=True, 56 | help="""Do subtiling""") 57 | parser.add_argument('--subcoords-factor', dest='subcoordsf', action='store', 58 | required=True, 59 | help="""Factor for subtiling subtiling""") 60 | parser.add_argument('--subcoords-list', dest='subcoords', action='store', 61 | required=True, 62 | help="""Subtiling coordinates""") 63 | 64 | args = parser.parse_args() 65 | expansion = float(args.expansion) 66 | downsample = args.downsample=='true' 67 | subtiling = args.subtiling=='true' 68 | useCONCHnormalizer = args.useCONCHnormalizer=='true' 69 | 70 | subcoordsf = int(args.subcoordsf) 71 | subcoords = json.loads(args.subcoords) 72 | 73 | if expansion == 1.0: 74 | print('Expansion factor is 1, requested downsampling:', downsample) 75 | downsample = False 76 | else: 77 | if downsample: 78 | expansion = np.ceil(expansion) 79 | print('Expansion factor rounded to next interger:', expansion) 80 | print('Tiles will be expanded and then downsampled') 81 | else: 82 | print('Expansion without downsampling is requested') 83 | 84 | wsi_file = args.wsi_file 85 | positions_list_file = args.positions_list_file 86 | scalefactors_json_file = args.scalefactors_json_file 87 | output_path = args.output_path 88 | # Read in the spaceranger positions list file 89 | pos = pd.read_csv(positions_list_file, header=None) 90 | pos.columns = 
['barcode', 'in_tissue', 'array_row', 'array_col', 'pxl_row_in_fullres', 'pxl_col_in_fullres'] 91 | 92 | if args.tile_mask != 'None': 93 | print('Received tile mask %s' % args.tile_mask) 94 | mask = pd.read_csv(args.tile_mask, index_col=0, header=None) 95 | pos['in_tissue'] = mask.reindex(pos['barcode'].values).values 96 | 97 | # Read the spot diameter at spaceranger's "full resolution" from the scalefactors_json file 98 | # output by spaceranger, i.e., in the resolution of the file passed to spaceranger, which may not 99 | # be the same resolution of wsi_file. 100 | with open(scalefactors_json_file) as f: 101 | scalefactors_tbl = json.load(f) 102 | spot_diameter_fullres = scalefactors_tbl['spot_diameter_fullres'] 103 | 104 | 105 | # scale_factor = ratio of resolution of 'wsi_file' to resolution of "fullres" image input to spaceranger. 106 | # scale_factor = 4 107 | # NB: ideally, this code would accept the full resolution image along with the wsi_file and compare their sizes. 108 | # You would do that with something like (wait ... probably the full resolution image is a png/jpg/etc not openable by openslide) 109 | # full_resolution_slide = openslide.open_slide(full_resolution_file) 110 | # base_magnification = float(full_resolution_slide.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER]) 111 | scale_factor = 1 112 | # Define the spot diameter in the resolution of the wsi_file 113 | spot_diameter_wsi = round(spot_diameter_fullres * scale_factor) 114 | # Translate the pixel coordinates from full resolution to the resolution of the wsi 115 | pos['pxl_row_in_wsi'] = pos.pxl_row_in_fullres * scale_factor 116 | pos['pxl_col_in_wsi'] = pos.pxl_col_in_fullres * scale_factor 117 | # Create the inception v3 model 118 | num_dimensions = 3 119 | 120 | if downsample: 121 | num_rows = num_cols = round(spot_diameter_wsi) 122 | else: 123 | num_rows = num_cols = round(spot_diameter_wsi * expansion) 124 | 125 | # Load pre-trained CONCH model 126 | model, normalizerCONCH = create_model_from_pretrained("conch_ViT-B-16", checkpoint_path=args.modelPath) 127 | model.eval() 128 | 129 | # Use special normalization for CONCH, otherwise use the default normalizer 130 | if useCONCHnormalizer: 131 | normalizer = normalizerCONCH 132 | 133 | num_images = len(pos) 134 | batch_size = int(10**8 / (float(args.expansion) * float(args.expansion) * num_cols * num_rows)) 135 | if subtiling: 136 | batch_size = int(batch_size / 5) 137 | num_batches = int(np.ceil(num_images / batch_size)) 138 | 139 | print('Reading and pocessing tiles:', num_images) 140 | print('Batch size:', batch_size) 141 | print('Number of batches:', num_batches) 142 | 143 | slide = openslide.open_slide(wsi_file) 144 | 145 | w = num_cols 146 | h = num_rows 147 | lvl = 0 148 | features = [] 149 | for ibatch in tqdm(range(num_batches)): 150 | images = [] 151 | for indx in range(batch_size): 152 | try: 153 | cy = pos.loc[indx + ibatch*batch_size, 'pxl_row_in_wsi'] 154 | cx = pos.loc[indx + ibatch*batch_size, 'pxl_col_in_wsi'] 155 | 156 | if pos.loc[indx + ibatch*batch_size, 'in_tissue']: 157 | if downsample: 158 | ew = round(w * expansion) 159 | eh = round(h * expansion) 160 | else: 161 | ew = w 162 | eh = h 163 | 164 | img = np.array(slide.read_region((int(cx - ew / 2), int(cy - eh / 2)), lvl, (int(ew), int(eh))).convert('RGB')) 165 | 166 | if subtiling: 167 | a = int(np.floor(img.shape[0]/subcoordsf)) 168 | b = int(np.floor(img.shape[1]/subcoordsf)) 169 | for i, j in subcoords: 170 | subimg = img[a*(i-1): a*(i+1), b*(i-1): b*(i+1), :] 171 | 
images.append(subimg) 172 | else: 173 | # The downsampling is done to save memory 174 | if downsample: 175 | img = img[::int(expansion), ::int(expansion), :] 176 | assert (img.shape[0], img.shape[1])==(w, h), 'Wrong tile dimensions after downsampling!' 177 | 178 | images.append(img) 179 | 180 | except Exception as exception: 181 | #print(exception) 182 | pass 183 | print('Number of tiles:', len(images)) 184 | 185 | if len(images)>0: 186 | images = torch.cat([normalizer(PIL.Image.fromarray(image))[None, :, :, :] for image in images], 0) 187 | with torch.inference_mode(): 188 | temp_features = model.encode_image(images, proj_contrast=False, normalize=False).cpu().numpy() 189 | 190 | # Average the subtiles, e.g., every 5 subtiles 191 | if subtiling: 192 | df_temp = pd.DataFrame(temp_features) 193 | temp_features = df_temp.groupby(np.arange(len(df_temp.index))//len(subcoords)).mean().values 194 | 195 | features.append(temp_features) 196 | 197 | features = np.vstack(features) 198 | 199 | # Convert the dictionary of features to a dataframe and name its columns featXXX 200 | df_features = pd.DataFrame(features) 201 | df_features.columns = [f'feat_conch_' + str(i) for i in range(df_features.shape[1])] 202 | df_features.index = pos.loc[pos['in_tissue']==1].index 203 | 204 | # Append the spot position information to each row 205 | tbl = pd.concat([pos.loc[pos['in_tissue']==1], df_features], axis=1) 206 | print(tbl) 207 | 208 | # Output the features with spot information 209 | ## This will automatically compress if the file suffix is .gz 210 | 211 | tbl.to_csv(output_path + '.tsv.gz', index=False) 212 | print('Successfully wrote ' + output_path) 213 | 214 | exit(0) 215 | -------------------------------------------------------------------------------- /bin/run-inception-v3-tiles.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import pandas as pd 4 | import numpy as np 5 | import tensorflow as tf 6 | from tqdm import tqdm 7 | import matplotlib.pyplot as plt 8 | 9 | if __name__ == '__main__': 10 | parser = argparse.ArgumentParser(description='Compute Inception V3 features on tiles') 11 | parser.add_argument('--input-path', dest='input_path', action='store', 12 | required=True, 13 | help="""The path to the tiles from a whole slide image (WSI) in a TIF format.""") 14 | parser.add_argument('--output-path', dest='output_path', action='store', 15 | required=True, 16 | help="""Name of CSV file in which to store the feature matrix (rows are tiles, cols are features). 
17 | The file will be compressed if it is named *.gz""") 18 | args = parser.parse_args() 19 | 20 | output_path = args.output_path 21 | input_path = args.input_path 22 | 23 | # Make tile names list 24 | fnames = [fname for fname in os.listdir(input_path) if fname[-len('.tif'):]=='.tif'] 25 | num_images = len(fnames) 26 | print("Number of images:", num_images) 27 | 28 | # Assuming that all tiles have the same shape 29 | # Read the first tile and create a model 30 | tile = plt.imread(input_path + fnames[0]) 31 | 32 | base_model = tf.keras.applications.inception_v3.InceptionV3(include_top=False, weights='imagenet', input_shape=tile.shape) 33 | xi = base_model.output 34 | xi = tf.keras.layers.GlobalAveragePooling2D(data_format=None)(xi) 35 | model = tf.keras.models.Model(inputs=base_model.input, outputs=xi) 36 | 37 | batch_size = int(10**8 / (tile.shape[0] * tile.shape[1])) 38 | num_batches = int(np.ceil(num_images / batch_size)) 39 | 40 | print('Reading and pocessing tiles:', num_images) 41 | print('Batch size:', batch_size) 42 | print('Number of batches:', num_batches) 43 | 44 | features = [] 45 | for ibatch in tqdm(range(num_batches)): 46 | images = [] 47 | for indx in range(batch_size): 48 | try: 49 | images.append(plt.imread(input_path + fnames[indx + ibatch*batch_size])) 50 | except: 51 | pass 52 | 53 | features.append(model.predict(tf.keras.applications.inception_v3.preprocess_input(np.stack(images)), verbose=0)) 54 | features = np.vstack(features) 55 | 56 | df_features = pd.DataFrame(data=features, index=[fname[:-len('.tif')] for fname in fnames], columns=['feat' + str(i) for i in range(features.shape[1])]) 57 | print(df_features) 58 | 59 | if not os.path.exists(os.path.dirname(output_path)): 60 | os.makedirs(os.path.dirname(output_path)) 61 | 62 | df_features.to_csv(output_path) 63 | print('Successfully wrote:' + output_path) 64 | 65 | exit(0) 66 | -------------------------------------------------------------------------------- /bin/run-inception-v3.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import pandas as pd 4 | import numpy as np 5 | import cv2 6 | import tensorflow as tf 7 | import openslide 8 | from pathlib import Path 9 | import itertools 10 | import PIL 11 | import json 12 | from scipy.ndimage import gaussian_filter 13 | from tqdm import tqdm 14 | import time 15 | 16 | import PIL.Image 17 | PIL.Image.MAX_IMAGE_PIXELS = None 18 | 19 | if __name__ == '__main__': 20 | parser = argparse.ArgumentParser( 21 | description='Compute Inception V3 features on tiles that cover a _single_ spatial transcriptomic spot') 22 | parser.add_argument('--wsi-file', dest='wsi_file', action='store', 23 | required=True, 24 | help="""The path to the whole slide image (WSI) in a format readable by openslide (e.g., svs or ndpi).""") 25 | parser.add_argument('--positions-list-file', dest='positions_list_file', action='store', 26 | required=True, 27 | help="""The positions_list.csv file output by spaceranger that has one row per spot and columns indicating whether the spot is within the tissue and its x and y coordinates in pixels.""") 28 | parser.add_argument('--scalefactors-json-file', dest='scalefactors_json_file', action='store', 29 | required=True, 30 | help="""The scalefactors_json.json file output by spaceranger that defines the spot diameter in spaceranger's full resolution (i.e., the resolution of the file input to spaceranger, which may or may not be wsi_file).""") 31 | parser.add_argument('--output-path', 
dest='output_path', action='store', 32 | required=True, 33 | help="""Name of _CSV_ file in which to store the feature matrix (rows are tiles, cols are features). 34 | The file will be compressed if it is named *.gz""") 35 | parser.add_argument('--tile-mask', dest='tile_mask', default=None, action='store', required=False) 36 | parser.add_argument('--downsample-expanded', dest='downsample', action='store', default=True, 37 | required=False, 38 | help="""If expansion factor is greater than 1 then downsample the tiles back to the input size""") 39 | parser.add_argument('--expansion-factor', dest='expansion', action='store', 40 | required=True, 41 | help="""Expansion factor, 1 means no expansion""") 42 | args = parser.parse_args() 43 | expansion = float(args.expansion) 44 | downsample = args.downsample=='true' 45 | 46 | if expansion == 1.0: 47 | print('Expansion factor is 1, requested downsampling:', downsample) 48 | downsample = False 49 | else: 50 | if downsample: 51 | expansion = np.ceil(expansion) 52 | print('Expansion factor rounded to next interger:', expansion) 53 | print('Tiles will be expanded and then downsampled') 54 | else: 55 | print('Expansion without downsampling is requested') 56 | 57 | wsi_file = args.wsi_file 58 | positions_list_file = args.positions_list_file 59 | scalefactors_json_file = args.scalefactors_json_file 60 | output_path = args.output_path 61 | # Read in the spaceranger positions list file 62 | pos = pd.read_csv(positions_list_file, header=None) 63 | pos.columns = ['barcode', 'in_tissue', 'array_row', 'array_col', 'pxl_row_in_fullres', 'pxl_col_in_fullres'] 64 | 65 | if args.tile_mask != 'None': 66 | print('Received tile mask %s' % args.tile_mask) 67 | mask = pd.read_csv(args.tile_mask, index_col=0, header=None) 68 | pos['in_tissue'] = mask.reindex(pos['barcode'].values).values 69 | 70 | # Read the spot diameter at spaceranger's "full resolution" from the scalefactors_json file 71 | # output by spaceranger, i.e., in the resolution of the file passed to spaceranger, which may not 72 | # be the same resolution of wsi_file. 73 | with open(scalefactors_json_file) as f: 74 | scalefactors_tbl = json.load(f) 75 | spot_diameter_fullres = scalefactors_tbl['spot_diameter_fullres'] 76 | 77 | 78 | # scale_factor = ratio of resolution of 'wsi_file' to resolution of "fullres" image input to spaceranger. 79 | # scale_factor = 4 80 | # NB: ideally, this code would accept the full resolution image along with the wsi_file and compare their sizes. 81 | # You would do that with something like (wait ... 
probably the full resolution image is a png/jpg/etc not openable by openslide) 82 | # full_resolution_slide = openslide.open_slide(full_resolution_file) 83 | # base_magnification = float(full_resolution_slide.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER]) 84 | scale_factor = 1 85 | # Define the spot diameter in the resolution of the wsi_file 86 | spot_diameter_wsi = round(spot_diameter_fullres * scale_factor) 87 | # Translate the pixel coordinates from full resolution to the resolution of the wsi 88 | pos['pxl_row_in_wsi'] = pos.pxl_row_in_fullres * scale_factor 89 | pos['pxl_col_in_wsi'] = pos.pxl_col_in_fullres * scale_factor 90 | # Create the inception v3 model 91 | num_dimensions = 3 92 | 93 | if downsample: 94 | num_rows = num_cols = round(spot_diameter_wsi) 95 | else: 96 | num_rows = num_cols = round(spot_diameter_wsi * expansion) 97 | print('Model image size:', num_rows, num_cols) 98 | 99 | base_modeli = tf.keras.applications.inception_v3.InceptionV3(include_top=False, weights='imagenet', 100 | input_shape=(num_rows, num_cols, num_dimensions), 101 | classes=2) 102 | xi = base_modeli.output 103 | xi = tf.keras.layers.GlobalAveragePooling2D(data_format=None)(xi) 104 | model = tf.keras.models.Model(inputs=base_modeli.input, outputs=xi) 105 | 106 | num_images = len(pos) 107 | batch_size = int(10**8 / (float(args.expansion) * float(args.expansion) * num_cols * num_rows)) 108 | num_batches = int(np.ceil(num_images / batch_size)) 109 | 110 | print('Reading and pocessing tiles:', num_images) 111 | print('Batch size:', batch_size) 112 | print('Number of batches:', num_batches) 113 | 114 | slide = openslide.open_slide(wsi_file) 115 | 116 | w = num_cols 117 | h = num_rows 118 | lvl = 0 119 | features = [] 120 | for ibatch in tqdm(range(num_batches)): 121 | sT = time.time() 122 | images = [] 123 | for indx in range(batch_size): 124 | try: 125 | cy = pos.loc[indx + ibatch*batch_size, 'pxl_row_in_wsi'] 126 | cx = pos.loc[indx + ibatch*batch_size, 'pxl_col_in_wsi'] 127 | if pos.loc[indx + ibatch*batch_size, 'in_tissue']: 128 | if downsample: 129 | ew = round(w * expansion) 130 | eh = round(h * expansion) 131 | else: 132 | ew = w 133 | eh = h 134 | 135 | img = np.array(slide.read_region((int(cx - ew / 2), int(cy - eh / 2)), lvl, (int(ew), int(eh))).convert('RGB')) 136 | 137 | if downsample: 138 | img = img[::int(expansion), ::int(expansion), :] 139 | assert (img.shape[0], img.shape[1])==(w, h), 'Wrong tile dimensions after downsampling!' 
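                    # The expanded tile read above is (ew x eh) = expansion times the spot tile; the strided
                    # slice img[::int(expansion), ::int(expansion), :] keeps every int(expansion)-th pixel, so the
                    # tile returns to (w x h), as the assert checks. This widens the field of view per spot while
                    # keeping the Inception V3 input shape fixed; it is a cheap strided reduction, not smoothing.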
140 | 141 | images.append(img) 142 | except Exception as exception: 143 | #print(exception) 144 | pass 145 | print('Block 1:', time.time() - sT) 146 | print('Number of tiles:', len(images)) 147 | 148 | sT = time.time() 149 | if len(images)>0: 150 | features.append(model.predict(tf.keras.applications.inception_v3.preprocess_input(np.stack(images)), verbose=0)) 151 | print('Block 2:', time.time() - sT) 152 | 153 | features = np.vstack(features) 154 | 155 | # Convert the dictionary of features to a dataframe and name its columns featXXX 156 | df_features = pd.DataFrame(features) 157 | df_features.columns = ['feat_InceptionV3_' + str(i) for i in range(df_features.shape[1])] 158 | df_features.index = pos.loc[pos['in_tissue']==1].index 159 | 160 | # Append the spot position information to each row 161 | tbl = pd.concat([pos.loc[pos['in_tissue']==1], df_features], axis=1) 162 | print(tbl) 163 | 164 | # Output the features with spot information 165 | ## This will automatically compress if the file suffix is .gz 166 | 167 | tbl.to_csv(output_path + '.tsv.gz', index=False) 168 | print('Successfully wrote ' + output_path) 169 | 170 | exit(0) 171 | -------------------------------------------------------------------------------- /bin/run-uni.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import numpy as np 4 | import openslide 5 | import PIL 6 | import json 7 | from tqdm import tqdm 8 | 9 | import timm 10 | import torch 11 | from torchvision import transforms 12 | import torch.nn as nn 13 | import openslide 14 | 15 | import PIL.Image 16 | PIL.Image.MAX_IMAGE_PIXELS = None 17 | 18 | def normalizer(img, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), size=224): 19 | func = transforms.Compose([transforms.Resize(size), 20 | transforms.ToTensor(), 21 | transforms.Normalize(mean=mean, std=std)]) 22 | return func(img) 23 | 24 | if __name__ == '__main__': 25 | parser = argparse.ArgumentParser( 26 | description='Compute features of each tile') 27 | parser.add_argument('--wsi-file', dest='wsi_file', action='store', 28 | required=True, 29 | help="""The path to the whole slide image (WSI) in a format readable by openslide (e.g., svs or ndpi).""") 30 | parser.add_argument('--model-checkpoint-path', dest='modelPath', action='store', 31 | required=True, 32 | help="""Path to bin checkpoint.""") 33 | parser.add_argument('--positions-list-file', dest='positions_list_file', action='store', 34 | required=True, 35 | help="""The positions_list.csv file output by spaceranger that has one row per spot and columns indicating whether the spot is within the tissue and its x and y coordinates in pixels.""") 36 | parser.add_argument('--scalefactors-json-file', dest='scalefactors_json_file', action='store', 37 | required=True, 38 | help="""The scalefactors_json.json file output by spaceranger that defines the spot diameter in spaceranger's full resolution (i.e., the resolution of the file input to spaceranger, which may or may not be wsi_file).""") 39 | parser.add_argument('--output-path', dest='output_path', action='store', 40 | required=True, 41 | help="""Name of _CSV_ file in which to store the feature matrix (rows are tiles, cols are features). 
42 | The file will be compressed if it is named *.gz""") 43 | parser.add_argument('--tile-mask', dest='tile_mask', default=None, action='store', required=False) 44 | parser.add_argument('--downsample-expanded', dest='downsample', action='store', default=True, 45 | required=False, 46 | help="""If expansion factor is greater than 1 then downsample the tiles back to the input size""") 47 | parser.add_argument('--expansion-factor', dest='expansion', action='store', 48 | required=True, 49 | help="""Expansion factor, 1 means no expansion""") 50 | parser.add_argument('--subtiling', dest='subtiling', action='store', 51 | required=True, 52 | help="""Do subtiling""") 53 | parser.add_argument('--subcoords-factor', dest='subcoordsf', action='store', 54 | required=True, 55 | help="""Factor for subtiling subtiling""") 56 | parser.add_argument('--subcoords-list', dest='subcoords', action='store', 57 | required=True, 58 | help="""Subtiling coordinates""") 59 | 60 | args = parser.parse_args() 61 | expansion = float(args.expansion) 62 | downsample = args.downsample=='true' 63 | subtiling = args.subtiling=='true' 64 | 65 | subcoordsf = int(args.subcoordsf) 66 | subcoords = json.loads(args.subcoords) 67 | 68 | if expansion == 1.0: 69 | print('Expansion factor is 1, requested downsampling:', downsample) 70 | downsample = False 71 | else: 72 | if downsample: 73 | expansion = np.ceil(expansion) 74 | print('Expansion factor rounded to next interger:', expansion) 75 | print('Tiles will be expanded and then downsampled') 76 | else: 77 | print('Expansion without downsampling is requested') 78 | 79 | wsi_file = args.wsi_file 80 | positions_list_file = args.positions_list_file 81 | scalefactors_json_file = args.scalefactors_json_file 82 | output_path = args.output_path 83 | # Read in the spaceranger positions list file 84 | pos = pd.read_csv(positions_list_file, header=None) 85 | pos.columns = ['barcode', 'in_tissue', 'array_row', 'array_col', 'pxl_row_in_fullres', 'pxl_col_in_fullres'] 86 | 87 | if args.tile_mask != 'None': 88 | print('Received tile mask %s' % args.tile_mask) 89 | mask = pd.read_csv(args.tile_mask, index_col=0, header=None) 90 | pos['in_tissue'] = mask.reindex(pos['barcode'].values).values 91 | 92 | # Read the spot diameter at spaceranger's "full resolution" from the scalefactors_json file 93 | # output by spaceranger, i.e., in the resolution of the file passed to spaceranger, which may not 94 | # be the same resolution of wsi_file. 95 | with open(scalefactors_json_file) as f: 96 | scalefactors_tbl = json.load(f) 97 | spot_diameter_fullres = scalefactors_tbl['spot_diameter_fullres'] 98 | 99 | 100 | # scale_factor = ratio of resolution of 'wsi_file' to resolution of "fullres" image input to spaceranger. 101 | # scale_factor = 4 102 | # NB: ideally, this code would accept the full resolution image along with the wsi_file and compare their sizes. 103 | # You would do that with something like (wait ... 
probably the full resolution image is a png/jpg/etc not openable by openslide)
104 |     # full_resolution_slide = openslide.open_slide(full_resolution_file)
105 |     # base_magnification = float(full_resolution_slide.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER])
106 |     scale_factor = 1
107 |     # Define the spot diameter in the resolution of the wsi_file
108 |     spot_diameter_wsi = round(spot_diameter_fullres * scale_factor)
109 |     # Translate the pixel coordinates from full resolution to the resolution of the wsi
110 |     pos['pxl_row_in_wsi'] = pos.pxl_row_in_fullres * scale_factor
111 |     pos['pxl_col_in_wsi'] = pos.pxl_col_in_fullres * scale_factor
112 |     # Define the tile dimensions from the spot diameter
113 |     num_dimensions = 3
114 | 
115 |     if downsample:
116 |         num_rows = num_cols = round(spot_diameter_wsi)
117 |     else:
118 |         num_rows = num_cols = round(spot_diameter_wsi * expansion)
119 | 
120 |     # Load pre-trained UNI model
121 |     model = timm.create_model("vit_large_patch16_224", img_size=224, patch_size=16, init_values=1e-5, num_classes=0, dynamic_img_size=True)
122 |     model.load_state_dict(torch.load(args.modelPath, map_location="cpu"), strict=True)
123 |     model.eval()
124 | 
125 |     num_images = len(pos)
126 |     batch_size = int(10**8 / (float(args.expansion) * float(args.expansion) * num_cols * num_rows))
127 |     if subtiling:
128 |         batch_size = int(batch_size / 5)
129 |     num_batches = int(np.ceil(num_images / batch_size))
130 | 
131 |     print('Reading and processing tiles:', num_images)
132 |     print('Batch size:', batch_size)
133 |     print('Number of batches:', num_batches)
134 | 
135 |     slide = openslide.open_slide(wsi_file)
136 | 
137 |     w = num_cols
138 |     h = num_rows
139 |     lvl = 0
140 |     features = []
141 |     for ibatch in tqdm(range(num_batches)):
142 |         images = []
143 |         for indx in range(batch_size):
144 |             try:
145 |                 cy = pos.loc[indx + ibatch*batch_size, 'pxl_row_in_wsi']
146 |                 cx = pos.loc[indx + ibatch*batch_size, 'pxl_col_in_wsi']
147 | 
148 |                 if pos.loc[indx + ibatch*batch_size, 'in_tissue']:
149 |                     if downsample:
150 |                         ew = round(w * expansion)
151 |                         eh = round(h * expansion)
152 |                     else:
153 |                         ew = w
154 |                         eh = h
155 | 
156 |                     img = np.array(slide.read_region((int(cx - ew / 2), int(cy - eh / 2)), lvl, (int(ew), int(eh))).convert('RGB'))
157 | 
158 |                     if subtiling:
159 |                         a = int(np.floor(img.shape[0]/subcoordsf))
160 |                         b = int(np.floor(img.shape[1]/subcoordsf))
161 |                         for i, j in subcoords:
162 |                             subimg = img[a*(i-1): a*(i+1), b*(j-1): b*(j+1), :]
163 |                             images.append(subimg)
164 |                     else:
165 |                         # The downsampling is done to save memory
166 |                         if downsample:
167 |                             img = img[::int(expansion), ::int(expansion), :]
168 |                             assert (img.shape[0], img.shape[1])==(w, h), 'Wrong tile dimensions after downsampling!'
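                        # Each collected tile (or sub-tile, when subtiling is enabled) is converted to a PIL image
                        # below, resized (shorter side to 224 px) and ImageNet-normalized by normalizer(), then
                        # embedded in one batch by the UNI ViT-L/16 encoder; with subtiling, the per-sub-tile
                        # embeddings are averaged back to a single feature row per tile after inference.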
169 | 170 | images.append(img) 171 | 172 | except Exception as exception: 173 | #print(exception) 174 | pass 175 | print('Number of tiles:', len(images)) 176 | 177 | if len(images)>0: 178 | images = torch.cat([normalizer(PIL.Image.fromarray(image))[None, :, :, :] for image in images], 0) 179 | with torch.inference_mode(): 180 | temp_features = model(images).cpu().numpy() 181 | 182 | # Average the subtiles, e.g., every 5 subtiles 183 | if subtiling: 184 | df_temp = pd.DataFrame(temp_features) 185 | temp_features = df_temp.groupby(np.arange(len(df_temp.index))//len(subcoords)).mean().values 186 | 187 | features.append(temp_features) 188 | 189 | features = np.vstack(features) 190 | 191 | # Convert the dictionary of features to a dataframe and name its columns featXXX 192 | df_features = pd.DataFrame(features) 193 | df_features.columns = [f'feat_uni_' + str(i) for i in range(df_features.shape[1])] 194 | df_features.index = pos.loc[pos['in_tissue']==1].index 195 | 196 | # Append the spot position information to each row 197 | tbl = pd.concat([pos.loc[pos['in_tissue']==1], df_features], axis=1) 198 | print(tbl) 199 | 200 | # Output the features with spot information 201 | ## This will automatically compress if the file suffix is .gz 202 | 203 | tbl.to_csv(output_path + '.tsv.gz', index=False) 204 | print('Successfully wrote ' + output_path) 205 | 206 | exit(0) 207 | -------------------------------------------------------------------------------- /bin/superpixelation.py: -------------------------------------------------------------------------------- 1 | # Prepared by Domanskyi 2 | # The superpixelation is done by patches 3 | # Each patch plot of superpixels is generated, small superpixels' identifiers are not shown 4 | 5 | import os 6 | import argparse 7 | import tifffile 8 | import numpy as np 9 | from tqdm import tqdm 10 | from PIL import Image 11 | import matplotlib.pyplot as plt 12 | import matplotlib.patheffects as path_effects 13 | from skimage.segmentation import mark_boundaries 14 | 15 | import PIL.Image 16 | PIL.Image.MAX_IMAGE_PIXELS = None 17 | 18 | import skimage 19 | from pysnic.algorithms.snic import snic 20 | 21 | def infere_spx(im_down_patch, target_number_of_segments='auto', pixels_per_segment=10000, compactness=1): 22 | 23 | lab_image = skimage.color.rgb2lab(im_down_patch).tolist() 24 | 25 | if target_number_of_segments == 'auto': 26 | target_number_of_segments = int(im_down_patch.shape[0] * im_down_patch.shape[1] / pixels_per_segment) 27 | 28 | segmentation, _, centroids = snic(lab_image, target_number_of_segments, compactness, update_func=None) 29 | segmentation = np.array(segmentation) 30 | 31 | return segmentation 32 | 33 | def plot_all_spx_nf(im_down, seg, seg_id='', fontcolor='k', fontsize=8, fontweight='demibold', 34 | figsize=(10, 10), boundaries_color=(1, 0, 0), min_size=500, 35 | pe=path_effects.Stroke(linewidth=2, foreground='w')): 36 | 37 | if im_down.shape[0] > im_down.shape[1]: 38 | figsize = figsize[0] * im_down.shape[1] / im_down.shape[0], figsize[1] 39 | else: 40 | figsize = figsize[0], figsize[1] * im_down.shape[0] / im_down.shape[1] 41 | 42 | fig, ax = plt.subplots(figsize=figsize) 43 | ax.imshow(mark_boundaries(im_down, seg, color=boundaries_color)) 44 | 45 | for s in np.unique(seg.ravel()): 46 | wh = np.array(np.where(seg==s)) 47 | if len(wh[0]) >= min_size: 48 | m = wh.mean(axis=1) 49 | params = dict(va='center', ha='center', color=fontcolor, fontsize=fontsize, fontweight=fontweight) 50 | ltext = ax.text(m[1], m[0], s, **params) 51 | 
ltext.set_path_effects([pe, path_effects.Normal()]) 52 | 53 | ax.set_aspect('equal') 54 | ax.axis('off') 55 | fig.tight_layout() 56 | 57 | plt.savefig(f'superpixelation_{seg_id}.png', facecolor='w', dpi=100, pad_inches=0.01) 58 | plt.close(fig) 59 | 60 | return 61 | 62 | if __name__ == '__main__': 63 | 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument("--inputImagePath", type=str, required=True, help="input image name") 66 | parser.add_argument("--segmentationSavePath", type=str, required=True, help="output file name") 67 | parser.add_argument('--s', type=int, default=4096, help='patch size') 68 | parser.add_argument('--compactness', required=True, type=float) 69 | parser.add_argument('--pixelsPerSegment', required=True, type=int) 70 | parser.add_argument('--downsamplingFactor', required=True, type=int) 71 | args = parser.parse_args() 72 | 73 | print('s:', args.s) 74 | 75 | # If the image is in 40x, the downsampling_factor 4 will bring the resolution to 10x 76 | img = np.array(tifffile.imread(args.inputImagePath))[::args.downsamplingFactor, ::args.downsamplingFactor, :3] 77 | print(img.shape) 78 | 79 | ## Save downsampled image 80 | #print('Saving downsampled image') 81 | #tifffile.imwrite(args.outputImagePath, img, bigtiff=True) 82 | #print('Done') 83 | 84 | dims = img.shape[0], img.shape[1] 85 | 86 | # Prepare image patches' coordinates 87 | r = [np.append(args.s*np.array(range(0, int(np.floor(dims[i]/args.s))+1)), [dims[i]]) for i in range(2)] 88 | coords = [(i,j) for i in range(len(r[0])-1) for j in range(len(r[1])-1)] 89 | print(coords) 90 | 91 | segmentation = np.zeros(dims, dtype=np.int32) 92 | for ipatch, (i, j) in enumerate(tqdm(coords)): 93 | try: 94 | seg_patch = infere_spx(img[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1], :], 95 | pixels_per_segment=args.pixelsPerSegment, 96 | compactness=args.compactness) 97 | 98 | # There are less than 1000 superpixels in each patch, less than 1000 patches 99 | # Make each superpixel id unique 100 | seg_patch += ipatch * 10000 101 | 102 | segmentation[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1]] = seg_patch 103 | except Exception as exception: 104 | print('Superpixel ERROR:', exception) 105 | 106 | try: 107 | plot_all_spx_nf(img[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1], :], 108 | segmentation[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1]], 109 | seg_id=ipatch * 10000) 110 | except Exception as exception: 111 | print('Superpixel plot ERROR:', exception) 112 | 113 | # Save segmentation mask 114 | print('\nSegmentation:', segmentation.shape) 115 | with open(args.segmentationSavePath, 'wb') as tempfile: 116 | np.save(tempfile, segmentation) 117 | 118 | exit(0) 119 | -------------------------------------------------------------------------------- /check.sh: -------------------------------------------------------------------------------- 1 | version=(`nextflow -v`) 2 | major=$(echo ${version[2]} | cut -d. -f1) 3 | 4 | if [ "$major" -lt "24" ]; then 5 | read -p "Update nextflow to use the pipeline. Proceed? (y/n): " confirm && [[ $confirm == [yY] ]] && nextflow self-update 6 | fi -------------------------------------------------------------------------------- /conf/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Description of the pipeline parameters 3 | 4 | To find out the default value for each parameter, see `conf/analysis.config`. 
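The parameters described below live in the Nextflow `params` scope of the analysis config chosen for a run. As an illustration only (the file name and values here are hypothetical; the authoritative defaults are the `conf/analysis-*.config` files in this repository), a small user config overriding a few of these options might look like:

```
// my-analysis.config -- illustrative sketch, not a shipped file
params {
    do_splicing_quantification = true      // quantify splicing with velocyto
    do_snv_extract             = false     // skip the BAF extraction sub-workflow
    target_mpp                 = 0.25      // target image resolution, microns per pixel
    stain_normalization        = true      // enable stain/color normalization
}
```

Depending on how the run is launched (see `run.sh` and `nextflow.config`), such a file can be passed to Nextflow with the `-c` option, e.g., `nextflow run main.nf -c my-analysis.config`, in addition to the configs shipped in `conf/`.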
5 | 6 | ### Sequencing analysis parameters 7 | 8 | + **`do_merge_mtx`** Merge graft and host MTX (gene by spot) matrices into one MTX matrix 9 | 10 | + **`do_splicing_quantification`** Run splicing quantification with velocyto. The pipeline also sorts by cell barcodes the BAM file produced by Space Ranger. 11 | 12 | + **`do_snv_extract`** Run the BAF extraction sub-workflow to get bulk-level SNV. 13 | 14 | + **`reference_genome`** Path to the reference genome to use for Space Ranger reads alignment in one-reference analysis route. See https://support.10xgenomics.com/single-cell-gene-expression/software/release-notes/build for Space Ranger requirements of the reference genomes. 15 | 16 | + **`mouse_reference_genome`** Path to the mouse reference genome for Space Ranger reads alignment in two-reference analysis route. 17 | 18 | + **`human_reference_genome`** Path to the human reference genome for Space Ranger reads alignment in two-reference analysis route. 19 | 20 | + **`deconvolution_reference_graft`** Path to a graft (e.g., human) reference genome (e.g., *.fa, *.fna, *.fa.gz, *.fna.gz) to build xenome or xengsort indices. If the indices supplied in `nextflow.config` already exits, then this parameter is ignored. 21 | 22 | + **`deconvolution_reference_host`** Path to a host (e.g., mouse) reference genome (e.g., *.fa, *.fna, *.fa.gz, *.fna.gz) to build xenome or xengsort indices. If the indices supplied in `nextflow.config` already exits, then this parameter is ignored. 23 | 24 | + **`deconvolution_kmer_size`** K-mer size for building xenome or xengsort indices. See https://github.com/data61/gossamer/blob/master/docs/xenome.md for a detailed description. 25 | 26 | + **`deconvolution_indices_path`** Path to save deconvolution indices. 27 | 28 | + **`deconvolution_indices_name`** Name of the indices. 29 | 30 | + **`xengsort_n`** Xengsort-specific parameter. See https://gitlab.com/genomeinformatics/xengsort for details. 31 | 32 | 33 | ##### See https://github.com/akdess/BAFExtract for the description of the following filtering parameters: 34 | 35 | + **`bafextract_minimum_mapping_quality`** 36 | 37 | + **`bafextract_minimum_base_quality`** 38 | 39 | + **`bafextract_min_coverage_per_SNV`** 40 | 41 | + **`bafextract_min_MAF_covg_per_SNV`** 42 | 43 | + **`bafextract_min_MAF`** 44 | 45 | 46 | ### Imaging analysis parameters 47 | 48 | + **`do_img_subworkflow`** Run the imaging sub-workflow to generate imaging and nuclear morphometric features for each spot on the grid. 49 | 50 | + **`short_workflow`** Run short imaging workflow instead of the full imaging workflow. See config for details. 51 | 52 | + **`do_imaging_anndata`** Create an AnnData object (e.g., for use with Scanpy) from the *.csv.gz data file with imaging and nuclear morphometric features 53 | 54 | + **`do_nuclear_sementation`** Perform nuclear segmentation (use either HoVer-Net or StarDist to segment nuclei) of the entire WSI. 55 | 56 | + **`target_mpp`** desired image resolution for scaling the images. Note that specific DL and ML models require full-resolution images, and the supplied pre-trained models are designed for images with a resolution of around 0.25 (mpp). In case a low-magnification image is supplied (e.g., mpp is 0.5) while target_mpp is 0.25, the image is upsampled and will have doubled dimensions. 57 | 58 | + **`tiled_tiff_tile_size`** The TIFF WSI is internally stored in blocks (for memory management). The tile size determines the block size. 
This parameter is not the size of tiles used for feature extraction or segmentation aggregation. The grid parameter `grid_spot_diamter` (in micrometers) and resolution parameter `target_mpp` define the scaled image tile size.
59 | 
60 | + **`thumbnail_downsample_factor`** A factor used to reduce the WSI dimensions to create a low-resolution slide representation.
61 | 
62 | + **`check_focus`** Run the DeepFocus module to assess focus (blurriness) of the whole slide image.
63 | 
64 | + **`deepfocus_model_path`** Path to the DeepFocus checkpoint to use.
65 | 
66 | 
67 | 
68 | + **`stain_normalization`** Whether to do any stain or color normalization.
69 | 
70 | + **`stainnet`** Path to the checkpoint of the StainNet normalization model.
71 | 
72 | + **`macenko_normalization`** If true, then use Macenko stain normalization. If false, use StainNet color normalization. This parameter is ignored if `stain_normalization` is false.
73 | 
74 | + **`stain_reference_image`** Reference image (or a small patch, e.g., 2000 by 2000 pixels) to use with Macenko stain normalization.
75 | 
76 | + **`stain_patch_size`** Macenko stain normalization patch size.
77 | 
78 | 
79 | + **`mask_background_cutoff`** Parameter for detecting image background with HoVer-Net.
80 | 
81 | + **`pixel_mask_threshold_low`** Parameter for detecting tissue pixels on the low-resolution image.
82 | 
83 | + **`pixel_mask_threshold_high`** Parameter for detecting tissue pixels on the low-resolution image.
84 | 
85 | + **`fraction_for_mask`** Fraction of in-tissue pixels required to call a tile in-tissue.
86 | 
87 | 
88 | + **`use_provided_grid`** Whether to use the grid provided in the input sample sheet. If false and no Space Ranger alignment is done, then a new grid of tiles is generated based on the grid parameters.
89 | 
90 | + **`grid_type`** Type of the grid of tiles to generate. It can be hex, square, or random.
91 | 
92 | + **`grid_spot_diamter`** Diameter of the spot (dimension of a tile) in micrometers.
93 | 
94 | + **`grid_spot_horizontal_spacing`** Horizontal center-to-center distance between adjacent spots (or tiles).
95 | 
96 | + **`grid_aspect_correction`** Factor to correct the Visium slide aspect ratio.
97 | 
98 | 
99 | + **`overlap_scale_factor`** Imaging feature extraction parameter. If the factor is 1, features are extracted from a tile of the ST spot dimension.
100 | 
101 | 
102 | + **`hovernet_segmentation`** Do HoVer-Net segmentation. If false, do StarDist segmentation.
103 | 
104 | + **`nuclei_segmentation_dir`** Name of the directory to save segmentation output.
105 | 
106 | + **`hovernet_batch_size`** Parameter of HoVer-Net segmentation. This parameter is ignored when segmentation is done with StarDist.
107 | 
108 | + **`hovernet_num_inference_workers`** Parameter of HoVer-Net segmentation. This parameter is ignored when segmentation is done with StarDist.
109 | 
110 | + **`hovernet_chunk_size`** Parameter of HoVer-Net segmentation. This parameter is ignored when segmentation is done with StarDist.
111 | 
112 | + **`hovernet_tile_size`** Parameter of HoVer-Net segmentation. This parameter is ignored when segmentation is done with StarDist.
113 | 
114 | + **`stardist_model`** Path to the checkpoint of the StarDist model.
115 | 
116 | + **`stardist_block_size`** Size of the image block to run segmentation. Blocks are merged internally at the end of segmentation.
117 | 
118 | + **`stardist_expand_size`** Size of the cytoplasm around the nucleus, in pixels.
119 | 
120 | 
121 | 
122 | + **`hovernet_spot_assignment_factor`** Used for either HoVer-Net or StarDist segmentation postprocessing.
Scaling factor of the boundary limiting the inclusion of nuclei to an ST spot. A value of 1 means the boundary size equals the ST spot size.
123 | 
124 | + **`hovernet_spot_assignment_shape`** Used for either HoVer-Net or StarDist segmentation postprocessing. The shape of the boundary, either square or disk.
125 | 
126 | + **`hovernet_min_cell_type_prob`** Used for either HoVer-Net or StarDist segmentation postprocessing. This filtering parameter is used to remove nuclei assigned with low confidence.
127 | 
128 | 
129 | + **`extract_tile_features`** Extract (generate) imaging features for all tiles.
130 | 
131 | + **`extract_inception_features`** If `extract_tile_features`, then extract Inception V3 features.
132 | 
133 | + **`extract_transpath_features`** If `extract_tile_features`, then extract TransPath features.
134 | 
135 | + **`extract_uni_features`** If `extract_tile_features`, then extract UNI features.
136 | 
137 | + **`extract_conch_features`** If `extract_tile_features`, then extract CONCH features.
138 | 
139 | + **`transpath_features_model`** One of 'CTransPath' or 'MoCoV3'.
140 | 
141 | + **`use_conch_normalizer`** Use the specialized CONCH normalizer instead of the standard normalizer used with UNI and CTransPath.
142 | 
143 | + **`uni_model_checkpoint`** Path to the downloaded UNI checkpoint. Download requires registration: https://huggingface.co/MahmoodLab/UNI/blob/main/pytorch_model.bin.
144 | 
145 | + **`conch_model_checkpoint`** Path to the downloaded CONCH checkpoint. Download requires registration: https://huggingface.co/MahmoodLab/CONCH/blob/main/pytorch_model.bin.
146 | 
147 | 
148 | 
149 | + **`do_superpixels`** Do superpixel segmentation using the SNIC algorithm.
150 | 
151 | + **`export_superpixels_contours`** If true, export superpixel contours in JSON format.
152 | 
153 | + **`superpixel_compactness`** Superpixel compactness parameter; see the SNIC algorithm for details.
154 | 
155 | + **`pixels_per_segment`** Number of pixels per superpixel segment, i.e., superpixel size.
156 | 
157 | + **`superpixel_patch_size`** Superpixel patch size. Warning: patch boundaries are kept flat.
158 | 
159 | + **`superpixel_downsampling_factor`** Downsampling factor applied to the input image before superpixel segmentation.
160 | 
161 | + **`od_block_size`** Block size for OD calculation.
162 | 
163 | + **`expand_nuclei_distance`** Distance in pixels to expand the nuclei mask.
164 | 
165 | 
166 | 
167 | 
168 | + **`export_image`** Export the resized and normalized image in OME-TIFF format.
169 | 
170 | + **`export_image_metadata`** Export input image metadata in OME-XML format.
171 | 
172 | + **`compression`** Compression library to use with OME-TIFF, e.g., 'LZW'.
173 | 
174 | 
175 | + **`downsample_expanded_tile`** Downsample the expanded tile back to the input tile size.
176 | 
177 | + **`expansion_factor`** The tile is read from an expanded area around the tile center.
178 | 
179 | + **`subtiling`** If true, split each tile into subtiles, extract features, and average them across the subtiles.
180 | 
181 | + **`subcoords_factor`** Factor that defines the size of the subtiles.
182 | 
183 | + **`subcoords_list`** Centers of the subtiles within a tile.
184 | 
185 | 
186 | 
187 | + **`do_clustering`** Do dimensionality reduction and clustering. Generate spatial and UMAP plots of imaging feature clusters as well as nuclear morphometric features and classification results.
188 | 
189 | + **`expansion_factor_for_clustering`** Features of the specified expansion factor are used for clustering.
190 | 
191 | + **`suffix_for_clustering`** Features of this type are used for clustering.
192 | 193 | + **`plot_dpi`** DPI (dots per inch) of the figures. 194 | 195 | 196 | 197 | + **`hovernet_device_mode`** GPU or CPU device for use with HoVer-Net. 198 | 199 | + **`ctranspath_device_mode`** GPU or CPU device for use with TransPath inference models. 200 | 201 | 202 | 203 | + **`sample_tiles_subworkflow`** Run a subworkflow where a small number of tiles is saved, along with the HoVer-Net classification data. 204 | 205 | + **`tiles_per_slide`** Number of randomly selected tiles to use in the sampling tiles subworkflow. 206 | 207 | 208 | 209 | + **`do_segmentation_anndata`** DEPRECATED parameter, will be removed in future. 210 | -------------------------------------------------------------------------------- /conf/analysis-img.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | 4 | short_workflow = false 5 | 6 | stain_normalization = true 7 | do_nuclear_segmentation = true 8 | check_focus = true 9 | extract_tile_features = true 10 | sample_tiles_subworkflow = true 11 | do_segmentation_anndata = true 12 | do_imaging_anndata = true 13 | export_image = true 14 | export_image_metadata = true 15 | do_superpixels = false 16 | 17 | // Short workflow skips: ROI extraction, stain normalization, image formatting and resizing, export of image and its metadata 18 | if (params.short_workflow) { 19 | do_nuclear_segmentation = false 20 | check_focus = false 21 | extract_tile_features = true 22 | sample_tiles_subworkflow = false 23 | do_imaging_anndata = true 24 | do_superpixels = false 25 | } 26 | 27 | target_mpp = 0.25 28 | 29 | compression = 'LZW' 30 | tiled_tiff_tile_size = 1024 31 | thumbnail_downsample_factor = 0.05 32 | 33 | mask_background_cutoff = 210.0 34 | 35 | pixel_mask_threshold_low = 100 36 | pixel_mask_threshold_high = 200 37 | 38 | use_provided_grid = true 39 | 40 | grid_type = 'square' // 'hex' 'square' 41 | grid_spot_diamter = 56 // visium=65 // 56 42 | grid_spot_horizontal_spacing = 56 // visium=100 // 112 43 | grid_aspect_correction = 0.95 44 | 45 | fraction_for_mask = 0.1 46 | 47 | do_clustering = true 48 | 49 | downsample_expanded_tile = true 50 | expansion_factor = [1, 2, 3, 4] // [1, 2, 3, 4] if downsampling, then this factor will be rounded up to integer 1.25 -> 2 51 | subtiling = false 52 | subcoords_factor = 4 53 | subcoords_list = '[[1, 1], [3, 1], [2, 2], [1, 3], [3, 3]]' 54 | 55 | if (params.do_clustering || params.do_imaging_anndata) { 56 | expansion_factor_for_clustering = 1 // one of expansion_factor 57 | suffix_for_clustering = 'uni' // 'ctranspath' or 'inception' or 'uni' or 'conch' 58 | plot_dpi = 300 59 | } 60 | 61 | deepfocus_model_path = "/projects/chuang-lab/USERS/domans/dev-focus/retrained-t0t1/" // "/deepfocus/" // "/projects/chuang-lab/USERS/domans/dev-focus/retrained-t0t1/" 62 | 63 | // Request access on HugginFace and download the checkpoints 64 | // UNI checkpoint: https://huggingface.co/MahmoodLab/UNI https://huggingface.co/MahmoodLab/UNI/blob/main/pytorch_model.bin 65 | // CONCH checkpoint: https://huggingface.co/MahmoodLab/CONCH https://huggingface.co/MahmoodLab/CONCH/blob/main/pytorch_model.bin 66 | uni_model_checkpoint = "/projects/chuang-lab/USERS/domans/containers/private/pytorch_model_uni_11_06_2024.bin" 67 | conch_model_checkpoint = "/projects/chuang-lab/USERS/domans/containers/private/pytorch_model_conch_11_06_2024.bin" 68 | use_conch_normalizer = true 69 | 70 | if (params.extract_tile_features) { 71 | // *.tsv.gz features are posted in './features/' 72 | 73 | 
extract_transpath_features = false 74 | extract_mocov3_features = false 75 | extract_inception_features = false 76 | extract_uni_features = true 77 | extract_conch_features = false 78 | } 79 | 80 | if (params.stain_normalization) { 81 | macenko_normalization = true 82 | 83 | if (params.macenko_normalization) { 84 | stain_reference_image = "/projects/chuang-lab/USERS/domans/containers/bronchus.tif" 85 | stain_patch_size = 512 86 | } 87 | else { 88 | stainnet = "/projects/chuang-lab/USERS/domans/containers/StainNet-Public_layer3_ch32.pth" 89 | } 90 | } 91 | 92 | 93 | if (params.sample_tiles_subworkflow) { 94 | hovernet_device_mode = "gpu" 95 | hovernet_batch_size = 1 96 | hovernet_num_inference_workers = 1 97 | hovernet_spot_assignment_factor = 1 98 | hovernet_spot_assignment_shape = 'square' 99 | hovernet_min_cell_type_prob = 0.75 100 | hovernet_chunk_size = 4096 101 | hovernet_tile_size = 1024 102 | } 103 | 104 | 105 | if (params.do_nuclear_segmentation) { 106 | nuclei_segmentation_dir = "nucseg" 107 | 108 | hovernet_segmentation = false 109 | 110 | if (params.hovernet_segmentation) { 111 | hovernet_device_mode = "gpu" 112 | hovernet_spot_assignment_factor = 1 113 | hovernet_spot_assignment_shape = 'square' 114 | hovernet_min_cell_type_prob = 0.75 115 | 116 | if (params.hovernet_device_mode == 'gpu') { 117 | hovernet_batch_size = 32 118 | hovernet_num_inference_workers = 8 119 | hovernet_chunk_size = 10000 120 | hovernet_tile_size = 2048 121 | } 122 | else if (params.hovernet_device_mode == 'cpu') { 123 | hovernet_batch_size = 1 124 | hovernet_num_inference_workers = 1 125 | hovernet_chunk_size = 4096 126 | hovernet_tile_size = 1024 127 | } 128 | } 129 | else { 130 | stardist_model = "/projects/chuang-lab/USERS/domans/containers/stardist-models.v0.1/python_2D_versatile_he/" 131 | stardist_block_size = 4096 132 | stardist_expand_size = 15 133 | } 134 | } 135 | 136 | 137 | if (params.do_superpixels) { 138 | export_superpixels_contours = true 139 | 140 | superpixel_compactness = 1 141 | pixels_per_segment = 10000 142 | superpixel_patch_size = 2048 143 | superpixel_downsampling_factor = 4 144 | 145 | od_block_size = 4096 146 | 147 | expand_nuclei_distance = 15 148 | } 149 | 150 | if (params.sample_tiles_subworkflow) { 151 | tiles_per_slide = 100 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /conf/analysis-one.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | 4 | reference_genome = "/projects/chuang-lab/USERS/domans/reference/refdata-gex-GRCh38-and-mm10-2020-A" 5 | 6 | do_splicing_quantification = true 7 | do_snv_extract = true 8 | do_img_subworkflow = true 9 | 10 | if (params.do_snv_extract) { 11 | bafextract_minimum_mapping_quality = 50 12 | bafextract_minimum_base_quality = 0 13 | bafextract_min_coverage_per_SNV = 20 14 | bafextract_min_MAF_covg_per_SNV = 4 15 | bafextract_min_MAF = 0.1 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /conf/analysis-pancreas.config: -------------------------------------------------------------------------------- 1 | params { 2 | 3 | stain_normalization = true 4 | do_nuclear_sementation = true 5 | do_superpixels = true 6 | export_superpixels_contours = true 7 | check_focus = false 8 | do_imaging_anndata = true 9 | 10 | target_mpp = 0.2208187960959237 11 | thumbnail_downsample_factor = 0.025 12 | 13 | if (params.stain_normalization) { 14 | macenko_normalization = true 15 | 16 | if 
(params.macenko_normalization) { 17 | stain_reference_image = "/sdata/activities/kappsen-tmc/visium/cropped_capture_area/SC2300701_JDC-WP-008-b_patch.tiff" 18 | } 19 | } 20 | 21 | if (params.do_nuclear_sementation) { 22 | hovernet_segmentation = true 23 | 24 | if (params.hovernet_segmentation) { 25 | hovernet_device_mode = "gpu" 26 | 27 | if (params.hovernet_device_mode == 'gpu') { 28 | hovernet_batch_size = 32 29 | hovernet_num_inference_workers = 8 30 | } 31 | else if (params.hovernet_device_mode == 'cpu') { 32 | hovernet_batch_size = 1 33 | hovernet_num_inference_workers = 1 34 | } 35 | } 36 | else { 37 | stardist_block_size = 4096 38 | stardist_expand_size = 15 39 | } 40 | } 41 | 42 | if (params.do_superpixels) { 43 | superpixel_compactness = 1 44 | pixels_per_segment = 10000 45 | superpixel_patch_size = 2048 46 | superpixel_downsampling_factor= 4 47 | 48 | expand_nuclei_distance = 15 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /conf/analysis-two.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | 4 | mouse_reference_genome = "/projects/chuang-lab/USERS/domans/reference/refdata-gex-mm10-2020-A" 5 | human_reference_genome = "/projects/chuang-lab/USERS/domans/reference/refdata-gex-GRCh38-2020-A" 6 | 7 | deconvolution_tool = "xengsort" // "xengsort", "xenome" 8 | 9 | deconvolution_reference_graft = "/projects/chuang-lab/USERS/domans/reference/GCA_009914755.4_T2T-CHM13v2.0_genomic.fna.gz" 10 | deconvolution_reference_host = "/projects/churchill-lab/resource/Custom_Genomes/R84-REL1505/NOD_ShiLtJ/NOD_ShiLtJ.fa" 11 | deconvolution_kmer_size = 35 12 | 13 | if (params.deconvolution_tool == "xenome") { 14 | deconvolution_indices_path = "/projects/chuang-lab/PDXnet/xenome/indices/nod/t2t-k35" 15 | deconvolution_indices_name = "t2t_k35" 16 | } 17 | else if (params.deconvolution_tool == "xengsort") { 18 | deconvolution_indices_path = "/projects/chuang-lab/PDXnet/xengsort/indices/nod/t2t-k25" 19 | deconvolution_indices_name = "t2t_k25" 20 | deconvolution_kmer_size = 25 21 | xengsort_n = "4_500_000_000" 22 | } 23 | 24 | do_splicing_quantification = true 25 | do_snv_extract = true 26 | do_img_subworkflow = true 27 | 28 | if (params.do_snv_extract) { 29 | bafextract_minimum_mapping_quality = 50 30 | bafextract_minimum_base_quality = 0 31 | bafextract_min_coverage_per_SNV = 20 32 | bafextract_min_MAF_covg_per_SNV = 4 33 | bafextract_min_MAF = 0.1 34 | } 35 | 36 | do_merge_mtx = true 37 | } 38 | -------------------------------------------------------------------------------- /conf/containers.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | 4 | container_dir = "/projects/chuang-lab/USERS/domans/containers" 5 | 6 | container_inception = "${params.container_dir}/container-mamba-inception.sif" 7 | container_hovernet = "${params.container_dir}/local/container-singularity-hovernet-py.sif" 8 | container_stainnet = "${params.container_dir}/container-singularity-stainnet.sif" 9 | container_staintools = "${params.container_dir}/container-singularity-staintools.sif" 10 | container_vips = "${params.container_dir}/container-singularity-vips.sif" 11 | 12 | container_uni_conch = "${params.container_dir}/hf-uni-conch.sif" 13 | container_ctranspath = "${params.container_dir}/local/mamba-timm.sif" 14 | container_deepfocus = "${params.container_dir}/deepfocus.sif" 15 | container_ome = "${params.container_dir}/local/ome.sif" 16 | 17 | 
container_xenome = "/projects/compsci/omics_share/meta/containers/quay.io-jaxcompsci-xenome-1.0.1.img" 18 | 19 | container_xengsort = "${params.container_dir}/local/mamba-xenomake.sif" 20 | container_fastqtools = "${params.container_dir}/container-singularity-fastqtools.sif" 21 | container_spaceranger = "${params.container_dir}/container-singularity-spaceranger.sif" 22 | 23 | container_bafextract = "${params.container_dir}/container-singularity-bafextract.sif" 24 | container_samtools = "${params.container_bafextract}" 25 | 26 | container_python = "${params.container_dir}/container-singularity-python.sif" 27 | container_velocyto = "${params.container_dir}/container-singularity-velocyto.sif" 28 | 29 | } 30 | -------------------------------------------------------------------------------- /docs/BAF_extract_scheme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/BAF_extract_scheme.png -------------------------------------------------------------------------------- /docs/Example_CPU_usage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/Example_CPU_usage.png -------------------------------------------------------------------------------- /docs/Scheme NF2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/Scheme NF2.png -------------------------------------------------------------------------------- /docs/Scheme_NF3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/Scheme_NF3.png -------------------------------------------------------------------------------- /docs/example ST wsi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/example ST wsi.png -------------------------------------------------------------------------------- /docs/example non-ST wsi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/example non-ST wsi.png -------------------------------------------------------------------------------- /docs/flow-static.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/flow-static.png -------------------------------------------------------------------------------- /docs/flow.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/flow.gif -------------------------------------------------------------------------------- /docs/hovernet-tissue-mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/hovernet-tissue-mask.png 
-------------------------------------------------------------------------------- /docs/imaging-clustering.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/imaging-clustering.png -------------------------------------------------------------------------------- /docs/mones-per-tile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/mones-per-tile.png -------------------------------------------------------------------------------- /docs/multiscale-features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/multiscale-features.png -------------------------------------------------------------------------------- /docs/route-map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/route-map.png -------------------------------------------------------------------------------- /docs/sub-tiling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/docs/sub-tiling.png -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/STQ/0148f7f1f0d4d8ccbca808fd9452a777ec1673b8/lib/__init__.py -------------------------------------------------------------------------------- /lib/superpixels.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import gzip 4 | import numpy as np 5 | import cv2 6 | import matplotlib.pyplot as plt 7 | 8 | def plot_spx_contours(all_contours, figsize=(15, 15)): 9 | fig, ax = plt.subplots(figsize=figsize) 10 | for c in all_contours.keys(): 11 | xp = [] 12 | yp = [] 13 | for sub_contour in all_contours[c]: 14 | x = np.array(sub_contour).T[0].tolist() 15 | y = np.array(sub_contour).T[1].tolist() 16 | xp += x + [x[0], None] 17 | yp += y + [y[0], None] 18 | ax.plot(xp, yp, '-o', ms=0, lw=2, label=c) 19 | if len(all_contours.keys()) < 10: 20 | plt.legend() 21 | ax.set_aspect('equal') 22 | ax.axis('off') 23 | plt.show() 24 | return 25 | 26 | def get_countours_from_mask(superpixelation): 27 | 28 | ''' 29 | get_countours_from_mask(np.array([[0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8], 30 | [0, 0, 2, 2, 2, 2, 2, 2, 2, 8, 8, 8], 31 | [0, 0, 2, 2, 5, 5, 2, 2, 2, 8, 8, 8], 32 | [0, 0, 2, 2, 5, 5, 2, 2, 2, 8, 8, 8], 33 | [0, 0, 2, 2, 2, 2, 2, 2, 2, 8, 8, 8], 34 | [0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 8, 8], 35 | [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 3, 3], 36 | [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 3, 3], 37 | [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 3, 3], 38 | [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 3, 3], 39 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3], 40 | [0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3]])) 41 | ''' 42 | 43 | raw_contours = cv2.findContours(superpixelation + 1, cv2.RETR_FLOODFILL, cv2.CHAIN_APPROX_SIMPLE) 44 | all_contours = dict() 45 | for c in range(len(raw_contours[0])): 46 | cid = str(superpixelation[raw_contours[0][c][0][0][1], 
raw_contours[0][c][0][0][0]]) 47 | if not cid in all_contours.keys(): 48 | all_contours.update({cid: []}) 49 | new_contour = raw_contours[0][c][:, 0, :].tolist() 50 | all_contours.update({cid: all_contours[cid] + [new_contour]}) 51 | 52 | print('Created %s contours:' % len(all_contours)) 53 | 54 | return all_contours 55 | 56 | def save_contours(all_contours, filename='contours.json.gz'): 57 | 58 | with gzip.GzipFile(filename, 'w') as tempfile: 59 | tempfile.write(json.dumps(all_contours).encode('utf-8')) 60 | 61 | return -------------------------------------------------------------------------------- /lib/wsiGrid.py: -------------------------------------------------------------------------------- 1 | 2 | """Written by S.Domanskyi, 2022 3 | 4 | Module designed to generate a grid of centers of tiles from Whole Slide Image (WSI) 5 | Examples of usage below: 6 | 7 | # Load the module 8 | import lib.wsiGrid as wsiGrid 9 | 10 | # List module components 11 | dir(wsiGrid) 12 | 13 | # Get help on all module functions 14 | help(wsiGrid) 15 | 16 | # Generate and plot Visium-like hexagonal grid 17 | slide_dimensions = 2900, 3060 18 | grid, tile_size = wsiGrid.getGrid(*slide_dimensions, grid_type='hex') 19 | wsiGrid.plotGrid(grid, *slide_dimensions, size=tile_size) 20 | 21 | # Generate and plot Slide-seq-like random grid, save plot to current working directory 22 | slide_dimensions = 2900, 3060 23 | grid, tile_size = wsiGrid.getGrid(*slide_dimensions, grid_type='hex') 24 | grid, tile_size = wsiGrid.perturbGrid(*slide_dimensions, grid, tile_size, delta=0.1) 25 | wsiGrid.plotGrid(grid, *slide_dimensions, size=tile_size, savepath='', show=False) 26 | 27 | # Generate and plot square grid, specify magnification and spot diameter 28 | slide_dimensions = 3000, 1100 29 | grid, tile_size = wsiGrid.getGrid(*slide_dimensions, grid_type='square', magnification=20, spot_diamter=55) 30 | wsiGrid.plotGrid(grid, *slide_dimensions, size=tile_size) 31 | 32 | # enerate and plot large Visium-like hexagonal grid, save to file to current working directory 33 | slide_dimensions = 29000, 30600 34 | grid, tile_size = wsiGrid.getGrid(*slide_dimensions, savepath='') 35 | wsiGrid.plotGrid(grid, *slide_dimensions, size=tile_size, show_spot_labels=False, savepath='') 36 | """ 37 | 38 | import os 39 | import json 40 | 41 | from matplotlib import cm 42 | import matplotlib.pyplot as plt 43 | import matplotlib.patheffects as path_effects 44 | from matplotlib.patches import Circle, Rectangle 45 | from matplotlib.collections import PatchCollection 46 | 47 | import pandas as pd 48 | import numpy as np 49 | 50 | def getGrid(x: int, y: int, grid_type: str = 'hex', factor: float = 64/39, magnification: float = 40.0, 51 | resolution: float = 294/65, spot_diamter: float = 65, spot_horizontal_spacing: float = 100, 52 | aspect_correction: float = 0.95, savepath: str = None, sname: str = ''): 53 | 54 | """Generate grid of tile centers 55 | 56 | Parameters: 57 | x: full resolution image width 58 | 59 | y: full resolution image height 60 | 61 | grid_type: ['hex', 'square'] 62 | 63 | factor: Visium Spatial Gene Expression hex grid factor 64 | 65 | magnification: image magnification 66 | 67 | resolution: pixels per micron of sample at 40x magnification 68 | 69 | spot_diamter: spot diameter in microns 70 | 71 | spot_horizontal_spacing: spot horizontal center-to-center distance in microns 72 | 73 | aspect_correction: Visium capture area is not square, even though officially it is 6.5x6.5mm 74 | 75 | savepath: directory to save data files 76 | 77 | 
sname: identifier for saving data files 78 | 79 | Output: 80 | grid: pandas.DataFrame 81 | 82 | tile_size_pixels: tile size 83 | """ 84 | 85 | tile_size_pixels = resolution * spot_diamter * magnification / 40.0 86 | tile_horizontal_spacing_pixels = resolution * spot_horizontal_spacing * magnification / 40.0 87 | if grid_type=='hex': 88 | tile_vertical_spacing_pixels = 0.5 * factor * tile_horizontal_spacing_pixels / aspect_correction 89 | elif grid_type=='square': 90 | tile_vertical_spacing_pixels = tile_horizontal_spacing_pixels 91 | else: 92 | raise NotImplementedError 93 | 94 | if not savepath is None: 95 | if not os.path.exists(savepath): 96 | os.makedirs(savepath) 97 | 98 | info_dict = {'grid_type': grid_type, 'factor': factor, 'magnification': magnification, 99 | 'resolution': resolution, 'aspect_correction': aspect_correction, 100 | 'spot_diamter': spot_diamter, 'spot_horizontal_spacing': spot_horizontal_spacing, 101 | 'spot_diameter_fullres': tile_size_pixels, 'x': x, 'y': y} 102 | with open(savepath + '%s.json' % sname, 'w') as outfile: 103 | outfile.write(json.dumps(info_dict)) 104 | 105 | nx = int(np.ceil(x / tile_horizontal_spacing_pixels)) 106 | ny = int(np.ceil(y / tile_vertical_spacing_pixels)) 107 | 108 | _grid = [['in_tissue', 'array_row', 'array_col', 'pxl_row_in_fullres', 'pxl_col_in_fullres']] 109 | for i in range(nx): 110 | for j in range(ny): 111 | temp_x = tile_size_pixels/2. + i * tile_horizontal_spacing_pixels 112 | if grid_type=='hex': 113 | if j % 2 == 1: 114 | temp_x += 0.5 * tile_horizontal_spacing_pixels 115 | temp_x = int(temp_x) 116 | temp_y = int(tile_size_pixels/2. + j * tile_vertical_spacing_pixels) 117 | if (temp_x + tile_size_pixels/2. <= x) and (temp_y + tile_size_pixels/2. <= y): 118 | _grid.append([1, j, i, temp_y, temp_x]) 119 | 120 | _grid = pd.DataFrame(columns=np.array(_grid[0]), data=np.array(_grid[1:])) 121 | _grid.index = 'tile-' + (_grid.index+1).astype(str).str.pad(8, fillchar='0') 122 | _grid.index.name = 'barcode' 123 | 124 | if not savepath is None: 125 | if not os.path.exists(savepath): 126 | os.makedirs(savepath) 127 | 128 | _grid.to_csv(savepath + '%s.csv' % sname, header=False) 129 | 130 | return _grid, tile_size_pixels 131 | 132 | def perturbGrid(x, y, grid, tile_size_pixels, n_iterations: int = 5, delta: float = 0.5, seed: int = None, dmax: float = 7., verbose: int = 1): 133 | 134 | """Random perturbations of the grid produce grid limilar to Slide-Seq ST technology 135 | 136 | Parameters: 137 | x: full resolution image width 138 | 139 | y: full resolution image height 140 | 141 | grid: produced by function getGrid 142 | 143 | tile_size_pixels: produced by function getGrid 144 | 145 | n_iterations: number of iterations to randomly perturb the drid 146 | 147 | delta: fraction of the tile size that the tile can be displaced along x or y at most in one move 148 | 149 | seed: random seed to have reproducible perturbation 150 | 151 | dmax: max neighbor distance, number of tile sizes away from the spot center 152 | 153 | verbose: set to 0 to suppress print output 154 | 155 | Output: 156 | grid: pandas.DataFrame 157 | 158 | tile_size_pixels: tile size 159 | """ 160 | 161 | if not seed is None: 162 | np.random.seed(seed) 163 | 164 | x_col_ind = np.where(grid.columns=='pxl_col_in_fullres')[0][0] 165 | y_col_ind = np.where(grid.columns=='pxl_row_in_fullres')[0][0] 166 | 167 | if verbose >= 1: 168 | print('\tComuting neigbors of each spot with dmax: %s' % dmax) 169 | se = pd.Series(index=range(len(grid)), dtype='object') 170 | v = 
grid[['pxl_row_in_fullres', 'pxl_col_in_fullres']].values 171 | for _i, (_y, _x) in enumerate(v): 172 | nn = set(np.where((((v.T[0] - _y)**2 + (v.T[1] - _x)**2)**0.5) < (dmax * tile_size_pixels))[0]) 173 | se[_i] = nn.difference({_i}) 174 | 175 | for iter in range(n_iterations): 176 | if verbose >= 1: 177 | print('Iteration: %s' % iter) 178 | for _i in range(len(grid)): 179 | _x = grid.iloc[_i, x_col_ind] 180 | _y = grid.iloc[_i, y_col_ind] 181 | 182 | _p = (np.random.rand(2) - 0.5) * delta * tile_size_pixels 183 | 184 | nviolations = 0 185 | 186 | nn = np.array(list(se[_i])) 187 | if len(nn) > 0: 188 | vx = grid.iloc[nn, x_col_ind].values 189 | vy = grid.iloc[nn, y_col_ind].values 190 | d = ((vy - _y + _p[0])**2 + (vx - _x + _p[1])**2)**0.5 191 | nviolations += len(set(np.where(d < tile_size_pixels)[0])) 192 | 193 | if ((_y - _p[0]) > (y - 0.5 * tile_size_pixels)) or ((_y - _p[0]) < (0.5 * tile_size_pixels)): 194 | nviolations += 1 195 | 196 | if ((_x - _p[1]) > (x - 0.5 * tile_size_pixels)) or ((_x - _p[1]) < (0.5 * tile_size_pixels)): 197 | nviolations += 1 198 | 199 | if nviolations==0: 200 | grid.iloc[_i, y_col_ind] -= _p[0] 201 | grid.iloc[_i, x_col_ind] -= _p[1] 202 | 203 | return grid, tile_size_pixels 204 | 205 | def plotGrid(grid: pd.DataFrame, x: int, y: int, f: float = 3000, object_shape: str = 'spot', size: int = 294, show_spot_labels: bool = True, show: bool = True, savepath: str = None, sname: str = '', verbose: int = 1): 206 | 207 | 208 | """Plot grid of tiles or spots 209 | 210 | Parameters: 211 | x: full resolution image width 212 | 213 | y: full resolution image height 214 | 215 | f: figure scaling factor 216 | 217 | object_shape: ['spot', 'square', compatible object] 218 | 219 | size: tile height and width, or spot diameter 220 | 221 | show_spot_labels: display spot labels 222 | 223 | savepath: directory to save data files 224 | 225 | sname: identifier for saving data files 226 | 227 | verbose: set to 0 to suppress print output 228 | 229 | Output: 230 | None 231 | """ 232 | 233 | fig, ax = plt.subplots(figsize=(x/f, y/f)) 234 | ax.set_xlim(0, x) 235 | ax.set_ylim(0, y) 236 | ax.axis('off') 237 | ax.set_aspect('equal') 238 | ax.set_ylim(ax.get_ylim()[::-1]) 239 | ax.plot([0, 0, x, x, 0], [y, 0, 0, y, y], color='k') 240 | ax.scatter(0, 0, marker='+', s=1000, c='crimson', clip_on=False) 241 | v = grid[['pxl_row_in_fullres', 'pxl_col_in_fullres']].values 242 | 243 | if object_shape == 'spot': 244 | ax.add_collection(PatchCollection([Circle((x1, y1), size/2) for y1, x1 in v], alpha=0.9, color='gray')) 245 | elif object_shape == 'square': 246 | ax.add_collection(PatchCollection([Rectangle((x1-size/2, y1-size/2), size, size) for y1, x1 in v], alpha=0.9, color='gray')) 247 | else: 248 | raise NotImplementedError 249 | 250 | if show_spot_labels: 251 | for i, (y1, x1) in enumerate(v): 252 | ax.text(x1, y1, i, va='center', ha='center') 253 | 254 | fig.tight_layout() 255 | 256 | if not savepath is None: 257 | if not os.path.exists(savepath): 258 | os.makedirs(savepath) 259 | 260 | fig.savefig(savepath + '%s.png' % sname, facecolor='w') 261 | 262 | if show: 263 | plt.show() 264 | else: 265 | plt.close(fig) 266 | 267 | return -------------------------------------------------------------------------------- /lib/wsiMask.py: -------------------------------------------------------------------------------- 1 | 2 | """Written by S.Domanskyi, 2022 3 | 4 | Module designed to generate a mask for a given grid of centers of tiles from Whole Slide Image (WSI). 
5 | Generate updated grid containing mask values, plot mask and low resolution image. 6 | 7 | Examples of usage below: 8 | 9 | # Load the module 10 | import lib.wsiMask as wsiMask 11 | 12 | # List module components 13 | dir(wsiMask) 14 | 15 | # Get help on all module functions 16 | help(wsiMask) 17 | 18 | ## Generate in tissue mask 19 | wsiMask.getInTissueMask(grid_csv='grid_sample.csv', 20 | grid_json='grid_sample.json, 21 | low_res_image='image_sample.tiff', 22 | show=True, savepath='', sname='sample'); 23 | """ 24 | 25 | import os 26 | import json 27 | import pandas as pd 28 | import numpy as np 29 | 30 | from matplotlib import cm 31 | import matplotlib.pyplot as plt 32 | import matplotlib.patheffects as path_effects 33 | from matplotlib.patches import Circle, Rectangle 34 | from matplotlib.collections import PatchCollection 35 | 36 | import cv2 37 | from skimage.transform import resize 38 | from scipy.ndimage import binary_fill_holes 39 | from skimage.draw import disk 40 | 41 | def plotMask(df, width: int = None, height: int = None, size: float = None, 42 | image = None, figdim = 10, object_shape: str = 'spot', spot_alpha: float = 0.4, 43 | savepath: str = None, sname: str = '', show: bool = True): 44 | 45 | """Plot mask as square tiles or disks/spots 46 | 47 | Parameters: 48 | df: grid produced by function getGrid of wsiGrid module 49 | 50 | width: full resolution image width 51 | 52 | height: full resolution image height 53 | 54 | size: value produced by function getGrid 55 | 56 | image: low resolution image 3D array 57 | 58 | figdim: image scale, the bigger the value, the large mask image will be 59 | 60 | object_shape: ['spot', 'square'] shape of patch to plot as mask 61 | 62 | spot_alpha: transparency of the patches 63 | 64 | savepath: directory to save data files 65 | 66 | sname: identifier for saving data files 67 | 68 | show: display the image, needs interactive backend 69 | 70 | Output: 71 | None 72 | """ 73 | 74 | figdim *= max(width, height) / 30000 75 | 76 | fig, ax = plt.subplots(figsize=(figdim, figdim)) 77 | ax.imshow(image, origin='lower', extent=(0, width, 0, height)) 78 | 79 | v = df[['pxl_row_in_fullres', 'pxl_col_in_fullres']].loc[df['in_tissue']==1].values 80 | 81 | if object_shape == 'spot': 82 | ax.add_collection(PatchCollection([Circle((x1, y1), size/2) for y1, x1 in v], alpha=spot_alpha, color='k', edgecolor=None, linewidth=0)) 83 | elif object_shape == 'square': 84 | ax.add_collection(PatchCollection([Rectangle((x1-size/2, y1-size/2), size, size) for y1, x1 in v], alpha=spot_alpha, color='k', edgecolor=None, linewidth=0)) 85 | else: 86 | raise NotImplementedError 87 | 88 | ax.set_ylim(ax.get_ylim()[::-1]) 89 | ax.set_aspect('equal') 90 | ax.axis('off') 91 | ax.plot([0, 0, width, width, 0], [0, height, height, 0, 0], linewidth=0.5, c='k', clip_on=False) 92 | 93 | fig.tight_layout() 94 | 95 | if not savepath is None: 96 | if not os.path.exists(savepath): 97 | os.makedirs(savepath) 98 | 99 | fig.savefig(savepath + '%s.png' % sname, dpi=150, facecolor='w') 100 | 101 | if show: 102 | plt.show() 103 | else: 104 | plt.close(fig) 105 | 106 | return 107 | 108 | def getInTissuePixelMask(low_res_image: str, low: float = 100, high: float = 200, savepath: str = None, sname: str = ''): 109 | 110 | """Plot mask as square tiles or disks/spots 111 | 112 | Parameters: 113 | low_res_image: path to file with low resolution image 114 | 115 | low: low threshold 116 | 117 | high: high threshold 118 | 119 | savepath: directory to save data files 120 | 121 | sname: identifier for 
saving data files 122 | 123 | Output: 124 | Pixel in tissue mask 125 | """ 126 | 127 | v = plt.imread(low_res_image)[:, :, :3].mean(axis=2) 128 | 129 | vc = v.copy() 130 | v[vc < low] = 0 131 | v[vc > high] = 0 132 | v[(vc >= low) & (vc <= high)] = 1 133 | 134 | df = pd.DataFrame(v.T) 135 | 136 | if not savepath is None: 137 | if not os.path.exists(savepath): 138 | os.makedirs(savepath) 139 | 140 | df.to_csv(savepath + '%s.csv' % sname, header=False, index=False) 141 | 142 | return df 143 | 144 | def getInTissueTileMask(pixel_mask_csv: str, grid_csv: str, grid_json: str, low_res_image: str, plot_mask: bool = True, 145 | fraction: float = 0.1, savepath: str = None, sname: str = '', show: bool = False): 146 | 147 | """Plot mask as square tiles or disks/spots 148 | 149 | Parameters: 150 | grid_csv: csv file produced by function getGrid of wsiGrid module 151 | 152 | grid_json: json file produced by function getGrid of wsiGrid module 153 | 154 | low_res_image: path to file with low resolution image 155 | 156 | plot_mask: whether to make a plot with mask and low resolution image 157 | 158 | fraction: fraction of low resolution pixels in tissue to call patch in_tissue 159 | 160 | savepath: directory to save data files 161 | 162 | sname: identifier for saving data files 163 | 164 | show: display the image, needs interactive backend 165 | 166 | Output: 167 | df_grid: grid of centers with updated mask column 168 | """ 169 | 170 | with open(grid_json) as f: 171 | info_dict = json.load(f) 172 | slide_fullres_width = info_dict['x'] 173 | slide_fullres_height = info_dict['y'] 174 | spot_diameter_fullres = info_dict['spot_diameter_fullres'] 175 | 176 | img_RGB_high_res = plt.imread(low_res_image)[:, :, :3] 177 | 178 | scale_factor = 0.5 * (img_RGB_high_res.shape[0] / slide_fullres_height) + 0.5 * (img_RGB_high_res.shape[1] / slide_fullres_width) 179 | 180 | df_grid = pd.read_csv(grid_csv, header=None, index_col=0) 181 | df_grid.columns = ['in_tissue', 'array_row', 'array_col', 'pxl_row_in_fullres', 'pxl_col_in_fullres'] 182 | df_grid.index.name = 'barcode' 183 | 184 | df_pixel_mask = pd.read_csv(pixel_mask_csv, index_col=None, header=None) 185 | 186 | for tile in df_grid.index[:]: 187 | tile_x = int(df_grid.loc[tile]['pxl_col_in_fullres'] * scale_factor) 188 | tile_y = int(df_grid.loc[tile]['pxl_row_in_fullres'] * scale_factor) 189 | tile_half_size = int(spot_diameter_fullres * scale_factor / 2) 190 | in_tissue = int(df_pixel_mask.iloc[tile_x - tile_half_size : tile_x + tile_half_size, 191 | tile_y - tile_half_size : tile_y + tile_half_size].mean().mean() >= fraction) 192 | df_grid.loc[tile, 'in_tissue'] = in_tissue 193 | 194 | if plot_mask: 195 | plotMask(df_grid, width=slide_fullres_width, height=slide_fullres_height, 196 | size=spot_diameter_fullres, image=img_RGB_high_res[:, :, :3], 197 | figdim=10, object_shape='square', savepath=savepath, sname=sname, show=show) 198 | 199 | if not savepath is None: 200 | if not os.path.exists(savepath): 201 | os.makedirs(savepath) 202 | 203 | df_grid['in_tissue'].to_csv(savepath + '%s.csv' % sname, header=False) 204 | 205 | return df_grid 206 | 207 | def makeTissueMaskFromTileMask(gridFile, gridInfoFile, tileMaskFile, squarePatch=False, upSizeFactor=1.5, 208 | downSizeChunkPx=1000, kernelSize=20, savePath='tissue_mask.png'): 209 | 210 | with open(gridInfoFile, 'r') as tempfile: 211 | info = json.loads(tempfile.read()) 212 | s, x, y = int(info['spot_diameter_fullres']), info['x'], info['y'] 213 | print(s, x, y) 214 | 215 | se_mask = pd.read_csv(tileMaskFile, 
index_col=0, header=None)[1].rename(None) 216 | 217 | df_grid = pd.read_csv(gridFile, index_col=0, header=None)[[4, 5]] 218 | df_grid.columns = ['x', 'y'] 219 | df_grid.index.name = None 220 | 221 | df_grid = df_grid.loc[se_mask[se_mask==1].index.values] 222 | 223 | downsampleFactor = int(np.ceil(max(x, y) / downSizeChunkPx)) 224 | 225 | m = np.zeros((x, y), dtype=np.int8)[::downsampleFactor, ::downsampleFactor] 226 | print(m.shape) 227 | 228 | maxxd = int(x / downsampleFactor) 229 | maxyd = int(y / downsampleFactor) 230 | 231 | for ty, tx in df_grid.values: 232 | xd = int(tx / downsampleFactor) 233 | yd = int(ty / downsampleFactor) 234 | radius = int(s * upSizeFactor / downsampleFactor) 235 | if squarePatch: 236 | m[xd-radius: xd+radius, yd-radius: yd+radius] = 1 237 | else: 238 | cc, rr = disk((xd, yd), radius) 239 | cc[cc<0] = 0 240 | cc[cc>maxxd] = maxxd-1 241 | rr[rr<0] = 0 242 | rr[rr>maxyd] = maxyd-1 243 | try: 244 | m[cc, rr] = 1 245 | except: 246 | pass 247 | 248 | m = m.T * 255 249 | m = resize(m, (int(np.round(y/downsampleFactor, 0)), int(np.round(x/downsampleFactor, 0))), order=3) 250 | m[m>=m.max()/2] = 255 251 | m[m [ (it.sample), it ] } ) 20 | .set{ samples } 21 | 22 | EXPORT_PARAMETERS () 23 | EXPORT_SAMPLEINFO ( samples ) 24 | 25 | if ( params.workflow == "arbitrary_grid" ) { 26 | ARB ( samples ) 27 | } 28 | 29 | if ( params.workflow == "one_reference" ) { 30 | ONE ( samples ) 31 | } 32 | 33 | if ( params.workflow == "two_references" ) { 34 | TWO ( samples ) 35 | } 36 | 37 | if ( params.workflow == "deconvolution_indices" ) { 38 | if ( params.deconvolution_tool == "xenome" ) { 39 | if ( !file("${params.deconvolution_indices_path}/${params.deconvolution_indices_name}-both.kmers.low-bits.lwr").exists() ) { 40 | XINDEX ( ) 41 | } 42 | } 43 | else if ( params.deconvolution_tool == "xengsort" ) { 44 | if ( !file("${params.deconvolution_indices_path}/${params.deconvolution_indices_name}-xind.hash").exists() ) { 45 | XINDEX ( ) 46 | } 47 | } 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /modules/local/bafextract.nf: -------------------------------------------------------------------------------- 1 | 2 | process GET_REFERENCE_PILEUP { 3 | 4 | tag "$reference" 5 | label "bafextract" 6 | 7 | input: 8 | path(reference) 9 | 10 | output: 11 | tuple file("sizes.list"), file("pileup/*") 12 | 13 | script: 14 | """ 15 | mkdir pileup 16 | 17 | /BAFExtract/bin/BAFExtract -preprocess_FASTA ${reference}/fasta/genome.fa pileup 18 | 19 | cut -f1,2 ${reference}/fasta/genome.fa.fai > sizes.list 20 | """ 21 | } 22 | 23 | 24 | process GET_PILEUP_OF_BAM { 25 | 26 | tag "$sample_id" 27 | label "bafextract" 28 | 29 | input: 30 | tuple val(sample_id), path(bam) 31 | tuple path(sizes), path(pileup) 32 | 33 | output: 34 | tuple val(sample_id), file("bam_pileup/*") 35 | 36 | script: 37 | """ 38 | mkdir bam_pileup 39 | 40 | if [[ ${bam[0]} == *".bam.bai"* ]]; then 41 | bbam=${bam[1]} 42 | else 43 | bbam=${bam[0]} 44 | fi 45 | 46 | samtools view \${bbam} | /BAFExtract/bin/BAFExtract -generate_compressed_pileup_per_SAM stdin ${sizes} bam_pileup ${params.bafextract_minimum_mapping_quality} ${params.bafextract_minimum_base_quality} 47 | """ 48 | } 49 | 50 | 51 | process GET_SNV_FROM_PILEUP { 52 | 53 | tag "$sample_id" 54 | label "bafextract" 55 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 56 | 57 | input: 58 | tuple val(sample_id), path(bam_pileup) 59 | tuple path(sizes), path(pileup) 60 | 
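    // Final step of the BAFExtract chain in this module: GET_REFERENCE_PILEUP turns the
    // reference FASTA into per-chromosome pileup binaries plus a sizes.list,
    // GET_PILEUP_OF_BAM compresses a pileup from the position-sorted BAM, and this
    // process runs `-get_SNVs_per_pileup` with the params.bafextract_min_* thresholds
    // to write extracted.baf into a per-species output folder.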
val(species) 61 | 62 | output: 63 | tuple val(sample_id), file("${species}/extracted.baf") 64 | 65 | script: 66 | """ 67 | mkdir ref_pileup 68 | 69 | for f in ${pileup} 70 | do 71 | if [[ \${f} == *" "* ]] 72 | then 73 | newf="`cut -d ' ' -f 1 <<< "\$f"`.bin" 74 | cp "\${f}" "ref_pileup/\${newf}" 75 | else 76 | cp "\${f}" "ref_pileup/\${f}" 77 | fi 78 | done 79 | 80 | mkdir bam_pileup 81 | cp ${bam_pileup} bam_pileup 82 | 83 | mkdir "${species}" 84 | 85 | /BAFExtract/bin/BAFExtract -get_SNVs_per_pileup ${sizes} bam_pileup ref_pileup ${params.bafextract_min_coverage_per_SNV} ${params.bafextract_min_MAF_covg_per_SNV} ${params.bafextract_min_MAF} "${species}/extracted.baf" 86 | 87 | rm -R ref_pileup 88 | rm -R bam_pileup 89 | """ 90 | } 91 | -------------------------------------------------------------------------------- /modules/local/deconvolution.nf: -------------------------------------------------------------------------------- 1 | 2 | process XENOME_GENERATE_INDEX { 3 | 4 | tag "${params.deconvolution_indices_name}" 5 | publishDir "${params.deconvolution_indices_path}", pattern: "${params.deconvolution_indices_name}-*", mode: 'copy', overwrite: false 6 | 7 | input: 8 | path host_fasta 9 | path graft_fasta 10 | val kmer_size 11 | 12 | output: 13 | path("${params.deconvolution_indices_name}-*"), emit: indices_path 14 | 15 | script: 16 | """ 17 | mkdir tempw 18 | 19 | /xenome-1.0.1-r/xenome index \ 20 | --kmer-size ${kmer_size} \ 21 | --prefix ${params.deconvolution_indices_name} \ 22 | --tmp-dir tempw \ 23 | --num-threads ${task.cpus} \ 24 | --host "${host_fasta}" \ 25 | --graft "${graft_fasta}" \ 26 | --verbose \ 27 | --max-memory 20 28 | """ 29 | } 30 | 31 | process XENGSORT_GENERATE_INDEX { 32 | 33 | tag "${params.deconvolution_indices_name}" 34 | publishDir "${params.deconvolution_indices_path}", pattern: "${params.deconvolution_indices_name}-xind*", mode: 'copy', overwrite: false 35 | 36 | input: 37 | path host_fasta 38 | path graft_fasta 39 | val kmer_size 40 | 41 | output: 42 | path("${params.deconvolution_indices_name}-xind*"), emit: indices_path 43 | 44 | script: 45 | """ 46 | xengsort index \ 47 | -H "${host_fasta}" \ 48 | -G "${graft_fasta}" \ 49 | -n ${params.xengsort_n} \ 50 | -k ${kmer_size} \ 51 | -W ${task.cpus} \ 52 | --index ${params.deconvolution_indices_name}-xind 53 | """ 54 | } 55 | 56 | process DECONVOLUTION_XENOME { 57 | 58 | tag "$sample_id" 59 | publishDir "${params.outdir}/${sample_id}", pattern: '.command.out', saveAs: { filename -> "xenome.summary.txt" }, mode: 'copy', overwrite: true 60 | 61 | input: 62 | tuple val(sample_id), path(fastq) 63 | path(indices_path) 64 | val(indices_name) 65 | 66 | output: 67 | tuple val(sample_id), file("categorized/unsorted_human_{1,2}.fastq"), emit: human 68 | tuple val(sample_id), file("categorized/unsorted_mouse_{1,2}.fastq"), emit: mouse 69 | path(".command.out"), emit: summary 70 | 71 | script: 72 | """ 73 | mkdir categorized 74 | mkdir tmp 75 | 76 | /xenome-1.0.1-r/xenome classify \ 77 | -T ${task.cpus} \ 78 | -i ${fastq[0]} \ 79 | -i ${fastq[1]} \ 80 | --pairs \ 81 | -P ${indices_path}/${indices_name} \ 82 | --graft-name human \ 83 | --host-name mouse \ 84 | --output-filename-prefix categorized/unsorted \ 85 | --tmp-dir tmp \ 86 | --verbose 87 | """ 88 | } 89 | 90 | process DECONVOLUTION_XENGSORT { 91 | 92 | tag "$sample_id" 93 | publishDir "${params.outdir}/${sample_id}", pattern: '.command.out', saveAs: { filename -> "xengsort.summary.txt" }, mode: 'copy', overwrite: params.overwrite_files_on_publish 94 | 95 | input: 
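    // Note on the paired reads handled below: xengsort classifies only fastq[1]
    // (in 10x spatial libraries R2 carries the cDNA and R1 the barcode plus UMI,
    // an assumption about the library layout rather than something stated here),
    // and the matching R1 mates are then recovered by read ID with `seqtk subseq`,
    // keeping the per-species FASTQ pairs in sync.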
96 | tuple val(sample_id), path(fastq) 97 | path(indices_path) 98 | val(indices_name) 99 | 100 | output: 101 | tuple val(sample_id), file("unsorted_human_{1,2}.fastq"), emit: human 102 | tuple val(sample_id), file("unsorted_mouse_{1,2}.fastq"), emit: mouse 103 | path(".command.out"), emit: summary 104 | 105 | script: 106 | """ 107 | xengsort classify \ 108 | --index "${indices_path}/${params.deconvolution_indices_name}-xind" \ 109 | --fastq ${fastq[1]} \ 110 | --out fastq \ 111 | --threads ${task.cpus} \ 112 | --chunksize 32.0 \ 113 | --compression none 114 | 115 | mv fastq-host.fq unsorted_mouse_2.fastq 116 | mv fastq-graft.fq unsorted_human_2.fastq 117 | 118 | seqtk subseq ${fastq[0]} <(fgrep "@" unsorted_mouse_2.fastq | cut -d ' ' -f1 | cut -d '@' -f2) >> unsorted_mouse_1.fastq 119 | seqtk subseq ${fastq[0]} <(fgrep "@" unsorted_human_2.fastq | cut -d ' ' -f1 | cut -d '@' -f2) >> unsorted_human_1.fastq 120 | """ 121 | } 122 | 123 | process SORT_FASTQ { 124 | 125 | tag "$sample_id" 126 | label "low_process" 127 | 128 | input: 129 | tuple val(sample_id), path(fastq) 130 | 131 | output: 132 | tuple val(sample_id), file("sorted_{1,2}.fastq") 133 | 134 | script: 135 | """ 136 | fastq-sort --id ${fastq[0]} > "sorted_1.fastq" 137 | fastq-sort --id ${fastq[1]} > "sorted_2.fastq" 138 | """ 139 | } 140 | -------------------------------------------------------------------------------- /modules/local/focus.nf: -------------------------------------------------------------------------------- 1 | 2 | process CHECK_FOCUS { 3 | 4 | tag "$sample_id" 5 | label 'process_focus' 6 | maxRetries 3 7 | errorStrategy { task.attempt <= maxRetries ? 'retry' : 'finish' } 8 | memory { 12.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 12.GB } 9 | publishDir "${params.outdir}/${sample_id}/focus", pattern: 'output/*', saveAs: { "${file(it).getFileName()}" }, mode: 'copy', overwrite: params.overwrite_files_on_publish 10 | 11 | input: 12 | tuple val(sample_id), path(image), val(size) 13 | 14 | output: 15 | tuple val(sample_id), file("output/*") 16 | 17 | script: 18 | """ 19 | [ ! 
-d "output" ] && mkdir "output" 20 | 21 | python3 /deepfocus/runDeepFocus.py --checkpoint-dir="${params.deepfocus_model_path}" --wsi-file="${image}" --output-dir="output/" 22 | """ 23 | } 24 | -------------------------------------------------------------------------------- /modules/local/gunzip.nf: -------------------------------------------------------------------------------- 1 | 2 | process GUNZIP { 3 | 4 | tag "$sample_id" 5 | 6 | input: 7 | tuple val(sample_id), path(fastq) 8 | 9 | output: 10 | tuple val(sample_id), file("*R{1,2}*.fastq") 11 | 12 | script: 13 | """ 14 | gzip -d -k ${fastq} 15 | """ 16 | } 17 | 18 | 19 | process GUNZIP_SEPARATE { 20 | 21 | tag "$sample_id" 22 | 23 | input: 24 | tuple val(sample_id), path(fastq) 25 | 26 | output: 27 | tuple val(sample_id), file("*_R1_*.fastq"), emit: R1 28 | tuple val(sample_id), file("*_R2_*.fastq"), emit: R2 29 | 30 | script: 31 | """ 32 | gzip -d -k ${fastq} 33 | """ 34 | } 35 | 36 | 37 | process GUNZIP_FASTA { 38 | 39 | input: 40 | path(fasta) 41 | 42 | output: 43 | file("*{.fa,.fna}") 44 | 45 | script: 46 | """ 47 | gzip -d -k ${fasta} 48 | """ 49 | } 50 | -------------------------------------------------------------------------------- /modules/local/load.nf: -------------------------------------------------------------------------------- 1 | 2 | import groovy.json.JsonOutput 3 | 4 | process LOAD_SAMPLE_INFO { 5 | 6 | tag "$sample_id" 7 | 8 | input: 9 | tuple val(sample_id), val(meta), path(fastq), path(image) 10 | 11 | output: 12 | tuple val(sample_id), file(image), emit: image 13 | tuple val(sample_id), file("${fastq}/*R{1,2}*.fastq*"), emit: fastq 14 | 15 | script: 16 | """ 17 | """ 18 | } 19 | 20 | 21 | process EXPORT_PARAMETERS { 22 | 23 | publishDir "${params.tracedir}", pattern: '{*.json}', mode: 'copy', overwrite: params.overwrite_files_on_publish 24 | 25 | output: 26 | path 'parameters.json' 27 | 28 | script: 29 | "echo '${JsonOutput.toJson(params)}' > parameters.json" 30 | } 31 | 32 | 33 | process EXPORT_SAMPLEINFO { 34 | 35 | publishDir "${params.outdir}/${sample_id}", pattern: '{*.json}', mode: 'copy', overwrite: params.overwrite_files_on_publish 36 | 37 | input: 38 | tuple val(sample_id), val(meta) 39 | 40 | output: 41 | path 'info.json' 42 | 43 | script: 44 | "echo '${JsonOutput.toJson(meta)}' > info.json" 45 | } 46 | -------------------------------------------------------------------------------- /modules/local/merge.nf: -------------------------------------------------------------------------------- 1 | 2 | process CONVERT_SEGMENTATION_DATA { 3 | 4 | tag "$sample_id" 5 | label 'python_low_process' 6 | maxRetries 1 7 | errorStrategy { task.attempt <= maxRetries ? 
'retry' : 'finish' } 8 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 9 | memory { 1.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 3.GB } 10 | 11 | input: 12 | tuple val(sample_id), path(segmentation_csv), val(size) 13 | 14 | output: 15 | tuple val(sample_id), file("segmentation.h5ad") 16 | 17 | script: 18 | """ 19 | #!/usr/bin/env python 20 | 21 | import os 22 | os.environ["NUMBA_CACHE_DIR"] = "./tmp" 23 | 24 | import pandas as pd 25 | import scanpy as sc 26 | 27 | df_temp = pd.read_csv("${segmentation_csv}", index_col=0, header=0).sort_index() 28 | df_temp.index.name = 'id' 29 | df_temp.insert(0, 'original_barcode', df_temp.index.values) 30 | 31 | ad = sc.AnnData(X=df_temp.loc[:, ~df_temp.columns.str.contains('original_barcode')], 32 | obs=df_temp.loc[:, df_temp.columns.str.contains('original_barcode')]) 33 | 34 | ad.write("segmentation.h5ad") 35 | """ 36 | } 37 | 38 | 39 | process CONVERT_CSV_TO_ANNDATA { 40 | 41 | tag "$sample_id" 42 | label "python_low_process" 43 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 44 | 45 | input: 46 | tuple val(sample_id), path(data_csv), val(expansion_factor), val(suffix) 47 | 48 | output: 49 | tuple val(sample_id), file("img.data.${suffix}-${expansion_factor}.h5ad"), val(expansion_factor), val(suffix) 50 | 51 | script: 52 | """ 53 | #!/usr/bin/env python 54 | 55 | import os 56 | os.environ["NUMBA_CACHE_DIR"] = "./tmp" 57 | 58 | import gc 59 | import pandas as pd 60 | import scanpy as sc 61 | 62 | df_temp = pd.read_csv("${data_csv}", index_col=[0,1]).xs(1, level='in_tissue') 63 | df_temp.insert(0, 'original_barcode', df_temp.index.values) 64 | 65 | ad = sc.AnnData(X=df_temp.loc[:, df_temp.columns.str.contains('feat')], 66 | obs=df_temp.loc[:, ~df_temp.columns.str.contains('feat')]) 67 | 68 | df_temp = None 69 | gc.collect() 70 | 71 | ad.write("img.data.${suffix}-${expansion_factor}.h5ad") 72 | """ 73 | } 74 | 75 | 76 | process MERGE_MTX { 77 | 78 | tag "$sample_id" 79 | label "python_low_process" 80 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 81 | 82 | input: 83 | tuple val(sample_id), path('mtx_mouse/*'), path('mtx_human/*') 84 | 85 | output: 86 | tuple val(sample_id), file("raw_feature_bc_matrix/*") 87 | 88 | script: 89 | 90 | """ 91 | #!/usr/bin/env python 92 | 93 | import os 94 | os.environ[ 'NUMBA_CACHE_DIR' ] = './tmp/' 95 | 96 | import sys 97 | sys.path.append("${projectDir}/bin") 98 | from mtx_tools import read_mtx_combine_and_write_mtx as combine 99 | from mtx_tools import read_sc_from_mtx as read 100 | 101 | combine(read('mtx_mouse/'), read('mtx_human/'), saveDataDir='raw_feature_bc_matrix/') 102 | """ 103 | } 104 | 105 | 106 | process RETURN_SEPARATE_MTX { 107 | 108 | tag "$sample_id" 109 | label "python_low_process" 110 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true 111 | 112 | input: 113 | tuple val(sample_id), path('mtx_mouse/*'), path('mtx_human/*') 114 | 115 | output: 116 | tuple val(sample_id), file("raw_feature_bc_matrix_mouse/*"), file("raw_feature_bc_matrix_human/*") 117 | 118 | script: 119 | """ 120 | [ ! -d "raw_feature_bc_matrix_mouse" ] && mkdir raw_feature_bc_matrix_mouse 121 | cp -R mtx_mouse/* raw_feature_bc_matrix_mouse/ 122 | 123 | [ ! 
-d "raw_feature_bc_matrix_human" ] && mkdir raw_feature_bc_matrix_human 124 | cp -R mtx_human/* raw_feature_bc_matrix_human/ 125 | """ 126 | } 127 | 128 | 129 | process RETURN_MTX { 130 | 131 | tag "$sample_id" 132 | label "python_low_process" 133 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 134 | 135 | input: 136 | tuple val(sample_id), path('mtx/*') 137 | 138 | output: 139 | tuple val(sample_id), file("raw_feature_bc_matrix/*") 140 | 141 | script: 142 | """ 143 | [ ! -d "raw_feature_bc_matrix" ] && mkdir raw_feature_bc_matrix 144 | cp -R mtx/* raw_feature_bc_matrix/ 145 | """ 146 | } 147 | -------------------------------------------------------------------------------- /modules/local/ome.nf: -------------------------------------------------------------------------------- 1 | 2 | process CONVERT_TO_PYRAMIDAL_OME { 3 | 4 | tag "$sample_id" 5 | label 'process_ome' 6 | maxRetries 1 7 | errorStrategy { task.attempt <= maxRetries ? 'retry' : 'finish' } 8 | publishDir "${params.outdir}/${sample_id}", pattern: 'image.ome.tiff', mode: 'copy', overwrite: params.overwrite_files_on_publish 9 | 10 | input: 11 | tuple val(sample_id), path(image) 12 | 13 | output: 14 | tuple val(sample_id), file("image.ome.tiff") 15 | 16 | script: 17 | """ 18 | export BF_MAX_MEM=24g 19 | export _JAVA_OPTIONS="-Xmx24g" 20 | bfconvert -version 21 | 22 | bfconvert -noflat -bigtiff -overwrite \ 23 | -pyramid-resolutions 3 -pyramid-scale 4 -tilex ${params.tiled_tiff_tile_size} -tiley ${params.tiled_tiff_tile_size} \ 24 | -compression ${params.compression} "${image}" image.ome.tiff || \ 25 | bfconvert -noflat -bigtiff -overwrite \ 26 | -pyramid-resolutions 2 -pyramid-scale 4 -tilex ${params.tiled_tiff_tile_size} -tiley ${params.tiled_tiff_tile_size} \ 27 | -compression ${params.compression} "${image}" image.ome.tiff || \ 28 | bfconvert -noflat -bigtiff -overwrite \ 29 | -pyramid-resolutions 1 -pyramid-scale 4 -tilex ${params.tiled_tiff_tile_size} -tiley ${params.tiled_tiff_tile_size} \ 30 | -compression ${params.compression} "${image}" image.ome.tiff 31 | """ 32 | } 33 | 34 | 35 | process EXTRACT_IMAGE_METADATA { 36 | 37 | tag "$sample_id" 38 | label 'process_ome' 39 | maxRetries 1 40 | errorStrategy { task.attempt <= maxRetries ? 'retry' : 'finish' } 41 | publishDir "${params.outdir}/${sample_id}", pattern: 'metadata.ome.xml', mode: 'copy', overwrite: params.overwrite_files_on_publish 42 | memory { 4.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 0.GB } 43 | 44 | input: 45 | tuple val(sample_id), path(fileslide), path(roifile), val(mpp), val(size) 46 | 47 | output: 48 | tuple val(sample_id), file("metadata.ome.xml") 49 | 50 | script: 51 | """ 52 | showinf -omexml-only -nopix ${fileslide} >> "metadata.ome.xml" 53 | """ 54 | } -------------------------------------------------------------------------------- /modules/local/postprocessing.nf: -------------------------------------------------------------------------------- 1 | 2 | process DIMRED_CLUSTER_MORPH { 3 | 4 | tag "$sample_id" 5 | label 'python_low_process' 6 | maxRetries 2 7 | errorStrategy { task.attempt <= maxRetries ? 
'retry' : 'finish' } 8 | publishDir "${params.outdir}/${sample_id}/figures", pattern: 'figures/*/*.png', saveAs: { filename -> "${filename.split("/")[filename.split("/").length - 1]}" }, mode: 'copy', overwrite: params.overwrite_files_on_publish 9 | memory { 1.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 3.GB } 10 | 11 | input: 12 | tuple val(sample_id), path(grid_csv), path(grid_json), path(thumb), path(segmentation_csv), path(features_h5ad), val(expansion_factor), val(suffix) 13 | 14 | output: 15 | tuple val(sample_id), file("figures/*/*.png") 16 | 17 | script: 18 | """ 19 | #!/usr/bin/env python 20 | 21 | import os 22 | os.environ["NUMBA_CACHE_DIR"] = "./tmp" 23 | 24 | import sys 25 | import json 26 | import numpy as np 27 | import pandas as pd 28 | import scanpy as sc 29 | import matplotlib.pyplot as plt 30 | 31 | plt.rcParams['figure.dpi'] = ${params.plot_dpi} 32 | 33 | def loadImFeatures(dpath): 34 | df_temp = pd.read_csv(dpath, index_col=[0,1], sep=',').xs(1, level='in_tissue') 35 | df_temp.insert(0, 'original_barcode', df_temp.index.values) 36 | ad = sc.AnnData(X=df_temp.loc[:, df_temp.columns.str.contains('feat')], 37 | obs=df_temp.loc[:, ~df_temp.columns.str.contains('feat')]) 38 | return ad 39 | 40 | def loadAdImage(): 41 | thumbnail = plt.imread("${thumb}") 42 | with open("${grid_json}", 'r') as f: 43 | d = json.load(f) 44 | grid = pd.read_csv("${grid_csv}", index_col=0, header=None) 45 | image = {'library_id': {'images': {'lowres': thumbnail}, 46 | 'metadata': {'chemistry_description': None, 'software_version': None}, 47 | 'scalefactors': {'tissue_lowres_scalef': thumbnail.shape[0]/d['y'], 48 | 'spot_diameter_fullres': d['spot_diameter_fullres']}}}, grid.index.values, grid[[5, 4]].values 49 | return image 50 | 51 | # Load data 52 | ad = sc.read_h5ad("${features_h5ad}") 53 | df_temp = pd.read_csv("${segmentation_csv}", index_col=0, header=0).reindex(ad.obs.index) 54 | df_temp.index.name = 'id' 55 | ad.obs = pd.concat([ad.obs, df_temp], axis=1) 56 | 57 | # Load image 58 | image = loadAdImage() 59 | ad.uns['spatial'] = image[0] 60 | 61 | ad.obsm['spatial'] = pd.DataFrame(index=image[1], data=image[2]).reindex(ad.obs['original_barcode']).values 62 | 63 | # Morphometrics spatial plots 64 | cols1 = [None] + df_temp.columns[df_temp.columns.isin(['average_perimeter_length', 'average_area', 'average_eccentricity', 65 | 'average_orientation', 'average_cell_type_prob'])].values.tolist() 66 | cols2 = [None] + df_temp.columns[df_temp.columns.str.contains('count')].values.tolist() 67 | 68 | c, r = np.ptp(ad.obs['array_row']), np.ptp(ad.obs['array_col']) 69 | f = 5 70 | if r > c: 71 | figsize = f, f * c/r 72 | else: 73 | figsize = f * r/c, f 74 | 75 | if not os.path.exists('figures/show/'): 76 | os.makedirs('figures/show/') 77 | if not os.path.exists('figures/umap/'): 78 | os.makedirs('figures/umap/') 79 | 80 | spot_size = ad.uns['spatial']['library_id']['scalefactors']['spot_diameter_fullres'] 81 | print(spot_size) 82 | 83 | spot_size *= ${params.grid_spot_horizontal_spacing} / ${params.grid_spot_diamter} 84 | print(spot_size) 85 | 86 | plt.rcParams["figure.figsize"] = figsize 87 | sc.pl.spatial(ad, img_key='lowres', color=cols1, spot_size=spot_size, cmap='rainbow', ncols=3, show=False, save='/spatial_plot_morphometric.png'); 88 | sc.pl.spatial(ad, img_key='lowres', color=cols2, spot_size=spot_size, cmap='rainbow', ncols=3, show=False, save='/spatial_plot_classification.png'); 89 | 90 | print(ad.obs) 91 | print(ad) 92 | 93 | # Dimensionality reduction 94 | 
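    # The block below treats the per-tile image features (the "feat" columns in ad.X)
    # like genes in a standard scanpy workflow: keep the 500 most variable features,
    # z-score them, run PCA with up to 30 components, build a kNN graph, embed with
    # UMAP and cluster with Leiden at resolution 0.5. A hypothetical sweep over
    # resolutions, shown for illustration only and not executed by this pipeline:
    #
    #     for res in (0.25, 0.5, 1.0):
    #         sc.tl.leiden(ad, key_added='cluster_res_%s' % res, resolution=res)
    #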
sc.pp.highly_variable_genes(ad, flavor='seurat', n_top_genes=500) 95 | sc.pp.scale(ad) 96 | sc.pp.pca(ad, n_comps=min(30, ad.shape[0]-1), zero_center=False, use_highly_variable=True) 97 | sc.pp.neighbors(ad, use_rep='X_pca') 98 | sc.tl.umap(ad) 99 | 100 | # Clustering 101 | res = 0.5 102 | sc.tl.leiden(ad, key_added='cluster', resolution=res) 103 | plt.rcParams["figure.figsize"] = figsize 104 | sc.pl.spatial(ad, img_key='lowres', color=[None, 'cluster'], spot_size=spot_size, show=False, save='/cluster.png'); 105 | 106 | # UMAP plots 107 | plt.rcParams["figure.figsize"] = (3,3) 108 | sc.pl.umap(ad, color=['cluster'], s=None, show=False, save='/umap_plot_cluster.png'); 109 | sc.pl.umap(ad, color=['cluster'] + cols1, s=None, ncols=3, show=False, save='/umap_plot_morphometric.png'); 110 | sc.pl.umap(ad, color=['cluster'] + cols2, s=None, ncols=3, show=False, save='/umap_plot_classification.png'); 111 | """ 112 | } 113 | 114 | 115 | process DIMRED_CLUSTER { 116 | 117 | tag "$sample_id" 118 | label 'python_low_process' 119 | maxRetries 2 120 | errorStrategy { task.attempt <= maxRetries ? 'retry' : 'finish' } 121 | publishDir "${params.outdir}/${sample_id}/figures", pattern: 'figures/*/*.png', saveAs: { filename -> "${filename.split("/")[filename.split("/").length - 1]}" }, mode: 'copy', overwrite: params.overwrite_files_on_publish 122 | memory { 1.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 3.GB } 123 | 124 | input: 125 | tuple val(sample_id), path(grid_csv), path(grid_json), path(thumb), path(features_h5ad), val(expansion_factor), val(suffix) 126 | 127 | output: 128 | tuple val(sample_id), file("figures/*/*.png") 129 | 130 | script: 131 | """ 132 | #!/usr/bin/env python 133 | 134 | import os 135 | os.environ["NUMBA_CACHE_DIR"] = "./tmp" 136 | 137 | import sys 138 | import json 139 | import numpy as np 140 | import pandas as pd 141 | import scanpy as sc 142 | import matplotlib.pyplot as plt 143 | 144 | plt.rcParams['figure.dpi'] = ${params.plot_dpi} 145 | 146 | def loadImFeatures(dpath): 147 | df_temp = pd.read_csv(dpath, index_col=[0,1], sep=',').xs(1, level='in_tissue') 148 | df_temp.insert(0, 'original_barcode', df_temp.index.values) 149 | ad = sc.AnnData(X=df_temp.loc[:, df_temp.columns.str.contains('feat')], 150 | obs=df_temp.loc[:, ~df_temp.columns.str.contains('feat')]) 151 | return ad 152 | 153 | def loadAdImage(): 154 | thumbnail = plt.imread("${thumb}") 155 | with open("${grid_json}", 'r') as f: 156 | d = json.load(f) 157 | grid = pd.read_csv("${grid_csv}", index_col=0, header=None) 158 | image = {'library_id': {'images': {'lowres': thumbnail}, 159 | 'metadata': {'chemistry_description': None, 'software_version': None}, 160 | 'scalefactors': {'tissue_lowres_scalef': thumbnail.shape[0]/d['y'], 161 | 'spot_diameter_fullres': d['spot_diameter_fullres']}}}, grid.index.values, grid[[5, 4]].values 162 | return image 163 | 164 | # Load data 165 | ad = sc.read_h5ad("${features_h5ad}") 166 | 167 | # Load image 168 | image = loadAdImage() 169 | ad.uns['spatial'] = image[0] 170 | 171 | ad.obsm['spatial'] = pd.DataFrame(index=image[1], data=image[2]).reindex(ad.obs['original_barcode']).values 172 | 173 | c, r = np.ptp(ad.obs['array_row']), np.ptp(ad.obs['array_col']) 174 | f = 5 175 | if r > c: 176 | figsize = f, f * c/r 177 | else: 178 | figsize = f * r/c, f 179 | 180 | if not os.path.exists('figures/show/'): 181 | os.makedirs('figures/show/') 182 | if not os.path.exists('figures/umap/'): 183 | os.makedirs('figures/umap/') 184 | 185 | plt.rcParams["figure.figsize"] 
= figsize 186 | 187 | print(ad.obs) 188 | print(ad) 189 | 190 | spot_size = ad.uns['spatial']['library_id']['scalefactors']['spot_diameter_fullres'] 191 | print(spot_size) 192 | 193 | spot_size *= ${params.grid_spot_horizontal_spacing} / ${params.grid_spot_diamter} 194 | print(spot_size) 195 | 196 | # Dimensionality reduction 197 | sc.pp.highly_variable_genes(ad, flavor='seurat', n_top_genes=500) 198 | sc.pp.scale(ad) 199 | sc.pp.pca(ad, n_comps=min(30, ad.shape[0]-1), zero_center=False, use_highly_variable=True) 200 | sc.pp.neighbors(ad, use_rep='X_pca') 201 | sc.tl.umap(ad) 202 | 203 | # Clustering 204 | res = 0.5 205 | sc.tl.leiden(ad, key_added='cluster', resolution=res) 206 | plt.rcParams["figure.figsize"] = figsize 207 | sc.pl.spatial(ad, img_key='lowres', color=[None, 'cluster'], spot_size=spot_size, show=False, save='/cluster.png'); 208 | 209 | # UMAP plots 210 | plt.rcParams["figure.figsize"] = (3,3) 211 | sc.pl.umap(ad, color=['cluster'], s=None, show=False, save='/umap_plot_cluster.png'); 212 | """ 213 | } 214 | 215 | -------------------------------------------------------------------------------- /modules/local/spaceranger.nf: -------------------------------------------------------------------------------- 1 | 2 | process SPACERANGER { 3 | 4 | tag "$sample_id" 5 | 6 | input: 7 | tuple val(sample_id), path(fastq), path(image) 8 | path(reference) 9 | 10 | output: 11 | tuple val(sample_id), file("sample/outs/spatial/*"), emit: spatial 12 | tuple val(sample_id), file("sample/outs/raw_feature_bc_matrix/*"), emit: mtx 13 | tuple val(sample_id), file("sample/outs/possorted_genome_bam.bam*"), emit: bam 14 | tuple val(sample_id), file("sample/outs/*summary*"), emit: metrics 15 | 16 | script: 17 | String mem = task.memory 18 | String memgb = mem.split(" ")[0] 19 | """ 20 | tempfastqdir="temp" 21 | mkdir \${tempfastqdir} 22 | cp ${fastq[0]} \${tempfastqdir}/sample_S1_L001_R1_001.fastq 23 | cp ${fastq[1]} \${tempfastqdir}/sample_S1_L001_R2_001.fastq 24 | 25 | spaceranger count \ 26 | --id=sample \ 27 | --sample=sample \ 28 | --fastqs="\${tempfastqdir}" \ 29 | --image=${image} \ 30 | --transcriptome=${reference} \ 31 | --unknown-slide \ 32 | --localcores=${task.cpus} \ 33 | --localmem=${memgb} 34 | 35 | rm -R \${tempfastqdir} 36 | """ 37 | } 38 | 39 | process RETURN_SPACERANGER_ALIGNMENT { 40 | 41 | tag "$sample_id" 42 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 43 | 44 | input: 45 | tuple val(sample_id), file("in_mouse/*"), file("in_human/*"), file("in_spatial/*") 46 | 47 | output: 48 | tuple val(sample_id), path("mouse/*"), path("human/*"), path("spatial/*") 49 | 50 | script: 51 | """ 52 | cp -R in_mouse/ mouse/ 53 | cp -R in_human/ human/ 54 | cp -R in_spatial/ spatial/ 55 | """ 56 | } 57 | 58 | 59 | process RETURN_SPACERANGER_ALIGNMENT_SINGLE { 60 | 61 | tag "$sample_id" 62 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 63 | 64 | input: 65 | tuple val(sample_id), file("in_spacerangerqc/*"), file("in_spatial/*") 66 | 67 | output: 68 | tuple val(sample_id), path("spacerangerqc/*"), path("spatial/*") 69 | 70 | script: 71 | """ 72 | cp -R in_spacerangerqc/ spacerangerqc/ 73 | cp -R in_spatial/ spatial/ 74 | """ 75 | } 76 | -------------------------------------------------------------------------------- /modules/local/superpixel.nf: -------------------------------------------------------------------------------- 1 | 2 | process SUPERPIXELATION { 3 | 4 | tag "$sample_id" 5 | 
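    // Runs bin/superpixelation.py on the tiled full-resolution image and writes
    // segmentation.npy, a superpixel label mask stored at a resolution downsampled by
    // params.superpixel_downsampling_factor; the nuclei-assignment process below maps
    // full-resolution centroids into that same downsampled scale before the lookup.
    // pixels_per_segment and superpixel_compactness read as SLIC-style controls of
    // segment size and shape regularity (an interpretation of the parameter names,
    // not of the script internals).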
label 'process_inception' 6 | maxRetries 1 7 | errorStrategy { task.attempt <= maxRetries ? 'retry' : 'finish' } 8 | memory { 6.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 12.GB } 9 | publishDir "${params.outdir}/${sample_id}/superpixels", pattern: "superpixelation_*.png", mode: 'copy', overwrite: params.overwrite_files_on_publish 10 | cpus 1 11 | 12 | input: 13 | tuple val(sample_id), path(image), val(size) 14 | 15 | output: 16 | tuple val(sample_id), file("segmentation.npy"), emit: main 17 | tuple val(sample_id), file("superpixelation_*.png"), emit: images optional true 18 | 19 | script: 20 | """ 21 | python -u "${projectDir}/bin/superpixelation.py" \ 22 | --inputImagePath "${image}" \ 23 | --segmentationSavePath "segmentation.npy" \ 24 | --pixelsPerSegment ${params.pixels_per_segment} \ 25 | --compactness ${params.superpixel_compactness} \ 26 | --s ${params.superpixel_patch_size} \ 27 | --downsamplingFactor ${params.superpixel_downsampling_factor} 28 | """ 29 | } 30 | 31 | 32 | process EXPORT_DOWN_IMAGE_FOR_CONTOURS { 33 | 34 | tag "$sample_id" 35 | label 'process_inception' 36 | maxRetries 1 37 | errorStrategy { task.attempt <= maxRetries ? 'retry' : 'finish' } 38 | memory { 6.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 5.GB } 39 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 40 | cpus 1 41 | 42 | input: 43 | tuple val(sample_id), path(image), val(size) 44 | 45 | output: 46 | tuple val(sample_id), file("im_down.tiff") 47 | 48 | script: 49 | """ 50 | #!/usr/bin/env python 51 | import tifffile 52 | import numpy as np 53 | 54 | # Convert image to numpy array to remove OME TIFF metadata 55 | f = ${params.superpixel_downsampling_factor} 56 | img = np.array(tifffile.imread("${image}"))[::f, ::f, :3] 57 | print(img.shape) 58 | 59 | print('Saving downsampled image', flush=True) 60 | tifffile.imwrite("im_down.tiff", img, bigtiff=True) 61 | """ 62 | } 63 | 64 | 65 | process EXPORT_SUPERPIXELATION_CONTOURS { 66 | 67 | tag "$sample_id" 68 | label 'process_inception' 69 | maxRetries 1 70 | errorStrategy { task.attempt <= maxRetries ? 'retry' : 'finish' } 71 | memory { 6.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 3.GB } 72 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 73 | cpus 1 74 | 75 | input: 76 | tuple val(sample_id), path(superpixelation), val(size) 77 | 78 | output: 79 | tuple val(sample_id), file("superpixelation.json.gz") 80 | 81 | script: 82 | """ 83 | #!/usr/bin/env python 84 | import numpy as np 85 | 86 | import sys 87 | sys.path.append("${projectDir}/lib") 88 | from superpixels import get_countours_from_mask, save_contours 89 | 90 | with open("${superpixelation}", 'rb') as tempfile: 91 | superpixelation = np.load(tempfile) 92 | print('Superpixelation mask shape:', superpixelation.shape) 93 | 94 | print('Computing contours', flush=True) 95 | contours = get_countours_from_mask(superpixelation) 96 | 97 | print('Saving contours', flush=True) 98 | save_contours(contours, filename='superpixelation.json.gz') 99 | """ 100 | } 101 | 102 | 103 | process CALCULATE_CELLS_OD { 104 | 105 | tag "$sample_id" 106 | label 'process_inception' 107 | maxRetries 0 108 | errorStrategy { task.attempt <= maxRetries ? 
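    // Computes per-nucleus H&E optical-density quantities. To bound memory the script
    // walks the full-resolution image in params.od_block_size blocks, calls
    // calculate_H_E_OD_quantities only on blocks that contain nuclei, and then averages
    // rows of nuclei split across block borders (the groupby(level=0).mean() step)
    // before writing qp.csv.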
'retry' : 'finish' } 109 | memory { 3.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 11.GB } 110 | cpus 1 111 | 112 | input: 113 | tuple val(sample_id), path(img), path(nuclei), path(nuc_seg_measures), val(size) 114 | 115 | output: 116 | tuple val(sample_id), path("qp.csv") 117 | 118 | script: 119 | """ 120 | #!/usr/bin/env python 121 | import numpy as np 122 | import tifffile 123 | import pandas as pd 124 | from tqdm import tqdm 125 | 126 | import sys 127 | sys.path.append("${projectDir}/lib") 128 | from hovernetConv import calculate_H_E_OD_quantities 129 | 130 | # Load nuclei mask 131 | with open("${nuclei}", 'rb') as tempfile: 132 | nuclei = np.load(tempfile) 133 | print('Nuclei segmetation mask shape:', nuclei.shape, nuclei.dtype, flush=True) 134 | 135 | # Load nuc_seg_measures 136 | df_nuc_seg_measures = pd.read_csv("${nuc_seg_measures}", index_col=0) 137 | df_nuc_seg_measures.index = df_nuc_seg_measures.index.astype(str) 138 | print(df_nuc_seg_measures, flush=True) 139 | 140 | # Load image 141 | print('Loading full resolution image', flush=True) 142 | img = np.array(tifffile.imread("${img}"))[:, :, :3] 143 | dims = img.shape[0], img.shape[1] 144 | print(dims, flush=True) 145 | 146 | # Prepare image patches coordinates 147 | s = ${params.od_block_size} 148 | r = [np.append(s*np.array(range(0, int(np.floor(dims[i]/s))+1)), [dims[i]]) for i in range(2)] 149 | coords = [(i,j) for i in range(len(r[0])-1) for j in range(len(r[1])-1)] 150 | print(coords, flush=True) 151 | print() 152 | 153 | # Calculate HE OD quantities by patches 154 | dfs = [] 155 | for ipatch, (i, j) in enumerate(tqdm(coords)): 156 | if (r[0][i+1] - r[0][i] > 0) and (r[1][j+1] - r[1][j] > 0): 157 | patch_nuclei = nuclei[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1]] 158 | if (patch_nuclei!=0).any(): 159 | df_OD_temp = calculate_H_E_OD_quantities(img[r[0][i]:r[0][i+1], r[1][j]:r[1][j+1], :], 160 | patch_nuclei, 161 | (r[0][i], r[1][j]), 162 | df_nuc_seg_measures, 163 | expand_nuclei_distance=${params.expand_nuclei_distance}) 164 | dfs.append(df_OD_temp) 165 | 166 | # Merge patches data, average cells fragments due to patching 167 | df_OD = pd.concat(dfs) 168 | print(df_OD.shape) 169 | df_OD = df_OD.groupby(level=0).mean() 170 | print(df_OD, flush=True) 171 | 172 | # Save data 173 | df_OD.to_csv("qp.csv") 174 | """ 175 | } 176 | 177 | 178 | process ASSIGN_NUCLEI_TO_SUPERPIXELS { 179 | 180 | tag "$sample_id" 181 | label 'process_inception' 182 | maxRetries 1 183 | errorStrategy { task.attempt <= maxRetries ? 
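    // Maps each nucleus centroid (full-resolution x, y from qp.csv) into the
    // downsampled superpixel mask to obtain its superpixel label `uspx`; the `ipatch`
    // column appears to recover a patch index encoded in the thousands digits of that
    // label. Each optical-density column is then rescaled so its median over all cells
    // equals 0.18, and `spx_size` records the superpixel area in full-resolution pixels
    // (label counts in the downsampled mask times the downsampling factor squared).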
'retry' : 'finish' } 184 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 185 | memory { 3.GB + (Float.valueOf(size) / 1000.0).round(2) * params.memory_scale_factor * 3.GB } 186 | cpus 1 187 | 188 | input: 189 | tuple val(sample_id), path(superpixelation), path(qp_od), val(size) 190 | 191 | output: 192 | tuple val(sample_id), path("od_per_cell.csv.gz") 193 | 194 | script: 195 | """ 196 | #!/usr/bin/env python 197 | import numpy as np 198 | import pandas as pd 199 | 200 | # Load pre-calculated nuclear and cytoplasmic quantities 201 | df_qp_od = pd.read_csv("${qp_od}", index_col=0) 202 | print(df_qp_od) 203 | 204 | # Load superpixelation mask 205 | with open("${superpixelation}", 'rb') as tempfile: 206 | superpixelation = np.load(tempfile) 207 | print('Superpixelation mask shape:', superpixelation.shape) 208 | 209 | # Assign each cell to a superpixel; superpixelation is in downsampled coordinates 210 | df_qp_od['xd'] = (df_qp_od['x'] / ${params.superpixel_downsampling_factor}).astype(int) 211 | df_qp_od['yd'] = (df_qp_od['y'] / ${params.superpixel_downsampling_factor}).astype(int) 212 | df_qp_od['uspx'] = df_qp_od.apply(lambda se: superpixelation[int(se['yd']), int(se['xd'])], axis=1) 213 | df_qp_od['ipatch'] = df_qp_od['uspx'].apply(lambda v: int((v - v % 1000) / 1000)) 214 | 215 | # Sort dataframe index 216 | df_qp_od.index = df_qp_od.index.astype(int) 217 | df_qp_od = df_qp_od.sort_index() 218 | df_qp_od.index = df_qp_od.index.astype(str) 219 | print(df_qp_od) 220 | 221 | # Normalize quantities within each ipatch identifier 222 | for col in df_qp_od.columns[~df_qp_od.columns.isin(['x', 'y', 'xd', 'yd', 'uspx', 'ipatch'])]: 223 | df_qp_od[col] = df_qp_od[col] * 0.18 / df_qp_od[col].quantile(0.5) 224 | 225 | se_sizes = pd.Series(superpixelation.ravel()).value_counts().sort_index()*(${params.superpixel_downsampling_factor}**2) 226 | df_qp_od['spx_size'] = (se_sizes.loc[df_qp_od['uspx'].values]).values 227 | 228 | # Export data 229 | df_qp_od.to_csv('od_per_cell.csv.gz') 230 | """ 231 | } 232 | -------------------------------------------------------------------------------- /modules/local/velocyto.nf: -------------------------------------------------------------------------------- 1 | 2 | process CELLSORT_BAM { 3 | 4 | tag "$sample_id" 5 | label "samtools" 6 | 7 | input: 8 | tuple val(sample_id), path("bam/*") 9 | 10 | output: 11 | tuple val(sample_id), file("cellsorted_possorted_genome_bam.bam") 12 | 13 | script: 14 | """ 15 | samtools sort bam/possorted_genome_bam.bam -o cellsorted_possorted_genome_bam.bam -t CB -O BAM -@ ${task.cpus} 16 | """ 17 | } 18 | 19 | 20 | process SPLICING_QUANTIFICATION { 21 | 22 | tag "$sample_id" 23 | label "splicing_quantification" 24 | publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: params.overwrite_files_on_publish 25 | 26 | input: 27 | tuple val(sample_id), path("sample/outs/*"), path("sample/outs/*"), path("sample/outs/filtered_feature_bc_matrix/*") 28 | path(reference) 29 | val(species) 30 | 31 | output: 32 | tuple val(sample_id), file("${species}/velocyto.loom") 33 | 34 | script: 35 | """ 36 | velocyto run10x sample ${reference}/genes/genes.gtf 37 | 38 | mkdir ${species} 39 | cp "sample/velocyto/sample.loom" "${species}/velocyto.loom" 40 | """ 41 | } 42 | -------------------------------------------------------------------------------- /nextflow.config: -------------------------------------------------------------------------------- 1 | 2 | // Global default params, used in 
configs 3 | params { 4 | 5 | // Default workflow 6 | workflow = "arbitrary_grid" // "two_references" "one_reference" "arbitrary_grid" "deconvolution_indices" 7 | 8 | input = "./assets/samplesheet.csv" 9 | 10 | outdir = "./results" 11 | tracedir = "${params.outdir}/pipeline_info" 12 | 13 | memory_scale_factor = 1.0 14 | 15 | bind = "" 16 | overwrite_files_on_publish = false 17 | 18 | } 19 | 20 | includeConfig 'conf/containers.config' 21 | 22 | 23 | // Default parameters 24 | if (params.workflow == 'one_reference') { 25 | includeConfig 'conf/analysis-one.config' 26 | 27 | if (params.do_img_subworkflow) { 28 | includeConfig 'conf/analysis-img.config' 29 | } 30 | } 31 | 32 | if (params.workflow == 'two_references' || params.workflow == 'deconvolution_indices') { 33 | includeConfig 'conf/analysis-two.config' 34 | 35 | if (params.do_img_subworkflow) { 36 | includeConfig 'conf/analysis-img.config' 37 | } 38 | } 39 | 40 | if (params.workflow == 'arbitrary_grid') { 41 | includeConfig 'conf/analysis-img.config' 42 | } 43 | 44 | process { 45 | resourceLimits = [ cpus: 70, memory: 768.GB, time: 72.h ] 46 | 47 | withName: GUNZIP { 48 | cpus = 1 49 | memory = 20.GB 50 | container = params.container_vips 51 | clusterOptions = '--time=01:00:00' 52 | } 53 | withName: GUNZIP_FASTA { 54 | cpus = 1 55 | memory = 20.GB 56 | container = params.container_vips 57 | clusterOptions = '--time=01:00:00' 58 | } 59 | withName: DECONVOLUTION_XENOME { 60 | cpus = 8 61 | memory = 50.GB 62 | container = params.container_xenome 63 | clusterOptions = '--time=06:00:00' 64 | } 65 | withName: DECONVOLUTION_XENGSORT { 66 | cpus = 8 67 | memory = 50.GB 68 | container = params.container_xengsort 69 | clusterOptions = '--time=06:00:00' 70 | } 71 | withName: XENOME_GENERATE_INDEX { 72 | cpus = 16 73 | memory = 100.GB 74 | container = params.container_xenome 75 | clusterOptions = '--time=06:00:00' 76 | } 77 | withName: XENGSORT_GENERATE_INDEX { 78 | cpus = 32 79 | memory = 50.GB 80 | container = params.container_xengsort 81 | clusterOptions = '--time=01:00:00' 82 | } 83 | withLabel: low_process { 84 | cpus = 1 85 | memory = 8.GB 86 | container = params.container_fastqtools 87 | } 88 | withLabel: samtools { 89 | cpus = 4 90 | memory = 64.GB 91 | container = params.container_samtools 92 | clusterOptions = '--time=03:00:00' 93 | } 94 | withLabel: splicing_quantification { 95 | cpus = 1 96 | memory = 64.GB 97 | container = params.container_velocyto 98 | clusterOptions = '--time=09:00:00' 99 | } 100 | withLabel: python_low_process { 101 | cpus = 1 102 | memory = 12.GB 103 | container = params.container_python 104 | clusterOptions = '--time=01:00:00' 105 | } 106 | withName: SPACERANGER { 107 | cpus = 8 108 | memory = 90.GB 109 | container = params.container_spaceranger 110 | clusterOptions = '--time=24:00:00' 111 | } 112 | withLabel: bafextract { 113 | cpus = 1 114 | memory = 36.GB 115 | container = params.container_bafextract 116 | clusterOptions = '--time=03:00:00' 117 | } 118 | withLabel: vips_process { 119 | cpus = 1 120 | memory = 3.GB 121 | container = params.container_vips 122 | } 123 | withLabel: process_ome { 124 | cpus = 1 125 | memory = 36.GB 126 | container = params.container_ome 127 | } 128 | withLabel: process_estimate_size { 129 | cpus = 1 130 | memory = 2.GB 131 | container = params.container_inception 132 | } 133 | withLabel: process_extract { 134 | cpus = 1 135 | container = params.container_inception 136 | clusterOptions = '--time=03:00:00' 137 | } 138 | withLabel: color_normalization_process { 139 | cpus = 1 140 | 
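    // Every withLabel/withName block follows the same pattern: a container from
    // conf/containers.config plus CPU and, where needed, memory and walltime defaults.
    // These can be overridden without editing this file by passing an extra config at
    // run time, for example a hypothetical my.config supplied with
    // `nextflow run ... -c my.config`:
    //
    //   process {
    //       withLabel: stain_normalization_process {
    //           cpus   = 4
    //           memory = 24.GB
    //       }
    //   }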
container = params.container_stainnet 141 | clusterOptions = '--time=16:00:00' 142 | } 143 | withLabel: stain_normalization_process { 144 | cpus = 1 145 | container = params.container_staintools 146 | clusterOptions = '--time=16:00:00' 147 | } 148 | withLabel: python_process_low { 149 | cpus = 1 150 | container = params.container_inception 151 | clusterOptions = '--time=02:00:00' 152 | } 153 | withLabel: process_focus { 154 | cpus = 16 155 | container = params.container_deepfocus 156 | clusterOptions = '--time=16:00:00' 157 | } 158 | withLabel: process_inception { 159 | cpus = 10 160 | container = params.container_inception 161 | clusterOptions = '--time=16:00:00' 162 | } 163 | withLabel: process_uni { 164 | cpus = 10 165 | container = params.container_uni_conch 166 | clusterOptions = '--time=16:00:00' 167 | } 168 | withLabel: process_conch { 169 | cpus = 10 170 | container = params.container_uni_conch 171 | clusterOptions = '--time=16:00:00' 172 | } 173 | withLabel: process_ctranspath { 174 | cpus = 10 175 | container = params.container_ctranspath 176 | clusterOptions = '--time=16:00:00' 177 | } 178 | withLabel: process_mocov3 { 179 | cpus = 1 180 | container = params.container_ctranspath 181 | resourceLimits = [ cpus: 46, memory: 170.GB, time: 6.h ] 182 | clusterOptions = '--time=03:00:00 -q gpu_inference --gres=gpu:1 --export=ALL' 183 | containerOptions = '--nv' 184 | queue = 'gpu_v100' 185 | } 186 | withLabel: process_stardist { 187 | cpus = 4 188 | container = params.container_inception 189 | clusterOptions = '--time=16:00:00' 190 | } 191 | withLabel: process_hovernet_low { 192 | cpus = 1 193 | memory = 3.GB 194 | container = params.container_hovernet 195 | clusterOptions = '--time=02:00:00' 196 | } 197 | withLabel: process_hovernet { 198 | container = params.container_hovernet 199 | if (params.do_nuclear_segmentation || params.sample_tiles_subworkflow) { 200 | if (params.hovernet_device_mode == 'gpu') { 201 | resourceLimits = [ cpus: 46, memory: 170.GB, time: 6.h ] 202 | cpus = 8 203 | clusterOptions = '--time=06:00:00 -q gpu_inference --gres=gpu:1 --export=ALL' 204 | containerOptions = '--nv' 205 | queue = 'gpu_v100' 206 | } 207 | else if (params.hovernet_device_mode == 'cpu') { 208 | cpus = 10 209 | clusterOptions = '--time=48:00:00' 210 | } 211 | } 212 | } 213 | withLabel: process_post_hovernet { 214 | container = params.container_hovernet 215 | cpus = 16 216 | } 217 | } 218 | 219 | 220 | profiles { 221 | slurm { 222 | executor { 223 | name = "slurm" 224 | submitRateLimit = '100/1s' 225 | queueSize = 100 226 | } 227 | process.queue = "compute" 228 | process.clusterOptions = '-q batch' 229 | process.module = "slurm" 230 | } 231 | singularity { 232 | process.module = 'singularity' 233 | singularity.enabled = true 234 | singularity.envWhitelist = 'CUDA_VISIBLE_DEVICES' 235 | singularity.autoMounts = true 236 | docker.enabled = false 237 | podman.enabled = false 238 | shifter.enabled = false 239 | charliecloud.enabled = false 240 | singularity.runOptions = "-B $projectDir " + params.bind 241 | } 242 | test { 243 | includeConfig 'conf/test.config' 244 | } 245 | } 246 | 247 | 248 | // Export these variables to prevent local Python/R libraries from conflicting with those in the container 249 | // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. 250 | // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. 
Once we have a common agreement on where to keep Julia packages, this is adjustable. 251 | env { 252 | PYTHONNOUSERSITE = 1 253 | R_PROFILE_USER = "/.Rprofile" 254 | R_ENVIRON_USER = "/.Renviron" 255 | JULIA_DEPOT_PATH = "/usr/local/share/julia" 256 | } 257 | 258 | // Capture exit codes from upstream processes when piping 259 | process.shell = ['/bin/bash', '-euo', 'pipefail'] 260 | 261 | def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') 262 | timeline { 263 | enabled = true 264 | file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" 265 | } 266 | report { 267 | enabled = true 268 | file = "${params.tracedir}/execution_report_${trace_timestamp}.html" 269 | } 270 | trace { 271 | enabled = true 272 | file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" 273 | } 274 | dag { 275 | enabled = true 276 | file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" 277 | } 278 | 279 | manifest { 280 | author = 'Sergii Domanskyi' 281 | homePage = 'https://github.com/TheJacksonLaboratory/STQ' 282 | description = 'Spatial Transcriptomics Quantification' 283 | mainScript = 'main.nf' 284 | } 285 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | 2 | workflow="arbitrary_grid" ### "two_references" "one_reference" "arbitrary_grid" "deconvolution_indices" 3 | samplesheet="./assets/samplesheet_test.csv" 4 | workdir="./work" 5 | outdir="./results" 6 | binddir="/projects/" 7 | 8 | #---------------------------------------------------------------------------------------------------- 9 | 10 | ./check.sh 11 | 12 | SLURM_SUBMIT_DIR=`pwd` 13 | 14 | sbatch \ 15 | submit.sb $workflow "$samplesheet" "$workdir" "$outdir" "$binddir" 16 | -------------------------------------------------------------------------------- /submit.sb: -------------------------------------------------------------------------------- 1 | #!/bin/bash --login 2 | #SBATCH -p compute 3 | #SBATCH -q batch 4 | #SBATCH -t 2-00:00:00 5 | #SBATCH --mem=56G 6 | #SBATCH --ntasks=1 7 | 8 | cd $SLURM_SUBMIT_DIR 9 | 10 | nextflow run main.nf \ 11 | -w $3 \ 12 | -profile slurm,singularity \ 13 | -resume \ 14 | --input=$2 \ 15 | --outdir=$4 \ 16 | --workflow=$1 \ 17 | --bind="-B $5" -------------------------------------------------------------------------------- /subworkflows/imaging.nf: -------------------------------------------------------------------------------- 1 | 2 | include { LOAD_SAMPLE_INFO; 3 | GET_IMAGE_SIZE; 4 | EXTRACT_ROI; 5 | COLOR_NORMALIZATION; 6 | STAIN_NORMALIZATION; 7 | CONVERT_TO_TILED_TIFF; 8 | RESIZE_IMAGE; 9 | GET_THUMB; 10 | MAKE_TINY_THUMB; 11 | GET_PIXEL_MASK; 12 | TILE_WSI; 13 | GET_TILE_MASK; 14 | GET_TISSUE_MASK; 15 | SELECT_SAVE_TILES; 16 | GET_INCEPTION_FEATURES_TILES; 17 | GET_INCEPTION_FEATURES; 18 | GET_CTRANSPATH_FEATURES; 19 | GET_MOCOV3_FEATURES; 20 | GET_UNI_FEATURES; 21 | GET_CONCH_FEATURES; 22 | } from '../modules/local/tasks' 23 | 24 | include { CHECK_FOCUS; 25 | } from '../modules/local/focus' 26 | 27 | include { SUPERPIXELATION; 28 | EXPORT_DOWN_IMAGE_FOR_CONTOURS; 29 | CALCULATE_CELLS_OD; 30 | ASSIGN_NUCLEI_TO_SUPERPIXELS; 31 | EXPORT_SUPERPIXELATION_CONTOURS; 32 | } from '../modules/local/superpixel' 33 | 34 | include { GET_NUCLEI_MASK_FROM_HOVERNET_JSON; 35 | INFER_HOVERNET_TILES; 36 | GET_NUCLEI_TYPE_COUNTS; 37 | INFER_HOVERNET; 38 | INFER_PREP_HOVERNET; 39 | INFER_STARDIST; 40 | COMPRESS_JSON_FILE; 41 | COMPUTE_SEGMENTATION_DATA; 
42 | GENERATE_PERSPOT_SEGMENTATION_DATA; 43 | } from '../modules/local/hovernet' 44 | 45 | include { CONVERT_TO_PYRAMIDAL_OME; 46 | EXTRACT_IMAGE_METADATA; 47 | } from '../modules/local/ome' 48 | 49 | include { CONVERT_SEGMENTATION_DATA; 50 | CONVERT_CSV_TO_ANNDATA; 51 | } from '../modules/local/merge' 52 | 53 | include { DIMRED_CLUSTER; 54 | DIMRED_CLUSTER_MORPH; 55 | } from '../modules/local/postprocessing' 56 | 57 | workflow IMG { 58 | 59 | take: 60 | samples 61 | 62 | main: 63 | images = samples.map{[it[0], (it[1].image)]} 64 | 65 | LOAD_SAMPLE_INFO ( samples 66 | .join(images) ) 67 | 68 | GET_IMAGE_SIZE ( LOAD_SAMPLE_INFO.out.main ) 69 | 70 | if ( params.short_workflow ) { 71 | GET_THUMB ( LOAD_SAMPLE_INFO.out.image ) 72 | 73 | convertedimage = LOAD_SAMPLE_INFO.out.image 74 | thumbimage = GET_THUMB.out 75 | imagesize = GET_IMAGE_SIZE.out 76 | } 77 | else { 78 | if ( params.export_image_metadata ) { 79 | EXTRACT_IMAGE_METADATA ( LOAD_SAMPLE_INFO.out.main 80 | .join(GET_IMAGE_SIZE.out) ) 81 | } 82 | 83 | EXTRACT_ROI ( LOAD_SAMPLE_INFO.out.main 84 | .join(GET_IMAGE_SIZE.out) ) 85 | 86 | RESIZE_IMAGE ( EXTRACT_ROI.out.image ) 87 | 88 | imageroi = RESIZE_IMAGE.out.full 89 | imagesize = RESIZE_IMAGE.out.size 90 | 91 | if ( params.stain_normalization ) { 92 | if ( params.macenko_normalization ) { 93 | STAIN_NORMALIZATION ( imageroi 94 | .join(imagesize) ) 95 | 96 | normimage = STAIN_NORMALIZATION.out 97 | } 98 | else { 99 | COLOR_NORMALIZATION ( imageroi 100 | .join(imagesize) ) 101 | 102 | normimage = COLOR_NORMALIZATION.out 103 | } 104 | 105 | CONVERT_TO_TILED_TIFF ( normimage ) 106 | } 107 | else 108 | CONVERT_TO_TILED_TIFF ( imageroi ) 109 | 110 | convertedimage = CONVERT_TO_TILED_TIFF.out.full 111 | thumbimage = CONVERT_TO_TILED_TIFF.out.thumb 112 | 113 | if ( params.export_image ) { 114 | CONVERT_TO_PYRAMIDAL_OME ( convertedimage ) 115 | } 116 | } 117 | 118 | MAKE_TINY_THUMB ( thumbimage ) 119 | 120 | if ( params.check_focus ) { 121 | CHECK_FOCUS ( convertedimage 122 | .join(imagesize) ) 123 | } 124 | 125 | if ( params.do_superpixels ) { 126 | SUPERPIXELATION ( convertedimage 127 | .join(imagesize) ) 128 | 129 | if ( params.export_superpixels_contours ) { 130 | EXPORT_DOWN_IMAGE_FOR_CONTOURS ( convertedimage 131 | .join(imagesize) ) 132 | 133 | EXPORT_SUPERPIXELATION_CONTOURS ( SUPERPIXELATION.out.main 134 | .join(imagesize) ) 135 | } 136 | } 137 | 138 | 139 | GET_PIXEL_MASK ( thumbimage 140 | .join(imagesize) ) 141 | 142 | TILE_WSI ( convertedimage 143 | .join(LOAD_SAMPLE_INFO.out.grid) 144 | .join(imagesize) 145 | .join(LOAD_SAMPLE_INFO.out.mpp) ) 146 | 147 | GET_TILE_MASK ( thumbimage 148 | .join(GET_PIXEL_MASK.out) 149 | .join(TILE_WSI.out.grid) 150 | .join(imagesize)) 151 | 152 | 153 | // Tiling sub-workflow for a small number of tiles 154 | if ( params.sample_tiles_subworkflow ) { 155 | SELECT_SAVE_TILES ( convertedimage 156 | .join(TILE_WSI.out.grid) 157 | .join(GET_TILE_MASK.out.mask) ) 158 | 159 | INFER_HOVERNET_TILES ( SELECT_SAVE_TILES.out.tiles ) 160 | 161 | GET_NUCLEI_TYPE_COUNTS ( INFER_HOVERNET_TILES.out.json ) 162 | } 163 | 164 | 165 | if ( params.extract_tile_features ) { 166 | 167 | if (params.extract_transpath_features) { 168 | GET_CTRANSPATH_FEATURES ( convertedimage 169 | .join(GET_TILE_MASK.out.mask) 170 | .join(TILE_WSI.out.grid) 171 | .join(LOAD_SAMPLE_INFO.out.grid) 172 | .join(imagesize) 173 | .combine(Channel.fromList(params.expansion_factor)) ) 174 | 175 | ctranspath_features_out = GET_CTRANSPATH_FEATURES.out 176 | } 177 | 178 | if
(params.extract_mocov3_features) { 179 | GET_MOCOV3_FEATURES ( convertedimage 180 | .join(GET_TILE_MASK.out.mask) 181 | .join(TILE_WSI.out.grid) 182 | .join(LOAD_SAMPLE_INFO.out.grid) 183 | .join(imagesize) 184 | .combine(Channel.fromList(params.expansion_factor)) ) 185 | 186 | mocov3_features_out = GET_MOCOV3_FEATURES.out 187 | } 188 | 189 | if (params.extract_inception_features) { 190 | GET_INCEPTION_FEATURES ( convertedimage 191 | .join(GET_TILE_MASK.out.mask) 192 | .join(TILE_WSI.out.grid) 193 | .join(LOAD_SAMPLE_INFO.out.grid) 194 | .join(imagesize) 195 | .combine(Channel.fromList(params.expansion_factor)) ) 196 | 197 | inception_features_out = GET_INCEPTION_FEATURES.out 198 | } 199 | 200 | if (params.extract_uni_features) { 201 | GET_UNI_FEATURES ( convertedimage 202 | .join(GET_TILE_MASK.out.mask) 203 | .join(TILE_WSI.out.grid) 204 | .join(LOAD_SAMPLE_INFO.out.grid) 205 | .join(imagesize) 206 | .combine(Channel.fromList(params.expansion_factor)) ) 207 | 208 | uni_features_out = GET_UNI_FEATURES.out 209 | } 210 | 211 | if (params.extract_conch_features) { 212 | GET_CONCH_FEATURES ( convertedimage 213 | .join(GET_TILE_MASK.out.mask) 214 | .join(TILE_WSI.out.grid) 215 | .join(LOAD_SAMPLE_INFO.out.grid) 216 | .join(imagesize) 217 | .combine(Channel.fromList(params.expansion_factor)) ) 218 | 219 | conch_features_out = GET_CONCH_FEATURES.out 220 | } 221 | 222 | // Default features 223 | features_out = channel.empty() 224 | 225 | if (params.extract_transpath_features) { 226 | features_out = features_out.concat( ctranspath_features_out ) 227 | } 228 | if (params.extract_mocov3_features) { 229 | features_out = features_out.concat( mocov3_features_out ) 230 | } 231 | if (params.extract_inception_features) { 232 | features_out = features_out.concat( inception_features_out ) 233 | } 234 | if (params.extract_uni_features) { 235 | features_out = features_out.concat( uni_features_out ) 236 | } 237 | if (params.extract_conch_features) { 238 | features_out = features_out.concat( conch_features_out ) 239 | } 240 | 241 | if ( params.do_clustering ) { 242 | if ( params.do_imaging_anndata ) { 243 | CONVERT_CSV_TO_ANNDATA ( features_out 244 | .filter{ it[2]== params.expansion_factor_for_clustering } 245 | .filter{ it[3] == params.suffix_for_clustering } ) 246 | } 247 | } 248 | } 249 | 250 | if ( params.do_nuclear_segmentation ) { 251 | 252 | GET_TISSUE_MASK ( TILE_WSI.out.grid 253 | .join(GET_TILE_MASK.out.mask) 254 | .join(imagesize) ) 255 | 256 | if ( params.hovernet_segmentation ) { 257 | INFER_PREP_HOVERNET ( convertedimage 258 | .join(GET_TISSUE_MASK.out) 259 | .join(imagesize) ) 260 | 261 | INFER_HOVERNET ( convertedimage 262 | .join(GET_TISSUE_MASK.out) 263 | .join(imagesize) 264 | .join(INFER_PREP_HOVERNET.out) ) 265 | 266 | jsonout = INFER_HOVERNET.out.json 267 | 268 | GET_NUCLEI_MASK_FROM_HOVERNET_JSON ( convertedimage 269 | .join(jsonout) 270 | .join(imagesize) ) 271 | 272 | segmaskout = GET_NUCLEI_MASK_FROM_HOVERNET_JSON.out 273 | } 274 | else { 275 | INFER_STARDIST ( convertedimage 276 | .join(GET_TISSUE_MASK.out) 277 | .join(imagesize) ) 278 | 279 | jsonout = INFER_STARDIST.out.json 280 | segmaskout = INFER_STARDIST.out.mask 281 | } 282 | 283 | COMPRESS_JSON_FILE ( jsonout ) 284 | 285 | COMPUTE_SEGMENTATION_DATA ( jsonout 286 | .join(imagesize) ) 287 | 288 | if ( params.do_superpixels ) { 289 | CALCULATE_CELLS_OD ( convertedimage 290 | .join(segmaskout) 291 | .join(COMPUTE_SEGMENTATION_DATA.out) 292 | .join(imagesize) ) 293 | 294 | ASSIGN_NUCLEI_TO_SUPERPIXELS ( SUPERPIXELATION.out.main 295 
| .join(CALCULATE_CELLS_OD.out) 296 | .join(imagesize) ) 297 | } 298 | 299 | GENERATE_PERSPOT_SEGMENTATION_DATA ( TILE_WSI.out.grid 300 | .join(COMPUTE_SEGMENTATION_DATA.out) 301 | .join(imagesize) ) 302 | 303 | if ( params.do_clustering ) { 304 | if ( params.do_imaging_anndata ) { 305 | features_selected_out = CONVERT_CSV_TO_ANNDATA.out 306 | .filter{ it[2]== params.expansion_factor_for_clustering } 307 | .filter{ it[3] == params.suffix_for_clustering } 308 | 309 | DIMRED_CLUSTER_MORPH ( TILE_WSI.out.grid 310 | .join(thumbimage) 311 | .join(GENERATE_PERSPOT_SEGMENTATION_DATA.out.data) 312 | .join(features_selected_out) ) 313 | } 314 | } 315 | } 316 | else { 317 | if ( params.do_clustering ) { 318 | if ( params.do_imaging_anndata ) { 319 | features_selected_out = CONVERT_CSV_TO_ANNDATA.out 320 | .filter{ it[2]== params.expansion_factor_for_clustering } 321 | .filter{ it[3] == params.suffix_for_clustering } 322 | 323 | DIMRED_CLUSTER ( TILE_WSI.out.grid 324 | .join(thumbimage) 325 | .join(features_selected_out) ) 326 | } 327 | } 328 | } 329 | } 330 | -------------------------------------------------------------------------------- /subworkflows/sequencing.nf: -------------------------------------------------------------------------------- 1 | 2 | include { LOAD_SAMPLE_INFO } from '../modules/local/load' 3 | 4 | include { GUNZIP as UNPACK_FASTQ; 5 | } from '../modules/local/gunzip' 6 | 7 | include { DECONVOLUTION_XENOME; 8 | DECONVOLUTION_XENGSORT; 9 | SORT_FASTQ as SORT_FASTQ_MOUSE; 10 | SORT_FASTQ as SORT_FASTQ_HUMAN; 11 | } from '../modules/local/deconvolution' 12 | 13 | include { SPACERANGER as SPACERANGER_MOUSE; 14 | SPACERANGER as SPACERANGER_HUMAN; 15 | RETURN_SPACERANGER_ALIGNMENT; 16 | } from '../modules/local/spaceranger' 17 | 18 | include { GET_REFERENCE_PILEUP as GET_REFERENCE_PILEUP_MOUSE; 19 | GET_REFERENCE_PILEUP as GET_REFERENCE_PILEUP_HUMAN; 20 | GET_PILEUP_OF_BAM as GET_PILEUP_OF_BAM_MOUSE; 21 | GET_PILEUP_OF_BAM as GET_PILEUP_OF_BAM_HUMAN; 22 | GET_SNV_FROM_PILEUP as GET_SNV_FROM_PILEUP_MOUSE; 23 | GET_SNV_FROM_PILEUP as GET_SNV_FROM_PILEUP_HUMAN; 24 | } from '../modules/local/bafextract' 25 | 26 | include { CELLSORT_BAM as CELLSORT_BAM_MOUSE; 27 | CELLSORT_BAM as CELLSORT_BAM_HUMAN; 28 | SPLICING_QUANTIFICATION as SPLICING_QUANTIFICATION_MOUSE; 29 | SPLICING_QUANTIFICATION as SPLICING_QUANTIFICATION_HUMAN; 30 | } from '../modules/local/velocyto' 31 | 32 | include { MERGE_MTX; 33 | RETURN_SEPARATE_MTX; 34 | } from '../modules/local/merge' 35 | 36 | 37 | workflow SEQ { 38 | 39 | take: 40 | samples 41 | 42 | main: 43 | fastqs = samples.map{[it[0], (it[1].fastq)]} 44 | images = samples.map{[it[0], (it[1].image)]} 45 | 46 | LOAD_SAMPLE_INFO ( samples 47 | .join(fastqs) 48 | .join(images) ) 49 | 50 | UNPACK_FASTQ ( LOAD_SAMPLE_INFO.out.fastq ) 51 | 52 | if ( params.deconvolution_tool == "xenome" ) { 53 | DECONVOLUTION_XENOME ( UNPACK_FASTQ.out, 54 | file("${params.deconvolution_indices_path}"), 55 | params.deconvolution_indices_name ) 56 | 57 | deconvolution_human = DECONVOLUTION_XENOME.out.human 58 | deconvolution_mouse = DECONVOLUTION_XENOME.out.mouse 59 | } 60 | else if (params.deconvolution_tool == "xengsort") { 61 | DECONVOLUTION_XENGSORT ( UNPACK_FASTQ.out, 62 | file("${params.deconvolution_indices_path}"), 63 | params.deconvolution_indices_name ) 64 | 65 | deconvolution_human = DECONVOLUTION_XENGSORT.out.human 66 | deconvolution_mouse = DECONVOLUTION_XENGSORT.out.mouse 67 | 68 | } 69 | 70 | 71 | SORT_FASTQ_MOUSE ( deconvolution_mouse ) 72 | 73 | SORT_FASTQ_HUMAN ( 
deconvolution_human ) 74 | 75 | 76 | SPACERANGER_MOUSE ( SORT_FASTQ_MOUSE.out 77 | .join(LOAD_SAMPLE_INFO.out.image), 78 | file("${params.mouse_reference_genome}") ) 79 | 80 | SPACERANGER_HUMAN ( SORT_FASTQ_HUMAN.out 81 | .join(LOAD_SAMPLE_INFO.out.image), 82 | file("${params.human_reference_genome}") ) 83 | 84 | RETURN_SPACERANGER_ALIGNMENT ( SPACERANGER_MOUSE.out.metrics 85 | .join(SPACERANGER_HUMAN.out.metrics) 86 | .join(SPACERANGER_HUMAN.out.spatial) ) 87 | 88 | if ( params.do_snv_extract ) { 89 | GET_REFERENCE_PILEUP_MOUSE ( file("${params.mouse_reference_genome}") ) 90 | 91 | GET_REFERENCE_PILEUP_HUMAN ( file("${params.human_reference_genome}") ) 92 | 93 | 94 | GET_PILEUP_OF_BAM_MOUSE ( SPACERANGER_MOUSE.out.bam, 95 | GET_REFERENCE_PILEUP_MOUSE.out ) 96 | 97 | GET_PILEUP_OF_BAM_HUMAN ( SPACERANGER_HUMAN.out.bam, 98 | GET_REFERENCE_PILEUP_HUMAN.out ) 99 | 100 | 101 | GET_SNV_FROM_PILEUP_MOUSE ( GET_PILEUP_OF_BAM_MOUSE.out, 102 | GET_REFERENCE_PILEUP_MOUSE.out, 103 | "mouse" ) 104 | 105 | GET_SNV_FROM_PILEUP_HUMAN ( GET_PILEUP_OF_BAM_HUMAN.out, 106 | GET_REFERENCE_PILEUP_HUMAN.out, 107 | "human" ) 108 | } 109 | 110 | if ( params.do_splicing_quantification ) { 111 | CELLSORT_BAM_MOUSE ( SPACERANGER_MOUSE.out.bam ) 112 | 113 | CELLSORT_BAM_HUMAN ( SPACERANGER_HUMAN.out.bam ) 114 | 115 | 116 | SPLICING_QUANTIFICATION_MOUSE ( CELLSORT_BAM_MOUSE.out 117 | .join(SPACERANGER_MOUSE.out.bam) 118 | .join(SPACERANGER_MOUSE.out.mtx), 119 | file("${params.mouse_reference_genome}"), 120 | "mouse" ) 121 | 122 | SPLICING_QUANTIFICATION_HUMAN ( CELLSORT_BAM_HUMAN.out 123 | .join(SPACERANGER_HUMAN.out.bam) 124 | .join(SPACERANGER_HUMAN.out.mtx), 125 | file("${params.human_reference_genome}"), 126 | "human" ) 127 | } 128 | 129 | if ( params.do_merge_mtx ) { 130 | MERGE_MTX ( SPACERANGER_MOUSE.out.mtx 131 | .join(SPACERANGER_HUMAN.out.mtx) ) 132 | } 133 | else { 134 | RETURN_SEPARATE_MTX ( SPACERANGER_MOUSE.out.mtx 135 | .join(SPACERANGER_HUMAN.out.mtx) ) 136 | } 137 | 138 | emit: 139 | SPACERANGER_HUMAN.out.spatial 140 | } 141 | -------------------------------------------------------------------------------- /subworkflows/sequencing_single.nf: -------------------------------------------------------------------------------- 1 | 2 | include { LOAD_SAMPLE_INFO; 3 | } from '../modules/local/load' 4 | 5 | include { GUNZIP as UNPACK_FASTQ; 6 | } from '../modules/local/gunzip' 7 | 8 | include { SPACERANGER; 9 | RETURN_SPACERANGER_ALIGNMENT_SINGLE; 10 | } from '../modules/local/spaceranger' 11 | 12 | include { GET_REFERENCE_PILEUP; 13 | GET_PILEUP_OF_BAM; 14 | GET_SNV_FROM_PILEUP; 15 | } from '../modules/local/bafextract' 16 | 17 | include { CELLSORT_BAM; 18 | SPLICING_QUANTIFICATION; 19 | } from '../modules/local/velocyto' 20 | 21 | include { RETURN_MTX; 22 | } from '../modules/local/merge' 23 | 24 | 25 | workflow SEQ { 26 | 27 | take: 28 | samples 29 | 30 | main: 31 | fastqs = samples.map{[it[0], (it[1].fastq)]} 32 | images = samples.map{[it[0], (it[1].image)]} 33 | 34 | LOAD_SAMPLE_INFO ( samples 35 | .join(fastqs) 36 | .join(images) ) 37 | 38 | UNPACK_FASTQ ( LOAD_SAMPLE_INFO.out.fastq ) 39 | 40 | SPACERANGER ( UNPACK_FASTQ.out 41 | .join(LOAD_SAMPLE_INFO.out.image), 42 | file("${params.reference_genome}") ) 43 | 44 | RETURN_SPACERANGER_ALIGNMENT_SINGLE ( SPACERANGER.out.metrics 45 | .join(SPACERANGER.out.spatial) ) 46 | 47 | RETURN_MTX ( SPACERANGER.out.mtx ) 48 | 49 | 50 | if ( params.do_snv_extract ) { 51 | 52 | GET_REFERENCE_PILEUP ( file("${params.reference_genome}") ) 53 | 54 | 55 | GET_PILEUP_OF_BAM 
( SPACERANGER.out.bam, 56 | GET_REFERENCE_PILEUP.out ) 57 | 58 | 59 | GET_SNV_FROM_PILEUP ( GET_PILEUP_OF_BAM.out, 60 | GET_REFERENCE_PILEUP.out, 61 | "baf" ) 62 | 63 | } 64 | 65 | if ( params.do_splicing_quantification ) { 66 | CELLSORT_BAM ( SPACERANGER.out.bam ) 67 | 68 | SPLICING_QUANTIFICATION ( CELLSORT_BAM.out 69 | .join(SPACERANGER.out.bam) 70 | .join(SPACERANGER.out.mtx), 71 | file("${params.reference_genome}"), 72 | "baf" ) 73 | 74 | } 75 | 76 | emit: 77 | SPACERANGER.out.spatial 78 | } 79 | -------------------------------------------------------------------------------- /subworkflows/xenome_index.nf: -------------------------------------------------------------------------------- 1 | 2 | include { GUNZIP_FASTA as UNPACK_HOST; 3 | GUNZIP_FASTA as UNPACK_GRAFT; 4 | } from '../modules/local/gunzip' 5 | 6 | include { XENOME_GENERATE_INDEX; 7 | XENGSORT_GENERATE_INDEX; 8 | } from '../modules/local/deconvolution' 9 | 10 | workflow XINDEX { 11 | 12 | main: 13 | if ( file(params.deconvolution_reference_host).getExtension() == "gz" ) { 14 | UNPACK_HOST ( params.deconvolution_reference_host ) 15 | reference_host = UNPACK_HOST.out 16 | } 17 | else { 18 | reference_host = params.deconvolution_reference_host 19 | } 20 | 21 | if ( file(params.deconvolution_reference_graft).getExtension() == "gz" ) { 22 | UNPACK_GRAFT ( params.deconvolution_reference_graft ) 23 | reference_graft = UNPACK_GRAFT.out 24 | } 25 | else { 26 | reference_graft = params.deconvolution_reference_graft 27 | } 28 | 29 | if ( params.deconvolution_tool == "xenome" ) { 30 | XENOME_GENERATE_INDEX ( reference_host, reference_graft, params.deconvolution_kmer_size ) 31 | 32 | output = XENOME_GENERATE_INDEX.out.indices_path 33 | } 34 | else if ( params.deconvolution_tool == "xengsort" ) { 35 | XENGSORT_GENERATE_INDEX ( reference_host, reference_graft, params.deconvolution_kmer_size ) 36 | 37 | output = XENGSORT_GENERATE_INDEX.out.indices_path 38 | } 39 | 40 | emit: 41 | output 42 | 43 | } 44 | -------------------------------------------------------------------------------- /workflows/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Routes of analysis 3 | 4 |

5 | 6 |

7 | 8 | All three routes of analysis are implemented as Nextflow DSL2 workflows and use the same [samplesheet](../README.md#samplesheet) format. The name of the workflow (one of "two_references", "one_reference", and "arbitrary_grid") is specified when the pipeline is invoked: 9 | 10 | nextflow run main.nf [...] --workflow="two_references" 11 | 12 | JAX users can set the workflow in [run.sh](../run.sh), which passes it, together with the samplesheet and output paths, to [submit.sb](../submit.sb). 13 | -------------------------------------------------------------------------------- /workflows/arbitrary_grid.nf: -------------------------------------------------------------------------------- 1 | 2 | include { IMG } from '../subworkflows/imaging' 3 | 4 | workflow ARB { 5 | 6 | take: 7 | samples 8 | 9 | main: 10 | IMG ( samples 11 | .join(samples.map{[it[0], []]})) 12 | 13 | } 14 | -------------------------------------------------------------------------------- /workflows/one_reference.nf: -------------------------------------------------------------------------------- 1 | 2 | include { SEQ } from '../subworkflows/sequencing_single' 3 | include { IMG } from '../subworkflows/imaging' 4 | 5 | workflow ONE { 6 | 7 | take: 8 | samples 9 | 10 | main: 11 | SEQ ( samples ) 12 | 13 | if ( params.do_img_subworkflow ) { 14 | IMG ( samples 15 | .join(SEQ.out) ) 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /workflows/two_references.nf: -------------------------------------------------------------------------------- 1 | 2 | include { XINDEX } from '../subworkflows/xenome_index' 3 | include { SEQ } from '../subworkflows/sequencing' 4 | include { IMG } from '../subworkflows/imaging' 5 | 6 | workflow TWO { 7 | 8 | take: 9 | samples 10 | 11 | main: 12 | if ( params.workflow == "deconvolution_indices" ) { 13 | if ( params.deconvolution_tool == "xenome" ) { 14 | if ( !file("${params.deconvolution_indices_path}/${params.deconvolution_indices_name}-both.kmers.low-bits.lwr").exists() ) { 15 | XINDEX ( ) 16 | } 17 | } 18 | else if ( params.deconvolution_tool == "xengsort" ) { 19 | if ( !file("${params.deconvolution_indices_path}/${params.deconvolution_indices_name}-xind.hash").exists() ) { 20 | XINDEX ( ) 21 | } 22 | } 23 | } 24 | 25 | SEQ ( samples ) 26 | 27 | if ( params.do_img_subworkflow ) { 28 | IMG ( samples 29 | .join(SEQ.out) ) 30 | } 31 | 32 | } 33 | --------------------------------------------------------------------------------
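For reference, the fully expanded command assembled by `run.sh` and `submit.sb` is sketched below. This is a minimal illustration using the default values from `run.sh` and the `slurm` and `singularity` profiles defined in `nextflow.config`; the samplesheet, work/output directories, and bind path should be adjusted to the local environment.

    # Illustrative launch mirroring the flags used in submit.sb (values are the run.sh defaults)
    nextflow run main.nf \
        -w ./work \
        -profile slurm,singularity \
        -resume \
        --workflow="arbitrary_grid" \
        --input="./assets/samplesheet_test.csv" \
        --outdir="./results" \
        --bind="-B /projects/"

If the `slurm` profile is omitted, Nextflow falls back to its local executor, which can be convenient for small test runs.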