├── .Rbuildignore ├── .github └── workflows │ └── build.yml ├── .gitignore ├── 01-intro.Rmd ├── 02-processing.Rmd ├── 03-prerequisites.Rmd ├── 04-read_data.Rmd ├── 05-spillover_matrix.Rmd ├── 06-quality_control.Rmd ├── 07-batch_correction.Rmd ├── 08-phenotyping.Rmd ├── 09-singlecell_visualization.Rmd ├── 10-image_visualization.Rmd ├── 11-spatial_analysis.Rmd ├── 12-references.Rmd ├── CHANGELOG.md ├── DEVELOPMENT.md ├── Dockerfile ├── IMCDataAnalysis.Rproj ├── LICENSE ├── README.md ├── _bookdown.yml ├── _output.yml ├── book.bib ├── data └── .gitkeep ├── img ├── Gating_scheme.pdf ├── Gating_scheme.pptx └── IMC_workflow.png ├── index.Rmd ├── packages.bib ├── preamble.tex ├── publication ├── .gitignore ├── README.md ├── protocol.Rmd ├── protocol.html ├── protocol.md ├── protocol_files │ ├── figure-markdown_github │ │ ├── batch-correction-1.png │ │ ├── cell-density-1.png │ │ ├── cell-size-1.png │ │ ├── celltype-cluster-UMAP-1.png │ │ ├── celltype-cluster-heatmap-1.png │ │ ├── celltype-cluster-heatmap-2.png │ │ ├── cellular-neighbourhood-1.png │ │ ├── cluster-sweep-1.png │ │ ├── compCytof-1.png │ │ ├── compImage-1.png │ │ ├── compImage-2.png │ │ ├── compImage-3.png │ │ ├── compImage-4.png │ │ ├── marker-distributions-1.png │ │ ├── plotSpotHeatmap-1.png │ │ ├── segmentation-quality-1.png │ │ ├── spatial-community-1.png │ │ ├── spatial-context-1.png │ │ ├── umap-1.png │ │ ├── unnamed-chunk-23-1.png │ │ ├── unnamed-chunk-48-1.png │ │ ├── unnamed-chunk-63-1.png │ │ ├── unnamed-chunk-67-1.png │ │ ├── unnamed-chunk-71-1.png │ │ └── unnamed-chunk-75-1.png │ └── figure-markdown_strict │ │ ├── batch-correction-1.png │ │ ├── cell-density-1.png │ │ ├── cell-size-1.png │ │ ├── celltype-cluster-UMAP-1.png │ │ ├── celltype-cluster-heatmap-1.png │ │ ├── celltype-cluster-heatmap-2.png │ │ ├── cellular-neighbourhood-1.png │ │ ├── cluster-sweep-1.png │ │ ├── compCytof-1.png │ │ ├── compImage-1.png │ │ ├── compImage-2.png │ │ ├── compImage-3.png │ │ ├── compImage-4.png │ │ ├── 
marker-distributions-1.png │ │ ├── plotSpotHeatmap-1.png │ │ ├── segmentation-quality-1.png │ │ ├── spatial-community-1.png │ │ ├── spatial-context-1.png │ │ ├── umap-1.png │ │ ├── unnamed-chunk-22-1.png │ │ ├── unnamed-chunk-47-1.png │ │ ├── unnamed-chunk-62-1.png │ │ ├── unnamed-chunk-66-1.png │ │ ├── unnamed-chunk-70-1.png │ │ └── unnamed-chunk-74-1.png ├── publication.Rproj ├── steinbock.sh └── steinbock_timing.txt ├── scripts └── transfer_labels.R └── style.css /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^\.github$ 2 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [ main ] 4 | pull_request: 5 | branches: [ main ] 6 | schedule: 7 | - cron: '0 8 1 * *' 8 | 9 | name: build 10 | 11 | jobs: 12 | docker-build-push: 13 | name: Build and push Docker image to GitHub Container registry 14 | runs-on: ubuntu-latest 15 | permissions: 16 | packages: write 17 | contents: read 18 | 19 | steps: 20 | - name: Checkout the repository 21 | uses: actions/checkout@v3 22 | 23 | - name: set NOW 24 | id: now 25 | run: | 26 | echo "::set-output name=NOW::$(date +'%Y-%m-%d')" 27 | 28 | - name: Login to GitHub Container registry 29 | uses: docker/login-action@v1 30 | env: 31 | GITHUB_USER: ${{ github.actor }} 32 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 33 | with: 34 | registry: ghcr.io 35 | username: $GITHUB_USER 36 | password: ${{ secrets.GITHUB_TOKEN }} 37 | 38 | - name: Check Dockerfile 39 | uses: dorny/paths-filter@v2 40 | id: filter 41 | with: 42 | filters: | 43 | docker: 44 | - 'Dockerfile' 45 | 46 | - name: Build and Push Docker Image 47 | if: steps.filter.outputs.docker == 'true' || github.event_name == 'schedule' 48 | uses: docker/build-push-action@v3 49 | with: 50 | context: . 
51 | file: ./Dockerfile 52 | push: true 53 | tags: | 54 | ghcr.io/bodenmillergroup/imcdataanalysis:${{ steps.now.outputs.NOW }} 55 | ghcr.io/bodenmillergroup/imcdataanalysis:latest 56 | 57 | bookdown: 58 | runs-on: ubuntu-latest 59 | if: ${{ always() }} 60 | needs: docker-build-push 61 | container: ghcr.io/bodenmillergroup/imcdataanalysis:latest 62 | env: 63 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 64 | 65 | steps: 66 | - name: Checkout 67 | uses: actions/checkout@v2 68 | 69 | - name: Render Book 70 | run: Rscript -e 'bookdown::render_book("index.Rmd", new_session = TRUE)' 71 | 72 | - name: Upload artifact 73 | uses: actions/upload-artifact@v2 74 | with: 75 | name: docs 76 | path: docs/ 77 | 78 | checkout-and-deploy: 79 | runs-on: ubuntu-latest 80 | needs: bookdown 81 | steps: 82 | - name: Checkout 83 | uses: actions/checkout@master 84 | - name: Download artifact 85 | uses: actions/download-artifact@v2 86 | with: 87 | name: docs # optional 88 | path: docs # optional 89 | - name: Deploy to GitHub Pages 90 | uses: peaceiris/actions-gh-pages@v3 91 | with: 92 | github_token: ${{ secrets.GITHUB_TOKEN }} 93 | publish_dir: docs/ 94 | force_orphan: true 95 | 96 | 97 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | *.DS_Store 6 | data/* 7 | docs/ 8 | IMCDataAnalysis.Rmd 9 | IMCDataAnalysis_files/ 10 | IMCDataAnalysis.rds 11 | _bookdown_files/ 12 | *~* 13 | *.md 14 | *.rds 15 | outputs/* 16 | 17 | !README.md 18 | !publication/README.md 19 | !publication/protocol.md 20 | !CHANGELOG.md 21 | !DEVELOPMENT.md 22 | -------------------------------------------------------------------------------- /01-intro.Rmd: -------------------------------------------------------------------------------- 1 | # Introduction {#intro} 2 | 3 | Highly multiplexed imaging (HMI) enables the simultaneous detection of 
dozens of 4 | biological molecules (e.g., proteins, transcripts; also referred to as 5 | “markers”) in tissues. Recently established multiplexed tissue imaging 6 | technologies rely on cyclic staining with fluorescently-tagged antibodies 7 | [@Lin2018; @Gut2018], or the use of oligonucleotide-tagged [@Goltsev2018; 8 | @Saka2019] or metal-tagged [@Giesen2014; @Angelo2014] antibodies, among others. 9 | The key strength of these technologies is that they allow in-depth analysis of 10 | single cells within their spatial tissue context. As a result, these methods 11 | have enabled analysis of the spatial architecture of the tumor microenvironment 12 | [@Lin2018; @Jackson2020; @Ali2020; @Schurch2020], determination of nucleic acid 13 | and protein abundances for assessment of spatial co-localization of cell types 14 | and chemokines [@Hoch2022] and spatial niches of virus-infected cells [@Jiang2022], 15 | and characterization of pathological features during COVID-19 infection 16 | [@Rendeiro2021; @Mitamura2021], Type 1 diabetes progression [@Damond2019] and 17 | autoimmune disease [@Ferrian2021]. 18 | 19 | Imaging mass cytometry (IMC) utilizes metal-tagged antibodies to detect over 40 20 | proteins and other metal-tagged molecules in biological samples. IMC can be used 21 | to perform highly multiplexed imaging and is particularly suited to profiling 22 | selected areas of tissues across many samples. 23 | 24 | ![IMC_workflow](img/IMC_workflow.png) 25 | *Overview of imaging mass cytometry data acquisition. Taken from [@Giesen2014]* 26 | 27 | IMC was first published in 2014 [@Giesen2014] and has been commercialized by 28 | Standard BioTools™ to be distributed as the Hyperion Imaging 29 | System™ (documentation is available 30 | [here](https://www.fluidigm.com/products-services/instruments/hyperion)). 
31 | Similar to other HMI technologies such as MIBI [@Angelo2014], CyCIF [@Lin2018], 32 | 4i [@Gut2018], CODEX [@Goltsev2018] and SABER [@Saka2019], IMC captures the spatial 33 | expression of multiple proteins in parallel. With a nominal 1 μm resolution, 34 | IMC is able to detect cytoplasmic and nuclear localization of proteins. The 35 | current ablation frequency of IMC is 200Hz, meaning that a 1 mm$^2$ area 36 | can be imaged within about 2 hours. 37 | 38 | ## Technical details of IMC 39 | 40 | Technical aspects of how data acquisition works can be found in the original 41 | publication [@Giesen2014]. Briefly, antibodies to detect targets in biological 42 | material are labeled with heavy metals (e.g., lanthanides) that do not occur in 43 | biological systems and thus can be used upon binding to their target as a 44 | readout similar to fluorophores in fluorescence microscopy. Thin sections of the 45 | biological sample on a glass slide are stained with an antibody cocktail. 46 | Stained microscopy slides are mounted on a precise motor-driven stage inside the 47 | ablation chamber of the IMC instrument. A high-energy UV laser is focused on the 48 | tissue, and each individual laser shot ablates tissue from an area of roughly 1 49 | μm$^2$. The energy of the laser is absorbed by the tissue resulting 50 | in vaporization followed by condensation of the ablated material. The ablated 51 | material from each laser shot is transported in the gas phase into the plasma of 52 | the mass cytometer, where first atomization of the particles and then ionization 53 | of the atoms occurs. The ion cloud is then transferred into a vacuum, and all 54 | ions below a mass of 80 m/z are filtered using a quadrupole mass filter. The 55 | remaining ions (mostly those used to tag antibodies) are analyzed in a 56 | time-of-flight mass spectrometer to ultimately obtain an accumulated mass 57 | spectrum from all ions that correspond to a single laser shot. 
One can regard 58 | this spectrum as the information underlying a 1 μm$^2$ pixel. With 59 | repetitive laser shots (e.g., at 200 Hz) and a simultaneous lateral sample 60 | movement, a tissue can be ablated pixel by pixel. Ultimately an image is 61 | reconstructed from each pixel mass spectrum. 62 | 63 | In principle, IMC can be applied to the same type of samples as conventional 64 | fluorescence microscopy. The largest distinction from fluorescence microscopy is 65 | that for IMC, primary-labeled antibodies are commonly used, whereas in 66 | fluorescence microscopy secondary antibodies carrying fluorophores are widely 67 | applied. Additionally, for IMC, samples are dried before acquisition and can be 68 | stored for years. Formalin-fixed and paraffin-embedded (FFPE) samples are widely 69 | used for IMC. The FFPE blocks are cut to 2-5 μm thick sections and are 70 | stained, dried, and analyzed with IMC. 71 | 72 | ### Metal-conjugated antibodies and staining 73 | 74 | Metal-labeled antibodies are used to stain molecules in tissues, enabling the 75 | delineation of tissue structures, cells, and subcellular structures. Metal-conjugated 76 | antibodies can either be purchased directly from Standard BioTools™ ([MaxPar IMC Antibodies](https://store.fluidigm.com/Cytometry/ConsumablesandReagentsCytometry/MaxparAntibodies?cclcl=en_US)), 77 | or antibodies can be purchased and labeled individually ([MaxPar Antibody 78 | Labeling](https://store.fluidigm.com/Cytometry/ConsumablesandReagentsCytometry/MaxparAntibodyLabelingKits?cclcl=en_US)). 79 | Antibody labeling using the MaxPar kits is performed via TCEP antibody reduction 80 | followed by crosslinking with sulfhydryl-reactive maleimide-bearing metal 81 | polymers. For each antibody it is essential to validate its functionality and 82 | specificity and to optimize its usage for an optimal signal-to-noise ratio. To 83 | facilitate antibody handling, a database is highly useful. 
84 | [Airlab](https://github.com/BodenmillerGroup/airlab-web) is such a platform; it 85 | allows antibody lot tracking, validation data uploads, and panel generation for 86 | subsequent upload to the IMC acquisition software from Standard BioTools™. 87 | 88 | Depending on the sample type, different staining protocols can be used. 89 | Generally, once antibodies of choice have been conjugated to a metal tag, 90 | titration experiments are performed to identify the optimal staining 91 | concentration. For FFPE samples, different staining protocols have been 92 | described, and different antibodies show variable staining with different 93 | protocols. Protocols such as the one provided by Standard BioTools™ or the one described by 94 | [@Ijsselsteijn2019] are recommended. Briefly, for FFPE tissues, a dewaxing 95 | step is performed to remove the paraffin used to embed the material, followed by 96 | a graded re-hydration of the samples. Thereafter, heat-induced epitope retrieval 97 | (HIER), a step aiming at the reversal of formalin-based fixation, is used to 98 | unmask epitopes within tissues and make them accessible to antibodies. Epitope 99 | unmasking is generally performed in either basic, EDTA-based buffers (pH 9.2) or 100 | acidic, citrate-based buffers (pH 6). Next, a buffer containing bovine serum 101 | albumin (BSA) is used to block non-specific binding. This buffer is also used to 102 | dilute antibody stocks for the actual antibody staining. Staining time and 103 | temperature may vary and optimization must be performed to ensure that each 104 | single antibody performs well. However, overnight staining at 4°C or 3-5 105 | hours at room temperature seem to be suitable in many cases. 106 | 107 | Following antibody incubation, unbound antibodies are washed away and a 108 | counterstain comparable to DAPI is applied to enable the identification of 109 | nuclei. 
The [Iridium intercalator](https://store.fluidigm.com/Cytometry/ConsumablesandReagentsCytometry/MassCytometryReagents/Cell-ID%E2%84%A2%20Intercalator-Ir%E2%80%94125%20%C2%B5M) 110 | from Standard BioTools™ is a reagent of choice and is applied in a brief 5-minute staining step. 111 | Finally, the samples are washed again and then dried under an airflow. Once 112 | dried, the samples are ready for analysis using IMC and are 113 | usually stable for a long period of time (at least one year). 114 | 115 | ### Data acquisition 116 | 117 | Data is acquired using the CyTOF software from Standard BioTools™ (see manuals 118 | [here](https://go.fluidigm.com/hyperion-support-documents)). 119 | 120 | The regions of interest are selected by providing coordinates for ablation. To 121 | determine the region to be imaged, so-called "panoramas" can be generated. These 122 | are stitched images of single fields of view of about 200 μm in diameter. 123 | Panoramas provide an optical overview of the tissue with a resolution similar to 124 | 10x in microscopy and are intended to help with the selection of regions of 125 | interest for ablation. The tissue should be centered on the glass slide, since 126 | the imaging mass cytometer cannot access roughly 5 mm from each of the slide 127 | edges. Currently, the instruments can process one slide at a time and usually one MCD 128 | file per sample slide is generated. 129 | 130 | Many regions of interest can be defined on a single slide and acquisition 131 | parameters such as channels to acquire, acquisition speed (100 Hz or 200 Hz), 132 | ablation energy, and other parameters are user-defined. It is recommended that 133 | all isotope channels are recorded. This will result in larger raw data files but valuable information such as 134 | potential contamination of the argon gas (e.g., xenon) or of the samples (e.g., 135 | lead, barium) is stored. 
136 | 137 | To process a large number of slides or to select regions on whole-slide samples, 138 | panoramas may not provide sufficient information. If this is the case, 139 | multi-color immunofluorescence of the same slide prior to staining with 140 | metal-labeled antibodies may be performed. To allow for region selection based 141 | on immunofluorescence images and to align those images with a panorama of the 142 | same or consecutive sections of the sample, we developed 143 | [napping](https://github.com/BodenmillerGroup/napping). 144 | 145 | Acquisition time is directly proportional to the total size of ablation, and run 146 | times for samples of large area or for large sample numbers can roughly be calculated by 147 | dividing the ablation area in square micrometers by the ablation speed (e.g., 148 | 200 Hz). In addition to the proprietary MCD file format, TXT files can also 149 | be generated for each region of interest. This is recommended as a back-up 150 | option in case of errors that may corrupt MCD files but not TXT files. 151 | 152 | ## IMC data format {#data-format} 153 | 154 | Upon completion of the acquisition an MCD file of variable size is generated. A 155 | single MCD file can hold raw acquisition data for multiple regions of interest, 156 | optical images providing a slide-level overview of the sample ("panoramas"), and 157 | detailed metadata about the experiment. Additionally, for each acquisition a 158 | TXT file is generated which holds the same pixel information as the matched 159 | acquisition in the MCD file. 160 | 161 | The Hyperion Imaging System™ produces files in the following folder structure: 162 | 163 | ``` 164 | . 
165 | +-- {XYZ}_ROI_001_1.txt 166 | +-- {XYZ}_ROI_002_2.txt 167 | +-- {XYZ}_ROI_003_3.txt 168 | +-- {XYZ}.mcd 169 | ``` 170 | 171 | Here, `{XYZ}` defines the filename, `ROI_001`, `ROI_002`, `ROI_003` are 172 | user-defined names (descriptions) for the selected regions of interest (ROI), 173 | and `1`, `2`, `3` indicate the unique acquisition identifiers. The ROI 174 | description entry can be specified in the Standard BioTools software when 175 | selecting ROIs. The MCD file contains the raw imaging data and the full metadata 176 | of all acquired ROIs, while each TXT file contains data of a single ROI without 177 | metadata. To follow a consistent naming scheme and to bundle all metadata, we 178 | recommend to zip the folder. Each ZIP file should only contain data from a 179 | single MCD file, and the name of the ZIP file should match the name of the MCD 180 | file. 181 | 182 | We refer to this data as raw data and the further 183 | processing of this data is described in Section \@ref(processing). 184 | 185 | 186 | -------------------------------------------------------------------------------- /02-processing.Rmd: -------------------------------------------------------------------------------- 1 | # Multi-channel image processing {#processing} 2 | 3 | This book focuses on common analysis steps of spatially-resolved single-cell data 4 | **after** image segmentation and feature extraction. In this chapter, the sections 5 | describe the processing of multiplexed imaging data, including file type 6 | conversion, image segmentation, feature extraction and data export. To obtain 7 | more detailed information on the individual image processing approaches, please 8 | visit their repositories: 9 | 10 | [steinbock](https://github.com/BodenmillerGroup/steinbock): The `steinbock` 11 | toolkit offers tools for multi-channel image processing using the command-line 12 | or Python code [@Windhager2021]. 
Supported tasks include IMC data pre-processing, 13 | multi-channel image segmentation, object quantification and data 14 | export to a variety of file formats. It supports functionality similar to that 15 | of the IMC Segmentation Pipeline (see below) and further allows deep learning-enabled image 16 | segmentation. The toolkit is available as a platform-independent Docker 17 | container, ensuring reproducibility and user-friendly installation. Read more in 18 | the [Docs](https://bodenmillergroup.github.io/steinbock/latest/). 19 | 20 | [IMC Segmentation 21 | Pipeline](https://github.com/BodenmillerGroup/ImcSegmentationPipeline): The IMC 22 | segmentation pipeline offers a rather manual way of segmenting multi-channel 23 | images using a pixel classification-based approach. We continue to maintain the 24 | pipeline but recommend the use of the `steinbock` toolkit for multi-channel 25 | image processing. Raw IMC data pre-processing is performed using the 26 | [readimc](https://github.com/BodenmillerGroup/readimc) Python package to convert 27 | raw MCD files into OME-TIFF and TIFF files. After image cropping, an 28 | [Ilastik](https://www.ilastik.org/) pixel classifier is trained for image 29 | classification prior to image segmentation using 30 | [CellProfiler](https://cellprofiler.org/). Features (e.g., mean pixel intensity) 31 | of segmented objects (i.e., cells) are quantified and exported. Read more in the 32 | [Docs](https://bodenmillergroup.github.io/ImcSegmentationPipeline/). 33 | 34 | ## Image pre-processing (IMC specific) 35 | 36 | Image pre-processing is technology dependent. While most multiplexed imaging 37 | technologies generate TIFF or OME-TIFF files which can be directly segmented 38 | using the `steinbock` toolkit, IMC produces data in the proprietary 39 | data format MCD. 
40 | 41 | To facilitate IMC data pre-processing, the 42 | [readimc](https://github.com/BodenmillerGroup/readimc) open-source Python 43 | package allows extracting the multi-modal (IMC acquisitions, panoramas), 44 | multi-region, multi-channel information contained in raw IMC images. Both the 45 | IMC Segmentation Pipeline and the `steinbock` toolkit use the `readimc` 46 | package for IMC data pre-processing. Starting from IMC raw data and a "panel" 47 | file, individual acquisitions are extracted as TIFF files and OME-TIFF files if 48 | using the IMC Segmentation Pipeline. The panel contains information of 49 | antibodies used in the experiment and the user can specify which channels to 50 | keep for downstream analysis. When using the IMC Segmentation Pipeline, random 51 | tiles are cropped from images for convenience of pixel labelling. 52 | 53 | ## Image segmentation 54 | 55 | The IMC Segmentation Pipeline supports pixel classification-based image 56 | segmentation while `steinbock` supports pixel classification-based and deep 57 | learning-based segmentation. 58 | 59 | **Pixel classification-based** image segmentation is performed by training a 60 | random forest classifier using [Ilastik](https://www.ilastik.org/) on the 61 | randomly extracted image crops and selected image channels. Pixels are 62 | classified as nuclear, cytoplasmic, or background. Employing a customizable 63 | [CellProfiler](https://cellprofiler.org/) pipeline, the probabilities are then 64 | thresholded for segmenting nuclei, and nuclei are expanded into cytoplasmic 65 | regions to obtain cell masks. 66 | 67 | **Deep learning-based** image segmentation is performed as presented by 68 | [@Greenwald2021]. Briefly, `steinbock` first aggregates user-defined 69 | image channels to generate two-channel images representing nuclear and 70 | cytoplasmic signals. 
Next, the 71 | [DeepCell](https://github.com/vanvalenlab/intro-to-deepcell) Python package is 72 | used to run `Mesmer`, a deep learning-enabled segmentation algorithm pre-trained 73 | on `TissueNet`, to automatically obtain cell masks without any further user 74 | input. 75 | 76 | Segmentation masks are single-channel images that match the input images in 77 | size, with non-zero grayscale values indicating the IDs of segmented objects 78 | (e.g., cells). These masks are written out as TIFF files after segmentation. 79 | 80 | ## Feature extraction {#feature-extraction} 81 | 82 | Using the segmentation masks together with their corresponding multi-channel 83 | images, the IMC Segmentation Pipeline as well as the `steinbock` toolkit extract 84 | object-specific features. These include the mean pixel intensity per object and 85 | channel, morphological features (e.g., object area) and the objects' locations. 86 | Object-specific features are written out as CSV files where rows represent 87 | individual objects and columns represent features. 88 | 89 | Furthermore, the IMC Segmentation Pipeline and the `steinbock` toolkit compute 90 | _spatial object graphs_, in which nodes correspond to objects, and nodes in 91 | spatial proximity are connected by an edge. These graphs serve as a proxy for 92 | interactions between neighboring cells. They are stored as edge lists in the form of 93 | one CSV file per image. 94 | 95 | Both approaches also write out image-specific metadata (e.g., width and height) 96 | as a CSV file. 97 | 98 | ## Data export 99 | 100 | To further facilitate compatibility with downstream analysis, `steinbock` 101 | exports data to a variety of file formats such as OME-TIFF for images, FCS for 102 | single-cell data, the _anndata_ format [@Virshup2021] for data analysis in Python, 103 | and various graph file formats for network analysis using software such as 104 | [Cytoscape](https://cytoscape.org/) [@Shannon2003]. 
For export to OME-TIFF, 105 | `steinbock` uses [xtiff](https://github.com/BodenmillerGroup/xtiff), a Python 106 | package developed for writing multi-channel TIFF stacks. 107 | 108 | ## Data import into R 109 | 110 | In Section \@ref(read-data), we will highlight the use of the 111 | [imcRtools](https://github.com/BodenmillerGroup/imcRtools) and 112 | [cytomapper](https://github.com/BodenmillerGroup/cytomapper) R/Bioconductor 113 | packages to read spatially-resolved single-cell data and images as generated by the 114 | IMC Segmentation Pipeline and the `steinbock` toolkit into the statistical 115 | programming language R. All further downstream analyses are performed in R and 116 | detailed in the following sections. 117 | 118 | 119 | 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /03-prerequisites.Rmd: -------------------------------------------------------------------------------- 1 | # Prerequisites {#prerequisites} 2 | 3 | The analysis presented in this book requires a basic understanding of the 4 | `R` programming language. An introduction to `R` can be found [here](https://cran.r-project.org/doc/manuals/r-release/R-intro.pdf) and 5 | in the book [R for Data Science](https://r4ds.hadley.nz/). 6 | 7 | Furthermore, it is beneficial to be familiar with single-cell data analysis 8 | using the [Bioconductor](https://www.bioconductor.org/) framework. The 9 | [Orchestrating Single-Cell Analysis with Bioconductor](https://bioconductor.org/books/release/OSCA/) book 10 | gives an excellent overview of the data containers and basic analyses that are 11 | used here. 12 | 13 | An overview of IMC as a technology and of the necessary image processing steps can be 14 | found on the [IMC workflow website](https://bodenmillergroup.github.io/IMCWorkflow/). 15 | 16 | Before we get started on IMC data analysis, we will need to make sure that 17 | software dependencies are installed and the example data is downloaded. 
18 | 19 | ## Obtain the code 20 | 21 | This book provides R code to perform single-cell and spatial data analysis. 22 | You can copy the individual code chunks into your R scripts or you can obtain 23 | the full code of the book via: 24 | 25 | ``` 26 | git clone https://github.com/BodenmillerGroup/IMCDataAnalysis.git 27 | ``` 28 | 29 | ## Software requirements 30 | 31 | The R packages needed to execute the presented workflow can either be manually 32 | installed (see section \@ref(manual-install)) or are available within a provided 33 | Docker container (see section \@ref(docker)). The Docker option is useful if you 34 | want to exactly reproduce the presented analysis across operating systems; 35 | however, the manual install gives you more flexibility for exploratory data 36 | analysis. 37 | 38 | ### Using Docker {#docker} 39 | 40 | For reproducibility purposes, we provide a Docker container [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/pkgs/container/imcdataanalysis). 41 | 42 | 1. After installing [Docker](https://docs.docker.com/get-docker/) you can first pull the container via: 43 | 44 | ``` 45 | docker pull ghcr.io/bodenmillergroup/imcdataanalysis:latest 46 | ``` 47 | 48 | and then run the container: 49 | 50 | ``` 51 | docker run -v /path/to/IMCDataAnalysis:/home/rstudio/IMCDataAnalysis \ 52 | -e PASSWORD=bioc -p 8787:8787 \ 53 | ghcr.io/bodenmillergroup/imcdataanalysis:latest 54 | ``` 55 | 56 | Here, the `/path/to/` needs to be adjusted to where you keep the code and data 57 | of the book. 58 | 59 | **Of note: it is recommended to use a date-tagged version of the container to ensure reproducibility**. 60 | This can be done via: 61 | 62 | ``` 63 | docker pull ghcr.io/bodenmillergroup/imcdataanalysis:<yyyy-mm-dd> 64 | ``` 65 | 66 | 2. An RStudio server session can be accessed via a browser at `localhost:8787` using `Username: rstudio` and `Password: bioc`. 67 | 3. Navigate to `IMCDataAnalysis` and open the `IMCDataAnalysis.Rproj` file. 68 | 4. 
Code in the individual files can now be executed or the whole workflow can be built by entering `bookdown::render_book()`. 69 | 70 | ### Manual installation {#manual-install} 71 | 72 | The following section describes how to manually install all needed R packages 73 | when not using the provided Docker container. 74 | To install all R packages needed for the analysis, please run: 75 | 76 | ```{r install-packages, eval=FALSE} 77 | if (!requireNamespace("BiocManager", quietly = TRUE)) 78 | install.packages("BiocManager") 79 | 80 | BiocManager::install(c("rmarkdown", "bookdown", "pheatmap", "viridis", "zoo", 81 | "devtools", "testthat", "tiff", "distill", "ggrepel", 82 | "patchwork", "mclust", "RColorBrewer", "uwot", "Rtsne", 83 | "harmony", "Seurat", "SeuratObject", "cowplot", "kohonen", 84 | "caret", "randomForest", "ggridges", "cowplot", 85 | "gridGraphics", "scales", "tiff", "harmony", "Matrix", 86 | "CATALYST", "scuttle", "scater", "dittoSeq", 87 | "tidyverse", "BiocStyle", "batchelor", "bluster", "scran", 88 | "lisaClust", "spicyR", "iSEE", "imcRtools", "cytomapper", 89 | "imcdatasets", "cytoviewer")) 90 | 91 | # Github dependencies 92 | devtools::install_github("i-cyto/Rphenograph") 93 | ``` 94 | 95 | ```{r load-libraries, echo = FALSE, message = FALSE} 96 | options(timeout=10000) 97 | library(CATALYST) 98 | library(SpatialExperiment) 99 | library(SingleCellExperiment) 100 | library(scuttle) 101 | library(scater) 102 | library(imcRtools) 103 | library(cytomapper) 104 | library(dittoSeq) 105 | library(tidyverse) 106 | library(bluster) 107 | library(scran) 108 | library(lisaClust) 109 | library(caret) 110 | library(cytoviewer) 111 | ``` 112 | 113 | ### Major package versions 114 | 115 | Throughout the analysis, we rely on different R software packages. 116 | This section lists the most commonly used packages in this workflow. 
117 | 118 | Data containers: 119 | 120 | * [SpatialExperiment](https://bioconductor.org/packages/release/bioc/html/SpatialExperiment.html) version `r packageVersion("SpatialExperiment")` 121 | * [SingleCellExperiment](https://bioconductor.org/packages/release/bioc/html/SingleCellExperiment.html) version `r packageVersion("SingleCellExperiment")` 122 | 123 | Data analysis: 124 | 125 | * [CATALYST](https://bioconductor.org/packages/release/bioc/html/CATALYST.html) version `r packageVersion("CATALYST")` 126 | * [imcRtools](https://bioconductor.org/packages/release/bioc/html/imcRtools.html) version `r packageVersion("imcRtools")` 127 | * [scuttle](https://bioconductor.org/packages/release/bioc/html/scuttle.html) version `r packageVersion("scuttle")` 128 | * [scater](https://bioconductor.org/packages/release/bioc/html/scater.html) version `r packageVersion("scater")` 129 | * [batchelor](https://www.bioconductor.org/packages/release/bioc/html/batchelor.html) version `r packageVersion("batchelor")` 130 | * [bluster](https://www.bioconductor.org/packages/release/bioc/html/bluster.html) version `r packageVersion("bluster")` 131 | * [scran](https://www.bioconductor.org/packages/release/bioc/html/scran.html) version `r packageVersion("scran")` 132 | * [harmony](https://github.com/immunogenomics/harmony) version `r packageVersion("harmony")` 133 | * [Seurat](https://satijalab.org/seurat/index.html) version `r packageVersion("Seurat")` 134 | * [lisaClust](https://www.bioconductor.org/packages/release/bioc/html/lisaClust.html) version `r packageVersion("lisaClust")` 135 | * [caret](https://topepo.github.io/caret/) version `r packageVersion("caret")` 136 | 137 | Data visualization: 138 | 139 | * [cytomapper](https://bioconductor.org/packages/release/bioc/html/cytomapper.html) version `r packageVersion("cytomapper")` 140 | * [cytoviewer](https://bioconductor.org/packages/release/bioc/html/cytoviewer.html) version `r packageVersion("cytoviewer")` 141 | * 
[dittoSeq](https://bioconductor.org/packages/release/bioc/html/dittoSeq.html) version `r packageVersion("dittoSeq")` 142 | 143 | Tidy R: 144 | 145 | * [tidyverse](https://www.tidyverse.org/) version `r packageVersion("tidyverse")` 146 | 147 | ## Image processing {#image-processing} 148 | 149 | The analysis presented here fully relies on packages written in the programming 150 | language `R` and primarily focuses on analysis approaches downstream of image 151 | processing. The example data available at 152 | [https://zenodo.org/record/7575859](https://zenodo.org/record/7575859) were 153 | processed (file type conversion, image segmentation, feature extraction as 154 | explained in Section \@ref(processing)) using the 155 | [steinbock](https://bodenmillergroup.github.io/steinbock/latest/) toolkit. The 156 | exact command line interface calls to process the raw data are shown below: 157 | 158 | ```{r, echo = FALSE, message = FALSE} 159 | if (!dir.exists("data/steinbock")) dir.create("data/steinbock") 160 | if (!dir.exists("data/ImcSegmentationPipeline")) dir.create("data/ImcSegmentationPipeline") 161 | # Pre-download steinbock file 162 | download.file("https://zenodo.org/record/7624451/files/steinbock.sh", 163 | "data/steinbock/steinbock.sh") 164 | ``` 165 | 166 | ```{bash, file="data/steinbock/steinbock.sh", eval=FALSE} 167 | 168 | ``` 169 | 170 | ## Download example data {#download-data} 171 | 172 | Throughout this tutorial, we will access a number of different data types. 173 | To declutter the analysis scripts, we will already download all needed data here. 174 | 175 | To highlight the basic steps of IMC data analysis, we provide example data that 176 | were acquired as part of the **I**ntegrated i**MMU**noprofiling of large adaptive 177 | **CAN**cer patient cohorts projects ([immucan.eu](https://immucan.eu/)). The 178 | raw data of 4 patients can be accessed online at 179 | [zenodo.org/record/7575859](https://zenodo.org/record/7575859). 
We will only 180 | download the sample/patient metadata information here: 181 | 182 | ```{r download-sample-data} 183 | download.file("https://zenodo.org/record/7575859/files/sample_metadata.csv", 184 | destfile = "data/sample_metadata.csv") 185 | ``` 186 | 187 | ### Processed multiplexed imaging data 188 | 189 | The IMC raw data was either processed using the 190 | [steinbock](https://github.com/BodenmillerGroup/steinbock) toolkit or the 191 | [IMC Segmentation Pipeline](https://github.com/BodenmillerGroup/ImcSegmentationPipeline). 192 | Image processing included file type conversion, cell segmentation and feature 193 | extraction. 194 | 195 | **steinbock output** 196 | 197 | This book uses the output of the `steinbock` framework when applied to process 198 | the example data. The processed data includes the single-cell mean intensity 199 | files, the single-cell morphological features and spatial locations, spatial 200 | object graphs in form of edge lists indicating cells in close proximity, hot 201 | pixel filtered multi-channel images, segmentation masks, image metadata and 202 | channel metadata. All these files will be downloaded here for later use. The 203 | commands which were used to generate this data can be found in the shell script 204 | above. 
205 | 206 | ```{r steinbock-results} 207 | # download intensities 208 | url <- "https://zenodo.org/record/7624451/files/intensities.zip" 209 | destfile <- "data/steinbock/intensities.zip" 210 | download.file(url, destfile) 211 | unzip(destfile, exdir="data/steinbock", overwrite=TRUE) 212 | unlink(destfile) 213 | 214 | # download regionprops 215 | url <- "https://zenodo.org/record/7624451/files/regionprops.zip" 216 | destfile <- "data/steinbock/regionprops.zip" 217 | download.file(url, destfile) 218 | unzip(destfile, exdir="data/steinbock", overwrite=TRUE) 219 | unlink(destfile) 220 | 221 | # download neighbors 222 | url <- "https://zenodo.org/record/7624451/files/neighbors.zip" 223 | destfile <- "data/steinbock/neighbors.zip" 224 | download.file(url, destfile) 225 | unzip(destfile, exdir="data/steinbock", overwrite=TRUE) 226 | unlink(destfile) 227 | 228 | # download images 229 | url <- "https://zenodo.org/record/7624451/files/img.zip" 230 | destfile <- "data/steinbock/img.zip" 231 | download.file(url, destfile) 232 | unzip(destfile, exdir="data/steinbock", overwrite=TRUE) 233 | unlink(destfile) 234 | 235 | # download masks 236 | url <- "https://zenodo.org/record/7624451/files/masks_deepcell.zip" 237 | destfile <- "data/steinbock/masks_deepcell.zip" 238 | download.file(url, destfile) 239 | unzip(destfile, exdir="data/steinbock", overwrite=TRUE) 240 | unlink(destfile) 241 | 242 | # download individual files 243 | download.file("https://zenodo.org/record/7624451/files/panel.csv", 244 | "data/steinbock/panel.csv") 245 | download.file("https://zenodo.org/record/7624451/files/images.csv", 246 | "data/steinbock/images.csv") 247 | download.file("https://zenodo.org/record/7624451/files/steinbock.sh", 248 | "data/steinbock/steinbock.sh") 249 | ``` 250 | 251 | **IMC Segmentation Pipeline output** 252 | 253 | The example data was also processed using the 254 | [IMC Segmentation Pipeline](https://github.com/BodenmillerGroup/ImcSegmentationPipeline) (version 3). 
255 | To highlight the use of the reader function for this type of output, we will need 256 | to download the `cpout` folder which is part of the `analysis` folder. The `cpout` 257 | folder stores all relevant output files of the pipeline. For a full description 258 | of the pipeline, please refer to the [docs](https://bodenmillergroup.github.io/ImcSegmentationPipeline/). 259 | 260 | ```{r imcsegpipe-results} 261 | # download analysis folder 262 | url <- "https://zenodo.org/record/7997296/files/analysis.zip" 263 | destfile <- "data/ImcSegmentationPipeline/analysis.zip" 264 | download.file(url, destfile) 265 | unzip(destfile, exdir="data/ImcSegmentationPipeline", overwrite=TRUE) 266 | unlink(destfile) 267 | 268 | unlink("data/ImcSegmentationPipeline/analysis/cpinp/", recursive=TRUE) 269 | unlink("data/ImcSegmentationPipeline/analysis/crops/", recursive=TRUE) 270 | unlink("data/ImcSegmentationPipeline/analysis/histocat/", recursive=TRUE) 271 | unlink("data/ImcSegmentationPipeline/analysis/ilastik/", recursive=TRUE) 272 | unlink("data/ImcSegmentationPipeline/analysis/ometiff/", recursive=TRUE) 273 | unlink("data/ImcSegmentationPipeline/analysis/cpout/images/", recursive=TRUE) 274 | unlink("data/ImcSegmentationPipeline/analysis/cpout/probabilities/", recursive=TRUE) 275 | unlink("data/ImcSegmentationPipeline/analysis/cpout/masks/", recursive=TRUE) 276 | ``` 277 | 278 | ### Files for spillover matrix estimation 279 | 280 | To highlight the estimation and correction of channel-spillover as described by 281 | [@Chevrier2017], we can access an example spillover-acquisition from: 282 | 283 | ```{r download-spillover-data} 284 | download.file("https://zenodo.org/record/7575859/files/compensation.zip", 285 | "data/compensation.zip") 286 | unzip("data/compensation.zip", exdir="data", overwrite=TRUE) 287 | unlink("data/compensation.zip") 288 | ``` 289 | 290 | ### Gated cells 291 | 292 | In Section \@ref(classification), we present a cell type classification approach 293 | that 
relies on previously gated cells. This ground truth data is available 294 | online at [zenodo.org/record/8095133](https://zenodo.org/record/8095133) and 295 | will be downloaded here for later use: 296 | 297 | ```{r download-gated-cells} 298 | download.file("https://zenodo.org/record/8095133/files/gated_cells.zip", 299 | "data/gated_cells.zip") 300 | unzip("data/gated_cells.zip", exdir="data", overwrite=TRUE) 301 | unlink("data/gated_cells.zip") 302 | ``` 303 | 304 | ## Software versions {#sessionInfo} 305 | 306 |
307 | SessionInfo 308 | 309 | ```{r, echo = FALSE, message = FALSE} 310 | sessionInfo() 311 | ``` 312 |
313 | 314 | 315 | 316 | -------------------------------------------------------------------------------- /06-quality_control.Rmd: -------------------------------------------------------------------------------- 1 | # Image and cell-level quality control 2 | 3 | The following section discusses possible quality indicators for data obtained 4 | by IMC and other highly multiplexed imaging technologies. Here, we will focus 5 | on describing quality metrics on the single-cell as well as image level. 6 | 7 | ## Read in the data 8 | 9 | We will first read in the data processed in previous sections: 10 | 11 | ```{r read-data, message=FALSE} 12 | images <- readRDS("data/images.rds") 13 | masks <- readRDS("data/masks.rds") 14 | spe <- readRDS("data/spe.rds") 15 | ``` 16 | 17 | ## Segmentation quality control {#seg-quality} 18 | 19 | The first step after image segmentation is to observe its accuracy. 20 | Without having ground-truth data readily available, a common approach to 21 | segmentation quality control is to overlay segmentation masks on composite images 22 | displaying channels that were used for segmentation. 23 | The [cytomapper](https://www.bioconductor.org/packages/release/bioc/html/cytomapper.html) 24 | package supports exactly this task via the `plotPixels` function. 25 | 26 | Here, we select 3 random images and perform image- and channel-wise 27 | normalization (channels are first min-max normalized and scaled to a range of 28 | 0-1 before clipping the maximum intensity to 0.2). 
29 | 30 | ```{r overlay-masks, message=FALSE} 31 | library(cytomapper) 32 | set.seed(20220118) 33 | img_ids <- sample(seq_along(images), 3) 34 | 35 | # Normalize and clip images 36 | cur_images <- images[img_ids] 37 | cur_images <- cytomapper::normalize(cur_images, separateImages = TRUE) 38 | cur_images <- cytomapper::normalize(cur_images, inputRange = c(0, 0.2)) 39 | 40 | plotPixels(cur_images, 41 | mask = masks[img_ids], 42 | img_id = "sample_id", 43 | missing_colour = "white", 44 | colour_by = c("CD163", "CD20", "CD3", "Ecad", "DNA1"), 45 | colour = list(CD163 = c("black", "yellow"), 46 | CD20 = c("black", "red"), 47 | CD3 = c("black", "green"), 48 | Ecad = c("black", "cyan"), 49 | DNA1 = c("black", "blue")), 50 | image_title = NULL, 51 | legend = list(colour_by.title.cex = 0.7, 52 | colour_by.labels.cex = 0.7)) 53 | ``` 54 | 55 | We can see that nuclei are centered within the segmentation masks and all cell 56 | types are correctly segmented (note: to zoom into the image you can right click 57 | and select `Open Image in New Tab`). A common challenge here is to segment large (e.g., 58 | epithelial cells - in cyan) _versus_ small (e.g., B cells - in red). However, the 59 | segmentation approach here appears to correctly segment cells across different 60 | sizes. 61 | 62 | An easier and interactive way of observing segmentation quality is to use the 63 | interactive image viewer provided by the 64 | [cytoviewer](https://github.com/BodenmillerGroup/cytoviewer) R/Bioconductor 65 | package [@Meyer2024]. Under "Image-level" > "Basic controls", up to six markers 66 | can be selected for visualization. The contrast of each marker can be adjusted. 67 | Under "Image-level" > "Advanced controls", click the "Show cell outlines" box 68 | to outline segmented cells on the images. 
69 | 70 | ```{r, message = FALSE} 71 | library(cytoviewer) 72 | 73 | app <- cytoviewer(image = images, 74 | mask = masks, 75 | object = spe, 76 | cell_id = "ObjectNumber", 77 | img_id = "sample_id") 78 | 79 | if (interactive()) { 80 | shiny::runApp(app) 81 | } 82 | ``` 83 | 84 | An additional approach to observe cell segmentation quality and potentially also 85 | antibody specificity issues is to visualize single-cell expression in form of a 86 | heatmap. Here, we sub-sample the dataset to (at most) 2000 cells for visualization 87 | purposes and overlay the cancer type from which the cells were extracted. 88 | 89 | ```{r segmentation-heatmap, message=FALSE, fig.height=7} 90 | library(dittoSeq) 91 | library(viridis) 92 | cur_cells <- sample(seq_len(ncol(spe)), min(2000, ncol(spe))) # cap at dataset size so small datasets do not error 93 | 94 | dittoHeatmap(spe[,cur_cells], 95 | genes = rownames(spe)[rowData(spe)$use_channel], 96 | assay = "exprs", 97 | cluster_cols = TRUE, 98 | scale = "none", 99 | heatmap.colors = viridis(100), 100 | annot.by = "indication", 101 | annotation_colors = list(indication = metadata(spe)$color_vectors$indication)) 102 | ``` 103 | 104 | We can differentiate between epithelial cells (Ecad+) and immune cells 105 | (CD45RO+). Some of the markers are detected in specific cells (e.g., Ki67, CD20, 106 | Ecad) while others are more broadly expressed across cells (e.g., HLADR, B2M, 107 | CD4). 108 | 109 | ## Image-level quality control {#image-quality} 110 | 111 | Image-level quality control is often performed using tools that offer a 112 | graphical user interface such as [QuPath](https://qupath.github.io/), 113 | [FIJI](https://imagej.net/software/fiji/) and the previously mentioned 114 | [cytoviewer](https://github.com/BodenmillerGroup/cytoviewer) package. Viewers 115 | that were specifically developed for IMC data can be seen 116 | [here](https://bodenmillergroup.github.io/IMCWorkflow/viewers.html). In this 117 | section, we will specifically focus on quantitative metrics to assess image 118 | quality. 
119 | 120 | It is often of interest to calculate the signal-to-noise ratio (SNR) for 121 | individual channels and markers. Here, we define the SNR as: 122 | 123 | $$SNR = I_s/I_n$$ 124 | 125 | where $I_s$ is the intensity of the signal (mean intensity of pixels with true 126 | signal) and $I_n$ is the intensity of the noise (mean intensity of pixels 127 | containing noise). This definition of the SNR is just one of many and other 128 | measures can be applied. Finding a threshold that separates pixels containing 129 | signal and pixels containing noise is not trivial and different approaches can 130 | be chosen. Here, we use the `otsu` thresholding approach to find pixels of the 131 | "foreground" (i.e., signal) and "background" (i.e., noise). The SNR is then 132 | defined as the mean intensity of foreground pixels divided by the mean intensity 133 | of background pixels. We compute this measure as well as the mean signal 134 | intensity per image. The plot below shows the average SNR _versus_ the average 135 | signal intensity across all images. 
136 | 137 | ```{r image-snr, message=FALSE, warning=FALSE} 138 | library(tidyverse) 139 | library(ggrepel) 140 | library(EBImage) 141 | 142 | cur_snr <- lapply(names(images), function(x){ 143 | img <- images[[x]] 144 | mat <- apply(img, 3, function(ch){ 145 | # Otsu threshold 146 | thres <- otsu(ch, range = c(min(ch), max(ch)), levels = 65536) 147 | # Signal-to-noise ratio 148 | snr <- mean(ch[ch > thres]) / mean(ch[ch <= thres]) 149 | # Signal intensity 150 | ps <- mean(ch[ch > thres]) 151 | 152 | return(c(snr = snr, ps = ps)) 153 | }) 154 | t(mat) %>% as.data.frame() %>% 155 | mutate(image = x, 156 | marker = colnames(mat)) %>% 157 | pivot_longer(cols = c(snr, ps)) 158 | }) 159 | 160 | cur_snr <- do.call(rbind, cur_snr) 161 | 162 | cur_snr %>% 163 | group_by(marker, name) %>% 164 | summarize(log_mean = log2(mean(value))) %>% 165 | pivot_wider(names_from = name, values_from = log_mean) %>% 166 | ggplot() + 167 | geom_point(aes(ps, snr)) + 168 | geom_label_repel(aes(ps, snr, label = marker)) + 169 | theme_minimal(base_size = 15) + ylab("Signal-to-noise ratio [log2]") + 170 | xlab("Signal intensity [log2]") 171 | ``` 172 | 173 | We observe PD1, LAG3 and cleaved PARP to have high SNR but low signal intensity 174 | meaning that in general these markers are not abundantly expressed. The Iridium 175 | intercalator (here marked as DNA1 and DNA2) has the highest signal intensity 176 | but low SNR. This might be due to staining differences between individual nuclei 177 | where some nuclei are considered as background. We do however observe high 178 | SNR and sufficient signal intensity for the majority of markers. 179 | 180 | Otsu thresholding and SNR calculation do not perform well if the markers are 181 | lowly abundant. In the next code chunk, we will remove markers that have 182 | a positive signal of below 2 per image. 
183 | 184 | ```{r, snr-adjusted, message=FALSE, warning=FALSE} 185 | cur_snr <- cur_snr %>% 186 | pivot_wider(names_from = name, values_from = value) %>% 187 | filter(ps > 2) %>% 188 | pivot_longer(cols = c(snr, ps)) 189 | 190 | cur_snr %>% 191 | group_by(marker, name) %>% 192 | summarize(log_mean = log2(mean(value))) %>% 193 | pivot_wider(names_from = name, values_from = log_mean) %>% 194 | ggplot() + 195 | geom_point(aes(ps, snr)) + 196 | geom_label_repel(aes(ps, snr, label = marker)) + 197 | theme_minimal(base_size = 15) + ylab("Signal-to-noise ratio [log2]") + 198 | xlab("Signal intensity [log2]") 199 | ``` 200 | 201 | This visualization shows a reduced SNR for PD1, LAG3 and cleaved PARP which was 202 | previously inflated due to low signal. 203 | 204 | Another quality indicator is the image area covered by cells (or biological 205 | tissue). This metric identifies ROIs where few cells are present, possibly 206 | hinting at incorrect selection of the ROI. We can compute the percentage of 207 | covered image area using the metadata contained in the `SpatialExperiment` 208 | object: 209 | 210 | ```{r cell-density} 211 | cell_density <- colData(spe) %>% 212 | as.data.frame() %>% 213 | group_by(sample_id) %>% 214 | # Compute the number of pixels covered by cells and 215 | # the total number of pixels 216 | summarize(cell_area = sum(area), 217 | no_pixels = mean(width_px) * mean(height_px)) %>% 218 | # Divide the number of pixels covered by cells 219 | # by the total number of pixels (fraction between 0 and 1) 220 | mutate(covered_area = cell_area / no_pixels) 221 | 222 | # Visualize the image area covered by cells per image (axis shown in percent) 223 | ggplot(cell_density) + 224 | geom_point(aes(reorder(sample_id,covered_area), covered_area)) + 225 | theme_minimal(base_size = 15) + 226 | theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 15)) + 227 | scale_y_continuous(limits = c(0, 1), labels = scales::label_percent()) + 228 | ylab("% covered area") + xlab("") 229 | ``` 230 | 231 | We observe that two of the 14 images show unusually low cell coverage. 
These 232 | two images can now be visualized using `cytomapper`. 233 | 234 | ```{r low-density-images, message=FALSE} 235 | # Normalize and clip images 236 | cur_images <- images[c("Patient4_005", "Patient4_007")] 237 | cur_images <- cytomapper::normalize(cur_images, separateImages = TRUE) 238 | cur_images <- cytomapper::normalize(cur_images, inputRange = c(0, 0.2)) 239 | 240 | plotPixels(cur_images, 241 | mask = masks[c("Patient4_005", "Patient4_007")], 242 | img_id = "sample_id", 243 | missing_colour = "white", 244 | colour_by = c("CD163", "CD20", "CD3", "Ecad", "DNA1"), 245 | colour = list(CD163 = c("black", "yellow"), 246 | CD20 = c("black", "red"), 247 | CD3 = c("black", "green"), 248 | Ecad = c("black", "cyan"), 249 | DNA1 = c("black", "blue")), 250 | legend = list(colour_by.title.cex = 0.7, 251 | colour_by.labels.cex = 0.7)) 252 | ``` 253 | 254 | These two images display less dense tissue structure but overall the images are 255 | intact and appear to be segmented correctly. 256 | 257 | Finally, it can be beneficial to visualize the mean marker expression per image 258 | to identify images with outlying marker expression. This check does not 259 | indicate image quality _per se_ but can highlight biological differences. Here, 260 | we will use the `aggregateAcrossCells` function of the 261 | `r BiocStyle::Biocpkg("scuttle")` package to compute the mean expression per 262 | image. For visualization purposes, we again `asinh` transform the mean expression 263 | values. 
264 | 265 | ```{r mean-expression-per-image, message=FALSE, fig.height=7} 266 | library(scuttle) 267 | 268 | image_mean <- aggregateAcrossCells(spe, 269 | ids = spe$sample_id, 270 | statistics="mean", 271 | use.assay.type = "counts") 272 | assay(image_mean, "exprs") <- asinh(counts(image_mean)) 273 | 274 | dittoHeatmap(image_mean, genes = rownames(spe)[rowData(spe)$use_channel], 275 | assay = "exprs", cluster_cols = TRUE, scale = "none", 276 | heatmap.colors = viridis(100), 277 | annot.by = c("indication", "patient_id", "ROI"), 278 | annotation_colors = list(indication = metadata(spe)$color_vectors$indication, 279 | patient_id = metadata(spe)$color_vectors$patient_id, 280 | ROI = metadata(spe)$color_vectors$ROI), 281 | show_colnames = TRUE) 282 | ``` 283 | 284 | We observe extensive biological variation across the 14 images specifically for 285 | some of the cell phenotype markers including the macrophage marker CD206, the B 286 | cell marker CD20, the neutrophil marker CD15, and the proliferation marker Ki67. 287 | These differences will be further studied in the following chapters. 288 | 289 | ## Cell-level quality control {#cell-quality} 290 | 291 | In the following paragraphs we will look at different metrics and visualization 292 | approaches to assess data quality (as well as biological differences) on the 293 | single-cell level. 294 | 295 | Related to the signal-to-noise ratio (SNR) calculated above on the pixel-level, 296 | a similar measure can be derived on the single-cell level. Here, we will use 297 | a two component Gaussian mixture model for each marker to find cells 298 | with positive and negative expression. The SNR is defined as: 299 | 300 | $$SNR = I_s/I_n$$ 301 | 302 | where $I_s$ is the intensity of the signal (mean intensity of cells with 303 | positive signal) and $I_n$ is the intensity of the noise (mean intensity of 304 | cells lacking expression). 
To define cells with positive and negative marker 305 | expression, we fit the mixture model across the transformed counts of all cells 306 | contained in the `SpatialExperiment` object. Next, for each marker we calculate 307 | the mean of the non-transformed counts for the positive and the negative cells. 308 | The SNR is then the ratio between the mean of the positive signal and the mean 309 | of the negative signal. 310 | 311 | ```{r cell-snr, message=FALSE, warning=FALSE, results="hide", fig.keep="all"} 312 | library(mclust) 313 | 314 | set.seed(220224) 315 | mat <- vapply(seq_len(nrow(spe)), function(x){ 316 | cur_exprs <- assay(spe, "exprs")[x,] 317 | cur_counts <- assay(spe, "counts")[x,] 318 | # Two-component mixture model separating negative from positive cells 319 | cur_model <- Mclust(cur_exprs, G = 2) 320 | mean1 <- mean(cur_counts[cur_model$classification == 1]) 321 | mean2 <- mean(cur_counts[cur_model$classification == 2]) 322 | # The component with the higher mean count is the signal, the other the noise 323 | signal <- max(mean1, mean2) 324 | noise <- min(mean1, mean2) 325 | 326 | return(c(snr = signal/noise, ps = signal)) 327 | }, FUN.VALUE = c(snr = 0, ps = 0)) 328 | 329 | cur_snr <- t(mat) %>% as.data.frame() %>% 330 | mutate(marker = rownames(spe)) 331 | 332 | cur_snr %>% ggplot() + 333 | geom_point(aes(log2(ps), log2(snr))) + 334 | geom_label_repel(aes(log2(ps), log2(snr), label = marker)) + 335 | theme_minimal(base_size = 15) + ylab("Signal-to-noise ratio [log2]") + 336 | xlab("Signal intensity [log2]") 337 | ``` 338 | 339 | Next, we observe the distributions of cell size across the individual images. 340 | Differences in cell size distributions can indicate segmentation biases due to 341 | differences in cell density or can indicate biological differences due to cell 342 | type compositions (tumor cells tend to be larger than immune cells). 
343 | 344 | ```{r cell-size, message=FALSE} 345 | dittoPlot(spe, var = "area", 346 | group.by = "sample_id", 347 | plots = "boxplot") + 348 | ylab("Cell area") + xlab("") 349 | 350 | summary(spe$area) 351 | ``` 352 | 353 | The median cell size is `r median(spe$area)` pixels with a median major axis 354 | length of `r round(median(spe$axis_major_length), digits = 1)`. The largest cell 355 | has an area of `r max(spe$area)` pixels which relates to a diameter of 356 | `r round(sqrt(max(spe$area)), digits = 1)` pixels assuming a circular shape. 357 | Overall, the distribution of cell sizes is similar across images with images from 358 | `Patient4_005` and `Patient4_007` showing a reduced average cell size. These 359 | images contain fewer tumor cells which can explain the smaller average cell size. 360 | 361 | We detect very small cells in the dataset and will remove them. 362 | The chosen threshold is arbitrary and needs to be adjusted per dataset. 363 | 364 | ```{r remove-small-cells} 365 | sum(spe$area < 5) 366 | spe <- spe[,spe$area >= 5] 367 | ``` 368 | 369 | Another quality indicator can be an absolute measure of cell density often 370 | reported in cells per mm$^2$. 371 | 372 | ```{r no-cells-per-image, message=FALSE} 373 | cell_density <- colData(spe) %>% 374 | as.data.frame() %>% 375 | group_by(sample_id) %>% 376 | summarize(cell_count = n(), 377 | no_pixels = mean(width_px) * mean(height_px)) %>% 378 | mutate(cells_per_mm2 = cell_count/(no_pixels/1000000)) 379 | 380 | ggplot(cell_density) + 381 | geom_point(aes(reorder(sample_id,cells_per_mm2), cells_per_mm2)) + 382 | theme_minimal(base_size = 15) + 383 | theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8)) + 384 | ylab("Cells per mm2") + xlab("") 385 | ``` 386 | 387 | The number of cells per mm$^2$ varies across images which also depends on the 388 | number of tumor/non-tumor cells. 
As we can see in the following sections, some 389 | immune cells appear in cell dense regions while other stromal regions are less 390 | dense. 391 | 392 | The data presented here originate from samples from different locations with 393 | potential differences in pre-processing and each sample was stained individually. 394 | These (and other) technical aspects can induce staining differences between 395 | samples or batches of samples. Observing potential staining differences can be 396 | crucial to assess data quality. We will use ridgeline visualizations to check 397 | differences in staining patterns: 398 | 399 | ```{r ridges, message=FALSE, warning = FALSE, fig.width=7, fig.height=25} 400 | multi_dittoPlot(spe, vars = rownames(spe)[rowData(spe)$use_channel], 401 | group.by = "patient_id", plots = "ridgeplot", 402 | assay = "exprs", 403 | color.panel = metadata(spe)$color_vectors$patient_id) 404 | ``` 405 | 406 | We observe variations in the distributions of marker expression across patients. 407 | These variations may arise partly from different abundances of cells in 408 | different images (e.g., Patient3 may have higher numbers of CD11c+ and PD1+ 409 | cells) as well as staining differences between samples. While most of the 410 | selected markers are specifically expressed in immune cell subtypes, we can see 411 | that E-Cadherin (a marker for epithelial (tumor) cells) shows a similar 412 | expression range across all patients. 413 | 414 | Finally, we will use non-linear dimensionality reduction methods to project 415 | cells from a high-dimensional (40) down to a low-dimensional (2) space. For this 416 | the `r BiocStyle::Biocpkg("scater")` package provides the `runUMAP` and 417 | `runTSNE` function. 
To ensure reproducibility, we will need to set a seed; 418 | however, different seeds and different parameter settings (e.g., the `perplexity` 419 | parameter in the `runTSNE` function) need to be tested to avoid 420 | over-interpretation of visualization artefacts. For dimensionality reduction, we 421 | will use all channels that show biological variation across the dataset. 422 | However, marker selection can be performed with different biological questions 423 | in mind. Here, the `runUMAP` and `runTSNE` functions are both non-deterministic, 424 | meaning they produce different results across different runs. We therefore 425 | set a `seed` in this chunk for reproducibility purposes. 426 | 427 | ```{r dimred, message=FALSE} 428 | library(scater) 429 | 430 | set.seed(220225) 431 | spe <- runUMAP(spe, subset_row = rowData(spe)$use_channel, exprs_values = "exprs") 432 | spe <- runTSNE(spe, subset_row = rowData(spe)$use_channel, exprs_values = "exprs") 433 | ``` 434 | 435 | After dimensionality reduction, the low-dimensional embeddings are stored in the 436 | `reducedDim` slot. 437 | 438 | ```{r show-dimred-slot} 439 | reducedDims(spe) 440 | head(reducedDim(spe, "UMAP")) 441 | ``` 442 | 443 | Visualization of the low-dimensional embedding facilitates assessment of 444 | potential "batch effects". The `dittoDimPlot` 445 | function allows flexible visualization. It returns `ggplot` objects which 446 | can be further modified. 
447 | 448 | ```{r visualizing-dimred-1, message=FALSE, fig.height=8} 449 | library(patchwork) 450 | 451 | # visualize patient id 452 | p1 <- dittoDimPlot(spe, var = "patient_id", reduction.use = "UMAP", size = 0.2) + 453 | scale_color_manual(values = metadata(spe)$color_vectors$patient_id) + 454 | ggtitle("Patient ID on UMAP") 455 | p2 <- dittoDimPlot(spe, var = "patient_id", reduction.use = "TSNE", size = 0.2) + 456 | scale_color_manual(values = metadata(spe)$color_vectors$patient_id) + 457 | ggtitle("Patient ID on TSNE") 458 | 459 | # visualize region of interest id 460 | p3 <- dittoDimPlot(spe, var = "ROI", reduction.use = "UMAP", size = 0.2) + 461 | scale_color_manual(values = metadata(spe)$color_vectors$ROI) + 462 | ggtitle("ROI ID on UMAP") 463 | p4 <- dittoDimPlot(spe, var = "ROI", reduction.use = "TSNE", size = 0.2) + 464 | scale_color_manual(values = metadata(spe)$color_vectors$ROI) + 465 | ggtitle("ROI ID on TSNE") 466 | 467 | # visualize indication 468 | p5 <- dittoDimPlot(spe, var = "indication", reduction.use = "UMAP", size = 0.2) + 469 | scale_color_manual(values = metadata(spe)$color_vectors$indication) + 470 | ggtitle("Indication on UMAP") 471 | p6 <- dittoDimPlot(spe, var = "indication", reduction.use = "TSNE", size = 0.2) + 472 | scale_color_manual(values = metadata(spe)$color_vectors$indication) + 473 | ggtitle("Indication on TSNE") 474 | 475 | (p1 + p2) / (p3 + p4) / (p5 + p6) 476 | ``` 477 | 478 | ```{r, visualizing-dimred-2, message=FALSE} 479 | # visualize marker expression 480 | p1 <- dittoDimPlot(spe, var = "Ecad", reduction.use = "UMAP", 481 | assay = "exprs", size = 0.2) + 482 | scale_color_viridis(name = "Ecad") + 483 | ggtitle("E-Cadherin expression on UMAP") 484 | p2 <- dittoDimPlot(spe, var = "CD45RO", reduction.use = "UMAP", 485 | assay = "exprs", size = 0.2) + 486 | scale_color_viridis(name = "CD45RO") + 487 | ggtitle("CD45RO expression on UMAP") 488 | p3 <- dittoDimPlot(spe, var = "Ecad", reduction.use = "TSNE", 489 | assay = 
"exprs", size = 0.2) + 490 | scale_color_viridis(name = "Ecad") + 491 | ggtitle("Ecad expression on TSNE") 492 | p4 <- dittoDimPlot(spe, var = "CD45RO", reduction.use = "TSNE", 493 | assay = "exprs", size = 0.2) + 494 | scale_color_viridis(name = "CD45RO") + 495 | ggtitle("CD45RO expression on TSNE") 496 | 497 | (p1 + p2) / (p3 + p4) 498 | ``` 499 | 500 | We observe a strong separation of tumor cells (Ecad+ cells) between the 501 | patients. Here, each patient was diagnosed with a different tumor type. The 502 | separation of tumor cells could be of biological origin since tumor cells tend 503 | to display differences in expression between patients and cancer types and/or of 504 | technical origin: the panel only contains a single tumor marker (E-Cadherin) and 505 | therefore slight technical differences in staining cause visible separation 506 | between cells of different patients. Nevertheless, the immune compartment 507 | (CD45RO+ cells) mixes between patients and we can rule out systematic staining 508 | differences between patients. 509 | 510 | ## Save objects 511 | 512 | The modified `SpatialExperiment` object is saved for further downstream analysis. 
513 | 514 | ```{r save-objects-quality-control} 515 | saveRDS(spe, "data/spe.rds") 516 | ``` 517 | 518 | ```{r testing, include=FALSE} 519 | library(testthat) 520 | 521 | expect_equal(reducedDimNames(spe), c("UMAP", "TSNE")) 522 | 523 | expect_equal(head(reducedDim(spe, "UMAP"), n = 10), 524 | structure(c(-4.81016665957092, -4.39734727404236, -4.36988336107849, 525 | -4.08161431810974, -6.23401195070862, -5.66659671328186, -4.13260585329651, 526 | -0.930108251787412, -6.33803874514221, -5.40764981768249, -3.77736220987329, 527 | -3.45603595407495, -3.44556103380213, -3.16211901338587, -2.43397555978784, 528 | -3.42805753381739, -3.22162519128809, 4.09678735105505, -2.20264754922876, 529 | -3.72411928804407), dim = c(10L, 2L), dimnames = list(c("Patient1_001_1", 530 | "Patient1_001_2", "Patient1_001_3", "Patient1_001_4", "Patient1_001_5", 531 | "Patient1_001_6", "Patient1_001_7", "Patient1_001_8", "Patient1_001_9", 532 | "Patient1_001_10"), c("UMAP1", "UMAP2"))), tolerance = 0.01) 533 | 534 | expect_equal(reducedDim(spe, "UMAP")[100:130,], 535 | structure(c(-3.89626533053039, -7.13317567370056, -6.77943021319031, 536 | -7.11419230959533, -2.78164083025574, -3.94929200670837, -5.95046884081482, 537 | 0.763116416715395, -5.68849593660949, -6.22845536730407, -6.58062154314636, 538 | -5.80118590853332, -6.25312644503235, -5.86530810854553, -7.08645230791687, 539 | -4.12036305925964, -5.97095376513122, -4.08220035097717, -5.91776162645935, 540 | 0.557355967544329, -7.09781867525696, -5.62668353579162, -5.04605323336242, 541 | -4.7885444786322, -7.22946149370788, -5.26700240633606, -4.82962876818298, 542 | -4.25380879900573, 1.08371841647507, 1.44114249684693, -4.87143928072571, 543 | -3.41620216997156, -3.93748961122522, -2.58227525384912, -4.26784573228846, 544 | -5.4897724214364, -3.4107941213418, -3.86142430933008, 3.20950664846411, 545 | -3.71332822473535, -3.86072955759058, -2.47767458589563, -3.84708223970423, 546 | -4.17958079011927, -3.9504874769021, 
-2.7868140760232, -3.12866697938928, 547 | -2.78958783777246, -2.86517844827661, -3.41490874917993, 1.00301005212774, 548 | -3.6809254709054, -3.49266205461511, -2.95477948816309, -2.56105003984461, 549 | -3.1081076684762, -3.18702707918177, -3.78098068864832, -3.24055348070154, 550 | 0.368578153533841, 0.224757569713498, -2.99749766023645), dim = c(31L, 551 | 2L), dimnames = list(c("Patient1_001_100", "Patient1_001_101", 552 | "Patient1_001_102", "Patient1_001_103", "Patient1_001_104", "Patient1_001_105", 553 | "Patient1_001_106", "Patient1_001_107", "Patient1_001_108", "Patient1_001_109", 554 | "Patient1_001_110", "Patient1_001_111", "Patient1_001_112", "Patient1_001_113", 555 | "Patient1_001_114", "Patient1_001_115", "Patient1_001_116", "Patient1_001_117", 556 | "Patient1_001_118", "Patient1_001_119", "Patient1_001_120", "Patient1_001_121", 557 | "Patient1_001_122", "Patient1_001_123", "Patient1_001_124", "Patient1_001_125", 558 | "Patient1_001_126", "Patient1_001_127", "Patient1_001_128", "Patient1_001_129", 559 | "Patient1_001_130"), c("UMAP1", "UMAP2"))), tolerance = 0.01) 560 | 561 | expect_equal(head(reducedDim(spe, "TSNE"), n = 10), 562 | structure(c(8.5000231819133, 8.69417707607171, 8.66506409812202, 563 | 8.70346540608834, -4.85956000801397, -3.50347074831182, 8.62888756799549, 564 | -8.0907749992851, 3.45775862206781, 7.54108785460927, -30.1566664235465, 565 | -28.3558044334759, -28.2668374978953, -26.5385662781522, -21.5856934742621, 566 | -26.4730337308963, -26.8568530864791, 19.6159281655837, 20.5991986552636, 567 | -32.2252709733315), dim = c(10L, 2L), dimnames = list(c("Patient1_001_1", 568 | "Patient1_001_2", "Patient1_001_3", "Patient1_001_4", "Patient1_001_5", 569 | "Patient1_001_6", "Patient1_001_7", "Patient1_001_8", "Patient1_001_9", 570 | "Patient1_001_10"), c("TSNE1", "TSNE2"))), tolerance = 0.01) 571 | 572 | expect_equal(reducedDim(spe, "TSNE")[100:130,], 573 | structure(c(10.6418413622177, -8.19603725541398, -7.57330997290384, 574 | 
-11.9649205211375, 21.7044793746905, 10.5299308898414, -2.89361532839768, 575 | -14.1479057033804, 7.04881694519824, -2.27845635304146, -6.55119000983192, 576 | -1.82422115143277, -2.4097229038924, -1.64709145874245, -9.64650462467637, 577 | 8.35520499244715, -2.63232041735159, -6.92707082054738, -2.87217958426136, 578 | -8.06273786914118, -12.041109790411, 7.21278447587393, 2.78699525470502, 579 | 2.34486289464684, -9.89574054981405, 2.1828274368045, 8.49912719972949, 580 | 8.11668313724476, -5.71319426232428, -0.190728643680821, 3.48920576978465, 581 | -25.9423147604023, -32.0648373504634, -24.1480435193885, -33.8780988685417, 582 | -23.1903399682141, -26.1356569942554, -33.8181999524557, 0.0329466224873345, 583 | -32.6325983943606, -32.6301008178222, -24.1756860322692, -32.9821005945978, 584 | -35.8676859419689, -34.0911214749092, -25.5808689943855, -26.7586018145491, 585 | -25.6034715982434, -20.3596025910865, -28.0932829916676, -5.40775249366363, 586 | -31.3872828096166, -32.5651665675897, -27.111323151666, -24.101850440361, 587 | -26.9341083442237, -28.5420714838967, -30.3678523339134, -27.0214667414901, 588 | -5.02684547281321, -16.0298817965721, -26.769553922745), dim = c(31L, 589 | 2L), dimnames = list(c("Patient1_001_100", "Patient1_001_101", 590 | "Patient1_001_102", "Patient1_001_103", "Patient1_001_104", "Patient1_001_105", 591 | "Patient1_001_106", "Patient1_001_107", "Patient1_001_108", "Patient1_001_109", 592 | "Patient1_001_110", "Patient1_001_111", "Patient1_001_112", "Patient1_001_113", 593 | "Patient1_001_114", "Patient1_001_115", "Patient1_001_116", "Patient1_001_117", 594 | "Patient1_001_118", "Patient1_001_119", "Patient1_001_120", "Patient1_001_121", 595 | "Patient1_001_122", "Patient1_001_123", "Patient1_001_124", "Patient1_001_125", 596 | "Patient1_001_126", "Patient1_001_127", "Patient1_001_128", "Patient1_001_129", 597 | "Patient1_001_130"), c("TSNE1", "TSNE2"))), tolerance = 0.01) 598 | ``` 599 | 600 | ## Session Info 601 | 602 |
603 | SessionInfo 604 | 605 | ```{r, echo = FALSE} 606 | sessionInfo() 607 | ``` 608 |
-------------------------------------------------------------------------------- /09-singlecell_visualization.Rmd: -------------------------------------------------------------------------------- 1 | # Single cell visualization {#single-cell-visualization} 2 | 3 | The following section describes typical approaches for visualizing 4 | single-cell data. 5 | 6 | This chapter is divided into three parts. Section \@ref(cell-type-level) 7 | will highlight visualization approaches downstream of cell type 8 | classification from Section \@ref(classification). We will then focus on 9 | visualization methods that relate single-cell data to the sample level 10 | in Section \@ref(sample-level). Lastly, Section \@ref(rich-example) will 11 | provide a more customized example on how to integrate various 12 | single-cell and sample metadata into one heatmap using the 13 | [ComplexHeatmap](https://bioconductor.org/packages/release/bioc/html/ComplexHeatmap.html) 14 | package [@Gu2016]. 15 | 16 | Visualization functions from popular R packages in single-cell research 17 | such as 18 | [scater](https://bioconductor.org/packages/release/bioc/html/scater.html), 19 | [DittoSeq](https://bioconductor.org/packages/release/bioc/html/dittoSeq.html) 20 | and 21 | [CATALYST](https://bioconductor.org/packages/release/bioc/html/CATALYST.html) 22 | will be utilized. We will recycle methods and functions that we have 23 | used in previous sections, while also introducing new ones. 24 | 25 | Please note that this chapter aims to provide an overview on **common** 26 | visualization options and should be seen as a stepping-stone. However, 27 | many more options exist and the user should customize the visualization 28 | according to the biological question at hand. 29 | 30 | ## Load data 31 | 32 | First, we will read in the previously generated `SpatialExperiment` 33 | object. 
34 | 35 | ```{r read-data-scviz, message=FALSE} 36 | spe <- readRDS("data/spe.rds") 37 | ``` 38 | 39 | For visualization purposes, we will define markers that were used for 40 | cell type classification and markers that can indicate a specific cell 41 | state (e.g., Ki67 for proliferating cells). 42 | 43 | ```{r define-markers, message=FALSE} 44 | # Define cell phenotype markers 45 | type_markers <- c("Ecad", "CD45RO", "CD20", "CD3", "FOXP3", "CD206", "MPO", 46 | "SMA", "CD8a", "CD4", "HLADR", "CD15", "CD38", "PDGFRb") 47 | 48 | # Define cell state markers 49 | state_markers <- c("CarbonicAnhydrase", "Ki67", "PD1", "GrzB", "PDL1", 50 | "ICOS", "TCF7", "VISTA") 51 | 52 | # Add to spe 53 | rowData(spe)$marker_class <- ifelse(rownames(spe) %in% type_markers, "type", 54 | ifelse(rownames(spe) %in% state_markers, "state", 55 | "other")) 56 | ``` 57 | 58 | ## Cell-type level {#cell-type-level} 59 | 60 | In the first section of this chapter, the grouping-level for the 61 | visualization approaches will be the cell type classification from 62 | Section \@ref(classification). Other grouping levels (e.g., cluster 63 | assignments from Section \@ref(clustering)) are possible and the user 64 | should adjust depending on the chosen analysis workflow. 65 | 66 | ### Dimensionality reduction visualization 67 | 68 | As seen before, we can visualize single-cells in low-dimensional space. 69 | Often, non-linear methods for dimensionality reduction such as tSNE and 70 | UMAP are used. They aim to preserve the distances between each cell and its 71 | neighbors in the high-dimensional space. 72 | 73 | Interpreting these plots is not trivial, but local neighborhoods in the 74 | plot can suggest similarity in expression for given cells. See 75 | [Orchestrating Single-Cell Analysis with 76 | Bioconductor](https://bioconductor.org/books/release/OSCA/) for more 77 | details. 
78 | 79 | Here, we will use `dittoDimPlot` from the 80 | [DittoSeq](https://bioconductor.org/packages/release/bioc/html/dittoSeq.html) 81 | package and `plotReducedDim` from the 82 | [scater](https://bioconductor.org/packages/release/bioc/html/scater.html) package 83 | to visualize the fastMNN-corrected UMAP colored by cell type and 84 | expression (using the asinh-transformed intensities), respectively. 85 | 86 | Both functions are highly flexible and return `ggplot` objects which can 87 | be further modified. 88 | 89 | ```{r cell type umap, fig.width=10, fig.height=5, message=FALSE} 90 | library(dittoSeq) 91 | library(scater) 92 | library(patchwork) 93 | library(cowplot) 94 | library(viridis) 95 | 96 | ## UMAP colored by cell type and expression - dittoDimPlot 97 | p1 <- dittoDimPlot(spe, 98 | var = "celltype", 99 | reduction.use = "UMAP_mnnCorrected", 100 | size = 0.2, 101 | do.label = TRUE) + 102 | scale_color_manual(values = metadata(spe)$color_vectors$celltype) + 103 | theme(legend.title = element_blank()) + 104 | ggtitle("Cell types on UMAP, integrated cells") 105 | 106 | p2 <- dittoDimPlot(spe, 107 | var = "Ecad", 108 | assay = "exprs", 109 | reduction.use = "UMAP_mnnCorrected", 110 | size = 0.2, 111 | colors = viridis(100), 112 | do.label = TRUE) + 113 | scale_color_viridis() 114 | 115 | p1 + p2 116 | ``` 117 | 118 | The `plotReducedDim` function of the `scater` package provides an alternative 119 | way of visualizing cells in low dimensions. Here, we loop over all type 120 | markers, generate one plot per marker and plot the individual plots side-by-side.
121 | 122 | ```{r cell type umap 2, fig.width=10, fig.height=10, message=FALSE} 123 | # UMAP colored by expression for all markers - plotReducedDim 124 | plot_list <- lapply(rownames(spe)[rowData(spe)$marker_class == "type"], function(x){ 125 | p <- plotReducedDim(spe, 126 | dimred = "UMAP_mnnCorrected", 127 | colour_by = x, 128 | by_exprs_values = "exprs", 129 | point_size = 0.2) 130 | return(p) 131 | }) 132 | 133 | plot_grid(plotlist = plot_list) 134 | ``` 135 | 136 | ### Heatmap visualization 137 | 138 | Next, it is often useful to visualize single-cell expression per cell 139 | type in the form of a heatmap. For this, we will use the `dittoHeatmap` 140 | function from the 141 | [DittoSeq](https://bioconductor.org/packages/release/bioc/html/dittoSeq.html) 142 | package. 143 | 144 | We sub-sample the dataset to 4000 cells for ease of visualization and 145 | overlay the cancer type and patient ID from which the cells were 146 | extracted. 147 | 148 | ```{r celltype heatmap, fig.height = 7,fig.width = 7, message=FALSE} 149 | set.seed(220818) 150 | cur_cells <- sample(seq_len(ncol(spe)), 4000) 151 | 152 | # Heatmap visualization - DittoHeatmap 153 | dittoHeatmap(spe[,cur_cells], 154 | genes = rownames(spe)[rowData(spe)$marker_class == "type"], 155 | assay = "exprs", 156 | cluster_cols = FALSE, 157 | scale = "none", 158 | heatmap.colors = viridis(100), 159 | annot.by = c("celltype", "indication", "patient_id"), 160 | annotation_colors = list(indication = metadata(spe)$color_vectors$indication, 161 | patient_id = metadata(spe)$color_vectors$patient_id, 162 | celltype = metadata(spe)$color_vectors$celltype)) 163 | ``` 164 | 165 | Similarly, we can visualize the mean marker expression per cell type for all 166 | cells by first calculating the mean marker expression per cell type using the 167 | `aggregateAcrossCells` function from the 168 | [scuttle](https://bioconductor.org/packages/release/bioc/html/scuttle.html) 169 | package and then using `dittoHeatmap`.
We will annotate the heatmap with the 170 | number of cells per cell type and we will use different ways of feature 171 | scaling. 172 | 173 | ```{r celltype mean-expression-per-cluster, fig.height=5} 174 | library(scuttle) 175 | 176 | ## aggregate by cell type 177 | celltype_mean <- aggregateAcrossCells(as(spe, "SingleCellExperiment"), 178 | ids = spe$celltype, 179 | statistics = "mean", 180 | use.assay.type = "exprs", 181 | subset.row = rownames(spe)[rowData(spe)$marker_class == "type"]) 182 | 183 | # No scaling 184 | dittoHeatmap(celltype_mean, 185 | assay = "exprs", 186 | cluster_cols = TRUE, 187 | scale = "none", 188 | heatmap.colors = viridis(100), 189 | annot.by = c("celltype", "ncells"), 190 | annotation_colors = list(celltype = metadata(spe)$color_vectors$celltype, 191 | ncells = plasma(100))) 192 | 193 | # Scaled to max 194 | dittoHeatmap(celltype_mean, 195 | assay = "exprs", 196 | cluster_cols = TRUE, 197 | scaled.to.max = TRUE, 198 | heatmap.colors.max.scaled = inferno(100), 199 | annot.by = c("celltype", "ncells"), 200 | annotation_colors = list(celltype = metadata(spe)$color_vectors$celltype, 201 | ncells = plasma(100))) 202 | 203 | # Z score scaled 204 | dittoHeatmap(celltype_mean, 205 | assay = "exprs", 206 | cluster_cols = TRUE, 207 | annot.by = c("celltype", "ncells"), 208 | annotation_colors = list(celltype = metadata(spe)$color_vectors$celltype, 209 | ncells = plasma(100))) 210 | ``` 211 | 212 | As illustrated above for non-, max-, and Z score-scaled expression values, 213 | different ways of scaling can have strong effects on visualization 214 | output and we encourage the user to test multiple options. 215 | 216 | Overall, we can observe cell-type specific marker expression (e.g., Tumor 217 | = Ecad high and B cells = CD20 high) in agreement with the gating scheme 218 | of Section \@ref(classification).
219 | 220 | ### Violin plot visualization 221 | 222 | The `plotExpression` function from the 223 | [scater](https://bioconductor.org/packages/release/bioc/html/scater.html) package 224 | allows to plot the distribution of expression values across cell types 225 | for a chosen set of proteins. The output is a `ggplot` object which can be 226 | modified further. 227 | 228 | ```{r celltype violin, message=FALSE, fig.height=12} 229 | # Violin Plot - plotExpression 230 | plotExpression(spe[,cur_cells], 231 | features = rownames(spe)[rowData(spe)$marker_class == "type"], 232 | x = "celltype", 233 | exprs_values = "exprs", 234 | colour_by = "celltype") + 235 | theme(axis.text.x = element_text(angle = 90))+ 236 | scale_color_manual(values = metadata(spe)$color_vectors$celltype) 237 | ``` 238 | 239 | ### Scatter plot visualization 240 | 241 | Moreover, a protein expression based scatter plot can be generated with 242 | `dittoScatterPlot` (returns a `ggplot` object). We overlay the plot with 243 | the cell type information. 244 | 245 | ```{r celltype scatter, message=FALSE} 246 | # Scatter plot 247 | dittoScatterPlot(spe, 248 | x.var = "CD3", 249 | y.var="CD20", 250 | assay.x = "exprs", 251 | assay.y = "exprs", 252 | color.var = "celltype") + 253 | scale_color_manual(values = metadata(spe)$color_vectors$celltype) + 254 | ggtitle("Scatterplot for CD3/CD20 labelled by celltype") 255 | ``` 256 | 257 | We can nicely observe how the "B next to T cell" phenotype (`BnTcell`) 258 | has high expression values for both CD20 and CD3. 259 | 260 | **Of note**, in a setting where the user aims to assign labels to 261 | clusters based on marker genes/proteins, all of the above plots can be 262 | particularly helpful. 263 | 264 | ### Barplot visualization 265 | 266 | In order to display frequencies of cell types per sample/patient, the 267 | `dittoBarPlot` function will be used. Data can be represented as 268 | percentages or counts and again `ggplot` objects are outputted. 
269 | 270 | ```{r barplot celltype, message=FALSE} 271 | # by sample_id - percentage 272 | dittoBarPlot(spe, 273 | var = "celltype", 274 | group.by = "sample_id") + 275 | scale_fill_manual(values = metadata(spe)$color_vectors$celltype) 276 | 277 | # by patient_id - percentage 278 | dittoBarPlot(spe, 279 | var = "celltype", 280 | group.by = "patient_id") + 281 | scale_fill_manual(values = metadata(spe)$color_vectors$celltype) 282 | 283 | # by patient_id - count 284 | dittoBarPlot(spe, 285 | scale = "count", 286 | var = "celltype", 287 | group.by = "patient_id") + 288 | scale_fill_manual(values = metadata(spe)$color_vectors$celltype) 289 | ``` 290 | 291 | We can see that cell type frequencies change between samples/patients 292 | and that the highest proportion/counts of plasma cells and stromal 293 | cells can be observed for Patient 2 and Patient 4, respectively. 294 | 295 | ### CATALYST-based visualization 296 | 297 | In the following, we highlight some useful visualization 298 | functions from the 299 | [CATALYST](https://bioconductor.org/packages/release/bioc/html/CATALYST.html) 300 | package. 301 | 302 | To this end, we will first convert the `SpatialExperiment` object into a 303 | CATALYST-compatible format. 304 | 305 | ```{r celltype CATALYST} 306 | library(CATALYST) 307 | 308 | # Save SPE in CATALYST-compatible object with renamed colData entries and 309 | # new metadata information 310 | spe_cat <- spe 311 | 312 | spe_cat$sample_id <- factor(spe$sample_id) 313 | spe_cat$condition <- factor(spe$indication) 314 | spe_cat$cluster_id <- factor(spe$celltype) 315 | 316 | # Add celltype information to metadata 317 | metadata(spe_cat)$cluster_codes <- data.frame(celltype = factor(spe_cat$celltype)) 318 | ``` 319 | 320 | All of the `CATALYST` functions presented below return `ggplot` objects, 321 | which allow flexible downstream adjustment. 
322 | 323 | #### Pseudobulk-level MDS plot 324 | 325 | Pseudobulk-level multi-dimensional scaling (MDS) plots can be rendered 326 | with the exported `pbMDS` function. 327 | 328 | Here, we will use `pbMDS` to highlight expression similarities between 329 | cell types and subsequently for each celltype-sample-combination. 330 | 331 | ```{r celltype pbmds, message=FALSE} 332 | # MDS pseudobulk by cell type 333 | pbMDS(spe_cat, 334 | by = "cluster_id", 335 | features = rownames(spe_cat)[rowData(spe_cat)$marker_class == "type"], 336 | label_by = "cluster_id", 337 | k = "celltype") + 338 | scale_color_manual(values = metadata(spe_cat)$color_vectors$celltype) 339 | 340 | # MDS pseudobulk by cell type and sample_id 341 | pbMDS(spe_cat, 342 | by = "both", 343 | features = rownames(spe_cat)[rowData(spe_cat)$marker_class == "type"], 344 | k = "celltype", 345 | shape_by = "condition", 346 | size_by = TRUE) + 347 | scale_color_manual(values = metadata(spe_cat)$color_vectors$celltype) 348 | ``` 349 | 350 | We can see that the pseudobulk-expression profile of neutrophils seems 351 | markedly distinct from the other cell types, while comparable cell types 352 | such as the T cell subtypes group together. Furthermore, pseudobulk 353 | cell-type profiles from SCCHN appear different from the other 354 | indications. 355 | 356 | #### Reduced dimension plot on CLR of proportions 357 | 358 | The `clrDR` function produces dimensionality reduction plots on centered 359 | log-ratios (CLR) of sample/cell type proportions across cell 360 | type/samples. 361 | 362 | As with `pbMDS`, the output plots aim to illustrate the degree of 363 | similarity between cell types based on sample proportions. 
364 | 365 | ```{r celltype - clrDR, message=FALSE} 366 | # CLR on cluster proportions across samples 367 | clrDR(spe_cat, 368 | dr = "PCA", 369 | by = "cluster_id", 370 | k = "celltype", 371 | label_by = "cluster_id", 372 | arrow_col = "sample_id", 373 | point_pal = metadata(spe_cat)$color_vectors$celltype) 374 | ``` 375 | 376 | We can again observe that neutrophils have a divergent profile also in 377 | terms of their sample proportions. 378 | 379 | #### Pseudobulk expression boxplot 380 | 381 | The `plotPbExprs` generates combined box- and jitter-plots of aggregated marker 382 | expression per cell type and sample (image). Here, we further split the data by 383 | cancer type. 384 | 385 | ```{r celltype pbExprs, fig.width=7, fig.height=12, message=FALSE} 386 | plotPbExprs(spe_cat, 387 | k = "celltype", 388 | facet_by = "cluster_id", 389 | ncol = 2, 390 | features = rownames(spe_cat)[rowData(spe_cat)$marker_class == "type"]) + 391 | scale_color_manual(values = metadata(spe_cat)$color_vectors$indication) 392 | ``` 393 | 394 | Notably, CD15 levels are elevated in SCCHN in comparison to all other 395 | indications for most cell types. 396 | 397 | ## Sample-level {#sample-level} 398 | 399 | In the next section, we will shift the grouping-level focus from the 400 | cell type to the sample-level. Sample-levels will be further divided 401 | into the sample-(image) and patient-level. 402 | 403 | Although we will mostly repeat the functions from the previous section 404 | \@ref(cell-type-level), sample- and patient-level centered visualization 405 | can provide additional quality control and biological interpretation. 406 | 407 | ### Dimensionality reduction visualization 408 | 409 | Visualization of low-dimensional embeddings, here comparing non-corrected and 410 | fastMNN-corrected UMAPs, and coloring it by sample-levels is often used 411 | for "batch effect" assessment as mentioned in Section 412 | \@ref(cell-quality). 413 | 414 | We will again use `dittoDimPlot`. 
415 | 416 | ```{r sample umap, fig.width=8, fig.height=8, message = FALSE} 417 | ## UMAP colored by sample ID and patient ID (uncorrected vs. fastMNN-corrected) - dittoDimPlot 418 | p1 <- dittoDimPlot(spe, 419 | var = "sample_id", 420 | reduction.use = "UMAP", 421 | size = 0.2, 422 | colors = viridis(100), 423 | do.label = FALSE) + 424 | scale_color_manual(values = metadata(spe)$color_vectors$sample_id) + 425 | theme(legend.title = element_blank()) + 426 | ggtitle("Sample ID") 427 | 428 | p2 <- dittoDimPlot(spe, 429 | var = "sample_id", 430 | reduction.use = "UMAP_mnnCorrected", 431 | size = 0.2, 432 | colors = viridis(100), 433 | do.label = FALSE) + 434 | scale_color_manual(values = metadata(spe)$color_vectors$sample_id) + 435 | theme(legend.title = element_blank()) + 436 | ggtitle("Sample ID") 437 | 438 | p3 <- dittoDimPlot(spe, 439 | var = "patient_id", 440 | reduction.use = "UMAP", 441 | size = 0.2, 442 | do.label = FALSE) + 443 | scale_color_manual(values = metadata(spe)$color_vectors$patient_id) + 444 | theme(legend.title = element_blank()) + 445 | ggtitle("Patient ID") 446 | 447 | p4 <- dittoDimPlot(spe, 448 | var = "patient_id", 449 | reduction.use = "UMAP_mnnCorrected", 450 | size = 0.2, 451 | do.label = FALSE) + 452 | scale_color_manual(values = metadata(spe)$color_vectors$patient_id) + 453 | theme(legend.title = element_blank()) + 454 | ggtitle("Patient ID") 455 | 456 | (p1 + p2) / (p3 + p4) 457 | ``` 458 | 459 | As illustrated in Section \@ref(batch-effects), we see that the fastMNN 460 | approach (right side of the plot) leads to mixing of cells across 461 | samples/patients and thus batch effect correction. 462 | 463 | ### Heatmap visualization 464 | 465 | It can be beneficial to use a heatmap to visualize single-cell 466 | expression per sample and patient. Such a plot, which we will create 467 | using `dittoHeatmap`, can highlight biological differences across 468 | samples/patients.
469 | 470 | ```{r sample heatmap, fig.height = 8,fig.width = 8} 471 | # Heatmap visualization - DittoHeatmap 472 | dittoHeatmap(spe[,cur_cells], 473 | genes = rownames(spe)[rowData(spe)$marker_class == "type"], 474 | assay = "exprs", 475 | order.by = c("patient_id","sample_id"), 476 | cluster_cols = FALSE, 477 | scale = "none", 478 | heatmap.colors = viridis(100), 479 | annot.by = c("celltype", "indication", "patient_id", "sample_id"), 480 | annotation_colors = list(celltype = metadata(spe)$color_vectors$celltype, 481 | indication = metadata(spe)$color_vectors$indication, 482 | patient_id = metadata(spe)$color_vectors$patient_id, 483 | sample_id = metadata(spe)$color_vectors$sample_id)) 484 | ``` 485 | 486 | As in Section \@ref(image-quality), aggregated mean marker expression 487 | per sample/patient allows identification of samples/patients with 488 | outlying expression patterns. 489 | 490 | Here, we will focus on the patient level and use `aggregateAcrossCells` 491 | and `dittoHeatmap`. The heatmap will be annotated with the number of 492 | cells per patient and cancer type and displayed using two scaling 493 | options.
494 | 495 | ```{r sample mean-expression-per-cluster, fig.height=5} 496 | # mean expression by patient_id 497 | patient_mean <- aggregateAcrossCells(as(spe, "SingleCellExperiment"), 498 | ids = spe$patient_id, 499 | statistics = "mean", 500 | use.assay.type = "exprs", 501 | subset.row = rownames(spe)[rowData(spe)$marker_class == "type"]) 502 | 503 | # No scaling 504 | dittoHeatmap(patient_mean, 505 | assay = "exprs", 506 | cluster_cols = TRUE, 507 | scale = "none", 508 | heatmap.colors = viridis(100), 509 | annot.by = c("patient_id","indication","ncells"), 510 | annotation_colors = list(patient_id = metadata(spe)$color_vectors$patient_id, 511 | indication = metadata(spe)$color_vectors$indication, 512 | ncells = plasma(100))) 513 | 514 | # Max expression scaling 515 | dittoHeatmap(patient_mean, 516 | assay = "exprs", 517 | cluster_cols = TRUE, 518 | scaled.to.max = TRUE, 519 | heatmap.colors.max.scaled = inferno(100), 520 | annot.by = c("patient_id","indication","ncells"), 521 | annotation_colors = list(patient_id = metadata(spe)$color_vectors$patient_id, 522 | indication = metadata(spe)$color_vectors$indication, 523 | ncells = plasma(100))) 524 | 525 | ``` 526 | 527 | As seen before, CD15 levels are elevated in the SCCHN patient, while SMA 528 | levels are highest for Patient 4 (CRC). 529 | 530 | ### Barplot visualization 531 | 532 | Complementary to displaying cell type frequencies per sample/patient, we 533 | can use `dittoBarPlot` to display sample/patient frequencies per cell 534 | type.
535 | 536 | ```{r barplot sample, message=FALSE} 537 | dittoBarPlot(spe, 538 | var = "patient_id", 539 | group.by = "celltype") + 540 | scale_fill_manual(values = metadata(spe)$color_vectors$patient_id) 541 | 542 | dittoBarPlot(spe, 543 | var = "sample_id", 544 | group.by = "celltype") + 545 | scale_fill_manual(values = metadata(spe)$color_vectors$sample_id) 546 | ``` 547 | 548 | `Patient2` has the highest and lowest proportion of plasma cells and 549 | neutrophils, respectively. 550 | 551 | ### CATALYST-based visualization 552 | 553 | #### Pseudobulk-level MDS plot 554 | 555 | Expression-based pseudobulks for each sample can be compared with the 556 | `pbMDS` function. 557 | 558 | ```{r sample-pbmds} 559 | # MDS pseudobulk by sample_id 560 | pbMDS(spe_cat, 561 | by = "sample_id", 562 | color_by = "sample_id", 563 | features = rownames(spe_cat)[rowData(spe_cat)$marker_class == "type"]) + 564 | scale_color_manual(values = metadata(spe_cat)$color_vectors$sample_id) 565 | ``` 566 | 567 | There are marked differences in pseudobulk-expression patterns between 568 | samples and across patients, which can be driven by biological 569 | differences and also technical aspects such as divergent region 570 | selection. 571 | 572 | #### Reduced dimension plot on CLR of proportions 573 | 574 | The `clrDR` function can also be used to analyze similarity of samples 575 | based on cell type proportions. 576 | 577 | ```{r sample-clrDR} 578 | # CLR on sample proportions across clusters 579 | clrDR(spe_cat, 580 | dr = "PCA", 581 | by = "sample_id", 582 | point_col = "sample_id", 583 | k = "celltype", 584 | point_pal = metadata(spe_cat)$color_vectors$sample_id) + 585 | scale_color_manual(values = metadata(spe_cat)$color_vectors$celltype) 586 | ``` 587 | 588 | There are notable differences between samples based on their cell type 589 | proportions. 
590 | 591 | Interestingly, `Patient3_001`, `Patient1_003`, `Patient4_007` and 592 | `Patient4_006` group together and the PC loadings indicate a strong 593 | contribution of BnT and B cells, which could suggest the formation of 594 | tertiary lymphoid structures (TLS). In Section \@ref(spatial-viz), we 595 | will be able to confirm this hypothesis visually on the images. 596 | 597 | ## Further examples {#rich-example} 598 | 599 | In the last section of this chapter, we will use the popular 600 | [ComplexHeatmap](https://bioconductor.org/packages/release/bioc/html/ComplexHeatmap.html) 601 | package to create a visualization example that combines various 602 | cell-type- and sample-level information. 603 | 604 | [ComplexHeatmap](https://bioconductor.org/packages/release/bioc/html/ComplexHeatmap.html) 605 | is highly versatile and was originally inspired by the 606 | [pheatmap](https://cran.r-project.org/web/packages/pheatmap/index.html) 607 | package. Therefore, many arguments have the same/similar names. 608 | 609 | For more details, we would recommend reading the [reference 610 | book](https://jokergoo.github.io/ComplexHeatmap-reference/book/). 611 | 612 | ### Publication-ready ComplexHeatmap 613 | 614 | For this example, we will concatenate heatmaps and annotations 615 | horizontally into one rich heatmap list. The grouping-level for the 616 | visualization will again be the cell type information from Section 617 | \@ref(classification). 618 | 619 | Initially, we will create two separate `Heatmap` objects for cell type 620 | and state markers. 621 | 622 | Then, metadata information, including the cancer type proportion and 623 | number of cells/patients per cell type, will be extracted into 624 | `HeatmapAnnotation` objects. 625 | 626 | Notably, we will add spatial features per cell type, here the number of 627 | neighbors extracted from `colPair(spe)` and cell area, in another 628 | `HeatmapAnnotation` object.
629 | 630 | Ultimately, all objects are combined in a `HeatmapList` and visualized. 631 | 632 | ```{r complex-heatmap, warning = FALSE, message = FALSE, fig.width=9, fig.height=5} 633 | library(ComplexHeatmap) 634 | library(circlize) 635 | library(tidyverse) 636 | set.seed(22) 637 | 638 | ### 1. Heatmap bodies ### 639 | 640 | # Heatmap body color 641 | col_exprs <- colorRamp2(c(0,1,2,3,4), 642 | c("#440154FF","#3B518BFF","#20938CFF", 643 | "#6ACD5AFF","#FDE725FF")) 644 | 645 | # Create Heatmap objects 646 | # By cell type markers 647 | celltype_mean <- aggregateAcrossCells(as(spe, "SingleCellExperiment"), 648 | ids = spe$celltype, 649 | statistics = "mean", 650 | use.assay.type = "exprs", 651 | subset.row = rownames(spe)[rowData(spe)$marker_class == "type"]) 652 | 653 | h_type <- Heatmap(t(assay(celltype_mean, "exprs")), 654 | column_title = "type_markers", 655 | col = col_exprs, 656 | name= "mean exprs", 657 | show_row_names = TRUE, 658 | show_column_names = TRUE) 659 | 660 | # By cell state markers 661 | cellstate_mean <- aggregateAcrossCells(as(spe, "SingleCellExperiment"), 662 | ids = spe$celltype, 663 | statistics = "mean", 664 | use.assay.type = "exprs", 665 | subset.row = rownames(spe)[rowData(spe)$marker_class == "state"]) 666 | 667 | h_state <- Heatmap(t(assay(cellstate_mean, "exprs")), 668 | column_title = "state_markers", 669 | col = col_exprs, 670 | name= "mean exprs", 671 | show_row_names = TRUE, 672 | show_column_names = TRUE) 673 | 674 | 675 | ### 2. 
Heatmap annotation ### 676 | 677 | ### 2.1 Metadata features 678 | 679 | anno <- colData(celltype_mean) %>% as.data.frame %>% select(celltype, ncells) 680 | 681 | # Proportion of indication per celltype 682 | indication <- unclass(prop.table(table(spe$celltype, spe$indication), margin = 1)) 683 | 684 | # Number of contributing patients per celltype 685 | cluster_PID <- colData(spe) %>% 686 | as.data.frame() %>% 687 | select(celltype, patient_id) %>% 688 | group_by(celltype) %>% table() %>% 689 | as.data.frame() 690 | 691 | n_PID <- cluster_PID %>% 692 | filter(Freq>0) %>% 693 | group_by(celltype) %>% 694 | count(name = "n_PID") %>% 695 | column_to_rownames("celltype") 696 | 697 | # Create HeatmapAnnotation objects 698 | ha_anno <- HeatmapAnnotation(celltype = anno$celltype, 699 | border = TRUE, 700 | gap = unit(1,"mm"), 701 | col = list(celltype = metadata(spe)$color_vectors$celltype), 702 | which = "row") 703 | 704 | ha_meta <- HeatmapAnnotation(n_cells = anno_barplot(anno$ncells, width = unit(10, "mm")), 705 | n_PID = anno_barplot(n_PID, width = unit(10, "mm")), 706 | indication = anno_barplot(indication,width = unit(10, "mm"), 707 | gp = gpar(fill = metadata(spe)$color_vectors$indication)), 708 | border = TRUE, 709 | annotation_name_rot = 90, 710 | gap = unit(1,"mm"), 711 | which = "row") 712 | 713 | ### 2.2 Spatial features 714 | 715 | # Add number of neighbors to spe object (saved in colPair) 716 | spe$n_neighbors <- countLnodeHits(colPair(spe, "neighborhood")) 717 | 718 | # Select spatial features and average over celltypes 719 | spatial <- colData(spe) %>% 720 | as.data.frame() %>% 721 | select(area, celltype, n_neighbors) 722 | 723 | spatial <- spatial %>% 724 | select(-celltype) %>% 725 | aggregate(by = list(celltype = spatial$celltype), FUN = mean) %>% 726 | column_to_rownames("celltype") 727 | 728 | # Create HeatmapAnnotation object 729 | ha_spatial <- HeatmapAnnotation( 730 | area = spatial$area, 731 | n_neighbors = spatial$n_neighbors, 732 | border = 
TRUE, 733 | gap = unit(1,"mm"), 734 | which = "row") 735 | 736 | ### 3. Plot rich heatmap ### 737 | 738 | # Create HeatmapList object 739 | h_list <- h_type + 740 | h_state + 741 | ha_anno + 742 | ha_spatial + 743 | ha_meta 744 | 745 | # Add customized legend for anno_barplot() 746 | lgd <- Legend(title = "indication", 747 | at = colnames(indication), 748 | legend_gp = gpar(fill = metadata(spe)$color_vectors$indication)) 749 | 750 | # Plot 751 | draw(h_list,annotation_legend_list = list(lgd)) 752 | ``` 753 | 754 | This plot summarizes most of the information we have seen in this 755 | chapter previously. In addition, we can observe that tumor cells have 756 | the largest mean cell area, a high number of neighbors and elevated Ki67 757 | expression. BnT cells have the highest number of neighbors on average, 758 | which is biologically sound given their predominant location in highly 759 | immune infiltrated regions (such as TLS). 760 | 761 | ### Interactive visualization 762 | 763 | For interactive visualization of the single-cell data the 764 | [iSEE](https://www.bioconductor.org/packages/release/bioc/html/iSEE.html) shiny 765 | application can be used. For a comprehensive tutorial, please refer to the 766 | [iSEE vignette](https://www.bioconductor.org/packages/release/bioc/vignettes/iSEE/inst/doc/basic.html). 767 | 768 | ```{r iSEE, message=FALSE} 769 | if (interactive()) { 770 | library(iSEE) 771 | 772 | iSEE(spe) 773 | } 774 | ``` 775 | 776 | ## Session Info 777 | 778 |
779 | SessionInfo 780 | 781 | ```{r, echo = FALSE} 782 | sessionInfo() 783 | ``` 784 |
785 | -------------------------------------------------------------------------------- /10-image_visualization.Rmd: -------------------------------------------------------------------------------- 1 | # Image visualization {#image-visualization} 2 | 3 | The following section describes how to visualize the abundance of 4 | biomolecules (e.g., protein or RNA) as well as cell-specific metadata on 5 | images. Section \@ref(pixel-visualization) focuses on visualizing 6 | pixel-level information including the generation of pseudo-color 7 | composite images. Section \@ref(mask-visualization) highlights the 8 | visualization of cell metadata (e.g., cell phenotype) as well as 9 | summarized pixel intensities on cell segmentation masks. Section 10 | \@ref(cytoviewer) showcases interactive pixel- and 11 | cell-level visualization with the 12 | [cytoviewer](https://bioconductor.org/packages/release/bioc/html/cytoviewer.html) 13 | R/Bioconductor package [@Meyer2024]. 14 | 15 | The 16 | [cytomapper](https://www.bioconductor.org/packages/release/bioc/html/cytomapper.html) 17 | R/Bioconductor package was developed to support the handling and 18 | visualization of multiple multi-channel images and segmentation masks 19 | [@Eling2020]. The main data object for image handling is the 20 | [CytoImageList](https://www.bioconductor.org/packages/release/bioc/vignettes/cytomapper/inst/doc/cytomapper.html#5_The_CytoImageList_object) 21 | container which we used in Section \@ref(read-data) to store 22 | multi-channel images and segmentation masks. 23 | 24 | We will first read in the previously processed data and randomly select 25 | 3 images for visualization purposes. 
26 | 27 | ```{r read-data-img-viz, message=FALSE} 28 | library(SpatialExperiment) 29 | library(cytomapper) 30 | spe <- readRDS("data/spe.rds") 31 | images <- readRDS("data/images.rds") 32 | masks <- readRDS("data/masks.rds") 33 | 34 | # Sample images 35 | set.seed(220517) 36 | cur_id <- sample(unique(spe$sample_id), 3) 37 | cur_images <- images[names(images) %in% cur_id] 38 | cur_masks <- masks[names(masks) %in% cur_id] 39 | ``` 40 | 41 | ## Pixel visualization {#pixel-visualization} 42 | 43 | The following section gives examples for visualizing individual channels 44 | or multiple channels as pseudo-color composite images. For this the 45 | `cytomapper` package exports the `plotPixels` function which expects a 46 | `CytoImageList` object storing one or multiple multi-channel images. In 47 | the simplest use case, a single channel can be visualized as follows: 48 | 49 | ```{r single-channel} 50 | plotPixels(cur_images, 51 | colour_by = "Ecad", 52 | bcg = list(Ecad = c(0, 5, 1))) 53 | ``` 54 | 55 | The plot above shows the tissue expression of the epithelial tumor 56 | marker E-cadherin on the 3 selected images. The `bcg` parameter (default 57 | `c(0, 1, 1)`) stands for "background", "contrast", "gamma" and controls 58 | these attributes of the image. This parameter takes a named list where 59 | each entry specifies these attributes per channel. The first value of 60 | the numeric vector will be added to the pixel intensities (background); 61 | pixel intensities will be multiplied by the second entry of the vector 62 | (contrast); pixel intensities will be exponentiated by the third entry 63 | of the vector (gamma). In most cases, it is sufficient to adjust the 64 | second (contrast) entry of the vector. 65 | 66 | The following example highlights the visualization of 6 markers (maximum 67 | allowed number of markers) at once per image. 
The markers indicate the 68 | spatial distribution of tumor cells (E-cadherin), T cells (CD3), B cells 69 | (CD20), CD8+ T cells (CD8a), plasma cells (CD38) and proliferating cells 70 | (Ki67). 71 | 72 | ```{r 6-channel} 73 | plotPixels(cur_images, 74 | colour_by = c("Ecad", "CD3", "CD20", "CD8a", "CD38", "Ki67"), 75 | bcg = list(Ecad = c(0, 5, 1), 76 | CD3 = c(0, 5, 1), 77 | CD20 = c(0, 5, 1), 78 | CD8a = c(0, 5, 1), 79 | CD38 = c(0, 8, 1), 80 | Ki67 = c(0, 5, 1))) 81 | ``` 82 | 83 | ### Adjusting colors 84 | 85 | The default colors for visualization are chosen by the additive RGB 86 | (red, green, blue) color model. For six markers the default colors are: 87 | red, green, blue, cyan (green + blue), magenta (red + blue), yellow 88 | (green + red). These colors are the easiest to distinguish by eye. 89 | However, you can select other colors for each channel by setting the 90 | `colour` parameter: 91 | 92 | ```{r setting-colors} 93 | plotPixels(cur_images, 94 | colour_by = c("Ecad", "CD3", "CD20"), 95 | bcg = list(Ecad = c(0, 5, 1), 96 | CD3 = c(0, 5, 1), 97 | CD20 = c(0, 5, 1)), 98 | colour = list(Ecad = c("black", "burlywood1"), 99 | CD3 = c("black", "cyan2"), 100 | CD20 = c("black", "firebrick1"))) 101 | ``` 102 | 103 | The `colour` parameter takes a named list in which each entry specifies 104 | the colors from which a color gradient is constructed via 105 | `colorRampPalette`. These are usually vectors of length 2 in which the 106 | first entry is `"black"` and the second entry specifies the color of 107 | choice. Although not recommended, you can also specify more than two 108 | colors to generate a more complex color gradient. 109 | 110 | ### Image normalization 111 | 112 | As an alternative to setting the `bcg` parameter, images can first be 113 | normalized. Normalization here means to scale the pixel intensities per 114 | channel between 0 and 1 (or a range specified by the `ft` parameter in 115 | the `normalize` function). 
By default, the `normalize` function scales 116 | pixel intensities across **all** images contained in the `CytoImageList` 117 | object (`separateImages = FALSE`). Each individual channel is scaled 118 | independently (`separateChannels = TRUE`). 119 | 120 | After 0-1 normalization, maximum pixel intensities can be clipped to 121 | enhance the contrast of the image (setting the `inputRange` parameter). 122 | In the following example, the clipping to 0 and 0.2 is the same as 123 | multiplying the pixel intensities by a factor of 5. 124 | 125 | ```{r default-normalization} 126 | # 0 - 1 channel scaling across all images 127 | norm_images <- cytomapper::normalize(cur_images) 128 | 129 | # Clip channel at 0.2 130 | norm_images <- cytomapper::normalize(norm_images, inputRange = c(0, 0.2)) 131 | 132 | plotPixels(norm_images, 133 | colour_by = c("Ecad", "CD3", "CD20", "CD8a", "CD38", "Ki67")) 134 | ``` 135 | 136 | The default setting of scaling pixel intensities across all images 137 | ensures comparable intensity levels across images. Pixel intensities can 138 | also be scaled **per image** therefore correcting for 139 | staining/expression differences between images: 140 | 141 | ```{r individual-normalization} 142 | # 0 - 1 channel scaling per image 143 | norm_images <- cytomapper::normalize(cur_images, separateImages = TRUE) 144 | 145 | # Clip channel at 0.2 146 | norm_images <- cytomapper::normalize(norm_images, inputRange = c(0, 0.2)) 147 | 148 | plotPixels(norm_images, 149 | colour_by = c("Ecad", "CD3", "CD20", "CD8a", "CD38", "Ki67")) 150 | ``` 151 | 152 | As we can see, the marker Ki67 appears brighter on image 2 and 3 in 153 | comparison to scaling the channel across all images. 154 | 155 | Finally, the `normalize` function also accepts a named list input for 156 | the `inputRange` argument. 
In this list, the clipping range per channel 157 | can be set individually: 158 | 159 | ```{r setting-inputRange} 160 | # 0 - 1 channel scaling per image 161 | norm_images <- cytomapper::normalize(cur_images, 162 | separateImages = TRUE, 163 | inputRange = list(Ecad = c(0, 50), 164 | CD3 = c(0, 30), 165 | CD20 = c(0, 40), 166 | CD8a = c(0, 50), 167 | CD38 = c(0, 10), 168 | Ki67 = c(0, 70))) 169 | 170 | plotPixels(norm_images, 171 | colour_by = c("Ecad", "CD3", "CD20", "CD8a", "CD38", "Ki67")) 172 | ``` 173 | 174 | ## Cell visualization {#mask-visualization} 175 | 176 | In the following section, we will show examples of how to visualize 177 | single cells either as segmentation masks or outlined on composite 178 | images. This type of visualization allows the observation of the spatial 179 | distribution of cell phenotypes, the visual assessment of morphological 180 | features and quality control in terms of cell segmentation and 181 | phenotyping. 182 | 183 | ### Visualizing metadata 184 | 185 | The `cytomapper` package provides the `plotCells` function that accepts 186 | a `CytoImageList` object containing segmentation masks. These are 187 | defined as single channel images where sets of pixels with the same 188 | integer ID identify individual cells. This integer ID can be found as an 189 | entry in the `colData(spe)` slot and as pixel information in the 190 | segmentation masks. The entry in `colData(spe)` needs to be specified 191 | via the `cell_id` argument to the `plotCells` function. In that way, 192 | data contained in the `SpatialExperiment` object can be mapped to 193 | segmentation masks. For the current dataset, the cell IDs are stored in 194 | `colData(spe)$ObjectNumber`. 195 | 196 | As cell IDs are only unique within a single image, `plotCells` also 197 | requires the `img_id` argument. This argument specifies the 198 | `colData(spe)` as well as the `mcols(masks)` entry that stores the 199 | unique image name from which each cell was extracted.
In the current 200 | dataset the unique image names are stored in `colData(spe)$sample_id` 201 | and `mcols(masks)$sample_id`. 202 | 203 | Providing these two entries that allow mapping between the 204 | `SpatialExperiment` object and segmentation masks, we can now color 205 | individual cells based on their cell type: 206 | 207 | ```{r celltype} 208 | plotCells(cur_masks, 209 | object = spe, 210 | cell_id = "ObjectNumber", 211 | img_id = "sample_id", 212 | colour_by = "celltype") 213 | ``` 214 | 215 | For consistent visualization, the `plotCells` function takes a named 216 | list as the `colour` argument. The entry name must match the `colour_by` 217 | argument. 218 | 219 | ```{r setting-celltype-colors} 220 | plotCells(cur_masks, 221 | object = spe, 222 | cell_id = "ObjectNumber", 223 | img_id = "sample_id", 224 | colour_by = "celltype", 225 | colour = list(celltype = metadata(spe)$color_vectors$celltype)) 226 | ``` 227 | 228 | If only individual cell types should be visualized, the 229 | `SpatialExperiment` object can be subsetted (e.g., to only contain CD8+ 230 | T cells). In the following example CD8+ T cells are colored in red and 231 | all other cells that are not contained in the dataset are colored in 232 | white (as set by the `missing_colour` argument). 233 | 234 | ```{r selective-visualization} 235 | CD8 <- spe[,spe$celltype == "CD8"] 236 | 237 | plotCells(cur_masks, 238 | object = CD8, 239 | cell_id = "ObjectNumber", 240 | img_id = "sample_id", 241 | colour_by = "celltype", 242 | colour = list(celltype = c(CD8 = "red")), 243 | missing_colour = "white") 244 | ``` 245 | 246 | In terms of visualizing metadata, any entry in the `colData(spe)` slot 247 | can be visualized. The `plotCells` function automatically detects if the 248 | entry is continuous or discrete.
In this fashion, we can now visualize 249 | the area of each cell: 250 | 251 | ```{r area} 252 | plotCells(cur_masks, 253 | object = spe, 254 | cell_id = "ObjectNumber", 255 | img_id = "sample_id", 256 | colour_by = "area") 257 | ``` 258 | 259 | ### Visualizing expression 260 | 261 | Similar to visualizing single-cell metadata on segmentation masks, we 262 | can use the `plotCells` function to visualize the aggregated pixel 263 | intensities per cell. In the current dataset pixel intensities were 264 | aggregated by computing the mean pixel intensity per cell and per 265 | channel. The `plotCells` function accepts the `exprs_values` argument 266 | (default `counts`) that allows selecting the assay which stores the 267 | expression values that should be visualized. 268 | 269 | In the following example, we visualize the asinh-transformed mean pixel 270 | intensities of the epithelial marker E-cadherin on segmentation masks. 271 | 272 | ```{r Ecad-expression} 273 | plotCells(cur_masks, 274 | object = spe, 275 | cell_id = "ObjectNumber", 276 | img_id = "sample_id", 277 | colour_by = "Ecad", 278 | exprs_values = "exprs") 279 | ``` 280 | 281 | We will now visualize the maximum number of allowed markers as 282 | composites on the segmentation masks. As above the markers indicate the 283 | spatial distribution of tumor cells (E-cadherin), T cells (CD3), B cells 284 | (CD20), CD8+ T cells (CD8a), plasma cells (CD38) and proliferating cells 285 | (Ki67).
286 | 287 | ```{r 6-channel-expression} 288 | plotCells(cur_masks, 289 | object = spe, 290 | cell_id = "ObjectNumber", 291 | img_id = "sample_id", 292 | colour_by = c("Ecad", "CD3", "CD20", "CD8a", "CD38", "Ki67"), 293 | exprs_values = "exprs") 294 | ``` 295 | 296 | While visualizing 6 markers on the pixel-level may still allow the 297 | distinction of different tissue structures, observing single-cell 298 | expression levels is difficult when visualizing many markers 299 | simultaneously due to often overlapping expression. 300 | 301 | Similarly to adjusting marker colors when visualizing pixel intensities, 302 | we can change the color gradients per marker by setting the `colour` 303 | argument: 304 | 305 | ```{r setting-expression-colors} 306 | plotCells(cur_masks, 307 | object = spe, 308 | cell_id = "ObjectNumber", 309 | img_id = "sample_id", 310 | colour_by = c("Ecad", "CD3", "CD20"), 311 | exprs_values = "exprs", 312 | colour = list(Ecad = c("black", "burlywood1"), 313 | CD3 = c("black", "cyan2"), 314 | CD20 = c("black", "firebrick1"))) 315 | ``` 316 | 317 | ### Outlining cells on images {#outline-cells} 318 | 319 | The following section highlights the combined visualization of pixel- 320 | and cell-level information at once. For this, besides the 321 | `SpatialExperiment` object, the `plotPixels` function accepts two 322 | `CytoImageList` objects. One for the multi-channel images and one for 323 | the segmentation masks. By specifying the `outline_by` parameter, the 324 | outlines of cells can now be colored based on their metadata. 325 | 326 | The following example first generates a 3-channel composite image 327 | displaying the expression of E-cadherin, CD3 and CD20 before coloring 328 | the cells' outlines by their cell phenotype.
329 | 330 | ```{r outlining-all-cells} 331 | plotPixels(image = cur_images, 332 | mask = cur_masks, 333 | object = spe, 334 | cell_id = "ObjectNumber", 335 | img_id = "sample_id", 336 | colour_by = c("Ecad", "CD3", "CD20"), 337 | outline_by = "celltype", 338 | bcg = list(Ecad = c(0, 5, 1), 339 | CD3 = c(0, 5, 1), 340 | CD20 = c(0, 5, 1)), 341 | colour = list(celltype = metadata(spe)$color_vectors$celltype), 342 | thick = TRUE) 343 | ``` 344 | 345 | Distinguishing individual cell phenotypes is nearly impossible in the 346 | images above. 347 | 348 | However, the `SpatialExperiment` object can be subsetted to only contain 349 | cells of a single or few phenotypes. This allows the selective 350 | visualization of cell outlines on composite images. 351 | 352 | Here, we select all CD8+ T cells from the dataset and outline them on a 353 | 2-channel composite image displaying the expression of CD3 and CD8a. 354 | 355 | ```{r outlining-CD8} 356 | CD8 <- spe[,spe$celltype == "CD8"] 357 | 358 | plotPixels(image = cur_images, 359 | mask = cur_masks, 360 | object = CD8, 361 | cell_id = "ObjectNumber", img_id = "sample_id", 362 | colour_by = c("CD3", "CD8a"), 363 | outline_by = "celltype", 364 | bcg = list(CD3 = c(0, 5, 1), 365 | CD8a = c(0, 5, 1)), 366 | colour = list(celltype = c("CD8" = "white")), 367 | thick = TRUE) 368 | ``` 369 | 370 | This type of visualization allows the quality control of two things: 1. 371 | segmentation quality of individual cell types can be checked and 2. cell 372 | phenotyping accuracy can be visually assessed against expected marker 373 | expression. 374 | 375 | ## Adjusting plot annotations 376 | 377 | The `cytomapper` package provides a number of function arguments to 378 | adjust the visual appearance of figures that are shared between the 379 | `plotPixels` and `plotCells` function. 380 | 381 | For a full overview of the arguments please refer to `?plotting-param`. 
382 | 383 | We use the following example to highlight how to adjust the scale bar, 384 | the image title, the legend appearance and the margin between images. 385 | 386 | ```{r adjusting-parameters} 387 | plotPixels(cur_images, 388 | colour_by = c("Ecad", "CD3", "CD20", "CD8a", "CD38", "Ki67"), 389 | bcg = list(Ecad = c(0, 5, 1), 390 | CD3 = c(0, 5, 1), 391 | CD20 = c(0, 5, 1), 392 | CD8a = c(0, 5, 1), 393 | CD38 = c(0, 8, 1), 394 | Ki67 = c(0, 5, 1)), 395 | scale_bar = list(length = 100, 396 | label = expression("100 " ~ mu * "m"), 397 | cex = 0.7, 398 | lwidth = 10, 399 | colour = "grey", 400 | position = "bottomleft", 401 | margin = c(5,5), 402 | frame = 3), 403 | image_title = list(text = mcols(cur_images)$indication, 404 | position = "topright", 405 | colour = "grey", 406 | margin = c(5,5), 407 | font = 2, 408 | cex = 2), 409 | legend = list(colour_by.title.cex = 0.7, 410 | margin = 10), 411 | margin = 40) 412 | ``` 413 | 414 | ## Displaying individual images 415 | 416 | By default, all images are displayed on the same graphics device. This 417 | can be useful when saving all images at once (see next section) to zoom 418 | into the individual images instead of opening each image individually. 419 | However, when displaying images in a markdown document these are more 420 | accessible when visualized individually. For this, the `plotPixels` and 421 | `plotCells` function accepts the `display` parameter that when set to 422 | `"single"` displays each resulting image in its own graphics device: 423 | 424 | ```{r individual-images} 425 | plotCells(cur_masks, 426 | object = spe, 427 | cell_id = "ObjectNumber", 428 | img_id = "sample_id", 429 | colour_by = "celltype", 430 | colour = list(celltype = metadata(spe)$color_vectors$celltype), 431 | display = "single", 432 | legend = NULL) 433 | ``` 434 | 435 | ## Saving and returning images 436 | 437 | The final section addresses how to save composite images and how to 438 | return them for integration with other plots. 
439 | 440 | The `plotPixels` and `plotCells` functions accept the `save_plot` 441 | argument which takes a named list of the following entries: `filename` 442 | indicates the location and file type of the image saved to disk; `scale` 443 | adjusts the resolution of the saved image (this only needs to be 444 | adjusted for small images). 445 | 446 | ```{r saving-images} 447 | plotCells(cur_masks, 448 | object = spe, 449 | cell_id = "ObjectNumber", 450 | img_id = "sample_id", 451 | colour_by = "celltype", 452 | colour = list(celltype = metadata(spe)$color_vectors$celltype), 453 | save_plot = list(filename = "data/celltype_image.png")) 454 | ``` 455 | 456 | The composite images (together with their annotation) can also be 457 | returned. In the following code chunk we save two example plots to 458 | variables (`out1` and `out2`). 459 | 460 | ```{r returning-images, results="hide", fig.show='hide'} 461 | out1 <- plotCells(cur_masks, 462 | object = spe, 463 | cell_id = "ObjectNumber", 464 | img_id = "sample_id", 465 | colour_by = "celltype", 466 | colour = list(celltype = metadata(spe)$color_vectors$celltype), 467 | return_plot = TRUE) 468 | 469 | out2 <- plotCells(cur_masks, 470 | object = spe, 471 | cell_id = "ObjectNumber", 472 | img_id = "sample_id", 473 | colour_by = c("Ecad", "CD3", "CD20"), 474 | exprs_values = "exprs", 475 | return_plot = TRUE) 476 | ``` 477 | 478 | The composite images are stored in `out1$plot` and `out2$plot` and can 479 | be converted into a graph object recognized by the 480 | [cowplot](https://cran.r-project.org/web/packages/cowplot/vignettes/introduction.html) 481 | package. 482 | 483 | The final function call of the following chunk plots both objects next to 484 | each other.
485 | 486 | ```{r side-by-side-plot, message=FALSE} 487 | library(cowplot) 488 | library(gridGraphics) 489 | p1 <- ggdraw(out1$plot, clip = "on") 490 | p2 <- ggdraw(out2$plot, clip = "on") 491 | 492 | plot_grid(p1, p2) 493 | ``` 494 | 495 | ## Interactive image visualization {#cytoviewer} 496 | 497 | The 498 | [cytoviewer](https://bioconductor.org/packages/release/bioc/html/cytoviewer.html) 499 | R/Bioconductor package [@Meyer2024] extends the static visualization 500 | abilities from `cytomapper` via an interactive and user-friendly `shiny` 501 | application. 502 | 503 | It supports flexible generation of image composites, allows side-by-side 504 | visualization of single channels, and facilitates the spatial 505 | visualization of single-cell data in the form of segmentation masks. 506 | Rapid and publication-quality image downloads are also supported. For a 507 | full introduction to the package, please refer to 508 | [the vignette](https://bioconductor.org/packages/release/bioc/vignettes/cytoviewer/inst/doc/cytoviewer.html). 509 | 510 | ```{r cytoviewer-interactive, message = FALSE} 511 | library(cytoviewer) 512 | 513 | app <- cytoviewer(image = images, 514 | mask = masks, 515 | object = spe, 516 | cell_id = "ObjectNumber", 517 | img_id = "sample_id") 518 | 519 | if (interactive()) { 520 | shiny::runApp(app) 521 | } 522 | ``` 523 | 524 | ## Session Info 525 | 526 |
527 | 528 | SessionInfo 529 | 530 | ```{r, echo = FALSE} 531 | sessionInfo() 532 | ``` 533 | 534 |
535 | -------------------------------------------------------------------------------- /12-references.Rmd: -------------------------------------------------------------------------------- 1 | `r if (knitr::is_html_output()) ' 2 | # References {-} 3 | '` 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | **Version 1.0.0** [2023-06-30] 2 | 3 | - First stable release of the workflow 4 | 5 | **Version 1.0.1** [2023-10-19] 6 | 7 | - Added seed before `predict` call after training a classifier 8 | 9 | **Version 1.0.2** [2023-11-27] 10 | 11 | - Added developers documentation 12 | - Added more ways to visualize cell type composition per CN 13 | 14 | **Version 1.0.3** [2024-01-05] 15 | 16 | - Updated cytoviewer citation and corresponding text -------------------------------------------------------------------------------- /DEVELOPMENT.md: -------------------------------------------------------------------------------- 1 | # Useful information when developing this book 2 | 3 | This document is to guide future developers to maintain and extend the IMC 4 | data analysis book. 5 | 6 | ## General setup 7 | 8 | * The IMC data analysis book is written in [bookdown](https://bookdown.org/). 9 | * Each section is stored in its own `.Rmd` file with `index.Rmd` building the landing page 10 | * References are stored in `book.bib` 11 | * At the end of each `.Rmd` file a number of unit tests are executed. These 12 | unit tests are always executed but their results are not shown in the book. 13 | 14 | ### Continuous integration/continuous deployment 15 | 16 | * CI/CD is executed based on the workflow [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/blob/main/.github/workflows/build.yml). 17 | * On the first of each month based on the [Dockerfile](https://github.com/BodenmillerGroup/IMCDataAnalysis/blob/main/Dockerfile) a new Docker image is built.
We are doing this so that the workflow is always tested against the newest software versions. 18 | * The Docker image is pushed to the GitHub Container Registry [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/pkgs/container/imcdataanalysis). 19 | * The Docker image is date tagged and `latest` always refers to the newest build. 20 | * Once the Docker image is built, the IMC data analysis book is executed within the 21 | newest Docker image. This will also run all unit tests. 22 | 23 | **Of note:** Sometimes the calculation of the UMAP produces slightly different 24 | results. If that happens the workflow run can be re-executed by clicking the `Re-run jobs` button of the workflow run. 25 | This test could also be excluded in the long run. 26 | 27 | * When pushing to `main` (either directly or via a PR), the CI/CD workflow is 28 | executed. 29 | * If the Dockerfile changed (e.g., if you want to add a new package), a new Docker image is built and the workflow is executed within the new Docker image. 30 | * If the Dockerfile did not change, the workflow is executed within the most recent Docker image. 31 | 32 | ## Updating the book 33 | 34 | This section describes how to update the book. You want to do this to add new content 35 | but also to fix bugs or adjust unit tests. 36 | 37 | ### Work on the devel branch 38 | 39 | It is recommended to work on the `devel` branch of the GitHub repository to add 40 | new changes. 41 | 42 | ### Work within the newest Docker container 43 | 44 | It is also recommended to always work within a Docker container based on the newest 45 | Docker image available: 46 | 47 | 1.
After installing [Docker](https://docs.docker.com/get-docker/) you can first pull the container via: 48 | 49 | ``` 50 | docker pull ghcr.io/bodenmillergroup/imcdataanalysis:yyyy-mm-dd 51 | ``` 52 | 53 | and then run the container: 54 | 55 | ``` 56 | docker run -v /path/to/IMCDataAnalysis:/home/rstudio/IMCDataAnalysis \ 57 | -e PASSWORD=bioc -p 8787:8787 \ 58 | ghcr.io/bodenmillergroup/imcdataanalysis:yyyy-mm-dd 59 | ``` 60 | 61 | 2. An RStudio server session can be accessed via a browser at `localhost:8787` using `Username: rstudio` and `Password: bioc`. 62 | 3. Navigate to `IMCDataAnalysis` and open the `IMCDataAnalysis.Rproj` file. 63 | 4. Code in the individual files can now be executed or the whole workflow can be built by entering `bookdown::render_book()`. 64 | 65 | ### Adding new packages 66 | 67 | If you need to add new packages to the workflow, make sure to add them to the 68 | [software requirements](https://bodenmillergroup.github.io/IMCDataAnalysis/prerequisites.html#software-requirements) 69 | section and to the Dockerfile. 70 | 71 | ### Opening a pull request 72 | 73 | Now you can change the content of the book. 74 | Once you have added all changes, push the changes to `devel` and open a pull request 75 | to `main`. Wait until all checks have passed; then you can merge the PR. 76 | 77 | ### Add changes to CHANGELOG.md 78 | 79 | Please track the changes that you are making in the [CHANGELOG.md](CHANGELOG.md) file. 80 | 81 | ### Trigger a new release 82 | 83 | Once you have added the changes to the CHANGELOG, merged the pull request and 84 | the workflow has been executed on CI/CD, you can trigger a new release. 85 | 86 | * Go [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/releases) and click on `Draft a new release` at the top of the page. 87 | * Under `Choose a tag` create a new tag and give details on the release. 88 | * With each release the corresponding [Zenodo repository](https://zenodo.org/records/10209942) is updated.
89 | 90 | ## Updating the data 91 | 92 | For new `steinbock` releases and specifically if the Mesmer version changes, the 93 | example data should be updated. The example data are stored on Central NAS 94 | and are hosted on Zenodo. 95 | 96 | ### Re-analyse the example data 97 | 98 | * You can find the raw data on [zenodo](https://zenodo.org/records/7575859). 99 | * On Central NAS under projects/IMCWorkflow/zenodo create a new folder called `steinbock_0.x.y` where x denotes the new major version and y the new minor version. 100 | * Copy the `steinbock.sh` script from the folder of the previous version to the folder of the newest version. 101 | * Change the steinbock version number in the `steinbock.sh` script and execute it. 102 | * It should generate all relevant files and zip all folders. 103 | 104 | ### Upload data to zenodo 105 | 106 | * On [zenodo](https://zenodo.org/records/7624451), click on `New version` and replace all files with the newer version. No need to upload the raw data to zenodo as they are hosted in a different repository. Make sure to change the date and update the version number. 107 | 108 | ### Adjust the book 109 | 110 | * Work in the most recent Docker container and on the devel branch. 111 | * Manually go through each section, update the links in the [Prerequisites](https://bodenmillergroup.github.io/IMCDataAnalysis/prerequisites.html#download-data) section 112 | * Make sure to check and adjust the unit tests at the end of each file 113 | * Make sure that the text (e.g. clustering) still matches the results 114 | 115 | *Important:* as we are training a random forest classifier on manually gated cells, these gated cells won't match the newest version of the data if the Mesmer version changed. For this, we have the `scripts/transfer_labels.R` script that automatically re-gates cells in the new SPE object.
116 | 117 | * Go through all sections until `Cell phenotyping` 118 | * Based on the old `gated_cells` and the new SPE object, execute the `scripts/transfer_labels.R` script 119 | * Zip the new `gated_cells` and upload them to a new version on [zenodo](https://zenodo.org/records/8095133) 120 | * Adjust the link to the new gated cells in the [Prerequisites](https://bodenmillergroup.github.io/IMCDataAnalysis/prerequisites.html#download-data) section 121 | * Make sure that the new classification results closely match the previous results 122 | 123 | * Continue going through the book 124 | 125 | ### Execute the book 126 | 127 | * When you are done working through the book, within the Docker container open the RProject file and execute `bookdown::render_book()` to make sure that it can be executed from beginning to end. 128 | * Under `data/CellTypeValidation` have a look at the PNGs to check if celltypes were correctly detected. 129 | 130 | ### Add changes to CHANGELOG.md 131 | 132 | Finally, add all the recent changes to the CHANGELOG, create and merge a PR and create a new release (see above).
133 | 134 | 135 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Docker inheritance 2 | FROM rocker/rstudio:latest 3 | # Install system libraries, then clean the apt cache and remove the package lists 4 | RUN apt-get -y update \ 5 | && apt-get install -y --no-install-recommends apt-utils \ 6 | && apt-get install -y --no-install-recommends zlib1g-dev libglpk-dev libmagick++-dev libfftw3-dev libxml2-dev libxt-dev curl libcairo2-dev libproj-dev libgdal-dev libudunits2-dev libarchive-dev \ 7 | && apt-get clean \ 8 | && rm -rf /var/lib/apt/lists/* 9 | # Install R packages via install.packages, BiocManager and devtools::install_github 10 | RUN R -e 'install.packages(c("rmarkdown", "markdown", "bookdown", "pheatmap", "viridis", "zoo", "BiocManager", "devtools", "testthat", "tiff", \ 11 | "distill", "ggrepel", "patchwork", "mclust", "RColorBrewer", "uwot", "Rtsne", "harmony", \ 12 | "Seurat", "SeuratObject", "cowplot", "kohonen", "caret", "randomForest", "ggridges", "cowplot", \ 13 | "gridGraphics", "scales", "tiff", "harmony", "Matrix"))' 14 | RUN R -e 'BiocManager::install(c("CATALYST", "scuttle", "scater", "dittoSeq", "tidyverse", "BiocStyle", "batchelor", "bluster", \ 15 | "scran", "lisaClust", "spicyR", "iSEE", "imcRtools", "cytomapper", "imcdatasets", "cytoviewer"))' 16 | RUN R -e 'devtools::install_github(c("i-cyto/Rphenograph"))' 17 | 18 | -------------------------------------------------------------------------------- /IMCDataAnalysis.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Website 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2
| 3 | Copyright (c) 2020 BodenmillerGroup 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8100220.svg)](https://doi.org/10.5281/zenodo.6806448) 2 | 3 | # R based analysis workflow for multiplexed imaging data 4 | 5 | 6 | [![build](https://github.com/BodenmillerGroup/IMCDataAnalysis/actions/workflows/build.yml/badge.svg)](https://github.com/BodenmillerGroup/IMCDataAnalysis/actions/workflows/build.yml) 7 | 8 | 9 | R workflow highlighting analyses approaches for multiplexed imaging data. 10 | 11 | ## Scope 12 | 13 | This workflow explains the use of common R/Bioconductor packages to pre-process and analyse single-cell data obtained from segmented multichannel images. 
14 | While we use imaging mass cytometry (IMC) data as an example, the concepts presented here can be applied to images obtained by other technologies (e.g. CODEX, MIBI, mIF, CyCIF, etc.). 15 | The workflow can be largely divided into the following parts: 16 | 17 | 1. Preprocessing (reading in the data, spillover correction) 18 | 2. Image- and cell-level quality control, low-dimensional visualization 19 | 3. Sample/batch effect correction 20 | 4. Cell phenotyping via clustering or classification 21 | 5. Single-cell visualization 22 | 6. Image visualization 23 | 7. Spatial analyses 24 | 25 | ## Update freeze 26 | 27 | This workflow has been actively developed until December 2023. At that time 28 | we used the most recent (`v.0.16.0`) version of `steinbock` to process the 29 | example data. If you are having issues when using newer versions of `steinbock` 30 | please open an issue [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/issues). 31 | 32 | ## Usage 33 | 34 | To reproduce the analysis displayed at [https://bodenmillergroup.github.io/IMCDataAnalysis/](https://bodenmillergroup.github.io/IMCDataAnalysis/) clone the repository via: 35 | 36 | ``` 37 | git clone https://github.com/BodenmillerGroup/IMCDataAnalysis.git 38 | ``` 39 | 40 | For reproducibility purposes, we provide a Docker container [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/pkgs/container/imcdataanalysis). 41 | 42 | 1. After installing [Docker](https://docs.docker.com/get-docker/) you can first pull the container via: 43 | 44 | ``` 45 | docker pull ghcr.io/bodenmillergroup/imcdataanalysis:latest 46 | ``` 47 | 48 | and then run the container: 49 | 50 | ``` 51 | docker run -v /path/to/IMCDataAnalysis:/home/rstudio/IMCDataAnalysis \ 52 | -e PASSWORD=bioc -p 8787:8787 \ 53 | ghcr.io/bodenmillergroup/imcdataanalysis:latest 54 | ``` 55 | 56 | **Of note: it is recommended to use a date-tagged version of the container to ensure reproducibility**. 
57 | This can be done via: 58 | 59 | ``` 60 | docker pull ghcr.io/bodenmillergroup/imcdataanalysis:<yyyy-mm-dd> 61 | ``` 62 | 63 | 2. An RStudio server session can be accessed via a browser at `localhost:8787` using `Username: rstudio` and `Password: bioc`. 64 | 3. Navigate to `IMCDataAnalysis` and open the `IMCDataAnalysis.Rproj` file. 65 | 4. Code in the individual files can now be executed or the whole workflow can be built by entering `bookdown::render_book()`. 66 | 67 | ## Feedback 68 | 69 | We provide the workflow as an open-source resource. It does not mean that 70 | this workflow is tested on all possible datasets or biological questions and 71 | there exist multiple ways of analysing data. It is therefore recommended to 72 | check the results and question their biological interpretation. 73 | 74 | If you notice an issue or missing information, please report an issue 75 | [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/issues). We also 76 | welcome contributions in the form of pull requests or feature requests in the form of 77 | issues. Have a look at the source code at: 78 | 79 | [https://github.com/BodenmillerGroup/IMCDataAnalysis](https://github.com/BodenmillerGroup/IMCDataAnalysis) 80 | 81 | ## Contributing guidelines 82 | 83 | For feature requests and bug reports, please raise an issue [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/issues). 84 | 85 | For adding new content to the book please work inside the Docker container as explained above. 86 | You can fork the repository, add your changes and open a pull request. 87 | To add new libraries to the container please add them to the [Dockerfile](Dockerfile). 
88 | 89 | ## Maintainer 90 | 91 | [Daniel Schulz](https://github.com/SchulzDan) 92 | 93 | ## Contributors 94 | 95 | [Nils Eling](https://github.com/nilseling) 96 | [Vito Zanotelli](https://github.com/votti) 97 | [Daniel Schulz](https://github.com/SchulzDan) 98 | [Jonas Windhager](https://github.com/jwindhager) 99 | [Michelle Daniel](https://github.com/michdaniel) 100 | [Lasse Meyer](https://github.com/lassedochreden) 101 | 102 | ## Citation 103 | 104 | Please cite the following paper when using the presented workflow in your research: 105 | 106 | > Windhager, J., Zanotelli, V.R.T., Schulz, D. et al. An end-to-end workflow for multiplexed image processing and analysis. Nat Protoc (2023). https://doi.org/10.1038/s41596-023-00881-0 107 | 108 | @article{Windhager2023, 109 | author = {Windhager, Jonas and Zanotelli, Vito R.T. and Schulz, Daniel and Meyer, Lasse and Daniel, Michelle and Bodenmiller, Bernd and Eling, Nils}, 110 | title = {An end-to-end workflow for multiplexed image processing and analysis}, 111 | year = {2023}, 112 | doi = {10.1038/s41596-023-00881-0}, 113 | URL = {https://www.nature.com/articles/s41596-023-00881-0}, 114 | journal = {Nature Protocols} 115 | } 116 | 117 | 118 | ## Funding 119 | 120 | The work was funded by the European Union’s Horizon 2020 research and innovation program under Marie Sklodowska-Curie Actions grant agreement No 892225 (N.E) and by the CRUK IMAXT Grand Challenge (J.W.). 
121 | -------------------------------------------------------------------------------- /_bookdown.yml: -------------------------------------------------------------------------------- 1 | book_filename: "IMCDataAnalysis" 2 | delete_merged_file: true 3 | output_dir: "docs" 4 | new_session: yes 5 | language: 6 | ui: 7 | chapter_name: "" 8 | view: https://github.com/BodenmillerGroup/IMCDataAnalysis/blob/master/%s -------------------------------------------------------------------------------- /_output.yml: -------------------------------------------------------------------------------- 1 | bookdown::gitbook: 2 | github-repo: "BodenmillerGroup/IMCDataAnalysis" 3 | css: style.css 4 | config: 5 | toc: 6 | collapse: section 7 | before: | 8 |
<li><a href="./">Multiplexed imaging data analysis</a></li>
9 | after: | 10 |
<li><a href="https://github.com/rstudio/bookdown" target="blank">Published with bookdown</a></li>
11 | download: null 12 | sharing: 13 | facebook: no 14 | github: yes 15 | twitter: yes 16 | linkedin: yes 17 | weibo: no 18 | instapaper: no 19 | vk: no 20 | whatsapp: no 21 | bookdown::pdf_book: 22 | includes: 23 | in_header: preamble.tex 24 | latex_engine: xelatex 25 | citation_package: natbib 26 | keep_tex: yes 27 | bookdown::epub_book: default 28 | -------------------------------------------------------------------------------- /book.bib: -------------------------------------------------------------------------------- 1 | @article{Chevrier2017, 2 | title = {Compensation of Signal Spillover in Suspension and Imaging Mass Cytometry}, 3 | author = "Chevrier, Stéphane and Crowell, Helena L. and Zanotelli, Vito R.T. and Engler, Stefanie and Robinson, Mark D. and Bodenmiller, Bernd", 4 | journal = "Cell Systems", 5 | volume = 6, 6 | pages = "612--620", 7 | year = 2017 8 | } 9 | 10 | @article{Bendall2011, 11 | title = {Single-cell mass cytometry of differential immune and drug responses across a human hematopoietic continuum}, 12 | author = "Bendall, Sean C. and Simonds, Erin F. and Qiu, Peng and Amir, El Ad D. and Krutzik, Peter O. and Finck, Rachel and Bruggner, Robert V. and Melamed, Rachel and Trejo, Angelica and Ornatsky, Olga I. and Balderas, Robert S. and Plevritis, Sylvia K. and Sachs, Karen and Pe'er, Dana and Tanner, Scott D. and Nolan, Garry P.", 13 | journal = "Science", 14 | volume = 332, 15 | pages = "687--696", 16 | year = 2011 17 | } 18 | 19 | @article{Giesen2014, 20 | Author = {Giesen, Charlotte and Wang, Hao A.O. and Schapiro, Denis and Zivanovic, Nevena and Jacobs, Andrea and Hattendorf, Bodo and Schüffler, Peter J. and Grolimund, Daniel and Buhmann, Joachim M. and Brandt, Simone and Varga, Zsuzsanna and Wild, Peter J. 
and Günther, Detlef and Bodenmiller, Bernd}, 21 | Journal = {Nature {M}ethods}, 22 | Number = {4}, 23 | Pages = {417-422}, 24 | Title = {Highly multiplexed imaging of tumor tissues with subcellular resolution by mass cytometry}, 25 | Volume = {11}, 26 | Year = {2014} 27 | } 28 | 29 | 30 | @article{Schulz2018, 31 | title = {Simultaneous Multiplexed Imaging of mRNA and Proteins with Subcellular Resolution in Breast Cancer Tissue Samples by Mass Cytometry.}, 32 | author = "Schulz, Daniel and Zanotelli, Vito RT and Fischer, Jana R and Schapiro, Denis and Engler, Stefanie and Lun, Xiao-Kang and Jackson, Hartland W and Bodenmiller, Bernd", 33 | journal = "Cell Systems", 34 | volume = 6, 35 | pages = "25--36.e5", 36 | year = 2018 37 | } 38 | 39 | @article{Shapiro2017, 40 | title = {histoCAT: analysis of cell phenotypes and interactions in multiplex image cytometry data.}, 41 | author = "Schapiro, Denis and Jackson, Hartland W and Raghuraman, Swetha and Fischer, Jana R and Zanotelli, Vito RT and Schulz, Daniel and Giesen, Charlotte and Catena, Raúl and Varga, Zsuzsanna and Bodenmiller, Bernd", 42 | journal = "Nature Methods", 43 | volume = 14, 44 | pages = "873--876", 45 | year = 2017 46 | } 47 | 48 | @article{Angelo2014, 49 | Author = {Angelo, Michael and Bendall, Sean C. and Finck, Rachel and Hale, Matthew B. and Hitzman, Chuck and Borowsky, Alexander D. and Levenson, Richard M. and Lowe, John B. and Liu, Scot D. and Zhao, Shuchun and Natkunam, Yasodha and Nolan, Garry P.}, 50 | Journal = {Nature Medicine}, 51 | Number = {4}, 52 | Pages = {436-442}, 53 | Title = {Multiplexed ion beam imaging of human breast tumors}, 54 | Volume = {20}, 55 | Year = {2014} 56 | } 57 | 58 | @article{Lin2018, 59 | Author = {Lin, Jia-Ren and Izar, Benjamin and Wang, Shu and Yapp, Clarence and Mei, Shaolin and Shah, Parin M. 
and Santagata, Sandro and Sorger, Peter K.}, 60 | Journal = {eLife}, 61 | Pages = {1-46}, 62 | Title = {Highly multiplexed immunofluorescence imaging of human tissues and tumors using t-CyCIF and conventional optical microscopes}, 63 | Volume = {7}, 64 | Year = {2018} 65 | } 66 | 67 | @article{Gut2018, 68 | Author = {Gut, Gabriele and Herrmann, Markus D and Pelkmans, Lucas}, 69 | Journal = {Science}, 70 | Pages = {1-13}, 71 | Title = {Multiplexed protein maps link subcellular organization to cellular states}, 72 | Volume = {361}, 73 | Year = {2018} 74 | } 75 | 76 | @article{Bodenmiller2016, 77 | Author = {Bodenmiller, Bernd}, 78 | Journal = {Cell Systems}, 79 | Pages = {225-238}, 80 | Title = {Multiplexed Epitope-Based Tissue Imaging for Discovery and Healthcare Applications}, 81 | Volume = {2}, 82 | Year = {2016} 83 | } 84 | 85 | 86 | @article{Chen2015, 87 | Author = {Chen, Kok Hao and Boettiger, Alistair N. and Moffitt, Jeffrey R. and Wang, Siyuan and Zhuang, Xiaowei}, 88 | Journal = {Science}, 89 | Pages = {aaa6090}, 90 | Title = {Spatially resolved, highly multiplexed RNA profiling in single cells}, 91 | Volume = {348}, 92 | Year = {2015} 93 | } 94 | 95 | @article{Greenwald2021, 96 | year = {2021}, 97 | volume = {40}, 98 | pages = {555--565}, 99 | author = {Noah F. Greenwald and Geneva Miller and Erick Moen and Alex Kong and Adam Kagel and Thomas Dougherty and Christine Camacho Fullaway and Brianna J. McIntosh and Ke Xuan Leow and Morgan Sarah Schwartz and Cole Pavelchek and Sunny Cui and Isabella Camplisson and Omer Bar-Tal and Jaiveer Singh and Mara Fong and Gautam Chaudhry and Zion Abraham and Jackson Moseley and Shiri Warshawsky and Erin Soon and Shirley Greenbaum and Tyler Risom and Travis Hollmann and Sean C. 
Bendall and Leeat Keren and William Graf and Michael Angelo and David Van Valen}, 100 | title = {Whole-cell segmentation of tissue images with human-level performance using large-scale data annotation and deep learning}, 101 | journal = {Nature Biotechnology} 102 | } 103 | 104 | @article{Lubeck2014, 105 | Author = {Lubeck, Eric and Coskun, Ahmet F and Zhiyentayev, Timur and Ahmad, Mubhij and Cai, Long}, 106 | Journal = {Nature Methods}, 107 | Pages = {360-361}, 108 | Title = {Single-cell in situ RNA profiling by sequential hybridization}, 109 | Volume = {11}, 110 | Year = {2014} 111 | } 112 | 113 | @article{Goltsev2018, 114 | Author = {Goltsev, Yury and Samusik, Nikolay and Kennedy-Darling, Julia and Bhate, Salil and Hale, Matthew and Vazquez, Gustavo and Black, Sarah and Nolan, Garry P.}, 115 | Journal = {Cell}, 116 | Pages = {968--981}, 117 | Title = {Deep Profiling of Mouse Splenic Architecture with CODEX Multiplexed Imaging}, 118 | Volume = {174}, 119 | Year = {2018} 120 | } 121 | 122 | @article{Saka2019, 123 | Author = {Saka, Sinem K. and Wang, Yu and Kishi, Jocelyn Y. and Zhu, Allen and Zeng, Yitian and Xie, Wenxin and Kirli, Koray and Yapp, Clarence and Cicconet, Marcelo and Beliveau, Brian J. and Lapan, Sylvain W. and Yin, Siyuan and Lin, Millicent and Boyden, Edward S. and Kaeser, Pascal S. and Pihan, German and Church, George M. and Yin, Peng}, 124 | Journal = {Nature Biotechnology}, 125 | Pages = {1080--1090}, 126 | Title = {Immuno-SABER enables highly multiplexed and amplified protein imaging in tissues}, 127 | Volume = {37}, 128 | Year = {2019} 129 | } 130 | 131 | @article{Ijsselsteijn2019, 132 | year = {2019}, 133 | volume = {10}, 134 | author = {Ijsselsteijn, Marieke E. and van der Breggen, Ruud and Sarasqueta, Arantza F. and Koning, Frits and de Miranda, Noel F. C. 
C.}, 135 | title = {A 40-Marker Panel for High Dimensional Characterization of Cancer Immune Microenvironments by Imaging Mass Cytometry}, 136 | journal = {Frontiers in Immunology} 137 | } 138 | 139 | @article{Windhager2021, 140 | year = {2021}, 141 | journal = {bioRxiv}, 142 | author = {Windhager,Jonas and Bodenmiller, Bernd and Eling, Nils}, 143 | title = {An end-to-end workflow for multiplexed image processing and analysis} 144 | } 145 | 146 | @article{Virshup2021, 147 | year = {2021}, 148 | journal = {bioRxiv}, 149 | author = {Isaac Virshup and Sergei Rybakov and Fabian J. Theis and Philipp Angerer and F. Alexander Wolf}, 150 | title = {anndata: Annotated data} 151 | } 152 | 153 | @article{Shannon2003, 154 | year = {2003}, 155 | volume = {13}, 156 | pages = {2498--2504}, 157 | author = {Shannon, Paul and Markiel, Andrew and Ozier, Owen and Baliga, Nitin S. and Wang, Jonathan T. and Ramage, Daniel and Amin, Nada and Schwikowski, Benno and Ideker, Trey}, 158 | title = {Cytoscape: A Software Environment for Integrated Models of Biomolecular Interaction Networks}, 159 | journal = {Genome Research} 160 | } 161 | 162 | @article{Amezquita2019, 163 | year = {2019}, 164 | volume = {17}, 165 | pages = {137--145}, 166 | author = {Amezquita, Robert A. and Lun, Aaron T. L. and Becht, Etienne and Carey, Vince J. and Carpp, Lindsay N. and Geistlinger, Ludwig and Marini, Federico and Rue-Albrecht, Kevin and Risso, Davide and Soneson, Charlotte and Waldron, Levi and Pag{\`{e}}s, Herv{\'{e}} and Smith, Mike L. 
and Huber, Wolfgang and Morgan, Martin and Gottardo, Raphael and Hicks, Stephanie C.}, 167 | title = {Orchestrating single-cell analysis with Bioconductor}, 168 | journal = {Nature Methods} 169 | } 170 | 171 | @article{Righelli2022, 172 | year = {2022}, 173 | volume = {38}, 174 | pages = {3128--3131}, 175 | author = {Dario Righelli and Lukas M Weber and Helena L Crowell and Brenda Pardo and Leonardo Collado-Torres and Shila Ghazanfar and Aaron T L Lun and Stephanie C Hicks and Davide Risso}, 176 | title = {{SpatialExperiment}: infrastructure for spatially-resolved~transcriptomics data in R using Bioconductor}, 177 | journal = {Bioinformatics} 178 | } 179 | 180 | @article{Haghverdi2018, 181 | year = {2018}, 182 | volume = {36}, 183 | pages = {421--427}, 184 | author = {Haghverdi, Laleh and Lun, Aaron T. L. and Morgan, Michael D. and Marioni, John C.}, 185 | title = {Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors}, 186 | journal = {Nature Biotechnology} 187 | } 188 | 189 | @article{Korsunsky2019, 190 | year = {2019}, 191 | volume = {16}, 192 | pages = {1289--1296}, 193 | author = {Korsunsky, Ilya and Millard, Nghia and Fan, Jean and Slowikowski, Kamil and Zhang, Fan and Wei, Kevin and Baglaenko, Yuriy and Brenner, Michael and Loh, Po-ru and Raychaudhuri, Soumya}, 194 | title = {Fast, sensitive and accurate integration of single-cell data with Harmony}, 195 | journal = {Nature Methods} 196 | } 197 | 198 | @article{Stuart2019, 199 | year = {2019}, 200 | volume = {177}, 201 | pages = {1888--1902}, 202 | author = {Stuart, Tim and Butler, Andrew and Hoffman, Paul and Hafemeister, Christoph and Papalexi, Efthymia and Mauck, William M. 
III and Hao, Yuhan and Stoeckius, Marlon and Smibert, Peter and Satija, Rahul}, 203 | title = {Comprehensive Integration of Single-Cell Data}, 204 | journal = {Cell} 205 | } 206 | 207 | @article{Bai2021, 208 | year = {2021}, 209 | volume = {12}, 210 | author = {Bai, Yunhao and Zhu, Bokai and Rovira-Clave, Xavier and Chen, Han and Markovic, Maxim and Chan, Chi Ngai and Su, Tung-Hung and McIlwain, David R. and Estes, Jacob D. and Keren, Leeat and Nolan, Garry P. and Jiang, Sizun}, 211 | title = {Adjacent Cell Marker Lateral Spillover Compensation and Reinforcement for Multiplexed Images}, 212 | journal = {Frontiers in Immunology} 213 | } 214 | 215 | @article{Hoch2022, 216 | year = {2022}, 217 | author = {Hoch, Tobias and Schulz, Daniel and Eling, Nils and Gómez, Julia Martínez and Levesque, Mitchell P. and Bodenmiller, Bernd}, 218 | title = {Multiplexed imaging mass cytometry of the chemokine milieus in melanoma characterizes features of the response to immunotherapy}, 219 | journal = {Science Immunology}, 220 | volume = {7}, 221 | number = {70}, 222 | pages = {eabk1692}, 223 | year = {2022}, 224 | } 225 | 226 | @article{Weber2016, 227 | year = {2016}, 228 | volume = {89A}, 229 | pages = {1084--1096}, 230 | author = {Weber, Lukas M. and Robinson, Mark D.}, 231 | title = {Comparison of Clustering Methods for High-Dimensional Single-Cell Flow and Mass Cytometry Data}, 232 | journal = {Cytometry Part A} 233 | } 234 | 235 | @article{Levine2015, 236 | year = {2015}, 237 | volume = {162}, 238 | pages = {184--197}, 239 | author = {Levine, Jacob H. and Simonds, Erin F. and Bendall, Sean C. and Davis, Kara L. and Amir, El-ad D. and Tadmor, Michelle D. and Litvin, Oren and Fienberg, Harris G. and Jager, Astraea and Zunder, Eli R. and Finck, Rachel and Gedman, Amanda L. and Radtke, Ina and 240 | Downing, James R. 
and Pe’er, Dana and Nolan, Garry P.}, 241 | title = {Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with Prognosis}, 242 | journal = {Cell} 243 | } 244 | 245 | @article{Jackson2020, 246 | year = {2020}, 247 | volume = {578}, 248 | pages = {615--620}, 249 | author = {Jackson, Hartland W. and Fischer, Jana R. and Zanotelli, Vito R. T. and Ali, H. Raza and Mechera, Robert and Soysal, Savas D. and Moch, Holger and Muenst, Simone and Varga, Zsuzsanna and Weber, Walter P. and Bodenmiller, Bernd}, 250 | title = {The single-cell pathology landscape of breast cancer}, 251 | journal = {Nature} 252 | } 253 | 254 | @article{Tietscher2022, 255 | year = {2022}, 256 | author = {Tietscher, Sandra and Wagner, Johanna and Anzeneder, Tobias and 257 | Langwieder, Claus and Rees, Martin and Sobottka, Bettina and de Souza, Natalie 258 | and Bodenmiller, Bernd}, 259 | title = {A comprehensive single-cell map of T cell exhaustion-associated immune environments in human breast cancer}, 260 | journal = {Research Square} 261 | } 262 | 263 | @article{Yu2022, 264 | doi = {10.1186/s13059-022-02622-0}, 265 | url = {https://doi.org/10.1186/s13059-022-02622-0}, 266 | year = {2022}, 267 | volume = {23}, 268 | number = {1}, 269 | author = {Yu, Lijia and Cao, Yue and Yang, Jean Y. H. 
and Yang, Pengyi}, 270 | title = {Benchmarking clustering algorithms on estimating the number of cell types from single-cell {RNA}-sequencing data}, 271 | journal = {Genome Biology} 272 | } 273 | 274 | @article{Eling2020, 275 | year = {2020}, 276 | volume = {36}, 277 | pages = {5706--5708}, 278 | number = {24}, 279 | author = {Eling, Nils and Damond, Nicolas and Hoch, Tobias and Bodenmiller, Bernd}, 280 | title = {cytomapper: an R/Bioconductor package for visualization of highly multiplexed imaging data}, 281 | journal = {Bioinformatics} 282 | } 283 | 284 | @article{Schurch2020, 285 | year = {2020}, 286 | volume = {182}, 287 | pages = {1341--1359}, 288 | author = {Schürch, Christian M and Bhate, Salil S and Barlow, Graham L and Phillips, Darci J and Noti, Luca and Zlobec, Inti and Chu, Pauline and Black, Sarah and Demeter, Janos and Mcilwain, David R and Kinoshita, Shigemi and Samusik, Nikolay and Goltsev, Yury and Nolan, Garry P}, 289 | title = {Coordinated Cellular Neighborhoods Orchestrate Antitumoral Immunity at the Colorectal Cancer Invasive Front}, 290 | journal = {Cell} 291 | } 292 | 293 | @article{Patrick2023, 294 | year = {2023}, 295 | author = {Ellis Patrick and Nicolas P. Canete and Sourish S. Iyengar and Andrew N. Harman and Greg T. Sutherland and Pengyi Yang}, 296 | title = {Spatial analysis for highly multiplexed imaging data to identify tissue microenvironments}, 297 | journal = {Cytometry Part A} 298 | } 299 | 300 | @article{Bhate2022, 301 | author = {Salil S. Bhate and Graham L. Barlow and Christian M. Schürch and Garry P. 
Nolan}, 302 | journal = {Cell Systems}, 303 | number = {2}, 304 | pages = {109-130}, 305 | title = {Tissue schematics map the specialization of immune tissue motifs and their appropriation by tumors}, 306 | volume = {13}, 307 | year = {2022} 308 | } 309 | 310 | @article{Gu2016, 311 | author = {Zuguang Gu and Roland Eils and Matthias Schlesner}, 312 | journal = {Bioinformatics}, 313 | pages = {2847--2849}, 314 | title = {Complex heatmaps reveal patterns and correlations in multidimensional genomic data}, 315 | volume = {32}, 316 | year = {2016} 317 | } 318 | 319 | @article{Ali2020, 320 | author = {Raza Ali and Hartland W. Jackson and Vito R. T. Zanotelli and Esther Danenberg and Jana R. Fischer and Helen Bardwell and Elena Provenzano and {CRUK IMAXT Grand Challenge Team} and Oscar M. Rueda and Suet-Feung Chin and Samuel Aparicio and Carlos Caldas and Bernd Bodenmiller}, 321 | journal = {Nature Cancer}, 322 | pages = {163-175}, 323 | title = {Imaging mass cytometry and multiplatform genomics define the phenogenomic landscape of breast cancer}, 324 | volume = {1}, 325 | year = {2020} 326 | } 327 | 328 | @article{Jiang2022, 329 | author = {Sizun Jiang and Chi Ngai Chan and Xavier Rovira-Clavé and Han Chen and Yunhao Bai and Bokai Zhu and Erin McCaffrey and Noah F Greenwald and Candace Liu and Graham L Barlow and Jason L Weirather and John Paul Oliveria and Tsuguhisa Nakayama and Ivan T Lee and Matthias S Matter and Anne E Carlisle and Darci Philips and Gustavo Vazquez and Nilanjan Mukherjee and Kathleen Busman-Sahay and Michael Nekorchuk and Margaret Terry and Skyler Younger and Marc Bosse and Janos Demeter and Scott J Rodig and Alexandar Tzankov and Yury Goltsev and David Robert McIlwain and Michael Angelo and Jacob D Estes and Garry P Nolan}, 330 | journal = {Immunity}, 331 | pages = {1118-1134.e8}, 332 | title = {Combined protein and nucleic acid imaging reveals virus-dependent B cell and macrophage immunosuppression of tissue microenvironments}, 333 | volume = {55}, 
334 | year = {2022} 335 | } 336 | 337 | @article{Rendeiro2021, 338 | year = {2021}, 339 | volume = {593}, 340 | pages = {564--569}, 341 | author = {Andr{\'{e}} F. Rendeiro and Hiranmayi Ravichandran and Yaron Bram and Vasuretha Chandar and Junbum Kim and Cem Meydan and Jiwoon Park and Jonathan Foox and Tyler Hether and Sarah Warren and Youngmi Kim and Jason Reeves and Steven Salvatore and Christopher E. Mason and Eric C. Swanson and Alain C. Borczuk and Olivier Elemento and Robert E. Schwartz}, 342 | title = {The spatial landscape of lung pathology during {COVID}-19 progression}, 343 | journal = {Nature} 344 | } 345 | 346 | @article{Mitamura2021, 347 | year = {2021}, 348 | volume = {77}, 349 | pages = {595--608}, 350 | author = {Yasutaka Mitamura and Daniel Schulz and Saskia Oro and Nick Li and Isabel Kolm and Claudia Lang and Reihane Ziadlou and Ge Tan and Bernd Bodenmiller and Peter Steiger and Angelo Marzano and Nicolas de Prost and Olivier Caudin and Mitchell Levesque and Corinne Stoffel and Peter Schmid-Grendelmeier and Emanual Maverakis and Cezmi A. Akdis and Marie-Charlotte Br\"{u}ggen}, 351 | title = {Cutaneous and systemic hyperinflammation drives maculopapular drug exanthema in severely ill {COVID}-19 patients}, 352 | journal = {Allergy} 353 | } 354 | 355 | @article{Damond2019, 356 | year = {2019}, 357 | volume = {29}, 358 | pages = {755--768.e5}, 359 | author = {Nicolas Damond and Stefanie Engler and Vito R.T. Zanotelli and Denis Schapiro and Clive H. Wasserfall and Irina Kusmartseva and Harry S. Nick and Fabrizio Thorel and Pedro L. Herrera and Mark A. Atkinson and Bernd Bodenmiller}, 360 | title = {A Map of Human Type 1 Diabetes Progression by Imaging Mass Cytometry}, 361 | journal = {Cell Metabolism} 362 | } 363 | 364 | @article{Ferrian2021, 365 | year = {2021}, 366 | volume = {2}, 367 | pages = {100419}, 368 | author = {Selena Ferrian and Candace C. Liu and Erin F. McCaffrey and Rashmi Kumar and Theodore S. Nowicki and David W. 
Dawson and Alex Baranski and John A. Glaspy and Antoni Ribas and Sean C. Bendall and Michael Angelo}, 369 | title = {Multiplexed imaging reveals an {IFN}-$\gamma$-driven inflammatory state in nivolumab-associated gastritis}, 370 | journal = {Cell Reports Medicine} 371 | } 372 | 373 | @article{Meyer2024, 374 | title = {cytoviewer: an R/Bioconductor package for interactive visualization and exploration of highly multiplexed imaging data}, 375 | volume = {25}, 376 | number = {1}, 377 | journal = {BMC Bioinformatics}, 378 | author = {Lasse Meyer and Nils Eling and Bernd Bodenmiller}, 379 | year = {2024} 380 | } 381 | 382 | -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/data/.gitkeep -------------------------------------------------------------------------------- /img/Gating_scheme.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/img/Gating_scheme.pdf -------------------------------------------------------------------------------- /img/Gating_scheme.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/img/Gating_scheme.pptx -------------------------------------------------------------------------------- /img/IMC_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/img/IMC_workflow.png -------------------------------------------------------------------------------- /index.Rmd: 
-------------------------------------------------------------------------------- 1 | --- 2 | title: "Analysis workflow for IMC data" 3 | author: "**Authors:** Nils Eling [1](#DQBM),[2](#IMHS),[*](#email), Vito Zanotelli [1](#DQBM),[2](#IMHS), Michelle Daniel [1](#DQBM),[2](#IMHS), Daniel Schulz [1](#DQBM),[2](#IMHS), Jonas Windhager [1](#DQBM),[2](#IMHS), Lasse Meyer [1](#DQBM),[2](#IMHS)" 4 | date: "**Compiled:** `r Sys.Date()`" 5 | site: bookdown::bookdown_site 6 | github-repo: "BodenmillerGroup/IMCDataAnalysis" 7 | documentclass: book 8 | bibliography: [book.bib, packages.bib] 9 | biblio-style: apalike 10 | link-citations: yes 11 | description: "This bookdown project highlights possible down-stream analyses performed on imaging mass cytometry data." 12 | --- 13 | 14 | # IMC Data Analysis Workflow {#preamble} 15 | 16 | This workflow highlights the use of common R/Bioconductor packages 17 | to analyze single-cell data obtained from segmented multi-channel images. We will not perform multi-channel image processing and segmentation in R 18 | but rather link to available approaches in Section \@ref(processing). While we 19 | use imaging mass cytometry (IMC) data as an example, the concepts presented here can be applied to images 20 | obtained by other highly-multiplexed imaging technologies (e.g. CODEX, MIBI, 21 | mIF, etc.). 22 | 23 | We will give an introduction to IMC in Section \@ref(intro) and highlight 24 | strategies to extract single-cell data from multi-channel images in Section 25 | \@ref(processing). 26 | 27 | Reproducible code written in R is available from Section \@ref(prerequisites) 28 | onwards and the workflow can be largely divided into the following parts: 29 | 30 | 1. Preprocessing (reading in the data, spillover correction) 31 | 2. Image- and cell-level quality control, low-dimensional visualization 32 | 3. Sample/batch effect correction 33 | 4. Cell phenotyping via clustering or classification 34 | 5. Single-cell and image visualization 35 | 6. 
Spatial analyses 36 | 37 | ## Disclaimer 38 | 39 | Multi-channel image and spatial, single-cell analysis is complex and we 40 | highlight an example workflow here. However, this workflow is not complete and 41 | does not cover all possible aspects of exploratory data analysis. Instead, we 42 | demonstrate this workflow as a solid basis that supports other aspects of data 43 | analysis. It offers interoperability with other packages for single-cell and 44 | spatial analysis and the user will need to become familiar with the general 45 | framework to efficiently analyse data obtained from multiplexed imaging 46 | technologies. 47 | 48 | ## Update freeze 49 | 50 | This workflow was actively developed until December 2023. At that time 51 | we used the most recent (`v.0.16.0`) version of `steinbock` to process the 52 | example data. If you are having issues when using newer versions of `steinbock` 53 | please open an issue [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/issues). 54 | 55 | ## Feedback and contributing 56 | 57 | We provide the workflow as an open-source resource. It does not mean that 58 | this workflow is tested on all possible datasets or biological questions and 59 | there exist multiple ways of analysing data. It is therefore recommended to 60 | check the results and question their biological interpretation. 61 | 62 | If you notice an issue or missing information, please report an issue 63 | [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/issues). We also 64 | welcome contributions in the form of pull requests or feature requests in the form of 65 | issues. 
Have a look at the source code at: 66 | 67 | [https://github.com/BodenmillerGroup/IMCDataAnalysis](https://github.com/BodenmillerGroup/IMCDataAnalysis) 68 | 69 | ## Maintainer 70 | 71 | [Daniel Schulz](https://github.com/SchulzDan) 72 | 73 | ## Contributors 74 | 75 | [Nils Eling](https://github.com/nilseling) 76 | [Vito Zanotelli](https://github.com/votti) 77 | [Daniel Schulz](https://github.com/SchulzDan) 78 | [Jonas Windhager](https://github.com/jwindhager) 79 | [Michelle Daniel](https://github.com/michdaniel) 80 | [Lasse Meyer](https://github.com/lassedochreden) 81 | 82 | ## Citation 83 | 84 | The workflow has been published in 85 | [https://www.nature.com/articles/s41596-023-00881-0](https://www.nature.com/articles/s41596-023-00881-0) 86 | which you can cite as follows: 87 | 88 | ``` 89 | Windhager, J., Zanotelli, V.R.T., Schulz, D. et al. An end-to-end workflow for multiplexed image processing and analysis. 90 | Nat Protoc (2023). 91 | ``` 92 | 93 | ## Changelog 94 | 95 | ```{r echo=FALSE} 96 | htmltools::includeMarkdown('CHANGELOG.md') 97 | ``` 98 | 99 | --- 100 | 101 | * nils.eling@uzh.ch 102 | 1: Department for Quantitative Biomedicine, University of Zurich 103 | 2: Institute for Molecular Health Sciences, ETH Zurich 104 | -------------------------------------------------------------------------------- /packages.bib: -------------------------------------------------------------------------------- 1 | @Manual{R-base, 2 | title = {R: A Language and Environment for Statistical Computing}, 3 | author = {{R Core Team}}, 4 | organization = {R Foundation for Statistical Computing}, 5 | address = {Vienna, Austria}, 6 | year = {2021}, 7 | url = {https://www.R-project.org/}, 8 | } 9 | 10 | @Manual{R-bookdown, 11 | title = {bookdown: Authoring Books and Technical Documents with R Markdown}, 12 | author = {Yihui Xie}, 13 | year = {2021}, 14 | note = {R package version 0.22}, 15 | url = {https://CRAN.R-project.org/package=bookdown}, 16 | } 17 | 18 | @Manual{R-knitr, 19 
| title = {knitr: A General-Purpose Package for Dynamic Report Generation in R}, 20 | author = {Yihui Xie}, 21 | year = {2021}, 22 | note = {R package version 1.33}, 23 | url = {https://yihui.org/knitr/}, 24 | } 25 | 26 | @Manual{R-rmarkdown, 27 | title = {rmarkdown: Dynamic Documents for R}, 28 | author = {JJ Allaire and Yihui Xie and Jonathan McPherson and Javier Luraschi and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and Winston Chang and Richard Iannone}, 29 | year = {2021}, 30 | note = {R package version 2.7}, 31 | url = {https://CRAN.R-project.org/package=rmarkdown}, 32 | } 33 | 34 | @Book{bookdown2016, 35 | title = {bookdown: Authoring Books and Technical Documents with {R} Markdown}, 36 | author = {Yihui Xie}, 37 | publisher = {Chapman and Hall/CRC}, 38 | address = {Boca Raton, Florida}, 39 | year = {2016}, 40 | note = {ISBN 978-1138700109}, 41 | url = {https://bookdown.org/yihui/bookdown}, 42 | } 43 | 44 | @Book{knitr2015, 45 | title = {Dynamic Documents with {R} and knitr}, 46 | author = {Yihui Xie}, 47 | publisher = {Chapman and Hall/CRC}, 48 | address = {Boca Raton, Florida}, 49 | year = {2015}, 50 | edition = {2nd}, 51 | note = {ISBN 978-1498716963}, 52 | url = {https://yihui.org/knitr/}, 53 | } 54 | 55 | @InCollection{knitr2014, 56 | booktitle = {Implementing Reproducible Computational Research}, 57 | editor = {Victoria Stodden and Friedrich Leisch and Roger D. Peng}, 58 | title = {knitr: A Comprehensive Tool for Reproducible Research in {R}}, 59 | author = {Yihui Xie}, 60 | publisher = {Chapman and Hall/CRC}, 61 | year = {2014}, 62 | note = {ISBN 978-1466561595}, 63 | url = {http://www.crcpress.com/product/isbn/9781466561595}, 64 | } 65 | 66 | @Book{rmarkdown2018, 67 | title = {R Markdown: The Definitive Guide}, 68 | author = {Yihui Xie and J.J. 
Allaire and Garrett Grolemund}, 69 | publisher = {Chapman and Hall/CRC}, 70 | address = {Boca Raton, Florida}, 71 | year = {2018}, 72 | note = {ISBN 9781138359338}, 73 | url = {https://bookdown.org/yihui/rmarkdown}, 74 | } 75 | 76 | @Book{rmarkdown2020, 77 | title = {R Markdown Cookbook}, 78 | author = {Yihui Xie and Christophe Dervieux and Emily Riederer}, 79 | publisher = {Chapman and Hall/CRC}, 80 | address = {Boca Raton, Florida}, 81 | year = {2020}, 82 | note = {ISBN 9780367563837}, 83 | url = {https://bookdown.org/yihui/rmarkdown-cookbook}, 84 | } 85 | 86 | -------------------------------------------------------------------------------- /preamble.tex: -------------------------------------------------------------------------------- 1 | \usepackage{booktabs} 2 | -------------------------------------------------------------------------------- /publication/.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | .Rproj.user 3 | /data/* 4 | /outputs/* 5 | -------------------------------------------------------------------------------- /publication/README.md: -------------------------------------------------------------------------------- 1 | ## An end-to-end workflow for multiplexed image processing and analysis 2 | 3 | This folder of the repository contains the code to reproduce the analysis presented in the following paper: 4 | 5 | ``` 6 | Windhager, J., Zanotelli, V.R.T., Schulz, D. et al. An end-to-end workflow for multiplexed image processing and analysis. 7 | Nat Protoc (2023). 8 | ``` 9 | 10 | It is accessible at [https://www.nature.com/articles/s41596-023-00881-0](https://www.nature.com/articles/s41596-023-00881-0) 11 | 12 | ### System requirements 13 | 14 | To run the workflow, a computer with a recent version of a Windows, Mac, or Linux operating system (OS) is required. 15 | With increasing dataset size, more memory is required and we recommend at least 8 GB RAM to analyse the provided dataset. 
16 | Alternatively, a high performance computer (e.g. cluster) can be used, provided Docker can be installed (see below). 17 | For this manuscript, the workflow was run on MacOS Big Sur (11.7.4), 2.7 GHz Quad-Core Intel Core i7, 16 GB 2133 MHz LPDDR3. 18 | 19 | ### Reproducing the analysis 20 | 21 | **1. Obtain the code** 22 | 23 | To access the code you can clone the repository via 24 | 25 | ``` 26 | git clone https://github.com/BodenmillerGroup/IMCDataAnalysis.git 27 | ``` 28 | 29 | or you can click the `Code` > `Download ZIP` button. 30 | 31 | Navigate to the `IMCDataAnalysis/publication/` folder and open the 32 | `publication.Rproj` file in RStudio. 33 | 34 | **2. Obtain the example data** 35 | 36 | To obtain the example data, open the [protocol.Rmd](protocol.Rmd) file in RStudio 37 | and execute the first code chunk under `Example data`. 38 | 39 | **3. Perform image processing** 40 | 41 | Image processing is performed outside of R/RStudio. To process the example 42 | data stored in `publication/data/steinbock`, open a terminal and execute the 43 | following commands: 44 | 45 | ``` 46 | # setup steinbock alias 47 | alias steinbock="docker run -v /path/to/data/steinbock:/data -u $(id -u):$(id -g) ghcr.io/bodenmillergroup/steinbock:0.16.0" 48 | 49 | # image pre-processing 50 | steinbock preprocess imc images --hpf 50 51 | 52 | # image segmentation 53 | steinbock segment deepcell --minmax 54 | 55 | # intensity measurement 56 | steinbock measure intensities 57 | 58 | # regionprops measurement 59 | steinbock measure regionprops 60 | 61 | # spatial cell graph construction 62 | steinbock measure neighbors --type expansion --dmax 4 63 | ``` 64 | 65 | In the command above the `/path/to/data/steinbock` needs to be adapted and 66 | replaced by the anticipated working directory. 67 | 68 | To obtain more detailed installation instructions, please refer to the 69 | [steinbock documentation](https://bodenmillergroup.github.io/steinbock/latest/install-docker/). 
70 | 71 | A shell script for automatic execution of the commands can be seen at [steinbock.sh](steinbock.sh). 72 | 73 | **4. Option A: Install R packages** 74 | 75 | The workflow highlights the use of a number of R packages. 76 | All packages can be installed as follows: 77 | 78 | ``` 79 | if (!requireNamespace("BiocManager", quietly = TRUE)) 80 | install.packages("BiocManager") 81 | 82 | BiocManager::install(c("pheatmap", "viridis", 83 | "tiff", "distill", "ggrepel", "patchwork", 84 | "mclust", "RColorBrewer", "uwot", "Rtsne", "caret", 85 | "randomForest", "ggridges", "gridGraphics", "scales", 86 | "CATALYST", "scuttle", "scater", "dittoSeq", 87 | "tidyverse", "batchelor", "bluster","scran", "cytomapper", 88 | "imcRtools")) 89 | ``` 90 | 91 | Installing the required software takes around 1-2 hours. 92 | When the workflow was written, we used R v4.3.0 and Bioconductor 93 | release version 3.17. 94 | 95 | Please see [protocol.md](protocol.md#session-information) for the exact versions of the software 96 | packages. 97 | 98 | **4. Option B: Obtain docker container** 99 | 100 | We provide a Docker container that can be used to exactly reproduce the 101 | analysis performed in the protocol. To obtain the Docker container execute the 102 | following call in the terminal: 103 | 104 | ``` 105 | docker pull ghcr.io/bodenmillergroup/imcdataanalysis:2023-05-01 106 | ``` 107 | 108 | After obtaining the Docker container, start it by calling: 109 | 110 | ``` 111 | docker run -v /path/to/IMCDataAnalysis/publication:/home/rstudio/publication \ 112 | -e PASSWORD=bioc -p 8787:8787 \ 113 | ghcr.io/bodenmillergroup/imcdataanalysis:2023-05-01 114 | ``` 115 | 116 | Please make sure to adapt the `/path/to/IMCDataAnalysis/publication` to the correct working directory. 117 | An RStudio server session can be accessed via a browser at `localhost:8787` using `Username: rstudio` and `Password: bioc`. 118 | 119 | **5. 
Execute the workflow** 120 | 121 | Open RStudio, navigate to `publication` and open the `publication.Rproj` file. 122 | The `protocol.Rmd` file contains all relevant code to reproduce the analysis. 123 | When `knitting` the `protocol.Rmd` you will be asked to update the `markdown` 124 | package. Accept the pop up and the workflow will run. 125 | 126 | Applying the workflow to the provided dataset takes roughly 30 minutes and 127 | provides the raw data files, data generated by the `steinbock` toolkit 128 | and a `SpatialExperiment` object storing all analysis results. 129 | 130 | ### Software used for the entire protocol 131 | 132 | * **napari & napari-imc (IMC-specific):** The multi-dimensional image viewer napari (https://napari.org) together with the napari-imc plugin for loading imaging mass cytometry files (https://github.com/BodenmillerGroup/napari-imc) were used to visualize and inspect raw multiplexed imaging data. Python 3.9.12 (https://www.python.org), napari 0.4.16, and napari-imc 0.6.5 were installed into a fresh conda (https://conda.io) environment; see below for installation instructions. 133 | * **steinbock Docker container:** The multi-channel image processing toolkit steinbock (https://bodenmillergroup.github.io/steinbock) was used to pre-process multiplexed imaging data, perform image segmentation, and extract single-cell data. The steinbock Docker container v0.16.0 was pulled from the GitHub container registry using Docker Desktop 4.9.0 for Mac; see below for installation instructions. 134 | * **Ilastik/CellProfiler-based segmentation pipeline:** Multiplexed image processing using random forest-based pixel classification and watershed-based cell segmentation was performed using the Ilastik/CellProfiler-based segmentation pipeline v3.6 (https://bodenmillergroup.github.io/ImcSegmentationPipeline/); see below for installation instructions. 
135 | 136 | In addition, in order to use the pipeline, the following software needs to be installed: 137 | * **Ilastik:** The Ilastik software is used for pixel-classification prior to cell segmentation and can be installed from https://www.ilastik.org/download.html. The version used for this workflow is v1.4.0. 138 | * **CellProfiler:** The CellProfiler software is used to segment individual cells. The tool can be installed from https://cellprofiler.org/previous-releases on Windows (64-bit) and MacOS (10.14+). The version used in this workflow is v4.2.1. 139 | * **R setup:** Downstream analysis after image processing is conducted using the statistical programming language R, which can be installed from https://cran.r-project.org/ following the OS-specific instructions. The version used in this workflow is v4.3.0. 140 | * The RStudio software offers an easy-to-use GUI for data analysis in R. It can be installed from https://www.rstudio.com/products/rstudio/download/. 141 | 142 | ### Installation instructions 143 | 144 | * **napari & napari-imc:** Install the conda package manager according to the instructions at https://docs.conda.io/projects/conda/en/latest/user-guide/install/ 145 | Create a new conda environment with Python 3.9: 146 | ``` 147 | conda create -n napari-imc -y python=3.9 148 | ``` 149 | Activate the conda environment and install napari & napari-imc: 150 | ``` 151 | conda activate napari-imc 152 | pip install "napari[all]==0.4.16" napari-imc==0.6.5 153 | ``` 154 | * **steinbock:** Instructions to install the dockerized steinbock toolkit can be found at https://bodenmillergroup.github.io/steinbock/v0.16.0/install-docker/. In particular, to run the steinbock container, Docker needs to be installed first (see online instructions). 
For this manuscript, we ran steinbock using the following alias: 155 | ``` 156 | alias steinbock="docker run -v /path/to/data/steinbock:/data -u $(id -u):$(id -g) ghcr.io/bodenmillergroup/steinbock:0.16.0" 157 | ``` 158 | CRITICAL: In the command above the `/path/to/data/steinbock` needs to be adapted and replaced by the anticipated working directory. 159 | 160 | * **Ilastik/CellProfiler-based segmentation pipeline:** the pre-processing steps of the pipeline are performed in Python using a custom script. To set up the pre-processing script, the following steps need to be performed: 161 | 162 | Install conda from https://docs.conda.io/projects/conda/en/latest/user-guide/install/ 163 | 164 | Clone the repository 165 | ``` 166 | git clone --recursive https://github.com/BodenmillerGroup/ImcSegmentationPipeline.git 167 | ``` 168 | Set up the imcsegpipe conda environment: 169 | ``` 170 | cd ImcSegmentationPipeline 171 | conda env create -f environment.yml 172 | ``` 173 | 174 | Configure CellProfiler to use the required plugins by opening the CellProfiler GUI, selecting Preferences, setting the CellProfiler plugins directory to `path/to/ImcSegmentationPipeline/resources/ImcPluginsCP/plugins`, and restarting CellProfiler. 
175 | -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/batch-correction-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/batch-correction-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/cell-density-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/cell-density-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/cell-size-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/cell-size-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/celltype-cluster-UMAP-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/celltype-cluster-UMAP-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/celltype-cluster-heatmap-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/celltype-cluster-heatmap-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/celltype-cluster-heatmap-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/celltype-cluster-heatmap-2.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/cellular-neighbourhood-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/cellular-neighbourhood-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/cluster-sweep-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/cluster-sweep-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/compCytof-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/compCytof-1.png -------------------------------------------------------------------------------- 
/publication/protocol_files/figure-markdown_github/compImage-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/compImage-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/compImage-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/compImage-2.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/compImage-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/compImage-3.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/compImage-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/compImage-4.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/marker-distributions-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/marker-distributions-1.png 
-------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/plotSpotHeatmap-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/plotSpotHeatmap-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/segmentation-quality-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/segmentation-quality-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/spatial-community-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/spatial-community-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/spatial-context-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/spatial-context-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/umap-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/umap-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/unnamed-chunk-23-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/unnamed-chunk-23-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/unnamed-chunk-48-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/unnamed-chunk-48-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/unnamed-chunk-63-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/unnamed-chunk-63-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/unnamed-chunk-67-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/unnamed-chunk-67-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/unnamed-chunk-71-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/unnamed-chunk-71-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_github/unnamed-chunk-75-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/unnamed-chunk-75-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/batch-correction-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/batch-correction-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/cell-density-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/cell-density-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/cell-size-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/cell-size-1.png -------------------------------------------------------------------------------- 
/publication/protocol_files/figure-markdown_strict/celltype-cluster-UMAP-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/celltype-cluster-UMAP-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/celltype-cluster-heatmap-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/celltype-cluster-heatmap-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/celltype-cluster-heatmap-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/celltype-cluster-heatmap-2.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/cellular-neighbourhood-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/cellular-neighbourhood-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/cluster-sweep-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/cluster-sweep-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/compCytof-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/compCytof-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/compImage-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/compImage-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/compImage-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/compImage-2.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/compImage-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/compImage-3.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/compImage-4.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/compImage-4.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/marker-distributions-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/marker-distributions-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/plotSpotHeatmap-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/plotSpotHeatmap-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/segmentation-quality-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/segmentation-quality-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/spatial-community-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/spatial-community-1.png 
-------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/spatial-context-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/spatial-context-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/umap-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/umap-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/unnamed-chunk-22-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/unnamed-chunk-22-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/unnamed-chunk-47-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/unnamed-chunk-47-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/unnamed-chunk-62-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/unnamed-chunk-62-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/unnamed-chunk-66-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/unnamed-chunk-66-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/unnamed-chunk-70-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/unnamed-chunk-70-1.png -------------------------------------------------------------------------------- /publication/protocol_files/figure-markdown_strict/unnamed-chunk-74-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/unnamed-chunk-74-1.png -------------------------------------------------------------------------------- /publication/publication.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /publication/steinbock.sh: 
--------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Runs the steinbock processing steps (preprocess, segment, measure) through
# Docker and records the `time` report of every step, together with whatever
# the step writes to stderr, in steinbock_timing.txt next to this script.

# change directory
# Resolve the directory containing this script and abort if it cannot be
# entered, so that steinbock_timing.txt is never written somewhere else.
BASEDIR=$(cd -- "$(dirname "${BASH_SOURCE[0]}")" && pwd -P) || exit 1
cd -- "${BASEDIR}" || exit 1

# setup steinbock wrapper
# A shell function replaces the former alias: it needs no `expand_aliases`,
# and the quoting keeps the volume path intact when BASEDIR contains spaces.
# ${BASEDIR}/data/steinbock is mounted at /data and the container is started
# with the uid:gid of the calling user; all arguments are forwarded unchanged.
steinbock() {
    docker run \
        -v "${BASEDIR}/data/steinbock:/data" \
        -u "$(id -u):$(id -g)" \
        ghcr.io/bodenmillergroup/steinbock:0.16.0 "$@"
}

# image pre-processing (first step: `2>` truncates the timing file)
{ time steinbock preprocess imc images --hpf 50; } 2> steinbock_timing.txt

# image segmentation (this and all later steps append with `2>>`)
{ time steinbock segment deepcell --minmax; } 2>> steinbock_timing.txt

# intensity measurement
{ time steinbock measure intensities; } 2>> steinbock_timing.txt

# regionprops measurement
{ time steinbock measure regionprops; } 2>> steinbock_timing.txt

# spatial cell graph construction
{ time steinbock measure neighbors --type expansion --dmax 4; } 2>> steinbock_timing.txt
-------------------------------------------------------------------------------- /publication/steinbock_timing.txt: -------------------------------------------------------------------------------- 1 | 2023-06-01 14:27:02,316 INFO steinbock - img/Patient4_005.tiff 2 | 2023-06-01 14:27:03,648 INFO steinbock - img/Patient4_006.tiff 3 | 2023-06-01 14:27:04,474 INFO steinbock - img/Patient4_007.tiff 4 | 2023-06-01 14:27:05,365 INFO steinbock - img/Patient4_008.tiff 5 | 2023-06-01 14:27:11,692 INFO steinbock - img/Patient3_001.tiff 6 | 2023-06-01 14:27:12,685 INFO steinbock - img/Patient3_002.tiff 7 | 2023-06-01 14:27:13,543 INFO steinbock - img/Patient3_003.tiff 8 | 2023-06-01 14:27:18,641 INFO steinbock - img/Patient2_001.tiff 9 | 2023-06-01 14:27:19,476 INFO steinbock - img/Patient2_002.tiff 10 | 2023-06-01 14:27:20,375 INFO steinbock - img/Patient2_003.tiff 11 | 2023-06-01 14:27:21,239 INFO steinbock - img/Patient2_004.tiff 12 | 2023-06-01 14:27:25,398 INFO steinbock - img/Patient1_001.tiff 13 | 2023-06-01
14:27:26,374 INFO steinbock - img/Patient1_002.tiff 14 | 2023-06-01 14:27:27,317 INFO steinbock - img/Patient1_003.tiff 15 | 2023-06-01 14:27:27,395 INFO steinbock - images.csv 16 | 17 | real 0m38.336s 18 | user 0m0.067s 19 | sys 0m0.085s 20 | 2023-06-01 14:27:55,957 INFO steinbock - masks/Patient1_001.tiff 21 | 2023-06-01 14:28:08,949 INFO steinbock - masks/Patient1_002.tiff 22 | 2023-06-01 14:28:22,273 INFO steinbock - masks/Patient1_003.tiff 23 | 2023-06-01 14:28:34,584 INFO steinbock - masks/Patient2_001.tiff 24 | 2023-06-01 14:28:47,618 INFO steinbock - masks/Patient2_002.tiff 25 | 2023-06-01 14:29:01,514 INFO steinbock - masks/Patient2_003.tiff 26 | 2023-06-01 14:29:14,739 INFO steinbock - masks/Patient2_004.tiff 27 | 2023-06-01 14:29:31,109 INFO steinbock - masks/Patient3_001.tiff 28 | 2023-06-01 14:29:44,417 INFO steinbock - masks/Patient3_002.tiff 29 | 2023-06-01 14:29:57,562 INFO steinbock - masks/Patient3_003.tiff 30 | 2023-06-01 14:30:09,662 INFO steinbock - masks/Patient4_005.tiff 31 | 2023-06-01 14:30:22,530 INFO steinbock - masks/Patient4_006.tiff 32 | 2023-06-01 14:30:35,418 INFO steinbock - masks/Patient4_007.tiff 33 | 2023-06-01 14:30:47,967 INFO steinbock - masks/Patient4_008.tiff 34 | 35 | real 3m22.248s 36 | user 0m0.074s 37 | sys 0m0.088s 38 | 2023-06-01 14:30:54,062 INFO steinbock - intensities/Patient1_001.csv 39 | 2023-06-01 14:30:54,982 INFO steinbock - intensities/Patient1_002.csv 40 | 2023-06-01 14:30:55,974 INFO steinbock - intensities/Patient1_003.csv 41 | 2023-06-01 14:30:56,723 INFO steinbock - intensities/Patient2_001.csv 42 | 2023-06-01 14:30:57,388 INFO steinbock - intensities/Patient2_002.csv 43 | 2023-06-01 14:30:58,087 INFO steinbock - intensities/Patient2_003.csv 44 | 2023-06-01 14:30:59,249 INFO steinbock - intensities/Patient2_004.csv 45 | 2023-06-01 14:31:00,113 INFO steinbock - intensities/Patient3_001.csv 46 | 2023-06-01 14:31:00,911 INFO steinbock - intensities/Patient3_002.csv 47 | 2023-06-01 14:31:01,865 INFO steinbock 
- intensities/Patient3_003.csv 48 | 2023-06-01 14:31:02,672 INFO steinbock - intensities/Patient4_005.csv 49 | 2023-06-01 14:31:03,779 INFO steinbock - intensities/Patient4_006.csv 50 | 2023-06-01 14:31:04,790 INFO steinbock - intensities/Patient4_007.csv 51 | 2023-06-01 14:31:05,567 INFO steinbock - intensities/Patient4_008.csv 52 | 53 | real 0m15.858s 54 | user 0m0.060s 55 | sys 0m0.068s 56 | 2023-06-01 14:31:10,023 INFO steinbock - regionprops/Patient1_001.csv 57 | 2023-06-01 14:31:10,974 INFO steinbock - regionprops/Patient1_002.csv 58 | 2023-06-01 14:31:12,017 INFO steinbock - regionprops/Patient1_003.csv 59 | 2023-06-01 14:31:12,855 INFO steinbock - regionprops/Patient2_001.csv 60 | 2023-06-01 14:31:13,665 INFO steinbock - regionprops/Patient2_002.csv 61 | 2023-06-01 14:31:14,463 INFO steinbock - regionprops/Patient2_003.csv 62 | 2023-06-01 14:31:15,587 INFO steinbock - regionprops/Patient2_004.csv 63 | 2023-06-01 14:31:16,657 INFO steinbock - regionprops/Patient3_001.csv 64 | 2023-06-01 14:31:17,741 INFO steinbock - regionprops/Patient3_002.csv 65 | 2023-06-01 14:31:18,780 INFO steinbock - regionprops/Patient3_003.csv 66 | 2023-06-01 14:31:19,516 INFO steinbock - regionprops/Patient4_005.csv 67 | 2023-06-01 14:31:20,761 INFO steinbock - regionprops/Patient4_006.csv 68 | 2023-06-01 14:31:21,589 INFO steinbock - regionprops/Patient4_007.csv 69 | 2023-06-01 14:31:22,375 INFO steinbock - regionprops/Patient4_008.csv 70 | 71 | real 0m16.791s 72 | user 0m0.067s 73 | sys 0m0.074s 74 | 2023-06-01 14:31:28,992 INFO steinbock - neighbors/Patient1_001.csv 75 | 2023-06-01 14:31:32,190 INFO steinbock - neighbors/Patient1_002.csv 76 | 2023-06-01 14:31:35,893 INFO steinbock - neighbors/Patient1_003.csv 77 | 2023-06-01 14:31:38,832 INFO steinbock - neighbors/Patient2_001.csv 78 | 2023-06-01 14:31:41,589 INFO steinbock - neighbors/Patient2_002.csv 79 | 2023-06-01 14:31:44,051 INFO steinbock - neighbors/Patient2_003.csv 80 | 2023-06-01 14:31:47,806 INFO steinbock - 
neighbors/Patient2_004.csv 81 | 2023-06-01 14:31:51,564 INFO steinbock - neighbors/Patient3_001.csv 82 | 2023-06-01 14:31:54,950 INFO steinbock - neighbors/Patient3_002.csv 83 | 2023-06-01 14:31:58,524 INFO steinbock - neighbors/Patient3_003.csv 84 | 2023-06-01 14:32:01,106 INFO steinbock - neighbors/Patient4_005.csv 85 | 2023-06-01 14:32:05,149 INFO steinbock - neighbors/Patient4_006.csv 86 | 2023-06-01 14:32:08,069 INFO steinbock - neighbors/Patient4_007.csv 87 | 2023-06-01 14:32:10,696 INFO steinbock - neighbors/Patient4_008.csv 88 | 89 | real 0m48.331s 90 | user 0m0.060s 91 | sys 0m0.032s 92 | -------------------------------------------------------------------------------- /scripts/transfer_labels.R: --------------------------------------------------------------------------------
# Re-applies the gates stored in previously saved gated-cell objects to the
# current SPE object ("data/spe.rds") and overwrites each file in
# "data/gated_cells" with the re-gated subset of the current object.

library(SingleCellExperiment)
library(stringr)

# Escape the dot so that only files with a literal ".rds" extension match.
label_files <- list.files("data/gated_cells",
                          full.names = TRUE, pattern = "\\.rds$")

# Read in SPE objects
spes <- lapply(label_files, readRDS)
# Derive the names from the paths that were actually read instead of listing
# the directory a second time.
names(spes) <- basename(label_files)

# Read in current SPE object
spe <- readRDS("data/spe.rds")

# For every gated object `x`: take the cells of the same sample from the
# current object, replay the stored gates in order, and copy over the cell
# label and the cytomapper session/date metadata.
new_spes <- lapply(spes, function(x) {
    cur_spe <- spe[, spe$sample_id == unique(x$sample_id)]

    # Gates are the metadata entries whose name contains "cytomapper_gate".
    cur_gates <- metadata(x)[grepl("cytomapper_gate", names(metadata(x)))]
    if (length(cur_gates) == 0L) {
        # Fail with a clear message instead of an obscure subscript error.
        stop("No 'cytomapper_gate' entries found in metadata for sample: ",
             paste(unique(x$sample_id), collapse = ", "), call. = FALSE)
    }

    # Order gates by the number in the third "_"-separated part of their name
    # (presumably names look like "cytomapper_gate_<n>" -- TODO confirm).
    gate_index <- as.numeric(
        str_split(names(cur_gates), "_", simplify = TRUE)[, 3]
    )
    cur_gates <- cur_gates[order(gate_index, decreasing = FALSE)]

    # Move the existing metadata into a single "metadata" entry so the gate
    # entries added below sit at the top level of the metadata list.
    cur_meta <- metadata(cur_spe)
    metadata(cur_spe) <- list()
    metadata(cur_spe)$metadata <- cur_meta

    # seq_along()/seq_len() are safe when there is nothing to iterate over,
    # unlike 1:length()/1:nrow(), which would yield c(1, 0).
    for (i in seq_along(cur_gates)) {
        gate <- cur_gates[[i]]
        # Each row of gate$gate is one marker with its lower (column 1) and
        # upper (column 2) bound; keep cells strictly inside the interval.
        for (j in seq_len(nrow(gate$gate))) {
            cur_val <- assay(cur_spe, gate$exprs_values)[rownames(gate$gate)[j], ]
            cur_spe <- cur_spe[, cur_val > gate$gate[j, 1] & cur_val < gate$gate[j, 2]]
        }
        metadata(cur_spe)[[names(cur_gates)[i]]] <- gate
    }

    cur_spe$cytomapper_CellLabel <- unique(x$cytomapper_CellLabel)

    metadata(cur_spe)$cytomapper_SessionInfo <- metadata(x)$cytomapper_SessionInfo
    metadata(cur_spe)$cytomapper_GatingDate <- metadata(x)$cytomapper_GatingDate

    cur_spe
})

# Write every re-gated object back under its original file name. A plain loop
# is used because this is done purely for its side effect; lapply() would also
# build (and, at top level, print) a list of NULLs.
for (i in seq_along(new_spes)) {
    saveRDS(new_spes[[i]],
            file = file.path("data/gated_cells", names(new_spes)[i]))
}
-------------------------------------------------------------------------------- /style.css: -------------------------------------------------------------------------------- 1 | p.caption { 2 | color: #777; 3 | margin-top: 10px; 4 | } 5 | p code { 6 | white-space: inherit; 7 | } 8 | pre { 9 | word-break: normal; 10 | word-wrap: normal; 11 | } 12 | pre code { 13 | white-space: inherit; 14 | } 15 | 16 | pre, code {white-space:pre !important; overflow-x:scroll !important} 17 | --------------------------------------------------------------------------------