├── .Rbuildignore
├── .github
└── workflows
│ └── build.yml
├── .gitignore
├── 01-intro.Rmd
├── 02-processing.Rmd
├── 03-prerequisites.Rmd
├── 04-read_data.Rmd
├── 05-spillover_matrix.Rmd
├── 06-quality_control.Rmd
├── 07-batch_correction.Rmd
├── 08-phenotyping.Rmd
├── 09-singlecell_visualization.Rmd
├── 10-image_visualization.Rmd
├── 11-spatial_analysis.Rmd
├── 12-references.Rmd
├── CHANGELOG.md
├── DEVELOPMENT.md
├── Dockerfile
├── IMCDataAnalysis.Rproj
├── LICENSE
├── README.md
├── _bookdown.yml
├── _output.yml
├── book.bib
├── data
└── .gitkeep
├── img
├── Gating_scheme.pdf
├── Gating_scheme.pptx
└── IMC_workflow.png
├── index.Rmd
├── packages.bib
├── preamble.tex
├── publication
├── .gitignore
├── README.md
├── protocol.Rmd
├── protocol.html
├── protocol.md
├── protocol_files
│ ├── figure-markdown_github
│ │ ├── batch-correction-1.png
│ │ ├── cell-density-1.png
│ │ ├── cell-size-1.png
│ │ ├── celltype-cluster-UMAP-1.png
│ │ ├── celltype-cluster-heatmap-1.png
│ │ ├── celltype-cluster-heatmap-2.png
│ │ ├── cellular-neighbourhood-1.png
│ │ ├── cluster-sweep-1.png
│ │ ├── compCytof-1.png
│ │ ├── compImage-1.png
│ │ ├── compImage-2.png
│ │ ├── compImage-3.png
│ │ ├── compImage-4.png
│ │ ├── marker-distributions-1.png
│ │ ├── plotSpotHeatmap-1.png
│ │ ├── segmentation-quality-1.png
│ │ ├── spatial-community-1.png
│ │ ├── spatial-context-1.png
│ │ ├── umap-1.png
│ │ ├── unnamed-chunk-23-1.png
│ │ ├── unnamed-chunk-48-1.png
│ │ ├── unnamed-chunk-63-1.png
│ │ ├── unnamed-chunk-67-1.png
│ │ ├── unnamed-chunk-71-1.png
│ │ └── unnamed-chunk-75-1.png
│ └── figure-markdown_strict
│ │ ├── batch-correction-1.png
│ │ ├── cell-density-1.png
│ │ ├── cell-size-1.png
│ │ ├── celltype-cluster-UMAP-1.png
│ │ ├── celltype-cluster-heatmap-1.png
│ │ ├── celltype-cluster-heatmap-2.png
│ │ ├── cellular-neighbourhood-1.png
│ │ ├── cluster-sweep-1.png
│ │ ├── compCytof-1.png
│ │ ├── compImage-1.png
│ │ ├── compImage-2.png
│ │ ├── compImage-3.png
│ │ ├── compImage-4.png
│ │ ├── marker-distributions-1.png
│ │ ├── plotSpotHeatmap-1.png
│ │ ├── segmentation-quality-1.png
│ │ ├── spatial-community-1.png
│ │ ├── spatial-context-1.png
│ │ ├── umap-1.png
│ │ ├── unnamed-chunk-22-1.png
│ │ ├── unnamed-chunk-47-1.png
│ │ ├── unnamed-chunk-62-1.png
│ │ ├── unnamed-chunk-66-1.png
│ │ ├── unnamed-chunk-70-1.png
│ │ └── unnamed-chunk-74-1.png
├── publication.Rproj
├── steinbock.sh
└── steinbock_timing.txt
├── scripts
└── transfer_labels.R
└── style.css
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^\.github$
2 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | on:
2 |   push:
3 |     branches: [ main ]
4 |   pull_request:
5 |     branches: [ main ]
6 |   schedule:
7 |     - cron: '0 8 1 * *'  # 08:00 UTC on the first day of every month
8 |
9 | name: build
10 | # Three chained jobs: build the Docker image, render the book inside it, deploy to GitHub Pages
11 | jobs:
12 |   docker-build-push:
13 |     name: Build and push Docker image to GitHub Container registry
14 |     runs-on: ubuntu-latest
15 |     permissions:
16 |       packages: write
17 |       contents: read
18 |
19 |     steps:
20 |       - name: Checkout the repository
21 |         uses: actions/checkout@v3
22 |
23 |       - name: set NOW
24 |         id: now  # exposes steps.now.outputs.NOW, used below as the date tag
25 |         run: |  # step outputs are written to $GITHUB_OUTPUT; "::set-output" is deprecated
26 |           echo "NOW=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"
27 |
28 |       - name: Login to GitHub Container registry
29 |         uses: docker/login-action@v1
30 |         env:
31 |           GITHUB_USER: ${{ github.actor }}
32 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
33 |         with:
34 |           registry: ghcr.io
35 |           username: ${{ github.actor }}  # "with" inputs expand expressions, not shell-style $VARS
36 |           password: ${{ secrets.GITHUB_TOKEN }}
37 |
38 |       - name: Check Dockerfile
39 |         uses: dorny/paths-filter@v2
40 |         id: filter
41 |         with:
42 |           filters: |
43 |             docker:
44 |               - 'Dockerfile'
45 |
46 |       - name: Build and Push Docker Image  # only when the Dockerfile changed or on the schedule
47 |         if: steps.filter.outputs.docker == 'true' || github.event_name == 'schedule'
48 |         uses: docker/build-push-action@v3
49 |         with:
50 |           context: .
51 |           file: ./Dockerfile
52 |           push: true
53 |           tags: |
54 |             ghcr.io/bodenmillergroup/imcdataanalysis:${{ steps.now.outputs.NOW }}
55 |             ghcr.io/bodenmillergroup/imcdataanalysis:latest
56 |
57 |   bookdown:
58 |     runs-on: ubuntu-latest
59 |     if: ${{ always() }}  # render the book regardless of the outcome of the image job
60 |     needs: docker-build-push
61 |     container: ghcr.io/bodenmillergroup/imcdataanalysis:latest
62 |     env:
63 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
64 |
65 |     steps:
66 |       - name: Checkout
67 |         uses: actions/checkout@v2
68 |
69 |       - name: Render Book
70 |         run: Rscript -e 'bookdown::render_book("index.Rmd", new_session = TRUE)'
71 |
72 |       - name: Upload artifact  # NOTE(review): artifact actions v2 are deprecated upstream - confirm and upgrade
73 |         uses: actions/upload-artifact@v2
74 |         with:
75 |           name: docs
76 |           path: docs/
77 |
78 |   checkout-and-deploy:
79 |     runs-on: ubuntu-latest
80 |     needs: bookdown
81 |     steps:
82 |       - name: Checkout
83 |         uses: actions/checkout@master
84 |       - name: Download artifact
85 |         uses: actions/download-artifact@v2
86 |         with:
87 |           name: docs # optional
88 |           path: docs # optional
89 |       - name: Deploy to GitHub Pages
90 |         uses: peaceiris/actions-gh-pages@v3
91 |         with:
92 |           github_token: ${{ secrets.GITHUB_TOKEN }}
93 |           publish_dir: docs/
94 |           force_orphan: true
95 |
96 |
97 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | *.DS_Store
6 | data/*
7 | docs/
8 | IMCDataAnalysis.Rmd
9 | IMCDataAnalysis_files/
10 | IMCDataAnalysis.rds
11 | _bookdown_files/
12 | *~*
13 | *.md
14 | *.rds
15 | outputs/*
16 |
17 | !README.md
18 | !publication/README.md
19 | !publication/protocol.md
20 | !CHANGELOG.md
21 | !DEVELOPMENT.md
22 |
--------------------------------------------------------------------------------
/01-intro.Rmd:
--------------------------------------------------------------------------------
1 | # Introduction {#intro}
2 |
3 | Highly multiplexed imaging (HMI) enables the simultaneous detection of dozens of
4 | biological molecules (e.g., proteins, transcripts; also referred to as
5 | “markers”) in tissues. Recently established multiplexed tissue imaging
6 | technologies rely on cyclic staining with fluorescently-tagged antibodies
7 | [@Lin2018; @Gut2018], or the use of oligonucleotide-tagged [@Goltsev2018;
8 | @Saka2019] or metal-tagged [@Giesen2014; @Angelo2014] antibodies, among others.
9 | The key strength of these technologies is that they allow in-depth analysis of
10 | single cells within their spatial tissue context. As a result, these methods
11 | have enabled analysis of the spatial architecture of the tumor microenvironment
12 | [@Lin2018; @Jackson2020; @Ali2020; @Schurch2020], determination of nucleic acid
13 | and protein abundances for assessment of spatial co-localization of cell types
14 | and chemokines [@Hoch2022] and spatial niches of virus infected cells [@Jiang2022],
15 | and characterization of pathological features during COVID-19 infection
16 | [@Rendeiro2021; @Mitamura2021], Type 1 diabetes progression [@Damond2019] and
17 | autoimmune disease [@Ferrian2021].
18 |
19 | Imaging mass cytometry (IMC) utilizes metal-tagged antibodies to detect over 40
20 | proteins and other metal-tagged molecules in biological samples. IMC can be used
21 | to perform highly multiplexed imaging and is particularly suited to profiling
22 | selected areas of tissues across many samples.
23 |
24 | ![IMC_workflow](img/IMC_workflow.png)
25 | *Overview of imaging mass cytometry data acquisition. Taken from [@Giesen2014]*
26 |
27 | IMC was first published in 2014 [@Giesen2014] and has been commercialized by
28 | Standard BioTools<sup>TM</sup> to be distributed as the Hyperion Imaging
29 | System<sup>TM</sup> (documentation is available
30 | [here](https://www.fluidigm.com/products-services/instruments/hyperion)).
31 | Similar to other HMI technologies such as MIBI [@Angelo2014], CyCIF [@Lin2018],
32 | 4i [@Gut2018], CODEX [@Goltsev2018] and SABER [@Saka2019], IMC captures the spatial
33 | expression of multiple proteins in parallel. With a nominal 1 μm resolution,
34 | IMC is able to detect cytoplasmic and nuclear localization of proteins. The
35 | current ablation frequency of IMC is 200 Hz, meaning that a 1 mm$^2$ area
36 | can be imaged within about 2 hours.
37 |
38 | ## Technical details of IMC
39 |
40 | Technical aspects of how data acquisition works can be found in the original
41 | publication [@Giesen2014]. Briefly, antibodies to detect targets in biological
42 | material are labeled with heavy metals (e.g., lanthanides) that do not occur in
43 | biological systems and thus can be used upon binding to their target as a
44 | readout similar to fluorophores in fluorescence microscopy. Thin sections of the
45 | biological sample on a glass slide are stained with an antibody cocktail.
46 | Stained microscopy slides are mounted on a precise motor-driven stage inside the
47 | ablation chamber of the IMC instrument. A high-energy UV laser is focused on the
48 | tissue, and each individual laser shot ablates tissue from an area of roughly 1
49 | μm$^2$. The energy of the laser is absorbed by the tissue resulting
50 | in vaporization followed by condensation of the ablated material. The ablated
51 | material from each laser shot is transported in the gas phase into the plasma of
52 | the mass cytometer, where first atomization of the particles and then ionization
53 | of the atoms occurs. The ion cloud is then transferred into a vacuum, and all
54 | ions below a mass of 80 m/z are filtered using a quadrupole mass filter. The
55 | remaining ions (mostly those used to tag antibodies) are analyzed in a
56 | time-of-flight mass spectrometer to ultimately obtain an accumulated mass
57 | spectrum from all ions that correspond to a single laser shot. One can regard
58 | this spectrum as the information underlying a 1 μm$^2$ pixel. With
59 | repetitive laser shots (e.g., at 200 Hz) and a simultaneous lateral sample
60 | movement, a tissue can be ablated pixel by pixel. Ultimately an image is
61 | reconstructed from each pixel mass spectrum.
62 |
63 | In principle, IMC can be applied to the same type of samples as conventional
64 | fluorescence microscopy. The largest distinction from fluorescence microscopy is
65 | that for IMC, primary-labeled antibodies are commonly used, whereas in
66 | fluorescence microscopy secondary antibodies carrying fluorophores are widely
67 | applied. Additionally, for IMC, samples are dried before acquisition and can be
68 | stored for years. Formalin-fixed and paraffin-embedded (FFPE) samples are widely
69 | used for IMC. The FFPE blocks are cut to 2-5 μm thick sections and are
70 | stained, dried, and analyzed with IMC.
71 |
72 | ### Metal-conjugated antibodies and staining
73 |
74 | Metal-labeled antibodies are used to stain molecules in tissues, enabling the
75 | delineation of tissue structures, cells, and subcellular structures. Metal-conjugated
76 | antibodies can either be purchased directly from Standard BioTools<sup>TM</sup> ([MaxPar IMC Antibodies](https://store.fluidigm.com/Cytometry/ConsumablesandReagentsCytometry/MaxparAntibodies?cclcl=en_US)),
77 | or antibodies can be purchased and labeled individually ([MaxPar Antibody
78 | Labeling](https://store.fluidigm.com/Cytometry/ConsumablesandReagentsCytometry/MaxparAntibodyLabelingKits?cclcl=en_US)).
79 | Antibody labeling using the MaxPar kits is performed via TCEP antibody reduction
80 | followed by crosslinking with sulfhydryl-reactive maleimide-bearing metal
81 | polymers. For each antibody, it is essential to validate its functionality and
82 | specificity and to optimize its usage to obtain an optimal signal-to-noise ratio. To
83 | facilitate antibody handling, a database is highly useful.
84 | [Airlab](https://github.com/BodenmillerGroup/airlab-web) is such a platform; it
85 | allows antibody lot tracking, validation data uploads, and panel generation for
86 | subsequent upload to the IMC acquisition software from Standard BioTools<sup>TM</sup>.
87 |
88 | Depending on the sample type, different staining protocols can be used.
89 | Generally, once antibodies of choice have been conjugated to a metal tag,
90 | titration experiments are performed to identify the optimal staining
91 | concentration. For FFPE samples, different staining protocols have been
92 | described, and different antibodies show variable staining with different
93 | protocols. Protocols such as the one provided by Standard BioTools<sup>TM</sup> or the one described by
94 | [@Ijsselsteijn2019] are recommended. Briefly, for FFPE tissues, a dewaxing
95 | step is performed to remove the paraffin used to embed the material, followed by
96 | a graded re-hydration of the samples. Thereafter, heat-induced epitope retrieval
97 | (HIER), a step aiming at the reversal of formalin-based fixation, is used to
98 | unmask epitopes within tissues and make them accessible to antibodies. Epitope
99 | unmasking is generally performed in either basic, EDTA-based buffers (pH 9.2) or
100 | acidic, citrate-based buffers (pH 6). Next, a buffer containing bovine serum
101 | albumin (BSA) is used to block non-specific binding. This buffer is also used to
102 | dilute antibody stocks for the actual antibody staining. Staining time and
103 | temperature may vary and optimization must be performed to ensure that each
104 | single antibody performs well. However, overnight staining at 4°C or 3-5
105 | hours at room temperature seem to be suitable in many cases.
106 |
107 | Following antibody incubation, unbound antibodies are washed away and a
108 | counterstain comparable to DAPI is applied to enable the identification of
109 | nuclei. The [Iridium intercalator](https://store.fluidigm.com/Cytometry/ConsumablesandReagentsCytometry/MassCytometryReagents/Cell-ID%E2%84%A2%20Intercalator-Ir%E2%80%94125%20%C2%B5M)
110 | from Standard BioTools<sup>TM</sup> is a reagent of choice and is applied in a brief 5-minute staining step.
111 | Finally, the samples are washed again and then dried under an airflow. Once
112 | dried, the samples are ready for analysis using IMC and are
113 | usually stable for a long period of time (at least one year).
114 |
115 | ### Data acquisition
116 |
117 | Data is acquired using the CyTOF software from Standard BioTools<sup>TM</sup> (see manuals
118 | [here](https://go.fluidigm.com/hyperion-support-documents)).
119 |
120 | The regions of interest are selected by providing coordinates for ablation. To
121 | determine the region to be imaged, so-called "panoramas" can be generated. These
122 | are stitched images of single fields of views of about 200 μm in diameter.
123 | Panoramas provide an optical overview of the tissue with a resolution similar to
124 | 10x in microscopy and are intended to help with the selection of regions of
125 | interest for ablation. The tissue should be centered on the glass slide, since
126 | the imaging mass cytometer cannot access roughly 5 mm from each of the slide
127 | edges. Currently, the instruments can process one slide at a time and usually one MCD
128 | file per sample slide is generated.
129 |
130 | Many regions of interest can be defined on a single slide and acquisition
131 | parameters such as channels to acquire, acquisition speed (100 Hz or 200 Hz),
132 | ablation energy, and other parameters are user-defined. It is recommended that
133 | all isotope channels are recorded. This will result in larger raw data files but valuable information such as
134 | potential contamination of the argon gas (e.g., Xenon) or of the samples (e.g.,
135 | lead, barium) is stored.
136 |
137 | To process a large number of slides or to select regions on whole-slide samples,
138 | panoramas may not provide sufficient information. If this is the case,
139 | multi-color immunofluorescence of the same slide prior to staining with
140 | metal-labeled antibodies may be performed. To allow for region selection based
141 | on immunofluorescence images and to align those images with a panorama of the
142 | same or consecutive sections of the sample, we developed
143 | [napping](https://github.com/BodenmillerGroup/napping).
144 |
145 | Acquisition time is directly proportional to the total size of ablation, and run
146 | times for samples of large area or for large sample numbers can roughly be calculated by
147 | dividing the ablation area in square micrometers by the ablation speed (e.g.,
148 | 200 Hz). In addition to the proprietary MCD file format, TXT files can also
149 | be generated for each region of interest. This is recommended as a back-up
150 | option in case of errors that may corrupt MCD files but not TXT files.
151 |
152 | ## IMC data format {#data-format}
153 |
154 | Upon completion of the acquisition an MCD file of variable size is generated. A
155 | single MCD file can hold raw acquisition data for multiple regions of interest,
156 | optical images providing a slide level overview of the sample ("panoramas"), and
157 | detailed metadata about the experiment. Additionally, for each acquisition a
158 | TXT file is generated which holds the same pixel information as the matched
159 | acquisition in the MCD file.
160 |
161 | The Hyperion Imaging System<sup>TM</sup> produces files in the following folder structure:
162 |
163 | ```
164 | .
165 | +-- {XYZ}_ROI_001_1.txt
166 | +-- {XYZ}_ROI_002_2.txt
167 | +-- {XYZ}_ROI_003_3.txt
168 | +-- {XYZ}.mcd
169 | ```
170 |
171 | Here, `{XYZ}` defines the filename, `ROI_001`, `ROI_002`, `ROI_003` are
172 | user-defined names (descriptions) for the selected regions of interest (ROI),
173 | and `1`, `2`, `3` indicate the unique acquisition identifiers. The ROI
174 | description entry can be specified in the Standard BioTools software when
175 | selecting ROIs. The MCD file contains the raw imaging data and the full metadata
176 | of all acquired ROIs, while each TXT file contains data of a single ROI without
177 | metadata. To follow a consistent naming scheme and to bundle all metadata, we
178 | recommend zipping the folder. Each ZIP file should only contain data from a
179 | single MCD file, and the name of the ZIP file should match the name of the MCD
180 | file.
181 |
182 | We refer to this data as raw data and the further
183 | processing of this data is described in Section \@ref(processing).
184 |
185 |
186 |
--------------------------------------------------------------------------------
/02-processing.Rmd:
--------------------------------------------------------------------------------
1 | # Multi-channel image processing {#processing}
2 |
3 | This book focuses on common analysis steps of spatially-resolved single-cell data
4 | **after** image segmentation and feature extraction. In this chapter, the sections
5 | describe the processing of multiplexed imaging data, including file type
6 | conversion, image segmentation, feature extraction and data export. To obtain
7 | more detailed information on the individual image processing approaches, please
8 | visit their repositories:
9 |
10 | [steinbock](https://github.com/BodenmillerGroup/steinbock): The `steinbock`
11 | toolkit offers tools for multi-channel image processing using the command-line
12 | or Python code [@Windhager2021]. Supported tasks include IMC data pre-processing,
13 | multi-channel image segmentation, object quantification and data
14 | export to a variety of file formats. It supports functionality similar to that
15 | of the IMC Segmentation Pipeline (see below) and further allows deep-learning enabled image
16 | segmentation. The toolkit is available as a platform-independent Docker
17 | container, ensuring reproducibility and user-friendly installation. Read more in
18 | the [Docs](https://bodenmillergroup.github.io/steinbock/latest/).
19 |
20 | [IMC Segmentation
21 | Pipeline](https://github.com/BodenmillerGroup/ImcSegmentationPipeline): The IMC
22 | segmentation pipeline offers a rather manual way of segmenting multi-channel
23 | images using a pixel classification-based approach. We continue to maintain the
24 | pipeline but recommend the use of the `steinbock` toolkit for multi-channel
25 | image processing. Raw IMC data pre-processing is performed using the
26 | [readimc](https://github.com/BodenmillerGroup/readimc) Python package to convert
27 | raw MCD files into OME-TIFF and TIFF files. After image cropping, an
28 | [Ilastik](https://www.ilastik.org/) pixel classifier is trained for image
29 | classification prior to image segmentation using
30 | [CellProfiler](https://cellprofiler.org/). Features (i.e., mean pixel intensity)
31 | of segmented objects (i.e., cells) are quantified and exported. Read more in the
32 | [Docs](https://bodenmillergroup.github.io/ImcSegmentationPipeline/).
33 |
34 | ## Image pre-processing (IMC specific)
35 |
36 | Image pre-processing is technology dependent. While most multiplexed imaging
37 | technologies generate TIFF or OME-TIFF files which can be directly segmented
38 | using the `steinbock` toolkit, IMC produces data in the proprietary
39 | data format MCD.
40 |
41 | To facilitate IMC data pre-processing, the
42 | [readimc](https://github.com/BodenmillerGroup/readimc) open-source Python
43 | package allows extracting the multi-modal (IMC acquisitions, panoramas),
44 | multi-region, multi-channel information contained in raw IMC images. Both the
45 | IMC Segmentation Pipeline and the `steinbock` toolkit use the `readimc`
46 | package for IMC data pre-processing. Starting from IMC raw data and a "panel"
47 | file, individual acquisitions are extracted as TIFF files and OME-TIFF files if
48 | using the IMC Segmentation Pipeline. The panel contains information of
49 | antibodies used in the experiment and the user can specify which channels to
50 | keep for downstream analysis. When using the IMC Segmentation Pipeline, random
51 | tiles are cropped from images for convenience of pixel labelling.
52 |
53 | ## Image segmentation
54 |
55 | The IMC Segmentation Pipeline supports pixel classification-based image
56 | segmentation while `steinbock` supports pixel classification-based and deep
57 | learning-based segmentation.
58 |
59 | **Pixel classification-based** image segmentation is performed by training a
60 | random forest classifier using [Ilastik](https://www.ilastik.org/) on the
61 | randomly extracted image crops and selected image channels. Pixels are
62 | classified as nuclear, cytoplasmic, or background. Employing a customizable
63 | [CellProfiler](https://cellprofiler.org/) pipeline, the probabilities are then
64 | thresholded for segmenting nuclei, and nuclei are expanded into cytoplasmic
65 | regions to obtain cell masks.
66 |
67 | **Deep learning-based** image segmentation is performed as presented by
68 | [@Greenwald2021]. Briefly, `steinbock` first aggregates user-defined
69 | image channels to generate two-channel images representing nuclear and
70 | cytoplasmic signals. Next, the
71 | [DeepCell](https://github.com/vanvalenlab/intro-to-deepcell) Python package is
72 | used to run `Mesmer`, a deep learning-enabled segmentation algorithm pre-trained
73 | on `TissueNet`, to automatically obtain cell masks without any further user
74 | input.
75 |
76 | Segmentation masks are single-channel images that match the input images in
77 | size, with non-zero grayscale values indicating the IDs of segmented objects
78 | (e.g., cells). These masks are written out as TIFF files after segmentation.
79 |
80 | ## Feature extraction {#feature-extraction}
81 |
82 | Using the segmentation masks together with their corresponding multi-channel
83 | images, the IMC Segmentation Pipeline as well as the `steinbock` toolkit extract
84 | object-specific features. These include the mean pixel intensity per object and
85 | channel, morphological features (e.g., object area) and the objects' locations.
86 | Object-specific features are written out as CSV files where rows represent
87 | individual objects and columns represent features.
88 |
89 | Furthermore, the IMC Segmentation Pipeline and the `steinbock` toolkit compute
90 | _spatial object graphs_, in which nodes correspond to objects, and nodes in
91 | spatial proximity are connected by an edge. These graphs serve as a proxy for
92 | interactions between neighboring cells. They are stored as an edge list in the form of
93 | one CSV file per image.
94 |
95 | Both approaches also write out image-specific metadata (e.g., width and height)
96 | as a CSV file.
97 |
98 | ## Data export
99 |
100 | To further facilitate compatibility with downstream analysis, `steinbock`
101 | exports data to a variety of file formats such as OME-TIFF for images, FCS for
102 | single-cell data, the _anndata_ format [@Virshup2021] for data analysis in Python,
103 | and various graph file formats for network analysis using software such as
104 | [Cytoscape](https://cytoscape.org/) [@Shannon2003]. For export to OME-TIFF,
105 | steinbock uses [xtiff](https://github.com/BodenmillerGroup/xtiff), a Python
106 | package developed for writing multi-channel TIFF stacks.
107 |
108 | ## Data import into R
109 |
110 | In Section \@ref(read-data), we will highlight the use of the
111 | [imcRtools](https://github.com/BodenmillerGroup/imcRtools) and
112 | [cytomapper](https://github.com/BodenmillerGroup/cytomapper) R/Bioconductor
113 | packages to read spatially-resolved, single-cell data and images as generated by the
114 | IMC Segmentation Pipeline and the `steinbock` toolkit into the statistical
115 | programming language R. All further downstream analyses are performed in R and
116 | detailed in the following sections.
117 |
118 |
119 |
120 |
121 |
122 |
123 |
--------------------------------------------------------------------------------
/03-prerequisites.Rmd:
--------------------------------------------------------------------------------
1 | # Prerequisites {#prerequisites}
2 |
3 | The analysis presented in this book requires a basic understanding of the
4 | `R` programming language. An introduction to `R` can be found [here](https://cran.r-project.org/doc/manuals/r-release/R-intro.pdf) and
5 | in the book [R for Data Science](https://r4ds.hadley.nz/).
6 |
7 | Furthermore, it is beneficial to be familiar with single-cell data analysis
8 | using the [Bioconductor](https://www.bioconductor.org/) framework. The
9 | [Orchestrating Single-Cell Analysis with Bioconductor](https://bioconductor.org/books/release/OSCA/) book
10 | gives an excellent overview on data containers and basic analysis that are being
11 | used here.
12 |
13 | An overview of IMC as a technology and the necessary image processing steps can be
14 | found on the [IMC workflow website](https://bodenmillergroup.github.io/IMCWorkflow/).
15 |
16 | Before we get started on IMC data analysis, we will need to make sure that
17 | software dependencies are installed and the example data is downloaded.
18 |
19 | ## Obtain the code
20 |
21 | This book provides R code to perform single-cell and spatial data analysis.
22 | You can copy the individual code chunks into your R scripts or you can obtain
23 | the full code of the book via:
24 |
25 | ```
26 | git clone https://github.com/BodenmillerGroup/IMCDataAnalysis.git
27 | ```
28 |
29 | ## Software requirements
30 |
31 | The R packages needed to execute the presented workflow can either be manually
32 | installed (see section \@ref(manual-install)) or are available within a provided
33 | Docker container (see section \@ref(docker)). The Docker option is useful if you
34 | want to exactly reproduce the presented analysis across operating systems;
35 | however, the manual install gives you more flexibility for exploratory data
36 | analysis.
37 |
38 | ### Using Docker {#docker}
39 |
40 | For reproducibility purposes, we provide a Docker container [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/pkgs/container/imcdataanalysis).
41 |
42 | 1. After installing [Docker](https://docs.docker.com/get-docker/) you can first pull the container via:
43 |
44 | ```
45 | docker pull ghcr.io/bodenmillergroup/imcdataanalysis:latest
46 | ```
47 |
48 | and then run the container:
49 |
50 | ```
51 | docker run -v /path/to/IMCDataAnalysis:/home/rstudio/IMCDataAnalysis \
52 | -e PASSWORD=bioc -p 8787:8787 \
53 | ghcr.io/bodenmillergroup/imcdataanalysis:latest
54 | ```
55 |
56 | Here, the `/path/to/` needs to be adjusted to where you keep the code and data
57 | of the book.
58 |
59 | **Of note: it is recommended to use a date-tagged version of the container to ensure reproducibility**.
60 | This can be done via:
61 |
62 | ```
63 | docker pull ghcr.io/bodenmillergroup/imcdataanalysis:<year-month-date>
64 | ```
65 |
66 | 2. An RStudio server session can be accessed via a browser at `localhost:8787` using `Username: rstudio` and `Password: bioc`.
67 | 3. Navigate to `IMCDataAnalysis` and open the `IMCDataAnalysis.Rproj` file.
68 | 4. Code in the individual files can now be executed or the whole workflow can be built by entering `bookdown::render_book()`.
69 |
70 | ### Manual installation {#manual-install}
71 |
72 | The following section describes how to manually install all needed R packages
73 | when not using the provided Docker container.
74 | To install all R packages needed for the analysis, please run:
75 |
76 | ```{r install-packages, eval=FALSE}
77 | if (!requireNamespace("BiocManager", quietly = TRUE))
78 |   install.packages("BiocManager")
79 |
80 | # BiocManager installs CRAN and Bioconductor packages; each one is listed once
81 | BiocManager::install(c("rmarkdown", "bookdown", "pheatmap", "viridis", "zoo",
82 |                        "devtools", "testthat", "tiff", "distill", "ggrepel",
83 |                        "patchwork", "mclust", "RColorBrewer", "uwot", "Rtsne",
84 |                        "harmony", "Seurat", "SeuratObject", "cowplot", "kohonen",
85 |                        "caret", "randomForest", "ggridges", "gridGraphics",
86 |                        "scales", "Matrix", "CATALYST", "scuttle", "scater",
87 |                        "dittoSeq", "tidyverse", "BiocStyle", "batchelor",
88 |                        "bluster", "scran", "lisaClust", "spicyR", "iSEE",
89 |                        "imcRtools", "cytomapper", "imcdatasets", "cytoviewer"))
90 |
91 | # Github dependencies
92 | devtools::install_github("i-cyto/Rphenograph")
93 | ```
94 |
95 | ```{r load-libraries, echo = FALSE, message = FALSE}
96 | # Raise the timeout (in seconds) for internet operations such as downloads
97 | options(timeout = 10000)
98 | # Attach the workflow packages in this fixed order; the order determines
99 | # which package masks which when two export a function of the same name.
100 | # invisible() drops the list that lapply() would otherwise return.
101 | invisible(
102 |   lapply(
103 |     c(
104 |       "CATALYST", "SpatialExperiment", "SingleCellExperiment", "scuttle",
105 |       "scater", "imcRtools", "cytomapper", "dittoSeq", "tidyverse",
106 |       "bluster", "scran", "lisaClust", "caret", "cytoviewer"
107 |     ),
108 |     library, character.only = TRUE
109 |   )
110 | )
111 | ```
112 |
113 | ### Major package versions
114 |
115 | Throughout the analysis, we rely on different R software packages.
116 | This section lists the most commonly used packages in this workflow.
117 |
118 | Data containers:
119 |
120 | * [SpatialExperiment](https://bioconductor.org/packages/release/bioc/html/SpatialExperiment.html) version `r packageVersion("SpatialExperiment")`
121 | * [SingleCellExperiment](https://bioconductor.org/packages/release/bioc/html/SingleCellExperiment.html) version `r packageVersion("SingleCellExperiment")`
122 |
123 | Data analysis:
124 |
125 | * [CATALYST](https://bioconductor.org/packages/release/bioc/html/CATALYST.html) version `r packageVersion("CATALYST")`
126 | * [imcRtools](https://bioconductor.org/packages/release/bioc/html/imcRtools.html) version `r packageVersion("imcRtools")`
127 | * [scuttle](https://bioconductor.org/packages/release/bioc/html/scuttle.html) version `r packageVersion("scuttle")`
128 | * [scater](https://bioconductor.org/packages/release/bioc/html/scater.html) version `r packageVersion("scater")`
129 | * [batchelor](https://www.bioconductor.org/packages/release/bioc/html/batchelor.html) version `r packageVersion("batchelor")`
130 | * [bluster](https://www.bioconductor.org/packages/release/bioc/html/bluster.html) version `r packageVersion("bluster")`
131 | * [scran](https://www.bioconductor.org/packages/release/bioc/html/scran.html) version `r packageVersion("scran")`
132 | * [harmony](https://github.com/immunogenomics/harmony) version `r packageVersion("harmony")`
133 | * [Seurat](https://satijalab.org/seurat/index.html) version `r packageVersion("Seurat")`
134 | * [lisaClust](https://www.bioconductor.org/packages/release/bioc/html/lisaClust.html) version `r packageVersion("lisaClust")`
135 | * [caret](https://topepo.github.io/caret/) version `r packageVersion("caret")`
136 |
137 | Data visualization:
138 |
139 | * [cytomapper](https://bioconductor.org/packages/release/bioc/html/cytomapper.html) version `r packageVersion("cytomapper")`
140 | * [cytoviewer](https://bioconductor.org/packages/release/bioc/html/cytoviewer.html) version `r packageVersion("cytoviewer")`
141 | * [dittoSeq](https://bioconductor.org/packages/release/bioc/html/dittoSeq.html) version `r packageVersion("dittoSeq")`
142 |
143 | Tidy R:
144 |
145 | * [tidyverse](https://www.tidyverse.org/) version `r packageVersion("tidyverse")`
146 |
147 | ## Image processing {#image-processing}
148 |
149 | The analysis presented here fully relies on packages written in the programming
150 | language `R` and primarily focuses on analysis approaches downstream of image
151 | processing. The example data available at
152 | [https://zenodo.org/record/7575859](https://zenodo.org/record/7575859) were
153 | processed (file type conversion, image segmentation, feature extraction as
154 | explained in Section \@ref(processing)) using the
155 | [steinbock](https://bodenmillergroup.github.io/steinbock/latest/) toolkit. The
156 | exact command line interface calls to process the raw data are shown below:
157 |
158 | ```{r, echo = FALSE, message = FALSE}
159 | if (!dir.exists("data/steinbock")) dir.create("data/steinbock", recursive = TRUE)
160 | if (!dir.exists("data/ImcSegmentationPipeline")) dir.create("data/ImcSegmentationPipeline", recursive = TRUE)
161 | # Pre-download steinbock file (recursive = TRUE above also creates a missing "data" parent folder)
162 | download.file("https://zenodo.org/record/7624451/files/steinbock.sh",
163 |               "data/steinbock/steinbock.sh")
164 | ```
165 |
166 | ```{bash, file="data/steinbock/steinbock.sh", eval=FALSE}
167 |
168 | ```
169 |
170 | ## Download example data {#download-data}
171 |
172 | Throughout this tutorial, we will access a number of different data types.
173 | To declutter the analysis scripts, we will already download all needed data here.
174 |
175 | To highlight the basic steps of IMC data analysis, we provide example data that
176 | were acquired as part of the **I**ntegrated i**MMU**noprofiling of large adaptive
177 | **CAN**cer patient cohorts project ([immucan.eu](https://immucan.eu/)). The
178 | raw data of 4 patients can be accessed online at
179 | [zenodo.org/record/7575859](https://zenodo.org/record/7575859). We will only
180 | download the sample/patient metadata information here:
181 |
182 | ```{r download-sample-data}
183 | download.file("https://zenodo.org/record/7575859/files/sample_metadata.csv",
184 | destfile = "data/sample_metadata.csv")
185 | ```
186 |
187 | ### Processed multiplexed imaging data
188 |
189 | The IMC raw data was either processed using the
190 | [steinbock](https://github.com/BodenmillerGroup/steinbock) toolkit or the
191 | [IMC Segmentation Pipeline](https://github.com/BodenmillerGroup/ImcSegmentationPipeline).
192 | Image processing included file type conversion, cell segmentation and feature
193 | extraction.
194 |
195 | **steinbock output**
196 |
197 | This book uses the output of the `steinbock` framework when applied to process
198 | the example data. The processed data includes the single-cell mean intensity
199 | files, the single-cell morphological features and spatial locations, spatial
200 | object graphs in form of edge lists indicating cells in close proximity, hot
201 | pixel filtered multi-channel images, segmentation masks, image metadata and
202 | channel metadata. All these files will be downloaded here for later use. The
203 | commands which were used to generate this data can be found in the shell script
204 | above.
205 |
206 | ```{r steinbock-results}
207 | # intensities.zip: download, extract into data/steinbock, then delete the archive
208 | url <- "https://zenodo.org/record/7624451/files/intensities.zip"
209 | destfile <- "data/steinbock/intensities.zip"
210 | download.file(url, destfile)
211 | unzip(destfile, exdir="data/steinbock", overwrite=TRUE)
212 | unlink(destfile)
213 |
214 | # regionprops.zip: download, extract into data/steinbock, then delete the archive
215 | url <- "https://zenodo.org/record/7624451/files/regionprops.zip"
216 | destfile <- "data/steinbock/regionprops.zip"
217 | download.file(url, destfile)
218 | unzip(destfile, exdir="data/steinbock", overwrite=TRUE)
219 | unlink(destfile)
220 |
221 | # neighbors.zip: download, extract into data/steinbock, then delete the archive
222 | url <- "https://zenodo.org/record/7624451/files/neighbors.zip"
223 | destfile <- "data/steinbock/neighbors.zip"
224 | download.file(url, destfile)
225 | unzip(destfile, exdir="data/steinbock", overwrite=TRUE)
226 | unlink(destfile)
227 |
228 | # img.zip (images): download, extract into data/steinbock, then delete the archive
229 | url <- "https://zenodo.org/record/7624451/files/img.zip"
230 | destfile <- "data/steinbock/img.zip"
231 | download.file(url, destfile)
232 | unzip(destfile, exdir="data/steinbock", overwrite=TRUE)
233 | unlink(destfile)
234 |
235 | # masks_deepcell.zip (masks): download, extract into data/steinbock, then delete the archive
236 | url <- "https://zenodo.org/record/7624451/files/masks_deepcell.zip"
237 | destfile <- "data/steinbock/masks_deepcell.zip"
238 | download.file(url, destfile)
239 | unzip(destfile, exdir="data/steinbock", overwrite=TRUE)
240 | unlink(destfile)
241 |
242 | # download individual (non-zipped) files directly into data/steinbock
243 | download.file("https://zenodo.org/record/7624451/files/panel.csv",
244 | "data/steinbock/panel.csv")
245 | download.file("https://zenodo.org/record/7624451/files/images.csv",
246 | "data/steinbock/images.csv")
247 | download.file("https://zenodo.org/record/7624451/files/steinbock.sh",
248 | "data/steinbock/steinbock.sh")
249 | ```
250 |
251 | **IMC Segmentation Pipeline output**
252 |
253 | The example data was also processed using the
254 | [IMC Segmentation Pipeline](https://github.com/BodenmillerGroup/ImcSegmentationPipeline) (version 3).
255 | To highlight the use of the reader function for this type of output, we will need
256 | to download the `cpout` folder which is part of the `analysis` folder. The `cpout`
257 | folder stores all relevant output files of the pipeline. For a full description
258 | of the pipeline, please refer to the [docs](https://bodenmillergroup.github.io/ImcSegmentationPipeline/).
259 |
260 | ```{r imcsegpipe-results}
261 | # download analysis folder
262 | url <- "https://zenodo.org/record/7997296/files/analysis.zip"
263 | destfile <- "data/ImcSegmentationPipeline/analysis.zip"
264 | download.file(url, destfile)
265 | unzip(destfile, exdir="data/ImcSegmentationPipeline", overwrite=TRUE)
266 | unlink(destfile)
267 |
268 | unlink("data/ImcSegmentationPipeline/analysis/cpinp/", recursive=TRUE)
269 | unlink("data/ImcSegmentationPipeline/analysis/crops/", recursive=TRUE)
270 | unlink("data/ImcSegmentationPipeline/analysis/histocat/", recursive=TRUE)
271 | unlink("data/ImcSegmentationPipeline/analysis/ilastik/", recursive=TRUE)
272 | unlink("data/ImcSegmentationPipeline/analysis/ometiff/", recursive=TRUE)
273 | unlink("data/ImcSegmentationPipeline/analysis/cpout/images/", recursive=TRUE)
274 | unlink("data/ImcSegmentationPipeline/analysis/cpout/probabilities/", recursive=TRUE)
275 | unlink("data/ImcSegmentationPipeline/analysis/cpout/masks/", recursive=TRUE)
276 | ```
277 |
278 | ### Files for spillover matrix estimation
279 |
280 | To highlight the estimation and correction of channel-spillover as described by
281 | [@Chevrier2017], we can access an example spillover-acquisition from:
282 |
283 | ```{r download-spillover-data}
284 | download.file("https://zenodo.org/record/7575859/files/compensation.zip",
285 | "data/compensation.zip")
286 | unzip("data/compensation.zip", exdir="data", overwrite=TRUE)
287 | unlink("data/compensation.zip")
288 | ```
289 |
290 | ### Gated cells
291 |
292 | In Section \@ref(classification), we present a cell type classification approach
293 | that relies on previously gated cells. This ground truth data is available
294 | online at [zenodo.org/record/8095133](https://zenodo.org/record/8095133) and
295 | will be downloaded here for later use:
296 |
297 | ```{r download-gated-cells}
298 | download.file("https://zenodo.org/record/8095133/files/gated_cells.zip",
299 | "data/gated_cells.zip")
300 | unzip("data/gated_cells.zip", exdir="data", overwrite=TRUE)
301 | unlink("data/gated_cells.zip")
302 | ```
303 |
304 | ## Software versions {#sessionInfo}
305 |
306 |
307 | SessionInfo
308 |
309 | ```{r, echo = FALSE, message = FALSE}
310 | sessionInfo()
311 | ```
312 |
313 |
314 |
315 |
316 |
--------------------------------------------------------------------------------
/06-quality_control.Rmd:
--------------------------------------------------------------------------------
1 | # Image and cell-level quality control
2 |
3 | The following section discusses possible quality indicators for data obtained
4 | by IMC and other highly multiplexed imaging technologies. Here, we will focus
5 | on describing quality metrics on the single-cell as well as image level.
6 |
7 | ## Read in the data
8 |
9 | We will first read in the data processed in previous sections:
10 |
11 | ```{r read-data, message=FALSE}
12 | images <- readRDS("data/images.rds")
13 | masks <- readRDS("data/masks.rds")
14 | spe <- readRDS("data/spe.rds")
15 | ```
16 |
17 | ## Segmentation quality control {#seg-quality}
18 |
19 | The first step after image segmentation is to observe its accuracy.
20 | Without having ground-truth data readily available, a common approach to
21 | segmentation quality control is to overlay segmentation masks on composite images
22 | displaying channels that were used for segmentation.
23 | The [cytomapper](https://www.bioconductor.org/packages/release/bioc/html/cytomapper.html)
24 | package supports exactly this task by using the `plotPixels` function.
25 |
26 | Here, we select 3 random images and perform image- and channel-wise
27 | normalization (channels are first min-max normalized and scaled to a range of
28 | 0-1 before clipping the maximum intensity to 0.2).
29 |
30 | ```{r overlay-masks, message=FALSE}
31 | library(cytomapper)
32 | set.seed(20220118)
33 | img_ids <- sample(seq_along(images), 3)
34 |
35 | # Normalize and clip images
36 | cur_images <- images[img_ids]
37 | cur_images <- cytomapper::normalize(cur_images, separateImages = TRUE)
38 | cur_images <- cytomapper::normalize(cur_images, inputRange = c(0, 0.2))
39 |
40 | plotPixels(cur_images,
41 | mask = masks[img_ids],
42 | img_id = "sample_id",
43 | missing_colour = "white",
44 | colour_by = c("CD163", "CD20", "CD3", "Ecad", "DNA1"),
45 | colour = list(CD163 = c("black", "yellow"),
46 | CD20 = c("black", "red"),
47 | CD3 = c("black", "green"),
48 | Ecad = c("black", "cyan"),
49 | DNA1 = c("black", "blue")),
50 | image_title = NULL,
51 | legend = list(colour_by.title.cex = 0.7,
52 | colour_by.labels.cex = 0.7))
53 | ```
54 |
55 | We can see that nuclei are centered within the segmentation masks and all cell
56 | types are correctly segmented (note: to zoom into the image you can right click
57 | and select `Open Image in New Tab`). A common challenge here is to segment large (e.g.,
58 | epithelial cells - in cyan) _versus_ small cells (e.g., B cells - in red). However, the
59 | segmentation approach here appears to correctly segment cells across different
60 | sizes.
61 |
62 | An easier and interactive way of observing segmentation quality is to use the
63 | interactive image viewer provided by the
64 | [cytoviewer](https://github.com/BodenmillerGroup/cytoviewer) R/Bioconductor
65 | package [@Meyer2024]. Under "Image-level" > "Basic controls", up to six markers
66 | can be selected for visualization. The contrast of each marker can be adjusted.
67 | Under "Image-level" > "Advanced controls", click the "Show cell outlines" box
68 | to outline segmented cells on the images.
69 |
70 | ```{r, message = FALSE}
71 | library(cytoviewer)
72 |
73 | app <- cytoviewer(image = images,
74 | mask = masks,
75 | object = spe,
76 | cell_id = "ObjectNumber",
77 | img_id = "sample_id")
78 |
79 | if (interactive()) {
80 | shiny::runApp(app)
81 | }
82 | ```
83 |
84 | An additional approach to observe cell segmentation quality and potentially also
85 | antibody specificity issues is to visualize single-cell expression in form of a
86 | heatmap. Here, we sub-sample the dataset to 2000 cells for visualization
87 | purposes and overlay the cancer type from which the cells were extracted.
88 |
89 | ```{r segmentation-heatmap, message=FALSE, fig.height=7}
90 | library(dittoSeq)
91 | library(viridis)
92 | cur_cells <- sample(seq_len(ncol(spe)), 2000)
93 |
94 | dittoHeatmap(spe[,cur_cells],
95 | genes = rownames(spe)[rowData(spe)$use_channel],
96 | assay = "exprs",
97 | cluster_cols = TRUE,
98 | scale = "none",
99 | heatmap.colors = viridis(100),
100 | annot.by = "indication",
101 | annotation_colors = list(indication = metadata(spe)$color_vectors$indication))
102 | ```
103 |
104 | We can differentiate between epithelial cells (Ecad+) and immune cells
105 | (CD45RO+). Some of the markers are detected in specific cells (e.g., Ki67, CD20,
106 | Ecad) while others are more broadly expressed across cells (e.g., HLADR, B2M,
107 | CD4).
108 |
109 | ## Image-level quality control {#image-quality}
110 |
111 | Image-level quality control is often performed using tools that offer a
112 | graphical user interface such as [QuPath](https://qupath.github.io/),
113 | [FIJI](https://imagej.net/software/fiji/) and the previously mentioned
114 | [cytoviewer](https://github.com/BodenmillerGroup/cytoviewer) package. Viewers
115 | that were specifically developed for IMC data can be seen
116 | [here](https://bodenmillergroup.github.io/IMCWorkflow/viewers.html). In this
117 | section, we will specifically focus on quantitative metrics to assess image
118 | quality.
119 |
120 | It is often of interest to calculate the signal-to-noise ratio (SNR) for
121 | individual channels and markers. Here, we define the SNR as:
122 |
123 | $$SNR = I_s/I_n$$
124 |
125 | where $I_s$ is the intensity of the signal (mean intensity of pixels with true
126 | signal) and $I_n$ is the intensity of the noise (mean intensity of pixels
127 | containing noise). This definition of the SNR is just one of many and other
128 | measures can be applied. Finding a threshold that separates pixels containing
129 | signal and pixels containing noise is not trivial and different approaches can
130 | be chosen. Here, we use the `otsu` thresholding approach to find pixels of the
131 | "foreground" (i.e., signal) and "background" (i.e., noise). The SNR is then
132 | defined as the mean intensity of foreground pixels divided by the mean intensity
133 | of background pixels. We compute this measure as well as the mean signal
134 | intensity per image. The plot below shows the average SNR _versus_ the average
135 | signal intensity across all images.
136 |
137 | ```{r image-snr, message=FALSE, warning=FALSE}
138 | library(tidyverse)
139 | library(ggrepel)
140 | library(EBImage)
141 |
142 | cur_snr <- lapply(names(images), function(x){
143 |     img <- images[[x]]
144 |     mat <- apply(img, 3, function(ch){
145 |         # Otsu threshold separating foreground (signal) from background (noise)
146 |         thres <- otsu(ch, range = c(min(ch), max(ch)), levels = 65536)
147 |         # Signal intensity: mean of the foreground pixels, computed once and reused
148 |         ps <- mean(ch[ch > thres])
149 |         # Signal-to-noise ratio: foreground mean divided by background mean
150 |         snr <- ps / mean(ch[ch <= thres])
151 |
152 |         return(c(snr = snr, ps = ps))
153 |     })
154 |     t(mat) %>% as.data.frame() %>%
155 |         mutate(image = x,
156 |                marker = colnames(mat)) %>%
157 |         pivot_longer(cols = c(snr, ps))
158 | })
159 |
160 | cur_snr <- do.call(rbind, cur_snr)
161 |
162 | cur_snr %>%
163 | group_by(marker, name) %>%
164 | summarize(log_mean = log2(mean(value))) %>%
165 | pivot_wider(names_from = name, values_from = log_mean) %>%
166 | ggplot() +
167 | geom_point(aes(ps, snr)) +
168 | geom_label_repel(aes(ps, snr, label = marker)) +
169 | theme_minimal(base_size = 15) + ylab("Signal-to-noise ratio [log2]") +
170 | xlab("Signal intensity [log2]")
171 | ```
172 |
173 | We observe PD1, LAG3 and cleaved PARP to have high SNR but low signal intensity
174 | meaning that in general these markers are not abundantly expressed. The Iridium
175 | intercalator (here marked as DNA1 and DNA2) has the highest signal intensity
176 | but low SNR. This might be due to staining differences between individual nuclei
177 | where some nuclei are considered as background. We do however observe high
178 | SNR and sufficient signal intensity for the majority of markers.
179 |
180 | Otsu thresholding and SNR calculation do not perform well if the markers are
181 | lowly abundant. In the next code chunk, we will remove, per image, markers that
182 | have a mean positive signal of 2 or below.
183 |
184 | ```{r, snr-adjusted, message=FALSE, warning=FALSE}
185 | cur_snr <- cur_snr %>%
186 | pivot_wider(names_from = name, values_from = value) %>% # back to one row per image/marker with snr and ps columns
187 | filter(ps > 2) %>% # keep only image/marker pairs whose mean foreground intensity exceeds 2
188 | pivot_longer(cols = c(snr, ps)) # restore the long format expected by the plotting code
189 |
190 | cur_snr %>%
191 | group_by(marker, name) %>%
192 | summarize(log_mean = log2(mean(value))) %>%
193 | pivot_wider(names_from = name, values_from = log_mean) %>%
194 | ggplot() +
195 | geom_point(aes(ps, snr)) +
196 | geom_label_repel(aes(ps, snr, label = marker)) +
197 | theme_minimal(base_size = 15) + ylab("Signal-to-noise ratio [log2]") +
198 | xlab("Signal intensity [log2]")
199 | ```
200 |
201 | This visualization shows a reduced SNR for PD1, LAG3 and cleaved PARP which was
202 | previously inflated due to low signal.
203 |
204 | Another quality indicator is the image area covered by cells (or biological
205 | tissue). This metric identifies ROIs where few cells are present, possibly
206 | hinting at incorrect selection of the ROI. We can compute the percentage of
207 | covered image area using the metadata contained in the `SpatialExperiment`
208 | object:
209 |
210 | ```{r cell-density}
211 | cell_density <- colData(spe) %>%
212 | as.data.frame() %>%
213 | group_by(sample_id) %>%
214 | # Per image: sum the cell areas (number of pixels covered by cells) and
215 | # compute the total number of pixels (width x height)
216 | summarize(cell_area = sum(area),
217 | no_pixels = mean(width_px) * mean(height_px)) %>%
218 | # Divide the number of pixels covered by cells
219 | # by the total number of pixels (yields a fraction, not a percentage)
220 | mutate(covered_area = cell_area / no_pixels)
221 |
222 | # Visualize the image area covered by cells per image
223 | ggplot(cell_density) +
224 | geom_point(aes(reorder(sample_id,covered_area), covered_area)) +
225 | theme_minimal(base_size = 15) +
226 | theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 15)) +
227 | ylim(c(0, 1)) + # NOTE(review): axis spans 0-1 (a fraction) while the label below reads "%"
228 | ylab("% covered area") + xlab("")
229 | ```
230 |
231 | We observe that two of the 14 images show unusually low cell coverage. These
232 | two images can now be visualized using `cytomapper`.
233 |
234 | ```{r low-density-images, message=FALSE}
235 | # Normalize and clip images
236 | cur_images <- images[c("Patient4_005", "Patient4_007")]
237 | cur_images <- cytomapper::normalize(cur_images, separateImages = TRUE)
238 | cur_images <- cytomapper::normalize(cur_images, inputRange = c(0, 0.2))
239 |
240 | plotPixels(cur_images,
241 | mask = masks[c("Patient4_005", "Patient4_007")],
242 | img_id = "sample_id",
243 | missing_colour = "white",
244 | colour_by = c("CD163", "CD20", "CD3", "Ecad", "DNA1"),
245 | colour = list(CD163 = c("black", "yellow"),
246 | CD20 = c("black", "red"),
247 | CD3 = c("black", "green"),
248 | Ecad = c("black", "cyan"),
249 | DNA1 = c("black", "blue")),
250 | legend = list(colour_by.title.cex = 0.7,
251 | colour_by.labels.cex = 0.7))
252 | ```
253 |
254 | These two images display less dense tissue structure but overall the images are
255 | intact and appear to be segmented correctly.
256 |
257 | Finally, it can be beneficial to visualize the mean marker expression per image
258 | to identify images with outlying marker expression. This check does not
259 | indicate image quality _per se_ but can highlight biological differences. Here,
260 | we will use the `aggregateAcrossCells` function of the
261 | `r BiocStyle::Biocpkg("scuttle")` package to compute the mean expression per
262 | image. For visualization purposes, we again `asinh` transform the mean expression
263 | values.
264 |
265 | ```{r mean-expression-per-image, message=FALSE, fig.height=7}
266 | library(scuttle)
267 |
268 | image_mean <- aggregateAcrossCells(spe,
269 | ids = spe$sample_id,
270 | statistics="mean",
271 | use.assay.type = "counts")
272 | assay(image_mean, "exprs") <- asinh(counts(image_mean))
273 |
274 | dittoHeatmap(image_mean, genes = rownames(spe)[rowData(spe)$use_channel],
275 | assay = "exprs", cluster_cols = TRUE, scale = "none",
276 | heatmap.colors = viridis(100),
277 | annot.by = c("indication", "patient_id", "ROI"),
278 | annotation_colors = list(indication = metadata(spe)$color_vectors$indication,
279 | patient_id = metadata(spe)$color_vectors$patient_id,
280 | ROI = metadata(spe)$color_vectors$ROI),
281 | show_colnames = TRUE)
282 | ```
283 |
284 | We observe extensive biological variation across the 14 images specifically for
285 | some of the cell phenotype markers including the macrophage marker CD206, the B
286 | cell marker CD20, the neutrophil marker CD15, and the proliferation marker Ki67.
287 | These differences will be further studied in the following chapters.
288 |
289 | ## Cell-level quality control {#cell-quality}
290 |
291 | In the following paragraphs we will look at different metrics and visualization
292 | approaches to assess data quality (as well as biological differences) on the
293 | single-cell level.
294 |
295 | Related to the signal-to-noise ratio (SNR) calculated above on the pixel-level,
296 | a similar measure can be derived on the single-cell level. Here, we will use
297 | a two component Gaussian mixture model for each marker to find cells
298 | with positive and negative expression. The SNR is defined as:
299 |
300 | $$SNR = I_s/I_n$$
301 |
302 | where $I_s$ is the intensity of the signal (mean intensity of cells with
303 | positive signal) and $I_n$ is the intensity of the noise (mean intensity of
304 | cells lacking expression). To define cells with positive and negative marker
305 | expression, we fit the mixture model across the transformed counts of all cells
306 | contained in the `SpatialExperiment` object. Next, for each marker we calculate
307 | the mean of the non-transformed counts for the positive and the negative cells.
308 | The SNR is then the ratio between the mean of the positive signal and the mean
309 | of the negative signal.
310 |
311 | ```{r cell-snr, message=FALSE, warning=FALSE, results="hide", fig.keep="all"}
312 | library(mclust)
313 |
314 | set.seed(220224)
315 | mat <- vapply(seq_len(nrow(spe)), function(x){
316 |     cur_exprs <- assay(spe, "exprs")[x,]
317 |     cur_counts <- assay(spe, "counts")[x,]
318 |     # Two-component mixture model on the transformed values splits cells into two classes
319 |     cur_model <- Mclust(cur_exprs, G = 2)
320 |     mean1 <- mean(cur_counts[cur_model$classification == 1])
321 |     mean2 <- mean(cur_counts[cur_model$classification == 2])
322 |     # The class with the higher mean raw count is the signal, the other one the noise
323 |     signal <- max(mean1, mean2)
324 |     noise <- min(mean1, mean2)
325 |
326 |     return(c(snr = signal/noise, ps = signal))
327 | }, FUN.VALUE = c(snr = 0, ps = 0)) # always a 2 x n numeric matrix with rows snr and ps
328 |
329 | cur_snr <- t(mat) %>% as.data.frame() %>%
330 | mutate(marker = rownames(spe))
331 |
332 | cur_snr %>% ggplot() +
333 | geom_point(aes(log2(ps), log2(snr))) +
334 | geom_label_repel(aes(log2(ps), log2(snr), label = marker)) +
335 | theme_minimal(base_size = 15) + ylab("Signal-to-noise ratio [log2]") +
336 | xlab("Signal intensity [log2]")
337 | ```
338 |
339 | Next, we observe the distributions of cell size across the individual images.
340 | Differences in cell size distributions can indicate segmentation biases due to
341 | differences in cell density or can indicate biological differences due to cell
342 | type compositions (tumor cells tend to be larger than immune cells).
343 |
344 | ```{r cell-size, message=FALSE}
345 | dittoPlot(spe, var = "area",
346 | group.by = "sample_id",
347 | plots = "boxplot") +
348 | ylab("Cell area") + xlab("")
349 |
350 | summary(spe$area)
351 | ```
352 |
353 | The median cell size is `r median(spe$area)` pixels with a median major axis
354 | length of `r round(median(spe$axis_major_length), digits = 1)`. The largest cell
355 | has an area of `r max(spe$area)` pixels which relates to a diameter of
356 | `r round(sqrt(max(spe$area)), digits = 1)` pixels assuming a circular shape.
357 | Overall, the distribution of cell sizes is similar across images with images from
358 | `Patient4_005` and `Patient4_007` showing a reduced average cell size. These
359 | images contain fewer tumor cells which can explain the smaller average cell size.
360 |
361 | We detect very small cells in the dataset and will remove them.
362 | The chosen threshold is arbitrary and needs to be adjusted per dataset.
363 |
364 | ```{r remove-small-cells}
365 | sum(spe$area < 5) # number of cells with an area below 5 that will be dropped
366 | spe <- spe[,spe$area >= 5] # keep only cells (columns) with an area of at least 5
367 | ```
368 |
369 | Another quality indicator can be an absolute measure of cell density often
370 | reported in cells per mm$^2$.
371 |
372 | ```{r no-cells-per-image, message=FALSE}
373 | cell_density <- colData(spe) %>%
374 | as.data.frame() %>%
375 | group_by(sample_id) %>%
376 | summarize(cell_count = n(), # number of cells per image
377 | no_pixels = mean(width_px) * mean(height_px)) %>% # image size in pixels; presumably width/height are constant within an image - confirm
378 | mutate(cells_per_mm2 = cell_count/(no_pixels/1000000)) # NOTE(review): dividing by 1e6 assumes 1 pixel = 1 um^2 - confirm for your acquisition
379 |
380 | ggplot(cell_density) +
381 | geom_point(aes(reorder(sample_id,cells_per_mm2), cells_per_mm2)) +
382 | theme_minimal(base_size = 15) +
383 | theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8)) +
384 | ylab("Cells per mm2") + xlab("")
385 | ```
386 |
387 | The number of cells per mm$^2$ varies across images which also depends on the
388 | number of tumor/non-tumor cells. As we can see in the following sections, some
389 | immune cells appear in cell dense regions while other stromal regions are less
390 | dense.
391 |
392 | The data presented here originate from samples from different locations with
393 | potential differences in pre-processing and each sample was stained individually.
394 | These (and other) technical aspects can induce staining differences between
395 | samples or batches of samples. Observing potential staining differences can be
396 | crucial to assess data quality. We will use ridgeline visualizations to check
397 | differences in staining patterns:
398 |
399 | ```{r ridges, message=FALSE, warning = FALSE, fig.width=7, fig.height=25}
400 | multi_dittoPlot(spe, vars = rownames(spe)[rowData(spe)$use_channel],
401 | group.by = "patient_id", plots = "ridgeplot",
402 | assay = "exprs",
403 | color.panel = metadata(spe)$color_vectors$patient_id)
404 | ```
405 |
406 | We observe variations in the distributions of marker expression across patients.
407 | These variations may arise partly from different abundances of cells in
408 | different images (e.g., Patient3 may have higher numbers of CD11c+ and PD1+
409 | cells) as well as staining differences between samples. While most of the
410 | selected markers are specifically expressed in immune cell subtypes, we can see
411 | that E-Cadherin (a marker for epithelial (tumor) cells) shows a similar
412 | expression range across all patients.
413 |
414 | Finally, we will use non-linear dimensionality reduction methods to project
415 | cells from a high-dimensional (40) down to a low-dimensional (2) space. For this
416 | the `r BiocStyle::Biocpkg("scater")` package provides the `runUMAP` and
417 | `runTSNE` functions. To ensure reproducibility, we will need to set a seed;
418 | however different seeds and different parameter settings (e.g., the `perplexity`
419 | parameter in the `runTSNE` function) need to be tested to avoid
420 | over-interpretation of visualization artefacts. For dimensionality reduction, we
421 | will use all channels that show biological variation across the dataset.
422 | However, marker selection can be performed with different biological questions
423 | in mind. Here, both the `runUMAP` and `runTSNE` functions are not deterministic,
424 | meaning they produce different results across different runs. We therefore
425 | set a `seed` in this chunk for reproducibility purposes.
426 |
427 | ```{r dimred, message=FALSE}
428 | library(scater)
429 |
430 | set.seed(220225)
431 | spe <- runUMAP(spe, subset_row = rowData(spe)$use_channel, exprs_values = "exprs")
432 | spe <- runTSNE(spe, subset_row = rowData(spe)$use_channel, exprs_values = "exprs")
433 | ```
434 |
435 | After dimensionality reduction, the low-dimensional embeddings are stored in the
436 | `reducedDim` slot.
437 |
438 | ```{r show-dimred-slot}
439 | reducedDims(spe)
440 | head(reducedDim(spe, "UMAP"))
441 | ```
442 |
443 | Visualization of the low-dimensional embedding facilitates assessment of
444 | potential "batch effects". The `dittoDimPlot`
445 | function allows flexible visualization. It returns `ggplot` objects which
446 | can be further modified.
447 |
448 | ```{r visualizing-dimred-1, message=FALSE, fig.height=8}
449 | library(patchwork)
450 |
451 | # visualize patient id
452 | p1 <- dittoDimPlot(spe, var = "patient_id", reduction.use = "UMAP", size = 0.2) +
453 | scale_color_manual(values = metadata(spe)$color_vectors$patient_id) +
454 | ggtitle("Patient ID on UMAP")
455 | p2 <- dittoDimPlot(spe, var = "patient_id", reduction.use = "TSNE", size = 0.2) +
456 | scale_color_manual(values = metadata(spe)$color_vectors$patient_id) +
457 | ggtitle("Patient ID on TSNE")
458 |
459 | # visualize region of interest id
460 | p3 <- dittoDimPlot(spe, var = "ROI", reduction.use = "UMAP", size = 0.2) +
461 | scale_color_manual(values = metadata(spe)$color_vectors$ROI) +
462 | ggtitle("ROI ID on UMAP")
463 | p4 <- dittoDimPlot(spe, var = "ROI", reduction.use = "TSNE", size = 0.2) +
464 | scale_color_manual(values = metadata(spe)$color_vectors$ROI) +
465 | ggtitle("ROI ID on TSNE")
466 |
467 | # visualize indication
468 | p5 <- dittoDimPlot(spe, var = "indication", reduction.use = "UMAP", size = 0.2) +
469 | scale_color_manual(values = metadata(spe)$color_vectors$indication) +
470 | ggtitle("Indication on UMAP")
471 | p6 <- dittoDimPlot(spe, var = "indication", reduction.use = "TSNE", size = 0.2) +
472 | scale_color_manual(values = metadata(spe)$color_vectors$indication) +
473 | ggtitle("Indication on TSNE")
474 |
475 | (p1 + p2) / (p3 + p4) / (p5 + p6)
476 | ```
477 |
478 | ```{r, visualizing-dimred-2, message=FALSE}
479 | # visualize marker expression
480 | p1 <- dittoDimPlot(spe, var = "Ecad", reduction.use = "UMAP",
481 | assay = "exprs", size = 0.2) +
482 | scale_color_viridis(name = "Ecad") +
483 | ggtitle("E-Cadherin expression on UMAP")
484 | p2 <- dittoDimPlot(spe, var = "CD45RO", reduction.use = "UMAP",
485 | assay = "exprs", size = 0.2) +
486 | scale_color_viridis(name = "CD45RO") +
487 | ggtitle("CD45RO expression on UMAP")
488 | p3 <- dittoDimPlot(spe, var = "Ecad", reduction.use = "TSNE",
489 | assay = "exprs", size = 0.2) +
490 | scale_color_viridis(name = "Ecad") +
491 | ggtitle("Ecad expression on TSNE")
492 | p4 <- dittoDimPlot(spe, var = "CD45RO", reduction.use = "TSNE",
493 | assay = "exprs", size = 0.2) +
494 | scale_color_viridis(name = "CD45RO") +
495 | ggtitle("CD45RO expression on TSNE")
496 |
497 | (p1 + p2) / (p3 + p4)
498 | ```
499 |
500 | We observe a strong separation of tumor cells (Ecad+ cells) between the
501 | patients. Here, each patient was diagnosed with a different tumor type. The
502 | separation of tumor cells could be of biological origin since tumor cells tend
503 | to display differences in expression between patients and cancer types and/or of
504 | technical origin: the panel only contains a single tumor marker (E-Cadherin) and
505 | therefore slight technical differences in staining cause visible separation
506 | between cells of different patients. Nevertheless, the immune compartment
507 | (CD45RO+ cells) mixes between patients and we can rule out systematic staining
508 | differences between patients.
509 |
510 | ## Save objects
511 |
512 | The modified `SpatialExperiment` object is saved for further downstream analysis.
513 |
514 | ```{r save-objects-quality-control}
515 | saveRDS(spe, "data/spe.rds")
516 | ```
517 |
518 | ```{r testing, include=FALSE}
519 | library(testthat)
520 |
521 | expect_equal(reducedDimNames(spe), c("UMAP", "TSNE"))
522 |
523 | expect_equal(head(reducedDim(spe, "UMAP"), n = 10),
524 | structure(c(-4.81016665957092, -4.39734727404236, -4.36988336107849,
525 | -4.08161431810974, -6.23401195070862, -5.66659671328186, -4.13260585329651,
526 | -0.930108251787412, -6.33803874514221, -5.40764981768249, -3.77736220987329,
527 | -3.45603595407495, -3.44556103380213, -3.16211901338587, -2.43397555978784,
528 | -3.42805753381739, -3.22162519128809, 4.09678735105505, -2.20264754922876,
529 | -3.72411928804407), dim = c(10L, 2L), dimnames = list(c("Patient1_001_1",
530 | "Patient1_001_2", "Patient1_001_3", "Patient1_001_4", "Patient1_001_5",
531 | "Patient1_001_6", "Patient1_001_7", "Patient1_001_8", "Patient1_001_9",
532 | "Patient1_001_10"), c("UMAP1", "UMAP2"))), tolerance = 0.01)
533 |
534 | expect_equal(reducedDim(spe, "UMAP")[100:130,],
535 | structure(c(-3.89626533053039, -7.13317567370056, -6.77943021319031,
536 | -7.11419230959533, -2.78164083025574, -3.94929200670837, -5.95046884081482,
537 | 0.763116416715395, -5.68849593660949, -6.22845536730407, -6.58062154314636,
538 | -5.80118590853332, -6.25312644503235, -5.86530810854553, -7.08645230791687,
539 | -4.12036305925964, -5.97095376513122, -4.08220035097717, -5.91776162645935,
540 | 0.557355967544329, -7.09781867525696, -5.62668353579162, -5.04605323336242,
541 | -4.7885444786322, -7.22946149370788, -5.26700240633606, -4.82962876818298,
542 | -4.25380879900573, 1.08371841647507, 1.44114249684693, -4.87143928072571,
543 | -3.41620216997156, -3.93748961122522, -2.58227525384912, -4.26784573228846,
544 | -5.4897724214364, -3.4107941213418, -3.86142430933008, 3.20950664846411,
545 | -3.71332822473535, -3.86072955759058, -2.47767458589563, -3.84708223970423,
546 | -4.17958079011927, -3.9504874769021, -2.7868140760232, -3.12866697938928,
547 | -2.78958783777246, -2.86517844827661, -3.41490874917993, 1.00301005212774,
548 | -3.6809254709054, -3.49266205461511, -2.95477948816309, -2.56105003984461,
549 | -3.1081076684762, -3.18702707918177, -3.78098068864832, -3.24055348070154,
550 | 0.368578153533841, 0.224757569713498, -2.99749766023645), dim = c(31L,
551 | 2L), dimnames = list(c("Patient1_001_100", "Patient1_001_101",
552 | "Patient1_001_102", "Patient1_001_103", "Patient1_001_104", "Patient1_001_105",
553 | "Patient1_001_106", "Patient1_001_107", "Patient1_001_108", "Patient1_001_109",
554 | "Patient1_001_110", "Patient1_001_111", "Patient1_001_112", "Patient1_001_113",
555 | "Patient1_001_114", "Patient1_001_115", "Patient1_001_116", "Patient1_001_117",
556 | "Patient1_001_118", "Patient1_001_119", "Patient1_001_120", "Patient1_001_121",
557 | "Patient1_001_122", "Patient1_001_123", "Patient1_001_124", "Patient1_001_125",
558 | "Patient1_001_126", "Patient1_001_127", "Patient1_001_128", "Patient1_001_129",
559 | "Patient1_001_130"), c("UMAP1", "UMAP2"))), tolerance = 0.01)
560 |
561 | expect_equal(head(reducedDim(spe, "TSNE"), n = 10),
562 | structure(c(8.5000231819133, 8.69417707607171, 8.66506409812202,
563 | 8.70346540608834, -4.85956000801397, -3.50347074831182, 8.62888756799549,
564 | -8.0907749992851, 3.45775862206781, 7.54108785460927, -30.1566664235465,
565 | -28.3558044334759, -28.2668374978953, -26.5385662781522, -21.5856934742621,
566 | -26.4730337308963, -26.8568530864791, 19.6159281655837, 20.5991986552636,
567 | -32.2252709733315), dim = c(10L, 2L), dimnames = list(c("Patient1_001_1",
568 | "Patient1_001_2", "Patient1_001_3", "Patient1_001_4", "Patient1_001_5",
569 | "Patient1_001_6", "Patient1_001_7", "Patient1_001_8", "Patient1_001_9",
570 | "Patient1_001_10"), c("TSNE1", "TSNE2"))), tolerance = 0.01)
571 |
572 | expect_equal(reducedDim(spe, "TSNE")[100:130,],
573 | structure(c(10.6418413622177, -8.19603725541398, -7.57330997290384,
574 | -11.9649205211375, 21.7044793746905, 10.5299308898414, -2.89361532839768,
575 | -14.1479057033804, 7.04881694519824, -2.27845635304146, -6.55119000983192,
576 | -1.82422115143277, -2.4097229038924, -1.64709145874245, -9.64650462467637,
577 | 8.35520499244715, -2.63232041735159, -6.92707082054738, -2.87217958426136,
578 | -8.06273786914118, -12.041109790411, 7.21278447587393, 2.78699525470502,
579 | 2.34486289464684, -9.89574054981405, 2.1828274368045, 8.49912719972949,
580 | 8.11668313724476, -5.71319426232428, -0.190728643680821, 3.48920576978465,
581 | -25.9423147604023, -32.0648373504634, -24.1480435193885, -33.8780988685417,
582 | -23.1903399682141, -26.1356569942554, -33.8181999524557, 0.0329466224873345,
583 | -32.6325983943606, -32.6301008178222, -24.1756860322692, -32.9821005945978,
584 | -35.8676859419689, -34.0911214749092, -25.5808689943855, -26.7586018145491,
585 | -25.6034715982434, -20.3596025910865, -28.0932829916676, -5.40775249366363,
586 | -31.3872828096166, -32.5651665675897, -27.111323151666, -24.101850440361,
587 | -26.9341083442237, -28.5420714838967, -30.3678523339134, -27.0214667414901,
588 | -5.02684547281321, -16.0298817965721, -26.769553922745), dim = c(31L,
589 | 2L), dimnames = list(c("Patient1_001_100", "Patient1_001_101",
590 | "Patient1_001_102", "Patient1_001_103", "Patient1_001_104", "Patient1_001_105",
591 | "Patient1_001_106", "Patient1_001_107", "Patient1_001_108", "Patient1_001_109",
592 | "Patient1_001_110", "Patient1_001_111", "Patient1_001_112", "Patient1_001_113",
593 | "Patient1_001_114", "Patient1_001_115", "Patient1_001_116", "Patient1_001_117",
594 | "Patient1_001_118", "Patient1_001_119", "Patient1_001_120", "Patient1_001_121",
595 | "Patient1_001_122", "Patient1_001_123", "Patient1_001_124", "Patient1_001_125",
596 | "Patient1_001_126", "Patient1_001_127", "Patient1_001_128", "Patient1_001_129",
597 | "Patient1_001_130"), c("TSNE1", "TSNE2"))), tolerance = 0.01)
598 | ```
599 |
600 | ## Session Info
601 |
602 |
603 | SessionInfo
604 |
605 | ```{r, echo = FALSE}
606 | sessionInfo()
607 | ```
608 |
--------------------------------------------------------------------------------
/09-singlecell_visualization.Rmd:
--------------------------------------------------------------------------------
1 | # Single cell visualization {#single-cell-visualization}
2 |
3 | The following section describes typical approaches for visualizing
4 | single-cell data.
5 |
6 | This chapter is divided into three parts. Section \@ref(cell-type-level)
7 | will highlight visualization approaches downstream of cell type
8 | classification from Section \@ref(classification). We will then focus on
9 | visualization methods that relate single-cell data to the sample level
10 | in Section \@ref(sample-level). Lastly, Section \@ref(rich-example) will
11 | provide a more customized example on how to integrate various
12 | single-cell and sample metadata into one heatmap using the
13 | [ComplexHeatmap](https://bioconductor.org/packages/release/bioc/html/ComplexHeatmap.html)
14 | package [@Gu2016].
15 |
16 | Visualization functions from popular R packages in single-cell research
17 | such as
18 | [scater](https://bioconductor.org/packages/release/bioc/html/scater.html),
19 | [DittoSeq](https://bioconductor.org/packages/release/bioc/html/dittoSeq.html)
20 | and
21 | [CATALYST](https://bioconductor.org/packages/release/bioc/html/CATALYST.html)
22 | will be utilized. We will recycle methods and functions that we have
23 | used in previous sections, while also introducing new ones.
24 |
25 | Please note that this chapter aims to provide an overview on **common**
26 | visualization options and should be seen as a stepping-stone. However,
27 | many more options exist and the user should customize the visualization
28 | according to the biological question at hand.
29 |
30 | ## Load data
31 |
32 | First, we will read in the previously generated `SpatialExperiment`
33 | object.
34 |
35 | ```{r read-data-scviz, message=FALSE}
36 | spe <- readRDS("data/spe.rds")
37 | ```
38 |
39 | For visualization purposes, we will define markers that were used for
40 | cell type classification and markers that can indicate a specific cell
41 | state (e.g., Ki67 for proliferating cells).
42 |
43 | ```{r define-markers, message=FALSE}
44 | # Define cell phenotype markers (used to distinguish cell types)
45 | type_markers <- c("Ecad", "CD45RO", "CD20", "CD3", "FOXP3", "CD206", "MPO",
46 | "SMA", "CD8a", "CD4", "HLADR", "CD15", "CD38", "PDGFRb")
47 |
48 | # Define cell state markers (e.g., Ki67 for proliferating cells)
49 | state_markers <- c("CarbonicAnhydrase", "Ki67", "PD1", "GrzB", "PDL1",
50 | "ICOS", "TCF7", "VISTA")
51 |
52 | # Label each row of spe in rowData as "type", "state", or "other" (in neither vector)
53 | rowData(spe)$marker_class <- ifelse(rownames(spe) %in% type_markers, "type",
54 | ifelse(rownames(spe) %in% state_markers, "state",
55 | "other"))
56 | ```
57 |
58 | ## Cell-type level {#cell-type-level}
59 |
60 | In the first section of this chapter, the grouping-level for the
61 | visualization approaches will be the cell type classification from
62 | Section \@ref(classification). Other grouping levels (e.g., cluster
63 | assignments from Section \@ref(clustering)) are possible and the user
64 | should adjust depending on the chosen analysis workflow.
65 |
66 | ### Dimensionality reduction visualization
67 |
68 | As seen before, we can visualize single-cells in low-dimensional space.
69 | Often, non-linear methods for dimensionality reduction such as tSNE and
70 | UMAP are used. They aim to preserve the distances between each cell and its
71 | neighbors in the high-dimensional space.
72 |
73 | Interpreting these plots is not trivial, but local neighborhoods in the
74 | plot can suggest similarity in expression for given cells. See
75 | [Orchestrating Single-Cell Analysis with
76 | Bioconductor](https://bioconductor.org/books/release/OSCA/) for more
77 | details.
78 |
79 | Here, we will use `dittoDimPlot` from the
80 | [DittoSeq](https://bioconductor.org/packages/release/bioc/html/dittoSeq.html)
81 | package and `plotReducedDim` from the
82 | [scater](https://bioconductor.org/packages/release/bioc/html/scater.html) package
83 | to visualize the fastMNN-corrected UMAP colored by cell type and
84 | expression (using the asinh-transformed intensities), respectively.
85 |
86 | Both functions are highly flexible and return `ggplot` objects which can
87 | be further modified.
88 |
89 | ```{r cell type umap, fig.width=10, fig.height=5, message=FALSE}
90 | library(dittoSeq)
91 | library(scater)
92 | library(patchwork)
93 | library(cowplot)
94 | library(viridis)
95 |
96 | ## UMAP colored by cell type and expression - dittoDimPlot
97 | p1 <- dittoDimPlot(spe,
98 | var = "celltype",
99 | reduction.use = "UMAP_mnnCorrected",
100 | size = 0.2,
101 | do.label = TRUE) +
102 | scale_color_manual(values = metadata(spe)$color_vectors$celltype) +
103 | theme(legend.title = element_blank()) +
104 | ggtitle("Cell types on UMAP, integrated cells")
105 |
106 | p2 <- dittoDimPlot(spe,
107 | var = "Ecad",
108 | assay = "exprs",
109 | reduction.use = "UMAP_mnnCorrected",
110 | size = 0.2,
111 | colors = viridis(100),
112 | do.label = TRUE) +
113 | scale_color_viridis()
114 |
115 | p1 + p2
116 | ```
117 |
118 | The `plotReducedDim` function of the `scater` package provides an alternative
119 | way for visualizing cells in low dimensions. Here, we loop over all type
120 | markers, generate one plot per marker and plot the individual plots side-by-side.
121 |
122 | ```{r cell type umap 2, fig.width=10, fig.height=10, message=FALSE}
123 | # UMAP colored by expression for all markers - plotReducedDim
124 | plot_list <- lapply(rownames(spe)[rowData(spe)$marker_class == "type"], function(x){
125 | p <- plotReducedDim(spe,
126 | dimred = "UMAP_mnnCorrected",
127 | colour_by = x,
128 | by_exprs_values = "exprs",
129 | point_size = 0.2)
130 | return(p)
131 | })
132 |
133 | plot_grid(plotlist = plot_list)
134 | ```
135 |
136 | ### Heatmap visualization
137 |
138 | Next, it is often useful to visualize single-cell expression per cell
139 | type in form of a heatmap. For this, we will use the `dittoHeatmap`
140 | function from the
141 | [DittoSeq](https://bioconductor.org/packages/release/bioc/html/dittoSeq.html)
142 | package.
143 |
144 | We sub-sample the dataset to 4000 cells for ease of visualization and
145 | overlay the cancer type and patient ID from which the cells were
146 | extracted.
147 |
148 | ```{r celltype heatmap, fig.height = 7,fig.width = 7, message=FALSE}
149 | set.seed(220818)
150 | cur_cells <- sample(seq_len(ncol(spe)), 4000)
151 |
152 | # Heatmap visualization - DittoHeatmap
153 | dittoHeatmap(spe[,cur_cells],
154 | genes = rownames(spe)[rowData(spe)$marker_class == "type"],
155 | assay = "exprs",
156 | cluster_cols = FALSE,
157 | scale = "none",
158 | heatmap.colors = viridis(100),
159 | annot.by = c("celltype", "indication", "patient_id"),
160 | annotation_colors = list(indication = metadata(spe)$color_vectors$indication,
161 | patient_id = metadata(spe)$color_vectors$patient_id,
162 | celltype = metadata(spe)$color_vectors$celltype))
163 | ```
164 |
165 | Similarly, we can visualize the mean marker expression per cell type for all
166 | cells by first calculating the mean marker expression per cell type using the
167 | `aggregateAcrossCells` function from the
168 | [scuttle](https://bioconductor.org/packages/release/bioc/html/scuttle.html)
169 | package and then using `dittoHeatmap`. We will annotate the heatmap with the
170 | number of cells per cell type and we will use different approaches to feature
171 | scaling.
172 |
173 | ```{r celltype mean-expression-per-cluster, fig.height=5}
174 | library(scuttle)
175 |
176 | ## aggregate by cell type
177 | celltype_mean <- aggregateAcrossCells(as(spe, "SingleCellExperiment"),
178 | ids = spe$celltype,
179 | statistics = "mean",
180 | use.assay.type = "exprs",
181 | subset.row = rownames(spe)[rowData(spe)$marker_class == "type"])
182 |
183 | # No scaling
184 | dittoHeatmap(celltype_mean,
185 | assay = "exprs",
186 | cluster_cols = TRUE,
187 | scale = "none",
188 | heatmap.colors = viridis(100),
189 | annot.by = c("celltype", "ncells"),
190 | annotation_colors = list(celltype = metadata(spe)$color_vectors$celltype,
191 | ncells = plasma(100)))
192 |
193 | # Scaled to max
194 | dittoHeatmap(celltype_mean,
195 | assay = "exprs",
196 | cluster_cols = TRUE,
197 | scaled.to.max = TRUE,
198 | heatmap.colors.max.scaled = inferno(100),
199 | annot.by = c("celltype", "ncells"),
200 | annotation_colors = list(celltype = metadata(spe)$color_vectors$celltype,
201 | ncells = plasma(100)))
202 |
203 | # Z score scaled
204 | dittoHeatmap(celltype_mean,
205 | assay = "exprs",
206 | cluster_cols = TRUE,
207 | annot.by = c("celltype", "ncells"),
208 | annotation_colors = list(celltype = metadata(spe)$color_vectors$celltype,
209 | ncells = plasma(100)))
210 | ```
211 |
212 | As illustrated above for not-, max-, and Z score-scaled expression values,
213 | different ways of scaling can have strong effects on visualization
214 | output and we encourage the user to test multiple options.
215 |
216 | Overall, we can observe cell-type specific marker expression (e.g., Tumor
217 | = Ecad high and B cells = CD20 high) in agreement with the gating scheme
218 | of Section \@ref(classification).
219 |
220 | ### Violin plot visualization
221 |
222 | The `plotExpression` function from the
223 | [scater](https://bioconductor.org/packages/release/bioc/html/scater.html) package
224 | allows plotting the distribution of expression values across cell types
225 | for a chosen set of proteins. The output is a `ggplot` object which can be
226 | modified further.
227 |
228 | ```{r celltype violin, message=FALSE, fig.height=12}
229 | # Violin Plot - plotExpression
230 | plotExpression(spe[,cur_cells],
231 | features = rownames(spe)[rowData(spe)$marker_class == "type"],
232 | x = "celltype",
233 | exprs_values = "exprs",
234 | colour_by = "celltype") +
235 | theme(axis.text.x = element_text(angle = 90))+
236 | scale_color_manual(values = metadata(spe)$color_vectors$celltype)
237 | ```
238 |
239 | ### Scatter plot visualization
240 |
241 | Moreover, a protein expression based scatter plot can be generated with
242 | `dittoScatterPlot` (returns a `ggplot` object). We overlay the plot with
243 | the cell type information.
244 |
245 | ```{r celltype scatter, message=FALSE}
246 | # Scatter plot
247 | dittoScatterPlot(spe,
248 | x.var = "CD3",
249 | y.var="CD20",
250 | assay.x = "exprs",
251 | assay.y = "exprs",
252 | color.var = "celltype") +
253 | scale_color_manual(values = metadata(spe)$color_vectors$celltype) +
254 | ggtitle("Scatterplot for CD3/CD20 labelled by celltype")
255 | ```
256 |
257 | We can nicely observe how the "B next to T cell" phenotype (`BnTcell`)
258 | has high expression values for both CD20 and CD3.
259 |
260 | **Of note**, in a setting where the user aims to assign labels to
261 | clusters based on marker genes/proteins, all of the above plots can be
262 | particularly helpful.
263 |
264 | ### Barplot visualization
265 |
266 | In order to display frequencies of cell types per sample/patient, the
267 | `dittoBarPlot` function will be used. Data can be represented as
268 | percentages or counts and again `ggplot` objects are outputted.
269 |
270 | ```{r barplot celltype, message=FALSE}
271 | # by sample_id - percentage
272 | dittoBarPlot(spe,
273 | var = "celltype",
274 | group.by = "sample_id") +
275 | scale_fill_manual(values = metadata(spe)$color_vectors$celltype)
276 |
277 | # by patient_id - percentage
278 | dittoBarPlot(spe,
279 | var = "celltype",
280 | group.by = "patient_id") +
281 | scale_fill_manual(values = metadata(spe)$color_vectors$celltype)
282 |
283 | # by patient_id - count
284 | dittoBarPlot(spe,
285 | scale = "count",
286 | var = "celltype",
287 | group.by = "patient_id") +
288 | scale_fill_manual(values = metadata(spe)$color_vectors$celltype)
289 | ```
290 |
291 | We can see that cell type frequencies change between samples/patients
292 | and that the highest proportion/counts of plasma cells and stromal
293 | cells can be observed for Patient 2 and Patient 4, respectively.
294 |
295 | ### CATALYST-based visualization
296 |
297 | In the following, we highlight some useful visualization
298 | functions from the
299 | [CATALYST](https://bioconductor.org/packages/release/bioc/html/CATALYST.html)
300 | package.
301 |
302 | To this end, we will first convert the `SpatialExperiment` object into a
303 | CATALYST-compatible format.
304 |
305 | ```{r celltype CATALYST}
306 | library(CATALYST)
307 |
308 | # Save SPE in CATALYST-compatible object with renamed colData entries and
309 | # new metadata information
310 | spe_cat <- spe
311 |
312 | spe_cat$sample_id <- factor(spe$sample_id)
313 | spe_cat$condition <- factor(spe$indication)
314 | spe_cat$cluster_id <- factor(spe$celltype)
315 |
316 | # Add celltype information to metadata
317 | metadata(spe_cat)$cluster_codes <- data.frame(celltype = factor(spe_cat$celltype))
318 | ```
319 |
320 | All of the `CATALYST` functions presented below return `ggplot` objects,
321 | which allow flexible downstream adjustment.
322 |
323 | #### Pseudobulk-level MDS plot
324 |
325 | Pseudobulk-level multi-dimensional scaling (MDS) plots can be rendered
326 | with the exported `pbMDS` function.
327 |
328 | Here, we will use `pbMDS` to highlight expression similarities between
329 | cell types and subsequently for each celltype-sample-combination.
330 |
331 | ```{r celltype pbmds, message=FALSE}
332 | # MDS pseudobulk by cell type
333 | pbMDS(spe_cat,
334 | by = "cluster_id",
335 | features = rownames(spe_cat)[rowData(spe_cat)$marker_class == "type"],
336 | label_by = "cluster_id",
337 | k = "celltype") +
338 | scale_color_manual(values = metadata(spe_cat)$color_vectors$celltype)
339 |
340 | # MDS pseudobulk by cell type and sample_id
341 | pbMDS(spe_cat,
342 | by = "both",
343 | features = rownames(spe_cat)[rowData(spe_cat)$marker_class == "type"],
344 | k = "celltype",
345 | shape_by = "condition",
346 | size_by = TRUE) +
347 | scale_color_manual(values = metadata(spe_cat)$color_vectors$celltype)
348 | ```
349 |
350 | We can see that the pseudobulk-expression profile of neutrophils seems
351 | markedly distinct from the other cell types, while comparable cell types
352 | such as the T cell subtypes group together. Furthermore, pseudobulk
353 | cell-type profiles from SCCHN appear different from the other
354 | indications.
355 |
356 | #### Reduced dimension plot on CLR of proportions
357 |
358 | The `clrDR` function produces dimensionality reduction plots on centered
359 | log-ratios (CLR) of sample/cell type proportions across cell
360 | type/samples.
361 |
362 | As with `pbMDS`, the output plots aim to illustrate the degree of
363 | similarity between cell types based on sample proportions.
364 |
365 | ```{r celltype - clrDR, message=FALSE}
366 | # CLR on cluster proportions across samples
367 | clrDR(spe_cat,
368 | dr = "PCA",
369 | by = "cluster_id",
370 | k = "celltype",
371 | label_by = "cluster_id",
372 | arrow_col = "sample_id",
373 | point_pal = metadata(spe_cat)$color_vectors$celltype)
374 | ```
375 |
376 | We can again observe that neutrophils have a divergent profile also in
377 | terms of their sample proportions.
378 |
379 | #### Pseudobulk expression boxplot
380 |
381 | The `plotPbExprs` generates combined box- and jitter-plots of aggregated marker
382 | expression per cell type and sample (image). Here, we further split the data by
383 | cancer type.
384 |
385 | ```{r celltype pbExprs, fig.width=7, fig.height=12, message=FALSE}
386 | plotPbExprs(spe_cat,
387 | k = "celltype",
388 | facet_by = "cluster_id",
389 | ncol = 2,
390 | features = rownames(spe_cat)[rowData(spe_cat)$marker_class == "type"]) +
391 | scale_color_manual(values = metadata(spe_cat)$color_vectors$indication)
392 | ```
393 |
394 | Notably, CD15 levels are elevated in SCCHN in comparison to all other
395 | indications for most cell types.
396 |
397 | ## Sample-level {#sample-level}
398 |
399 | In the next section, we will shift the grouping-level focus from the
400 | cell type to the sample-level. Sample-levels will be further divided
401 | into the sample-(image) and patient-level.
402 |
403 | Although we will mostly repeat the functions from the previous section
404 | \@ref(cell-type-level), sample- and patient-level centered visualization
405 | can provide additional quality control and biological interpretation.
406 |
407 | ### Dimensionality reduction visualization
408 |
409 | Visualization of low-dimensional embeddings, here comparing non-corrected and
410 | fastMNN-corrected UMAPs, and coloring it by sample-levels is often used
411 | for "batch effect" assessment as mentioned in Section
412 | \@ref(cell-quality).
413 |
414 | We will again use `dittoDimPlot`.
415 |
416 | ```{r sample umap, fig.width=8, fig.height=8, message = FALSE}
417 | ## UMAPs colored by sample ID and patient ID (uncorrected vs. fastMNN-corrected) - dittoDimPlot
418 | p1 <- dittoDimPlot(spe,
419 | var = "sample_id",
420 | reduction.use = "UMAP",
421 | size = 0.2,
422 | colors = viridis(100),
423 | do.label = FALSE) +
424 | scale_color_manual(values = metadata(spe)$color_vectors$sample_id) +
425 | theme(legend.title = element_blank()) +
426 | ggtitle("Sample ID")
427 |
428 | p2 <- dittoDimPlot(spe,
429 | var = "sample_id",
430 | reduction.use = "UMAP_mnnCorrected",
431 | size = 0.2,
432 | colors = viridis(100),
433 | do.label = FALSE) +
434 | scale_color_manual(values = metadata(spe)$color_vectors$sample_id) +
435 | theme(legend.title = element_blank()) +
436 | ggtitle("Sample ID")
437 |
438 | p3 <- dittoDimPlot(spe,
439 | var = "patient_id",
440 | reduction.use = "UMAP",
441 | size = 0.2,
442 | do.label = FALSE) +
443 | scale_color_manual(values = metadata(spe)$color_vectors$patient_id) +
444 | theme(legend.title = element_blank()) +
445 | ggtitle("Patient ID")
446 |
447 | p4 <- dittoDimPlot(spe,
448 | var = "patient_id",
449 | reduction.use = "UMAP_mnnCorrected",
450 | size = 0.2,
451 | do.label = FALSE) +
452 | scale_color_manual(values = metadata(spe)$color_vectors$patient_id) +
453 | theme(legend.title = element_blank()) +
454 | ggtitle("Patient ID")
455 |
456 | (p1 + p2) / (p3 + p4)
457 | ```
458 |
459 | As illustrated in Section \@ref(batch-effects), we see that the fastMNN
460 | approach (right side of the plot) leads to mixing of cells across
461 | samples/patients and thus batch effect correction.
462 |
463 | ### Heatmap visualization
464 |
465 | It can be beneficial to use a heatmap to visualize single-cell
466 | expression per sample and patient. Such a plot, which we will create
467 | using `dittoHeatmap`, can highlight biological differences across
468 | samples/patients.
469 |
470 | ```{r sample heatmap, fig.height = 8,fig.width = 8}
471 | # Heatmap visualization - DittoHeatmap
472 | dittoHeatmap(spe[,cur_cells],
473 | genes = rownames(spe)[rowData(spe)$marker_class == "type"],
474 | assay = "exprs",
475 | order.by = c("patient_id","sample_id"),
476 | cluster_cols = FALSE,
477 | scale = "none",
478 | heatmap.colors = viridis(100),
479 | annot.by = c("celltype", "indication", "patient_id", "sample_id"),
480 | annotation_colors = list(celltype = metadata(spe)$color_vectors$celltype,
481 | indication = metadata(spe)$color_vectors$indication,
482 | patient_id = metadata(spe)$color_vectors$patient_id,
483 | sample_id = metadata(spe)$color_vectors$sample_id))
484 | ```
485 |
486 | As in Section \@ref(image-quality), aggregated mean marker expression
487 | per sample/patient allows identification of samples/patients with
488 | outlying expression patterns.
489 |
490 | Here, we will focus on the patient level and use `aggregateAcrossCells`
491 | and `dittoHeatmap`. The heatmap will be annotated with the number of
492 | cells per patient and cancer type and displayed using two scaling
493 | options.
494 |
495 | ```{r sample mean-expression-per-cluster, fig.height=5}
496 | # mean expression by patient_id
497 | patient_mean <- aggregateAcrossCells(as(spe, "SingleCellExperiment"),
498 | ids = spe$patient_id,
499 | statistics = "mean",
500 | use.assay.type = "exprs",
501 | subset.row = rownames(spe)[rowData(spe)$marker_class == "type"])
502 |
503 | # No scaling
504 | dittoHeatmap(patient_mean,
505 | assay = "exprs",
506 | cluster_cols = TRUE,
507 | scale = "none",
508 | heatmap.colors = viridis(100),
509 | annot.by = c("patient_id","indication","ncells"),
510 | annotation_colors = list(patient_id = metadata(spe)$color_vectors$patient_id,
511 | indication = metadata(spe)$color_vectors$indication,
512 | ncells = plasma(100)))
513 |
514 | # Max expression scaling
515 | dittoHeatmap(patient_mean,
516 | assay = "exprs",
517 | cluster_cols = TRUE,
518 | scaled.to.max = TRUE,
519 | heatmap.colors.max.scaled = inferno(100),
520 | annot.by = c("patient_id","indication","ncells"),
521 | annotation_colors = list(patient_id = metadata(spe)$color_vectors$patient_id,
522 | indication = metadata(spe)$color_vectors$indication,
523 | ncells = plasma(100)))
524 |
525 | ```
526 |
527 | As seen before, CD15 levels are elevated in the SCCHN patient, while SMA
528 | levels are highest for Patient 4 (CRC).
529 |
530 | ### Barplot visualization
531 |
532 | Complementary to displaying cell type frequencies per sample/patient, we
533 | can use `dittoBarPlot` to display sample/patient frequencies per cell
534 | type.
535 |
536 | ```{r barplot sample, message=FALSE}
537 | dittoBarPlot(spe,
538 | var = "patient_id",
539 | group.by = "celltype") +
540 | scale_fill_manual(values = metadata(spe)$color_vectors$patient_id)
541 |
542 | dittoBarPlot(spe,
543 | var = "sample_id",
544 | group.by = "celltype") +
545 | scale_fill_manual(values = metadata(spe)$color_vectors$sample_id)
546 | ```
547 |
548 | `Patient2` has the highest and lowest proportion of plasma cells and
549 | neutrophils, respectively.
550 |
551 | ### CATALYST-based visualization
552 |
553 | #### Pseudobulk-level MDS plot
554 |
555 | Expression-based pseudobulks for each sample can be compared with the
556 | `pbMDS` function.
557 |
558 | ```{r sample-pbmds}
559 | # MDS pseudobulk by sample_id
560 | pbMDS(spe_cat,
561 | by = "sample_id",
562 | color_by = "sample_id",
563 | features = rownames(spe_cat)[rowData(spe_cat)$marker_class == "type"]) +
564 | scale_color_manual(values = metadata(spe_cat)$color_vectors$sample_id)
565 | ```
566 |
567 | There are marked differences in pseudobulk-expression patterns between
568 | samples and across patients, which can be driven by biological
569 | differences and also technical aspects such as divergent region
570 | selection.
571 |
572 | #### Reduced dimension plot on CLR of proportions
573 |
574 | The `clrDR` function can also be used to analyze similarity of samples
575 | based on cell type proportions.
576 |
577 | ```{r sample-clrDR}
578 | # CLR on sample proportions across clusters
579 | clrDR(spe_cat,
580 | dr = "PCA",
581 | by = "sample_id",
582 | point_col = "sample_id",
583 | k = "celltype",
584 | point_pal = metadata(spe_cat)$color_vectors$sample_id) +
585 | scale_color_manual(values = metadata(spe_cat)$color_vectors$celltype)
586 | ```
587 |
588 | There are notable differences between samples based on their cell type
589 | proportions.
590 |
591 | Interestingly, `Patient3_001`, `Patient1_003`, `Patient4_007` and
592 | `Patient4_006` group together and the PC loadings indicate a strong
593 | contribution of BnT and B cells, which could suggest the formation of
594 | tertiary lymphoid structures (TLS). In Section \@ref(spatial-viz), we
595 | will be able to confirm this hypothesis visually on the images.
596 |
597 | ## Further examples {#rich-example}
598 |
599 | In the last section of this chapter, we will use the popular
600 | [ComplexHeatmap](https://bioconductor.org/packages/release/bioc/html/ComplexHeatmap.html)
601 | package to create a visualization example that combines various
602 | cell-type- and sample-level information.
603 |
604 | [ComplexHeatmap](https://bioconductor.org/packages/release/bioc/html/ComplexHeatmap.html)
605 | is highly versatile and was originally inspired by the
606 | [pheatmap](https://cran.r-project.org/web/packages/pheatmap/index.html)
607 | package. Therefore, many arguments have the same/similar names.
608 |
609 | For more details, we recommend reading the [reference
610 | book](https://jokergoo.github.io/ComplexHeatmap-reference/book/).
611 |
612 | ### Publication-ready ComplexHeatmap
613 |
614 | For this example, we will concatenate heatmaps and annotations
615 | horizontally into one rich heatmap list. The grouping-level for the
616 | visualization will again be the cell type information from Section
617 | \@ref(classification).
618 |
619 | Initially, we will create two separate `Heatmap` objects for cell type
620 | and state markers.
621 |
622 | Then, metadata information, including the cancer type proportion and
623 | number of cells/patients per cell type, will be extracted into
624 | `HeatmapAnnotation` objects.
625 |
626 | Notably, we will add spatial features per cell type, here the number of
627 | neighbors extracted from `colPair(spe)` and cell area, in another
628 | `HeatmapAnnotation` object.
629 |
630 | Ultimately, all objects are combined in a `HeatmapList` and visualized.
631 |
632 | ```{r complex-heatmap, warning = FALSE, message = FALSE, fig.width=9, fig.height=5}
633 | library(ComplexHeatmap)
634 | library(circlize)
635 | library(tidyverse)
636 | set.seed(22)
637 |
638 | ### 1. Heatmap bodies ###
639 |
640 | # Heatmap body color
641 | col_exprs <- colorRamp2(c(0,1,2,3,4),
642 | c("#440154FF","#3B518BFF","#20938CFF",
643 | "#6ACD5AFF","#FDE725FF"))
644 |
645 | # Create Heatmap objects
646 | # By cell type markers
647 | celltype_mean <- aggregateAcrossCells(as(spe, "SingleCellExperiment"),
648 | ids = spe$celltype,
649 | statistics = "mean",
650 | use.assay.type = "exprs",
651 | subset.row = rownames(spe)[rowData(spe)$marker_class == "type"])
652 |
653 | h_type <- Heatmap(t(assay(celltype_mean, "exprs")),
654 | column_title = "type_markers",
655 | col = col_exprs,
656 | name= "mean exprs",
657 | show_row_names = TRUE,
658 | show_column_names = TRUE)
659 |
660 | # By cell state markers
661 | cellstate_mean <- aggregateAcrossCells(as(spe, "SingleCellExperiment"),
662 | ids = spe$celltype,
663 | statistics = "mean",
664 | use.assay.type = "exprs",
665 | subset.row = rownames(spe)[rowData(spe)$marker_class == "state"])
666 |
667 | h_state <- Heatmap(t(assay(cellstate_mean, "exprs")),
668 | column_title = "state_markers",
669 | col = col_exprs,
670 | name= "mean exprs",
671 | show_row_names = TRUE,
672 | show_column_names = TRUE)
673 |
674 |
675 | ### 2. Heatmap annotation ###
676 |
677 | ### 2.1 Metadata features
678 |
679 | anno <- colData(celltype_mean) %>% as.data.frame() %>% select(celltype, ncells)
680 |
681 | # Proportion of indication per celltype; rows reordered to the heatmap row order
682 | indication <- unclass(prop.table(table(spe$celltype, spe$indication), margin = 1))
683 | indication <- indication[as.character(anno$celltype), , drop = FALSE]
684 |
685 | # Number of contributing patients per celltype
686 | cluster_PID <- colData(spe) %>%
687 |     as.data.frame() %>%
688 |     select(celltype, patient_id) %>%
689 |     table() %>%  # celltype x patient_id contingency table (no grouping needed)
690 |     as.data.frame()
691 | n_PID <- cluster_PID %>%
692 |     filter(Freq > 0) %>%  # keep celltype/patient pairs that actually occur
693 |     count(celltype, name = "n_PID") %>%
694 |     column_to_rownames("celltype")
695 | n_PID <- n_PID[as.character(anno$celltype), , drop = FALSE]  # align to heatmap rows
696 |
697 | # Create HeatmapAnnotation objects
698 | ha_anno <- HeatmapAnnotation(celltype = anno$celltype,
699 | border = TRUE,
700 | gap = unit(1,"mm"),
701 | col = list(celltype = metadata(spe)$color_vectors$celltype),
702 | which = "row")
703 |
704 | ha_meta <- HeatmapAnnotation(n_cells = anno_barplot(anno$ncells, width = unit(10, "mm")),
705 | n_PID = anno_barplot(n_PID, width = unit(10, "mm")),
706 | indication = anno_barplot(indication,width = unit(10, "mm"),
707 | gp = gpar(fill = metadata(spe)$color_vectors$indication)),
708 | border = TRUE,
709 | annotation_name_rot = 90,
710 | gap = unit(1,"mm"),
711 | which = "row")
712 |
713 | ### 2.2 Spatial features
714 |
715 | # Add number of neighbors to spe object (saved in colPair)
716 | spe$n_neighbors <- countLnodeHits(colPair(spe, "neighborhood"))
717 |
718 | # Select spatial features and average over celltypes
719 | spatial <- colData(spe) %>%
720 |     as.data.frame() %>% select(area, celltype, n_neighbors)
721 | # Per-celltype mean of area and n_neighbors; celltype becomes the row name
722 | spatial <- spatial %>%
723 |     select(-celltype) %>%
724 |     aggregate(by = list(celltype = spatial$celltype), FUN = mean) %>%
725 |     column_to_rownames("celltype")
726 | spatial <- spatial[as.character(anno$celltype), , drop = FALSE]  # align to heatmap rows
727 |
728 | # Create HeatmapAnnotation object
729 | ha_spatial <- HeatmapAnnotation(
730 | area = spatial$area,
731 | n_neighbors = spatial$n_neighbors,
732 | border = TRUE,
733 | gap = unit(1,"mm"),
734 | which = "row")
735 |
736 | ### 3. Plot rich heatmap ###
737 |
738 | # Create HeatmapList object
739 | h_list <- h_type +
740 | h_state +
741 | ha_anno +
742 | ha_spatial +
743 | ha_meta
744 |
745 | # Add customized legend for anno_barplot()
746 | lgd <- Legend(title = "indication",
747 | at = colnames(indication),
748 | legend_gp = gpar(fill = metadata(spe)$color_vectors$indication))
749 |
750 | # Plot
751 | draw(h_list,annotation_legend_list = list(lgd))
752 | ```
753 |
754 | This plot summarizes most of the information we have seen in this
755 | chapter previously. In addition, we can observe that tumor cells have
756 | the largest mean cell area, high number of neighbors and elevated Ki67
757 | expression. BnT cells have the highest number of neighbors on average,
758 | which is biologically sound given their predominant location in highly
759 | immune infiltrated regions (such as TLS).
760 |
761 | ### Interactive visualization
762 |
763 | For interactive visualization of the single-cell data the
764 | [iSEE](https://www.bioconductor.org/packages/release/bioc/html/iSEE.html) shiny
765 | application can be used. For a comprehensive tutorial, please refer to the
766 | [iSEE vignette](https://www.bioconductor.org/packages/release/bioc/vignettes/iSEE/inst/doc/basic.html).
767 |
768 | ```{r iSEE, message=FALSE}
769 | if (interactive()) {
770 | library(iSEE)
771 |
772 | iSEE(spe)
773 | }
774 | ```
775 |
776 | ## Session Info
777 |
778 |
779 | SessionInfo
780 |
781 | ```{r, echo = FALSE}
782 | sessionInfo()
783 | ```
784 |
785 |
--------------------------------------------------------------------------------
/10-image_visualization.Rmd:
--------------------------------------------------------------------------------
1 | # Image visualization {#image-visualization}
2 |
3 | The following section describes how to visualize the abundance of
4 | biomolecules (e.g., protein or RNA) as well as cell-specific metadata on
5 | images. Section \@ref(pixel-visualization) focuses on visualizing
6 | pixel-level information including the generation of pseudo-color
7 | composite images. Section \@ref(mask-visualization) highlights the
8 | visualization of cell metadata (e.g., cell phenotype) as well as
9 | summarized pixel intensities on cell segmentation masks. Section
10 | \@ref(cytoviewer) showcases interactive pixel- and
11 | cell-level visualization with the
12 | [cytoviewer](https://bioconductor.org/packages/release/bioc/html/cytoviewer.html)
13 | R/Bioconductor package [@Meyer2024].
14 |
15 | The
16 | [cytomapper](https://www.bioconductor.org/packages/release/bioc/html/cytomapper.html)
17 | R/Bioconductor package was developed to support the handling and
18 | visualization of multiple multi-channel images and segmentation masks
19 | [@Eling2020]. The main data object for image handling is the
20 | [CytoImageList](https://www.bioconductor.org/packages/release/bioc/vignettes/cytomapper/inst/doc/cytomapper.html#5_The_CytoImageList_object)
21 | container which we used in Section \@ref(read-data) to store
22 | multi-channel images and segmentation masks.
23 |
24 | We will first read in the previously processed data and randomly select
25 | 3 images for visualization purposes.
26 |
27 | ```{r read-data-img-viz, message=FALSE}
28 | library(SpatialExperiment)
29 | library(cytomapper)
30 | spe <- readRDS("data/spe.rds")
31 | images <- readRDS("data/images.rds")
32 | masks <- readRDS("data/masks.rds")
33 |
34 | # Sample images
35 | set.seed(220517)
36 | cur_id <- sample(unique(spe$sample_id), 3)
37 | cur_images <- images[names(images) %in% cur_id]
38 | cur_masks <- masks[names(masks) %in% cur_id]
39 | ```
40 |
41 | ## Pixel visualization {#pixel-visualization}
42 |
43 | The following section gives examples for visualizing individual channels
44 | or multiple channels as pseudo-color composite images. For this the
45 | `cytomapper` package exports the `plotPixels` function which expects a
46 | `CytoImageList` object storing one or multiple multi-channel images. In
47 | the simplest use case, a single channel can be visualized as follows:
48 |
49 | ```{r single-channel}
50 | plotPixels(cur_images,
51 | colour_by = "Ecad",
52 | bcg = list(Ecad = c(0, 5, 1)))
53 | ```
54 |
55 | The plot above shows the tissue expression of the epithelial tumor
56 | marker E-cadherin on the 3 selected images. The `bcg` parameter (default
57 | `c(0, 1, 1)`) stands for "background", "contrast", "gamma" and controls
58 | these attributes of the image. This parameter takes a named list where
59 | each entry specifies these attributes per channel. The first value of
60 | the numeric vector will be added to the pixel intensities (background);
61 | pixel intensities will be multiplied by the second entry of the vector
62 | (contrast); pixel intensities will be exponentiated by the third entry
63 | of the vector (gamma). In most cases, it is sufficient to adjust the
64 | second (contrast) entry of the vector.
65 |
66 | The following example highlights the visualization of 6 markers (maximum
67 | allowed number of markers) at once per image. The markers indicate the
68 | spatial distribution of tumor cells (E-cadherin), T cells (CD3), B cells
69 | (CD20), CD8+ T cells (CD8a), plasma cells (CD38) and proliferating cells
70 | (Ki67).
71 |
72 | ```{r 6-channel}
73 | plotPixels(cur_images,
74 | colour_by = c("Ecad", "CD3", "CD20", "CD8a", "CD38", "Ki67"),
75 | bcg = list(Ecad = c(0, 5, 1),
76 | CD3 = c(0, 5, 1),
77 | CD20 = c(0, 5, 1),
78 | CD8a = c(0, 5, 1),
79 | CD38 = c(0, 8, 1),
80 | Ki67 = c(0, 5, 1)))
81 | ```
82 |
83 | ### Adjusting colors
84 |
85 | The default colors for visualization are chosen by the additive RGB
86 | (red, green, blue) color model. For six markers the default colors are:
87 | red, green, blue, cyan (green + blue), magenta (red + blue), yellow
88 | (green + red). These colors are the easiest to distinguish by eye.
89 | However, you can select other colors for each channel by setting the
90 | `colour` parameter:
91 |
92 | ```{r setting-colors}
93 | plotPixels(cur_images,
94 | colour_by = c("Ecad", "CD3", "CD20"),
95 | bcg = list(Ecad = c(0, 5, 1),
96 | CD3 = c(0, 5, 1),
97 | CD20 = c(0, 5, 1)),
98 | colour = list(Ecad = c("black", "burlywood1"),
99 | CD3 = c("black", "cyan2"),
100 | CD20 = c("black", "firebrick1")))
101 | ```
102 |
103 | The `colour` parameter takes a named list in which each entry specifies
104 | the colors from which a color gradient is constructed via
105 | `colorRampPalette`. These are usually vectors of length 2 in which the
106 | first entry is `"black"` and the second entry specifies the color of
107 | choice. Although not recommended, you can also specify more than two
108 | colors to generate a more complex color gradient.
109 |
110 | ### Image normalization
111 |
112 | As an alternative to setting the `bcg` parameter, images can first be
113 | normalized. Normalization here means to scale the pixel intensities per
114 | channel between 0 and 1 (or a range specified by the `ft` parameter in
115 | the `normalize` function). By default, the `normalize` function scales
116 | pixel intensities across **all** images contained in the `CytoImageList`
117 | object (`separateImages = FALSE`). Each individual channel is scaled
118 | independently (`separateChannels = TRUE`).
119 |
120 | After 0-1 normalization, maximum pixel intensities can be clipped to
121 | enhance the contrast of the image (setting the `inputRange` parameter).
122 | In the following example, the clipping to 0 and 0.2 is the same as
123 | multiplying the pixel intensities by a factor of 5.
124 |
125 | ```{r default-normalization}
126 | # 0 - 1 channel scaling across all images
127 | norm_images <- cytomapper::normalize(cur_images)
128 |
129 | # Clip channel at 0.2
130 | norm_images <- cytomapper::normalize(norm_images, inputRange = c(0, 0.2))
131 |
132 | plotPixels(norm_images,
133 | colour_by = c("Ecad", "CD3", "CD20", "CD8a", "CD38", "Ki67"))
134 | ```
135 |
136 | The default setting of scaling pixel intensities across all images
137 | ensures comparable intensity levels across images. Pixel intensities can
138 | also be scaled **per image** therefore correcting for
139 | staining/expression differences between images:
140 |
141 | ```{r individual-normalization}
142 | # 0 - 1 channel scaling per image
143 | norm_images <- cytomapper::normalize(cur_images, separateImages = TRUE)
144 |
145 | # Clip channel at 0.2
146 | norm_images <- cytomapper::normalize(norm_images, inputRange = c(0, 0.2))
147 |
148 | plotPixels(norm_images,
149 | colour_by = c("Ecad", "CD3", "CD20", "CD8a", "CD38", "Ki67"))
150 | ```
151 |
152 | As we can see, the marker Ki67 appears brighter on images 2 and 3 in
153 | comparison to scaling the channel across all images.
154 |
155 | Finally, the `normalize` function also accepts a named list input for
156 | the `inputRange` argument. In this list, the clipping range per channel
157 | can be set individually:
158 |
159 | ```{r setting-inputRange}
160 | # 0 - 1 channel scaling per image
161 | norm_images <- cytomapper::normalize(cur_images,
162 | separateImages = TRUE,
163 | inputRange = list(Ecad = c(0, 50),
164 | CD3 = c(0, 30),
165 | CD20 = c(0, 40),
166 | CD8a = c(0, 50),
167 | CD38 = c(0, 10),
168 | Ki67 = c(0, 70)))
169 |
170 | plotPixels(norm_images,
171 | colour_by = c("Ecad", "CD3", "CD20", "CD8a", "CD38", "Ki67"))
172 | ```
173 |
174 | ## Cell visualization {#mask-visualization}
175 |
176 | In the following section, we will show examples on how to visualize
177 | single cells either as segmentation masks or outlined on composite
178 | images. This type of visualization allows observing the spatial
179 | distribution of cell phenotypes, the visual assessment of morphological
180 | features and quality control in terms of cell segmentation and
181 | phenotyping.
182 |
183 | ### Visualizing metadata
184 |
185 | The `cytomapper` package provides the `plotCells` function that accepts
186 | a `CytoImageList` object containing segmentation masks. These are
187 | defined as single channel images where sets of pixels with the same
188 | integer ID identify individual cells. This integer ID can be found as an
189 | entry in the `colData(spe)` slot and as pixel information in the
190 | segmentation masks. The entry in `colData(spe)` needs to be specified
191 | via the `cell_id` argument to the `plotCells` function. In that way,
192 | data contained in the `SpatialExperiment` object can be mapped to
193 | segmentation masks. For the current dataset, the cell IDs are stored in
194 | `colData(spe)$ObjectNumber`.
195 |
196 | As cell IDs are only unique within a single image, `plotCells` also
197 | requires the `img_id` argument. This argument specifies the
198 | `colData(spe)` as well as the `mcols(masks)` entry that stores the
199 | unique image name from which each cell was extracted. In the current
200 | dataset the unique image names are stored in `colData(spe)$sample_id`
201 | and `mcols(masks)$sample_id`.
202 |
203 | Providing these two entries that allow mapping between the
204 | `SpatialExperiment` object and segmentation masks, we can now color
205 | individual cells based on their cell type:
206 |
207 | ```{r celltype}
208 | plotCells(cur_masks,
209 | object = spe,
210 | cell_id = "ObjectNumber",
211 | img_id = "sample_id",
212 | colour_by = "celltype")
213 | ```
214 |
215 | For consistent visualization, the `plotCells` function takes a named
216 | list as the `colour` argument. The entry name must match the `colour_by`
217 | argument.
218 |
219 | ```{r setting-celltype-colors}
220 | plotCells(cur_masks,
221 | object = spe,
222 | cell_id = "ObjectNumber",
223 | img_id = "sample_id",
224 | colour_by = "celltype",
225 | colour = list(celltype = metadata(spe)$color_vectors$celltype))
226 | ```
227 |
228 | If only individual cell types should be visualized, the
229 | `SpatialExperiment` object can be subsetted (e.g., to only contain CD8+
230 | T cells). In the following example CD8+ T cells are colored in red and
231 | all other cells that are not contained in the dataset are colored in
232 | white (as set by the `missing_colour` argument).
233 |
234 | ```{r selective-visualization}
235 | CD8 <- spe[,spe$celltype == "CD8"]
236 |
237 | plotCells(cur_masks,
238 | object = CD8,
239 | cell_id = "ObjectNumber",
240 | img_id = "sample_id",
241 | colour_by = "celltype",
242 | colour = list(celltype = c(CD8 = "red")),
243 | missing_colour = "white")
244 | ```
245 |
246 | In terms of visualizing metadata, any entry in the `colData(spe)` slot
247 | can be visualized. The `plotCells` function automatically detects if the
248 | entry is continuous or discrete. In this fashion, we can now visualize
249 | the area of each cell:
250 |
251 | ```{r area}
252 | plotCells(cur_masks,
253 | object = spe,
254 | cell_id = "ObjectNumber",
255 | img_id = "sample_id",
256 | colour_by = "area")
257 | ```
258 |
259 | ### Visualizing expression
260 |
261 | Similar to visualizing single-cell metadata on segmentation masks, we
262 | can use the `plotCells` function to visualize the aggregated pixel
263 | intensities per cell. In the current dataset pixel intensities were
264 | aggregated by computing the mean pixel intensity per cell and per
265 | channel. The `plotCells` function accepts the `exprs_values` argument
266 | (default `counts`) that allows selecting the assay which stores the
267 | expression values that should be visualized.
268 |
269 | In the following example, we visualize the asinh-transformed mean pixel
270 | intensities of the epithelial marker E-cadherin on segmentation masks.
271 |
272 | ```{r Ecad-expression}
273 | plotCells(cur_masks,
274 | object = spe,
275 | cell_id = "ObjectNumber",
276 | img_id = "sample_id",
277 | colour_by = "Ecad",
278 | exprs_values = "exprs")
279 | ```
280 |
281 | We will now visualize the maximum number of allowed markers as
282 | composites on the segmentation masks. As above the markers indicate the
283 | spatial distribution of tumor cells (E-cadherin), T cells (CD3), B cells
284 | (CD20), CD8+ T cells (CD8a), plasma cells (CD38) and proliferating cells
285 | (Ki67).
286 |
287 | ```{r 6-channel-expression}
288 | plotCells(cur_masks,
289 | object = spe,
290 | cell_id = "ObjectNumber",
291 | img_id = "sample_id",
292 | colour_by = c("Ecad", "CD3", "CD20", "CD8a", "CD38", "Ki67"),
293 | exprs_values = "exprs")
294 | ```
295 |
296 | While visualizing 6 markers on the pixel-level may still allow the
297 | distinction of different tissue structures, observing single-cell
298 | expression levels is difficult when visualizing many markers
299 | simultaneously due to often overlapping expression.
300 |
301 | Similarly to adjusting marker colors when visualizing pixel intensities,
302 | we can change the color gradients per marker by setting the `colour`
303 | argument:
304 |
305 | ```{r setting-expression-colors}
306 | plotCells(cur_masks,
307 | object = spe,
308 | cell_id = "ObjectNumber",
309 | img_id = "sample_id",
310 | colour_by = c("Ecad", "CD3", "CD20"),
311 | exprs_values = "exprs",
312 | colour = list(Ecad = c("black", "burlywood1"),
313 | CD3 = c("black", "cyan2"),
314 | CD20 = c("black", "firebrick1")))
315 | ```
316 |
317 | ### Outlining cells on images {#outline-cells}
318 |
319 | The following section highlights the combined visualization of pixel-
320 | and cell-level information at once. For this, besides the
321 | `SpatialExperiment` object, the `plotPixels` function accepts two
322 | `CytoImageList` objects. One for the multi-channel images and one for
323 | the segmentation masks. By specifying the `outline_by` parameter, the
324 | outlines of cells can now be colored based on their metadata.
325 |
326 | The following example first generates a 3-channel composite image
327 | displaying the expression of E-cadherin, CD3 and CD20 before coloring
328 | the cells' outlines by their cell phenotype.
329 |
330 | ```{r outlining-all-cells}
331 | plotPixels(image = cur_images,
332 | mask = cur_masks,
333 | object = spe,
334 | cell_id = "ObjectNumber",
335 | img_id = "sample_id",
336 | colour_by = c("Ecad", "CD3", "CD20"),
337 | outline_by = "celltype",
338 | bcg = list(Ecad = c(0, 5, 1),
339 | CD3 = c(0, 5, 1),
340 | CD20 = c(0, 5, 1)),
341 | colour = list(celltype = metadata(spe)$color_vectors$celltype),
342 | thick = TRUE)
343 | ```
344 |
345 | Distinguishing individual cell phenotypes is nearly impossible in the
346 | images above.
347 |
348 | However, the `SpatialExperiment` object can be subsetted to only contain
349 | cells of a single or few phenotypes. This allows the selective
350 | visualization of cell outlines on composite images.
351 |
352 | Here, we select all CD8+ T cells from the dataset and outline them on a
353 | 2-channel composite image displaying the expression of CD3 and CD8a.
354 |
355 | ```{r outlining-CD8}
356 | CD8 <- spe[,spe$celltype == "CD8"]
357 |
358 | plotPixels(image = cur_images,
359 | mask = cur_masks,
360 | object = CD8,
361 | cell_id = "ObjectNumber", img_id = "sample_id",
362 | colour_by = c("CD3", "CD8a"),
363 | outline_by = "celltype",
364 | bcg = list(CD3 = c(0, 5, 1),
365 | CD8a = c(0, 5, 1)),
366 | colour = list(celltype = c("CD8" = "white")),
367 | thick = TRUE)
368 | ```
369 |
370 | This type of visualization allows the quality control of two things: 1.
371 | segmentation quality of individual cell types can be checked and 2. cell
372 | phenotyping accuracy can be visually assessed against expected marker
373 | expression.
374 |
375 | ## Adjusting plot annotations
376 |
377 | The `cytomapper` package provides a number of function arguments to
378 | adjust the visual appearance of figures that are shared between the
379 | `plotPixels` and `plotCells` functions.
380 |
381 | For a full overview of the arguments please refer to `?plotting-param`.
382 |
383 | We use the following example to highlight how to adjust the scale bar,
384 | the image title, the legend appearance and the margin between images.
385 |
386 | ```{r adjusting-parameters}
387 | plotPixels(cur_images,
388 | colour_by = c("Ecad", "CD3", "CD20", "CD8a", "CD38", "Ki67"),
389 | bcg = list(Ecad = c(0, 5, 1),
390 | CD3 = c(0, 5, 1),
391 | CD20 = c(0, 5, 1),
392 | CD8a = c(0, 5, 1),
393 | CD38 = c(0, 8, 1),
394 | Ki67 = c(0, 5, 1)),
395 | scale_bar = list(length = 100,
396 | label = expression("100 " ~ mu * "m"),
397 | cex = 0.7,
398 | lwidth = 10,
399 | colour = "grey",
400 | position = "bottomleft",
401 | margin = c(5,5),
402 | frame = 3),
403 | image_title = list(text = mcols(cur_images)$indication,
404 | position = "topright",
405 | colour = "grey",
406 | margin = c(5,5),
407 | font = 2,
408 | cex = 2),
409 | legend = list(colour_by.title.cex = 0.7,
410 | margin = 10),
411 | margin = 40)
412 | ```
413 |
414 | ## Displaying individual images
415 |
416 | By default, all images are displayed on the same graphics device. This
417 | can be useful when saving all images at once (see next section) to zoom
418 | into the individual images instead of opening each image individually.
419 | However, when displaying images in a markdown document these are more
420 | accessible when visualized individually. For this, the `plotPixels` and
421 | `plotCells` functions accept the `display` parameter that when set to
422 | `"single"` displays each resulting image in its own graphics device:
423 |
424 | ```{r individual-images}
425 | plotCells(cur_masks,
426 | object = spe,
427 | cell_id = "ObjectNumber",
428 | img_id = "sample_id",
429 | colour_by = "celltype",
430 | colour = list(celltype = metadata(spe)$color_vectors$celltype),
431 | display = "single",
432 | legend = NULL)
433 | ```
434 |
435 | ## Saving and returning images
436 |
437 | The final section addresses how to save composite images and how to
438 | return them for integration with other plots.
439 |
440 | The `plotPixels` and `plotCells` functions accept the `save_plot`
441 | argument which takes a named list of the following entries: `filename`
442 | indicates the location and file type of the image saved to disk; `scale`
443 | adjusts the resolution of the saved image (this only needs to be
444 | adjusted for small images).
445 |
446 | ```{r saving-images}
447 | plotCells(cur_masks,
448 | object = spe,
449 | cell_id = "ObjectNumber",
450 | img_id = "sample_id",
451 | colour_by = "celltype",
452 | colour = list(celltype = metadata(spe)$color_vectors$celltype),
453 | save_plot = list(filename = "data/celltype_image.png"))
454 | ```
455 |
456 | The composite images (together with their annotation) can also be
457 | returned. In the following code chunk we save two example plots to
458 | variables (`out1` and `out2`).
459 |
460 | ```{r returning-images, results="hide", fig.show='hide'}
461 | out1 <- plotCells(cur_masks,
462 | object = spe,
463 | cell_id = "ObjectNumber",
464 | img_id = "sample_id",
465 | colour_by = "celltype",
466 | colour = list(celltype = metadata(spe)$color_vectors$celltype),
467 | return_plot = TRUE)
468 |
469 | out2 <- plotCells(cur_masks,
470 | object = spe,
471 | cell_id = "ObjectNumber",
472 | img_id = "sample_id",
473 | colour_by = c("Ecad", "CD3", "CD20"),
474 | exprs_values = "exprs",
475 | return_plot = TRUE)
476 | ```
477 |
478 | The composite images are stored in `out1$plot` and `out2$plot` and can
479 | be converted into a graph object recognized by the
480 | [cowplot](https://cran.r-project.org/web/packages/cowplot/vignettes/introduction.html)
481 | package.
482 |
483 | The final function call of the following chunk plots both objects next to
484 | each other.
485 |
486 | ```{r side-by-side-plot, message=FALSE}
487 | library(cowplot)
488 | library(gridGraphics)
489 | p1 <- ggdraw(out1$plot, clip = "on")
490 | p2 <- ggdraw(out2$plot, clip = "on")
491 |
492 | plot_grid(p1, p2)
493 | ```
494 |
495 | ## Interactive image visualization {#cytoviewer}
496 |
497 | The
498 | [cytoviewer](https://bioconductor.org/packages/release/bioc/html/cytoviewer.html)
499 | R/Bioconductor package [@Meyer2024] extends the static visualization
500 | abilities from `cytomapper` via an interactive and user-friendly `shiny`
501 | application.
502 |
503 | It supports flexible generation of image composites, allows side-by-side
504 | visualization of single channels, and facilitates the spatial
505 | visualization of single-cell data in the form of segmentation masks.
506 | Rapid and publication-quality image downloads are also supported. For a
507 | full introduction to the package, please refer to
508 | [the vignette](https://bioconductor.org/packages/release/bioc/vignettes/cytoviewer/inst/doc/cytoviewer.html).
509 |
510 | ```{r cytoviewer-interactive, message = FALSE}
511 | library(cytoviewer)
512 |
513 | app <- cytoviewer(image = images,
514 | mask = masks,
515 | object = spe,
516 | cell_id = "ObjectNumber",
517 | img_id = "sample_id")
518 |
519 | if (interactive()) {
520 | shiny::runApp(app)
521 | }
522 | ```
523 |
524 | ## Session Info
525 |
526 |
527 |
528 | SessionInfo
529 |
530 | ```{r, echo = FALSE}
531 | sessionInfo()
532 | ```
533 |
534 |
535 |
--------------------------------------------------------------------------------
/12-references.Rmd:
--------------------------------------------------------------------------------
1 | `r if (knitr::is_html_output()) '
2 | # References {-}
3 | '`
4 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | **Version 1.0.0** [2023-06-30]
2 |
3 | - First stable release of the workflow
4 |
5 | **Version 1.0.1** [2023-10-19]
6 |
7 | - Added seed before `predict` call after training a classifier
8 |
9 | **Version 1.0.2** [2023-11-27]
10 |
11 | - Added developers documentation
12 | - Added more ways to visualize cell type composition per CN
13 |
14 | **Version 1.0.3** [2024-01-05]
15 |
16 | - Updated cytoviewer citation and corresponding text
--------------------------------------------------------------------------------
/DEVELOPMENT.md:
--------------------------------------------------------------------------------
1 | # Useful information when developing this book
2 |
3 | This document is to guide future developers to maintain and extend the IMC
4 | data analysis book.
5 |
6 | ## General setup
7 |
8 | * The IMC data analysis book is written in [bookdown](https://bookdown.org/).
9 | * Each section is stored in its own `.Rmd` file with `index.Rmd` building the landing page
10 | * References are stored in `book.bib`
11 | * At the end of each `.Rmd` file a number of unit tests are executed. These
12 | unit tests are always executed but their results are not shown in the book.
13 |
14 | ### Continuous integration/continuous deployment
15 |
16 | * CI/CD is executed based on the workflow [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/blob/main/.github/workflows/build.yml).
17 | * On the first of each month based on the [Dockerfile](https://github.com/BodenmillerGroup/IMCDataAnalysis/blob/main/Dockerfile) a new Docker image is built. We are doing this so that the workflow is always tested against the newest software versions.
18 | * The Docker image is pushed to the Github Container Registry [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/pkgs/container/imcdataanalysis).
19 | * The Docker image is date tagged and `latest` always refers to the newest build.
20 | * Once the Docker image is built, the IMC data analysis book is executed within the
21 | newest Docker image. This will also run all unit tests.
22 |
23 | **Of note:** Sometimes the calculation of the UMAP produces slightly different
24 | results. If that happens the workflow run can be re-executed by clicking the `Re-run jobs` button of the workflow run.
25 | This test could also be excluded in the long run.
26 |
27 | * When pushing to `main` (either directly or via a PR), the CI/CD workflow is
28 | executed.
29 | * If the Dockerfile changed (e.g., if you want to add a new package), a new Docker image is build and the workflow is executed within the new Docker image.
30 | * If the Dockerfile did not change, the workflow is executed within the most recent Docker image.
31 |
32 | ## Updating the book
33 |
34 | This section describes how to update the book. You want to do this to add new content
35 | but also to fix bugs or adjust unit tests.
36 |
37 | ### Work on the devel branch
38 |
39 | It is recommended to work on the `devel` branch of the Github repository to add
40 | new changes.
41 |
42 | ### Work within the newest Docker container
43 |
44 | It is also recommended to always work within a Docker container based on the newest
45 | Docker image available:
46 |
47 | 1. After installing [Docker](https://docs.docker.com/get-docker/) you can first pull the container via:
48 |
49 | ```
50 | docker pull ghcr.io/bodenmillergroup/imcdataanalysis:yyyy-mm-dd
51 | ```
52 |
53 | and then run the container:
54 |
55 | ```
56 | docker run -v /path/to/IMCDataAnalysis:/home/rstudio/IMCDataAnalysis \
57 | -e PASSWORD=bioc -p 8787:8787 \
58 | ghcr.io/bodenmillergroup/imcdataanalysis:yyyy-mm-dd
59 | ```
60 |
61 | 2. An RStudio server session can be accessed via a browser at `localhost:8787` using `Username: rstudio` and `Password: bioc`.
62 | 3. Navigate to `IMCDataAnalysis` and open the `IMCDataAnalysis.Rproj` file.
63 | 4. Code in the individual files can now be executed or the whole workflow can be built by entering `bookdown::render_book()`.
64 |
65 | ### Adding new packages
66 |
67 | If you need to add new packages to the workflow, make sure to add them to the
68 | [software requirements](https://bodenmillergroup.github.io/IMCDataAnalysis/prerequisites.html#software-requirements)
69 | section and to the Dockerfile.
70 |
71 | ### Opening a pull request
72 |
73 | Now you can change the content of the book.
74 | Once you have added all changes, push the changes to `devel` and open a pull request
75 | to `main`. Wait until all checks have passed and you can merge the PR.
76 |
77 | ### Add changes to CHANGELOG.md
78 |
79 | Please track the changes that you are making in the [CHANGELOG.md](CHANGELOG.md) file.
80 |
81 | ### Trigger a new release
82 |
83 | Once you have added the changes to the CHANGELOG, merged the pull request and
84 | the workflow has been executed on CI/CD, you can trigger a new release.
85 |
86 | * Go to [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/releases) and click on `Draft a new release` at the top of the page.
87 | * Under `Choose a tag` create a new tag and give details on the release.
88 | * With each release the corresponding [Zenodo repository](https://zenodo.org/records/10209942) is updated.
89 |
90 | ## Updating the data
91 |
92 | For new `steinbock` releases and specifically if the Mesmer version changes, the
93 | example data should be updated. The example data are stored on Central NAS
94 | and are hosted on Zenodo.
95 |
96 | ### Re-analyse the example data
97 |
98 | * You can find the raw data on [zenodo](https://zenodo.org/records/7575859).
99 | * On Central NAS under projects/IMCWorkflow/zenodo create a new folder called `steinbock_0.x.y` where x denotes the new major version and y the new minor version.
100 | * Copy the `steinbock.sh` script from the folder of the previous version to the folder of the newest version.
101 | * Change the steinbock version number in the `steinbock.sh` script and execute it.
102 | * It should generate all relevant files and zip all folders.
103 |
104 | ### Upload data to zenodo
105 |
106 | * On [zenodo](https://zenodo.org/records/7624451), click on `New version` and replace all files with the newer version. No need to upload the raw data to zenodo as they are hosted in a different repository. Make sure to change the date and update the version number.
107 |
108 | ### Adjust the book
109 |
110 | * Work in the most recent Docker container and on the devel branch.
111 | * Manually go through each section, update the links in the [Prerequisites](https://bodenmillergroup.github.io/IMCDataAnalysis/prerequisites.html#download-data) section
112 | * Make sure to check and adjust the unit tests at the end of each file
113 | * Make sure that the text (e.g. clustering) still matches the results
114 |
115 | *Important:* as we are training a random forest classifier on manually gated cells, these gated cells won't match the newest version of the data if the Mesmer version changed. For this, we have the `code/transfer_labels.R` script that automatically re-gates cells in the new SPE object.
116 |
117 | * Go through all sections until `Cell phenotyping`
118 | * Based on the old `gated_cells` and the new SPE object, execute the `code/transfer_labels.R` script
119 | * Zip the new `gated_cells` and upload them to a new version on [zenodo](https://zenodo.org/records/8095133)
120 | * Adjust the link to the new gated cells in the [Prerequisites](https://bodenmillergroup.github.io/IMCDataAnalysis/prerequisites.html#download-data) section
121 | * Make sure that the new classification results closely match the previous results
122 |
123 | * Continue going through the book
124 |
125 | ### Execute the book
126 |
127 | * When you are done working through the book, within the Docker container open the RProject file and execute `bookdown::render_book()` to make sure that it can be executed from beginning to end.
128 | * Under `data/CellTypeValidation` have a look at the PNGs to check if celltypes were correctly detected.
129 |
130 | ### Add changes to CHANGELOG.md
131 |
132 | Finally, add all the recent changes to the CHANGELOG, create and merge a PR and create a new release (see above).
133 |
134 |
135 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Docker inheritance
2 | FROM rocker/rstudio:latest
3 |
4 | RUN apt-get -y update \
5 | && apt-get install -y --no-install-recommends apt-utils \
6 | && apt-get install -y --no-install-recommends zlib1g-dev libglpk-dev libmagick++-dev libfftw3-dev libxml2-dev libxt-dev curl libcairo2-dev libproj-dev libgdal-dev libudunits2-dev libarchive-dev \
7 | && apt-get clean \
8 | && rm -rf /var/lib/apt/lists/*
9 |
10 | RUN R -e 'install.packages(c("rmarkdown", "markdown", "bookdown", "pheatmap", "viridis", "zoo", "BiocManager", "devtools", "testthat", "tiff", \
11 | "distill", "ggrepel", "patchwork", "mclust", "RColorBrewer", "uwot", "Rtsne", "harmony", \
12 | "Seurat", "SeuratObject", "cowplot", "kohonen", "caret", "randomForest", "ggridges", \
13 | "gridGraphics", "scales", "Matrix"))'
14 | RUN R -e 'BiocManager::install(c("CATALYST", "scuttle", "scater", "dittoSeq", "tidyverse", "BiocStyle", "batchelor", "bluster", \
15 | "scran", "lisaClust", "spicyR", "iSEE", "imcRtools", "cytomapper", "imcdatasets", "cytoviewer"))'
16 | RUN R -e 'devtools::install_github(c("i-cyto/Rphenograph"))'
17 |
18 |
--------------------------------------------------------------------------------
/IMCDataAnalysis.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 4
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | BuildType: Website
16 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 BodenmillerGroup
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.6806448.svg)](https://doi.org/10.5281/zenodo.6806448)
2 |
3 | # R based analysis workflow for multiplexed imaging data
4 |
5 |
6 | [![build](https://github.com/BodenmillerGroup/IMCDataAnalysis/actions/workflows/build.yml/badge.svg)](https://github.com/BodenmillerGroup/IMCDataAnalysis/actions/workflows/build.yml)
7 |
8 |
9 | R workflow highlighting analysis approaches for multiplexed imaging data.
10 |
11 | ## Scope
12 |
13 | This workflow explains the use of common R/Bioconductor packages to pre-process and analyse single-cell data obtained from segmented multichannel images.
14 | While we use imaging mass cytometry (IMC) data as an example, the concepts presented here can be applied to images obtained by other technologies (e.g. CODEX, MIBI, mIF, CyCIF, etc.).
15 | The workflow can be largely divided into the following parts:
16 |
17 | 1. Preprocessing (reading in the data, spillover correction)
18 | 2. Image- and cell-level quality control, low-dimensional visualization
19 | 3. Sample/batch effect correction
20 | 4. Cell phenotyping via clustering or classification
21 | 5. Single-cell visualization
22 | 6. Image visualization
23 | 7. Spatial analyses
24 |
25 | ## Update freeze
26 |
27 | This workflow has been actively developed until December 2023. At that time
28 | we used the most recent (`v.0.16.0`) version of `steinbock` to process the
29 | example data. If you are having issues when using newer versions of `steinbock`
30 | please open an issue [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/issues).
31 |
32 | ## Usage
33 |
34 | To reproduce the analysis displayed at [https://bodenmillergroup.github.io/IMCDataAnalysis/](https://bodenmillergroup.github.io/IMCDataAnalysis/) clone the repository via:
35 |
36 | ```
37 | git clone https://github.com/BodenmillerGroup/IMCDataAnalysis.git
38 | ```
39 |
40 | For reproducibility purposes, we provide a Docker container [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/pkgs/container/imcdataanalysis).
41 |
42 | 1. After installing [Docker](https://docs.docker.com/get-docker/) you can first pull the container via:
43 |
44 | ```
45 | docker pull ghcr.io/bodenmillergroup/imcdataanalysis:latest
46 | ```
47 |
48 | and then run the container:
49 |
50 | ```
51 | docker run -v /path/to/IMCDataAnalysis:/home/rstudio/IMCDataAnalysis \
52 | -e PASSWORD=bioc -p 8787:8787 \
53 | ghcr.io/bodenmillergroup/imcdataanalysis:latest
54 | ```
55 |
56 | **Of note: it is recommended to use a date-tagged version of the container to ensure reproducibility**.
57 | This can be done via:
58 |
59 | ```
60 | docker pull ghcr.io/bodenmillergroup/imcdataanalysis:<year-month-date>
61 | ```
62 |
63 | 2. An RStudio server session can be accessed via a browser at `localhost:8787` using `Username: rstudio` and `Password: bioc`.
64 | 3. Navigate to `IMCDataAnalysis` and open the `IMCDataAnalysis.Rproj` file.
65 | 4. Code in the individual files can now be executed or the whole workflow can be built by entering `bookdown::render_book()`.
66 |
67 | ## Feedback
68 |
69 | We provide the workflow as an open-source resource. It does not mean that
70 | this workflow is tested on all possible datasets or biological questions and
71 | there exist multiple ways of analysing data. It is therefore recommended to
72 | check the results and question their biological interpretation.
73 |
74 | If you notice an issue or missing information, please report an issue
75 | [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/issues). We also
76 | welcome contributions in form of pull requests or feature requests in form of
77 | issues. Have a look at the source code at:
78 |
79 | [https://github.com/BodenmillerGroup/IMCDataAnalysis](https://github.com/BodenmillerGroup/IMCDataAnalysis)
80 |
81 | ## Contributing guidelines
82 |
83 | For feature requests and bug reports, please raise an issue [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/issues).
84 |
85 | For adding new content to the book please work inside the Docker container as explained above.
86 | You can fork the repository, add your changes and open a pull request.
87 | To add new libraries to the container please add them to the [Dockerfile](Dockerfile).
88 |
89 | ## Maintainer
90 |
91 | [Daniel Schulz](https://github.com/SchulzDan)
92 |
93 | ## Contributors
94 |
95 | [Nils Eling](https://github.com/nilseling)
96 | [Vito Zanotelli](https://github.com/votti)
97 | [Daniel Schulz](https://github.com/SchulzDan)
98 | [Jonas Windhager](https://github.com/jwindhager)
99 | [Michelle Daniel](https://github.com/michdaniel)
100 | [Lasse Meyer](https://github.com/lassedochreden)
101 |
102 | ## Citation
103 |
104 | Please cite the following paper when using the presented workflow in your research:
105 |
106 | > Windhager, J., Zanotelli, V.R.T., Schulz, D. et al. An end-to-end workflow for multiplexed image processing and analysis. Nat Protoc (2023). https://doi.org/10.1038/s41596-023-00881-0
107 |
108 | @article{Windhager2023,
109 | author = {Windhager, Jonas and Zanotelli, Vito R.T. and Schulz, Daniel and Meyer, Lasse and Daniel, Michelle and Bodenmiller, Bernd and Eling, Nils},
110 | title = {An end-to-end workflow for multiplexed image processing and analysis},
111 | year = {2023},
112 | doi = {10.1038/s41596-023-00881-0},
113 | URL = {https://www.nature.com/articles/s41596-023-00881-0},
114 | journal = {Nature Protocols}
115 | }
116 |
117 |
118 | ## Funding
119 |
120 | The work was funded by the European Union’s Horizon 2020 research and innovation program under Marie Sklodowska-Curie Actions grant agreement No 892225 (N.E.) and by the CRUK IMAXT Grand Challenge (J.W.).
121 |
--------------------------------------------------------------------------------
/_bookdown.yml:
--------------------------------------------------------------------------------
1 | book_filename: "IMCDataAnalysis"
2 | delete_merged_file: true
3 | output_dir: "docs"
4 | new_session: yes
5 | language:
6 | ui:
7 | chapter_name: ""
8 | view: https://github.com/BodenmillerGroup/IMCDataAnalysis/blob/main/%s
--------------------------------------------------------------------------------
/_output.yml:
--------------------------------------------------------------------------------
1 | bookdown::gitbook:
2 | github-repo: "BodenmillerGroup/IMCDataAnalysis"
3 | css: style.css
4 | config:
5 | toc:
6 | collapse: section
7 | before: |
8 | Multiplexed imaging data analysis
9 | after: |
10 | Published with bookdown
11 | download: null
12 | sharing:
13 | facebook: no
14 | github: yes
15 | twitter: yes
16 | linkedin: yes
17 | weibo: no
18 | instapaper: no
19 | vk: no
20 | whatsapp: no
21 | bookdown::pdf_book:
22 | includes:
23 | in_header: preamble.tex
24 | latex_engine: xelatex
25 | citation_package: natbib
26 | keep_tex: yes
27 | bookdown::epub_book: default
28 |
--------------------------------------------------------------------------------
/book.bib:
--------------------------------------------------------------------------------
1 | @article{Chevrier2017,
2 | title = {Compensation of Signal Spillover in Suspension and Imaging Mass Cytometry},
3 | author = "Chevrier, Stéphane and Crowell, Helena L. and Zanotelli, Vito R.T. and Engler, Stefanie and Robinson, Mark D. and Bodenmiller, Bernd",
4 | journal = "Cell Systems",
5 | volume = 6,
6 | pages = "612--620",
7 | year = 2018
8 | }
9 |
10 | @article{Bendall2011,
11 | title = {Single-cell mass cytometry of differential immune and drug responses across a human hematopoietic continuum},
12 | author = "Bendall, Sean C. and Simonds, Erin F. and Qiu, Peng and Amir, El Ad D. and Krutzik, Peter O. and Finck, Rachel and Bruggner, Robert V. and Melamed, Rachel and Trejo, Angelica and Ornatsky, Olga I. and Balderas, Robert S. and Plevritis, Sylvia K. and Sachs, Karen and Pe'er, Dana and Tanner, Scott D. and Nolan, Garry P.",
13 | journal = "Science",
14 | volume = 332,
15 | pages = "687--696",
16 | year = 2011
17 | }
18 |
19 | @article{Giesen2014,
20 | Author = {Giesen, Charlotte and Wang, Hao A.O. and Schapiro, Denis and Zivanovic, Nevena and Jacobs, Andrea and Hattendorf, Bodo and Schüffler, Peter J. and Grolimund, Daniel and Buhmann, Joachim M. and Brandt, Simone and Varga, Zsuzsanna and Wild, Peter J. and Günther, Detlef and Bodenmiller, Bernd},
21 | Journal = {Nature {M}ethods},
22 | Number = {4},
23 | Pages = {417-422},
24 | Title = {Highly multiplexed imaging of tumor tissues with subcellular resolution by mass cytometry},
25 | Volume = {11},
26 | Year = {2014}
27 | }
28 |
29 |
30 | @article{Schulz2018,
31 | title = {Simultaneous Multiplexed Imaging of mRNA and Proteins with Subcellular Resolution in Breast Cancer Tissue Samples by Mass Cytometry.},
32 | author = "Schulz, Daniel and Zanotelli, Vito RT and Fischer, Jana R and Schapiro, Denis and Engler, Stefanie and Lun, Xiao-Kang and Jackson, Hartland W and Bodenmiller, Bernd",
33 | journal = "Cell Systems",
34 | volume = 6,
35 | pages = "25--36.e5",
36 | year = 2018
37 | }
38 |
39 | @article{Shapiro2017,
40 | title = {histoCAT: analysis of cell phenotypes and interactions in multiplex image cytometry data.},
41 | author = "Schapiro, Denis and Jackson, Hartland W and Raghuraman, Swetha and Fischer, Jana R and Zanotelli, Vito RT and Schulz, Daniel and Giesen, Charlotte and Catena, Raúl and Varga, Zsuzsanna and Bodenmiller, Bernd",
42 | journal = "Nature Methods",
43 | volume = 14,
44 | pages = "873--876",
45 | year = 2017
46 | }
47 |
48 | @article{Angelo2014,
49 | Author = {Angelo, Michael and Bendall, Sean C. and Finck, Rachel and Hale, Matthew B. and Hitzman, Chuck and Borowsky, Alexander D. and Levenson, Richard M. and Lowe, John B. and Liu, Scot D. and Zhao, Shuchun and Natkunam, Yasodha and Nolan, Garry P.},
50 | Journal = {Nature Medicine},
51 | Number = {4},
52 | Pages = {436-442},
53 | Title = {Multiplexed ion beam imaging of human breast tumors},
54 | Volume = {20},
55 | Year = {2014}
56 | }
57 |
58 | @article{Lin2018,
59 | Author = {Lin, Jia-Ren and Izar, Benjamin and Wang, Shu and Yapp, Clarence and Mei, Shaolin and Shah, Parin M. and Santagata, Sandro and Sorger, Peter K.},
60 | Journal = {eLife},
61 | Pages = {1-46},
62 | Title = {Highly multiplexed immunofluorescence imaging of human tissues and tumors using t-CyCIF and conventional optical microscopes},
63 | Volume = {7},
64 | Year = {2018}
65 | }
66 |
67 | @article{Gut2018,
68 | Author = {Gut, Gabriele and Herrmann, Markus D and Pelkmans, Lucas},
69 | Journal = {Science},
70 | Pages = {1-13},
71 | Title = {Multiplexed protein maps link subcellular organization to cellular states},
72 | Volume = {361},
73 | Year = {2018}
74 | }
75 |
76 | @article{Bodenmiller2016,
77 | Author = {Bodenmiller, Bernd},
78 | Journal = {Cell Systems},
79 | Pages = {225-238},
80 | Title = {Multiplexed Epitope-Based Tissue Imaging for Discovery and Healthcare Applications},
81 | Volume = {2},
82 | Year = {2016}
83 | }
84 |
85 |
86 | @article{Chen2015,
87 | Author = {Chen, Kok Hao and Boettiger, Alistair N. and Moffitt, Jeffrey R. and Wang, Siyuan and Zhuang, Xiaowei},
88 | Journal = {Science},
89 | Pages = {aaa6090},
90 | Title = {Spatially resolved, highly multiplexed RNA profiling in single cells},
91 | Volume = {348},
92 | Year = {2015}
93 | }
94 |
95 | @article{Greenwald2021,
96 | year = {2021},
97 | volume = {40},
98 | pages = {555--565},
99 | author = {Noah F. Greenwald and Geneva Miller and Erick Moen and Alex Kong and Adam Kagel and Thomas Dougherty and Christine Camacho Fullaway and Brianna J. McIntosh and Ke Xuan Leow and Morgan Sarah Schwartz and Cole Pavelchek and Sunny Cui and Isabella Camplisson and Omer Bar-Tal and Jaiveer Singh and Mara Fong and Gautam Chaudhry and Zion Abraham and Jackson Moseley and Shiri Warshawsky and Erin Soon and Shirley Greenbaum and Tyler Risom and Travis Hollmann and Sean C. Bendall and Leeat Keren and William Graf and Michael Angelo and David Van Valen},
100 | title = {Whole-cell segmentation of tissue images with human-level performance using large-scale data annotation and deep learning},
101 | journal = {Nature Biotechnology}
102 | }
103 |
104 | @article{Lubeck2014,
105 | Author = {Lubeck, Eric and Coskun, Ahmet F and Zhiyentayev, Timur and Ahmad, Mubhij and Cai, Long},
106 | Journal = {Nature Methods},
107 | Pages = {360-361},
108 | Title = {Single-cell in situ RNA profiling by sequential hybridization},
109 | Volume = {11},
110 | Year = {2014}
111 | }
112 |
113 | @article{Goltsev2018,
114 | Author = {Goltsev, Yury and Samusik, Nikolay and Kennedy-Darling, Julia and Bhate, Salil and Hale, Matthew and Vazquez, Gustavo and Black, Sarah and Nolan, Garry P.},
115 | Journal = {Cell},
116 | Pages = {968--981},
117 | Title = {Deep Profiling of Mouse Splenic Architecture with CODEX Multiplexed Imaging},
118 | Volume = {174},
119 | Year = {2018}
120 | }
121 |
122 | @article{Saka2019,
123 | Author = {Saka, Sinem K. and Wang, Yu and Kishi, Jocelyn Y. and Zhu, Allen and Zeng, Yitian and Xie, Wenxin and Kirli, Koray and Yapp, Clarence and Cicconet, Marcelo and Beliveau, Brian J. and Lapan, Sylvain W. and Yin, Siyuan and Lin, Millicent and Boyden, Edward S. and Kaeser, Pascal S. and Pihan, German and Church, George M. and Yin, Peng},
124 | Journal = {Nature Biotechnology},
125 | Pages = {1080--1090},
126 | Title = {Immuno-SABER enables highly multiplexed and amplified protein imaging in tissues},
127 | Volume = {37},
128 | Year = {2019}
129 | }
130 |
131 | @article{Ijsselsteijn2019,
132 | year = {2019},
133 | volume = {10},
134 | author = {Ijsselsteijn, Marieke E. and van der Breggen, Ruud and Sarasqueta, Arantza F. and Koning, Frits and de Miranda, Noel F. C. C.},
135 | title = {A 40-Marker Panel for High Dimensional Characterization of Cancer Immune Microenvironments by Imaging Mass Cytometry},
136 | journal = {Frontiers in Immunology}
137 | }
138 |
139 | @article{Windhager2021,
140 | year = {2021},
141 | journal = {bioRxiv},
142 | author = {Windhager, Jonas and Bodenmiller, Bernd and Eling, Nils},
143 | title = {An end-to-end workflow for multiplexed image processing and analysis}
144 | }
145 |
146 | @article{Virshup2021,
147 | year = {2021},
148 | journal = {bioRxiv},
149 | author = {Isaac Virshup and Sergei Rybakov and Fabian J. Theis and Philipp Angerer and F. Alexander Wolf},
150 | title = {anndata: Annotated data}
151 | }
152 |
153 | @article{Shannon2003,
154 | year = {2003},
155 | volume = {13},
156 | pages = {2498--2504},
157 | author = {Shannon, Paul and Markiel, Andrew and Ozier, Owen and Baliga, Nitin S. and Wang, Jonathan T. and Ramage, Daniel and Amin, Nada and Schwikowski, Benno and Ideker, Trey},
158 | title = {Cytoscape: A Software Environment for Integrated Models of Biomolecular Interaction Networks},
159 | journal = {Genome Research}
160 | }
161 |
162 | @article{Amezquita2019,
163 | year = {2019},
164 | volume = {17},
165 | pages = {137--145},
166 | author = {Amezquita, Robert A. and Lun, Aaron T. L. and Becht, Etienne and Carey, Vince J. and Carpp, Lindsay N. and Geistlinger, Ludwig and Marini, Federico and Rue-Albrecht, Kevin and Risso, Davide and Soneson, Charlotte and Waldron, Levi and Pag{\`{e}}s, Herv{\'{e}} and Smith, Mike L. and Huber, Wolfgang and Morgan, Martin and Gottardo, Raphael and Hicks, Stephanie C.},
167 | title = {Orchestrating single-cell analysis with Bioconductor},
168 | journal = {Nature Methods}
169 | }
170 |
171 | @article{Righelli2022,
172 | year = {2022},
173 | volume = {38},
174 | pages = {3128--3131},
175 | author = {Dario Righelli and Lukas M Weber and Helena L Crowell and Brenda Pardo and Leonardo Collado-Torres and Shila Ghazanfar and Aaron T L Lun and Stephanie C Hicks and Davide Risso},
176 | title = {{SpatialExperiment}: infrastructure for spatially-resolved~transcriptomics data in R using Bioconductor},
177 | journal = {Bioinformatics}
178 | }
179 |
180 | @article{Haghverdi2018,
181 | year = {2018},
182 | volume = {36},
183 | pages = {421--427},
184 | author = {Haghverdi, Laleh and Lun, Aaron T. L. and Morgan, Michael D. and Marioni, John C.},
185 | title = {Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors},
186 | journal = {Nature Biotechnology}
187 | }
188 |
189 | @article{Korsunsky2019,
190 | year = {2019},
191 | volume = {16},
192 | pages = {1289--1296},
193 | author = {Korsunsky, Ilya and Millard, Nghia and Fan, Jean and Slowikowski, Kamil and Zhang, Fan and Wei, Kevin and Baglaenko, Yuriy and Brenner, Michael and Loh, Po-ru and Raychaudhuri, Soumya},
194 | title = {Fast, sensitive and accurate integration of single-cell data with Harmony},
195 | journal = {Nature Methods}
196 | }
197 |
198 | @article{Stuart2019,
199 | year = {2019},
200 | volume = {177},
201 | pages = {1888--1902},
202 | author = {Stuart, Tim and Butler, Andrew and Hoffman, Paul and Hafemeister, Christoph and Papalexi, Efthymia and Mauck, William M. III and Hao, Yuhan and Stoeckius, Marlon and Smibert, Peter and Satija, Rahul},
203 | title = {Comprehensive Integration of Single-Cell Data},
204 | journal = {Cell}
205 | }
206 |
207 | @article{Bai2021,
208 | year = {2021},
209 | volume = {12},
210 | author = {Bai, Yunhao and Zhu, Bokai and Rovira-Clave, Xavier and Chen, Han and Markovic, Maxim and Chan, Chi Ngai and Su, Tung-Hung and McIlwain, David R. and Estes, Jacob D. and Keren, Leeat and Nolan, Garry P. and Jiang, Sizun},
211 | title = {Adjacent Cell Marker Lateral Spillover Compensation and Reinforcement for Multiplexed Images},
212 | journal = {Frontiers in Immunology}
213 | }
214 |
215 | @article{Hoch2022,
216 | year = {2022},
217 | author = {Hoch, Tobias and Schulz, Daniel and Eling, Nils and Gómez, Julia Martínez and Levesque, Mitchell P. and Bodenmiller, Bernd},
218 | title = {Multiplexed imaging mass cytometry of the chemokine milieus in melanoma characterizes features of the response to immunotherapy},
219 | journal = {Science Immunology},
220 | volume = {7},
221 | number = {70},
222 | pages = {eabk1692},
223 | year = {2022},
224 | }
225 |
226 | @article{Weber2016,
227 | year = {2016},
228 | volume = {89A},
229 | pages = {1084--1096},
230 | author = {Weber, Lukas M. and Robinson, Mark D.},
231 | title = {Comparison of Clustering Methods for High-Dimensional Single-Cell Flow and Mass Cytometry Data},
232 | journal = {Cytometry Part A}
233 | }
234 |
235 | @article{Levine2015,
236 | year = {2015},
237 | volume = {162},
238 | pages = {184--197},
239 | author = {Levine, Jacob H. and Simonds, Erin F. and Bendall, Sean C. and Davis, Kara L. and Amir, El-ad D. and Tadmor, Michelle D. and Litvin, Oren and Fienberg, Harris G. and Jager, Astraea and Zunder, Eli R. and Finck, Rachel and Gedman, Amanda L. and Radtke, Ina and
240 | Downing, James R. and Pe’er, Dana and Nolan, Garry P.},
241 | title = {Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with Prognosis},
242 | journal = {Cell}
243 | }
244 |
245 | @article{Jackson2020,
246 | year = {2020},
247 | volume = {578},
248 | pages = {615--620},
249 | author = {Jackson, Hartland W. and Fischer, Jana R. and Zanotelli, Vito R. T. and Ali, H. Raza and Mechera, Robert and Soysal, Savas D. and Moch, Holger and Muenst, Simone and Varga, Zsuzsanna and Weber, Walter P. and Bodenmiller, Bernd},
250 | title = {The single-cell pathology landscape of breast cancer},
251 | journal = {Nature}
252 | }
253 |
254 | @article{Tietscher2022,
255 | year = {2022},
256 | author = {Tietscher, Sandra and Wagner, Johanna and Anzeneder, Tobias and
257 | Langwieder, Claus and Rees, Martin and Sobottka, Bettina and de Souza, Natalie
258 | and Bodenmiller, Bernd},
259 | title = {A comprehensive single-cell map of T cell exhaustion-associated immune environments in human breast cancer},
260 | journal = {Research Square}
261 | }
262 |
263 | @article{Yu2022,
264 | doi = {10.1186/s13059-022-02622-0},
265 | url = {https://doi.org/10.1186/s13059-022-02622-0},
266 | year = {2022},
267 | volume = {23},
268 | number = {1},
269 | author = {Yu, Lijia and Cao, Yue and Yang, Jean Y. H. and Yang, Pengyi},
270 | title = {Benchmarking clustering algorithms on estimating the number of cell types from single-cell {RNA}-sequencing data},
271 | journal = {Genome Biology}
272 | }
273 |
274 | @article{Eling2020,
275 | year = {2020},
276 | volume = {36},
277 | pages = {5706--5708},
278 | number = {24},
279 | author = {Eling, Nils and Damond, Nicolas and Hoch, Tobias and Bodenmiller, Bernd},
280 | title = {cytomapper: an R/Bioconductor package for visualization of highly multiplexed imaging data},
281 | journal = {Bioinformatics}
282 | }
283 |
284 | @article{Schurch2020,
285 | year = {2020},
286 | volume = {182},
287 | pages = {1341--1359},
288 | author = {Schürch, Christian M and Bhate, Salil S and Barlow, Graham L and Phillips, Darci J and Noti, Luca and Zlobec, Inti and Chu, Pauline and Black, Sarah and Demeter, Janos and Mcilwain, David R and Kinoshita, Shigemi and Samusik, Nikolay and Goltsev, Yury and Nolan, Garry P},
289 | title = {Coordinated Cellular Neighborhoods Orchestrate Antitumoral Immunity at the Colorectal Cancer Invasive Front},
290 | journal = {Cell}
291 | }
292 |
293 | @article{Patrick2023,
294 | year = {2023},
295 | author = {Ellis Patrick and Nicolas P. Canete and Sourish S. Iyengar and Andrew N. Harman and Greg T. Sutherland and Pengyi Yang},
296 | title = {Spatial analysis for highly multiplexed imaging data to identify tissue microenvironments},
297 | journal = {Cytometry Part A}
298 | }
299 |
300 | @article{Bhate2022,
301 | author = {Salil S. Bhate and Graham L. Barlow and Christian M. Schürch and Garry P. Nolan},
302 | journal = {Cell Systems},
303 | number = {2},
304 | pages = {109-130},
305 | title = {Tissue schematics map the specialization of immune tissue motifs and their appropriation by tumors},
306 | volume = {13},
307 | year = {2022}
308 | }
309 |
310 | @article{Gu2016,
311 | author = {Zuguang Gu and Roland Eils and Matthias Schlesner},
312 | journal = {Bioinformatics},
313 | pages = {2847--2849},
314 | title = {Complex heatmaps reveal patterns and correlations in multidimensional genomic data},
315 | volume = {32},
316 | year = {2016}
317 | }
318 |
319 | @article{Ali2020,
320 | author = {Raza Ali and Hartland W. Jackson and Vito R. T. Zanotelli and Esther Danenberg and Jana R. Fischer and Helen Bardwell and Elena Provenzanoa and CRUK IMAXT Grand Challenge Team and Oscar M. Rueda and Suet-Feung Chin and Samuel Aparicio and Carlos Caldas and Bernd Bodenmiller},
321 | journal = {Nature Cancer},
322 | pages = {163-175},
323 | title = {Imaging mass cytometry and multiplatform genomics define the phenogenomic landscape of breast cancer},
324 | volume = {1},
325 | year = {2020}
326 | }
327 |
328 | @article{Jiang2022,
329 | author = {Sizun Jiang and Chi Ngai Chan and Xavier Rovira-Clavé and Han Chen and Yunhao Bai and Bokai Zhu and Erin McCaffrey and Noah F Greenwald and Candace Liu and Graham L Barlow and Jason L Weirather and John Paul Oliveria and Tsuguhisa Nakayama and Ivan T Lee and Matthias S Matter and Anne E Carlisle and Darci Philips and Gustavo Vazquez and Nilanjan Mukherjee and Kathleen Busman-Sahay and Michael Nekorchuk and Margaret Terry and Skyler Younger and Marc Bosse and Janos Demeter and Scott J Rodig and Alexandar Tzankov and Yury Goltsev and David Robert McIlwain and Michael Angelo and Jacob D Estes and Garry P Nolan},
330 | journal = {Immunity},
331 | pages = {1118-1134.e8},
332 | title = {Combined protein and nucleic acid imaging reveals virus-dependent B cell and macrophage immunosuppression of tissue microenvironments},
333 | volume = {55},
334 | year = {2022}
335 | }
336 |
337 | @article{Rendeiro2021,
338 | year = {2021},
339 | volume = {593},
340 | pages = {564--569},
341 | author = {Andr{\'{e}} F. Rendeiro and Hiranmayi Ravichandran and Yaron Bram and Vasuretha Chandar and Junbum Kim and Cem Meydan and Jiwoon Park and Jonathan Foox and Tyler Hether and Sarah Warren and Youngmi Kim and Jason Reeves and Steven Salvatore and Christopher E. Mason and Eric C. Swanson and Alain C. Borczuk and Olivier Elemento and Robert E. Schwartz},
342 | title = {The spatial landscape of lung pathology during {COVID}-19 progression},
343 | journal = {Nature}
344 | }
345 |
346 | @article{Mitamura2021,
347 | year = {2021},
348 | volume = {77},
349 | pages = {595--608},
350 | author = {Yasutaka Mitamura and Daniel Schulz and Saskia Oro and Nick Li and Isabel Kolm and Claudia Lang and Reihane Ziadlou and Ge Tan and Bernd Bodenmiller and Peter Steiger and Angelo Marzano and Nicolas de Prost and Olivier Caudin and Mitchell Levesque and Corinne Stoffel and Peter Schmid-Grendelmeier and Emanual Maverakis and Cezmi A. Akdis and Marie-Charlotte Br\"{u}ggen},
351 | title = {Cutaneous and systemic hyperinflammation drives maculopapular drug exanthema in severely ill {COVID}-19 patients},
352 | journal = {Allergy}
353 | }
354 |
355 | @article{Damond2019,
356 | year = {2019},
357 | volume = {29},
358 | pages = {755--768.e5},
359 | author = {Nicolas Damond and Stefanie Engler and Vito R.T. Zanotelli and Denis Schapiro and Clive H. Wasserfall and Irina Kusmartseva and Harry S. Nick and Fabrizio Thorel and Pedro L. Herrera and Mark A. Atkinson and Bernd Bodenmiller},
360 | title = {A Map of Human Type 1 Diabetes Progression by Imaging Mass Cytometry},
361 | journal = {Cell Metabolism}
362 | }
363 |
364 | @article{Ferrian2021,
365 | year = {2021},
366 | volume = {2},
367 | pages = {100419},
368 | author = {Selena Ferrian and Candace C. Liu and Erin F. McCaffrey and Rashmi Kumar and Theodore S. Nowicki and David W. Dawson and Alex Baranski and John A. Glaspy and Antoni Ribas and Sean C. Bendall and Michael Angelo},
369 | title = {Multiplexed imaging reveals an {IFN}-$\gamma$-driven inflammatory state in nivolumab-associated gastritis},
370 | journal = {Cell Reports Medicine}
371 | }
372 |
373 | @article{Meyer2024,
374 | title = {cytoviewer: an R/Bioconductor package for interactive visualization and exploration of highly multiplexed imaging data},
375 | volume = {25},
376 | number = {1},
377 | journal = {BMC Bioinformatics},
378 | author = {Lasse Meyer and Nils Eling and Bernd Bodenmiller},
379 | year = {2024}
380 | }
381 |
382 |
--------------------------------------------------------------------------------
/data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/data/.gitkeep
--------------------------------------------------------------------------------
/img/Gating_scheme.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/img/Gating_scheme.pdf
--------------------------------------------------------------------------------
/img/Gating_scheme.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/img/Gating_scheme.pptx
--------------------------------------------------------------------------------
/img/IMC_workflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/img/IMC_workflow.png
--------------------------------------------------------------------------------
/index.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Analysis workflow for IMC data"
3 | author: "**Authors:** Nils Eling [1](#DQBM),[2](#IMHS),[*](#email), Vito Zanotelli [1](#DQBM),[2](#IMHS), Michelle Daniel [1](#DQBM),[2](#IMHS), Daniel Schulz [1](#DQBM),[2](#IMHS), Jonas Windhager [1](#DQBM),[2](#IMHS), Lasse Meyer [1](#DQBM),[2](#IMHS)"
4 | date: "**Compiled:** `r Sys.Date()`"
5 | site: bookdown::bookdown_site
6 | github-repo: "BodenmillerGroup/IMCDataAnalysis"
7 | documentclass: book
8 | bibliography: [book.bib, packages.bib]
9 | biblio-style: apalike
10 | link-citations: yes
11 | description: "This bookdown project highlights possible down-stream analyses performed on imaging mass cytometry data."
12 | ---
13 |
14 | # IMC Data Analysis Workflow {#preamble}
15 |
16 | This workflow highlights the use of common R/Bioconductor packages
17 | to analyze single-cell data obtained from segmented multi-channel images. We will not perform multi-channel image processing and segmentation in R
18 | but rather link to available approaches in Section \@ref(processing). While we
19 | use imaging mass cytometry (IMC) data as an example, the concepts presented here can be applied to images
20 | obtained by other highly-multiplexed imaging technologies (e.g. CODEX, MIBI,
21 | mIF, etc.).
22 |
23 | We will give an introduction to IMC in Section \@ref(intro) and highlight
24 | strategies to extract single-cell data from multi-channel images in Section
25 | \@ref(processing).
26 |
27 | Reproducible code written in R is available from Section \@ref(prerequisites)
28 | onwards and the workflow can be largely divided into the following parts:
29 |
30 | 1. Preprocessing (reading in the data, spillover correction)
31 | 2. Image- and cell-level quality control, low-dimensional visualization
32 | 3. Sample/batch effect correction
33 | 4. Cell phenotyping via clustering or classification
34 | 5. Single-cell and image visualization
35 | 6. Spatial analyses
36 |
37 | ## Disclaimer
38 |
39 | Multi-channel image and spatial, single-cell analysis is complex and we
40 | highlight an example workflow here. However, this workflow is not complete and
41 | does not cover all possible aspects of exploratory data analysis. Instead, we
42 | demonstrate this workflow as a solid basis that supports other aspects of data
43 | analysis. It offers interoperability with other packages for single-cell and
44 | spatial analysis and the user will need to become familiar with the general
45 | framework to efficiently analyse data obtained from multiplexed imaging
46 | technologies.
47 |
48 | ## Update freeze
49 |
50 | This workflow was actively developed until December 2023. At that time
51 | we used the most recent (`v.0.16.0`) version of `steinbock` to process the
52 | example data. If you are having issues when using newer versions of `steinbock`
53 | please open an issue [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/issues).
54 |
55 | ## Feedback and contributing
56 |
57 | We provide the workflow as an open-source resource. It does not mean that
58 | this workflow is tested on all possible datasets or biological questions and
59 | there exist multiple ways of analysing data. It is therefore recommended to
60 | check the results and question their biological interpretation.
61 |
62 | If you notice an issue or missing information, please report an issue
63 | [here](https://github.com/BodenmillerGroup/IMCDataAnalysis/issues). We also
64 | welcome contributions in form of pull requests or feature requests in form of
65 | issues. Have a look at the source code at:
66 |
67 | [https://github.com/BodenmillerGroup/IMCDataAnalysis](https://github.com/BodenmillerGroup/IMCDataAnalysis)
68 |
69 | ## Maintainer
70 |
71 | [Daniel Schulz](https://github.com/SchulzDan)
72 |
73 | ## Contributors
74 |
75 | [Nils Eling](https://github.com/nilseling)
76 | [Vito Zanotelli](https://github.com/votti)
77 | [Daniel Schulz](https://github.com/SchulzDan)
78 | [Jonas Windhager](https://github.com/jwindhager)
79 | [Michelle Daniel](https://github.com/michdaniel)
80 | [Lasse Meyer](https://github.com/lassedochreden)
81 |
82 | ## Citation
83 |
84 | The workflow has been published in
85 | [https://www.nature.com/articles/s41596-023-00881-0](https://www.nature.com/articles/s41596-023-00881-0)
86 | which you can cite as follows:
87 |
88 | ```
89 | Windhager, J., Zanotelli, V.R.T., Schulz, D. et al. An end-to-end workflow for multiplexed image processing and analysis.
90 | Nat Protoc (2023).
91 | ```
92 |
93 | ## Changelog
94 |
95 | ```{r echo=FALSE}
96 | htmltools::includeMarkdown('CHANGELOG.md')
97 | ```
98 |
99 | ---
100 |
101 | <a name="email">*</a> nils.eling@uzh.ch
102 | <a name="DQBM">1</a>: Department for Quantitative Biomedicine, University of Zurich
103 | <a name="IMHS">2</a>: Institute for Molecular Health Sciences, ETH Zurich
104 |
--------------------------------------------------------------------------------
/packages.bib:
--------------------------------------------------------------------------------
1 | @Manual{R-base,
2 | title = {R: A Language and Environment for Statistical Computing},
3 | author = {{R Core Team}},
4 | organization = {R Foundation for Statistical Computing},
5 | address = {Vienna, Austria},
6 | year = {2021},
7 | url = {https://www.R-project.org/},
8 | }
9 |
10 | @Manual{R-bookdown,
11 | title = {bookdown: Authoring Books and Technical Documents with R Markdown},
12 | author = {Yihui Xie},
13 | year = {2021},
14 | note = {R package version 0.22},
15 | url = {https://CRAN.R-project.org/package=bookdown},
16 | }
17 |
18 | @Manual{R-knitr,
19 | title = {knitr: A General-Purpose Package for Dynamic Report Generation in R},
20 | author = {Yihui Xie},
21 | year = {2021},
22 | note = {R package version 1.33},
23 | url = {https://yihui.org/knitr/},
24 | }
25 |
26 | @Manual{R-rmarkdown,
27 | title = {rmarkdown: Dynamic Documents for R},
28 | author = {JJ Allaire and Yihui Xie and Jonathan McPherson and Javier Luraschi and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and Winston Chang and Richard Iannone},
29 | year = {2021},
30 | note = {R package version 2.7},
31 | url = {https://CRAN.R-project.org/package=rmarkdown},
32 | }
33 |
34 | @Book{bookdown2016,
35 | title = {bookdown: Authoring Books and Technical Documents with {R} Markdown},
36 | author = {Yihui Xie},
37 | publisher = {Chapman and Hall/CRC},
38 | address = {Boca Raton, Florida},
39 | year = {2016},
40 | note = {ISBN 978-1138700109},
41 | url = {https://bookdown.org/yihui/bookdown},
42 | }
43 |
44 | @Book{knitr2015,
45 | title = {Dynamic Documents with {R} and knitr},
46 | author = {Yihui Xie},
47 | publisher = {Chapman and Hall/CRC},
48 | address = {Boca Raton, Florida},
49 | year = {2015},
50 | edition = {2nd},
51 | note = {ISBN 978-1498716963},
52 | url = {https://yihui.org/knitr/},
53 | }
54 |
55 | @InCollection{knitr2014,
56 | booktitle = {Implementing Reproducible Computational Research},
57 | editor = {Victoria Stodden and Friedrich Leisch and Roger D. Peng},
58 | title = {knitr: A Comprehensive Tool for Reproducible Research in {R}},
59 | author = {Yihui Xie},
60 | publisher = {Chapman and Hall/CRC},
61 | year = {2014},
62 | note = {ISBN 978-1466561595},
63 | url = {http://www.crcpress.com/product/isbn/9781466561595},
64 | }
65 |
66 | @Book{rmarkdown2018,
67 | title = {R Markdown: The Definitive Guide},
68 | author = {Yihui Xie and J.J. Allaire and Garrett Grolemund},
69 | publisher = {Chapman and Hall/CRC},
70 | address = {Boca Raton, Florida},
71 | year = {2018},
72 | note = {ISBN 9781138359338},
73 | url = {https://bookdown.org/yihui/rmarkdown},
74 | }
75 |
76 | @Book{rmarkdown2020,
77 | title = {R Markdown Cookbook},
78 | author = {Yihui Xie and Christophe Dervieux and Emily Riederer},
79 | publisher = {Chapman and Hall/CRC},
80 | address = {Boca Raton, Florida},
81 | year = {2020},
82 | note = {ISBN 9780367563837},
83 | url = {https://bookdown.org/yihui/rmarkdown-cookbook},
84 | }
85 |
86 |
--------------------------------------------------------------------------------
/preamble.tex:
--------------------------------------------------------------------------------
1 | \usepackage{booktabs}
2 |
--------------------------------------------------------------------------------
/publication/.gitignore:
--------------------------------------------------------------------------------
1 | *.DS_Store
2 | .Rproj.user
3 | /data/*
4 | /outputs/*
5 |
--------------------------------------------------------------------------------
/publication/README.md:
--------------------------------------------------------------------------------
1 | ## An end-to-end workflow for multiplexed image processing and analysis
2 |
3 | This folder of the repository contains the code to reproduce the analysis presented in the following paper:
4 |
5 | ```
6 | Windhager, J., Zanotelli, V.R.T., Schulz, D. et al. An end-to-end workflow for multiplexed image processing and analysis.
7 | Nat Protoc (2023).
8 | ```
9 |
10 | It is accessible at [https://www.nature.com/articles/s41596-023-00881-0](https://www.nature.com/articles/s41596-023-00881-0)
11 |
12 | ### System requirements
13 |
14 | To run the workflow, a computer with a recent version of a Windows, Mac, or Linux operating system (OS) is required.
15 | With increasing dataset size, more memory is required and we recommend at least 8 GB RAM to analyse the provided dataset.
16 | Alternatively, a high performance computer (e.g. cluster) can be used, provided Docker can be installed (see below).
17 | For this manuscript, the workflow was run on MacOS Big Sur (11.7.4), 2.7 GHz Quad-Core Intel Core i7, 16 GB 2133 MHz LPDDR3.
18 |
19 | ### Reproducing the analysis
20 |
21 | **1. Obtain the code**
22 |
23 | To access the code you can clone the repository via
24 |
25 | ```
26 | git clone https://github.com/BodenmillerGroup/IMCDataAnalysis.git
27 | ```
28 |
29 | or you can click the `Code` > `Download ZIP` button.
30 |
31 | Navigate to the `IMCDataAnalysis/publication/` folder and open the
32 | `publication.Rproj` file in RStudio.
33 |
34 | **2. Obtain the example data**
35 |
36 | To obtain the example data, open the [protocol.Rmd](protocol.Rmd) file in RStudio
37 | and execute the first code chunk under `Example data`.
38 |
39 | **3. Perform image processing**
40 |
41 | Image processing is performed outside of R/RStudio. To process the example
42 | data stored in `publication/data/steinbock`, open a terminal and execute the
43 | following commands:
44 |
45 | ```
46 | # setup steinbock alias
47 | alias steinbock="docker run -v /path/to/data/steinbock:/data -u $(id -u):$(id -g) ghcr.io/bodenmillergroup/steinbock:0.16.0"
48 |
49 | # image pre-processing
50 | steinbock preprocess imc images --hpf 50
51 |
52 | # image segmentation
53 | steinbock segment deepcell --minmax
54 |
55 | # intensity measurement
56 | steinbock measure intensities
57 |
58 | # regionprops measurement
59 | steinbock measure regionprops
60 |
61 | # spatial cell graph construction
62 | steinbock measure neighbors --type expansion --dmax 4
63 | ```
64 |
65 | In the command above the `/path/to/data/steinbock` needs to be adapted and
66 | replaced by the anticipated working directory.
67 |
68 | To obtain more detailed installation instructions, please refer to the
69 | [steinbock documentation](https://bodenmillergroup.github.io/steinbock/latest/install-docker/).
70 |
71 | A shell script for automatic execution of the commands can be seen at [steinbock.sh](steinbock.sh).
72 |
73 | **4. Option A: Install R packages**
74 |
75 | The workflow highlights the use of a number of R packages.
76 | All packages can be installed as follows:
77 |
78 | ```
79 | if (!requireNamespace("BiocManager", quietly = TRUE))
80 | install.packages("BiocManager")
81 |
82 | BiocManager::install(c("pheatmap", "viridis",
83 | "tiff", "distill", "ggrepel", "patchwork",
84 | "mclust", "RColorBrewer", "uwot", "Rtsne", "caret",
85 | "randomForest", "ggridges", "gridGraphics", "scales",
86 | "CATALYST", "scuttle", "scater", "dittoSeq",
87 | "tidyverse", "batchelor", "bluster","scran", "cytomapper",
88 | "imcRtools"))
89 | ```
90 |
91 | Installing the required software takes around 1-2 hours.
92 | When the workflow was written, we used R v4.3.0 and Bioconductor
93 | release version 3.17.
94 |
95 | Please see [protocol.md](protocol.md#session-information) for the exact versions of the software
96 | packages.
97 |
98 | **4. Option B: Obtain docker container**
99 |
100 | We provide a Docker container that can be used to exactly reproduce the
101 | analysis performed in the protocol. To obtain the Docker container execute the
102 | following call in the terminal:
103 |
104 | ```
105 | docker pull ghcr.io/bodenmillergroup/imcdataanalysis:2023-05-01
106 | ```
107 |
108 | After obtaining the Docker container, start it by calling:
109 |
110 | ```
111 | docker run -v /path/to/IMCDataAnalysis/publication:/home/rstudio/publication \
112 | -e PASSWORD=bioc -p 8787:8787 \
113 | ghcr.io/bodenmillergroup/imcdataanalysis:2023-05-01
114 | ```
115 |
116 | Please make sure to adapt the `/path/to/IMCDataAnalysis/publication` to the correct working directory.
117 | An RStudio server session can be accessed via a browser at `localhost:8787` using `Username: rstudio` and `Password: bioc`.
118 |
119 | **5. Execute the workflow**
120 |
121 | Open RStudio, navigate to `publication` and open the `publication.Rproj` file.
122 | The `protocol.Rmd` file contains all relevant code to reproduce the analysis.
123 | When `knitting` the `protocol.Rmd` you will be asked to update the `markdown`
124 | package. Accept the pop up and the workflow will run.
125 |
126 | Applying the workflow to the provided dataset takes roughly 30 minutes and
127 | provides the raw data files, data generated by the `steinbock` toolkit
128 | and a `SpatialExperiment` object storing all analysis results.
129 |
130 | ### Software used for the entire protocol
131 |
132 | * **napari & napari-imc (IMC-specific):** The multi-dimensional image viewer napari (https://napari.org) together with the napari-imc plugin for loading imaging mass cytometry files (https://github.com/BodenmillerGroup/napari-imc) were used to visualize and inspect raw multiplexed imaging data. Python 3.9.12 (https://www.python.org), napari 0.4.16, and napari-imc 0.6.5 were installed into a fresh conda (https://conda.io) environment; see below for installation instructions.
133 | * **steinbock Docker container:** The multi-channel image processing toolkit steinbock (https://bodenmillergroup.github.io/steinbock) was used to pre-process multiplexed imaging data, perform image segmentation, and extract single-cell data. The steinbock Docker container v0.16.0 was pulled from the GitHub container registry using Docker Desktop 4.9.0 for Mac; see below for installation instructions.
134 | * **Ilastik/CellProfiler-based segmentation pipeline:** Multiplexed image processing using random forest-based pixel classification and watershed-based cell segmentation was performed using the Ilastik/CellProfiler-based segmentation pipeline v3.6 (https://bodenmillergroup.github.io/ImcSegmentationPipeline/); see below for installation instructions.
135 |
136 | In addition, in order to use the pipeline, the following software needs to be installed:
137 | * **Ilastik:** The Ilastik software is used for pixel-classification prior to cell segmentation and can be installed from https://www.ilastik.org/download.html. The version used for this workflow is v1.4.0.
138 | * **CellProfiler:** The CellProfiler software is used to segment individual cells. The tool can be installed from https://cellprofiler.org/previous-releases on Windows (64-bit) and MacOS (10.14+). The version used in this workflow is v4.2.1.
139 | * **R setup:** Downstream analysis after image processing is conducted using the statistical programming language R, which can be installed from https://cran.r-project.org/ following the OS-specific instructions. The version used in this workflow is v4.3.0.
140 | * The RStudio software offers an easy-to-use GUI for data analysis in R. It can be installed from https://www.rstudio.com/products/rstudio/download/.
141 |
142 | ### Installation instructions
143 |
144 | * **napari & napari-imc:** Install the conda package manager according to the instructions at https://docs.conda.io/projects/conda/en/latest/user-guide/install/
145 | Create a new conda environment with Python 3.9:
146 | ```
147 | conda create -n napari-imc -y python=3.9
148 | ```
149 | Activate the conda environment and install napari & napari-imc:
150 | ```
151 | conda activate napari-imc
152 | pip install "napari[all]==0.4.16" napari-imc==0.6.5
153 | ```
154 | * **steinbock:** Instructions to install the dockerized steinbock toolkit can be found at https://bodenmillergroup.github.io/steinbock/v0.16.0/install-docker/. In particular, to run the steinbock container, Docker needs to be installed first (see online instructions). For this manuscript, we run steinbock using the following alias:
155 | ```
156 | alias steinbock="docker run -v /path/to/data/steinbock:/data -u $(id -u):$(id -g) ghcr.io/bodenmillergroup/steinbock:0.16.0"
157 | ```
158 | CRITICAL: In the command above the `/path/to/data/steinbock` needs to be adapted and replaced by the anticipated working directory.
159 |
160 | * **Ilastik/CellProfiler-based segmentation pipeline:** the pre-processing steps of the pipeline are performed in Python using a custom script. To set up the pre-processing script, the following steps need to be performed:
161 |
162 | Install conda from https://docs.conda.io/projects/conda/en/latest/user-guide/install/
163 |
164 | Clone the repository
165 | ```
166 | git clone --recursive https://github.com/BodenmillerGroup/ImcSegmentationPipeline.git
167 | ```
168 | Set up the imcsegpipe conda environment:
169 | ```
170 | cd ImcSegmentationPipeline
171 | conda env create -f environment.yml
172 | ```
173 |
174 | Configure CellProfiler to use the required plugins by opening the CellProfiler GUI, selecting Preferences, setting the CellProfiler plugins directory to `path/to/ImcSegmentationPipeline/resources/ImcPluginsCP/plugins`, and restarting CellProfiler.
175 |
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/batch-correction-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/batch-correction-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/cell-density-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/cell-density-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/cell-size-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/cell-size-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/celltype-cluster-UMAP-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/celltype-cluster-UMAP-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/celltype-cluster-heatmap-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/celltype-cluster-heatmap-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/celltype-cluster-heatmap-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/celltype-cluster-heatmap-2.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/cellular-neighbourhood-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/cellular-neighbourhood-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/cluster-sweep-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/cluster-sweep-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/compCytof-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/compCytof-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/compImage-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/compImage-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/compImage-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/compImage-2.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/compImage-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/compImage-3.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/compImage-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/compImage-4.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/marker-distributions-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/marker-distributions-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/plotSpotHeatmap-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/plotSpotHeatmap-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/segmentation-quality-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/segmentation-quality-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/spatial-community-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/spatial-community-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/spatial-context-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/spatial-context-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/umap-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/umap-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/unnamed-chunk-23-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/unnamed-chunk-23-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/unnamed-chunk-48-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/unnamed-chunk-48-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/unnamed-chunk-63-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/unnamed-chunk-63-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/unnamed-chunk-67-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/unnamed-chunk-67-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/unnamed-chunk-71-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/unnamed-chunk-71-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_github/unnamed-chunk-75-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_github/unnamed-chunk-75-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/batch-correction-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/batch-correction-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/cell-density-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/cell-density-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/cell-size-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/cell-size-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/celltype-cluster-UMAP-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/celltype-cluster-UMAP-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/celltype-cluster-heatmap-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/celltype-cluster-heatmap-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/celltype-cluster-heatmap-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/celltype-cluster-heatmap-2.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/cellular-neighbourhood-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/cellular-neighbourhood-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/cluster-sweep-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/cluster-sweep-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/compCytof-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/compCytof-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/compImage-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/compImage-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/compImage-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/compImage-2.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/compImage-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/compImage-3.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/compImage-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/compImage-4.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/marker-distributions-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/marker-distributions-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/plotSpotHeatmap-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/plotSpotHeatmap-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/segmentation-quality-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/segmentation-quality-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/spatial-community-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/spatial-community-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/spatial-context-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/spatial-context-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/umap-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/umap-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/unnamed-chunk-22-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/unnamed-chunk-22-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/unnamed-chunk-47-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/unnamed-chunk-47-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/unnamed-chunk-62-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/unnamed-chunk-62-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/unnamed-chunk-66-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/unnamed-chunk-66-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/unnamed-chunk-70-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/unnamed-chunk-70-1.png
--------------------------------------------------------------------------------
/publication/protocol_files/figure-markdown_strict/unnamed-chunk-74-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BodenmillerGroup/IMCDataAnalysis/8de28d5cbf597d0f93a9b3281c9371be6927b430/publication/protocol_files/figure-markdown_strict/unnamed-chunk-74-1.png
--------------------------------------------------------------------------------
/publication/publication.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 4
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
--------------------------------------------------------------------------------
/publication/steinbock.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # change directory
4 | BASEDIR=$(cd -- "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)
5 | cd ${BASEDIR}
6 |
7 | # setup steinbock alias
8 | shopt -s expand_aliases
9 | alias steinbock="docker run -v ${BASEDIR}/data/steinbock:/data -u $(id -u):$(id -g) ghcr.io/bodenmillergroup/steinbock:0.16.0"
10 |
11 | # image pre-processing
12 | { time steinbock preprocess imc images --hpf 50; } 2> steinbock_timing.txt
13 |
14 | # image segmentation
15 | { time steinbock segment deepcell --minmax; } 2>> steinbock_timing.txt
16 |
17 | # intensity measurement
18 | { time steinbock measure intensities; } 2>> steinbock_timing.txt
19 |
20 | # regionprops measurement
21 | { time steinbock measure regionprops; } 2>> steinbock_timing.txt
22 |
23 | # spatial cell graph construction
24 | { time steinbock measure neighbors --type expansion --dmax 4; } 2>> steinbock_timing.txt
--------------------------------------------------------------------------------
/publication/steinbock_timing.txt:
--------------------------------------------------------------------------------
1 | 2023-06-01 14:27:02,316 INFO steinbock - img/Patient4_005.tiff
2 | 2023-06-01 14:27:03,648 INFO steinbock - img/Patient4_006.tiff
3 | 2023-06-01 14:27:04,474 INFO steinbock - img/Patient4_007.tiff
4 | 2023-06-01 14:27:05,365 INFO steinbock - img/Patient4_008.tiff
5 | 2023-06-01 14:27:11,692 INFO steinbock - img/Patient3_001.tiff
6 | 2023-06-01 14:27:12,685 INFO steinbock - img/Patient3_002.tiff
7 | 2023-06-01 14:27:13,543 INFO steinbock - img/Patient3_003.tiff
8 | 2023-06-01 14:27:18,641 INFO steinbock - img/Patient2_001.tiff
9 | 2023-06-01 14:27:19,476 INFO steinbock - img/Patient2_002.tiff
10 | 2023-06-01 14:27:20,375 INFO steinbock - img/Patient2_003.tiff
11 | 2023-06-01 14:27:21,239 INFO steinbock - img/Patient2_004.tiff
12 | 2023-06-01 14:27:25,398 INFO steinbock - img/Patient1_001.tiff
13 | 2023-06-01 14:27:26,374 INFO steinbock - img/Patient1_002.tiff
14 | 2023-06-01 14:27:27,317 INFO steinbock - img/Patient1_003.tiff
15 | 2023-06-01 14:27:27,395 INFO steinbock - images.csv
16 |
17 | real 0m38.336s
18 | user 0m0.067s
19 | sys 0m0.085s
20 | 2023-06-01 14:27:55,957 INFO steinbock - masks/Patient1_001.tiff
21 | 2023-06-01 14:28:08,949 INFO steinbock - masks/Patient1_002.tiff
22 | 2023-06-01 14:28:22,273 INFO steinbock - masks/Patient1_003.tiff
23 | 2023-06-01 14:28:34,584 INFO steinbock - masks/Patient2_001.tiff
24 | 2023-06-01 14:28:47,618 INFO steinbock - masks/Patient2_002.tiff
25 | 2023-06-01 14:29:01,514 INFO steinbock - masks/Patient2_003.tiff
26 | 2023-06-01 14:29:14,739 INFO steinbock - masks/Patient2_004.tiff
27 | 2023-06-01 14:29:31,109 INFO steinbock - masks/Patient3_001.tiff
28 | 2023-06-01 14:29:44,417 INFO steinbock - masks/Patient3_002.tiff
29 | 2023-06-01 14:29:57,562 INFO steinbock - masks/Patient3_003.tiff
30 | 2023-06-01 14:30:09,662 INFO steinbock - masks/Patient4_005.tiff
31 | 2023-06-01 14:30:22,530 INFO steinbock - masks/Patient4_006.tiff
32 | 2023-06-01 14:30:35,418 INFO steinbock - masks/Patient4_007.tiff
33 | 2023-06-01 14:30:47,967 INFO steinbock - masks/Patient4_008.tiff
34 |
35 | real 3m22.248s
36 | user 0m0.074s
37 | sys 0m0.088s
38 | 2023-06-01 14:30:54,062 INFO steinbock - intensities/Patient1_001.csv
39 | 2023-06-01 14:30:54,982 INFO steinbock - intensities/Patient1_002.csv
40 | 2023-06-01 14:30:55,974 INFO steinbock - intensities/Patient1_003.csv
41 | 2023-06-01 14:30:56,723 INFO steinbock - intensities/Patient2_001.csv
42 | 2023-06-01 14:30:57,388 INFO steinbock - intensities/Patient2_002.csv
43 | 2023-06-01 14:30:58,087 INFO steinbock - intensities/Patient2_003.csv
44 | 2023-06-01 14:30:59,249 INFO steinbock - intensities/Patient2_004.csv
45 | 2023-06-01 14:31:00,113 INFO steinbock - intensities/Patient3_001.csv
46 | 2023-06-01 14:31:00,911 INFO steinbock - intensities/Patient3_002.csv
47 | 2023-06-01 14:31:01,865 INFO steinbock - intensities/Patient3_003.csv
48 | 2023-06-01 14:31:02,672 INFO steinbock - intensities/Patient4_005.csv
49 | 2023-06-01 14:31:03,779 INFO steinbock - intensities/Patient4_006.csv
50 | 2023-06-01 14:31:04,790 INFO steinbock - intensities/Patient4_007.csv
51 | 2023-06-01 14:31:05,567 INFO steinbock - intensities/Patient4_008.csv
52 |
53 | real 0m15.858s
54 | user 0m0.060s
55 | sys 0m0.068s
56 | 2023-06-01 14:31:10,023 INFO steinbock - regionprops/Patient1_001.csv
57 | 2023-06-01 14:31:10,974 INFO steinbock - regionprops/Patient1_002.csv
58 | 2023-06-01 14:31:12,017 INFO steinbock - regionprops/Patient1_003.csv
59 | 2023-06-01 14:31:12,855 INFO steinbock - regionprops/Patient2_001.csv
60 | 2023-06-01 14:31:13,665 INFO steinbock - regionprops/Patient2_002.csv
61 | 2023-06-01 14:31:14,463 INFO steinbock - regionprops/Patient2_003.csv
62 | 2023-06-01 14:31:15,587 INFO steinbock - regionprops/Patient2_004.csv
63 | 2023-06-01 14:31:16,657 INFO steinbock - regionprops/Patient3_001.csv
64 | 2023-06-01 14:31:17,741 INFO steinbock - regionprops/Patient3_002.csv
65 | 2023-06-01 14:31:18,780 INFO steinbock - regionprops/Patient3_003.csv
66 | 2023-06-01 14:31:19,516 INFO steinbock - regionprops/Patient4_005.csv
67 | 2023-06-01 14:31:20,761 INFO steinbock - regionprops/Patient4_006.csv
68 | 2023-06-01 14:31:21,589 INFO steinbock - regionprops/Patient4_007.csv
69 | 2023-06-01 14:31:22,375 INFO steinbock - regionprops/Patient4_008.csv
70 |
71 | real 0m16.791s
72 | user 0m0.067s
73 | sys 0m0.074s
74 | 2023-06-01 14:31:28,992 INFO steinbock - neighbors/Patient1_001.csv
75 | 2023-06-01 14:31:32,190 INFO steinbock - neighbors/Patient1_002.csv
76 | 2023-06-01 14:31:35,893 INFO steinbock - neighbors/Patient1_003.csv
77 | 2023-06-01 14:31:38,832 INFO steinbock - neighbors/Patient2_001.csv
78 | 2023-06-01 14:31:41,589 INFO steinbock - neighbors/Patient2_002.csv
79 | 2023-06-01 14:31:44,051 INFO steinbock - neighbors/Patient2_003.csv
80 | 2023-06-01 14:31:47,806 INFO steinbock - neighbors/Patient2_004.csv
81 | 2023-06-01 14:31:51,564 INFO steinbock - neighbors/Patient3_001.csv
82 | 2023-06-01 14:31:54,950 INFO steinbock - neighbors/Patient3_002.csv
83 | 2023-06-01 14:31:58,524 INFO steinbock - neighbors/Patient3_003.csv
84 | 2023-06-01 14:32:01,106 INFO steinbock - neighbors/Patient4_005.csv
85 | 2023-06-01 14:32:05,149 INFO steinbock - neighbors/Patient4_006.csv
86 | 2023-06-01 14:32:08,069 INFO steinbock - neighbors/Patient4_007.csv
87 | 2023-06-01 14:32:10,696 INFO steinbock - neighbors/Patient4_008.csv
88 |
89 | real 0m48.331s
90 | user 0m0.060s
91 | sys 0m0.032s
92 |
--------------------------------------------------------------------------------
/scripts/transfer_labels.R:
--------------------------------------------------------------------------------
1 | # Re-applies the cytomapper gates stored in the objects under
2 | # data/gated_cells to the current SPE object (data/spe.rds) and overwrites
3 | # the gated objects with the re-gated result.
4 | library(SingleCellExperiment)
5 | library(stringr)
6 |
7 | # The dot is escaped so that only names ending in ".rds" are matched
8 | label_files <- list.files("data/gated_cells",
9 |                           full.names = TRUE, pattern = "\\.rds$")
10 |
11 | # Read in SPE objects, named after their file so they can be written back
12 | spes <- lapply(label_files, readRDS)
13 | names(spes) <- basename(label_files)
14 |
15 | # Read in current SPE object
16 | spe <- readRDS("data/spe.rds")
17 |
18 | # For each gated object: take the cells of the same sample from the current
19 | # SPE object, apply the stored gates in order and copy the gating information
20 | new_spes <- lapply(spes, function(x) {
21 |   cur_spe <- spe[, spe$sample_id == unique(x$sample_id)]
22 |   cur_gates <- metadata(x)[grepl("cytomapper_gate", names(metadata(x)))]
23 |
24 |   # Order the gates by the number in the third "_"-separated field of their
25 |   # name; skipped without gates, where indexing column 3 would fail
26 |   if (length(cur_gates) > 0) {
27 |     gate_ids <- str_split(names(cur_gates), "_", simplify = TRUE)[, 3]
28 |     cur_gates <- cur_gates[order(as.numeric(gate_ids), decreasing = FALSE)]
29 |   }
30 |
31 |   # Move the existing metadata into a single "metadata" entry; the gates are
32 |   # then stored next to it
33 |   cur_meta <- metadata(cur_spe)
34 |   metadata(cur_spe) <- list()
35 |   metadata(cur_spe)$metadata <- cur_meta
36 |
37 |   # seq_along()/seq_len() run zero iterations on empty input, unlike 1:n
38 |   for (i in seq_along(cur_gates)) {
39 |     gate <- cur_gates[[i]]
40 |     for (j in seq_len(nrow(gate$gate))) {
41 |       # Keep only cells strictly inside the bounds of the j-th gated marker
42 |       cur_val <- assay(cur_spe, gate$exprs_values)[rownames(gate$gate)[j], ]
43 |       cur_spe <- cur_spe[, cur_val > gate$gate[j, 1] & cur_val < gate$gate[j, 2]]
44 |     }
45 |     metadata(cur_spe)[[names(cur_gates)[i]]] <- gate
46 |   }
47 |
48 |   cur_spe$cytomapper_CellLabel <- unique(x$cytomapper_CellLabel)
49 |
50 |   metadata(cur_spe)$cytomapper_SessionInfo <- metadata(x)$cytomapper_SessionInfo
51 |   metadata(cur_spe)$cytomapper_GatingDate <- metadata(x)$cytomapper_GatingDate
52 |
53 |   cur_spe
54 | })
55 |
56 | # Overwrite the gated objects; a for loop is used because only the side
57 | # effect is wanted and it does nothing when no files were found
58 | for (cur_name in names(new_spes)) {
59 |   saveRDS(new_spes[[cur_name]], file = file.path("data/gated_cells", cur_name))
60 | }
61 |
--------------------------------------------------------------------------------
/style.css:
--------------------------------------------------------------------------------
1 | p.caption { /* caption paragraphs: grey (#777) text with a 10px top margin */
2 | color: #777;
3 | margin-top: 10px;
4 | }
5 | p code { /* code inside paragraphs; this white-space value is overridden by the !important rule on the last line of this file */
6 | white-space: inherit;
7 | }
8 | pre { /* preformatted blocks: reset word breaking and wrapping to their normal behavior */
9 | word-break: normal;
10 | word-wrap: normal;
11 | }
12 | pre code { /* code inside pre; this white-space value is also overridden by the !important rule below */
13 | white-space: inherit;
14 | }
15 |
16 | pre, code {white-space:pre !important; overflow-x:scroll !important} /* all pre and code elements: keep whitespace as written and scroll horizontally instead of wrapping; !important wins over the white-space: inherit declarations above */
17 |
--------------------------------------------------------------------------------