├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── FigureScripts ├── BenignRefs_ForFigs2and3 │ ├── BenignRefs.Rmd │ ├── BenignRefs.md │ ├── Consensus_AllBenigns_phylo_Nodes.png │ ├── NodeSelection_BenignRefs_Dendrogram.png │ ├── PurestBenigns.png │ └── infercnv.21_denoised.png ├── Figure 1 │ ├── Step1_PreprocessingToSpotLevelHMMs │ │ ├── Fig1D_Step1_PreprocessingToSpotLevelHMMs.Rmd │ │ └── Fig1D_Step1_PreprocessingToSpotLevelHMMs.md │ └── Step2_FigureImages │ │ ├── Figure1_Part2_FigureImages.Rmd │ │ ├── Figure1_Part2_FigureImages.md │ │ ├── H2_5_Revised_PGA_SpatialVisualization_2022-02-28.png │ │ └── siCNV_SectionBarPlot_Figure1G.png ├── Figure 2 │ ├── Consensus_PurestBenigns.csv │ ├── Step1 │ │ ├── Figure2_AllCancer_siCNV_step1_unsupervised.Rmd │ │ ├── Figure2_AllCancer_siCNV_step1_unsupervised.md │ │ └── infercnv.21_denoised.png │ ├── Step2 │ │ ├── Consensus_AllCancer_forclustering_phylo.png │ │ ├── Fig2_Step2_ManualClustering.Rmd │ │ └── Fig2_Step2_ManualClustering.md │ ├── Step3 │ │ ├── Fig2_Step3_ClusteredPlot_and_HMM.Rmd │ │ ├── Fig2_Step3_ClusteredPlot_and_HMM.md │ │ └── infercnv.21_denoised.png │ └── siCNV_GeneOrderFile.tsv ├── Figure 3 │ ├── Consensus_H2_1_forclustering_phylo.png │ ├── Figure3.Rmd │ ├── Figure3.md │ ├── LoupeBrowser_Vis.gif │ ├── NodeSelectionDendrogram.png │ ├── NodeSelectionFromDenoised.png │ ├── infercnv.21_denoised_supervised.png │ └── infercnv.21_denoised_unsupervised.png ├── Figure 4 │ ├── Figure4a_LN │ │ ├── Figure4a_LNHeatmap.Rmd │ │ ├── Figure4a_LNHeatmap.md │ │ ├── infercnv.21_denoised.png │ │ └── siCNV_GeneOrderFile.tsv │ ├── Figure4c_SCC │ │ ├── GeneToENSMBL.csv │ │ ├── Step1 │ │ │ ├── Figure4c_Step1_P6_scRNAseq_Benigns.Rmd │ │ │ ├── Figure4c_Step1_P6_scRNAseq_Benigns.md │ │ │ ├── SCC_P6_benigns_for_clustering_phylo.png │ │ │ └── infercnv.21_denoised.png │ │ ├── Step2 │ │ │ ├── Figure4c_Step2_SCC_P6_siCNV_unsupervised.Rmd │ │ │ ├── Figure4c_Step2_SCC_P6_siCNV_unsupervised.md │ │ │ └── infercnv.21_denoised.png │ │ └── Step3 │ 
│ │ ├── Figure4c_Step3_SCC_P6_siCNV_supervised.Rmd │ │ │ ├── Figure4c_Step3_SCC_P6_siCNV_supervised.md │ │ │ ├── SCC_for_clustering_phylo.png │ │ │ └── infercnv.21_denoised.png │ └── Figure4e │ │ ├── Figure4e_pediatricmedulloblastoma.Rmd │ │ ├── Figure4e_pediatricmedulloblastoma.md │ │ └── infercnv.png ├── SCRIPTS.Rmd ├── SCRIPTS.md ├── Seurat │ ├── Seurat_Spatial_Import.Rmd │ ├── Seurat_Spatial_Import.md │ ├── filtered_feature_bc_matrix.h5 │ └── spatial │ │ ├── H2_1_tissue_hires_image.png │ │ ├── scalefactors_json.json │ │ └── tissue_positions_list.csv ├── SpotLevelCloneCalls │ ├── Figure2 │ │ ├── H1_2_Clones.csv │ │ ├── H1_4_Clones.csv │ │ ├── H1_5_Clones.csv │ │ ├── H2_1_Clones.csv │ │ ├── H2_2_Clones.csv │ │ └── H2_5_Clones.csv │ └── Figure3 │ │ └── Figure3_Clones.csv └── siCNV_GeneOrderFile.tsv ├── Images ├── KTH_Logotyp_PMS_2013.eps ├── primary-logo.png └── secondary-logo.png ├── NAMESPACE ├── R ├── ExtractSectionWise.R ├── FinalAnnotations.R ├── ImportCountData.R ├── ImportHistologicalAnnotations.R ├── ImportHistologicalOriginalSTSelections.R ├── ImportOriginalSTCountData.R ├── MergingCountAndAnnotationData.R ├── OriginalST_MergingCountAndAnnotationData.R ├── Output_PGA_Visualization_MatrixGreyNA.R ├── Plot_PGA_Visualization_Matrix().R └── SelectingSubTreeData.R ├── README.Rmd ├── README.md ├── UserGuide ├── Images │ ├── BC23209_C1_PGA_SpatialVisualization_2022-03-27.png │ ├── BreastCancer10x_forclustering_phylo.png │ ├── BreastCancer10x_forclustering_phylo_manual.png │ ├── LB_Annotated.png │ ├── LB_CloneImport.png │ ├── LB_DragSelection.png │ ├── LB_ExcludeUnlabeled.png │ ├── LB_ExportHistology.png │ ├── LB_ExportingCSV.png │ ├── LB_Histology.png │ ├── LB_ImportingCloneCSV.png │ ├── LB_PolygonalSelection.png │ ├── LB_UserguideClones_Visualized.png │ ├── LB_Userguide_12.png │ ├── NewCategoryImage.png │ ├── infercnv.21_denoised.png │ └── infercnv.21_denoised_manualselection.png ├── UserGuideDraft.Rmd ├── UserGuideDraft.md └── UserGuideFiles │ ├── 
10xBreast_UserguideHistologyAnnotations.csv │ ├── 17_HMM_predHMMi6.hmm_mode-cells.pred_cnv_genes.dat │ ├── infercnv.21_denoised.observations_dendrogram.txt │ └── siCNV_GeneOrderFile.tsv ├── environment.yml ├── index.html └── man ├── ExtractSectionWise.Rd ├── FinalAnnotations.Rd ├── ImportCountData.Rd ├── ImportHistologicalAnnotations.Rd ├── ImportHistologicalOriginalSTSelections.Rd ├── ImportOriginalSTCountData.Rd ├── MergingCountAndAnnotationData.Rd ├── OriginalST_MergingCountAndAnnotationData.Rd ├── Output_PGA_Visualization_MatrixGreyNA.Rd ├── Plot_PGA_Visualization_Matrix.Rd └── SelectingSubTreeData.Rd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: SpatialInferCNV 2 | Type: Package 3 | Title: What the Package Does (Title Case) 4 | Version: 0.1.0 5 | Author: Who wrote it 6 | Maintainer: Andrew Erickson 7 | Description: More about what it does (maybe more than one line) 8 | Use four spaces when indenting paragraphs within the Description. 9 | License: What license is it under? 
10 | Encoding: UTF-8 11 | LazyData: true 12 | Imports: 13 | tidyverse, 14 | infercnv, 15 | Seurat, 16 | hdf5r, 17 | phylogram, 18 | ape 19 | RoxygenNote: 7.1.2 20 | -------------------------------------------------------------------------------- /FigureScripts/BenignRefs_ForFigs2and3/BenignRefs.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Selecting Benign References 3 | author: "Andrew Erickson, Nuffield Department of Surgical Sciences, University of Oxford" 4 | output: 5 | md_document: 6 | variant: markdown_github 7 | #output: html_document 8 | --- 9 | 10 | # Setup 11 | 12 | Initiating libraries. 13 | 14 | ```{r setup, eval = FALSE} 15 | library(SpatialInferCNV) 16 | library(devtools) 17 | library(ape) 18 | library(phylogram) 19 | library(tidyverse) 20 | ``` 21 | 22 | # Download data 23 | 24 | Download all the data from [Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29), specifically all folders from: count_matrices/Patient 1/Visium_with_annotation/. 25 | 26 | ```{r, eval = FALSE} 27 | dir.create("Patient1_BenignRefs") 28 | setwd("Patient1_BenignRefs") 29 | ``` 30 | 31 | # Selecting Benign Histological Spot annotations 32 | 33 | We then import the consensus pathology annotations and select benigns only, and create an annotation dataframe.
34 | 35 | ```{r, eval = FALSE} 36 | H1_2_Cleaned <- ImportHistologicalAnnotations("H1_2", "./Patient1_BenignRefs/Visium_with_annotation/H1_2/H1_2_Final_Consensus_Annotations.csv") 37 | H1_2_Benigns <- filter(H1_2_Cleaned, Histology == "Benign") 38 | rm(H1_2_Cleaned) 39 | 40 | H1_4_Cleaned <- ImportHistologicalAnnotations("H1_4", "./Patient1_BenignRefs/Visium_with_annotation/H1_4/H1_4_Final_Consensus_Annotations.csv") 41 | H1_4_Benigns <- filter(H1_4_Cleaned, Histology == "Benign") 42 | rm(H1_4_Cleaned) 43 | 44 | H1_5_Cleaned <- ImportHistologicalAnnotations("H1_5", "./Patient1_BenignRefs/Visium_with_annotation/H1_5/H1_5_Final_Consensus_Annotations.csv") 45 | H1_5_Benigns <- filter(H1_5_Cleaned, Histology == "Benign") 46 | rm(H1_5_Cleaned) 47 | 48 | H2_1_Cleaned <- ImportHistologicalAnnotations("H2_1", "./Patient1_BenignRefs/Visium_with_annotation/H2_1/H2_1_Final_Consensus_Annotations.csv") 49 | H2_1_Benigns <- filter(H2_1_Cleaned, Histology == "Benign") 50 | rm(H2_1_Cleaned) 51 | 52 | H2_2_Cleaned <- ImportHistologicalAnnotations("H2_2","./Patient1_BenignRefs/Visium_with_annotation/H2_2/H2_2_Final_Consensus_Annotations.csv") 53 | H2_2_Benigns <- filter(H2_2_Cleaned, Histology == "Benign") 54 | rm(H2_2_Cleaned) 55 | 56 | H2_5_Cleaned <- ImportHistologicalAnnotations("H2_5", "./Patient1_BenignRefs/Visium_with_annotation/H2_5/H2_5_Final_Consensus_Annotations.csv") 57 | H2_5_Benigns <- filter(H2_5_Cleaned, Histology == "Benign") 58 | rm(H2_5_Cleaned) 59 | 60 | V1_2_Cleaned <- ImportHistologicalAnnotations("V1_2", "./Patient1_BenignRefs/Visium_with_annotation/V1_2/V1_2_Final_Consensus_Annotations.csv") 61 | V1_2_Benigns <- filter(V1_2_Cleaned, Histology == "Benign" | Histology == "Benign*") 62 | rm(V1_2_Cleaned) 63 | 64 | AllBenigns <- rbind(H1_2_Benigns, H1_4_Benigns) 65 | AllBenigns <- rbind(AllBenigns, H2_1_Benigns) 66 | AllBenigns <- rbind(AllBenigns, H2_2_Benigns) 67 | AllBenigns <- rbind(AllBenigns, H2_5_Benigns) 68 | AllBenigns <- rbind(AllBenigns, V1_2_Benigns) 
69 | 70 | rm(H1_2_Benigns, 71 | H1_4_Benigns, 72 | H1_5_Benigns, 73 | H2_1_Benigns, 74 | H2_2_Benigns, 75 | H2_5_Benigns, 76 | V1_2_Benigns) 77 | 78 | MergedAll <- AllBenigns 79 | names(MergedAll)[2] <- "Histology" 80 | rm(AllBenigns) 81 | ``` 82 | 83 | 84 | # Importing Count Data 85 | 86 | This code chunk imports the .h5 files, a default processed output described in the [10x Genomics cell ranger pipeline documentation](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/output/molecule_info), and appends a section label to the barcode. 87 | 88 | We use the function ImportCountData(), which requires a section label, and a path to the corresponding .h5 file. Again these are provided from the Mendeley repository (as described above). 89 | 90 | ```{r, eval = FALSE} 91 | H2_1_ENSBMLID_Counts <- ImportCountData("H2_1", "./Patient1_BenignRefs/Visium_with_annotation/H2_1/filtered_feature_bc_matrix.h5") 92 | H2_2_ENSBMLID_Counts <- ImportCountData("H2_2", "./Patient1_BenignRefs/Visium_with_annotation/H2_2/filtered_feature_bc_matrix.h5") 93 | H1_2_ENSBMLID_Counts <- ImportCountData("H1_2", "./Patient1_BenignRefs/Visium_with_annotation/H1_2/filtered_feature_bc_matrix.h5") 94 | H2_5_ENSBMLID_Counts <- ImportCountData("H2_5", "./Patient1_BenignRefs/Visium_with_annotation/H2_5/filtered_feature_bc_matrix.h5") 95 | H1_4_ENSBMLID_Counts <- ImportCountData("H1_4", "./Patient1_BenignRefs/Visium_with_annotation/H1_4/filtered_feature_bc_matrix.h5") 96 | V1_2_ENSBMLID_Counts <- ImportCountData("V1_2", "./Patient1_BenignRefs/Visium_with_annotation/V1_2/filtered_feature_bc_matrix.h5") 97 | ``` 98 | 99 | # QC, and Merging Count and Annotation Data 100 | 101 | Next, we merge annotations with count data to get section wise count matrices of only benign spots. This also applies a QC threshold (only allowing spots with 500 UMIs or more to pass to the filtered dataframes).
102 | 103 | ```{r, eval = FALSE} 104 | H2_1_Joined_Counts <- MergingCountAndAnnotationData("H2_1",MergedAll, H2_1_ENSBMLID_Counts) 105 | H2_2_Joined_Counts <- MergingCountAndAnnotationData("H2_2",MergedAll, H2_2_ENSBMLID_Counts) 106 | H1_2_Joined_Counts <- MergingCountAndAnnotationData("H1_2",MergedAll, H1_2_ENSBMLID_Counts) 107 | H2_5_Joined_Counts <- MergingCountAndAnnotationData("H2_5",MergedAll, H2_5_ENSBMLID_Counts) 108 | H1_4_Joined_Counts <- MergingCountAndAnnotationData("H1_4",MergedAll, H1_4_ENSBMLID_Counts) 109 | V1_2_Joined_Counts <- MergingCountAndAnnotationData("V1_2",MergedAll, V1_2_ENSBMLID_Counts) 110 | 111 | rm(H2_1_ENSBMLID_Counts, H2_2_ENSBMLID_Counts, H1_2_ENSBMLID_Counts, H2_5_ENSBMLID_Counts, H1_4_ENSBMLID_Counts, V1_2_ENSBMLID_Counts) 112 | ``` 113 | 114 | # Merging all count data into one object 115 | 116 | We then merge all the sectionwise dataframes together, replace the NAs introduced by the join with 0s (inferCNV requires this), and output final count and annotation .tsv files that are required for infercnv::run.
117 | 118 | ```{r, eval = FALSE} 119 | Counts_joined <- H2_1_Joined_Counts %>% full_join(H2_2_Joined_Counts, by = "Genes") 120 | Counts_joined <- Counts_joined %>% full_join(H1_2_Joined_Counts, by = "Genes") 121 | Counts_joined <- Counts_joined %>% full_join(H2_5_Joined_Counts, by = "Genes") 122 | Counts_joined <- Counts_joined %>% full_join(H1_4_Joined_Counts, by = "Genes") 123 | Counts_joined <- Counts_joined %>% full_join(V1_2_Joined_Counts, by = "Genes") 124 | 125 | rm(H2_1_Joined_Counts ,H2_2_Joined_Counts, H1_2_Joined_Counts, H2_5_Joined_Counts, H1_4_Joined_Counts, V1_2_Joined_Counts) 126 | 127 | Counts_joined <- Counts_joined %>% replace(., is.na(.), 0) 128 | Counts_joined <- Counts_joined %>% column_to_rownames(., var = "Genes") 129 | 130 | write.table(Counts_joined, "Organscale_Consensus_Benign_Counts.tsv", sep = "\t") 131 | 132 | MergedAll_Final <- FinalAnnotations(MergedAll, Counts_joined) 133 | 134 | write.table(MergedAll_Final, "Organscale_Consensus_Benign_Annotations.tsv", 135 | sep = "\t", 136 | quote = FALSE, 137 | col.names = FALSE, 138 | row.names = FALSE) 139 | ``` 140 | 141 | # Confirming that the files are formatted correctly to create an inferCNV object 142 | 143 | The siCNV_GeneOrderFile.tsv has been provided here: https://github.com/aerickso/SpatialInferCNV/tree/main/FigureScripts. 
144 | 145 | ```{r, eval = FALSE} 146 | AllBenigns_Consensus_Test_infCNV <- infercnv::CreateInfercnvObject(raw_counts_matrix="./Patient1_BenignRefs/Organscale_Consensus_Benign_Counts.tsv", 147 | gene_order_file="./FigureScripts/siCNV_GeneOrderFile.tsv", 148 | annotations_file="./Patient1_BenignRefs/Organscale_Consensus_Benign_Annotations.tsv", # same file name that write.table() produced in the previous chunk 149 | delim="\t", 150 | ref_group_names=NULL) 151 | ``` 152 | 153 | # Running InferCNV (Unsupervised) 154 | 155 | ```{r, eval = FALSE} 156 | AllBenigns_Consensus_Test_infCNV = infercnv::run(AllBenigns_Consensus_Test_infCNV, 157 | cutoff=0.1, 158 | out_dir="./Patient1_BenignRefs/Outputs", 159 | num_threads = 20, 160 | cluster_by_groups=FALSE, 161 | denoise=TRUE, 162 | HMM=FALSE) 163 | ``` 164 | 165 | 166 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/infercnv.21_denoised.png) 167 | 168 | InferCNV will output many files. We are primarily interested in the final "infercnv.21_denoised.png" file, as well as the text file associated with the dendrogram associated with the hierarchical clustering on the left hand side of the image (infercnv.21_denoised.observations_dendrogram.txt).
169 | 170 | # Importing dendrogram 171 | 172 | Next, we want to import this dendrogram file from the above step: 173 | 174 | ```{r, eval = FALSE} 175 | Consensus_AllBenigns <- read.dendrogram(file = "./Patient1_BenignRefs/Outputs/infercnv.21_denoised.observations_dendrogram.txt") 176 | 177 | Consensus_AllBenigns_phylo <- as.phylo(Consensus_AllBenigns) 178 | ``` 179 | 180 | # Visualizing dendrogram node numbers 181 | 182 | ```{r, eval = FALSE} 183 | my.subtrees = subtrees(Consensus_AllBenigns_phylo) 184 | 185 | png("Consensus_AllBenigns_phylo_Nodes.png",width=10000,height=2500, res = 300) 186 | plot(Consensus_AllBenigns_phylo,show.tip.label = FALSE) 187 | nodelabels(text=1:Consensus_AllBenigns_phylo$Nnode,node=1:Consensus_AllBenigns_phylo$Nnode+Ntip(Consensus_AllBenigns_phylo)) 188 | dev.off() 189 | ``` 190 | 191 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/PurestBenigns.png) 192 | 193 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/Consensus_AllBenigns_phylo_Nodes.png) 194 | 195 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/NodeSelection_BenignRefs_Dendrogram.png) 196 | 197 | # Node selection (Manual Task outside of R in an image editor) 198 | 199 | Next, view the output .png file, which provides an (albeit cluttered) labeling of the dendrogram tree nodes. Manually select individual nodes that correspond with a distinct signal, in this case, nodes of visium spots with little-to-no signal.
200 | 201 | ```{r, eval = FALSE} 202 | #3039 + 2560 203 | ``` 204 | 205 | # Selecting clones in R 206 | 207 | Next, after identifying the numerical nodes that correspond to dendrogram branches that correspond with a given set of signals (aka, clones), we then manually select these nodes in R, apply a label, then join them all together and output as a .csv file for use as a "Histologically Benign, inferCNV null" reference set to compare other features of interest against. 208 | 209 | ```{r, eval = FALSE} 210 | Node3039 <- SelectingSubTreeData(my.subtrees, 3039) 211 | Node2560 <- SelectingSubTreeData(my.subtrees, 2560) 212 | 213 | Merged <- rbind(Node3039, Node2560) 214 | 215 | table(Merged$Node) 216 | 217 | Merged$Node <- "Purest Benigns" 218 | names(Merged)[2] <- "Histology" 219 | 220 | write.csv(Merged, "Consensus_PurestBenigns.csv", row.names = FALSE) 221 | ``` 222 | 223 | The final file is provided at [Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft): Count_matrices/Patient 1/Consensus Pathology.csv. -------------------------------------------------------------------------------- /FigureScripts/BenignRefs_ForFigs2and3/BenignRefs.md: -------------------------------------------------------------------------------- 1 | # Setup 2 | 3 | Initiating libraries. 4 | 5 | ``` r 6 | library(SpatialInferCNV) 7 | library(devtools) 8 | library(ape) 9 | library(phylogram) 10 | library(tidyverse) 11 | ``` 12 | 13 | # Download data 14 | 15 | Download all the data from 16 | [Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29), 17 | specifically all folders from: count_matrices/Patient 18 | 1/Visium_with_annotation/. 19 | 20 | ``` r 21 | dir.create("Patient1_BenignRefs") 22 | setwd("Patient1_BenignRefs") 23 | ``` 24 | 25 | # Selecting Benign Histological Spot annotations 26 | 27 | We then import the consensus pathology annotations and select benigns 28 | only, and create an annotation dataframe.
29 | 30 | ``` r 31 | H1_2_Cleaned <- ImportHistologicalAnnotations("H1_2", "./Patient1_BenignRefs/Visium_with_annotation/H1_2/H1_2_Final_Consensus_Annotations.csv") 32 | H1_2_Benigns <- filter(H1_2_Cleaned, Histology == "Benign") 33 | rm(H1_2_Cleaned) 34 | 35 | H1_4_Cleaned <- ImportHistologicalAnnotations("H1_4", "./Patient1_BenignRefs/Visium_with_annotation/H1_4/H1_4_Final_Consensus_Annotations.csv") 36 | H1_4_Benigns <- filter(H1_4_Cleaned, Histology == "Benign") 37 | rm(H1_4_Cleaned) 38 | 39 | H1_5_Cleaned <- ImportHistologicalAnnotations("H1_5", "./Patient1_BenignRefs/Visium_with_annotation/H1_5/H1_5_Final_Consensus_Annotations.csv") 40 | H1_5_Benigns <- filter(H1_5_Cleaned, Histology == "Benign") 41 | rm(H1_5_Cleaned) 42 | 43 | H2_1_Cleaned <- ImportHistologicalAnnotations("H2_1", "./Patient1_BenignRefs/Visium_with_annotation/H2_1/H2_1_Final_Consensus_Annotations.csv") 44 | H2_1_Benigns <- filter(H2_1_Cleaned, Histology == "Benign") 45 | rm(H2_1_Cleaned) 46 | 47 | H2_2_Cleaned <- ImportHistologicalAnnotations("H2_2","./Patient1_BenignRefs/Visium_with_annotation/H2_2/H2_2_Final_Consensus_Annotations.csv") 48 | H2_2_Benigns <- filter(H2_2_Cleaned, Histology == "Benign") 49 | rm(H2_2_Cleaned) 50 | 51 | H2_5_Cleaned <- ImportHistologicalAnnotations("H2_5", "./Patient1_BenignRefs/Visium_with_annotation/H2_5/H2_5_Final_Consensus_Annotations.csv") 52 | H2_5_Benigns <- filter(H2_5_Cleaned, Histology == "Benign") 53 | rm(H2_5_Cleaned) 54 | 55 | V1_2_Cleaned <- ImportHistologicalAnnotations("V1_2", "./Patient1_BenignRefs/Visium_with_annotation/V1_2/V1_2_Final_Consensus_Annotations.csv") 56 | V1_2_Benigns <- filter(V1_2_Cleaned, Histology == "Benign" | Histology == "Benign*") 57 | rm(V1_2_Cleaned) 58 | 59 | AllBenigns <- rbind(H1_2_Benigns, H1_4_Benigns) 60 | AllBenigns <- rbind(AllBenigns, H2_1_Benigns) 61 | AllBenigns <- rbind(AllBenigns, H2_2_Benigns) 62 | AllBenigns <- rbind(AllBenigns, H2_5_Benigns) 63 | AllBenigns <- rbind(AllBenigns, V1_2_Benigns) 64 | 65 | 
rm(H1_2_Benigns, 66 | H1_4_Benigns, 67 | H1_5_Benigns, 68 | H2_1_Benigns, 69 | H2_2_Benigns, 70 | H2_5_Benigns, 71 | V1_2_Benigns) 72 | 73 | MergedAll <- AllBenigns 74 | names(MergedAll)[2] <- "Histology" 75 | rm(AllBenigns) 76 | ``` 77 | 78 | # Importing Count Data 79 | 80 | This code chunk imports the .h5 files a default processed output from 81 | [10x Genomics cell ranger pipeline 82 | documentation](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/output/molecule_info), 83 | and appends a section label to the barcode. 84 | 85 | We use the function ImportCountData(), which requires a section label, 86 | and a path to the corresponding .h5 file. Again these are provided from 87 | the Mendeley repository (as described above). 88 | 89 | ``` r 90 | H2_1_ENSBMLID_Counts <- ImportCountData("H2_1", "./Patient1_BenignRefs/Visium_with_annotation/H2_1/filtered_feature_bc_matrix.h5") 91 | H2_2_ENSBMLID_Counts <- ImportCountData("H2_2", "./Patient1_BenignRefs/Visium_with_annotation/H2_2/filtered_feature_bc_matrix.h5") 92 | H1_2_ENSBMLID_Counts <- ImportCountData("H1_2", "./Patient1_BenignRefs/Visium_with_annotation/H1_2/filtered_feature_bc_matrix.h5") 93 | H2_5_ENSBMLID_Counts <- ImportCountData("H2_5", "./Patient1_BenignRefs/Visium_with_annotation/H2_5/filtered_feature_bc_matrix.h5") 94 | H1_4_ENSBMLID_Counts <- ImportCountData("H1_4", "./Patient1_BenignRefs/Visium_with_annotation/H1_4/filtered_feature_bc_matrix.h5") 95 | V1_2_ENSBMLID_Counts <- ImportCountData("V1_2", "./Patient1_BenignRefs/Visium_with_annotation/V1_2/filtered_feature_bc_matrix.h5") 96 | ``` 97 | 98 | # QC, and Merging Count and Annotation Data 99 | 100 | Next, we merge annotations with count data to get section wise count 101 | matrices of only benign spots. This also applies a QC threshold (only 102 | allowing spots with 500 UMIs or more to pass to the filtered 103 | dataframes). 
104 | 105 | ``` r 106 | H2_1_Joined_Counts <- MergingCountAndAnnotationData("H2_1",MergedAll, H2_1_ENSBMLID_Counts) 107 | H2_2_Joined_Counts <- MergingCountAndAnnotationData("H2_2",MergedAll, H2_2_ENSBMLID_Counts) 108 | H1_2_Joined_Counts <- MergingCountAndAnnotationData("H1_2",MergedAll, H1_2_ENSBMLID_Counts) 109 | H2_5_Joined_Counts <- MergingCountAndAnnotationData("H2_5",MergedAll, H2_5_ENSBMLID_Counts) 110 | H1_4_Joined_Counts <- MergingCountAndAnnotationData("H1_4",MergedAll, H1_4_ENSBMLID_Counts) 111 | V1_2_Joined_Counts <- MergingCountAndAnnotationData("V1_2",MergedAll, V1_2_ENSBMLID_Counts) 112 | 113 | rm(H2_1_ENSBMLID_Counts, H2_2_ENSBMLID_Counts, H1_2_ENSBMLID_Counts, H2_5_ENSBMLID_Counts, H1_4_ENSBMLID_Counts, V1_2_ENSBMLID_Counts) 114 | ``` 115 | 116 | # Merging all count data into one object 117 | 118 | We then merge all the sectionwise dataframes together, replace joined 119 | NA’s with 0’s (inferCNV requires this), and output final count and 120 | annotation .tsv files that are required for infercnv::run.
121 | 122 | ``` r 123 | Counts_joined <- H2_1_Joined_Counts %>% full_join(H2_2_Joined_Counts, by = "Genes") 124 | Counts_joined <- Counts_joined %>% full_join(H1_2_Joined_Counts, by = "Genes") 125 | Counts_joined <- Counts_joined %>% full_join(H2_5_Joined_Counts, by = "Genes") 126 | Counts_joined <- Counts_joined %>% full_join(H1_4_Joined_Counts, by = "Genes") 127 | Counts_joined <- Counts_joined %>% full_join(V1_2_Joined_Counts, by = "Genes") 128 | 129 | rm(H2_1_Joined_Counts ,H2_2_Joined_Counts, H1_2_Joined_Counts, H2_5_Joined_Counts, H1_4_Joined_Counts, V1_2_Joined_Counts) 130 | 131 | Counts_joined <- Counts_joined %>% replace(., is.na(.), 0) 132 | Counts_joined <- Counts_joined %>% column_to_rownames(., var = "Genes") 133 | 134 | write.table(Counts_joined, "Organscale_Consensus_Benign_Counts.tsv", sep = "\t") 135 | 136 | MergedAll_Final <- FinalAnnotations(MergedAll, Counts_joined) 137 | 138 | write.table(MergedAll_Final, "Organscale_Consensus_Benign_Annotations.tsv", 139 | sep = "\t", 140 | quote = FALSE, 141 | col.names = FALSE, 142 | row.names = FALSE) 143 | ``` 144 | 145 | # Confirming that the files are formatted correctly to create an inferCNV object 146 | 147 | The siCNV_GeneOrderFile.tsv has been provided here: 148 | . 
149 | 150 | ``` r 151 | AllBenigns_Consensus_Test_infCNV <- infercnv::CreateInfercnvObject(raw_counts_matrix="./Patient1_BenignRefs/Organscale_Consensus_Benign_Counts.tsv", 152 | gene_order_file="./FigureScripts/siCNV_GeneOrderFile.tsv", 153 | annotations_file="./Patient1_BenignRefs/Organscale_Consensus_Benign_Annotations.tsv", # same file name that write.table() produced in the previous chunk 154 | delim="\t", 155 | ref_group_names=NULL) 156 | ``` 157 | 158 | # Running InferCNV (Unsupervised) 159 | 160 | ``` r 161 | AllBenigns_Consensus_Test_infCNV = infercnv::run(AllBenigns_Consensus_Test_infCNV, 162 | cutoff=0.1, 163 | out_dir="./Patient1_BenignRefs/Outputs", 164 | num_threads = 20, 165 | cluster_by_groups=FALSE, 166 | denoise=TRUE, 167 | HMM=FALSE) 168 | ``` 169 | 170 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/infercnv.21_denoised.png) 171 | 172 | InferCNV will output many files. We are primarily interested in the 173 | final “infercnv.21_denoised.png” file, as well as the text file 174 | associated with the dendrogram associated with the hierarchical 175 | clustering on the left hand side of the image 176 | (infercnv.21_denoised.observations_dendrogram.txt).
177 | 178 | # Importing dendrogram 179 | 180 | Next, we want to import this dendrogram file from the above step: 181 | 182 | ``` r 183 | Consensus_AllBenigns <- read.dendrogram(file = "./Patient1_BenignRefs/Outputs/infercnv.21_denoised.observations_dendrogram.txt") 184 | 185 | Consensus_AllBenigns_phylo <- as.phylo(Consensus_AllBenigns) 186 | ``` 187 | 188 | # Visualizing dendrogram node numbers 189 | 190 | ``` r 191 | my.subtrees = subtrees(Consensus_AllBenigns_phylo) 192 | 193 | png("Consensus_AllBenigns_phylo_Nodes.png",width=10000,height=2500, res = 300) 194 | plot(Consensus_AllBenigns_phylo,show.tip.label = FALSE) 195 | nodelabels(text=1:Consensus_AllBenigns_phylo$Nnode,node=1:Consensus_AllBenigns_phylo$Nnode+Ntip(Consensus_AllBenigns_phylo)) 196 | dev.off() 197 | ``` 198 | 199 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/PurestBenigns.png) 200 | 201 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/Consensus_AllBenigns_phylo_Nodes.png) 202 | 203 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/NodeSelection_BenignRefs_Dendrogram.png) 204 | 205 | # Node selection (Manual Task outside of R in an image editor) 206 | 207 | Next, view the output .png file, which provides an (albeit cluttered) 208 | labeling of the dendrogram tree nodes. Manually select individual nodes 209 | that correspond with a distinct signal, in this case, nodes of visium 210 | spots with little-to-no signal.
211 | 212 | ``` r 213 | #3039 + 2560 214 | ``` 215 | 216 | # Selecting clones in R 217 | 218 | Next, after identifying the numerical nodes that correspond to 219 | dendrogram branches that correspond with a given set of signals (aka, 220 | clones), we then manually select these nodes in R, apply a label, then 221 | join them all together and output as a .csv file for use as a 222 | “Histologically Benign, inferCNV null” reference set to compare other 223 | features of interest against. 224 | 225 | ``` r 226 | Node3039 <- SelectingSubTreeData(my.subtrees, 3039) 227 | Node2560 <- SelectingSubTreeData(my.subtrees, 2560) 228 | 229 | Merged <- rbind(Node3039, Node2560) 230 | 231 | table(Merged$Node) 232 | 233 | Merged$Node <- "Purest Benigns" 234 | names(Merged)[2] <- "Histology" 235 | 236 | write.csv(Merged, "Consensus_PurestBenigns.csv", row.names = FALSE) 237 | ``` 238 | 239 | The final file is provided at 240 | [Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft): 241 | Count_matrices/Patient 1/Consensus Pathology.csv. 
242 | -------------------------------------------------------------------------------- /FigureScripts/BenignRefs_ForFigs2and3/Consensus_AllBenigns_phylo_Nodes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/BenignRefs_ForFigs2and3/Consensus_AllBenigns_phylo_Nodes.png -------------------------------------------------------------------------------- /FigureScripts/BenignRefs_ForFigs2and3/NodeSelection_BenignRefs_Dendrogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/BenignRefs_ForFigs2and3/NodeSelection_BenignRefs_Dendrogram.png -------------------------------------------------------------------------------- /FigureScripts/BenignRefs_ForFigs2and3/PurestBenigns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/BenignRefs_ForFigs2and3/PurestBenigns.png -------------------------------------------------------------------------------- /FigureScripts/BenignRefs_ForFigs2and3/infercnv.21_denoised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/BenignRefs_ForFigs2and3/infercnv.21_denoised.png -------------------------------------------------------------------------------- /FigureScripts/Figure 1/Step2_FigureImages/H2_5_Revised_PGA_SpatialVisualization_2022-02-28.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 
1/Step2_FigureImages/H2_5_Revised_PGA_SpatialVisualization_2022-02-28.png -------------------------------------------------------------------------------- /FigureScripts/Figure 1/Step2_FigureImages/siCNV_SectionBarPlot_Figure1G.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 1/Step2_FigureImages/siCNV_SectionBarPlot_Figure1G.png -------------------------------------------------------------------------------- /FigureScripts/Figure 2/Step1/Figure2_AllCancer_siCNV_step1_unsupervised.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Figure2_Step1_unsupervised_allcancer" 3 | author: "Andrew Erickson" 4 | output: md_document 5 | --- 6 | # Setup 7 | 8 | ```{r setup, message=FALSE} 9 | library(tidyverse) 10 | library(SpatialInferCNV) 11 | ``` 12 | 13 | # Creating a working directory 14 | 15 | We start by creating an empty working directory so that all downloaded files are organized in one place. Download the files [from Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29): Count_matrices/Patient 1/Visium_with_annotation. 16 | 17 | 18 | ```{r, eval = FALSE} 19 | dir.create("Figure2_output") 20 | setwd("Figure2_output") 21 | ``` 22 | 23 | # Consensus Purest Benigns 24 | 25 | Importing Consensus_PurestBenigns.csv. Creating this file is documented [in this script](https://github.com/aerickso/SpatialInferCNV/tree/main/FigureScripts/BenignRefs_ForFigs2and3), but is provided [Via Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft): Count_matrices/Patient 1/Consensus_PurestBenigns.csv.
26 |
27 | ```{r, eval = FALSE}
28 | PurestBenigns_All <- read.csv("./Figure2_output/Patient 1/Consensus_PurestBenigns.csv")
29 | ```
30 |
31 | # Selecting Patient 1 All Cancer Annotations
32 |
33 | Next, we select all cancer annotations from all sections, and create an annotation file for all of the cancer bearing spots from patient 1. We bind this with the purest benigns, to create a list of all barcodes, reference set (benigns) and observation set (cancer spots) for analysis.
34 |
35 | ```{r, eval = FALSE}
36 | # Histology labels that mark the cancer-bearing spots of each section.
37 | cancer_histologies <- list(
38 |   H1_2 = "GG1",
39 |   H1_4 = c("GG2", "GG4 Cribriform"),
40 |   H1_5 = "GG4 Cribriform",
41 |   H2_1 = c("GG2", "GG4"),
42 |   H2_2 = "GG2",
43 |   H2_5 = c("GG4 Cribriform", "Transition_State")
44 | )
45 |
46 | # Import each section's consensus annotations and keep only its cancer spots.
47 | cancer_spots <- lapply(names(cancer_histologies), function(section_id) {
48 |   annotation_csv <- file.path("./Figure2_output/Patient 1/Visium_with_annotation",
49 |                               section_id,
50 |                               paste0(section_id, "_Final_Consensus_Annotations.csv"))
51 |   annotations <- ImportHistologicalAnnotations(section_id, annotation_csv)
52 |   keep_labels <- cancer_histologies[[section_id]]
53 |   dplyr::filter(annotations, Histology %in% keep_labels)
54 | })
55 |
56 | # Stack the sections in the order listed above.
57 | AllCancers <- do.call(rbind, cancer_spots)
58 | names(AllCancers)[2] <- "Histology"
59 |
60 | # Benign reference spots and cancer observation spots in one annotation table.
61 | MergedAll <- rbind(PurestBenigns_All, AllCancers)
62 |
63 | # Only MergedAll is used by the following chunks.
64 | rm(cancer_histologies, cancer_spots, PurestBenigns_All, AllCancers)
65 | ```
66 |
67 | # Merging Cancer and Benign annotations with the ENSMBLIDs
68 |
69 | Next, we create count dataframes that include only spots to be included in the analysis (defined above), and that pass a QC threshold of >500 UMIs per spot.
85 |
86 | ```{r, eval = FALSE}
87 | # Sections are joined in this order; full_join() appends each section's
88 | # columns after those already present.
89 | sections <- c("H2_1", "H1_5", "H2_2", "H1_2", "H2_5", "H1_4", "V1_2")
90 |
91 | Counts_joined <- NULL
92 | for (section_id in sections) {
93 |   h5_path <- file.path("./Figure2_output/Patient 1/Visium_with_annotation",
94 |                        section_id, "filtered_feature_bc_matrix.h5")
95 |   section_counts <- ImportCountData(section_id, h5_path)
96 |   section_joined <- MergingCountAndAnnotationData(section_id, MergedAll, section_counts)
97 |   rm(section_counts) # drop the raw counts before the next section is loaded
98 |
99 |   # The first section seeds the table; later ones are joined on the Genes column.
100 |   Counts_joined <- if (is.null(Counts_joined)) {
101 |     section_joined
102 |   } else {
103 |     full_join(Counts_joined, section_joined, by = "Genes")
104 |   }
105 | }
106 | rm(sections, section_id, h5_path, section_joined)
107 | ```
108 |
109 | # Joining all Counts
110 |
111 | Next, we replace NAs from the joined count dataframe with 0's (required for inferCNV), and output the count and annotation .tsv files required for infercnv::run.
112 |
113 | ```{r, eval = FALSE}
114 | # Genes missing from a section come out of the full joins as NA; store them as 0.
115 | Counts_joined <- Counts_joined %>% replace(., is.na(.), 0)
116 | Counts_joined <- Counts_joined %>% column_to_rownames(., var = "Genes")
117 |
118 | write.table(Counts_joined, "Organscale_Unsupervised_Consensus_AllCancer_Counts.tsv", sep = "\t")
119 |
120 | MergedAll_Final <- FinalAnnotations(MergedAll, Counts_joined)
121 |
122 | # Written without header, row names or quotes.
123 | write.table(MergedAll_Final, "Organscale_Unsupervised_Consensus_AllCancer_Annotations.tsv",
124 |             sep = "\t",
125 |             quote = FALSE,
126 |             col.names = FALSE,
127 |             row.names = FALSE)
128 | ```
129 |
130 | # Creating the inferCNV object (prior to run)
131 |
132 | We then create the infercnv object and confirm that the above were run correctly.
153 |
154 | ```{r, eval = FALSE}
155 | AllCancer_Unsupervised <- infercnv::CreateInfercnvObject(raw_counts_matrix="./Organscale_Unsupervised_Consensus_AllCancer_Counts.tsv", # count table written by write.table() in the previous chunk
156 |                                                gene_order_file="./siCNV_GeneOrderFile.tsv", # NOTE(review): must be present in the working directory - confirm
157 |                                                annotations_file="./Organscale_Unsupervised_Consensus_AllCancer_Annotations.tsv", # annotation table written by write.table() in the previous chunk
158 |                                                delim="\t",
159 |                                                ref_group_names="Purest Benigns", # NOTE(review): presumably the Histology label carried by Consensus_PurestBenigns.csv - confirm
160 |                                                chr_exclude = c("chrM"))
161 |
162 | ```
163 |
164 | # Unsupervised Run - (Typically ran on cluster)
165 |
166 | We then run the analysis (typically ran on a high performance cluster).
167 |
168 | ```{r, eval = FALSE}
169 | AllCancer_Unsupervised = infercnv::run(AllCancer_Unsupervised,
170 |                                               cutoff=0.1,
171 |                                               out_dir="./Figure2_output/Figure2_Step1/Outputs", # Step 2 reads infercnv.observations_dendrogram.txt from this directory
172 |                                               cluster_by_groups=FALSE, # NOTE(review): presumably clusters all observation spots together rather than per annotation group - see infercnv::run docs
173 |                                               num_threads = 20,
174 |                                               denoise=TRUE,
175 |                                               HMM=FALSE)
176 | ```
177 |
178 | The output infercnv.observations_dendrogram.txt and infercnv.21_denoised.png are used for the next step.
179 |
180 | ![infercnv.21_denoised.png output](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%202/Step1/infercnv.21_denoised.png).
181 | 182 | -------------------------------------------------------------------------------- /FigureScripts/Figure 2/Step1/Figure2_AllCancer_siCNV_step1_unsupervised.md: -------------------------------------------------------------------------------- 1 | # Setup 2 | 3 | library(tidyverse) 4 | 5 | ## -- Attaching packages --------------------------------------- tidyverse 1.3.1 -- 6 | 7 | ## v ggplot2 3.3.5 v purrr 0.3.4 8 | ## v tibble 3.1.1 v dplyr 1.0.6 9 | ## v tidyr 1.1.3 v stringr 1.4.0 10 | ## v readr 2.0.1 v forcats 0.5.1 11 | 12 | ## -- Conflicts ------------------------------------------ tidyverse_conflicts() -- 13 | ## x dplyr::filter() masks stats::filter() 14 | ## x dplyr::lag() masks stats::lag() 15 | 16 | library(SpatialInferCNV) 17 | 18 | ## Registered S3 method overwritten by 'spatstat.geom': 19 | ## method from 20 | ## print.boxx cli 21 | 22 | ## Warning: replacing previous import 'phylogram::as.phylo' by 'ape::as.phylo' when 23 | ## loading 'SpatialInferCNV' 24 | 25 | # Creating a working directory 26 | 27 | We start by creating an empty working directory so that all downloaded 28 | files are organized in one place. Download the files [from 29 | Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft): 30 | Count\_matrices/Patient 1/Visium\_with\_annotation. 31 | 32 | dir.create("Figure2_output") 33 | setwd("Figure2_output") 34 | 35 | # Consensus Purest Benigns 36 | 37 | Importing Consensus\_PurestBenigns.csv. Creating this file is documented 38 | [in this 39 | script](https://github.com/aerickso/SpatialInferCNV/tree/main/FigureScripts/BenignRefs_ForFigs2and3), 40 | but is provided [Via 41 | Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft): 42 | Count\_matrices/Patient 1/Consensus\_PurestBenigns.csv. 
43 | 44 | PurestBenigns_All <- read.csv("./Figure2_output/Patient 1/Consensus_PurestBenigns.csv") 45 | 46 | # Selecting Patient 1 All Cancer Annotations 47 | 48 | Next, we select all cancer annotations from all sections, and create an 49 | annotation file for all of the cancer bearing spots from patient 1. We 50 | bind this with the purest benigns, to create a list of all barcodes, 51 | reference set (benigns) and observation set (cancer spots) for analysis. 52 | 53 | H1_2_Annotations <- ImportHistologicalAnnotations("H1_2", "./Figure2_output/Patient 1/Visium_with_annotation/H1_2/H1_2_Final_Consensus_Annotations.csv") 54 | H1_2_CancerSpots <- filter(H1_2_Annotations, Histology == "GG1") 55 | 56 | H1_4_Annotations <- ImportHistologicalAnnotations("H1_4", "./Figure2_output/Patient 1/Visium_with_annotation/H1_4/H1_4_Final_Consensus_Annotations.csv") 57 | H1_4_CancerSpots <- filter(H1_4_Annotations, Histology == "GG2" | Histology == "GG4 Cribriform") 58 | 59 | H1_5_Annotations <- ImportHistologicalAnnotations("H1_5", "./Figure2_output/Patient 1/Visium_with_annotation/H1_5/H1_5_Final_Consensus_Annotations.csv") 60 | H1_5_CancerSpots <- filter(H1_5_Annotations, Histology == "GG4 Cribriform") 61 | 62 | H2_1_Annotations <- ImportHistologicalAnnotations("H2_1", "./Figure2_output/Patient 1/Visium_with_annotation/H2_1/H2_1_Final_Consensus_Annotations.csv") 63 | H2_1_CancerSpots <- filter(H2_1_Annotations, Histology == "GG2" | Histology == "GG4") 64 | 65 | H2_2_Annotations <- ImportHistologicalAnnotations("H2_2", "./Figure2_output/Patient 1/Visium_with_annotation/H2_2/H2_2_Final_Consensus_Annotations.csv") 66 | H2_2_CancerSpots <- filter(H2_2_Annotations, Histology == "GG2") 67 | 68 | H2_5_Annotations <- ImportHistologicalAnnotations("H2_5", "./Figure2_output/Patient 1/Visium_with_annotation/H2_5/H2_5_Final_Consensus_Annotations.csv") 69 | H2_5_CancerSpots <- filter(H2_5_Annotations, Histology == "GG4 Cribriform" | Histology == "Transition_State") 70 | 71 | 
rm(H1_2_Annotations, 72 | H1_4_Annotations, 73 | H1_5_Annotations, 74 | H2_1_Annotations, 75 | H2_2_Annotations, 76 | H2_5_Annotations) 77 | 78 | AllCancers <- rbind(H1_2_CancerSpots, H1_4_CancerSpots) 79 | AllCancers <- rbind(AllCancers, H1_5_CancerSpots) 80 | AllCancers <- rbind(AllCancers, H2_1_CancerSpots) 81 | AllCancers <- rbind(AllCancers, H2_2_CancerSpots) 82 | AllCancers <- rbind(AllCancers, H2_5_CancerSpots) 83 | 84 | names(AllCancers)[2] <- "Histology" 85 | 86 | rm(H1_2_CancerSpots, 87 | H1_4_CancerSpots, 88 | H1_5_CancerSpots, 89 | H2_1_CancerSpots, 90 | H2_2_CancerSpots, 91 | H2_5_CancerSpots) 92 | 93 | MergedAll <- rbind(PurestBenigns_All, AllCancers) 94 | 95 | rm(PurestBenigns_All) 96 | rm(AllCancers) 97 | 98 | # Merging Cancer and Benign annotations with the ENSMBLIDs 99 | 100 | Next, we select create count dataframes that include only spots to be 101 | included in the analysis (defined above), and that pass a QC threshold 102 | of >500 UMIs per spot. 103 | 104 | H2_1_ENSBMLID_Counts <- ImportCountData("H2_1", "./Figure2_output/Patient 1/Visium_with_annotation/H2_1/filtered_feature_bc_matrix.h5") 105 | H2_1_Joined_Counts <- MergingCountAndAnnotationData("H2_1",MergedAll, H2_1_ENSBMLID_Counts) 106 | rm(H2_1_ENSBMLID_Counts) 107 | Counts_joined <- H2_1_Joined_Counts 108 | rm(H2_1_Joined_Counts) 109 | 110 | H1_5_ENSBMLID_Counts <- ImportCountData("H1_5", "./Figure2_output/Patient 1/Visium_with_annotation/H1_5/filtered_feature_bc_matrix.h5") 111 | H1_5_Joined_Counts <- MergingCountAndAnnotationData("H1_5",MergedAll, H1_5_ENSBMLID_Counts) 112 | rm(H1_5_ENSBMLID_Counts) 113 | Counts_joined <- Counts_joined %>% full_join(H1_5_Joined_Counts, by = "Genes") 114 | rm(H1_5_Joined_Counts) 115 | 116 | H2_2_ENSBMLID_Counts <- ImportCountData("H2_2", "./Figure2_output/Patient 1/Visium_with_annotation/H2_2/filtered_feature_bc_matrix.h5") 117 | H2_2_Joined_Counts <- MergingCountAndAnnotationData("H2_2",MergedAll, H2_2_ENSBMLID_Counts) 118 | rm(H2_2_ENSBMLID_Counts) 
119 | Counts_joined <- Counts_joined %>% full_join(H2_2_Joined_Counts, by = "Genes") 120 | rm(H2_2_Joined_Counts) 121 | 122 | H1_2_ENSBMLID_Counts <- ImportCountData("H1_2", "./Figure2_output/Patient 1/Visium_with_annotation/H1_2/filtered_feature_bc_matrix.h5") 123 | H1_2_Joined_Counts <- MergingCountAndAnnotationData("H1_2",MergedAll, H1_2_ENSBMLID_Counts) 124 | rm(H1_2_ENSBMLID_Counts) 125 | Counts_joined <- Counts_joined %>% full_join(H1_2_Joined_Counts, by = "Genes") 126 | rm(H1_2_Joined_Counts) 127 | 128 | H2_5_ENSBMLID_Counts <- ImportCountData("H2_5", "./Figure2_output/Patient 1/Visium_with_annotation/H2_5/filtered_feature_bc_matrix.h5") 129 | H2_5_Joined_Counts <- MergingCountAndAnnotationData("H2_5",MergedAll, H2_5_ENSBMLID_Counts) 130 | rm(H2_5_ENSBMLID_Counts) 131 | Counts_joined <- Counts_joined %>% full_join(H2_5_Joined_Counts, by = "Genes") 132 | rm(H2_5_Joined_Counts) 133 | 134 | H1_4_ENSBMLID_Counts <- ImportCountData("H1_4", "./Figure2_output/Patient 1/Visium_with_annotation/H1_4/filtered_feature_bc_matrix.h5") 135 | H1_4_Joined_Counts <- MergingCountAndAnnotationData("H1_4",MergedAll, H1_4_ENSBMLID_Counts) 136 | rm(H1_4_ENSBMLID_Counts) 137 | Counts_joined <- Counts_joined %>% full_join(H1_4_Joined_Counts, by = "Genes") 138 | rm(H1_4_Joined_Counts) 139 | 140 | V1_2_ENSBMLID_Counts <- ImportCountData("V1_2", "./Figure2_output/Patient 1/Visium_with_annotation/V1_2/filtered_feature_bc_matrix.h5") 141 | V1_2_Joined_Counts <- MergingCountAndAnnotationData("V1_2",MergedAll, V1_2_ENSBMLID_Counts) 142 | rm(V1_2_ENSBMLID_Counts) 143 | Counts_joined <- Counts_joined %>% full_join(V1_2_Joined_Counts, by = "Genes") 144 | rm(V1_2_Joined_Counts) 145 | 146 | # Joining all Counts 147 | 148 | Next, we replace NAs from the joined count dataframe with 0’s (required 149 | for inferCNV), and output the count and annotation .tsv files required 150 | for infercnv:run. 
151 | 152 | Counts_joined <- Counts_joined %>% replace(., is.na(.), 0) 153 | Counts_joined <- Counts_joined %>% column_to_rownames(., var = "Genes") 154 | 155 | write.table(Counts_joined, "Organscale_Unsupervised_Consensus_AllCancer_Counts.tsv", sep = "\t") 156 | 157 | MergedAll_Final <- FinalAnnotations(MergedAll, Counts_joined) 158 | 159 | write.table(MergedAll_Final, "Organscale_Unsupervised_Consensus_AllCancer_Annotations.tsv", 160 | sep = "\t", 161 | quote = FALSE, 162 | col.names = FALSE, 163 | row.names = FALSE) 164 | 165 | # Creating the inferCNV object (prior to run) 166 | 167 | We then create the infercnv object and confirm that the above were run 168 | correctly. 169 | 170 | AllCancer_Unsupervised <- infercnv::CreateInfercnvObject(raw_counts_matrix="./Organscale_Unsupervised_Consensus_AllCancer_Counts.tsv", 171 | gene_order_file="./siCNV_GeneOrderFile.tsv", 172 | annotations_file="./Organscale_Unsupervised_Consensus_AllCancer_Annotations.tsv", 173 | delim="\t", 174 | ref_group_names="Purest Benigns", 175 | chr_exclude = c("chrM")) 176 | 177 | # Unsupervised Run - (Typically ran on cluster) 178 | 179 | We then run the analysis (typically ran on a high performance cluster). 180 | 181 | AllCancer_Unsupervised = infercnv::run(AllCancer_Unsupervised, 182 | cutoff=0.1, 183 | out_dir="./Figure2_output/Figure2_Step1/Outputs", 184 | cluster_by_groups=FALSE, 185 | num_threads = 20, 186 | denoise=TRUE, 187 | HMM=FALSE) 188 | 189 | The output infercnv.observations\_dendrogram.txt and 190 | infercnv.21\_denoised.png are used for the next step. 191 | 192 | ![infercnv.21\_denoised.png 193 | output](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%202/Step1/infercnv.21_denoised.png). 
194 |
--------------------------------------------------------------------------------
/FigureScripts/Figure 2/Step1/infercnv.21_denoised.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 2/Step1/infercnv.21_denoised.png
--------------------------------------------------------------------------------
/FigureScripts/Figure 2/Step2/Consensus_AllCancer_forclustering_phylo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 2/Step2/Consensus_AllCancer_forclustering_phylo.png
--------------------------------------------------------------------------------
/FigureScripts/Figure 2/Step2/Fig2_Step2_ManualClustering.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Fig2_Step2_ManualClustering"
3 | author: "Andrew Erickson"
4 | output: md_document
5 | ---
6 |
7 | # Setup
8 |
9 | Initializing libraries.
10 |
11 | ```{r setup, message=FALSE}
12 | library(SpatialInferCNV)
13 | library(phylogram)
14 | library(ape)
15 | library(tidyverse)
16 | ```
17 |
18 | # Importing dendrogram
19 |
20 | Importing the dendrogram file created in step 1.
21 |
22 | ```{r, eval = FALSE}
23 | Consensus_AllCancer_for_clustering <- read.dendrogram(file="./Figure2_output/Figure2_Step1/Outputs/infercnv.observations_dendrogram.txt")
24 |
25 | Consensus_AllCancer_for_clustering_phylo <- as.phylo(Consensus_AllCancer_for_clustering)
26 | ```
27 |
28 | # Visualizing Tree
29 |
30 | Next, we use the dendrogram file to visualize the dendrogram itself.
31 |
32 | ```{r, eval = FALSE}
33 | my.subtrees <- subtrees(Consensus_AllCancer_for_clustering_phylo) # subtrees() to subset
34 | # Label the internal nodes 1..Nnode; their indices in the tree are offset by the number of tips.
35 | png("Consensus_AllCancer_forclustering_phylo.png", width = 10000, height = 2500, res = 300)
36 | plot(Consensus_AllCancer_for_clustering_phylo, show.tip.label = FALSE)
37 | nodelabels(text = seq_len(Consensus_AllCancer_for_clustering_phylo$Nnode), node = Ntip(Consensus_AllCancer_for_clustering_phylo) + seq_len(Consensus_AllCancer_for_clustering_phylo$Nnode))
38 | dev.off()
39 | ```
40 |
41 | Here is the output image:
42 |
43 | ![example Consensus_AllCancer_forclustering_phylo.png for section H2_5](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%202/Step2/Consensus_AllCancer_forclustering_phylo.png).
44 |
45 | # Manual Clone Selection
46 |
47 | Comparison of the output image and the denoised image (through use of an image viewer), allows for selection of groups of spots with shared CNVs. Note the "nodes" from the visualized dendrogram, allowing for supervised selection of clones.
48 |
49 | ```{r, eval = FALSE}
50 | # Dendrogram node -> clone, chosen by comparing the node-labelled dendrogram
51 | # with the denoised heatmap. Several nodes may belong to the same clone.
52 | node_to_clone <- c(
53 |   "4617" = "Clone_J",
54 |   "4446" = "Clone_I",
55 |   "3617" = "Clone_I",
56 |   "2934" = "Clone_F",
57 |   "2893" = "Clone_E",
58 |   "2832" = "Clone_E",
59 |   "2769" = "Clone_E",
60 |   "3114" = "Clone_H",
61 |   "2991" = "Clone_B",
62 |   "2525" = "Clone_G",
63 |   "2284" = "Clone_C",
64 |   "2078" = "Clone_D",
65 |   "92"   = "Clone_K",
66 |   "3"    = "Clone_A"
67 | )
68 |
69 | # Collect the spots under each selected node, stacked in the order above.
70 | Merged <- do.call(rbind, lapply(as.numeric(names(node_to_clone)), function(node) {
71 |   SelectingSubTreeData(my.subtrees, node)
72 | }))
73 |
74 | table(Merged$Node)
75 |
76 | # Replace each "Node_<n>" label with its clone; unmapped labels are kept as is.
77 | clone_of_label <- setNames(unname(node_to_clone), paste0("Node_", names(node_to_clone)))
78 | hit <- match(Merged$Node, names(clone_of_label))
79 | Merged$Node <- ifelse(is.na(hit), Merged$Node, clone_of_label[hit])
80 |
81 | write.csv(Merged, "Fig2_forclustering.csv", row.names = FALSE)
82 | rm(node_to_clone, clone_of_label, hit)
83 | ```
84 |
85 | This Fig2_forclustering.csv file is used in [Step 3](https://github.com/aerickso/SpatialInferCNV/tree/main/FigureScripts/Figure%202/Step3).
86 |
87 | # Outputting .CSV files for LoupeBrowser visualization.
88 |
89 | LoupeBrowser files are available from the authors upon request: andrew.erickson@nds.ox.ac.uk, or joakim.lundenberg@scilifelab.se. However, we provide the [high resolution input files](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29) : Histological_images/Patient 1/Visium and FASTQ files (EGA link pending) to run [SpaceRanger](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/output/overview) to output the LoupeBrowser files.
119 |
120 | ```{r, eval = FALSE}
121 | # The first four characters of Barcode name the section; the spot barcode
122 | # starts at character 6 (character 5 is skipped) and has every "." replaced by "-".
123 | Clones_ForLoupeBrowser <- Merged %>%
124 |   mutate(section = substr(Barcode, 1, 4),
125 |          Barcode = gsub(".", "-", trimws(substring(Barcode, 6)), fixed = TRUE))
126 |
127 | # One Barcode/Node CSV per section.
128 | for (section_id in c("H1_5", "H2_5", "H1_4", "H1_2", "H2_1", "H2_2")) {
129 |   Clones_ForLoupeBrowser %>%
130 |     filter(section == section_id) %>%
131 |     select(Barcode, Node) %>%
132 |     write.csv(paste0("Fig2e_", section_id, "_Clones_ForLoupeBrowser.csv"), row.names = FALSE)
133 | }
134 | rm(Clones_ForLoupeBrowser, section_id)
135 | ```
136 |
137 |
138 |
--------------------------------------------------------------------------------
/FigureScripts/Figure 2/Step2/Fig2_Step2_ManualClustering.md:
--------------------------------------------------------------------------------
1 | # Setup
2 |
3 | Initializing libraries.
4 | 5 | library(SpatialInferCNV) 6 | 7 | ## Warning: replacing previous import 'phylogram::as.phylo' by 'ape::as.phylo' when 8 | ## loading 'SpatialInferCNV' 9 | 10 | library(phylogram) 11 | library(ape) 12 | 13 | ## 14 | ## Attaching package: 'ape' 15 | 16 | ## The following object is masked from 'package:phylogram': 17 | ## 18 | ## as.phylo 19 | 20 | library(tidyverse) 21 | 22 | ## Registered S3 method overwritten by 'cli': 23 | ## method from 24 | ## print.boxx spatstat.geom 25 | 26 | ## -- Attaching packages --------------------------------------- tidyverse 1.3.1 -- 27 | 28 | ## v ggplot2 3.3.5 v purrr 0.3.4 29 | ## v tibble 3.1.1 v dplyr 1.0.6 30 | ## v tidyr 1.1.3 v stringr 1.4.0 31 | ## v readr 2.0.1 v forcats 0.5.1 32 | 33 | ## -- Conflicts ------------------------------------------ tidyverse_conflicts() -- 34 | ## x dplyr::filter() masks stats::filter() 35 | ## x dplyr::lag() masks stats::lag() 36 | 37 | \#Importing dendrogram 38 | 39 | Importing the dendogram file created in step 1. 40 | 41 | Consensus_AllCancer_for_clustering <- read.dendrogram(file="./Figure2_output/Figure2_Step1/Outputs/infercnv.observations_dendrogram.txt") 42 | 43 | Consensus_AllCancer_for_clustering_phylo <- as.phylo(Consensus_AllCancer_for_clustering) 44 | 45 | # Visualizing Tree 46 | 47 | Next, we use the dendrogram file to visualize the dendrogram itself. 
48 | 49 | my.subtrees = subtrees(Consensus_AllCancer_for_clustering_phylo) # subtrees() to subset 50 | 51 | png("Consensus_AllCancer_forclustering_phylo.png",width=10000,height=2500, res = 300) 52 | plot(Consensus_AllCancer_for_clustering_phylo,show.tip.label = FALSE) 53 | nodelabels(text=1:Consensus_AllCancer_for_clustering_phylo$Nnode,node=1:Consensus_AllCancer_for_clustering_phylo$Nnode+Ntip(Consensus_AllCancer_for_clustering_phylo)) 54 | dev.off() 55 | 56 | Here is the output image: 57 | 58 | ![example Consensus\_AllCancer\_forclustering\_phylo.png for section 59 | H2\_5](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%202/Step2/Consensus_AllCancer_forclustering_phylo.png). 60 | 61 | # Manual Clone Selection 62 | 63 | Comparison of the output image and the denoised image (through use of an 64 | image viewer), allows for selection of groups of spots with shared CNVs. 65 | Note the “nodes” from the visualized dendrogram, allowing for supervised 66 | selection of clones. 
67 | 68 | #Clone J - Node 4617 69 | #Clone I - Node 4446 70 | #Clone I - Node 3617 71 | #Clone F - Node 2934 72 | #Clone E - Node 2893 73 | #Clone E - Node 2832 74 | #Clone E - Node 2769 75 | #Clone H - Node 3114 76 | #Clone B - Node 2991 77 | #Clone G - Node 2525 78 | #Clone C - Node 2284 79 | #Clone D - Node 2078 80 | #Clone K - Node 92 81 | #Clone A - Node 3 82 | 83 | Node4617 <- SelectingSubTreeData(my.subtrees, 4617) 84 | Node4446 <- SelectingSubTreeData(my.subtrees, 4446) 85 | Node3617 <- SelectingSubTreeData(my.subtrees, 3617) 86 | Node2934 <- SelectingSubTreeData(my.subtrees, 2934) 87 | Node2893 <- SelectingSubTreeData(my.subtrees, 2893) 88 | Node2832 <- SelectingSubTreeData(my.subtrees, 2832) 89 | Node2769 <- SelectingSubTreeData(my.subtrees, 2769) 90 | Node3114 <- SelectingSubTreeData(my.subtrees, 3114) 91 | Node2991 <- SelectingSubTreeData(my.subtrees, 2991) 92 | Node2525 <- SelectingSubTreeData(my.subtrees, 2525) 93 | Node2284 <- SelectingSubTreeData(my.subtrees, 2284) 94 | Node2078 <- SelectingSubTreeData(my.subtrees, 2078) 95 | Node92 <- SelectingSubTreeData(my.subtrees, 92) 96 | Node3 <- SelectingSubTreeData(my.subtrees, 3) 97 | 98 | Merged <- rbind(Node4617, Node4446) 99 | Merged <- rbind(Merged, Node3617) 100 | Merged <- rbind(Merged, Node2934) 101 | Merged <- rbind(Merged, Node2893) 102 | Merged <- rbind(Merged, Node2832) 103 | Merged <- rbind(Merged, Node2769) 104 | Merged <- rbind(Merged, Node3114) 105 | Merged <- rbind(Merged, Node2991) 106 | Merged <- rbind(Merged, Node2525) 107 | Merged <- rbind(Merged, Node2284) 108 | Merged <- rbind(Merged, Node2078) 109 | Merged <- rbind(Merged, Node92) 110 | Merged <- rbind(Merged, Node3) 111 | 112 | table(Merged$Node) 113 | 114 | Merged$Node <- ifelse(Merged$Node == "Node_4617" , "Clone_J", 115 | ifelse(Merged$Node == "Node_4446" , "Clone_I", 116 | ifelse(Merged$Node == "Node_3617" , "Clone_I", 117 | ifelse(Merged$Node == "Node_2934" , "Clone_F", 118 | ifelse(Merged$Node == "Node_2893" , "Clone_E", 119 | 
ifelse(Merged$Node == "Node_2832" , "Clone_E", 120 | ifelse(Merged$Node == "Node_2769" , "Clone_E", 121 | ifelse(Merged$Node == "Node_3114" , "Clone_H", 122 | ifelse(Merged$Node == "Node_2991" , "Clone_B", 123 | ifelse(Merged$Node == "Node_2525" , "Clone_G", 124 | ifelse(Merged$Node == "Node_2284" , "Clone_C", 125 | ifelse(Merged$Node == "Node_2078" , "Clone_D", 126 | ifelse(Merged$Node == "Node_92" , "Clone_K", 127 | ifelse(Merged$Node == "Node_3" , "Clone_A", Merged$Node)))))))))))))) 128 | 129 | write.csv(Merged, "Fig2_forclustering.csv", row.names = FALSE) 130 | 131 | This Fig2\_forclustering.csv file is used in [Step 132 | 3](https://github.com/aerickso/SpatialInferCNV/tree/main/FigureScripts/Figure%202/Step3). 133 | 134 | # Outputting .CSV files for LoupeBrowser visualization. 135 | 136 | LoupeBrowser files are available from the authors upon request: 137 | , or . 138 | However, we provide the [high resolution input 139 | files](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29) 140 | : Histological\_images/Patient 1/Visium and FASTQ files (EGA link 141 | pending) to run 142 | [SpaceRanger](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/output/overview) 143 | to output the LoupeBrowser files. 
144 | 145 | H1_5_Merged <- Merged 146 | H1_5_Merged <- H1_5_Merged %>% mutate(section = substr(Barcode, 1, 4)) 147 | H1_5_Merged$Barcode <- trimws(substr(H1_5_Merged$Barcode, 6, 100)) 148 | H1_5_Merged$Barcode <- gsub("\\.", "\\-", H1_5_Merged$Barcode) 149 | H1_5_Clones_ForLoupeBrowser <- filter(H1_5_Merged, section == "H1_5") %>% 150 | select(Barcode, Node) 151 | write.csv(H1_5_Clones_ForLoupeBrowser, "Fig2e_H1_5_Clones_ForLoupeBrowser.csv", row.names = FALSE) 152 | 153 | H2_5_Merged <- Merged 154 | H2_5_Merged <- H2_5_Merged %>% mutate(section = substr(Barcode, 1, 4)) 155 | H2_5_Merged$Barcode <- trimws(substr(H2_5_Merged$Barcode, 6, 100)) 156 | H2_5_Merged$Barcode <- gsub("\\.", "\\-", H2_5_Merged$Barcode) 157 | H2_5_Clones_ForLoupeBrowser <- filter(H2_5_Merged, section == "H2_5") %>% 158 | select(Barcode, Node) 159 | write.csv(H2_5_Clones_ForLoupeBrowser, "Fig2e_H2_5_Clones_ForLoupeBrowser.csv", row.names = FALSE) 160 | 161 | H1_4_Merged <- Merged 162 | H1_4_Merged <- H1_4_Merged %>% mutate(section = substr(Barcode, 1, 4)) 163 | H1_4_Merged$Barcode <- trimws(substr(H1_4_Merged$Barcode, 6, 100)) 164 | H1_4_Merged$Barcode <- gsub("\\.", "\\-", H1_4_Merged$Barcode) 165 | H1_4_Clones_ForLoupeBrowser <- filter(H1_4_Merged, section == "H1_4") %>% 166 | select(Barcode, Node) 167 | write.csv(H1_4_Clones_ForLoupeBrowser, "Fig2e_H1_4_Clones_ForLoupeBrowser.csv", row.names = FALSE) 168 | 169 | H1_2_Merged <- Merged 170 | H1_2_Merged <- H1_2_Merged %>% mutate(section = substr(Barcode, 1, 4)) 171 | H1_2_Merged$Barcode <- trimws(substr(H1_2_Merged$Barcode, 6, 100)) 172 | H1_2_Merged$Barcode <- gsub("\\.", "\\-", H1_2_Merged$Barcode) 173 | H1_2_Clones_ForLoupeBrowser <- filter(H1_2_Merged, section == "H1_2") %>% 174 | select(Barcode, Node) 175 | write.csv(H1_2_Clones_ForLoupeBrowser, "Fig2e_H1_2_Clones_ForLoupeBrowser.csv", row.names = FALSE) 176 | 177 | H2_1_Merged <- Merged 178 | H2_1_Merged <- H2_1_Merged %>% mutate(section = substr(Barcode, 1, 4)) 179 | 
H2_1_Merged$Barcode <- trimws(substr(H2_1_Merged$Barcode, 6, 100)) 180 | H2_1_Merged$Barcode <- gsub("\\.", "\\-", H2_1_Merged$Barcode) 181 | H2_1_Clones_ForLoupeBrowser <- filter(H2_1_Merged, section == "H2_1") %>% 182 | select(Barcode, Node) 183 | write.csv(H2_1_Clones_ForLoupeBrowser, "Fig2e_H2_1_Clones_ForLoupeBrowser.csv", row.names = FALSE) 184 | 185 | H2_2_Merged <- Merged 186 | H2_2_Merged <- H2_2_Merged %>% mutate(section = substr(Barcode, 1, 4)) 187 | H2_2_Merged$Barcode <- trimws(substr(H2_2_Merged$Barcode, 6, 100)) 188 | H2_2_Merged$Barcode <- gsub("\\.", "\\-", H2_2_Merged$Barcode) 189 | H2_2_Clones_ForLoupeBrowser <- filter(H2_2_Merged, section == "H2_2") %>% 190 | select(Barcode, Node) 191 | write.csv(H2_2_Clones_ForLoupeBrowser, "Fig2e_H2_2_Clones_ForLoupeBrowser.csv", row.names = FALSE) 192 | -------------------------------------------------------------------------------- /FigureScripts/Figure 2/Step3/Fig2_Step3_ClusteredPlot_and_HMM.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Fig2_Step3_ClusteredPlot_and_HMM" 3 | author: "Andrew Erickson" 4 | output: md_document 5 | --- 6 | 7 | # Setup 8 | 9 | ```{r, message=FALSE} 10 | library(tidyverse) 11 | library(infercnv) 12 | library(Seurat) 13 | library(hdf5r) 14 | library(SpatialInferCNV) 15 | ``` 16 | 17 | # Pre-processing clustered data 18 | 19 | Importing previously downloaded Consensus_PurestBenigns.csv (step 1), and the Fig2_forclustering.csv file created in step 2. We use this to create an updated annotation file for infercnv::run. 
20 | 21 | ```{r, eval = FALSE} 22 | PurestBenigns_All <- read.csv("./Figure2_output/Patient 1/Consensus_PurestBenigns.csv") 23 | PurestBenigns_All$Histology <- "Purest Benigns" 24 | 25 | CorrectedBenigns_Consensus_AllCancer_ManualNodes_selected <- read.csv("./Mendeley/ProcessedFilesForFigures/Figure2/Step3/Inputs/Fig2_forclustering.csv") 26 | names(CorrectedBenigns_Consensus_AllCancer_ManualNodes_selected)[2] <- "Histology" 27 | 28 | Fig2a_ManualClusters <- rbind(CorrectedBenigns_Consensus_AllCancer_ManualNodes_selected, PurestBenigns_All) 29 | 30 | write.table(Fig2a_ManualClusters, "Fig2_ManualClusters_for_ClusteredPlot_and_HMM.tsv", 31 | sep = "\t", 32 | quote = FALSE, 33 | col.names = FALSE, 34 | row.names = FALSE) 35 | ``` 36 | 37 | # Creating the inferCNV object (prior to run) 38 | 39 | Now creating the object for the supervised clustered run. 40 | 41 | ```{r, eval = FALSE} 42 | AllCancer_clustered <- infercnv::CreateInfercnvObject(raw_counts_matrix="./Organscale_Unsupervised_Consensus_AllCancer_Counts.tsv", 43 | gene_order_file="./siCNV_GeneOrderFile.tsv", 44 | annotations_file="./Fig2_ManualClusters_for_ClusteredPlot_and_HMM.tsv", 45 | delim="\t", 46 | ref_group_names="Purest Benigns", 47 | chr_exclude = c("chrM")) 48 | ``` 49 | 50 | # Supervised Run - (Typically run on a cluster) 51 | 52 | Now running infercnv on the object created for the supervised clustered run. Note: this is typically run on a high performance cluster. 53 | 54 | ```{r, eval = FALSE} 55 | AllCancer_clustered = infercnv::run(AllCancer_clustered, 56 | cutoff=0.1, 57 | out_dir="./Figure2_output/Figure2_step3/Outputs", 58 | num_threads = 20, 59 | cluster_by_groups=TRUE, 60 | denoise=TRUE, 61 | HMM=TRUE) 62 | ``` 63 | 64 | And here is the final output file infercnv.21_denoised.png (order rearranged in the manuscript figure 2). 
65 | 66 | ![infercnv.21_denoised.png output](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%202/Step3/infercnv.21_denoised.png) 67 | 68 | -------------------------------------------------------------------------------- /FigureScripts/Figure 2/Step3/Fig2_Step3_ClusteredPlot_and_HMM.md: -------------------------------------------------------------------------------- 1 | # Setup 2 | 3 | library(tidyverse) 4 | 5 | ## -- Attaching packages --------------------------------------- tidyverse 1.3.1 -- 6 | 7 | ## v ggplot2 3.3.5 v purrr 0.3.4 8 | ## v tibble 3.1.1 v dplyr 1.0.6 9 | ## v tidyr 1.1.3 v stringr 1.4.0 10 | ## v readr 2.0.1 v forcats 0.5.1 11 | 12 | ## -- Conflicts ------------------------------------------ tidyverse_conflicts() -- 13 | ## x dplyr::filter() masks stats::filter() 14 | ## x dplyr::lag() masks stats::lag() 15 | 16 | library(infercnv) 17 | library(Seurat) 18 | 19 | ## Registered S3 method overwritten by 'spatstat.geom': 20 | ## method from 21 | ## print.boxx cli 22 | 23 | ## Attaching SeuratObject 24 | 25 | library(hdf5r) 26 | 27 | ## 28 | ## Attaching package: 'hdf5r' 29 | 30 | ## The following object is masked from 'package:purrr': 31 | ## 32 | ## flatten_df 33 | 34 | library(SpatialInferCNV) 35 | 36 | ## Warning: replacing previous import 'phylogram::as.phylo' by 'ape::as.phylo' when 37 | ## loading 'SpatialInferCNV' 38 | 39 | # Pre-processing clustered data 40 | 41 | Importing previously downloaded Consensus\_PurestBenigns.csv (step 1), 42 | and the Fig2\_forclustering.csv file created in step 2. We use this to 43 | create an updated annotation file for infercnv::run. 
44 | 45 | PurestBenigns_All <- read.csv("./Figure2_output/Patient 1/Consensus_PurestBenigns.csv") 46 | PurestBenigns_All$Histology <- "Purest Benigns" 47 | 48 | CorrectedBenigns_Consensus_AllCancer_ManualNodes_selected <- read.csv("./Mendeley/ProcessedFilesForFigures/Figure2/Step3/Inputs/Fig2_forclustering.csv") 49 | names(CorrectedBenigns_Consensus_AllCancer_ManualNodes_selected)[2] <- "Histology" 50 | 51 | Fig2a_ManualClusters <- rbind(CorrectedBenigns_Consensus_AllCancer_ManualNodes_selected, PurestBenigns_All) 52 | 53 | write.table(Fig2a_ManualClusters, "Fig2_ManualClusters_for_ClusteredPlot_and_HMM.tsv", 54 | sep = "\t", 55 | quote = FALSE, 56 | col.names = FALSE, 57 | row.names = FALSE) 58 | 59 | # Creating the inferCNV object (prior to run) 60 | 61 | Now creating the object for the supervised clustered run. 62 | 63 | AllCancer_clustered <- infercnv::CreateInfercnvObject(raw_counts_matrix="./Organscale_Unsupervised_Consensus_AllCancer_Counts.tsv", 64 | gene_order_file="./siCNV_GeneOrderFile.tsv", 65 | annotations_file="./Fig2_ManualClusters_for_ClusteredPlot_and_HMM.tsv", 66 | delim="\t", 67 | ref_group_names="Purest Benigns", 68 | chr_exclude = c("chrM")) 69 | 70 | # Supervised Run - (Typically run on a cluster) 71 | 72 | Now running infercnv on the object created for the supervised clustered run. Note: this is 73 | typically run on a high performance cluster. 74 | 75 | AllCancer_clustered = infercnv::run(AllCancer_clustered, 76 | cutoff=0.1, 77 | out_dir="./Figure2_output/Figure2_step3/Outputs", 78 | num_threads = 20, 79 | cluster_by_groups=TRUE, 80 | denoise=TRUE, 81 | HMM=TRUE) 82 | 83 | And here is the final output file infercnv.21\_denoised.png (order 84 | rearranged in the manuscript figure 2). 
85 | 86 | ![infercnv.21\_denoised.png 87 | output](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%202/Step3/infercnv.21_denoised.png) 88 | -------------------------------------------------------------------------------- /FigureScripts/Figure 2/Step3/infercnv.21_denoised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 2/Step3/infercnv.21_denoised.png -------------------------------------------------------------------------------- /FigureScripts/Figure 3/Consensus_H2_1_forclustering_phylo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 3/Consensus_H2_1_forclustering_phylo.png -------------------------------------------------------------------------------- /FigureScripts/Figure 3/LoupeBrowser_Vis.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 3/LoupeBrowser_Vis.gif -------------------------------------------------------------------------------- /FigureScripts/Figure 3/NodeSelectionDendrogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 3/NodeSelectionDendrogram.png -------------------------------------------------------------------------------- /FigureScripts/Figure 3/NodeSelectionFromDenoised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 
3/NodeSelectionFromDenoised.png -------------------------------------------------------------------------------- /FigureScripts/Figure 3/infercnv.21_denoised_supervised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 3/infercnv.21_denoised_supervised.png -------------------------------------------------------------------------------- /FigureScripts/Figure 3/infercnv.21_denoised_unsupervised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 3/infercnv.21_denoised_unsupervised.png -------------------------------------------------------------------------------- /FigureScripts/Figure 4/Figure4a_LN/Figure4a_LNHeatmap.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Fig4a_LNHeatmap 3 | Data with InferCNV 4 | author: "Andrew Erickson, Nuffield Department of Surgical Sciences, University of Oxford" 5 | output: 6 | md_document: 7 | variant: markdown_github 8 | --- 9 | 10 | # Setup 11 | 12 | ```{r setup, message=FALSE} 13 | library(tidyverse) 14 | library(SpatialInferCNV) 15 | ``` 16 | 17 | # Importing Count Data 18 | 19 | Just to make things a little easier for the user, we start by creating an empty working directory so that all downloaded files are organized in one place. 20 | 21 | ```{r, eval = FALSE} 22 | dir.create("siCNV_Figure4a") 23 | setwd("siCNV_Figure4a") 24 | ``` 25 | 26 | This code chunk imports the .h5 file, a default processed output of the 10x Genomics pipeline (see the [10x Genomics cell ranger pipeline documentation](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/output/molecule_info)), and appends a section label to the barcode. 
27 | 28 | We use the function ImportCountData(), which requires a section label, and a path to the corresponding .h5 file. 29 | 30 | ```{r, eval = FALSE} 31 | download.file("https://cf.10xgenomics.com/samples/spatial-exp/1.1.0/V1_Human_Lymph_Node/V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5", "./V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5", mode = "wb") 32 | 33 | Lymph_ENSBMLID_Counts <- ImportCountData("LN10X", "./V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5") 34 | 35 | Lymph_Annotations <- Lymph_ENSBMLID_Counts %>% 36 | select(Barcode) 37 | Lymph_Annotations$Histology <- "Lymph" 38 | ``` 39 | 40 | # QC, and Merging Count and Annotation Data 41 | 42 | We then join the annotations with the count data, to select only spots that are to be analyzed. This step also includes a bare minimum QC thresholding step of allowing only Visium spots with 500 counts or more to be included in the analysis 43 | 44 | ```{r, eval = FALSE} 45 | Lymph_Joined_Counts <- MergingCountAndAnnotationData("LN10X",Lymph_Annotations, Lymph_ENSBMLID_Counts) 46 | 47 | rm(Lymph_ENSBMLID_Counts) 48 | ``` 49 | 50 | # Merging all count data into one object 51 | 52 | We then join all of the selected count data together into a final dataframe, which is then output as a .tsv file (1 of 3 required inputs for inferCNV), as well as a revised annotation file (takes into account only those of interest that remain after the QC step) as another .tsv file (2 of 3 required inputs from inferCNV) 53 | 54 | ```{r, eval = FALSE} 55 | Counts_joined <- Lymph_Joined_Counts 56 | 57 | Counts_joined <- Counts_joined %>% replace(., is.na(.), 0) 58 | Counts_joined <- Counts_joined %>% column_to_rownames(., var = "Genes") 59 | 60 | write.table(Counts_joined, "10xLymph_Counts.tsv", sep = "\t") 61 | 62 | LymphFinalAnnotations <- FinalAnnotations(Lymph_Annotations, Counts_joined) 63 | 64 | write.table(LymphFinalAnnotations, "10xLymph_Annotations.tsv", 65 | sep = "\t", 66 | quote = FALSE, 67 | col.names = FALSE, 68 
| row.names = FALSE) 69 | ``` 70 | 71 | # Confirming that the files are formatted correctly to create an inferCNV object 72 | 73 | This code then creates an inferCNV object from the 2 previously created files, as well as from a gene position file, which maps ENSMBLIDs to genomic loci. This file has been provided at /SpatialInferCNV_Dev/SpatialInferCNV/FigureScripts/Figure 4/Figure4a_LN, but if you'd like to build one yourself, please see [the InferCNV documentation on their wiki](https://github.com/broadinstitute/inferCNV/wiki/instructions-create-genome-position-file). 74 | 75 | ```{r, eval = FALSE} 76 | Lymph10X_unsupervised <- infercnv::CreateInfercnvObject(raw_counts_matrix="10xLymph_Counts.tsv", 77 | gene_order_file="./siCNV_GeneOrderFile.tsv", 78 | annotations_file="10xLymph_Annotations.tsv", 79 | delim="\t", 80 | ref_group_names=NULL, 81 | chr_exclude = c("chrM")) 82 | ``` 83 | 84 | # Running InferCNV (Unsupervised) 85 | 86 | If the above steps were performed correctly, then there should be no errors from the previous step. 87 | 88 | Warning! This step is quite computationally intensive. Consider using a high performance cluster for timely analyses instead of running it on a local computer. 89 | 90 | ```{r, eval = FALSE} 91 | Lymph10X_unsupervised = infercnv::run(Lymph10X_unsupervised, 92 | cutoff=0.1, 93 | out_dir="./Fig4_LN/Outputs/", 94 | num_threads = 20, 95 | cluster_by_groups=FALSE, 96 | denoise=TRUE, 97 | HMM=FALSE) 98 | ``` 99 | 100 | # Final output 101 | 102 | The final desired output is the infercnv.21_denoised.png file. 
103 | 104 | ![Output Image](infercnv.21_denoised.png) 105 | 106 | -------------------------------------------------------------------------------- /FigureScripts/Figure 4/Figure4a_LN/Figure4a_LNHeatmap.md: -------------------------------------------------------------------------------- 1 | # Setup 2 | 3 | ``` r 4 | library(tidyverse) 5 | library(SpatialInferCNV) 6 | ``` 7 | 8 | ## Warning: replacing previous import 'phylogram::as.phylo' by 'ape::as.phylo' when 9 | ## loading 'SpatialInferCNV' 10 | 11 | # Importing Count Data 12 | 13 | Just to make things a little easier for the user, we start by creating 14 | an empty working directory so that all downloaded files are organized in 15 | one place. 16 | 17 | ``` r 18 | dir.create("siCNV_Figure4a") 19 | setwd("siCNV_Figure4a") 20 | ``` 21 | 22 | This code chunk imports the .h5 files a default processed output from 23 | [10x Genomics cell ranger pipeline 24 | documentation](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/output/molecule_info), 25 | and appends a section label to the barcode. 26 | 27 | We use the function ImportCountData(), which requires a section label, 28 | and a path to the corresponding .h5 file. 29 | 30 | ``` r 31 | download.file("https://cf.10xgenomics.com/samples/spatial-exp/1.1.0/V1_Human_Lymph_Node/V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5", "./V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5", mode = "wb") 32 | 33 | Lymph_ENSBMLID_Counts <- ImportCountData("LN10X", "./V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5") 34 | 35 | Lymph_Annotations <- Lymph_ENSBMLID_Counts %>% 36 | select(Barcode) 37 | Lymph_Annotations$Histology <- "Lymph" 38 | ``` 39 | 40 | # QC, and Merging Count and Annotation Data 41 | 42 | We then join the annotations with the count data, to select only spots 43 | that are to be analyzed. 
This step also includes a bare minimum QC 44 | thresholding step of allowing only Visium spots with 500 counts or more 45 | to be included in the analysis 46 | 47 | ``` r 48 | Lymph_Joined_Counts <- MergingCountAndAnnotationData("LN10X",Lymph_Annotations, Lymph_ENSBMLID_Counts) 49 | 50 | rm(Lymph_ENSBMLID_Counts) 51 | ``` 52 | 53 | # Merging all count data into one object 54 | 55 | We then join all of the selected count data together into a final 56 | dataframe, which is then output as a .tsv file (1 of 3 required inputs 57 | for inferCNV), as well as a revised annotation file (takes into account 58 | only those of interest that remain after the QC step) as another .tsv 59 | file (2 of 3 required inputs from inferCNV) 60 | 61 | ``` r 62 | Counts_joined <- Lymph_Joined_Counts 63 | 64 | Counts_joined <- Counts_joined %>% replace(., is.na(.), 0) 65 | Counts_joined <- Counts_joined %>% column_to_rownames(., var = "Genes") 66 | 67 | write.table(Counts_joined, "10xLymph_Counts.tsv", sep = "\t") 68 | 69 | LymphFinalAnnotations <- FinalAnnotations(Lymph_Annotations, Counts_joined) 70 | 71 | write.table(LymphFinalAnnotations, "10xLymph_Annotations.tsv", 72 | sep = "\t", 73 | quote = FALSE, 74 | col.names = FALSE, 75 | row.names = FALSE) 76 | ``` 77 | 78 | # Confirming that the files are formatted correctly to create an inferCNV object 79 | 80 | This code then creates an inferCNV object from the 2 previously created 81 | files, as well as from a gene position file, which maps ENSMBLIDs to 82 | genomic loci. This file has been provided at 83 | /SpatialInferCNV_Dev/SpatialInferCNV/FigureScripts/Figure 4/Figure4a_LN, 84 | but if you’d like to build one yourself, please see [the InferCNV 85 | documentation on their 86 | wiki](https://github.com/broadinstitute/inferCNV/wiki/instructions-create-genome-position-file). 
87 | 88 | ``` r 89 | Lymph10X_unsupervised <- infercnv::CreateInfercnvObject(raw_counts_matrix="10xLymph_Counts.tsv", 90 | gene_order_file="./siCNV_GeneOrderFile.tsv", 91 | annotations_file="10xLymph_Annotations.tsv", 92 | delim="\t", 93 | ref_group_names=NULL, 94 | chr_exclude = c("chrM")) 95 | ``` 96 | 97 | # Running InferCNV (Unsupervised) 98 | 99 | If the above steps were performed correctly, then there should be no 100 | errors from the previous step. 101 | 102 | Warning! This step is quite computationally intensive. Consider using a 103 | high performance cluster for timely analyses instead of running it on a 104 | local computer. 105 | 106 | ``` r 107 | Lymph10X_unsupervised = infercnv::run(Lymph10X_unsupervised, 108 | cutoff=0.1, 109 | out_dir="./Fig4_LN/Outputs/", 110 | num_threads = 20, 111 | cluster_by_groups=FALSE, 112 | denoise=TRUE, 113 | HMM=FALSE) 114 | ``` 115 | 116 | # Final output 117 | 118 | The final desired output is the infercnv.21_denoised.png file. 119 | 120 | ![Output Image](infercnv.21_denoised.png) 121 | -------------------------------------------------------------------------------- /FigureScripts/Figure 4/Figure4a_LN/infercnv.21_denoised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 4/Figure4a_LN/infercnv.21_denoised.png -------------------------------------------------------------------------------- /FigureScripts/Figure 4/Figure4c_SCC/Step1/Figure4c_Step1_P6_scRNAseq_Benigns.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Figure4c_Step1_P6_scRNAseq_Benigns" 3 | author: "Andrew Erickson" 4 | output: md_document 5 | --- 6 | # Figure 4c - Step 1 - Selection of Benign references (from paired scRNAseq data) 7 | 8 | In order to run the SCC Visium data with siCNV, we need a reference set. 
We identified a set of paired scRNA sequencing data, from benign skin cells (from the same exact patient), provided by the authors as listed below. 9 | 10 | # Setup 11 | 12 | Initializing packages. 13 | 14 | ```{r, message=FALSE} 15 | library(tidyverse) 16 | library(SpatialInferCNV) 17 | ``` 18 | 19 | # Creating a working directory 20 | 21 | ```{r, eval = FALSE} 22 | dir.create("Figure4c_output") 23 | setwd("Figure4c_output") 24 | ``` 25 | 26 | # Downloading and formatting data, part 1 27 | 28 | Warning, this step will take 10-60 min, even with a decent internet connection. 29 | 30 | ```{r, eval = FALSE} 31 | counturl <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE144236&format=file&file=GSE144236%5FcSCC%5Fcounts%2Etxt%2Egz" 32 | tmp <- tempfile() 33 | download.file(counturl,tmp) 34 | 35 | #Warning, this next step will take 10-60 minutes 36 | merge10pts_counts <- read.delim(gzfile(tmp)) 37 | merge10pts_counts <- as.data.frame(t(merge10pts_counts)) 38 | 39 | SCC_P6_Benigns <- merge10pts_counts %>% 40 | filter(Patient == 6) %>% 41 | filter(`Tissue: 0=Normal, 1=Tumor` == 0) 42 | 43 | SCC_P6_Benigns <- SCC_P6_Benigns %>% 44 | select(-Patient, -`Tissue: 0=Normal, 1=Tumor`) 45 | 46 | save(SCC_P6_Benigns, file = "SCC_P6_Benigns.RData") 47 | ``` 48 | 49 | # Downloading and formatting data, part 2 50 | 51 | We then select only patient 6 data (corresponds to the specific patient in our analyses). 
52 | 53 | ```{r, eval = FALSE} 54 | #Import SCC, Patient 6, scRNAseq benigns that we subset out above 55 | load("./SCC_P6_Benigns.RData") 56 | 57 | #Following code creates a barcode dataframe that we will need later 58 | P6_Benigns_forannotations <- SCC_P6_Benigns %>% rownames_to_column() 59 | 60 | Barcodes_P6 <- P6_Benigns_forannotations %>% 61 | select(rowname) %>% 62 | mutate(Histology = "P6_Benigns") 63 | names(Barcodes_P6)[1] <- "Barcodes" 64 | 65 | #Next, we will prepare the gene order file required for infercnv::run 66 | SCC_P6_Benigns <- as.data.frame(t(SCC_P6_Benigns)) 67 | SCC_P6_Benigns <- SCC_P6_Benigns %>% rownames_to_column() 68 | names(SCC_P6_Benigns)[1] <- "Genes" 69 | ``` 70 | 71 | # Creating GeneToENSMBL dataframe 72 | 73 | The commented-out code below creates the GeneToENSMBL.csv file, but we have provided this on our GitHub: 74 | 75 | [GeneToENSMBL.csv](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/GeneToENSMBL.csv). 76 | 77 | ```{r, eval = FALSE} 78 | GeneToENSMBL <- read.csv("./GeneToENSMBL.csv") 79 | 80 | #library(tidyverse) 81 | #library(data.table) 82 | #GeneToENSMBL <- fread('https://data.broadinstitute.org/Trinity/CTAT/cnv/gencode_v19_gen_pos.complete.txt') 83 | #GeneToENSMBL <- GeneToENSMBL %>% separate(V1, c("left","ENSMBLID"), sep = "\\|") 84 | 85 | #names(GeneToENSMBL)[1] <- "Genes" 86 | #names(GeneToENSMBL)[3] <- "chr" 87 | #names(GeneToENSMBL)[4] <- "start" 88 | #names(GeneToENSMBL)[5] <- "stop" 89 | 90 | #write.csv(GeneToENSMBL, "GeneToENSMBL.csv", row.names = FALSE) 91 | ``` 92 | 93 | # Mapping Gene Names to counts/barcodes, and then outputting the requisite files for infercnv::run 94 | 95 | We need to provide a gene ordering file to inferCNV, in the form of: Gene Name / Chromosome Number / Start Loci / Stop Loci. As the files provided by the authors are in "Gene Name", and our chromosomal / loci information are mapped to ENSMBLID's, we need to map the Gene Names to ENSMBLIDs. 
96 | 97 | ```{r, eval = FALSE} 98 | Counts_joined <- SCC_P6_Benigns 99 | Counts_joined <- Counts_joined %>% 100 | separate(Genes, c("Genes", NA)) 101 | 102 | Counts_joined <- Counts_joined %>% select(Genes) 103 | 104 | #Selecting Gene name, chromosome, start and stop locations 105 | GenesForMapping <- GeneToENSMBL %>% select(Genes, chr, start, stop) 106 | GenesInSample <- Counts_joined %>% select(Genes) 107 | 108 | #Next, reordering the entries from Chromosomes 1-22, followed by X and Y 109 | GenesInSamplevsOrdering <- inner_join(GenesInSample, GenesForMapping, by = c("Genes" = "Genes")) 110 | dedup_GenesInSamplevsOrdering <- GenesInSamplevsOrdering[!duplicated(GenesInSamplevsOrdering$Genes), ] 111 | dedup_GenesInSamplevsOrdering$chromorder <- gsub("chr","",dedup_GenesInSamplevsOrdering$chr) 112 | dedup_GenesInSamplevsOrdering$chromorder <- as.numeric(ifelse(dedup_GenesInSamplevsOrdering$chromorder == "X", 23, 113 | ifelse(dedup_GenesInSamplevsOrdering$chromorder == "Y", 24, dedup_GenesInSamplevsOrdering$chromorder))) 114 | dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[order(dedup_GenesInSamplevsOrdering$chromorder),] 115 | dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[,1:4] 116 | 117 | MappingFileForInferCNV <- dedup_GenesInSamplevsOrdering 118 | 119 | #Selecting only genes that have location data 120 | CountmappedGenes <- select(MappingFileForInferCNV, Genes) 121 | Counts_joined <- SCC_P6_Benigns 122 | Counts_joined <- Counts_joined %>% 123 | separate(Genes, c("Genes", NA)) 124 | 125 | #Selecting only genes that have location and count data 126 | Mapped_Counts_joined <- left_join(CountmappedGenes, Counts_joined) 127 | #Removing duplicates 128 | Mapped_Counts_joined <- Mapped_Counts_joined[!duplicated(Mapped_Counts_joined$Genes), ] 129 | ``` 130 | 131 | # Outputting all files for inferCNV::run 132 | 133 | ```{r, eval = FALSE} 134 | #Write GenesInSamplevsOrdering 135 | write.table(Mapped_Counts_joined, 136 | 
"SCC_P6_Bg_Selected_Mapped_Counts.tsv", 137 | row.names = FALSE, 138 | sep = "\t") 139 | 140 | write.table(Barcodes_P6, 141 | "SCC_P6_Bg_Selected_CorrectedBarcodes.tsv", 142 | quote = FALSE, 143 | col.names = FALSE, 144 | row.names = FALSE, 145 | sep = "\t") 146 | 147 | write.table(MappingFileForInferCNV, 148 | "SCC_P6_Bg_MappingFileForInferCNV.tsv", 149 | quote = FALSE, 150 | col.names = FALSE, 151 | row.names = FALSE, 152 | sep = "\t") 153 | ``` 154 | 155 | # Creating the inferCNV object (prior to run) 156 | 157 | Creating the object for infercnv::run. 158 | 159 | ```{r, eval = FALSE} 160 | P6_Bg_infCNV <- infercnv::CreateInfercnvObject(raw_counts_matrix="./SCC_P6_Bg_Selected_Mapped_Counts.tsv", 161 | gene_order_file="./SCC_P6_Bg_MappingFileForInferCNV.tsv", 162 | annotations_file="./SCC_P6_Bg_Selected_CorrectedBarcodes.tsv", 163 | delim="\t", 164 | ref_group_names=NULL, 165 | chr_exclude = c("chrM")) 166 | 167 | 168 | ``` 169 | 170 | # Unsupervised Run - (Typically ran on cluster) 171 | 172 | Running infercnv. 173 | 174 | ```{r, eval = FALSE} 175 | P6_Bg_infCNV = infercnv::run(P6_Bg_infCNV, 176 | cutoff=0.1, 177 | out_dir="./Figure4c_Step1/Outputs", 178 | num_threads = 20, 179 | cluster_by_groups=FALSE, 180 | denoise=TRUE, 181 | HMM=FALSE) 182 | ``` 183 | 184 | InferCNV will output many files. We are primarily interested in the final "infercnv.21_denoised.png" file, as well as the text file associated with the dendrogram associated with the hierarchical clustering on the left hand side of the image (infercnv.21_denoised.observations_dendrogram.txt). 185 | 186 | ![infercnv.21_denoised.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step1/infercnv.21_denoised.png) 187 | 188 | # Importing dendrogram 189 | 190 | Next, we want to import this dendrogram file, this was created just above. 
191 | 192 | ```{r, eval = FALSE} 193 | library(ape) 194 | library(phylogram) 195 | SCC_P6_benigns_for_clustering <- read.dendrogram(file = "./Figure4c_Step1/Outputs/infercnv.21_denoised.observations_dendrogram.txt") 196 | 197 | SCC_P6_benigns_for_clustering_phylo <- as.phylo(SCC_P6_benigns_for_clustering) 198 | ``` 199 | 200 | # Visualizing dendrogram node numbers 201 | 202 | Next, we want to visualize the numbers associated with the nodes of interest (clones). We output a large image file that allows us to manually inspect which nodes (cells) should be selected the purest benign references. Here, we want the cells with the least signal possible. 203 | 204 | ```{r, eval = FALSE} 205 | my.subtrees = subtrees(SCC_P6_benigns_for_clustering_phylo) # subtrees() to subset 206 | 207 | png("SCC_P6_benigns_for_clustering_phylo.png",width=10000,height=2500, res = 300) 208 | plot(SCC_P6_benigns_for_clustering_phylo,show.tip.label = FALSE) 209 | nodelabels(text=1:SCC_P6_benigns_for_clustering_phylo$Nnode,node=1:SCC_P6_benigns_for_clustering_phylo$Nnode+Ntip(SCC_P6_benigns_for_clustering_phylo)) 210 | dev.off() 211 | ``` 212 | 213 | We provide the image output here: 214 | 215 | ![SCC_P6_benigns_for_clustering_phylo.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step1/SCC_P6_benigns_for_clustering_phylo.png) 216 | 217 | # Purest Benign selection 218 | 219 | Next, view the output .png file, which provides a (albeit cluttered) labeling of the dendrogram tree nodes. Manually select individual nodes that correspond with a distinct subclonal grouping or signal, that will be taken forward for re-clustering. This can be iteratively tweaked with the next step + spatial visualization til optimal. We provide more details [here](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/BenignRefs.md), and provide the finalized selected SCC purest benign nodes here. 
220 | 221 | 222 | ```{r, eval = FALSE} 223 | #A - 4034 224 | #B - 3605 225 | #B - 3360 226 | #B - 2316 227 | #B - 724 228 | #C - 2 229 | ``` 230 | 231 | # Selecting purest benigns 232 | 233 | Next, after identifying the numerical nodes that correspond to dendrogram branches that correspond with a given set of molecular signals, we then manually select these nodes in R, apply a label, then join them all together for use in the next step. 234 | 235 | ```{r, eval = FALSE} 236 | library(SpatialInferCNV) 237 | library(tidyverse) 238 | 239 | Node4034 <- SelectingSubTreeData(my.subtrees, 4034) 240 | Node2 <- SelectingSubTreeData(my.subtrees, 2) 241 | 242 | Merged <- rbind(Node4034, Node2) 243 | table(Merged$Node) 244 | 245 | Merged$Node <- ifelse(Merged$Node == "Node_4034", "PurestBenigns", "OtherBenigns") 246 | names(Merged)[2] <- "Histology" 247 | 248 | BenignRefs <- filter(Merged, Histology == "PurestBenigns") %>% 249 | select(Barcode, Histology) 250 | 251 | write.csv(BenignRefs, "Figure4c_SCCP6_BenignReferenceSet.csv", row.names = FALSE) 252 | 253 | ``` 254 | -------------------------------------------------------------------------------- /FigureScripts/Figure 4/Figure4c_SCC/Step1/Figure4c_Step1_P6_scRNAseq_Benigns.md: -------------------------------------------------------------------------------- 1 | # Figure 4c - Step 1 - Selection of Benign references (from paired scRNAseq data) 2 | 3 | In order to run the SCC Visium data with siCNV, we need a reference set. 4 | We identified a set of paired scRNA sequencing data, from benign skin 5 | cells (from the same exact patient), provided by the authors as listed 6 | below. 7 | 8 | # Setup 9 | 10 | Initializing packages. 
11 | 12 | library(tidyverse) 13 | 14 | ## -- Attaching packages --------------------------------------- tidyverse 1.3.1 -- 15 | 16 | ## v ggplot2 3.3.5 v purrr 0.3.4 17 | ## v tibble 3.1.1 v dplyr 1.0.6 18 | ## v tidyr 1.1.3 v stringr 1.4.0 19 | ## v readr 2.0.1 v forcats 0.5.1 20 | 21 | ## -- Conflicts ------------------------------------------ tidyverse_conflicts() -- 22 | ## x dplyr::filter() masks stats::filter() 23 | ## x dplyr::lag() masks stats::lag() 24 | 25 | library(SpatialInferCNV) 26 | 27 | ## Registered S3 method overwritten by 'spatstat.geom': 28 | ## method from 29 | ## print.boxx cli 30 | 31 | ## Warning: replacing previous import 'phylogram::as.phylo' by 'ape::as.phylo' when 32 | ## loading 'SpatialInferCNV' 33 | 34 | # Creating a working directory 35 | 36 | dir.create("Figure4c_output") 37 | setwd("Figure4c_output") 38 | 39 | # Downloading and formatting data, part 1 40 | 41 | Warning, this step will take 10-60 min, even with a decent internet 42 | connection. 43 | 44 | counturl <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE144236&format=file&file=GSE144236%5FcSCC%5Fcounts%2Etxt%2Egz" 45 | tmp <- tempfile() 46 | download.file(counturl,tmp) 47 | 48 | #Warning, this next step will take 10-60 minutes 49 | merge10pts_counts <- read.delim(gzfile(tmp)) 50 | merge10pts_counts <- as.data.frame(t(merge10pts_counts)) 51 | 52 | SCC_P6_Benigns <- merge10pts_counts %>% 53 | filter(Patient == 6) %>% 54 | filter(`Tissue: 0=Normal, 1=Tumor` == 0) 55 | 56 | SCC_P6_Benigns <- SCC_P6_Benigns %>% 57 | select(-Patient, -`Tissue: 0=Normal, 1=Tumor`) 58 | 59 | save(SCC_P6_Benigns, file = "SCC_P6_Benigns.RData") 60 | 61 | # Downloading and formatting data, part 2 62 | 63 | We then select only patient 6 data (corresponds to the specific patient 64 | in our analyses). 
65 | 66 | #Import SCC, Patient 6, scRNAseq benigns that we subset out above 67 | load("./SCC_P6_Benigns.RData") 68 | 69 | #Following code creates a barcode dataframe that we will need later 70 | P6_Benigns_forannotations <- SCC_P6_Benigns %>% rownames_to_column() 71 | 72 | Barcodes_P6 <- P6_Benigns_forannotations %>% 73 | select(rowname) %>% 74 | mutate(Histology = "P6_Benigns") 75 | names(Barcodes_P6)[1] <- "Barcodes" 76 | 77 | #Next, we will prepare the gene order file required for infercnv::run 78 | SCC_P6_Benigns <- as.data.frame(t(SCC_P6_Benigns)) 79 | SCC_P6_Benigns <- SCC_P6_Benigns %>% rownames_to_column() 80 | names(SCC_P6_Benigns)[1] <- "Genes" 81 | 82 | # Creating GeneToENSMBL dataframe 83 | 84 | The code below creates the GeneToENSMBL.csv file, but we have provided 85 | this on our GitHub: 86 | 87 | [GeneToENSMBL.csv](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/GeneToENSMBL.csv). 88 | 89 | GeneToENSMBL <- read.csv("./GeneToENSMBL.csv") 90 | 91 | #library(tidyverse) 92 | #library(data.table) 93 | #GeneToENSMBL <- fread('https://data.broadinstitute.org/Trinity/CTAT/cnv/gencode_v19_gen_pos.complete.txt') 94 | #GeneToENSMBL <- GeneToENSMBL %>% separate(V1, c("left","ENSMBLID"), sep = "\\|") 95 | 96 | #names(GeneToENSMBL)[1] <- "Genes" 97 | #names(GeneToENSMBL)[3] <- "chr" 98 | #names(GeneToENSMBL)[4] <- "start" 99 | #names(GeneToENSMBL)[5] <- "stop" 100 | 101 | #write.csv(GeneToENSMBL, "GeneToENSMBL.csv", row.names = FALSE) 102 | 103 | # Mapping Gene Names to counts/barcodes, and then outputting the requisite files for infercnv::run 104 | 105 | We need to provide a gene ordering file to inferCNV, in the form of: 106 | Gene Name / Chromosome Number / Start Loci / Stop Loci. As the files 107 | provided by the authors are in “Gene Name”, and our chromosomal / loci 108 | information are mapped to ENSMBLID’s, we need to map the Gene Names to 109 | ENSMBLIDs.
110 | 111 | Counts_joined <- SCC_P6_Benigns 112 | Counts_joined <- Counts_joined %>% 113 | separate(Genes, c("Genes", NA)) 114 | 115 | Counts_joined <- Counts_joined %>% select(Genes) 116 | 117 | #Selecting Gene name, chromosome, start and stop locations 118 | GenesForMapping <- GeneToENSMBL %>% select(Genes, chr, start, stop) 119 | GenesInSample <- Counts_joined %>% select(Genes) 120 | 121 | #Next, reordering the entries from Chromsomes 1-22, followed by X and Y 122 | GenesInSamplevsOrdering <- inner_join(GenesInSample, GenesForMapping, by = c("Genes" = "Genes")) 123 | dedup_GenesInSamplevsOrdering <- GenesInSamplevsOrdering[!duplicated(GenesInSamplevsOrdering$Genes), ] 124 | dedup_GenesInSamplevsOrdering$chromorder <- gsub("chr","",dedup_GenesInSamplevsOrdering$chr) 125 | dedup_GenesInSamplevsOrdering$chromorder <- as.numeric(ifelse(dedup_GenesInSamplevsOrdering$chromorder == "X", 23, 126 | ifelse(dedup_GenesInSamplevsOrdering$chromorder == "Y", 24, dedup_GenesInSamplevsOrdering$chromorder))) 127 | dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[order(dedup_GenesInSamplevsOrdering$chromorder),] 128 | dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[,1:4] 129 | 130 | MappingFileForInferCNV <- dedup_GenesInSamplevsOrdering 131 | 132 | #Selecting only genes that have location data 133 | CountmappedGenes <- select(MappingFileForInferCNV, Genes) 134 | Counts_joined <- SCC_P6_Benigns 135 | Counts_joined <- Counts_joined %>% 136 | separate(Genes, c("Genes", NA)) 137 | 138 | #Selecting only genes that have location and count data 139 | Mapped_Counts_joined <- left_join(CountmappedGenes, Counts_joined) 140 | #Removing duplicates 141 | Mapped_Counts_joined <- Mapped_Counts_joined[!duplicated(Mapped_Counts_joined$Genes), ] 142 | 143 | # Outputting all files for inferCNV::run 144 | 145 | #Write GenesInSamplevsOrdering 146 | write.table(Mapped_Counts_joined, 147 | "SCC_P6_Bg_Selected_Mapped_Counts.tsv", 148 | row.names = FALSE, 149 | sep = "\t") 
150 | 151 | write.table(Barcodes_P6, 152 | "SCC_P6_Bg_Selected_CorrectedBarcodes.tsv", 153 | quote = FALSE, 154 | col.names = FALSE, 155 | row.names = FALSE, 156 | sep = "\t") 157 | 158 | write.table(MappingFileForInferCNV, 159 | "SCC_P6_Bg_MappingFileForInferCNV.tsv", 160 | quote = FALSE, 161 | col.names = FALSE, 162 | row.names = FALSE, 163 | sep = "\t") 164 | 165 | # Creating the inferCNV object (prior to run) 166 | 167 | Creating the object for infercnv::run. 168 | 169 | P6_Bg_infCNV <- infercnv::CreateInfercnvObject(raw_counts_matrix="./SCC_P6_Bg_Selected_Mapped_Counts.tsv", 170 | gene_order_file="./SCC_P6_Bg_MappingFileForInferCNV.tsv", 171 | annotations_file="./SCC_P6_Bg_Selected_CorrectedBarcodes.tsv", 172 | delim="\t", 173 | ref_group_names=NULL, 174 | chr_exclude = c("chrM")) 175 | 176 | # Unsupervised Run - (Typically ran on cluster) 177 | 178 | Running infercnv. 179 | 180 | P6_Bg_infCNV = infercnv::run(P6_Bg_infCNV, 181 | cutoff=0.1, 182 | out_dir="./Figure4c_Step1/Outputs", 183 | num_threads = 20, 184 | cluster_by_groups=FALSE, 185 | denoise=TRUE, 186 | HMM=FALSE) 187 | 188 | InferCNV will output many files. We are primarily interested in the 189 | final “infercnv.21\_denoised.png” file, as well as the text file 190 | associated with the dendrogram associated with the hierarchical 191 | clustering on the left hand side of the image 192 | (infercnv.21\_denoised.observations\_dendrogram.txt). 193 | 194 | ![infercnv.21\_denoised.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step1/infercnv.21_denoised.png) 195 | 196 | # Importing dendrogram 197 | 198 | Next, we want to import this dendrogram file, this was created just 199 | above. 
200 | 201 | library(ape) 202 | library(phylogram) 203 | SCC_P6_benigns_for_clustering <- read.dendrogram(file = "./Figure4c_Step1/Outputs/infercnv.21_denoised.observations_dendrogram.txt") 204 | 205 | SCC_P6_benigns_for_clustering_phylo <- as.phylo(SCC_P6_benigns_for_clustering) 206 | 207 | # Visualizing dendrogram node numbers 208 | 209 | Next, we want to visualize the numbers associated with the nodes of 210 | interest (clones). We output a large image file that allows us to 211 | manually inspect which nodes (cells) should be selected the purest 212 | benign references. Here, we want the cells with the least signal 213 | possible. 214 | 215 | my.subtrees = subtrees(SCC_P6_benigns_for_clustering_phylo) # subtrees() to subset 216 | 217 | png("SCC_P6_benigns_for_clustering_phylo.png",width=10000,height=2500, res = 300) 218 | plot(SCC_P6_benigns_for_clustering_phylo,show.tip.label = FALSE) 219 | nodelabels(text=1:SCC_P6_benigns_for_clustering_phylo$Nnode,node=1:SCC_P6_benigns_for_clustering_phylo$Nnode+Ntip(SCC_P6_benigns_for_clustering_phylo)) 220 | dev.off() 221 | 222 | We provide the image output here: 223 | 224 | ![SCC\_P6\_benigns\_for\_clustering\_phylo.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step1/SCC_P6_benigns_for_clustering_phylo.png) 225 | 226 | # Purest Benign selection 227 | 228 | Next, view the output .png file, which provides a (albeit cluttered) 229 | labeling of the dendrogram tree nodes. Manually select individual nodes 230 | that correspond with a distinct subclonal grouping or signal, that will 231 | be taken forward for re-clustering. This can be iteratively tweaked with 232 | the next step + spatial visualization til optimal. We provide more 233 | details 234 | [here](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/BenignRefs.md), 235 | and provide the finalized selected SCC purest benign nodes here. 
236 | 237 | #A - 4034 238 | #B - 3605 239 | #B - 3360 240 | #B - 2316 241 | #B - 724 242 | #C - 2 243 | 244 | # Selecting purest benigns 245 | 246 | Next, after identifying the numerical nodes that correspond to 247 | dendrogram branches that correspond with a given set of molecular 248 | signals, we then manually select these nodes in R, apply a label, then 249 | join them all together for use in the next step. 250 | 251 | library(SpatialInferCNV) 252 | library(tidyverse) 253 | 254 | Node4034 <- SelectingSubTreeData(my.subtrees, 4034) 255 | Node2 <- SelectingSubTreeData(my.subtrees, 2) 256 | 257 | Merged <- rbind(Node4034, Node2) 258 | table(Merged$Node) 259 | 260 | Merged$Node <- ifelse(Merged$Node == "Node_4034", "PurestBenigns", "OtherBenigns") 261 | names(Merged)[2] <- "Histology" 262 | 263 | BenignRefs <- filter(Merged, Histology == "PurestBenigns") %>% 264 | select(Barcode, Histology) 265 | 266 | write.csv(BenignRefs, "Figure4c_SCCP6_BenignReferenceSet.csv", row.names = FALSE) 267 | -------------------------------------------------------------------------------- /FigureScripts/Figure 4/Figure4c_SCC/Step1/SCC_P6_benigns_for_clustering_phylo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 4/Figure4c_SCC/Step1/SCC_P6_benigns_for_clustering_phylo.png -------------------------------------------------------------------------------- /FigureScripts/Figure 4/Figure4c_SCC/Step1/infercnv.21_denoised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 4/Figure4c_SCC/Step1/infercnv.21_denoised.png -------------------------------------------------------------------------------- /FigureScripts/Figure 
4/Figure4c_SCC/Step2/Figure4c_Step2_SCC_P6_siCNV_unsupervised.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "SCC_benigns" 3 | author: "Andrew Erickson" 4 | output: md_document 5 | --- 6 | 7 | Having now selected a benign reference set in Step 1, we now use these data to perform unsupervised analysis of the SCC Visium section. 8 | 9 | # Setup 10 | 11 | Initializing libraries 12 | 13 | ```{r, message=FALSE} 14 | library(tidyverse) 15 | library(SpatialInferCNV) 16 | ``` 17 | 18 | # Importing Data for Benigns 19 | 20 | We already imported the data in the previous step, let's re-import it and filter only for the selected/filtered benign reference set. 21 | 22 | ```{r, eval = FALSE} 23 | #Import SCC, Patient 6, scRNAseq benigns that we subset out in step 1 24 | load("./Figure4c_output/SCC_P6_Benigns.RData") 25 | 26 | head(SCC_P6_Benigns) 27 | 28 | SCC_P6_BenignReferences_Barcodes <- read.csv("./Figure4c_SCCP6_BenignReferenceSet.csv") 29 | names(SCC_P6_BenignReferences_Barcodes)[1] <- "Barcodes" 30 | SCC_P6_BenignReferences_Barcodes$Histology <- "PurestBenign_SCCPatient6" 31 | SCC_P6_Benigns <- SCC_P6_Benigns %>% rownames_to_column() 32 | names(SCC_P6_Benigns)[1] <- "Barcodes" 33 | 34 | SCC_P6_BenignReferences_Counts <- left_join(SCC_P6_BenignReferences_Barcodes, SCC_P6_Benigns, by = c("Barcodes" = "Barcodes")) 35 | rm(SCC_P6_Benigns) 36 | SCC_P6_BenignReferences_Counts <- SCC_P6_BenignReferences_Counts %>% select(-Histology) 37 | SCC_P6_BenignReferences_Counts <- SCC_P6_BenignReferences_Counts %>% column_to_rownames(var = "Barcodes") 38 | SCC_P6_BenignReferences_Counts <- as.data.frame(t(SCC_P6_BenignReferences_Counts)) 39 | SCC_P6_BenignReferences_Counts <- SCC_P6_BenignReferences_Counts %>% rownames_to_column() 40 | names(SCC_P6_BenignReferences_Counts)[1] <- "Genes" 41 | 42 | saveRDS(SCC_P6_BenignReferences_Counts, file = "SCC_P6_BenignReferences_Counts.rds") 43 | saveRDS(SCC_P6_BenignReferences_Barcodes, 
file = "SCC_P6_BenignReferences_Barcodes.rds") 44 | 45 | ``` 46 | 47 | # Importing Data for Visium Data 48 | 49 | Download the files [from Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29): SCC_patient/. 50 | 51 | Here, we are filtering for the section used in the Figure 4d analysis from a parent seurat object. We output both the counts and the barcodes from this Visium section. We manually apply a QC threshold to only include Visium spots with at least 500 counts. 52 | 53 | ```{r, eval = FALSE} 54 | t28 <- readRDS("./t28.Rds") 55 | 56 | SCC_P6_Visium_Counts <- as.data.frame(t28@assays$Spatial@counts) 57 | rm(t28) 58 | 59 | head(SCC_P6_Visium_Counts) 60 | 61 | SCC_P6_Visium_Counts <- as.data.frame(t(SCC_P6_Visium_Counts)) 62 | SCC_P6_Visium_Counts <- rownames_to_column(SCC_P6_Visium_Counts) 63 | SCC_P6_Visium_Counts$section <- str_sub(SCC_P6_Visium_Counts$rowname, start= -1) 64 | table(SCC_P6_Visium_Counts$section) 65 | 66 | SCC_P6_Visium_Counts$barcode <- str_sub(SCC_P6_Visium_Counts$rowname, start = 1L, end = -3) 67 | 68 | SCC_P6_Visium_Counts <- SCC_P6_Visium_Counts %>% filter(section == 1) 69 | 70 | SCC_P6_Visium_Annotations <- SCC_P6_Visium_Counts %>% select(barcode, section) 71 | SCC_P6_Visium_Annotations$section <- "SCC_P6_Visium" 72 | names(SCC_P6_Visium_Annotations)[1] <- "Barcodes" 73 | names(SCC_P6_Visium_Annotations)[2] <- "Histology" 74 | 75 | saveRDS(SCC_P6_Visium_Annotations, file = "SCC_P6_Visium_Annotations.rds") 76 | 77 | SCC_P6_Visium_Counts <- column_to_rownames(SCC_P6_Visium_Counts, var = "barcode") 78 | SCC_P6_Visium_Counts <- SCC_P6_Visium_Counts %>% select(-rowname, -section) 79 | 80 | SCC_P6_Visium_Counts$Total <- rowSums(SCC_P6_Visium_Counts) 81 | SCC_P6_Visium_Counts <- SCC_P6_Visium_Counts %>% filter(Total >= 500) 82 | SCC_P6_Visium_Counts <- select(SCC_P6_Visium_Counts, -Total) 83 | SCC_P6_Visium_Counts <- as.data.frame(t(SCC_P6_Visium_Counts)) 84 | SCC_P6_Visium_Counts <- 
SCC_P6_Visium_Counts[,colSums(is.na(SCC_P6_Visium_Counts))% full_join(SCC_P6_Visium_Counts, by = "Genes") 114 | SCC_P6_BenignRef_and_Visium_Counts <- SCC_P6_BenignRef_and_Visium_Counts %>% replace(., is.na(.), 0) 115 | 116 | saveRDS(SCC_P6_BenignRef_and_Visium_Counts, file = "SCC_P6_BenignRef_and_Visium_Counts.rds") 117 | ``` 118 | 119 | # Creating GeneToENSMBL dataframe 120 | 121 | The code below creates the GeneToENSMBL.csv file, but we have provided this on our GitHub: 122 | 123 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/GeneToENSMBL.csv). 124 | 125 | ```{r, eval = FALSE} 126 | GeneToENSMBL <- read.csv("./Mendeley/ProcessedFilesForFigures/Figure4/GeneToENSMBL.csv") 127 | 128 | #library(tidyverse) 129 | #library(data.table) 130 | #GeneToENSMBL <- fread('https://data.broadinstitute.org/Trinity/CTAT/cnv/gencode_v19_gen_pos.complete.txt') 131 | #GeneToENSMBL <- mydat %>% separate(V1, c("left","ENSMBLID"), sep = "\\|") 132 | 133 | #names(GeneToENSMBL)[1] <- "Genes" 134 | #names(GeneToENSMBL)[3] <- "chr" 135 | #names(GeneToENSMBL)[4] <- "start" 136 | #names(GeneToENSMBL)[5] <- "stop" 137 | 138 | #write.csv(GeneToENSMBL, "GeneToENSMBL.csv", row.names = FALSE) 139 | ``` 140 | 141 | # Mapping Gene Names to counts/barcodes, and then outputting the requisite files for infercnv::run, part 1 142 | 143 | We need to provide a gene ordering file to inferCNV, in the form of: Gene Name / Chromosome Number / Start Loci / Stop Loci. As the files provided by the authors are in "Gene Name", and our chromosomal / loci information are mapped to ENSMBLID's, we need to map the Gene Names to ENSMBLIDs. 144 | 145 | ```{r, eval = FALSE} 146 | #removing "." 
147 | Counts_joined <- SCC_P6_BenignRef_and_Visium_Counts 148 | Counts_joined <- Counts_joined %>% 149 | separate(Genes, c("Genes", NA)) 150 | # NOTE(review): separate() is called without sep, so tidyr's default separator (any run of non-alphanumeric characters) applies; hyphenated symbols such as HLA-A would be cut at the hyphen, not only at "." - confirm this is intended 151 | Counts_joined <- Counts_joined %>% select(Genes) 152 | # Join the sample's gene symbols to their genomic coordinates, keep the first row per symbol, then sort by chromosome (X -> 23, Y -> 24) 153 | GenesForMapping <- GeneToENSMBL %>% select(Genes, chr, start, stop) 154 | GenesInSample <- Counts_joined %>% select(Genes) 155 | GenesInSamplevsOrdering <- inner_join(GenesInSample, GenesForMapping, by = c("Genes" = "Genes")) 156 | dedup_GenesInSamplevsOrdering <- GenesInSamplevsOrdering[!duplicated(GenesInSamplevsOrdering$Genes), ] 157 | dedup_GenesInSamplevsOrdering$chromorder <- gsub("chr","",dedup_GenesInSamplevsOrdering$chr) 158 | dedup_GenesInSamplevsOrdering$chromorder <- as.numeric(ifelse(dedup_GenesInSamplevsOrdering$chromorder == "X", 23, 159 | ifelse(dedup_GenesInSamplevsOrdering$chromorder == "Y", 24, dedup_GenesInSamplevsOrdering$chromorder))) 160 | dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[order(dedup_GenesInSamplevsOrdering$chromorder),] 161 | dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[,1:4] 162 | # chromorder was only a sort key; columns 1:4 (Genes, chr, start, stop) are what gets saved 163 | MappingFileForInferCNV <- dedup_GenesInSamplevsOrdering 164 | # Saved to disk; the next chunk reloads this file with readRDS() 165 | saveRDS(MappingFileForInferCNV, file = "MappingFileForSCC_P6_Visium_and_Bg.rds") 166 | ``` 167 | 168 | # Outputting the requisite files for infercnv::run, part 2 169 | 170 | We then filter for only mapped genes, from counts, and then output the three requisite files for infercnv::run. 
171 | 172 | ```{r, eval = FALSE} 173 | MappingFileForInferCNV <- readRDS("MappingFileForSCC_P6_Visium_and_Bg.rds") 174 | SCC_P6_BenignRef_and_Visium_Counts <- readRDS("SCC_P6_BenignRef_and_Visium_Counts.rds") 175 | # Restrict to the genes present in the gene order file 176 | CountmappedGenes <- select(MappingFileForInferCNV, Genes) 177 | # Split gene names the same way as when the gene order file was built, so the join keys agree 178 | Counts_joined <- SCC_P6_BenignRef_and_Visium_Counts 179 | Counts_joined <- Counts_joined %>% 180 | separate(Genes, c("Genes", NA)) 181 | # Keep counts for mapped genes only (left_join without by= joins on the shared column names), drop repeated symbols, then build the barcode list from the column names of the counts table (every column except Genes) 182 | Mapped_Counts_joined <- left_join(CountmappedGenes, Counts_joined) 183 | Mapped_Counts_joined <- Mapped_Counts_joined[!duplicated(Mapped_Counts_joined$Genes), ] 184 | Mapped_Counts_joinedSliced <- Mapped_Counts_joined %>% slice(1L) 185 | Mapped_Counts_joinedSliced <- as.data.frame(t(Mapped_Counts_joinedSliced[, colnames(Mapped_Counts_joinedSliced)[c(1:length(Mapped_Counts_joinedSliced))]])) 186 | Mapped_Counts_joinedSliced <- Mapped_Counts_joinedSliced %>% rownames_to_column() 187 | Mapped_Counts_joinedSliced <- as.data.frame(Mapped_Counts_joinedSliced[2:(dim(Mapped_Counts_joinedSliced)[1]), 1]) 188 | names(Mapped_Counts_joinedSliced)[1] <- "Barcode" 189 | # Label each barcode by its first four characters: "P6_N" -> reference group, anything else -> Visium. NOTE(review): assumes every benign scRNA-seq barcode starts with "P6_N" - confirm 190 | Mapped_Counts_joinedSliced$Histology <- ifelse(paste0(substr(Mapped_Counts_joinedSliced$Barcode, start = 1, stop = 4)) == "P6_N", "PurestBenign_SCCPatient6", "Visium") 191 | 192 | #Write the mapped counts table (one row per gene, one column per barcode) 193 | write.table(Mapped_Counts_joined, 194 | "SCC_P6_BenignRef_and_Visium_Mapped_Counts.tsv", 195 | row.names = FALSE, 196 | sep = "\t") 197 | # Gene order file: written without header or quotes 198 | write.table(MappingFileForInferCNV, 199 | "SCC_P6_BenignRef_and_Visium_GeneOrderFile.tsv", 200 | quote = FALSE, 201 | col.names = FALSE, 202 | row.names = FALSE, 203 | sep = "\t") 204 | 205 | # Annotation file: barcode and group label, written without header or quotes 206 | write.table(Mapped_Counts_joinedSliced, 207 | "SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv", 208 | quote = FALSE, 209 | col.names = FALSE, 210 | row.names = FALSE, 211 | sep = "\t") 212 | ``` 213 | 214 | # Creating the inferCNV object (prior to run) 215 | 216 | Creating the object for infercnv::run. 
217 | 218 | ```{r, eval = FALSE} 219 | Visium_P6_Bg_infCNV <- infercnv::CreateInfercnvObject(raw_counts_matrix="./SCC_P6_BenignRef_and_Visium_Mapped_Counts.tsv", 220 | gene_order_file="./SCC_P6_BenignRef_and_Visium_GeneOrderFile.tsv", 221 | annotations_file="./SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv", 222 | delim="\t", 223 | ref_group_names="PurestBenign_SCCPatient6", 224 | chr_exclude = c("chrM")) 225 | 226 | ``` 227 | 228 | # Unsupervised Run - (Typically ran on cluster) 229 | 230 | Running infercnv, typically ran on a server. 231 | 232 | ```{r, eval = FALSE} 233 | Visium_P6_Bg_infCNV = infercnv::run(Visium_P6_Bg_infCNV, 234 | cutoff=0.1, 235 | out_dir="./Figure4c_Step2/Outputs", 236 | num_threads = 10, 237 | cluster_by_groups=FALSE, 238 | denoise=TRUE, 239 | HMM=FALSE) 240 | ``` 241 | 242 | InferCNV will output many files. We are primarily interested in the final "infercnv.21_denoised.png" file, as well as the text file associated with the dendrogram associated with the hierarchical clustering on the left hand side of the image (infercnv.21_denoised.observations_dendrogram.txt). 243 | 244 | ![infercnv.21_denoised.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step2/infercnv.21_denoised.png) 245 | -------------------------------------------------------------------------------- /FigureScripts/Figure 4/Figure4c_SCC/Step2/Figure4c_Step2_SCC_P6_siCNV_unsupervised.md: -------------------------------------------------------------------------------- 1 | Having now selected a benign reference set in Step 1, we now use these 2 | data to perform unsupervised analysis of the SCC Visium section. 
3 | 4 | # Setup 5 | 6 | Initializing libraries 7 | 8 | library(tidyverse) 9 | 10 | ## -- Attaching packages --------------------------------------- tidyverse 1.3.1 -- 11 | 12 | ## v ggplot2 3.3.5 v purrr 0.3.4 13 | ## v tibble 3.1.1 v dplyr 1.0.6 14 | ## v tidyr 1.1.3 v stringr 1.4.0 15 | ## v readr 2.0.1 v forcats 0.5.1 16 | 17 | ## -- Conflicts ------------------------------------------ tidyverse_conflicts() -- 18 | ## x dplyr::filter() masks stats::filter() 19 | ## x dplyr::lag() masks stats::lag() 20 | 21 | library(SpatialInferCNV) 22 | 23 | ## Registered S3 method overwritten by 'spatstat.geom': 24 | ## method from 25 | ## print.boxx cli 26 | 27 | ## Warning: replacing previous import 'phylogram::as.phylo' by 'ape::as.phylo' when 28 | ## loading 'SpatialInferCNV' 29 | 30 | # Importing Data for Benigns 31 | 32 | We already imported the data in the previous step, lets reimport it 33 | again and filter only for the selected/filtered benign reference set. 34 | 35 | #Import SCC, Patient 6, scRNAseq benigns that we subset out in step 1 36 | load("./Figure4c_output/SCC_P6_Benigns.RData") 37 | 38 | head(SCC_P6_Benigns) 39 | 40 | SCC_P6_BenignReferences_Barcodes <- read.csv("./Figure4c_SCCP6_BenignReferenceSet.csv") 41 | names(SCC_P6_BenignReferences_Barcodes)[1] <- "Barcodes" 42 | SCC_P6_BenignReferences_Barcodes$Histology <- "PurestBenign_SCCPatient6" 43 | SCC_P6_Benigns <- SCC_P6_Benigns %>% rownames_to_column() 44 | names(SCC_P6_Benigns)[1] <- "Barcodes" 45 | 46 | SCC_P6_BenignReferences_Counts <- left_join(SCC_P6_BenignReferences_Barcodes, SCC_P6_Benigns, by = c("Barcodes" = "Barcodes")) 47 | rm(SCC_P6_Benigns) 48 | SCC_P6_BenignReferences_Counts <- SCC_P6_BenignReferences_Counts %>% select(-Histology) 49 | SCC_P6_BenignReferences_Counts <- SCC_P6_BenignReferences_Counts %>% column_to_rownames(var = "Barcodes") 50 | SCC_P6_BenignReferences_Counts <- as.data.frame(t(SCC_P6_BenignReferences_Counts)) 51 | SCC_P6_BenignReferences_Counts <- 
SCC_P6_BenignReferences_Counts %>% rownames_to_column() 52 | names(SCC_P6_BenignReferences_Counts)[1] <- "Genes" 53 | 54 | saveRDS(SCC_P6_BenignReferences_Counts, file = "SCC_P6_BenignReferences_Counts.rds") 55 | saveRDS(SCC_P6_BenignReferences_Barcodes, file = "SCC_P6_BenignReferences_Barcodes.rds") 56 | 57 | # Importing Data for Visium Data 58 | 59 | Download the files [from 60 | Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29): 61 | SCC\_patient/. 62 | 63 | Here, we are filtering for the section used in the Figure 4d analysis 64 | from a parent seurat object. We output both the counts and the barcodes 65 | from this Visium section. We manually apply a QC threshold to only 66 | include Visium spots with at least 500 counts. 67 | 68 | t28 <- readRDS("./t28.Rds") 69 | 70 | SCC_P6_Visium_Counts <- as.data.frame(t28@assays$Spatial@counts) 71 | rm(t28) 72 | 73 | head(SCC_P6_Visium_Counts) 74 | 75 | SCC_P6_Visium_Counts <- as.data.frame(t(SCC_P6_Visium_Counts)) 76 | SCC_P6_Visium_Counts <- rownames_to_column(SCC_P6_Visium_Counts) 77 | SCC_P6_Visium_Counts$section <- str_sub(SCC_P6_Visium_Counts$rowname, start= -1) 78 | table(SCC_P6_Visium_Counts$section) 79 | 80 | SCC_P6_Visium_Counts$barcode <- str_sub(SCC_P6_Visium_Counts$rowname, start = 1L, end = -3) 81 | 82 | SCC_P6_Visium_Counts <- SCC_P6_Visium_Counts %>% filter(section == 1) 83 | 84 | SCC_P6_Visium_Annotations <- SCC_P6_Visium_Counts %>% select(barcode, section) 85 | SCC_P6_Visium_Annotations$section <- "SCC_P6_Visium" 86 | names(SCC_P6_Visium_Annotations)[1] <- "Barcodes" 87 | names(SCC_P6_Visium_Annotations)[2] <- "Histology" 88 | 89 | saveRDS(SCC_P6_Visium_Annotations, file = "SCC_P6_Visium_Annotations.rds") 90 | 91 | SCC_P6_Visium_Counts <- column_to_rownames(SCC_P6_Visium_Counts, var = "barcode") 92 | SCC_P6_Visium_Counts <- SCC_P6_Visium_Counts %>% select(-rowname, -section) 93 | 94 | SCC_P6_Visium_Counts$Total <- rowSums(SCC_P6_Visium_Counts) 95 | 
SCC_P6_Visium_Counts <- SCC_P6_Visium_Counts %>% filter(Total >= 500) 96 | SCC_P6_Visium_Counts <- select(SCC_P6_Visium_Counts, -Total) 97 | SCC_P6_Visium_Counts <- as.data.frame(t(SCC_P6_Visium_Counts)) 98 | SCC_P6_Visium_Counts <- SCC_P6_Visium_Counts[,colSums(is.na(SCC_P6_Visium_Counts))% full_join(SCC_P6_Visium_Counts, by = "Genes") 124 | SCC_P6_BenignRef_and_Visium_Counts <- SCC_P6_BenignRef_and_Visium_Counts %>% replace(., is.na(.), 0) 125 | 126 | saveRDS(SCC_P6_BenignRef_and_Visium_Counts, file = "SCC_P6_BenignRef_and_Visium_Counts.rds") 127 | 128 | # Creating GeneToENSMBL dataframe 129 | 130 | The code below creates the GeneToENSMBL.csv file, but we have provided 131 | this on our GitHub: 132 | 133 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/GeneToENSMBL.csv). 134 | 135 | GeneToENSMBL <- read.csv("./Mendeley/ProcessedFilesForFigures/Figure4/GeneToENSMBL.csv") 136 | 137 | #library(tidyverse) 138 | #library(data.table) 139 | #GeneToENSMBL <- fread('https://data.broadinstitute.org/Trinity/CTAT/cnv/gencode_v19_gen_pos.complete.txt') 140 | #GeneToENSMBL <- mydat %>% separate(V1, c("left","ENSMBLID"), sep = "\\|") 141 | 142 | #names(GeneToENSMBL)[1] <- "Genes" 143 | #names(GeneToENSMBL)[3] <- "chr" 144 | #names(GeneToENSMBL)[4] <- "start" 145 | #names(GeneToENSMBL)[5] <- "stop" 146 | 147 | #write.csv(GeneToENSMBL, "GeneToENSMBL.csv", row.names = FALSE) 148 | 149 | # Mapping Gene Names to counts/barcodes, and then outputting the requisite files for infercnv::run, part 1 150 | 151 | We need to provide a gene ordering file to inferCNV, in the form of: 152 | Gene Name / Chromosome Number / Start Loci / Stop Loci. As the files 153 | provided by the authors are in “Gene Name”, and our chromosomal / loci 154 | information are mapped to ENSMBLID’s, we need to map the Gene Names to 155 | ENSMBLIDs. 156 | 157 | #removing "." 
158 | Counts_joined <- SCC_P6_BenignRef_and_Visium_Counts 159 | Counts_joined <- Counts_joined %>% 160 | separate(Genes, c("Genes", NA)) 161 | 162 | Counts_joined <- Counts_joined %>% select(Genes) 163 | 164 | GenesForMapping <- GeneToENSMBL %>% select(Genes, chr, start, stop) 165 | GenesInSample <- Counts_joined %>% select(Genes) 166 | GenesInSamplevsOrdering <- inner_join(GenesInSample, GenesForMapping, by = c("Genes" = "Genes")) 167 | dedup_GenesInSamplevsOrdering <- GenesInSamplevsOrdering[!duplicated(GenesInSamplevsOrdering$Genes), ] 168 | dedup_GenesInSamplevsOrdering$chromorder <- gsub("chr","",dedup_GenesInSamplevsOrdering$chr) 169 | dedup_GenesInSamplevsOrdering$chromorder <- as.numeric(ifelse(dedup_GenesInSamplevsOrdering$chromorder == "X", 23, 170 | ifelse(dedup_GenesInSamplevsOrdering$chromorder == "Y", 24, dedup_GenesInSamplevsOrdering$chromorder))) 171 | dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[order(dedup_GenesInSamplevsOrdering$chromorder),] 172 | dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[,1:4] 173 | 174 | MappingFileForInferCNV <- dedup_GenesInSamplevsOrdering 175 | 176 | saveRDS(MappingFileForInferCNV, file = "MappingFileForSCC_P6_Visium_and_Bg.rds") 177 | 178 | # Outputting the requisite files for infercnv::run, part 2 179 | 180 | We then filter for only mapped genes, from counts, and then output the 181 | three requisite files for infercnv::run. 
182 | 183 | MappingFileForInferCNV <- readRDS("MappingFileForSCC_P6_Visium_and_Bg.rds") 184 | SCC_P6_BenignRef_and_Visium_Counts <- readRDS("SCC_P6_BenignRef_and_Visium_Counts.rds") 185 | 186 | CountmappedGenes <- select(MappingFileForInferCNV, Genes) 187 | 188 | Counts_joined <- SCC_P6_BenignRef_and_Visium_Counts 189 | Counts_joined <- Counts_joined %>% 190 | separate(Genes, c("Genes", NA)) 191 | 192 | Mapped_Counts_joined <- left_join(CountmappedGenes, Counts_joined) 193 | Mapped_Counts_joined <- Mapped_Counts_joined[!duplicated(Mapped_Counts_joined$Genes), ] 194 | Mapped_Counts_joinedSliced <- Mapped_Counts_joined %>% slice(1L) 195 | Mapped_Counts_joinedSliced <- as.data.frame(t(Mapped_Counts_joinedSliced[, colnames(Mapped_Counts_joinedSliced)[c(1:length(Mapped_Counts_joinedSliced))]])) 196 | Mapped_Counts_joinedSliced <- Mapped_Counts_joinedSliced %>% rownames_to_column() 197 | Mapped_Counts_joinedSliced <- as.data.frame(Mapped_Counts_joinedSliced[2:(dim(Mapped_Counts_joinedSliced)[1]), 1]) 198 | names(Mapped_Counts_joinedSliced)[1] <- "Barcode" 199 | 200 | Mapped_Counts_joinedSliced$Histology <- ifelse(paste0(substr(Mapped_Counts_joinedSliced$Barcode, start = 1, stop = 4)) == "P6_N", "PurestBenign_SCCPatient6", "Visium") 201 | 202 | #Write GenesInSamplevsOrdering 203 | write.table(Mapped_Counts_joined, 204 | "SCC_P6_BenignRef_and_Visium_Mapped_Counts.tsv", 205 | row.names = FALSE, 206 | sep = "\t") 207 | 208 | write.table(MappingFileForInferCNV, 209 | "SCC_P6_BenignRef_and_Visium_GeneOrderFile.tsv", 210 | quote = FALSE, 211 | col.names = FALSE, 212 | row.names = FALSE, 213 | sep = "\t") 214 | 215 | 216 | write.table(Mapped_Counts_joinedSliced, 217 | "SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv", 218 | quote = FALSE, 219 | col.names = FALSE, 220 | row.names = FALSE, 221 | sep = "\t") 222 | 223 | # Creating the inferCNV object (prior to run) 224 | 225 | Creating the object for infercnv::run. 
226 | 227 | Visium_P6_Bg_infCNV <- infercnv::CreateInfercnvObject(raw_counts_matrix="./SCC_P6_BenignRef_and_Visium_Mapped_Counts.tsv", 228 | gene_order_file="./SCC_P6_BenignRef_and_Visium_GeneOrderFile.tsv", 229 | annotations_file="./SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv", 230 | delim="\t", 231 | ref_group_names="PurestBenign_SCCPatient6", 232 | chr_exclude = c("chrM")) 233 | 234 | # Unsupervised Run - (Typically ran on cluster) 235 | 236 | Running infercnv, typically ran on a server. 237 | 238 | Visium_P6_Bg_infCNV = infercnv::run(Visium_P6_Bg_infCNV, 239 | cutoff=0.1, 240 | out_dir="./Figure4c_Step2/Outputs", 241 | num_threads = 10, 242 | cluster_by_groups=FALSE, 243 | denoise=TRUE, 244 | HMM=FALSE) 245 | 246 | InferCNV will output many files. We are primarily interested in the 247 | final “infercnv.21\_denoised.png” file, as well as the text file 248 | associated with the dendrogram associated with the hierarchical 249 | clustering on the left hand side of the image 250 | (infercnv.21\_denoised.observations\_dendrogram.txt). 
251 | 252 | ![infercnv.21\_denoised.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step2/infercnv.21_denoised.png) 253 | -------------------------------------------------------------------------------- /FigureScripts/Figure 4/Figure4c_SCC/Step2/infercnv.21_denoised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 4/Figure4c_SCC/Step2/infercnv.21_denoised.png -------------------------------------------------------------------------------- /FigureScripts/Figure 4/Figure4c_SCC/Step3/Figure4c_Step3_SCC_P6_siCNV_supervised.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Fig4c_Step3_SCC_P6_siCNV_supervised" 3 | author: "Andrew Erickson" 4 | output: md_document 5 | --- 6 | 7 | Now that we have run the previous unsupervised step, we will next identify clones and run the final clustered inferCNVs to generate the clustered figure panel image in 4c. 8 | 9 | # Setup 10 | 11 | Initializing libraries. 12 | 13 | ```{r, message=FALSE} 14 | library(SpatialInferCNV) 15 | library(phylogram) 16 | library(ape) 17 | library(tidyverse) 18 | ``` 19 | 20 | # Importing dendrogram 21 | 22 | Next, we want to import the dendrogram file that was created in the previous step. 23 | 24 | ```{r, eval = FALSE} 25 | SCC_for_clustering <- read.dendrogram(file = "./Figure4c_Step2/Outputs/infercnv.21_denoised.observations_dendrogram.txt") 26 | 27 | SCC_for_clustering_phylo <- as.phylo(SCC_for_clustering) 28 | ``` 29 | 30 | # Visualizing Tree 31 | 32 | Next, we want to visualize the numbers associated with the nodes of interest (clones). We output a large image file that allows us to manually inspect which nodes (corresponding to clones) should be selected.
33 | 34 | ```{r, eval = FALSE} 35 | my.subtrees = subtrees(SCC_for_clustering_phylo) # subtrees() to subset 36 | 37 | png("SCC_for_clustering_phylo.png",width=10000,height=2500, res = 300) 38 | plot(SCC_for_clustering_phylo,show.tip.label = FALSE) 39 | nodelabels(text=1:SCC_for_clustering_phylo$Nnode,node=1:SCC_for_clustering_phylo$Nnode+Ntip(SCC_for_clustering_phylo)) 40 | dev.off() 41 | ``` 42 | 43 | We provide the following output image. 44 | 45 | ![SCC_for_clustering_phylo.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step3/SCC_for_clustering_phylo.png) 46 | 47 | 48 | # Clone selection 49 | 50 | Next, view the output .png file, which provides an (albeit cluttered) labeling of the dendrogram tree nodes. Manually select individual nodes that correspond with a distinct subclonal grouping or signal that will be taken forward for re-clustering. This can be iteratively tweaked with the next step + spatial visualization until optimal. We provide more details [here](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%203/Figure3.md), and provide the finalized selected clone nodes below. 51 | 52 | We output a Figure4c_SCC_P6_Clones.csv file, identifying the barcodes and annotations for each clone for the next steps. 
53 | 54 | ```{r, eval = FALSE} 55 | #A - 1656 - spots 56 | #B - 1322 - spots 57 | #C - 1183 - spots 58 | #D - 2 - spots 59 | 60 | Node1656 <- SelectingSubTreeData(my.subtrees, 1656) 61 | Node1322 <- SelectingSubTreeData(my.subtrees, 1322) 62 | Node1183 <- SelectingSubTreeData(my.subtrees, 1183) 63 | Node2 <- SelectingSubTreeData(my.subtrees, 2) 64 | 65 | Merged <- rbind(Node1656, Node1322) 66 | Merged <- rbind(Merged, Node1183) 67 | Merged <- rbind(Merged, Node2) 68 | 69 | table(Merged$Node) 70 | 71 | Merged$Node <- ifelse(Merged$Node == "Node_1656" , "Clone_A", 72 | ifelse(Merged$Node == "Node_1322" , "Clone_B", 73 | ifelse(Merged$Node == "Node_1183" , "Clone_C", 74 | ifelse(Merged$Node == "Node_2" , "Clone_D",Merged$Node)))) 75 | 76 | write.csv(Merged, "Figure4c_SCC_P6_Clones.csv", row.names = FALSE) 77 | ``` 78 | 79 | # Outputting the requisite files for infercnv::run 80 | 81 | We import the files generated in step 2, with the updated clone barcodes, and generate a new annotation file for input to infercnv::run. 
82 | 83 | ```{r, eval = FALSE} 84 | library(tidyverse) 85 | library(SpatialInferCNV) 86 | 87 | OriginalBarcodes <- read.table("./SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv", sep = "\t") 88 | 89 | ClusteredBarcodes <- read.csv("./Figure4c_SCC_P6_Clones.csv") 90 | 91 | names(OriginalBarcodes)[1] <- "Barcode" 92 | names(OriginalBarcodes)[2] <- "Histology" 93 | 94 | UpdatedBarcodes <- left_join(OriginalBarcodes, ClusteredBarcodes) 95 | 96 | UpdatedBarcodes$Node <- ifelse(is.na(UpdatedBarcodes$Node), "PurestBenign_SCCPatient6", UpdatedBarcodes$Node) 97 | 98 | UpdatedBarcodes <- UpdatedBarcodes %>% 99 | select(Barcode, Node) %>% 100 | arrange(desc(Node)) 101 | 102 | write.table(UpdatedBarcodes, 103 | "Clustered_SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv", 104 | quote = FALSE, 105 | col.names = FALSE, 106 | row.names = FALSE, 107 | sep = "\t") 108 | 109 | ``` 110 | 111 | # Creating the inferCNV object (prior to run) 112 | 113 | We generate the infercnv object. 114 | 115 | ```{r, eval = FALSE} 116 | SCC_P6_ForClusteringClones <- infercnv::CreateInfercnvObject(raw_counts_matrix="./SCC_P6_BenignRef_and_Visium_Mapped_Counts.tsv", 117 | gene_order_file="./SCC_P6_BenignRef_and_Visium_GeneOrderFile.tsv", 118 | annotations_file="./Clustered_SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv", 119 | delim="\t", 120 | ref_group_names="PurestBenign_SCCPatient6", 121 | chr_exclude = c("chrM")) 122 | ``` 123 | 124 | # InferCNV Run - (Typically ran on cluster) 125 | 126 | Running infercnv. 127 | 128 | ```{r, eval = FALSE} 129 | SCC_P6_ForClusteringClones = infercnv::run(SCC_P6_ForClusteringClones, 130 | cutoff=0.1, 131 | out_dir="./Figure4c_Step3/Outputs", 132 | cluster_by_groups=TRUE, 133 | num_threads = 20, 134 | denoise=TRUE, 135 | HMM=TRUE) 136 | ``` 137 | 138 | InferCNV will output many files. We are primarily interested in the final "infercnv.21_denoised.png" file, corresponding to the one provided in Figure 4c. These are reordered in the final figure. 
139 | 140 | ![infercnv.21_denoised.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step3/infercnv.21_denoised.png) 141 | -------------------------------------------------------------------------------- /FigureScripts/Figure 4/Figure4c_SCC/Step3/Figure4c_Step3_SCC_P6_siCNV_supervised.md: -------------------------------------------------------------------------------- 1 | Now that we have run the previous unsupervised step, we will next identify 2 | clones and run the final clustered inferCNVs to generate the clustered 3 | figure panel image in 4c. 4 | 5 | # Setup 6 | 7 | Initializing libraries. 8 | 9 | library(SpatialInferCNV) 10 | 11 | ## Warning: replacing previous import 'phylogram::as.phylo' by 'ape::as.phylo' when 12 | ## loading 'SpatialInferCNV' 13 | 14 | library(phylogram) 15 | library(ape) 16 | 17 | ## 18 | ## Attaching package: 'ape' 19 | 20 | ## The following object is masked from 'package:phylogram': 21 | ## 22 | ## as.phylo 23 | 24 | library(tidyverse) 25 | 26 | ## Registered S3 method overwritten by 'cli': 27 | ## method from 28 | ## print.boxx spatstat.geom 29 | 30 | ## -- Attaching packages --------------------------------------- tidyverse 1.3.1 -- 31 | 32 | ## v ggplot2 3.3.5 v purrr 0.3.4 33 | ## v tibble 3.1.1 v dplyr 1.0.6 34 | ## v tidyr 1.1.3 v stringr 1.4.0 35 | ## v readr 2.0.1 v forcats 0.5.1 36 | 37 | ## -- Conflicts ------------------------------------------ tidyverse_conflicts() -- 38 | ## x dplyr::filter() masks stats::filter() 39 | ## x dplyr::lag() masks stats::lag() 40 | 41 | # Importing dendrogram 42 | 43 | Next, we want to import the dendrogram file that was created in the 44 | previous step. 
45 | 46 | SCC_for_clustering <- read.dendrogram(file = "./Figure4c_Step2/Outputs/infercnv.21_denoised.observations_dendrogram.txt") 47 | 48 | SCC_for_clustering_phylo <- as.phylo(SCC_for_clustering) 49 | 50 | # Visualizing Tree 51 | 52 | Next, we want to visualize the numbers associated with the nodes of 53 | interest (clones). We output a large image file that allows us to 54 | manually inspect which nodes (corresponding to clones) should be 55 | selected. 56 | 57 | my.subtrees = subtrees(SCC_for_clustering_phylo) # subtrees() to subset 58 | 59 | png("SCC_for_clustering_phylo.png",width=10000,height=2500, res = 300) 60 | plot(SCC_for_clustering_phylo,show.tip.label = FALSE) 61 | nodelabels(text=1:SCC_for_clustering_phylo$Nnode,node=1:SCC_for_clustering_phylo$Nnode+Ntip(SCC_for_clustering_phylo)) 62 | dev.off() 63 | 64 | We provide the following output image. 65 | 66 | ![SCC\_for\_clustering\_phylo.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step3/SCC_for_clustering_phylo.png) 67 | 68 | # Clone selection 69 | 70 | Next, view the output .png file, which provides an (albeit cluttered) 71 | labeling of the dendrogram tree nodes. Manually select individual nodes 72 | that correspond with a distinct subclonal grouping or signal that will 73 | be taken forward for re-clustering. This can be iteratively tweaked with 74 | the next step + spatial visualization until optimal. We provide more 75 | details 76 | [here](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%203/Figure3.md), 77 | and provide the finalized selected clone nodes below. 78 | 79 | We output a Figure4c\_SCC\_P6\_Clones.csv file, identifying the barcodes 80 | and annotations for each clone for the next steps. 
81 | 82 | #A - 1656 - spots 83 | #B - 1322 - spots 84 | #C - 1183 - spots 85 | #D - 2 - spots 86 | 87 | Node1656 <- SelectingSubTreeData(my.subtrees, 1656) 88 | Node1322 <- SelectingSubTreeData(my.subtrees, 1322) 89 | Node1183 <- SelectingSubTreeData(my.subtrees, 1183) 90 | Node2 <- SelectingSubTreeData(my.subtrees, 2) 91 | 92 | Merged <- rbind(Node1656, Node1322) 93 | Merged <- rbind(Merged, Node1183) 94 | Merged <- rbind(Merged, Node2) 95 | 96 | table(Merged$Node) 97 | 98 | Merged$Node <- ifelse(Merged$Node == "Node_1656" , "Clone_A", 99 | ifelse(Merged$Node == "Node_1322" , "Clone_B", 100 | ifelse(Merged$Node == "Node_1183" , "Clone_C", 101 | ifelse(Merged$Node == "Node_2" , "Clone_D",Merged$Node)))) 102 | 103 | write.csv(Merged, "Figure4c_SCC_P6_Clones.csv", row.names = FALSE) 104 | 105 | # Outputting the requisite files for infercnv::run 106 | 107 | We import the files generated in step 2, with the updated clone 108 | barcodes, and generate a new annotation file for input to infercnv::run. 109 | 110 | library(tidyverse) 111 | library(SpatialInferCNV) 112 | 113 | OriginalBarcodes <- read.table("./SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv", sep = "\t") 114 | 115 | ClusteredBarcodes <- read.csv("./Figure4c_SCC_P6_Clones.csv") 116 | 117 | names(OriginalBarcodes)[1] <- "Barcode" 118 | names(OriginalBarcodes)[2] <- "Histology" 119 | 120 | UpdatedBarcodes <- left_join(OriginalBarcodes, ClusteredBarcodes) 121 | 122 | UpdatedBarcodes$Node <- ifelse(is.na(UpdatedBarcodes$Node), "PurestBenign_SCCPatient6", UpdatedBarcodes$Node) 123 | 124 | UpdatedBarcodes <- UpdatedBarcodes %>% 125 | select(Barcode, Node) %>% 126 | arrange(desc(Node)) 127 | 128 | write.table(UpdatedBarcodes, 129 | "Clustered_SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv", 130 | quote = FALSE, 131 | col.names = FALSE, 132 | row.names = FALSE, 133 | sep = "\t") 134 | 135 | # Creating the inferCNV object (prior to run) 136 | 137 | We generate the infercnv object. 
138 | 139 | SCC_P6_ForClusteringClones <- infercnv::CreateInfercnvObject(raw_counts_matrix="./SCC_P6_BenignRef_and_Visium_Mapped_Counts.tsv", 140 | gene_order_file="./SCC_P6_BenignRef_and_Visium_GeneOrderFile.tsv", 141 | annotations_file="./Clustered_SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv", 142 | delim="\t", 143 | ref_group_names="PurestBenign_SCCPatient6", 144 | chr_exclude = c("chrM")) 145 | 146 | # InferCNV Run - (Typically ran on cluster) 147 | 148 | Running infercnv. 149 | 150 | SCC_P6_ForClusteringClones = infercnv::run(SCC_P6_ForClusteringClones, 151 | cutoff=0.1, 152 | out_dir="./Figure4c_Step3/Outputs", 153 | cluster_by_groups=TRUE, 154 | num_threads = 20, 155 | denoise=TRUE, 156 | HMM=TRUE) 157 | 158 | InferCNV will output many files. We are primarily interested in the 159 | final “infercnv.21\_denoised.png” file, corresponding to the one 160 | provided in Figure 4c. These are reordered in the final figure. 161 | 162 | ![infercnv.21\_denoised.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step3/infercnv.21_denoised.png) 163 | -------------------------------------------------------------------------------- /FigureScripts/Figure 4/Figure4c_SCC/Step3/SCC_for_clustering_phylo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 4/Figure4c_SCC/Step3/SCC_for_clustering_phylo.png -------------------------------------------------------------------------------- /FigureScripts/Figure 4/Figure4c_SCC/Step3/infercnv.21_denoised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 4/Figure4c_SCC/Step3/infercnv.21_denoised.png -------------------------------------------------------------------------------- 
/FigureScripts/Figure 4/Figure4e/Figure4e_pediatricmedulloblastoma.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Figure4e" 3 | author: "Linda Kvastad, Andrew Erickson" 4 | output: md_document 5 | --- 6 | # Code for generating inferCNV plot of pediatric brain tumor patient 1 7 | 8 | # Set working directory, this directory should also include a folder containing all necessary files called "inferCNV_pediatric_patient_1" 9 | 10 | ```{r setup, message=FALSE} 11 | #setwd("type_in_the_path_to_your_working_directory") 12 | 13 | # Load R packages 14 | library(STutility) 15 | library(infercnv) 16 | ``` 17 | 18 | # Loading Data 19 | 20 | We start by creating an empty working directory so that all downloaded files are organized in one place. Download the files [from Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29): inferCNV_pediatric_patient_1/ 21 | 22 | ```{r, eval = FALSE} 23 | # Load infoTables: Sample from patient 3 included regions of stroma cells that will be excluded from the inferCNV analysis, which only contains spots from tumor regions. 24 | infoTable_pat_1_2 <- read.table("./inferCNV_pediatric_patient_1/infoTable_pat_1_2.csv", sep=";", header=T, stringsAsFactors = F) 25 | infoTable_pat_3 <- read.table("./inferCNV_pediatric_patient_1/infoTable_pat_3.csv", sep=";", header=T, stringsAsFactors = F) 26 | 27 | # Create Seurat Objects 28 | se_pat_1_2 <- InputFromTable(infotable = infoTable_pat_1_2, 29 | min.gene.count = 100, 30 | min.gene.spots = 5, 31 | min.spot.count = 500, 32 | platform="Visium") 33 | 34 | 35 | se_pat_3 <- InputFromTable(infotable = infoTable_pat_3, 36 | min.gene.count = 100, 37 | min.gene.spots = 5, 38 | min.spot.count = 500, 39 | platform="Visium") 40 | ``` 41 | 42 | # Further Formatting 43 | 44 | Creating and formatting the dataframes before outputting the requisite input files for infercnv::run. 
45 | 46 | ```{r, eval = FALSE} 47 | # Add Pathology annotations to Meta.data in se_pat_3 object 48 | df <- read.csv(file = "./inferCNV_pediatric_patient_1/pathology_patient_3.csv") 49 | df$Barcode <- paste0(df$Barcode, "_1") 50 | rownames(df) <- df$Barcode 51 | se_pat_3$pathology <- df[rownames(se_pat_3[[]]), ]$Pathology 52 | 53 | # Check that pathology data was added to meta data of se_pat_3 54 | head(se_pat_3[[]]) 55 | tail(se_pat_3[[]]) 56 | table(se_pat_3$pathology) 57 | 58 | # Subsetting se_pat_3 to only contain spots with annotated tumor cells 59 | se_pat_3 <- SetIdent(se_pat_3, value = "pathology") 60 | se_pat_3 <- SubsetSTData(se_pat_3, idents = c("tumor cells")) 61 | 62 | # Check that only spots containing tumor cells are left 63 | table(se_pat_3$pathology) 64 | 65 | # Merge the se objects 66 | se <- MergeSTData(se_pat_1_2, y = c(se_pat_3)) 67 | 68 | # Check that the merge worked 69 | se 70 | head(se[[]]) 71 | tail(se[[]]) 72 | table(se$sample) 73 | 74 | # Set ident to sample 75 | se <- SetIdent(se, value = "sample") 76 | table(se$sample) 77 | 78 | # prepare a data.frame used as input for the inferCNV run 79 | se_sample <- as.data.frame(se$sample) 80 | head(se_sample) 81 | colnames(se_sample) <- c("sample") 82 | head(se_sample) 83 | se_sample <- cbind(Barcode = rownames(se_sample), se_sample) 84 | rownames(se_sample) <- NULL 85 | head(se_sample) 86 | tail(se_sample) 87 | ``` 88 | 89 | # Outputting Files for infercnv::run 90 | 91 | Creating the files for the next step. 92 | 93 | ```{r, eval = FALSE} 94 | 95 | # save the data.frame 96 | write.table(x = se_sample, file = "./inferCNV_annotions_se_pat_1_2_3.txt",sep = "\t", row.names = F, col.names = F) 97 | 98 | # extract 10x count data from used as input for the inferCNV run 99 | counts_matrix = GetAssayData(se, slot="counts") 100 | 101 | ``` 102 | 103 | # Create the infercnv object 104 | 105 | Creating the inferCNV object for the inferCNV run. 
106 | 107 | ```{r, eval = FALSE} 108 | infercnv_obj = CreateInfercnvObject(raw_counts_matrix=counts_matrix, 109 | annotations_file="./inferCNV_annotions_se_pat_1_2_3.txt", 110 | delim="\t", 111 | gene_order_file="./inferCNV_pediatric_patient_1/gencode.v25.annotation_gen_pos_v3.txt", 112 | ref_group_names=c("patient_2","patient_3"), 113 | chr_exclude=c("chrMT")) 114 | ``` 115 | 116 | # InferCNV run 117 | 118 | Running the infercnv::run. This is typically ran on a high performance cluster. 119 | 120 | ```{r, eval = FALSE} 121 | 122 | # perform infercnv operations to reveal cnv signal 123 | infercnv_obj = infercnv::run(infercnv_obj, 124 | cutoff=0.1, 125 | out_dir="./inferCNV_pediatric_patient_1_output_dir", # dir is auto-created for storing outputs 126 | cluster_by_groups=T, # If observations are defined according to groups (ie. patients), each group will be clustered separately 127 | denoise=T, 128 | HMM=T) 129 | 130 | ``` 131 | 132 | The output infercnv.png was used in Figure 4e: 133 | 134 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4e/infercnv.png). 
135 | 136 | -------------------------------------------------------------------------------- /FigureScripts/Figure 4/Figure4e/Figure4e_pediatricmedulloblastoma.md: -------------------------------------------------------------------------------- 1 | # Code for generating inferCNV plot of pediatric brain tumor patient 1 2 | 3 | # Set working directory, this directory should also include a folder containing all necessary files called “inferCNV\_pediatric\_patient\_1” 4 | 5 | #setwd("type_in_the_path_to_your_working_directory") 6 | 7 | # Load R packages 8 | library(STutility) 9 | 10 | ## Loading required package: Seurat 11 | 12 | ## Attaching SeuratObject 13 | 14 | ## Loading required package: ggplot2 15 | 16 | ## Registered S3 method overwritten by 'imager': 17 | ## method from 18 | ## plot.imlist 19 | 20 | library(infercnv) 21 | 22 | ## Registered S3 method overwritten by 'ape': 23 | ## method from 24 | ## plot.mst spdep 25 | 26 | ## Registered S3 method overwritten by 'gplots': 27 | ## method from 28 | ## reorder.factor gdata 29 | 30 | # Loading Data 31 | 32 | We start by creating an empty working directory so that all downloaded 33 | files are organized in one place. Download the files [from 34 | Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29): 35 | inferCNV\_pediatric\_patient\_1/ 36 | 37 | # Load infoTables: Sample from patient 3 included regions of stroma cells that will be excluded from the inferCNV analysis, which only contains spots from tumor regions. 
38 | infoTable_pat_1_2 <- read.table("./inferCNV_pediatric_patient_1/infoTable_pat_1_2.csv", sep=";", header=T, stringsAsFactors = F) 39 | infoTable_pat_3 <- read.table("./inferCNV_pediatric_patient_1/infoTable_pat_3.csv", sep=";", header=T, stringsAsFactors = F) 40 | 41 | # Create Seurat Objects 42 | se_pat_1_2 <- InputFromTable(infotable = infoTable_pat_1_2, 43 | min.gene.count = 100, 44 | min.gene.spots = 5, 45 | min.spot.count = 500, 46 | platform="Visium") 47 | 48 | 49 | se_pat_3 <- InputFromTable(infotable = infoTable_pat_3, 50 | min.gene.count = 100, 51 | min.gene.spots = 5, 52 | min.spot.count = 500, 53 | platform="Visium") 54 | 55 | # Further Formatting 56 | 57 | Creating and formatting the dataframes before outputting the requisite 58 | input files for infercnv::run. 59 | 60 | # Add Pathology annotations to Meta.data in se_pat_3 object 61 | df <- read.csv(file = "./inferCNV_pediatric_patient_1/pathology_patient_3.csv") 62 | df$Barcode <- paste0(df$Barcode, "_1") 63 | rownames(df) <- df$Barcode 64 | se_pat_3$pathology <- df[rownames(se_pat_3[[]]), ]$Pathology 65 | 66 | # Check that pathology data was added to meta data of se_pat_3 67 | head(se_pat_3[[]]) 68 | tail(se_pat_3[[]]) 69 | table(se_pat_3$pathology) 70 | 71 | # Subsetting se_pat_3 to only contain spots with annotated tumor cells 72 | se_pat_3 <- SetIdent(se_pat_3, value = "pathology") 73 | se_pat_3 <- SubsetSTData(se_pat_3, idents = c("tumor cells")) 74 | 75 | # Check that only spots containing tumor cells are left 76 | table(se_pat_3$pathology) 77 | 78 | # Merge the se objects 79 | se <- MergeSTData(se_pat_1_2, y = c(se_pat_3)) 80 | 81 | # Check that the merge worked 82 | se 83 | head(se[[]]) 84 | tail(se[[]]) 85 | table(se$sample) 86 | 87 | # Set ident to sample 88 | se <- SetIdent(se, value = "sample") 89 | table(se$sample) 90 | 91 | # prepare a data.frame used as input for the inferCNV run 92 | se_sample <- as.data.frame(se$sample) 93 | head(se_sample) 94 | colnames(se_sample) <- c("sample") 95 
| head(se_sample) 96 | se_sample <- cbind(Barcode = rownames(se_sample), se_sample) 97 | rownames(se_sample) <- NULL 98 | head(se_sample) 99 | tail(se_sample) 100 | 101 | # Outputting Files for infercnv::run 102 | 103 | Creating the files for the next step. 104 | 105 | # save the data.frame 106 | write.table(x = se_sample, file = "./inferCNV_annotions_se_pat_1_2_3.txt",sep = "\t", row.names = F, col.names = F) 107 | 108 | # extract 10x count data from used as input for the inferCNV run 109 | counts_matrix = GetAssayData(se, slot="counts") 110 | 111 | # Create the infercnv object 112 | 113 | Creating the inferCNV object for the inferCNV run. 114 | 115 | infercnv_obj = CreateInfercnvObject(raw_counts_matrix=counts_matrix, 116 | annotations_file="./inferCNV_annotions_se_pat_1_2_3.txt", 117 | delim="\t", 118 | gene_order_file="./inferCNV_pediatric_patient_1/gencode.v25.annotation_gen_pos_v3.txt", 119 | ref_group_names=c("patient_2","patient_3"), 120 | chr_exclude=c("chrMT")) 121 | 122 | # InferCNV run 123 | 124 | Running the infercnv::run. This is typically ran on a high performance 125 | cluster. 126 | 127 | # perform infercnv operations to reveal cnv signal 128 | infercnv_obj = infercnv::run(infercnv_obj, 129 | cutoff=0.1, 130 | out_dir="./inferCNV_pediatric_patient_1_output_dir", # dir is auto-created for storing outputs 131 | cluster_by_groups=T, # If observations are defined according to groups (ie. patients), each group will be clustered separately 132 | denoise=T, 133 | HMM=T) 134 | 135 | The output infercnv.png was used in Figure 4e: 136 | 137 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4e/infercnv.png). 
138 | -------------------------------------------------------------------------------- /FigureScripts/Figure 4/Figure4e/infercnv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 4/Figure4e/infercnv.png -------------------------------------------------------------------------------- /FigureScripts/SCRIPTS.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Scripts to Reproduce Main Figures 3 | author: "Andrew Erickson, Nuffield Department of Surgical Sciences, University of Oxford" 4 | output: 5 | md_document: 6 | variant: markdown_github 7 | --- 8 | 9 | # Landing Page 10 | 11 | ```{r, eval = FALSE} 12 | #Landing page text 13 | ``` 14 | -------------------------------------------------------------------------------- /FigureScripts/SCRIPTS.md: -------------------------------------------------------------------------------- 1 | # Landing Page 2 | 3 | ``` r 4 | #Landing page text 5 | ``` 6 | -------------------------------------------------------------------------------- /FigureScripts/Seurat/Seurat_Spatial_Import.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Ericksonetal_SeuratSpatialImport" 3 | author: "Andrew Erickson" 4 | date: "2022-12-08" 5 | output: md_document 6 | --- 7 | 8 | # Seurat Spatial Import example 9 | 10 | The data for [Erickson et al](https://www.nature.com/articles/s41586-022-05023-2) can be found at the following Mendeley link [(latest dataset version = 4)](https://data.mendeley.com/datasets/svw96g68dv/4). 11 | 12 | The following code downloads the count matrix file, and the spaceranger "spatial" folder files, and imports them into a [Seurat](https://satijalab.org/seurat/index.html) object for further analysis. 
13 | 14 | ```{r} 15 | #Install Seurat if not already installed 16 | #install.packages('Seurat') 17 | 18 | #Initialize the Seurat library 19 | library(Seurat) 20 | 21 | #Downloading Patient 1 - H2_1 filtered_feature_bc_matrix.h5 file to working folder 22 | url = "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/8b69170c-6c07-4e69-abf2-35fade0f5e2c/file_downloaded" 23 | download.file(url,'./filtered_feature_bc_matrix.h5', mode = 'wb') 24 | 25 | #Create subdirectory called "spatial" 26 | dir.create("spatial") 27 | 28 | #07.12.2022 - This is manually downloaded for the user while waiting for Mendeley updates to be pushed 29 | #Downloading Patient 1 - H2_1 tissue_hires_image.png image file to spatial folder 30 | url = "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/e1399690-dc45-43e5-ae39-7a065bf7d34e/file_downloaded" 31 | download.file(url,'./spatial/H2_1_tissue_hires_image.png', mode = 'wb') 32 | 33 | #Downloading Patient 1 - H2_1 scalefactors_json.json file to spatial folder 34 | url = "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/06eb7410-a6a3-4ea9-a364-c6a734a22169/file_downloaded" 35 | download.file(url,'./spatial/scalefactors_json.json', mode = 'wb') 36 | 37 | #Downloading Patient 1 - H2_1 tissue_positions_list.csv file to spatial folder 38 | url = "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/e028d330-142b-4d8b-b32d-9114b5c48421/file_downloaded" 39 | download.file(url,'./spatial/tissue_positions_list.csv', mode = 'wb') 40 | 41 | InputImage <- Read10X_Image( 42 | "./spatial", 43 | image.name = "H2_1_tissue_hires_image.png", 44 | filter.matrix = FALSE 45 | ) 46 | 47 | H2_1_Seurat <- Load10X_Spatial( 48 | ".", 49 | filename = "filtered_feature_bc_matrix.h5", 50 | assay = "Spatial", 51 | image = InputImage 52 | ) 53 | 54 | summary(H2_1_Seurat) 55 | head(H2_1_Seurat) 56 | 57 | sessionInfo() 58 | ``` 59 | -------------------------------------------------------------------------------- 
/FigureScripts/Seurat/Seurat_Spatial_Import.md: -------------------------------------------------------------------------------- 1 | # Seurat Spatial Import example 2 | 3 | The data for [Erickson et 4 | al](https://www.nature.com/articles/s41586-022-05023-2) can be found at 5 | the following Mendeley link [(latest dataset version = 6 | 4)](https://data.mendeley.com/datasets/svw96g68dv/4). 7 | 8 | The following code downloads the count matrix file, and the spaceranger 9 | “spatial” folder files, and imports them into a 10 | [Seurat](https://satijalab.org/seurat/index.html) object for further 11 | analysis. 12 | 13 | #Install Seurat if not already installed 14 | #install.packages('Seurat') 15 | 16 | #Initialize the Seurat library 17 | library(Seurat) 18 | 19 | ## Warning: package 'Seurat' was built under R version 4.2.2 20 | 21 | ## Attaching SeuratObject 22 | 23 | #Downloading Patient 1 - H2_1 filtered_feature_bc_matrix.h5 file to working folder 24 | url = "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/8b69170c-6c07-4e69-abf2-35fade0f5e2c/file_downloaded" 25 | download.file(url,'./filtered_feature_bc_matrix.h5', mode = 'wb') 26 | 27 | #Create subdirectory called "spatial" 28 | dir.create("spatial") 29 | 30 | #07.12.2022 - This is manually downloaded for the user while waiting for Mendeley updates to be pushed 31 | #Downloading Patient 1 - H2_1 tissue_hires_image.png image file to spatial folder 32 | url = "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/e1399690-dc45-43e5-ae39-7a065bf7d34e/file_downloaded" 33 | download.file(url,'./spatial/H2_1_tissue_hires_image.png', mode = 'wb') 34 | 35 | #Downloading Patient 1 - H2_1 scalefactors_json.json file to spatial folder 36 | url = "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/06eb7410-a6a3-4ea9-a364-c6a734a22169/file_downloaded" 37 | download.file(url,'./spatial/scalefactors_json.json', mode = 'wb') 38 | 39 | #Downloading Patient 1 - H2_1 tissue_positions_list.csv 
file to spatial folder 40 | url = "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/e028d330-142b-4d8b-b32d-9114b5c48421/file_downloaded" 41 | download.file(url,'./spatial/tissue_positions_list.csv', mode = 'wb') 42 | 43 | InputImage <- Read10X_Image( 44 | "./spatial", 45 | image.name = "H2_1_tissue_hires_image.png", 46 | filter.matrix = FALSE 47 | ) 48 | 49 | H2_1_Seurat <- Load10X_Spatial( 50 | ".", 51 | filename = "filtered_feature_bc_matrix.h5", 52 | assay = "Spatial", 53 | image = InputImage 54 | ) 55 | 56 | summary(H2_1_Seurat) 57 | 58 | ## Length Class Mode 59 | ## 1 Seurat S4 60 | 61 | head(H2_1_Seurat) 62 | 63 | ## orig.ident nCount_Spatial nFeature_Spatial 64 | ## AAACAAGTATCTCCCA-1 SeuratProject 8758 2717 65 | ## AAACACCAATAACTGC-1 SeuratProject 13466 3889 66 | ## AAACAGCTTTCAGAAG-1 SeuratProject 9514 2511 67 | ## AAACAGGGTCTATATT-1 SeuratProject 15668 3601 68 | ## AAACAGTGTTCCTGGG-1 SeuratProject 0 0 69 | ## AAACATTTCCCGGATT-1 SeuratProject 5290 2211 70 | ## AAACCCGAACGAAATC-1 SeuratProject 27 26 71 | ## AAACCGGAAATGTTAA-1 SeuratProject 7 7 72 | ## AAACCGGGTAGGTACC-1 SeuratProject 9728 2781 73 | ## AAACCGTTCGTCCAGG-1 SeuratProject 3783 1660 74 | 75 | sessionInfo() 76 | 77 | ## R version 4.2.1 (2022-06-23 ucrt) 78 | ## Platform: x86_64-w64-mingw32/x64 (64-bit) 79 | ## Running under: Windows 10 x64 (build 22000) 80 | ## 81 | ## Matrix products: default 82 | ## 83 | ## locale: 84 | ## [1] LC_COLLATE=English_United States.utf8 85 | ## [2] LC_CTYPE=English_United States.utf8 86 | ## [3] LC_MONETARY=English_United States.utf8 87 | ## [4] LC_NUMERIC=C 88 | ## [5] LC_TIME=English_United States.utf8 89 | ## 90 | ## attached base packages: 91 | ## [1] stats graphics grDevices utils datasets methods base 92 | ## 93 | ## other attached packages: 94 | ## [1] SeuratObject_4.1.3 Seurat_4.3.0 95 | ## 96 | ## loaded via a namespace (and not attached): 97 | ## [1] Rtsne_0.16 colorspace_2.0-3 deldir_1.0-6 98 | ## [4] ellipsis_0.3.2 ggridges_0.5.4 
rstudioapi_0.14 99 | ## [7] spatstat.data_3.0-0 leiden_0.4.3 listenv_0.8.0 100 | ## [10] bit64_4.0.5 ggrepel_0.9.2 fansi_1.0.3 101 | ## [13] codetools_0.2-18 splines_4.2.1 knitr_1.40 102 | ## [16] polyclip_1.10-4 jsonlite_1.8.3 ica_1.0-3 103 | ## [19] cluster_2.1.3 png_0.1-7 uwot_0.1.14 104 | ## [22] shiny_1.7.3 sctransform_0.3.5 spatstat.sparse_3.0-0 105 | ## [25] compiler_4.2.1 httr_1.4.4 assertthat_0.2.1 106 | ## [28] Matrix_1.5-3 fastmap_1.1.0 lazyeval_0.2.2 107 | ## [31] cli_3.3.0 later_1.3.0 htmltools_0.5.2 108 | ## [34] tools_4.2.1 igraph_1.3.5 gtable_0.3.1 109 | ## [37] glue_1.6.2 RANN_2.6.1 reshape2_1.4.4 110 | ## [40] dplyr_1.0.10 Rcpp_1.0.9 scattermore_0.8 111 | ## [43] vctrs_0.5.1 nlme_3.1-157 spatstat.explore_3.0-5 112 | ## [46] progressr_0.11.0 lmtest_0.9-40 spatstat.random_3.0-1 113 | ## [49] xfun_0.31 stringr_1.4.1 globals_0.16.2 114 | ## [52] mime_0.12 miniUI_0.1.1.1 lifecycle_1.0.3 115 | ## [55] irlba_2.3.5.1 goftest_1.2-3 future_1.29.0 116 | ## [58] MASS_7.3-57 zoo_1.8-11 scales_1.2.1 117 | ## [61] promises_1.2.0.1 spatstat.utils_3.0-1 parallel_4.2.1 118 | ## [64] RColorBrewer_1.1-3 yaml_2.3.5 reticulate_1.26 119 | ## [67] pbapply_1.6-0 gridExtra_2.3 ggplot2_3.4.0 120 | ## [70] stringi_1.7.8 rlang_1.0.6 pkgconfig_2.0.3 121 | ## [73] matrixStats_0.63.0 evaluate_0.18 lattice_0.20-45 122 | ## [76] ROCR_1.0-11 purrr_0.3.5 tensor_1.5 123 | ## [79] patchwork_1.1.2 htmlwidgets_1.5.4 bit_4.0.5 124 | ## [82] cowplot_1.1.1 tidyselect_1.2.0 parallelly_1.32.1 125 | ## [85] RcppAnnoy_0.0.20 plyr_1.8.8 magrittr_2.0.3 126 | ## [88] R6_2.5.1 generics_0.1.3 DBI_1.1.3 127 | ## [91] pillar_1.8.1 fitdistrplus_1.1-8 survival_3.3-1 128 | ## [94] abind_1.4-5 sp_1.5-1 tibble_3.1.8 129 | ## [97] future.apply_1.10.0 hdf5r_1.3.7 KernSmooth_2.23-20 130 | ## [100] utf8_1.2.2 spatstat.geom_3.0-3 plotly_4.10.1 131 | ## [103] rmarkdown_2.18 grid_4.2.1 data.table_1.14.6 132 | ## [106] digest_0.6.29 xtable_1.8-4 tidyr_1.2.1 133 | ## [109] httpuv_1.6.6 munsell_0.5.0 
viridisLite_0.4.1 134 | -------------------------------------------------------------------------------- /FigureScripts/Seurat/filtered_feature_bc_matrix.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Seurat/filtered_feature_bc_matrix.h5 -------------------------------------------------------------------------------- /FigureScripts/Seurat/spatial/H2_1_tissue_hires_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Seurat/spatial/H2_1_tissue_hires_image.png -------------------------------------------------------------------------------- /FigureScripts/Seurat/spatial/scalefactors_json.json: -------------------------------------------------------------------------------- 1 | {"spot_diameter_fullres": 113.3410085152946, "tissue_hires_scalef": 0.12641425, "fiducial_diameter_fullres": 183.08932144778362, "tissue_lowres_scalef": 0.03792428} -------------------------------------------------------------------------------- /FigureScripts/SpotLevelCloneCalls/Figure2/H1_2_Clones.csv: -------------------------------------------------------------------------------- 1 | Barcode,CloneNames 2 | AGATTCACAACCGATA-1,FigureClone_K 3 | ACTCCCATTCCTAAAG-1,FigureClone_K 4 | GAAGCTCGGACCCGTC-1,FigureClone_K 5 | AGATGCAAGACGTGCA-1,FigureClone_K 6 | TTAAGGCCCGTACTTT-1,FigureClone_K 7 | TTGGTTGCGGTGCGCG-1,FigureClone_K 8 | CCGTATCTCGTCGTAG-1,FigureClone_K 9 | CTCATGGTAATTTGCG-1,FigureClone_K 10 | AGAAGGTACACTTCAC-1,FigureClone_K 11 | GCAACACACTAGAACT-1,FigureClone_K 12 | AGCGGCGGTTAGCGGT-1,FigureClone_K 13 | CTACTCAAGGTATAGT-1,FigureClone_K 14 | GCGCAAATATATTCAA-1,FigureClone_K 15 | TTGTGGTAGGAGGGAT-1,FigureClone_K 16 | ATACGGAACGTCGTTT-1,FigureClone_K 17 | CCCGTAGCTGGGAAGA-1,FigureClone_K 
18 | CTGGGTTGAGTTAAAG-1,FigureClone_K 19 | TTGTTTCACATCCAGG-1,FigureClone_K 20 | GTGGAGTCGGCGGTTG-1,FigureClone_E 21 | GGGCGGCAAATGAATT-1,FigureClone_E 22 | TAAGGCATAACATCAA-1,FigureClone_E 23 | CGAACCCGCATGCGTC-1,FigureClone_B 24 | GGAACCGTGTAAATTG-1,FigureClone_B 25 | ACAGGTGGAGGTGAGG-1,FigureClone_B 26 | AGCGACAGGAACGGTC-1,FigureClone_B 27 | GCCCGCGCGTAAACGG-1,FigureClone_B 28 | CACCGTTAGGGATCAC-1,FigureClone_B 29 | AATAACACTAGAACAA-1,FigureClone_B 30 | TTGATTAGCTGTTTCT-1,FigureClone_B 31 | CCAGAAAGCAACTCAT-1,FigureClone_B 32 | CACGTCGGCAACCTCT-1,FigureClone_B 33 | TACCTACTCCCAGTAT-1,FigureClone_B 34 | CTCGCCGAATGTAGGG-1,FigureClone_B 35 | CAGACGAACCTGATAC-1,FigureClone_B 36 | TTCCGGCCTTGAGGCT-1,FigureClone_B 37 | GGTGAAGTACAGGGAT-1,FigureClone_B 38 | ATCACGTGCTAATTAA-1,FigureClone_B 39 | GGTAGACCGTTGGGCG-1,FigureClone_B 40 | TCACAGCAAACTCGAA-1,FigureClone_B 41 | CCGTGTTAAATTCCAT-1,FigureClone_B 42 | TAGCTAGAAGGCATGA-1,FigureClone_B 43 | CCAGTCTTGTCATAGA-1,FigureClone_B 44 | TTACCCTAGGGATTGG-1,FigureClone_B 45 | AGCGGACACTTCGTAG-1,FigureClone_B 46 | TGAGTAAATTAGCGTA-1,FigureClone_B 47 | ATACCTAACCAAGAAA-1,FigureClone_B 48 | GAAGCCTGCACATTCC-1,FigureClone_B 49 | TGAGGAGTGCCAGCTT-1,FigureClone_B 50 | ATACGTTATGCACGGA-1,FigureClone_B 51 | CGTTGTCGGCAATTGA-1,FigureClone_B 52 | TACCGTAGGTTAACTA-1,FigureClone_B 53 | CCAGCTCGAACGCATT-1,FigureClone_B 54 | AATAGAACAGAGTGGC-1,FigureClone_B 55 | ACTACGCGTTAGAATT-1,FigureClone_B 56 | CCTGTACTCACGCCCA-1,FigureClone_B 57 | CGGTTGACCTGGCATA-1,FigureClone_B 58 | CTTCCGCTCCGTGAAG-1,FigureClone_B 59 | GCTAGCTTGAATAGCT-1,FigureClone_B 60 | CCAAGAAAGTGGGCGA-1,FigureClone_B 61 | GCGGACCGCGTTGTGG-1,FigureClone_B 62 | TGGCGACTGCTCCAAA-1,FigureClone_B 63 | TTACTGGGATATTTCA-1,FigureClone_B 64 | GAGTAGATACTAGTTG-1,FigureClone_B 65 | GACATCGATTTATAAC-1,FigureClone_B 66 | TGTGAGACTAGCCCAA-1,FigureClone_B 67 | ATAATTAGCTAAGTAG-1,FigureClone_B 68 | GTCGCCGTTGTGTGTT-1,FigureClone_B 69 | GCACGCCTACTTAGAT-1,FigureClone_B 70 | 
AACTCTCAGTGTGCTC-1,FigureClone_B 71 | CAATATTCTTGACCTA-1,FigureClone_B 72 | TAATAGAACAGAGTTA-1,FigureClone_B 73 | GTCGGGAAGCAGAAAC-1,FigureClone_B 74 | TTGCGGCATCAGAAAG-1,FigureClone_B 75 | TAAGTAACATCTTGAC-1,FigureClone_B 76 | GTGGTATAGTCTGCCG-1,FigureClone_B 77 | TCGTCAAGTACGCGCA-1,FigureClone_B 78 | TCAACGCAGGAAATAA-1,FigureClone_B 79 | TTGACCGTGTTAATGA-1,FigureClone_B 80 | TGCGAGAATATTACCC-1,FigureClone_B 81 | CGTTTGTGTAGAGGGT-1,FigureClone_B 82 | CCGCGGAATGCGTCAC-1,FigureClone_B 83 | TCGTTGCTATCCGGTC-1,FigureClone_B 84 | TCTGTTACCCAGCATA-1,FigureClone_B 85 | ATCAAACGAAGGTTTG-1,FigureClone_B 86 | CCGCTTACCTCACTCT-1,FigureClone_B 87 | TTGCGTCGGCCAACCG-1,FigureClone_B 88 | CTAACTGGTCCGGTTC-1,FigureClone_B 89 | TCGTTAGGAGTCCCTA-1,FigureClone_B 90 | CAGCGATTCCCTTCAA-1,FigureClone_B 91 | CAGAGGCGATGCATGA-1,FigureClone_B 92 | CATTTGAGTGGTACGT-1,FigureClone_B 93 | CAGACACCGATCGCTG-1,FigureClone_B 94 | TTCCACACAGATTTGA-1,FigureClone_B 95 | AGGCTTCCCGAAGAAG-1,FigureClone_B 96 | CATAGCGTTGCCCACC-1,FigureClone_B 97 | CCTATACCGTCCTGTC-1,FigureClone_B 98 | ATCCAGAGCAACAACC-1,FigureClone_B 99 | CTGCTGAGGCCACGAA-1,FigureClone_B 100 | CACCCGGTTTGTGACT-1,FigureClone_B 101 | AAACCGTTCGTCCAGG-1,FigureClone_B 102 | GTAATCTGATTCTTCG-1,FigureClone_B 103 | CTTCTATTAATGCTAG-1,FigureClone_B 104 | TGATACATTTAGCCGT-1,FigureClone_B 105 | AAATTTGCGGGTGTGG-1,FigureClone_B 106 | ACCCTATGCCATATCG-1,FigureClone_B 107 | GCTGTATTACTGGCCC-1,FigureClone_B 108 | TATTCCTCCGCCCACT-1,FigureClone_B 109 | CCGGTTTGTAATTGTG-1,FigureClone_B 110 | GATCCTCGACACTGGC-1,FigureClone_B 111 | TAGATATGGACTGGAA-1,FigureClone_B 112 | GAGTATGCCCGCCTTG-1,FigureClone_B 113 | TACTGAACAGATTTAG-1,FigureClone_B 114 | CGGTGCGCGTTGGTCC-1,FigureClone_B 115 | CCACCAACTTTACTGT-1,FigureClone_B 116 | CCTACATTCACAGACG-1,FigureClone_B 117 | TCGAGCCAGGCAGGCC-1,FigureClone_B 118 | GTAGAGGGAGACAAGT-1,FigureClone_B 119 | CCGAACACTGGGCCTC-1,FigureClone_B 120 | GGGCAGAGCAATCGTT-1,FigureClone_B 121 | TATTAACCTGACCGCG-1,FigureClone_B 122 | 
ACCTAAGTACCTTTCA-1,FigureClone_B 123 | GTGGACCAACCCGATT-1,FigureClone_B 124 | TCTAGCAATCTCCGCC-1,FigureClone_B 125 | AAATCGTGTACCACAA-1,FigureClone_B 126 | CATAGTCCACAAGAAC-1,FigureClone_B 127 | GGTTACCACCCTCGGG-1,FigureClone_B 128 | TTGCACAATTCAGAAA-1,FigureClone_B 129 | TGGCAGATTACGATCA-1,FigureClone_B 130 | GAGTAAGGCCACGGGA-1,FigureClone_A 131 | GCAGGAACTTAGATCT-1,FigureClone_A 132 | CAGTACCAGTTTACGT-1,FigureClone_A 133 | AGGATCACGCGATCTG-1,FigureClone_A 134 | CGGAAAGAATCAAACG-1,FigureClone_A 135 | CGACCCTTAACGCCGG-1,FigureClone_A 136 | CCCAGTAAACTTGGGA-1,FigureClone_A 137 | CACCCTTGGTGAGACC-1,FigureClone_A 138 | TCGGTCCCGACAATAG-1,FigureClone_A 139 | CCGACGGGCATGAGGT-1,FigureClone_A 140 | TGGCCAATTTGGTACT-1,FigureClone_A 141 | CTTCAGTGGTCGCCTA-1,FigureClone_A 142 | GACCCAATTATGATAC-1,FigureClone_A 143 | GGATCTTGACTCAACC-1,FigureClone_A 144 | AGCCTAATACCCACGT-1,FigureClone_A 145 | GTTAGGCTACCCGTTT-1,FigureClone_A 146 | AGCAACCGAAAGTAAT-1,FigureClone_A 147 | CAGATCCTGGTTTGAA-1,FigureClone_A 148 | TCAACAAAGATAATTC-1,FigureClone_A 149 | TTGAATCGTTGTATAA-1,FigureClone_A 150 | CTATGTGAGTCACGGC-1,FigureClone_A 151 | AATCTAGGTTTACTTG-1,FigureClone_A 152 | CCTAGGTAAAGGTAGC-1,FigureClone_A 153 | AGTATGCTGGAGACCA-1,FigureClone_A 154 | GCTGAATCTTCCAATC-1,FigureClone_A 155 | AGAAGTGATTCGTGAT-1,FigureClone_A 156 | CTCTCTAACTGCCTAG-1,FigureClone_A 157 | GGGCAACCGCACGTGC-1,FigureClone_A 158 | GCCCTAGCCGTCGCGA-1,FigureClone_A 159 | TATCTTGCAATACAAC-1,FigureClone_A 160 | TGCCAAAGTCAGACTT-1,FigureClone_A 161 | CCGGCGTGAGACTCTG-1,FigureClone_A 162 | AATCTGGCTTTCTAGT-1,FigureClone_A 163 | TCGGCGTACTGCACAA-1,FigureClone_A 164 | AAACAGGGTCTATATT-1,FigureClone_A 165 | TCCTCCTAAGACATTC-1,FigureClone_A 166 | TACCTTAAGATTTCCC-1,FigureClone_A 167 | AAATGGCCCGTGCCCT-1,FigureClone_A 168 | CATATGTCAGGCTACG-1,FigureClone_A 169 | TACTCTTTCGTCTTCA-1,FigureClone_A 170 | CGAACGGCCGGACAAC-1,FigureClone_A 171 | CTGGACGCAGTCCGGC-1,FigureClone_A 172 | TACTTTCCGCACGCCA-1,FigureClone_A 173 | 
ATCAGCTCGTCCACTA-1,FigureClone_A 174 | AGGGCGAGCAGCTGAT-1,FigureClone_A 175 | TTAGGTCATAACCGAC-1,FigureClone_A 176 | CAAAGATTATTGGGCC-1,FigureClone_A 177 | AATGTGCCCGAGGTGT-1,FigureClone_A 178 | TTGCCATAGCCCGCTC-1,FigureClone_A 179 | TAACATACACGCGATC-1,FigureClone_A 180 | CGAGTGAAGGTACCAG-1,FigureClone_A 181 | AGTCGGCTCAACTTTA-1,FigureClone_A 182 | CTCGGTTGTCGGCCCT-1,FigureClone_A 183 | AACACGAGACGCGGCC-1,FigureClone_A 184 | GTATCTCAGTCTTGAC-1,FigureClone_A 185 | ATGTGAAAGCCTAATG-1,FigureClone_A 186 | AGTCTCACAAGACTAC-1,FigureClone_A 187 | AAATTGATAGTCCTTT-1,FigureClone_A 188 | TCATCGATGGTCCCAA-1,FigureClone_A 189 | TCTAATACTGCCTCAG-1,FigureClone_A 190 | GGTAACCGGGAGGATA-1,FigureClone_A 191 | ACCTGCGTGTCATGTT-1,FigureClone_A 192 | CCTTTAAGGGAGCACT-1,FigureClone_A 193 | ACGGGAGTGTCGGCCC-1,FigureClone_A 194 | TTCTTGGACGATCTGC-1,FigureClone_A 195 | TCAACATAGCGCCCTA-1,FigureClone_A 196 | TAAGTCGCCGAGTATC-1,FigureClone_A 197 | AGACGACGATGCCGCT-1,FigureClone_A 198 | AATGACAGCAATGTCT-1,FigureClone_A 199 | AAACTTGCAAACGTAT-1,FigureClone_A 200 | CCACGGAGCCATAAGA-1,FigureClone_A 201 | TTAACTTCAGGTAGGA-1,FigureClone_A 202 | CAGGCGCACGGTGGTC-1,FigureClone_A 203 | CGATCTGTTGGAGGAC-1,FigureClone_A 204 | TTGTTTCCATACAACT-1,FigureClone_A 205 | AGTTTGGCCAGACCTA-1,FigureClone_A 206 | CACCGTTGCGCGATAT-1,FigureClone_A 207 | CGCGCAAATGTCCAGA-1,FigureClone_A 208 | CTTCTATGTTGAAGTA-1,FigureClone_A 209 | AGCTGTAACCTCAATC-1,FigureClone_A 210 | ATGACGCGTTCTATCC-1,FigureClone_A 211 | AGGTCAGGTGAGAGTG-1,FigureClone_A 212 | ATTTGCGCGAGTAGCT-1,FigureClone_A 213 | -------------------------------------------------------------------------------- /FigureScripts/SpotLevelCloneCalls/Figure2/H2_2_Clones.csv: -------------------------------------------------------------------------------- 1 | Barcode,CloneNames 2 | GCAGAAGGTAATCTCC-1,FigureClone_F 3 | ACTAGTTGCGATCGTC-1,FigureClone_C 4 | ATTACATGTCAGTCTT-1,FigureClone_C 5 | TTGGACCATCTGGCAA-1,FigureClone_C 6 | TTGGGACGTAAGAGTT-1,FigureClone_C 7 | 
CCCTCCTCGCTCGTAT-1,FigureClone_C 8 | CTTCGGCCAATTGTTT-1,FigureClone_C 9 | TTCGGTGGAGACGCCC-1,FigureClone_B 10 | CTTTGCTGTCATGGAT-1,FigureClone_B 11 | TACTGTTTCTCTGGTA-1,FigureClone_B 12 | AGACCGCTCCGCGGTT-1,FigureClone_B 13 | CCGATATGACGTAAGG-1,FigureClone_B 14 | AAACCGGAAATGTTAA-1,FigureClone_B 15 | AGACAGGCATCTCAGC-1,FigureClone_B 16 | GGTGTTGGGCGTCTTA-1,FigureClone_B 17 | GGACTCACAAATTAGG-1,FigureClone_B 18 | TGTGGTAGGGTGCCTT-1,FigureClone_B 19 | CGTGGAAGCCTCGTAC-1,FigureClone_B 20 | CTGCTGTCTAACGAGC-1,FigureClone_B 21 | ATTGTTCAACGATCCG-1,FigureClone_B 22 | GTGCAGCGTAGAGTAG-1,FigureClone_B 23 | -------------------------------------------------------------------------------- /Images/KTH_Logotyp_PMS_2013.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/Images/KTH_Logotyp_PMS_2013.eps -------------------------------------------------------------------------------- /Images/primary-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/Images/primary-logo.png -------------------------------------------------------------------------------- /Images/secondary-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/Images/secondary-logo.png -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | exportPattern("^[[:alpha:]]+") 2 | import(tidyverse, infercnv, Seurat, hdf5r, phylogram, ape) 3 | -------------------------------------------------------------------------------- /R/ExtractSectionWise.R: 
--------------------------------------------------------------------------------
#' Obtaining a thresholded dataframe as part of spatial visualization of spatial transcriptomics data.
#'
#' ExtractSectionWise()
#'
#' Restricts the gene-level CNV calls to one section, keeps only the genes that
#' are called in more than `Threshold` (a fraction) of that section's spots, and
#' then counts, for every spot, how many of those retained genes it carries.
#'
#' @param SectionName A character string for section name.
#' @param CNV_Genes_Organscale_Input A dataframe, mirroring the structure of infercnv::run output file 17_HMM_predHMMi6.hmm_mode-cells.pred_cnv_genes.dat. The columns `section`, `gene` and `cell_group_name` are used.
#' @param AllBarcodes A dataframe of barcodes and annotations. The rows whose `Histology` equals `SectionName` define the number of spots in the section.
#' @param Threshold A fraction (for example 0.45). A gene is kept when it is called in more than round(Threshold * number of section spots) spots.
#'
#' @return A dataframe with one row per spot: `Barcode` (taken from `cell_group_name`) and `PercentageGenomeAltered`, the number of retained genes with an inferred CNV in that spot (a count, despite the column name).
#'
#' @examples
#' ExtractSectionWise("H2_1", CNV_Genes_Filtered, AllBarcodes, 0.45)
ExtractSectionWise <- function(SectionName, CNV_Genes_Organscale_Input, AllBarcodes, Threshold) {
  # CNV calls (one row per spot/gene pair) that belong to the requested section.
  section_calls <- CNV_Genes_Organscale_Input %>%
    filter(section == paste0(SectionName)) %>%
    select(-section)

  # Number of spots in which each gene carries an inferred CNV.
  spots_per_gene <- section_calls %>% group_by(gene) %>% tally()

  # The threshold is expressed relative to the number of barcodes annotated
  # for this section, then rounded to a whole number of spots.
  section_barcodes <- AllBarcodes %>%
    filter(Histology == paste0(SectionName))
  min_spots <- round(Threshold * nrow(section_barcodes), 0)

  recurrent_genes <- spots_per_gene %>%
    filter(n > min_spots)

  # semi_join keeps exactly the calls whose gene passed the threshold without
  # copying the per-gene `n` column into the result. A leftover `n` column would
  # be picked up as a weight by tally() in older dplyr releases, turning the
  # per-spot gene count below into a sum of per-gene spot counts.
  recurrent_calls <- semi_join(section_calls, recurrent_genes, by = "gene")

  genes_per_spot <- recurrent_calls %>% group_by(cell_group_name) %>% tally()
  names(genes_per_spot)[1] <- "Barcode"
  names(genes_per_spot)[2] <- "PercentageGenomeAltered"
  return(genes_per_spot)
}
-------------------------------------------------------------------------------- /R/FinalAnnotations.R: -------------------------------------------------------------------------------- 1 | #' Creating A finalized annotation dataframe containing only barcodes in the count
#' file.
#'
#' FinalAnnotations()
#'
#' The barcodes are the column names of `InputCounts`. The annotation table is
#' right-joined onto them by `Barcode`, so the result has a row for every
#' barcode in the count table (with missing annotations where the annotation
#' table has no match).
#'
#' @param InputOriginalAnnotationFile A dataframe of barcodes selected for analysis; must contain a `Barcode` column
#' @param InputCounts A joined count dataframe, of barcodes selected for analysis AND has passed QC (counts per spot >= 500 counts), with one column per barcode
#' @return A finalized annotation dataframe containing only barcodes in the count file.
#' @examples
#' FinalAnnotations(MergedAll, Counts_joined)
FinalAnnotations <- function(InputOriginalAnnotationFile, InputCounts) {
  # Read the barcodes straight from the column names. Transposing the first row
  # instead (input[1, ]) collapses to an unnamed vector when the table has a
  # single column, which loses the barcode.
  barcode_names <- colnames(InputCounts)
  if (is.null(barcode_names)) {
    stop("InputCounts must have column names (one column per barcode).")
  }
  barcodes <- data.frame(Barcode = barcode_names, stringsAsFactors = FALSE)

  # Explicit key: `Barcode` is the only column of `barcodes`, so this is the
  # same join as before, without the "Joining, by" message.
  annotations <- right_join(InputOriginalAnnotationFile, barcodes, by = "Barcode")
  return(annotations)
}
-------------------------------------------------------------------------------- /R/ImportCountData.R: -------------------------------------------------------------------------------- 1 | #' Importing Visium spatial transcriptomics count data from filtered_feature_bc_matrix.h5 file (output from SpaceRanger pipeline) and appending section name to barcodes 2 | #' 3 | #' ImportCountData() 4 | #' 5 | #' @param SectionName A character string for section name.
#' @param InputCountFile A file path to a filtered_feature_bc_matrix.h5 file (output from 10X Genomics SpaceRanger pipeline)
#' @return A dataframe of counts with one row per barcode; the `Barcode` column holds the section name, "_" and the barcode, with every "-" replaced by "."
#' @examples
#' ImportCountData("H2_1", "./filtered_feature_bc_matrix.h5")
ImportCountData <- function(SectionName, InputCountFile) {
  # Read the matrix with use.names = FALSE, densify it, and flip it so that
  # every row is one barcode and every column one feature.
  counts <- as.data.frame(t(as.matrix(Read10X_h5(InputCountFile, use.names = FALSE))))
  counts <- rownames_to_column(counts, "Barcode")

  # Prefix with the section name first; the "-" to "." substitution then
  # applies to the whole combined key (section name included).
  counts$Barcode <- gsub("\\-", "\\.", paste0(SectionName, "_", counts$Barcode))
  return(counts)
}
-------------------------------------------------------------------------------- /R/ImportHistologicalAnnotations.R: -------------------------------------------------------------------------------- 1 | #' Importing histological annotations of Visium barcodes and appending a section name to the barcodes. 2 | #' 3 | #' the LoupeBrowser. 4 | #' ImportHistologicalAnnotations() 5 | #' 6 | #' @param SectionName A character string for section name.
#' @param InputAnnotationFile A file path to a .csv file, with annotations (for example, output from LoupeBrowser after manual annotations). The file must contain a `Barcode` column; its second column is renamed to `Histology`.
#' @return A dataframe of barcodes with appended section names. Every "-" in the combined key (section name, "_", barcode) is replaced by ".", which is the same key format ImportCountData() produces.
#' @examples
#' ImportHistologicalAnnotations("H1_2", "./H1_2_Final_Consensus_Annotations.csv")
ImportHistologicalAnnotations <- function(SectionName, InputAnnotationFile) {
  input <- read.csv(paste0(InputAnnotationFile))

  # The second column holds the annotation label, whatever it was called on export.
  names(input)[2] <- "Histology"

  if (!"Barcode" %in% names(input)) {
    stop("Annotation file must contain a 'Barcode' column: ", InputAnnotationFile)
  }

  barcodes <- as.character(input$Barcode)
  if (length(barcodes) > 0) {
    # Normalise the whole key, not just the raw barcode: ImportCountData() also
    # rewrites "-" inside the section-name prefix, and the two tables are later
    # joined on this column.
    barcodes <- gsub("-", ".", paste0(SectionName, "_", barcodes), fixed = TRUE)
  }
  # A header-only file yields zero barcodes; assign them as-is instead of
  # recycling a lone "<section>_" string into an empty table.
  input$Barcode <- barcodes
  return(input)
}
-------------------------------------------------------------------------------- /R/ImportHistologicalOriginalSTSelections.R: -------------------------------------------------------------------------------- 1 | #' Importing spatial transcriptomics, 1k array selected spot file data and append section names to the barcodes. 2 | #' 3 | #' ImportHistologicalOriginalSTSelections() 4 | #' 5 | #' @param SectionName A character string for section name.
#' @param InputAnnotationFile A file path to a tab-separated .tsv file with `x` and `y` columns
#' @return A dataframe with a single `Barcode` column: section name, "_", then the x and y values joined by "x"
#' @examples
#' ImportHistologicalOriginalSTSelections("H2_1", "./Patient 1/1k_arrays/H2_1/spot_data-selection-180903_L11_CN63_D1_P_H2.1_CY3_EB_aligned.tsv")
ImportHistologicalOriginalSTSelections <- function(SectionName, InputAnnotationFile) {
  selected_spots <- read.delim(paste0(InputAnnotationFile), sep = "\t")

  # Only the array coordinates are needed to build the barcode.
  coordinates <- select(selected_spots, x, y)
  coordinates[["Barcode"]] <- paste0(SectionName, "_", coordinates$x, "x", coordinates$y)

  return(select(coordinates, Barcode))
}
-------------------------------------------------------------------------------- /R/ImportOriginalSTCountData.R: -------------------------------------------------------------------------------- 1 | #' Importing spatial transcriptomics, 1k array count data and append section names to the barcodes. 2 | #' 3 | #' ImportOriginalSTCountData() 4 | #' 5 | #' @param SectionName A character string for section name.
#' @param InputCountFile A file path to a .tsv file whose first column holds the row names (the spot identifiers)
#' @return A dataframe of count data whose first column, `Barcode`, is the section name, "_" and the original row name
#' @examples
#' ImportOriginalSTCountData("H2_1", "./Patient 1/1k_arrays/H2_1/180903_L11_CN63_D1_H2.1_EB_stdata.tsv")
ImportOriginalSTCountData <- function(SectionName, InputCountFile) {
  # Read with the first column as row names, then move those row names into a
  # leading column (rownames_to_column() calls it "rowname" by default).
  counts <- rownames_to_column(as.data.frame(read.delim(InputCountFile, row.names = 1)))

  # The leading column is the spot identifier: prefix it with the section name
  # and give it the name the downstream joins expect.
  counts[[1]] <- paste0(SectionName, "_", counts[[1]])
  names(counts)[1] <- "Barcode"
  return(counts)
}
-------------------------------------------------------------------------------- /R/MergingCountAndAnnotationData.R: -------------------------------------------------------------------------------- 1 | #' Merging Visium spatial transcriptomics count and annotation data, as well as applying a QC filter to only include spots with >= 500 counts 2 | #' 3 | #' MergingCountAndAnnotationData() 4 | #' 5 | #' @param SectionName A character string for section name.
6 | #' @param InputAnnotationFile An annotation file containing all barcodes to be used in the analysis (bound dataframe of one or more outputs from ImportHistologicalAnnotations()) 7 | #' @param InputCountFile A dataframe of Visium count data (output from ImportCountData()) 8 | #' @return A dataframe of barcodes with appended section names that have passed QC 9 | #' @examples 10 | #' MergingCountAndAnnotationData("H2_1",MergedAll, H2_1_ENSBMLID_Counts) 11 | 12 | MergingCountAndAnnotationData <- function(SectionName, InputAnnotationFile, InputCountFile) { 13 | formerge <- select(InputAnnotationFile, -Histology) 14 | MergedAnnotationsandCounts <- inner_join(formerge, InputCountFile) 15 | MergedAnnotationsandCounts <- remove_rownames(MergedAnnotationsandCounts) 16 | MergedAnnotationsandCounts <- column_to_rownames(MergedAnnotationsandCounts, "Barcode") 17 | MergedAnnotationsandCounts$Total <- rowSums(MergedAnnotationsandCounts) 18 | MergedAnnotationsandCounts <- MergedAnnotationsandCounts %>% filter(Total >= 500) 19 | MergedAnnotationsandCounts <- select(MergedAnnotationsandCounts, -Total) 20 | MergedAnnotationsandCounts <- as.data.frame(t(MergedAnnotationsandCounts)) 21 | MergedAnnotationsandCounts <- MergedAnnotationsandCounts[,colSums(is.na(MergedAnnotationsandCounts))=500 total unique molecular identifiers. 3 | #' 4 | #' OriginalST_MergingCountAndAnnotationData() 5 | #' 6 | #' @param InputAnnotationFile An annotation file created by ImportHistologicalOriginalSTSelections() 7 | #' @param InputCountFile A ST count file created by ImportOriginalSTCountData() 8 | #' @return A dataframe of ST counts, that have passed QC and are selected. 
9 | #' @examples 10 | #' OriginalST_MergingCountAndAnnotationData(Barcodes_H2_1, Counts_H2.1) 11 | 12 | OriginalST_MergingCountAndAnnotationData <- function(InputAnnotationFile, InputCountFile) { 13 | formerge <- select(InputAnnotationFile, -Histology) 14 | MergedAnnotationsandCounts <- inner_join(formerge, InputCountFile) 15 | MergedAnnotationsandCounts <- remove_rownames(MergedAnnotationsandCounts) 16 | MergedAnnotationsandCounts <- column_to_rownames(MergedAnnotationsandCounts, "Barcode") 17 | MergedAnnotationsandCounts$Total <- rowSums(MergedAnnotationsandCounts) 18 | MergedAnnotationsandCounts <- MergedAnnotationsandCounts %>% filter(Total >= 500) 19 | MergedAnnotationsandCounts <- select(MergedAnnotationsandCounts, -Total) 20 | MergedAnnotationsandCounts <- as.data.frame(t(MergedAnnotationsandCounts)) 21 | if(length(MergedAnnotationsandCounts) == 1){ 22 | MergedAnnotationsandCounts <- tibble::rownames_to_column(MergedAnnotationsandCounts, "Genes") 23 | return(MergedAnnotationsandCounts) 24 | } else { 25 | MergedAnnotationsandCounts <- MergedAnnotationsandCounts[,colSums(is.na(MergedAnnotationsandCounts))% extract(Barcode, c("Barcode", "XY"), "(.*)_([^_]+)") %>% select(-Barcode) 15 | names(input)[1] <- "Barcode" 16 | PGA_Visualization_Matrix <- left_join(BarcodesFile, input) 17 | PGA_Visualization_Matrix$x <- as.numeric(sub('.*x', '', PGA_Visualization_Matrix$Barcode)) 18 | PGA_Visualization_Matrix$y <- as.numeric(sub('x.*', '', PGA_Visualization_Matrix$Barcode)) 19 | PGA_Visualization_Matrix <- PGA_Visualization_Matrix %>% 20 | select(x, y, PercentageGenomeAltered) %>% 21 | arrange(x, y) 22 | return(PGA_Visualization_Matrix) 23 | } -------------------------------------------------------------------------------- /R/Plot_PGA_Visualization_Matrix().R: -------------------------------------------------------------------------------- 1 | #' Plotting the spatial distribution of genes with an inferred copy number alteration from an underlying matrix 2 | #' 3 | #' 
#' Plot_PGA_Visualization_Matrix()
#'
#' @param SectionName A character string for section name. Not used by the plotting code itself.
#' @param InputMatrix An input matrix created by the function Output_PGA_Visualization_MatrixGreyNA(); the columns `x`, `y` and `PercentageGenomeAltered` are plotted
#' @param MaxValInput An upper threshold for plotting, derived from the maximum sectionwise value of the number of inferred genes with a CNV (from ExtractSectionWise()); used as the upper limit of the fill scale
#'
#' @return A ggplot raster of `PercentageGenomeAltered` over the x/y grid (blue = low, yellow = high, grey = missing), drawn without axes, titles, grid lines or legend.
#' @examples
#' Plot_PGA_Visualization_Matrix("H2_1", PGA_Matrix, MaxVal)
Plot_PGA_Visualization_Matrix <- function(SectionName, InputMatrix, MaxValInput) {
  # Colour scale: fixed 0..MaxValInput range; missing values are drawn in grey.
  fill_scale <- scale_fill_gradient(limits = c(0, MaxValInput),
                                    low = "blue", high = "yellow",
                                    na.value = "grey50")

  # First theme layer: text sizes.
  text_theme <- theme(axis.text.x = element_text(size = 9, angle = 0, vjust = 0.3),
                      axis.text.y = element_text(size = 9),
                      plot.title = element_text(size = 11))

  # Second theme layer, applied after the first: strips the decorations so
  # only the raster itself remains.
  bare_theme <- theme(panel.border = element_blank(),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      line = element_blank(),
                      title = element_blank(),
                      axis.text.x = element_blank(),
                      axis.text.y = element_blank(),
                      axis.ticks = element_blank(),
                      legend.position = "none",
                      plot.margin = grid::unit(c(0, 0, 0, 0), "mm"))

  pga_plot <- ggplot(InputMatrix, aes(x = x, y = y)) +
    geom_raster(aes(fill = PercentageGenomeAltered)) +
    fill_scale +
    labs(x = "X-coord", y = "Y-coord") +
    theme_bw() +
    text_theme +
    bare_theme
  return(pga_plot)
}
-------------------------------------------------------------------------------- /R/SelectingSubTreeData.R: -------------------------------------------------------------------------------- 1 | #' Selecting Subtree Data for Node Selection: this selects a number of barcoded spots from a inferCNV dendrogram object for further analysis.
#'
#' SelectingSubTreeData()
#'
#' @param SubtreeObject A dendrogram, phylo object created by subtrees(as.phylo([dendogram.txt]))
#' @param NodeOfInterest A numerical integer corresponding to a phylogram/dendogram node of interest
#' @return A dataframe with one row per tip of the selected subtree: `Barcode` (the tip label) and `Node` ("Node_" followed by NodeOfInterest)
#' @examples
#' SelectingSubTreeData(my.subtrees, 4617)
SelectingSubTreeData <- function(SubtreeObject, NodeOfInterest) {
  # Tip labels of the requested subtree.
  tip_labels <- SubtreeObject[[NodeOfInterest]]$tip.label
  selection <- as.data.frame(tip_labels)

  # Tag every row with the node it was selected from.
  selection <- mutate(selection, Node = paste0("Node_", NodeOfInterest))
  names(selection)[1] <- "Barcode"
  return(selection)
}
-------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Walkthrough of Clone Calling from Prostate Cancer Visium Spatial Transcriptomics 3 | Data with InferCNV 4 | author: "Andrew Erickson, Nuffield Department of Surgical Sciences, University of Oxford" 5 | output: 6 | md_document: 7 | variant: markdown_github 8 | --- 9 | 10 | # siCNV: Spatial InferCNV from Spatial Transcriptomics Data 11 | 12 | Spatially resolved transcriptomics has emerged as a genome-wide analysis of gene expression to explore tissues in an unsupervised manner. In this study we infer genome-wide copy-number variations (CNV) from spatially resolved mRNA profiles in situ. Gene expression has [previously been used to infer CNVs](https://github.com/broadinstitute/infercnv) in single cells, successfully identifying regions of chromosomal gain and loss. Here we expand into a spatial modality, generating CNV calls in each spatial region represented by barcoded spots. 13 | 14 | We provide a R package via this github page, as well as [scripts to reproduce the main figures](https://github.com/aerickso/SpatialInferCNV/tree/main/FigureScripts) in the manuscript.
15 | 16 | This code was tested using [R](https://www.r-project.org/) version 4.1.3, a Windows 11 Computer, 32GB RAM, and 12 CPUs (1.6 GHz). 17 | 18 | For timely data-analyses of datasets comprising 2 or more Visium sections, consider use of a high performance cluster. In our project, the infercnv::run analysis steps were run on the [BMRC](https://www.medsci.ox.ac.uk/divisional-services/support-services-1/bmrc/cluster-usage), with 10-20 CPUs, each 1.6 GHz and 16GB ram. 19 | 20 | # System level dependency (for the hdf5r package) 21 | 22 | SpatialInferCNV has HDF5 as a system level dependency which needs to be installed before installing the `hdf5r` R package. See here for details: https://github.com/hhoeflin/hdf5r#Requirements. For Windows users, you can download the windows version here: https://github.com/mannau/h5-libwin. 23 | 24 | # Installation of SpatialInferCNV Dependencies - R 25 | 26 | ```{r, eval = FALSE} 27 | install.packages("devtools") 28 | if (!requireNamespace("BiocManager", quietly = TRUE)) 29 | install.packages("BiocManager") 30 | BiocManager::install("infercnv") 31 | install.packages("tidyverse") 32 | install.packages("Seurat") 33 | install.packages("phylogram") 34 | install.packages("ape") 35 | install.packages("hdf5r") 36 | ``` 37 | 38 | # Installation 39 | 40 | ```{r, eval = FALSE} 41 | install.packages("devtools") 42 | library(devtools) 43 | install_github("aerickso/SpatialInferCNV") 44 | library(SpatialInferCNV) 45 | ``` 46 | 47 | # SpatialInferCNV installation via a conda environment - MacOS or Linux 48 | 49 | We also provide environment files to set up a conda environment in a MacOSX or Linux environment with all the dependencies necessary for SpatialInferCNV. Installation was tested using MacOS Mojave, Version 10.14.6 and an HPC running Scientific Linux 7.9. If you have anaconda installed, you can create a new environment and activate it by running the code below.
50 | 51 | [environment.yml file](https://github.com/aerickso/SpatialInferCNV/blob/main/environment.yml) 52 | 53 | ``` 54 | conda env create -f environment.yml 55 | conda activate SpatialInferCNV 56 | ``` 57 | 58 | If you want to run RStudio within this environment you can install it from the terminal with conda: 59 | 60 | ``` 61 | # make sure that the environment is active 62 | conda install -c r rstudio 63 | rstudio 64 | ``` 65 | 66 | Once you have created the environment you can install SpatialInferCNV. 67 | 68 | ```{r, eval = FALSE} 69 | # From R 70 | install.packages("devtools") 71 | library(devtools) 72 | install_github("aerickso/SpatialInferCNV") 73 | ``` 74 | 75 | # SpatialInferCNV installation via a conda environment - Windows 76 | 77 | This was tested in Anaconda3, conda version 4.12.0, on Windows 11, R version 4.1.3. 78 | 79 | In anaconda3 terminal, create a new conda environment with R 4.1.3. 80 | 81 | ``` 82 | conda config --add channels conda-forge 83 | conda create -n siCNV r-base=4.1.3 84 | # Select yes to install all new packages 85 | conda activate siCNV 86 | 87 | R 88 | ``` 89 | Install devtools, and configure the file download method for windows to allow install_github() to resolve. 90 | 91 | ```{r, eval = FALSE} 92 | install.packages("devtools") 93 | library(devtools) 94 | options(download.file.method = "wininet") 95 | ``` 96 | 97 | Installing R dependencies. Note: hdf5r has a system level dependency of hdf5, see above for more details.
98 | 99 | ```{r, eval = FALSE} 100 | if (!requireNamespace("BiocManager", quietly = TRUE)) 101 | install.packages("BiocManager") 102 | BiocManager::install("infercnv") 103 | install.packages("tidyverse") 104 | install.packages("Seurat") 105 | install.packages("phylogram") 106 | install.packages("ape") 107 | install.packages("hdf5r") 108 | # enter a to update/install all 109 | 110 | library(infercnv) 111 | library(tidyverse) 112 | library(Seurat) 113 | library(phylogram) 114 | library(ape) 115 | library(hdf5r) 116 | ``` 117 | 118 | After installing the R dependencies, install and initialize SpatialInferCNV. 119 | 120 | ```{r, eval = FALSE} 121 | install_github("aerickso/SpatialInferCNV") 122 | # enter 1 to update all 123 | # yes 124 | library(SpatialInferCNV) 125 | ``` 126 | 127 | # Userguide 128 | 129 | The package provides a number of functions, please read the user guide [here](https://aerickso.github.io/SpatialInferCNV/). 130 | 131 | # Study Data 132 | 133 | We provide data used in this study at the following [Mendeley Repository](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29). 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # siCNV: Spatial InferCNV from Spatial Transcriptomics Data 4 | 5 | Spatially resolved transcriptomics has emerged as a genome-wide analysis 6 | of gene expression to explore tissues in an unsupervised manner. In this 7 | study we infer genome-wide copy-number variations (CNV) from spatially 8 | resolved mRNA profiles in situ. Gene expression has [previously been 9 | used to infer CNVs](https://github.com/broadinstitute/infercnv) in 10 | single cells, successfully identifying regions of chromosomal gain and 11 | loss. 
Here we expand into a spatial modality, generating CNV calls in 12 | each spatial region represented by barcoded spots. 13 | 14 | We provide a R package via this github page, as well as [scripts to 15 | reproduce the main 16 | figures](https://github.com/aerickso/SpatialInferCNV/tree/main/FigureScripts) 17 | in the manuscript. 18 | 19 | This code was tested using [R](https://www.r-project.org/) version 20 | 4.1.3, a Windows 11 Computer, 32GB RAM, and 12 CPUs (1.6 GHz). 21 | 22 | For timely data-analyses of datasets comprising 2 or more Visium 23 | sections, consider use of a high performance cluster. In our project, 24 | the infercnv::run analysis steps were run on the 25 | [BMRC](https://www.medsci.ox.ac.uk/divisional-services/support-services-1/bmrc/cluster-usage), 26 | with 10-20 CPUs, each 1.6 GHz and 16GB ram. 27 | 28 | # System level dependency (for the hdf5r package) 29 | 30 | SpatialInferCNV has HDF5 as a system level dependency which needs to be 31 | installed before installing the `hdf5r` R package. See here for details: 32 | <https://github.com/hhoeflin/hdf5r#Requirements>. For Windows users, you 33 | can download the windows version here: 34 | <https://github.com/mannau/h5-libwin>. 35 | 36 | # Installation of SpatialInferCNV Dependencies - R 37 | 38 | ``` r 39 | install.packages("devtools") 40 | if (!requireNamespace("BiocManager", quietly = TRUE)) 41 | install.packages("BiocManager") 42 | BiocManager::install("infercnv") 43 | install.packages("tidyverse") 44 | install.packages("Seurat") 45 | install.packages("phylogram") 46 | install.packages("ape") 47 | install.packages("hdf5r") 48 | ``` 49 | 50 | # Installation 51 | 52 | ``` r 53 | install.packages("devtools") 54 | library(devtools) 55 | install_github("aerickso/SpatialInferCNV") 56 | library(SpatialInferCNV) 57 | ``` 58 | 59 | # SpatialInferCNV installation via a conda environment - MacOS or Linux 60 | 61 | We also provide environment files to set up a conda environment in a 62 | MacOSX or Linux environment with all the dependencies necessary for 63 | SpatialInferCNV.
Installation was tested using MacOS Mojave, Version 64 | 10.14.6 and an HPC running Scientific Linux 7.9. If you have anaconda 65 | installed, you can create a new environment and activate it by running 66 | the code below. 67 | 68 | [environment.yml 69 | file](https://github.com/aerickso/SpatialInferCNV/blob/main/environment.yml) 70 | 71 | conda env create -f environment.yml 72 | conda activate SpatialInferCNV 73 | 74 | If you want to run RStudio within this environment you can install it 75 | from the terminal with conda: 76 | 77 | # make sure that the environment is active 78 | conda install -c r rstudio 79 | rstudio 80 | 81 | Once you have created the environment you can install SpatialInferCNV. 82 | 83 | ``` r 84 | # From R 85 | install.packages("devtools") 86 | library(devtools) 87 | install_github("aerickso/SpatialInferCNV") 88 | ``` 89 | 90 | # SpatialInferCNV installation via a conda environment - Windows 91 | 92 | This was tested in Anaconda3, conda version 4.12.0, on Windows 11, R 93 | version 4.1.3. 94 | 95 | In anaconda3 terminal, create a new conda environment with R 4.1.3. 96 | 97 | conda config --add channels conda-forge 98 | conda create -n siCNV r-base=4.1.3 99 | # Select yes to install all new packages 100 | conda activate siCNV 101 | 102 | R 103 | 104 | Install devtools, and configure the file download method for windows to 105 | allow install_github() to resolve. 106 | 107 | ``` r 108 | install.packages("devtools") 109 | library(devtools) 110 | options(download.file.method = "wininet") 111 | ``` 112 | 113 | Installing R dependencies. Note: hdf5r has a system level dependency of 114 | hdf5, see above for more details. 
115 | 116 | ``` r 117 | if (!requireNamespace("BiocManager", quietly = TRUE)) 118 | install.packages("BiocManager") 119 | BiocManager::install("infercnv") 120 | install.packages("tidyverse") 121 | install.packages("Seurat") 122 | install.packages("phylogram") 123 | install.packages("ape") 124 | install.packages("hdf5r") 125 | # enter a to update/install all 126 | 127 | library(infercnv) 128 | library(tidyverse) 129 | library(Seurat) 130 | library(phylogram) 131 | library(ape) 132 | library(hdf5r) 133 | ``` 134 | 135 | After installing the R dependencies, install and initialize 136 | SpatialInferCNV. 137 | 138 | ``` r 139 | install_github("aerickso/SpatialInferCNV") 140 | # enter 1 to update all 141 | # yes 142 | library(SpatialInferCNV) 143 | ``` 144 | 145 | # Userguide 146 | 147 | The package provides a number of functions, please read the user guide 148 | [here](https://aerickso.github.io/SpatialInferCNV/). 149 | 150 | # Study Data 151 | 152 | We provide data used in this study at the following [Mendeley 153 | Repository](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29). 
154 | -------------------------------------------------------------------------------- /UserGuide/Images/BC23209_C1_PGA_SpatialVisualization_2022-03-27.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/BC23209_C1_PGA_SpatialVisualization_2022-03-27.png -------------------------------------------------------------------------------- /UserGuide/Images/BreastCancer10x_forclustering_phylo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/BreastCancer10x_forclustering_phylo.png -------------------------------------------------------------------------------- /UserGuide/Images/BreastCancer10x_forclustering_phylo_manual.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/BreastCancer10x_forclustering_phylo_manual.png -------------------------------------------------------------------------------- /UserGuide/Images/LB_Annotated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_Annotated.png -------------------------------------------------------------------------------- /UserGuide/Images/LB_CloneImport.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_CloneImport.png -------------------------------------------------------------------------------- /UserGuide/Images/LB_DragSelection.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_DragSelection.png -------------------------------------------------------------------------------- /UserGuide/Images/LB_ExcludeUnlabeled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_ExcludeUnlabeled.png -------------------------------------------------------------------------------- /UserGuide/Images/LB_ExportHistology.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_ExportHistology.png -------------------------------------------------------------------------------- /UserGuide/Images/LB_ExportingCSV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_ExportingCSV.png -------------------------------------------------------------------------------- /UserGuide/Images/LB_Histology.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_Histology.png -------------------------------------------------------------------------------- /UserGuide/Images/LB_ImportingCloneCSV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_ImportingCloneCSV.png 
-------------------------------------------------------------------------------- /UserGuide/Images/LB_PolygonalSelection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_PolygonalSelection.png -------------------------------------------------------------------------------- /UserGuide/Images/LB_UserguideClones_Visualized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_UserguideClones_Visualized.png -------------------------------------------------------------------------------- /UserGuide/Images/LB_Userguide_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_Userguide_12.png -------------------------------------------------------------------------------- /UserGuide/Images/NewCategoryImage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/NewCategoryImage.png -------------------------------------------------------------------------------- /UserGuide/Images/infercnv.21_denoised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/infercnv.21_denoised.png -------------------------------------------------------------------------------- /UserGuide/Images/infercnv.21_denoised_manualselection.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/infercnv.21_denoised_manualselection.png -------------------------------------------------------------------------------- /UserGuide/UserGuideFiles/10xBreast_UserguideHistologyAnnotations.csv: -------------------------------------------------------------------------------- 1 | Barcode,Histology 2 | AATAACGTCGCGCCCA-1,Userguide_12 3 | CACCCGCGTTTGACAC-1,Userguide_12 4 | CGCAGTTCTATCTTTC-1,Userguide_12 5 | CGTTAAATACGACCAG-1,Userguide_12 6 | CGTTTCGGTTATATGC-1,Userguide_12 7 | GACAGGTAATCCGTGT-1,Userguide_12 8 | GTCTTACCACGCCAAG-1,Userguide_12 9 | TACGCTGCACGGTCGT-1,Userguide_12 10 | TAGATTCTCTAGCAAA-1,Userguide_12 11 | TAGGAGGCTCGAGAAC-1,Userguide_12 12 | TCGACTGACGATGGCT-1,Userguide_12 13 | TCTACCCGCATCATTT-1,Userguide_12 14 | -------------------------------------------------------------------------------- /UserGuide/UserGuideFiles/infercnv.21_denoised.observations_dendrogram.txt: -------------------------------------------------------------------------------- 1 | (Breast10X_TCGACTGACGATGGCT.1:6.76625323,((Breast10X_CGCAGTTCTATCTTTC.1:4.451235413,Breast10X_GACAGGTAATCCGTGT.1:4.451235413):0.5368723659,(((Breast10X_CGTTTCGGTTATATGC.1:3.879678282,Breast10X_TAGATTCTCTAGCAAA.1:3.879678282):0.4285436197,(Breast10X_TACGCTGCACGGTCGT.1:4.09129203,(Breast10X_CACCCGCGTTTGACAC.1:3.702021609,Breast10X_TAGGAGGCTCGAGAAC.1:3.702021609):0.3892704214):0.2169298715):0.5458187634,(Breast10X_TCTACCCGCATCATTT.1:4.480431411,(Breast10X_CGTTAAATACGACCAG.1:4.111940764,Breast10X_GTCTTACCACGCCAAG.1:4.111940764):0.3684906471):0.3736092537):0.1340671139):1.778145451); 2 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: SpatialInferCNV 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - 
bioconductor-infercnv=1.10.0 8 | - r-essentials=4.1 9 | - r-devtools=2.4.3 10 | - r-phylogram=2.1.0 11 | - r-base=4.1.2 12 | - r-hdf5r=1.3.5 13 | - r-seurat=4.1.0 -------------------------------------------------------------------------------- /man/ExtractSectionWise.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ExtractSectionWise.R 3 | \name{ExtractSectionWise} 4 | \alias{ExtractSectionWise} 5 | \title{Obtaining a thresholded dataframe as part of spatial visualization of spatial transcriptomics data.} 6 | \usage{ 7 | ExtractSectionWise( 8 | SectionName, 9 | CNV_Genes_Organscale_Input, 10 | AllBarcodes, 11 | Threshold 12 | ) 13 | } 14 | \arguments{ 15 | \item{SectionName}{A character string for section name.} 16 | 17 | \item{CNV_Genes_Organscale_Input}{A dataframe, mirroring the structure of infercnv::run output file 17_HMM_predHMMi6.hmm_mode-cells.pred_cnv_genes.dat} 18 | 19 | \item{AllBarcodes}{A dataframe of barcodes and annotations.} 20 | 21 | \item{Threshold}{A numerical value for sectionwise thresholding of the number of genes to pass: integer values from 0-100.} 22 | } 23 | \value{ 24 | A dataframe of ST counts, that have passed QC and are selected. 
25 | } 26 | \description{ 27 | ExtractSectionWise() 28 | } 29 | \examples{ 30 | ExtractSectionWise("H2_1", CNV_Genes_Filtered, AllBarcodes, 0.45) 31 | } 32 | -------------------------------------------------------------------------------- /man/FinalAnnotations.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FinalAnnotations.R 3 | \name{FinalAnnotations} 4 | \alias{FinalAnnotations} 5 | \title{Creating A finalized annotation dataframe containing only barcodes in the count file.} 6 | \usage{ 7 | FinalAnnotations(InputOriginalAnnotationFile, InputCounts) 8 | } 9 | \arguments{ 10 | \item{InputOriginalAnnotationFile}{A dataframe of barcodes selected for analysis} 11 | 12 | \item{InputCounts}{A joined count dataframe, of barcodes selected for analysis AND has passed QC (counts per spot >= 500 counts)} 13 | } 14 | \value{ 15 | A finalized annotation dataframe containing only barcodes in the count file. 
16 | } 17 | \description{ 18 | FinalAnnotations() 19 | } 20 | \examples{ 21 | SelectingSubTreeData(my.subtrees, 4617) 22 | FinalAnnotations(MergedAll, Counts_joined) 23 | } 24 | -------------------------------------------------------------------------------- /man/ImportCountData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ImportCountData.R 3 | \name{ImportCountData} 4 | \alias{ImportCountData} 5 | \title{Importing Visium spatial transcriptomics count data from filtered_feature_bc_matrix.h5 file (output from SpaceRanger pipeline) and appending section name to barcodes} 6 | \usage{ 7 | ImportCountData(SectionName, InputCountFile) 8 | } 9 | \arguments{ 10 | \item{SectionName}{A character string for section name.} 11 | 12 | \item{InputCountFile}{A file path to a filtered_feature_bc_matrix.h5 file (output from 10X Genomics SpaceRanger pipeline)} 13 | } 14 | \value{ 15 | A dataframe of counts with appended section names 16 | } 17 | \description{ 18 | ImportCountData() 19 | } 20 | \examples{ 21 | ImportCountData("H2_1", "./filtered_feature_bc_matrix.h5") 22 | } 23 | -------------------------------------------------------------------------------- /man/ImportHistologicalAnnotations.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ImportHistologicalAnnotations.R 3 | \name{ImportHistologicalAnnotations} 4 | \alias{ImportHistologicalAnnotations} 5 | \title{Importing histological annotations of Visium barcodes and appending a section name to the barcodes.} 6 | \usage{ 7 | ImportHistologicalAnnotations(SectionName, InputAnnotationFile) 8 | } 9 | \arguments{ 10 | \item{SectionName}{A character string for section name.} 11 | 12 | \item{InputAnnotationFile}{A file path to a .csv file, with annotations (for example, output from 
LoupeBrowser after manual annotations)} 13 | } 14 | \value{ 15 | A dataframe of barcodes with appended section names 16 | } 17 | \description{ 18 | the LoupeBrower. 19 | ImportHistologicalAnnotations() 20 | } 21 | \examples{ 22 | ImportHistologicalAnnotations("H1_2", "./H1_2_Final_Consensus_Annotations.csv") 23 | } 24 | -------------------------------------------------------------------------------- /man/ImportHistologicalOriginalSTSelections.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ImportHistologicalOriginalSTSelections.R 3 | \name{ImportHistologicalOriginalSTSelections} 4 | \alias{ImportHistologicalOriginalSTSelections} 5 | \title{Importing spatial transcriptomics, 1k array selected spot file data and append section names to the barcodes.} 6 | \usage{ 7 | ImportHistologicalOriginalSTSelections(SectionName, InputAnnotationFile) 8 | } 9 | \arguments{ 10 | \item{SectionName}{A character string for section name.} 11 | 12 | \item{InputAnnotationFile}{A file path to a .tsv file} 13 | } 14 | \value{ 15 | A dataframe of barcodes with appended section names 16 | } 17 | \description{ 18 | ImportHistologicalOriginalSTSelections() 19 | } 20 | \examples{ 21 | ImportHistologicalOriginalSTSelections("H2_1", "./Patient 1/1k_arrays/H2_1/spot_data-selection-180903_L11_CN63_D1_P_H2.1_CY3_EB_aligned.tsv") 22 | } 23 | -------------------------------------------------------------------------------- /man/ImportOriginalSTCountData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ImportOriginalSTCountData.R 3 | \name{ImportOriginalSTCountData} 4 | \alias{ImportOriginalSTCountData} 5 | \title{Importing spatial transcriptomics, 1k array count data and append section names to the barcodes.} 6 | \usage{ 7 | 
ImportOriginalSTCountData(SectionName, InputCountFile) 8 | } 9 | \arguments{ 10 | \item{SectionName}{A character string for section name.} 11 | 12 | \item{InputCountFile}{A file path to a .tsv file} 13 | } 14 | \value{ 15 | A dataframe of count data, having barcodes with appended section names 16 | } 17 | \description{ 18 | ImportOriginalSTCountData() 19 | } 20 | \examples{ 21 | ImportOriginalSTCountData("H2_1", "./Patient 1/1k_arrays/H2_1/180903_L11_CN63_D1_H2.1_EB_stdata.tsv") 22 | } 23 | -------------------------------------------------------------------------------- /man/MergingCountAndAnnotationData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MergingCountAndAnnotationData.R 3 | \name{MergingCountAndAnnotationData} 4 | \alias{MergingCountAndAnnotationData} 5 | \title{Merging Visium spatial transcriptomics count and annotation data, as well as applying a QC filter to only include spots with >= 500 counts} 6 | \usage{ 7 | MergingCountAndAnnotationData(SectionName, InputAnnotationFile, InputCountFile) 8 | } 9 | \arguments{ 10 | \item{SectionName}{A character string for section name.} 11 | 12 | \item{InputAnnotationFile}{An annotation file containing all barcodes to be used in the analysis (bound dataframe of one or more outputs from ImportHistologicalAnnotations())} 13 | 14 | \item{InputCountFile}{A dataframe of Visium count data (output from ImportCountData())} 15 | } 16 | \value{ 17 | A dataframe of barcodes with appended section names that have passed QC 18 | } 19 | \description{ 20 | MergingCountAndAnnotationData() 21 | } 22 | \examples{ 23 | MergingCountAndAnnotationData("H2_1",MergedAll, H2_1_ENSBMLID_Counts) 24 | } 25 | -------------------------------------------------------------------------------- /man/OriginalST_MergingCountAndAnnotationData.Rd: -------------------------------------------------------------------------------- 1 | % 
Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/OriginalST_MergingCountAndAnnotationData.R 3 | \name{OriginalST_MergingCountAndAnnotationData} 4 | \alias{OriginalST_MergingCountAndAnnotationData} 5 | \title{Merging spatial transcriptomics, 1k array count files and barcodes, and apply a QC metric to only select 6 | ST spots with >=500 total unique molecular identifiers.} 7 | \usage{ 8 | OriginalST_MergingCountAndAnnotationData(InputAnnotationFile, InputCountFile) 9 | } 10 | \arguments{ 11 | \item{InputAnnotationFile}{An annotation file created by ImportHistologicalOriginalSTSelections()} 12 | 13 | \item{InputCountFile}{A ST count file created by ImportOriginalSTCountData()} 14 | } 15 | \value{ 16 | A dataframe of ST counts, that have passed QC and are selected. 17 | } 18 | \description{ 19 | OriginalST_MergingCountAndAnnotationData() 20 | } 21 | \examples{ 22 | OriginalST_MergingCountAndAnnotationData(Barcodes_H2_1, Counts_H2.1) 23 | } 24 | -------------------------------------------------------------------------------- /man/Output_PGA_Visualization_MatrixGreyNA.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Output_PGA_Visualization_MatrixGreyNA.R 3 | \name{Output_PGA_Visualization_MatrixGreyNA} 4 | \alias{Output_PGA_Visualization_MatrixGreyNA} 5 | \title{Preparing a matrix for spatial visualization of number of genes with an inferred CNV, derived from spatial transcriptomics data.} 6 | \usage{ 7 | Output_PGA_Visualization_MatrixGreyNA(SectionName, InputCNVs, BarcodesFile) 8 | } 9 | \arguments{ 10 | \item{SectionName}{A character string for section name.} 11 | 12 | \item{InputCNVs}{An input dataframe created by the function ExtractSectionWise()} 13 | 14 | \item{BarcodesFile}{A single column dataframe comprised of a list of barcode coordinates in the form AxB, where A = the X coordinate, and B = the Y coordinate.} 
15 | } 16 | \value{ 17 | A dataframe for spatial visualization by Plot_PGA_Visualization_Matrix() 18 | } 19 | \description{ 20 | Output_PGA_Visualization_MatrixGreyNA() 21 | } 22 | \examples{ 23 | Output_PGA_Visualization_MatrixGreyNA("H2_1", H2_1_Sectionwise_CNVsGenes_Counted, L2_Barcodes) 24 | } 25 | -------------------------------------------------------------------------------- /man/Plot_PGA_Visualization_Matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plot_PGA_Visualization_Matrix().R 3 | \name{Plot_PGA_Visualization_Matrix} 4 | \alias{Plot_PGA_Visualization_Matrix} 5 | \title{Plotting the spatial distribution of genes with an inferred copy number alteration from an underlying matrix} 6 | \usage{ 7 | Plot_PGA_Visualization_Matrix(SectionName, InputMatrix, MaxValInput) 8 | } 9 | \arguments{ 10 | \item{SectionName}{A character string for section name.} 11 | 12 | \item{InputMatrix}{An input matrix created by the function Output_PGA_Visualization_MatrixGreyNA()} 13 | 14 | \item{MaxValInput}{An upper threshold for plotting, derived from the maximum sectionwise value of the number of inferred genes with a CNV (from ExtractSectionWise())} 15 | } 16 | \value{ 17 | An output spatial visualization of the number of genes with an inferred CNV from 1k array spatial transcriptomics data. 
18 | } 19 | \description{ 20 | Plot_PGA_Visualization_Matrix() 21 | } 22 | \examples{ 23 | Plot_PGA_Visualization_Matrix("H2_1", PGA_Matrix, MaxVal) 24 | } 25 | -------------------------------------------------------------------------------- /man/SelectingSubTreeData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SelectingSubTreeData.R 3 | \name{SelectingSubTreeData} 4 | \alias{SelectingSubTreeData} 5 | \title{Selecting Subtree Data for Node Selection: this selects a number of barcoded spots from an inferCNV dendrogram object for further analysis.} 6 | \usage{ 7 | SelectingSubTreeData(SubtreeObject, NodeOfInterest) 8 | } 9 | \arguments{ 10 | \item{SubtreeObject}{A dendrogram, phylo object created by subtrees(as.phylo([dendrogram.txt]))} 11 | 12 | \item{NodeOfInterest}{A numerical integer corresponding to a phylogram/dendrogram node of interest} 13 | } 14 | \value{ 15 | A specific subtree node 16 | } 17 | \description{ 18 | SelectingSubTreeData() 19 | } 20 | \examples{ 21 | SelectingSubTreeData(my.subtrees, 4617) 22 | } 23 | --------------------------------------------------------------------------------