├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── FigureScripts
    ├── BenignRefs_ForFigs2and3
    │   ├── BenignRefs.Rmd
    │   ├── BenignRefs.md
    │   ├── Consensus_AllBenigns_phylo_Nodes.png
    │   ├── NodeSelection_BenignRefs_Dendrogram.png
    │   ├── PurestBenigns.png
    │   └── infercnv.21_denoised.png
    ├── Figure 1
    │   ├── Step1_PreprocessingToSpotLevelHMMs
    │   │   ├── Fig1D_Step1_PreprocessingToSpotLevelHMMs.Rmd
    │   │   └── Fig1D_Step1_PreprocessingToSpotLevelHMMs.md
    │   └── Step2_FigureImages
    │   │   ├── Figure1_Part2_FigureImages.Rmd
    │   │   ├── Figure1_Part2_FigureImages.md
    │   │   ├── H2_5_Revised_PGA_SpatialVisualization_2022-02-28.png
    │   │   └── siCNV_SectionBarPlot_Figure1G.png
    ├── Figure 2
    │   ├── Consensus_PurestBenigns.csv
    │   ├── Step1
    │   │   ├── Figure2_AllCancer_siCNV_step1_unsupervised.Rmd
    │   │   ├── Figure2_AllCancer_siCNV_step1_unsupervised.md
    │   │   └── infercnv.21_denoised.png
    │   ├── Step2
    │   │   ├── Consensus_AllCancer_forclustering_phylo.png
    │   │   ├── Fig2_Step2_ManualClustering.Rmd
    │   │   └── Fig2_Step2_ManualClustering.md
    │   ├── Step3
    │   │   ├── Fig2_Step3_ClusteredPlot_and_HMM.Rmd
    │   │   ├── Fig2_Step3_ClusteredPlot_and_HMM.md
    │   │   └── infercnv.21_denoised.png
    │   └── siCNV_GeneOrderFile.tsv
    ├── Figure 3
    │   ├── Consensus_H2_1_forclustering_phylo.png
    │   ├── Figure3.Rmd
    │   ├── Figure3.md
    │   ├── LoupeBrowser_Vis.gif
    │   ├── NodeSelectionDendrogram.png
    │   ├── NodeSelectionFromDenoised.png
    │   ├── infercnv.21_denoised_supervised.png
    │   └── infercnv.21_denoised_unsupervised.png
    ├── Figure 4
    │   ├── Figure4a_LN
    │   │   ├── Figure4a_LNHeatmap.Rmd
    │   │   ├── Figure4a_LNHeatmap.md
    │   │   ├── infercnv.21_denoised.png
    │   │   └── siCNV_GeneOrderFile.tsv
    │   ├── Figure4c_SCC
    │   │   ├── GeneToENSMBL.csv
    │   │   ├── Step1
    │   │   │   ├── Figure4c_Step1_P6_scRNAseq_Benigns.Rmd
    │   │   │   ├── Figure4c_Step1_P6_scRNAseq_Benigns.md
    │   │   │   ├── SCC_P6_benigns_for_clustering_phylo.png
    │   │   │   └── infercnv.21_denoised.png
    │   │   ├── Step2
    │   │   │   ├── Figure4c_Step2_SCC_P6_siCNV_unsupervised.Rmd
    │   │   │   ├── Figure4c_Step2_SCC_P6_siCNV_unsupervised.md
    │   │   │   └── infercnv.21_denoised.png
    │   │   └── Step3
    │   │   │   ├── Figure4c_Step3_SCC_P6_siCNV_supervised.Rmd
    │   │   │   ├── Figure4c_Step3_SCC_P6_siCNV_supervised.md
    │   │   │   ├── SCC_for_clustering_phylo.png
    │   │   │   └── infercnv.21_denoised.png
    │   └── Figure4e
    │   │   ├── Figure4e_pediatricmedulloblastoma.Rmd
    │   │   ├── Figure4e_pediatricmedulloblastoma.md
    │   │   └── infercnv.png
    ├── SCRIPTS.Rmd
    ├── SCRIPTS.md
    ├── Seurat
    │   ├── Seurat_Spatial_Import.Rmd
    │   ├── Seurat_Spatial_Import.md
    │   ├── filtered_feature_bc_matrix.h5
    │   └── spatial
    │   │   ├── H2_1_tissue_hires_image.png
    │   │   ├── scalefactors_json.json
    │   │   └── tissue_positions_list.csv
    ├── SpotLevelCloneCalls
    │   ├── Figure2
    │   │   ├── H1_2_Clones.csv
    │   │   ├── H1_4_Clones.csv
    │   │   ├── H1_5_Clones.csv
    │   │   ├── H2_1_Clones.csv
    │   │   ├── H2_2_Clones.csv
    │   │   └── H2_5_Clones.csv
    │   └── Figure3
    │   │   └── Figure3_Clones.csv
    └── siCNV_GeneOrderFile.tsv
├── Images
    ├── KTH_Logotyp_PMS_2013.eps
    ├── primary-logo.png
    └── secondary-logo.png
├── NAMESPACE
├── R
    ├── ExtractSectionWise.R
    ├── FinalAnnotations.R
    ├── ImportCountData.R
    ├── ImportHistologicalAnnotations.R
    ├── ImportHistologicalOriginalSTSelections.R
    ├── ImportOriginalSTCountData.R
    ├── MergingCountAndAnnotationData.R
    ├── OriginalST_MergingCountAndAnnotationData.R
    ├── Output_PGA_Visualization_MatrixGreyNA.R
    ├── Plot_PGA_Visualization_Matrix().R
    └── SelectingSubTreeData.R
├── README.Rmd
├── README.md
├── UserGuide
    ├── Images
    │   ├── BC23209_C1_PGA_SpatialVisualization_2022-03-27.png
    │   ├── BreastCancer10x_forclustering_phylo.png
    │   ├── BreastCancer10x_forclustering_phylo_manual.png
    │   ├── LB_Annotated.png
    │   ├── LB_CloneImport.png
    │   ├── LB_DragSelection.png
    │   ├── LB_ExcludeUnlabeled.png
    │   ├── LB_ExportHistology.png
    │   ├── LB_ExportingCSV.png
    │   ├── LB_Histology.png
    │   ├── LB_ImportingCloneCSV.png
    │   ├── LB_PolygonalSelection.png
    │   ├── LB_UserguideClones_Visualized.png
    │   ├── LB_Userguide_12.png
    │   ├── NewCategoryImage.png
    │   ├── infercnv.21_denoised.png
    │   └── infercnv.21_denoised_manualselection.png
    ├── UserGuideDraft.Rmd
    ├── UserGuideDraft.md
    └── UserGuideFiles
    │   ├── 10xBreast_UserguideHistologyAnnotations.csv
    │   ├── 17_HMM_predHMMi6.hmm_mode-cells.pred_cnv_genes.dat
    │   ├── infercnv.21_denoised.observations_dendrogram.txt
    │   └── siCNV_GeneOrderFile.tsv
├── environment.yml
├── index.html
└── man
    ├── ExtractSectionWise.Rd
    ├── FinalAnnotations.Rd
    ├── ImportCountData.Rd
    ├── ImportHistologicalAnnotations.Rd
    ├── ImportHistologicalOriginalSTSelections.Rd
    ├── ImportOriginalSTCountData.Rd
    ├── MergingCountAndAnnotationData.Rd
    ├── OriginalST_MergingCountAndAnnotationData.Rd
    ├── Output_PGA_Visualization_MatrixGreyNA.Rd
    ├── Plot_PGA_Visualization_Matrix.Rd
    └── SelectingSubTreeData.Rd


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: SpatialInferCNV
 2 | Type: Package
 3 | Title: What the Package Does (Title Case)
 4 | Version: 0.1.0
 5 | Author: Who wrote it
 6 | Maintainer: Andrew Erickson <ericksonandrewm@gmail.com>
 7 | Description: More about what it does (maybe more than one line)
 8 |     Use four spaces when indenting paragraphs within the Description.
 9 | License: What license is it under?
10 | Encoding: UTF-8
11 | LazyData: true
12 | Imports: 
13 | 	tidyverse, 
14 | 	infercnv, 
15 | 	Seurat,
16 | 	hdf5r,
17 | 	phylogram,
18 | 	ape
19 | RoxygenNote: 7.1.2
20 | 


--------------------------------------------------------------------------------
/FigureScripts/BenignRefs_ForFigs2and3/BenignRefs.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Selecting Benign References
  3 | author: "Andrew Erickson, Nuffield Department of Surgical Sciences, University of Oxford"
  4 | output:
  5 |   md_document:
  6 |    variant: markdown_github
  7 | #output: html_document
  8 | ---
  9 | 
 10 | # Setup
 11 | 
 12 | Initiating libraries.
 13 | 
 14 | ```{r setup, eval = FALSE}
 15 | library(SpatialInferCNV)
 16 | library(devtools)
 17 | library(ape)
 18 | library(phylogram)
 19 | library(tidyverse)
 20 | ```
 21 | 
 22 | # Download data
 23 | 
 24 | Download all the data from [Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29), specifically all folders from: count_matrices/Patient 1/Visium_with_annotation/.
 25 | 
 26 | ```{r, eval = FALSE}
 27 | dir.create("Patient1_BenignRefs")
 28 | setwd("Patient1_BenignRefs")
 29 | ```
 30 | 
 31 | # Selecting Benign Histological Spot annotations
 32 | 
 33 | We then import the consensus pathology annotations, select benigns only, and create an annotation dataframe.
 34 | 
 35 | ```{r, eval = FALSE}
 36 | H1_2_Cleaned <- ImportHistologicalAnnotations("H1_2", "./Patient1_BenignRefs/Visium_with_annotation/H1_2/H1_2_Final_Consensus_Annotations.csv")
 37 | H1_2_Benigns <- filter(H1_2_Cleaned, Histology == "Benign")
 38 | rm(H1_2_Cleaned)
 39 | 
 40 | H1_4_Cleaned <- ImportHistologicalAnnotations("H1_4", "./Patient1_BenignRefs/Visium_with_annotation/H1_4/H1_4_Final_Consensus_Annotations.csv")
 41 | H1_4_Benigns <- filter(H1_4_Cleaned, Histology == "Benign")
 42 | rm(H1_4_Cleaned)
 43 | 
 44 | H1_5_Cleaned <- ImportHistologicalAnnotations("H1_5", "./Patient1_BenignRefs/Visium_with_annotation/H1_5/H1_5_Final_Consensus_Annotations.csv")
 45 | H1_5_Benigns <- filter(H1_5_Cleaned, Histology == "Benign")
 46 | rm(H1_5_Cleaned)
 47 | 
 48 | H2_1_Cleaned <- ImportHistologicalAnnotations("H2_1", "./Patient1_BenignRefs/Visium_with_annotation/H2_1/H2_1_Final_Consensus_Annotations.csv")
 49 | H2_1_Benigns <- filter(H2_1_Cleaned, Histology == "Benign")
 50 | rm(H2_1_Cleaned)
 51 | 
 52 | H2_2_Cleaned <- ImportHistologicalAnnotations("H2_2","./Patient1_BenignRefs/Visium_with_annotation/H2_2/H2_2_Final_Consensus_Annotations.csv")
 53 | H2_2_Benigns <- filter(H2_2_Cleaned, Histology == "Benign")
 54 | rm(H2_2_Cleaned)
 55 | 
 56 | H2_5_Cleaned <- ImportHistologicalAnnotations("H2_5", "./Patient1_BenignRefs/Visium_with_annotation/H2_5/H2_5_Final_Consensus_Annotations.csv")
 57 | H2_5_Benigns <- filter(H2_5_Cleaned, Histology == "Benign")
 58 | rm(H2_5_Cleaned)
 59 | 
 60 | V1_2_Cleaned <- ImportHistologicalAnnotations("V1_2", "./Patient1_BenignRefs/Visium_with_annotation/V1_2/V1_2_Final_Consensus_Annotations.csv")
 61 | V1_2_Benigns <- filter(V1_2_Cleaned, Histology == "Benign" | Histology == "Benign*")
 62 | rm(V1_2_Cleaned)
 63 | 
 64 | AllBenigns <- rbind(H1_2_Benigns, H1_4_Benigns)
 65 | AllBenigns <- rbind(AllBenigns, H2_1_Benigns)
 66 | AllBenigns <- rbind(AllBenigns, H2_2_Benigns)
 67 | AllBenigns <- rbind(AllBenigns, H2_5_Benigns)
 68 | AllBenigns <- rbind(AllBenigns, V1_2_Benigns)
 69 | 
 70 | rm(H1_2_Benigns,
 71 |    H1_4_Benigns,
 72 |    H1_5_Benigns,
 73 |    H2_1_Benigns,
 74 |    H2_2_Benigns,
 75 |    H2_5_Benigns,
 76 |    V1_2_Benigns)
 77 | 
 78 | MergedAll <- AllBenigns
 79 | names(MergedAll)[2] <- "Histology"
 80 | rm(AllBenigns)
 81 | ```
 82 | 
 83 | 
 84 | # Importing Count Data
 85 | 
 86 | This code chunk imports the .h5 files, which are a default processed output of the 10x Genomics Cell Ranger pipeline (see the [10x Genomics cell ranger pipeline documentation](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/output/molecule_info)), and appends a section label to the barcode.
 87 | 
 88 | We use the function ImportCountData(), which requires a section label, and a path to the corresponding .h5 file. Again these are provided from the Mendeley repository (as described above).
 89 | 
 90 | ```{r, eval = FALSE}
 91 | H2_1_ENSBMLID_Counts <- ImportCountData("H2_1", "./Patient1_BenignRefs/Visium_with_annotation/H2_1/filtered_feature_bc_matrix.h5")
 92 | H2_2_ENSBMLID_Counts <- ImportCountData("H2_2", "./Patient1_BenignRefs/Visium_with_annotation/H2_2/filtered_feature_bc_matrix.h5")
 93 | H1_2_ENSBMLID_Counts <- ImportCountData("H1_2", "./Patient1_BenignRefs/Visium_with_annotation/H1_2/filtered_feature_bc_matrix.h5")
 94 | H2_5_ENSBMLID_Counts <- ImportCountData("H2_5", "./Patient1_BenignRefs/Visium_with_annotation/H2_5/filtered_feature_bc_matrix.h5")
 95 | H1_4_ENSBMLID_Counts <- ImportCountData("H1_4", "./Patient1_BenignRefs/Visium_with_annotation/H1_4/filtered_feature_bc_matrix.h5")
 96 | V1_2_ENSBMLID_Counts <- ImportCountData("V1_2", "./Patient1_BenignRefs/Visium_with_annotation/V1_2/filtered_feature_bc_matrix.h5")
 97 | ```
 98 | 
 99 | # QC, and Merging Count and Annotation Data
100 | 
101 | Next, we merge annotations with count data to get section wise count matrices of only benign spots. This also applies a QC threshold (only allowing spots with 500 UMIs or more to pass to the filtered dataframes).
102 | 
103 | ```{r, eval = FALSE}
104 | H2_1_Joined_Counts <- MergingCountAndAnnotationData("H2_1",MergedAll, H2_1_ENSBMLID_Counts)
105 | H2_2_Joined_Counts <- MergingCountAndAnnotationData("H2_2",MergedAll, H2_2_ENSBMLID_Counts)
106 | H1_2_Joined_Counts <- MergingCountAndAnnotationData("H1_2",MergedAll, H1_2_ENSBMLID_Counts)
107 | H2_5_Joined_Counts <- MergingCountAndAnnotationData("H2_5",MergedAll, H2_5_ENSBMLID_Counts)
108 | H1_4_Joined_Counts <- MergingCountAndAnnotationData("H1_4",MergedAll, H1_4_ENSBMLID_Counts)
109 | V1_2_Joined_Counts <- MergingCountAndAnnotationData("V1_2",MergedAll, V1_2_ENSBMLID_Counts)
110 | 
111 | rm(H2_1_ENSBMLID_Counts, H2_2_ENSBMLID_Counts, H1_2_ENSBMLID_Counts, H2_5_ENSBMLID_Counts, H1_4_ENSBMLID_Counts, V1_2_ENSBMLID_Counts)
112 | ```
113 | 
114 | # Merging all count data into one object
115 | 
116 | We then merge all the sectionwise dataframes together, replace joined NA's with 0's (inferCNV requires this), and output the final count and annotation .tsv files that are required for infercnv::run.
117 | 
118 | ```{r, eval = FALSE}
119 | Counts_joined <- H2_1_Joined_Counts %>% full_join(H2_2_Joined_Counts, by = "Genes")
120 | Counts_joined <- Counts_joined %>% full_join(H1_2_Joined_Counts, by = "Genes")
121 | Counts_joined <- Counts_joined %>% full_join(H2_5_Joined_Counts, by = "Genes")
122 | Counts_joined <- Counts_joined %>% full_join(H1_4_Joined_Counts, by = "Genes")
123 | Counts_joined <- Counts_joined %>% full_join(V1_2_Joined_Counts, by = "Genes")
124 | 
125 | rm(H2_1_Joined_Counts ,H2_2_Joined_Counts, H1_2_Joined_Counts, H2_5_Joined_Counts, H1_4_Joined_Counts, V1_2_Joined_Counts)
126 | 
127 | Counts_joined <- Counts_joined %>% replace(., is.na(.), 0)
128 | Counts_joined <- Counts_joined %>% column_to_rownames(., var = "Genes")
129 | 
130 | write.table(Counts_joined, "Organscale_Consensus_Benign_Counts.tsv", sep = "\t")
131 | 
132 | MergedAll_Final <- FinalAnnotations(MergedAll, Counts_joined)
133 | 
134 | write.table(MergedAll_Final, "Organscale_Consensus_Benign_Annotations.tsv", 
135 |             sep = "\t",
136 |             quote = FALSE, 
137 |             col.names = FALSE, 
138 |             row.names = FALSE)
139 | ```
140 | 
141 | # Confirming that the files are formatted correctly to create an inferCNV object
142 | 
143 | The siCNV_GeneOrderFile.tsv has been provided here: https://github.com/aerickso/SpatialInferCNV/tree/main/FigureScripts.
144 | 
145 | ```{r, eval = FALSE}
146 | AllBenigns_Consensus_Test_infCNV <- infercnv::CreateInfercnvObject(raw_counts_matrix="./Patient1_BenignRefs/Organscale_Consensus_Benign_Counts.tsv", 
147 |                                                gene_order_file="./FigureScripts/siCNV_GeneOrderFile.tsv",
148 |                                                annotations_file="./Patient1_BenignRefs/Organscale_Consensus_Benign_Annotations_04112020.tsv",
149 |                                                delim="\t",
150 |                                                ref_group_names=NULL)
151 | ```
152 | 
153 | # Running InferCNV (Unsupervised)
154 | 
155 | ```{r, eval = FALSE}
156 | AllBenigns_Consensus_Test_infCNV = infercnv::run(AllBenigns_Consensus_Test_infCNV,
157 |                                               cutoff=0.1,
158 |                                               out_dir="./Patient1_BenignRefs/Outputs", 
159 |                                               num_threads = 20,
160 |                                               cluster_by_groups=FALSE, 
161 |                                               denoise=TRUE,
162 |                                               HMM=FALSE)
163 | ```
164 | 
165 | 
166 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/infercnv.21_denoised.png) 
167 | 
168 | InferCNV will output many files. We are primarily interested in the final "infercnv.21_denoised.png" file, as well as the text file associated with the dendrogram associated with the hierarchical clustering on the left hand side of the image (infercnv.21_denoised.observations_dendrogram.txt).
169 | 
170 | # Importing dendrogram
171 | 
172 | Next, we want to import this dendrogram file from the above step: 
173 | 
174 | ```{r, eval = FALSE}
175 | Consensus_AllBenigns <- read.dendrogram(file = "./Patient1_BenignRefs/Outputs/infercnv.21_denoised.observations_dendrogram.txt")
176 | 
177 | Consensus_AllBenigns_phylo <- as.phylo(Consensus_AllBenigns)
178 | ```
179 | 
180 | # Visualizing dendrogram node numbers
181 | 
182 | ```{r, eval = FALSE}
183 | my.subtrees = subtrees(Consensus_AllBenigns_phylo)  
184 | 
185 | png("Consensus_AllBenigns_phylo_Nodes.png",width=10000,height=2500, res = 300)
186 | plot(Consensus_AllBenigns_phylo,show.tip.label = FALSE)
187 | nodelabels(text=1:Consensus_AllBenigns_phylo$Nnode,node=1:Consensus_AllBenigns_phylo$Nnode+Ntip(Consensus_AllBenigns_phylo))
188 | dev.off()
189 | ```
190 | 
191 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/PurestBenigns.png) 
192 | 
193 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/Consensus_AllBenigns_phylo_Nodes.png)  
194 | 
195 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/NodeSelection_BenignRefs_Dendrogram.png)    
196 | 
197 | # Node selection (Manual Task outside of R in an image editor)
198 | 
199 | Next, view the output .png file, which provides a (albeit cluttered) labeling of the dendrogram tree nodes. Manually select individual nodes that correspond with a distinct signal, in this case, nodes of visium spots with little-to-no signal.
200 | 
201 | ```{r, eval = FALSE}
202 | #3039 + 2560 
203 | ```
204 | 
205 | # Selecting clones in R
206 | 
207 | Next, after identifying the numerical nodes that correspond to dendrogram branches that correspond with a given set of  signals (aka, clones), we then manually select these nodes in R, apply a label, then join them all together and output as a .csv file for use as a "Histologically Benign, inferCNV null" reference set to compare other features of interest against.
208 | 
209 | ```{r, eval = FALSE}
210 | Node3039 <- SelectingSubTreeData(my.subtrees, 3039) 
211 | Node2560 <- SelectingSubTreeData(my.subtrees, 2560)
212 | 
213 | Merged <- rbind(Node3039, Node2560)
214 | 
215 | table(Merged$Node)
216 | 
217 | Merged$Node <- "Purest Benigns"
218 | names(Merged)[2] <- "Histology"
219 | 
220 | write.csv(Merged, "Consensus_PurestBenigns.csv", row.names = FALSE)
221 | ```
222 | 
223 | The final file is provided at [Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft): Count_matrices/Patient 1/Consensus Pathology.csv.


--------------------------------------------------------------------------------
/FigureScripts/BenignRefs_ForFigs2and3/BenignRefs.md:
--------------------------------------------------------------------------------
  1 | # Setup
  2 | 
  3 | Initiating libraries.
  4 | 
  5 | ``` r
  6 | library(SpatialInferCNV)
  7 | library(devtools)
  8 | library(ape)
  9 | library(phylogram)
 10 | library(tidyverse)
 11 | ```
 12 | 
 13 | # Download data
 14 | 
 15 | Download all the data from
 16 | [Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29),
 17 | specifically all folders from: count_matrices/Patient
 18 | 1/Visium_with_annotation/.
 19 | 
 20 | ``` r
 21 | dir.create("Patient1_BenignRefs")
 22 | setwd("Patient1_BenignRefs")
 23 | ```
 24 | 
 25 | # Selecting Benign Histological Spot annotations
 26 | 
 27 | We then import the consensus pathology annotations, select benigns
 28 | only, and create an annotation dataframe.
 29 | 
 30 | ``` r
 31 | H1_2_Cleaned <- ImportHistologicalAnnotations("H1_2", "./Patient1_BenignRefs/Visium_with_annotation/H1_2/H1_2_Final_Consensus_Annotations.csv")
 32 | H1_2_Benigns <- filter(H1_2_Cleaned, Histology == "Benign")
 33 | rm(H1_2_Cleaned)
 34 | 
 35 | H1_4_Cleaned <- ImportHistologicalAnnotations("H1_4", "./Patient1_BenignRefs/Visium_with_annotation/H1_4/H1_4_Final_Consensus_Annotations.csv")
 36 | H1_4_Benigns <- filter(H1_4_Cleaned, Histology == "Benign")
 37 | rm(H1_4_Cleaned)
 38 | 
 39 | H1_5_Cleaned <- ImportHistologicalAnnotations("H1_5", "./Patient1_BenignRefs/Visium_with_annotation/H1_5/H1_5_Final_Consensus_Annotations.csv")
 40 | H1_5_Benigns <- filter(H1_5_Cleaned, Histology == "Benign")
 41 | rm(H1_5_Cleaned)
 42 | 
 43 | H2_1_Cleaned <- ImportHistologicalAnnotations("H2_1", "./Patient1_BenignRefs/Visium_with_annotation/H2_1/H2_1_Final_Consensus_Annotations.csv")
 44 | H2_1_Benigns <- filter(H2_1_Cleaned, Histology == "Benign")
 45 | rm(H2_1_Cleaned)
 46 | 
 47 | H2_2_Cleaned <- ImportHistologicalAnnotations("H2_2","./Patient1_BenignRefs/Visium_with_annotation/H2_2/H2_2_Final_Consensus_Annotations.csv")
 48 | H2_2_Benigns <- filter(H2_2_Cleaned, Histology == "Benign")
 49 | rm(H2_2_Cleaned)
 50 | 
 51 | H2_5_Cleaned <- ImportHistologicalAnnotations("H2_5", "./Patient1_BenignRefs/Visium_with_annotation/H2_5/H2_5_Final_Consensus_Annotations.csv")
 52 | H2_5_Benigns <- filter(H2_5_Cleaned, Histology == "Benign")
 53 | rm(H2_5_Cleaned)
 54 | 
 55 | V1_2_Cleaned <- ImportHistologicalAnnotations("V1_2", "./Patient1_BenignRefs/Visium_with_annotation/V1_2/V1_2_Final_Consensus_Annotations.csv")
 56 | V1_2_Benigns <- filter(V1_2_Cleaned, Histology == "Benign" | Histology == "Benign*")
 57 | rm(V1_2_Cleaned)
 58 | 
 59 | AllBenigns <- rbind(H1_2_Benigns, H1_4_Benigns)
 60 | AllBenigns <- rbind(AllBenigns, H2_1_Benigns)
 61 | AllBenigns <- rbind(AllBenigns, H2_2_Benigns)
 62 | AllBenigns <- rbind(AllBenigns, H2_5_Benigns)
 63 | AllBenigns <- rbind(AllBenigns, V1_2_Benigns)
 64 | 
 65 | rm(H1_2_Benigns,
 66 |    H1_4_Benigns,
 67 |    H1_5_Benigns,
 68 |    H2_1_Benigns,
 69 |    H2_2_Benigns,
 70 |    H2_5_Benigns,
 71 |    V1_2_Benigns)
 72 | 
 73 | MergedAll <- AllBenigns
 74 | names(MergedAll)[2] <- "Histology"
 75 | rm(AllBenigns)
 76 | ```
 77 | 
 78 | # Importing Count Data
 79 | 
 80 | This code chunk imports the .h5 files a default processed output from
 81 | [10x Genomics cell ranger pipeline
 82 | documentation](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/output/molecule_info),
 83 | and appends a section label to the barcode.
 84 | 
 85 | We use the function ImportCountData(), which requires a section label,
 86 | and a path to the corresponding .h5 file. Again these are provided from
 87 | the Mendeley repository (as described above).
 88 | 
 89 | ``` r
 90 | H2_1_ENSBMLID_Counts <- ImportCountData("H2_1", "./Patient1_BenignRefs/Visium_with_annotation/H2_1/filtered_feature_bc_matrix.h5")
 91 | H2_2_ENSBMLID_Counts <- ImportCountData("H2_2", "./Patient1_BenignRefs/Visium_with_annotation/H2_2/filtered_feature_bc_matrix.h5")
 92 | H1_2_ENSBMLID_Counts <- ImportCountData("H1_2", "./Patient1_BenignRefs/Visium_with_annotation/H1_2/filtered_feature_bc_matrix.h5")
 93 | H2_5_ENSBMLID_Counts <- ImportCountData("H2_5", "./Patient1_BenignRefs/Visium_with_annotation/H2_5/filtered_feature_bc_matrix.h5")
 94 | H1_4_ENSBMLID_Counts <- ImportCountData("H1_4", "./Patient1_BenignRefs/Visium_with_annotation/H1_4/filtered_feature_bc_matrix.h5")
 95 | V1_2_ENSBMLID_Counts <- ImportCountData("V1_2", "./Patient1_BenignRefs/Visium_with_annotation/V1_2/filtered_feature_bc_matrix.h5")
 96 | ```
 97 | 
 98 | # QC, and Merging Count and Annotation Data
 99 | 
100 | Next, we merge annotations with count data to get section wise count
101 | matrices of only benign spots. This also applies a QC threshold (only
102 | allowing spots with 500 UMIs or more to pass to the filtered
103 | dataframes).
104 | 
105 | ``` r
106 | H2_1_Joined_Counts <- MergingCountAndAnnotationData("H2_1",MergedAll, H2_1_ENSBMLID_Counts)
107 | H2_2_Joined_Counts <- MergingCountAndAnnotationData("H2_2",MergedAll, H2_2_ENSBMLID_Counts)
108 | H1_2_Joined_Counts <- MergingCountAndAnnotationData("H1_2",MergedAll, H1_2_ENSBMLID_Counts)
109 | H2_5_Joined_Counts <- MergingCountAndAnnotationData("H2_5",MergedAll, H2_5_ENSBMLID_Counts)
110 | H1_4_Joined_Counts <- MergingCountAndAnnotationData("H1_4",MergedAll, H1_4_ENSBMLID_Counts)
111 | V1_2_Joined_Counts <- MergingCountAndAnnotationData("V1_2",MergedAll, V1_2_ENSBMLID_Counts)
112 | 
113 | rm(H2_1_ENSBMLID_Counts, H2_2_ENSBMLID_Counts, H1_2_ENSBMLID_Counts, H2_5_ENSBMLID_Counts, H1_4_ENSBMLID_Counts, V1_2_ENSBMLID_Counts)
114 | ```
115 | 
116 | # Merging all count data into one object
117 | 
118 | We then merge all the sectionwise dataframes together, replace joined
119 | NA’s with 0’s (inferCNV requires this), and output final count and
120 | annotation .tsv files that are required for infercnv::run.
121 | 
122 | ``` r
123 | Counts_joined <- H2_1_Joined_Counts %>% full_join(H2_2_Joined_Counts, by = "Genes")
124 | Counts_joined <- Counts_joined %>% full_join(H1_2_Joined_Counts, by = "Genes")
125 | Counts_joined <- Counts_joined %>% full_join(H2_5_Joined_Counts, by = "Genes")
126 | Counts_joined <- Counts_joined %>% full_join(H1_4_Joined_Counts, by = "Genes")
127 | Counts_joined <- Counts_joined %>% full_join(V1_2_Joined_Counts, by = "Genes")
128 | 
129 | rm(H2_1_Joined_Counts ,H2_2_Joined_Counts, H1_2_Joined_Counts, H2_5_Joined_Counts, H1_4_Joined_Counts, V1_2_Joined_Counts)
130 | 
131 | Counts_joined <- Counts_joined %>% replace(., is.na(.), 0)
132 | Counts_joined <- Counts_joined %>% column_to_rownames(., var = "Genes")
133 | 
134 | write.table(Counts_joined, "Organscale_Consensus_Benign_Counts.tsv", sep = "\t")
135 | 
136 | MergedAll_Final <- FinalAnnotations(MergedAll, Counts_joined)
137 | 
138 | write.table(MergedAll_Final, "Organscale_Consensus_Benign_Annotations.tsv", 
139 |             sep = "\t",
140 |             quote = FALSE, 
141 |             col.names = FALSE, 
142 |             row.names = FALSE)
143 | ```
144 | 
145 | # Confirming that the files are formatted correctly to create an inferCNV object
146 | 
147 | The siCNV_GeneOrderFile.tsv has been provided here:
148 | <https://github.com/aerickso/SpatialInferCNV/tree/main/FigureScripts>.
149 | 
150 | ``` r
151 | AllBenigns_Consensus_Test_infCNV <- infercnv::CreateInfercnvObject(raw_counts_matrix="./Patient1_BenignRefs/Organscale_Consensus_Benign_Counts.tsv", 
152 |                                                gene_order_file="./FigureScripts/siCNV_GeneOrderFile.tsv",
153 |                                                annotations_file="./Patient1_BenignRefs/Organscale_Consensus_Benign_Annotations.tsv", # same file name as written by write.table() above
154 |                                                delim="\t",
155 |                                                ref_group_names=NULL)
156 | ```
157 | 
158 | # Running InferCNV (Unsupervised)
159 | 
160 | ``` r
161 | AllBenigns_Consensus_Test_infCNV = infercnv::run(AllBenigns_Consensus_Test_infCNV,
162 |                                               cutoff=0.1,
163 |                                               out_dir="./Patient1_BenignRefs/Outputs", 
164 |                                               num_threads = 20,
165 |                                               cluster_by_groups=FALSE, 
166 |                                               denoise=TRUE,
167 |                                               HMM=FALSE)
168 | ```
169 | 
170 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/infercnv.21_denoised.png)
171 | 
172 | InferCNV will output many files. We are primarily interested in the
173 | final “infercnv.21_denoised.png” file, as well as the text file
174 | associated with the dendrogram associated with the hierarchical
175 | clustering on the left hand side of the image
176 | (infercnv.21_denoised.observations_dendrogram.txt).
177 | 
178 | # Importing dendrogram
179 | 
180 | Next, we want to import this dendrogram file from the above step:
181 | 
182 | ``` r
183 | Consensus_AllBenigns <- read.dendrogram(file = "./Patient1_BenignRefs/Outputs/infercnv.21_denoised.observations_dendrogram.txt")
184 | 
185 | Consensus_AllBenigns_phylo <- as.phylo(Consensus_AllBenigns)
186 | ```
187 | 
188 | # Visualizing dendrogram node numbers
189 | 
190 | ``` r
191 | my.subtrees = subtrees(Consensus_AllBenigns_phylo)  
192 | 
193 | png("Consensus_AllBenigns_phylo_Nodes.png",width=10000,height=2500, res = 300)
194 | plot(Consensus_AllBenigns_phylo,show.tip.label = FALSE)
195 | nodelabels(text=1:Consensus_AllBenigns_phylo$Nnode,node=1:Consensus_AllBenigns_phylo$Nnode+Ntip(Consensus_AllBenigns_phylo))
196 | dev.off()
197 | ```
198 | 
199 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/PurestBenigns.png)
200 | 
201 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/Consensus_AllBenigns_phylo_Nodes.png)
202 | 
203 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/NodeSelection_BenignRefs_Dendrogram.png)
204 | 
205 | # Node selection (Manual Task outside of R in an image editor)
206 | 
207 | Next, view the output .png file, which provides a (albeit cluttered)
208 | labeling of the dendrogram tree nodes. Manually select individual nodes
209 | that correspond with a distinct signal, in this case, nodes of visium
210 | spots with little-to-no signal.
211 | 
212 | ``` r
213 | #3039 + 2560 
214 | ```
215 | 
216 | # Selecting clones in R
217 | 
218 | Next, after identifying the numerical nodes that correspond to
219 | dendrogram branches that correspond with a given set of signals (aka,
220 | clones), we then manually select these nodes in R, apply a label, then
221 | join them all together and output as a .csv file for use as a
222 | “Histologically Benign, inferCNV null” reference set to compare other
223 | features of interest against.
224 | 
225 | ``` r
226 | Node3039 <- SelectingSubTreeData(my.subtrees, 3039) 
227 | Node2560 <- SelectingSubTreeData(my.subtrees, 2560)
228 | 
229 | Merged <- rbind(Node3039, Node2560)
230 | 
231 | table(Merged$Node)
232 | 
233 | Merged$Node <- "Purest Benigns"
234 | names(Merged)[2] <- "Histology"
235 | 
236 | write.csv(Merged, "Consensus_PurestBenigns.csv", row.names = FALSE)
237 | ```
238 | 
239 | The final file is provided at
240 | [Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft):
241 | Count_matrices/Patient 1/Consensus Pathology.csv.
242 | 


--------------------------------------------------------------------------------
/FigureScripts/BenignRefs_ForFigs2and3/Consensus_AllBenigns_phylo_Nodes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/BenignRefs_ForFigs2and3/Consensus_AllBenigns_phylo_Nodes.png


--------------------------------------------------------------------------------
/FigureScripts/BenignRefs_ForFigs2and3/NodeSelection_BenignRefs_Dendrogram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/BenignRefs_ForFigs2and3/NodeSelection_BenignRefs_Dendrogram.png


--------------------------------------------------------------------------------
/FigureScripts/BenignRefs_ForFigs2and3/PurestBenigns.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/BenignRefs_ForFigs2and3/PurestBenigns.png


--------------------------------------------------------------------------------
/FigureScripts/BenignRefs_ForFigs2and3/infercnv.21_denoised.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/BenignRefs_ForFigs2and3/infercnv.21_denoised.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 1/Step2_FigureImages/H2_5_Revised_PGA_SpatialVisualization_2022-02-28.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 1/Step2_FigureImages/H2_5_Revised_PGA_SpatialVisualization_2022-02-28.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 1/Step2_FigureImages/siCNV_SectionBarPlot_Figure1G.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 1/Step2_FigureImages/siCNV_SectionBarPlot_Figure1G.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 2/Step1/Figure2_AllCancer_siCNV_step1_unsupervised.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Figure2_Step1_unsupervised_allcancer"
  3 | author: "Andrew Erickson"
  4 | output: md_document
  5 | ---
  6 | # Setup
  7 | 
  8 | ```{r setup, message=FALSE}
  9 | library(tidyverse)
 10 | library(SpatialInferCNV)
 11 | ```
 12 | 
 13 | # Creating a working directory
 14 | 
 15 | We start by creating an empty working directory so that all downloaded files are organized in one place. Download the files [from Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29): Count_matrices/Patient 1/Visium_with_annotation.
 16 | 
 17 | 
 18 | ```{r, eval = FALSE}
 19 | dir.create("Figure2_output")
 20 | # Do not setwd() into the new folder: every later path is written relative to its parent ("./Figure2_output/...").
 21 | ```
 22 | 
 23 | # Consensus Purest Benigns
 24 | 
 25 | Importing Consensus_PurestBenigns.csv. Creating this file is documented [in this script](https://github.com/aerickso/SpatialInferCNV/tree/main/FigureScripts/BenignRefs_ForFigs2and3), but the file is also provided [via Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft): Count_matrices/Patient 1/Consensus_PurestBenigns.csv.
 26 | 
 27 | ```{r, eval = FALSE}
 28 | PurestBenigns_All <- read.csv(file = "./Figure2_output/Patient 1/Consensus_PurestBenigns.csv")  # benign annotations, later bound with the cancer spots
 29 | ```
 30 | 
 31 | # Selecting Patient 1 All Cancer Annotations
 32 | 
 33 | Next, we select all cancer annotations from all sections, and create an annotation file for all of the cancer bearing spots from patient 1. We bind this with the purest benigns, to create a list of all barcodes, reference set (benigns) and observation set (cancer spots) for analysis.
 34 | 
 35 | ```{r, eval = FALSE}
 36 | # For each section: import the consensus annotations, then keep only the cancer histology classes.
 37 | H1_2_Annotations <- ImportHistologicalAnnotations("H1_2", "./Figure2_output/Patient 1/Visium_with_annotation/H1_2/H1_2_Final_Consensus_Annotations.csv")
 38 | H1_2_CancerSpots <- H1_2_Annotations %>% filter(Histology %in% "GG1")
 39 | 
 40 | H1_4_Annotations <- ImportHistologicalAnnotations("H1_4", "./Figure2_output/Patient 1/Visium_with_annotation/H1_4/H1_4_Final_Consensus_Annotations.csv")
 41 | H1_4_CancerSpots <- H1_4_Annotations %>% filter(Histology %in% c("GG2", "GG4 Cribriform"))
 42 | 
 43 | H1_5_Annotations <- ImportHistologicalAnnotations("H1_5", "./Figure2_output/Patient 1/Visium_with_annotation/H1_5/H1_5_Final_Consensus_Annotations.csv")
 44 | H1_5_CancerSpots <- H1_5_Annotations %>% filter(Histology %in% "GG4 Cribriform")
 45 | 
 46 | H2_1_Annotations <- ImportHistologicalAnnotations("H2_1", "./Figure2_output/Patient 1/Visium_with_annotation/H2_1/H2_1_Final_Consensus_Annotations.csv")
 47 | H2_1_CancerSpots <- H2_1_Annotations %>% filter(Histology %in% c("GG2", "GG4"))
 48 | 
 49 | H2_2_Annotations <- ImportHistologicalAnnotations("H2_2", "./Figure2_output/Patient 1/Visium_with_annotation/H2_2/H2_2_Final_Consensus_Annotations.csv")
 50 | H2_2_CancerSpots <- H2_2_Annotations %>% filter(Histology %in% "GG2")
 51 | 
 52 | H2_5_Annotations <- ImportHistologicalAnnotations("H2_5", "./Figure2_output/Patient 1/Visium_with_annotation/H2_5/H2_5_Final_Consensus_Annotations.csv")
 53 | H2_5_CancerSpots <- H2_5_Annotations %>% filter(Histology %in% c("GG4 Cribriform", "Transition_State"))
 54 | 
 55 | # The full annotation tables are no longer needed once the cancer spots are extracted.
 56 | rm(H1_2_Annotations, H1_4_Annotations, H1_5_Annotations,
 57 |    H2_1_Annotations, H2_2_Annotations, H2_5_Annotations)
 58 | 
 59 | # Stack the cancer spots of all six sections in one call and name the second column "Histology".
 60 | AllCancers <- rbind(H1_2_CancerSpots, H1_4_CancerSpots, H1_5_CancerSpots,
 61 |                     H2_1_CancerSpots, H2_2_CancerSpots, H2_5_CancerSpots)
 62 | colnames(AllCancers)[2] <- "Histology"
 63 | 
 64 | rm(H1_2_CancerSpots, H1_4_CancerSpots, H1_5_CancerSpots,
 65 |    H2_1_CancerSpots, H2_2_CancerSpots, H2_5_CancerSpots)
 66 | 
 67 | # Benign rows first, cancer rows second.
 68 | MergedAll <- rbind(PurestBenigns_All, AllCancers)
 69 | rm(PurestBenigns_All, AllCancers)
 80 | ```
 81 | 
 82 | # Merging Cancer and Benign annotations with the ENSMBLIDs
 83 | 
 84 | Next, we create count dataframes that include only spots to be included in the analysis (defined above), and that pass a QC threshold of >500 UMIs per spot.
 85 | 
 86 | ```{r, eval = FALSE}
 87 | H2_1_ENSBMLID_Counts <- ImportCountData("H2_1", "./Figure2_output/Patient 1/Visium_with_annotation/H2_1/filtered_feature_bc_matrix.h5")  # every section repeats this pattern: import counts, merge with MergedAll, drop intermediates
 88 | H2_1_Joined_Counts <- MergingCountAndAnnotationData("H2_1",MergedAll, H2_1_ENSBMLID_Counts)
 89 | rm(H2_1_ENSBMLID_Counts)
 90 | Counts_joined <- H2_1_Joined_Counts  # H2_1 seeds the running table; later sections are joined onto it
 91 | rm(H2_1_Joined_Counts)
 92 | # H1_5: merged counts are full-joined onto the running table by the shared "Genes" column
 93 | H1_5_ENSBMLID_Counts <- ImportCountData("H1_5", "./Figure2_output/Patient 1/Visium_with_annotation/H1_5/filtered_feature_bc_matrix.h5")
 94 | H1_5_Joined_Counts <- MergingCountAndAnnotationData("H1_5",MergedAll, H1_5_ENSBMLID_Counts)
 95 | rm(H1_5_ENSBMLID_Counts)
 96 | Counts_joined <- Counts_joined %>% full_join(H1_5_Joined_Counts, by = "Genes")
 97 | rm(H1_5_Joined_Counts)
 98 | # H2_2
 99 | H2_2_ENSBMLID_Counts <- ImportCountData("H2_2", "./Figure2_output/Patient 1/Visium_with_annotation/H2_2/filtered_feature_bc_matrix.h5")
100 | H2_2_Joined_Counts <- MergingCountAndAnnotationData("H2_2",MergedAll, H2_2_ENSBMLID_Counts)
101 | rm(H2_2_ENSBMLID_Counts)
102 | Counts_joined <- Counts_joined %>% full_join(H2_2_Joined_Counts, by = "Genes")
103 | rm(H2_2_Joined_Counts)
104 | # H1_2
105 | H1_2_ENSBMLID_Counts <- ImportCountData("H1_2", "./Figure2_output/Patient 1/Visium_with_annotation/H1_2/filtered_feature_bc_matrix.h5")
106 | H1_2_Joined_Counts <- MergingCountAndAnnotationData("H1_2",MergedAll, H1_2_ENSBMLID_Counts)
107 | rm(H1_2_ENSBMLID_Counts)
108 | Counts_joined <- Counts_joined %>% full_join(H1_2_Joined_Counts, by = "Genes")
109 | rm(H1_2_Joined_Counts)
110 | # H2_5
111 | H2_5_ENSBMLID_Counts <- ImportCountData("H2_5", "./Figure2_output/Patient 1/Visium_with_annotation/H2_5/filtered_feature_bc_matrix.h5")
112 | H2_5_Joined_Counts <- MergingCountAndAnnotationData("H2_5",MergedAll, H2_5_ENSBMLID_Counts)
113 | rm(H2_5_ENSBMLID_Counts)
114 | Counts_joined <- Counts_joined %>% full_join(H2_5_Joined_Counts, by = "Genes")
115 | rm(H2_5_Joined_Counts)
116 | # H1_4
117 | H1_4_ENSBMLID_Counts <- ImportCountData("H1_4", "./Figure2_output/Patient 1/Visium_with_annotation/H1_4/filtered_feature_bc_matrix.h5")
118 | H1_4_Joined_Counts <- MergingCountAndAnnotationData("H1_4",MergedAll, H1_4_ENSBMLID_Counts)
119 | rm(H1_4_ENSBMLID_Counts)
120 | Counts_joined <- Counts_joined %>% full_join(H1_4_Joined_Counts, by = "Genes")
121 | rm(H1_4_Joined_Counts)
122 | # V1_2: no cancer annotations were selected for this section, so any rows it contributes come from the benign file. NOTE(review): confirm V1_2 is meant to be reference-only
123 | V1_2_ENSBMLID_Counts <- ImportCountData("V1_2", "./Figure2_output/Patient 1/Visium_with_annotation/V1_2/filtered_feature_bc_matrix.h5")
124 | V1_2_Joined_Counts <- MergingCountAndAnnotationData("V1_2",MergedAll, V1_2_ENSBMLID_Counts)
125 | rm(V1_2_ENSBMLID_Counts)
126 | Counts_joined <- Counts_joined %>% full_join(V1_2_Joined_Counts, by = "Genes")
127 | rm(V1_2_Joined_Counts)
128 | 
129 | ```
130 | 
131 | # Joining all Counts
132 | 
133 | Next, we replace NAs from the joined count dataframe with 0's (required for inferCNV), and output the count and annotation .tsv files required for infercnv::run.
134 | 
135 | ```{r, eval = FALSE}
136 | # Zero-fill the NAs left by the full joins, then move the gene identifiers into the row names.
137 | Counts_joined[is.na(Counts_joined)] <- 0
138 | Counts_joined <- column_to_rownames(Counts_joined, var = "Genes")
139 | 
140 | write.table(x = Counts_joined, file = "Organscale_Unsupervised_Consensus_AllCancer_Counts.tsv", sep = "\t")
141 | 
142 | MergedAll_Final <- FinalAnnotations(MergedAll, Counts_joined)
143 | 
144 | # The annotation file is written tab-separated, unquoted, without header or row names.
145 | write.table(MergedAll_Final,
146 |             file = "Organscale_Unsupervised_Consensus_AllCancer_Annotations.tsv",
147 |             sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
148 | ```
149 | 
150 | # Creating the inferCNV object (prior to run)
151 | 
152 | We then create the infercnv object and confirm that the above were run correctly.
153 | 
154 | ```{r, eval = FALSE}
155 | # Build the inferCNV object from the two .tsv files written above; "Purest Benigns" is the reference group and chrM is excluded.
156 | AllCancer_Unsupervised <- infercnv::CreateInfercnvObject(
157 |   raw_counts_matrix = "./Organscale_Unsupervised_Consensus_AllCancer_Counts.tsv",
158 |   annotations_file = "./Organscale_Unsupervised_Consensus_AllCancer_Annotations.tsv",
159 |   gene_order_file = "./siCNV_GeneOrderFile.tsv",
160 |   delim = "\t", ref_group_names = "Purest Benigns", chr_exclude = "chrM")
161 | 
162 | ```
163 | 
164 | # Unsupervised Run - (Typically run on a cluster)
165 | 
166 | We then run the analysis (typically run on a high-performance cluster).
167 | 
168 | ```{r, eval = FALSE}
169 | # Run inferCNV on the object created above, with denoising on and the HMM step off.
170 | AllCancer_Unsupervised <- infercnv::run(
171 |   AllCancer_Unsupervised,
172 |   out_dir = "./Figure2_output/Figure2_Step1/Outputs",
173 |   cutoff = 0.1,
174 |   num_threads = 20,
175 |   cluster_by_groups = FALSE, denoise = TRUE, HMM = FALSE)
176 | ```
177 | 
178 | The output infercnv.observations_dendrogram.txt and infercnv.21_denoised.png are used for the next step.
179 | 
180 | ![infercnv.21_denoised.png output](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%202/Step1/infercnv.21_denoised.png).
181 | 
182 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 2/Step1/Figure2_AllCancer_siCNV_step1_unsupervised.md:
--------------------------------------------------------------------------------
  1 | # Setup
  2 | 
  3 |     library(tidyverse)
  4 | 
  5 |     ## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
  6 | 
  7 |     ## v ggplot2 3.3.5     v purrr   0.3.4
  8 |     ## v tibble  3.1.1     v dplyr   1.0.6
  9 |     ## v tidyr   1.1.3     v stringr 1.4.0
 10 |     ## v readr   2.0.1     v forcats 0.5.1
 11 | 
 12 |     ## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
 13 |     ## x dplyr::filter() masks stats::filter()
 14 |     ## x dplyr::lag()    masks stats::lag()
 15 | 
 16 |     library(SpatialInferCNV)
 17 | 
 18 |     ## Registered S3 method overwritten by 'spatstat.geom':
 19 |     ##   method     from
 20 |     ##   print.boxx cli
 21 | 
 22 |     ## Warning: replacing previous import 'phylogram::as.phylo' by 'ape::as.phylo' when
 23 |     ## loading 'SpatialInferCNV'
 24 | 
 25 | # Creating a working directory
 26 | 
 27 | We start by creating an empty working directory so that all downloaded
 28 | files are organized in one place. Download the files [from
 29 | Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft):
 30 | Count\_matrices/Patient 1/Visium\_with\_annotation.
 31 | 
 32 |     dir.create("Figure2_output")
 33 |     # Do not setwd() into the new folder: every later path is written relative to its parent ("./Figure2_output/...").
 34 | 
 35 | # Consensus Purest Benigns
 36 | 
 37 | Importing Consensus\_PurestBenigns.csv. Creating this file is documented
 38 | [in this
 39 | script](https://github.com/aerickso/SpatialInferCNV/tree/main/FigureScripts/BenignRefs_ForFigs2and3),
 40 | but is provided [Via
 41 | Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft):
 42 | Count\_matrices/Patient 1/Consensus\_PurestBenigns.csv.
 43 | 
 44 |     PurestBenigns_All <- read.csv("./Figure2_output/Patient 1/Consensus_PurestBenigns.csv")  # benign annotations, later bound with the cancer spots
 45 | 
 46 | # Selecting Patient 1 All Cancer Annotations
 47 | 
 48 | Next, we select all cancer annotations from all sections, and create an
 49 | annotation file for all of the cancer bearing spots from patient 1. We
 50 | bind this with the purest benigns, to create a list of all barcodes,
 51 | reference set (benigns) and observation set (cancer spots) for analysis.
 52 | 
 53 |     H1_2_Annotations <- ImportHistologicalAnnotations("H1_2", "./Figure2_output/Patient 1/Visium_with_annotation/H1_2/H1_2_Final_Consensus_Annotations.csv")  # per section: import annotations, then keep only the cancer classes
 54 |     H1_2_CancerSpots <- filter(H1_2_Annotations, Histology == "GG1")
 55 | 
 56 |     H1_4_Annotations <- ImportHistologicalAnnotations("H1_4", "./Figure2_output/Patient 1/Visium_with_annotation/H1_4/H1_4_Final_Consensus_Annotations.csv")
 57 |     H1_4_CancerSpots <- filter(H1_4_Annotations, Histology == "GG2" | Histology ==  "GG4 Cribriform")
 58 | 
 59 |     H1_5_Annotations <- ImportHistologicalAnnotations("H1_5", "./Figure2_output/Patient 1/Visium_with_annotation/H1_5/H1_5_Final_Consensus_Annotations.csv")
 60 |     H1_5_CancerSpots <- filter(H1_5_Annotations, Histology ==  "GG4 Cribriform")
 61 | 
 62 |     H2_1_Annotations <- ImportHistologicalAnnotations("H2_1", "./Figure2_output/Patient 1/Visium_with_annotation/H2_1/H2_1_Final_Consensus_Annotations.csv")
 63 |     H2_1_CancerSpots <- filter(H2_1_Annotations, Histology == "GG2" | Histology ==  "GG4")
 64 | 
 65 |     H2_2_Annotations <- ImportHistologicalAnnotations("H2_2", "./Figure2_output/Patient 1/Visium_with_annotation/H2_2/H2_2_Final_Consensus_Annotations.csv")
 66 |     H2_2_CancerSpots <- filter(H2_2_Annotations, Histology == "GG2")
 67 | 
 68 |     H2_5_Annotations <- ImportHistologicalAnnotations("H2_5", "./Figure2_output/Patient 1/Visium_with_annotation/H2_5/H2_5_Final_Consensus_Annotations.csv")
 69 |     H2_5_CancerSpots <- filter(H2_5_Annotations, Histology == "GG4 Cribriform" | Histology == "Transition_State")
 70 |     # the full annotation tables are no longer needed once the cancer spots are extracted
 71 |     rm(H1_2_Annotations,
 72 |        H1_4_Annotations,
 73 |        H1_5_Annotations,
 74 |        H2_1_Annotations,
 75 |        H2_2_Annotations,
 76 |        H2_5_Annotations)
 77 |     # stack the cancer spots of all six sections, one rbind at a time
 78 |     AllCancers <- rbind(H1_2_CancerSpots, H1_4_CancerSpots)
 79 |     AllCancers <- rbind(AllCancers, H1_5_CancerSpots)
 80 |     AllCancers <- rbind(AllCancers, H2_1_CancerSpots)
 81 |     AllCancers <- rbind(AllCancers, H2_2_CancerSpots)
 82 |     AllCancers <- rbind(AllCancers, H2_5_CancerSpots)
 83 | 
 84 |     names(AllCancers)[2] <- "Histology"  # name the second column "Histology"
 85 | 
 86 |     rm(H1_2_CancerSpots,
 87 |        H1_4_CancerSpots,
 88 |        H1_5_CancerSpots,
 89 |        H2_1_CancerSpots,
 90 |        H2_2_CancerSpots,
 91 |        H2_5_CancerSpots)
 92 |     # benign rows first, cancer rows second
 93 |     MergedAll <- rbind(PurestBenigns_All, AllCancers)
 94 | 
 95 |     rm(PurestBenigns_All)
 96 |     rm(AllCancers)
 97 | 
 98 | # Merging Cancer and Benign annotations with the ENSMBLIDs
 99 | 
100 | Next, we create count dataframes that include only spots to be
101 | included in the analysis (defined above), and that pass a QC threshold
102 | of &gt;500 UMIs per spot.
103 | 
104 |     H2_1_ENSBMLID_Counts <- ImportCountData("H2_1", "./Figure2_output/Patient 1/Visium_with_annotation/H2_1/filtered_feature_bc_matrix.h5")  # every section repeats this pattern: import counts, merge with MergedAll, drop intermediates
105 |     H2_1_Joined_Counts <- MergingCountAndAnnotationData("H2_1",MergedAll, H2_1_ENSBMLID_Counts)
106 |     rm(H2_1_ENSBMLID_Counts)
107 |     Counts_joined <- H2_1_Joined_Counts  # H2_1 seeds the running table; later sections are joined onto it
108 |     rm(H2_1_Joined_Counts)
109 |     # H1_5: merged counts are full-joined onto the running table by the shared "Genes" column
110 |     H1_5_ENSBMLID_Counts <- ImportCountData("H1_5", "./Figure2_output/Patient 1/Visium_with_annotation/H1_5/filtered_feature_bc_matrix.h5")
111 |     H1_5_Joined_Counts <- MergingCountAndAnnotationData("H1_5",MergedAll, H1_5_ENSBMLID_Counts)
112 |     rm(H1_5_ENSBMLID_Counts)
113 |     Counts_joined <- Counts_joined %>% full_join(H1_5_Joined_Counts, by = "Genes")
114 |     rm(H1_5_Joined_Counts)
115 |     # H2_2
116 |     H2_2_ENSBMLID_Counts <- ImportCountData("H2_2", "./Figure2_output/Patient 1/Visium_with_annotation/H2_2/filtered_feature_bc_matrix.h5")
117 |     H2_2_Joined_Counts <- MergingCountAndAnnotationData("H2_2",MergedAll, H2_2_ENSBMLID_Counts)
118 |     rm(H2_2_ENSBMLID_Counts)
119 |     Counts_joined <- Counts_joined %>% full_join(H2_2_Joined_Counts, by = "Genes")
120 |     rm(H2_2_Joined_Counts)
121 |     # H1_2
122 |     H1_2_ENSBMLID_Counts <- ImportCountData("H1_2", "./Figure2_output/Patient 1/Visium_with_annotation/H1_2/filtered_feature_bc_matrix.h5")
123 |     H1_2_Joined_Counts <- MergingCountAndAnnotationData("H1_2",MergedAll, H1_2_ENSBMLID_Counts)
124 |     rm(H1_2_ENSBMLID_Counts)
125 |     Counts_joined <- Counts_joined %>% full_join(H1_2_Joined_Counts, by = "Genes")
126 |     rm(H1_2_Joined_Counts)
127 |     # H2_5
128 |     H2_5_ENSBMLID_Counts <- ImportCountData("H2_5", "./Figure2_output/Patient 1/Visium_with_annotation/H2_5/filtered_feature_bc_matrix.h5")
129 |     H2_5_Joined_Counts <- MergingCountAndAnnotationData("H2_5",MergedAll, H2_5_ENSBMLID_Counts)
130 |     rm(H2_5_ENSBMLID_Counts)
131 |     Counts_joined <- Counts_joined %>% full_join(H2_5_Joined_Counts, by = "Genes")
132 |     rm(H2_5_Joined_Counts)
133 |     # H1_4
134 |     H1_4_ENSBMLID_Counts <- ImportCountData("H1_4", "./Figure2_output/Patient 1/Visium_with_annotation/H1_4/filtered_feature_bc_matrix.h5")
135 |     H1_4_Joined_Counts <- MergingCountAndAnnotationData("H1_4",MergedAll, H1_4_ENSBMLID_Counts)
136 |     rm(H1_4_ENSBMLID_Counts)
137 |     Counts_joined <- Counts_joined %>% full_join(H1_4_Joined_Counts, by = "Genes")
138 |     rm(H1_4_Joined_Counts)
139 |     # V1_2: no cancer annotations were selected for this section, so any rows it contributes come from the benign file. NOTE(review): confirm V1_2 is meant to be reference-only
140 |     V1_2_ENSBMLID_Counts <- ImportCountData("V1_2", "./Figure2_output/Patient 1/Visium_with_annotation/V1_2/filtered_feature_bc_matrix.h5")
141 |     V1_2_Joined_Counts <- MergingCountAndAnnotationData("V1_2",MergedAll, V1_2_ENSBMLID_Counts)
142 |     rm(V1_2_ENSBMLID_Counts)
143 |     Counts_joined <- Counts_joined %>% full_join(V1_2_Joined_Counts, by = "Genes")
144 |     rm(V1_2_Joined_Counts)
145 | 
146 | # Joining all Counts
147 | 
148 | Next, we replace NAs from the joined count dataframe with 0’s (required
149 | for inferCNV), and output the count and annotation .tsv files required
150 | for infercnv::run.
151 | 
152 |     Counts_joined <- Counts_joined %>% replace(., is.na(.), 0)  # zero-fill the NAs left by the full joins
153 |     Counts_joined <- Counts_joined %>% column_to_rownames(., var = "Genes")  # gene identifiers become the row names
154 |     # counts table for inferCNV, tab-separated
155 |     write.table(Counts_joined, "Organscale_Unsupervised_Consensus_AllCancer_Counts.tsv", sep = "\t")
156 | 
157 |     MergedAll_Final <- FinalAnnotations(MergedAll, Counts_joined)
158 |     # annotation file: tab-separated, unquoted, without header or row names
159 |     write.table(MergedAll_Final, "Organscale_Unsupervised_Consensus_AllCancer_Annotations.tsv", 
160 |                 sep = "\t",
161 |                 quote = FALSE, 
162 |                 col.names = FALSE, 
163 |                 row.names = FALSE)
164 | 
165 | # Creating the inferCNV object (prior to run)
166 | 
167 | We then create the infercnv object and confirm that the above were run
168 | correctly.
169 | 
170 |     AllCancer_Unsupervised <- infercnv::CreateInfercnvObject(raw_counts_matrix="./Organscale_Unsupervised_Consensus_AllCancer_Counts.tsv",  # counts file written in the previous step
171 |                                                    gene_order_file="./siCNV_GeneOrderFile.tsv",
172 |                                                    annotations_file="./Organscale_Unsupervised_Consensus_AllCancer_Annotations.tsv",  # annotation file written in the previous step
173 |                                                    delim="\t",
174 |                                                    ref_group_names="Purest Benigns",  # annotation label used as the reference group
175 |                                                                    chr_exclude = c("chrM"))  # chrM is left out of the analysis
176 | 
177 | # Unsupervised Run - (Typically run on a cluster)
178 | 
179 | We then run the analysis (typically run on a high-performance cluster).
180 | 
181 |     AllCancer_Unsupervised = infercnv::run(AllCancer_Unsupervised,  # run inferCNV on the object created above
182 |                                                   cutoff=0.1,
183 |                                                   out_dir="./Figure2_output/Figure2_Step1/Outputs", 
184 |                                                   cluster_by_groups=FALSE,
185 |                                                   num_threads = 20, 
186 |                                                   denoise=TRUE,  # denoised output is requested
187 |                                                   HMM=FALSE)  # the HMM step is switched off for this run
188 | 
189 | The output infercnv.observations\_dendrogram.txt and
190 | infercnv.21\_denoised.png are used for the next step.
191 | 
192 | ![infercnv.21\_denoised.png
193 | output](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%202/Step1/infercnv.21_denoised.png).
194 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 2/Step1/infercnv.21_denoised.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 2/Step1/infercnv.21_denoised.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 2/Step2/Consensus_AllCancer_forclustering_phylo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 2/Step2/Consensus_AllCancer_forclustering_phylo.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 2/Step2/Fig2_Step2_ManualClustering.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Fig2_Step2_ManualClustering"
  3 | author: "Andrew Erickson"
  4 | output: md_document
  5 | ---
  6 | 
  7 | # Setup
  8 | 
  9 | Initializing libraries.
 10 | 
 11 | ```{r setup, message=FALSE}
 12 | library(SpatialInferCNV)
 13 | library(phylogram)
 14 | library(ape)
 15 | library(tidyverse)
 16 | ```
 17 | 
 18 | # Importing dendrogram
 19 | 
 20 | Importing the dendrogram file created in step 1.
 21 | 
 22 | ```{r, eval = FALSE}
 23 | # Read the dendrogram written by the Step 1 run, then convert it with as.phylo().
 24 | Consensus_AllCancer_for_clustering <- read.dendrogram(file = "./Figure2_output/Figure2_Step1/Outputs/infercnv.observations_dendrogram.txt")
 25 | Consensus_AllCancer_for_clustering_phylo <- Consensus_AllCancer_for_clustering %>% as.phylo()
 26 | ```
 27 | 
 28 | # Visualizing Tree
 29 | 
 30 | Next, we use the dendrogram file to visualize the dendrogram itself.
 31 | 
 32 | ```{r, eval = FALSE}
 33 | my.subtrees <- subtrees(Consensus_AllCancer_for_clustering_phylo)  # subtrees() to subset
 34 | n_internal <- Consensus_AllCancer_for_clustering_phylo$Nnode  # labels run 1..Nnode; the Ntip() offset below gives the matching node ids
 35 | png("Consensus_AllCancer_forclustering_phylo.png", width = 10000, height = 2500, res = 300)
 36 | plot(Consensus_AllCancer_for_clustering_phylo, show.tip.label = FALSE)
 37 | nodelabels(text = 1:n_internal, node = 1:n_internal + Ntip(Consensus_AllCancer_for_clustering_phylo))
 38 | dev.off()
 39 | ```
 40 | 
 41 | Here is the output image:
 42 | 
 43 | ![example Consensus_AllCancer_forclustering_phylo.png for section H2_5](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%202/Step2/Consensus_AllCancer_forclustering_phylo.png).
 44 | 
 45 | # Manual Clone Selection
 46 | 
 47 | Comparing the output image with the denoised image (using an image viewer) allows for selection of groups of spots with shared CNVs. Note the "nodes" from the visualized dendrogram, which allow for supervised selection of clones.
 48 | 
 49 | ```{r, eval = FALSE}
 50 | # Manually chosen dendrogram nodes and the clone each one is assigned to.
 51 | clone_by_node <- c(Node_4617 = "Clone_J",
 52 |                    Node_4446 = "Clone_I",
 53 |                    Node_3617 = "Clone_I",
 54 |                    Node_2934 = "Clone_F",
 55 |                    Node_2893 = "Clone_E",
 56 |                    Node_2832 = "Clone_E",
 57 |                    Node_2769 = "Clone_E",
 58 |                    Node_3114 = "Clone_H",
 59 |                    Node_2991 = "Clone_B",
 60 |                    Node_2525 = "Clone_G",
 61 |                    Node_2284 = "Clone_C",
 62 |                    Node_2078 = "Clone_D",
 63 |                    Node_92   = "Clone_K",
 64 |                    Node_3    = "Clone_A")
 65 | 
 66 | Node4617 <- SelectingSubTreeData(my.subtrees, 4617)
 67 | Node4446 <- SelectingSubTreeData(my.subtrees, 4446)
 68 | Node3617 <- SelectingSubTreeData(my.subtrees, 3617)
 69 | Node2934 <- SelectingSubTreeData(my.subtrees, 2934)
 70 | Node2893 <- SelectingSubTreeData(my.subtrees, 2893)
 71 | Node2832 <- SelectingSubTreeData(my.subtrees, 2832)
 72 | Node2769 <- SelectingSubTreeData(my.subtrees, 2769)
 73 | Node3114 <- SelectingSubTreeData(my.subtrees, 3114)
 74 | Node2991 <- SelectingSubTreeData(my.subtrees, 2991)
 75 | Node2525 <- SelectingSubTreeData(my.subtrees, 2525)
 76 | Node2284 <- SelectingSubTreeData(my.subtrees, 2284)
 77 | Node2078 <- SelectingSubTreeData(my.subtrees, 2078)
 78 | Node92 <- SelectingSubTreeData(my.subtrees, 92)
 79 | Node3 <- SelectingSubTreeData(my.subtrees, 3)
 80 | 
 81 | # Stack the selections in the same order as they were extracted.
 82 | Merged <- rbind(Node4617, Node4446, Node3617, Node2934, Node2893, Node2832, Node2769,
 83 |                 Node3114, Node2991, Node2525, Node2284, Node2078, Node92, Node3)
 84 | 
 85 | table(Merged$Node)
 86 | 
 87 | # Relabel node names with clone names; any label missing from the lookup is left as it is.
 88 | lookup_pos <- match(Merged$Node, names(clone_by_node))
 89 | Merged$Node <- ifelse(is.na(lookup_pos), Merged$Node, unname(clone_by_node[lookup_pos]))
 90 | 
 91 | write.csv(Merged, "Fig2_forclustering.csv", row.names = FALSE)
112 | ```
113 | 
114 | This Fig2_forclustering.csv file is used in [Step 3](https://github.com/aerickso/SpatialInferCNV/tree/main/FigureScripts/Figure%202/Step3).
115 | 
116 | # Outputting .CSV files for LoupeBrowser visualization.
117 | 
118 | LoupeBrowser files are available from the authors upon request: andrew.erickson@nds.ox.ac.uk, or joakim.lundenberg@scilifelab.se. However, we provide the [high resolution input files](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29) : Histological_images/Patient 1/Visium and FASTQ files (EGA link pending) to run [SpaceRanger](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/output/overview) to output the LoupeBrowser files.
119 | 
120 | ```{r, eval = FALSE}
121 | # Split the combined identifiers once for all sections: characters 1-4 are taken as the
122 | # section ID and the text from character 6 onwards as the barcode, with "." replaced by "-".
123 | Merged_ForLoupe <- Merged %>% mutate(section = substr(Barcode, 1, 4))
124 | Merged_ForLoupe$Barcode <- trimws(substr(Merged_ForLoupe$Barcode, 6, 100))
125 | Merged_ForLoupe$Barcode <- gsub("\\.", "\\-", Merged_ForLoupe$Barcode)
126 | 
127 | # Write one Barcode/Node .csv per section (Fig2e_<section>_Clones_ForLoupeBrowser.csv).
128 | for (section_id in c("H1_5", "H2_5", "H1_4", "H1_2", "H2_1", "H2_2")) {
129 |   Clones_ForLoupeBrowser <- filter(Merged_ForLoupe, section == section_id) %>%
130 |     select(Barcode, Node)
131 |   write.csv(Clones_ForLoupeBrowser, paste0("Fig2e_", section_id, "_Clones_ForLoupeBrowser.csv"), row.names = FALSE)
132 | }
168 | ```
169 | 
170 | 
171 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 2/Step2/Fig2_Step2_ManualClustering.md:
--------------------------------------------------------------------------------
  1 | # Setup
  2 | 
  3 | Initializing libraries.
  4 | 
  5 |     library(SpatialInferCNV)
  6 | 
  7 |     ## Warning: replacing previous import 'phylogram::as.phylo' by 'ape::as.phylo' when
  8 |     ## loading 'SpatialInferCNV'
  9 | 
 10 |     library(phylogram)
 11 |     library(ape)
 12 | 
 13 |     ## 
 14 |     ## Attaching package: 'ape'
 15 | 
 16 |     ## The following object is masked from 'package:phylogram':
 17 |     ## 
 18 |     ##     as.phylo
 19 | 
 20 |     library(tidyverse)
 21 | 
 22 |     ## Registered S3 method overwritten by 'cli':
 23 |     ##   method     from         
 24 |     ##   print.boxx spatstat.geom
 25 | 
 26 |     ## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
 27 | 
 28 |     ## v ggplot2 3.3.5     v purrr   0.3.4
 29 |     ## v tibble  3.1.1     v dplyr   1.0.6
 30 |     ## v tidyr   1.1.3     v stringr 1.4.0
 31 |     ## v readr   2.0.1     v forcats 0.5.1
 32 | 
 33 |     ## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
 34 |     ## x dplyr::filter() masks stats::filter()
 35 |     ## x dplyr::lag()    masks stats::lag()
 36 | 
 37 | # Importing dendrogram
 38 | 
 39 | Importing the dendrogram file created in step 1.
 40 | 
 41 |     Consensus_AllCancer_for_clustering <- read.dendrogram(file="./Figure2_output/Figure2_Step1/Outputs/infercnv.observations_dendrogram.txt")
 42 |     # Convert the dendrogram to a phylo object; subtrees(), plot() and Ntip() are applied to this object.
 43 |     Consensus_AllCancer_for_clustering_phylo <- as.phylo(Consensus_AllCancer_for_clustering)
 44 | 
 45 | # Visualizing Tree
 46 | 
 47 | Next, we use the dendrogram file to visualize the dendrogram itself.
 48 | 
 49 |     my.subtrees <- subtrees(Consensus_AllCancer_for_clustering_phylo)  # list of subtrees, indexed by node when selecting clones
 50 |     internal_ids <- 1:Consensus_AllCancer_for_clustering_phylo$Nnode   # labels 1..Nnode for the internal nodes
 51 |     png(filename = "Consensus_AllCancer_forclustering_phylo.png", width = 10000, height = 2500, res = 300)
 52 |     plot(Consensus_AllCancer_for_clustering_phylo, show.tip.label = FALSE)
 53 |     nodelabels(text = internal_ids, node = internal_ids + Ntip(Consensus_AllCancer_for_clustering_phylo))  # internal nodes are numbered after the tips
 54 |     dev.off()
 55 | 
 56 | Here is the output image:
 57 | 
 58 | ![example Consensus\_AllCancer\_forclustering\_phylo.png for section
 59 | H2\_5](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%202/Step2/Consensus_AllCancer_forclustering_phylo.png).
 60 | 
 61 | # Manual Clone Selection
 62 | 
 63 | Comparison of the output image and the denoised image (through use of an
 64 | image viewer), allows for selection of groups of spots with shared CNVs.
 65 | Note the “nodes” from the visualized dendrogram, allowing for supervised
 66 | selection of clones.
 67 | 
 68 |     # Clone J - Node 4617
 69 |     # Clone I - Node 4446
 70 |     # Clone I - Node 3617
 71 |     # Clone F - Node 2934
 72 |     # Clone E - Node 2893
 73 |     # Clone E - Node 2832
 74 |     # Clone E - Node 2769
 75 |     # Clone H - Node 3114
 76 |     # Clone B - Node 2991
 77 |     # Clone G - Node 2525
 78 |     # Clone C - Node 2284
 79 |     # Clone D - Node 2078
 80 |     # Clone K - Node 92
 81 |     # Clone A - Node 3
 82 |     # Pull the data for each chosen node out of the subtree list (presumably a Barcode/Node table per node; later code reads both columns - TODO confirm).
 83 |     Node4617 <- SelectingSubTreeData(my.subtrees, 4617)
 84 |     Node4446 <- SelectingSubTreeData(my.subtrees, 4446)
 85 |     Node3617 <- SelectingSubTreeData(my.subtrees, 3617)
 86 |     Node2934 <- SelectingSubTreeData(my.subtrees, 2934)
 87 |     Node2893 <- SelectingSubTreeData(my.subtrees, 2893)
 88 |     Node2832 <- SelectingSubTreeData(my.subtrees, 2832)
 89 |     Node2769 <- SelectingSubTreeData(my.subtrees, 2769)
 90 |     Node3114 <- SelectingSubTreeData(my.subtrees, 3114)
 91 |     Node2991 <- SelectingSubTreeData(my.subtrees, 2991)
 92 |     Node2525 <- SelectingSubTreeData(my.subtrees, 2525)
 93 |     Node2284 <- SelectingSubTreeData(my.subtrees, 2284)
 94 |     Node2078 <- SelectingSubTreeData(my.subtrees, 2078)
 95 |     Node92 <- SelectingSubTreeData(my.subtrees, 92)
 96 |     Node3 <- SelectingSubTreeData(my.subtrees, 3)
 97 | 
 98 |     Merged <- rbind(
 99 |         Node4617,                       # Clone J
100 |         Node4446, Node3617,             # Clone I
101 |         Node2934,                       # Clone F
102 |         Node2893, Node2832, Node2769,   # Clone E
103 |         Node3114,                       # Clone H
104 |         Node2991,                       # Clone B
105 |         Node2525,                       # Clone G
106 |         Node2284,                       # Clone C
107 |         Node2078,                       # Clone D
108 |         Node92,                         # Clone K
109 |         Node3                           # Clone A
110 |     )
111 | 
112 |     table(Merged$Node)  # number of rows per node label, as a sanity check before relabelling
113 |     # Replace node labels with clone letters; several nodes share a clone (I and E). Labels that match nothing are kept as they are.
114 |     Merged$Node <- ifelse(Merged$Node == "Node_4617" , "Clone_J",
115 |                          ifelse(Merged$Node == "Node_4446" , "Clone_I",
116 |                          ifelse(Merged$Node == "Node_3617" , "Clone_I",
117 |                          ifelse(Merged$Node == "Node_2934" , "Clone_F",
118 |                          ifelse(Merged$Node == "Node_2893" , "Clone_E",
119 |                          ifelse(Merged$Node == "Node_2832" , "Clone_E",
120 |                          ifelse(Merged$Node == "Node_2769" , "Clone_E",
121 |                          ifelse(Merged$Node == "Node_3114" , "Clone_H",
122 |                          ifelse(Merged$Node == "Node_2991" , "Clone_B",
123 |                          ifelse(Merged$Node == "Node_2525" , "Clone_G",
124 |                          ifelse(Merged$Node == "Node_2284" , "Clone_C",
125 |                          ifelse(Merged$Node == "Node_2078" , "Clone_D",
126 |                          ifelse(Merged$Node == "Node_92" , "Clone_K",
127 |                          ifelse(Merged$Node == "Node_3" , "Clone_A", Merged$Node))))))))))))))
128 |     # Save the relabelled table without row names.
129 |     write.csv(Merged, "Fig2_forclustering.csv", row.names = FALSE)
130 | 
131 | This Fig2\_forclustering.csv file is used in [Step
132 | 3](https://github.com/aerickso/SpatialInferCNV/tree/main/FigureScripts/Figure%202/Step3).
133 | 
134 | # Outputting .CSV files for LoupeBrowser visualization.
135 | 
136 | LoupeBrowser files are available from the authors upon request:
137 | <andrew.erickson@nds.ox.ac.uk>, or <joakim.lundenberg@scilifelab.se>.
138 | However, we provide the [high resolution input
139 | files](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29)
140 | : Histological\_images/Patient 1/Visium and FASTQ files (EGA link
141 | pending) to run
142 | [SpaceRanger](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/output/overview)
143 | to output the LoupeBrowser files.
144 | 
145 |     H1_5_Merged <- Merged  # section H1_5; the same five steps are repeated for every section below
146 |     H1_5_Merged <- H1_5_Merged %>% mutate(section = substr(Barcode, 1, 4))  # first 4 characters of the barcode are the section ID
147 |     H1_5_Merged$Barcode <- trimws(substr(H1_5_Merged$Barcode, 6, 100))  # drop the section ID and the separator after it
148 |     H1_5_Merged$Barcode <- gsub("\\.", "\\-", H1_5_Merged$Barcode)  # replace "." with "-" in the barcode
149 |     H1_5_Clones_ForLoupeBrowser <- filter(H1_5_Merged, section == "H1_5") %>%
150 |                                                 select(Barcode, Node)
151 |     write.csv(H1_5_Clones_ForLoupeBrowser, "Fig2e_H1_5_Clones_ForLoupeBrowser.csv", row.names = FALSE)
152 |     # Section H2_5: derive the section ID, clean the barcode, keep this section's rows, export Barcode/Node.
153 |     H2_5_Merged <- Merged
154 |     H2_5_Merged <- H2_5_Merged %>% mutate(section = substr(Barcode, 1, 4))
155 |     H2_5_Merged$Barcode <- trimws(substr(H2_5_Merged$Barcode, 6, 100))
156 |     H2_5_Merged$Barcode <- gsub("\\.", "\\-", H2_5_Merged$Barcode)
157 |     H2_5_Clones_ForLoupeBrowser <- filter(H2_5_Merged, section == "H2_5") %>%
158 |                                                 select(Barcode, Node)
159 |     write.csv(H2_5_Clones_ForLoupeBrowser, "Fig2e_H2_5_Clones_ForLoupeBrowser.csv", row.names = FALSE)
160 |     # Section H1_4.
161 |     H1_4_Merged <- Merged
162 |     H1_4_Merged <- H1_4_Merged %>% mutate(section = substr(Barcode, 1, 4))
163 |     H1_4_Merged$Barcode <- trimws(substr(H1_4_Merged$Barcode, 6, 100))
164 |     H1_4_Merged$Barcode <- gsub("\\.", "\\-", H1_4_Merged$Barcode)
165 |     H1_4_Clones_ForLoupeBrowser <- filter(H1_4_Merged, section == "H1_4") %>%
166 |                                                 select(Barcode, Node)
167 |     write.csv(H1_4_Clones_ForLoupeBrowser, "Fig2e_H1_4_Clones_ForLoupeBrowser.csv", row.names = FALSE)
168 |     # Section H1_2.
169 |     H1_2_Merged <- Merged
170 |     H1_2_Merged <- H1_2_Merged %>% mutate(section = substr(Barcode, 1, 4))
171 |     H1_2_Merged$Barcode <- trimws(substr(H1_2_Merged$Barcode, 6, 100))
172 |     H1_2_Merged$Barcode <- gsub("\\.", "\\-", H1_2_Merged$Barcode)
173 |     H1_2_Clones_ForLoupeBrowser <- filter(H1_2_Merged, section == "H1_2") %>%
174 |                                                 select(Barcode, Node)
175 |     write.csv(H1_2_Clones_ForLoupeBrowser, "Fig2e_H1_2_Clones_ForLoupeBrowser.csv", row.names = FALSE)
176 |     # Section H2_1.
177 |     H2_1_Merged <- Merged
178 |     H2_1_Merged <- H2_1_Merged %>% mutate(section = substr(Barcode, 1, 4))
179 |     H2_1_Merged$Barcode <- trimws(substr(H2_1_Merged$Barcode, 6, 100))
180 |     H2_1_Merged$Barcode <- gsub("\\.", "\\-", H2_1_Merged$Barcode)
181 |     H2_1_Clones_ForLoupeBrowser <- filter(H2_1_Merged, section == "H2_1") %>%
182 |                                                 select(Barcode, Node)
183 |     write.csv(H2_1_Clones_ForLoupeBrowser, "Fig2e_H2_1_Clones_ForLoupeBrowser.csv", row.names = FALSE)
184 |     # Section H2_2.
185 |     H2_2_Merged <- Merged
186 |     H2_2_Merged <- H2_2_Merged %>% mutate(section = substr(Barcode, 1, 4))
187 |     H2_2_Merged$Barcode <- trimws(substr(H2_2_Merged$Barcode, 6, 100))
188 |     H2_2_Merged$Barcode <- gsub("\\.", "\\-", H2_2_Merged$Barcode)
189 |     H2_2_Clones_ForLoupeBrowser <- filter(H2_2_Merged, section == "H2_2") %>%
190 |                                                 select(Barcode, Node)
191 |     write.csv(H2_2_Clones_ForLoupeBrowser, "Fig2e_H2_2_Clones_ForLoupeBrowser.csv", row.names = FALSE)
192 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 2/Step3/Fig2_Step3_ClusteredPlot_and_HMM.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Fig2_Step3_ClusteredPlot_and_HMM"
 3 | author: "Andrew Erickson"
 4 | output: md_document
 5 | ---
 6 | 
 7 | # Setup
 8 | 
 9 | ```{r, message=FALSE}
10 | library(tidyverse)        # package start-up messages are hidden by the `message` chunk option
11 | library(infercnv)         # CreateInfercnvObject() and run()
12 | library(Seurat)
13 | library(hdf5r)
14 | library(SpatialInferCNV)
15 | ```
16 | 
17 | # Pre-processing clustered data
18 | 
19 | Importing previously downloaded Consensus_PurestBenigns.csv (step 1), and the Fig2_forclustering.csv file created in step 2. We use this to create an updated annotation file for infercnv::run.
20 | 
21 | ```{r, eval = FALSE}
22 | PurestBenigns_All <- read.csv(file = "./Figure2_output/Patient 1/Consensus_PurestBenigns.csv")
23 | PurestBenigns_All[["Histology"]] <- "Purest Benigns"  # every benign reference row gets the same label
24 | # Manually selected clones from step 2; the second column is renamed so both tables share column names.
25 | CorrectedBenigns_Consensus_AllCancer_ManualNodes_selected <- read.csv(file = "./Mendeley/ProcessedFilesForFigures/Figure2/Step3/Inputs/Fig2_forclustering.csv")
26 | colnames(CorrectedBenigns_Consensus_AllCancer_ManualNodes_selected)[2] <- "Histology"
27 | # Stack the clone rows on top of the benign reference rows.
28 | Fig2a_ManualClusters <- rbind(CorrectedBenigns_Consensus_AllCancer_ManualNodes_selected, PurestBenigns_All)
29 | # Write a headerless, unquoted, tab-separated annotation file.
30 | write.table(x = Fig2a_ManualClusters,
31 |             file = "Fig2_ManualClusters_for_ClusteredPlot_and_HMM.tsv",
32 |             row.names = FALSE, col.names = FALSE,
33 |             quote = FALSE,
34 |             sep = "\t")
35 | ```
36 | 
37 | # Creating the inferCNV object (prior to run)
38 | 
39 | Now creating the object for the supervised clustered run.
40 | 
41 | ```{r, eval = FALSE}
42 | AllCancer_clustered <- infercnv::CreateInfercnvObject(
43 |   raw_counts_matrix = "./Organscale_Unsupervised_Consensus_AllCancer_Counts.tsv",
44 |   annotations_file = "./Fig2_ManualClusters_for_ClusteredPlot_and_HMM.tsv", delim = "\t",
45 |   gene_order_file = "./siCNV_GeneOrderFile.tsv",
46 |   ref_group_names = "Purest Benigns",  # rows annotated with this label form the reference group
47 |   chr_exclude = c("chrM"))
48 | ```
49 | 
50 | # Supervised Run - (Typically run on a cluster)
51 | 
52 | Now running inferCNV on the object created above for the supervised clustered run. Note: this is typically run on a high-performance computing cluster.
53 | 
54 | ```{r, eval = FALSE}
55 | AllCancer_clustered <- infercnv::run(AllCancer_clustered,
56 |   out_dir = "./Figure2_output/Figure2_step3/Outputs",
57 |   cutoff = 0.1,
58 |   cluster_by_groups = TRUE,  # group observations by their annotation label
59 |   denoise = TRUE,
60 |   HMM = TRUE,
61 |   num_threads = 20)
62 | ```
63 | 
64 | And here is the final output file infercnv.21_denoised.png (order rearranged in the manuscript figure 2).
65 | 
66 | ![infercnv.21_denoised.png output](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%202/Step3/infercnv.21_denoised.png)
67 | 
68 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 2/Step3/Fig2_Step3_ClusteredPlot_and_HMM.md:
--------------------------------------------------------------------------------
 1 | # Setup
 2 | 
 3 |     library(tidyverse)
 4 | 
 5 |     ## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
 6 | 
 7 |     ## v ggplot2 3.3.5     v purrr   0.3.4
 8 |     ## v tibble  3.1.1     v dplyr   1.0.6
 9 |     ## v tidyr   1.1.3     v stringr 1.4.0
10 |     ## v readr   2.0.1     v forcats 0.5.1
11 | 
12 |     ## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
13 |     ## x dplyr::filter() masks stats::filter()
14 |     ## x dplyr::lag()    masks stats::lag()
15 | 
16 |     library(infercnv)
17 |     library(Seurat)
18 | 
19 |     ## Registered S3 method overwritten by 'spatstat.geom':
20 |     ##   method     from
21 |     ##   print.boxx cli
22 | 
23 |     ## Attaching SeuratObject
24 | 
25 |     library(hdf5r)
26 | 
27 |     ## 
28 |     ## Attaching package: 'hdf5r'
29 | 
30 |     ## The following object is masked from 'package:purrr':
31 |     ## 
32 |     ##     flatten_df
33 | 
34 |     library(SpatialInferCNV)
35 | 
36 |     ## Warning: replacing previous import 'phylogram::as.phylo' by 'ape::as.phylo' when
37 |     ## loading 'SpatialInferCNV'
38 | 
39 | # Pre-processing clustered data
40 | 
41 | Importing previously downloaded Consensus\_PurestBenigns.csv (step 1),
42 | and the Fig2\_forclustering.csv file created in step 2. We use this to
43 | create an updated annotation file for infercnv::run.
44 | 
45 |     PurestBenigns_All <- read.csv("./Figure2_output/Patient 1/Consensus_PurestBenigns.csv")
46 |     PurestBenigns_All$Histology <- "Purest Benigns"
47 |     # Manually selected clones from step 2; the second column is renamed so both tables share column names.
48 |     CorrectedBenigns_Consensus_AllCancer_ManualNodes_selected <- read.csv("./Mendeley/ProcessedFilesForFigures/Figure2/Step3/Inputs/Fig2_forclustering.csv")
49 |     names(CorrectedBenigns_Consensus_AllCancer_ManualNodes_selected)[2] <- "Histology"
50 |     # Stack the clone rows on top of the benign reference rows.
51 |     Fig2a_ManualClusters <- rbind(CorrectedBenigns_Consensus_AllCancer_ManualNodes_selected, PurestBenigns_All)
52 |     # Write a headerless, unquoted, tab-separated annotation file.
53 |     write.table(Fig2a_ManualClusters, "Fig2_ManualClusters_for_ClusteredPlot_and_HMM.tsv", 
54 |                 sep = "\t",
55 |                 quote = FALSE, 
56 |                 col.names = FALSE, 
57 |                 row.names = FALSE)
58 | 
59 | # Creating the inferCNV object (prior to run)
60 | 
61 | Now creating the object for the supervised clustered run.
62 | 
63 |     AllCancer_clustered <- infercnv::CreateInfercnvObject(raw_counts_matrix="./Organscale_Unsupervised_Consensus_AllCancer_Counts.tsv", 
64 |                                                    gene_order_file="./siCNV_GeneOrderFile.tsv",
65 |                                                    annotations_file="./Fig2_ManualClusters_for_ClusteredPlot_and_HMM.tsv",
66 |                                                    delim="\t",
67 |                                                    ref_group_names="Purest Benigns",  # rows annotated with this label form the reference group
68 |                                                                    chr_exclude = c("chrM"))
69 | 
70 | # Supervised Run - (Typically run on a cluster)
71 | 
72 | Now running inferCNV on the object created above for the supervised clustered run.
73 | Note: this is typically run on a high-performance computing cluster.
74 | 
75 |     AllCancer_clustered = infercnv::run(AllCancer_clustered,
76 |                                                   cutoff=0.1,
77 |                                                   out_dir="./Figure2_output/Figure2_step3/Outputs",   # results are written here
78 |                                                             num_threads = 20,
79 |                                                   cluster_by_groups=TRUE,   # group observations by their annotation label
80 |                                                   denoise=TRUE,
81 |                                                   HMM=TRUE)
82 | 
83 | And here is the final output file infercnv.21\_denoised.png (order
84 | rearranged in the manuscript figure 2).
85 | 
86 | ![infercnv.21\_denoised.png
87 | output](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%202/Step3/infercnv.21_denoised.png)
88 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 2/Step3/infercnv.21_denoised.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 2/Step3/infercnv.21_denoised.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 3/Consensus_H2_1_forclustering_phylo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 3/Consensus_H2_1_forclustering_phylo.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 3/LoupeBrowser_Vis.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 3/LoupeBrowser_Vis.gif


--------------------------------------------------------------------------------
/FigureScripts/Figure 3/NodeSelectionDendrogram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 3/NodeSelectionDendrogram.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 3/NodeSelectionFromDenoised.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 3/NodeSelectionFromDenoised.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 3/infercnv.21_denoised_supervised.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 3/infercnv.21_denoised_supervised.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 3/infercnv.21_denoised_unsupervised.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 3/infercnv.21_denoised_unsupervised.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4a_LN/Figure4a_LNHeatmap.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Fig4a_LNHeatmap
  3 |   Data with InferCNV
  4 | author: "Andrew Erickson, Nuffield Department of Surgical Sciences, University of Oxford"
  5 | output:
  6 |   md_document:
  7 |     variant: markdown_github
  8 | ---
  9 | 
 10 | # Setup 
 11 | 
 12 | ```{r setup, message=FALSE}
 13 | library(tidyverse)
 14 | library(SpatialInferCNV)
 15 | ```
 16 | 
 17 | # Importing Count Data
 18 | 
 19 | Just to make things a little easier for the user, we start by creating an empty working directory so that all downloaded files are organized in one place. 
 20 | 
 21 | ```{r, eval = FALSE}
 22 | dir.create("siCNV_Figure4a")  # working directory that collects the downloaded and generated files
 23 | setwd("siCNV_Figure4a")       # relative paths used afterwards resolve inside it
 24 | ```
 25 | 
 26 | This code chunk imports the .h5 file, a default processed output of the 10x Genomics pipeline (see the [10x Genomics cell ranger pipeline documentation](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/output/molecule_info)), and appends a section label to the barcode. 
 27 | 
 28 | We use the function ImportCountData(), which requires a section label, and a path to the corresponding .h5 file.
 29 | 
 30 | ```{r, eval = FALSE}
 31 | download.file(url = "https://cf.10xgenomics.com/samples/spatial-exp/1.1.0/V1_Human_Lymph_Node/V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5", destfile = "./V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5", mode = "wb")
 32 | 
 33 | Lymph_ENSBMLID_Counts <- ImportCountData("LN10X", "./V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5")  # "LN10X" is the section label
 34 | # Annotation table: the Barcode column plus a constant Histology label.
 35 | Lymph_Annotations <- Lymph_ENSBMLID_Counts %>%
 36 |                           select(Barcode) %>%
 37 |                           mutate(Histology = "Lymph")
 38 | ```
 39 | 
 40 | # QC, and Merging Count and Annotation Data
 41 | 
 42 | We then join the annotations with the count data, to select only spots that are to be analyzed. This step also includes a bare minimum QC thresholding step of allowing only Visium spots with 500 counts or more to be included in the analysis
 43 | 
 44 | ```{r, eval = FALSE}
 45 | Lymph_Joined_Counts <- MergingCountAndAnnotationData("LN10X",Lymph_Annotations, Lymph_ENSBMLID_Counts)
 46 | # The raw import is not referenced again in this script; remove it.
 47 | rm(Lymph_ENSBMLID_Counts)
 48 | ```
 49 | 
 50 | # Merging all count data into one object
 51 | 
 52 | We then join all of the selected count data together into a final dataframe, which is then output as a .tsv file (1 of 3 required inputs for inferCNV), as well as a revised annotation file (takes into account only those of interest that remain after the QC step) as another .tsv file (2 of 3 required inputs from inferCNV)
 53 | 
 54 | ```{r, eval = FALSE}
 55 | # Zero-fill missing counts, then move the "Genes" column into the row names.
 56 | Counts_joined <- Lymph_Joined_Counts %>%
 57 |   replace(., is.na(.), 0) %>%
 58 |   column_to_rownames(var = "Genes")
 59 | 
 60 | write.table(x = Counts_joined, file = "10xLymph_Counts.tsv", sep = "\t")
 61 | 
 62 | LymphFinalAnnotations <- FinalAnnotations(Lymph_Annotations, Counts_joined)
 63 | # Write a headerless, unquoted, tab-separated annotation file.
 64 | write.table(x = LymphFinalAnnotations,
 65 |             file = "10xLymph_Annotations.tsv",
 66 |             row.names = FALSE, col.names = FALSE,
 67 |             quote = FALSE,
 68 |             sep = "\t")
 69 | ```
 70 | 
 71 | # Confirming that the files are formatted correctly to create an inferCNV object
 72 | 
 73 | This code then creates an inferCNV object from the 2 previously created files, as well as from a gene position file, which maps ENSMBLIDs to genomic loci. This file has been provided at /SpatialInferCNV_Dev/SpatialInferCNV/FigureScripts/Figure 4/Figure4a_LN, but if you'd like to build one yourself, please see [the InferCNV documentation on their wiki](https://github.com/broadinstitute/inferCNV/wiki/instructions-create-genome-position-file). 
 74 | 
 75 | ```{r, eval = FALSE}
 76 | Lymph10X_unsupervised <- infercnv::CreateInfercnvObject(raw_counts_matrix="10xLymph_Counts.tsv", 
 77 |                                                gene_order_file="./siCNV_GeneOrderFile.tsv",
 78 |                                                annotations_file="10xLymph_Annotations.tsv",
 79 |                                                delim="\t",
 80 |                                                ref_group_names=NULL,  # no reference group is supplied for this run
 81 |                                                chr_exclude = c("chrM"))
 82 | ```
 83 | 
 84 | # Running InferCNV (Unsupervised)
 85 | 
 86 | If the above steps were performed correctly, then there should be no errors from the previous step.
 87 | 
 88 | Warning! This step is quite computationally intensive. Consider using a high performance cluster for timely analyses instead of running it on a local computer.
 89 | 
 90 | ```{r, eval = FALSE}
 91 | Lymph10X_unsupervised <- infercnv::run(Lymph10X_unsupervised,
 92 |   out_dir = "./Fig4_LN/Outputs/",
 93 |   cutoff = 0.1,
 94 |   cluster_by_groups = FALSE,
 95 |   denoise = TRUE,
 96 |   HMM = FALSE,
 97 |   num_threads = 20)
 98 | ```
 99 | 
100 | # Final output
101 | 
102 | The final desired output is the infercnv.21_denoised.png file.
103 | 
104 | ![Output Image](infercnv.21_denoised.png)
105 | 
106 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4a_LN/Figure4a_LNHeatmap.md:
--------------------------------------------------------------------------------
  1 | # Setup
  2 | 
  3 | ``` r
  4 | library(tidyverse)
  5 | library(SpatialInferCNV)
  6 | ```
  7 | 
  8 |     ## Warning: replacing previous import 'phylogram::as.phylo' by 'ape::as.phylo' when
  9 |     ## loading 'SpatialInferCNV'
 10 | 
 11 | # Importing Count Data
 12 | 
 13 | Just to make things a little easier for the user, we start by creating
 14 | an empty working directory so that all downloaded files are organized in
 15 | one place.
 16 | 
 17 | ``` r
 18 | dir.create("siCNV_Figure4a")  # working directory that collects the downloaded and generated files
 19 | setwd("siCNV_Figure4a")       # relative paths used afterwards resolve inside it
 20 | ```
 21 | 
 22 | This code chunk imports the .h5 file, a default processed output of the 10x
 23 | Genomics pipeline (see the [10x Genomics cell ranger pipeline
 24 | documentation](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/output/molecule_info)),
 25 | and appends a section label to the barcode.
 26 | 
 27 | We use the function ImportCountData(), which requires a section label,
 28 | and a path to the corresponding .h5 file.
 29 | 
 30 | ``` r
 31 | download.file("https://cf.10xgenomics.com/samples/spatial-exp/1.1.0/V1_Human_Lymph_Node/V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5", "./V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5", mode = "wb")
 32 | # Import the downloaded .h5 counts under the section label "LN10X".
 33 | Lymph_ENSBMLID_Counts <- ImportCountData("LN10X", "./V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5")
 34 | # Annotation table: the Barcode column plus a constant Histology label.
 35 | Lymph_Annotations <- Lymph_ENSBMLID_Counts %>%
 36 |                           select(Barcode)
 37 | Lymph_Annotations$Histology <- "Lymph"
 38 | ```
 39 | 
 40 | # QC, and Merging Count and Annotation Data
 41 | 
 42 | We then join the annotations with the count data, to select only spots
 43 | that are to be analyzed. This step also includes a bare minimum QC
 44 | thresholding step of allowing only Visium spots with 500 counts or more
 45 | to be included in the analysis
 46 | 
 47 | ``` r
 48 | Lymph_Joined_Counts <- MergingCountAndAnnotationData("LN10X",Lymph_Annotations, Lymph_ENSBMLID_Counts)
 49 | # The raw import is not referenced again in this script; remove it.
 50 | rm(Lymph_ENSBMLID_Counts)
 51 | ```
 52 | 
 53 | # Merging all count data into one object
 54 | 
 55 | We then join all of the selected count data together into a final
 56 | dataframe, which is then output as a .tsv file (1 of 3 required inputs
 57 | for inferCNV), as well as a revised annotation file (takes into account
 58 | only those of interest that remain after the QC step) as another .tsv
 59 | file (2 of 3 required inputs from inferCNV)
 60 | 
 61 | ``` r
 62 | Counts_joined <- Lymph_Joined_Counts
 63 | # Zero-fill missing counts, then move the "Genes" column into the row names.
 64 | Counts_joined <- Counts_joined %>% replace(., is.na(.), 0)
 65 | Counts_joined <- Counts_joined %>% column_to_rownames(., var = "Genes")
 66 | # Count matrix file for inferCNV.
 67 | write.table(Counts_joined, "10xLymph_Counts.tsv", sep = "\t")
 68 | # Annotations revised against the count table that remains after QC.
 69 | LymphFinalAnnotations <- FinalAnnotations(Lymph_Annotations, Counts_joined)
 70 | # Write a headerless, unquoted, tab-separated annotation file.
 71 | write.table(LymphFinalAnnotations, "10xLymph_Annotations.tsv", 
 72 |             sep = "\t",
 73 |             quote = FALSE, 
 74 |             col.names = FALSE, 
 75 |             row.names = FALSE)
 76 | ```
 77 | 
 78 | # Confirming that the files are formatted correctly to create an inferCNV object
 79 | 
 80 | This code then creates an inferCNV object from the 2 previously created
 81 | files, as well as from a gene position file, which maps ENSMBLIDs to
 82 | genomic loci. This file has been provided at
 83 | /SpatialInferCNV_Dev/SpatialInferCNV/FigureScripts/Figure 4/Figure4a_LN,
 84 | but if you’d like to build one yourself, please see [the InferCNV
 85 | documentation on their
 86 | wiki](https://github.com/broadinstitute/inferCNV/wiki/instructions-create-genome-position-file).
 87 | 
 88 | ``` r
 89 | Lymph10X_unsupervised <- infercnv::CreateInfercnvObject(raw_counts_matrix="10xLymph_Counts.tsv", 
 90 |                                                gene_order_file="./siCNV_GeneOrderFile.tsv",
 91 |                                                annotations_file="10xLymph_Annotations.tsv",
 92 |                                                delim="\t",
 93 |                                                ref_group_names=NULL,  # no reference group is supplied for this run
 94 |                                                chr_exclude = c("chrM"))
 95 | ```
 96 | 
 97 | # Running InferCNV (Unsupervised)
 98 | 
 99 | If the above steps were performed correctly, then there should be no
100 | errors from the previous step.
101 | 
102 | Warning! This step is quite computationally intensive. Consider using a
103 | high performance cluster for timely analyses instead of running it on a
104 | local computer.
105 | 
106 | ``` r
107 | Lymph10X_unsupervised = infercnv::run(Lymph10X_unsupervised,
108 |                                               cutoff=0.1,
109 |                                               out_dir="./Fig4_LN/Outputs/",   # results are written here
110 |                                               num_threads = 20,
111 |                                               cluster_by_groups=FALSE, 
112 |                                               denoise=TRUE,
113 |                                               HMM=FALSE)  # no HMM step for this run
114 | ```
115 | 
116 | # Final output
117 | 
118 | The final desired output is the infercnv.21_denoised.png file.
119 | 
120 | ![Output Image](infercnv.21_denoised.png)
121 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4a_LN/infercnv.21_denoised.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 4/Figure4a_LN/infercnv.21_denoised.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4c_SCC/Step1/Figure4c_Step1_P6_scRNAseq_Benigns.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Figure4c_Step1_P6_scRNAseq_Benigns"
  3 | author: "Andrew Erickson"
  4 | output: md_document
  5 | ---
  6 | # Figure 4c - Step 1 - Selection of Benign references (from paired scRNAseq data)
  7 | 
  8 | In order to run the SCC Visium data with siCNV, we need a reference set. We identified a set of paired scRNA sequencing data, from benign skin cells (from the exact same patient), provided by the authors as listed below.
  9 | 
 10 | # Setup
 11 | 
 12 | Initializing packages.
 13 | 
 14 | ```{r, message=FALSE}
 15 | library(tidyverse)        # package start-up messages are hidden by the `message` chunk option
 16 | library(SpatialInferCNV)
 17 | ```
 18 | 
 19 | # Creating a working directory
 20 | 
 21 | ```{r, eval = FALSE}
 22 | dir.create("Figure4c_output")  # working directory for the files produced by this script
 23 | setwd("Figure4c_output")       # relative paths used afterwards resolve inside it
 24 | ```
 25 | 
 26 | # Downloading and formatting data, part 1
 27 | 
 28 | Warning, this step will take 10-60 min, even with a decent internet connection. 
 29 | 
 30 | ```{r, eval = FALSE}
 31 | counturl <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE144236&format=file&file=GSE144236%5FcSCC%5Fcounts%2Etxt%2Egz"
 32 | tmp <- tempfile()
 33 | download.file(counturl, tmp, mode = "wb")  # binary mode: the payload is gzip-compressed and the URL does not end in ".gz"
 34 | # NOTE: R's default `timeout` option (60 s) may be too short for a file this large; raise it first if the download aborts, e.g. options(timeout = 3600).
 35 | #Warning, this next step will take 10-60 minutes
 36 | merge10pts_counts <- read.delim(gzfile(tmp))
 37 | merge10pts_counts <- as.data.frame(t(merge10pts_counts))
 38 | # Keep the patient 6 rows flagged as normal tissue (0 = normal, 1 = tumor).
 39 | SCC_P6_Benigns <- merge10pts_counts %>%
 40 |             filter(Patient == 6) %>%
 41 |             filter(`Tissue: 0=Normal, 1=Tumor` == 0)
 42 | # Drop the two metadata columns now that filtering is done.
 43 | SCC_P6_Benigns <- SCC_P6_Benigns %>%
 44 |       select(-Patient, -`Tissue: 0=Normal, 1=Tumor`)
 45 | # Cache the subset so the long download and parse need not be repeated.
 46 | save(SCC_P6_Benigns, file = "SCC_P6_Benigns.RData")
 47 | ```
 48 | 
 49 | # Downloading and formatting data, part 2
 50 | 
 51 | We then select only patient 6 data (corresponds to the specific patient in our analyses).
 52 | 
 53 | ```{r, eval = FALSE}
 54 | #Reload the patient 6 benign scRNAseq counts saved by the previous chunk
 55 | load("./SCC_P6_Benigns.RData")
 56 | 
 57 | #Build the barcode annotation table: one row per cell, every cell labelled "P6_Benigns"
 58 | P6_Benigns_forannotations <- rownames_to_column(SCC_P6_Benigns)
 59 | 
 60 | Barcodes_P6 <- transmute(P6_Benigns_forannotations,
 61 |                          Barcodes = rowname,
 62 |                          Histology = "P6_Benigns")
 63 | 
 64 | #Transpose the counts and expose the former column names as a "Genes" column (needed for the infercnv::run inputs)
 65 | SCC_P6_Benigns <- SCC_P6_Benigns %>%
 66 |                       t() %>%
 67 |                       as.data.frame() %>%
 68 |                       rownames_to_column(var = "Genes")
 69 | ```
 70 | 
 71 | # Creating GeneToENSMBL dataframe
 72 | 
 73 | The code below creates the GeneToENSMBL.csv file, but we have provided this on our GitHub:
 74 | 
 75 | [GeneToENSMBL.csv](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/GeneToENSMBL.csv).
 76 | 
 77 | ```{r, eval = FALSE}
 78 | GeneToENSMBL <- read.csv("./GeneToENSMBL.csv")
 79 | #The commented-out lines below record how GeneToENSMBL.csv was generated; uncomment them to rebuild the file
 80 | #library(tidyverse)
 81 | #library(data.table)
 82 | #GeneToENSMBL <- fread('https://data.broadinstitute.org/Trinity/CTAT/cnv/gencode_v19_gen_pos.complete.txt')
 83 | #GeneToENSMBL <- GeneToENSMBL %>% separate(V1, c("left","ENSMBLID"), sep = "\\|")
 84 | 
 85 | #names(GeneToENSMBL)[1] <- "Genes"
 86 | #names(GeneToENSMBL)[3] <- "chr"
 87 | #names(GeneToENSMBL)[4] <- "start"
 88 | #names(GeneToENSMBL)[5] <- "stop"
 89 | 
 90 | #write.csv(GeneToENSMBL, "GeneToENSMBL.csv", row.names = FALSE)
 91 | ```
 92 | 
 93 | # Mapping Gene Names to counts/barcodes, and then outputting the requisite files for infercnv::run
 94 | 
 95 | We need to provide a gene ordering file to inferCNV, in the form of: Gene Name / Chromosome Number / Start Loci / Stop Loci. As the files provided by the authors are in "Gene Name", and our chromosomal / loci information are mapped to ENSMBLID's, we need to map the Gene Names to ENSMBLIDs. 
 96 | 
 97 | ```{r, eval = FALSE}
 98 | Counts_joined <- SCC_P6_Benigns
 99 | Counts_joined <- Counts_joined %>%
100 |                     separate(Genes, c("Genes", NA))
101 | #NOTE(review): separate() has no `sep`, so tidyr's default splits on any non-alphanumeric run ("-" as well as "."); confirm this is intended
102 | Counts_joined <- Counts_joined %>% select(Genes)
103 | 
104 | #Selecting Gene name, chromosome, start and stop locations
105 | GenesForMapping <- GeneToENSMBL %>% select(Genes, chr, start, stop)
106 | GenesInSample <- Counts_joined %>% select(Genes)
107 | 
108 | #Join genes to their positions, drop duplicated genes, then order chromosomes 1-22 followed by X (23) and Y (24); other labels become NA and sort last
109 | GenesInSamplevsOrdering <- inner_join(GenesInSample, GenesForMapping, by = c("Genes" = "Genes"))
110 |   dedup_GenesInSamplevsOrdering <- GenesInSamplevsOrdering[!duplicated(GenesInSamplevsOrdering$Genes), ]
111 |   dedup_GenesInSamplevsOrdering$chromorder <- gsub("chr","",dedup_GenesInSamplevsOrdering$chr)
112 |   dedup_GenesInSamplevsOrdering$chromorder <- as.numeric(ifelse(dedup_GenesInSamplevsOrdering$chromorder == "X", 23,
113 |                                                          ifelse(dedup_GenesInSamplevsOrdering$chromorder == "Y", 24,      dedup_GenesInSamplevsOrdering$chromorder)))
114 |   dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[order(dedup_GenesInSamplevsOrdering$chromorder),]
115 |   dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[,1:4]  
116 | 
117 | MappingFileForInferCNV <- dedup_GenesInSamplevsOrdering
118 | 
119 | #Selecting only genes that have location data
120 | CountmappedGenes <- select(MappingFileForInferCNV, Genes)
121 | Counts_joined <- SCC_P6_Benigns
122 | Counts_joined <- Counts_joined %>%
123 |                     separate(Genes, c("Genes", NA))
124 | 
125 | #Selecting only genes that have location and count data (join key given explicitly instead of relying on the natural join)
126 | Mapped_Counts_joined <- left_join(CountmappedGenes, Counts_joined, by = "Genes")
127 | #Removing duplicates
128 | Mapped_Counts_joined <- Mapped_Counts_joined[!duplicated(Mapped_Counts_joined$Genes), ]
129 | ```
130 | 
131 | # Outputting all files for inferCNV::run
132 | 
133 | ```{r, eval = FALSE}
134 | #Counts table (Genes column followed by one column per barcode), written with its header row
135 | write.table(Mapped_Counts_joined, 
136 |             "SCC_P6_Bg_Selected_Mapped_Counts.tsv",
137 |             row.names = FALSE,
138 |             sep = "\t")
139 | #Annotation table (Barcodes, Histology), written without a header
140 | write.table(Barcodes_P6, 
141 |             "SCC_P6_Bg_Selected_CorrectedBarcodes.tsv", 
142 |             quote = FALSE, 
143 |             col.names = FALSE, 
144 |             row.names = FALSE, 
145 |             sep = "\t")
146 | #Gene order table (Genes, chr, start, stop), written without a header
147 | write.table(MappingFileForInferCNV, 
148 |             "SCC_P6_Bg_MappingFileForInferCNV.tsv", 
149 |             quote = FALSE, 
150 |             col.names = FALSE, 
151 |             row.names = FALSE, 
152 |             sep = "\t")
153 | ```
154 | 
155 | # Creating the inferCNV object (prior to run)
156 | 
157 | Creating the object for infercnv::run.
158 | 
159 | ```{r, eval = FALSE}
160 | P6_Bg_infCNV <- infercnv::CreateInfercnvObject(raw_counts_matrix="./SCC_P6_Bg_Selected_Mapped_Counts.tsv", 
161 |                                                gene_order_file="./SCC_P6_Bg_MappingFileForInferCNV.tsv",
162 |                                                annotations_file="./SCC_P6_Bg_Selected_CorrectedBarcodes.tsv",
163 |                                                delim="\t",  #the three input files were written tab-separated
164 |                                                ref_group_names=NULL,  #no reference group is supplied for this run
165 |                                                chr_exclude = c("chrM"))  #leave out genes annotated to chrM
166 | 
167 | 
168 | ```
169 | 
170 | # Unsupervised Run - (Typically run on a cluster)
171 | 
172 | Running infercnv.
173 | 
174 | ```{r, eval = FALSE}
175 | P6_Bg_infCNV = infercnv::run(P6_Bg_infCNV,
176 |                                               cutoff=0.1,  #gene filtering threshold passed to infercnv (see infercnv::run docs)
177 |                                               out_dir="./Figure4c_Step1/Outputs",  #the dendrogram file is read back from this folder below
178 |                                               num_threads = 20,  #sized for a cluster node; lower this on a workstation
179 |                                               cluster_by_groups=FALSE,  #cluster all cells together instead of per annotation group
180 |                                               denoise=TRUE,
181 |                                               HMM=FALSE)  #no HMM CNV prediction in this step
182 | ```
183 | 
184 | InferCNV will output many files. We are primarily interested in the final "infercnv.21_denoised.png" file, as well as the text file associated with the dendrogram associated with the hierarchical clustering on the left hand side of the image (infercnv.21_denoised.observations_dendrogram.txt).
185 | 
186 | ![infercnv.21_denoised.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step1/infercnv.21_denoised.png)
187 | 
188 | # Importing dendrogram
189 | 
190 | Next, we want to import this dendrogram file, this was created just above.
191 | 
192 | ```{r, eval = FALSE}
193 | library(ape)
194 | library(phylogram)
195 | SCC_P6_benigns_for_clustering <- read.dendrogram(file = "./Figure4c_Step1/Outputs/infercnv.21_denoised.observations_dendrogram.txt")  #dendrogram text file from the infercnv run above
196 | #Convert to a phylo object so that nodes can be numbered and subtrees extracted in the next chunks
197 | SCC_P6_benigns_for_clustering_phylo <- as.phylo(SCC_P6_benigns_for_clustering)
198 | ```
199 | 
200 | # Visualizing dendrogram node numbers
201 | 
202 | Next, we want to visualize the numbers associated with the nodes of interest (clones). We output a large image file that allows us to manually inspect which nodes (cells) should be selected as the purest benign references. Here, we want the cells with the least signal possible.
203 | 
204 | ```{r, eval = FALSE}
205 | my.subtrees = subtrees(SCC_P6_benigns_for_clustering_phylo)  # list of subtrees; indexed by node number when selecting nodes later
206 | #Draw the tree to a large PNG and label every internal node with its index so nodes can be picked by eye
207 | png("SCC_P6_benigns_for_clustering_phylo.png",width=10000,height=2500, res = 300)
208 | plot(SCC_P6_benigns_for_clustering_phylo,show.tip.label = FALSE)
209 | nodelabels(text=1:SCC_P6_benigns_for_clustering_phylo$Nnode,node=1:SCC_P6_benigns_for_clustering_phylo$Nnode+Ntip(SCC_P6_benigns_for_clustering_phylo))  #label i is drawn at node number i + Ntip
210 | dev.off()
211 | ```
212 | 
213 | We provide the image output here: 
214 | 
215 | ![SCC_P6_benigns_for_clustering_phylo.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step1/SCC_P6_benigns_for_clustering_phylo.png)
216 | 
217 | # Purest Benign selection 
218 | 
219 | Next, view the output .png file, which provides an (albeit cluttered) labeling of the dendrogram tree nodes. Manually select individual nodes that correspond with a distinct subclonal grouping or signal, that will be taken forward for re-clustering. This can be iteratively tweaked with the next step + spatial visualization until optimal. We provide more details [here](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/BenignRefs.md), and provide the finalized selected SCC purest benign nodes here.
220 | 
221 | 
222 | ```{r, eval = FALSE}
223 | #A - 4034
224 | #B - 3605   
225 | #B - 3360  
226 | #B - 2316
227 | #B - 724
228 | #C - 2
229 | ```
230 | 
231 | # Selecting purest benigns 
232 | 
233 | Next, after identifying the numerical nodes that correspond to dendrogram branches that correspond with a given set of molecular signals, we then manually select these nodes in R, apply a label, then join them all together for use in the next step.
234 | 
235 | ```{r, eval = FALSE}
236 | library(SpatialInferCNV)
237 | library(tidyverse)
238 | #Collect the barcodes under dendrogram nodes 4034 and 2 (presumably returned with Barcode and Node columns - see their use below)
239 | Node4034 <- SelectingSubTreeData(my.subtrees, 4034)
240 | Node2 <- SelectingSubTreeData(my.subtrees, 2)
241 | 
242 | Merged <- rbind(Node4034, Node2)
243 | table(Merged$Node)  #number of barcodes per selected node
244 | #Rows whose Node is "Node_4034" become "PurestBenigns"; all other rows become "OtherBenigns"
245 | Merged$Node <- ifelse(Merged$Node == "Node_4034", "PurestBenigns", "OtherBenigns")
246 | names(Merged)[2] <- "Histology"
247 | #Only the purest benigns are written out as the reference set
248 | BenignRefs <- filter(Merged, Histology == "PurestBenigns") %>%
249 |                                             select(Barcode, Histology)
250 | 
251 | write.csv(BenignRefs, "Figure4c_SCCP6_BenignReferenceSet.csv", row.names = FALSE)
252 | 
253 | ```
254 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4c_SCC/Step1/Figure4c_Step1_P6_scRNAseq_Benigns.md:
--------------------------------------------------------------------------------
  1 | # Figure 4c - Step 1 - Selection of Benign references (from paired scRNAseq data)
  2 | 
  3 | In order to run the SCC Visium data with siCNV, we need a reference set.
  4 | We identified a set of paired scRNA sequencing data, from benign skin
  5 | cells (from the same exact patient), provided by the authors as listed
  6 | below.
  7 | 
  8 | # Setup
  9 | 
 10 | Initializing packages.
 11 | 
 12 |     library(tidyverse)
 13 | 
 14 |     ## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
 15 | 
 16 |     ## v ggplot2 3.3.5     v purrr   0.3.4
 17 |     ## v tibble  3.1.1     v dplyr   1.0.6
 18 |     ## v tidyr   1.1.3     v stringr 1.4.0
 19 |     ## v readr   2.0.1     v forcats 0.5.1
 20 | 
 21 |     ## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
 22 |     ## x dplyr::filter() masks stats::filter()
 23 |     ## x dplyr::lag()    masks stats::lag()
 24 | 
 25 |     library(SpatialInferCNV)
 26 | 
 27 |     ## Registered S3 method overwritten by 'spatstat.geom':
 28 |     ##   method     from
 29 |     ##   print.boxx cli
 30 | 
 31 |     ## Warning: replacing previous import 'phylogram::as.phylo' by 'ape::as.phylo' when
 32 |     ## loading 'SpatialInferCNV'
 33 | 
 34 | # Creating a working directory
 35 | 
 36 |     dir.create("Figure4c_output")
 37 |     setwd("Figure4c_output")
 38 | 
 39 | # Downloading and formatting data, part 1
 40 | 
 41 | Warning, this step will take 10-60 min, even with a decent internet
 42 | connection.
 43 | 
 44 |     counturl <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE144236&format=file&file=GSE144236%5FcSCC%5Fcounts%2Etxt%2Egz"
 45 |     tmp <- tempfile()
 46 |     download.file(counturl,tmp)
 47 | 
 48 |     #Warning, this next step will take 10-60 minutes
 49 |     merge10pts_counts <- read.delim(gzfile(tmp))
 50 |     merge10pts_counts <- as.data.frame(t(merge10pts_counts))
 51 | 
 52 |     SCC_P6_Benigns <- merge10pts_counts %>%
 53 |                 filter(Patient == 6) %>%
 54 |                 filter(`Tissue: 0=Normal, 1=Tumor` == 0)
 55 | 
 56 |     SCC_P6_Benigns <- SCC_P6_Benigns %>%
 57 |           select(-Patient, -`Tissue: 0=Normal, 1=Tumor`)
 58 | 
 59 |     save(SCC_P6_Benigns, file = "SCC_P6_Benigns.RData")
 60 | 
 61 | # Downloading and formatting data, part 2
 62 | 
 63 | We then select only patient 6 data (corresponds to the specific patient
 64 | in our analyses).
 65 | 
 66 |     #Import SCC, Patient 6, scRNAseq benigns that we subset out above
 67 |     load("./SCC_P6_Benigns.RData")
 68 | 
 69 |     #Following code creates a barcode dataframe that we will need later
 70 |     P6_Benigns_forannotations <- SCC_P6_Benigns %>% rownames_to_column()
 71 | 
 72 |     Barcodes_P6 <- P6_Benigns_forannotations %>% 
 73 |                       select(rowname) %>%
 74 |                       mutate(Histology = "P6_Benigns")
 75 |     names(Barcodes_P6)[1] <- "Barcodes"
 76 | 
 77 |     #Next, we will prepare the gene order file required for infercnv:run
 78 |     SCC_P6_Benigns <- as.data.frame(t(SCC_P6_Benigns))
 79 |     SCC_P6_Benigns <- SCC_P6_Benigns %>% rownames_to_column()
 80 |     names(SCC_P6_Benigns)[1] <- "Genes"
 81 | 
 82 | # Creating GeneToENSMBL dataframe
 83 | 
 84 | The code below creates the GeneToENSMBL.csv file, but we have provided
 85 | this on our GitHub:
 86 | 
 87 | [GeneToENSMBL.csv](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/GeneToENSMBL.csv).
 88 | 
 89 |     GeneToENSMBL <- read.csv("./GeneToENSMBL.csv")
 90 | 
 91 |     #library(tidyverse)
 92 |     #library(data.table)
 93 |     #GeneToENSMBL <- fread('https://data.broadinstitute.org/Trinity/CTAT/cnv/gencode_v19_gen_pos.complete.txt')
 94 |     #GeneToENSMBL <- mydat %>% separate(V1, c("left","ENSMBLID"), sep = "\\|")
 95 | 
 96 |     #names(GeneToENSMBL)[1] <- "Genes"
 97 |     #names(GeneToENSMBL)[3] <- "chr"
 98 |     #names(GeneToENSMBL)[4] <- "start"
 99 |     #names(GeneToENSMBL)[5] <- "stop"
100 | 
101 |     #write.csv(GeneToENSMBL, "GeneToENSMBL.csv", row.names = FALSE)
102 | 
103 | # Mapping Gene Names to counts/barcodes, and then outputting the requisite files for infercnv::run
104 | 
105 | We need to provide a gene ordering file to inferCNV, in the form of:
106 | Gene Name / Chromosome Number / Start Loci / Stop Loci. As the files
107 | provided by the authors are in “Gene Name”, and our chromosomal / loci
108 | information are mapped to ENSMBLID’s, we need to map the Gene Names to
109 | ENSMBLIDs.
110 | 
111 |     Counts_joined <- SCC_P6_Benigns
112 |     Counts_joined <- Counts_joined %>%
113 |                         separate(Genes, c("Genes", NA))
114 | 
115 |     Counts_joined <- Counts_joined %>% select(Genes)
116 | 
117 |     #Selecting Gene name, chromosome, start and stop locations
118 |     GenesForMapping <- GeneToENSMBL %>% select(Genes, chr, start, stop)
119 |     GenesInSample <- Counts_joined %>% select(Genes)
120 | 
121 |     #Next, reordering the entries from Chromsomes 1-22, followed by X and Y
122 |     GenesInSamplevsOrdering <- inner_join(GenesInSample, GenesForMapping, by = c("Genes" = "Genes"))
123 |       dedup_GenesInSamplevsOrdering <- GenesInSamplevsOrdering[!duplicated(GenesInSamplevsOrdering$Genes), ]
124 |       dedup_GenesInSamplevsOrdering$chromorder <- gsub("chr","",dedup_GenesInSamplevsOrdering$chr)
125 |       dedup_GenesInSamplevsOrdering$chromorder <- as.numeric(ifelse(dedup_GenesInSamplevsOrdering$chromorder == "X", 23,
126 |                                                              ifelse(dedup_GenesInSamplevsOrdering$chromorder == "Y", 24,      dedup_GenesInSamplevsOrdering$chromorder)))
127 |       dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[order(dedup_GenesInSamplevsOrdering$chromorder),]
128 |       dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[,1:4]  
129 | 
130 |     MappingFileForInferCNV <- dedup_GenesInSamplevsOrdering
131 | 
132 |     #Selecting only genes that have location data
133 |     CountmappedGenes <- select(MappingFileForInferCNV, Genes)
134 |     Counts_joined <- SCC_P6_Benigns
135 |     Counts_joined <- Counts_joined %>%
136 |                         separate(Genes, c("Genes", NA))
137 | 
138 |     #Selecting only genes that have location and count data
139 |     Mapped_Counts_joined <- left_join(CountmappedGenes, Counts_joined)
140 |     #Removing duplicates
141 |     Mapped_Counts_joined <- Mapped_Counts_joined[!duplicated(Mapped_Counts_joined$Genes), ]
142 | 
143 | # Outputting all files for inferCNV::run
144 | 
145 |     #Write GenesInSamplevsOrdering
146 |     write.table(Mapped_Counts_joined, 
147 |                 "SCC_P6_Bg_Selected_Mapped_Counts.tsv",
148 |                 row.names = FALSE,
149 |                 sep = "\t")
150 | 
151 |     write.table(Barcodes_P6, 
152 |                 "SCC_P6_Bg_Selected_CorrectedBarcodes.tsv", 
153 |                 quote = FALSE, 
154 |                 col.names = FALSE, 
155 |                 row.names = FALSE, 
156 |                 sep = "\t")
157 | 
158 |     write.table(MappingFileForInferCNV, 
159 |                 "SCC_P6_Bg_MappingFileForInferCNV.tsv", 
160 |                 quote = FALSE, 
161 |                 col.names = FALSE, 
162 |                 row.names = FALSE, 
163 |                 sep = "\t")
164 | 
165 | # Creating the inferCNV object (prior to run)
166 | 
167 | Creating the object for infercnv::run.
168 | 
169 |     P6_Bg_infCNV <- infercnv::CreateInfercnvObject(raw_counts_matrix="./SCC_P6_Bg_Selected_Mapped_Counts.tsv", 
170 |                                                    gene_order_file="./SCC_P6_Bg_MappingFileForInferCNV.tsv",
171 |                                                    annotations_file="./SCC_P6_Bg_Selected_CorrectedBarcodes.tsv",
172 |                                                    delim="\t",
173 |                                                    ref_group_names=NULL,
174 |                                                    chr_exclude = c("chrM"))
175 | 
176 | # Unsupervised Run - (Typically run on a cluster)
177 | 
178 | Running infercnv.
179 | 
180 |     P6_Bg_infCNV = infercnv::run(P6_Bg_infCNV,
181 |                                                   cutoff=0.1,
182 |                                                   out_dir="./Figure4c_Step1/Outputs", 
183 |                                                   num_threads = 20,
184 |                                                   cluster_by_groups=FALSE, 
185 |                                                   denoise=TRUE,
186 |                                                   HMM=FALSE)
187 | 
188 | InferCNV will output many files. We are primarily interested in the
189 | final “infercnv.21\_denoised.png” file, as well as the text file
190 | associated with the dendrogram associated with the hierarchical
191 | clustering on the left hand side of the image
192 | (infercnv.21\_denoised.observations\_dendrogram.txt).
193 | 
194 | ![infercnv.21\_denoised.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step1/infercnv.21_denoised.png)
195 | 
196 | # Importing dendrogram
197 | 
198 | Next, we want to import this dendrogram file, this was created just
199 | above.
200 | 
201 |     library(ape)
202 |     library(phylogram)
203 |     SCC_P6_benigns_for_clustering <- read.dendrogram(file = "./Figure4c_Step1/Outputs/infercnv.21_denoised.observations_dendrogram.txt")
204 | 
205 |     SCC_P6_benigns_for_clustering_phylo <- as.phylo(SCC_P6_benigns_for_clustering)
206 | 
207 | # Visualizing dendrogram node numbers
208 | 
209 | Next, we want to visualize the numbers associated with the nodes of
210 | interest (clones). We output a large image file that allows us to
211 | manually inspect which nodes (cells) should be selected as the purest
212 | benign references. Here, we want the cells with the least signal
213 | possible.
214 | 
215 |     my.subtrees = subtrees(SCC_P6_benigns_for_clustering_phylo)  # subtrees() to subset
216 | 
217 |     png("SCC_P6_benigns_for_clustering_phylo.png",width=10000,height=2500, res = 300)
218 |     plot(SCC_P6_benigns_for_clustering_phylo,show.tip.label = FALSE)
219 |     nodelabels(text=1:SCC_P6_benigns_for_clustering_phylo$Nnode,node=1:SCC_P6_benigns_for_clustering_phylo$Nnode+Ntip(SCC_P6_benigns_for_clustering_phylo))
220 |     dev.off()
221 | 
222 | We provide the image output here:
223 | 
224 | ![SCC\_P6\_benigns\_for\_clustering\_phylo.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step1/SCC_P6_benigns_for_clustering_phylo.png)
225 | 
226 | # Purest Benign selection
227 | 
228 | Next, view the output .png file, which provides an (albeit cluttered)
229 | labeling of the dendrogram tree nodes. Manually select individual nodes
230 | that correspond with a distinct subclonal grouping or signal, that will
231 | be taken forward for re-clustering. This can be iteratively tweaked with
232 | the next step + spatial visualization until optimal. We provide more
233 | details
234 | [here](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/BenignRefs_ForFigs2and3/BenignRefs.md),
235 | and provide the finalized selected SCC purest benign nodes here.
236 | 
237 |     #A - 4034
238 |     #B - 3605   
239 |     #B - 3360  
240 |     #B - 2316
241 |     #B - 724
242 |     #C - 2
243 | 
244 | # Selecting purest benigns
245 | 
246 | Next, after identifying the numerical nodes that correspond to
247 | dendrogram branches that correspond with a given set of molecular
248 | signals, we then manually select these nodes in R, apply a label, then
249 | join them all together for use in the next step.
250 | 
251 |     library(SpatialInferCNV)
252 |     library(tidyverse)
253 | 
254 |     Node4034 <- SelectingSubTreeData(my.subtrees, 4034)
255 |     Node2 <- SelectingSubTreeData(my.subtrees, 2)
256 | 
257 |     Merged <- rbind(Node4034, Node2)
258 |     table(Merged$Node)
259 | 
260 |     Merged$Node <- ifelse(Merged$Node == "Node_4034", "PurestBenigns", "OtherBenigns")
261 |     names(Merged)[2] <- "Histology"
262 | 
263 |     BenignRefs <- filter(Merged, Histology == "PurestBenigns") %>%
264 |                                                 select(Barcode, Histology)
265 | 
266 |     write.csv(BenignRefs, "Figure4c_SCCP6_BenignReferenceSet.csv", row.names = FALSE)
267 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4c_SCC/Step1/SCC_P6_benigns_for_clustering_phylo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 4/Figure4c_SCC/Step1/SCC_P6_benigns_for_clustering_phylo.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4c_SCC/Step1/infercnv.21_denoised.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 4/Figure4c_SCC/Step1/infercnv.21_denoised.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4c_SCC/Step2/Figure4c_Step2_SCC_P6_siCNV_unsupervised.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "SCC_benigns"
  3 | author: "Andrew Erickson"
  4 | output: md_document
  5 | ---
  6 | 
  7 | Having now selected a benign reference set in Step 1, we now use these data to perform unsupervised analysis of the SCC Visium section.
  8 | 
  9 | # Setup
 10 | 
 11 | Initializing libraries
 12 | 
 13 | ```{r, message=FALSE}
 14 | library(tidyverse)
 15 | library(SpatialInferCNV)
 16 | ```
 17 | 
 18 | # Importing Data for Benigns
 19 | 
 20 | We already imported the data in the previous step; let's re-import it and filter only for the selected/filtered benign reference set.
 21 | 
 22 | ```{r, eval = FALSE}
 23 | #Reload the patient 6 benign scRNAseq counts that were subset and saved in step 1
 24 | load("./Figure4c_output/SCC_P6_Benigns.RData")
 25 | 
 26 | head(SCC_P6_Benigns)
 27 | 
 28 | #Barcodes of the benign reference set written in step 1, relabelled for this analysis
 29 | SCC_P6_BenignReferences_Barcodes <- read.csv("./Figure4c_SCCP6_BenignReferenceSet.csv")
 30 | colnames(SCC_P6_BenignReferences_Barcodes)[1] <- "Barcodes"
 31 | SCC_P6_BenignReferences_Barcodes[["Histology"]] <- "PurestBenign_SCCPatient6"
 32 | 
 33 | #Keep only the counts of those reference barcodes, then transpose and expose the former column names as "Genes"
 34 | SCC_P6_BenignReferences_Counts <- SCC_P6_Benigns %>%
 35 |   rownames_to_column(var = "Barcodes") %>%
 36 |   left_join(SCC_P6_BenignReferences_Barcodes, ., by = "Barcodes") %>%
 37 |   select(-Histology) %>%
 38 |   column_to_rownames(var = "Barcodes") %>%
 39 |   t() %>% as.data.frame() %>% rownames_to_column(var = "Genes")
 40 | rm(SCC_P6_Benigns)
 41 | 
 42 | saveRDS(SCC_P6_BenignReferences_Counts, file = "SCC_P6_BenignReferences_Counts.rds")
 43 | saveRDS(SCC_P6_BenignReferences_Barcodes, file = "SCC_P6_BenignReferences_Barcodes.rds")
 44 | 
 45 | ```
 46 | 
 47 | # Importing Data for Visium Data
 48 | 
 49 | Download the files [from Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29): SCC_patient/.
 50 | 
 51 | Here, we are filtering for the section used in the Figure 4d analysis from a parent seurat object. We output both the counts and the barcodes from this Visium section. We manually apply a QC threshold to only include Visium spots with at least 500 counts.
 52 | 
 53 | ```{r, eval = FALSE}
 54 | t28 <- readRDS("./t28.Rds")
 55 | #Extract the Spatial assay counts from the loaded object, then drop the object
 56 | SCC_P6_Visium_Counts <- as.data.frame(t28@assays$Spatial@counts)
 57 | rm(t28)
 58 | 
 59 | head(SCC_P6_Visium_Counts)
 60 | #Transpose so each row is a spot; the last character of the spot name is taken as the section id
 61 | SCC_P6_Visium_Counts <- as.data.frame(t(SCC_P6_Visium_Counts))
 62 | SCC_P6_Visium_Counts <- rownames_to_column(SCC_P6_Visium_Counts)
 63 | SCC_P6_Visium_Counts$section <- str_sub(SCC_P6_Visium_Counts$rowname, start= -1)
 64 | table(SCC_P6_Visium_Counts$section)
 65 | #The barcode is the spot name without its last two characters
 66 | SCC_P6_Visium_Counts$barcode <- str_sub(SCC_P6_Visium_Counts$rowname, start = 1L, end = -3)
 67 | #Keep only section 1
 68 | SCC_P6_Visium_Counts <- SCC_P6_Visium_Counts %>% filter(section == 1)
 69 | #Annotation table (Barcodes, Histology); every retained spot is labelled "SCC_P6_Visium"
 70 | SCC_P6_Visium_Annotations <- SCC_P6_Visium_Counts %>% select(barcode, section)
 71 | SCC_P6_Visium_Annotations$section <- "SCC_P6_Visium"
 72 | names(SCC_P6_Visium_Annotations)[1] <- "Barcodes"
 73 | names(SCC_P6_Visium_Annotations)[2] <- "Histology"
 74 | #NOTE(review): the annotations are saved before the >= 500 count filter below, so they can include spots that are absent from the saved counts
 75 | saveRDS(SCC_P6_Visium_Annotations, file = "SCC_P6_Visium_Annotations.rds")
 76 | 
 77 | SCC_P6_Visium_Counts <- column_to_rownames(SCC_P6_Visium_Counts, var = "barcode")
 78 | SCC_P6_Visium_Counts <- SCC_P6_Visium_Counts %>% select(-rowname, -section)
 79 | #QC: keep spots with at least 500 total counts, then transpose back and expose the row names as "Genes"
 80 | SCC_P6_Visium_Counts$Total <- rowSums(SCC_P6_Visium_Counts)
 81 | SCC_P6_Visium_Counts <- SCC_P6_Visium_Counts %>% filter(Total >= 500)
 82 | SCC_P6_Visium_Counts <- select(SCC_P6_Visium_Counts, -Total)
 83 | SCC_P6_Visium_Counts <- as.data.frame(t(SCC_P6_Visium_Counts))
 84 | SCC_P6_Visium_Counts <- SCC_P6_Visium_Counts[,colSums(is.na(SCC_P6_Visium_Counts))<nrow(SCC_P6_Visium_Counts)]  #drop columns that are entirely NA
 85 | SCC_P6_Visium_Counts <- tibble::rownames_to_column(SCC_P6_Visium_Counts, "Genes")
 86 | 
 87 | saveRDS(SCC_P6_Visium_Counts, file = "SCC_P6_Visium_Counts.rds")
 88 | ```
 89 | 
 90 | # Joining Visium and Benign Barcodes
 91 | 
 92 | Next, we join the benign reference and Visium barcodes.
 93 | 
 94 | ```{r, eval = FALSE}
 95 | SCC_P6_Visium_Annotations <- readRDS("./SCC_P6_Visium_Annotations.rds")
 96 | SCC_P6_BenignReferences_Barcodes <- readRDS("./SCC_P6_BenignReferences_Barcodes.rds")
 97 | #Stack the Visium and benign-reference annotation tables (both have Barcodes and Histology columns)
 98 | Joined_Barcodes <- rbind(SCC_P6_Visium_Annotations, SCC_P6_BenignReferences_Barcodes)
 99 | saveRDS(Joined_Barcodes, file = "SCC_P6_BenignRef_and_Visium_Annotations.rds")
100 | ```
101 | 
102 | # Joining Visium and Benign Count Data
103 | 
104 | Next, we join the benign reference and visium count data.
105 | 
106 | ```{r, eval = FALSE}
107 | SCC_P6_BenignReferences_Counts <- readRDS("./SCC_P6_BenignReferences_Counts.rds")
108 | SCC_P6_Visium_Counts <- readRDS("./SCC_P6_Visium_Counts.rds")
109 | 
110 | head(SCC_P6_BenignReferences_Counts)
111 | head(SCC_P6_Visium_Counts)
112 | #Full join on "Genes" keeps genes present in either data set; values missing on one side are then set to 0
113 | SCC_P6_BenignRef_and_Visium_Counts <- SCC_P6_BenignReferences_Counts %>% full_join(SCC_P6_Visium_Counts, by = "Genes")
114 | SCC_P6_BenignRef_and_Visium_Counts <- SCC_P6_BenignRef_and_Visium_Counts %>% replace(., is.na(.), 0)
115 | 
116 | saveRDS(SCC_P6_BenignRef_and_Visium_Counts, file = "SCC_P6_BenignRef_and_Visium_Counts.rds")
117 | ```
118 | 
119 | # Creating GeneToENSMBL dataframe
120 | 
121 | The code below creates the GeneToENSMBL.csv file, but we have provided this on our GitHub:
122 | 
123 | [GeneToENSMBL.csv](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/GeneToENSMBL.csv).
124 | 
125 | ```{r, eval = FALSE}
126 | GeneToENSMBL <- read.csv("./Mendeley/ProcessedFilesForFigures/Figure4/GeneToENSMBL.csv")
127 | #The commented-out lines below record how GeneToENSMBL.csv was generated; uncomment them to rebuild the file
128 | #library(tidyverse)
129 | #library(data.table)
130 | #GeneToENSMBL <- fread('https://data.broadinstitute.org/Trinity/CTAT/cnv/gencode_v19_gen_pos.complete.txt')
131 | #GeneToENSMBL <- GeneToENSMBL %>% separate(V1, c("left","ENSMBLID"), sep = "\\|")
132 | 
133 | #names(GeneToENSMBL)[1] <- "Genes"
134 | #names(GeneToENSMBL)[3] <- "chr"
135 | #names(GeneToENSMBL)[4] <- "start"
136 | #names(GeneToENSMBL)[5] <- "stop"
137 | 
138 | #write.csv(GeneToENSMBL, "GeneToENSMBL.csv", row.names = FALSE)
139 | ```
140 | 
141 | # Mapping Gene Names to counts/barcodes, and then outputting the requisite files for infercnv::run, part 1
142 | 
143 | We need to provide a gene ordering file to inferCNV, in the form of: Gene Name / Chromosome Number / Start Loci / Stop Loci. As the files provided by the authors are in "Gene Name", and our chromosomal / loci information are mapped to ENSMBLID's, we need to map the Gene Names to ENSMBLIDs. 
144 | 
145 | ```{r, eval = FALSE}
146 | #removing "." - NOTE(review): separate() has no `sep`, so tidyr's default splits on any non-alphanumeric run ("-" as well as "."); confirm this is intended
147 | Counts_joined <- SCC_P6_BenignRef_and_Visium_Counts
148 | Counts_joined <- Counts_joined %>%
149 |                     separate(Genes, c("Genes", NA))
150 | 
151 | Counts_joined <- Counts_joined %>% select(Genes)
152 | #Join genes to their positions, drop duplicated genes, then order chromosomes 1-22 followed by X (23) and Y (24); other labels become NA and sort last
153 | GenesForMapping <- GeneToENSMBL %>% select(Genes, chr, start, stop)
154 | GenesInSample <- Counts_joined %>% select(Genes)
155 | GenesInSamplevsOrdering <- inner_join(GenesInSample, GenesForMapping, by = c("Genes" = "Genes"))
156 |   dedup_GenesInSamplevsOrdering <- GenesInSamplevsOrdering[!duplicated(GenesInSamplevsOrdering$Genes), ]
157 |   dedup_GenesInSamplevsOrdering$chromorder <- gsub("chr","",dedup_GenesInSamplevsOrdering$chr)
158 |   dedup_GenesInSamplevsOrdering$chromorder <- as.numeric(ifelse(dedup_GenesInSamplevsOrdering$chromorder == "X", 23,
159 |                                                          ifelse(dedup_GenesInSamplevsOrdering$chromorder == "Y", 24,      dedup_GenesInSamplevsOrdering$chromorder)))
160 |   dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[order(dedup_GenesInSamplevsOrdering$chromorder),]
161 |   dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[,1:4]  
162 |   
163 | MappingFileForInferCNV <- dedup_GenesInSamplevsOrdering
164 | 
165 | saveRDS(MappingFileForInferCNV, file = "MappingFileForSCC_P6_Visium_and_Bg.rds")  
166 | ```
167 | 
168 | # Outputting the requisite files for infercnv::run, part 2
169 | 
170 | We then filter for only mapped genes, from counts, and then output the three requisite files for infercnv::run.
171 | 
172 | ```{r, eval = FALSE}
173 | MappingFileForInferCNV <- readRDS("MappingFileForSCC_P6_Visium_and_Bg.rds")
174 | SCC_P6_BenignRef_and_Visium_Counts <- readRDS("SCC_P6_BenignRef_and_Visium_Counts.rds")
175 | 
176 | # Genes that received a chromosomal position, in gene-order-file order.
177 | CountmappedGenes <- select(MappingFileForInferCNV, Genes)
178 | 
179 | # Trim the gene names exactly as was done when the mapping file was built, so the names match in the join.
180 | Counts_joined <- SCC_P6_BenignRef_and_Visium_Counts %>%
181 |                     separate(Genes, c("Genes", NA))
182 | 
183 | # Keep only mapped genes, one row per gene. The join key is explicit so dplyr does not have to guess it.
184 | Mapped_Counts_joined <- left_join(CountmappedGenes, Counts_joined, by = "Genes")
185 | Mapped_Counts_joined <- Mapped_Counts_joined[!duplicated(Mapped_Counts_joined$Genes), ]
186 | 
187 | # Every column except the first ("Genes") is a barcode; the annotation table has one row per barcode.
188 | Mapped_Counts_joinedSliced <- data.frame(Barcode = colnames(Mapped_Counts_joined)[-1], stringsAsFactors = FALSE)
189 | # Barcodes prefixed "P6_N" are labelled as the reference group; all other barcodes are labelled "Visium".
190 | Mapped_Counts_joinedSliced$Histology <- ifelse(substr(Mapped_Counts_joinedSliced$Barcode, start = 1, stop = 4) == "P6_N", "PurestBenign_SCCPatient6", "Visium")
191 | 
192 | # Counts matrix: gene names in the first column, one column per barcode, with a header row.
193 | write.table(Mapped_Counts_joined, 
194 |             "SCC_P6_BenignRef_and_Visium_Mapped_Counts.tsv",
195 |             row.names = FALSE,
196 |             sep = "\t")
197 | 
198 | write.table(MappingFileForInferCNV,  # gene order file: Genes, chr, start, stop (no header)
199 |             "SCC_P6_BenignRef_and_Visium_GeneOrderFile.tsv", 
200 |             quote = FALSE, 
201 |             col.names = FALSE, 
202 |             row.names = FALSE, 
203 |             sep = "\t")
204 | 
205 | # Annotation file: barcode and group label, tab-separated, no header.
206 | write.table(Mapped_Counts_joinedSliced, 
207 |             "SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv", 
208 |             quote = FALSE, 
209 |             col.names = FALSE, 
210 |             row.names = FALSE, 
211 |             sep = "\t")
212 | ```
213 | 
214 | # Creating the inferCNV object (prior to run)
215 | 
216 | Creating the object for infercnv::run.
217 | 
218 | ```{r, eval = FALSE}
219 | Visium_P6_Bg_infCNV <- infercnv::CreateInfercnvObject(  # inputs are the three files written in the previous chunk
220 |   raw_counts_matrix = "./SCC_P6_BenignRef_and_Visium_Mapped_Counts.tsv",
221 |   gene_order_file   = "./SCC_P6_BenignRef_and_Visium_GeneOrderFile.tsv",
222 |   annotations_file  = "./SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv",
223 |   delim             = "\t",
224 |   ref_group_names   = "PurestBenign_SCCPatient6",
225 |   chr_exclude       = c("chrM"))
226 | ```
227 | 
228 | # Unsupervised Run - (Typically run on a cluster)
229 | 
230 | Running infercnv; this is typically run on a server.
231 | 
232 | ```{r, eval = FALSE}
233 | # Unsupervised pass: cluster_by_groups is off, denoising is on, and the HMM is off.
234 | Visium_P6_Bg_infCNV <- infercnv::run(
235 |   Visium_P6_Bg_infCNV,
236 |   cutoff = 0.1, out_dir = "./Figure4c_Step2/Outputs",
237 |   num_threads = 10, cluster_by_groups = FALSE,
238 |   denoise = TRUE, HMM = FALSE
239 | )
240 | ```
241 | 
242 | InferCNV will output many files. We are primarily interested in the final "infercnv.21_denoised.png" file, as well as the text file describing the dendrogram of the hierarchical clustering shown on the left-hand side of the image (infercnv.21_denoised.observations_dendrogram.txt).
243 | 
244 | ![infercnv.21_denoised.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step2/infercnv.21_denoised.png)
245 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4c_SCC/Step2/Figure4c_Step2_SCC_P6_siCNV_unsupervised.md:
--------------------------------------------------------------------------------
  1 | Having now selected a benign reference set in Step 1, we now use these
  2 | data to perform unsupervised analysis of the SCC Visium section.
  3 | 
  4 | # Setup
  5 | 
  6 | Initializing libraries
  7 | 
  8 |     library(tidyverse)
  9 | 
 10 |     ## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
 11 | 
 12 |     ## v ggplot2 3.3.5     v purrr   0.3.4
 13 |     ## v tibble  3.1.1     v dplyr   1.0.6
 14 |     ## v tidyr   1.1.3     v stringr 1.4.0
 15 |     ## v readr   2.0.1     v forcats 0.5.1
 16 | 
 17 |     ## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
 18 |     ## x dplyr::filter() masks stats::filter()
 19 |     ## x dplyr::lag()    masks stats::lag()
 20 | 
 21 |     library(SpatialInferCNV)
 22 | 
 23 |     ## Registered S3 method overwritten by 'spatstat.geom':
 24 |     ##   method     from
 25 |     ##   print.boxx cli
 26 | 
 27 |     ## Warning: replacing previous import 'phylogram::as.phylo' by 'ape::as.phylo' when
 28 |     ## loading 'SpatialInferCNV'
 29 | 
 30 | # Importing Data for Benigns
 31 | 
 32 | We already imported the data in the previous step; let's re-import it
 33 | and filter only for the selected/filtered benign reference set.
 34 | 
 35 |     #Import SCC, Patient 6, scRNAseq benigns that we subset out in step 1
 36 |     load("./Figure4c_output/SCC_P6_Benigns.RData")
 37 |     # Quick look at the loaded object (printed only; nothing is modified).
 38 |     head(SCC_P6_Benigns)
 39 |     # Barcodes of the selected benign reference set, tagged with the reference group label later passed to inferCNV.
 40 |     SCC_P6_BenignReferences_Barcodes <- read.csv("./Figure4c_SCCP6_BenignReferenceSet.csv")
 41 |     names(SCC_P6_BenignReferences_Barcodes)[1] <- "Barcodes"
 42 |     SCC_P6_BenignReferences_Barcodes$Histology <- "PurestBenign_SCCPatient6"
 43 |     SCC_P6_Benigns <- SCC_P6_Benigns %>% rownames_to_column()
 44 |     names(SCC_P6_Benigns)[1] <- "Barcodes"
 45 |     # Keep the counts of the selected reference barcodes only, then reshape to genes (rows) x barcodes (columns).
 46 |     SCC_P6_BenignReferences_Counts <- left_join(SCC_P6_BenignReferences_Barcodes, SCC_P6_Benigns, by = c("Barcodes" = "Barcodes"))
 47 |     rm(SCC_P6_Benigns)
 48 |     SCC_P6_BenignReferences_Counts <- SCC_P6_BenignReferences_Counts %>% select(-Histology)
 49 |     SCC_P6_BenignReferences_Counts <- SCC_P6_BenignReferences_Counts %>% column_to_rownames(var = "Barcodes")
 50 |     SCC_P6_BenignReferences_Counts <- as.data.frame(t(SCC_P6_BenignReferences_Counts))
 51 |     SCC_P6_BenignReferences_Counts <- SCC_P6_BenignReferences_Counts %>% rownames_to_column()
 52 |     names(SCC_P6_BenignReferences_Counts)[1] <- "Genes"
 53 |     # Save both tables so the later chunks can reload them with readRDS().
 54 |     saveRDS(SCC_P6_BenignReferences_Counts, file = "SCC_P6_BenignReferences_Counts.rds")
 55 |     saveRDS(SCC_P6_BenignReferences_Barcodes, file = "SCC_P6_BenignReferences_Barcodes.rds")
 56 | 
 57 | # Importing Data for Visium Data
 58 | 
 59 | Download the files [from
 60 | Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29):
 61 | SCC\_patient/.
 62 | 
 63 | Here, we are filtering for the section used in the Figure 4d analysis
 64 | from a parent seurat object. We output both the counts and the barcodes
 65 | from this Visium section. We manually apply a QC threshold to only
 66 | include Visium spots with at least 500 counts.
 67 | 
 68 |     t28 <- readRDS("./t28.Rds")
 69 |     # Raw counts of the Spatial assay as a data frame; the parent object is removed once the counts are extracted.
 70 |     SCC_P6_Visium_Counts <- as.data.frame(t28@assays$Spatial@counts)
 71 |     rm(t28)
 72 | 
 73 |     head(SCC_P6_Visium_Counts)
 74 |     # Transpose and move the row names into a "rowname" column so they can be parsed below.
 75 |     SCC_P6_Visium_Counts <- as.data.frame(t(SCC_P6_Visium_Counts))
 76 |     SCC_P6_Visium_Counts <- rownames_to_column(SCC_P6_Visium_Counts)
 77 |     SCC_P6_Visium_Counts$section <- str_sub(SCC_P6_Visium_Counts$rowname, start= -1)
 78 |     table(SCC_P6_Visium_Counts$section)
 79 |     # section = last character of the name; barcode = the name without its final two characters.
 80 |     SCC_P6_Visium_Counts$barcode <- str_sub(SCC_P6_Visium_Counts$rowname, start = 1L, end = -3)
 81 |     # Keep section 1 only.
 82 |     SCC_P6_Visium_Counts <- SCC_P6_Visium_Counts %>% filter(section == 1)
 83 |     # NOTE(review): the annotations are taken before the >= 500 count filter below, so they can include barcodes that are later removed from the counts - confirm this is acceptable.
 84 |     SCC_P6_Visium_Annotations <- SCC_P6_Visium_Counts %>% select(barcode, section)
 85 |     SCC_P6_Visium_Annotations$section <- "SCC_P6_Visium"
 86 |     names(SCC_P6_Visium_Annotations)[1] <- "Barcodes"
 87 |     names(SCC_P6_Visium_Annotations)[2] <- "Histology"
 88 | 
 89 |     saveRDS(SCC_P6_Visium_Annotations, file = "SCC_P6_Visium_Annotations.rds")
 90 | 
 91 |     SCC_P6_Visium_Counts <- column_to_rownames(SCC_P6_Visium_Counts, var = "barcode")
 92 |     SCC_P6_Visium_Counts <- SCC_P6_Visium_Counts %>% select(-rowname, -section)
 93 |     # QC: drop rows (barcodes) with fewer than 500 total counts, then transpose back.
 94 |     SCC_P6_Visium_Counts$Total <- rowSums(SCC_P6_Visium_Counts)
 95 |     SCC_P6_Visium_Counts <- SCC_P6_Visium_Counts %>% filter(Total >= 500)
 96 |     SCC_P6_Visium_Counts <- select(SCC_P6_Visium_Counts, -Total)
 97 |     SCC_P6_Visium_Counts <- as.data.frame(t(SCC_P6_Visium_Counts))
 98 |     SCC_P6_Visium_Counts <- SCC_P6_Visium_Counts[,colSums(is.na(SCC_P6_Visium_Counts))<nrow(SCC_P6_Visium_Counts)]  # drop columns that are NA in every row
 99 |     SCC_P6_Visium_Counts <- tibble::rownames_to_column(SCC_P6_Visium_Counts, "Genes")
100 |     # Saved for the join with the benign reference counts.
101 |     saveRDS(SCC_P6_Visium_Counts, file = "SCC_P6_Visium_Counts.rds")
102 | 
103 | # Joining Visium and Benign Barcodes
104 | 
105 | Next, we join the benign reference and Visium barcodes.
106 | 
107 |     SCC_P6_Visium_Annotations <- readRDS("./SCC_P6_Visium_Annotations.rds")
108 |     SCC_P6_BenignReferences_Barcodes <- readRDS("./SCC_P6_BenignReferences_Barcodes.rds")
109 |     # Stack the two annotation tables into one; rbind() requires both to have the same columns.
110 |     Joined_Barcodes <- rbind(SCC_P6_Visium_Annotations, SCC_P6_BenignReferences_Barcodes)
111 |     saveRDS(Joined_Barcodes, file = "SCC_P6_BenignRef_and_Visium_Annotations.rds")
112 | 
113 | # Joining Visium and Benign Counts
114 | 
115 | Next, we join the benign reference and visium count data.
116 | 
117 |     SCC_P6_BenignReferences_Counts <- readRDS("./SCC_P6_BenignReferences_Counts.rds")
118 |     SCC_P6_Visium_Counts <- readRDS("./SCC_P6_Visium_Counts.rds")
119 |     # Preview both inputs (printed only).
120 |     head(SCC_P6_BenignReferences_Counts)
121 |     head(SCC_P6_Visium_Counts)
122 |     # The full join keeps genes found in either data set; values missing on one side are set to 0.
123 |     SCC_P6_BenignRef_and_Visium_Counts <- SCC_P6_BenignReferences_Counts %>% full_join(SCC_P6_Visium_Counts, by = "Genes")
124 |     SCC_P6_BenignRef_and_Visium_Counts <- SCC_P6_BenignRef_and_Visium_Counts %>% replace(., is.na(.), 0)
125 | 
126 |     saveRDS(SCC_P6_BenignRef_and_Visium_Counts, file = "SCC_P6_BenignRef_and_Visium_Counts.rds")
127 | 
128 | # Creating GeneToENSMBL dataframe
129 | 
130 | The code below creates the GeneToENSMBL.csv file, but we have provided
131 | this on our GitHub:
132 | 
133 | [GeneToENSMBL.csv](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/GeneToENSMBL.csv).
134 | 
135 |     GeneToENSMBL <- read.csv("./Mendeley/ProcessedFilesForFigures/Figure4/GeneToENSMBL.csv")
136 |     # The commented-out code below records how GeneToENSMBL.csv was generated; it is not needed when the provided file is used.
137 |     #library(tidyverse)
138 |     #library(data.table)
139 |     #GeneToENSMBL <- fread('https://data.broadinstitute.org/Trinity/CTAT/cnv/gencode_v19_gen_pos.complete.txt')
140 |     #GeneToENSMBL <- GeneToENSMBL %>% separate(V1, c("left","ENSMBLID"), sep = "\\|")
141 | 
142 |     #names(GeneToENSMBL)[1] <- "Genes"
143 |     #names(GeneToENSMBL)[3] <- "chr"
144 |     #names(GeneToENSMBL)[4] <- "start"
145 |     #names(GeneToENSMBL)[5] <- "stop"
146 | 
147 |     #write.csv(GeneToENSMBL, "GeneToENSMBL.csv", row.names = FALSE)
148 | 
149 | # Mapping Gene Names to counts/barcodes, and then outputting the requisite files for infercnv::run, part 1
150 | 
151 | inferCNV requires a gene ordering file with one row per gene, in the
152 | form: Gene Name / Chromosome / Start Position / Stop Position. As the
153 | count files provided by the authors are keyed by gene name, while our
154 | chromosomal position information is keyed by Ensembl ID, we need to map
155 | the gene names to Ensembl IDs.
156 | 
157 |     #removing "."
158 |     Counts_joined <- SCC_P6_BenignRef_and_Visium_Counts
159 |     Counts_joined <- Counts_joined %>%
160 |                         separate(Genes, c("Genes", NA))
161 | 
162 |     Counts_joined <- Counts_joined %>% select(Genes)
163 | 
164 |     GenesForMapping <- GeneToENSMBL %>% select(Genes, chr, start, stop)
165 |     GenesInSample <- Counts_joined %>% select(Genes)
166 |     GenesInSamplevsOrdering <- inner_join(GenesInSample, GenesForMapping, by = c("Genes" = "Genes"))
167 |       dedup_GenesInSamplevsOrdering <- GenesInSamplevsOrdering[!duplicated(GenesInSamplevsOrdering$Genes), ]
168 |       dedup_GenesInSamplevsOrdering$chromorder <- gsub("chr","",dedup_GenesInSamplevsOrdering$chr)
169 |       dedup_GenesInSamplevsOrdering$chromorder <- as.numeric(ifelse(dedup_GenesInSamplevsOrdering$chromorder == "X", 23,
170 |                                                              ifelse(dedup_GenesInSamplevsOrdering$chromorder == "Y", 24,      dedup_GenesInSamplevsOrdering$chromorder)))
171 |       dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[order(dedup_GenesInSamplevsOrdering$chromorder),]
172 |       dedup_GenesInSamplevsOrdering <- dedup_GenesInSamplevsOrdering[,1:4]  
173 |       
174 |     MappingFileForInferCNV <- dedup_GenesInSamplevsOrdering
175 | 
176 |     saveRDS(MappingFileForInferCNV, file = "MappingFileForSCC_P6_Visium_and_Bg.rds")  
177 | 
178 | # Outputting the requisite files for infercnv::run, part 2
179 | 
180 | We then filter for only mapped genes, from counts, and then output the
181 | three requisite files for infercnv::run.
182 | 
183 |     MappingFileForInferCNV <- readRDS("MappingFileForSCC_P6_Visium_and_Bg.rds")
184 |     SCC_P6_BenignRef_and_Visium_Counts <- readRDS("SCC_P6_BenignRef_and_Visium_Counts.rds")
185 | 
186 |     CountmappedGenes <- select(MappingFileForInferCNV, Genes)
187 | 
188 |     Counts_joined <- SCC_P6_BenignRef_and_Visium_Counts
189 |     Counts_joined <- Counts_joined %>%
190 |                         separate(Genes, c("Genes", NA))
191 | 
192 |     Mapped_Counts_joined <- left_join(CountmappedGenes, Counts_joined)
193 |     Mapped_Counts_joined <- Mapped_Counts_joined[!duplicated(Mapped_Counts_joined$Genes), ]
194 |     Mapped_Counts_joinedSliced <- Mapped_Counts_joined %>% slice(1L)
195 |     Mapped_Counts_joinedSliced <- as.data.frame(t(Mapped_Counts_joinedSliced[, colnames(Mapped_Counts_joinedSliced)[c(1:length(Mapped_Counts_joinedSliced))]]))
196 |     Mapped_Counts_joinedSliced <- Mapped_Counts_joinedSliced %>% rownames_to_column()
197 |     Mapped_Counts_joinedSliced <- as.data.frame(Mapped_Counts_joinedSliced[2:(dim(Mapped_Counts_joinedSliced)[1]), 1])
198 |     names(Mapped_Counts_joinedSliced)[1] <- "Barcode"
199 | 
200 |     Mapped_Counts_joinedSliced$Histology <- ifelse(paste0(substr(Mapped_Counts_joinedSliced$Barcode, start = 1, stop = 4)) == "P6_N", "PurestBenign_SCCPatient6", "Visium")
201 | 
202 |     #Write GenesInSamplevsOrdering
203 |     write.table(Mapped_Counts_joined, 
204 |                 "SCC_P6_BenignRef_and_Visium_Mapped_Counts.tsv",
205 |                 row.names = FALSE,
206 |                 sep = "\t")
207 | 
208 |     write.table(MappingFileForInferCNV, 
209 |                 "SCC_P6_BenignRef_and_Visium_GeneOrderFile.tsv", 
210 |                 quote = FALSE, 
211 |                 col.names = FALSE, 
212 |                 row.names = FALSE, 
213 |                 sep = "\t")
214 | 
215 | 
216 |     write.table(Mapped_Counts_joinedSliced, 
217 |                 "SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv", 
218 |                 quote = FALSE, 
219 |                 col.names = FALSE, 
220 |                 row.names = FALSE, 
221 |                 sep = "\t")
222 | 
223 | # Creating the inferCNV object (prior to run)
224 | 
225 | Creating the object for infercnv::run.
226 | 
227 |     Visium_P6_Bg_infCNV <- infercnv::CreateInfercnvObject(raw_counts_matrix="./SCC_P6_BenignRef_and_Visium_Mapped_Counts.tsv", 
228 |                                                    gene_order_file="./SCC_P6_BenignRef_and_Visium_GeneOrderFile.tsv",
229 |                                                    annotations_file="./SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv",
230 |                                                    delim="\t",
231 |                                                    ref_group_names="PurestBenign_SCCPatient6",
232 |                                                    chr_exclude = c("chrM"))
233 | 
234 | # Unsupervised Run - (Typically run on a cluster)
235 | 
236 | Running infercnv; this is typically run on a server.
237 | 
238 |     Visium_P6_Bg_infCNV = infercnv::run(Visium_P6_Bg_infCNV,
239 |                                                   cutoff=0.1,
240 |                                                 out_dir="./Figure4c_Step2/Outputs", 
241 |                                                   num_threads = 10,
242 |                                                   cluster_by_groups=FALSE, 
243 |                                                   denoise=TRUE,
244 |                                                   HMM=FALSE)
245 | 
246 | InferCNV will output many files. We are primarily interested in the
247 | final “infercnv.21\_denoised.png” file, as well as the text file
248 | describing the dendrogram of the hierarchical clustering shown on the
249 | left-hand side of the image
250 | (infercnv.21\_denoised.observations\_dendrogram.txt).
251 | 
252 | ![infercnv.21\_denoised.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step2/infercnv.21_denoised.png)
253 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4c_SCC/Step2/infercnv.21_denoised.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 4/Figure4c_SCC/Step2/infercnv.21_denoised.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4c_SCC/Step3/Figure4c_Step3_SCC_P6_siCNV_supervised.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Fig4c_Step3_SCC_P6_siCNV_supervised"
  3 | author: "Andrew Erickson"
  4 | output: md_document
  5 | ---
  6 | 
  7 | Now that we have run the unsupervised step, we will next identify clones and run the final clustered inferCNV analysis to generate the clustered figure panel image in 4c.
  8 | 
  9 | # Setup
 10 | 
 11 | Initializing libraries.
 12 | 
 13 | ```{r, message=FALSE}
 14 | library(SpatialInferCNV)
 15 | library(phylogram)  # read.dendrogram()
 16 | library(ape)        # as.phylo(), subtrees(), nodelabels(), Ntip()
 17 | library(tidyverse)
 18 | ```
 19 | 
 20 | # Importing dendrogram
 21 | 
 22 | Next, we import the dendrogram file that was created in the previous step.
 23 | 
 24 | ```{r, eval = FALSE}
 25 | SCC_for_clustering <- read.dendrogram(file = "./Figure4c_Step2/Outputs/infercnv.21_denoised.observations_dendrogram.txt")
 26 | # Convert the dendrogram to a phylo object, which the subtrees()/nodelabels() calls in the next chunk operate on.
 27 | SCC_for_clustering_phylo <- as.phylo(SCC_for_clustering)
 28 | ```
 29 | 
 30 | # Visualizing Tree
 31 | 
 32 | Next, we want to visualize the numbers associated with the nodes of interest (clones). We output a large image file that allows us to manually inspect which nodes (corresponding to clones) should be selected.
 33 | 
 34 | ```{r, eval = FALSE}
 35 | my.subtrees <- subtrees(SCC_for_clustering_phylo)  # list of subtrees, used for node selection in the next chunk
 36 | node_ids <- seq_len(SCC_for_clustering_phylo$Nnode)  # labels 1..Nnode for the internal nodes
 37 | png("SCC_for_clustering_phylo.png", width = 10000, height = 2500, res = 300)
 38 | plot(SCC_for_clustering_phylo, show.tip.label = FALSE)
 39 | nodelabels(text = node_ids, node = node_ids + Ntip(SCC_for_clustering_phylo))  # node positions are offset by the number of tips
 40 | dev.off()
 41 | ```
 42 | 
 43 | We provide the following output image.
 44 | 
 45 | ![SCC_for_clustering_phylo.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step3/SCC_for_clustering_phylo.png)
 46 | 
 47 | 
 48 | # Clone selection 
 49 | 
 50 | Next, view the output .png file, which provides an (albeit cluttered) labeling of the dendrogram tree nodes. Manually select the individual nodes that correspond to a distinct subclonal grouping or signal; these will be taken forward for re-clustering. The selection can be iteratively refined together with the next step and spatial visualization until it is optimal. We provide more details [here](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%203/Figure3.md), and list the finalized selected clone nodes below.
 51 | 
 52 | We output a Figure4c_SCC_P6_Clones.csv file, identifying the barcodes and annotations for each clone for the next steps.
 53 | 
 54 | ```{r, eval = FALSE}
 55 | # Dendrogram nodes selected from SCC_for_clustering_phylo.png and the clone label each one receives:
 56 | #   Clone_A = node 1656, Clone_B = node 1322, Clone_C = node 1183, Clone_D = node 2
 57 | clone_lookup <- c(Node_1656 = "Clone_A",
 58 |                   Node_1322 = "Clone_B",
 59 |                   Node_1183 = "Clone_C",
 60 |                   Node_2    = "Clone_D")
 61 | 
 62 | # Extract the data for each selected node.
 63 | Node1656 <- SelectingSubTreeData(my.subtrees, 1656)
 64 | Node1322 <- SelectingSubTreeData(my.subtrees, 1322)
 65 | Node1183 <- SelectingSubTreeData(my.subtrees, 1183)
 66 | Node2    <- SelectingSubTreeData(my.subtrees, 2)
 67 | 
 68 | Merged <- rbind(Node1656, Node1322, Node1183, Node2)
 69 | table(Merged$Node)  # rows per node, before renaming
 70 | 
 71 | # Rename the node labels to clone labels; any other label is left untouched.
 72 | node_labels <- as.character(Merged$Node)
 73 | is_selected <- node_labels %in% names(clone_lookup)
 74 | node_labels[is_selected] <- unname(clone_lookup[node_labels[is_selected]])
 75 | Merged$Node <- node_labels
 76 | write.csv(Merged, "Figure4c_SCC_P6_Clones.csv", row.names = FALSE)
 77 | ```
 78 | 
 79 | # Outputting the requisite files for infercnv::run
 80 | 
 81 | We import the files generated in step 2, with the updated clone barcodes, and generate a new annotation file for input to infercnv::run.
 82 | 
 83 | ```{r, eval = FALSE}
 84 | library(tidyverse)
 85 | library(SpatialInferCNV)
 86 | 
 87 | # Annotation file written in step 2 (no header): barcode, then "PurestBenign_SCCPatient6" or "Visium".
 88 | OriginalBarcodes <- read.table("./SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv", sep = "\t")
 89 | names(OriginalBarcodes)[1] <- "Barcode"
 90 | names(OriginalBarcodes)[2] <- "Histology"
 91 | 
 92 | # Clone assignments exported by the clone selection chunk (provides the Node column).
 93 | ClusteredBarcodes <- read.csv("./Figure4c_SCC_P6_Clones.csv")
 94 | 
 95 | UpdatedBarcodes <- left_join(OriginalBarcodes, ClusteredBarcodes)
 96 | 
 97 | # Barcodes without a clone keep their step 2 label. Falling back to Histology, rather than
 98 | # hard-coding the reference label, keeps the reference barcodes in "PurestBenign_SCCPatient6"
 99 | # and stops Visium spots outside the selected nodes from being added to the benign reference group.
100 | UpdatedBarcodes$Node <- ifelse(is.na(UpdatedBarcodes$Node), as.character(UpdatedBarcodes$Histology), UpdatedBarcodes$Node)
101 | 
102 | # Two columns (barcode, group label), written without a header for use as the annotations_file.
103 | UpdatedBarcodes <- UpdatedBarcodes %>% select(Barcode, Node) %>% arrange(desc(Node))
104 | 
105 | write.table(UpdatedBarcodes,
106 |             "Clustered_SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv",
107 |             quote = FALSE, col.names = FALSE, row.names = FALSE, sep = "\t")
108 | 
109 | ```
110 | 
111 | # Creating the inferCNV object (prior to run)
112 | 
113 | We generate the infercnv object.
114 | 
115 | ```{r, eval = FALSE}
116 | SCC_P6_ForClusteringClones <- infercnv::CreateInfercnvObject(
117 |   raw_counts_matrix = "./SCC_P6_BenignRef_and_Visium_Mapped_Counts.tsv",
118 |   gene_order_file   = "./SCC_P6_BenignRef_and_Visium_GeneOrderFile.tsv",
119 |   annotations_file  = "./Clustered_SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv",  # clone-level annotations
120 |   delim = "\t", ref_group_names = "PurestBenign_SCCPatient6",
121 |   chr_exclude = c("chrM"))
122 | ```
123 | 
124 | # InferCNV Run - (Typically run on a cluster)
125 | 
126 | Running infercnv.
127 | 
128 | ```{r, eval = FALSE}
129 | # Supervised pass: cluster_by_groups is on (using the clone annotations), with denoising and the HMM enabled.
130 | SCC_P6_ForClusteringClones <- infercnv::run(
131 |   SCC_P6_ForClusteringClones,
132 |   cutoff = 0.1, out_dir = "./Figure4c_Step3/Outputs",
133 |   cluster_by_groups = TRUE, num_threads = 20,
134 |   denoise = TRUE, HMM = TRUE
135 | )
136 | ```
137 | 
138 | InferCNV will output many files. We are primarily interested in the final "infercnv.21_denoised.png" file, corresponding to the one provided in Figure 4c. These are reordered in the final figure.
139 | 
140 | ![infercnv.21_denoised.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step3/infercnv.21_denoised.png)
141 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4c_SCC/Step3/Figure4c_Step3_SCC_P6_siCNV_supervised.md:
--------------------------------------------------------------------------------
  1 | Now that we have run the unsupervised step, we will next identify
  2 | clones and run the final clustered inferCNV analysis to generate the
  3 | clustered figure panel image in 4c.
  4 | 
  5 | # Setup
  6 | 
  7 | Initializing libraries.
  8 | 
  9 |     library(SpatialInferCNV)
 10 | 
 11 |     ## Warning: replacing previous import 'phylogram::as.phylo' by 'ape::as.phylo' when
 12 |     ## loading 'SpatialInferCNV'
 13 | 
 14 |     library(phylogram)
 15 |     library(ape)
 16 | 
 17 |     ## 
 18 |     ## Attaching package: 'ape'
 19 | 
 20 |     ## The following object is masked from 'package:phylogram':
 21 |     ## 
 22 |     ##     as.phylo
 23 | 
 24 |     library(tidyverse)
 25 | 
 26 |     ## Registered S3 method overwritten by 'cli':
 27 |     ##   method     from         
 28 |     ##   print.boxx spatstat.geom
 29 | 
 30 |     ## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
 31 | 
 32 |     ## v ggplot2 3.3.5     v purrr   0.3.4
 33 |     ## v tibble  3.1.1     v dplyr   1.0.6
 34 |     ## v tidyr   1.1.3     v stringr 1.4.0
 35 |     ## v readr   2.0.1     v forcats 0.5.1
 36 | 
 37 |     ## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
 38 |     ## x dplyr::filter() masks stats::filter()
 39 |     ## x dplyr::lag()    masks stats::lag()
 40 | 
 41 | # Importing dendrogram
 42 | 
 43 | Next, we import the dendrogram file that was created in the previous
 44 | step.
 45 | 
 46 |     SCC_for_clustering <- read.dendrogram(file = "./Figure4c_Step2/Outputs/infercnv.21_denoised.observations_dendrogram.txt")
 47 | 
 48 |     SCC_for_clustering_phylo <- as.phylo(SCC_for_clustering)
 49 | 
 50 | # Visualizing Tree
 51 | 
 52 | Next, we want to visualize the numbers associated with the nodes of
 53 | interest (clones). We output a large image file that allows us to
 54 | manually inspect which nodes (corresponding to clones) should be
 55 | selected.
 56 | 
 57 |     my.subtrees = subtrees(SCC_for_clustering_phylo)  # subtrees() to subset
 58 | 
 59 |     png("SCC_for_clustering_phylo.png",width=10000,height=2500, res = 300)
 60 |     plot(SCC_for_clustering_phylo,show.tip.label = FALSE)
 61 |     nodelabels(text=1:SCC_for_clustering_phylo$Nnode,node=1:SCC_for_clustering_phylo$Nnode+Ntip(SCC_for_clustering_phylo))
 62 |     dev.off()
 63 | 
 64 | We provide the following output image.
 65 | 
 66 | ![SCC\_for\_clustering\_phylo.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step3/SCC_for_clustering_phylo.png)
 67 | 
 68 | # Clone selection
 69 | 
 70 | Next, view the output .png file, which provides an (albeit cluttered)
 71 | labeling of the dendrogram tree nodes. Manually select the individual
 72 | nodes that correspond to a distinct subclonal grouping or signal; these
 73 | will be taken forward for re-clustering. The selection can be iteratively
 74 | refined together with the next step and spatial visualization until it is
 75 | optimal. We provide more details
 76 | [here](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%203/Figure3.md),
 77 | and list the finalized selected clone nodes below.
 78 | 
 79 | We output a Figure4c\_SCC\_P6\_Clones.csv file, identifying the barcodes
 80 | and annotations for each clone for the next steps.
 81 | 
 82 |     #A - 1656 -  spots
 83 |     #B - 1322 -  spots
 84 |     #C - 1183 -  spots
 85 |     #D - 2  -  spots
 86 | 
 87 |     Node1656 <- SelectingSubTreeData(my.subtrees, 1656)
 88 |     Node1322 <- SelectingSubTreeData(my.subtrees, 1322)
 89 |     Node1183 <- SelectingSubTreeData(my.subtrees, 1183)
 90 |     Node2 <- SelectingSubTreeData(my.subtrees, 2)
 91 | 
 92 |     Merged <- rbind(Node1656, Node1322)
 93 |     Merged <- rbind(Merged, Node1183)
 94 |     Merged <- rbind(Merged, Node2)
 95 | 
 96 |     table(Merged$Node)
 97 | 
 98 |     Merged$Node <- ifelse(Merged$Node == "Node_1656" , "Clone_A", 
 99 |                          ifelse(Merged$Node == "Node_1322" , "Clone_B",
100 |                          ifelse(Merged$Node == "Node_1183" , "Clone_C",
101 |                          ifelse(Merged$Node == "Node_2" , "Clone_D",Merged$Node))))
102 | 
103 |     write.csv(Merged, "Figure4c_SCC_P6_Clones.csv", row.names = FALSE)
104 | 
105 | # Outputting the requisite files for infercnv::run
106 | 
107 | We import the files generated in step 2, with the updated clone
108 | barcodes, and generate a new annotation file for input to infercnv::run.
109 | 
110 |     library(tidyverse)
111 |     library(SpatialInferCNV)
112 | 
113 |     OriginalBarcodes <- read.table("./SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv", sep = "\t")
114 | 
115 |     ClusteredBarcodes <- read.csv("./Figure4c_SCC_P6_Clones.csv")
116 | 
117 |     names(OriginalBarcodes)[1] <- "Barcode"
118 |     names(OriginalBarcodes)[2] <- "Histology"
119 | 
120 |     UpdatedBarcodes <- left_join(OriginalBarcodes, ClusteredBarcodes)
121 | 
122 |     UpdatedBarcodes$Node <- ifelse(is.na(UpdatedBarcodes$Node), "PurestBenign_SCCPatient6", UpdatedBarcodes$Node)
123 | 
124 |     UpdatedBarcodes <- UpdatedBarcodes %>%
125 |                           select(Barcode, Node) %>%
126 |                           arrange(desc(Node))
127 | 
128 |     write.table(UpdatedBarcodes, 
129 |                 "Clustered_SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv", 
130 |                 quote = FALSE, 
131 |                 col.names = FALSE, 
132 |                 row.names = FALSE, 
133 |                 sep = "\t")
134 | 
135 | # Creating the inferCNV object (prior to run)
136 | 
137 | We generate the infercnv object.
138 | 
139 |     SCC_P6_ForClusteringClones <- infercnv::CreateInfercnvObject(raw_counts_matrix="./SCC_P6_BenignRef_and_Visium_Mapped_Counts.tsv", 
140 |                                                                     gene_order_file="./SCC_P6_BenignRef_and_Visium_GeneOrderFile.tsv",
141 |                                                                     annotations_file="./Clustered_SCC_P6_BenignRef_and_Visium_Mapped_Annotations.tsv",
142 |                                                                     delim="\t",
143 |                                                                     ref_group_names="PurestBenign_SCCPatient6",
144 |                                                                                     chr_exclude = c("chrM"))
145 | 
146 | # InferCNV Run - (Typically run on a cluster)
147 | 
148 | Running infercnv.
149 | 
150 |     SCC_P6_ForClusteringClones <- infercnv::run(
151 |         SCC_P6_ForClusteringClones,
152 |         cutoff = 0.1,
153 |         out_dir = "./Figure4c_Step3/Outputs",
154 |         num_threads = 20,
155 |         cluster_by_groups = TRUE,
156 |         denoise = TRUE, HMM = TRUE)
157 | 
158 | InferCNV will output many files. We are primarily interested in the
159 | final “infercnv.21\_denoised.png” file, corresponding to the one
160 | provided in Figure 4c. These are reordered in the final figure.
161 | 
162 | ![infercnv.21\_denoised.png](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4c_SCC/Step3/infercnv.21_denoised.png)
163 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4c_SCC/Step3/SCC_for_clustering_phylo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 4/Figure4c_SCC/Step3/SCC_for_clustering_phylo.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4c_SCC/Step3/infercnv.21_denoised.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 4/Figure4c_SCC/Step3/infercnv.21_denoised.png


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4e/Figure4e_pediatricmedulloblastoma.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Figure4e"
  3 | author: "Linda Kvastad, Andrew Erickson"
  4 | output: md_document
  5 | ---
  6 | # Code for generating inferCNV plot of pediatric brain tumor patient 1
  7 | 
  8 | # Set working directory; this directory should also include a folder called "inferCNV_pediatric_patient_1" containing all necessary files
  9 | 
 10 | ```{r setup, message=FALSE}
 11 | #setwd("type_in_the_path_to_your_working_directory")
 12 | # (chunk option `message=FALSE` keeps package start-up messages out of the knitted document)
 13 | # Load R packages: STutility provides InputFromTable/SubsetSTData/MergeSTData, infercnv provides the CNV inference
 14 | library(STutility)
 15 | library(infercnv)
 16 | ```
 17 | 
 18 | # Loading Data
 19 | 
 20 | We start by creating an empty working directory so that all downloaded files are organized in one place. Download the files [from Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29): inferCNV_pediatric_patient_1/
 21 | 
 22 | ```{r, eval = FALSE}
 23 | # Read the infoTables (semicolon-separated). The patient 3 sample contains stroma regions; only its tumor spots are kept for the inferCNV analysis.
 24 | infoTable_pat_1_2 <- read.table(file = "./inferCNV_pediatric_patient_1/infoTable_pat_1_2.csv", header = TRUE, sep = ";", stringsAsFactors = FALSE)
 25 | infoTable_pat_3 <- read.table(file = "./inferCNV_pediatric_patient_1/infoTable_pat_3.csv", header = TRUE, sep = ";", stringsAsFactors = FALSE)
 26 | 
 27 | # Create Seurat objects, applying the same gene/spot thresholds to both tables
 28 | se_pat_1_2 <- InputFromTable(
 29 |   infotable = infoTable_pat_1_2,
 30 |   platform = "Visium",
 31 |   min.gene.count = 100, min.gene.spots = 5, min.spot.count = 500
 32 | )
 33 | 
 34 | 
 35 | se_pat_3 <- InputFromTable(
 36 |   infotable = infoTable_pat_3,
 37 |   platform = "Visium",
 38 |   min.gene.count = 100, min.gene.spots = 5, min.spot.count = 500
 39 | )
 40 | ```
 41 | 
 42 | # Further Formatting 
 43 | 
 44 | Creating and formatting the dataframes before outputting the requisite input files for infercnv::run.
 45 | 
 46 | ```{r, eval = FALSE}
 47 | # Add pathology annotations to the meta data of the se_pat_3 object (first step towards the barcode/sample table built at the end of this chunk)
 48 | df <- read.csv(file = "./inferCNV_pediatric_patient_1/pathology_patient_3.csv")
 49 | df$Barcode <- paste0(df$Barcode, "_1")  # NOTE(review): the "_1" suffix presumably matches the spot names of se_pat_3 - confirm
 50 | rownames(df) <- df$Barcode
 51 | se_pat_3$pathology <- df[rownames(se_pat_3[[]]), ]$Pathology  # row-name lookup keeps the labels in the order of the object's meta data rows
 52 | 
 53 | # Check that the pathology data was added to the meta data of se_pat_3
 54 | head(se_pat_3[[]])
 55 | tail(se_pat_3[[]])
 56 | table(se_pat_3$pathology)
 57 | 
 58 | # Subset se_pat_3 so that it only contains spots annotated as "tumor cells"
 59 | se_pat_3 <- SetIdent(se_pat_3, value = "pathology")
 60 | se_pat_3 <- SubsetSTData(se_pat_3, idents = c("tumor cells"))
 61 | 
 62 | # Check that only spots containing tumor cells are left
 63 | table(se_pat_3$pathology)
 64 | 
 65 | # Merge the patient 1/2 object with the filtered patient 3 object
 66 | se <- MergeSTData(se_pat_1_2, y = c(se_pat_3))
 67 | 
 68 | # Check that the merge worked
 69 | se
 70 | head(se[[]])
 71 | tail(se[[]])
 72 | table(se$sample)
 73 | 
 74 | # Use the sample column as the active identity
 75 | se <- SetIdent(se, value = "sample")
 76 | table(se$sample)
 77 | 
 78 | # Prepare the two-column (Barcode, sample) data.frame that is written out as the annotations input for the inferCNV run
 79 | se_sample <- as.data.frame(se$sample)
 80 | head(se_sample)
 81 | colnames(se_sample) <- c("sample")
 82 | head(se_sample)
 83 | se_sample <- cbind(Barcode = rownames(se_sample), se_sample)  # barcodes move from the row names into a leading Barcode column
 84 | rownames(se_sample) <- NULL  # row names are no longer needed
 85 | head(se_sample)
 86 | tail(se_sample)
 87 | ```
 88 | 
 89 | # Outputting Files for infercnv::run 
 90 | 
 91 | Creating the files for the next step.
 92 | 
 93 | ```{r, eval = FALSE}
 94 | 
 95 | # Write the barcode/sample table as a headerless, tab-separated annotations file
 96 | write.table(se_sample, "./inferCNV_annotions_se_pat_1_2_3.txt", sep = "\t", col.names = FALSE, row.names = FALSE)
 97 | 
 98 | # Extract the count data from the merged object; it is used as the count input for the inferCNV run
 99 | counts_matrix <- GetAssayData(object = se, slot = "counts")
100 | 
101 | ```
102 | 
103 | # Create the infercnv object
104 | 
105 | Creating the inferCNV object for the inferCNV run.
106 | 
107 | ```{r, eval = FALSE}
108 | infercnv_obj <- CreateInfercnvObject(
109 |   raw_counts_matrix = counts_matrix,
110 |   gene_order_file = "./inferCNV_pediatric_patient_1/gencode.v25.annotation_gen_pos_v3.txt",
111 |   annotations_file = "./inferCNV_annotions_se_pat_1_2_3.txt", delim = "\t",
112 |   ref_group_names = c("patient_2", "patient_3"),  # sample groups used as the reference
113 |   chr_exclude = "chrMT")  # NOTE(review): confirm "chrMT" is the chromosome label actually used in the gene order file
114 | ```
115 | 
116 | # InferCNV run
117 | 
118 | Running infercnv::run. This is typically run on a high-performance cluster.
119 | 
120 | ```{r, eval = FALSE}
121 | 
122 | # Run the infercnv operations on the object to reveal the CNV signal
123 | infercnv_obj <- infercnv::run(
124 |   infercnv_obj,
125 |   cutoff = 0.1,
126 |   out_dir = "./inferCNV_pediatric_patient_1_output_dir",  # created automatically; all outputs are stored here
127 |   cluster_by_groups = TRUE,  # observations are defined by groups (i.e. patients), so each group is clustered separately
128 |   denoise = TRUE, HMM = TRUE)
129 | 
130 | ```
131 | 
132 | The output infercnv.png was used in Figure 4e:
133 | 
134 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4e/infercnv.png).
135 | 
136 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4e/Figure4e_pediatricmedulloblastoma.md:
--------------------------------------------------------------------------------
  1 | # Code for generating inferCNV plot of pediatric brain tumor patient 1
  2 | 
  3 | # Set working directory; this directory should also include a folder called “inferCNV\_pediatric\_patient\_1” containing all necessary files
  4 | 
  5 |     #setwd("type_in_the_path_to_your_working_directory")
  6 | 
  7 |     # Load R packages
  8 |     library(STutility)
  9 | 
 10 |     ## Loading required package: Seurat
 11 | 
 12 |     ## Attaching SeuratObject
 13 | 
 14 |     ## Loading required package: ggplot2
 15 | 
 16 |     ## Registered S3 method overwritten by 'imager':
 17 |     ##   method      from
 18 |     ##   plot.imlist
 19 | 
 20 |     library(infercnv)
 21 | 
 22 |     ## Registered S3 method overwritten by 'ape':
 23 |     ##   method   from 
 24 |     ##   plot.mst spdep
 25 | 
 26 |     ## Registered S3 method overwritten by 'gplots':
 27 |     ##   method         from 
 28 |     ##   reorder.factor gdata
 29 | 
 30 | # Loading Data
 31 | 
 32 | We start by creating an empty working directory so that all downloaded
 33 | files are organized in one place. Download the files [from
 34 | Mendeley](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29):
 35 | inferCNV\_pediatric\_patient\_1/
 36 | 
 37 |     # Read the infoTables (semicolon-separated). The patient 3 sample contains stroma regions; only its tumor spots are kept for the inferCNV analysis.
 38 |     infoTable_pat_1_2 <- read.table(file = "./inferCNV_pediatric_patient_1/infoTable_pat_1_2.csv", header = TRUE, sep = ";", stringsAsFactors = FALSE)
 39 |     infoTable_pat_3 <- read.table(file = "./inferCNV_pediatric_patient_1/infoTable_pat_3.csv", header = TRUE, sep = ";", stringsAsFactors = FALSE)
 40 | 
 41 |     # Create Seurat objects, applying the same gene/spot thresholds to both tables
 42 |     se_pat_1_2 <- InputFromTable(
 43 |       infotable = infoTable_pat_1_2,
 44 |       platform = "Visium",
 45 |       min.gene.count = 100, min.gene.spots = 5, min.spot.count = 500
 46 |     )
 47 | 
 48 | 
 49 |     se_pat_3 <- InputFromTable(
 50 |       infotable = infoTable_pat_3,
 51 |       platform = "Visium",
 52 |       min.gene.count = 100, min.gene.spots = 5, min.spot.count = 500
 53 |     )
 54 | 
 55 | # Further Formatting
 56 | 
 57 | Creating and formatting the dataframes before outputting the requisite
 58 | input files for infercnv::run.
 59 | 
 60 |     # Add pathology annotations to the meta data of the se_pat_3 object (first step towards the barcode/sample table built at the end of this block)
 61 |     df <- read.csv(file = "./inferCNV_pediatric_patient_1/pathology_patient_3.csv")
 62 |     df$Barcode <- paste0(df$Barcode, "_1")  # NOTE(review): the "_1" suffix presumably matches the spot names of se_pat_3 - confirm
 63 |     rownames(df) <- df$Barcode
 64 |     se_pat_3$pathology <- df[rownames(se_pat_3[[]]), ]$Pathology  # row-name lookup keeps the labels in the order of the object's meta data rows
 65 | 
 66 |     # Check that the pathology data was added to the meta data of se_pat_3
 67 |     head(se_pat_3[[]])
 68 |     tail(se_pat_3[[]])
 69 |     table(se_pat_3$pathology)
 70 | 
 71 |     # Subset se_pat_3 so that it only contains spots annotated as "tumor cells"
 72 |     se_pat_3 <- SetIdent(se_pat_3, value = "pathology")
 73 |     se_pat_3 <- SubsetSTData(se_pat_3, idents = c("tumor cells"))
 74 | 
 75 |     # Check that only spots containing tumor cells are left
 76 |     table(se_pat_3$pathology)
 77 | 
 78 |     # Merge the patient 1/2 object with the filtered patient 3 object
 79 |     se <- MergeSTData(se_pat_1_2, y = c(se_pat_3))
 80 | 
 81 |     # Check that the merge worked
 82 |     se
 83 |     head(se[[]])
 84 |     tail(se[[]])
 85 |     table(se$sample)
 86 | 
 87 |     # Use the sample column as the active identity
 88 |     se <- SetIdent(se, value = "sample")
 89 |     table(se$sample)
 90 | 
 91 |     # Prepare the two-column (Barcode, sample) data.frame that is written out as the annotations input for the inferCNV run
 92 |     se_sample <- as.data.frame(se$sample)
 93 |     head(se_sample)
 94 |     colnames(se_sample) <- c("sample")
 95 |     head(se_sample)
 96 |     se_sample <- cbind(Barcode = rownames(se_sample), se_sample)  # barcodes move from the row names into a leading Barcode column
 97 |     rownames(se_sample) <- NULL  # row names are no longer needed
 98 |     head(se_sample)
 99 |     tail(se_sample)
100 | 
101 | # Outputting Files for infercnv::run
102 | 
103 | Creating the files for the next step.
104 | 
105 |     # Write the barcode/sample table as a headerless, tab-separated annotations file
106 |     write.table(se_sample, "./inferCNV_annotions_se_pat_1_2_3.txt", sep = "\t", col.names = FALSE, row.names = FALSE)
107 | 
108 |     # Extract the count data from the merged object; it is used as the count input for the inferCNV run
109 |     counts_matrix <- GetAssayData(object = se, slot = "counts")
110 | 
111 | # Create the infercnv object
112 | 
113 | Creating the inferCNV object for the inferCNV run.
114 | 
115 |     infercnv_obj <- CreateInfercnvObject(
116 |       raw_counts_matrix = counts_matrix,
117 |       gene_order_file = "./inferCNV_pediatric_patient_1/gencode.v25.annotation_gen_pos_v3.txt",
118 |       annotations_file = "./inferCNV_annotions_se_pat_1_2_3.txt", delim = "\t",
119 |       ref_group_names = c("patient_2", "patient_3"),  # sample groups used as the reference
120 |       chr_exclude = "chrMT")  # NOTE(review): confirm "chrMT" is the chromosome label actually used in the gene order file
121 | 
122 | # InferCNV run
123 | 
124 | Running the infercnv::run. This is typically run on a high performance
125 | cluster.
126 | 
127 |     # Run the infercnv operations on the object to reveal the CNV signal
128 |     infercnv_obj <- infercnv::run(
129 |       infercnv_obj,
130 |       cutoff = 0.1,
131 |       out_dir = "./inferCNV_pediatric_patient_1_output_dir",  # created automatically; all outputs are stored here
132 |       cluster_by_groups = TRUE,  # observations are defined by groups (i.e. patients), so each group is clustered separately
133 |       denoise = TRUE, HMM = TRUE)
134 | 
135 | The output infercnv.png was used in Figure 4e:
136 | 
137 | ![](https://github.com/aerickso/SpatialInferCNV/blob/main/FigureScripts/Figure%204/Figure4e/infercnv.png).
138 | 


--------------------------------------------------------------------------------
/FigureScripts/Figure 4/Figure4e/infercnv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Figure 4/Figure4e/infercnv.png


--------------------------------------------------------------------------------
/FigureScripts/SCRIPTS.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Scripts to Reproduce Main Figures
 3 | author: "Andrew Erickson, Nuffield Department of Surgical Sciences, University of Oxford"
 4 | output:
 5 |   md_document:
 6 |     variant: markdown_github
 7 | ---
 8 | 
 9 | # Landing Page
10 | 
11 | ```{r, eval = FALSE}
12 | #Landing page text
13 | ```
14 | 


--------------------------------------------------------------------------------
/FigureScripts/SCRIPTS.md:
--------------------------------------------------------------------------------
1 | # Landing Page
2 | 
3 | ``` r
4 | #Landing page text
5 | ```
6 | 


--------------------------------------------------------------------------------
/FigureScripts/Seurat/Seurat_Spatial_Import.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Ericksonetal_SeuratSpatialImport"
 3 | author: "Andrew Erickson"
 4 | date: "2022-12-08"
 5 | output: md_document
 6 | ---
 7 | 
 8 | # Seurat Spatial Import example
 9 | 
10 | The data for [Erickson et al](https://www.nature.com/articles/s41586-022-05023-2) can be found at the following Mendeley link [(latest dataset version = 4)](https://data.mendeley.com/datasets/svw96g68dv/4).
11 | 
12 | The following code downloads the count matrix file, and the spaceranger "spatial" folder files, and imports them into a [Seurat](https://satijalab.org/seurat/index.html) object for further analysis.
13 | 
14 | ```{r}
15 | # Install Seurat first if it is not already installed:
16 | #install.packages('Seurat')
17 | 
18 | # Attach the Seurat library
19 | library(Seurat)
20 | 
21 | # Patient 1 - H2_1: download filtered_feature_bc_matrix.h5 into the working folder
22 | url <- "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/8b69170c-6c07-4e69-abf2-35fade0f5e2c/file_downloaded"
23 | download.file(url, destfile = "./filtered_feature_bc_matrix.h5", mode = "wb")
24 | 
25 | # Create the "spatial" subdirectory that receives the spaceranger spatial files
26 | dir.create(path = "spatial")
27 | 
28 | # 07.12.2022 - This is manually downloaded for the user while waiting for Mendeley updates to be pushed
29 | # Patient 1 - H2_1: download the tissue_hires_image.png image file into the spatial folder
30 | url <- "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/e1399690-dc45-43e5-ae39-7a065bf7d34e/file_downloaded"
31 | download.file(url, destfile = "./spatial/H2_1_tissue_hires_image.png", mode = "wb")
32 | 
33 | # Patient 1 - H2_1: download scalefactors_json.json into the spatial folder
34 | url <- "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/06eb7410-a6a3-4ea9-a364-c6a734a22169/file_downloaded"
35 | download.file(url, destfile = "./spatial/scalefactors_json.json", mode = "wb")
36 | 
37 | # Patient 1 - H2_1: download tissue_positions_list.csv into the spatial folder
38 | url <- "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/e028d330-142b-4d8b-b32d-9114b5c48421/file_downloaded"
39 | download.file(url, destfile = "./spatial/tissue_positions_list.csv", mode = "wb")
40 | 
41 | # Build the image object from the files in ./spatial, using the downloaded hires PNG
42 | InputImage <- Read10X_Image(image.dir = "./spatial",
43 |                             image.name = "H2_1_tissue_hires_image.png",
44 |                             filter.matrix = FALSE)
45 | 
46 | # Read the count matrix from the working folder and attach the image object
47 | H2_1_Seurat <- Load10X_Spatial(data.dir = ".",
48 |                                filename = "filtered_feature_bc_matrix.h5",
49 |                                assay = "Spatial",
50 |                                image = InputImage)
51 | 
52 | # Inspect the resulting object
53 | summary(H2_1_Seurat)
54 | head(H2_1_Seurat)
55 | 
56 | # Print the R session details
57 | sessionInfo()
58 | ```
59 | 


--------------------------------------------------------------------------------
/FigureScripts/Seurat/Seurat_Spatial_Import.md:
--------------------------------------------------------------------------------
  1 | # Seurat Spatial Import example
  2 | 
  3 | The data for [Erickson et
  4 | al](https://www.nature.com/articles/s41586-022-05023-2) can be found at
  5 | the following Mendeley link [(latest dataset version =
  6 | 4)](https://data.mendeley.com/datasets/svw96g68dv/4).
  7 | 
  8 | The following code downloads the count matrix file, and the spaceranger
  9 | “spatial” folder files, and imports them into a
 10 | [Seurat](https://satijalab.org/seurat/index.html) object for further
 11 | analysis.
 12 | 
 13 |     #Install Seurat if not already installed
 14 |     #install.packages('Seurat')
 15 | 
 16 |     #Initialize the Seurat library
 17 |     library(Seurat)
 18 | 
 19 |     ## Warning: package 'Seurat' was built under R version 4.2.2
 20 | 
 21 |     ## Attaching SeuratObject
 22 | 
 23 |     # Patient 1 - H2_1: download filtered_feature_bc_matrix.h5 into the working folder
 24 |     url <- "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/8b69170c-6c07-4e69-abf2-35fade0f5e2c/file_downloaded"
 25 |     download.file(url, destfile = "./filtered_feature_bc_matrix.h5", mode = "wb")
 26 | 
 27 |     # Create the "spatial" subdirectory that receives the spaceranger spatial files
 28 |     dir.create(path = "spatial")
 29 | 
 30 |     # 07.12.2022 - This is manually downloaded for the user while waiting for Mendeley updates to be pushed
 31 |     # Patient 1 - H2_1: download the tissue_hires_image.png image file into the spatial folder
 32 |     url <- "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/e1399690-dc45-43e5-ae39-7a065bf7d34e/file_downloaded"
 33 |     download.file(url, destfile = "./spatial/H2_1_tissue_hires_image.png", mode = "wb")
 34 | 
 35 |     # Patient 1 - H2_1: download scalefactors_json.json into the spatial folder
 36 |     url <- "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/06eb7410-a6a3-4ea9-a364-c6a734a22169/file_downloaded"
 37 |     download.file(url, destfile = "./spatial/scalefactors_json.json", mode = "wb")
 38 | 
 39 |     # Patient 1 - H2_1: download tissue_positions_list.csv into the spatial folder
 40 |     url <- "https://data.mendeley.com/public-files/datasets/svw96g68dv/files/e028d330-142b-4d8b-b32d-9114b5c48421/file_downloaded"
 41 |     download.file(url, destfile = "./spatial/tissue_positions_list.csv", mode = "wb")
 42 | 
 43 |     # Build the image object from the files in ./spatial, using the downloaded hires PNG
 44 |     InputImage <- Read10X_Image(image.dir = "./spatial",
 45 |                                 image.name = "H2_1_tissue_hires_image.png",
 46 |                                 filter.matrix = FALSE)
 47 | 
 48 |     # Read the count matrix from the working folder and attach the image object
 49 |     H2_1_Seurat <- Load10X_Spatial(
 50 |       data.dir = ".",
 51 |       filename = "filtered_feature_bc_matrix.h5",
 52 |       assay = "Spatial",
 53 |       image = InputImage
 54 |     )
 55 | 
 56 |     summary(H2_1_Seurat)
 57 | 
 58 |     ## Length  Class   Mode 
 59 |     ##      1 Seurat     S4
 60 | 
 61 |     head(H2_1_Seurat)
 62 | 
 63 |     ##                       orig.ident nCount_Spatial nFeature_Spatial
 64 |     ## AAACAAGTATCTCCCA-1 SeuratProject           8758             2717
 65 |     ## AAACACCAATAACTGC-1 SeuratProject          13466             3889
 66 |     ## AAACAGCTTTCAGAAG-1 SeuratProject           9514             2511
 67 |     ## AAACAGGGTCTATATT-1 SeuratProject          15668             3601
 68 |     ## AAACAGTGTTCCTGGG-1 SeuratProject              0                0
 69 |     ## AAACATTTCCCGGATT-1 SeuratProject           5290             2211
 70 |     ## AAACCCGAACGAAATC-1 SeuratProject             27               26
 71 |     ## AAACCGGAAATGTTAA-1 SeuratProject              7                7
 72 |     ## AAACCGGGTAGGTACC-1 SeuratProject           9728             2781
 73 |     ## AAACCGTTCGTCCAGG-1 SeuratProject           3783             1660
 74 | 
 75 |     sessionInfo()
 76 | 
 77 |     ## R version 4.2.1 (2022-06-23 ucrt)
 78 |     ## Platform: x86_64-w64-mingw32/x64 (64-bit)
 79 |     ## Running under: Windows 10 x64 (build 22000)
 80 |     ## 
 81 |     ## Matrix products: default
 82 |     ## 
 83 |     ## locale:
 84 |     ## [1] LC_COLLATE=English_United States.utf8 
 85 |     ## [2] LC_CTYPE=English_United States.utf8   
 86 |     ## [3] LC_MONETARY=English_United States.utf8
 87 |     ## [4] LC_NUMERIC=C                          
 88 |     ## [5] LC_TIME=English_United States.utf8    
 89 |     ## 
 90 |     ## attached base packages:
 91 |     ## [1] stats     graphics  grDevices utils     datasets  methods   base     
 92 |     ## 
 93 |     ## other attached packages:
 94 |     ## [1] SeuratObject_4.1.3 Seurat_4.3.0      
 95 |     ## 
 96 |     ## loaded via a namespace (and not attached):
 97 |     ##   [1] Rtsne_0.16             colorspace_2.0-3       deldir_1.0-6          
 98 |     ##   [4] ellipsis_0.3.2         ggridges_0.5.4         rstudioapi_0.14       
 99 |     ##   [7] spatstat.data_3.0-0    leiden_0.4.3           listenv_0.8.0         
100 |     ##  [10] bit64_4.0.5            ggrepel_0.9.2          fansi_1.0.3           
101 |     ##  [13] codetools_0.2-18       splines_4.2.1          knitr_1.40            
102 |     ##  [16] polyclip_1.10-4        jsonlite_1.8.3         ica_1.0-3             
103 |     ##  [19] cluster_2.1.3          png_0.1-7              uwot_0.1.14           
104 |     ##  [22] shiny_1.7.3            sctransform_0.3.5      spatstat.sparse_3.0-0 
105 |     ##  [25] compiler_4.2.1         httr_1.4.4             assertthat_0.2.1      
106 |     ##  [28] Matrix_1.5-3           fastmap_1.1.0          lazyeval_0.2.2        
107 |     ##  [31] cli_3.3.0              later_1.3.0            htmltools_0.5.2       
108 |     ##  [34] tools_4.2.1            igraph_1.3.5           gtable_0.3.1          
109 |     ##  [37] glue_1.6.2             RANN_2.6.1             reshape2_1.4.4        
110 |     ##  [40] dplyr_1.0.10           Rcpp_1.0.9             scattermore_0.8       
111 |     ##  [43] vctrs_0.5.1            nlme_3.1-157           spatstat.explore_3.0-5
112 |     ##  [46] progressr_0.11.0       lmtest_0.9-40          spatstat.random_3.0-1 
113 |     ##  [49] xfun_0.31              stringr_1.4.1          globals_0.16.2        
114 |     ##  [52] mime_0.12              miniUI_0.1.1.1         lifecycle_1.0.3       
115 |     ##  [55] irlba_2.3.5.1          goftest_1.2-3          future_1.29.0         
116 |     ##  [58] MASS_7.3-57            zoo_1.8-11             scales_1.2.1          
117 |     ##  [61] promises_1.2.0.1       spatstat.utils_3.0-1   parallel_4.2.1        
118 |     ##  [64] RColorBrewer_1.1-3     yaml_2.3.5             reticulate_1.26       
119 |     ##  [67] pbapply_1.6-0          gridExtra_2.3          ggplot2_3.4.0         
120 |     ##  [70] stringi_1.7.8          rlang_1.0.6            pkgconfig_2.0.3       
121 |     ##  [73] matrixStats_0.63.0     evaluate_0.18          lattice_0.20-45       
122 |     ##  [76] ROCR_1.0-11            purrr_0.3.5            tensor_1.5            
123 |     ##  [79] patchwork_1.1.2        htmlwidgets_1.5.4      bit_4.0.5             
124 |     ##  [82] cowplot_1.1.1          tidyselect_1.2.0       parallelly_1.32.1     
125 |     ##  [85] RcppAnnoy_0.0.20       plyr_1.8.8             magrittr_2.0.3        
126 |     ##  [88] R6_2.5.1               generics_0.1.3         DBI_1.1.3             
127 |     ##  [91] pillar_1.8.1           fitdistrplus_1.1-8     survival_3.3-1        
128 |     ##  [94] abind_1.4-5            sp_1.5-1               tibble_3.1.8          
129 |     ##  [97] future.apply_1.10.0    hdf5r_1.3.7            KernSmooth_2.23-20    
130 |     ## [100] utf8_1.2.2             spatstat.geom_3.0-3    plotly_4.10.1         
131 |     ## [103] rmarkdown_2.18         grid_4.2.1             data.table_1.14.6     
132 |     ## [106] digest_0.6.29          xtable_1.8-4           tidyr_1.2.1           
133 |     ## [109] httpuv_1.6.6           munsell_0.5.0          viridisLite_0.4.1
134 | 


--------------------------------------------------------------------------------
/FigureScripts/Seurat/filtered_feature_bc_matrix.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Seurat/filtered_feature_bc_matrix.h5


--------------------------------------------------------------------------------
/FigureScripts/Seurat/spatial/H2_1_tissue_hires_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/FigureScripts/Seurat/spatial/H2_1_tissue_hires_image.png


--------------------------------------------------------------------------------
/FigureScripts/Seurat/spatial/scalefactors_json.json:
--------------------------------------------------------------------------------
1 | {"spot_diameter_fullres": 113.3410085152946, "tissue_hires_scalef": 0.12641425, "fiducial_diameter_fullres": 183.08932144778362, "tissue_lowres_scalef": 0.03792428}


--------------------------------------------------------------------------------
/FigureScripts/SpotLevelCloneCalls/Figure2/H1_2_Clones.csv:
--------------------------------------------------------------------------------
  1 | ﻿Barcode,CloneNames
  2 | AGATTCACAACCGATA-1,FigureClone_K
  3 | ACTCCCATTCCTAAAG-1,FigureClone_K
  4 | GAAGCTCGGACCCGTC-1,FigureClone_K
  5 | AGATGCAAGACGTGCA-1,FigureClone_K
  6 | TTAAGGCCCGTACTTT-1,FigureClone_K
  7 | TTGGTTGCGGTGCGCG-1,FigureClone_K
  8 | CCGTATCTCGTCGTAG-1,FigureClone_K
  9 | CTCATGGTAATTTGCG-1,FigureClone_K
 10 | AGAAGGTACACTTCAC-1,FigureClone_K
 11 | GCAACACACTAGAACT-1,FigureClone_K
 12 | AGCGGCGGTTAGCGGT-1,FigureClone_K
 13 | CTACTCAAGGTATAGT-1,FigureClone_K
 14 | GCGCAAATATATTCAA-1,FigureClone_K
 15 | TTGTGGTAGGAGGGAT-1,FigureClone_K
 16 | ATACGGAACGTCGTTT-1,FigureClone_K
 17 | CCCGTAGCTGGGAAGA-1,FigureClone_K
 18 | CTGGGTTGAGTTAAAG-1,FigureClone_K
 19 | TTGTTTCACATCCAGG-1,FigureClone_K
 20 | GTGGAGTCGGCGGTTG-1,FigureClone_E
 21 | GGGCGGCAAATGAATT-1,FigureClone_E
 22 | TAAGGCATAACATCAA-1,FigureClone_E
 23 | CGAACCCGCATGCGTC-1,FigureClone_B
 24 | GGAACCGTGTAAATTG-1,FigureClone_B
 25 | ACAGGTGGAGGTGAGG-1,FigureClone_B
 26 | AGCGACAGGAACGGTC-1,FigureClone_B
 27 | GCCCGCGCGTAAACGG-1,FigureClone_B
 28 | CACCGTTAGGGATCAC-1,FigureClone_B
 29 | AATAACACTAGAACAA-1,FigureClone_B
 30 | TTGATTAGCTGTTTCT-1,FigureClone_B
 31 | CCAGAAAGCAACTCAT-1,FigureClone_B
 32 | CACGTCGGCAACCTCT-1,FigureClone_B
 33 | TACCTACTCCCAGTAT-1,FigureClone_B
 34 | CTCGCCGAATGTAGGG-1,FigureClone_B
 35 | CAGACGAACCTGATAC-1,FigureClone_B
 36 | TTCCGGCCTTGAGGCT-1,FigureClone_B
 37 | GGTGAAGTACAGGGAT-1,FigureClone_B
 38 | ATCACGTGCTAATTAA-1,FigureClone_B
 39 | GGTAGACCGTTGGGCG-1,FigureClone_B
 40 | TCACAGCAAACTCGAA-1,FigureClone_B
 41 | CCGTGTTAAATTCCAT-1,FigureClone_B
 42 | TAGCTAGAAGGCATGA-1,FigureClone_B
 43 | CCAGTCTTGTCATAGA-1,FigureClone_B
 44 | TTACCCTAGGGATTGG-1,FigureClone_B
 45 | AGCGGACACTTCGTAG-1,FigureClone_B
 46 | TGAGTAAATTAGCGTA-1,FigureClone_B
 47 | ATACCTAACCAAGAAA-1,FigureClone_B
 48 | GAAGCCTGCACATTCC-1,FigureClone_B
 49 | TGAGGAGTGCCAGCTT-1,FigureClone_B
 50 | ATACGTTATGCACGGA-1,FigureClone_B
 51 | CGTTGTCGGCAATTGA-1,FigureClone_B
 52 | TACCGTAGGTTAACTA-1,FigureClone_B
 53 | CCAGCTCGAACGCATT-1,FigureClone_B
 54 | AATAGAACAGAGTGGC-1,FigureClone_B
 55 | ACTACGCGTTAGAATT-1,FigureClone_B
 56 | CCTGTACTCACGCCCA-1,FigureClone_B
 57 | CGGTTGACCTGGCATA-1,FigureClone_B
 58 | CTTCCGCTCCGTGAAG-1,FigureClone_B
 59 | GCTAGCTTGAATAGCT-1,FigureClone_B
 60 | CCAAGAAAGTGGGCGA-1,FigureClone_B
 61 | GCGGACCGCGTTGTGG-1,FigureClone_B
 62 | TGGCGACTGCTCCAAA-1,FigureClone_B
 63 | TTACTGGGATATTTCA-1,FigureClone_B
 64 | GAGTAGATACTAGTTG-1,FigureClone_B
 65 | GACATCGATTTATAAC-1,FigureClone_B
 66 | TGTGAGACTAGCCCAA-1,FigureClone_B
 67 | ATAATTAGCTAAGTAG-1,FigureClone_B
 68 | GTCGCCGTTGTGTGTT-1,FigureClone_B
 69 | GCACGCCTACTTAGAT-1,FigureClone_B
 70 | AACTCTCAGTGTGCTC-1,FigureClone_B
 71 | CAATATTCTTGACCTA-1,FigureClone_B
 72 | TAATAGAACAGAGTTA-1,FigureClone_B
 73 | GTCGGGAAGCAGAAAC-1,FigureClone_B
 74 | TTGCGGCATCAGAAAG-1,FigureClone_B
 75 | TAAGTAACATCTTGAC-1,FigureClone_B
 76 | GTGGTATAGTCTGCCG-1,FigureClone_B
 77 | TCGTCAAGTACGCGCA-1,FigureClone_B
 78 | TCAACGCAGGAAATAA-1,FigureClone_B
 79 | TTGACCGTGTTAATGA-1,FigureClone_B
 80 | TGCGAGAATATTACCC-1,FigureClone_B
 81 | CGTTTGTGTAGAGGGT-1,FigureClone_B
 82 | CCGCGGAATGCGTCAC-1,FigureClone_B
 83 | TCGTTGCTATCCGGTC-1,FigureClone_B
 84 | TCTGTTACCCAGCATA-1,FigureClone_B
 85 | ATCAAACGAAGGTTTG-1,FigureClone_B
 86 | CCGCTTACCTCACTCT-1,FigureClone_B
 87 | TTGCGTCGGCCAACCG-1,FigureClone_B
 88 | CTAACTGGTCCGGTTC-1,FigureClone_B
 89 | TCGTTAGGAGTCCCTA-1,FigureClone_B
 90 | CAGCGATTCCCTTCAA-1,FigureClone_B
 91 | CAGAGGCGATGCATGA-1,FigureClone_B
 92 | CATTTGAGTGGTACGT-1,FigureClone_B
 93 | CAGACACCGATCGCTG-1,FigureClone_B
 94 | TTCCACACAGATTTGA-1,FigureClone_B
 95 | AGGCTTCCCGAAGAAG-1,FigureClone_B
 96 | CATAGCGTTGCCCACC-1,FigureClone_B
 97 | CCTATACCGTCCTGTC-1,FigureClone_B
 98 | ATCCAGAGCAACAACC-1,FigureClone_B
 99 | CTGCTGAGGCCACGAA-1,FigureClone_B
100 | CACCCGGTTTGTGACT-1,FigureClone_B
101 | AAACCGTTCGTCCAGG-1,FigureClone_B
102 | GTAATCTGATTCTTCG-1,FigureClone_B
103 | CTTCTATTAATGCTAG-1,FigureClone_B
104 | TGATACATTTAGCCGT-1,FigureClone_B
105 | AAATTTGCGGGTGTGG-1,FigureClone_B
106 | ACCCTATGCCATATCG-1,FigureClone_B
107 | GCTGTATTACTGGCCC-1,FigureClone_B
108 | TATTCCTCCGCCCACT-1,FigureClone_B
109 | CCGGTTTGTAATTGTG-1,FigureClone_B
110 | GATCCTCGACACTGGC-1,FigureClone_B
111 | TAGATATGGACTGGAA-1,FigureClone_B
112 | GAGTATGCCCGCCTTG-1,FigureClone_B
113 | TACTGAACAGATTTAG-1,FigureClone_B
114 | CGGTGCGCGTTGGTCC-1,FigureClone_B
115 | CCACCAACTTTACTGT-1,FigureClone_B
116 | CCTACATTCACAGACG-1,FigureClone_B
117 | TCGAGCCAGGCAGGCC-1,FigureClone_B
118 | GTAGAGGGAGACAAGT-1,FigureClone_B
119 | CCGAACACTGGGCCTC-1,FigureClone_B
120 | GGGCAGAGCAATCGTT-1,FigureClone_B
121 | TATTAACCTGACCGCG-1,FigureClone_B
122 | ACCTAAGTACCTTTCA-1,FigureClone_B
123 | GTGGACCAACCCGATT-1,FigureClone_B
124 | TCTAGCAATCTCCGCC-1,FigureClone_B
125 | AAATCGTGTACCACAA-1,FigureClone_B
126 | CATAGTCCACAAGAAC-1,FigureClone_B
127 | GGTTACCACCCTCGGG-1,FigureClone_B
128 | TTGCACAATTCAGAAA-1,FigureClone_B
129 | TGGCAGATTACGATCA-1,FigureClone_B
130 | GAGTAAGGCCACGGGA-1,FigureClone_A
131 | GCAGGAACTTAGATCT-1,FigureClone_A
132 | CAGTACCAGTTTACGT-1,FigureClone_A
133 | AGGATCACGCGATCTG-1,FigureClone_A
134 | CGGAAAGAATCAAACG-1,FigureClone_A
135 | CGACCCTTAACGCCGG-1,FigureClone_A
136 | CCCAGTAAACTTGGGA-1,FigureClone_A
137 | CACCCTTGGTGAGACC-1,FigureClone_A
138 | TCGGTCCCGACAATAG-1,FigureClone_A
139 | CCGACGGGCATGAGGT-1,FigureClone_A
140 | TGGCCAATTTGGTACT-1,FigureClone_A
141 | CTTCAGTGGTCGCCTA-1,FigureClone_A
142 | GACCCAATTATGATAC-1,FigureClone_A
143 | GGATCTTGACTCAACC-1,FigureClone_A
144 | AGCCTAATACCCACGT-1,FigureClone_A
145 | GTTAGGCTACCCGTTT-1,FigureClone_A
146 | AGCAACCGAAAGTAAT-1,FigureClone_A
147 | CAGATCCTGGTTTGAA-1,FigureClone_A
148 | TCAACAAAGATAATTC-1,FigureClone_A
149 | TTGAATCGTTGTATAA-1,FigureClone_A
150 | CTATGTGAGTCACGGC-1,FigureClone_A
151 | AATCTAGGTTTACTTG-1,FigureClone_A
152 | CCTAGGTAAAGGTAGC-1,FigureClone_A
153 | AGTATGCTGGAGACCA-1,FigureClone_A
154 | GCTGAATCTTCCAATC-1,FigureClone_A
155 | AGAAGTGATTCGTGAT-1,FigureClone_A
156 | CTCTCTAACTGCCTAG-1,FigureClone_A
157 | GGGCAACCGCACGTGC-1,FigureClone_A
158 | GCCCTAGCCGTCGCGA-1,FigureClone_A
159 | TATCTTGCAATACAAC-1,FigureClone_A
160 | TGCCAAAGTCAGACTT-1,FigureClone_A
161 | CCGGCGTGAGACTCTG-1,FigureClone_A
162 | AATCTGGCTTTCTAGT-1,FigureClone_A
163 | TCGGCGTACTGCACAA-1,FigureClone_A
164 | AAACAGGGTCTATATT-1,FigureClone_A
165 | TCCTCCTAAGACATTC-1,FigureClone_A
166 | TACCTTAAGATTTCCC-1,FigureClone_A
167 | AAATGGCCCGTGCCCT-1,FigureClone_A
168 | CATATGTCAGGCTACG-1,FigureClone_A
169 | TACTCTTTCGTCTTCA-1,FigureClone_A
170 | CGAACGGCCGGACAAC-1,FigureClone_A
171 | CTGGACGCAGTCCGGC-1,FigureClone_A
172 | TACTTTCCGCACGCCA-1,FigureClone_A
173 | ATCAGCTCGTCCACTA-1,FigureClone_A
174 | AGGGCGAGCAGCTGAT-1,FigureClone_A
175 | TTAGGTCATAACCGAC-1,FigureClone_A
176 | CAAAGATTATTGGGCC-1,FigureClone_A
177 | AATGTGCCCGAGGTGT-1,FigureClone_A
178 | TTGCCATAGCCCGCTC-1,FigureClone_A
179 | TAACATACACGCGATC-1,FigureClone_A
180 | CGAGTGAAGGTACCAG-1,FigureClone_A
181 | AGTCGGCTCAACTTTA-1,FigureClone_A
182 | CTCGGTTGTCGGCCCT-1,FigureClone_A
183 | AACACGAGACGCGGCC-1,FigureClone_A
184 | GTATCTCAGTCTTGAC-1,FigureClone_A
185 | ATGTGAAAGCCTAATG-1,FigureClone_A
186 | AGTCTCACAAGACTAC-1,FigureClone_A
187 | AAATTGATAGTCCTTT-1,FigureClone_A
188 | TCATCGATGGTCCCAA-1,FigureClone_A
189 | TCTAATACTGCCTCAG-1,FigureClone_A
190 | GGTAACCGGGAGGATA-1,FigureClone_A
191 | ACCTGCGTGTCATGTT-1,FigureClone_A
192 | CCTTTAAGGGAGCACT-1,FigureClone_A
193 | ACGGGAGTGTCGGCCC-1,FigureClone_A
194 | TTCTTGGACGATCTGC-1,FigureClone_A
195 | TCAACATAGCGCCCTA-1,FigureClone_A
196 | TAAGTCGCCGAGTATC-1,FigureClone_A
197 | AGACGACGATGCCGCT-1,FigureClone_A
198 | AATGACAGCAATGTCT-1,FigureClone_A
199 | AAACTTGCAAACGTAT-1,FigureClone_A
200 | CCACGGAGCCATAAGA-1,FigureClone_A
201 | TTAACTTCAGGTAGGA-1,FigureClone_A
202 | CAGGCGCACGGTGGTC-1,FigureClone_A
203 | CGATCTGTTGGAGGAC-1,FigureClone_A
204 | TTGTTTCCATACAACT-1,FigureClone_A
205 | AGTTTGGCCAGACCTA-1,FigureClone_A
206 | CACCGTTGCGCGATAT-1,FigureClone_A
207 | CGCGCAAATGTCCAGA-1,FigureClone_A
208 | CTTCTATGTTGAAGTA-1,FigureClone_A
209 | AGCTGTAACCTCAATC-1,FigureClone_A
210 | ATGACGCGTTCTATCC-1,FigureClone_A
211 | AGGTCAGGTGAGAGTG-1,FigureClone_A
212 | ATTTGCGCGAGTAGCT-1,FigureClone_A
213 | 


--------------------------------------------------------------------------------
/FigureScripts/SpotLevelCloneCalls/Figure2/H2_2_Clones.csv:
--------------------------------------------------------------------------------
 1 | ﻿Barcode,CloneNames
 2 | GCAGAAGGTAATCTCC-1,FigureClone_F
 3 | ACTAGTTGCGATCGTC-1,FigureClone_C
 4 | ATTACATGTCAGTCTT-1,FigureClone_C
 5 | TTGGACCATCTGGCAA-1,FigureClone_C
 6 | TTGGGACGTAAGAGTT-1,FigureClone_C
 7 | CCCTCCTCGCTCGTAT-1,FigureClone_C
 8 | CTTCGGCCAATTGTTT-1,FigureClone_C
 9 | TTCGGTGGAGACGCCC-1,FigureClone_B
10 | CTTTGCTGTCATGGAT-1,FigureClone_B
11 | TACTGTTTCTCTGGTA-1,FigureClone_B
12 | AGACCGCTCCGCGGTT-1,FigureClone_B
13 | CCGATATGACGTAAGG-1,FigureClone_B
14 | AAACCGGAAATGTTAA-1,FigureClone_B
15 | AGACAGGCATCTCAGC-1,FigureClone_B
16 | GGTGTTGGGCGTCTTA-1,FigureClone_B
17 | GGACTCACAAATTAGG-1,FigureClone_B
18 | TGTGGTAGGGTGCCTT-1,FigureClone_B
19 | CGTGGAAGCCTCGTAC-1,FigureClone_B
20 | CTGCTGTCTAACGAGC-1,FigureClone_B
21 | ATTGTTCAACGATCCG-1,FigureClone_B
22 | GTGCAGCGTAGAGTAG-1,FigureClone_B
23 | 


--------------------------------------------------------------------------------
/Images/KTH_Logotyp_PMS_2013.eps:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/Images/KTH_Logotyp_PMS_2013.eps


--------------------------------------------------------------------------------
/Images/primary-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/Images/primary-logo.png


--------------------------------------------------------------------------------
/Images/secondary-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/Images/secondary-logo.png


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | exportPattern("^[[:alpha:]]+")
2 | import(tidyverse, infercnv, Seurat, hdf5r, phylogram, ape)
3 | 


--------------------------------------------------------------------------------
/R/ExtractSectionWise.R:
--------------------------------------------------------------------------------
 1 | #' Counting, per barcode, the inferred-CNV gene calls that recur across a section, as part of spatial visualization of spatial transcriptomics data.
 2 | #'
 3 | #' ExtractSectionWise()
 4 | #'
 5 | #' The rows of one section are kept; genes occurring in no more than
 6 | #' round(Threshold * N) of those rows are discarded, where N is the number of
 7 | #' rows of AllBarcodes whose Histology equals SectionName; the remaining rows
 8 | #' are then counted per barcode.
 9 | #'
10 | #' @param SectionName A character string for section name.
11 | #' @param CNV_Genes_Organscale_Input A dataframe, mirroring the structure of infercnv::run output file 17_HMM_predHMMi6.hmm_mode-cells.pred_cnv_genes.dat, with an additional `section` column. The columns `section`, `gene` and `cell_group_name` are used.
12 | #' @param AllBarcodes A dataframe of barcodes and annotations; it must contain a `Histology` column.
13 | #' @param Threshold A numerical fraction (e.g. 0.45), not a percentage from 0-100: it is multiplied by N to obtain the row count a gene must exceed to be kept.
14 | #'
15 | #' @return A dataframe with one row per barcode and two columns, `Barcode` and `PercentageGenomeAltered`. Despite its name, `PercentageGenomeAltered` is a count (the number of retained rows for that barcode), not a percentage.
16 | #'
17 | #' @examples
18 | #' ExtractSectionWise("H2_1", CNV_Genes_Filtered, AllBarcodes, 0.45)
19 | 
20 | ExtractSectionWise <- function(SectionName, CNV_Genes_Organscale_Input, AllBarcodes, Threshold) {
21 |   section_name <- as.character(SectionName)
22 |   # Restrict the organ-scale calls to the requested section.
23 |   section_calls <- CNV_Genes_Organscale_Input %>%
24 |     filter(section == section_name) %>%
25 |     select(-section)
26 |   # A gene is kept only if it occurs in more than this many rows of the section.
27 |   n_section_barcodes <- nrow(filter(AllBarcodes, Histology == section_name))
28 |   min_rows <- round(Threshold * n_section_barcodes, 0)
29 |   recurrent_genes <- section_calls %>%
30 |     group_by(gene) %>%
31 |     tally() %>%
32 |     filter(n > min_rows)
33 |   # semi_join() filters rows without carrying the per-gene `n` column along.
34 |   # With an inner join that column would be present in the data handed to the
35 |   # final tally(), which older dplyr releases silently use as a weight.
36 |   per_barcode <- section_calls %>%
37 |     semi_join(recurrent_genes, by = "gene") %>%
38 |     group_by(cell_group_name) %>%
39 |     tally()
40 |   names(per_barcode)[1] <- "Barcode"
41 |   names(per_barcode)[2] <- "PercentageGenomeAltered"
42 |   return(per_barcode)
43 | }
44 | 


--------------------------------------------------------------------------------
/R/FinalAnnotations.R:
--------------------------------------------------------------------------------
 1 | #' Creating a finalized annotation dataframe containing only barcodes in the count file.
 2 | #'
 3 | #' FinalAnnotations()
 4 | #'
 5 | #' @param InputOriginalAnnotationFile A dataframe of barcodes selected for analysis; it must contain a `Barcode` column.
 6 | #' @param InputCounts A joined count dataframe whose column names are the barcodes selected for analysis that have passed QC (counts per spot >= 500 counts)
 7 | #' @return A dataframe with one row per column name of `InputCounts`, carrying the matching columns of `InputOriginalAnnotationFile` (NA where a column name has no annotation).
 8 | #' @examples
 9 | #' FinalAnnotations(MergedAll, Counts_joined)
10 | 
11 | FinalAnnotations <- function(InputOriginalAnnotationFile, InputCounts) {
12 |   # The barcodes are the column names of the count table. Reading them with
13 |   # colnames(), rather than transposing the first row, also works for a
14 |   # single-column or matrix input, where `InputCounts[1, ]` drops to a vector.
15 |   count_barcodes <- data.frame(Barcode = as.character(colnames(InputCounts)),
16 |                                stringsAsFactors = FALSE)
17 |   # right_join() keeps every barcode found in the counts; the key is explicit
18 |   # so the join never silently picks up another shared column.
19 |   annotated <- right_join(InputOriginalAnnotationFile, count_barcodes, by = "Barcode")
20 |   return(annotated)
21 | }
22 | 


--------------------------------------------------------------------------------
/R/ImportCountData.R:
--------------------------------------------------------------------------------
 1 | #' Reading Visium spatial transcriptomics counts from a filtered_feature_bc_matrix.h5 file (output from SpaceRanger pipeline) into a dataframe with section-prefixed barcodes
 2 | #'
 3 | #' ImportCountData()
 4 | #'
 5 | #' @param SectionName A character string for section name.
 6 | #' @param InputCountFile A file path to a filtered_feature_bc_matrix.h5 file (output from 10X Genomics SpaceRanger pipeline)
 7 | #' @return A dataframe obtained by transposing the imported matrix: a `Barcode` column of the form `<SectionName>_<barcode>` (every "-" replaced by "."), followed by the count columns.
 8 | #' @examples
 9 | #' ImportCountData("H2_1", "./filtered_feature_bc_matrix.h5")
10 | 
11 | ImportCountData <- function(SectionName, InputCountFile) {
12 |   # NOTE(review): use.names = FALSE presumably labels features by ID rather than by name; confirm against the Seurat documentation.
13 |   imported_counts <- as.matrix(Read10X_h5(InputCountFile, use.names = FALSE))
14 |   # Transpose, then expose the row names of the transposed table as `Barcode`.
15 |   spot_counts <- rownames_to_column(as.data.frame(t(imported_counts)), "Barcode")
16 |   # The "-" substitution runs after prefixing, so it also applies to SectionName.
17 |   spot_counts$Barcode <- gsub("\\-", "\\.", paste0(SectionName, "_", spot_counts$Barcode))
18 |   return(spot_counts)
19 | }
20 | 


--------------------------------------------------------------------------------
/R/ImportHistologicalAnnotations.R:
--------------------------------------------------------------------------------
 1 | #' Importing histological annotations of Visium barcodes and appending a section name to the barcodes.
 2 | #'
 3 | #' ImportHistologicalAnnotations()
 4 | #'
 5 | #' @param SectionName A character string for section name.
 6 | #' @param InputAnnotationFile A file path to a .csv file with a `Barcode` column and the annotation in its second column (for example, output from LoupeBrowser after manual annotations). A leading UTF-8 byte-order mark is tolerated.
 7 | #' @return A dataframe of annotations in which the second column is named `Histology` and `Barcode` has the form `<SectionName>_<barcode>`, with every "-" of the barcode replaced by ".".
 8 | #' @examples
 9 | #' ImportHistologicalAnnotations("H1_2", "./H1_2_Final_Consensus_Annotations.csv")
10 | 
11 | ImportHistologicalAnnotations <- function(SectionName, InputAnnotationFile) {
12 |   # "UTF-8-BOM" removes a byte-order mark if one is present. Without it the
13 |   # mark is glued onto the first header, which then is no longer "Barcode".
14 |   annotations <- read.csv(InputAnnotationFile, fileEncoding = "UTF-8-BOM")
15 |   if (!"Barcode" %in% names(annotations)) {
16 |     stop("No 'Barcode' column found in ", InputAnnotationFile)
17 |   }
18 |   names(annotations)[2] <- "Histology"
19 |   # Only the barcode itself is rewritten; SectionName is prefixed unchanged.
20 |   section_free_barcodes <- gsub("-", ".", annotations$Barcode, fixed = TRUE)
21 |   annotations$Barcode <- paste0(SectionName, "_", section_free_barcodes)
22 |   return(annotations)
23 | }
24 | 


--------------------------------------------------------------------------------
/R/ImportHistologicalOriginalSTSelections.R:
--------------------------------------------------------------------------------
 1 | #' Reading a spatial transcriptomics (1k array) spot selection file and building section-prefixed barcodes from its coordinates.
 2 | #'
 3 | #' ImportHistologicalOriginalSTSelections()
 4 | #'
 5 | #' @param SectionName A character string for section name.
 6 | #' @param InputAnnotationFile A file path to a tab-separated .tsv file containing the columns `x` and `y`
 7 | #' @return A single-column dataframe, `Barcode`, with values of the form `<SectionName>_<x>x<y>`
 8 | #' @examples
 9 | #' ImportHistologicalOriginalSTSelections("H2_1", "./Patient 1/1k_arrays/H2_1/spot_data-selection-180903_L11_CN63_D1_P_H2.1_CY3_EB_aligned.tsv")
10 | 
11 | ImportHistologicalOriginalSTSelections <- function(SectionName, InputAnnotationFile) {
12 |   # Only the two coordinate columns of the selection file are needed.
13 |   spot_coordinates <- select(read.delim(paste0(InputAnnotationFile), sep = "\t"), x, y)
14 |   spot_coordinates$Barcode <- paste0(SectionName, "_", spot_coordinates$x, "x", spot_coordinates$y)
15 |   # Hand back the barcode column alone.
16 |   return(select(spot_coordinates, Barcode))
17 | }
18 | 


--------------------------------------------------------------------------------
/R/ImportOriginalSTCountData.R:
--------------------------------------------------------------------------------
 1 | #' Reading spatial transcriptomics (1k array) count data and prefixing the barcodes with the section name.
 2 | #'
 3 | #' ImportOriginalSTCountData()
 4 | #'
 5 | #' @param SectionName A character string for section name.
 6 | #' @param InputCountFile A file path to a .tsv file whose first column holds the row names
 7 | #' @return A dataframe of count data whose first column, `Barcode`, holds the file's row names prefixed with `<SectionName>_`
 8 | #' @examples
 9 | #' ImportOriginalSTCountData("H2_1", "./Patient 1/1k_arrays/H2_1/180903_L11_CN63_D1_H2.1_EB_stdata.tsv")
10 | 
11 | ImportOriginalSTCountData <- function(SectionName, InputCountFile) {
12 |   raw_counts <- read.delim(InputCountFile, row.names = 1)
13 |   # Move the row names into a leading column, then name that column "Barcode".
14 |   counts <- rownames_to_column(as.data.frame(raw_counts), var = "rowname")
15 |   names(counts)[1] <- "Barcode"
16 |   counts[[1]] <- paste0(SectionName, "_", counts[[1]])
17 |   return(counts)
18 | }
19 | 


--------------------------------------------------------------------------------
/R/MergingCountAndAnnotationData.R:
--------------------------------------------------------------------------------
 1 | #' Merging Visium spatial transcriptomics count and annotation data, as well as applying a QC filter to only include spots with >= 500 counts
 2 | #'
 3 | #' MergingCountAndAnnotationData()
 4 | #'
 5 | #' @param SectionName A character string for section name. Not referenced in the function body; kept so existing calls continue to work.
 6 | #' @param InputAnnotationFile An annotation file containing all barcodes to be used in the analysis (bound dataframe of one or more outputs from ImportHistologicalAnnotations()); it must contain the columns `Barcode` and `Histology`.
 7 | #' @param InputCountFile A dataframe of Visium count data (output from ImportCountData())
 8 | #' @return A dataframe with a `Genes` column followed by one column per annotated barcode whose summed counts are >= 500
 9 | #' @examples
10 | #' MergingCountAndAnnotationData("H2_1",MergedAll, H2_1_ENSBMLID_Counts)
11 | 
12 | MergingCountAndAnnotationData <- function(SectionName, InputAnnotationFile, InputCountFile) {
13 |   # Only the barcodes are needed from the annotations; the join keeps the
14 |   # count rows of annotated barcodes.
15 |   selected_barcodes <- select(InputAnnotationFile, -Histology)
16 |   merged <- inner_join(selected_barcodes, InputCountFile, by = "Barcode")
17 |   merged <- remove_rownames(merged)
18 |   merged <- column_to_rownames(merged, "Barcode")
19 |   # QC: keep spots whose total count is at least 500.
20 |   merged$Total <- rowSums(merged)
21 |   merged <- merged %>% filter(Total >= 500)
22 |   merged <- select(merged, -Total)
23 |   # Transpose so that rows are genes and columns are barcodes.
24 |   merged <- as.data.frame(t(merged))
25 |   # Discard columns that are entirely NA. drop = FALSE keeps a dataframe even
26 |   # when a single spot survives QC; otherwise the result collapses to a vector.
27 |   has_values <- colSums(is.na(merged)) < nrow(merged)
28 |   merged <- merged[, has_values, drop = FALSE]
29 |   merged <- tibble::rownames_to_column(merged, "Genes")
30 |   return(merged)
31 | }
32 | 


--------------------------------------------------------------------------------
/R/OriginalST_MergingCountAndAnnotationData.R:
--------------------------------------------------------------------------------
 1 | #' Merging spatial transcriptomics, 1k array count files and barcodes, and applying a QC metric to only select
 2 | #' ST spots with >=500 total unique molecular identifiers.
 3 | #'
 4 | #' OriginalST_MergingCountAndAnnotationData()
 5 | #'
 6 | #' @param InputAnnotationFile An annotation file created by ImportHistologicalOriginalSTSelections(); it must contain a `Barcode` column. A `Histology` column, if present, is dropped before merging.
 7 | #' @param InputCountFile A ST count file created by ImportOriginalSTCountData()
 8 | #' @return A dataframe with a `Genes` column followed by one column per selected barcode whose summed counts are >= 500
 9 | #' @examples
10 | #' OriginalST_MergingCountAndAnnotationData(Barcodes_H2_1, Counts_H2.1)
11 | 
12 | OriginalST_MergingCountAndAnnotationData <- function(InputAnnotationFile, InputCountFile) {
13 |   # Drop Histology only when it exists: the output of
14 |   # ImportHistologicalOriginalSTSelections() has no such column, and an
15 |   # unconditional select(-Histology) fails on it.
16 |   kept_columns <- setdiff(names(InputAnnotationFile), "Histology")
17 |   selected_barcodes <- InputAnnotationFile[, kept_columns, drop = FALSE]
18 |   merged <- inner_join(selected_barcodes, InputCountFile, by = "Barcode")
19 |   merged <- remove_rownames(merged)
20 |   merged <- column_to_rownames(merged, "Barcode")
21 |   # QC: keep spots whose total count is at least 500.
22 |   merged$Total <- rowSums(merged)
23 |   merged <- merged %>% filter(Total >= 500)
24 |   merged <- select(merged, -Total)
25 |   # Transpose so that rows are genes and columns are barcodes.
26 |   merged <- as.data.frame(t(merged))
27 |   # Discard columns that are entirely NA. drop = FALSE keeps a dataframe when
28 |   # only one spot is left, so no separate single-column branch is needed.
29 |   has_values <- colSums(is.na(merged)) < nrow(merged)
30 |   merged <- merged[, has_values, drop = FALSE]
31 |   merged <- tibble::rownames_to_column(merged, "Genes")
32 |   return(merged)
33 | }
34 | 


--------------------------------------------------------------------------------
/R/Output_PGA_Visualization_MatrixGreyNA.R:
--------------------------------------------------------------------------------
 1 | #' Preparing a matrix for spatial visualization of number of genes with an inferred CNV, derived from spatial transcriptomics data.
 2 | #'
 3 | #' Output_PGA_Visualization_MatrixGreyNA()
 4 | #'
 5 | #' @param SectionName A character string for section name. Not referenced in the function body; kept so existing calls continue to work.
 6 | #' @param InputCNVs An input dataframe created by the function ExtractSectionWise(), with the columns `Barcode` (of the form <section>_AxB) and `PercentageGenomeAltered`.
 7 | #' @param BarcodesFile A dataframe with a `Barcode` column listing barcode coordinates in the form AxB.
 8 | #'
 9 | #' @return A dataframe for spatial visualization by Plot_PGA_Visualization_Matrix(), with the columns `x`, `y` and `PercentageGenomeAltered`, sorted by `x` then `y`. `x` is the number after the "x" of AxB (B) and `y` is the number before it (A). Coordinates of BarcodesFile without a match in InputCNVs have NA in `PercentageGenomeAltered`.
10 | #' @examples
11 | #' Output_PGA_Visualization_MatrixGreyNA("H2_1", H2_1_Sectionwise_CNVsGenes_Counted, L2_Barcodes)
12 | 
13 | Output_PGA_Visualization_MatrixGreyNA <- function(SectionName, InputCNVs, BarcodesFile) {
14 |   # Split "<section>_AxB" at its last underscore and keep only the "AxB" part,
15 |   # named by column name (not by position) so the column order of InputCNVs is irrelevant.
16 |   spot_values <- InputCNVs %>%
17 |     extract(Barcode, c("SectionPrefix", "Barcode"), "(.*)_([^_]+)") %>%
18 |     select(-SectionPrefix)
19 |   # Left join keyed explicitly on Barcode: every listed coordinate is kept.
20 |   PGA_Visualization_Matrix <- left_join(BarcodesFile, spot_values, by = "Barcode")
21 |   # NOTE(review): x is read from after the "x" and y from before it, i.e. the
22 |   # reverse of "A = X, B = Y". Left as is because existing plots rely on it; confirm intent.
23 |   PGA_Visualization_Matrix$x <- as.numeric(sub('.*x', '', PGA_Visualization_Matrix$Barcode))
24 |   PGA_Visualization_Matrix$y <- as.numeric(sub('x.*', '', PGA_Visualization_Matrix$Barcode))
25 |   PGA_Visualization_Matrix <- PGA_Visualization_Matrix %>%
26 |     select(x, y, PercentageGenomeAltered) %>%
27 |     arrange(x, y)
28 |   return(PGA_Visualization_Matrix)
29 | }


--------------------------------------------------------------------------------
/R/Plot_PGA_Visualization_Matrix().R:
--------------------------------------------------------------------------------
 1 | #' Drawing the spatial distribution of genes with an inferred copy number alteration as a raster plot
 2 | #'
 3 | #' Plot_PGA_Visualization_Matrix()
 4 | #'
 5 | #' @param SectionName A character string for section name. Not referenced in the function body.
 6 | #' @param InputMatrix A dataframe with the columns `x`, `y` and `PercentageGenomeAltered`, as created by the function Output_PGA_Visualization_MatrixGreyNA()
 7 | #' @param MaxValInput The upper limit of the fill scale (the lower limit is 0), derived from the maximum sectionwise value of the number of inferred genes with a CNV (from ExtractSectionWise())
 8 | #'
 9 | #' @return A ggplot object: a raster of `PercentageGenomeAltered` over `x` and `y`, filled from blue (low) to yellow (high) with NA shown in grey50, and with the legend position set to "none".
10 | #' @examples
11 | #' Plot_PGA_Visualization_Matrix("H2_1", PGA_Matrix, MaxVal)
12 | 
13 | Plot_PGA_Visualization_Matrix <- function(SectionName, InputMatrix, MaxValInput) {
14 |   # Build the individual plot components first, then add them in a fixed order.
15 |   raster_layer <- geom_raster(aes(fill=PercentageGenomeAltered))
16 |   fill_scale <- scale_fill_gradient(limits = c(0, MaxValInput), low="blue", high="yellow", na.value = "grey50")
17 |   axis_labels <- labs(x="X-coord", y="Y-coord")
18 |   text_theme <- theme(axis.text.x=element_text(size=9, angle=0, vjust=0.3),
19 |                       axis.text.y=element_text(size=9),
20 |                       plot.title=element_text(size=11))
21 |   # Added after text_theme, so its axis text settings take precedence.
22 |   bare_theme <- theme(panel.border = element_blank(),
23 |                       panel.grid.major = element_blank(),
24 |                       panel.grid.minor = element_blank(),
25 |                       line = element_blank(),
26 |                       title = element_blank(),
27 |                       axis.text.x=element_blank(),
28 |                       axis.text.y=element_blank(),
29 |                       axis.ticks=element_blank(),
30 |                       legend.position = "none",
31 |                       plot.margin=grid::unit(c(0,0,0,0), "mm"))
32 |   ggplot(InputMatrix, aes(x = x, y = y)) +
33 |     raster_layer + fill_scale + axis_labels + theme_bw() + text_theme + bare_theme
34 | }
35 | 


--------------------------------------------------------------------------------
/R/SelectingSubTreeData.R:
--------------------------------------------------------------------------------
 1 | #' Listing the barcoded spots below one node of an inferCNV dendrogram, for node selection and further analysis.
 2 | #'
 3 | #' SelectingSubTreeData()
 4 | #'
 5 | #' @param SubtreeObject A dendrogram, phylo object created by subtrees(as.phylo([dendogram.txt]))
 6 | #' @param NodeOfInterest A numerical integer corresponding to a phylogram/dendogram node of interest; used to index SubtreeObject
 7 | #' @return A dataframe with one row per tip label of the selected subtree: `Barcode` (the tip label) and `Node` (the text "Node_" followed by NodeOfInterest)
 8 | #' @examples
 9 | #' SelectingSubTreeData(my.subtrees, 4617)
10 | 
11 | SelectingSubTreeData <- function(SubtreeObject, NodeOfInterest) {
12 |   # Tip labels of the chosen subtree are the barcodes of interest.
13 |   tip_labels <- SubtreeObject[[NodeOfInterest]]$tip.label
14 |   node_members <- mutate(as.data.frame(tip_labels), Node = paste0("Node_", NodeOfInterest))
15 |   names(node_members)[1] <- "Barcode"
16 |   return(node_members)
17 | }
18 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Walkthrough of Clone Calling from Prostate Cancer Visium Spatial Transcriptomics
  3 |   Data with InferCNV
  4 | author: "Andrew Erickson, Nuffield Department of Surgical Sciences, University of Oxford"
  5 | output:
  6 |   md_document:
  7 |     variant: markdown_github
  8 | ---
  9 | 
 10 | # siCNV: Spatial InferCNV from Spatial Transcriptomics Data
 11 | 
 12 | Spatially resolved transcriptomics has emerged as a genome-wide analysis of gene expression to explore tissues in an unsupervised manner. In this study we infer genome-wide copy-number variations (CNV) from spatially resolved mRNA profiles in situ. Gene expression has [previously been used to infer CNVs](https://github.com/broadinstitute/infercnv) in single cells, successfully identifying regions of chromosomal gain and loss. Here we expand into a spatial modality, generating CNV calls in each spatial region represented by barcoded spots.
 13 | 
 14 | We provide an R package via this GitHub page, as well as [scripts to reproduce the main figures](https://github.com/aerickso/SpatialInferCNV/tree/main/FigureScripts) in the manuscript.
 15 | 
 16 | This code was tested using [R](https://www.r-project.org/) version 4.1.3, a Windows 11 Computer, 32GB RAM, and 12 CPUs (1.6 GHz). 
 17 | 
 18 | For timely data analyses of datasets comprising 2 or more Visium sections, consider use of a high performance cluster. In our project, the infercnv::run analysis steps were run on the [BMRC](https://www.medsci.ox.ac.uk/divisional-services/support-services-1/bmrc/cluster-usage), with 10-20 CPUs, each 1.6 GHz and 16GB RAM.
 19 | 
 20 | # System level dependency (for the hdf5r package)
 21 | 
 22 | SpatialInferCNV has HDF5 as a system level dependency which needs to be installed before installing the `hdf5r` R package. See here for details: https://github.com/hhoeflin/hdf5r#Requirements. For Windows users, you can download the Windows version here: https://github.com/mannau/h5-libwin.
 23 | 
 24 | # Installation of SpatialInferCNV Dependencies - R
 25 | 
 26 | ```{r, eval = FALSE}
 27 | install.packages("devtools")
 28 | if (!requireNamespace("BiocManager", quietly = TRUE))
 29 |      install.packages("BiocManager")
 30 | BiocManager::install("infercnv")
 31 | install.packages("tidyverse")
 32 | install.packages("Seurat")
 33 | install.packages("phylogram")
 34 | install.packages("ape")
 35 | install.packages("hdf5r")
 36 | ```
 37 | 
 38 | # Installation 
 39 | 
 40 | ```{r, eval = FALSE}
 41 | install.packages("devtools")
 42 | library(devtools)
 43 | install_github("aerickso/SpatialInferCNV")
 44 | library(SpatialInferCNV)
 45 | ```
 46 | 
 47 | # SpatialInferCNV installation in a conda environment - MacOS or Linux
 48 | 
 49 | We also provide environment files to set up a conda environment in a MacOSX or Linux environment with all the dependencies necessary for SpatialInferCNV. Installation was tested using MacOS Mojave, Version 10.14.6 and an HPC running Scientific Linux 7.9. If you have anaconda installed, you can create a new environment and activate it by running the code below.
 50 | 
 51 | [environment.yml file](https://github.com/aerickso/SpatialInferCNV/blob/main/environment.yml)
 52 | 
 53 | ```
 54 | conda env create -f environment.yml
 55 | conda activate SpatialInferCNV
 56 | ```
 57 | 
 58 | If you want to run RStudio within this environment you can install it from the terminal with conda:
 59 | 
 60 | ```
 61 | # make sure that the environment is active
 62 | conda install -c r rstudio
 63 | rstudio
 64 | ```
 65 | 
 66 | Once you have created the environment you can install SpatialInferCNV.
 67 | 
 68 | ```{r, eval = FALSE}
 69 | # From R
 70 | install.packages("devtools")
 71 | library(devtools)
 72 | install_github("aerickso/SpatialInferCNV")
 73 | ```
 74 | 
 75 | # SpatialInferCNV installation in a conda environment - Windows
 76 | 
 77 | This was tested in Anaconda3, conda version 4.12.0, on Windows 11, R version 4.1.3.
 78 | 
 79 | In anaconda3 terminal, create a new conda environment with R 4.1.3.
 80 | 
 81 | ```
 82 | conda config --add channels conda-forge 
 83 | conda create -n siCNV r-base=4.1.3
 84 | # Select yes to install all new packages
 85 | conda activate siCNV
 86 | 
 87 | R
 88 | ```
 89 | Install devtools, and configure the file download method for Windows so that install_github() can resolve the repository.
 90 | 
 91 | ```{r, eval = FALSE}
 92 | install.packages("devtools")
 93 | library(devtools)
 94 | options(download.file.method = "wininet")
 95 | ```
 96 | 
 97 | Installing R dependencies. Note: hdf5r has a system level dependency of hdf5, see above for more details.
 98 | 
 99 | ```{r, eval = FALSE}
100 | if (!requireNamespace("BiocManager", quietly = TRUE))
101 |      install.packages("BiocManager")
102 | BiocManager::install("infercnv")
103 | install.packages("tidyverse")
104 | install.packages("Seurat")
105 | install.packages("phylogram")
106 | install.packages("ape")
107 | install.packages("hdf5r")
108 | # enter a to update/install all
109 | 
110 | library(infercnv)
111 | library(tidyverse)
112 | library(Seurat)
113 | library(phylogram)
114 | library(ape)
115 | library(hdf5r)
116 | ```
117 | 
118 | After installing the R dependencies, install and initialize SpatialInferCNV.
119 | 
120 | ```{r, eval = FALSE}
121 | install_github("aerickso/SpatialInferCNV")
122 | # enter 1 to update all
123 | # yes 
124 | library(SpatialInferCNV)
125 | ```
126 | 
127 | # Userguide
128 | 
129 | The package provides a number of functions, please read the user guide [here](https://aerickso.github.io/SpatialInferCNV/).
130 | 
131 | # Study Data
132 | 
133 | We provide data used in this study at the following [Mendeley Repository](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29).
134 | 
135 | 
136 | 
137 | 
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 
150 | 
151 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <img src="https://www.nds.ox.ac.uk/images/logos/secondary-logo" height="75" /> <img src="https://www.nds.ox.ac.uk/images/logos/primary-logo" height="75"/> 
  2 | 
  3 | # siCNV: Spatial InferCNV from Spatial Transcriptomics Data
  4 | 
  5 | Spatially resolved transcriptomics has emerged as a genome-wide analysis
  6 | of gene expression to explore tissues in an unsupervised manner. In this
  7 | study we infer genome-wide copy-number variations (CNV) from spatially
  8 | resolved mRNA profiles in situ. Gene expression has [previously been
  9 | used to infer CNVs](https://github.com/broadinstitute/infercnv) in
 10 | single cells, successfully identifying regions of chromosomal gain and
 11 | loss. Here we expand into a spatial modality, generating CNV calls in
 12 | each spatial region represented by barcoded spots.
 13 | 
 14 | We provide an R package via this GitHub page, as well as [scripts to
 15 | reproduce the main
 16 | figures](https://github.com/aerickso/SpatialInferCNV/tree/main/FigureScripts)
 17 | in the manuscript.
 18 | 
 19 | This code was tested using [R](https://www.r-project.org/) version
 20 | 4.1.3, a Windows 11 Computer, 32GB RAM, and 12 CPUs (1.6 GHz).
 21 | 
 22 | For timely data-analyses of datasets comprising 2 or more Visium
 23 | sections, consider use of a high performance cluster. In our project,
 24 | the infercnv::run analysis steps were run on the
 25 | [BMRC](https://www.medsci.ox.ac.uk/divisional-services/support-services-1/bmrc/cluster-usage),
 26 | with 10-20 CPUs, each 1.6 GHz and 16GB RAM.
 27 | 
 28 | # System level dependency (for the hdf5r package)
 29 | 
 30 | SpatialInferCNV has HDF5 as a system level dependency which needs to be
 31 | installed before installing the `hdf5r` R package. See here for details:
 32 | <https://github.com/hhoeflin/hdf5r#Requirements>. For Windows users, you
 33 | can download the windows version here:
 34 | <https://github.com/mannau/h5-libwin>.
 35 | 
 36 | # Installation of SpatialInferCNV Dependencies - R
 37 | 
 38 | ``` r
 39 | install.packages("devtools")
 40 | if (!requireNamespace("BiocManager", quietly = TRUE))
 41 |      install.packages("BiocManager")
 42 | BiocManager::install("infercnv")
 43 | install.packages("tidyverse")
 44 | install.packages("Seurat")
 45 | install.packages("phylogram")
 46 | install.packages("ape")
 47 | install.packages("hdf5r")
 48 | ```
 49 | 
 50 | # Installation
 51 | 
 52 | ``` r
 53 | install.packages("devtools")
 54 | library(devtools)
 55 | install_github("aerickso/SpatialInferCNV")
 56 | library(SpatialInferCNV)
 57 | ```
 58 | 
 59 | # SpatialInferCNV installation in a conda environment - MacOS or Linux
 60 | 
 61 | We also provide environment files to set up a conda environment in a
 62 | MacOSX or Linux environment with all the dependencies necessary for
 63 | SpatialInferCNV. Installation was tested using MacOS Mojave, Version
 64 | 10.14.6 and an HPC running Scientific Linux 7.9. If you have anaconda
 65 | installed, you can create a new environment and activate it by running
 66 | the code below.
 67 | 
 68 | [environment.yml
 69 | file](https://github.com/aerickso/SpatialInferCNV/blob/main/environment.yml)
 70 | 
 71 |     conda env create -f environment.yml
 72 |     conda activate SpatialInferCNV
 73 | 
 74 | If you want to run RStudio within this environment you can install it
 75 | from the terminal with conda:
 76 | 
 77 |     # make sure that the environment is active
 78 |     conda install -c r rstudio
 79 |     rstudio
 80 | 
 81 | Once you have created the environment you can install SpatialInferCNV.
 82 | 
 83 | ``` r
 84 | # From R
 85 | install.packages("devtools")
 86 | library(devtools)
 87 | install_github("aerickso/SpatialInferCNV")
 88 | ```
 89 | 
 90 | # SpatialInferCNV installation in a conda environment - Windows
 91 | 
 92 | This was tested in Anaconda3, conda version 4.12.0, on Windows 11, R
 93 | version 4.1.3.
 94 | 
 95 | In anaconda3 terminal, create a new conda environment with R 4.1.3.
 96 | 
 97 |     conda config --add channels conda-forge 
 98 |     conda create -n siCNV r-base=4.1.3
 99 |     # Select yes to install all new packages
100 |     conda activate siCNV
101 | 
102 |     R
103 | 
104 | Install devtools, and configure the file download method for windows to
105 | allow install_githb() to resolve
106 | 
107 | ``` r
108 | install.packages("devtools")
109 | library(devtools)
110 | options(download.file.method = "wininet")
111 | ```
112 | 
113 | Installing R dependencies. Note: hdf5r has a system level dependency of
114 | hdf5, see above for more details.
115 | 
116 | ``` r
117 | if (!requireNamespace("BiocManager", quietly = TRUE))
118 |      install.packages("BiocManager")
119 | BiocManager::install("infercnv")
120 | install.packages("tidyverse")
121 | install.packages("Seurat")
122 | install.packages("phylogram")
123 | install.packages("ape")
124 | install.packages("hdf5r")
125 | # enter a to update/install all
126 | 
127 | library(infercnv)
128 | library(tidyverse)
129 | library(Seurat)
130 | library(phylogram)
131 | library(ape)
132 | library(hdf5r)
133 | ```
134 | 
135 | After installing the R dependencies, install and initialize
136 | SpatialInferCNV.
137 | 
138 | ``` r
139 | install_github("aerickso/SpatialInferCNV")
140 | # enter 1 to update all
141 | # yes 
142 | library(SpatialInferCNV)
143 | ```
144 | 
145 | # Userguide
146 | 
147 | The package provides a number of functions, please read the user guide
148 | [here](https://aerickso.github.io/SpatialInferCNV/).
149 | 
150 | # Study Data
151 | 
152 | We provide data used in this study at the following [Mendeley
153 | Repository](https://data.mendeley.com/v1/datasets/svw96g68dv/draft?a=3f263217-2bd3-4a3c-8125-8c517c3a9e29).
154 | 


--------------------------------------------------------------------------------
/UserGuide/Images/BC23209_C1_PGA_SpatialVisualization_2022-03-27.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/BC23209_C1_PGA_SpatialVisualization_2022-03-27.png


--------------------------------------------------------------------------------
/UserGuide/Images/BreastCancer10x_forclustering_phylo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/BreastCancer10x_forclustering_phylo.png


--------------------------------------------------------------------------------
/UserGuide/Images/BreastCancer10x_forclustering_phylo_manual.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/BreastCancer10x_forclustering_phylo_manual.png


--------------------------------------------------------------------------------
/UserGuide/Images/LB_Annotated.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_Annotated.png


--------------------------------------------------------------------------------
/UserGuide/Images/LB_CloneImport.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_CloneImport.png


--------------------------------------------------------------------------------
/UserGuide/Images/LB_DragSelection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_DragSelection.png


--------------------------------------------------------------------------------
/UserGuide/Images/LB_ExcludeUnlabeled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_ExcludeUnlabeled.png


--------------------------------------------------------------------------------
/UserGuide/Images/LB_ExportHistology.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_ExportHistology.png


--------------------------------------------------------------------------------
/UserGuide/Images/LB_ExportingCSV.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_ExportingCSV.png


--------------------------------------------------------------------------------
/UserGuide/Images/LB_Histology.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_Histology.png


--------------------------------------------------------------------------------
/UserGuide/Images/LB_ImportingCloneCSV.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_ImportingCloneCSV.png


--------------------------------------------------------------------------------
/UserGuide/Images/LB_PolygonalSelection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_PolygonalSelection.png


--------------------------------------------------------------------------------
/UserGuide/Images/LB_UserguideClones_Visualized.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_UserguideClones_Visualized.png


--------------------------------------------------------------------------------
/UserGuide/Images/LB_Userguide_12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/LB_Userguide_12.png


--------------------------------------------------------------------------------
/UserGuide/Images/NewCategoryImage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/NewCategoryImage.png


--------------------------------------------------------------------------------
/UserGuide/Images/infercnv.21_denoised.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/infercnv.21_denoised.png


--------------------------------------------------------------------------------
/UserGuide/Images/infercnv.21_denoised_manualselection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerickso/SpatialInferCNV/b131e6669d33ce763ec98e0bc502d3526327732f/UserGuide/Images/infercnv.21_denoised_manualselection.png


--------------------------------------------------------------------------------
/UserGuide/UserGuideFiles/10xBreast_UserguideHistologyAnnotations.csv:
--------------------------------------------------------------------------------
 1 | Barcode,Histology
 2 | AATAACGTCGCGCCCA-1,Userguide_12
 3 | CACCCGCGTTTGACAC-1,Userguide_12
 4 | CGCAGTTCTATCTTTC-1,Userguide_12
 5 | CGTTAAATACGACCAG-1,Userguide_12
 6 | CGTTTCGGTTATATGC-1,Userguide_12
 7 | GACAGGTAATCCGTGT-1,Userguide_12
 8 | GTCTTACCACGCCAAG-1,Userguide_12
 9 | TACGCTGCACGGTCGT-1,Userguide_12
10 | TAGATTCTCTAGCAAA-1,Userguide_12
11 | TAGGAGGCTCGAGAAC-1,Userguide_12
12 | TCGACTGACGATGGCT-1,Userguide_12
13 | TCTACCCGCATCATTT-1,Userguide_12
14 | 


--------------------------------------------------------------------------------
/UserGuide/UserGuideFiles/infercnv.21_denoised.observations_dendrogram.txt:
--------------------------------------------------------------------------------
1 | (Breast10X_TCGACTGACGATGGCT.1:6.76625323,((Breast10X_CGCAGTTCTATCTTTC.1:4.451235413,Breast10X_GACAGGTAATCCGTGT.1:4.451235413):0.5368723659,(((Breast10X_CGTTTCGGTTATATGC.1:3.879678282,Breast10X_TAGATTCTCTAGCAAA.1:3.879678282):0.4285436197,(Breast10X_TACGCTGCACGGTCGT.1:4.09129203,(Breast10X_CACCCGCGTTTGACAC.1:3.702021609,Breast10X_TAGGAGGCTCGAGAAC.1:3.702021609):0.3892704214):0.2169298715):0.5458187634,(Breast10X_TCTACCCGCATCATTT.1:4.480431411,(Breast10X_CGTTAAATACGACCAG.1:4.111940764,Breast10X_GTCTTACCACGCCAAG.1:4.111940764):0.3684906471):0.3736092537):0.1340671139):1.778145451);
2 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: SpatialInferCNV
 2 | channels:
 3 |   - conda-forge
 4 |   - bioconda
 5 |   - defaults
 6 | dependencies:
 7 |   - bioconductor-infercnv=1.10.0
 8 |   - r-essentials=4.1
 9 |   - r-devtools=2.4.3
10 |   - r-phylogram=2.1.0
11 |   - r-base=4.1.2
12 |   - r-hdf5r=1.3.5
13 |   - r-seurat=4.1.0


--------------------------------------------------------------------------------
/man/ExtractSectionWise.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ExtractSectionWise.R
 3 | \name{ExtractSectionWise}
 4 | \alias{ExtractSectionWise}
 5 | \title{Obtaining a thresholded dataframe as part of spatial visualization of spatial transcriptomics data.}
 6 | \usage{
 7 | ExtractSectionWise(
 8 |   SectionName,
 9 |   CNV_Genes_Organscale_Input,
10 |   AllBarcodes,
11 |   Threshold
12 | )
13 | }
14 | \arguments{
15 | \item{SectionName}{A character string for section name.}
16 | 
17 | \item{CNV_Genes_Organscale_Input}{A dataframe, mirroring the structure of infercnv::run output file 17_HMM_predHMMi6.hmm_mode-cells.pred_cnv_genes.dat}
18 | 
19 | \item{AllBarcodes}{A dataframe of barcodes and annotations.}
20 | 
21 | \item{Threshold}{A numerical value for sectionwise thresholding of the number of genes to pass: integer values from 0-100.}
22 | }
23 | \value{
24 | A dataframe of ST counts, that have passed QC and are selected.
25 | }
26 | \description{
27 | ExtractSectionWise()
28 | }
29 | \examples{
30 | ExtractSectionWise("H2_1", CNV_Genes_Filtered, AllBarcodes, 0.45)
31 | }
32 | 


--------------------------------------------------------------------------------
/man/FinalAnnotations.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/FinalAnnotations.R
 3 | \name{FinalAnnotations}
 4 | \alias{FinalAnnotations}
 5 | \title{Creating A finalized annotation dataframe containing only barcodes in the count file.}
 6 | \usage{
 7 | FinalAnnotations(InputOriginalAnnotationFile, InputCounts)
 8 | }
 9 | \arguments{
10 | \item{InputOriginalAnnotationFile}{A dataframe of barcodes selected for analysis}
11 | 
12 | \item{InputCounts}{A joined count dataframe, of barcodes selected for analysis AND has passed QC (counts per spot >= 500 counts)}
13 | }
14 | \value{
15 | A finalized annotation dataframe containing only barcodes in the count file.
16 | }
17 | \description{
18 | FinalAnnotations()
19 | }
20 | \examples{
21 | SelectingSubTreeData(my.subtrees, 4617)
22 | FinalAnnotations(MergedAll, Counts_joined)
23 | }
24 | 


--------------------------------------------------------------------------------
/man/ImportCountData.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ImportCountData.R
 3 | \name{ImportCountData}
 4 | \alias{ImportCountData}
 5 | \title{Importing Visium spatial transcriptomics count data from filtered_feature_bc_matrix.h5 file (output from SpaceRanger pipeline) and appending section name to barcodes}
 6 | \usage{
 7 | ImportCountData(SectionName, InputCountFile)
 8 | }
 9 | \arguments{
10 | \item{SectionName}{A character string for section name.}
11 | 
12 | \item{InputCountFile}{A file path to a filtered_feature_bc_matrix.h5 file (output from 10X Genomics SpaceRanger pipeline)}
13 | }
14 | \value{
15 | A dataframe of counts with appended section names
16 | }
17 | \description{
18 | ImportCountData()
19 | }
20 | \examples{
21 | ImportCountData("H2_1", "./filtered_feature_bc_matrix.h5")
22 | }
23 | 


--------------------------------------------------------------------------------
/man/ImportHistologicalAnnotations.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ImportHistologicalAnnotations.R
 3 | \name{ImportHistologicalAnnotations}
 4 | \alias{ImportHistologicalAnnotations}
 5 | \title{Importing histological annotations of Visium barcodes and appending a section name to the barcodes.}
 6 | \usage{
 7 | ImportHistologicalAnnotations(SectionName, InputAnnotationFile)
 8 | }
 9 | \arguments{
10 | \item{SectionName}{A character string for section name.}
11 | 
12 | \item{InputAnnotationFile}{A file path to a .csv file, with annotations (for example, output from LoupeBrowser after manual annotations)}
13 | }
14 | \value{
15 | A dataframe of barcodes with appended section names
16 | }
17 | \description{
18 | For example, annotations exported from the Loupe Browser.
19 | ImportHistologicalAnnotations()
20 | }
21 | \examples{
22 | ImportHistologicalAnnotations("H1_2", "./H1_2_Final_Consensus_Annotations.csv")
23 | }
24 | 


--------------------------------------------------------------------------------
/man/ImportHistologicalOriginalSTSelections.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ImportHistologicalOriginalSTSelections.R
 3 | \name{ImportHistologicalOriginalSTSelections}
 4 | \alias{ImportHistologicalOriginalSTSelections}
 5 | \title{Importing spatial transcriptomics, 1k array selected spot file data and appending section names to the barcodes.}
 6 | \usage{
 7 | ImportHistologicalOriginalSTSelections(SectionName, InputAnnotationFile)
 8 | }
 9 | \arguments{
10 | \item{SectionName}{A character string for section name.}
11 | 
12 | \item{InputAnnotationFile}{A file path to a .tsv file}
13 | }
14 | \value{
15 | A dataframe of barcodes with appended section names
16 | }
17 | \description{
18 | ImportHistologicalOriginalSTSelections()
19 | }
20 | \examples{
21 | ImportHistologicalOriginalSTSelections("H2_1", "./Patient 1/1k_arrays/H2_1/spot_data-selection-180903_L11_CN63_D1_P_H2.1_CY3_EB_aligned.tsv")
22 | }
23 | 


--------------------------------------------------------------------------------
/man/ImportOriginalSTCountData.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ImportOriginalSTCountData.R
 3 | \name{ImportOriginalSTCountData}
 4 | \alias{ImportOriginalSTCountData}
 5 | \title{Importing spatial transcriptomics, 1k array count data and appending section names to the barcodes.}
 6 | \usage{
 7 | ImportOriginalSTCountData(SectionName, InputCountFile)
 8 | }
 9 | \arguments{
10 | \item{SectionName}{A character string for section name.}
11 | 
12 | \item{InputCountFile}{A file path to a .tsv file}
13 | }
14 | \value{
15 | A dataframe of count data, having barcodes with appended section names
16 | }
17 | \description{
18 | ImportOriginalSTCountData()
19 | }
20 | \examples{
21 | ImportOriginalSTCountData("H2_1", "./Patient 1/1k_arrays/H2_1/180903_L11_CN63_D1_H2.1_EB_stdata.tsv")
22 | }
23 | 


--------------------------------------------------------------------------------
/man/MergingCountAndAnnotationData.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MergingCountAndAnnotationData.R
 3 | \name{MergingCountAndAnnotationData}
 4 | \alias{MergingCountAndAnnotationData}
 5 | \title{Merging Visium spatial transcriptomics count and annotation data, as well as applying a QC filter to only include spots with >= 500 counts}
 6 | \usage{
 7 | MergingCountAndAnnotationData(SectionName, InputAnnotationFile, InputCountFile)
 8 | }
 9 | \arguments{
10 | \item{SectionName}{A character string for section name.}
11 | 
12 | \item{InputAnnotationFile}{An annotation file containing all barcodes to be used in the analysis (bound dataframe of one or more outputs from ImportHistologicalAnnotations())}
13 | 
14 | \item{InputCountFile}{A dataframe of Visium count data (output from ImportCountData())}
15 | }
16 | \value{
17 | A dataframe of barcodes with appended section names that have passed QC
18 | }
19 | \description{
20 | MergingCountAndAnnotationData()
21 | }
22 | \examples{
23 | MergingCountAndAnnotationData("H2_1",MergedAll, H2_1_ENSBMLID_Counts)
24 | }
25 | 


--------------------------------------------------------------------------------
/man/OriginalST_MergingCountAndAnnotationData.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/OriginalST_MergingCountAndAnnotationData.R
 3 | \name{OriginalST_MergingCountAndAnnotationData}
 4 | \alias{OriginalST_MergingCountAndAnnotationData}
 5 | \title{Merging spatial transcriptomics, 1k array count files and barcodes, and apply a QC metric to only select
 6 | ST spots with >=500 total unique molecular identifiers.}
 7 | \usage{
 8 | OriginalST_MergingCountAndAnnotationData(InputAnnotationFile, InputCountFile)
 9 | }
10 | \arguments{
11 | \item{InputAnnotationFile}{An annotation file created by ImportHistologicalOriginalSTSelections()}
12 | 
13 | \item{InputCountFile}{A ST count file created by ImportOriginalSTCountData()}
14 | }
15 | \value{
16 | A dataframe of ST counts, that have passed QC and are selected.
17 | }
18 | \description{
19 | OriginalST_MergingCountAndAnnotationData()
20 | }
21 | \examples{
22 | OriginalST_MergingCountAndAnnotationData(Barcodes_H2_1, Counts_H2.1)
23 | }
24 | 


--------------------------------------------------------------------------------
/man/Output_PGA_Visualization_MatrixGreyNA.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Output_PGA_Visualization_MatrixGreyNA.R
 3 | \name{Output_PGA_Visualization_MatrixGreyNA}
 4 | \alias{Output_PGA_Visualization_MatrixGreyNA}
 5 | \title{Preparing a matrix for spatial visualization of number of genes with an inferred CNV, derived from spatial transcriptomics data.}
 6 | \usage{
 7 | Output_PGA_Visualization_MatrixGreyNA(SectionName, InputCNVs, BarcodesFile)
 8 | }
 9 | \arguments{
10 | \item{SectionName}{A character string for section name.}
11 | 
12 | \item{InputCNVs}{An input dataframe created by the function ExtractSectionWise()}
13 | 
14 | \item{BarcodesFile}{A single column dataframe comprised of a list of barcode coordinates in the form AxB, where A = the X coordinate, and B = the Y coordinate.}
15 | }
16 | \value{
17 | A dataframe for spatial visualization by Plot_PGA_Visualization_Matrix()
18 | }
19 | \description{
20 | Output_PGA_Visualization_MatrixGreyNA()
21 | }
22 | \examples{
23 | Output_PGA_Visualization_MatrixGreyNA("H2_1", H2_1_Sectionwise_CNVsGenes_Counted, L2_Barcodes)
24 | }
25 | 


--------------------------------------------------------------------------------
/man/Plot_PGA_Visualization_Matrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Plot_PGA_Visualization_Matrix().R
 3 | \name{Plot_PGA_Visualization_Matrix}
 4 | \alias{Plot_PGA_Visualization_Matrix}
 5 | \title{Plotting the spatial distribution of genes with an inferred copy number alteration from an underlying matrix}
 6 | \usage{
 7 | Plot_PGA_Visualization_Matrix(SectionName, InputMatrix, MaxValInput)
 8 | }
 9 | \arguments{
10 | \item{SectionName}{A character string for section name.}
11 | 
12 | \item{InputMatrix}{An input matrix created by the function Output_PGA_Visualization_MatrixGreyNA()}
13 | 
14 | \item{MaxValInput}{An upper threshold for plotting, derived from the maximum sectionwise value of the number of inferred genes with a CNV (from ExtractSectionWise())}
15 | }
16 | \value{
17 | An output spatial visualization of the number of genes with an inferred CNV from 1k array spatial transcriptomics data.
18 | }
19 | \description{
20 | Plot_PGA_Visualization_Matrix()
21 | }
22 | \examples{
23 | Plot_PGA_Visualization_Matrix("H2_1", PGA_Matrix, MaxVal)
24 | }
25 | 


--------------------------------------------------------------------------------
/man/SelectingSubTreeData.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SelectingSubTreeData.R
 3 | \name{SelectingSubTreeData}
 4 | \alias{SelectingSubTreeData}
 5 | \title{Selecting Subtree Data for Node Selection: this selects a number of barcoded spots from an inferCNV dendrogram object for further analysis.}
 6 | \usage{
 7 | SelectingSubTreeData(SubtreeObject, NodeOfInterest)
 8 | }
 9 | \arguments{
10 | \item{SubtreeObject}{A dendrogram, phylo object created by subtrees(as.phylo([dendrogram.txt]))}
11 | 
12 | \item{NodeOfInterest}{A numerical integer corresponding to a phylogram/dendrogram node of interest}
13 | }
14 | \value{
15 | A specific subtree node
16 | }
17 | \description{
18 | SelectingSubTreeData()
19 | }
20 | \examples{
21 | SelectingSubTreeData(my.subtrees, 4617)
22 | }
23 | 


--------------------------------------------------------------------------------