# Assemble a Seurat object with an attached field of view (FOV) from a list of
# Baysor-derived tables.
#
# data  : list with the elements `transcripts` (count matrix used to create
#         the assay), `centroids`, `segmentations` and `microns` (molecule
#         coordinates), as filled in by the calling script.
# fov   : name under which the FOV is stored in the returned object.
# assay : name of the assay that receives the counts (default "Resolve").
#
# Returns the Seurat object; the FOV is restricted to cells that are present
# both in the segmentation boundaries and in the count matrix.
LoadResolve <- function(data, fov, assay = "Resolve") {
  boundaries <- list(
    centroids = CreateCentroids(data$centroids),
    segmentation = CreateSegmentation(data$segmentations)
  )
  field_of_view <- CreateFOV(
    coords = boundaries,
    type = c("segmentation", "centroids"),
    molecules = data$microns,
    assay = assay
  )

  seurat_obj <- CreateSeuratObject(counts = data$transcripts, assay = assay)

  # Keep only cells that have both a segmentation polygon and counts.
  shared_cells <- intersect(
    x = Cells(x = field_of_view[["segmentation"]]),
    y = Cells(x = seurat_obj)
  )
  seurat_obj[[fov]] <- subset(x = field_of_view, cells = shared_cells)

  seurat_obj
}
# Data analysis
#
# Merge the per-sample objects, keep cells with at least 10 counts and at most
# 25 detected features, then run SCTransform, PCA, UMAP, neighbour graph
# construction and clustering at resolutions 0.2 to 1.
# NOTE(review): Sample_C2_1 and Sample_D2_1 are only created by code that is
# commented out in this script; they must exist before this section is run.
Sample.merge <- merge(Sample_B2_1, y = c(Sample_C2_1, Sample_D2_1))
Sample.merge <- subset(
  Sample.merge,
  subset = nCount_RNA >= 10 & nFeature_RNA <= 25
)
# The original call ended with a stray empty argument (`clip.range = ..., )`);
# it is removed so that no missing argument is passed on to SCTransform.
Sample.merge <- SCTransform(
  Sample.merge,
  assay = "RNA",
  clip.range = c(-10, 10)
)
# PCA on every feature of the object rather than on variable features only.
Sample.merge <- RunPCA(
  Sample.merge,
  npcs = 30,
  features = rownames(Sample.merge)
)
Sample.merge <- RunUMAP(Sample.merge, dims = 1:20)
Sample.merge <- FindNeighbors(Sample.merge, reduction = "pca", dims = 1:20)
Sample.merge <- FindClusters(
  Sample.merge,
  resolution = c(0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)
)
# Neighbourhood analysis
#
# For each k in 1..40: take the k nearest spatial neighbours (closer than 400
# coordinate units) of the cells in cluster 16, count the distinct neighbour
# cells per cluster, and sum these counts over the three samples. Each column
# is then divided by the total number of cells in that cluster.
resolve.obj <- list(
  B2_1 = resolve_B2_1,
  C2_1 = resolve_C2_1,
  D2_1 = resolve_D2_1
)

cluster_levels <- 0:18
max_neighbours <- 40

fraction_NN <- Reduce("+", lapply(resolve.obj, function(x) {
  xy_cells <- GetTissueCoordinates(x[[names(x)[2]]][["centroids"]])
  rownames(xy_cells) <- xy_cells[, "cell"]
  cell_names <- rownames(xy_cells)
  # The coordinate matrix does not depend on k, so build it once.
  xy_matrix <- as.matrix(xy_cells[, c("x", "y")])

  cells_sel <- rownames(x@meta.data[which(x@meta.data$clusters == 16), ])

  # One row per k, one column per cluster. Preallocated instead of growing the
  # matrix with rbind() from a dummy first row.
  dim_NN <- matrix(
    0L,
    nrow = max_neighbours,
    ncol = length(cluster_levels),
    dimnames = list(NULL, as.character(cluster_levels))
  )
  for (nNeighbours in seq_len(max_neighbours)) {
    knn_spatial <- dbscan::kNN(x = xy_matrix, k = nNeighbours)
    # Edge list in column-major order of the k-NN id/dist matrices.
    from <- cell_names[rep(seq_len(nrow(knn_spatial$id)), nNeighbours)]
    to <- cell_names[as.vector(knn_spatial$id)]
    distance <- as.vector(knn_spatial$dist)

    nn <- unique(to[from %in% cells_sel & distance < 400])
    dim_NN[nNeighbours, ] <- table(
      factor(x@meta.data[nn, "clusters"], levels = cluster_levels)
    )
  }
  dim_NN
}))

# NOTE(review): neighbours are tallied with the `clusters` metadata column but
# normalised with `SCT_snn_res.0.5`; confirm both hold the same labels.
cluster_sizes <- as.vector(table(
  factor(Sample.merge@meta.data[, "SCT_snn_res.0.5"], levels = cluster_levels)
))
fraction_NN <- sweep(fraction_NN, 2, cluster_sizes, "/")
# Z-score of the observed neighbour counts against the label-shuffled counts.
#
# For every source cluster (one entry of `randomized_NN`, a matrix with one row
# per randomisation and one column per neighbour cluster) the observed row of
# `real_NN` is centred on the column means of the randomised counts and scaled
# by their standard deviations. The result has one row per source cluster and
# one column per neighbour cluster; a column with zero variance across
# randomisations yields NaN or +/-Inf, as in the original loop.
z_scores <- t(vapply(
  names(randomized_NN),
  function(cluster) {
    null_counts <- randomized_NN[[cluster]]
    (real_NN[cluster, ] - colMeans(null_counts)) / apply(null_counts, 2, sd)
  },
  numeric(ncol(real_NN))
))
matplotlib.pyplot as plt\n", 11 | "import numpy as np\n", 12 | "import scanpy as sc\n", 13 | "import anndata as ad\n", 14 | "import pandas as pd\n", 15 | "import scipy\n", 16 | "from scvi.model import CondSCVI, DestVI\n", 17 | "from skmisc.loess import loess\n", 18 | "import torch" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "sc_adata=sc.read_h5ad('scRNA_mouse_PDAC_day30.h5ad')\n", 28 | "st_adata=sc.read_visium('GSM6727528/outs',source_image_path='.GSM6727528/outs/spatial')\n", 29 | "st_filtered=pd.read_csv('SelectedSpots.csv')" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "# subset st data\n", 39 | "st_adata.var_names_make_unique()\n", 40 | "st_adata=st_adata[st_filtered['x'],]\n", 41 | "st_adata" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# NB: sc_adata contains raw counts\n", 51 | "sc.pp.filter_genes(sc_adata, min_counts=10)\n", 52 | "G = 2000\n", 53 | "sc_adata.layers[\"counts\"] = sc_adata.X.copy()\n", 54 | "sc.pp.highly_variable_genes(sc_adata, n_top_genes=G, subset=True, layer=\"counts\", flavor=\"seurat_v3\")\n", 55 | "sc.pp.normalize_total(sc_adata, target_sum=10e4)\n", 56 | "sc.pp.log1p(sc_adata)\n", 57 | "sc_adata.raw = sc_adata" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "# Spatial data\n", 67 | "st_adata.layers[\"counts\"] = st_adata.X.copy()\n", 68 | "\n", 69 | "sc.pp.normalize_total(st_adata, target_sum=10e4)\n", 70 | "sc.pp.log1p(st_adata)\n", 71 | "st_adata.raw = st_adata\n", 72 | "\n", 73 | "loc=st_adata.obsm[\"spatial\"]\n", 74 | "st_adata.obsm[\"spatial\"]=loc.astype('float')" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | 
"metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "# filter genes to be the same on the spatial and sc data\n", 84 | "intersect = np.intersect1d(sc_adata.var_names, st_adata.var_names)\n", 85 | "st_adata = st_adata[:, intersect].copy()\n", 86 | "sc_adata = sc_adata[:, intersect].copy()" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "# Fit the scLMV\n", 96 | "CondSCVI.setup_anndata(sc_adata, layer=\"counts\", labels_key=\"Annotation\")\n", 97 | "sc_model = CondSCVI(sc_adata, weight_obs=False)\n", 98 | "sc_model.view_anndata_setup()\n", 99 | "sc_model.train()\n", 100 | "\n", 101 | "sc_model.history[\"elbo_train\"].iloc[5:].plot()\n", 102 | "plt.show()" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "# Deconvolution\n", 112 | "DestVI.setup_anndata(st_adata, layer=\"counts\")\n", 113 | "st_model = DestVI.from_rna_model(st_adata, sc_model)\n", 114 | "st_model.view_anndata_setup()\n", 115 | "st_model.train(max_epochs=2500)\n", 116 | "st_model.history[\"elbo_train\"].iloc[10:].plot()\n", 117 | "plt.show()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "# Get proportions\n", 127 | "st_adata.obsm[\"proportions\"] = st_model.get_proportions()\n", 128 | "st_adata.obsm[\"proportions\"].to_csv('CellProp_DestVI.csv')" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "ct_thresholds = destvi_utils.automatic_proportion_threshold(st_adata, kind_threshold=\"primary\")\n", 138 | "ct_thresholds['MonoMacro'] = 0.1" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "for ct, g in 
st_model.get_gamma().items():\n", 148 | " st_adata.obsm[f\"{ct}_gamma\"] = g" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "# LOAD FUNCTIONS FROM destvi_utils\n", 158 | "import anndata as ad\n", 159 | "import hotspot\n", 160 | "import matplotlib.pyplot as plt\n", 161 | "import numpy as np\n", 162 | "from scipy.interpolate import splev, splrep\n", 163 | "from scipy.spatial.distance import pdist, squareform\n", 164 | "from sklearn.mixture import GaussianMixture\n", 165 | "\n", 166 | "\n", 167 | "def _prettify_axis(ax, spatial=False):\n", 168 | " # Hide the right and top spines\n", 169 | " ax.spines[\"right\"].set_visible(False)\n", 170 | " ax.spines[\"top\"].set_visible(False)\n", 171 | " # Only show ticks on the left and bottom spines\n", 172 | " ax.yaxis.set_ticks_position(\"left\")\n", 173 | " ax.xaxis.set_ticks_position(\"bottom\")\n", 174 | " if spatial:\n", 175 | " plt.xticks([])\n", 176 | " plt.yticks([])\n", 177 | " plt.xlabel(\"Spatial1\")\n", 178 | " plt.ylabel(\"Spatial2\")\n", 179 | "\n", 180 | "\n", 181 | "def _form_stacked_quantiles(data, N=100):\n", 182 | " quantiles = np.quantile(data, np.linspace(0, 1, N, endpoint=False))\n", 183 | " return quantiles, np.vstack([_flatten(data, q) for q in quantiles])\n", 184 | "\n", 185 | "\n", 186 | "def _flatten(x, threshold):\n", 187 | " return (x > threshold) * x\n", 188 | "\n", 189 | "\n", 190 | "def _smooth_get_critical_points(x, noisy_data, k=5, s=0.1):\n", 191 | " f = splrep(x, noisy_data, k=5, s=1)\n", 192 | " smoothed = splev(x, f)\n", 193 | " derivative = splev(x, f, der=1)\n", 194 | " sign_2nd = splev(x, f, der=2) > 0\n", 195 | " curvature = splev(x, f, der=3)\n", 196 | " return noisy_data, smoothed, derivative, sign_2nd, curvature\n", 197 | "\n", 198 | "\n", 199 | "def _get_autocorrelations(st_adata, stacked_quantiles, quantiles):\n", 200 | " # create Anndata and run hotspot\n", 201 | " adata = 
ad.AnnData(stacked_quantiles.T)\n", 202 | " adata.obs_names = st_adata.obs.index\n", 203 | " adata.var_names = [str(i) for i in quantiles]\n", 204 | " adata.obsm[\"spatial\"] = st_adata.obsm[\"spatial\"]\n", 205 | " hs = hotspot.Hotspot(adata, model=\"none\", latent_obsm_key=\"spatial\")\n", 206 | " hs.create_knn_graph(\n", 207 | " weighted_graph=True,\n", 208 | " n_neighbors=10,\n", 209 | " )\n", 210 | " hs_results = hs.compute_autocorrelations(jobs=1)\n", 211 | " index = np.array([float(i) for i in hs_results.index.values])\n", 212 | " return index, hs_results[\"Z\"].values\n", 213 | "\n", 214 | "\n", 215 | "def _get_laplacian(s, pi):\n", 216 | " N = s.shape[0]\n", 217 | " dist_table = pdist(s)\n", 218 | " bandwidth = np.median(dist_table)\n", 219 | " sigma = 0.5 * bandwidth**2\n", 220 | "\n", 221 | " l2_square = squareform(dist_table) ** 2\n", 222 | " D = np.exp(-l2_square / sigma) * np.dot(pi, pi.T)\n", 223 | " L = -D\n", 224 | " sum_D = np.sum(D, axis=1)\n", 225 | " for i in range(N):\n", 226 | " L[i, i] = sum_D[i]\n", 227 | " return L\n", 228 | "\n", 229 | "\n", 230 | "def _get_spatial_components(locations, proportions, data):\n", 231 | " # find top two spatial principal vectors\n", 232 | " # form laplacian\n", 233 | " L = _get_laplacian(locations, proportions)\n", 234 | " # center data\n", 235 | " transla_ = data.copy()\n", 236 | " transla_ -= np.mean(transla_, axis=0)\n", 237 | " # get eigenvectors\n", 238 | " A = np.dot(transla_.T, np.dot(L, transla_))\n", 239 | " w, v = np.linalg.eig(A)\n", 240 | " # don't forget to sort them...\n", 241 | " idx = np.argsort(w)[::-1]\n", 242 | " vec = v[:, idx][:, :]\n", 243 | " return vec\n", 244 | "\n", 245 | "\n", 246 | "def _vcorrcoef(X, y):\n", 247 | " Xm = np.reshape(np.mean(X, axis=1), (X.shape[0], 1))\n", 248 | " ym = np.mean(y)\n", 249 | " r_num = np.sum((X - Xm) * (y - ym), axis=1)\n", 250 | " r_den = np.sqrt(np.sum((X - Xm) ** 2, axis=1) * np.sum((y - ym) ** 2))\n", 251 | " r = np.divide(\n", 252 | " r_num,\n", 
# Get Spatial PCs for the MonoMacro-enriched spots
# gamma is indexed below as (spot, latent dimension, cell type).
gamma = st_model.get_gamma(return_numpy=True)

# Keep spots whose estimated MonoMacro proportion exceeds the chosen threshold.
mono_macro_prop = st_adata.obsm["proportions"]['MonoMacro'].values
filter_ = mono_macro_prop > ct_thresholds['MonoMacro']
locations = st_adata.obsm["spatial"][filter_]
proportions = mono_macro_prop[filter_]

# Position of MonoMacro along the cell-type axis of gamma.
ct_index = np.where('MonoMacro' == st_model.cell_type_mapping)[0][0]
data = gamma[:, :, ct_index][filter_]

# The helper defined in this notebook is `_get_spatial_components`; the
# previous call to `get_spatial_components` (no underscore) raised NameError.
# All components are kept, so the number of spatial PCs equals the number of
# columns of `data`.
vec = _get_spatial_components(locations, proportions, data)[:, :]
projection = np.dot(data - np.mean(data, 0), vec)

SpatialPCs = pd.DataFrame(projection)
SpatialPCs.index = st_adata.obs_names[filter_]
SpatialPCs.to_csv('SpatialPCs_MonoMacro.csv')
sc_projection[:, 0])\n", 306 | "ranking = np.argsort(r)\n", 307 | "PC1Pos=pd.DataFrame(r[ranking][::-1][:50])\n", 308 | "PC1Pos.index=list(st_adata.var.index[ranking[::-1][:50]])\n", 309 | "\n", 310 | "PC1Neg=pd.DataFrame(r[ranking][:50])\n", 311 | "PC1Neg.index=list(st_adata.var.index[ranking[:50]])" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "## Generate Expression Matrix for CellType\n", 321 | "\n", 322 | "# impute \n", 323 | "imp_ge = st_model.get_scale_for_ct(\"MonoMacro\", indices=np.where(filter_)[0]).values\n", 324 | "\n", 325 | "# get statistics\n", 326 | "avg_library_size = np.mean(np.sum(st_adata.layers[\"counts\"], axis=1).A.flatten())\n", 327 | "exp_px_o = st_model.module.px_o.detach().exp().cpu().numpy()\n", 328 | "mean = avg_library_size * imp_ge\n", 329 | "\n", 330 | "# create distribution\n", 331 | "concentration = torch.tensor(avg_library_size * imp_ge / exp_px_o)\n", 332 | "rate = torch.tensor(1. 
/ exp_px_o)\n", 333 | "\n", 334 | "# generate\n", 335 | "for j in [1,2,3,4,5,6]:\n", 336 | " N = 1\n", 337 | " simulated = torch.distributions.Gamma(concentration=concentration, rate = rate).sample((N,)).cpu().numpy()\n", 338 | " simulated = np.log(simulated + 1)\n", 339 | " simulated = simulated.reshape((-1, simulated.shape[-1]))\n", 340 | " simulated=pd.DataFrame(simulated, index=st_adata.obs['_indices'][np.where(filter_)[0]].index, columns=st_adata.var['gene_ids'].index)\n", 341 | " simulated.to_csv(f\"Simulation_{j}_MonoMacro.csv\")" 342 | ] 343 | } 344 | ], 345 | "metadata": { 346 | "language_info": { 347 | "name": "python" 348 | }, 349 | "orig_nbformat": 4 350 | }, 351 | "nbformat": 4, 352 | "nbformat_minor": 2 353 | } 354 | -------------------------------------------------------------------------------- /Visium/Nature2023_Mouse_PDAC_Visium_post_deconvolution_analyses.r: -------------------------------------------------------------------------------- 1 | library(data.table) 2 | library(Matrix) 3 | library(ggplot2) 4 | library(future) 5 | library(dplyr) 6 | library(grid) 7 | library(Seurat) 8 | library(tidyr) 9 | library(dendextend) 10 | library(Giotto) 11 | library(clusterProfiler) 12 | library(org.Mm.eg.db) 13 | library(biomaRt) 14 | 15 | 16 | load('Spatial.filt.Robj') 17 | images <- Images(Spatial.filt, assay = DefaultAssay(object = Spatial.filt)) 18 | image.use <- Spatial.filt[[images]] 19 | coordinates <- GetTissueCoordinates(object = image.use) 20 | 21 | # import proportions predicted by DestVI 22 | proportions<-read.csv('CellProp_DestVI.csv', row.names=1) 23 | 24 | 25 | #### Clustering and analysis of MonoMacro erniched spots 26 | 27 | SpatialPCs_MonoMacro<-read.csv('SpatialPCs_MonoMacro.csv', row.names=1) 28 | Spatial.filt_MonoMacro<-subset(Spatial.filt, cells=rownames(SpatialPCs_MonoMacro)) 29 | 30 | List_simulations <- lapply(c(1:6), function(i) { 31 | sim<-read.csv(paste('Simulation_',i,'_MonoMacro.csv',sep=""), row.names=1) 32 | 
# Pearson correlation between each of the first five spatial PCs and the mean
# expression of each TAM signature, collected in long format for a dot plot.
# Rows are ordered PC by PC, and within a PC as Il1b, Folr2, Spp1.
#
# Replaces the triplicated loop body, the eval(parse(...)) assignments and the
# local variable named `c` (which shadowed base::c). Scores are matched to the
# PC coordinates by cell name; the previous sort by PC value is dropped because
# Pearson correlation does not depend on the order of the paired observations.
signature_scores <- list(
  Il1b = mean.exp_Il1b,
  Folr2 = mean.exp_Folr2,
  Spp1 = mean.exp_Spp1
)
n_pcs <- 5

dotplot_correlations <- do.call(rbind, lapply(seq_len(n_pcs), function(j) {
  do.call(rbind, lapply(names(signature_scores), function(subset_name) {
    test <- cor.test(
      pca_spatial[, j],
      signature_scores[[subset_name]][rownames(pca_spatial)],
      method = "pearson"
    )
    data.frame(
      TAM_subset = subset_name,
      corr = unname(test$estimate),
      p_value = test$p.value,
      PC = paste0("PC_", j),
      stringsAsFactors = FALSE
    )
  }))
}))

# Show p-values as -log10(p), clamped to [0, 30] so that p = 0 stays finite.
dotplot_correlations$p_value <- MinMax(
  -log10(dotplot_correlations$p_value),
  min = 0,
  max = 30
)
dotplot_correlations$TAM_subset <- factor(
  dotplot_correlations$TAM_subset,
  levels = c("Spp1", "Folr2", "Il1b")
)
## GO_BP enrichment analysis
#
# Differential expression of cluster 4 against all other spots, followed by a
# GSEA on GO Biological Process terms ranked by avg_log2FC. Terms with
# q-value < 0.01 are kept and their member genes are translated back to MGI
# symbols.
# NOTE(review): `SpatialA1FilteredFil` is not created anywhere in this script;
# confirm which object is meant.
Idents(SpatialA1FilteredFil) <- "SCT_snn_res.0.3"
Cluster_Il1bvsAll_spatial_seurat <- FindMarkers(
  SpatialA1FilteredFil,
  ident.1 = 4,
  ident.2 = NULL,
  only.pos = FALSE,
  min.pct = 0.1,
  pseudocount.use = 0.1,
  logfc.threshold = 0
)

mart <- useMart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  dataset = "mmusculus_gene_ensembl",
  host = "jul2018.archive.ensembl.org"
)
genes <- rownames(Cluster_Il1bvsAll_spatial_seurat)
bioM <- getBM(
  filters = "mgi_symbol",
  values = genes,
  attributes = c("entrezgene", "mgi_symbol", "description"),
  mart = mart
)

# First Entrez id recorded for each symbol; NA when the symbol is not mapped.
# The previous code filled unmapped genes with the string "NA", which is.na()
# does not detect, and indexed a one-column data.frame as if it were a vector.
gene_id <- as.character(bioM$entrezgene[match(genes, bioM$mgi_symbol)])
has_id <- !is.na(gene_id)
logFC <- Cluster_Il1bvsAll_spatial_seurat$avg_log2FC[has_id]
names(logFC) <- gene_id[has_id]
logFC <- sort(logFC, decreasing = TRUE)

GSEA_Il1b <- gseGO(
  geneList = logFC,
  OrgDb = org.Mm.eg.db,
  ont = "BP",
  minGSSize = 15,
  maxGSSize = 500,
  pvalueCutoff = 1,
  verbose = FALSE
)
geneSets_list <- GSEA_Il1b@geneSets
GSEA_Il1b <- data.frame(
  ID = GSEA_Il1b@result$ID,
  Description = GSEA_Il1b@result$Description,
  setSize = GSEA_Il1b@result$setSize,
  NES = GSEA_Il1b@result$NES,
  pvalue = GSEA_Il1b@result$pvalue,
  qvalues = GSEA_Il1b@result$qvalues
)
GSEA_Il1b <- GSEA_Il1b[which(GSEA_Il1b$qvalues < 0.01), ]

# Extract gene names for the significant Biological Process terms: a named
# list (one entry per GO id) of MGI symbols, NA where an Entrez id is unmapped.
significant_terms <- as.character(GSEA_Il1b$ID)
geneName_list_go_il1b <- lapply(
  setNames(significant_terms, significant_terms),
  function(term_id) {
    entrez_ids <- geneSets_list[[term_id]]
    term_map <- getBM(
      filters = "entrezgene",
      values = entrez_ids,
      attributes = c("entrezgene", "mgi_symbol", "description"),
      mart = mart
    )
    as.character(term_map$mgi_symbol[match(entrez_ids, term_map$entrezgene)])
  }
)
{ 22 | image <- readPNG(source = file.path(image.dir, image.name)) 23 | scale.factors <- fromJSON(txt = file.path(image.dir, 'scalefactors_json.json')) 24 | tissue.positions.path <- Sys.glob(paths = file.path(image.dir, 'tissue_positions*')) 25 | tissue.positions <- read.csv( 26 | file = tissue.positions.path[1], 27 | col.names = c('barcodes', 'tissue', 'row', 'col', 'imagerow', 'imagecol'), 28 | header = ifelse( 29 | test = basename(tissue.positions.path[1]) == "tissue_positions.csv", 30 | yes = TRUE, 31 | no = FALSE 32 | ), 33 | as.is = TRUE, 34 | row.names = 1 35 | ) 36 | if (filter.matrix) { 37 | tissue.positions <- tissue.positions[which(x = tissue.positions$tissue == 1), , drop = FALSE] 38 | } 39 | unnormalized.radius <- scale.factors$fiducial_diameter_fullres * scale.factors$tissue_hires_scalef 40 | spot.radius <- unnormalized.radius / max(dim(x = image)) 41 | return(new( 42 | Class = 'VisiumV1', 43 | image = image, 44 | scale.factors = scalefactors( 45 | spot = scale.factors$tissue_hires_scalef, 46 | fiducial = scale.factors$fiducial_diameter_fullres, 47 | hires = scale.factors$tissue_hires_scalef, 48 | scale.factors$tissue_hires_scalef 49 | ), 50 | coordinates = tissue.positions, 51 | spot.radius = spot.radius 52 | )) 53 | } 54 | ############# 55 | 56 | image<-Read10X_Image('GSM6727528/outs/spatial/') 57 | Spatial<-Load10X_Spatial('GSM6727528/outs/', image=image) 58 | 59 | #image<-Read10X_Image('GSM6727529/outs/spatial/') 60 | #Spatial<-Load10X_Spatial('GSM6727529/outs/', image=image) 61 | 62 | 63 | Spatial.filt <- subset(Spatial, subset = nFeature_Spatial > 100) 64 | Spatial.filt <- SCTransform(Spatial.filt, assay = "Spatial", return.only.var.genes = FALSE, verbose = FALSE ) 65 | Spatial.filt <- RunPCA(Spatial.filt, assay = "SCT", verbose = FALSE, pcs.compute=50) 66 | Spatial.filt <- FindNeighbors(Spatial.filt, reduction = "pca", dims = 1:20) 67 | Spatial.filt <- FindClusters(Spatial.filt, verbose = FALSE, resolution = 
c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4,1.5,2)) 68 | Spatial.filt <- RunUMAP(Spatial.filt, reduction = "pca", dims = 1:20) 69 | 70 | write.csv(rownames(Spatial.filt@meta.data),"SelectedSpots.csv") 71 | save(Spatial.filt,file="Spatial.filt.Robj") -------------------------------------------------------------------------------- /scRNAseq/Human/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ostunilab/PDAC_Nature_2023/74db198d5ec667eb1ccf9e82295ab0ff5bcdbe43/scRNAseq/Human/.DS_Store -------------------------------------------------------------------------------- /scRNAseq/Human/Nature2023_Human_PDAC_scRNAseq_analyses.r: -------------------------------------------------------------------------------- 1 | #### HUMAN TUMOUR scRNA-seq DATA #### 2 | 3 | library(Seurat) 4 | library(ggplot2) 5 | library(scDblFinder) 6 | library(copykat) 7 | library(SeuratWrappers) 8 | library(harmony) 9 | library(foreach) 10 | library(parallel) 11 | library(dplyr) 12 | library(tidyr) 13 | library(magrittr) 14 | library(nichenetr) 15 | library(clusterProfiler) 16 | library(org.Hs.eg.db) 17 | library(biomaRt) 18 | library(slingshot) 19 | library(viridis) 20 | library(scales) 21 | library(msigdbr) 22 | 23 | 24 | #### FULL DATASET ANALYSIS #### 25 | 26 | Sample.merge <- NormalizeData(Sample.merge, normalization.method = "LogNormalize", scale.factor = 1e4, assay='RNA') 27 | Sample.merge <- ScaleData(Sample.merge, vars.to.regress = c("CC.Difference")) 28 | Sample.merge <- FindVariableFeatures(object = Sample.merge) 29 | Sample.merge <- RunPCA(Sample.merge, pcs.compute=50) 30 | Sample.merge <- RunFastMNN(object.list = SplitObject(Sample.merge, split.by = "orig.ident")) 31 | Sample.merge <- RunUMAP(Sample.merge, reduction = "mnn", dims = 1:30) 32 | Sample.merge <- FindNeighbors(Sample.merge, reduction = "mnn", dims = 1:30) 33 | Sample.merge <- FindClusters(Sample.merge, resolution = 
c(0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4,1.5,2)) 34 | 35 | 36 | #### MONONUCLEAR PHAGOCYTES #### 37 | 38 | Sample.merge_MP <- subset(Sample.merge, subset = RNA_snn_res.0.5 %in% c(0,9,12)) 39 | Sample.merge_MP <- ScaleData(Sample.merge_MP, vars.to.regress = c("CC.Difference"), features=rownames(Sample.merge_MP)) 40 | Sample.merge_MP <- FindVariableFeatures(Sample.merge_MP) 41 | Sample.merge_MP <- RunPCA(Sample.merge_MP, pcs.compute=50) 42 | Sample.merge_MP <- RunHarmony(Sample.merge_MP, "orig.ident", dims.use = 1:30, max.iter.harmony = 30) 43 | Sample.merge_MP <- RunUMAP(Sample.merge_MP, reduction="harmony", dims = 1:ncol(Embeddings(Sample.merge_MP, "harmony")), reduction.name="umap", reduction.key="UMAPHARMONY_") 44 | Sample.merge_MP <- FindNeighbors(Sample.merge_MP, reduction = "harmony", dims = 1:30) 45 | Sample.merge_MP <- FindClusters(Sample.merge_MP, resolution = c(0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4,1.5,2)) 46 | 47 | #### TUMOR-ASSOCIATED MACROPHAGES #### 48 | 49 | Sample.merge_TAM <- subset(Sample.merge_MP, subset = RNA_snn_res.1 %in% c(1,2,5,6,7,9,11)) 50 | Sample.merge_TAM <- ScaleData(Sample.merge_TAM, vars.to.regress = c("CC.Difference"), features=rownames(Sample.merge_TAM)) 51 | Sample.merge_TAM <- FindVariableFeatures(Sample.merge_TAM) 52 | Sample.merge_TAM <- RunPCA(Sample.merge_TAM, pcs.compute=50) 53 | Sample.merge_TAM <- RunHarmony(Sample.merge_TAM, "orig.ident", dims.use = 1:30, max.iter.harmony = 30, theta=3) 54 | Sample.merge_TAM <- RunUMAP(Sample.merge_TAM, reduction="harmony", dims = 1:ncol(Embeddings(Sample.merge_MP, "harmony")), reduction.name="umap", reduction.key="UMAPHARMONY_") 55 | Sample.merge_TAM <- FindNeighbors(Sample.merge_TAM, reduction = "harmony", dims = 1:30) 56 | Sample.merge_TAM <- FindClusters(Sample.merge_TAM, resolution = c(0.2,0.3,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4,1.5,2)) 57 | 58 | # find markers for TAM subsets 59 | Idents(Sample.merge_TAM) <- 
'RNA_snn_res.0.36' 60 | DEGs_TAMsubsets <- Reduce("rbind",lapply(unique(Sample.merge_TAM$RNA_snn_res.0.36), function(x) { 61 | Markers <- FindMarkers(Sample.merge_TAM, ident.1 = x, ident.2 = NULL, only.pos = TRUE, min.pct = 0.1, logfc.threshold = 1, pseudocount.use = 0.1) 62 | Markers <- Markers[which(Markers$p_val_adj < 0.01),] 63 | Markers$gene <- rownames(Markers) 64 | Markers$Cluster <- rep(paste("Cluster",x),nrow(Markers)) 65 | return(Markers) 66 | })) 67 | 68 | # GSEA on GO BP 69 | mart = useMart(biomart="ENSEMBL_MART_ENSEMBL",dataset="hsapiens_gene_ensembl", host = "jul2018.archive.ensembl.org") 70 | bioM=getBM(filters="hgnc_symbol", values=rownames(Sample.merge_TAM), attributes=c("entrezgene","hgnc_symbol"), mart = mart) 71 | 72 | GO_BP_GSEA_TAMsubsets <- Reduce("rbind",lapply(unique(Sample.merge_TAM$RNA_snn_res.0.36), function(x) { 73 | AllMarkers <- FindMarkers(Sample.merge_TAM, ident.1 = x, ident.2 = NULL, only.pos = FALSE, min.pct = 0.1, logfc.threshold = 0, pseudocount.use = 0.1) 74 | logFC = AllMarkers$avg_log2FC 75 | gene_id<-as.character(unlist(mclapply(rownames(AllMarkers), function(x) ifelse(x%in%bioM$hgnc_symbol,bioM[which(bioM$hgnc_symbol==x),1],"NA"),mc.cores = 4))) 76 | logFC <- logFC[!(is.na(gene_id))] 77 | names(logFC)= gene_id[!(is.na(gene_id))] 78 | logFC = sort(logFC, decreasing = TRUE) 79 | GSEA_bp <- gseGO(geneList = logFC, OrgDb = org.Hs.eg.db, ont= "BP", minGSSize= 10, maxGSSize=500, pvalueCutoff = 1,verbose = FALSE) 80 | GSEA_bp<-data.frame(ID=GSEA_bp@result$ID, Description=GSEA_bp@result$Description, setSize=GSEA_bp@result$setSize, NES=GSEA_bp@result$NES,pvalue=GSEA_bp@result$pvalue, qvalues=GSEA_bp@result$qvalues) 81 | GSEA_bp$Cluster <- rep(paste("Cluster",x),nrow(GSEA_bp)) 82 | GSEA_bp <- GSEA_bp[which(GSEA_bp$qvalues < 0.01),] 83 | return(GSEA_bp) 84 | })) 85 | 86 | #### HUMAN-MOUSE TAMs OVERLAP - GSEA 87 | 88 | # compute orthologous of expressed genes in TAMs subset 89 | Idents(Sample_Macro_hg38) <- 'RNA_snn_res.0.36' 90 | 
Idents(Sample_Macro_mm10) <- 'RNA_snn_res.0.4'
human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "jul2018.archive.ensembl.org")
mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "jul2018.archive.ensembl.org")
genes_human <- rownames(Sample_Macro_hg38)
genes_mouse <- rownames(Sample_Macro_mm10)
genes_human_converted <- getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = genes_human, mart = human, attributesL = c("mgi_symbol"), martL = mouse, uniqueRows = FALSE)
genes_mouse_converted <- getLDS(attributes = c("mgi_symbol"), filters = "mgi_symbol", values = genes_mouse, mart = mouse, attributesL = c("hgnc_symbol"), martL = human, uniqueRows = FALSE)

# Mouse symbols with a single human partner are kept as they are; symbols with
# several partners are kept only where the human symbol equals the upper-cased
# mouse symbol.
unambiguous_mouse_genes <- genes_mouse_converted %>% group_by(MGI.symbol) %>% count() %>% filter(n < 2) %>% .$MGI.symbol
ambiguous_mouse_genes <- genes_mouse_converted %>% group_by(MGI.symbol) %>% count() %>% filter(n >= 2) %>% .$MGI.symbol
geneinfo_ambiguous_solved <- genes_mouse_converted %>% filter(MGI.symbol %in% ambiguous_mouse_genes) %>% filter(HGNC.symbol == toupper(MGI.symbol))
genes_mouse_converted <- genes_mouse_converted %>% filter(MGI.symbol %in% unambiguous_mouse_genes) %>% bind_rows(geneinfo_ambiguous_solved)
# De-duplicate before naming rows: assigning duplicated row names is an error.
genes_mouse_converted <- genes_mouse_converted[!(duplicated(genes_mouse_converted[, 1])), ]
rownames(genes_mouse_converted) <- genes_mouse_converted[, 1]

# Genes detected (normalised value > 0) in more than `min_pct` of the cells of
# at least one cluster of `cluster_col`.
expressed_genes_by_cluster <- function(object, cluster_col, min_pct = 0.1) {
  cluster_of_cell <- object@meta.data[[cluster_col]]
  per_cluster <- lapply(unique(cluster_of_cell), function(cluster_id) {
    cells <- rownames(object@meta.data)[which(cluster_of_cell == cluster_id)]
    # drop = FALSE keeps a matrix even when the cluster has a single cell
    pct <- rowSums(object@assays$RNA@data[, cells, drop = FALSE] > 0) / length(cells)
    names(pct[which(pct > min_pct)])
  })
  unique(unlist(per_cluster))
}
expressed_genes_TAM_hg38 <- expressed_genes_by_cluster(Sample_Macro_hg38, "RNA_snn_res.0.36")
expressed_genes_TAM_mm10 <- expressed_genes_by_cluster(Sample_Macro_mm10, "RNA_snn_res.0.4")

# Orthologue pairs expressed in both species.
tmp <- genes_mouse_converted[which(genes_mouse_converted$MGI.symbol %in% expressed_genes_TAM_mm10), ]
gene_to_mouse_common_expressed_TAM <- tmp[which(tmp$HGNC.symbol %in% expressed_genes_TAM_hg38), ]

# Human TAM cluster markers translated to mouse symbols (TERM2GENE for GSEA()).
term2gene <- Reduce("rbind", lapply(unique(Sample_Macro_hg38$RNA_snn_res.0.36), function(cluster_id) {
  Markers <- FindMarkers(Sample_Macro_hg38, ident.1 = cluster_id, ident.2 = NULL, only.pos = TRUE, min.pct = 0.1, logfc.threshold = 0.8, pseudocount.use = 0.1)
  Markers <- Markers[which(Markers$p_val_adj < 0.01), ]
  # Column 1 holds the mouse symbol; markers without an orthologue give NA.
  Markers_to_mm10 <- gene_to_mouse_common_expressed_TAM[, 1][match(rownames(Markers), gene_to_mouse_common_expressed_TAM$HGNC.symbol)]
  Markers_to_mm10 <- Markers_to_mm10[!(is.na(Markers_to_mm10))]
  data.frame(id = rep(paste("Cluster_", cluster_id, sep = ""), length(Markers_to_mm10)), gene = Markers_to_mm10)
}))
# NOTE(review): the labels are assigned by position to the sorted cluster ids
# and assume exactly six human clusters; verify against the clustering.
term2name <- data.frame(id = unique(term2gene[, 1])[order(unique(term2gene[, 1]))], Description = c("Hu_SPP1+","Hu_IL1B+","Hu_FOLR2+","Hu_HSP+","Hu_MT+","Hu_MKI67+"))

# GSEA of the human signatures on each mouse TAM cluster's log2FC ranking.
GSEA_human_to_Mouse_TAMs <- Reduce("rbind", lapply(unique(Sample_Macro_mm10$RNA_snn_res.0.4), function(cluster_id) {
  AllMarkers <- FindMarkers(Sample_Macro_mm10, ident.1 = cluster_id, ident.2 = NULL, only.pos = FALSE, min.pct = 0.1, logfc.threshold = 0, pseudocount.use = 0.1)
  by_logfc <- order(AllMarkers$avg_log2FC, decreasing = TRUE)
  ranks <- AllMarkers[by_logfc, "avg_log2FC"]
  names(ranks) <- rownames(AllMarkers[by_logfc, ])
  fgseaplot <- GSEA(ranks, minGSSize = 10, maxGSSize = 500, eps = 1e-50, pvalueCutoff = 1, pAdjustMethod = "BH", TERM2GENE = term2gene, TERM2NAME = term2name)
  # Column 7 is -log10 transformed and reported as 'log_padj' below.
  fgseaplot@result[, 7] <- -log10(fgseaplot@result[, 7])
  data.frame(fgseaplot@result[, c(2, 5, 7)], rep(paste("Cluster", cluster_id, sep = ""), nrow(fgseaplot@result)))
}))
colnames(GSEA_human_to_Mouse_TAMs) <- c("Hu_TAMs","NES","log_padj","mouseTAMs_Cluster")

#### TUMOR CELLS IN NAIVE SAMPLES ####

# Sample labels are compared case-insensitively: "PDAC_60_Tumor" below differs
# in case from the "PDAC_60_tumor" project label used in pre-processing.
naive_samples <- c("LPDAC_30_tumor","PDAC_50_tumor","PDAC_55_tumor","PDAC_60_Tumor")
is_naive <- tolower(as.character(Sample.merge@meta.data$orig.ident)) %in% tolower(naive_samples)
cells_Naive <- rownames(Sample.merge@meta.data[which(is_naive), ])
Sample.merge_Naive <- subset(Sample.merge, cells = cells_Naive)
Sample.merge_Naive <- NormalizeData(Sample.merge_Naive, normalization.method = "LogNormalize", scale.factor = 1e4)
#Sample.merge_Naive <- ScaleData(Sample.merge_Naive, vars.to.regress = c("CC.Difference"), features=rownames(Sample.merge_Naive))
#Sample.merge_Naive <- FindVariableFeatures(object = Sample.merge_Naive)
#Sample.merge_Naive <- RunPCA(Sample.merge_Naive, pcs.compute=50)
Sample.merge_Naive <- RunFastMNN(object.list = SplitObject(Sample.merge_Naive, split.by = "orig.ident"))
Sample.merge_Naive <- RunUMAP(Sample.merge_Naive, reduction = "mnn", dims = 1:30)
Sample.merge_Naive <- FindNeighbors(Sample.merge_Naive, reduction = "mnn", dims = 1:30)
Sample.merge_Naive <- FindClusters(Sample.merge_Naive, resolution = c(0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4,1.5,2))

cells_Tumor <- rownames(Sample.merge_Naive@meta.data[which(Sample.merge_Naive@meta.data$RNA_snn_res.1 %in% c(1,3,13,15,5,8,17)), ])
Sample_Tumor <- subset(Sample.merge_Naive, cells = cells_Tumor)
#Sample_Tumor <- ScaleData(Sample_Tumor, vars.to.regress = c("CC.Difference"), features=rownames(Sample_Tumor))
#Sample_Tumor <- FindVariableFeatures(object = Sample_Tumor)
#Sample_Tumor <- RunPCA(Sample_Tumor, pcs.compute=50)
Sample_Tumor <- RunFastMNN(object.list = SplitObject(Sample_Tumor, split.by = "orig.ident"))
Sample_Tumor <- RunUMAP(Sample_Tumor, reduction = "mnn", dims = 1:30)
Sample_Tumor <- FindNeighbors(Sample_Tumor, reduction = "mnn", dims = 1:30)
Sample_Tumor <- FindClusters(Sample_Tumor, resolution = c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4,1.5,2))

# find markers for Tumor cells subsets
# Stored under its own name so the TAM marker table (DEGs_TAMsubsets) computed
# earlier in this script is no longer overwritten.
Idents(Sample_Tumor) <- 'RNA_snn_res.0.3'
DEGs_Tumorsubsets <- Reduce("rbind", lapply(unique(Sample_Tumor$RNA_snn_res.0.3), function(cluster_id) {
  Markers <- FindMarkers(Sample_Tumor, ident.1 = cluster_id, ident.2 = NULL, only.pos = TRUE, min.pct = 0.1, logfc.threshold = 1, pseudocount.use = 0.1)
  Markers <- Markers[which(Markers$p_val_adj < 0.01), ]
  Markers$gene <- rownames(Markers)
  Markers$Cluster <- rep(paste("Cluster", cluster_id), nrow(Markers))
  Markers
}))

# re-analysis of clusters enriched in T1RS+ cells
cells_T1RS <- rownames(Sample_Tumor@meta.data[which(Sample_Tumor@meta.data$RNA_snn_res.0.1 == 1), ])

Sample_Tumor_T1RS <- subset(Sample_Tumor, cells = cells_T1RS)
#Sample_Tumor_T1RS <- ScaleData(Sample_Tumor_T1RS, vars.to.regress = c("CC.Difference"), features=rownames(Sample_Tumor_T1RS))
#Sample_Tumor_T1RS <- FindVariableFeatures(object = Sample_Tumor_T1RS)
#Sample_Tumor_T1RS_Naive <- RunPCA(Sample_Tumor_T1RS_Naive, pcs.compute=50)
Sample_Tumor_T1RS <- RunFastMNN(object.list = SplitObject(Sample_Tumor_T1RS, split.by = "orig.ident"))
Sample_Tumor_T1RS <- RunUMAP(Sample_Tumor_T1RS, reduction = "mnn", dims = 1:20)
Sample_Tumor_T1RS <- FindNeighbors(Sample_Tumor_T1RS, reduction = "mnn", dims = 1:20)
Sample_Tumor_T1RS <- FindClusters(Sample_Tumor_T1RS, resolution = c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4,1.5,2))

# pseudotime analysis with slingshot
Tumor_sl <- slingshot(Embeddings(Sample_Tumor_T1RS, "mnn")[, c(1:10)], clusterLabels = Sample_Tumor_T1RS$RNA_snn_res.0.2)
pt <- slingPseudotime(Tumor_sl)

# Pearson correlation between pseudotime (first lineage) and scaled expression
# of every gene detected in more than 10% of the cells.
pct <- rowSums(Sample_Tumor_T1RS@assays$RNA@counts > 0) / ncol(Sample_Tumor_T1RS@assays$RNA@counts)
expressedGenes <- names(pct[which(pct > 0.1)])
t <- na.omit(pt[, 1])
# NOTE(review): this reads the scale.data slot, but the ScaleData() call for
# this object is commented out above; confirm scale.data is populated for
# every gene in expressedGenes.
y <- Sample_Tumor_T1RS@assays$RNA@scale.data[expressedGenes, names(t)]
corr <- apply(y, 1, function(z) {
  cor(t, z, method = "pearson")
})

## NICHENET analysis

# load nichenet networks
ligand_target_matrix <- readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds"))
lr_network <- readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds"))
ligands <- lr_network %>% pull(from) %>% unique()
receptors <- lr_network %>% pull(to) %>% unique()
weighted_networks <- readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds"))
weighted_networks_lr <- weighted_networks$lr_sig %>% inner_join(lr_network %>% distinct(from, to), by = c("from","to"))

Idents(Sample.merge_Naive) <- 'Annotation_nichnet'
receiver <- "PDAC_cluster_T1RS_enriched" # cells subjected to pseudotime analysis
sender <- "IL1B_TAM" # IL1B+ TAMs

# reciver (PDAC cells subjected to pseudotime analysis) expressed genes
DEG_PDAC <- FindMarkers(Sample.merge_Naive, ident.1 = "PDAC_cluster_T1RS_enriched", ident.2 = "other_PDAC_clusters", only.pos = TRUE, logfc.threshold = 0.5, pseudocount.use = 0.1)
expressed_genes_receiver <- get_expressed_genes(receiver, Sample.merge_Naive, pct = 0.15) %>% .[. %in% rownames(DEG_PDAC)]
background_expressed_genes <- expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)]
expressed_receptors <- intersect(receptors, expressed_genes_receiver)

# sender (IL1B+ TAMs) expressed genes
DEG_IL1B <- FindMarkers(Sample.merge_Naive, ident.1 = "IL1B_TAM", ident.2 = "other_TAM", only.pos = TRUE, logfc.threshold = 0.2, pseudocount.use = 0.1)
list_expressed_genes_sender <- sender %>% unique() %>% lapply(get_expressed_genes, Sample.merge_Naive, 0.15) # lapply to get the expressed genes of every sender cell type separately here
expressed_genes_sender <- list_expressed_genes_sender %>% unlist() %>% unique() %>% .[. %in% rownames(DEG_IL1B)]
expressed_ligands <- intersect(ligands, expressed_genes_sender)

# target genes: genes upregulated in T1RS+ PDAC cells (cluster at the end-point of pseudotime curve)
Idents(Sample_Tumor_T1RS) <- 'RNA_snn_res.0.4'
markers_Cluster2 <- FindMarkers(Sample_Tumor_T1RS, ident.1 = 2, logfc.threshold = 1, min.pct = 0.3, pseudocount.use = 0.1, only.pos = TRUE)
geneset_oi <- rownames(markers_Cluster2)
geneset_oi <- geneset_oi %>% .[. %in% rownames(ligand_target_matrix)]

# MODEL
potential_ligands <- lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique()
ligand_activities <- predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands)
ligand_activities <- ligand_activities %>% arrange(-pearson) %>% mutate(rank = rank(desc(pearson)))
best_upstream_ligands <- ligand_activities %>% top_n(20, pearson) %>% arrange(-pearson) %>% pull(test_ligand) %>% unique()
# targets
active_ligand_target_links_df <- best_upstream_ligands %>% lapply(get_weighted_ligand_target_links, geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 200) %>% bind_rows() %>% drop_na()
# receptors
lr_network_top <- lr_network %>% filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>% distinct(from, to)
best_upstream_receptors <- lr_network_top %>% pull(to) %>% unique()
lr_network_top_df_large <- weighted_networks_lr %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors)
# Restrict to interactions not derived from PPI prediction databases.
lr_network_strict <- lr_network %>% filter(database != "ppi_prediction_go" & database != "ppi_prediction")
ligands_bona_fide <- lr_network_strict %>% pull(from) %>% unique()
receptors_bona_fide <- lr_network_strict %>% pull(to) %>% unique()
lr_network_top_df_large_strict <- lr_network_top_df_large %>% distinct(from, to) %>% inner_join(lr_network_strict, by = c("from","to")) %>% distinct(from, to)
lr_network_top_df_large_strict <- lr_network_top_df_large_strict %>% inner_join(lr_network_top_df_large, by = c("from","to"))
# ligand pearson
ligand_pearson_matrix <- ligand_activities %>% select(pearson) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand)
# --------------------------------------------------------------------------------
# /scRNAseq/Human/Nature2023_Human_PDAC_scRNAseq_pre_processing.r:
# --------------------------------------------------------------------------------
#### HUMAN TUMOUR scRNA-seq DATA ####

library(Seurat)
library(ggplot2)
library(scDblFinder)
library(copykat)
library(SeuratWrappers)
library(harmony)
library(foreach)
library(parallel)
library(dplyr)
library(tidyr)
library(magrittr)
library(nichenetr)
library(clusterProfiler)
library(org.Hs.eg.db)
library(biomaRt)
library(slingshot)
library(viridis)
library(scales)
library(msigdbr)

#### PRE-PROCESSING ####

#load datasets
# One Seurat object per sample; the project label becomes orig.ident.
Sample.data <- Read10X("GSM6727545/filtered_feature_bc_matrix/")
Sample_30_tumor <- CreateSeuratObject(Sample.data, min.cells = 3, project = "LPDAC_30_tumor")

Sample.data <- Read10X("GSM6727548/filtered_feature_bc_matrix")
Sample_50_tumor <- CreateSeuratObject(Sample.data, min.cells = 3, project = "PDAC_50_tumor")

Sample.data <- Read10X("GSM6727549/filtered_feature_bc_matrix")
Sample_51_tumor <- CreateSeuratObject(Sample.data, min.cells = 3, project = "PDAC_51_tumor")

Sample.data <- Read10X("GSM6727550/filtered_feature_bc_matrix")
Sample_55_tumor <- CreateSeuratObject(Sample.data, min.cells = 3, project = "PDAC_55_tumor")

Sample.data <- Read10X("GSM6727546/filtered_feature_bc_matrix")
Sample_47_tumor <- CreateSeuratObject(Sample.data, min.cells = 3, project = "PDAC_47_tumor")

Sample.data <- Read10X("GSM6727547/filtered_feature_bc_matrix")
Sample_48_tumor <- CreateSeuratObject(Sample.data, min.cells = 3, project = "PDAC_48_tumor")

Sample.data <- Read10X("GSM6727543/filtered_feature_bc_matrix/")
Sample_25_tumor <- CreateSeuratObject(Sample.data, min.cells = 3, project = "LPDAC_25_tumor")

Sample.data <- Read10X("GSM6727551/filtered_feature_bc_matrix/")
Sample_60_tumor <- CreateSeuratObject(Sample.data, min.cells = 3, project = "PDAC_60_tumor")

Sample.data <- Read10X("GSM6727544/filtered_feature_bc_matrix/")
Sample_26_tumor <- CreateSeuratObject(Sample.data, min.cells = 3, project = "LPDAC_26_tumor")

Sample.data <- Read10X("GSM6727542/filtered_feature_bc_matrix/")
Sample_15_tumor <- CreateSeuratObject(Sample.data, min.cells = 3, project = "LPDAC_15_tumor")

#merge samples
# add.cell.ids follows the same order as the objects (first x, then y).
Sample.merge <- merge(
  Sample_30_tumor,
  y = c(Sample_50_tumor, Sample_51_tumor, Sample_55_tumor, Sample_47_tumor, Sample_60_tumor, Sample_48_tumor, Sample_25_tumor, Sample_26_tumor, Sample_15_tumor),
  add.cell.ids = c("LPDAC_30_tumor", "PDAC_50_tumor", "PDAC_51_tumor", "PDAC_55_tumor", "PDAC_47_tumor", "PDAC_60_tumor", "PDAC_48_tumor", "LPDAC_25_tumor", "LPDAC_26_tumor", "LPDAC_15_tumor"),
  project = "humanPDAC"
)

# cn prediction with copykat
# Shared copykat settings for all samples; only KS.cut (and any extra argument
# passed through ...) varies per sample. output.seg is the string "FALSE"
# everywhere (some calls previously carried the typo "FLASE").
# NOTE(review): every run uses sam.name = "test", so output files written by
# copykat presumably share the same prefix; verify this is intended.
run_copykat <- function(seurat_obj, ks_cut, ...) {
  copykat(
    rawmat = as.matrix(seurat_obj@assays$RNA@counts),
    id.type = "S",
    ngene.chr = 5,
    win.size = 25,
    KS.cut = ks_cut,
    sam.name = "test",
    distance = "euclidean",
    norm.cell.names = "",
    n.cores = 4,
    output.seg = "FALSE",
    ...
  )
}

copykat.PDAC48 <- run_copykat(Sample_48_tumor, ks_cut = 0.15)
copykat.PDAC60 <- run_copykat(Sample_60_tumor, ks_cut = 0.15)
copykat.LPDAC25 <- run_copykat(Sample_25_tumor, ks_cut = 0.1)
# NOTE(review): cell.line is passed as a logical here, as in the original;
# check the copykat documentation for the expected value type.
copykat.LPDAC26 <- run_copykat(Sample_26_tumor, ks_cut = 0.15, cell.line = TRUE)
copykat.LPDAC30 <- run_copykat(Sample_30_tumor, ks_cut = 0.1)
copykat.PDAC47 <- run_copykat(Sample_47_tumor, ks_cut = 0.1)
copykat.PDAC55 <- run_copykat(Sample_55_tumor, ks_cut = 0.15)
copykat.PDAC51 <- run_copykat(Sample_51_tumor, ks_cut = 0.15)
copykat.PDAC50 <- run_copykat(Sample_50_tumor, ks_cut = 0.1)

# add percentage of expression of mitochondrial genes and ribosomal protein genes
# NOTE(review): "^RPL" matches only large-subunit ribosomal genes; the mouse
# scripts in this repository use '^Rp[sl]'. Confirm RPS genes are meant to stay.
Sample.merge <- PercentageFeatureSet(Sample.merge, pattern = "^MT-", col.name = "percent.mito")
Sample.merge <- PercentageFeatureSet(Sample.merge, pattern = "^RPL", col.name = "percent.ribo")

#filtering
# The mitochondrial column created above is 'percent.mito'; the filter now uses
# that name (it previously referred to a 'percent.mt' column that this script
# never creates).
Sample.merge <- subset(Sample.merge, subset = percent.mito < 40 & nCount_RNA > 1000 & nFeature_RNA > 500)

#filtering for Neutrophils annotation
#Sample.merge <- subset(Sample.merge, subset = percent.mito < 40 & nFeature_RNA > 200)

#cell-cycle prediction
s.genes <- readLines('genes_Sphase.txt')
g2m.genes <- readLines('genes_G2Mphase.txt')
# Only genes present in the object are scored.
genes_in_object <- rownames(Sample.merge@assays$RNA@data)
Sample.merge <- CellCycleScoring(
  Sample.merge,
  g2m.features = g2m.genes[g2m.genes %in% genes_in_object],
  s.features = s.genes[s.genes %in% genes_in_object],
  set.ident = FALSE
)
Sample.merge@meta.data$CC.Difference <- Sample.merge@meta.data$S.Score - Sample.merge@meta.data$G2M.Score

#doublet calling with scDblFinder
# Run per sample; the result is one vector of classes named by cell barcode.
doublets.scdblfinder <- unlist(lapply(unique(Sample.merge$orig.ident), function(sample_id) {
  sel_cells <- rownames(Sample.merge@meta.data[which(Sample.merge@meta.data$orig.ident == sample_id), ])
  sceDblF <- scDblFinder(Sample.merge@assays$RNA@counts[, sel_cells], dbr = 0.05)
  doublets_anno <- as.vector(sceDblF@colData$scDblFinder.class)
  names(doublets_anno) <- row.names(sceDblF@colData)
  doublets_anno
}))

#filtering doublets
# Classes are looked up by cell name, so the order of the vector does not matter.
Sample.merge <- AddMetaData(Sample.merge, doublets.scdblfinder[colnames(Sample.merge)], "is.doublet")
Sample.merge <- subset(Sample.merge, subset = is.doublet == 'singlet')

#filtering mitochondrial genes and ribosomal protein genes
all_genes <- rownames(Sample.merge@assays$RNA@counts)
mito.genes.expr <- grep("^MT-", all_genes, value = TRUE)
ribo.genes.expr <- grep("^RPL", all_genes, value = TRUE)
keep_genes <- all_genes[!(all_genes %in% c(mito.genes.expr, ribo.genes.expr))]
Sample.merge <- subset(Sample.merge, features = keep_genes)

#upload data from .rds objects
# Alternative entry point: this replaces the Sample.merge built above with the
# object stored in the .RDS files.
counts <- readRDS("counts.RDS")
meta.data <- readRDS("metadata.RDS")
Sample.merge <- CreateSeuratObject(counts)
Sample.merge <- AddMetaData(Sample.merge, meta.data)
# Rebuild the 'umap' reduction from the coordinates stored in the metadata.
umap_data <- Sample.merge@meta.data[, c("UMAP_1_allCells", "UMAP_2_allCells")]
colnames(umap_data) <- c("UMAP_1", "UMAP_2")
Sample.merge[['umap']] <- CreateDimReducObject(embeddings = as.matrix(umap_data), key = "UMAP_", global = TRUE, assay = "RNA")

# --------------------------------------------------------------------------------
# /scRNAseq/Human/data/.DS_Store:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/ostunilab/PDAC_Nature_2023/74db198d5ec667eb1ccf9e82295ab0ff5bcdbe43/scRNAseq/Human/data/.DS_Store
# --------------------------------------------------------------------------------
# /scRNAseq/Human/data/counts.RDS:
# --------------------------------------------------------------------------------
# version https://git-lfs.github.com/spec/v1
# oid sha256:25555b326b0824e574664f1cd1ae0498c87c1142e817a89f7bb21133e3641354
# size 260585219
# --------------------------------------------------------------------------------
# /scRNAseq/Human/data/metadata.RDS:
# --------------------------------------------------------------------------------
# version https://git-lfs.github.com/spec/v1
# oid sha256:af2f70d73abc7d55f60982d134d1a6aa562ca977bd50f69ae17274ce284f0d8e
# size 2687161
# --------------------------------------------------------------------------------
# /scRNAseq/Mouse/COX2-KO_KPC/Analysis.R:
# --------------------------------------------------------------------------------
library(Seurat)
library(SeuratWrappers)
library(harmony)
library(dplyr)
set.seed(123)

### ALL CELLS ###

Sample_expr <- NormalizeData(Sample_expr, normalization.method = "LogNormalize", scale.factor = 1e4, assay = 'RNA')
Sample_expr <- RunFastMNN(object.list = SplitObject(Sample_expr, split.by = "orig.ident"))
Sample_expr <- RunUMAP(Sample_expr, reduction = 'mnn', dims = 1:30)
Sample_expr <- FindNeighbors(Sample_expr, reduction = 'mnn', dims = 1:30)
Sample_expr <- FindClusters(Sample_expr, resolution = c(0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.5))

# WT-vs-KO differential expression inside one or more res-0.8 clusters.
# object   Seurat object with 'RNA_snn_res.0.8' and 'orig.ident' metadata.
# clusters cluster ids (at resolution 0.8) pooled into one comparison.
# Returns the FindMarkers() table for WT (ident.1) against KO (ident.2).
wt_vs_ko_markers <- function(object, clusters) {
  in_clusters <- which(object$RNA_snn_res.0.8 %in% clusters)
  sub_cl <- subset(object, cells = colnames(object)[in_clusters])
  Idents(sub_cl) <- sub_cl$orig.ident
  FindMarkers(sub_cl, ident.1 = 'WT', ident.2 = 'KO', min.pct = 0.1, only.pos = FALSE, pseudocount.use = 0.1, logfc.threshold = 0.5, assay = 'RNA')
}

# One table per cluster, stored as Cluster_<id>_WT_vs_KO (same variable names
# as before, now created with assign() instead of eval(parse())).
for (i in c(0,15,4,10,12,9,11,18,2,13,17)) {
  result_name <- paste("Cluster_", i, "_WT_vs_KO", sep = "")
  assign(result_name, wt_vs_ko_markers(Sample_expr, i))
  #write.table(get(result_name)[get(result_name)$p_val_adj<0.01,], paste('ALLCELLS_Cluster',i,'_DEG_WT_vs_COX2_KO_res0.8.txt',sep=''), sep='\t', quote=F, col.names=T, row.names=T)
}

# Clusters 1, 3, 5 and 8 are pooled for the macrophage comparison.
Cluster_Macro_WT_vs_KO <- wt_vs_ko_markers(Sample_expr, c(1,3,5,8))
#write.table(Cluster_Macro_WT_vs_KO[Cluster_Macro_WT_vs_KO$p_val_adj<0.01,], 'ALLCELLS_ClusterMacro_DEG_WT_vs_COX2_KO_res0.8.txt', sep='\t', quote=F, col.names=T, row.names=T)

# Clusters 6 and 16 are pooled for the fibroblast comparison.
Cluster_Fibroblasts_WT_vs_KO <- wt_vs_ko_markers(Sample_expr, c(6,16))
#write.table(Cluster_Fibroblasts_WT_vs_KO[Cluster_Fibroblasts_WT_vs_KO$p_val_adj<0.01,], 'ALLCELLS_ClusterFibroblasts_DEG_WT_vs_COX2_KO_res0.8.txt', sep='\t', quote=F, col.names=T, row.names=T)

# Clusters 7 and 14 are pooled for the DC comparison.
Cluster_DCs_WT_vs_KO <- wt_vs_ko_markers(Sample_expr, c(7,14))
#write.table(Cluster_DCs_WT_vs_KO[Cluster_DCs_WT_vs_KO$p_val_adj<0.01,], 'ALLCELLS_ClusterDCs_DEG_WT_vs_COX2_KO_res0.8.txt', sep='\t', quote=F, col.names=T, row.names=T)

### TUMOR-ASSOCIATED MACROPHAGES ###

Sample_expr_TAM <- subset(Sample_expr, subset = Annotation_2 == 'TAMs')

Sample_expr_TAM <- NormalizeData(Sample_expr_TAM, normalization.method = "LogNormalize", scale.factor = 1e4, assay = 'RNA')
Sample_expr_TAM <- FindVariableFeatures(Sample_expr_TAM, selection.method = "vst", nfeatures = 3000)
Sample_expr_TAM <- ScaleData(Sample_expr_TAM, vars.to.regress = c("CC.Difference"))
Sample_expr_TAM <- RunPCA(Sample_expr_TAM)
Sample_expr_TAM <- RunHarmony(Sample_expr_TAM, group.by.vars = c('orig.ident'), dims.use = 1:30, theta = 2, reduction.save = 'harmony')
Sample_expr_TAM <- RunUMAP(Sample_expr_TAM, reduction = 'harmony', dims = 1:20)
Sample_expr_TAM <- FindNeighbors(Sample_expr_TAM, reduction = 'harmony', dims = 1:20)
Sample_expr_TAM <- FindClusters(Sample_expr_TAM, resolution = c(0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.5))

# WT vs KO within Il1b+ TAMs; logfc.threshold = 0 keeps every tested gene.
sub_Il1bTAM <- subset(Sample_expr_TAM, subset = Annotation_TAMs == 'Il1b_TAMs')
Idents(sub_Il1bTAM) <- sub_Il1bTAM$orig.ident
Il1bTAM_WT_vs_KO <- FindMarkers(sub_Il1bTAM, ident.1 = 'WT', ident.2 = 'KO', min.pct = 0.1, only.pos = FALSE, pseudocount.use = 0.1, logfc.threshold = 0, assay = 'RNA')
# --------------------------------------------------------------------------------
# /scRNAseq/Mouse/COX2-KO_KPC/Pre-processing.R:
# --------------------------------------------------------------------------------
library(Seurat)
library(scDblFinder)
set.seed(123)

#### PRE-PROCESSING ####

Sample.d7.WT <- Read10X('GSM6727566/filtered_feature_bc_matrix/')
Sample.d7.WT <- CreateSeuratObject(Sample.d7.WT, min.cells = 3, project = "WT")

Sample.d7.KO <- Read10X('GSM6727567/filtered_feature_bc_matrix/')
Sample.d7.KO <- CreateSeuratObject(Sample.d7.KO, min.cells = 3, project = "KO")

Sample_expr <- merge(Sample.d7.WT, y = c(Sample.d7.KO), add.cell.ids = c('WT','KO'))

Sample_expr[['percent.mt']] <- PercentageFeatureSet(Sample_expr, pattern = '^mt-')
Sample_expr[['percent.ribo']] <- PercentageFeatureSet(Sample_expr, pattern = '^Rp[sl]')
s.genes <- readLines('ccgenes_mm_Sphase.txt')
g2m.genes <- readLines('ccgenes_mm_G2Mphase.txt')
# Only genes present in the object are scored.
Sample_expr <- CellCycleScoring(Sample_expr, g2m.features = g2m.genes[g2m.genes %in% rownames(Sample_expr@assays$RNA@data)], s.features = s.genes[s.genes %in% rownames(Sample_expr@assays$RNA@data)], set.ident = FALSE)
Sample_expr@meta.data$CC.Difference <- Sample_expr@meta.data$S.Score - Sample_expr@meta.data$G2M.Score

Sample_expr <- subset(Sample_expr, subset = percent.mt < 25 & nFeature_RNA > 200)

# Doublet detection per genotype (WT first, then KO, as before).
run_scdblfinder <- function(sample_id) {
  sample_cells <- colnames(Sample_expr)[which(Sample_expr$orig.ident == sample_id)]
  sample_obj <- subset(Sample_expr, cells = sample_cells)
  scDblFinder(sample_obj@assays$RNA@counts, dbr = 0.05)
}
sceDblF_WT <- run_scdblfinder('WT')
sceDblF_KO <- run_scdblfinder('KO')

# Doublet scores named by cell barcode.
score.WT <- sceDblF_WT@colData@listData[['scDblFinder.score']]
names(score.WT) <- rownames(sceDblF_WT@colData)
score.KO <- sceDblF_KO@colData@listData[['scDblFinder.score']]
names(score.KO) <- rownames(sceDblF_KO@colData)

doublets.info <- rbind(sceDblF_WT@colData, sceDblF_KO@colData)
# Match classes to cells by barcode rather than by position, so the result
# does not depend on the cell order of the merged object.
Sample_expr$is.doublet <- doublets.info$scDblFinder.class[match(colnames(Sample_expr), rownames(doublets.info))]

Sample_expr <- subset(Sample_expr, subset = is.doublet == 'singlet')
# --------------------------------------------------------------------------------
# /scRNAseq/Mouse/Timecourse_KPC/.DS_Store:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/ostunilab/PDAC_Nature_2023/74db198d5ec667eb1ccf9e82295ab0ff5bcdbe43/scRNAseq/Mouse/Timecourse_KPC/.DS_Store
# --------------------------------------------------------------------------------
# /scRNAseq/Mouse/Timecourse_KPC/Analysis.R:
# --------------------------------------------------------------------------------
library(Seurat)
library(SeuratWrappers)
library(harmony)
library(SeuratExtend)
library(parallel)
library(foreach)
library(dplyr)
library(clusterProfiler)
library(biomaRt)
library(org.Mm.eg.db)
library(msigdbr)
set.seed(123)

# NOTE(review): the two readRDS() results below are not assigned to any
# variable; the rest of this script is outside the visible chunk, so the
# statements are kept verbatim. Confirm the intended target names.
readRDS('KPC_timecourse_counts.rds')
readRDS('KPC_timecourse_metadata.rds') # metadata contains embedding coordinates and cluster ids to
reproduce figures 16 | 17 | #### ALL CELLS #### 18 | 19 | Sample_expr <- CreateSeuratObject(counts, min_cells = 0, meta.data = metadata) 20 | 21 | Sample_expr <- NormalizeData(Sample_expr, normalization.method = "LogNormalize", scale.factor = 1e4, assay='RNA') 22 | Sample_expr_FastMNN <- FindVariableFeatures(Sample_expr,selection.method = "vst", nfeatures = 3000) 23 | Sample_expr_FastMNN <- RunFastMNN(object.list = SplitObject(Sample_expr_FastMNN, split.by = "orig.ident")) 24 | Sample_expr_FastMNN <- RunUMAP(Sample_expr_FastMNN, reduction='mnn', dims = 1:20) 25 | Sample_expr_FastMNN <- FindNeighbors(Sample_expr_FastMNN, reduction = 'mnn', dims = 1:20) 26 | Sample_expr_FastMNN <- FindClusters(Sample_expr_FastMNN, resolution = c(0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.5)) 27 | 28 | Idents(Sample_expr_FastMNN) <- 'RNA_snn_res.0.5' 29 | i <- 0 30 | while(i<=23){ 31 | eval(parse(text=(paste("cluster",i,".markers0.5 <- FindMarkers(Sample_expr_FastMNN, ident.1 =",i,", min.pct=0.25, only.pos = TRUE, pseudocount.use = 0.1, logfc_threshold = 1, assay = 'RNA')", sep="")))) 32 | eval(parse(text=(paste("cluster",i,".markers0.5 <- cluster",i,".markers0.5[order(cluster",i,".markers0.5$avg_log2FC, decreasing = TRUE),]", sep="")))) 33 | #eval(parse(text=(paste("write.table(cluster",i,".markers0.5, 'MarkerGenes_in_Cluster",i,"_res0.5.txt', sep='\t', quote=F, col.names=T, row.names=T)", sep="")))) 34 | print(paste("Evaluated the markers' significance of cluster n.",i)) 35 | i<-i+1} 36 | 37 | #### MONONUCLEAR PHAGOCYTES #### 38 | 39 | Sample_expr_MP <- subset(Sample_expr, subset = MNPs_refined == 'MNPs') 40 | 41 | Sample_expr_MP <- NormalizeData(Sample_expr_MP, normalization.method = "LogNormalize", scale.factor = 1e4, assay='RNA') 42 | Sample_expr_MP <- FindVariableFeatures(Sample_expr_MP,selection.method = "vst", nfeatures = 3000) 43 | Sample_expr_MP <- ScaleData(Sample_expr_MP, vars.to.regress = c("CC.Difference")) 44 | Sample_expr_MP <- RunPCA(Sample_expr_MP) 45 | Sample_expr_MP <- 
RunHarmony(Sample_expr_MP, group.by.vars = c('orig.ident'), dims.use = 1:30, theta=2, reduction.save = 'harmony') 46 | Sample_expr_MP <- RunUMAP(Sample_expr_MP, reduction='harmony', dims = 1:20) 47 | Sample_expr_MP <- FindNeighbors(Sample_expr_MP, reduction = 'harmony', dims = 1:20) 48 | Sample_expr_MP <- FindClusters(Sample_expr_MP, resolution = c(0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.5)) 49 | 50 | Idents(Sample_expr_MP) <- 'RNA_snn_res.1' 51 | i <- 0 52 | while(i<=15){ 53 | eval(parse(text=(paste("cluster",i,".markers1 <- FindMarkers(Sample_expr_MP, ident.1 =",i,", min.pct=0.1, only.pos = TRUE, pseudocount.use = 0.1, logfc_threshold = 1, assay = 'RNA')", sep="")))) 54 | eval(parse(text=(paste("cluster",i,".markers1 <- cluster",i,".markers1[order(cluster",i,".markers1$avg_log2FC, decreasing = TRUE),]", sep="")))) 55 | eval(parse(text=(paste("write.table(cluster",i,".markers1, 'MarkerGenes_in_Cluster",i,"_res1.txt', sep='\t', quote=F, col.names=T, row.names=T)", sep="")))) 56 | print(paste("Evaluated the markers' significance of cluster n.",i)) 57 | i<-i+1} 58 | 59 | #### TUMOR-ASSOCIATED MACROPHAGES #### 60 | 61 | Sample_expr_TAM <- subset(Sample_expr, subset = Macro_refined == 'Macrophage') 62 | 63 | Sample_expr_TAM <- NormalizeData(Sample_expr_TAM, normalization.method = "LogNormalize", scale.factor = 1e4, assay='RNA') 64 | Sample_expr_TAM <- FindVariableFeatures(Sample_expr_TAM,selection.method = "vst", nfeatures = 3000) 65 | Sample_expr_TAM <- ScaleData(Sample_expr_TAM, vars.to.regress = c("CC.Difference")) 66 | Sample_expr_TAM <- RunPCA(Sample_expr_TAM) 67 | Sample_expr_TAM <- RunHarmony(Sample_expr_TAM, group.by.vars = c('orig.ident'), dims.use = 1:30, theta=2, reduction.save = 'harmony') 68 | Sample_expr_TAM <- RunUMAP(Sample_expr_TAM, reduction='harmony', dims = 1:20) 69 | Sample_expr_TAM <- FindNeighbors(Sample_expr_TAM, reduction = 'harmony', dims = 1:20) 70 | Sample_expr_TAM <- FindClusters(Sample_expr_TAM, resolution = 
c(0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.5)) 71 | 72 | Idents(Sample_expr_TAM) <- 'RNA_snn_res.0.4' 73 | i <- 0 74 | while(i<=6){ 75 | eval(parse(text=(paste("cluster",i,".markers0.4 <- FindMarkers(Sample_expr_TAM, ident.1 =",i,", min.pct=0.1, only.pos = TRUE, pseudocount.use = 0.1, logfc_threshold = 1, assay = 'RNA')", sep="")))) 76 | eval(parse(text=(paste("cluster",i,".markers0.4 <- cluster",i,".markers0.4[order(cluster",i,".markers0.4$avg_log2FC, decreasing = TRUE),]", sep="")))) 77 | eval(parse(text=(paste("write.table(cluster",i,".markers0.4, 'MarkerGenes_in_Cluster",i,"_res1.txt', sep='\t', quote=F, col.names=T, row.names=T)", sep="")))) 78 | print(paste("Evaluated the markers' significance of cluster n.",i)) 79 | i<-i+1} 80 | 81 | TAM_annotation <- ifelse(Sample_expr_TAM$RNA_snn_res.0.4 == 0, 'Il1b_TAMs', 82 | ifelse(Sample_expr_TAM$RNA_snn_res.0.4 == 1, 'Cxcl9_TAMs', 83 | ifelse(Sample_expr_TAM$RNA_snn_res.0.4 == 2, 'Spp1_TAMs', 84 | ifelse(Sample_expr_TAM$RNA_snn_res.0.4 == 3, 'Folr2_TAMs', 85 | ifelse(Sample_expr_TAM$RNA_snn_res.0.4 == 4, 'Clps_TAMs', 86 | ifelse(Sample_expr_TAM$RNA_snn_res.0.4 == 5, 'Proliferating_TAMs', 'Marco_TAMs')))))) 87 | 88 | Sample_expr_TAM$TAM_Annotation <- TAM_annotation 89 | 90 | #### MONOCYTES AND MACROPHAGES #### 91 | 92 | readRDS('MonoMacro_KPC_timecourse_counts.rds') 93 | readRDS('MonoMacro_KPC_timecourse_metadata.rds') # metadata contains embedding coordinates and cluster ids to reproduce figures 94 | 95 | Sample_merge_MonoMacro <- CreateSeuratObject(counts, min_cells = 0, meta.data = metadata) 96 | 97 | Sample_merge_MonoMacro <- NormalizeData(Sample_merge_MonoMacro, normalization.method = "LogNormalize", scale.factor = 1e4, assay='RNA') 98 | Sample_merge_MonoMacro <- FindVariableFeatures(Sample_merge_MonoMacro,selection.method = "vst", nfeatures = 3000) 99 | Sample_merge_MonoMacro <- RunFastMNN(object.list = SplitObject(Sample_merge_MonoMacro, split.by = "orig.ident")) 100 | 
palantir_so<-RunPalantirDiffusionMap(Sample_merge_MonoMacro, reduction = "mnn", n_components = 20) 101 | Sample_merge_MonoMacro[["tsne_mnn"]] <- 102 | read.csv("tmp/tsne.csv", row.names = 1) %>% 103 | set_colnames(paste0("TSNE_FASTMNN_", 1:ncol(.))) %>% 104 | as.matrix() %>% 105 | CreateDimReducObject(key = "TSNEFASTMNN_", assay = DefaultAssay(Sample_merge_MonoMacro)) 106 | 107 | ## prepare annotations for velocity and Cellrank analysis 108 | 109 | annotated_clusters <- as.data.frame(Sample_merge_MonoMacro$Annotation) 110 | colnames(annotated_clusters) <- 'clusters_refined' 111 | write.csv(annotated_clusters, file='annotated_clusters.csv') 112 | 113 | embedding_coord <- Sample_merge_MonoMacro@meta.data[,c('MonoMacro_TSNE_1','MonoMacro_TSNE_2')] 114 | colnames(embedding_coord)<-c('x','y') 115 | write.csv(embedding_coord, file='tsne.csv') 116 | 117 | ### run python notebook scripts for velocity analysis + Cellrank 118 | 119 | ## prepare data for optimal transport analysis 120 | cell_day <- data.frame(id=rownames(Sample_merge_MonoMacro@meta.data),day=ifelse(Sample_merge_MonoMacro$orig.ident=='Tumor_d10',10, 121 | ifelse(Sample_merge_MonoMacro$orig.ident=='Tumor_d20',20, 122 | ifelse(Sample_merge_MonoMacro$orig.ident=='Tumor_d30',30,0)))) 123 | write.table(cell_day, 'cell_day.txt', sep='\t', quote=F, col.names=T, row.names=F) 124 | 125 | embedding_coord <- Sample_merge_MonoMacro@meta.data[,c('cell_id','MonoMacro_TSNE_1','MonoMacro_TSNE_2')] 126 | colnames(embedding_coord)<-c('id','x','y') 127 | write.table(embedding_coord, 'embedding_coord.txt', sep='\t', quote=F, col.names=T, row.names=F) 128 | 129 | SaveH5Seurat(counts, filename = "matrix_MM.h5Seurat") 130 | Convert("matrix_MM.h5Seurat", dest = "h5ad") 131 | 132 | ### run WOT scripts for optimal transport analysis 133 | 134 | #### EPITHELIAL AND TUMOR CELLS #### 135 | 136 | Sample_expr_Epithelial <- subset(Sample_expr, subset = Epithelial_refined == 'Epithelial_cells') 137 | 138 | Sample_expr_Epithelial <- 
NormalizeData(Sample_expr_Epithelial, normalization.method = "LogNormalize", scale.factor = 1e4, assay='RNA') 139 | Sample_expr_Epithelial <- FindVariableFeatures(Sample_expr_Epithelial,selection.method = "vst", nfeatures = 3000) 140 | Sample_expr_Epithelial <- ScaleData(Sample_expr_Epithelial, vars.to.regress = c("CC.Difference")) 141 | Sample_expr_Epithelial <- RunPCA(Sample_expr_Epithelial) 142 | Sample_expr_Epithelial <- RunHarmony(Sample_expr_Epithelial, group.by.vars = c('orig.ident'), dims.use = 1:20, theta=1, reduction.save = 'harmony') 143 | Sample_expr_Epithelial <- RunUMAP(Sample_expr_Epithelial, reduction='harmony', dims = 1:20) 144 | Sample_expr_Epithelial <- FindNeighbors(Sample_expr_Epithelial, reduction = 'harmony', dims = 1:20) 145 | Sample_expr_Epithelial <- FindClusters(Sample_expr_Epithelial, resolution = c(0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.5)) 146 | 147 | Idents(Sample_expr_Epithelial)<-Sample_expr_Epithelial$orig.ident 148 | 149 | Epithelial_Day10_vs_Healthy <- FindMarkers(Sample_expr_Epithelial, ident.1 ='Tumor_d10',ident.2 ='Healthy', min.pct=0.1, only.pos = FALSE, pseudocount.use = 0.1, logfc_threshold = 0, assay = 'RNA') 150 | #write.table(Epithelial_Day10_vs_Healthy[which(Epithelial_Day10_vs_Healthy$p_val_adj < 0.01 & abs(Epithelial_Day10_vs_Healthy$avg_log2FC) >= 1),], 'Epithelial_Day10_vs_Healthy.txt', sep='\t', quote=F, col.names=T, row.names=T) 151 | 152 | Epithelial_Day20_vs_Healthy <- FindMarkers(Sample_expr_Epithelial, ident.1 ='Tumor_d20',ident.2 ='Healthy', min.pct=0.1, only.pos = FALSE, pseudocount.use = 0.1, logfc_threshold = 0, assay = 'RNA') 153 | #write.table(Epithelial_Day20_vs_Healthy[which(Epithelial_Day20_vs_Healthy$p_val_adj < 0.01 & abs(Epithelial_Day20_vs_Healthy$avg_log2FC) >= 1),], 'Epithelial_Day20_vs_Healthy.txt', sep='\t', quote=F, col.names=T, row.names=T) 154 | 155 | Epithelial_Day30_vs_Healthy <- FindMarkers(Sample_expr_Epithelial, ident.1 ='Tumor_d30',ident.2 ='Healthy', min.pct=0.1, only.pos = FALSE, 
pseudocount.use = 0.1, logfc_threshold = 0, assay = 'RNA') 156 | #write.table(Epithelial_Day30_vs_Healthy[which(Epithelial_Day30_vs_Healthy$p_val_adj < 0.01 & abs(Epithelial_Day30_vs_Healthy$avg_log2FC) >= 1),], 'Epithelial_Day30_vs_Healthy.txt', sep='\t', quote=F, col.names=T, row.names=T) 157 | 158 | hallmark_gene_sets = msigdbr(species = "mouse", category = "H") 159 | mouse = useMart(biomart="ENSEMBL_MART_ENSEMBL",dataset="mmusculus_gene_ensembl", host = "jul2018.archive.ensembl.org") 160 | 161 | my_term_mouse=data.frame(hallmark_gene_sets$gs_name,hallmark_gene_sets$entrez_gene) 162 | 163 | clusters_ordered_mouse = c(10,20,30) 164 | for (i in clusters_ordered_mouse){ 165 | eval(parse(text=(paste("tmp <- Epithelial_Day",i,"_vs_Healthy[,'avg_log2FC']", sep="")))) 166 | eval(parse(text=(paste("names(tmp) <- rownames(Epithelial_Day",i,"_vs_Healthy)", sep="")))) 167 | tmp <- tmp[which(tmp != "NA")] 168 | tmp <- sort(tmp, decreasing=TRUE) 169 | bioM_mouse=getBM(filters="mgi_symbol",values=names(tmp), attributes=c("entrezgene","mgi_symbol","description"),mart = mouse) 170 | gene_id<-as.character(unlist(mclapply(names(tmp), function(x) ifelse(x%in%bioM_mouse$mgi_symbol,bioM_mouse[which(bioM_mouse$mgi_symbol==x),1],"NA"),mc.cores = 4))) 171 | geneList <- tmp 172 | names(geneList) <- as.character(gene_id) 173 | geneList=geneList[which(names(geneList) != "NA")] 174 | eval(parse(text=(paste("GSEA_Epithelial_Day",i,"_vs_Healthy.mouse_HALLMARK <- GSEA(geneList, TERM2GENE = my_term_mouse, nPerm=100000, minGSSize= 15, maxGSSize=500, pvalueCutoff = 1,verbose = FALSE)", sep="")))) 175 | #eval(parse(text=(paste("write.table(GSEA_Epithelial_Day",i,"_vs_Healthy.mouse_HALLMARK@result,'Epithelial_Day",i,"_vs_Healthy_GSEA_HALLMARK.txt', sep='\t', quote=F, col.names=T, row.names=F)", sep="")))) 176 | } 177 | 178 | -------------------------------------------------------------------------------- /scRNAseq/Mouse/Timecourse_KPC/Optimal_Transport.sh: 
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Stop at the first failing step: each wot command depends on the previous one.
set -euo pipefail

##### OPTIMAL TRANSPORT ####
# wot command line interface
# cell_day.txt and embedding_coord.txt are the tables written by Analysis.R
# (the script writes 'cell_day.txt', so that is the name used here).

wot optimal_transport --matrix matrix_MM.h5ad --cell_days cell_day.txt --growth_iters 3 --lambda1 1 --lambda2 50 --epsilon 0.05 --verbose
wot trajectory --tmap tmaps --cell_set cell_sets.gmt --day 30 --embedding embedding_coord.txt
wot transition_table --tmap tmaps --cell_set cell_sets.gmt --start_time 0 --end_time 30

--------------------------------------------------------------------------------
/scRNAseq/Mouse/Timecourse_KPC/Pre-processing.R:
--------------------------------------------------------------------------------
library(Seurat)
library(scDblFinder)
# Fixed seed so the doublet calls below are repeatable (the COX2-KO
# pre-processing script in this repository sets the same seed).
set.seed(123)

#### PRE-PROCESSING ####

Sample.Healthy <- Read10X('GSM6727561/filtered_feature_bc_matrix/')
Sample.Healthy <- CreateSeuratObject(Sample.Healthy, min.cells = 3, project = "Healthy")

Sample.d10.Tumor <- Read10X('GSM6727558/filtered_feature_bc_matrix/')
Sample.d10.Tumor <- CreateSeuratObject(Sample.d10.Tumor, min.cells = 3, project = "Tumor_d10")

Sample.d20.Tumor <- Read10X('GSM6727559/filtered_feature_bc_matrix/')
Sample.d20.Tumor <- CreateSeuratObject(Sample.d20.Tumor, min.cells = 3, project = "Tumor_d20")

Sample.d30.Tumor <- Read10X('GSM6727560/filtered_feature_bc_matrix/')
Sample.d30.Tumor <- CreateSeuratObject(Sample.d30.Tumor, min.cells = 3, project = "Tumor_d30")

Sample_expr <- merge(
  Sample.d10.Tumor,
  y = c(Sample.d20.Tumor, Sample.Healthy, Sample.d30.Tumor),
  add.cell.ids = c('Tumor_d10', 'Tumor_d20', 'Healthy', 'Tumor_d30')
)

# QC covariates: mitochondrial and ribosomal read fractions.
Sample_expr[['percent.mt']] <- PercentageFeatureSet(Sample_expr, pattern = '^mt-')
Sample_expr[['percent.ribo']] <- PercentageFeatureSet(Sample_expr, pattern = '^Rp[sl]')

# Cell-cycle scores, restricted to the genes present in the object.
# NOTE(review): no NormalizeData() call precedes this step in the script, so
# the scores are presumably computed on un-normalised values -- confirm intended.
s.genes <- readLines('ccgenes_mm_Sphase.txt')
g2m.genes <- readLines('ccgenes_mm_G2Mphase.txt')
detected_genes <- rownames(Sample_expr@assays$RNA@data)
Sample_expr <- CellCycleScoring(
  Sample_expr,
  g2m.features = g2m.genes[g2m.genes %in% detected_genes],
  s.features = s.genes[s.genes %in% detected_genes],
  set.ident = FALSE
)
Sample_expr@meta.data$CC.Difference <- Sample_expr@meta.data$S.Score - Sample_expr@meta.data$G2M.Score

Sample_expr <- subset(Sample_expr, subset = percent.mt < 25 & nCount_RNA > 1000 & nFeature_RNA > 200)

# Doublet detection is run per sample. Results are stored in globals named
# sceDblF_<sample> and score.<sample> (same names as before, now created with
# assign() instead of eval(parse())).
for (i in c('Healthy', 'Tumor_d10', 'Tumor_d20', 'Tumor_d30')) {
  sub <- subset(Sample_expr, subset = orig.ident == i)
  dbl <- scDblFinder(sub@assays$RNA@counts, dbr = 0.07)
  dbl_score <- dbl@colData@listData[['scDblFinder.score']]
  names(dbl_score) <- rownames(dbl@colData)
  assign(paste0("sceDblF_", i), dbl)
  assign(paste0("score.", i), dbl_score)
}

# The labels are attached by position, so the rbind order must match the merge
# order used above (Tumor_d10, Tumor_d20, Healthy, Tumor_d30).
doublets.info <- rbind(
  sceDblF_Tumor_d10@colData, sceDblF_Tumor_d20@colData,
  sceDblF_Healthy@colData, sceDblF_Tumor_d30@colData
)
stopifnot(nrow(doublets.info) == ncol(Sample_expr))
Sample_expr$is.doublet <- doublets.info$scDblFinder.class

Sample_expr <- subset(Sample_expr, subset = is.doublet == 'singlet')
--------------------------------------------------------------------------------
/scRNAseq/Mouse/Timecourse_KPC/Velocyto.sh:
--------------------------------------------------------------------------------
#!/bin/bash
##### VELOCYTO ####
# run on each sample, refdata-gex-mm10-2020-A used as reference
# file_bam and output_path are expected to be set by the caller; fail early
# with a clear message if they are not, and quote them so paths containing
# spaces are passed as a single argument.
set -euo pipefail
: "${file_bam:?file_bam must be set to the sample BAM file}"
: "${output_path:?output_path must be set to the velocyto output directory}"

samtools sort -t CB -O BAM -o cellsorted_possorted_genome_bam.bam "$file_bam"
velocyto run -b barcodes.tsv -o "$output_path" -m mm10_rmsk.gtf "$file_bam" genes.gtf

--------------------------------------------------------------------------------
/scRNAseq/Mouse/Timecourse_KPC/data/KPC_timecourse_counts.rds:
--------------------------------------------------------------------------------
version https://git-lfs.github.com/spec/v1
oid sha256:a5f71488818768be17cf5cf2b9ea7ab6f7b054db6ad4e4a5688284dba86f04f4
size 259428730

-------------------------------------------------------------------------------- /scRNAseq/Mouse/Timecourse_KPC/data/KPC_timecourse_metadata.rds: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1b7e7aec95b70af9c686a1468f91f8d960cbd293cac2aa270af9c0181ac3f134 3 | size 3260070 4 | -------------------------------------------------------------------------------- /scRNAseq/Mouse/Timecourse_KPC/data/MonoMacro_KPC_timecourse_counts.rds: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e6b51ae2f5ac3d4ea2c728c14df559b84e19037026c48ffa605975a6b8856f4c 3 | size 49666751 4 | -------------------------------------------------------------------------------- /scRNAseq/Mouse/Timecourse_KPC/data/MonoMacro_KPC_timecourse_metadata.rds: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:12ee4d5343cff03d9ffda94ec0a3894408c4d2a2511fc939511e7319fd6b03bd 3 | size 738582 4 | --------------------------------------------------------------------------------