├── .DS_Store
├── .gitattributes
├── Molecular_Cartography
    └── Nature2023_Human_PDAC_MolecularCartography_analyses.r
├── README.md
├── Visium
    ├── Nature2023_Mouse_PDAC_Visium_DestVI_analysis.ipynb
    ├── Nature2023_Mouse_PDAC_Visium_post_deconvolution_analyses.r
    └── Nature2023_Mouse_PDAC_Visium_pre_processing.r
└── scRNAseq
    ├── Human
        ├── .DS_Store
        ├── Nature2023_Human_PDAC_scRNAseq_analyses.r
        ├── Nature2023_Human_PDAC_scRNAseq_pre_processing.r
        └── data
        │   ├── .DS_Store
        │   ├── counts.RDS
        │   └── metadata.RDS
    └── Mouse
        ├── COX2-KO_KPC
            ├── Analysis.R
            └── Pre-processing.R
        └── Timecourse_KPC
            ├── .DS_Store
            ├── Analysis.R
            ├── Optimal_Transport.sh
            ├── Pre-processing.R
            ├── Velocity_and_Cellrank_analysis.ipynb
            ├── Velocyto.sh
            └── data
                ├── KPC_timecourse_counts.rds
                ├── KPC_timecourse_metadata.rds
                ├── MonoMacro_KPC_timecourse_counts.rds
                ├── MonoMacro_KPC_timecourse_metadata.rds
                ├── annotated_clusters.csv
                ├── cell_sets.gmt
                ├── tissue_annotation.csv
                └── tsne.csv


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostunilab/PDAC_Nature_2023/74db198d5ec667eb1ccf9e82295ab0ff5bcdbe43/.DS_Store


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.rds filter=lfs diff=lfs merge=lfs -text
2 | 


--------------------------------------------------------------------------------
/Molecular_Cartography/Nature2023_Human_PDAC_MolecularCartography_analyses.r:
--------------------------------------------------------------------------------
  1 | library(Matrix)
  2 | library(graphics)
  3 | library(parallel)
  4 | library(Seurat)
  5 | library(dplyr)
  6 | library(spdep)
  7 | library(dbscan)
  8 | library(ggplot2)
  9 | library(scales)
 10 | 
 11 | # Assemble a Seurat object (counts in `assay`) with an FOV named `fov` holding the cell centroids,
 12 | # the segmentation polygons and the molecule coordinates found in the `data` list.
 13 | LoadResolve <- function (data, fov, assay = "Resolve") {
 14 |     boundaries <- list(centroids = CreateCentroids(data$centroids), segmentation = CreateSegmentation(data$segmentations))
 15 |     fov_coords <- CreateFOV(coords = boundaries, type = c("segmentation", "centroids"), molecules = data$microns, assay = assay)
 16 |     seurat_obj <- CreateSeuratObject(counts = data$transcripts, assay = assay)
 17 |     shared_cells <- intersect(x = Cells(x = fov_coords[["segmentation"]]), y = Cells(x = seurat_obj))
 18 |     seurat_obj[[fov]] <- subset(x = fov_coords, cells = shared_cells)  # FOV restricted to cells that also have counts
 19 |     seurat_obj
 20 | }
 21 | 
 22 | ##### UPLOAD baysor output and create a Seurat object
 23 | # This section is run once per sample, from that sample's output folder.
 24 | setwd('GSM7655264/outs')
 25 | #setwd('GSM7655265/outs')
 26 | #setwd('GSM7655266/outs')
 27 | 
 28 | data <- vector(mode = 'list', length = 4)
 29 | names(x = data) <- c("transcripts","centroids","segmentations","microns")
 30 | 
 31 | ###### centroids
 32 | cell_centroids <- read.csv('segmentation_cell_stats.csv')
 33 | cell_centroids$cell <- paste0("Cell_", cell_centroids$cell)
 34 | data[['centroids']] <- cell_centroids[,c("x","y","cell")]
 35 | 
 36 | ###### microns
 37 | baysor_out <- read.csv('segmentation.csv')
 38 | table(baysor_out$is_noise)
 39 | # keep non-noise molecules; the comparison also holds if is_noise was parsed as logical (FALSE)
 40 | baysor_out <- baysor_out[tolower(as.character(baysor_out$is_noise)) == "false",]
 41 | baysor_out$cell <- paste0("Cell_", baysor_out$cell)
 42 | data[['microns']] <- baysor_out[,c("x","y","gene")]
 43 | 
 44 | ###### transcripts
 45 | # Gene x cell count matrix built in a single pass: sparseMatrix() sums repeated (gene, cell) pairs,
 46 | # so the molecule table no longer has to be filtered once per cell.
 47 | genes <- sort(unique(as.character(baysor_out$gene)))
 48 | cells_id <- cell_centroids$cell
 49 | in_cells <- baysor_out$cell %in% cells_id & !is.na(baysor_out$gene)
 50 | counts <- sparseMatrix(i = match(baysor_out$gene[in_cells], genes),
 51 |                        j = match(baysor_out$cell[in_cells], cells_id),
 52 |                        x = 1, dims = c(length(genes), length(cells_id)),
 53 |                        dimnames = list(genes, cells_id))
 54 | data[['transcripts']] <- counts
 55 | 
 56 | 
 57 | ####### Compute segmentation: one closed convex-hull polygon per cell
 58 | # Row indices are grouped by cell once; cells without any retained molecule are skipped.
 59 | rows_by_cell <- split(seq_len(nrow(baysor_out)), baysor_out$cell)
 60 | id_edges_segmentation <- unlist(lapply(rows_by_cell[intersect(cell_centroids$cell, names(rows_by_cell))], function(idx) {
 61 | 	hull <- chull(baysor_out[idx, c("x","y")])
 62 | 	idx[c(hull, hull[1])]   # repeat the first vertex to close the polygon
 63 | }), use.names = FALSE)
 64 | segmentation <- baysor_out[id_edges_segmentation, c("x","y","cell")]
 65 | rownames(segmentation) <- seq_len(nrow(segmentation))
 66 | data[['segmentations']] <- segmentation
 67 | 
 68 | # The FOV is stored under the name given to LoadResolve(), so the same name is used to fetch it.
 69 | resolve_B2_1 <- LoadResolve(data, "GSM7655264")
 70 | resolve_B2_1 <- RenameCells(resolve_B2_1, add.cell.id = "B2_1")
 71 | DefaultBoundary(resolve_B2_1[["GSM7655264"]]) <- "segmentation"
 72 | Sample_B2_1 <- CreateSeuratObject(resolve_B2_1@assays$Resolve@counts, min.cells = 10,  project = "B2_1", min.features = 4)
 73 | #resolve_C2_1 <- LoadResolve(data, "GSM7655265")
 74 | #resolve_C2_1 <- RenameCells(resolve_C2_1, add.cell.id = "C2_1")
 75 | #DefaultBoundary(resolve_C2_1[["GSM7655265"]]) <- "segmentation"
 76 | #Sample_C2_1 <- CreateSeuratObject(resolve_C2_1@assays$Resolve@counts, min.cells = 10,  project = "C2_1", min.features = 4)
 77 | 
 78 | #resolve_D2_1 <- LoadResolve(data, "GSM7655266")
 79 | #resolve_D2_1 <- RenameCells(resolve_D2_1, add.cell.id = "D2_1")
 80 | #DefaultBoundary(resolve_D2_1[["GSM7655266"]]) <- "segmentation"
 81 | #Sample_D2_1 <- CreateSeuratObject(resolve_D2_1@assays$Resolve@counts, min.cells = 10,  project = "D2_1", min.features = 4)
 82 | 
 83 | ############
 84 | 
 85 | 
 86 | # Data analysis (Sample_C2_1 / Sample_D2_1 are only created in the commented-out code above)
 87 | Sample.merge <- merge(Sample_B2_1, y = c(Sample_C2_1, Sample_D2_1))
 88 | Sample.merge <- subset(Sample.merge, subset = nCount_RNA >= 10 & nFeature_RNA <= 25)
 89 | Sample.merge <- SCTransform(Sample.merge, assay = "RNA", clip.range = c(-10, 10))  # stray empty trailing argument removed
 90 | Sample.merge <- RunPCA(Sample.merge, npcs = 30, features = rownames(Sample.merge))
 91 | Sample.merge <- RunUMAP(Sample.merge, dims = 1:20)
 92 | Sample.merge <- FindNeighbors(Sample.merge, reduction = "pca", dims = 1:20)
 93 | Sample.merge <- FindClusters(Sample.merge, resolution = c(0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1))
 94 | # Positive markers of every resolution-0.5 cluster against all other cells, stacked into one table
 95 | Idents(Sample.merge) <- 'SCT_snn_res.0.5'
 96 | DEGs_res0.5 <- Reduce("rbind", lapply(unique(Sample.merge$SCT_snn_res.0.5), function(x) {
 97 |     Markers <- FindMarkers(Sample.merge, ident.1 = x, ident.2 = NULL, only.pos = TRUE, min.pct = 0.2, logfc.threshold = 1, pseudocount.use = 0.3)
 98 |     Markers$gene <- rownames(Markers)   # kept as a column because stacking may alter duplicated row names
 99 |     Markers$Cluster <- rep(paste("Cluster", x), nrow(Markers))
100 |     Markers
101 | }))
102 | # Split the clustered object back by sample (orig.ident)
103 | resolve_B2_1 <- subset(Sample.merge, cells = colnames(Sample.merge)[which(Sample.merge$orig.ident == "B2")])
104 | resolve_C2_1 <- subset(Sample.merge, cells = colnames(Sample.merge)[which(Sample.merge$orig.ident == "C2")])
105 | resolve_D2_1 <- subset(Sample.merge, cells = colnames(Sample.merge)[which(Sample.merge$orig.ident == "D2")])
106 | 
107 | # Neighbourhood analysis
108 | # For k = 1..40 spatial nearest neighbours: count, per cluster, the distinct cells that are neighbours
109 | # (distance < 400) of cluster-16 cells; counts are summed over samples and divided by cluster size.
110 | # NOTE(review): assumes each object has its FOV as second named element and a `clusters` column - confirm.
111 | resolve.obj <- list(B2_1 = resolve_B2_1, C2_1 = resolve_C2_1, D2_1 = resolve_D2_1)
112 | cluster_levels <- 0:18
113 | 
114 | fraction_NN <- Reduce("+", lapply(resolve.obj, function(x) {
115 |     xy_cells <- GetTissueCoordinates(x[[names(x)[2]]][["centroids"]])
116 |     rownames(xy_cells) <- xy_cells[,"cell"]
117 |     xy <- as.matrix(xy_cells[, c("x", "y")])
118 |     cells_sel <- rownames(x@meta.data)[which(x@meta.data$clusters == 16)]
119 | 
120 |     dim_NN <- matrix(0, nrow = 40, ncol = length(cluster_levels), dimnames = list(NULL, as.character(cluster_levels)))
121 |     for (nNeighbours in seq_len(nrow(dim_NN))) {
122 |         knn_spatial <- dbscan::kNN(x = xy, k = nNeighbours)
123 |         edges <- data.frame(from = rownames(xy_cells)[rep(seq_len(nrow(knn_spatial$id)), nNeighbours)],
124 |                             to = rownames(xy_cells)[as.vector(knn_spatial$id)],
125 |                             distance = as.vector(knn_spatial$dist))
126 |         nn <- unique(edges[edges$from %in% cells_sel & edges$distance < 400, "to"])
127 |         dim_NN[nNeighbours, ] <- table(factor(x@meta.data[nn, "clusters"], levels = cluster_levels))
128 |     }
129 |     dim_NN
130 | }))
131 | 
132 | # Divide every column by the number of cells of that cluster in the merged object
133 | cluster_sizes <- as.vector(table(factor(Sample.merge@meta.data[,"SCT_snn_res.0.5"], levels = cluster_levels)))
134 | fraction_NN <- sweep(fraction_NN, 2, cluster_sizes, "/")
135 | 
136 | 
137 | 
138 | # Neighbourhood enrichment
139 | # Null model: within each sample the cluster labels are shuffled n_perm times; for every shuffle and
140 | # every cluster, the distinct neighbours (40-NN graph, distance < 400) of its cells are counted by label.
141 | # NOTE(review): assumes each object has its FOV as second named element and a factor `clusters` - confirm.
142 | resolve.obj <- list(B2_1 = resolve_B2_1, C2_1 = resolve_C2_1, D2_1 = resolve_D2_1)
143 | n_perm <- 1000
144 | 
145 | rand_out_lists <- mclapply(resolve.obj, function(x) {
146 |     xy_cells <- GetTissueCoordinates(x[[names(x)[2]]][["centroids"]])
147 |     rownames(xy_cells) <- xy_cells[,"cell"]
148 |     nNeighbours <- 40
149 |     knn_spatial <- dbscan::kNN(x = as.matrix(xy_cells[, c("x", "y")]), k = nNeighbours)
150 |     knn_spatial.norm <- data.frame(from = rownames(xy_cells)[rep(seq_len(nrow(knn_spatial$id)), nNeighbours)],
151 |                                    to = rownames(xy_cells)[as.vector(knn_spatial$id)],
152 |                                    distance = as.vector(knn_spatial$dist))
153 |     # the distance cut-off does not depend on the labels, so it is applied once, outside the permutations
154 |     knn_spatial.norm <- knn_spatial.norm[knn_spatial.norm$distance < 400, ]
155 |     cluster_levels <- levels(x@meta.data$clusters)
156 | 
157 |     # one list of per-cluster neighbour tables for every shuffle (no seed is set here)
158 |     rand_out <- lapply(seq_len(n_perm), function(p) {
159 |         y <- factor(sample(as.vector(x@meta.data$clusters)), levels = cluster_levels)
160 |         names(y) <- rownames(x@meta.data)
161 |         lapply(split(names(y), y), function(z) {
162 |             nn <- unique(knn_spatial.norm[knn_spatial.norm$from %in% z, "to"])
163 |             table(factor(y[nn], levels = cluster_levels))
164 |         })
165 |     })
166 | 
167 |     # per source cluster: a permutations x clusters matrix of neighbour counts
168 |     rand_out_list <- list()
169 |     for (i in cluster_levels) {
170 |         rand_out_list[[i]] <- do.call(rbind, lapply(rand_out, function(z) z[[i]]))
171 |     }
172 |     rand_out_list
173 | }, mc.cores = 16)
174 | 
175 | # sum the permutation matrices over the samples
176 | randomized_NN <- list()
177 | for (i in levels(resolve.obj[[1]]@meta.data$clusters)) {
178 |     randomized_NN[[i]] <- Reduce('+', lapply(rand_out_lists, function(z) z[[i]]))
179 | }
180 | 
181 | # Observed counts: for every cluster (rows), the distinct neighbours (40-NN graph, distance < 400) of
182 | # its cells are tabulated by cluster (columns); the per-sample matrices are then added up.
183 | real_NN <- Reduce("+", lapply(resolve.obj, function(x) {
184 |     centroids <- GetTissueCoordinates(x[[names(x)[2]]][["centroids"]])
185 |     rownames(centroids) <- centroids[,"cell"]
186 |     k <- 40
187 |     knn <- dbscan::kNN(x = as.matrix(centroids[, c("x", "y")]), k = k)
188 |     edges <- data.frame(from = rep(1:nrow(knn$id), k),
189 |                         to = as.vector(knn$id),
190 |                         distance = as.vector(knn$dist))
191 |     edges$from <- rownames(centroids)[edges$from]
192 |     edges$to <- rownames(centroids)[edges$to]
193 | 
194 |     meta <- x@meta.data
195 |     cluster_levels <- levels(meta$clusters)
196 |     per_cluster <- lapply(cluster_levels, function(lv) {
197 |         members <- rownames(meta[which(meta$clusters == lv),])
198 |         reached <- unique(edges[which(edges$from %in% members & edges$distance < 400), "to"])
199 |         table(factor(meta[reached, "clusters"], levels = cluster_levels))
200 |     })
201 |     Reduce('rbind', per_cluster)
202 | }))
203 | # rows are labelled with the cluster levels of the first sample
204 | rownames(real_NN) <- levels(resolve.obj[[1]]@meta.data$clusters)
205 | 
206 | # z-score of the observed neighbour counts against the permutation counts: one row per source cluster
207 | # (names of randomized_NN), one column per neighbour cluster.
208 | z_scores <- do.call(rbind, lapply(names(randomized_NN), function(x) {
209 |     null_counts <- randomized_NN[[x]]
210 |     (real_NN[x, ] - colMeans(null_counts)) / apply(null_counts, 2, sd)
211 | }))
212 | rownames(z_scores) <- names(randomized_NN)
213 | 
214 | 
215 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # IL-1B+ macrophages fuel pathogenic inflammation in pancreatic cancer
2 | 


--------------------------------------------------------------------------------
/Visium/Nature2023_Mouse_PDAC_Visium_DestVI_analysis.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import destvi_utils\n",
 10 |     "import matplotlib.pyplot as plt\n",
 11 |     "import numpy as np\n",
 12 |     "import scanpy as sc\n",
 13 |     "import anndata as ad\n",
 14 |     "import pandas as pd\n",
 15 |     "import scipy\n",
 16 |     "from scvi.model import CondSCVI, DestVI\n",
 17 |     "from skmisc.loess import loess\n",
 18 |     "import torch"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": null,
 24 |    "metadata": {},
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "sc_adata=sc.read_h5ad('scRNA_mouse_PDAC_day30.h5ad')\n",
 28 |     "st_adata=sc.read_visium('GSM6727528/outs',source_image_path='GSM6727528/outs/spatial')\n",
 29 |     "st_filtered=pd.read_csv('SelectedSpots.csv')"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "# subset st data\n",
 39 |     "st_adata.var_names_make_unique()\n",
 40 |     "st_adata=st_adata[st_filtered['x'],]\n",
 41 |     "st_adata"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": null,
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "# NB: sc_adata contains raw counts\n",
 51 |     "sc.pp.filter_genes(sc_adata, min_counts=10)\n",
 52 |     "G = 2000\n",
 53 |     "sc_adata.layers[\"counts\"] = sc_adata.X.copy()\n",
 54 |     "sc.pp.highly_variable_genes(sc_adata, n_top_genes=G, subset=True, layer=\"counts\", flavor=\"seurat_v3\")\n",
 55 |     "sc.pp.normalize_total(sc_adata, target_sum=10e4)\n",
 56 |     "sc.pp.log1p(sc_adata)\n",
 57 |     "sc_adata.raw = sc_adata"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": null,
 63 |    "metadata": {},
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "# Spatial data\n",
 67 |     "st_adata.layers[\"counts\"] = st_adata.X.copy()\n",
 68 |     "\n",
 69 |     "sc.pp.normalize_total(st_adata, target_sum=10e4)\n",
 70 |     "sc.pp.log1p(st_adata)\n",
 71 |     "st_adata.raw = st_adata\n",
 72 |     "\n",
 73 |     "loc=st_adata.obsm[\"spatial\"]\n",
 74 |     "st_adata.obsm[\"spatial\"]=loc.astype('float')"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": null,
 80 |    "metadata": {},
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "# filter genes to be the same on the spatial and sc data\n",
 84 |     "intersect = np.intersect1d(sc_adata.var_names, st_adata.var_names)\n",
 85 |     "st_adata = st_adata[:, intersect].copy()\n",
 86 |     "sc_adata = sc_adata[:, intersect].copy()"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": null,
 92 |    "metadata": {},
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "# Fit the scLVM\n",
 96 |     "CondSCVI.setup_anndata(sc_adata, layer=\"counts\", labels_key=\"Annotation\")\n",
 97 |     "sc_model = CondSCVI(sc_adata, weight_obs=False)\n",
 98 |     "sc_model.view_anndata_setup()\n",
 99 |     "sc_model.train()\n",
100 |     "\n",
101 |     "sc_model.history[\"elbo_train\"].iloc[5:].plot()\n",
102 |     "plt.show()"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": null,
108 |    "metadata": {},
109 |    "outputs": [],
110 |    "source": [
111 |     "# Deconvolution\n",
112 |     "DestVI.setup_anndata(st_adata, layer=\"counts\")\n",
113 |     "st_model = DestVI.from_rna_model(st_adata, sc_model)\n",
114 |     "st_model.view_anndata_setup()\n",
115 |     "st_model.train(max_epochs=2500)\n",
116 |     "st_model.history[\"elbo_train\"].iloc[10:].plot()\n",
117 |     "plt.show()"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": null,
123 |    "metadata": {},
124 |    "outputs": [],
125 |    "source": [
126 |     "# Get proportions\n",
127 |     "st_adata.obsm[\"proportions\"] = st_model.get_proportions()\n",
128 |     "st_adata.obsm[\"proportions\"].to_csv('CellProp_DestVI.csv')"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": null,
134 |    "metadata": {},
135 |    "outputs": [],
136 |    "source": [
137 |     "ct_thresholds = destvi_utils.automatic_proportion_threshold(st_adata,  kind_threshold=\"primary\")\n",
138 |     "ct_thresholds['MonoMacro'] = 0.1"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": null,
144 |    "metadata": {},
145 |    "outputs": [],
146 |    "source": [
147 |     "for ct, g in st_model.get_gamma().items():\n",
148 |     "    st_adata.obsm[f\"{ct}_gamma\"] = g"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": null,
154 |    "metadata": {},
155 |    "outputs": [],
156 |    "source": [
157 |     "# LOAD FUNCTIONS FROM destvi_utils\n",
158 |     "import anndata as ad\n",
159 |     "import hotspot\n",
160 |     "import matplotlib.pyplot as plt\n",
161 |     "import numpy as np\n",
162 |     "from scipy.interpolate import splev, splrep\n",
163 |     "from scipy.spatial.distance import pdist, squareform\n",
164 |     "from sklearn.mixture import GaussianMixture\n",
165 |     "\n",
166 |     "\n",
167 |     "def _prettify_axis(ax, spatial=False):\n",
168 |     "    # Hide the right and top spines\n",
169 |     "    ax.spines[\"right\"].set_visible(False)\n",
170 |     "    ax.spines[\"top\"].set_visible(False)\n",
171 |     "    # Only show ticks on the left and bottom spines\n",
172 |     "    ax.yaxis.set_ticks_position(\"left\")\n",
173 |     "    ax.xaxis.set_ticks_position(\"bottom\")\n",
174 |     "    if spatial:\n",
175 |     "        plt.xticks([])\n",
176 |     "        plt.yticks([])\n",
177 |     "        plt.xlabel(\"Spatial1\")\n",
178 |     "        plt.ylabel(\"Spatial2\")\n",
179 |     "\n",
180 |     "\n",
181 |     "def _form_stacked_quantiles(data, N=100):\n",
182 |     "    quantiles = np.quantile(data, np.linspace(0, 1, N, endpoint=False))\n",
183 |     "    return quantiles, np.vstack([_flatten(data, q) for q in quantiles])\n",
184 |     "\n",
185 |     "\n",
186 |     "def _flatten(x, threshold):\n",
187 |     "    return (x > threshold) * x\n",
188 |     "\n",
189 |     "\n",
190 |     "def _smooth_get_critical_points(x, noisy_data, k=5, s=0.1):\n",
191 |     "    f = splrep(x, noisy_data, k=5, s=1)\n",
192 |     "    smoothed = splev(x, f)\n",
193 |     "    derivative = splev(x, f, der=1)\n",
194 |     "    sign_2nd = splev(x, f, der=2) > 0\n",
195 |     "    curvature = splev(x, f, der=3)\n",
196 |     "    return noisy_data, smoothed, derivative, sign_2nd, curvature\n",
197 |     "\n",
198 |     "\n",
199 |     "def _get_autocorrelations(st_adata, stacked_quantiles, quantiles):\n",
200 |     "    # create Anndata and run hotspot\n",
201 |     "    adata = ad.AnnData(stacked_quantiles.T)\n",
202 |     "    adata.obs_names = st_adata.obs.index\n",
203 |     "    adata.var_names = [str(i) for i in quantiles]\n",
204 |     "    adata.obsm[\"spatial\"] = st_adata.obsm[\"spatial\"]\n",
205 |     "    hs = hotspot.Hotspot(adata, model=\"none\", latent_obsm_key=\"spatial\")\n",
206 |     "    hs.create_knn_graph(\n",
207 |     "        weighted_graph=True,\n",
208 |     "        n_neighbors=10,\n",
209 |     "    )\n",
210 |     "    hs_results = hs.compute_autocorrelations(jobs=1)\n",
211 |     "    index = np.array([float(i) for i in hs_results.index.values])\n",
212 |     "    return index, hs_results[\"Z\"].values\n",
213 |     "\n",
214 |     "\n",
215 |     "def _get_laplacian(s, pi):\n",
216 |     "    N = s.shape[0]\n",
217 |     "    dist_table = pdist(s)\n",
218 |     "    bandwidth = np.median(dist_table)\n",
219 |     "    sigma = 0.5 * bandwidth**2\n",
220 |     "\n",
221 |     "    l2_square = squareform(dist_table) ** 2\n",
222 |     "    D = np.exp(-l2_square / sigma) * np.dot(pi, pi.T)\n",
223 |     "    L = -D\n",
224 |     "    sum_D = np.sum(D, axis=1)\n",
225 |     "    for i in range(N):\n",
226 |     "        L[i, i] = sum_D[i]\n",
227 |     "    return L\n",
228 |     "\n",
229 |     "\n",
230 |     "def _get_spatial_components(locations, proportions, data):\n",
231 |     "    # find top two spatial principal vectors\n",
232 |     "    # form laplacian\n",
233 |     "    L = _get_laplacian(locations, proportions)\n",
234 |     "    # center data\n",
235 |     "    transla_ = data.copy()\n",
236 |     "    transla_ -= np.mean(transla_, axis=0)\n",
237 |     "    # get eigenvectors\n",
238 |     "    A = np.dot(transla_.T, np.dot(L, transla_))\n",
239 |     "    w, v = np.linalg.eig(A)\n",
240 |     "    # don't forget to sort them...\n",
241 |     "    idx = np.argsort(w)[::-1]\n",
242 |     "    vec = v[:, idx][:, :]\n",
243 |     "    return vec\n",
244 |     "\n",
245 |     "\n",
246 |     "def _vcorrcoef(X, y):\n",
247 |     "    Xm = np.reshape(np.mean(X, axis=1), (X.shape[0], 1))\n",
248 |     "    ym = np.mean(y)\n",
249 |     "    r_num = np.sum((X - Xm) * (y - ym), axis=1)\n",
250 |     "    r_den = np.sqrt(np.sum((X - Xm) ** 2, axis=1) * np.sum((y - ym) ** 2))\n",
251 |     "    r = np.divide(\n",
252 |     "        r_num,\n",
253 |     "        r_den,\n",
254 |     "        out=np.zeros_like(\n",
255 |     "            r_num,\n",
256 |     "        ),\n",
257 |     "        where=r_den != 0,\n",
258 |     "    )\n",
259 |     "    return r\n",
260 |     "\n",
261 |     "\n",
262 |     "def _get_delta(lfc):\n",
263 |     "    return np.max(\n",
264 |     "        np.abs(GaussianMixture(n_components=3).fit(np.array(lfc).reshape(-1, 1)).means_)\n",
265 |     "    )"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "code",
270 |    "execution_count": null,
271 |    "metadata": {},
272 |    "outputs": [],
273 |    "source": [
274 |     "# Get 5 Spatial PCs\n",
275 |     "gamma = st_model.get_gamma(return_numpy=True)\n",
276 |     "filter_ = st_adata.obsm[\"proportions\"]['MonoMacro'].values > ct_thresholds['MonoMacro']\n",
277 |     "locations = st_adata.obsm[\"spatial\"][filter_]\n",
278 |     "proportions = st_adata.obsm[\"proportions\"]['MonoMacro'].values[filter_]\n",
279 |     "ct_index = np.where('MonoMacro' == st_model.cell_type_mapping)[0][0]\n",
280 |     "data = gamma[:, :, ct_index][filter_]\n",
281 |     "# the helper is defined in the functions cell as _get_spatial_components (leading underscore)\n",
282 |     "vec=_get_spatial_components(locations, proportions, data)[:,:]\n",
283 |     "projection = np.dot(data - np.mean(data, 0), vec)\n",
284 |     "\n",
285 |     "SpatialPCs=pd.DataFrame(projection)\n",
286 |     "SpatialPCs.index=st_adata.obs_names[filter_]\n",
287 |     "SpatialPCs.to_csv('SpatialPCs_MonoMacro.csv')"
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "code",
292 |    "execution_count": null,
293 |    "metadata": {},
294 |    "outputs": [],
295 |    "source": [
296 |     "# Get genes whose expression correlates with Spatial PCs\n",
297 |     "sc_adata_slice = sc_adata[sc_adata.obs[\"Annotation\"] == 'MonoMacro']\n",
298 |     "is_sparse = scipy.sparse.issparse(sc_adata_slice.X)\n",
299 |     "normalized_counts = sc_adata_slice.X.toarray() if is_sparse else sc_adata_slice.X\n",
300 |     "# .toarray() is used instead of the .A shortcut, which recent SciPy releases no longer provide\n",
301 |     "indices_ct = np.where(sc_adata.obs[\"Annotation\"] == 'MonoMacro')[0]\n",
302 |     "sc_latent = sc_model.get_latent_representation(indices=indices_ct)\n",
303 |     "sc_projection = np.dot(sc_latent - np.mean(sc_latent,0), vec)\n",
304 |     "\n",
305 |     "r = _vcorrcoef(normalized_counts.T, sc_projection[:, 0])\n",
306 |     "ranking = np.argsort(r)\n",
307 |     "PC1Pos=pd.DataFrame(r[ranking][::-1][:50])\n",
308 |     "PC1Pos.index=list(st_adata.var.index[ranking[::-1][:50]])\n",
309 |     "\n",
310 |     "PC1Neg=pd.DataFrame(r[ranking][:50])\n",
311 |     "PC1Neg.index=list(st_adata.var.index[ranking[:50]])"
312 |    ]
313 |   },
314 |   {
315 |    "cell_type": "code",
316 |    "execution_count": null,
317 |    "metadata": {},
318 |    "outputs": [],
319 |    "source": [
320 |     "## Generate Expression Matrix for CellType\n",
321 |     "\n",
322 |     "# impute \n",
323 |     "imp_ge = st_model.get_scale_for_ct(\"MonoMacro\", indices=np.where(filter_)[0]).values\n",
324 |     "\n",
325 |     "# get statistics (row sums taken in a way that works for sparse and dense count layers)\n",
326 |     "avg_library_size = np.mean(np.asarray(st_adata.layers[\"counts\"].sum(axis=1)).ravel())\n",
327 |     "exp_px_o = st_model.module.px_o.detach().exp().cpu().numpy()\n",
328 |     "mean = avg_library_size * imp_ge\n",
329 |     "\n",
330 |     "# create distribution\n",
331 |     "concentration = torch.tensor(avg_library_size * imp_ge / exp_px_o)\n",
332 |     "rate = torch.tensor(1. / exp_px_o)\n",
333 |     "\n",
334 |     "# generate six independent draws; rows are the filtered spots, columns the genes\n",
335 |     "for j in range(1, 7):\n",
336 |     "    N = 1\n",
337 |     "    simulated = torch.distributions.Gamma(concentration=concentration, rate = rate).sample((N,)).cpu().numpy()\n",
338 |     "    simulated = np.log(simulated + 1)\n",
339 |     "    simulated = simulated.reshape((-1, simulated.shape[-1]))\n",
340 |     "    simulated=pd.DataFrame(simulated, index=st_adata.obs_names[filter_], columns=st_adata.var_names)\n",
341 |     "    simulated.to_csv(f\"Simulation_{j}_MonoMacro.csv\")"
342 |    ]
343 |   }
344 |  ],
345 |  "metadata": {
346 |   "language_info": {
347 |    "name": "python"
348 |   },
349 |   "orig_nbformat": 4
350 |  },
351 |  "nbformat": 4,
352 |  "nbformat_minor": 2
353 | }
354 | 


--------------------------------------------------------------------------------
/Visium/Nature2023_Mouse_PDAC_Visium_post_deconvolution_analyses.r:
--------------------------------------------------------------------------------
  1 | library(data.table)
  2 | library(Matrix)
  3 | library(ggplot2)
  4 | library(future)
  5 | library(dplyr)
  6 | library(grid)
  7 | library(Seurat)
  8 | library(tidyr)
  9 | library(dendextend)
 10 | library(Giotto)
 11 | library(clusterProfiler)
 12 | library(org.Mm.eg.db)
 13 | library(biomaRt)
 14 | 
 15 | 
 16 | # Saved Seurat object of the filtered Visium spots, with the tissue coordinates of its image
 17 | load('Spatial.filt.Robj')
 18 | images <- Images(Spatial.filt, assay = DefaultAssay(object = Spatial.filt))
 19 | image.use <- Spatial.filt[[images]]
 20 | coordinates <- GetTissueCoordinates(object = image.use)
 21 | 
 22 | # import proportions predicted by DestVI
 23 | proportions <- read.csv('CellProp_DestVI.csv', row.names = 1)
 24 | 
 25 | #### Clustering and analysis of MonoMacro enriched spots
 26 | SpatialPCs_MonoMacro <- read.csv('SpatialPCs_MonoMacro.csv', row.names = 1)
 27 | Spatial.filt_MonoMacro <- subset(Spatial.filt, cells = rownames(SpatialPCs_MonoMacro))
 28 | # Element-wise mean of the six simulated matrices, rows aligned to the MonoMacro spots
 29 | List_simulations <- lapply(seq(1, 6), function(i) {
 30 |     sim_file <- paste('Simulation_', i, '_MonoMacro.csv', sep = "")
 31 |     read.csv(sim_file, row.names = 1)[rownames(Spatial.filt_MonoMacro@meta.data), ]
 32 | })
 33 | simulation_mean <- Reduce("+", List_simulations) / length(List_simulations)
 34 | 
 35 | # Seurat object of the averaged simulation, with the spatial PCs attached as a reduction
 36 | simulationMean <- CreateSeuratObject(t(simulation_mean), min.cells = 0,  project = "MonoMacro", min.features = 0)
 37 | simulationMean <- AddMetaData(simulationMean, Spatial.filt_MonoMacro@meta.data)
 38 | simulationMean <- ScaleData(simulationMean)
 39 | simulationMean[['SpatialPCA']] <- CreateDimReducObject(
 40 |     embeddings = as.matrix(SpatialPCs_MonoMacro[rownames(Spatial.filt_MonoMacro@meta.data),]),
 41 |     key = "SpatialPCA_")
 42 | #simulationMean <- RunUMAP(simulationMean, reduction = "SpatialPCA", dims = 1:5)
 43 | #simulationMean <- FindNeighbors(simulationMean, reduction = "SpatialPCA", dims = 1:5)
 44 | #simulationMean <- FindClusters(simulationMean,  resolution = c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4,1.5,2))
 45 | # load TAMs markers from TABLE 5 (sheet TAM_markers_day30_MonoMacro), then keep the genes present in the object
 46 | Il1b_markers <- Il1b_markers[is.element(Il1b_markers, rownames(simulationMean))]
 47 | Folr2_markers <- Folr2_markers[is.element(Folr2_markers, rownames(simulationMean))]
 48 | Spp1_markers <- Spp1_markers[is.element(Spp1_markers, rownames(simulationMean))]
 49 | 
 50 | # correlation between signature gene expression and Spatial PCs coordinates
 51 | # NOTE(review): the 'SpatialPCs' reduction is read from Spatial.filt_MonoMacro but is not created in this
 52 | # script (only simulationMean gets 'SpatialPCA' above) - confirm it is stored in Spatial.filt.Robj.
 53 | pca_spatial <- Spatial.filt_MonoMacro@reductions$SpatialPCs@cell.embeddings[,1:5]
 54 | 
 55 | # Signature score of every spot: log(mean(expm1(x)) + 1) over the marker genes of the signature
 56 | signature_score <- function(markers) {
 57 |     expr <- simulationMean@assays$RNA@data[markers, rownames(pca_spatial)]
 58 |     log(colMeans(as.matrix(expm1(expr))) + 1)
 59 | }
 60 | mean.exp_Il1b <- signature_score(Il1b_markers)
 61 | mean.exp_Folr2 <- signature_score(Folr2_markers)
 62 | mean.exp_Spp1 <- signature_score(Spp1_markers)
 63 | 
 64 | # One row per (PC, signature) with the Pearson correlation between PC coordinate and signature score.
 65 | # Rows are built directly (no eval(parse())) and the cor.test() result is not stored in a variable
 66 | # named `c`; spots are paired by name, so no sorting is needed before the test.
 67 | signature_scores <- list(Il1b = mean.exp_Il1b, Folr2 = mean.exp_Folr2, Spp1 = mean.exp_Spp1)
 68 | dotplot_correlations <- do.call(rbind, lapply(seq_len(ncol(pca_spatial)), function(j) {
 69 |     do.call(rbind, lapply(names(signature_scores), function(tam) {
 70 |         test <- cor.test(pca_spatial[, j], signature_scores[[tam]][rownames(pca_spatial)], method = "pearson")
 71 |         data.frame(TAM_subset = tam, corr = unname(test$estimate), p_value = test$p.value,
 72 |                    PC = paste0("PC_", j), stringsAsFactors = FALSE)
 73 |     }))
 74 | }))
 75 | 
 76 | # p-values are stored as -log10(p), limited to the range [0, 30]
 77 | dotplot_correlations[,3] <- -log10(dotplot_correlations[,3])
 78 | dotplot_correlations[,3] <- MinMax(dotplot_correlations[,3], min = 0, max = 30)
 79 | # fixed order of the TAM subsets
 80 | dotplot_correlations[,1] <- factor(dotplot_correlations[,1], levels=c("Spp1","Folr2","Il1b"))
 81 | 
 82 | 
 83 | # load TAMs markers from TABLE 5 (sheet TAM_markers_day30_MonoMacro)
 84 | # signature matrix for PAGE: one signature per TAM subset
 85 | TAMs <- list(Il1b_markers, Folr2_markers, Spp1_markers)
 86 | TAMsSign <- makeSignMatrixPAGE(sign_list = TAMs, sign_names = c('Il1b_markers','Folr2_markers','Spp1_markers'))
 87 | 
 88 | # Giotto signature enrichment analysis (PAGE) for TAMs markers (same analysis for other lists of genes)
 89 | giotto.obj <- createGiottoObject(raw_exprs = Spatial.filt@assays$Spatial@counts, spatial_locs = coordinates)
 90 | giotto.obj <- normalizeGiotto(gobject = giotto.obj, scalefactor = 6000, verbose = TRUE)
 91 | # with pvalues; -log10(p) is returned
 92 | giotto.obj <- runPAGEEnrich(gobject = giotto.obj, p_value = TRUE, sign_matrix = TAMsSign, output_enrichment = 'original',
 93 |                             min_overlap_genes = 5, include_depletion = FALSE, expression_values = 'normalized')
 94 | 
 95 | # highlight only MonoMacro enriched spots with p < 0.001, i.e. a returned score of at least 3
 96 | enrichment_PAGE <- as.data.frame(giotto.obj@spatial_enrichment$PAGE)
 97 | rownames(enrichment_PAGE) <- enrichment_PAGE[,1]
 98 | score_cols <- seq_len(ncol(enrichment_PAGE))[-1]   # every column except the first (spot ID)
 99 | enrichment_PAGE[score_cols] <- lapply(enrichment_PAGE[score_cols], function(x) ifelse(x >= 3, 1, 0))
100 | 
101 | # spots that are not part of the MonoMacro subset are set to 0 for every signature
102 | outside_MonoMacro <- !(enrichment_PAGE$cell_ID %in% colnames(Spatial.filt_MonoMacro))
103 | enrichment_PAGE[outside_MonoMacro, score_cols] <- 0
104 | 
105 | 
106 | ## GO_BP enrichment analysis: GSEA on the log2FC ranking of cluster 4 (SCT_snn_res.0.3) versus all other spots
107 | Idents(SpatialA1FilteredFil) <- 'SCT_snn_res.0.3'
108 | Cluster_Il1bvsAll_spatial_seurat <- FindMarkers(SpatialA1FilteredFil, ident.1 = 4, ident.2 = NULL, only.pos = FALSE, min.pct = 0.1, pseudocount.use = 0.1, logfc.threshold = 0)
109 | 
110 | mart <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = "mmusculus_gene_ensembl", host = "jul2018.archive.ensembl.org")
111 | genes <- rownames(Cluster_Il1bvsAll_spatial_seurat)
112 | logFC <- Cluster_Il1bvsAll_spatial_seurat$avg_log2FC  # plain numeric vector (a data.frame here would be subset by column below)
113 | bioM <- getBM(filters = "mgi_symbol", values = genes, attributes = c("entrezgene", "mgi_symbol", "description"), mart = mart)
114 | gene_id <- as.character(bioM$entrezgene[match(genes, bioM$mgi_symbol)])  # first Entrez ID per symbol; a real NA (not the string "NA") when unmapped
115 | mapped <- !is.na(gene_id)
116 | logFC <- setNames(logFC[mapped], gene_id[mapped])
117 | logFC <- sort(logFC, decreasing = TRUE)
118 | GSEA_Il1b <- gseGO(geneList = logFC, OrgDb = org.Mm.eg.db, ont = "BP", minGSSize = 15, maxGSSize = 500, pvalueCutoff = 1, verbose = FALSE)
119 | geneSets_list <- GSEA_Il1b@geneSets
120 | GSEA_Il1b <- data.frame(ID = GSEA_Il1b@result$ID, Description = GSEA_Il1b@result$Description, setSize = GSEA_Il1b@result$setSize, NES = GSEA_Il1b@result$NES, pvalue = GSEA_Il1b@result$pvalue, qvalues = GSEA_Il1b@result$qvalues)
121 | GSEA_Il1b <- GSEA_Il1b[which(GSEA_Il1b$qvalues < 0.01), ]
122 | # extract gene names for Bio processes terms: one vector of MGI symbols (NA where none is found) per significant term ID
123 | geneName_list_go_il1b <- sapply(as.character(GSEA_Il1b$ID), function(term_id) {
124 |   term_bioM <- getBM(filters = "entrezgene", values = geneSets_list[[term_id]], attributes = c("entrezgene", "mgi_symbol", "description"), mart = mart)
125 |   as.character(term_bioM$mgi_symbol[match(geneSets_list[[term_id]], term_bioM$entrezgene)])
126 | }, simplify = FALSE, USE.NAMES = TRUE)
127 | 


--------------------------------------------------------------------------------
/Visium/Nature2023_Mouse_PDAC_Visium_pre_processing.r:
--------------------------------------------------------------------------------
 1 | library(data.table)
 2 | library(RColorBrewer)
 3 | library(anndata)
 4 | library(Matrix)
 5 | library(ggplot2)
 6 | library(future)
 7 | library(dplyr)
 8 | library(scales)
 9 | library(grid)
10 | library(Seurat)
11 | library(SeuratData)
12 | library(tidyr)
13 | library(dendextend)
14 | library(cowplot)
15 | library(patchwork)
16 | 
17 | 
18 | ####### HOW TO UPLOAD HIGH RES IMAGE (tissue_hires_image.png)
19 | library(jsonlite)
20 | library(png)
21 | Read10X_Image <- function(image.dir, image.name = "tissue_hires_image.png", filter.matrix = TRUE, ...) {
22 |   # Build a 'VisiumV1' image object from the PNG named by image.name (hi-res by default).
23 |   # image.dir: folder holding the PNG, scalefactors_json.json and a tissue_positions* file;
24 |   # filter.matrix: keep only rows with tissue == 1; '...' is accepted but not used here.
25 |   image <- readPNG(source = file.path(image.dir, image.name))
26 |   scale.factors <- fromJSON(txt = file.path(image.dir, 'scalefactors_json.json'))
27 |   tissue.positions.path <- Sys.glob(paths = file.path(image.dir, 'tissue_positions*'))
28 |   if (length(tissue.positions.path) == 0) {
29 |     stop("No 'tissue_positions*' file found in ", image.dir, call. = FALSE)
30 |   }
31 |   tissue.positions <- read.csv(
32 |     file = tissue.positions.path[1],
33 |     col.names = c('barcodes', 'tissue', 'row', 'col', 'imagerow', 'imagecol'),
34 |     # only a file named exactly "tissue_positions.csv" is read as having a header row
35 |     header = basename(tissue.positions.path[1]) == "tissue_positions.csv",
36 |     as.is = TRUE,
37 |     row.names = 1
38 |   )
39 |   if (filter.matrix) {
40 |     tissue.positions <- tissue.positions[which(x = tissue.positions$tissue == 1), , drop = FALSE]
41 |   }
42 |   # The hi-res scale factor fills the spot, hires and lowres slots alike.
43 |   # NOTE(review): presumably so downstream Seurat plotting scales spots to the hi-res image - confirm.
44 |   hires.scalef <- scale.factors$tissue_hires_scalef
45 |   spot.radius <- (scale.factors$fiducial_diameter_fullres * hires.scalef) / max(dim(x = image))
46 |   new(
47 |     Class = 'VisiumV1',
48 |     image = image,
49 |     scale.factors = scalefactors(spot = hires.scalef, fiducial = scale.factors$fiducial_diameter_fullres, hires = hires.scalef, lowres = hires.scalef),
50 |     coordinates = tissue.positions,
51 |     spot.radius = spot.radius
52 |   )
53 | }
54 | #############
55 | # Load one Visium sample with its hi-res image, filter spots, then SCT-normalise, reduce, cluster and embed.
56 | image <- Read10X_Image('GSM6727528/outs/spatial/')
57 | Spatial <- Load10X_Spatial('GSM6727528/outs/', image = image)
58 | # Alternative input (GSM6727529): use the two commented lines below instead of the two above.
59 | #image<-Read10X_Image('GSM6727529/outs/spatial/')
60 | #Spatial<-Load10X_Spatial('GSM6727529/outs/', image=image)
61 | 
62 | # Keep spots with more than 100 detected features.
63 | Spatial.filt <- subset(Spatial, subset = nFeature_Spatial > 100)
64 | Spatial.filt <- SCTransform(Spatial.filt, assay = "Spatial", return.only.var.genes = FALSE, verbose = FALSE)
65 | Spatial.filt <- RunPCA(Spatial.filt, assay = "SCT", verbose = FALSE, npcs = 50)  # 'npcs' replaces 'pcs.compute', which RunPCA does not define
66 | Spatial.filt <- FindNeighbors(Spatial.filt, reduction = "pca", dims = 1:20)
67 | Spatial.filt <- FindClusters(Spatial.filt, verbose = FALSE, resolution = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4, 1.5, 2))
68 | Spatial.filt <- RunUMAP(Spatial.filt, reduction = "pca", dims = 1:20)
69 | # Export the barcodes of the retained spots and the processed object.
70 | write.csv(rownames(Spatial.filt@meta.data), "SelectedSpots.csv")
71 | save(Spatial.filt, file = "Spatial.filt.Robj")


--------------------------------------------------------------------------------
/scRNAseq/Human/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostunilab/PDAC_Nature_2023/74db198d5ec667eb1ccf9e82295ab0ff5bcdbe43/scRNAseq/Human/.DS_Store


--------------------------------------------------------------------------------
/scRNAseq/Human/Nature2023_Human_PDAC_scRNAseq_analyses.r:
--------------------------------------------------------------------------------
  1 | #### HUMAN TUMOUR scRNA-seq DATA ####
  2 | 
  3 | library(Seurat)
  4 | library(ggplot2)
  5 | library(scDblFinder)
  6 | library(copykat)
  7 | library(SeuratWrappers)
  8 | library(harmony)
  9 | library(foreach)
 10 | library(parallel)
 11 | library(dplyr)
 12 | library(tidyr)
 13 | library(magrittr)
 14 | library(nichenetr)
 15 | library(clusterProfiler)
 16 | library(org.Hs.eg.db)
 17 | library(biomaRt)
 18 | library(slingshot)
 19 | library(viridis)
 20 | library(scales)
 21 | library(msigdbr)
 22 | 
 23 | 
 24 | #### FULL DATASET ANALYSIS ####
 25 | # Normalise, scale (regressing out CC.Difference), run PCA, run fastMNN across orig.ident groups, then embed and cluster on the "mnn" reduction.
 26 | Sample.merge <- NormalizeData(Sample.merge, normalization.method = "LogNormalize", scale.factor = 1e4, assay = 'RNA')
 27 | Sample.merge <- ScaleData(Sample.merge, vars.to.regress = c("CC.Difference"))
 28 | Sample.merge <- FindVariableFeatures(object = Sample.merge)
 29 | Sample.merge <- RunPCA(Sample.merge, npcs = 50)  # 'npcs' replaces 'pcs.compute', which RunPCA does not define
 30 | Sample.merge <- RunFastMNN(object.list = SplitObject(Sample.merge, split.by = "orig.ident"))
 31 | Sample.merge <- RunUMAP(Sample.merge, reduction = "mnn", dims = 1:30)
 32 | Sample.merge <- FindNeighbors(Sample.merge, reduction = "mnn", dims = 1:30)
 33 | Sample.merge <- FindClusters(Sample.merge, resolution = c(0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4, 1.5, 2))
 34 | 
 35 | 
 36 | #### MONONUCLEAR PHAGOCYTES ####
 37 | # Re-analyse the cells of Sample.merge clusters 0, 9 and 12 (RNA_snn_res.0.5), with Harmony run on the PCA across orig.ident.
 38 | Sample.merge_MP <- subset(Sample.merge, subset = RNA_snn_res.0.5 %in% c(0, 9, 12))
 39 | Sample.merge_MP <- ScaleData(Sample.merge_MP, vars.to.regress = c("CC.Difference"), features = rownames(Sample.merge_MP))
 40 | Sample.merge_MP <- FindVariableFeatures(Sample.merge_MP)
 41 | Sample.merge_MP <- RunPCA(Sample.merge_MP, npcs = 50)  # 'npcs' replaces 'pcs.compute', which RunPCA does not define
 42 | Sample.merge_MP <- RunHarmony(Sample.merge_MP, "orig.ident", dims.use = 1:30, max.iter.harmony = 30)
 43 | Sample.merge_MP <- RunUMAP(Sample.merge_MP, reduction = "harmony", dims = 1:ncol(Embeddings(Sample.merge_MP, "harmony")), reduction.name = "umap", reduction.key = "UMAPHARMONY_")
 44 | Sample.merge_MP <- FindNeighbors(Sample.merge_MP, reduction = "harmony", dims = 1:30)
 45 | Sample.merge_MP <- FindClusters(Sample.merge_MP, resolution = c(0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4, 1.5, 2))
 46 | 
 47 | #### TUMOR-ASSOCIATED MACROPHAGES ####
 48 | # Re-analyse the cells of Sample.merge_MP clusters 1, 2, 5, 6, 7, 9 and 11 (RNA_snn_res.1), with Harmony (theta = 3) across orig.ident.
 49 | Sample.merge_TAM <- subset(Sample.merge_MP, subset = RNA_snn_res.1 %in% c(1, 2, 5, 6, 7, 9, 11))
 50 | Sample.merge_TAM <- ScaleData(Sample.merge_TAM, vars.to.regress = c("CC.Difference"), features = rownames(Sample.merge_TAM))
 51 | Sample.merge_TAM <- FindVariableFeatures(Sample.merge_TAM)
 52 | Sample.merge_TAM <- RunPCA(Sample.merge_TAM, npcs = 50)  # 'npcs' replaces 'pcs.compute', which RunPCA does not define
 53 | Sample.merge_TAM <- RunHarmony(Sample.merge_TAM, "orig.ident", dims.use = 1:30, max.iter.harmony = 30, theta = 3)
 54 | Sample.merge_TAM <- RunUMAP(Sample.merge_TAM, reduction = "harmony", dims = 1:ncol(Embeddings(Sample.merge_TAM, "harmony")), reduction.name = "umap", reduction.key = "UMAPHARMONY_")  # dims now come from the TAM object's own harmony embedding, not Sample.merge_MP's
 55 | Sample.merge_TAM <- FindNeighbors(Sample.merge_TAM, reduction = "harmony", dims = 1:30)
 56 | Sample.merge_TAM <- FindClusters(Sample.merge_TAM, resolution = c(0.2, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4, 1.5, 2))
 57 | 
 58 | # find markers for TAM subsets: positive markers (adjusted p < 0.01) of every RNA_snn_res.0.36 cluster against all other cells, stacked in one table
 59 | Idents(Sample.merge_TAM) <- 'RNA_snn_res.0.36'
 60 | DEGs_TAMsubsets <- Reduce("rbind", lapply(unique(Sample.merge_TAM$RNA_snn_res.0.36), function(cl) {
 61 |     cl_markers <- FindMarkers(Sample.merge_TAM, ident.1 = cl, ident.2 = NULL, only.pos = TRUE, min.pct = 0.1, logfc.threshold = 1, pseudocount.use = 0.1)
 62 |     cl_markers <- cl_markers[which(cl_markers$p_val_adj < 0.01), ]
 63 |     cl_markers$gene <- rownames(cl_markers)
 64 |     cl_markers$Cluster <- rep(paste("Cluster", cl), nrow(cl_markers))  # rep() keeps this valid when no marker passes the filter
 65 |     cl_markers
 66 | }))
 67 | 
 68 | # GSEA on GO BP, run per TAM cluster on the log2FC ranking (cluster vs all other cells); terms with qvalue < 0.01 are kept
 69 | mart <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = "hsapiens_gene_ensembl", host = "jul2018.archive.ensembl.org")
 70 | bioM <- getBM(filters = "hgnc_symbol", values = rownames(Sample.merge_TAM), attributes = c("entrezgene", "hgnc_symbol"), mart = mart)
 71 | 
 72 | GO_BP_GSEA_TAMsubsets <- Reduce("rbind", lapply(unique(Sample.merge_TAM$RNA_snn_res.0.36), function(x) {
 73 |     AllMarkers <- FindMarkers(Sample.merge_TAM, ident.1 = x, ident.2 = NULL, only.pos = FALSE, min.pct = 0.1, logfc.threshold = 0, pseudocount.use = 0.1)
 74 |     logFC <- AllMarkers$avg_log2FC
 75 |     gene_id <- as.character(bioM$entrezgene[match(rownames(AllMarkers), bioM$hgnc_symbol)])  # first Entrez ID per symbol; a real NA (not the string "NA") when unmapped
 76 |     mapped <- !is.na(gene_id)
 77 |     logFC <- setNames(logFC[mapped], gene_id[mapped])
 78 |     logFC <- sort(logFC, decreasing = TRUE)
 79 |     GSEA_bp <- gseGO(geneList = logFC, OrgDb = org.Hs.eg.db, ont = "BP", minGSSize = 10, maxGSSize = 500, pvalueCutoff = 1, verbose = FALSE)
 80 |     GSEA_bp <- data.frame(ID = GSEA_bp@result$ID, Description = GSEA_bp@result$Description, setSize = GSEA_bp@result$setSize, NES = GSEA_bp@result$NES, pvalue = GSEA_bp@result$pvalue, qvalues = GSEA_bp@result$qvalues)
 81 |     GSEA_bp$Cluster <- rep(paste("Cluster", x), nrow(GSEA_bp))
 82 |     GSEA_bp <- GSEA_bp[which(GSEA_bp$qvalues < 0.01), ]
 83 |     return(GSEA_bp)
 84 | }))
 85 | 
 86 | #### HUMAN-MOUSE TAMs OVERLAP - GSEA
 87 | 
 88 | # compute orthologous of expressed genes in TAMs subset: mouse symbols with a single human match, or several matches resolved by HGNC == toupper(MGI)
 89 | Idents(Sample_Macro_hg38) <- 'RNA_snn_res.0.36'
 90 | Idents(Sample_Macro_mm10) <- 'RNA_snn_res.0.4'
 91 | human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "jul2018.archive.ensembl.org")
 92 | mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "jul2018.archive.ensembl.org")
 93 | genes_human <- rownames(Sample_Macro_hg38)
 94 | genes_mouse <- rownames(Sample_Macro_mm10)
 95 | genes_human_converted <- getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = genes_human, mart = human, attributesL = c("mgi_symbol"), martL = mouse, uniqueRows = FALSE)
 96 | genes_mouse_converted <- getLDS(attributes = c("mgi_symbol"), filters = "mgi_symbol", values = genes_mouse, mart = mouse, attributesL = c("hgnc_symbol"), martL = human, uniqueRows = FALSE)
 97 | unambiguous_mouse_genes <- genes_mouse_converted %>% group_by(MGI.symbol) %>% count() %>% filter(n < 2) %>% .$MGI.symbol
 98 | ambiguous_mouse_genes <- genes_mouse_converted %>% group_by(MGI.symbol) %>% count() %>% filter(n >= 2) %>% .$MGI.symbol
 99 | geneinfo_ambiguous_solved <- genes_mouse_converted %>% filter(MGI.symbol %in% ambiguous_mouse_genes) %>% filter(HGNC.symbol == toupper(MGI.symbol))
100 | genes_mouse_converted <- genes_mouse_converted %>% filter(MGI.symbol %in% unambiguous_mouse_genes) %>% bind_rows(geneinfo_ambiguous_solved)
101 | # remove repeated first-column symbols BEFORE setting row names: a data frame rejects duplicated row names
102 | genes_mouse_converted <- genes_mouse_converted[!(duplicated(genes_mouse_converted[, 1])), ]
103 | rownames(genes_mouse_converted) <- genes_mouse_converted[, 1]
104 | expressed_genes_TAM_hg38 <- unique(unlist(lapply(unique(Sample_Macro_hg38$RNA_snn_res.0.36), function(x) {
105 |     cells <- rownames(Sample_Macro_hg38@meta.data[which(Sample_Macro_hg38@meta.data$RNA_snn_res.0.36 == x), ])
106 |     pct <- rowSums(Sample_Macro_hg38@assays$RNA@data[, cells, drop = FALSE] > 0) / length(cells)  # drop = FALSE keeps a matrix for one-cell clusters
107 |     return(names(pct[which(pct > 0.1)]))  # genes detected in more than 10% of the cluster's cells
108 | })))
109 | expressed_genes_TAM_mm10 <- unique(unlist(lapply(unique(Sample_Macro_mm10$RNA_snn_res.0.4), function(x) {
110 |     cells <- rownames(Sample_Macro_mm10@meta.data[which(Sample_Macro_mm10@meta.data$RNA_snn_res.0.4 == x), ])
111 |     pct <- rowSums(Sample_Macro_mm10@assays$RNA@data[, cells, drop = FALSE] > 0) / length(cells)  # drop = FALSE keeps a matrix for one-cell clusters
112 |     return(names(pct[which(pct > 0.1)]))
113 | })))
114 | tmp <- genes_mouse_converted[which(genes_mouse_converted$MGI.symbol %in% expressed_genes_TAM_mm10), ]
115 | gene_to_mouse_common_expressed_TAM <- tmp[which(tmp$HGNC.symbol %in% expressed_genes_TAM_hg38), ]
116 | 
117 | term2gene <- Reduce("rbind", lapply(unique(Sample_Macro_hg38$RNA_snn_res.0.36), function(cl) {
118 |     # significant positive markers of this human cluster, mapped to column 1 of gene_to_mouse_common_expressed_TAM (NA when absent, then dropped)
119 |     hu_markers <- FindMarkers(Sample_Macro_hg38, ident.1 = cl, ident.2 = NULL, only.pos = TRUE, min.pct = 0.1, logfc.threshold = 0.8, pseudocount.use = 0.1)
120 |     hu_markers <- hu_markers[which(hu_markers$p_val_adj < 0.01), ]
121 |     mm_symbols <- unlist(lapply(rownames(hu_markers), function(g) ifelse(g %in% gene_to_mouse_common_expressed_TAM$HGNC.symbol, gene_to_mouse_common_expressed_TAM[which(gene_to_mouse_common_expressed_TAM$HGNC.symbol == g), 1], NA)))
122 |     mm_symbols <- mm_symbols[!(is.na(mm_symbols))]
123 |     data.frame(id = rep(paste0("Cluster_", cl), length(mm_symbols)), gene = mm_symbols)
124 | }))
125 | term2name <- data.frame(id = unique(term2gene[, 1])[order(unique(term2gene[, 1]))], Description = c("Hu_SPP1+", "Hu_IL1B+", "Hu_FOLR2+", "Hu_HSP+", "Hu_MT+", "Hu_MKI67+"))
126 | 
127 | GSEA_human_to_Mouse_TAMs <- Reduce("rbind", lapply(unique(Sample_Macro_mm10$RNA_snn_res.0.4), function(cl) {
128 |     mm_markers <- FindMarkers(Sample_Macro_mm10, ident.1 = cl, ident.2 = NULL, only.pos = FALSE, min.pct = 0.1, logfc.threshold = 0, pseudocount.use = 0.1)
129 |     by_fc <- order(mm_markers$avg_log2FC, decreasing = TRUE)
130 |     ranks <- setNames(mm_markers[by_fc, "avg_log2FC"], rownames(mm_markers)[by_fc])
131 |     gsea_res <- GSEA(ranks, minGSSize = 10, maxGSSize = 500, eps = 1e-50, pvalueCutoff = 1, pAdjustMethod = "BH", TERM2GENE = term2gene, TERM2NAME = term2name)
132 |     gsea_res@result[, 7] <- -log10(gsea_res@result[, 7])
133 |     # result columns 2, 5 and 7 plus a cluster label; the four columns are named right after the loop
134 |     data.frame(gsea_res@result[, c(2, 5, 7)], rep(paste0("Cluster", cl), nrow(gsea_res@result)))
135 | }))
136 | colnames(GSEA_human_to_Mouse_TAMs) <- c("Hu_TAMs", "NES", "log_padj", "mouseTAMs_Cluster")
137 | 
138 | #### TUMOR CELLS IN NAIVE SAMPLES ####
139 | # Subset Sample.merge to cells whose orig.ident is one of four listed samples, re-normalise, run fastMNN across orig.ident, then embed and cluster on "mnn".
140 | cells_Naive<-rownames(Sample.merge@meta.data[which(Sample.merge@meta.data$orig.ident %in% c("LPDAC_30_tumor","PDAC_50_tumor","PDAC_55_tumor","PDAC_60_Tumor")),]) # NOTE(review): "PDAC_60_Tumor" has a capital T, while the pre-processing script creates the project as "PDAC_60_tumor" - confirm it matches orig.ident
141 | Sample.merge_Naive <- subset(Sample.merge, cells = cells_Naive)
142 | Sample.merge_Naive <- NormalizeData(Sample.merge_Naive, normalization.method = "LogNormalize", scale.factor = 1e4)
143 | #Sample.merge_Naive <- ScaleData(Sample.merge_Naive, vars.to.regress = c("CC.Difference"), features=rownames(Sample.merge_Naive))
144 | #Sample.merge_Naive <- FindVariableFeatures(object = Sample.merge_Naive)
145 | #Sample.merge_Naive <- RunPCA(Sample.merge_Naive, pcs.compute=50)
146 | Sample.merge_Naive <- RunFastMNN(object.list = SplitObject(Sample.merge_Naive, split.by = "orig.ident"))
147 | Sample.merge_Naive <- RunUMAP(Sample.merge_Naive, reduction = "mnn", dims = 1:30)
148 | Sample.merge_Naive <- FindNeighbors(Sample.merge_Naive, reduction = "mnn", dims = 1:30)
149 | Sample.merge_Naive <- FindClusters(Sample.merge_Naive,  resolution = c(0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4,1.5,2))
150 | # Tumour cells: Sample.merge_Naive clusters 1, 3, 13, 15, 5, 8 and 17 at RNA_snn_res.1, re-integrated and re-clustered with the same steps.
151 | cells_Tumor=rownames(Sample.merge_Naive@meta.data[which(Sample.merge_Naive@meta.data$RNA_snn_res.1 %in% c(1,3,13,15,5,8,17)),])
152 | Sample_Tumor<-subset(Sample.merge_Naive, cells=cells_Tumor)
153 | #Sample_Tumor <- ScaleData(Sample_Tumor, vars.to.regress = c("CC.Difference"), features=rownames(Sample_Tumor))
154 | #Sample_Tumor <- FindVariableFeatures(object = Sample_Tumor)
155 | #Sample_Tumor <- RunPCA(Sample_Tumor, pcs.compute=50)
156 | Sample_Tumor <- RunFastMNN(object.list = SplitObject(Sample_Tumor, split.by = "orig.ident"))
157 | Sample_Tumor <- RunUMAP(Sample_Tumor, reduction="mnn", dims = 1:30)
158 | Sample_Tumor <- FindNeighbors(Sample_Tumor, reduction = "mnn", dims = 1:30)
159 | Sample_Tumor <- FindClusters(Sample_Tumor,  resolution = c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4,1.5,2))
160 | 
161 | # find markers for Tumor cells subsets: positive markers (adjusted p < 0.01) of every RNA_snn_res.0.3 cluster against all other cells of Sample_Tumor
162 | Idents(Sample_Tumor) <- 'RNA_snn_res.0.3'
163 | DEGs_TAMsubsets <- Reduce("rbind",lapply(unique(Sample_Tumor$RNA_snn_res.0.3), function(x) { # NOTE(review): reuses the name DEGs_TAMsubsets, so the TAM marker table built earlier in this script is overwritten - confirm this is intended
164 |     Markers <- FindMarkers(Sample_Tumor, ident.1 = x, ident.2 = NULL, only.pos = TRUE, min.pct = 0.1, logfc.threshold = 1, pseudocount.use = 0.1)
165 |     Markers <- Markers[which(Markers$p_val_adj < 0.01),]
166 |     Markers$gene <- rownames(Markers)
167 |     Markers$Cluster <- rep(paste("Cluster",x),nrow(Markers))
168 |     return(Markers)
169 | }))
170 | 
171 | # re-analysis of clusters enriched in T1RS+ cells: cells of Sample_Tumor cluster 1 at RNA_snn_res.0.1
172 | cells_T1RS <- rownames(Sample_Tumor@meta.data[which(Sample_Tumor@meta.data$RNA_snn_res.0.1 == 1),])
173 | # Subset, run fastMNN across orig.ident, then embed and cluster on the first 20 "mnn" dimensions.
174 | Sample_Tumor_T1RS <- subset(Sample_Tumor, cells=cells_T1RS)
175 | #Sample_Tumor_T1RS <- ScaleData(Sample_Tumor_T1RS, vars.to.regress = c("CC.Difference"), features=rownames(Sample_Tumor_T1RS))
176 | #Sample_Tumor_T1RS <- FindVariableFeatures(object = Sample_Tumor_T1RS)
177 | #Sample_Tumor_T1RS_Naive <- RunPCA(Sample_Tumor_T1RS_Naive, pcs.compute=50)
178 | Sample_Tumor_T1RS <- RunFastMNN(object.list = SplitObject(Sample_Tumor_T1RS, split.by = "orig.ident"))
179 | Sample_Tumor_T1RS <- RunUMAP(Sample_Tumor_T1RS, reduction="mnn", dims = 1:20)
180 | Sample_Tumor_T1RS <- FindNeighbors(Sample_Tumor_T1RS, reduction = "mnn", dims = 1:20)
181 | Sample_Tumor_T1RS <- FindClusters(Sample_Tumor_T1RS,  resolution = c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4,1.5,2))
182 | 
183 | # pseudotime analysis with slingshot on the first 10 "mnn" dimensions, using the RNA_snn_res.0.2 clusters as labels
184 | Tumor_sl<- slingshot(Embeddings(Sample_Tumor_T1RS, "mnn")[,c(1:10)], clusterLabels = Sample_Tumor_T1RS$RNA_snn_res.0.2)
185 | pt <- slingPseudotime(Tumor_sl)
186 | # Pearson correlation between the first pseudotime column and scaled expression, for genes with counts > 0 in more than 10% of cells.
187 | pct <- rowSums(Sample_Tumor_T1RS@assays$RNA@counts > 0)/ncol(Sample_Tumor_T1RS@assays$RNA@counts)
188 | expressedGenes <- names(pct[which(pct > 0.1)])
189 | t <- na.omit(pt[,1]) # cells with an NA pseudotime in column 1 are dropped; note that 't' masks base::t in this workspace
190 | y<- Sample_Tumor_T1RS@assays$RNA@scale.data[expressedGenes,names(t)] # NOTE(review): reads scale.data although the ScaleData call above is commented out - confirm the slot is populated
191 | corr <- apply(y,1,function(z){
192 |     cor(t,z, method = "pearson")
193 | })
194 | 
195 | ## NICHENET analysis
196 | # The three network objects are downloaded from Zenodo record 3260758 each time this section runs.
197 | # load nichenet networks
198 | ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds"))
199 | lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds"))
200 | ligands = lr_network %>% pull(from) %>% unique() # 'from' column of lr_network
201 | receptors = lr_network %>% pull(to) %>% unique() # 'to' column of lr_network
202 | weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds"))
203 | weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network %>% distinct(from,to), by = c("from","to")) # weighted edges restricted to from/to pairs present in lr_network
204 | # Define receiver and sender identities ('Annotation_nichnet' metadata column) and derive the gene sets fed to the ligand-activity model.
205 | Idents(Sample.merge_Naive) <- 'Annotation_nichnet'
206 | receiver="PDAC_cluster_T1RS_enriched"  # cells subjected to pseudotime analysis
207 | sender="IL1B_TAM"  # IL1B+ TAMs
208 | 
209 | # receiver (PDAC cells subjected to pseudotime analysis) expressed genes: expressed (pct = 0.15) AND positive markers vs "other_PDAC_clusters"
210 | DEG_PDAC<-FindMarkers(Sample.merge_Naive, ident.1="PDAC_cluster_T1RS_enriched", ident.2="other_PDAC_clusters", only.pos=TRUE, logfc.threshold=0.5, pseudocount.use=0.1)
211 | expressed_genes_receiver = get_expressed_genes(receiver, Sample.merge_Naive, pct = 0.15) %>% .[. %in% rownames(DEG_PDAC)] 
212 | background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] 
213 | expressed_receptors = intersect(receptors,expressed_genes_receiver)
214 | 
215 | # sender (IL1B+ TAMs) expressed genes: expressed (0.15) AND positive markers vs "other_TAM"
216 | DEG_IL1B<-FindMarkers(Sample.merge_Naive, ident.1="IL1B_TAM", ident.2="other_TAM", only.pos=TRUE, logfc.threshold=0.2, pseudocount.use=0.1)
217 | list_expressed_genes_sender = sender %>% unique() %>% lapply(get_expressed_genes, Sample.merge_Naive, 0.15) # lapply to get the expressed genes of every sender cell type separately here
218 | expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() %>% .[. %in% rownames(DEG_IL1B)]
219 | expressed_ligands = intersect(ligands,expressed_genes_sender)
220 | 
221 | # target genes: genes upregulated in T1RS+ PDAC cells (cluster at the end-point of pseudotime curve), i.e. positive markers of cluster 2 at RNA_snn_res.0.4 that are rows of ligand_target_matrix
222 | Idents(Sample_Tumor_T1RS) <- 'RNA_snn_res.0.4'
223 | markers_Cluster2 <- FindMarkers(Sample_Tumor_T1RS, ident.1=2, logfc.threshold=1, min.pct=0.3, pseudocount.use=0.1, only.pos=TRUE)
224 | geneset_oi = rownames(markers_Cluster2) 
225 | geneset_oi = geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] 
226 | 
227 | # MODEL: rank the candidate ligands (expressed ligand with an expressed receptor) by Pearson score and keep the top 20
228 | potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique()
229 | ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands)
230 | ligand_activities = ligand_activities %>% arrange(-pearson) %>% mutate(rank = rank(desc(pearson)))
231 | best_upstream_ligands = ligand_activities %>% top_n(20, pearson) %>% arrange(-pearson) %>% pull(test_ligand) %>% unique()
232 | # targets: weighted ligand-target links of the top ligands within geneset_oi (n = 200), rows with NA dropped
233 | active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 200) %>% bind_rows() %>% drop_na()
234 | # receptors: expressed receptors of the top ligands, with their weights; the "strict" tables exclude the two ppi_prediction databases
235 | lr_network_top = lr_network %>% filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>% distinct(from,to)
236 | best_upstream_receptors = lr_network_top %>% pull(to) %>% unique()
237 | lr_network_top_df_large = weighted_networks_lr %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors)
238 | lr_network_strict = lr_network %>% filter(database != "ppi_prediction_go" & database != "ppi_prediction")
239 | ligands_bona_fide = lr_network_strict %>% pull(from) %>% unique()
240 | receptors_bona_fide = lr_network_strict %>% pull(to) %>% unique()
241 | lr_network_top_df_large_strict = lr_network_top_df_large %>% distinct(from,to) %>% inner_join(lr_network_strict, by = c("from","to")) %>% distinct(from,to)
242 | lr_network_top_df_large_strict = lr_network_top_df_large_strict %>% inner_join(lr_network_top_df_large, by = c("from","to"))
243 | # ligand pearson: one-column matrix of Pearson scores with the ligand names as row names
244 | ligand_pearson_matrix = ligand_activities %>% select(pearson) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand)
245 | 


--------------------------------------------------------------------------------
/scRNAseq/Human/Nature2023_Human_PDAC_scRNAseq_pre_processing.r:
--------------------------------------------------------------------------------
  1 | #### HUMAN TUMOUR scRNA-seq DATA ####
  2 | 
  3 | library(Seurat)
  4 | library(ggplot2)
  5 | library(scDblFinder)
  6 | library(copykat)
  7 | library(SeuratWrappers)
  8 | library(harmony)
  9 | library(foreach)
 10 | library(parallel)
 11 | library(dplyr)
 12 | library(tidyr)
 13 | library(magrittr)
 14 | library(nichenetr)
 15 | library(clusterProfiler)
 16 | library(org.Hs.eg.db)
 17 | library(biomaRt)
 18 | library(slingshot)
 19 | library(viridis)
 20 | library(scales)
 21 | library(msigdbr)
 22 | 
 23 | #### PRE-PROCESSING ####
 24 | # Each sample is read from its own GSM folder into a Seurat object (features seen in fewer than 3 cells are dropped via min.cells = 3).
 25 | #load datasets
 26 | Sample.data <- Read10X("GSM6727545/filtered_feature_bc_matrix/")
 27 | Sample_30_tumor <- CreateSeuratObject(Sample.data, min.cells = 3,  project = "LPDAC_30_tumor")
 28 | 
 29 | Sample.data <- Read10X("GSM6727548/filtered_feature_bc_matrix")
 30 | Sample_50_tumor <- CreateSeuratObject(Sample.data, min.cells = 3,  project = "PDAC_50_tumor")
 31 | 
 32 | Sample.data <- Read10X("GSM6727549/filtered_feature_bc_matrix")
 33 | Sample_51_tumor <- CreateSeuratObject(Sample.data, min.cells = 3,  project = "PDAC_51_tumor")
 34 | 
 35 | Sample.data <- Read10X("GSM6727550/filtered_feature_bc_matrix")
 36 | Sample_55_tumor <- CreateSeuratObject(Sample.data, min.cells = 3,  project = "PDAC_55_tumor")
 37 | 
 38 | Sample.data <- Read10X("GSM6727546/filtered_feature_bc_matrix")
 39 | Sample_47_tumor <- CreateSeuratObject(Sample.data, min.cells = 3,  project = "PDAC_47_tumor")
 40 | 
 41 | Sample.data <- Read10X("GSM6727547/filtered_feature_bc_matrix")
 42 | Sample_48_tumor <- CreateSeuratObject(Sample.data, min.cells = 3,  project = "PDAC_48_tumor")
 43 | 
 44 | Sample.data <- Read10X("GSM6727543/filtered_feature_bc_matrix/")
 45 | Sample_25_tumor <- CreateSeuratObject(Sample.data, min.cells = 3,  project = "LPDAC_25_tumor")
 46 | 
 47 | Sample.data <- Read10X("GSM6727551/filtered_feature_bc_matrix/")
 48 | Sample_60_tumor <- CreateSeuratObject(Sample.data, min.cells = 3,  project = "PDAC_60_tumor")
 49 | 
 50 | Sample.data <- Read10X("GSM6727544/filtered_feature_bc_matrix/")
 51 | Sample_26_tumor <- CreateSeuratObject(Sample.data, min.cells = 3,  project = "LPDAC_26_tumor")
 52 | 
 53 | Sample.data <- Read10X("GSM6727542/filtered_feature_bc_matrix/")
 54 | Sample_15_tumor <- CreateSeuratObject(Sample.data, min.cells = 3,  project = "LPDAC_15_tumor")
 55 | 
 56 | #merge samples: add.cell.ids is listed in the same order as the objects (Sample_30_tumor first, then y), so each cell name is prefixed with its own sample name
 57 | Sample.merge<- merge(Sample_30_tumor, y = c(Sample_50_tumor,  Sample_51_tumor, Sample_55_tumor, Sample_47_tumor,Sample_60_tumor, Sample_48_tumor, Sample_25_tumor, Sample_26_tumor, Sample_15_tumor), add.cell.ids = c("LPDAC_30_tumor", "PDAC_50_tumor", "PDAC_51_tumor", "PDAC_55_tumor", "PDAC_47_tumor","PDAC_60_tumor", "PDAC_48_tumor", "LPDAC_25_tumor", "LPDAC_26_tumor", "LPDAC_15_tumor"), project = "humanPDAC")
 58 | # NOTE(review): every run uses sam.name = "test"; if copykat names its output files from sam.name, later runs overwrite earlier ones - confirm.
 59 | # cn prediction with copykat, one run per sample on its raw RNA counts (KS.cut is 0.1 or 0.15 depending on the sample)
 60 | copykat.PDAC48 <- copykat(rawmat = as.matrix(Sample_48_tumor@assays$RNA@counts), id.type = "S", ngene.chr = 5, win.size = 25, KS.cut = 0.15, sam.name = "test", distance = "euclidean", norm.cell.names = "", n.cores = 4, output.seg = "FALSE")
 61 | copykat.PDAC60 <- copykat(rawmat = as.matrix(Sample_60_tumor@assays$RNA@counts), id.type = "S", ngene.chr = 5, win.size = 25, KS.cut = 0.15, sam.name = "test", distance = "euclidean", norm.cell.names = "", n.cores = 4, output.seg = "FALSE")
 62 | copykat.LPDAC25 <- copykat(rawmat = as.matrix(Sample_25_tumor@assays$RNA@counts), id.type = "S", ngene.chr = 5, win.size = 25, KS.cut = 0.1, sam.name = "test", distance = "euclidean", norm.cell.names = "", n.cores = 4, output.seg = "FALSE")
 63 | copykat.LPDAC26 <- copykat(rawmat = as.matrix(Sample_26_tumor@assays$RNA@counts), id.type = "S", ngene.chr = 5, win.size = 25, KS.cut = 0.15, sam.name = "test", distance = "euclidean", norm.cell.names = "", n.cores = 4, output.seg = "FALSE", cell.line = TRUE) # NOTE(review): copykat presumably expects cell.line as "yes"/"no" - confirm TRUE has the intended effect
 64 | copykat.LPDAC30 <- copykat(rawmat = as.matrix(Sample_30_tumor@assays$RNA@counts), id.type = "S", ngene.chr = 5, win.size = 25, KS.cut = 0.1, sam.name = "test", distance = "euclidean", norm.cell.names = "", n.cores = 4, output.seg = "FALSE")
 65 | copykat.PDAC47 <- copykat(rawmat = as.matrix(Sample_47_tumor@assays$RNA@counts), id.type = "S", ngene.chr = 5, win.size = 25, KS.cut = 0.1, sam.name = "test", distance = "euclidean", norm.cell.names = "", n.cores = 4, output.seg = "FALSE")
 66 | copykat.PDAC55 <- copykat(rawmat = as.matrix(Sample_55_tumor@assays$RNA@counts), id.type = "S", ngene.chr = 5, win.size = 25, KS.cut = 0.15, sam.name = "test", distance = "euclidean", norm.cell.names = "", n.cores = 4, output.seg = "FALSE")
 67 | copykat.PDAC51 <- copykat(rawmat = as.matrix(Sample_51_tumor@assays$RNA@counts), id.type = "S", ngene.chr = 5, win.size = 25, KS.cut = 0.15, sam.name = "test", distance = "euclidean", norm.cell.names = "", n.cores = 4, output.seg = "FALSE")
 68 | copykat.PDAC50 <- copykat(rawmat = as.matrix(Sample_50_tumor@assays$RNA@counts), id.type = "S", ngene.chr = 5, win.size = 25, KS.cut = 0.1, sam.name = "test", distance = "euclidean", norm.cell.names = "", n.cores = 4, output.seg = "FALSE")
 69 | 
 70 | # add percentage of expression of mitochondrial genes ("^MT-" -> percent.mito) and ribosomal protein genes ("^RPL" -> percent.ribo)
 71 | Sample.merge <- PercentageFeatureSet(Sample.merge, pattern = "^MT-", col.name = "percent.mito")
 72 | Sample.merge <- PercentageFeatureSet(Sample.merge, pattern = "^RPL", col.name = "percent.ribo")
 73 | 
 74 | #filtering: uses 'percent.mito', the column created above ('percent.mt' is not created anywhere in this script)
 75 | Sample.merge <- subset(Sample.merge, subset = percent.mito < 40 & nCount_RNA > 1000 & nFeature_RNA > 500)
 76 | 
 77 | #filtering for Neutrophils annotation
 78 | #Sample.merge <- subset(Sample.merge, subset = percent.mito < 40 & nFeature_RNA > 200)
 79 | 
 80 | #cell-cycle prediction: S and G2M scores from the gene lists in the two text files (only genes present in the RNA assay are used)
 81 | s.genes <- readLines(con = 'genes_Sphase.txt')
 82 | g2m.genes <- readLines(con = 'genes_G2Mphase.txt')
 83 | Sample.merge <- CellCycleScoring(Sample.merge, s.features = s.genes[s.genes %in% rownames(Sample.merge@assays$RNA@data)], g2m.features = g2m.genes[g2m.genes %in% rownames(Sample.merge@assays$RNA@data)], set.ident = FALSE)
 84 | Sample.merge@meta.data[["CC.Difference"]] <- with(Sample.merge@meta.data, S.Score - G2M.Score)
 85 | 
 86 | #doublet calling with scDblFinder, run separately on the raw counts of every orig.ident (dbr = 0.05)
 87 | doublets.scdblfinder <- unlist(lapply(unique(Sample.merge$orig.ident), function(sample_id) {
 88 |   in_sample <- which(Sample.merge@meta.data$orig.ident == sample_id)
 89 |   sample_cells <- rownames(Sample.merge@meta.data)[in_sample]
 90 |   dbl_res <- scDblFinder(Sample.merge@assays$RNA@counts[, sample_cells], dbr = 0.05)
 91 |   # scDblFinder.class of every cell, named by the row names of colData
 92 |   setNames(as.vector(dbl_res@colData$scDblFinder.class), row.names(dbl_res@colData))
 93 | }))
 94 | 
 95 | #filtering doublets: store the class of each cell as 'is.doublet' and keep the cells labelled 'singlet'
 96 | Sample.merge <- AddMetaData(object = Sample.merge, metadata = doublets.scdblfinder[colnames(Sample.merge)], col.name = "is.doublet")
 97 | Sample.merge <- subset(Sample.merge, subset = is.doublet == 'singlet')
 98 | 
 99 | #filtering mitochondrial genes ("^MT-") and ribosomal protein genes ("^RPL") out of the object's features
100 | mito.genes.expr <- grep(pattern = "^MT-", x = rownames(Sample.merge@assays$RNA@counts), value = TRUE)
101 | ribo.genes.expr <- grep(pattern = "^RPL", x = rownames(Sample.merge@assays$RNA@counts), value = TRUE)
102 | keep_genes <- rownames(Sample.merge@assays$RNA@counts)
103 | keep_genes <- keep_genes[is.na(match(keep_genes, c(mito.genes.expr, ribo.genes.expr)))]
104 | Sample.merge <- subset(x = Sample.merge, features = keep_genes)
105 | 
106 | #upload data from .rds objects: rebuild the Seurat object from the saved counts and metadata, then restore the stored UMAP
107 | counts <- readRDS(file = "counts.RDS")
108 | meta.data <- readRDS(file = "metadata.RDS")
109 | Sample.merge <- AddMetaData(CreateSeuratObject(counts), meta.data)
110 | # the UMAP coordinates are two metadata columns; they are renamed to match the "UMAP_" key
111 | umap_data <- Sample.merge@meta.data[, c("UMAP_1_allCells", "UMAP_2_allCells")]
112 | colnames(umap_data) <- c("UMAP_1", "UMAP_2")
113 | Sample.merge[['umap']] <- CreateDimReducObject(embeddings = as.matrix(umap_data), key = "UMAP_", global = TRUE, assay = "RNA")
114 | 
115 | 


--------------------------------------------------------------------------------
/scRNAseq/Human/data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostunilab/PDAC_Nature_2023/74db198d5ec667eb1ccf9e82295ab0ff5bcdbe43/scRNAseq/Human/data/.DS_Store


--------------------------------------------------------------------------------
/scRNAseq/Human/data/counts.RDS:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:25555b326b0824e574664f1cd1ae0498c87c1142e817a89f7bb21133e3641354
3 | size 260585219
4 | 


--------------------------------------------------------------------------------
/scRNAseq/Human/data/metadata.RDS:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:af2f70d73abc7d55f60982d134d1a6aa562ca977bd50f69ae17274ce284f0d8e
3 | size 2687161
4 | 


--------------------------------------------------------------------------------
/scRNAseq/Mouse/COX2-KO_KPC/Analysis.R:
--------------------------------------------------------------------------------
 1 | library(Seurat)
 2 | library(SeuratWrappers)
 3 | library(harmony)
 4 | library(dplyr)
 5 | set.seed(123)
 6 | 
 7 | ### ALL CELLS ###
 8 | 
 9 | Sample_expr <- NormalizeData(Sample_expr, normalization.method = "LogNormalize", scale.factor = 1e4, assay='RNA')
10 | Sample_expr <- RunFastMNN(object.list = SplitObject(Sample_expr, split.by = "orig.ident"))
11 | Sample_expr <- RunUMAP(Sample_expr, reduction='mnn', dims = 1:30)
12 | Sample_expr <- FindNeighbors(Sample_expr, reduction = 'mnn', dims = 1:30)
13 | Sample_expr <- FindClusters(Sample_expr, resolution = c(0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.5))
14 | 
# WT vs KO differential expression inside individual clusters (resolution 0.8).
# For every cluster id the result table is stored in the current environment
# as Cluster_<id>_WT_vs_KO.
for (i in c(0, 15, 4, 10, 12, 9, 11, 18, 2, 13, 17)) {
  # Select the cluster's cells explicitly instead of assembling code strings
  # for eval(parse()).
  cluster_cells <- colnames(Sample_expr)[which(Sample_expr$RNA_snn_res.0.8 == i)]
  sub_cl <- subset(Sample_expr, cells = cluster_cells)
  # Compare samples, not clusters: identities become orig.ident.
  Idents(sub_cl) <- sub_cl$orig.ident
  deg_name <- paste0("Cluster_", i, "_WT_vs_KO")
  assign(
    deg_name,
    FindMarkers(
      sub_cl,
      ident.1 = 'WT',
      ident.2 = 'KO',
      min.pct = 0.1,
      only.pos = FALSE,
      pseudocount.use = 0.1,
      logfc.threshold = 0.5,
      assay = 'RNA'
    )
  )
  #deg <- get(deg_name)
  #write.table(deg[deg$p_val_adj<0.01,], paste0('ALLCELLS_Cluster',i,'_DEG_WT_vs_COX2_KO_res0.8.txt'), sep='\t', quote=F, col.names=T, row.names=T)
}
21 | 
# WT vs KO differential expression on pooled clusters (resolution 0.8).
# Each section subsets the pooled clusters, switches the identities to
# orig.ident and runs FindMarkers with the same settings.

# Clusters 1, 3, 5, 8 -> Cluster_Macro_WT_vs_KO
sub_cl <- subset(Sample_expr, subset = RNA_snn_res.0.8 %in% c(1, 3, 5, 8))
Idents(sub_cl) <- sub_cl$orig.ident
Cluster_Macro_WT_vs_KO <- FindMarkers(
  sub_cl,
  ident.1 = 'WT',
  ident.2 = 'KO',
  assay = 'RNA',
  min.pct = 0.1,
  logfc.threshold = 0.5,
  pseudocount.use = 0.1,
  only.pos = FALSE
)
#write.table(Cluster_Macro_WT_vs_KO[Cluster_Macro_WT_vs_KO$p_val_adj<0.01,], 'ALLCELLS_ClusterMacro_DEG_WT_vs_COX2_KO_res0.8.txt', sep='\t', quote=F, col.names=T, row.names=T)

# Clusters 6, 16 -> Cluster_Fibroblasts_WT_vs_KO
sub_cl <- subset(Sample_expr, subset = RNA_snn_res.0.8 %in% c(6, 16))
Idents(sub_cl) <- sub_cl$orig.ident
Cluster_Fibroblasts_WT_vs_KO <- FindMarkers(
  sub_cl,
  ident.1 = 'WT',
  ident.2 = 'KO',
  assay = 'RNA',
  min.pct = 0.1,
  logfc.threshold = 0.5,
  pseudocount.use = 0.1,
  only.pos = FALSE
)
#write.table(Cluster_Fibroblasts_WT_vs_KO[Cluster_Fibroblasts_WT_vs_KO$p_val_adj<0.01,], 'ALLCELLS_ClusterFibroblasts_DEG_WT_vs_COX2_KO_res0.8.txt', sep='\t', quote=F, col.names=T, row.names=T)

# Clusters 7, 14 -> Cluster_DCs_WT_vs_KO
sub_cl <- subset(Sample_expr, subset = RNA_snn_res.0.8 %in% c(7, 14))
Idents(sub_cl) <- sub_cl$orig.ident
Cluster_DCs_WT_vs_KO <- FindMarkers(
  sub_cl,
  ident.1 = 'WT',
  ident.2 = 'KO',
  assay = 'RNA',
  min.pct = 0.1,
  logfc.threshold = 0.5,
  pseudocount.use = 0.1,
  only.pos = FALSE
)
#write.table(Cluster_DCs_WT_vs_KO[Cluster_DCs_WT_vs_KO$p_val_adj<0.01,], 'ALLCELLS_ClusterDCs_DEG_WT_vs_COX2_KO_res0.8.txt', sep='\t', quote=F, col.names=T, row.names=T)
36 | 
### TUMOR-ASSOCIATED MACROPHAGES ###

# Keep the cells annotated as 'TAMs' in Annotation_2 and re-run the pipeline
# on them: normalisation, 3000 variable features, scaling with CC.Difference
# regressed out, PCA, Harmony correction on orig.ident, then UMAP, neighbour
# graph and clustering on the first 20 Harmony dimensions.
Sample_expr_TAM <- subset(Sample_expr, subset = Annotation_2 == 'TAMs')

Sample_expr_TAM <- NormalizeData(
  Sample_expr_TAM,
  assay = 'RNA',
  normalization.method = "LogNormalize",
  scale.factor = 1e4
)
Sample_expr_TAM <- FindVariableFeatures(
  Sample_expr_TAM,
  selection.method = "vst",
  nfeatures = 3000
)
Sample_expr_TAM <- ScaleData(Sample_expr_TAM, vars.to.regress = c("CC.Difference"))
Sample_expr_TAM <- RunPCA(Sample_expr_TAM)
Sample_expr_TAM <- RunHarmony(
  Sample_expr_TAM,
  group.by.vars = c('orig.ident'),
  dims.use = 1:30,
  theta = 2,
  reduction.save = 'harmony'
)
Sample_expr_TAM <- RunUMAP(Sample_expr_TAM, dims = 1:20, reduction = 'harmony')
Sample_expr_TAM <- FindNeighbors(Sample_expr_TAM, dims = 1:20, reduction = 'harmony')
Sample_expr_TAM <- FindClusters(
  Sample_expr_TAM,
  resolution = c(0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.5)
)
49 | 
# WT vs KO within the cells annotated 'Il1b_TAMs' (Annotation_TAMs).
# logfc.threshold = 0, so no fold-change filter is applied to this table.
sub_Il1bTAM <- subset(Sample_expr_TAM, subset = Annotation_TAMs == 'Il1b_TAMs')
Idents(sub_Il1bTAM) <- sub_Il1bTAM$orig.ident
Il1bTAM_WT_vs_KO <- FindMarkers(
  sub_Il1bTAM,
  ident.1 = 'WT',
  ident.2 = 'KO',
  assay = 'RNA',
  min.pct = 0.1,
  logfc.threshold = 0,
  pseudocount.use = 0.1,
  only.pos = FALSE
)
53 | 


--------------------------------------------------------------------------------
/scRNAseq/Mouse/COX2-KO_KPC/Pre-processing.R:
--------------------------------------------------------------------------------
 1 | library(Seurat)
 2 | library(scDblFinder)
 3 | set.seed(123)
 4 | 
 5 | #### PRE-PROCESSING ####
 6 | 
 7 | Sample.d7.WT <- Read10X('GSM6727566/filtered_feature_bc_matrix/')
 8 | Sample.d7.WT <- CreateSeuratObject(Sample.d7.WT, min.cells = 3,  project ="WT")
 9 | 
10 | Sample.d7.KO <- Read10X('GSM6727567/filtered_feature_bc_matrix/')
11 | Sample.d7.KO <- CreateSeuratObject(Sample.d7.KO, min.cells = 3,  project ="KO")
12 | 
13 | Sample_expr <- merge(Sample.d7.WT, y = c(Sample.d7.KO), add.cell.ids = c('WT','KO'))
14 |  
# Per-cell QC covariates: percentage of counts from features matching '^mt-'
# and '^Rp[sl]'.
Sample_expr[['percent.mt']] <- PercentageFeatureSet(Sample_expr, pattern = '^mt-')
Sample_expr[['percent.ribo']] <- PercentageFeatureSet(Sample_expr, pattern = '^Rp[sl]')

# Cell-cycle scoring with the S / G2M gene lists read from text files; each
# list is restricted to the features present in the RNA assay.
s.genes <- readLines('ccgenes_mm_Sphase.txt')
g2m.genes <- readLines('ccgenes_mm_G2Mphase.txt')
rna_features <- rownames(Sample_expr@assays$RNA@data)
Sample_expr <- CellCycleScoring(
  Sample_expr,
  s.features = s.genes[s.genes %in% rna_features],
  g2m.features = g2m.genes[g2m.genes %in% rna_features],
  set.ident = FALSE
)
Sample_expr@meta.data$CC.Difference <-
  Sample_expr@meta.data$S.Score - Sample_expr@meta.data$G2M.Score

# Cell filter: under 25% mitochondrial counts and more than 200 detected genes.
Sample_expr <- subset(Sample_expr, subset = percent.mt < 25 & nFeature_RNA > 200)
23 | 
# Doublet calling with scDblFinder, one run per sample.
# For each sample the loop creates, in the current environment:
#   sceDblF_<sample> : the scDblFinder result
#   score.<sample>   : doublet scores named by cell
for (i in c('WT', 'KO')) {
  sample_cells <- colnames(Sample_expr)[which(Sample_expr$orig.ident == i)]
  sce_dbl <- scDblFinder(Sample_expr@assays$RNA@counts[, sample_cells], dbr = 0.05)
  sample_scores <- sce_dbl@colData@listData[['scDblFinder.score']]
  names(sample_scores) <- rownames(sce_dbl@colData)
  assign(paste0("sceDblF_", i), sce_dbl)
  assign(paste0("score.", i), sample_scores)
}

doublets.info <- rbind(sceDblF_WT@colData, sceDblF_KO@colData)

# Attach the calls by cell name instead of by position, so the result does not
# depend on the row order of doublets.info matching the column order of
# Sample_expr. The column is stored as character.
doublet_class <- as.vector(doublets.info$scDblFinder.class)
names(doublet_class) <- rownames(doublets.info)
Sample_expr <- AddMetaData(Sample_expr, doublet_class[colnames(Sample_expr)], "is.doublet")

Sample_expr <- subset(Sample_expr, subset = is.doublet == 'singlet')


--------------------------------------------------------------------------------
/scRNAseq/Mouse/Timecourse_KPC/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostunilab/PDAC_Nature_2023/74db198d5ec667eb1ccf9e82295ab0ff5bcdbe43/scRNAseq/Mouse/Timecourse_KPC/.DS_Store


--------------------------------------------------------------------------------
/scRNAseq/Mouse/Timecourse_KPC/Analysis.R:
--------------------------------------------------------------------------------
  1 | library(Seurat)
  2 | library(SeuratWrappers)
  3 | library(harmony)
  4 | library(SeuratExtend)
  5 | library(parallel)
  6 | library(foreach)
  7 | library(dplyr)
  8 | library(clusterProfiler)
  9 | library(biomaRt)
 10 | library(org.Mm.eg.db)
 11 | library(msigdbr)
 12 | set.seed(123)
 13 | 
 14 | readRDS('KPC_timecourse_counts.rds')
 15 | readRDS('KPC_timecourse_metadata.rds') # metadata contains embedding coordinates and cluster ids to reproduce figures
 16 | 
 17 | #### ALL CELLS ####
 18 | 
 19 | Sample_expr <- CreateSeuratObject(counts, min_cells = 0, meta.data = metadata)
 20 | 
 21 | Sample_expr <- NormalizeData(Sample_expr, normalization.method = "LogNormalize", scale.factor = 1e4, assay='RNA')
 22 | Sample_expr_FastMNN <- FindVariableFeatures(Sample_expr,selection.method = "vst", nfeatures = 3000)
 23 | Sample_expr_FastMNN <- RunFastMNN(object.list = SplitObject(Sample_expr_FastMNN, split.by = "orig.ident"))
 24 | Sample_expr_FastMNN <- RunUMAP(Sample_expr_FastMNN, reduction='mnn', dims = 1:20)
 25 | Sample_expr_FastMNN <- FindNeighbors(Sample_expr_FastMNN, reduction = 'mnn', dims = 1:20)
 26 | Sample_expr_FastMNN <- FindClusters(Sample_expr_FastMNN, resolution = c(0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.5))
 27 | 
 28 | Idents(Sample_expr_FastMNN) <- 'RNA_snn_res.0.5'
 29 | i <- 0
 30 | while(i<=23){
 31 |   eval(parse(text=(paste("cluster",i,".markers0.5 <- FindMarkers(Sample_expr_FastMNN, ident.1 =",i,", min.pct=0.25, only.pos = TRUE, pseudocount.use = 0.1, logfc_threshold = 1, assay = 'RNA')", sep=""))))
 32 |   eval(parse(text=(paste("cluster",i,".markers0.5 <- cluster",i,".markers0.5[order(cluster",i,".markers0.5$avg_log2FC, decreasing = TRUE),]", sep=""))))
 33 |   #eval(parse(text=(paste("write.table(cluster",i,".markers0.5, 'MarkerGenes_in_Cluster",i,"_res0.5.txt', sep='\t', quote=F, col.names=T, row.names=T)", sep=""))))
 34 |   print(paste("Evaluated the markers' significance of cluster n.",i))
 35 |   i<-i+1}
 36 | 
 37 | #### MONONUCLEAR PHAGOCYTES ####
 38 | 
 39 | Sample_expr_MP <- subset(Sample_expr, subset = MNPs_refined == 'MNPs')
 40 | 
 41 | Sample_expr_MP <- NormalizeData(Sample_expr_MP, normalization.method = "LogNormalize", scale.factor = 1e4, assay='RNA')
 42 | Sample_expr_MP <- FindVariableFeatures(Sample_expr_MP,selection.method = "vst", nfeatures = 3000)
 43 | Sample_expr_MP <- ScaleData(Sample_expr_MP, vars.to.regress = c("CC.Difference"))
 44 | Sample_expr_MP <- RunPCA(Sample_expr_MP)
 45 | Sample_expr_MP <- RunHarmony(Sample_expr_MP, group.by.vars = c('orig.ident'), dims.use = 1:30, theta=2, reduction.save = 'harmony')
 46 | Sample_expr_MP <- RunUMAP(Sample_expr_MP, reduction='harmony', dims = 1:20)
 47 | Sample_expr_MP <- FindNeighbors(Sample_expr_MP, reduction = 'harmony', dims = 1:20)
 48 | Sample_expr_MP <- FindClusters(Sample_expr_MP, resolution = c(0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.5))
 49 | 
 50 | Idents(Sample_expr_MP) <- 'RNA_snn_res.1'
 51 | i <- 0
 52 | while(i<=15){
 53 |   eval(parse(text=(paste("cluster",i,".markers1 <- FindMarkers(Sample_expr_MP, ident.1 =",i,", min.pct=0.1, only.pos = TRUE, pseudocount.use = 0.1, logfc_threshold = 1, assay = 'RNA')", sep=""))))
 54 |   eval(parse(text=(paste("cluster",i,".markers1 <- cluster",i,".markers1[order(cluster",i,".markers1$avg_log2FC, decreasing = TRUE),]", sep=""))))
 55 |   eval(parse(text=(paste("write.table(cluster",i,".markers1, 'MarkerGenes_in_Cluster",i,"_res1.txt', sep='\t', quote=F, col.names=T, row.names=T)", sep=""))))
 56 |   print(paste("Evaluated the markers' significance of cluster n.",i))
 57 |   i<-i+1}
 58 |   
 59 | #### TUMOR-ASSOCIATED MACROPHAGES ####
 60 | 
 61 | Sample_expr_TAM <- subset(Sample_expr, subset = Macro_refined == 'Macrophage')
 62 | 
 63 | Sample_expr_TAM <- NormalizeData(Sample_expr_TAM, normalization.method = "LogNormalize", scale.factor = 1e4, assay='RNA')
 64 | Sample_expr_TAM <- FindVariableFeatures(Sample_expr_TAM,selection.method = "vst", nfeatures = 3000)
 65 | Sample_expr_TAM <- ScaleData(Sample_expr_TAM, vars.to.regress = c("CC.Difference"))
 66 | Sample_expr_TAM <- RunPCA(Sample_expr_TAM)
 67 | Sample_expr_TAM <- RunHarmony(Sample_expr_TAM, group.by.vars = c('orig.ident'), dims.use = 1:30, theta=2, reduction.save = 'harmony')
 68 | Sample_expr_TAM <- RunUMAP(Sample_expr_TAM, reduction='harmony', dims = 1:20)
 69 | Sample_expr_TAM <- FindNeighbors(Sample_expr_TAM, reduction = 'harmony', dims = 1:20)
 70 | Sample_expr_TAM <- FindClusters(Sample_expr_TAM, resolution = c(0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.5))
 71 | 
 72 | Idents(Sample_expr_TAM) <- 'RNA_snn_res.0.4'
 73 | i <- 0
 74 | while(i<=6){
 75 |   eval(parse(text=(paste("cluster",i,".markers0.4 <- FindMarkers(Sample_expr_TAM, ident.1 =",i,", min.pct=0.1, only.pos = TRUE, pseudocount.use = 0.1, logfc_threshold = 1, assay = 'RNA')", sep=""))))
 76 |   eval(parse(text=(paste("cluster",i,".markers0.4 <- cluster",i,".markers0.4[order(cluster",i,".markers0.4$avg_log2FC, decreasing = TRUE),]", sep=""))))
 77 |   eval(parse(text=(paste("write.table(cluster",i,".markers0.4, 'MarkerGenes_in_Cluster",i,"_res1.txt', sep='\t', quote=F, col.names=T, row.names=T)", sep=""))))
 78 |   print(paste("Evaluated the markers' significance of cluster n.",i))
 79 |   i<-i+1}
 80 |   
 81 | TAM_annotation <- ifelse(Sample_expr_TAM$RNA_snn_res.0.4 == 0, 'Il1b_TAMs',
 82 |                         ifelse(Sample_expr_TAM$RNA_snn_res.0.4 == 1, 'Cxcl9_TAMs', 
 83 |                         ifelse(Sample_expr_TAM$RNA_snn_res.0.4 == 2, 'Spp1_TAMs',
 84 |                         ifelse(Sample_expr_TAM$RNA_snn_res.0.4 == 3, 'Folr2_TAMs',
 85 |                         ifelse(Sample_expr_TAM$RNA_snn_res.0.4 == 4, 'Clps_TAMs',
 86 |                         ifelse(Sample_expr_TAM$RNA_snn_res.0.4 == 5, 'Proliferating_TAMs', 'Marco_TAMs'))))))
 87 | 
 88 | Sample_expr_TAM$TAM_Annotation <- TAM_annotation
 89 | 
 90 | #### MONOCYTES AND MACROPHAGES ####
 91 | 
 92 | readRDS('MonoMacro_KPC_timecourse_counts.rds')
 93 | readRDS('MonoMacro_KPC_timecourse_metadata.rds') # metadata contains embedding coordinates and cluster ids to reproduce figures
 94 | 
 95 | Sample_merge_MonoMacro <- CreateSeuratObject(counts, min_cells = 0, meta.data = metadata)
 96 | 
 97 | Sample_merge_MonoMacro <- NormalizeData(Sample_merge_MonoMacro, normalization.method = "LogNormalize", scale.factor = 1e4, assay='RNA')
 98 | Sample_merge_MonoMacro <- FindVariableFeatures(Sample_merge_MonoMacro,selection.method = "vst", nfeatures = 3000)
 99 | Sample_merge_MonoMacro <- RunFastMNN(object.list = SplitObject(Sample_merge_MonoMacro, split.by = "orig.ident"))
100 | palantir_so<-RunPalantirDiffusionMap(Sample_merge_MonoMacro, reduction = "mnn", n_components = 20)
101 | Sample_merge_MonoMacro[["tsne_mnn"]] <-
102 |     read.csv("tmp/tsne.csv", row.names = 1) %>%
103 |     set_colnames(paste0("TSNE_FASTMNN_", 1:ncol(.))) %>%
104 |     as.matrix() %>%
105 |     CreateDimReducObject(key = "TSNEFASTMNN_", assay = DefaultAssay(Sample_merge_MonoMacro))
106 | 
## prepare annotations for velocity and Cellrank analysis

# Per-cell values of the Annotation column, exported under the column name
# 'clusters_refined'.
annotated_clusters <- as.data.frame(Sample_merge_MonoMacro$Annotation)
names(annotated_clusters) <- 'clusters_refined'
write.csv(annotated_clusters, file = 'annotated_clusters.csv')

# Embedding coordinates from the metadata, exported as columns x / y.
embedding_coord <- setNames(
  Sample_merge_MonoMacro@meta.data[, c('MonoMacro_TSNE_1', 'MonoMacro_TSNE_2')],
  c('x', 'y')
)
write.csv(embedding_coord, file = 'tsne.csv')
116 | 
117 | ### run python notebook scripts for velocity analysis + Cellrank
118 | 
## prepare data for optimal transport analysis

# Day per cell derived from orig.ident: Tumor_d10 -> 10, Tumor_d20 -> 20,
# Tumor_d30 -> 30, anything else -> 0.
sample_of_cell <- Sample_merge_MonoMacro$orig.ident
cell_day <- data.frame(
  id = rownames(Sample_merge_MonoMacro@meta.data),
  day = ifelse(
    sample_of_cell == 'Tumor_d10', 10,
    ifelse(
      sample_of_cell == 'Tumor_d20', 20,
      ifelse(sample_of_cell == 'Tumor_d30', 30, 0)
    )
  )
)
write.table(cell_day, 'cell_day.txt', sep = '\t', quote = F, col.names = T, row.names = F)

# Embedding table with an explicit id column, exported as id / x / y.
embedding_coord <- setNames(
  Sample_merge_MonoMacro@meta.data[, c('cell_id', 'MonoMacro_TSNE_1', 'MonoMacro_TSNE_2')],
  c('id', 'x', 'y')
)
write.table(embedding_coord, 'embedding_coord.txt', sep = '\t', quote = F, col.names = T, row.names = F)

# NOTE(review): SaveH5Seurat() / Convert() look like SeuratDisk functions, and
# SeuratDisk is not among the packages attached at the top of this script -
# confirm it is loaded.
# NOTE(review): `counts` is passed here rather than a Seurat object - confirm
# that it is the intended input.
SaveH5Seurat(counts, filename = "matrix_MM.h5Seurat")
Convert("matrix_MM.h5Seurat", dest = "h5ad")
131 | 
132 | ### run WOT scripts for optimal transport analysis
133 | 
#### EPITHELIAL AND TUMOR CELLS ####

# Cells flagged 'Epithelial_cells' in Epithelial_refined, re-processed on
# their own: normalisation, 3000 variable features, scaling with CC.Difference
# regressed out, PCA, Harmony on orig.ident (dims 1:20, theta 1), then UMAP /
# neighbour graph / clustering on the first 20 Harmony dimensions.
Sample_expr_Epithelial <- subset(Sample_expr, subset = Epithelial_refined == 'Epithelial_cells')

Sample_expr_Epithelial <- NormalizeData(
  Sample_expr_Epithelial,
  assay = 'RNA',
  normalization.method = "LogNormalize",
  scale.factor = 1e4
)
Sample_expr_Epithelial <- FindVariableFeatures(
  Sample_expr_Epithelial,
  selection.method = "vst",
  nfeatures = 3000
)
Sample_expr_Epithelial <- ScaleData(Sample_expr_Epithelial, vars.to.regress = c("CC.Difference"))
Sample_expr_Epithelial <- RunPCA(Sample_expr_Epithelial)
Sample_expr_Epithelial <- RunHarmony(
  Sample_expr_Epithelial,
  group.by.vars = c('orig.ident'),
  dims.use = 1:20,
  theta = 1,
  reduction.save = 'harmony'
)
Sample_expr_Epithelial <- RunUMAP(Sample_expr_Epithelial, dims = 1:20, reduction = 'harmony')
Sample_expr_Epithelial <- FindNeighbors(Sample_expr_Epithelial, dims = 1:20, reduction = 'harmony')
Sample_expr_Epithelial <- FindClusters(
  Sample_expr_Epithelial,
  resolution = c(0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.5)
)

# The comparisons that follow are between samples, so the identities are set
# to orig.ident.
Idents(Sample_expr_Epithelial) <- Sample_expr_Epithelial$orig.ident
148 | 
# Tumour time point vs Healthy differential expression in epithelial cells.
# The FindMarkers argument is `logfc.threshold`; the misspelt `logfc_threshold`
# was not matched, so the intended threshold of 0 (no fold-change filter) was
# not applied to these tables.
Epithelial_Day10_vs_Healthy <- FindMarkers(
  Sample_expr_Epithelial,
  ident.1 = 'Tumor_d10', ident.2 = 'Healthy',
  min.pct = 0.1, only.pos = FALSE, pseudocount.use = 0.1,
  logfc.threshold = 0, assay = 'RNA'
)
#write.table(Epithelial_Day10_vs_Healthy[which(Epithelial_Day10_vs_Healthy$p_val_adj < 0.01 & abs(Epithelial_Day10_vs_Healthy$avg_log2FC) >= 1),], 'Epithelial_Day10_vs_Healthy.txt', sep='\t', quote=F, col.names=T, row.names=T)

Epithelial_Day20_vs_Healthy <- FindMarkers(
  Sample_expr_Epithelial,
  ident.1 = 'Tumor_d20', ident.2 = 'Healthy',
  min.pct = 0.1, only.pos = FALSE, pseudocount.use = 0.1,
  logfc.threshold = 0, assay = 'RNA'
)
#write.table(Epithelial_Day20_vs_Healthy[which(Epithelial_Day20_vs_Healthy$p_val_adj < 0.01 & abs(Epithelial_Day20_vs_Healthy$avg_log2FC) >= 1),], 'Epithelial_Day20_vs_Healthy.txt', sep='\t', quote=F, col.names=T, row.names=T)

Epithelial_Day30_vs_Healthy <- FindMarkers(
  Sample_expr_Epithelial,
  ident.1 = 'Tumor_d30', ident.2 = 'Healthy',
  min.pct = 0.1, only.pos = FALSE, pseudocount.use = 0.1,
  logfc.threshold = 0, assay = 'RNA'
)
#write.table(Epithelial_Day30_vs_Healthy[which(Epithelial_Day30_vs_Healthy$p_val_adj < 0.01 & abs(Epithelial_Day30_vs_Healthy$avg_log2FC) >= 1),], 'Epithelial_Day30_vs_Healthy.txt', sep='\t', quote=F, col.names=T, row.names=T)
157 | 
# Mouse gene sets of msigdbr category "H" and a BioMart connection to the
# mouse gene dataset on the jul2018 Ensembl archive host.
hallmark_gene_sets <- msigdbr(species = "mouse", category = "H")
mouse <- useMart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  dataset = "mmusculus_gene_ensembl",
  host = "jul2018.archive.ensembl.org"
)

# Two-column table (gene-set name, Entrez gene id), used as TERM2GENE for GSEA.
my_term_mouse <- data.frame(hallmark_gene_sets$gs_name, hallmark_gene_sets$entrez_gene)
162 | 
# Hallmark GSEA on the epithelial time-point vs Healthy comparisons.
# For each day the avg_log2FC values are ranked, gene symbols are mapped to
# Entrez ids through BioMart, and the GSEA result is stored in the current
# environment as GSEA_Epithelial_Day<day>_vs_Healthy.mouse_HALLMARK.
clusters_ordered_mouse <- c(10, 20, 30)
for (i in clusters_ordered_mouse) {
  deg_table <- get(paste0("Epithelial_Day", i, "_vs_Healthy"))
  tmp <- deg_table[, 'avg_log2FC']
  names(tmp) <- rownames(deg_table)
  # Drop missing fold changes, then rank from most up- to most down-regulated.
  tmp <- tmp[!is.na(tmp)]
  tmp <- sort(tmp, decreasing = TRUE)
  bioM_mouse <- getBM(
    filters = "mgi_symbol",
    values = names(tmp),
    attributes = c("entrezgene", "mgi_symbol", "description"),
    mart = mouse
  )
  # First Entrez id returned for each symbol (column 1 of the BioMart table);
  # NA when the symbol has no match or no Entrez id.
  gene_id <- as.character(bioM_mouse[match(names(tmp), bioM_mouse$mgi_symbol), 1])
  # Keep only the genes that could be mapped.
  mapped <- !is.na(gene_id)
  geneList <- tmp[mapped]
  names(geneList) <- gene_id[mapped]
  assign(
    paste0("GSEA_Epithelial_Day", i, "_vs_Healthy.mouse_HALLMARK"),
    GSEA(
      geneList,
      TERM2GENE = my_term_mouse,
      nPerm = 100000,
      minGSSize = 15,
      maxGSSize = 500,
      pvalueCutoff = 1,
      verbose = FALSE
    )
  )
  #write.table(get(paste0("GSEA_Epithelial_Day", i, "_vs_Healthy.mouse_HALLMARK"))@result, paste0('Epithelial_Day', i, '_vs_Healthy_GSEA_HALLMARK.txt'), sep='\t', quote=F, col.names=T, row.names=F)
}
177 | 
178 | 


--------------------------------------------------------------------------------
/scRNAseq/Mouse/Timecourse_KPC/Optimal_Transport.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

##### OPTIMAL TRANSPORT ####
# wot command line interface
# cell_day.txt and embedding_coord.txt are the tables written by Analysis.R in
# this directory; the --cell_days file name must match the one written there.

wot optimal_transport --matrix matrix_MM.h5ad --cell_days cell_day.txt --growth_iters 3 --lambda1 1 --lambda2 50 --epsilon 0.05 --verbose
wot trajectory --tmap tmaps --cell_set cell_sets.gmt --day 30 --embedding embedding_coord.txt
wot transition_table --tmap tmaps --cell_set cell_sets.gmt --start_time 0 --end_time 30
9 | 


--------------------------------------------------------------------------------
/scRNAseq/Mouse/Timecourse_KPC/Pre-processing.R:
--------------------------------------------------------------------------------
 1 | library(Seurat)
 2 | library(scDblFinder)
 3 | 
 4 | #### PRE-PROCESSING ####
 5 | 
 6 | Sample.Healthy <- Read10X('GSM6727561/filtered_feature_bc_matrix/')
 7 | Sample.Healthy <- CreateSeuratObject(Sample.Healthy, min.cells = 3,  project ="Healthy")
 8 | 
 9 | Sample.d10.Tumor <- Read10X('GSM6727558/filtered_feature_bc_matrix/')
10 | Sample.d10.Tumor <- CreateSeuratObject(Sample.d10.Tumor, min.cells = 3,  project ="Tumor_d10")
11 | 
12 | Sample.d20.Tumor <- Read10X('GSM6727559/filtered_feature_bc_matrix/')
13 | Sample.d20.Tumor <- CreateSeuratObject(Sample.d20.Tumor, min.cells = 3,  project ="Tumor_d20")
14 | 
15 | Sample.d30.Tumor <- Read10X('GSM6727560/filtered_feature_bc_matrix/')
16 | Sample.d30.Tumor <- CreateSeuratObject(Sample.d30.Tumor, min.cells = 3,  project ="Tumor_d30")
17 | 
18 | Sample_expr <- merge(Sample.d10.Tumor, y = c(Sample.d20.Tumor, Sample.Healthy, Sample.d30.Tumor), 
19 |                          add.cell.ids = c('Tumor_d10','Tumor_d20','Healthy','Tumor_d30'))
20 |  
# Per-cell QC covariates: percentage of counts from features matching '^mt-'
# and '^Rp[sl]'.
Sample_expr[['percent.mt']] <- PercentageFeatureSet(Sample_expr, pattern = '^mt-')
Sample_expr[['percent.ribo']] <- PercentageFeatureSet(Sample_expr, pattern = '^Rp[sl]')

# Cell-cycle scoring with the S / G2M gene lists read from text files; each
# list is restricted to the features present in the RNA assay.
s.genes <- readLines('ccgenes_mm_Sphase.txt')
g2m.genes <- readLines('ccgenes_mm_G2Mphase.txt')
rna_features <- rownames(Sample_expr@assays$RNA@data)
Sample_expr <- CellCycleScoring(
  Sample_expr,
  s.features = s.genes[s.genes %in% rna_features],
  g2m.features = g2m.genes[g2m.genes %in% rna_features],
  set.ident = FALSE
)
Sample_expr@meta.data$CC.Difference <-
  Sample_expr@meta.data$S.Score - Sample_expr@meta.data$G2M.Score

# Cell filter: under 25% mitochondrial counts, more than 1000 counts and more
# than 200 detected genes.
Sample_expr <- subset(
  Sample_expr,
  subset = percent.mt < 25 & nCount_RNA > 1000 & nFeature_RNA > 200
)
29 | 
# Doublet calling with scDblFinder, one run per sample.
# For each sample the loop creates, in the current environment:
#   sceDblF_<sample> : the scDblFinder result
#   score.<sample>   : doublet scores named by cell
for (i in c('Healthy', 'Tumor_d10', 'Tumor_d20', 'Tumor_d30')) {
  sample_cells <- colnames(Sample_expr)[which(Sample_expr$orig.ident == i)]
  sce_dbl <- scDblFinder(Sample_expr@assays$RNA@counts[, sample_cells], dbr = 0.07)
  sample_scores <- sce_dbl@colData@listData[['scDblFinder.score']]
  names(sample_scores) <- rownames(sce_dbl@colData)
  assign(paste0("sceDblF_", i), sce_dbl)
  assign(paste0("score.", i), sample_scores)
}

doublets.info <- rbind(sceDblF_Tumor_d10@colData, sceDblF_Tumor_d20@colData, sceDblF_Healthy@colData, sceDblF_Tumor_d30@colData)

# Attach the calls by cell name instead of by position, so the result does not
# depend on the row order of doublets.info matching the column order of
# Sample_expr. The column is stored as character.
doublet_class <- as.vector(doublets.info$scDblFinder.class)
names(doublet_class) <- rownames(doublets.info)
Sample_expr <- AddMetaData(Sample_expr, doublet_class[colnames(Sample_expr)], "is.doublet")

Sample_expr <- subset(Sample_expr, subset = is.doublet == 'singlet')


--------------------------------------------------------------------------------
/scRNAseq/Mouse/Timecourse_KPC/Velocyto.sh:
--------------------------------------------------------------------------------
#!/bin/bash
##### VELOCYTO ####
# run on each sample, refdata-gex-mm10-2020-A used as reference
# $file_bam (input BAM) and $output_path (value passed to `velocyto run -o`)
# must be set by the caller; the script stops with a message if either is
# missing instead of invoking the tools with empty arguments.

: "${file_bam:?file_bam must be set to the input BAM path}"
: "${output_path:?output_path must be set to the velocyto output path}"

# Paths are quoted so that spaces or glob characters do not split them.
samtools sort -t CB -O BAM -o cellsorted_possorted_genome_bam.bam "$file_bam"
velocyto run -b barcodes.tsv -o "$output_path" -m mm10_rmsk.gtf "$file_bam" genes.gtf
7 | 


--------------------------------------------------------------------------------
/scRNAseq/Mouse/Timecourse_KPC/data/KPC_timecourse_counts.rds:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:a5f71488818768be17cf5cf2b9ea7ab6f7b054db6ad4e4a5688284dba86f04f4
3 | size 259428730
4 | 


--------------------------------------------------------------------------------
/scRNAseq/Mouse/Timecourse_KPC/data/KPC_timecourse_metadata.rds:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:1b7e7aec95b70af9c686a1468f91f8d960cbd293cac2aa270af9c0181ac3f134
3 | size 3260070
4 | 


--------------------------------------------------------------------------------
/scRNAseq/Mouse/Timecourse_KPC/data/MonoMacro_KPC_timecourse_counts.rds:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e6b51ae2f5ac3d4ea2c728c14df559b84e19037026c48ffa605975a6b8856f4c
3 | size 49666751
4 | 


--------------------------------------------------------------------------------
/scRNAseq/Mouse/Timecourse_KPC/data/MonoMacro_KPC_timecourse_metadata.rds:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:12ee4d5343cff03d9ffda94ec0a3894408c4d2a2511fc939511e7319fd6b03bd
3 | size 738582
4 | 


--------------------------------------------------------------------------------