├── old_stuff ├── README.md ├── holgersen_mucosamarkers.R ├── subset_rawdatafiles_from_seuratfile.md ├── reactome_pathway_finding.R ├── Differential_expression_analysis.md ├── 251_10x_vs_251_10x.R ├── overlap_DE_patientregress_not_regressed.R ├── random_sampling_10x+cd.R ├── Import10X_gene-per-patient.R └── merge_ctl_10x_cd.md ├── PSC ├── PSC_2023 │ ├── README.md │ ├── Final_analyses │ │ └── README.md │ ├── Step2_souporcell │ ├── Step1_cellranger │ ├── Step3_PSC_preprep.R │ └── Step4_SCT_integration_noribo_v2.R ├── PSC_version2022 │ ├── PSC_preprep.R │ ├── postqc_filtering │ ├── README.md │ ├── DUOX2.R │ ├── DE_loop.R │ ├── SCT_integration_noribo_v2.R │ └── basicDE_2023.R ├── README.md ├── epithelium_markers.R ├── Proportion_analysis_202103.R ├── Proportion_analysis.R ├── Celltype_new.R ├── riskgene_expression_analysis.R ├── Celltyping_202103 └── DE_analysis (PSC_I and UC_I per cell type)_01062022.R ├── README.md ├── DDTx ├── README.txt ├── Demuxlet_demultiplexing.py ├── ddtx_azimuth_elmentaiteadultileum_classification_demuxlet.Rmd ├── ddtx_filter_normalize_cluster_demuxlet.Rmd ├── demultiplexed_data_demuxlet.R ├── find_markers_and_analyze_proportions.R ├── filtering_and_QC.R ├── ddtx_compartment_assignment_demuxlet.Rmd └── cell_number_tables.py ├── Vedo2 ├── step_1_Cellranger script ├── step_5_Vedo2_celltyping_pbmc_log.R ├── step_5_Vedo2_celltyping_epi_Smillie_match_markergenes.R ├── step_4_integration_pbmc_noribo_nonorm ├── step_2_Souporcell script ├── step_4_integration_biop_noribo_nonorm ├── step_5_Vedo2_celltyping_biop_log.R └── Vedo001_preprep.R ├── cellranger_on_gearshift ├── adding_cell_subtype_to_Seurat_dataframe.R ├── Integration.md ├── create_raw_datafile_from_seq_data.md └── Method_comparisons ├── SCT_integration.R └── rebuttal_20210916 /old_stuff/README.md: -------------------------------------------------------------------------------- 1 | Amazing 2 | -------------------------------------------------------------------------------- /PSC/PSC_2023/README.md: -------------------------------------------------------------------------------- 1 | # README 2 | -------------------------------------------------------------------------------- /PSC/PSC_version2022/PSC_preprep.R: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /PSC/PSC_2023/Final_analyses/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /PSC/PSC_2023/Step2_souporcell: -------------------------------------------------------------------------------- 1 | # example script for running souporcell per lane 2 | -------------------------------------------------------------------------------- /PSC/PSC_2023/Step1_cellranger: -------------------------------------------------------------------------------- 1 | # Example script for running of cellranger per lane 2 | -------------------------------------------------------------------------------- /PSC/PSC_2023/Step3_PSC_preprep.R: -------------------------------------------------------------------------------- 1 | #Script used for sample calling, qc and pre-integration prepping 2 | 3 | 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SingleCell 2 | 3 | In this repository you will find scripts that help you filter, analyze and visualize single cell data using various R packages, such as 'Seurat', 'SCDE', 'scPred', 'harmony' etc. 4 | 5 | The scripts are written for use with single cell UMI-based transcriptomic data, and were tested in datasets ranging from ~1000-150.000 gut mucosal (T-)cells from adapted smartseq2 protocols and 10x genomics v3 protocols. 6 | 7 | See 8 | 9 | http://satijalab.org/ # for sample data and tutorials 10 | 11 | http://satijalab.org/seurat/visualization_vignette.html#interactive-plotting-features # for visualization tips 12 | -------------------------------------------------------------------------------- /DDTx/README.txt: -------------------------------------------------------------------------------- 1 | This project features a case series of 3 small bowel transplant patients. 2 | The aims of this project are to find out: 3 | 1. How immune cell repopulation (recipient cells--> donor organ) happens (over time) 4 | 2. Which cell composition (shifts) are related to rejection (grade)? And with which transcriptional changes 5 | 3. Which transcriptional/compositional changes happen before and after a rejection phase 6 | 7 | We sampled small bowel graft biopsies over time, processed the biopsies for single cell sequencing according to the cryopreserved one-step collagenase protocol (Uniken Venema, Ramirez-Sanchez et al, Sci rep 2022), and made use of Souporcell (Heaton, Nature Methods 2020) to separate donor and recipient cells 8 | -------------------------------------------------------------------------------- /old_stuff/holgersen_mucosamarkers.R: -------------------------------------------------------------------------------- 1 | # Author: WTC 2 | # Date: July 2018 3 | # Script that merges DE genes for CD vs controls from Holgersen paper (JCC 2015) with cell type markers of CD gut mucosacells 4 | 5 | ## match holgersen with risk genes and DE genes 6 | holgersen<-read.csv("~/Desktop/Single_cell/final_data_paper/holgersen/holgersen.csv", sep=";") # load only the 62 genes that are significant in CD 7 | mucosamarkers<-read.table("~/Desktop/Single_cell/final_data_paper/mucosacells_eightcelltypes_DEmarkers_1percMAST.txt") # load cell type markers within mucosa 8 | mucosamarkers<-mucosamarkers[mucosamarkers$p_val_adj<0.05,] 9 | holgersen_mucosamarkers<-merge(mucosamarkers, holgersen, by="gene", all=F) 10 | write.csv(holgersen_mucosamarkers,"~/Desktop/Single_cell/final_data_paper/holgersen/overlap_mucosamarkers_cd_holgersen.csv") 11 | -------------------------------------------------------------------------------- /DDTx/Demuxlet_demultiplexing.py: -------------------------------------------------------------------------------- 1 | singularity exec --bind /groups/umcg-weersma/tmp01/ /groups/umcg-weersma/tmp01/tools/sc-eqtlgen-consortium-pipeline/wg1/wg1-pipeline-20230308_2.simg popscle demuxlet \ 2 | --sam /groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/popscle_tools_filtered/alignment/filtered_alignment//ddtx_220504_laneX_genofiltered_possorted_genome_bam.bam \ 3 | --tag-group CB \ 4 | --tag-UMI UB \ 5 | --field GT \ 6 | --vcf /groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/popscle_tools_filtered/genotype/sorted_genotype/ddtx_bamsorted_220504_laneX_maf005.vcf.gz \ 7 | --out /groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_laneX \ 8 | --group-list /groups/umcg-weersma/tmp01/projects/ddtx/processed/alignment/cellranger_output/GRCh38/220504_laneX/outs/filtered_feature_bc_matrix/barcodes.tsv.gz 9 | -------------------------------------------------------------------------------- /Vedo2/step_1_Cellranger script: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=200611_lane1 3 | #SBATCH --output=/groups/umcg-weersma/tmp01/Emilia/output/200611_lane1.out 4 | #SBATCH --error=/groups/umcg-weersma/tmp01/Emilia/output/200611_lane1.err 5 | #SBATCH --time=6-23:59:00 6 | #SBATCH --cpus-per-task=22 7 | #SBATCH --mem=100gb 8 | #SBATCH --nodes=1 9 | #SBATCH --export=NONE 10 | #SBATCH --get-user-env=L 11 | #SBATCH --tmp=1000gb 12 | mkdir -p ${TMPDIR}/cellranger/200611_lane1 13 | cd ${TMPDIR}/cellranger/200611_lane1 14 | /groups/umcg-weersma/tmp01/singelcell/cellranger-3.1.0/cellranger count \ 15 | --id=cellranger_200611_lane1 \ 16 | --transcriptome=/groups/umcg-weersma/tmp01/Amber/refdata-cellranger-GRCh38-3.0.0 \ 17 | --libraries=/groups/umcg-weersma/tmp01/Emilia/libraries/library_200611_lane1.csv \ 18 | --feature-ref=/groups/umcg-weersma/tmp01/Emilia/featurerefs/feature_ref_200611_lane1.csv \ 19 | --localcores=22 20 | 21 | mv ${TMPDIR}/cellranger/200611_lane1 /groups/umcg-weersma/tmp01/Emilia/final_output/ 22 | -------------------------------------------------------------------------------- /cellranger_on_gearshift: -------------------------------------------------------------------------------- 1 | ## job on gearshift 2 | 3 | #!/bin/bash 4 | #SBATCH --job-name=NAME 5 | #SBATCH --output=/groups/umcg-weersma/tmp01/singelcell/NAME.out 6 | #SBATCH --error=/groups/umcg-weersma/tmp01/singelcell/NAME.err 7 | #SBATCH --time=6-23:59:00 8 | #SBATCH --cpus-per-task=22 9 | #SBATCH --mem=200gb 10 | #SBATCH --nodes=1 11 | #SBATCH --export=NONE 12 | #SBATCH --get-user-env=L 13 | #SBATCH --tmp=2700gb 14 | mkdir -p ${TMPDIR}/cellranger/NAME 15 | mkdir /groups/umcg-weersma/tmp01/singelcell/NAME/ 16 | cd ${TMPDIR}/cellranger/NAME 17 | /groups/umcg-weersma/tmp01/umcg-hbrugge/cellranger-3.0.2/cellranger count \ 18 | --id=NAME \ 19 | --transcriptome=/groups/umcg-weersma/tmp01/umcg-hbrugge/refdata-cellranger-hg19-3.0.0 \ 20 | --libraries=/groups/umcg-weersma/tmp01/singelcell/PSC_data_202001/library_files/library_NAME.csv \ 21 | --feature-ref=/groups/umcg-weersma/tmp01/singelcell/PSC_data_202001/feature_refs/feature_ref_NAME.csv \ 22 | --localcores=22 23 | 24 | mv ${TMPDIR}/cellranger/200108_lane1/ /groups/umcg-weersma/tmp01/singelcell/NAME/ 25 | rm -rf ${TMPDIR}/cellranger/NAME/SC_RNA_COUNTER_CS 26 | -------------------------------------------------------------------------------- /PSC/PSC_version2022/postqc_filtering: -------------------------------------------------------------------------------- 1 | ########################### 2 | # Post-qc filtering # 3 | # and processing # 4 | ########################### 5 | 6 | library(dplyr) 7 | library(Seurat) 8 | library(patchwork) 9 | library(ggplot2) 10 | library(readr) 11 | library(tidyr) 12 | library(MAST) 13 | library(readxl) 14 | library(openxlsx) 15 | 16 | data<- readRDS("Data/PSC_202002_integrated_v2_noribo.rds") 17 | data=UpdateSeuratObject(data) 18 | DimPlot(data, label = T) + NoLegend() 19 | DefaultAssay(data) = "RNA" 20 | 21 | table(data@meta.data$Final_HTO) 22 | data@meta.data$Final_HTO[data@meta.data$Final_HTO == "PSC-I-X019"]<-"PSC-NI-XXXX" 23 | data@meta.data$Final_HTO[data@meta.data$Final_HTO == "PSC-NI-X019"]<-"PSC-I-XXXX" 24 | table(data@meta.data$Final_HTO) 25 | data@meta.data$disease = sapply(strsplit(data$Final_HTO,"-"), `[`, 1) 26 | data@meta.data$inflammation = sapply(strsplit(data$Final_HTO,"-"), `[`, 2) 27 | data@meta.data$sample = sapply(strsplit(data$Final_HTO,"-"), `[`, 3) 28 | data@meta.data$state <- paste(data$disease, data$inflammation, sep='-') 29 | table(data@meta.data$sample) 30 | data = subset(data, subset = sample != "X296") 31 | table(data@meta.data$sample) 32 | DimPlot(data, label = T) + NoLegend() 33 | 34 | saveRDS(data, "Data/PSC_2022_integrated_postqc_filtered.rds") 35 | -------------------------------------------------------------------------------- /PSC/PSC_version2022/README.md: -------------------------------------------------------------------------------- 1 | This folder contains code and information on the processing and analysis of data for the PSC project. Information on wet lab processes can be found upon publication online. Find below the workflow of the computational analyses. 2 | 3 | 1. Count matrices were made directly from raw data using cellranger v3.1.0 with cellranger-GRCh38-3.0.0 as alignment reference 4 | 2. Demultiplexing was done using souporcell (https://github.com/wheaton5/souporcell) 5 | 3. Each 10x lane was preprocessed (HTO demultiplexing, souporcell demultiplexing, QC filtering, SCtransformation) seperately following PSC_preprep.R 6 | 4. Lanes were integrated into one set using SCT_integration_noribo_v2.R following standardized Seurat SCT integration workflow (https://satijalab.org/seurat/v3.0/integration.html) 7 | 5. Post-qc filtering of samples was done following postqc_filtering.R 8 | 6. Celltype annotation was done using Azimuth following XXX 9 | 10 | 11 | Further analyses: 12 | 1. DE_loop.R can be used to generate DE genes and enriched GO processes 13 | 14 | Addition 2023: 15 | 1. DUOX2.R is gebruikt voor de DUOX2 analyse 16 | 2. Figure1.R is gebruikt voor figuur 1. 17 | 3. basicDE_2023.R is gebruikt voor figuur 3 18 | 4. PSC_riskgenes.R is gebruikt voor figuur 4 19 | 5. DE_analysis_2023 is gebaseerd op DE_loop.R en is gebruikt om de DE en GO tabellen te genereren 20 | -------------------------------------------------------------------------------- /Vedo2/step_5_Vedo2_celltyping_pbmc_log.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(Seurat) 3 | library(patchwork) 4 | library(ggplot2) 5 | library(readr) 6 | library(tidyr) 7 | library(MAST) 8 | library(readxl) 9 | library(openxlsx) 10 | 11 | # Load in database 12 | data <- readRDS("Data/batch1_integrated/vedo2_batch1_PBMC_integrated_noribo.rds") 13 | data = UpdateSeuratObject(object = data) 14 | DefaultAssay(data) = "RNA" 15 | 16 | #not normalized dataset 17 | DimPlot(data) 18 | FeaturePlot(data, features = c("CD3E", "CD14", "CD4", "CD8A", "MS4A1", "IL7R")) 19 | 20 | #normalization 21 | data <- NormalizeData(data, verbose = FALSE) 22 | DimPlot(data, label = T) +NoLegend() 23 | 24 | #add metadata 25 | data@meta.data$patient = sapply(strsplit(data$Final_HTO,"-"), `[`, 1) 26 | data@meta.data$timepoint = sapply(strsplit(data$Final_HTO,"-"), `[`, 2) 27 | 28 | table(data$patient, data$Final_HTO) 29 | 30 | #Setting cluster resolution (used on "integrated" assay) 31 | DefaultAssay(data) <- "integrated" 32 | data <- FindClusters(data, resolution = 0.7) 33 | DimPlot(data, label = T) 34 | 35 | #Marker genes pbmc 36 | DefaultAssay(data) <- "RNA" 37 | markers_pbmc <- FindAllMarkers(data, only.pos = TRUE, min.pct = 0.25) 38 | write.csv(markers_pbmc, "Results/markergenes/markers_pbmc_log.csv") 39 | selected_markers_pbmc <- markers_pbmc %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC) 40 | write.csv(selected_markers_pbmc, "Results/markergenes/selected_markers_pbmc_log.csv") 41 | -------------------------------------------------------------------------------- /PSC/README.md: -------------------------------------------------------------------------------- 1 | This folder contains code and information on the processing and analysis of data for the PSC project. 2 | Information on wet lab processes can be found upon publication online. Find below the workflow of the computational analyses. 3 | 4 | 1. Count matrices were made directly from raw data using cellranger v3.1.0 with cellranger-GRCh38-3.0.0 as alignment reference 5 | 2. Demultiplexing was done using souporcell (https://github.com/wheaton5/souporcell) 6 | 3. Each 10x lane was preprocessed (HTO demultiplexing, souporcell demultiplexing, QC filtering, SCtransformation) seperately following preprocess.R 7 | 4. Lanes were integrated into one set using SCT_integration_noribo_v2.R following standardized Seurat SCT integration workflow (https://satijalab.org/seurat/v3.0/integration.html) 8 | 5. Cells were clustered and celltyped using celltype_new.R. This output was used for all subsequent analyses 9 | 10 | Analyses: 11 | - Proportion_analyses.R was used for differential abundance analysis 12 | - DE_analysis.R was used to identify differentially expressed genes for several comparisons, and GO terms associated 13 | - DE_analysis_final_hopelijk.R was used to identify differentially expressed genes, but corrected for number of cells using permutation analysis 14 | - Cellchat.R was used to perform cell-cell interaction analysis (https://github.com/sqjin/CellChat). This was done on a subset of cells for efficiency. 15 | - riskgene_expression_analysis.R was used to assess PSC and UC risk genes differentially expressed in different health and disease states, for each of the different cell types 16 | -------------------------------------------------------------------------------- /adding_cell_subtype_to_Seurat_dataframe.R: -------------------------------------------------------------------------------- 1 | # Adding new celltype from a subset of cells to a Seurat dataframe 2 | 3 | colonocytes<-subset(PSC_dataset, idents = 16) 4 | DefaultAssay(colonocytes)<-"integrated" 5 | colonocytes<-FindNeighbors(colonocytes, dims = 1:30) 6 | colonocytes<-FindClusters(colonocytes, resolution = 0.1) 7 | DimPlot(colonocytes) 8 | table(colonocytes@meta.data$integrated_snn_res.0.1) 9 | 10 | 11 | colonocytes@meta.data$best4_sub[colonocytes@meta.data$seurat_clusters == "0"]<-"Best4_enterocytes" 12 | colonocytes@meta.data$best4_sub[colonocytes@meta.data$seurat_clusters == "1"]<-"Best4_enterocytes" 13 | colonocytes@meta.data$best4_sub[colonocytes@meta.data$seurat_clusters == "2"]<-"M_cells" 14 | 15 | 16 | CellsMeta<-data.frame(PSC_dataset@meta.data) 17 | x<-data.frame(colonocytes@meta.data) 18 | x$NAME<-row.names(x) 19 | CellsMeta$NAME<-row.names(CellsMeta) 20 | x<-x[,c(17,35)] 21 | row.names(CellsMeta)=NULL 22 | row.names(x)<-NULL 23 | keeping.order <- function(data, fn, ...) { 24 | col <- ".sortColumn" 25 | data[,col] <- 1:nrow(data) 26 | out <- fn(data, ...) 27 | if (!col %in% colnames(out)) stop("Ordering column not preserved by function") 28 | out <- out[order(out[,col]),] 29 | out[,col] <- NULL 30 | out 31 | } 32 | CellsMeta<-keeping.order(CellsMeta, merge, y=x, by = "NAME", all=T) 33 | CellsMeta<-CellsMeta[,c(1,34)] 34 | rownames(CellsMeta)<-CellsMeta$NAME 35 | PSC_dataset<-AddMetaData(PSC_dataset, CellsMeta) 36 | 37 | PSC_dataset@meta.data$celltype_subsets[PSC_dataset@meta.data$best4_sub == "Best4_enterocytes"]<-"Best4_enterocytes" 38 | PSC_dataset@meta.data$celltype_subsets[PSC_dataset@meta.data$best4_sub == "M_cells"]<-"M_cells" 39 | 40 | # check whether it worked 41 | table(PSC_dataset@meta.data$best4_sub) 42 | DimPlot(PSC_dataset, group.by="celltype_subsets", label=T, repel=T) 43 | 44 | 45 | -------------------------------------------------------------------------------- /PSC/PSC_version2022/DUOX2.R: -------------------------------------------------------------------------------- 1 | ####################################### 2 | # Generate DUOX2 markers # 3 | ####################################### 4 | 5 | library(dplyr) 6 | library(Seurat) 7 | library(patchwork) 8 | library(ggplot2) 9 | library(readr) 10 | library(tidyr) 11 | library(MAST) 12 | library(readxl) 13 | library(openxlsx) 14 | library(enrichR) 15 | 16 | #load in datasets 17 | epi <- readRDS("Nieuw/epi_azimuth_duox2.rds") 18 | DefaultAssay(epi) = "RNA" 19 | Idents(epi) <- "celltype.final" 20 | 21 | #Create DUOX2 marker pathways 22 | DUOX2markers <- FindMarkers(epi, group.by = "celltype.final", test.use = "MAST", ident.1 = "DUOX2 enterocytes") 23 | DUOX2markers$Gene <- rownames(DUOX2markers) 24 | DUOX2markers <- filter(DUOX2markers, DUOX2markers$p_val_adj < 0.05) 25 | DUOX2pathways <- enrichr(DUOX2markers$Gene, databases = "GO_Biological_Process_2018")$GO_Biological_Process 26 | DUOX2pathways <- filter(DUOX2pathways, DUOX2pathways$Adjusted.P.value < 0.05) 27 | 28 | #Create enterocyte marker pathways 29 | Enteromarkers <- FindMarkers(epi, group.by = "celltype.final", test.use = "MAST", ident.1 = "Enterocytes") 30 | Enteromarkers$Gene <- rownames(Enteromarkers) 31 | Enteromarkers <- filter(Enteromarkers, Enteromarkers$p_val_adj < 0.05) 32 | Enteropathways <- enrichr(Enteromarkers$Gene, databases = "GO_Biological_Process_2018")$GO_Biological_Process 33 | Enteropathways <- filter(Enteropathways, Enteropathways$Adjusted.P.value < 0.05) 34 | 35 | #Create specific marker pathways list and write df's 36 | DUOX2specific <- subset(DUOX2pathways,!(Term%in%Enteropathways$Term)) 37 | DUOX2nonspecific <- subset(DUOX2pathways,(Term%in%Enteropathways$Term)) 38 | write.csv(DUOX2specific, "Results/DE_2023/DUOX2specific.csv") 39 | write.csv(DUOX2nonspecific, "Results/DE_2023/DUOX2nonspecific.csv") 40 | write.csv(DUOX2pathways, "Results/DE_2023/DUOX2pathways.csv") 41 | write.csv(Enteropathways, "Results/DE_2023/Enteropathways.csv") 42 | 43 | 44 | -------------------------------------------------------------------------------- /old_stuff/subset_rawdatafiles_from_seuratfile.md: -------------------------------------------------------------------------------- 1 | **extract CD8 pos and neg bloodcell names from old seurat file and make new seurat file with raw data and these cells** 2 | --- 3 | 4 | Author: WTC 5 | Date: 20171020 6 | 7 | **take filtered dataset containing all cells raw data** 8 | ``` 9 | datafile<-SetAllIdent(datafile, "tissue") 10 | ``` 11 | identify in 'cells_blood' which cells are the bloodcells that are left after filtering of the allcells-dataset 12 | ``` 13 | cells_blood<-WhichCells(datafile, "BLOOD") 14 | ``` 15 | subset data with only bloodcells 16 | ``` 17 | BLOODCELLS<-SubsetData(datafile, cells.use = cells_blood) 18 | ``` 19 | identify in 'CD8*_BLOODCELLS' which cells are CD8-positive and which are not 20 | ``` 21 | BLOODCELLS<-SetAllIdent(BLOODCELLS, "CD8ab.positive") 22 | CD8pos_BLOODCELLS<-WhichCells(BLOODCELLS, "1") 23 | CD8neg_BLOODCELLS<-WhichCells(BLOODCELLS, "0") 24 | ``` 25 | create dataframes containing only raw data of CD8*_mucosacells 26 | ``` 27 | raw_data<-data.frame(datafile@raw.data, check.names = F) 28 | raw_data_CD8pos_blood<-raw_data[,CD8pos_BLOODCELLS] 29 | raw_data_CD8neg_blood<-raw_data[,CD8neg_BLOODCELLS] 30 | ``` 31 | 32 | identify in 'cells_mucosa' which cells are the mucosacells that are left after filtering of the allcells-dataset 33 | ``` 34 | cells_mucosa<-WhichCells(datafile, c("IEL", "LPL")) 35 | ``` 36 | subset data with only mucosacells 37 | ``` 38 | MUCOSACELLS<-SubsetData(datafile, cells.use = cells_mucosa) 39 | ``` 40 | identify in 'CD8*_MUCOSACELLS' which cells are CD8-positive and which are not 41 | ``` 42 | MUCOSACELLS<-SetAllIdent(MUCOSACELLS, "CD8ab.positive") 43 | CD8pos_MUCOSACELLS<-WhichCells(MUCOSACELLS, "1") 44 | CD8neg_MUCOSACELLS<-WhichCells(MUCOSACELLS, "0") 45 | ``` 46 | create dataframes containing only war data of CD8*_mucosacells 47 | ``` 48 | raw_data_CD8pos_mucosa<-raw_data[,CD8pos_MUCOSACELLS] 49 | raw_data_CD8neg_mucosa<-raw_data[,CD8neg_MUCOSACELLS] 50 | ``` 51 | 52 | **in the same way, you can subset IEL, LPL and BLOOD cells** 53 | -------------------------------------------------------------------------------- /old_stuff/reactome_pathway_finding.R: -------------------------------------------------------------------------------- 1 | # Author: WTC 2 | # Year: 2018 3 | # Extract enriched pathways from DE genes, using reactomePA package 4 | 5 | # open markergenes file 6 | celltype_markergenes<-read.csv("~/Desktop/Single_cell/final_data_paper/DE/allcells_pos_neg_DE_markers.csv") 7 | # select only significant genes 8 | celltype_markergenes<-subset(celltype_markergenes, celltype_markergenes$p_val_adj < 0.05) 9 | # select all upregultated genes 10 | markergenes_up<-celltype_markergenes[celltype_markergenes$avg_logFC >0,] 11 | markergenes_down<-celltype_markergenes[celltype_markergenes$avg_logFC <0,] 12 | write.csv(markergenes_down, "~/Desktop/Single_cell/final_data_paper/DE/downregulated_eight_cell_types_genes_1percMAST.csv") 13 | # select all downreagulated genes 14 | 15 | # libraries 16 | library(reactome.db) 17 | library(clusterProfiler) 18 | library(ReactomePA) 19 | 20 | # select TregQuiescent_mucosa_down genes 21 | TregQuiescent_mucosa_down_genes<-subset(markergenes_down, (markergenes_down$cluster == "Treg/Quiescent_mucosa")) 22 | TregQuiescent_mucosa_down_genes<-TregQuiescent_mucosa_down_genes$gene 23 | 24 | # convert gene symbols to entrezid for reactome 25 | TregQuiescent_mucosa_down_genes = bitr(TregQuiescent_mucosa_down_genes, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db") 26 | # take only Entrez-id column in list 27 | TregQuiescent_mucosa_down_entrez <- TregQuiescent_mucosa_down_genes$ENTREZID 28 | # do pathway analysis 29 | TregQuiescent_mucosa_down_pathways <- enrichPathway(gene=TregQuiescent_mucosa_down_entrez,pvalueCutoff=0.05, readable=T) 30 | # write dataframe with results 31 | y<-as.data.frame(TregQuiescent_mucosa_down_pathways) 32 | write.csv(y, "~/Desktop/Single_cell/final_data_paper/DE/TregQuiescent_mucosa_downregulated_pathways.csv") 33 | 34 | # barplot pathways 35 | barplot(TregQuiescent_mucosa_down_pathways, showCategory=15) 36 | 37 | # dotplot enrichment 38 | dotplot(TregQuiescent_mucosa_down_pathways, showCategory=15) 39 | 40 | # enrichMap(x, layout=igraph::layout.kamada.kawai, vertex.label.cex = 1) 41 | cnetplot(TregQuiescent_mucosa_down_pathways, categorySize="pvalue", foldChange=geneList) 42 | 43 | 44 | -------------------------------------------------------------------------------- /Vedo2/step_5_Vedo2_celltyping_epi_Smillie_match_markergenes.R: -------------------------------------------------------------------------------- 1 | 2 | library(data.table) 3 | library(reshape2) 4 | library(readxl) 5 | options(stringsAsFactors = FALSE) 6 | 7 | #load the reference 8 | reference <- read_xlsx("/Applications/Results/Article/6 main scRNAseq studies in human IBD/Smillie-2019-Intra--and-inter-cellular-rewiring-/Supplementary Table/Table S2.xlsx", sheet = 1, col_names = T) 9 | reference <- reference[,1:2] 10 | 11 | #grab the marker genes for each cell tyeps 12 | markergenes <- dcast(setDT(reference), ident~rowid(ident, prefix="gene"), value.var="gene") 13 | markergenes <- as.data.frame(t(markergenes)) 14 | colnames(markergenes) <- markergenes[1,] 15 | markergenes <- markergenes[-1,] 16 | dim(markergenes) 17 | rownames(markergenes) <- paste0("gene", 1:202) 18 | rm(reference) 19 | markergenes[is.na(markergenes)] <- "+++" 20 | 21 | 22 | #Load input_clusters (markergenes of your clusters) 23 | input <- fread("/Applications/Results/Vedo2_batch1_biopsy_dataset/subclusters/epithelial/celltyping/markers_epi_log.csv",data.table = T) 24 | input <- input[,-c(1:6)] 25 | clusters <- dcast(setDT(input), cluster~rowid(cluster, prefix="gene"), value.var="gene") 26 | clusters <- as.data.frame(t(clusters)) 27 | colnames(clusters) <- paste0("cluster", 0:13) 28 | clusters <- clusters[-1,] 29 | dim(clusters) 30 | rownames(clusters) <- paste0("gene", 1:993) 31 | rm(input) 32 | clusters[is.na(clusters)] <- "---" 33 | 34 | 35 | #matching genes 36 | output=as.data.frame(matrix(nrow = ncol(clusters),ncol = ncol(markergenes))) 37 | for(i in 1:ncol(clusters)){ 38 | tmp.cluster=colnames(clusters)[i] 39 | tmp.cluster.data=clusters[,tmp.cluster] 40 | 41 | mm=c() 42 | for(j in 1:ncol(markergenes)){ 43 | tmp.markergenes=colnames(markergenes)[j] 44 | tmp.markergenes.data=markergenes[,tmp.markergenes] 45 | aa=length(which(tmp.cluster.data %in% tmp.markergenes.data)) 46 | mm=append(mm,aa) 47 | } 48 | output[i,]=mm 49 | } 50 | colnames(output)=colnames(markergenes) 51 | rownames(output)=colnames(clusters) 52 | 53 | write.csv(output, "/Applications/Results/Vedo2_batch1_biopsy_dataset/subclusters/epithelial/celltyping/output/Smillie_matching_markergenes.csv") 54 | 55 | -------------------------------------------------------------------------------- /old_stuff/Differential_expression_analysis.md: -------------------------------------------------------------------------------- 1 | 2 | **blood vs mucosa differential expression analysis using SCDE on cluster** 3 | ==== 4 | Author: WTC 5 | Date: 20171020 6 | 7 | *takes a lot of time, so performing on cluster is advised* 8 | 9 | ``` 10 | library("methods") 11 | library(devtools) 12 | library(ggplot2) 13 | library(Matrix) # Sparse matrices 14 | library(dplyr) # Dataframe manipulation 15 | library(flexmix) 16 | library(scde) # Differential Expression 17 | ``` 18 | **load data** 19 | ``` 20 | cd<-read.csv("raw_data_seurat.csv", header=TRUE, row.names=1, check.names=FALSE) 21 | metadata<-read.csv("data_info_seurat.csv", header=TRUE, row.names=1) 22 | ``` 23 | 24 | filter raw data for cells<200 genes (expressed = >1 count) and genes<3 cells 25 | ``` 26 | cd2 <- clean.counts(cd, min.lib.size=200, min.reads = 1, min.detected = 3) 27 | ``` 28 | change metadatafile so that it contains only th filtered cells from datafile 'cd2' 29 | ``` 30 | dim(metadata) 31 | dim(cd2) 32 | xx=colnames(cd2) 33 | metadata2<-metadata[xx,] 34 | dim(metadata2) 35 | dim(cd2) 36 | ``` 37 | 38 | make celltype file which will be used to define which cell belongs to which tissue category, using metadata2 file and converting IEL and LPL to MUCOSA 39 | ``` 40 | celltype = as.factor(gsub("IEL|LPL","MUCOSA",metadata2$tissue)) 41 | ``` 42 | giving each value in the factor celltype the name of the corresponding cell 43 | ``` 44 | names(celltype) <- colnames(cd2) 45 | ``` 46 | calculate error models 47 | --- 48 | 49 | ``` 50 | err.mod <- scde.error.models(counts = cd2, groups = celltype, n.cores = 15, 51 | threshold.segmentation = TRUE, save.crossfit.plots = FALSE, 52 | save.model.plots = FALSE, verbose = 1) 53 | ``` 54 | estimate gene expression prior 55 | --- 56 | 57 | ``` 58 | gene.ex.prior <- scde.expression.prior(models = err.mod, counts = cd2, length.out = 400, 59 | show.plot = F) 60 | ``` 61 | Testing for differential expression 62 | --- 63 | ``` 64 | ediff <- scde.expression.difference(err.mod, cd2, gene.ex.prior, groups = celltype, 65 | n.randomizations = 100, n.cores = 15, verbose = 1) 66 | ``` 67 | write out a table with all the results, showing most significantly different genes 68 | ``` 69 | write.table(ediff[order(abs(ediff$Z), decreasing = TRUE), ], file = “..”, row.names = TRUE, col.names = TRUE, sep = "\t", quote = FALSE) 70 | ``` 71 | -------------------------------------------------------------------------------- /Vedo2/step_4_integration_pbmc_noribo_nonorm: -------------------------------------------------------------------------------- 1 | library(Seurat) 2 | library(ggplot2) 3 | 4 | options(future.globals.maxSize = 150000 * 1024^2) 5 | 6 | lane18_1<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_new/200618_lane1_sct.rds") 7 | DefaultAssay(lane18_1)<-"SCT" 8 | lane18_2<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_new/200618_lane2_sct.rds") 9 | DefaultAssay(lane18_2)<-"SCT" 10 | lane18_3<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_new/200618_lane3_sct.rds") 11 | DefaultAssay(lane18_3)<-"SCT" 12 | lane18_4<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_new/200618_lane4_sct.rds") 13 | DefaultAssay(lane18_4)<-"SCT" 14 | 15 | lane18_1[["percent.ribo"]] <- PercentageFeatureSet(lane18_1, pattern = "^RPL|^RPS") 16 | lane18_2[["percent.ribo"]] <- PercentageFeatureSet(lane18_2, pattern = "^RPL|^RPS") 17 | lane18_3[["percent.ribo"]] <- PercentageFeatureSet(lane18_3, pattern = "^RPL|^RPS") 18 | lane18_4[["percent.ribo"]] <- PercentageFeatureSet(lane18_4, pattern = "^RPL|^RPS") 19 | 20 | alldata_vedo2_batch1_PBMC_integration_list <- list(lane18_1, lane18_2, lane18_3, lane18_4) 21 | 22 | print("loaded") 23 | features <- SelectIntegrationFeatures(object.list = alldata_vedo2_batch1_PBMC_integration_list, nfeatures = 3000) 24 | print("features_selected") 25 | alldata_vedo2_batch1_PBMC_integration_list <- PrepSCTIntegration(object.list = alldata_vedo2_batch1_PBMC_integration_list, anchor.features = features) 26 | print("integration_prepped") 27 | anchors <- FindIntegrationAnchors(object.list = alldata_vedo2_batch1_PBMC_integration_list, anchor.features = features, normalization.method = "SCT") 28 | print("achors_found") 29 | rm(alldata_vedo2_batch1_PBMC_integration_list) 30 | alldata.integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT") 31 | print("integrated") 32 | rm(anchors) 33 | alldata.integrated <- RunPCA(alldata.integrated, verbose = FALSE) 34 | print("pca2_ran") 35 | alldata.integrated <- RunUMAP(alldata.integrated, dims = 1:30) 36 | print("umap_ran") 37 | alldata.integrated <- FindNeighbors(alldata.integrated, dims = 1:30) 38 | alldata.integrated <- FindClusters(alldata.integrated, resolution = 0.5) 39 | 40 | DefaultAssay(alldata.integrated) <- "RNA" 41 | 42 | saveRDS(alldata.integrated, file="/groups/umcg-weersma/tmp01/Emilia/batch1_integration/vedo2_batch1_PBMC_integrated_noribo_withmito.rds") 43 | -------------------------------------------------------------------------------- /PSC/PSC_version2022/DE_loop.R: -------------------------------------------------------------------------------- 1 | ####################################### 2 | # Generate DE list per cell type # 3 | ####################################### 4 | 5 | library(dplyr) 6 | library(Seurat) 7 | library(patchwork) 8 | library(ggplot2) 9 | library(readr) 10 | library(tidyr) 11 | library(MAST) 12 | library(readxl) 13 | library(openxlsx) 14 | library(enrichR) 15 | 16 | #load in datasets 17 | data <- readRDS("XXXX") 18 | data = UpdateSeuratObject(object = data) 19 | 20 | #set idents to final celltyping and assay to RNA 21 | Idents(data) <- "XXXX" 22 | DefaultAssay(data) = "RNA" 23 | 24 | #for loop for DE analysis ##paths and celltype specification should be changed to your likings 25 | celltypes <- unique(data$celltypes_azi_final) 26 | for (x in celltypes) { 27 | # safe pathnames 28 | pathPSCI <- paste("/Users/amberbangma/Documents/R/PSC/Results/",x,"_PSC_I_up.csv",sep="") 29 | pathUCI <- paste("/Users/amberbangma/Documents/R/PSC/Results/",x,"_UC_I_up.csv",sep="") 30 | pathPSCI_GO <- paste("/Users/amberbangma/Documents/R/PSC/Results/",x,"_PSC_I_up_GO.csv",sep="") 31 | pathUCI_GO <- paste("/Users/amberbangma/Documents/R/PSC/Results/",x,"_UC_I_up_GO.csv",sep="") 32 | # find markers 33 | x <- FindMarkers(data, subset.ident = x, group.by = "state", test.use = "MAST", ident.1 = "PSC-I", ident.2 = "UC-I") 34 | x <- filter(x, x$p_val_adj < 0.05) 35 | if(nrow(x) == 0) 36 | { 37 | #skip iteration because of zero significant genes 38 | next 39 | } 40 | #rest of iteration for case of no error 41 | # create a 'state' column 42 | x$state = NA 43 | x = as.data.frame(x) 44 | x$state = ifelse(x$avg_log2FC > 0,"PSC_I","UC_I") 45 | # create a 'gene' column 46 | x$gene = NA 47 | x$gene = rownames(x) 48 | # separate PSC_I up gene list and UC_I up gene list 49 | PSC_I_up_x = filter(x, x$state == "PSC_I") 50 | UC_I_up_x = filter(x, x$state == "UC_I") 51 | # GO terms for each gene list 52 | GO_PSCIup_x <- enrichr(PSC_I_up_x$gene, databases = "GO_Biological_Process_2018")$GO_Biological_Process 53 | GO_UCIup_x <- enrichr(UC_I_up_x$gene, databases = "GO_Biological_Process_2018")$GO_Biological_Process 54 | # save dataframes 55 | write.csv(PSC_I_up_x, pathPSCI) 56 | write.csv(UC_I_up_x, pathUCI) 57 | write.csv(GO_PSCIup_x, pathPSCI_GO) 58 | write.csv(GO_UCIup_x, pathUCI_GO) 59 | } 60 | 61 | ## Findmarkers parameters and script can be tweeked for different DE analyses (for example UC-I vs UC-NI) 62 | ## check DE_analysis_202103.R for examples 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /PSC/epithelium_markers.R: -------------------------------------------------------------------------------- 1 | library(Seurat) 2 | data<-readRDS("Data/PSC_processed_march_2021.rds") 3 | 4 | 5 | data@meta.data$cell_cat<-"other" 6 | 7 | data@meta.data$cell_cat[data@meta.data$celltypes == "Immature_enterocyte"]<-"epithelial" 8 | data@meta.data$cell_cat[data@meta.data$celltypes == "Cycling_TA"]<-"epithelial" 9 | data@meta.data$cell_cat[data@meta.data$celltypes == "Absorptive_enterocyte"]<-"epithelial" 10 | data@meta.data$cell_cat[data@meta.data$celltypes == "Ribo_TA"]<-"epithelial" 11 | data@meta.data$cell_cat[data@meta.data$celltypes == "Stem"]<-"epithelial" 12 | 13 | data@meta.data$cell_cat[data@meta.data$celltypes == "Immature_goblet"]<-"epithelial" 14 | data@meta.data$cell_cat[data@meta.data$celltypes == "BEST4_enterocyte"]<-"epithelial" 15 | data@meta.data$cell_cat[data@meta.data$celltypes == "DUOX2_enterocyte"]<-"epithelial" 16 | data@meta.data$cell_cat[data@meta.data$celltypes == "PLCG2_TA"]<-"epithelial" 17 | data@meta.data$cell_cat[data@meta.data$celltypes == "Absorptive_TA"]<-"epithelial" 18 | data@meta.data$cell_cat[data@meta.data$celltypes == "Tuft"]<-"epithelial" 19 | data@meta.data$cell_cat[data@meta.data$celltypes == "REG_TA"]<-"epithelial" 20 | data@meta.data$cell_cat[data@meta.data$celltypes == "Enteroendocrine"]<-"epithelial" 21 | 22 | data@meta.data$cell_cat[data@meta.data$celltypes == "Goblet"]<-"epithelial" 23 | 24 | Idents(data)<-"cell_cat" 25 | DefaultAssay(data)<-"RNA" 26 | markers_epi<-FindMarkers(data, subset.ident = "epithelial", group.by="state", test.use = "MAST", ident.1 = "PSC-NI", ident.2 = "HC-NI") 27 | write.csv(markers_epi, "Results/DE/epithelium_pscNI_vs_HC.csv") 28 | 29 | markers_epi<-FindMarkers(data, subset.ident = "epithelial", group.by="state", test.use = "MAST", ident.1 = "PSC-NI", ident.2 = "HC-NI") 30 | write.csv(markers_epi, "Results/DE/pithelium_pscNI_vs_HC.csv") 31 | 32 | markers_epi<-FindMarkers(data, subset.ident = "epithelial", group.by="state", test.use = "MAST", ident.1 = "PSC-I", ident.2 = "HC-NI") 33 | write.csv(markers_epi, "Results/DE/epithelium_pscI_vs_HC.csv") 34 | 35 | markers_epi<-FindMarkers(data, subset.ident = "epithelial", group.by="state", test.use = "MAST", ident.1 = "UC-NI", ident.2 = "HC-NI") 36 | write.csv(markers_epi, "Results/DE/epithelium_ucNI_vs_HC.csv") 37 | 38 | markers_epi<-FindMarkers(data, subset.ident = "epithelial", group.by="state", test.use = "MAST", ident.1 = "UC-I", ident.2 = "HC-NI") 39 | write.csv(markers_epi, "Results/DE/epithelium_ucI_vs_HC.csv") 40 | 41 | markers_epi<-FindMarkers(data, subset.ident = "epithelial", group.by="state", test.use = "MAST", ident.1 = "PSC-I", ident.2 = "PSC-NI") 42 | write.csv(markers_epi, "Results/DE/epithelium_pscI_vs_pscNI.csv") 43 | 44 | markers_epi<-FindMarkers(data, subset.ident = "epithelial", group.by="state", test.use = "MAST", ident.1 = "UC-I", ident.2 = "UC-NI") 45 | write.csv(markers_epi, "Results/DE/epithelium_ucI_vs_ucNI.csv") 46 | -------------------------------------------------------------------------------- /Integration.md: -------------------------------------------------------------------------------- 1 | ## Integrating datasets using SCTransform integration, Seurat V3 integration and harmony 2 | ``` 3 | # first: load all QC-ed, SCTransformed and filtered datasets 4 | ``` 5 | ### SCT integration 6 | testing SCT integration, takes a long time and a lot of computational power to compute 7 | ``` 8 | DefaultAssay(Day6_lane2)<-"SCT" # set all default assays to 'SCT' 9 | alldata_202002_integration_list <- list(Day1_lane1, Day1_lane2, Day2_lane1, Day2_lane2, Day3_lane1, Day3_lane2,Day4_lane1, Day4_lane2,Day5_lane1, Day5_lane2,Day6_lane1, Day6_lane2) 10 | 11 | print("loaded") 12 | features <- SelectIntegrationFeatures(object.list = alldata_202002_integration_list, nfeatures = 3000) 13 | print("features_selected") 14 | alldata_202002_integration_list <- PrepSCTIntegration(object.list = alldata_202002_integration_list, anchor.features = features) 15 | print("integration_prepped") 16 | anchors <- FindIntegrationAnchors(object.list = alldata_202002_integration_list, anchor.features = features, normalization.method = "SCT") 17 | print("achors_found") 18 | rm(alldata_202002_integration_list) 19 | alldata.integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT") 20 | print("integrated") 21 | rm(anchors) 22 | alldata.integrated <- RunPCA(alldata.integrated, verbose = FALSE) 23 | print("pca2_ran") 24 | alldata.integrated <- RunUMAP(alldata.integrated, dims = 1:30) 25 | print("umap_ran") 26 | alldata.integrated <- FindNeighbors(alldata.integrated, dims = 1:30) 27 | alldata.integrated <- FindClusters(alldata.integrated, resolution = 0.5) 28 | 29 | DefaultAssay(alldata.integrated) <- "RNA" 30 | alldata.integrated <- NormalizeData(alldata.integrated, normalization.method = "LogNormalize", scale.factor = 10000, verbose = FALSE) 31 | saveRDS(alldata.integrated, file="/data_complete_sct_integrated.rds") 32 | 33 | ``` 34 | ### Seurat integration 35 | testing Seurat v3 integration 36 | ``` 37 | options(future.globals.maxSize = 300000 * 1024^2) # set to max necessary 38 | alldata_202002_integration_anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30) 39 | alldata.integrated <- IntegrateData(anchorset = alldata_202002_integration_anchors, dims = 1:30) 40 | DefaultAssay(alldata.integrated) <- "integrated" 41 | alldata.integrated <- ScaleData(alldata.integrated, verbose = FALSE) 42 | alldata.integrated <- RunPCA(alldata.integrated, npcs = 30, verbose = FALSE) 43 | alldata.integrated <- RunUMAP(alldata.integrated, reduction = "pca", dims = 1:30) 44 | saveRDS(alldata.integrated, "/alldata_normal_integration.rds") 45 | ``` 46 | ### harmony integration 47 | super fast integration 48 | ``` 49 | # start with integrated dataset from above, or merge different datasets into one and start with that 50 | T_cells_harmony <- RunHarmony(T_cells, "dataset", assay.use="SCT") 51 | T_cells_harmony <- RunUMAP(T_cells_harmony, reduction = "harmony", dims = 1:30) 52 | DimPlot(T_cells_harmony, group.by="Sample") 53 | DefaultAssay(T_cells_harmony)<-"SCT" 54 | T_cells_harmony<-FindNeighbors(T_cells_harmony, reduction = "harmony", dims = 1:30) 55 | T_cells_harmony<-FindClusters(T_cells_harmony,resolution = 0.5) 56 | saveRDS(T_cells_harmony, "/T_cells_harmony_integration.rds") 57 | ``` 58 | -------------------------------------------------------------------------------- /Vedo2/step_2_Souporcell script: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=souporcell_200612_lane1 3 | #SBATCH --output=/groups/umcg-weersma/tmp01/Emilia/souporcell/souporcell_output/souporcell_200612_lane1.out 4 | #SBATCH --error=/groups/umcg-weersma/tmp01/Emilia/souporcell/souporcell_output/souporcell_200612_lane1.err 5 | #SBATCH --time=6-23:59:00 6 | #SBATCH --cpus-per-task=12 7 | #SBATCH --mem=100gb 8 | #SBATCH --nodes=1 9 | #SBATCH --export=NONE 10 | #SBATCH --get-user-env=L 11 | #SBATCH --tmp=1000gb 12 | 13 | ml PythonPlus 14 | python /groups/umcg-weersma/tmp01/Amber/souporcell/souporcell/renamer_v2.py \ 15 | --bam /groups/umcg-weersma/tmp01/Emilia/final_output/200612_lane1/cellranger_200612_lane1/outs/possorted_genome_bam.bam \ 16 | --barcodes /groups/umcg-weersma/tmp01/Emilia/souporcell/barcodes/barcodes_200612_lane1.tsv \ 17 | --out /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/200612_lane1_remap.fq.fq 18 | 19 | /groups/umcg-weersma/tmp01/Amber/souporcell/minimap2-2.17_x64-linux/minimap2 -ax splice -t 8 -G50k -k 21 -w 11 --sr -A2 -B8 -O12,32 -E2,1 -r200 -p.5 -N2 20 | 0 -f1000,5000 -n2 -m20 -s40 -g2000 -2K50m \ 21 | --secondary=no /groups/umcg-weersma/tmp01/Amber/refdata-cellranger-GRCh38-3.0.0/fasta/genome.fa \ 22 | /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/200612_lane1_remap.fq.fq \ 23 | > /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/minimap_200612_lane1.sam 24 | 25 | python /groups/umcg-weersma/tmp01/Amber/souporcell/souporcell/retag_v2.py \ 26 | --sam /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/minimap_200612_lane1.sam \ 27 | --out /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/minitagged_200612_lane1.bam 28 | 29 | ml SAMtools/1.9-foss-2018b 30 | samtools sort /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/minitagged_200612_lane1.bam \ 31 | > /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/minitagged_sorted_200612_lane1.bam 32 | samtools index /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/minitagged_sorted_200612_lane1.bam 33 | 34 | ml freebayes 35 | freebayes -f /groups/umcg-weersma/tmp01/Amber/refdata-cellranger-GRCh38-3.0.0/fasta/genome.fa -iXu -C 2 -q 20 -n 3 -E 1 -m 30 --min-coverage 6 -g 100000 \ 36 | /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/minitagged_sorted_200612_lane1.bam \ 37 | > /groups/umcg-weersma/tmp01/Emilia/souporcell/variants/200612_lane1.vcf 38 | 39 | /groups/umcg-weersma/tmp01/Amber/souporcell/vartrix-v1.1.4-x86_64-linux/vartrix \ 40 | --umi \ 41 | --mapq 30 \ 42 | -b /groups/umcg-weersma/tmp01/Emilia/final_output/200612_lane1/cellranger_200612_lane1/outs/possorted_genome_bam.bam \ 43 | -c /groups/umcg-weersma/tmp01/Emilia/souporcell/barcodes/barcodes_200612_lane1.tsv \ 44 | --scoring-method coverage \ 45 | --threads 8 \ 46 | --ref-matrix /groups/umcg-weersma/tmp01/Emilia/souporcell/allele_counting/200612_lane1_wes_ref.mtx \ 47 | --out-matrix /groups/umcg-weersma/tmp01/Emilia/souporcell/allele_counting/200612_lane1_wes_alt.mtx \ 48 | -v /groups/umcg-weersma/tmp01/Emilia/souporcell/variants/200612_lane1.vcf \ 49 | --fasta /groups/umcg-weersma/tmp01/Amber/refdata-cellranger-GRCh38-3.0.0/fasta/genome.fa 50 | 51 | /groups/umcg-weersma/tmp01/Amber/souporcell/souporcell/souporcell/target/release/souporcell \ 52 | --alt_matrix /groups/umcg-weersma/tmp01/Emilia/souporcell/allele_counting/200612_lane1_wes_alt.mtx \ 53 | --ref_matrix /groups/umcg-weersma/tmp01/Emilia/souporcell/allele_counting/200612_lane1_wes_ref.mtx \ 54 | --num_clusters 4 \ 55 | --threads 8 \ 56 | --barcodes /groups/umcg-weersma/tmp01/Emilia/souporcell/barcodes/barcodes_200612_lane1.tsv \ 57 | > /groups/umcg-weersma/tmp01/Emilia/souporcell/final_output/clusters_200612_lane1_tmp.tsv 58 | 59 | /groups/umcg-weersma/tmp01/Amber/souporcell/souporcell/troublet/target/release/troublet \ 60 | -a /groups/umcg-weersma/tmp01/Emilia/souporcell/allele_counting/200612_lane1_wes_alt.mtx \ 61 | -r /groups/umcg-weersma/tmp01/Emilia/souporcell/allele_counting/200612_lane1_wes_ref.mtx \ 62 | --clusters /groups/umcg-weersma/tmp01/Emilia/souporcell/final_output/clusters_200612_lane1_tmp.tsv \ 63 | > /groups/umcg-weersma/tmp01/Emilia/souporcell/final_output/clusters_200612_lane1.tsv 64 | -------------------------------------------------------------------------------- /PSC/PSC_version2022/SCT_integration_noribo_v2.R: -------------------------------------------------------------------------------- 1 | library(Seurat) 2 | library(ggplot2) 3 | 4 | options(future.globals.maxSize = 150000 * 1024^2) 5 | 6 | lane08_1 <-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200108_lane1_sct.rds") 7 | DefaultAssay(lane08_1)<-"SCT" 8 | lane08_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200108_lane2_sct.rds") 9 | DefaultAssay(lane08_2)<-"SCT" 10 | lane09_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200109_lane1_sct.rds") 11 | DefaultAssay(lane09_1)<-"SCT" 12 | lane09_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200109_lane2_sct.rds") 13 | DefaultAssay(lane09_2)<-"SCT" 14 | lane13_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200113_lane1_sct.rds") 15 | DefaultAssay(lane13_1)<-"SCT" 16 | lane13_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200113_lane2_sct.rds") 17 | DefaultAssay(lane13_2)<-"SCT" 18 | lane14_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200114_lane1_sct.rds") 19 | DefaultAssay(lane14_1)<-"SCT" 20 | lane14_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200114_lane2_sct.rds") 21 | DefaultAssay(lane14_2)<-"SCT" 22 | lane15_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200115_lane1_sct.rds") 23 | DefaultAssay(lane15_1)<-"SCT" 24 | lane15_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200115_lane2_sct.rds") 25 | DefaultAssay(lane15_2)<-"SCT" 26 | lane16_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200116_lane1_sct.rds") 27 | DefaultAssay(lane16_1)<-"SCT" 28 | lane16_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200116_lane2_sct.rds") 29 | DefaultAssay(lane16_2)<-"SCT" 30 | 31 | lane08_1[["percent.ribo"]] <- PercentageFeatureSet(lane08_1, pattern = "^RPL|^RPS") 32 | lane08_2[["percent.ribo"]] <- PercentageFeatureSet(lane08_2, pattern = "^RPL|^RPS") 33 | lane09_1[["percent.ribo"]] <- PercentageFeatureSet(lane09_1, pattern = "^RPL|^RPS") 34 | lane09_2[["percent.ribo"]] <- PercentageFeatureSet(lane09_2, pattern = "^RPL|^RPS") 35 | lane13_1[["percent.ribo"]] <- PercentageFeatureSet(lane13_1, pattern = "^RPL|^RPS") 36 | lane13_2[["percent.ribo"]] <- PercentageFeatureSet(lane13_2, pattern = "^RPL|^RPS") 37 | lane14_1[["percent.ribo"]] <- PercentageFeatureSet(lane14_1, pattern = "^RPL|^RPS") 38 | lane14_2[["percent.ribo"]] <- PercentageFeatureSet(lane14_2, pattern = "^RPL|^RPS") 39 | lane15_1[["percent.ribo"]] <- PercentageFeatureSet(lane15_1, pattern = "^RPL|^RPS") 40 | lane15_2[["percent.ribo"]] <- PercentageFeatureSet(lane15_2, pattern = "^RPL|^RPS") 41 | lane16_1[["percent.ribo"]] <- PercentageFeatureSet(lane16_1, pattern = "^RPL|^RPS") 42 | lane16_2[["percent.ribo"]] <- PercentageFeatureSet(lane16_2, pattern = "^RPL|^RPS") 43 | 44 | alldata_PSC_202002_integration_list <- list(lane08_1, lane08_2, lane09_1, lane09_2, lane13_1, lane13_2, lane14_1, lane14_2, lane15_1, lane15_2, lane16_1, lane16_2) 45 | 46 | print("loaded") 47 | features <- SelectIntegrationFeatures(object.list = alldata_PSC_202002_integration_list, nfeatures = 3000) 48 | print("features_selected") 49 | alldata_PSC_202002_integration_list <- PrepSCTIntegration(object.list = alldata_PSC_202002_integration_list, anchor.features = features) 50 | print("integration_prepped") 51 | anchors <- FindIntegrationAnchors(object.list = alldata_PSC_202002_integration_list, anchor.features = features, normalization.method = "SCT") 52 | print("achors_found") 53 | rm(alldata_PSC_202002_integration_list) 54 | alldata.integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT") 55 | print("integrated") 56 | rm(anchors) 57 | alldata.integrated <- RunPCA(alldata.integrated, verbose = FALSE) 58 | print("pca2_ran") 59 | alldata.integrated <- RunUMAP(alldata.integrated, dims = 1:30) 60 | print("umap_ran") 61 | alldata.integrated <- FindNeighbors(alldata.integrated, dims = 1:30) 62 | alldata.integrated <- FindClusters(alldata.integrated, resolution = 0.5) 63 | 64 | DefaultAssay(alldata.integrated) <- "RNA" 65 | alldata.integrated <- NormalizeData(alldata.integrated, normalization.method = "LogNormalize", scale.factor = 10000, verbose = FALSE) 66 | 67 | saveRDS(alldata.integrated, file="/groups/umcg-weersma/tmp01/Amber/PSC_202002_integrated_v2_noribo.rds") 68 | -------------------------------------------------------------------------------- /PSC/PSC_2023/Step4_SCT_integration_noribo_v2.R: -------------------------------------------------------------------------------- 1 | #Script used for integration of lanes and normalisation 2 | 3 | library(Seurat) 4 | library(ggplot2) 5 | 6 | options(future.globals.maxSize = 150000 * 1024^2) 7 | 8 | lane08_1 <-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200108_lane1_sct.rds") 9 | DefaultAssay(lane08_1)<-"SCT" 10 | lane08_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200108_lane2_sct.rds") 11 | DefaultAssay(lane08_2)<-"SCT" 12 | lane09_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200109_lane1_sct.rds") 13 | DefaultAssay(lane09_1)<-"SCT" 14 | lane09_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200109_lane2_sct.rds") 15 | DefaultAssay(lane09_2)<-"SCT" 16 | lane13_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200113_lane1_sct.rds") 17 | DefaultAssay(lane13_1)<-"SCT" 18 | lane13_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200113_lane2_sct.rds") 19 | DefaultAssay(lane13_2)<-"SCT" 20 | lane14_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200114_lane1_sct.rds") 21 | DefaultAssay(lane14_1)<-"SCT" 22 | lane14_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200114_lane2_sct.rds") 23 | DefaultAssay(lane14_2)<-"SCT" 24 | lane15_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200115_lane1_sct.rds") 25 | DefaultAssay(lane15_1)<-"SCT" 26 | lane15_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200115_lane2_sct.rds") 27 | DefaultAssay(lane15_2)<-"SCT" 28 | lane16_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200116_lane1_sct.rds") 29 | DefaultAssay(lane16_1)<-"SCT" 30 | lane16_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200116_lane2_sct.rds") 31 | DefaultAssay(lane16_2)<-"SCT" 32 | 33 | lane08_1[["percent.ribo"]] <- PercentageFeatureSet(lane08_1, pattern = "^RPL|^RPS") 34 | lane08_2[["percent.ribo"]] <- PercentageFeatureSet(lane08_2, pattern = "^RPL|^RPS") 35 | lane09_1[["percent.ribo"]] <- PercentageFeatureSet(lane09_1, pattern = "^RPL|^RPS") 36 | lane09_2[["percent.ribo"]] <- PercentageFeatureSet(lane09_2, pattern = "^RPL|^RPS") 37 | lane13_1[["percent.ribo"]] <- PercentageFeatureSet(lane13_1, pattern = "^RPL|^RPS") 38 | lane13_2[["percent.ribo"]] <- PercentageFeatureSet(lane13_2, pattern = "^RPL|^RPS") 39 | lane14_1[["percent.ribo"]] <- PercentageFeatureSet(lane14_1, pattern = "^RPL|^RPS") 40 | lane14_2[["percent.ribo"]] <- PercentageFeatureSet(lane14_2, pattern = "^RPL|^RPS") 41 | lane15_1[["percent.ribo"]] <- PercentageFeatureSet(lane15_1, pattern = "^RPL|^RPS") 42 | lane15_2[["percent.ribo"]] <- PercentageFeatureSet(lane15_2, pattern = "^RPL|^RPS") 43 | lane16_1[["percent.ribo"]] <- PercentageFeatureSet(lane16_1, pattern = "^RPL|^RPS") 44 | lane16_2[["percent.ribo"]] <- PercentageFeatureSet(lane16_2, pattern = "^RPL|^RPS") 45 | 46 | alldata_PSC_202002_integration_list <- list(lane08_1, lane08_2, lane09_1, lane09_2, lane13_1, lane13_2, lane14_1, lane14_2, lane15_1, lane15_2, lane16_1, lane16_2) 47 | 48 | print("loaded") 49 | features <- SelectIntegrationFeatures(object.list = alldata_PSC_202002_integration_list, nfeatures = 3000) 50 | print("features_selected") 51 | alldata_PSC_202002_integration_list <- PrepSCTIntegration(object.list = alldata_PSC_202002_integration_list, anchor.features = features) 52 | print("integration_prepped") 53 | anchors <- FindIntegrationAnchors(object.list = alldata_PSC_202002_integration_list, anchor.features = features, normalization.method = "SCT") 54 | print("achors_found") 55 | rm(alldata_PSC_202002_integration_list) 56 | alldata.integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT") 57 | print("integrated") 58 | rm(anchors) 59 | alldata.integrated <- RunPCA(alldata.integrated, verbose = FALSE) 60 | print("pca2_ran") 61 | alldata.integrated <- RunUMAP(alldata.integrated, dims = 1:30) 62 | print("umap_ran") 63 | alldata.integrated <- FindNeighbors(alldata.integrated, dims = 1:30) 64 | alldata.integrated <- FindClusters(alldata.integrated, resolution = 0.5) 65 | 66 | DefaultAssay(alldata.integrated) <- "RNA" 67 | alldata.integrated <- NormalizeData(alldata.integrated, normalization.method = "LogNormalize", scale.factor = 10000, verbose = FALSE) 68 | 69 | saveRDS(alldata.integrated, file="/groups/umcg-weersma/tmp01/Amber/PSC_202002_integrated_v2_noribo.rds") 70 | -------------------------------------------------------------------------------- /old_stuff/251_10x_vs_251_10x.R: -------------------------------------------------------------------------------- 1 | ########################################################################################################################### 2 | # Author: WTC 3 | # Date: July 2018 4 | # Function: Sample 1000x 251 cells from 10x cytotoxic cell dataset, calculate DE with CD dataset and determine risk genes in DE genes 5 | ########################################################################################################################### 6 | # 7 | # Libraries 8 | # 9 | ########################################################################################################################### 10 | library(Seurat) 11 | library(Matrix) 12 | #library(Matrix.utils) 13 | #library(ggplot2) 14 | #library(pryr) 15 | 16 | ########################################################################################################################### 17 | # 18 | # Functions 19 | # 20 | ########################################################################################################################### 21 | 22 | # download Cytotoxic T cell gene/cell matrix (raw) from https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/cytotoxic_t 23 | #CTL_10x<-Read10X(data.dir = "~/Downloads/matrices_mex 3/hg19/") 24 | #CTL_10x<-CreateSeuratObject(raw.data = CTL_10x, min.cells = 3, project = "healthy") 25 | # select subset of 251 cells 26 | #load("~/Desktop/Single_cell/CTL_10x_CD/CTL_10x.Rda") 27 | 28 | setwd("~/randomised_permutation_10xsubset_cd/") 29 | load("~/Desktop/Single_cell/CTL_10x_CD/CTL_10x.Rda") 30 | load("~/Desktop/Single_cell/CTL_10x_CD/CTL_CD.Rda") 31 | risk_use<-read.csv("~/Desktop/Single_cell/final_data_paper/DE/risk_genes_in_cd_and_10x.csv") 32 | genes_use<-read.csv("~/Desktop/Single_cell/final_data_paper/DE/genes_in_cd_and_10x.csv") 33 | mkdir("~/nonrandomised_permutation_10xsubset_cd/") 34 | setwd("~/nonrandomised_permutation_10xsubset_cd/") 35 | a<-"number_DEgenes" 36 | b<-"number_DEriskgenes" 37 | number_DE_genes<-data.frame(a) 38 | number_DE_riskgenes<-data.frame(b) 39 | for (i in 1:1000){ 40 | cat("iteration", i, "\n") 41 | samp_col_idxs <- sample(ncol(CTL_10x@data), 251) 42 | samp_col_names <- colnames(CTL_10x@data) [samp_col_idxs] 43 | CTL_10x_subset<-CTL_10x@data[,samp_col_names] 44 | dim(CTL_10x_subset) 45 | CTL_10x_subset<-CreateSeuratObject(raw.data = CTL_10x_subset, min.cells = 3, project = "subset") 46 | mito.genes <- grep(pattern = "^MT-", x = rownames(x = CTL_10x_subset@data), value = TRUE) 47 | percent.mito <- Matrix::colSums(CTL_10x_subset@raw.data[mito.genes, ])/Matrix::colSums(CTL_10x_subset@raw.data) 48 | CTL_10x_subset <- AddMetaData(object = CTL_10x_subset, metadata = percent.mito, col.name = "percent.mito") 49 | 50 | # for ngene: 200-2500, percent mito: <5% 51 | CTL_10x_subset <- FilterCells(object = CTL_10x_subset, subset.names = c("nGene", "percent.mito"), low.thresholds = c(200, -Inf), high.thresholds = c(2500, 0.05)) 52 | 53 | # log normalize 54 | CTL_10x_subset <- NormalizeData(object = CTL_10x_subset, normalization.method = "LogNormalize", scale.factor = 10000) 55 | CTL_10x_subset <- ScaleData(object = CTL_10x_subset, vars.to.regress = c("nUMI", "percent.mito")) 56 | CTL_10x_subset@meta.data$dataset <- "subset10x" 57 | 58 | save(CTL_10x_subset, file=paste0("CTL_10x_subset",i,".Rda")) 59 | 60 | ## assign name to CD dataset 61 | CTL_CD@meta.data$dataset <- "cd" 62 | 63 | ## Combine the cdulated and ctrlulated cells into a single object 64 | CTL_subset.combined <- RunCCA(CTL_10x_subset, CTL_CD, genes.use = intersect(rownames(CTL_10x_subset@data), rownames(CTL_CD@data)), num.cc = 30, scale.data=T) 65 | 66 | ############################################################### 67 | # Process the data after combining the two datasets 68 | ############################################################### 69 | CTL_subset.combined<-SetAllIdent(CTL_subset.combined, "dataset") 70 | subset10x_cells<-WhichCells(CTL_subset.combined, "subset10x") 71 | cd_cells<-WhichCells(CTL_subset.combined, "cd") 72 | 73 | # calculate DE 74 | # DE with genes present in the two datasets, in >1%, MAST 75 | CTL_subset.combined_DE_markers_1perc = FindAllMarkers(CTL_subset.combined, only.pos = T, test.use = "MAST", genes.use = genes_use$gene) 76 | write.table(CTL_subset.combined_DE_markers_1perc, file = paste0("CTL_subset",i,".blood.10x._DEmarkers_MAST.txt")) 77 | 78 | # and the significant ones (p<0.05) 79 | CTL_subset.combined_DE_markers_1perc<-CTL_subset.combined_DE_markers_1perc[CTL_subset.combined_DE_markers_1perc$p_val_adj <0.05,] 80 | number_DE_genes[i,a]<-nrow(CTL_subset.combined_DE_markers_1perc) 81 | 82 | # merge all DE results with risk genes 83 | risk_all_filtered_subset<-merge(risk_use, CTL_subset.combined_DE_markers_1perc, by="gene", all=F) 84 | write.csv(risk_all_filtered_subset, paste0("riskgenes_subset",i,"_vs_cd_filtered.csv")) 85 | number_DE_riskgenes[i,b]<-nrow(risk_all_filtered_subset)} 86 | 87 | write.csv(number_DE_genes, "number_DE_genes_cd_vs_10xsubsets.csv") 88 | write.csv(number_DE_riskgenes, "number_DE_riskgenes_cd_vs_10xsubsets.csv") 89 | -------------------------------------------------------------------------------- /DDTx/ddtx_azimuth_elmentaiteadultileum_classification_demuxlet.Rmd: -------------------------------------------------------------------------------- 1 | title: "ddtx_azimuth_elmentaiteadultileum_classification_demuxlet" 2 | author: "adapted from Roy Oelen" 3 | date: "2023-06-15" 4 | output: html_document 5 | --- 6 | 7 | ```{r header, include=FALSE} 8 | ############################################################################################################################ 9 | # Authors: Roy Oelen 10 | # Name: ddtx_azimuth_elmentaiteadultileum_classification.Rmd 11 | # Function: assign compartments to the cells 12 | ############################################################################################################################ 13 | ``` 14 | 15 | ```{r libraries, include=FALSE} 16 | knitr::opts_chunk$set(echo = FALSE) 17 | #################### 18 | # libraries # 19 | #################### 20 | 21 | # for the object containing meta and count data 22 | library(Seurat) 23 | # for plotting 24 | library(ggplot2) 25 | library(cowplot) 26 | 27 | ``` 28 | 29 | ```{r functions, include=FALSE} 30 | knitr::opts_chunk$set(echo = FALSE) 31 | #################### 32 | # Functions # 33 | #################### 34 | 35 | ``` 36 | 37 | ```{r setup, include=FALSE} 38 | knitr::opts_chunk$set(echo = FALSE) 39 | #################### 40 | # Main Code # 41 | #################### 42 | 43 | # locations of the objects 44 | seurat_objects_loc <- '/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/' 45 | seurat_object_compartment_loc <- paste(seurat_objects_loc, 'ddtx_merged_demultiplexed_clustered_compartment_demuxlet.rds', sep = '') 46 | result_loc <- paste(seurat_objects_loc, 'ddtx_merged_demultiplexed_clustered_compartment_azi_elmentaiteadultileum_demuxlet.rds', sep = '') 47 | 48 | # location of reference 49 | references_loc <- '/groups/umcg-weersma/tmp01/datasets/elmentaite_2021/' 50 | reference_loc <- paste(references_loc, 'elmentaite_2021_all_sct_clus.rds', sep = '') 51 | 52 | ``` 53 | 54 | ```{r read_objects, include=FALSE} 55 | knitr::opts_chunk$set(echo = FALSE) 56 | # read the objects 57 | ddtx <- readRDS(seurat_object_compartment_loc) 58 | elmentaite_full <- readRDS(reference_loc) 59 | 60 | ``` 61 | 62 | ```{r process_reference, include=FALSE} 63 | knitr::opts_chunk$set(echo = FALSE) 64 | 65 | # setting seed for reproduceability 66 | set.seed(7777) 67 | 68 | # subset the elmentaite object to adults and ileum 69 | elmentaite_adult_ileum <- elmentaite_full[, elmentaite_full@meta.data[['Region']] == 'SmallInt' & elmentaite_full@meta.data[['Diagnosis']] == 'Healthy adult'] 70 | # do PCA dimensional reduction 71 | elmentaite_adult_ileum <- RunPCA(elmentaite_adult_ileum) 72 | # do 2d UMAP dimensional reduction 73 | elmentaite_adult_ileum <- RunUMAP(elmentaite_adult_ileum, dims = 1:30, return.model = T) 74 | # create neighbourhood graph 75 | elmentaite_adult_ileum <- FindNeighbors(elmentaite_adult_ileum, dims = 1:30, k.param = 20) 76 | # find clusters 77 | elmentaite_adult_ileum <- FindClusters(elmentaite_adult_ileum, resolution = 1.5) 78 | ``` 79 | 80 | ```{r do_mapping, include=FALSE} 81 | knitr::opts_chunk$set(echo = FALSE) 82 | 83 | # find transfer anchors 84 | anchors <- FindTransferAnchors( 85 | reference = elmentaite_adult_ileum, 86 | query = ddtx, 87 | normalization.method = "SCT", 88 | reference.reduction = "pca", 89 | dims = 1:50 90 | ) 91 | # do the reference mapping 92 | ddtx <- MapQuery( 93 | anchorset = anchors, 94 | query = ddtx, 95 | reference = elmentaite_adult_ileum, 96 | refdata = list( 97 | celltype.elmentaiteadultileum = "cell_type" 98 | ), 99 | reference.reduction = "pca", 100 | reduction.model = "umap" 101 | ) 102 | 103 | ``` 104 | 105 | ```{r create_plots, include=FALSE} 106 | knitr::opts_chunk$set(echo = FALSE) 107 | 108 | # check the reference mapping 109 | reference_umap <- DimPlot(object = elmentaite_adult_ileum, reduction = "umap", group.by = "cell_type", label = TRUE, label.size = 3, repel = TRUE) + NoLegend() 110 | # vs the prediction 111 | projected_umap <- DimPlot(object = ddtx, reduction = "ref.umap", group.by = "predicted.celltype.elmentaiteadultileum", label = TRUE, label.size = 3, repel = TRUE) + NoLegend() 112 | # check the clusters 113 | clusters_umap <- DimPlot(object = ddtx, reduction = "umap", group.by = "seurat_clusters", label = TRUE, label.size = 3, repel = TRUE) + NoLegend() 114 | # vs the predictions 115 | celltypes_umap <- DimPlot(object = ddtx, reduction = "umap", group.by = "predicted.celltype.elmentaiteadultileum", label = TRUE, label.size = 3, repel = TRUE) + NoLegend() 116 | 117 | ``` 118 | 119 | ```{r show_plots, include = TRUE, fig.width=20, fig.height=20} 120 | knitr::opts_chunk$set(echo = FALSE) 121 | 122 | # show result 123 | plot_grid(reference_umap, projected_umap, clusters_umap, celltypes_umap) 124 | ``` 125 | 126 | ```{r save, include=FALSE} 127 | knitr::opts_chunk$set(echo = FALSE) 128 | # save the result 129 | saveRDS(ddtx, result_loc) 130 | ``` 131 | -------------------------------------------------------------------------------- /old_stuff/overlap_DE_patientregress_not_regressed.R: -------------------------------------------------------------------------------- 1 | ## single cell sequencing in Crohn's 2 | 3 | # Author: Werna Uniken Venema 4 | # year: 2018 5 | 6 | ## looking into overlap DE analyses for different patients using two methods: 7 | # 1. comparing DE with and without patient regressed 8 | # 2. comparing DE per patient 9 | 10 | #### method 1 - patient regressed 11 | 12 | # load DE file, not regressed for patient 13 | DE_allpts<-read.csv("~/Dropbox/scRNAseq paper/Results_paper/markergenes/markers/allcells/eight_celltypes_markers.csv", row.names=1) 14 | row.names(DE_allpts)<-NULL 15 | dim(DE_allpts) 16 | DE_allpts$gene_cluster<-paste0(DE_allpts$gene, sep="_", DE_allpts$cluster) 17 | 18 | # use DE file, regressed for patient 19 | # DE with genes >1%, MAST 20 | seuratfile_allcells_patientregr<-SetAllIdent(seuratfile_allcells_patientregr, "eight_cell_types") 21 | allcells_patientregr_DE_markers = FindAllMarkers(seuratfile_allcells_patientregr, min.pct = 0.01, only.pos = T, test.use = "MAST") 22 | row.names(allcells_patientregr_DE_markers)<-NULL 23 | allcells_patientregr_DE_markers<-allcells_patientregr_DE_markers[allcells_patientregr_DE_markers$p_val_adj < 0.05,] 24 | dim(allcells_patientregr_DE_markers) 25 | colnames(allcells_patientregr_DE_markers)[1]<-"p_val_patientregr" 26 | colnames(allcells_patientregr_DE_markers)[2]<-"Av_logFC_patientregr" 27 | colnames(allcells_patientregr_DE_markers)[3]<-"pct.1_patientregr" 28 | colnames(allcells_patientregr_DE_markers)[4]<-"pct.2_patientregr" 29 | colnames(allcells_patientregr_DE_markers)[5]<-"p_val_adj_patientregr" 30 | colnames(allcells_patientregr_DE_markers)[6]<-"cluster_patientregr" 31 | allcells_patientregr_DE_markers$gene_cluster<-paste0(allcells_patientregr_DE_markers$gene, sep="_", allcells_patientregr_DE_markers$cluster_patientregr) 32 | overlap_patient_w_o_regress<-merge(allcells_patientregr_DE_markers, DE_allpts, by="gene_cluster", all=F) 33 | 34 | write.csv(overlap_patient_w_o_regress, file="~/Desktop/Single_cell/final_data_paper/DE/overlap_patient_w_o_regress.csv") 35 | 36 | ####method 2 - overlap patients 37 | 38 | # use DE file, regressed for patient 39 | # DE with genes >1%, MAST 40 | seuratfile_pt1cells<-SetAllIdent(seuratfile_pt1cells, "eight_cell_types") 41 | pt1cells_DE_markers = FindAllMarkers(seuratfile_pt1cells, min.pct = 0.01, only.pos = T, test.use = "MAST") 42 | row.names(pt1cells_DE_markers)<-NULL 43 | pt1cells_DE_markers<-pt1cells_DE_markers[pt1cells_DE_markers$p_val_adj < 0.05,] 44 | dim(pt1cells_DE_markers) 45 | colnames(pt1cells_DE_markers)[1]<-"p_val_pt1cells" 46 | colnames(pt1cells_DE_markers)[2]<-"Av_logFC_pt1cells" 47 | colnames(pt1cells_DE_markers)[3]<-"pct.1_pt1cells" 48 | colnames(pt1cells_DE_markers)[4]<-"pct.2_pt1cells" 49 | colnames(pt1cells_DE_markers)[5]<-"p_val_adj_pt1cells" 50 | colnames(pt1cells_DE_markers)[6]<-"cluster_pt1cells" 51 | pt1cells_DE_markers$gene_cluster<-paste0(pt1cells_DE_markers$gene, sep="_", pt1cells_DE_markers$cluster_pt1cells) 52 | 53 | # use DE file, regressed for patient 54 | # DE with genes >1%, MAST 55 | seuratfile_pt2cells<-SetAllIdent(seuratfile_pt2cells, "eight_cell_types") 56 | pt2cells_DE_markers = FindAllMarkers(seuratfile_pt2cells, min.pct = 0.01, only.pos = T, test.use = "MAST") 57 | row.names(pt2cells_DE_markers)<-NULL 58 | pt2cells_DE_markers<-pt2cells_DE_markers[pt2cells_DE_markers$p_val_adj < 0.05,] 59 | dim(pt2cells_DE_markers) 60 | colnames(pt2cells_DE_markers)[1]<-"p_val_pt2cells" 61 | colnames(pt2cells_DE_markers)[2]<-"Av_logFC_pt2cells" 62 | colnames(pt2cells_DE_markers)[3]<-"pct.1_pt2cells" 63 | colnames(pt2cells_DE_markers)[4]<-"pct.2_pt2cells" 64 | colnames(pt2cells_DE_markers)[5]<-"p_val_adj_pt2cells" 65 | colnames(pt2cells_DE_markers)[6]<-"cluster_pt2cells" 66 | pt2cells_DE_markers$gene_cluster<-paste0(pt2cells_DE_markers$gene, sep="_", pt2cells_DE_markers$cluster_pt2cells) 67 | 68 | ## calculate overlap 69 | overlap_patient1_2<-merge(pt1cells_DE_markers, pt2cells_DE_markers, by="gene_cluster", all=F) 70 | 71 | write.csv(overlap_patient1_2, file="~/Desktop/Single_cell/final_data_paper/DE/overlap_patient1_2.csv") 72 | 73 | # use DE file, regressed for patient 74 | # DE with genes >1%, MAST 75 | seuratfile_pt3cells<-SetAllIdent(seuratfile_pt3cells, "eight_cell_types") 76 | pt3cells_DE_markers = FindAllMarkers(seuratfile_pt3cells, min.pct = 0.01, only.pos = T, test.use = "MAST") 77 | row.names(pt3cells_DE_markers)<-NULL 78 | pt3cells_DE_markers<-pt3cells_DE_markers[pt3cells_DE_markers$p_val_adj < 0.05,] 79 | dim(pt3cells_DE_markers) 80 | colnames(pt3cells_DE_markers)[1]<-"p_val_pt3cells" 81 | colnames(pt3cells_DE_markers)[2]<-"Av_logFC_pt3cells" 82 | colnames(pt3cells_DE_markers)[3]<-"pct.1_pt3cells" 83 | colnames(pt3cells_DE_markers)[4]<-"pct.2_pt3cells" 84 | colnames(pt3cells_DE_markers)[5]<-"p_val_adj_pt3cells" 85 | colnames(pt3cells_DE_markers)[6]<-"cluster_pt3cells" 86 | pt3cells_DE_markers$gene_cluster<-paste0(pt3cells_DE_markers$gene, sep="_", pt3cells_DE_markers$cluster_pt3cells) 87 | 88 | ## calculate overlap 89 | overlap_patient1_2_3<-merge(overlap_patient1_2, pt3cells_DE_markers, by="gene_cluster", all=F) 90 | write.csv(overlap_patient1_2_3, file="~/Desktop/Single_cell/final_data_paper/DE/overlap_patient1_2_3.csv") 91 | 92 | -------------------------------------------------------------------------------- /DDTx/ddtx_filter_normalize_cluster_demuxlet.Rmd: -------------------------------------------------------------------------------- 1 | title: "ddtx_filter_normalize_cluster_demuxlet" 2 | author: "adapted from Roy Oelen" 3 | date: "2023-06-15" 4 | output: html_document 5 | --- 6 | 7 | ```{r header, include=FALSE} 8 | ############################################################################################################################ 9 | # Authors: Roy Oelen 10 | # Name: ddtx_filter_normalize_cluster.Rmd 11 | # Function: filter the object on doublets, normalize with SCT and cluster+UMAP 12 | ############################################################################################################################ 13 | ``` 14 | 15 | ```{r libraries, include=FALSE} 16 | knitr::opts_chunk$set(echo = FALSE) 17 | #################### 18 | # libraries # 19 | #################### 20 | 21 | # for the object containing meta and count data 22 | library(Seurat) 23 | # for plotting 24 | library(ggplot2) 25 | 26 | ``` 27 | 28 | ```{r functions, include=FALSE} 29 | knitr::opts_chunk$set(echo = FALSE) 30 | #################### 31 | # Functions # 32 | #################### 33 | 34 | ``` 35 | 36 | ```{r setup, include=FALSE} 37 | knitr::opts_chunk$set(echo = FALSE) 38 | #################### 39 | # Main Code # 40 | #################### 41 | 42 | # locations of the objects 43 | seurat_objects_loc <- '/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/' 44 | seurat_object_demultiplexed_loc <- paste(seurat_objects_loc, 'ddtx_merged_demultiplexed_demuxlet.rds', sep = '') 45 | seurat_object_clustered_loc <- paste(seurat_objects_loc, 'ddtx_merged_demultiplexed_clustered_demuxlet.rds', sep = '') 46 | 47 | ``` 48 | 49 | ```{r read_object, include=FALSE} 50 | knitr::opts_chunk$set(echo = FALSE) 51 | 52 | # read the object 53 | ddtx <- readRDS(seurat_object_demultiplexed_loc) 54 | 55 | ``` 56 | 57 | ### before filtering 58 | ```{r show_before_filter, include=TRUE, echo=TRUE} 59 | # show the number of cells before filtering 60 | ddtx 61 | ``` 62 | 63 | ### filtering 64 | 65 | The individuals that were sequenced, received a gut transplant. This means that there should be expression data of both the receipiant and the donor, as such the number of clusters from Souporcell, is twice the number of individuals sequenced. The only exception here, is 220504_lane10. Here there no donors. When filtering for doublets, we as such use the 2*n clusters for all lanes but that one. 66 | 67 | ```{r doublet_filter, include=FALSE} 68 | # filter on doublets 69 | ddtx <- ddtx[, ( 70 | ddtx@meta.data$lane %in% c( 71 | '220504_lane01', 72 | '220504_lane02', 73 | '220504_lane03', 74 | '220504_lane04', 75 | '220504_lane05', 76 | '220504_lane06', 77 | '220504_lane07', 78 | '220504_lane08', 79 | '220504_lane09', 80 | '220504_lane10' 81 | ) & 82 | ddtx@meta.data$DROPLET.TYPE == "SNG" 83 | )] 84 | ``` 85 | 86 | ### after filtering 87 | ```{r show_after_filter, include=TRUE, echo=TRUE} 88 | # show the number of cells after filtering 89 | ddtx 90 | ``` 91 | 92 | ```{r normalize_rna, include=FALSE} 93 | # do old-style normalization 94 | ddtx <- NormalizeData(ddtx) 95 | ``` 96 | 97 | ```{r normalize_sct, include=FALSE} 98 | # do SCT normalization 99 | ddtx <- SCTransform(ddtx) 100 | ``` 101 | 102 | ```{r pca_umap_cluster, include=FALSE} 103 | # set the seed for reproduceability 104 | set.seed(7777) 105 | # do PCA dimensional reduction 106 | ddtx <- RunPCA(ddtx) 107 | # do 2d UMAP dimensional reduction 108 | ddtx <- RunUMAP(ddtx, dims = 1:30, return.model = T) 109 | # create neighbourhood graph 110 | ddtx <- FindNeighbors(ddtx, dims = 1:30, k.param = 20) 111 | # find clusters 112 | ddtx <- FindClusters(ddtx, resolution = 1.5) 113 | ``` 114 | 115 | ```{r donors, include=FALSE} 116 | # add preliminary donor assignment 117 | ddtx@meta.data[['donor_final']] <- NA 118 | # we use the donor+receipiant for all lanes 119 | ddtx@meta.data[ddtx@meta.data$lane %in% c('220504_lane01', '220504_lane02', '220504_lane03', '220504_lane04', '220504_lane05', '220504_lane06', '220504_lane07', '220504_lane08', '220504_lane09', '220504_lane10'), 'donor_final'] <- ddtx@meta.data[ddtx@meta.data$lane %in% c('220504_lane01', '220504_lane02', '220504_lane03', '220504_lane04', '220504_lane05', '220504_lane06', '220504_lane07', '220504_lane08', '220504_lane09', '220504_lane10'), 'SNG.BEST.GUESS'] 120 | 121 | ``` 122 | 123 | ```{r create_plots, include=FALSE} 124 | # plot the clusters 125 | p_clusters <- DimPlot(ddtx, reduction = 'umap', group.by = 'seurat_clusters', label = TRUE, label.size = 3, repel = TRUE, raster = FALSE) + NoLegend() + ggtitle('Leiden clusters') 126 | # plot the individuals 127 | p_individuals <- DimPlot(ddtx, reduction = 'umap', group.by = 'donor_final', label = FALSE, label.size = 3, repel = TRUE, raster = FALSE) + ggtitle('genotyped individuals') 128 | ``` 129 | 130 | ### after 2d UMAP reduction and clustering 131 | 132 | ```{r show_plots, include = TRUE, fig.width=10, fig.height=10} 133 | p_clusters 134 | p_individuals 135 | ``` 136 | 137 | ```{r save, include=FALSE} 138 | # save the result 139 | saveRDS(ddtx, seurat_object_clustered_loc) 140 | ``` 141 | -------------------------------------------------------------------------------- /old_stuff/random_sampling_10x+cd.R: -------------------------------------------------------------------------------- 1 | # Author: WTC 2 | # Date: July 2018 3 | # Script for sampling 251 cells from 10x CTL dataset, 4 | # combine those (using Seurat's CCA function) with 251 CTLs from CD dataset, randomly assign source IDs 5 | # and perform differential expression analyses on these randomly made groups of cells, each existing of 251 cells. 6 | 7 | # download Cytotoxic T cell gene/cell matrix (raw) from https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/cytotoxic_t 8 | load("~/Desktop/Single_cell/CTL_10x_CD/CTL_10x.Rda") 9 | load("~/Desktop/Single_cell/CTL_10x_CD/CTL_CD.Rda") # import CD CTL dataset 10 | risk_use<-read.csv("~/Desktop/Single_cell/final_data_paper/DE/risk_genes_in_cd_and_10x.csv") 11 | genes_use<-read.csv("~/Desktop/Single_cell/final_data_paper/DE/genes_in_cd_and_10x.csv") 12 | 13 | a<-"number_DEgenes" 14 | b<-"number_DEriskgenes" 15 | number_DE_genes<-data.frame(a) 16 | number_DE_riskgenes<-data.frame(b) 17 | for(i in (1:1000)){ 18 | cat("iteration", i, "\n") 19 | # select subset of 251 cells 20 | samp_col_idxs_1 <- sample(ncol(CTL_10x@data), 251) 21 | samp_col_names_1 <- colnames(CTL_10x@data) [samp_col_idxs_1] 22 | CTL_10x_subset<-CTL_10x@data[,samp_col_names_1] 23 | dim(CTL_10x_subset) 24 | CTL_10x_subset<-CreateSeuratObject(raw.data = CTL_10x_subset, min.cells = 3, project = "subset") 25 | 26 | mito.genes <- grep(pattern = "^MT-", x = rownames(x = CTL_10x_subset@data), value = TRUE) 27 | percent.mito <- Matrix::colSums(CTL_10x_subset@raw.data[mito.genes, ])/Matrix::colSums(CTL_10x_subset@raw.data) 28 | CTL_10x_subset <- AddMetaData(object = CTL_10x_subset, metadata = percent.mito, col.name = "percent.mito") 29 | 30 | # for ngene: 200-2500, percent mito: <5% 31 | CTL_10x_subset <- FilterCells(object = CTL_10x_subset, subset.names = c("nGene", "percent.mito"), low.thresholds = c(200, -Inf), high.thresholds = c(2500, 0.05)) 32 | 33 | # log normalize 34 | CTL_10x_subset <- NormalizeData(object = CTL_10x_subset, normalization.method = "LogNormalize", scale.factor = 10000) 35 | CTL_10x_subset <- ScaleData(object = CTL_10x_subset, vars.to.regress = c("nUMI", "percent.mito")) 36 | dim(CTL_10x_subset@data) 37 | 38 | CTL_10x_subset@meta.data$dataset <- "subset10x" 39 | 40 | save(CTL_10x_subset, file=paste0("CTL_10x_subset", i, ".Rda")) 41 | 42 | 43 | ## same for CD 44 | CTL_CD@meta.data$dataset <- "cd" 45 | 46 | ## Combine the cdulated and ctrlulated cells into a single object 47 | CTL_subset.combined <- RunCCA(CTL_10x_subset, CTL_CD, genes.use = intersect(rownames(CTL_10x_subset@data), rownames(CTL_CD@data)), num.cc = 30, scale.data=T) 48 | 49 | ############################################################### 50 | # Process the data after combining the two datasets 51 | ############################################################### 52 | 53 | #CTL_subset.combined <- RunPCA(object = CTL_subset.combined, pc.genes = CTL_subset.combined@var.genes, do.print = TRUE, pcs.print = 1:5, genes.print = 5) 54 | #CTL_subset.combined = RunTSNE(CTL_subset.combined, dims.use = 1:15, do.fast = T) 55 | 56 | ## CCA equivalent of the PCElbowPlot 57 | #MetageneBicorPlot(CTL_subset.combined, grouping.var = "dataset", dims.eval = 1:30, display.progress = FALSE) 58 | 59 | ## Use the CCA reduction to better overlap the two original datasetes 60 | #CTL_subset.combined <- AlignSubspace(CTL_subset.combined, reduction.type = "cca", grouping.var = "dataset", dims.align = 1:15) 61 | 62 | #CTL_subset.combined <- RunTSNE(CTL_subset.combined, reduction.use = "cca.aligned", dims.use = 1:15, do.fast = T) 63 | #CTL_subset.combined <- FindClusters(CTL_subset.combined, reduction.type = "cca.aligned", resolution = 0.6, dims.use = 1:15) 64 | #save(CTL_subset.combined, file=paste0("CTL_subset_", i, "combined_aligned.Rda")) 65 | 66 | ### add metadata to randomize cell ids 67 | 68 | 69 | ## add location 70 | #extract file that is a copy from @data.info 71 | CellsMeta = CTL_subset.combined@meta.data 72 | CellsMeta$random_asssignment <- "A" 73 | CellsMeta$random_asssignment[sample(nrow(CellsMeta), abs(nrow(CellsMeta)/2), replace=FALSE)] <- "B" 74 | CTL_subset.combined <- AddMetaData(CTL_subset.combined, CellsMeta) 75 | 76 | CTL_subset.combined<-SetAllIdent(CTL_subset.combined, "random_asssignment") 77 | subset_A<-WhichCells(CTL_subset.combined, "A") 78 | subset_B<-WhichCells(CTL_subset.combined, "B") 79 | 80 | # calculate DE 81 | 82 | # genes present in the two datasets: 83 | # DE with genes present in the two datasets, in >1%, MAST 84 | CTL_subset.combined_DE_markers = FindAllMarkers(CTL_subset.combined, only.pos = T, test.use = "MAST", genes.use = genes_use$gene) 85 | write.table(CTL_subset.combined_DE_markers, file = paste0("CTL_subset", i, "_DEmarkers_MAST.txt")) 86 | 87 | # and the significant ones (p<0.05) 88 | CTL_subset.combined_DE_markers<-CTL_subset.combined_DE_markers[CTL_subset.combined_DE_markers$p_val_adj <0.05,] 89 | number_DE_genes[i,a]<-nrow(CTL_subset.combined_DE_markers) 90 | 91 | # merge with risk genes 92 | # merge all DE results with risk genes 93 | risk_all_filtered_subset<-merge(risk_use, CTL_subset.combined_DE_markers, by="gene", all=F) 94 | 95 | number_DE_riskgenes[i,b]<-nrow(risk_all_filtered_subset)} 96 | 97 | write.csv(number_DE_genes, "number_DE_genes.csv") 98 | write.csv(number_DE_riskgenes, "number_DE_riskgenes.csv") 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /DDTx/demultiplexed_data_demuxlet.R: -------------------------------------------------------------------------------- 1 | ## comparison demuxlet cell and doublet assignment results to souporcell results 2 | # WTC june 2023 3 | 4 | # prep demuxlet results for comparing to souporcell results 5 | 6 | lane1<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane01.best", header=T) 7 | lane1$barcode_lane = substr(lane1$BARCODE,1,nchar(lane1$BARCODE)-2) 8 | lane1$barcode_lane = substr(lane1$BARCODE,1,nchar(lane1$BARCODE)-2) 9 | lane1$barcode_lane<-paste0(lane1$barcode_lane, "_220504_lane01") 10 | 11 | lane2<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane02.best", header=T) 12 | lane2$barcode_lane = substr(lane2$BARCODE,1,nchar(lane2$BARCODE)-2) 13 | lane2$barcode_lane = substr(lane2$BARCODE,1,nchar(lane2$BARCODE)-2) 14 | lane2$barcode_lane<-paste0(lane2$barcode_lane, "_220504_lane02") 15 | 16 | 17 | lane3<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane03.best", header=T) 18 | lane3$barcode_lane = substr(lane3$BARCODE,1,nchar(lane3$BARCODE)-2) 19 | lane3$barcode_lane = substr(lane3$BARCODE,1,nchar(lane3$BARCODE)-2) 20 | lane3$barcode_lane<-paste0(lane3$barcode_lane, "_220504_lane03") 21 | 22 | lane4<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane04.best", header=T) 23 | lane4$barcode_lane = substr(lane4$BARCODE,1,nchar(lane4$BARCODE)-2) 24 | lane4$barcode_lane = substr(lane4$BARCODE,1,nchar(lane4$BARCODE)-2) 25 | lane4$barcode_lane<-paste0(lane4$barcode_lane, "_220504_lane04") 26 | 27 | lane5<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane05.best", header=T) 28 | lane5$barcode_lane = substr(lane5$BARCODE,1,nchar(lane5$BARCODE)-2) 29 | lane5$barcode_lane = substr(lane5$BARCODE,1,nchar(lane5$BARCODE)-2) 30 | lane5$barcode_lane<-paste0(lane5$barcode_lane, "_220504_lane05") 31 | 32 | lane6<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane06.best", header=T) 33 | lane6$barcode_lane = substr(lane6$BARCODE,1,nchar(lane6$BARCODE)-2) 34 | lane6$barcode_lane = substr(lane6$BARCODE,1,nchar(lane6$BARCODE)-2) 35 | lane6$barcode_lane<-paste0(lane6$barcode_lane, "_220504_lane06") 36 | 37 | lane7<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane07.best", header=T) 38 | lane7$barcode_lane = substr(lane7$BARCODE,1,nchar(lane7$BARCODE)-2) 39 | lane7$barcode_lane = substr(lane7$BARCODE,1,nchar(lane7$BARCODE)-2) 40 | lane7$barcode_lane<-paste0(lane7$barcode_lane, "_220504_lane07") 41 | 42 | lane8<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane08.best", header=T) 43 | lane8$barcode_lane = substr(lane8$BARCODE,1,nchar(lane8$BARCODE)-2) 44 | lane8$barcode_lane = substr(lane8$BARCODE,1,nchar(lane8$BARCODE)-2) 45 | lane8$barcode_lane<-paste0(lane8$barcode_lane, "_220504_lane08") 46 | 47 | lane9<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane09.best", header=T) 48 | lane9$barcode_lane = substr(lane9$BARCODE,1,nchar(lane9$BARCODE)-2) 49 | lane9$barcode_lane = substr(lane9$BARCODE,1,nchar(lane9$BARCODE)-2) 50 | lane9$barcode_lane<-paste0(lane9$barcode_lane, "_220504_lane09") 51 | 52 | lane10<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane10.best", header=T) 53 | lane10$barcode_lane = substr(lane10$BARCODE,1,nchar(lane10$BARCODE)-2) 54 | lane10$barcode_lane = substr(lane10$BARCODE,1,nchar(lane10$BARCODE)-2) 55 | lane10$barcode_lane<-paste0(lane10$barcode_lane, "_220504_lane10") 56 | 57 | x<-rbind(lane1, lane2) 58 | x<-rbind(x, lane3) 59 | x<-rbind(x, lane4) 60 | x<-rbind(x, lane5) 61 | x<-rbind(x, lane6) 62 | x<-rbind(x, lane7) 63 | x<-rbind(x, lane8) 64 | x<-rbind(x, lane9) 65 | x<-rbind(x, lane10) 66 | 67 | 68 | x<-x[c(5,13,15,21)] 69 | dim(x) 70 | 71 | # load mito, epi and mislabeled sample filtered souporcell results 72 | metadata<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/ddtx_merged_demultiplexed_clustered_compartment_azi_elmentaiteadultileum_below60pctmito_epifilter_meta.csv") 73 | dim(metadata) 74 | 75 | ## demuxlet assigned singlets largely overlap with souporcell assigned singlets 76 | merge<-merge(x, metadata, by="barcode_lane", all=F) 77 | dim(merge) 78 | merge_single<-merge[merge$DROPLET.TYPE=="SNG",] 79 | table(merge_single$donor_final, merge_single$SNG.BEST.GUESS, merge_single$DROPLET.TYPE) 80 | 81 | #read demultiplexed Seuratobject, not doublet filtered 82 | data<-readDRS("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/objects/ddtx_merged_demultiplexed.rds") 83 | 84 | #add demuxlet output to seuratfile 85 | metadata<-data@meta.data 86 | metadata<-merge(metadata, x,by="barcode_lane") 87 | rownames(metadata)<-metadata$barcode_lane 88 | metadata<-metadata[c(35,36,37)] 89 | data <- AddMetaData(data, metadata) 90 | 91 | # add clinical metadata 92 | reference<-read.csv("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/reference_2.csv", sep=";") 93 | reference<-reference[-c(1,10)] 94 | reference<-reference[-18,] 95 | 96 | meta<-data@meta.data 97 | dim(meta) 98 | meta$pt_project_lane<-paste(meta$lane, meta$SNG.BEST.GUESS, sep="_") 99 | meta<-merge(meta, reference, by="pt_project_lane", all=T) 100 | dim(meta) 101 | meta<-meta[c(7,39:47)] 102 | head(meta) 103 | rownames(meta)<-meta$barcode_lane 104 | meta<-meta[-1] 105 | data<-AddMetaData(data, meta) 106 | 107 | # save 108 | saveRDS(data, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/ddtx_merged_demultiplexed_demuxlet.rds") 109 | 110 | 111 | -------------------------------------------------------------------------------- /PSC/PSC_version2022/basicDE_2023.R: -------------------------------------------------------------------------------- 1 | 2 | ################################################### 3 | # Title: basic DE analysis 4 | # Date: 28-08-2023 5 | ################################################### 6 | 7 | 8 | ###################### 9 | # Library 10 | ###################### 11 | 12 | library(ggplot2) 13 | library(dplyr) 14 | library(tidyr) 15 | library(stringr) 16 | library(patchwork) 17 | library(grid) 18 | library(gridExtra) 19 | library(ggpubr) 20 | library(RColorBrewer) 21 | library(Seurat) 22 | library(readr) 23 | library(MAST) 24 | library(readxl) 25 | library(openxlsx) 26 | library(enrichR) 27 | require(tidyverse) 28 | library(packcircles) 29 | 30 | ###################### 31 | # Main codes 32 | ###################### 33 | 34 | # read in dataframes 35 | epi <- readRDS("Nieuw/epi_azimuth_duox2.rds") 36 | imm <- readRDS("Nieuw/imm_azimuth_with_plasma.rds") 37 | str <- readRDS("Nieuw/stro_azimuth.rds") 38 | 39 | DefaultAssay(epi) = "RNA" 40 | DefaultAssay(imm) = "RNA" 41 | DefaultAssay(str) = "RNA" 42 | Idents(epi) <- "celltype.final" 43 | Idents(imm) <- "predicted.cell_type.pred" 44 | Idents(str) <- "predicted.cell_type.pred" 45 | 46 | # Create a table of frequencies 47 | epi_frequencies <- as.data.frame.matrix(table(epi$celltype.final, epi$disease)) 48 | str_frequencies <- as.data.frame.matrix(table(str$predicted.cell_type.pred, str$disease)) 49 | imm_frequencies <- as.data.frame.matrix(table(imm$predicted.cell_type.pred, imm$disease)) 50 | frequencies <- rbind(epi_frequencies, str_frequencies, imm_frequencies) 51 | 52 | #write_csv(frequencies, "Results/DE_2023/frequencies.csv") 53 | 54 | # Create a table with rows celltype and column disease/analysis, with number of DE genes per analysis for each celltype for UC and PSC 55 | files <- list.files(path="Results/DE_2023/DE_PSCINI_all/", pattern="*.csv", full.names=TRUE, recursive=FALSE) 56 | total_PSC <- data.frame() 57 | lapply(files, function(x) { 58 | t <- read.csv(x, header=TRUE) # load file 59 | celltype <- tools::file_path_sans_ext(basename(x)) 60 | t$celltype <- celltype 61 | out <- t[t$p_val_adj < 0.05,] 62 | # write to file 63 | total_PSC <<- rbind(total_PSC, out) 64 | }) 65 | total_PSC <- total_PSC[,c(6,8,9)] 66 | DEpercelltype_PSC <- total_PSC %>% 67 | group_by(celltype) %>% 68 | summarize(num_genes = n()) 69 | 70 | files <- list.files(path="Results/DE_2023/DE_UCINI_all/", pattern="*.csv", full.names=TRUE, recursive=FALSE) 71 | total_UC <- data.frame() 72 | lapply(files, function(x) { 73 | t <- read.csv(x, header=TRUE) # load file 74 | celltype <- tools::file_path_sans_ext(basename(x)) 75 | t$celltype <- celltype 76 | out <- t[t$p_val_adj < 0.05,] 77 | # write to file 78 | total_UC <<- rbind(total_UC, out) 79 | }) 80 | total_UC <- total_UC[,c(6,8,9)] 81 | DEpercelltype_UC <- total_UC %>% 82 | group_by(celltype) %>% 83 | summarize(num_genes = n()) 84 | 85 | # Filter celltypes with n genes > 9 86 | DEpercelltype_PSC_10 <- DEpercelltype_PSC[DEpercelltype_PSC$num_genes > 9,] 87 | DEpercelltype_UC_10 <- DEpercelltype_UC[DEpercelltype_UC$num_genes > 9,] 88 | 89 | # Make figure for PSC 90 | # Generate the layout. This function return a dataframe with one line per bubble. 91 | # It gives its center (x and y) and its radius, proportional of the value 92 | packing <- circleProgressiveLayout(DEpercelltype_PSC_10$num_genes, sizetype='area') 93 | 94 | # We can add these packing information to the initial data frame 95 | data <- cbind(DEpercelltype_PSC_10, packing) 96 | 97 | # Check that radius is proportional to value. We don't want a linear relationship, since it is the AREA that must be proportional to the value 98 | plot(data$radius, data$value) 99 | 100 | # The next step is to go from one center + a radius to the coordinates of a circle that 101 | # is drawn by a multitude of straight lines. 102 | dat.gg <- circleLayoutVertices(packing, npoints=50) 103 | 104 | ggplot() + 105 | 106 | # Make the bubbles 107 | geom_polygon(data = dat.gg, aes(x, y, group = id, fill=as.factor(id)), colour = "black", alpha = 0.6) + 108 | 109 | # Add text in the center of each bubble + control its size 110 | geom_text(data = data, aes(x, y, size=num_genes, label = celltype)) + 111 | scale_size_continuous(range = c(1,4)) + 112 | 113 | # General theme: 114 | theme_void() + 115 | theme(legend.position="none") + 116 | coord_equal() 117 | 118 | ggsave("/Users/amberbangma/Documents/R/PSC/Results/Circle_PSC.pdf") 119 | 120 | # Make figure for UC 121 | # Generate the layout. This function return a dataframe with one line per bubble. 122 | # It gives its center (x and y) and its radius, proportional of the value 123 | packing <- circleProgressiveLayout(DEpercelltype_UC_10$num_genes, sizetype='area') 124 | 125 | # We can add these packing information to the initial data frame 126 | data <- cbind(DEpercelltype_UC_10, packing) 127 | 128 | # Check that radius is proportional to value. We don't want a linear relationship, since it is the AREA that must be proportional to the value 129 | plot(data$radius, data$value) 130 | 131 | # The next step is to go from one center + a radius to the coordinates of a circle that 132 | # is drawn by a multitude of straight lines. 133 | dat.gg <- circleLayoutVertices(packing, npoints=50) 134 | 135 | 136 | ggplot() + 137 | 138 | # Make the bubbles 139 | geom_polygon(data = dat.gg, aes(x, y, group = id, fill=as.factor(id)), colour = "black", alpha = 0.6) + 140 | 141 | # Add text in the center of each bubble + control its size 142 | geom_text(data = data, aes(x, y, size=num_genes, label = celltype)) + 143 | scale_size_continuous(range = c(1,4)) + 144 | 145 | # General theme: 146 | theme_void() + 147 | theme(legend.position="none") + 148 | coord_equal() 149 | 150 | ggsave("/Users/amberbangma/Documents/R/PSC/Results/Circle_UC.pdf", width=6, height=6) 151 | -------------------------------------------------------------------------------- /PSC/Proportion_analysis_202103.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(Seurat) 3 | library(patchwork) 4 | library(ggplot2) 5 | library(readr) 6 | library(tidyr) 7 | library(MAST) 8 | library(readxl) 9 | library(openxlsx) 10 | library(reshape2) 11 | library(phyloseq) 12 | library(fido) 13 | library(compositions) 14 | library(corrplot) 15 | 16 | # load in dataset 17 | data<- readRDS("PSC_processed_march_2021.rds") 18 | DefaultAssay(data) = "RNA" 19 | 20 | # extract counts to matrix 21 | counts=as.data.frame.matrix(table(data$Final_HTO, data$celltypes)) 22 | 23 | # calculate proportions 24 | total_row = apply(counts, 1, sum) 25 | pcts = lapply(counts, function(x) { 26 | x / total_row 27 | }) 28 | frequencies = as.data.frame(pcts) 29 | rownames(counts) = rownames(frequencies) 30 | 31 | # add state 32 | frequencies$disease = sapply(strsplit(rownames(frequencies),"-"), `[`, 1) 33 | frequencies$inflammation = sapply(strsplit(rownames(frequencies),"-"), `[`, 2) 34 | frequencies$state = apply( frequencies[ ,c(44,45)] , 1 , paste , collapse = "-" ) 35 | frequencies = frequencies[,-c(44,45)] 36 | 37 | # remove doublets and MT-Hi 38 | frequencies=frequencies[,-c(6,14,20,26,33)] 39 | 40 | # plot frequencies 41 | list_celltypes<-list(colnames(frequencies[-39])) 42 | df = melt(frequencies) 43 | df$state = factor(df$state, levels = c("HC-NI", "UC-NI", "UC-I", "PSC-NI", "PSC-I")) 44 | df$variable = factor(df$variable, levels = list_celltypes[[1]]) 45 | ggplot(df, aes(x=state, y=value, fill=state)) + 46 | geom_boxplot(outlier.size = 1) + 47 | facet_wrap( ~ variable, scales="free") + 48 | theme_bw(base_size = 14) + 49 | scale_fill_brewer(palette="Set1") 50 | 51 | ggsave("Results/Figures/frequencies.pdf", width = 33, height = 19) 52 | 53 | # fido's pibble model https://jsilve24.github.io/fido/articles/introduction-to-fido.html 54 | # create counts matrix with state 55 | counts=as.data.frame.matrix(table(data$Final_HTO, data$celltypes)) 56 | counts$disease = sapply(strsplit(rownames(counts),"-"), `[`, 1) 57 | counts$inflammation = sapply(strsplit(rownames(counts),"-"), `[`, 2) 58 | counts$state = apply(counts[ ,c(44,45)] , 1 , paste , collapse = "-" ) 59 | counts = counts[,-c(44,45)] 60 | counts$state = as.factor(counts$state) 61 | #counts$state = relevel(counts$state, ref = "UC-I") #to set reference, otherwise HC-NI is default 62 | 63 | # remove doublets and MT-Hi 64 | counts=counts[,-c(6,14,20,26,33)] 65 | 66 | # create priors 67 | X = t(model.matrix(~state, data=counts)) 68 | X[,1:5] 69 | Y = t(counts[,-39]) 70 | Y[1:5,1:5] 71 | upsilon <- nrow(Y)+3 72 | theta <- matrix(0, nrow(Y)-1, nrow(X)) 73 | gamma <- diag(nrow(X)) 74 | G <- cbind(diag(nrow(Y)-1), -1) 75 | Xi <- (upsilon-nrow(Y))*G%*%diag(nrow(Y))%*%t(G) 76 | priors <- pibble(Y, X, upsilon, theta, gamma, Xi) 77 | print(priors) 78 | 79 | # change model to clr 80 | priors <- to_clr(priors) 81 | summary(priors, pars="Lambda") 82 | names_covariates(priors) <- rownames(X) 83 | 84 | #plot log probabilites for each state 85 | plot(priors, par="Lambda") + ggplot2::xlim(c(-10, 10)) 86 | 87 | #check if model fits data (checked by Johannes) 88 | priors$Y <- Y 89 | posterior <- refit(priors, optim_method="adam") 90 | ppc(posterior) + ggplot2::coord_cartesian(ylim=c(0, 500)) #I changed axis from 30000 to 500 for visualization 91 | ppc_summary(posterior) 92 | ppc(posterior, from_scratch=TRUE) +ggplot2::coord_cartesian(ylim=c(0, 500)) 93 | ppc_summary(posterior, from_scratch=TRUE) 94 | 95 | # look at posterior distribution of regression parameters 96 | posterior_summary <- summary(posterior, pars="Lambda")$Lambda 97 | 98 | # focus on coordinates with non-zero effect ("significant") for UC-I 99 | posterior_summary <- filter(posterior_summary, covariate=="stateUC-I") 100 | focus <- posterior_summary[sign(posterior_summary$p2.5) == sign(posterior_summary$p97.5),] 101 | focus <- unique(focus$coord) 102 | plot(posterior, par="Lambda", focus.coord = focus, focus.cov = rownames(X)[4]) + geom_vline(xintercept=0, linetype="dashed", color="red") 103 | 104 | # create list of significant celltypes for each state 105 | posterior_summary <- summary(posterior, pars="Lambda")$Lambda 106 | state_focus_taxa <- vector("list", length(rownames(X)[2:5])) 107 | names(state_focus_taxa) <- rownames(X)[2:5] 108 | for(state in rownames(X)[2:5]) { 109 | post_summary <- filter(posterior_summary, covariate %in% state) 110 | focus <- post_summary[sign(post_summary$p2.5) == sign(post_summary$p97.5),] 111 | focus <- unique(focus$coord) 112 | state_focus_taxa[[state]] <- focus 113 | } 114 | 115 | # create square plot for differences 116 | mat_state <- diag(length(rownames(X)[2:5])) 117 | rownames(mat_state) <- colnames(mat_state) <- rownames(X)[2:5] 118 | state_pairs <- data.frame(t(combn(rownames(X)[2:5], 2))) 119 | for(pair in 1:nrow(state_pairs)){ 120 | print(pair) 121 | mat_state[state_pairs[pair,]$X1, state_pairs[pair,]$X2] <- length(setdiff(state_focus_taxa[[state_pairs[pair,]$X1]], state_focus_taxa[[state_pairs[pair,]$X2]])) 122 | mat_state[state_pairs[pair,]$X2, state_pairs[pair,]$X1] <- length(setdiff(state_focus_taxa[[state_pairs[pair,]$X2]], state_focus_taxa[[state_pairs[pair,]$X1]])) 123 | } 124 | corrplot::corrplot(mat_state, method="square", is.corr=F, addCoef.col = "black", diag = F, tl.col="black", cl.pos="n") 125 | 126 | # create square plot for agreement 127 | mat_state <- diag(length(rownames(X)[2:5])) 128 | rownames(mat_state) <- colnames(mat_state) <- rownames(X)[2:5] 129 | state_pairs <- data.frame(t(combn(rownames(X)[2:5], 2))) 130 | for(pair in 1:nrow(state_pairs)){ 131 | print(pair) 132 | mat_state[state_pairs[pair,]$X1, state_pairs[pair,]$X2] <- length(intersect(state_focus_taxa[[state_pairs[pair,]$X1]], state_focus_taxa[[state_pairs[pair,]$X2]])) 133 | mat_state[state_pairs[pair,]$X2, state_pairs[pair,]$X1] <- length(intersect(state_focus_taxa[[state_pairs[pair,]$X2]], state_focus_taxa[[state_pairs[pair,]$X1]])) 134 | } 135 | corrplot::corrplot(mat_state, method="square", type = "lower", is.corr=F, addCoef.col = "black", diag = F, tl.col="black", cl.pos="n") 136 | 137 | -------------------------------------------------------------------------------- /create_raw_datafile_from_seq_data.md: -------------------------------------------------------------------------------- 1 | **Use in R** 2 | ==== 3 | 4 | Autor: WTC 5 | Date: 20172010 6 | 7 | **preparing raw data per patient** 8 | ---- 9 | ``` 10 | dir.create(("..")) 11 | ``` 12 | change folder accordingly 13 | ``` 14 | setwd("..") 15 | ``` 16 | list all files from working directory with a similar pattern 17 | ``` 18 | temp= list.files(pattern="UMIs_counts_per_gene_exon") ## lists input files per pool, change pattern for using either UMIs or TPMs/datacounts 19 | ``` 20 | paste pre- and suffix to all colnames, being PATIENT_CELLTYPE_CELLBARCODEPOOLNUMBER, except for column "GENE" 21 | column GENE is renamed to ensembl_gene_id for merging with annotation file 22 | unique name is assigned to pool 23 | ``` 24 | number_iel=10 25 | number_lpl=20 26 | number_blood=1 27 | for (i in 1:length(temp)) { ## paste suffix to all colnames except for column "GENE" and assigns name to file accordingly 28 | if (grepl("IEL", temp[i])){ 29 | my_file=read.table(temp[i], header=TRUE) ## make a table of the first file in the list temp, with colnames in the header 30 | my_file=as.data.frame(my_file) ## makes a data.frame of this file 31 | colnames(my_file) <- paste("1_IEL_", colnames(my_file), number_iel,sep = "") # Hi_IEL_ as prefix to column names and poolnr as suffix 32 | colnames(my_file)[1]="ensembl_gene_id" # changes the first column back to GENE in order to be able to merge later 33 | assign(paste0("prePool",number_iel), my_file) 34 | rm(my_file) 35 | number_iel=number_iel+1 # makes sure the cells from the next prepool will be numbered accordingly 36 | } else if (grepl("LPL", temp[i])){ # takes all files in the list temp with LPL in the file name 37 | 38 | my_file=read.table(temp[i], header=TRUE) ## make a table of the first file in the list temp, with colnames in the header 39 | my_file=as.data.frame(my_file) ## makes a data.frame of this file 40 | colnames(my_file) <- paste("1_LPL_", colnames(my_file), number_lpl,sep = "") # pastes Hi_LPL_ as prefix to column names and poolnr as suffix 41 | colnames(my_file)[1]="ensembl_gene_id" # changes the first column back to ensembl_gene_id in order to be able to merge later 42 | assign(paste0("prePool",number_lpl), my_file) # assignes a unique poolname to each prePool file 43 | rm(my_file) 44 | number_lpl=number_lpl+1 # makes sure the cells from the next prepool will be numbered accordingly 45 | } else if (grepl("blood", temp[i])){ # takes all files in the list temp with blood in the file name 46 | my_file=read.table(temp[i], header=TRUE) ## make a table of the first file in the list temp, with colnames in the header 47 | my_file=as.data.frame(my_file) ## makes a data.frame of this file 48 | colnames(my_file) <- paste("1_BLOOD_", colnames(my_file), number_blood,sep = "") # pastes Hi_BLOOD_ as prefix to column names and poolnr as suffix 49 | colnames(my_file)[1]="ensembl_gene_id" # changes the first column back to ensembl_gene_id in order to be able to merge later 50 | assign(paste0("prePool",number_blood), my_file) # assignes a unique poolname to each prePool file 51 | rm(my_file) 52 | number_blood=number_blood+1 # makes sure the cells from the next prepool will be numbered accordingly 53 | } else {print ("Error!!!")} 54 | } 55 | ``` 56 | 57 | merge files named 'prePoolx' by ensembl_gene_id, keeping all genes, and change column ensembl_gene_id to rownames and store as numeric data matrix first_file 58 | ``` 59 | my_count=1 60 | for (i in mget(ls(pattern="prePool"))) { ## mget searches for an r-object with a given name, ls makes sure the search is within the global environment 61 | if (my_count==1){ ##names the first file i that is found hereabove 'first_file' 62 | first_file=i 63 | my_count=my_count+1 ## makes sure the second file i enters the 'else' part 64 | } 65 | else{ 66 | first_file=merge(first_file,i, by="ensembl_gene_id", all=TRUE) ## merges first file with all the other files in the global environment that have the patter prePool. set all=F for merging only genes expressed in all pools 67 | } 68 | first_file[is.na(first_file)]<-0 ## all NAs to zero 69 | rm(i) 70 | } 71 | ``` 72 | merge files keeping only genes expressed in all pools, creating first_file_reduced 73 | ``` 74 | my_count=1 75 | for (i in mget(ls(pattern="prePool"))) { ## mget searches for an r-object with a given name, ls makes sure the search is within the global environment 76 | if (my_count==1){ ##names the first file i that is found hereabove 'first_file' 77 | first_file_reduced=i 78 | my_count=my_count+1 ## makes sure the second file i enters the 'else' part 79 | } 80 | else{ 81 | first_file_reduced=merge(first_file_reduced,i, by="ensembl_gene_id", all=FALSE) ## merges second_file by all=F for merging only genes expressed in all pools 82 | } 83 | rm(i) 84 | } 85 | first_file_pt1<-first_file 86 | ``` 87 | repeat these steps for all patients 88 | 89 | **merge raw data from all patients** 90 | --- 91 | ``` 92 | second_file<-merge(first_file_pt1, first_file_pt2, by="ensembl_gene_id", all=TRUE) 93 | second_file<-merge(second_file, first_file_pt3, by="ensembl_gene_id", all=TRUE) 94 | second_file<-merge(second_file, first_file_pt3.1, by="ensembl_gene_id", all=TRUE) 95 | ``` 96 | 97 | **enter gene annotation** 98 | get annotation file 99 | ``` 100 | ensembl_geneName_mapping<-read.csv("..") 101 | ``` 102 | merge second_file with annotationfile 103 | ``` 104 | second_file<-merge(ensembl_geneName_mapping, second_file, by="ensembl_gene_id") 105 | ``` 106 | paste ensembl gene, chromosome name and genesymbol 107 | ``` 108 | second_file$ensembl_gene_id<-paste(second_file$ensembl_gene_id, second_file$NEW, sep="_") 109 | row.names(second_file)=second_file$ensembl_gene_id 110 | third_file<-second_file # assign the second_file file to third_file 111 | ``` 112 | delete non-numeric columns in third_file 113 | ``` 114 | fourth_file<-third_file[,-c(1:10)] # remove all non-numeric columns 115 | View(fourth_file) 116 | fourth_file[is.na(fourth_file)]<-0 ## all NAs to zero 117 | fourth_file.data=fourth_file 118 | ``` 119 | 120 | **save raw data file containing all data** 121 | ``` 122 | save(fourth_file, file="..") 123 | ``` 124 | -------------------------------------------------------------------------------- /Vedo2/step_4_integration_biop_noribo_nonorm: -------------------------------------------------------------------------------- 1 | library(Seurat) 2 | library(ggplot2) 3 | 4 | options(future.globals.maxSize = 150000 * 1024^2) 5 | 6 | lane11_1<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200611_lane1_nomito_sct.rds") 7 | DefaultAssay(lane11_1)<-"SCT" 8 | lane11_2<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200611_lane2_nomito_sct.rds") 9 | DefaultAssay(lane11_2)<-"SCT" 10 | lane11_3<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200611_lane3_nomito_sct.rds") 11 | DefaultAssay(lane11_3)<-"SCT" 12 | lane11_4<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200611_lane4_nomito_sct.rds") 13 | DefaultAssay(lane11_4)<-"SCT" 14 | lane12_1<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200612_lane1_nomito_sct.rds") 15 | DefaultAssay(lane12_1)<-"SCT" 16 | lane12_2<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200612_lane2_nomito_sct.rds") 17 | DefaultAssay(lane12_2)<-"SCT" 18 | lane12_3<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200612_lane3_nomito_sct.rds") 19 | DefaultAssay(lane12_3)<-"SCT" 20 | lane12_4<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200612_lane4_nomito_sct.rds") 21 | DefaultAssay(lane12_4)<-"SCT" 22 | lane25_1<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200625_lane1_nomito_sct.rds") 23 | DefaultAssay(lane25_1)<-"SCT" 24 | lane25_2<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200625_lane2_nomito_sct.rds") 25 | DefaultAssay(lane25_2)<-"SCT" 26 | lane25_3<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200625_lane3_nomito_sct.rds") 27 | DefaultAssay(lane25_3)<-"SCT" 28 | lane25_4<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200625_lane4_nomito_sct.rds") 29 | DefaultAssay(lane25_4)<-"SCT" 30 | lane26_1<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200626_lane1_nomito_sct.rds") 31 | DefaultAssay(lane26_1)<-"SCT" 32 | lane26_2<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200626_lane2_nomito_sct.rds") 33 | DefaultAssay(lane26_2)<-"SCT" 34 | lane26_3<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200626_lane3_nomito_sct.rds") 35 | DefaultAssay(lane26_3)<-"SCT" 36 | lane26_4<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200626_lane4_nomito_sct.rds") 37 | DefaultAssay(lane26_4)<-"SCT" 38 | Vedo01_T0_NI<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/VEDO01_T0_NI_sct.rds") 39 | DefaultAssay(Vedo01_T0_NI)<-"SCT" 40 | Vedo01_T0_I<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/VEDO01_T0_I_sct.rds") 41 | DefaultAssay(Vedo01_T0_I)<-"SCT" 42 | 43 | lane11_1[["percent.ribo"]] <- PercentageFeatureSet(lane11_1, pattern = "^RPL|^RPS") 44 | lane11_2[["percent.ribo"]] <- PercentageFeatureSet(lane11_2, pattern = "^RPL|^RPS") 45 | lane11_3[["percent.ribo"]] <- PercentageFeatureSet(lane11_3, pattern = "^RPL|^RPS") 46 | lane11_4[["percent.ribo"]] <- PercentageFeatureSet(lane11_4, pattern = "^RPL|^RPS") 47 | lane12_1[["percent.ribo"]] <- PercentageFeatureSet(lane12_1, pattern = "^RPL|^RPS") 48 | lane12_2[["percent.ribo"]] <- PercentageFeatureSet(lane12_2, pattern = "^RPL|^RPS") 49 | lane12_3[["percent.ribo"]] <- PercentageFeatureSet(lane12_3, pattern = "^RPL|^RPS") 50 | lane12_4[["percent.ribo"]] <- PercentageFeatureSet(lane12_4, pattern = "^RPL|^RPS") 51 | lane25_1[["percent.ribo"]] <- PercentageFeatureSet(lane25_1, pattern = "^RPL|^RPS") 52 | lane25_2[["percent.ribo"]] <- PercentageFeatureSet(lane25_2, pattern = "^RPL|^RPS") 53 | lane25_3[["percent.ribo"]] <- PercentageFeatureSet(lane25_3, pattern = "^RPL|^RPS") 54 | lane25_4[["percent.ribo"]] <- PercentageFeatureSet(lane25_4, pattern = "^RPL|^RPS") 55 | lane26_1[["percent.ribo"]] <- PercentageFeatureSet(lane26_1, pattern = "^RPL|^RPS") 56 | lane26_2[["percent.ribo"]] <- PercentageFeatureSet(lane26_2, pattern = "^RPL|^RPS") 57 | lane26_3[["percent.ribo"]] <- PercentageFeatureSet(lane26_3, pattern = "^RPL|^RPS") 58 | lane26_4[["percent.ribo"]] <- PercentageFeatureSet(lane26_4, pattern = "^RPL|^RPS") 59 | Vedo01_T0_NI[["percent.ribo"]] <- PercentageFeatureSet(Vedo01_T0_NI, pattern = "^RPL|^RPS") 60 | Vedo01_T0_I[["percent.ribo"]] <- PercentageFeatureSet(Vedo01_T0_I, pattern = "^RPL|^RPS") 61 | 62 | alldata_vedo2_batch1_biop_integration_list <- list(lane11_1, lane11_2, lane11_3, lane11_4, lane12_1, lane12_2, lane12_3, lane12_4, lane25_1, lane25_2, lane25_3, lane25_4, lane26_1, lane26_2, lane26_3, lane26_4, Vedo01_T0_NI, Vedo01_T0_I) 63 | 64 | print("loaded") 65 | features <- SelectIntegrationFeatures(object.list = alldata_vedo2_batch1_biop_integration_list, nfeatures = 3000) 66 | print("features_selected") 67 | alldata_vedo2_batch1_biop_integration_list <- PrepSCTIntegration(object.list = alldata_vedo2_batch1_biop_integration_list, anchor.features = features) 68 | print("integration_prepped") 69 | anchors <- FindIntegrationAnchors(object.list = alldata_vedo2_batch1_biop_integration_list, anchor.features = features, normalization.method = "SCT") 70 | 71 | #when using reference lanes 72 | #anchors <- FindIntegrationAnchors(object.list = alldata_vedo2_batch1_biop_integration_list, anchor.features = features, normalization.method = "SCT", reference = c(1,4,7,8,16)) 73 | 74 | print("achors_found") 75 | rm(alldata_vedo2_batch1_biop_integration_list) 76 | alldata.integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT") 77 | print("integrated") 78 | rm(anchors) 79 | alldata.integrated <- RunPCA(alldata.integrated, verbose = FALSE) 80 | print("pca2_ran") 81 | alldata.integrated <- RunUMAP(alldata.integrated, dims = 1:30) 82 | print("umap_ran") 83 | alldata.integrated <- FindNeighbors(alldata.integrated, dims = 1:30) 84 | alldata.integrated <- FindClusters(alldata.integrated, resolution = 0.5) 85 | 86 | DefaultAssay(alldata.integrated) <- "RNA" 87 | 88 | saveRDS(alldata.integrated, file="/groups/umcg-weersma/tmp01/Emilia/batch1_integration/vedo2_batch1_biop_integrated_noribo_nonorm_withmito.rds") 89 | -------------------------------------------------------------------------------- /PSC/Proportion_analysis.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(Seurat) 3 | library(patchwork) 4 | library(ggplot2) 5 | library(readr) 6 | library(tidyr) 7 | library(MAST) 8 | library(readxl) 9 | library(openxlsx) 10 | library(reshape2) 11 | library(phyloseq) 12 | library(fido) 13 | library(compositions) 14 | 15 | # load in dataset 16 | data<- readRDS("/Users/amberbangma/Documents/R/PSC/Data/PSC_processed_oct.rds") 17 | DefaultAssay(data) = "RNA" 18 | 19 | # extract counts to matrix 20 | counts=as.data.frame.matrix(table(data$Final_HTO, data$celltypes)) 21 | 22 | # calculate proportions 23 | total_row = apply(counts, 1, sum) 24 | pcts = lapply(counts, function(x) { 25 | x / total_row 26 | }) 27 | frequencies = as.data.frame(pcts) 28 | rownames(counts) = rownames(frequencies) 29 | 30 | # add state 31 | frequencies$disease = sapply(strsplit(rownames(frequencies),"-"), `[`, 1) 32 | frequencies$inflammation = sapply(strsplit(rownames(frequencies),"-"), `[`, 2) 33 | frequencies$state = apply( frequencies[ ,c(43,44)] , 1 , paste , collapse = "-" ) 34 | frequencies = frequencies[,-c(43,44)] 35 | 36 | # remove doublets and MT-Hi 37 | frequencies=frequencies[,-c(6,10,15,19,25,32)] 38 | 39 | # plot frequencies 40 | df = melt(frequencies) 41 | df$state = factor(df$state, levels = c("HC-NI", "UC-NI", "UC-I", "PSC-NI", "PSC-I")) 42 | df$variable = factor(df$variable, levels = c("Absorptive_enterocyte","Immature_enterocyte","BEST4_enterocyte","DUOX2_enterocyte","Absorptive_TA","PLCG2_TA","Ribo_TA","REG_TA","Cycling_TA","Stem","Enteroendocrine","Immature_goblet","Goblet","Tuft","MAST","glia","WNT2B.","WNT5B.","RSPO3.","Inflammatory_fibroblast","myofibroblasts","pericytes","endothelial","IgG_plasma","IgA_plasma","IgM_plasma","Cycling_B","GC","Follicular_B","CD8T","T_activated_FOS_low","T_activated_FOS_high","CD4_T_memory", "Treg","Cycling_T","APC")) 43 | ggplot(df, aes(x=state, y=value, fill=state)) + 44 | geom_boxplot(outlier.size = 1) + 45 | facet_wrap( ~ variable, scales="free") + 46 | theme_bw(base_size = 14) + 47 | scale_fill_brewer(palette="Set1") 48 | 49 | #ggsave("Results/Figures/frequencies.pdf", width = 33, height = 19) 50 | 51 | # fido's pibble model https://jsilve24.github.io/fido/articles/introduction-to-fido.html 52 | # create counts matrix with state 53 | counts=as.data.frame.matrix(table(data$Final_HTO, data$celltypes)) 54 | counts$disease = sapply(strsplit(rownames(counts),"-"), `[`, 1) 55 | counts$inflammation = sapply(strsplit(rownames(counts),"-"), `[`, 2) 56 | counts$state = apply(counts[ ,c(43,44)] , 1 , paste , collapse = "-" ) 57 | counts = counts[,-c(43,44)] 58 | counts$state = as.factor(counts$state) 59 | #counts$state = relevel(counts$state, ref = "UC-I") #to set reference, otherwise HC-NI is default 60 | 61 | # remove doublets and MT-Hi 62 | counts=counts[,-c(6,10,15,19,25,32)] 63 | 64 | # create priors 65 | X = t(model.matrix(~state, data=counts)) 66 | X[,1:5] 67 | Y = t(counts[,-37]) 68 | Y[1:5,1:5] 69 | upsilon <- nrow(Y)+3 70 | theta <- matrix(0, nrow(Y)-1, nrow(X)) 71 | gamma <- diag(nrow(X)) 72 | G <- cbind(diag(nrow(Y)-1), -1) 73 | Xi <- (upsilon-nrow(Y))*G%*%diag(nrow(Y))%*%t(G) 74 | priors <- pibble(Y, X, upsilon, theta, gamma, Xi) 75 | print(priors) 76 | 77 | # change model to clr 78 | priors <- to_clr(priors) 79 | summary(priors, pars="Lambda") 80 | names_covariates(priors) <- rownames(X) 81 | 82 | #plot log probabilites for each state 83 | plot(priors, par="Lambda") + ggplot2::xlim(c(-10, 10)) 84 | 85 | #check if model fits data (checked by Johannes) 86 | priors$Y <- Y 87 | posterior <- refit(priors, optim_method="adam") 88 | ppc(posterior) + ggplot2::coord_cartesian(ylim=c(0, 500)) #I changed axis from 30000 to 500 for visualization 89 | ppc_summary(posterior) 90 | ppc(posterior, from_scratch=TRUE) +ggplot2::coord_cartesian(ylim=c(0, 500)) 91 | ppc_summary(posterior, from_scratch=TRUE) 92 | 93 | # look at posterior distribution of regression parameters 94 | posterior_summary <- summary(posterior, pars="Lambda")$Lambda 95 | 96 | # focus on coordinates with non-zero effect ("significant") for UC-I 97 | posterior_summary <- filter(posterior_summary, covariate=="stateUC-I") 98 | focus <- posterior_summary[sign(posterior_summary$p2.5) == sign(posterior_summary$p97.5),] 99 | focus <- unique(focus$coord) 100 | plot(posterior, par="Lambda", focus.coord = focus, focus.cov = rownames(X)[4]) + geom_vline(xintercept=0, linetype="dashed", color="red") 101 | 102 | # create list of significant celltypes for each state 103 | posterior_summary <- summary(posterior, pars="Lambda")$Lambda 104 | state_focus_taxa <- vector("list", length(rownames(X)[2:5])) 105 | names(state_focus_taxa) <- rownames(X)[2:5] 106 | for(state in rownames(X)[2:5]) { 107 | post_summary <- filter(posterior_summary, covariate %in% state) 108 | focus <- post_summary[sign(post_summary$p2.5) == sign(post_summary$p97.5),] 109 | focus <- unique(focus$coord) 110 | state_focus_taxa[[state]] <- focus 111 | } 112 | 113 | # create square plot for differences 114 | mat_state <- diag(length(rownames(X)[2:5])) 115 | rownames(mat_state) <- colnames(mat_state) <- rownames(X)[2:5] 116 | state_pairs <- data.frame(t(combn(rownames(X)[2:5], 2))) 117 | for(pair in 1:nrow(state_pairs)){ 118 | print(pair) 119 | mat_state[state_pairs[pair,]$X1, state_pairs[pair,]$X2] <- length(setdiff(state_focus_taxa[[state_pairs[pair,]$X1]], state_focus_taxa[[state_pairs[pair,]$X2]])) 120 | mat_state[state_pairs[pair,]$X2, state_pairs[pair,]$X1] <- length(setdiff(state_focus_taxa[[state_pairs[pair,]$X2]], state_focus_taxa[[state_pairs[pair,]$X1]])) 121 | } 122 | corrplot::corrplot(mat_state, method="square", is.corr=F, addCoef.col = "black", diag = F, tl.col="black", cl.pos="n") 123 | 124 | # create square plot for agreement 125 | mat_state <- diag(length(rownames(X)[2:5])) 126 | rownames(mat_state) <- colnames(mat_state) <- rownames(X)[2:5] 127 | state_pairs <- data.frame(t(combn(rownames(X)[2:5], 2))) 128 | for(pair in 1:nrow(state_pairs)){ 129 | print(pair) 130 | mat_state[state_pairs[pair,]$X1, state_pairs[pair,]$X2] <- length(intersect(state_focus_taxa[[state_pairs[pair,]$X1]], state_focus_taxa[[state_pairs[pair,]$X2]])) 131 | mat_state[state_pairs[pair,]$X2, state_pairs[pair,]$X1] <- length(intersect(state_focus_taxa[[state_pairs[pair,]$X2]], state_focus_taxa[[state_pairs[pair,]$X1]])) 132 | } 133 | corrplot::corrplot(mat_state, method="square", type = "lower", is.corr=F, addCoef.col = "black", diag = F, tl.col="black", cl.pos="n") 134 | 135 | 136 | -------------------------------------------------------------------------------- /PSC/Celltype_new.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(Seurat) 3 | library(patchwork) 4 | library(ggplot2) 5 | library(readr) 6 | library(tidyr) 7 | library(MAST) 8 | library(readxl) 9 | library(openxlsx) 10 | 11 | data<- readRDS("Data/PSC_202002_integrated_v2_noribo.rds") 12 | data=UpdateSeuratObject(data) 13 | DimPlot(data, label = T) + NoLegend() 14 | 15 | data@meta.data$disease = sapply(strsplit(data$Final_HTO,"-"), `[`, 1) 16 | data@meta.data$inflammation = sapply(strsplit(data$Final_HTO,"-"), `[`, 2) 17 | data@meta.data$sample = sapply(strsplit(data$Final_HTO,"-"), `[`, 3) 18 | data@meta.data$state <- paste(data$disease, data$inflammation, sep='-') 19 | 20 | # recluster subset 21 | # epi = average expression EPCAM > 5 22 | epi<-subset(data, idents = c("6","7","17","19","0","14","10","3","21")) 23 | DefaultAssay(epi)<-"integrated" 24 | epi<- RunPCA(epi) 25 | epi <- RunUMAP(epi, dims = 1:30) 26 | epi<-FindNeighbors(epi, dims = 1:30) 27 | epi<-FindClusters(epi, resolution = 0.4) 28 | DimPlot(epi, label=T) 29 | DefaultAssay(epi)<-"RNA" 30 | #markers_epi <- FindAllMarkers(epi, only.pos = TRUE, min.pct = 0.25) 31 | #selected_markers_epi <- markers_epi %>% group_by(cluster) %>% top_n(n = 3, wt = avg_logFC) 32 | new.ident=c("Immature_enterocyte", "Cycling_TA", "Absorptive_enterocyte", "Ribo_TA","Stem", "MT-Hi-enterocyte", "BEST4_enterocyte", "Immature_goblet", "DUOX2_enterocyte", "PLCG2_TA", "Absorptive_TA", "Tuft", "REG_TA","Enteroendocrine", "Goblet", "MAST") 33 | names(new.ident) <- levels(epi) 34 | epi=RenameIdents(epi,new.ident) 35 | DimPlot(epi, label=T) 36 | epi[["celltypes"]] <- Idents(object = epi) 37 | meta_epi<-epi@meta.data 38 | 39 | # leuko = average expression PTPRC (CD45) > 5 40 | leuko<-subset(data, idents = c("2","11","18","24","27","15","4","26")) 41 | DefaultAssay(leuko)<-"integrated" 42 | leuko<- RunPCA(leuko) 43 | leuko <- RunUMAP(leuko, dims = 1:30) 44 | leuko<-FindNeighbors(leuko, dims = 1:30) 45 | leuko<-FindClusters(leuko, resolution = 0.8) 46 | DimPlot(leuko, label=T) 47 | DefaultAssay(leuko)<-"RNA" 48 | #markers_leuko <- FindAllMarkers(leuko, only.pos = TRUE, min.pct = 0.25) 49 | #selected_markers_leuko <- markers_leuko %>% group_by(cluster) %>% top_n(n = 3, wt = avg_logFC) 50 | new.idents<-c("Follicular_B", "T_activated_FOS_low", "Treg", "CD4_T_memory", "MT_Hi_T", "CD8T", "APC", "CD8T", "T_activated_FOS_high", "Follicular_B", "APC", "MT_Hi_B", "Doublets", "GC", "MT_Hi_T", "Cycling_B", "APC", "Cycling_T", "Doublets") 51 | names(new.idents) <- levels(leuko) 52 | leuko=RenameIdents(leuko,new.idents) 53 | DimPlot(leuko, label=T) 54 | leuko[["celltypes"]] <- Idents(object = leuko) 55 | meta_leuko<-leuko@meta.data 56 | 57 | # stromal = average expression THY1 > 0.4 (fibroblast), SOX10 > 2 (glia), MADCAM1 > 1 (endotheel) 58 | stromal<-subset(data, idents = c("9", "13", "20", "22", "25")) 59 | DefaultAssay(stromal)<-"integrated" 60 | stromal<- RunPCA(stromal) 61 | stromal <- RunUMAP(stromal, dims = 1:30) 62 | stromal <-FindNeighbors(stromal, dims = 1:30) 63 | stromal <-FindClusters(stromal, resolution = 0.4) 64 | DimPlot(stromal, label=T) 65 | DefaultAssay(stromal)<-"RNA" 66 | #markers_stromal <- FindAllMarkers(stromal, only.pos = TRUE) 67 | new.idents<-c("WNT5B+", "Inflammatory_fibroblast", "RSPO3+", "WNT2B+", "endothelial", "endothelial", "Mt-Hi_stromal", "pericytes", "glia", "myofibroblasts", "endothelial", "Doublets") 68 | names(new.idents) <- levels(stromal) 69 | stromal=RenameIdents(stromal,new.idents) 70 | DimPlot(stromal, label=T) 71 | stromal[["celltypes"]] <- Idents(object = stromal) 72 | DimPlot(stromal) 73 | meta_stromal<-stromal@meta.data 74 | 75 | # plasma = remaining cells 76 | plasma<-subset(data, idents = c("1", "16", "12", "23", "5", "8")) 77 | DefaultAssay(plasma)<-"integrated" 78 | plasma<- RunPCA(plasma) 79 | plasma <- RunUMAP(plasma, dims = 1:30) 80 | plasma <-FindNeighbors(plasma, dims = 1:30) 81 | plasma <-FindClusters(plasma, resolution = 0.25) 82 | DimPlot(plasma, label=T) 83 | DefaultAssay(plasma)<-"RNA" 84 | #markers_plasma <- FindAllMarkers(plasma, only.pos = TRUE) 85 | new.idents<-c("IgA_plasma", "IgA_plasma", "IgG_plasma", "IgA_plasma", "IgG_plasma", "MT-Hi_plasma", "IgM_plasma", "IgA_plasma", "IgM_plasma") 86 | names(new.idents) <- levels(plasma) 87 | plasma=RenameIdents(plasma,new.idents) 88 | DimPlot(plasma, label=T) 89 | plasma[["celltypes"]] <- Idents(object = plasma) 90 | DimPlot(plasma) 91 | meta_plasma<-plasma@meta.data 92 | 93 | # create meta_all 94 | colnames(meta_stromal) 95 | colnames(meta_leuko) 96 | colnames(meta_plasma) 97 | colnames(meta_epi) 98 | meta_plasma<-meta_plasma[c(1,29)] 99 | meta_leuko<-meta_leuko[c(1,29)] 100 | meta_stromal<-meta_stromal[c(1,29)] 101 | meta_epi <- meta_epi[c(1,29)] 102 | x<-rbind(meta_stromal, meta_leuko) 103 | x<-rbind(x, meta_plasma) 104 | x<-rbind(x, meta_epi) 105 | write.csv(x, "Data/meta_all.csv") 106 | 107 | # add celltypes 108 | CellsMeta<-data.frame(data@meta.data) 109 | x$NAME<-rownames(x) 110 | CellsMeta$NAME<-row.names(CellsMeta) 111 | x<-x[,c(2,3)] 112 | row.names(CellsMeta)=NULL 113 | row.names(x)<-NULL 114 | keeping.order <- function(data, fn, ...) { 115 | col <- ".sortColumn" 116 | data[,col] <- 1:nrow(data) 117 | out <- fn(data, ...) 118 | if (!col %in% colnames(out)) stop("Ordering column not preserved by function") 119 | out <- out[order(out[,col]),] 120 | out[,col] <- NULL 121 | out 122 | } 123 | CellsMeta<-keeping.order(CellsMeta, merge, y=x, by = "NAME", all=T) 124 | CellsMeta<-CellsMeta[,c(1,29)] 125 | rownames(CellsMeta)<-CellsMeta$NAME 126 | data<-AddMetaData(data, CellsMeta) 127 | Idents(data)=data$celltypes 128 | DimPlot(data, label = T, repel = T) 129 | 130 | data = subset(data, subset = celltypes != "MT_Hi_T") 131 | data = subset(data, subset = celltypes != "Doublets") 132 | data = subset(data, subset = celltypes != "MT_Hi_B") 133 | data = subset(data, subset = celltypes != "MT-Hi_plasma") 134 | data = subset(data, subset = celltypes != "Mt-Hi_stromal") 135 | data = subset(data, subset = celltypes != "MT-Hi-enterocyte") 136 | 137 | Idents(data)=data$celltypes 138 | DimPlot(data, label = T, repel = T) 139 | 140 | DefaultAssay(data)<-"integrated" 141 | data <- RunUMAP(data, dims = 1:30) 142 | data <-FindNeighbors(data, dims = 1:30) 143 | data<-FindClusters(data, resolution = 0.4) 144 | 145 | Idents(data)=data$celltypes 146 | DimPlot(data, label = T, repel = T, order = c("T_activated_FOS_low", "Absorptive_enterocyte", "IgA_plasma", "WNT5B+", "Aborptive_TA")) + NoLegend() 147 | 148 | data = subset(data, subset = sample != "3296") 149 | 150 | saveRDS(data, "Data/PSC_processed_oct.rds") 151 | 152 | # create figures -------------------------------------------------------------------------------- /Vedo2/step_5_Vedo2_celltyping_biop_log.R: -------------------------------------------------------------------------------- 1 | # general stuff for sc data 2 | 3 | library(dplyr) 4 | library(Seurat) 5 | library(patchwork) 6 | library(ggplot2) 7 | library(readr) 8 | library(tidyr) 9 | library(MAST) 10 | library(readxl) 11 | library(openxlsx) 12 | 13 | # Load in database 14 | data <- readRDS("Data/batch1_integrated/vedo2_batch1_biop_integrated_noribo_nonorm_withVEDO01.rds") 15 | data = UpdateSeuratObject(object = data) 16 | DefaultAssay(data) = "RNA" 17 | 18 | #not normalized dataset 19 | DimPlot(data) 20 | 21 | #normalization 22 | data <- NormalizeData(data, verbose = FALSE) 23 | DimPlot(data, label = T) +NoLegend() 24 | 25 | #add metadata 26 | data$Final_HTO <- sub("V001_T0_ascendens_I", "V001-T0-ascendens-I-3191", data$Final_HTO) 27 | data$Final_HTO <- sub("V001_T0_transversum_NI", "V001-T0-transversum-NI-3191", data$Final_HTO) 28 | data$Final_HTO <- sub("V002-T0-colonasc-NI-3160", "V002-T0-ascendens-NI-3160", data$Final_HTO) 29 | data$Final_HTO <- sub("V005-T0-transvers-NI-3228", "V005-T0-transversum-NI-3228", data$Final_HTO) 30 | data$Final_HTO <- sub("V005-T0-sigmoid-NI-3228", "V005-T0-sigmoid-I-3228", data$Final_HTO) 31 | data@meta.data$patient = sapply(strsplit(data$Final_HTO,"-"), `[`, 1) 32 | data@meta.data$timepoint = sapply(strsplit(data$Final_HTO,"-"), `[`, 2) 33 | data@meta.data$location = sapply(strsplit(data$Final_HTO,"-"), `[`, 3) 34 | #data@meta.data$state <- paste(data$disease, data$inflammation, sep='-') 35 | data@meta.data$inflammation = sapply(strsplit(data$Final_HTO,"-"), `[`, 4) 36 | data@meta.data$sample = sapply(strsplit(data$Final_HTO,"-"), `[`, 5) 37 | 38 | table(data$inflammation, data$Final_HTO) 39 | 40 | #Setting cluster resolution (used on "integrated" assay) 41 | DefaultAssay(data) <- "integrated" 42 | data <- FindClusters(data, resolution = 0.7) 43 | DimPlot(data, label = T) 44 | data <- RunTSNE(data, reduction = "pca", dims = 1:5, seed.use = 1, tsne.method = "Rtsne", dim.embed = 2, reduction.name = "tsne", reduction.key = "tSNE_") 45 | DimPlot(data, reduction = "tsne", label = T) 46 | 47 | # recluster subset 48 | # epi = average expression EPCAM > 4 49 | AverageExpression(data, features = "EPCAM", assays = "RNA") 50 | epi<-subset(data, idents = c("3", "5", "7", "12", "14", "16", "19", "27", "28")) 51 | DefaultAssay(epi)<-"integrated" 52 | epi<- RunPCA(epi) 53 | epi <- RunUMAP(epi, dims = 1:30) 54 | epi<-FindNeighbors(epi, dims = 1:30) 55 | epi<-FindClusters(epi, resolution = 0.6) 56 | DimPlot(epi, label=T) 57 | DefaultAssay(epi)<-"RNA" 58 | FeaturePlot(epi, features = c("BEST4", "DUOX2", "MUC2")) 59 | 60 | #epi markers 61 | markers_epi <- FindAllMarkers(epi, only.pos = TRUE, min.pct = 0.25) 62 | write.csv(markers_epi, "Results/markergenes/markers_epi_log.csv") 63 | #to export only top (10/20/etc) gene markers 64 | selected_markers_epi <- markers_epi %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC) 65 | write.csv(selected_markers_epi, "Results/markergenes/selected_markers_epi_log.csv") 66 | 67 | # leuko = average expression PTPRC (CD45) > 3 68 | AverageExpression(data, features = "PTPRC", assays = "RNA") 69 | leuko<-subset(data, idents = c("6", "8", "11", "13","15","17","22","23","24","31")) 70 | DefaultAssay(leuko)<-"integrated" 71 | leuko<- RunPCA(leuko) 72 | leuko <- RunUMAP(leuko, dims = 1:30) 73 | leuko<-FindNeighbors(leuko, dims = 1:30) 74 | leuko<-FindClusters(leuko, resolution = 0.8) 75 | DimPlot(leuko, label=T) 76 | DefaultAssay(leuko)<-"RNA" 77 | FeaturePlot(leuko, features = c("CTSG", "IL17A")) 78 | 79 | #leuko markers 80 | markers_leuko <- FindAllMarkers(leuko, only.pos = TRUE, min.pct = 0.25) 81 | write.csv(markers_leuko, "Results/markergenes/markers_leuko_log.csv") 82 | selected_markers_leuko <- markers_leuko %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC) 83 | write.csv(selected_markers_leuko, "Results/markergenes/selected_markers_leuko_log.csv") 84 | 85 | 86 | # stromal = average expression THY1 > 1 (fibroblast), SOX10 > 2 (glia), MADCAM1 > 1 (endotheel) 87 | AverageExpression(data, features = "THY1", assays = "RNA") 88 | AverageExpression(data, features = "SOX10", assays = "RNA") 89 | AverageExpression(data, features = "MADCAM1", assays = "RNA") 90 | stromal<-subset(data, idents = c("1", "9", "20", "25", "26", "29")) 91 | DefaultAssay(stromal)<-"integrated" 92 | stromal<- RunPCA(stromal) 93 | stromal <- RunUMAP(stromal, dims = 1:30) 94 | stromal <-FindNeighbors(stromal, dims = 1:30) 95 | stromal <-FindClusters(stromal, resolution = 0.4) 96 | DimPlot(stromal, label=T) 97 | DefaultAssay(stromal)<-"RNA" 98 | FeaturePlot(stromal, features = c("WNT2B", "WNT5B", "MADCAM1", "RSPO3", "SOX6")) 99 | 100 | #subcluster again (only clusters 1,5 and 6)to find IAFs 101 | fibro <- subset(stromal, idents = c("1", "5", "6")) 102 | DefaultAssay(fibro)<-"integrated" 103 | fibro <- RunPCA(fibro) 104 | fibro <- RunUMAP(fibro, dims = 1:30) 105 | fibro <-FindNeighbors(fibro, dims = 1:30) 106 | fibro <-FindClusters(fibro, resolution = 0.4) 107 | DimPlot(fibro, label=T) 108 | DefaultAssay(fibro)<-"RNA" 109 | FeaturePlot(fibro, features = c("WNT2B", "WNT5B", "WNT5A", "SOX6", "RSPO3", "TNFRSF11B", "IL11", "IL13RA2", "CHI3L1", "IL6","CXCL3", "IL33", "MMP3", "CCL19"), cols = c("grey", "red"), min.cutoff = "q10", max.cutoff = "q90") 110 | FeaturePlot(fibro, features = c("IL11", "IL13RA2", "CHI3L1"), cols = c("grey", "red"), split.by = "inflammation",min.cutoff = "q10", max.cutoff = "q90") 111 | DotPlot(fibro, features = c("WNT2B", "WNT5B", "WNT5A", "SOX6", "RSPO3", "TNFRSF11B", "IL11", "IL13RA2", "CHI3L1", "IL6","CXCL3", "IL33", "MMP3", "CCL19"), cols = c("grey", "red"), dot.scale = 6) + RotatedAxis() + coord_flip() 112 | DotPlot(fibro, features = c("WNT2B", "WNT5B", "WNT5A", "SOX6", "RSPO3", "TNFRSF11B", "IL11", "IL13RA2", "CHI3L1", "IL6","CXCL3", "IL33", "MMP3", "CCL19"), cols = c("green", "blue"),split.by = "timepoint", dot.scale = 6) + RotatedAxis() + coord_flip() 113 | 114 | #stromal markers 115 | markers_stromal <- FindAllMarkers(stromal, only.pos = TRUE, min.pct = 0.25) 116 | write.csv(markers_stromal, "Results/markergenes/markers_stromal_log.csv") 117 | selected_markers_stromal <- markers_stromal %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC) 118 | write.csv(selected_markers_stromal, "Results/markergenes/selected_markers_stromal_log.csv") 119 | 120 | 121 | # plasma = remaining cells 122 | plasma<-subset(data, idents = c("0", "2", "4", "10", "18", "21", "30")) 123 | DefaultAssay(plasma)<-"integrated" 124 | plasma<- RunPCA(plasma) 125 | plasma <- RunUMAP(plasma, dims = 1:30) 126 | plasma <-FindNeighbors(plasma, dims = 1:30) 127 | plasma <-FindClusters(plasma, resolution = 0.1) 128 | DimPlot(plasma, label=T) 129 | DefaultAssay(plasma)<-"RNA" 130 | FeaturePlot(plasma, features = c("IGHG1", "IGHA1", "IGHM")) 131 | 132 | #plasma markers 133 | markers_plasma <- FindAllMarkers(plasma, only.pos = TRUE, min.pct = 0.25) 134 | write.csv(markers_plasma, "Results/markergenes//markers_plasma_log.csv") 135 | selected_markers_plasma <- markers_plasma %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC) 136 | write.csv(selected_markers_plasma, "Results/markergenes/selected_markers_plasma_log.csv") 137 | 138 | -------------------------------------------------------------------------------- /DDTx/find_markers_and_analyze_proportions.R: -------------------------------------------------------------------------------- 1 | ## script for marker finding and proportion analysis DDTx project 2 | # May 16 2023 3 | # WTC 4 | 5 | # open filtered dataset 6 | library(Seurat) 7 | data<-readRDS("/source/ddtx_merged_demultiplexed_clustered_compartment_azi_elmentaiteadultileum_below60pctmito_withoutEpithelialR.rds") 8 | 9 | #Vlnplots to check donor/recipient cellorigin 10 | library(ggplot2) 11 | VlnPlot(data, "CCR6", assay="RNA", group.by="compartment_final", split.by="donor_recipient") # CCR6 higher in donor immune cells (as expected) 12 | ggsave("CCR6_donor_recipient.png", width = 20, height = 5, dpi = 600) 13 | 14 | VlnPlot(data, "SELL", assay="RNA", group.by="compartment_final", split.by="donor_recipient") # CD62L higher in recipient immune cells (as expected) 15 | ggsave("SELL_donor_recipient.png", width = 20, height = 5, dpi = 600) 16 | 17 | #find general markers for donor and recipient, all cells and datapoints taken together 18 | Idents(data)<-"donor_recipient" 19 | markers_data <- FindAllMarkers(data, only.pos = TRUE, min.pct = 0.25) 20 | write.csv(markers_data, "markers_donor_recipient_mito_epi_filtered_data.csv") 21 | 22 | #find general markers per cell type, all participants and datapoints taken together 23 | Idents(data)<-"predicted.celltype.elmentaiteadultileum" 24 | markers_data <- FindAllMarkers(data, only.pos = TRUE, min.pct = 0.25) 25 | write.csv(markers_data, "markers_allcelltypes_mito_epi_filtered_data.csv") 26 | 27 | #downsampling to equal cell numbers per sample for cell proportion analysis 28 | library(Seurat) 29 | Idents(data)<-"sample" 30 | data_1949<-subset(data, downsample=1949) 31 | saveRDS(data_1949, "DDTX_sample_1949cells.rds") 32 | 33 | #generate CD4 T subsets 34 | 35 | ## find subsets T cells 36 | Idents(data)<-"predicted.celltype.elmentaiteadultileum" 37 | cd4T<-subset(data,idents="Activated CD4 T") 38 | DimPlot(cd4T) 39 | ggsave("cd4T_before_reclustering.png", width = 5, height = 5, dpi = 600) 40 | 41 | DefaultAssay(object = cd4T) <- "RNA" 42 | cd4T <- NormalizeData(cd4T, normalization.method = "LogNormalize", scale.factor = 10000) 43 | cd4T <- NormalizeData(cd4T) 44 | cd4T <- FindVariableFeatures(cd4T, selection.method = "vst", nfeatures = 2000) 45 | 46 | # Identify the 10 most highly variable genes 47 | top10 <- head(VariableFeatures(cd4T), 10) 48 | all.genes <- rownames(cd4T) 49 | cd4T <- ScaleData(cd4T, features = all.genes) 50 | cd4T <- RunPCA(cd4T, features = VariableFeatures(object = cd4T)) 51 | ElbowPlot(cd4T) 52 | ggsave("Elbowplot_cd4T_after_reclustering.png", width = 5, height = 5, dpi = 600) 53 | 54 | cd4T <- FindNeighbors(cd4T, dims = 1:10) 55 | cd4T <- FindClusters(cd4T, resolution = 0.4) 56 | head(Idents(cd4T), 5) 57 | cd4T <- RunUMAP(cd4T, dims = 1:10) 58 | DimPlot(cd4T, reduction = "umap") 59 | ggsave("cd4T_after_reclustering.png", width = 5, height = 5, dpi = 600) 60 | markers_cd4T <- FindAllMarkers(cd4T, only.pos = TRUE, min.pct = 0.25) 61 | write.csv(markers_cd4T, "markers_cd4T_mito_epi_filtered_data.csv") 62 | FeaturePlot(cd4T, c("IL17A")) 63 | ggsave("cd4T_after_reclustering_il17.png", width = 5, height = 5, dpi = 600) 64 | 65 | ## markers for different timepoints epithelial cells in patient 1,2,3 66 | Idents(data)<-"compartment_final" 67 | epi<-subset(data, ident="Epithelial") 68 | Idents(epi)<-"patient" 69 | epi_pt3<-subset(epi, ident="UMCGDDtx00005") 70 | Idents(epi_pt3)<-"Timepoint_days" 71 | markers_epi_pt3<-FindAllMarkers(epi_pt3, only.pos=T) 72 | write.csv(markers_epi_pt3, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_epi_pt3_timepoints.csv") 73 | 74 | epi_pt2<-subset(epi, ident="UMCGDDtx00004") 75 | Idents(epi_pt2)<-"Timepoint_days" 76 | markers_epi_pt2<-FindAllMarkers(epi_pt2, only.pos=T) 77 | write.csv(markers_epi_pt2, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_epi_pt2_timepoints.csv") 78 | 79 | epi_pt1<-subset(epi, ident="UMCGDDtx00003") 80 | Idents(epi_pt1)<-"Timepoint_days" 81 | markers_epi_pt1<-FindAllMarkers(epi_pt1, only.pos=T) 82 | write.csv(markers_epi_pt1, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_epi_pt1_timepoints.csv") 83 | 84 | ## find endothelial markers 85 | Idents(data)<-"major_celltype" 86 | endo<-subset(data, ident="Endothelial") 87 | Idents(endo)<-"patient" 88 | endo_pt3<-subset(endo, ident="UMCGDDtx00005") 89 | Idents(endo_pt3)<-"Timepoint_days" 90 | markers_endo_pt3<-FindAllMarkers(endo_pt3, only.pos=T) 91 | write.csv(markers_endo_pt3, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_endo_pt3_timepoints.csv") 92 | 93 | endo_pt2<-subset(endo, ident="UMCGDDtx00004") 94 | Idents(endo_pt2)<-"Timepoint_days" 95 | markers_endo_pt2<-FindAllMarkers(endo_pt2, only.pos=T) 96 | write.csv(markers_endo_pt2, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_endo_pt2_timepoints.csv") 97 | 98 | endo_pt1<-subset(endo, ident="UMCGDDtx00003") 99 | Idents(endo_pt1)<-"Timepoint_days" 100 | markers_endo_pt1<-FindAllMarkers(endo_pt1, only.pos=T) 101 | write.csv(markers_endo_pt1, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_endo_pt1_timepoints.csv") 102 | 103 | ## find CD4T markers 104 | Idents(data)<-"predicted.celltype.elmentaiteadultileum" 105 | CD4<-subset(data, ident="Activated CD4 T") 106 | Idents(CD4)<-"patient" 107 | CD4_pt3<-subset(CD4, ident="UMCGDDtx00005") 108 | Idents(CD4_pt3)<-"Timepoint_days" 109 | markers_CD4_pt3<-FindAllMarkers(CD4_pt3, only.pos=T) 110 | write.csv(markers_CD4_pt3, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_CD4_pt3_timepoints.csv") 111 | 112 | CD4_pt2<-subset(CD4, ident="UMCGDDtx00004") 113 | Idents(CD4_pt2)<-"Timepoint_days" 114 | markers_CD4_pt2<-FindAllMarkers(CD4_pt2, only.pos=T) 115 | write.csv(markers_CD4_pt2, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_CD4_pt2_timepoints.csv") 116 | 117 | CD4_pt1<-subset(CD4, ident="UMCGDDtx00003") 118 | Idents(CD4_pt1)<-"Timepoint_days" 119 | markers_CD4_pt1<-FindAllMarkers(CD4_pt1, only.pos=T) 120 | write.csv(markers_CD4_pt1, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_CD4_pt1_timepoints.csv") 121 | 122 | ## find CD8T markers 123 | CD8<-subset(data, ident="Activated CD8 T") 124 | Idents(CD8)<-"patient" 125 | CD8_pt3<-subset(CD8, ident="UMCGDDtx00005") 126 | Idents(CD8_pt3)<-"Timepoint_days" 127 | markers_CD8_pt3<-FindAllMarkers(CD8_pt3, only.pos=T) 128 | write.csv(markers_CD8_pt3, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_CD8_pt3_timepoints.csv") 129 | 130 | CD8_pt2<-subset(CD8, ident="UMCGDDtx00004") 131 | Idents(CD8_pt2)<-"Timepoint_days" 132 | markers_CD8_pt2<-FindAllMarkers(CD8_pt2, only.pos=T) 133 | write.csv(markers_CD8_pt2, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_CD8_pt2_timepoints.csv") 134 | 135 | CD8_pt1<-subset(CD8, ident="UMCGDDtx00003") 136 | Idents(CD8_pt1)<-"Timepoint_days" 137 | markers_CD8_pt1<-FindAllMarkers(CD8_pt1, only.pos=T) 138 | write.csv(markers_CD8_pt1, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_CD8_pt1_timepoints.csv") 139 | 140 | 141 | 142 | 143 | 144 | # check markers on gutcellatlas 145 | 146 | 147 | -------------------------------------------------------------------------------- /old_stuff/Import10X_gene-per-patient.R: -------------------------------------------------------------------------------- 1 | ## 10X import for Takeda RIPK2 2 | ## Cytotoxic cells 3 | ## 10X import for Takeda RIPK2 4 | ## Cytotoxic cells 5 | 6 | library(Seurat) 7 | 8 | ## Cytotoxic T cells 9 | 10 | Cytotox <- Read10X(data.dir = "/Users/festeneam/Downloads/filtered_matrices_mex_cytotox/hg19") 11 | Cytotox_T <- CreateSeuratObject(raw.data = Cytotox, min.cells = 3, 12 | min.genes = 200, 13 | project = "10X Project", 14 | names.delim = "-", 15 | names.field = 2) 16 | 17 | Cytotox_T <- SetIdent(Cytotox_T, ident.use = c("Cyto")) 18 | 19 | mito.genes <- grep(pattern = "^MT-", x = rownames(x = Cytotox_T@data), value = TRUE) 20 | 21 | # Get TSS normalized mitochodrial counts 22 | col.total.counts <- Matrix::colSums(expm1(Cytotox_T@data)) 23 | percent.mito <- Matrix::colSums(Cytotox_T@raw.data[mito.genes,])/Matrix::colSums(Cytotox_T@raw.data) 24 | 25 | Cytotox_T <- AddMetaData(object = Cytotox_T, metadata = percent.mito, col.name = "percent.mito") 26 | 27 | Cytotox_T <- NormalizeData(object = Cytotox_T, normalization.method = "LogNormalize", 28 | scale.factor = 1e4) 29 | 30 | Cytotox_T <- ScaleData(Cytotox_T) 31 | Cytotox_T <- RunPCA(object = Cytotox_T, pc.genes=rownames(Cytotox_T@data), 32 | do.print = TRUE, 33 | pcs.print = 1:5, 34 | genes.print = 5) 35 | 36 | PrintPCA(object = Cytotox_T, pcs.print = 1:5, genes.print = 5, use.full = FALSE) 37 | 38 | Cytotox_T <- RunTSNE(object = Cytotox_T, dims.use = 1:10, do.fast = TRUE) 39 | TSNEPlot(object = Cytotox_T) 40 | 41 | VlnPlot(Cytotox_T, c("ZBTB7B","CD8B", "GZMA","RIPK2"), nCol = 4) 42 | 43 | FeaturePlot(Cytotox_T, c("ZBTB7B","CD8B", "GZMA","RIPK2"), cols.use = c("lightgrey","deeppink"), 44 | pt.size = 1) 45 | 46 | Cytotox_T <- FindClusters(object = Cytotox_T, reduction.type = "pca", 47 | dims.use = 1:10, 48 | resolution = 0.6, 49 | print.output = 0, 50 | save.SNN = TRUE) 51 | ## Monocytes 52 | 53 | CD14 <- Read10X(data.dir = "/Users/festeneam/Downloads/filtered_matrices_mex_CD14/hg19") 54 | CD14_mon <- CreateSeuratObject(raw.data = CD14, min.cells = 3, 55 | min.genes = 200, 56 | project = "10X Project", 57 | names.delim = "-", 58 | names.field = 2) 59 | 60 | CD14_mon <- SetIdent(CD14_mon, ident.use = c("CD14")) 61 | 62 | mito.genes <- grep(pattern = "^MT-", x = rownames(x = CD14_mon@data), value = TRUE) 63 | 64 | # Get TSS normalized mitochodrial counts 65 | col.total.counts <- Matrix::colSums(expm1(CD14_mon@data)) 66 | percent.mito <- Matrix::colSums(CD14_mon@raw.data[mito.genes,])/Matrix::colSums(CD14_mon@raw.data) 67 | 68 | CD14_mon <- AddMetaData(object = CD14_mon, metadata = percent.mito, col.name = "percent.mito") 69 | 70 | CD14_mon <- NormalizeData(object = CD14_mon, normalization.method = "LogNormalize", 71 | scale.factor = 1e4) 72 | 73 | CD14_mon <- ScaleData(CD14_mon) 74 | CD14_mon <- RunPCA(object = CD14_mon, pc.genes=rownames(CD14_mon@data), 75 | do.print = TRUE, 76 | pcs.print = 1:5, 77 | genes.print = 5) 78 | 79 | PrintPCA(object = CD14_mon, pcs.print = 1:5, genes.print = 5, use.full = FALSE) 80 | 81 | CD14_mon <- RunTSNE(object = CD14_mon, dims.use = 1:10, do.fast = TRUE) 82 | TSNEPlot(object = CD14_mon) 83 | 84 | VlnPlot(CD14_mon, c("CD14","SELL", "HLA-DRA","RIPK2"), nCol = 4) 85 | 86 | FeaturePlot(CD14_mon, c("CD14","SELL", "HLA-DRA","RIPK2"), cols.use = c("lightgrey","deeppink"), 87 | pt.size = 1) 88 | 89 | CD14_mon <- FindClusters(object = CD14_mon, reduction.type = "pca", 90 | dims.use = 1:10, 91 | resolution = 0.6, 92 | print.output = 0, 93 | save.SNN = TRUE) 94 | 95 | ## NK cells 96 | 97 | CD56 <- Read10X(data.dir = "/Users/festeneam/Downloads/filtered_matrices_mex_CD56/hg19") 98 | CD56_mon <- CreateSeuratObject(raw.data = CD56, min.cells = 3, 99 | min.genes = 200, 100 | project = "10X Project", 101 | names.delim = "-", 102 | names.field = 2) 103 | 104 | CD56_mon <- SetIdent(CD56_mon, ident.use = c("NK")) 105 | 106 | mito.genes <- grep(pattern = "^MT-", x = rownames(x = CD56_mon@data), value = TRUE) 107 | 108 | # Get TSS normalized mitochodrial counts 109 | col.total.counts <- Matrix::colSums(expm1(CD56_mon@data)) 110 | percent.mito <- Matrix::colSums(CD56_mon@raw.data[mito.genes,])/Matrix::colSums(CD56_mon@raw.data) 111 | 112 | CD56_mon <- AddMetaData(object = CD56_mon, metadata = percent.mito, col.name = "percent.mito") 113 | 114 | CD56_mon <- NormalizeData(object = CD56_mon, normalization.method = "LogNormalize", 115 | scale.factor = 1e4) 116 | 117 | CD56_mon <- ScaleData(CD56_mon) 118 | CD56_mon <- RunPCA(object = CD56_mon, pc.genes=rownames(CD56_mon@data), 119 | do.print = TRUE, 120 | pcs.print = 1:5, 121 | genes.print = 5) 122 | 123 | PrintPCA(object = CD56_mon, pcs.print = 1:5, genes.print = 5, use.full = FALSE) 124 | 125 | CD56_mon <- RunTSNE(object = CD56_mon, dims.use = 1:10, do.fast = TRUE) 126 | TSNEPlot(object = CD56_mon) 127 | 128 | VlnPlot(CD56_mon, c("NCAM1","FCGR3A", "ITGAM","RIPK2"), nCol = 4) 129 | 130 | FeaturePlot(CD56_mon, c("NCAM1","FCGR3A", "ITGAM","RIPK2"), cols.use = c("lightgrey","deeppink"), 131 | pt.size = 1) 132 | 133 | CD56_mon <- FindClusters(object = CD56_mon, reduction.type = "pca", 134 | dims.use = 1:10, 135 | resolution = 0.6, 136 | print.output = 0, 137 | save.SNN = TRUE) 138 | 139 | total <- MergeSeurat(NaiveCD4_T, CD14_mon, add.cell.id1 = "CD4_Naive", add.cell.id2 = "CD14") 140 | total2 <- MergeSeurat(total, CD56_mon, add.cell.id2 = "NK") 141 | TenX_cells <- MergeSeurat(total2, Cytotox_T, add.cell.id2 = "Cytotox") 142 | 143 | TenX_cells <- ScaleData(TenX_cells) 144 | TenX_cells <- RunPCA(object = TenX_cells, pc.genes=rownames(TenX_cells@data), 145 | do.print = TRUE, 146 | pcs.print = 1:5, 147 | genes.print = 5) 148 | 149 | PrintPCA(object = TenX_cells, pcs.print = 1:5, genes.print = 5, use.full = FALSE) 150 | 151 | TenX_cells <- RunTSNE(object = TenX_cells, dims.use = 1:10, do.fast = TRUE) 152 | TSNEPlot(object = TenX_cells) 153 | 154 | VlnPlot(TenX_cells, c("OSM","NOD2","OPRL1","RIPK2"), nCol = 4) 155 | 156 | FeaturePlot(TenX_cells, c("CCR7","CD160","GZMA","RIPK2"), cols.use = c("lightgrey","deeppink"), 157 | pt.size = 1) 158 | 159 | ## RIPK2 positivity 160 | # Table positive cells for a gene per patient 161 | 162 | RIPK2.pos <- TenX_cells@data["RIPK2",]>0 163 | RIPK2.pos <- 1*RIPK2.pos 164 | 165 | TenX_cells <- AddMetaData(TenX_cells, metadata = RIPK2.pos, col.name = "RIPK2.pos") 166 | TenX_cells <- AddMetaData(TenX_cells, metadata = TenX_cells@ident, col.name = "ident") 167 | 168 | 169 | -------------------------------------------------------------------------------- /Vedo2/Vedo001_preprep.R: -------------------------------------------------------------------------------- 1 | #Read dataframe 08 lane2 2 | dataset_10x<-Read10X("../VEDO2/Data/08_lane2/") 3 | dataset<-CreateSeuratObject(counts = dataset_10x$`Gene Expression`, project = "PSC") 4 | dataset[['HTO']]<-CreateAssayObject(counts = dataset_10x$`Antibody Capture`) 5 | dataset<-subset(dataset, nCount_RNA > 200) 6 | dataset <- NormalizeData(dataset, assay = "HTO", normalization.method = "CLR") 7 | dataset <- HTODemux(dataset, assay = "HTO", positive.quantile = 0.993) 8 | table(dataset$HTO_classification.global) 9 | table(dataset$HTO_maxID) 10 | HTOHeatmap(dataset, assay = "HTO", ncells = 1000) 11 | 12 | #demultiplex 13 | HTO <- dataset@meta.data[["HTO_classification"]] 14 | HTO_status <- dataset@meta.data[["HTO_classification.global"]] 15 | barcode <- dataset@assays[["RNA"]]@data@Dimnames[[2]] 16 | seurat_call <- data.frame(HTO, HTO_status, barcode) 17 | souporcell <- read.table(file = '../VEDO2/Data/clusters_08_lane2.tsv', sep = '\t', header = TRUE) 18 | souporcell <- select(souporcell, c("barcode", "status", "assignment")) 19 | All <- merge(seurat_call, souporcell, by = "barcode", all = F) 20 | rownames(All) <- All$barcode 21 | table(All$HTO, All$assignment) 22 | All$assignment[All$assignment == "0"]="UC-NI-3006" 23 | All$assignment[All$assignment == "2"]="PSC-NI-3267" 24 | All$assignment[All$assignment == "3"]="UC-I-3249" 25 | All$assignment[All$assignment == "4"]="PSC-NI-3191" 26 | All$status[All$status == "doublet"]="Doublet" 27 | All$status[All$status == "singlet"]="Singlet" 28 | All$status[All$status == "unassigned"]="Negative" 29 | for (i in 1:nrow(All)){ 30 | if (All[i,2] == "Negative" & All[i,5] != "1"){ 31 | All[i,2]<-All[i,5] 32 | } else { 33 | All[i,2]<-All[i,2] 34 | } 35 | } 36 | for (i in 1:nrow(All)){ 37 | if (All[i,3] == "Negative" & All[i,5] != "1"){ 38 | All[i,3]<-All[i,4] 39 | } else { 40 | All[i,3]<-All[i,3] 41 | } 42 | } 43 | 44 | Final_HTO <- All$HTO 45 | names(Final_HTO) <- rownames(All) 46 | dataset <- AddMetaData( 47 | object = dataset, 48 | metadata = Final_HTO, 49 | col.name = 'Final_HTO') 50 | Final_HTO_status <- All$HTO_status 51 | names(Final_HTO_status) <- rownames(All) 52 | dataset <- AddMetaData( 53 | object = dataset, 54 | metadata = Final_HTO_status, 55 | col.name = 'Final_HTO_status') 56 | table(dataset@meta.data[["Final_HTO_status"]], dataset@meta.data[["hash.ID"]]) 57 | table(dataset@meta.data[["Final_HTO"]], dataset@meta.data[["hash.ID"]]) 58 | 59 | #qc and preprocess 60 | DefaultAssay(dataset)<-"RNA" 61 | dataset[["percent.mt"]] <- PercentageFeatureSet(dataset, pattern = "^MT-") 62 | dataset<-subset(dataset, subset=percent.mt < 60) 63 | HTO_number_08lane2 <- sum(dataset$HTO_classification.global == "Singlet") 64 | souporcell_number_08lane2 <- sum(dataset$Final_HTO_status == "Singlet") 65 | Idents(dataset) <- "Final_HTO" 66 | dataset <- subset(dataset, idents = "PSC-NI-3191") 67 | Idents(dataset) <- "Final_HTO_status" 68 | dataset <- subset(dataset, idents = "Singlet") 69 | table(dataset@meta.data[["Final_HTO"]]) 70 | dataset<-RenameCells(dataset, add.cell.id="VEDO01_T0_NI") 71 | dataset@meta.data$lane<-"VEDO01_T0_NI" 72 | dataset<-SCTransform(dataset,vars.to.regress = "percent.mt", verbose = F) 73 | dataset <- RunPCA(dataset, verbose = FALSE) 74 | dataset <- RunUMAP(dataset, dims = 1:30, verbose = FALSE) 75 | dataset <- FindNeighbors(dataset, dims = 1:30, verbose = FALSE) 76 | dataset <- FindClusters(dataset, resolution = 0.2, verbose = FALSE) 77 | DimPlot(dataset) 78 | FeaturePlot(dataset, "IGHA2") 79 | FeaturePlot(dataset, "EPCAM") 80 | FeaturePlot(dataset, "CD3E") 81 | dataset@meta.data[["Final_HTO"]] <- "V001_T0_transversum_NI" 82 | table(dataset@meta.data[["Final_HTO"]]) 83 | saveRDS(dataset, file="../VEDO2/Data/VEDO01_T0_NI_sct.rds") 84 | 85 | #Read dataframe 09 lane1 86 | dataset_10x<-Read10X("../VEDO2/Data/09_lane1/") 87 | dataset<-CreateSeuratObject(counts = dataset_10x$`Gene Expression`, project = "PSC") 88 | dataset[['HTO']]<-CreateAssayObject(counts = dataset_10x$`Antibody Capture`) 89 | dataset<-subset(dataset, nCount_RNA > 200) 90 | dataset <- NormalizeData(dataset, assay = "HTO", normalization.method = "CLR") 91 | dataset <- HTODemux(dataset, assay = "HTO", positive.quantile = 0.91) 92 | table(dataset$HTO_classification.global) 93 | table(dataset$HTO_maxID) 94 | HTOHeatmap(dataset, assay = "HTO", ncells = 1000) 95 | 96 | #demultiplex 97 | HTO <- dataset@meta.data[["HTO_classification"]] 98 | HTO_status <- dataset@meta.data[["HTO_classification.global"]] 99 | barcode <- dataset@assays[["RNA"]]@data@Dimnames[[2]] 100 | seurat_call <- data.frame(HTO, HTO_status, barcode) 101 | souporcell <- read.table(file = '../VEDO2/Data/clusters_09_lane1.tsv', sep = '\t', header = TRUE) 102 | souporcell <- select(souporcell, c("barcode", "status", "assignment")) 103 | All <- merge(seurat_call, souporcell, by = "barcode", all = F) 104 | rownames(All) <- All$barcode 105 | table(All$HTO, All$assignment) 106 | All$assignment[All$assignment == "0"]="UC-NI-3107" 107 | All$assignment[All$assignment == "1"]="HC-NI-3083" 108 | All$assignment[All$assignment == "2"]="UC-NI-3195" 109 | All$assignment[All$assignment == "3"]="PSC-I-3191" 110 | All$status[All$status == "doublet"]="Doublet" 111 | All$status[All$status == "singlet"]="Singlet" 112 | All$status[All$status == "unassigned"]="Negative" 113 | for (i in 1:nrow(All)){ 114 | if (All[i,2] == "Negative"){ 115 | All[i,2]<-All[i,5] 116 | } else { 117 | All[i,2]<-All[i,2] 118 | } 119 | } 120 | for (i in 1:nrow(All)){ 121 | if (All[i,3] == "Negative"){ 122 | All[i,3]<-All[i,4] 123 | } else { 124 | All[i,3]<-All[i,3] 125 | } 126 | } 127 | 128 | Final_HTO <- All$HTO 129 | names(Final_HTO) <- rownames(All) 130 | dataset <- AddMetaData( 131 | object = dataset, 132 | metadata = Final_HTO, 133 | col.name = 'Final_HTO') 134 | Final_HTO_status <- All$HTO_status 135 | names(Final_HTO_status) <- rownames(All) 136 | dataset <- AddMetaData( 137 | object = dataset, 138 | metadata = Final_HTO_status, 139 | col.name = 'Final_HTO_status') 140 | table(dataset@meta.data[["Final_HTO_status"]], dataset@meta.data[["hash.ID"]]) 141 | table(dataset@meta.data[["Final_HTO"]], dataset@meta.data[["hash.ID"]]) 142 | 143 | #qc and preprocess 144 | DefaultAssay(dataset)<-"RNA" 145 | dataset[["percent.mt"]] <- PercentageFeatureSet(dataset, pattern = "^MT-") 146 | dataset<-subset(dataset, subset=percent.mt < 60) 147 | HTO_number_09lane1 <- sum(dataset$HTO_classification.global == "Singlet") 148 | souporcell_number_09lane1 <- sum(dataset$Final_HTO_status == "Singlet") 149 | Idents(dataset) <- "Final_HTO" 150 | dataset <- subset(dataset, idents = "PSC-I-3191") 151 | Idents(dataset) <- "Final_HTO_status" 152 | dataset <- subset(dataset, idents = "Singlet") 153 | table(dataset@meta.data[["Final_HTO"]]) 154 | dataset<-RenameCells(dataset, add.cell.id="VEDO01_T0_I") 155 | dataset@meta.data$lane<-"VEDO01_T0_I" 156 | dataset<-SCTransform(dataset,vars.to.regress = "percent.mt", verbose = F) 157 | dataset <- RunPCA(dataset, verbose = FALSE) 158 | dataset <- RunUMAP(dataset, dims = 1:30, verbose = FALSE) 159 | dataset <- FindNeighbors(dataset, dims = 1:30, verbose = FALSE) 160 | dataset <- FindClusters(dataset, resolution = 0.2, verbose = FALSE) 161 | DimPlot(dataset) 162 | FeaturePlot(dataset, "IGHG4") 163 | FeaturePlot(dataset, "EPCAM") 164 | dataset@meta.data[["Final_HTO"]] <- "V001_T0_ascendens_I" 165 | table(dataset@meta.data[["Final_HTO"]]) 166 | saveRDS(dataset, file="../VEDO2/Data/VEDO01_T0_I_sct.rds") 167 | 168 | -------------------------------------------------------------------------------- /Method_comparisons/SCT_integration.R: -------------------------------------------------------------------------------- 1 | 2 | library(Seurat) 3 | library(ggplot2) 4 | 5 | options(future.globals.maxSize = 150000 * 1024^2) 6 | 7 | 8 | 9 | HC_3296<-readRDS("/methodspaper_data/HC-NI-3296_sct.rds") 10 | DefaultAssay(HC_3296)<-"SCT" 11 | HC_3002<-readRDS("/methodspaper_data/HC-NI-3002_sct.rds") 12 | DefaultAssay(HC_3002)<-"SCT" 13 | HC_3030<-readRDS("/methodspaper_data/HC-NI-3030_sct.rds") 14 | DefaultAssay(HC_3030)<-"SCT" 15 | HC_3049<-readRDS("/methodspaper_data/HC-NI-3049_sct.rds") 16 | DefaultAssay(HC_3049)<-"SCT" 17 | HC_3083<-readRDS("/methodspaper_data/HC-NI-3083_sct.rds") 18 | DefaultAssay(HC_3083)<-"SCT" 19 | HC_3037<-readRDS("/methodspaper_data/D-3037-HC_sct.rds") 20 | DefaultAssay(HC_3037)<-"SCT" 21 | HC_3034<-readRDS("/methodspaper_data/C-3034-HC_sct.rds") 22 | DefaultAssay(HC_3034)<-"SCT" 23 | 24 | HC_1<-readRDS("/methodspaper_data/HC_Sanger1_dataset_sct.rds") 25 | HC_2<-readRDS("/methodspaper_data/HC_Sanger2_dataset_sct.rds") 26 | HC_3<-readRDS("/methodspaper_data/HC_Sanger3_dataset_sct.rds") 27 | HC_4<-readRDS("/methodspaper_data/HC_Sanger4_dataset_sct.rds") 28 | HC_5<-readRDS("/methodspaper_data/HC_Sanger5_dataset_sct.rds") 29 | HC_6<-readRDS("/methodspaper_data/HC_Sanger6_dataset_sct.rds") 30 | HC_7<-readRDS("/methodspaper_data/HC_Sanger7_dataset_sct.rds") 31 | HC_8<-readRDS("/methodspaper_data/HC_Sanger8_dataset_sct.rds") 32 | HC_9<-readRDS("/methodspaper_data/HC_Sanger9_dataset_sct.rds") 33 | DefaultAssay(HC_1)<-"SCT" 34 | DefaultAssay(HC_2)<-"SCT" 35 | DefaultAssay(HC_3)<-"SCT" 36 | DefaultAssay(HC_4)<-"SCT" 37 | DefaultAssay(HC_5)<-"SCT" 38 | DefaultAssay(HC_6)<-"SCT" 39 | DefaultAssay(HC_7)<-"SCT" 40 | DefaultAssay(HC_8)<-"SCT" 41 | DefaultAssay(HC_9)<-"SCT" 42 | imm_healthy<-readRDS("/imm_healthy.rds") 43 | epi_healthy<-readRDS("/epi_healthy.rds") 44 | fib_healthy<-readRDS("/fib_healthy.rds") 45 | 46 | HC_3296<-RenameCells(HC_3296, add.cell.id="HC3296") 47 | HC_3002<-RenameCells(HC_3002, add.cell.id="HC3002") 48 | 49 | HC_3030<-RenameCells(HC_3030, add.cell.id="HC3030") 50 | HC_3049<-RenameCells(HC_3049, add.cell.id="HC3049") 51 | HC_3083<-RenameCells(HC_3083, add.cell.id="HC3083") 52 | 53 | HC_3034<-RenameCells(HC_3034, add.cell.id="HC3034") 54 | HC_3037<-RenameCells(HC_3037, add.cell.id="HC3037") 55 | 56 | HC_1<-RenameCells(HC_1, add.cell.id="HC1") 57 | HC_2<-RenameCells(HC_2, add.cell.id="HC2") 58 | HC_3<-RenameCells(HC_3, add.cell.id="HC3") 59 | HC_4<-RenameCells(HC_4, add.cell.id="HC4") 60 | HC_5<-RenameCells(HC_5, add.cell.id="HC5") 61 | HC_6<-RenameCells(HC_6, add.cell.id="HC6") 62 | HC_7<-RenameCells(HC_7, add.cell.id="HC7") 63 | HC_8<-RenameCells(HC_8, add.cell.id="HC8") 64 | HC_9<-RenameCells(HC_9, add.cell.id="HC9") 65 | 66 | imm_healthy@meta.data$dataset<-"imm_healthy" 67 | epi_healthy@meta.data$dataset<-"epi_healthy" 68 | fib_healthy@meta.data$dataset<-"fib_healthy" 69 | 70 | HC_3296@meta.data$dataset<-"HC_3296" 71 | HC_3002@meta.data$dataset<-"HC_3002" 72 | 73 | HC_3030@meta.data$dataset<-"HC_3030" 74 | HC_3049@meta.data$dataset<-"HC_3049" 75 | HC_3083@meta.data$dataset<-"HC_3083" 76 | HC_3037@meta.data$dataset<-"HC_3037" 77 | HC_3034@meta.data$dataset<-"HC_3034" 78 | 79 | HC_1@meta.data$dataset<-"HC_1" 80 | HC_2@meta.data$dataset<-"HC_2" 81 | HC_3@meta.data$dataset<-"HC_3" 82 | HC_4@meta.data$dataset<-"HC_4" 83 | HC_5@meta.data$dataset<-"HC_5" 84 | HC_6@meta.data$dataset<-"HC_6" 85 | HC_7@meta.data$dataset<-"HC_7" 86 | HC_8@meta.data$dataset<-"HC_8" 87 | HC_9@meta.data$dataset<-"HC_9" 88 | 89 | imm_healthy@meta.data$method<-"splitcollagenase" 90 | epi_healthy@meta.data$method<-"splitcollagenase" 91 | fib_healthy@meta.data$method<-"splitcollagenase" 92 | 93 | HC_3296@meta.data$method<-"wholecollagenase" 94 | HC_3002@meta.data$method<-"wholecollagenase" 95 | 96 | HC_3030@meta.data$method<-"wholecollagenase" 97 | HC_3049@meta.data$method<-"wholecollagenase" 98 | HC_3083@meta.data$method<-"wholecollagenase" 99 | HC_3037@meta.data$method<-"wholecollagenase" 100 | HC_3034@meta.data$method<-"wholecollagenase" 101 | 102 | HC_1@meta.data$method<-"splitprotease" 103 | HC_2@meta.data$method<-"splitprotease" 104 | HC_3@meta.data$method<-"splitprotease" 105 | HC_4@meta.data$method<-"splitprotease" 106 | HC_5@meta.data$method<-"splitprotease" 107 | HC_6@meta.data$method<-"splitprotease" 108 | HC_7@meta.data$method<-"splitprotease" 109 | HC_8@meta.data$method<-"splitprotease" 110 | HC_9@meta.data$method<-"splitprotease" 111 | 112 | 113 | imm_healthy_sub[["percent.ribo"]] <- PercentageFeatureSet(imm_healthy_sub, pattern = "^RPL|^RPS") 114 | epi_healthy_sub[["percent.ribo"]] <- PercentageFeatureSet(epi_healthy_sub, pattern = "^RPL|^RPS") 115 | fib_healthy_sub[["percent.ribo"]] <- PercentageFeatureSet(fib_healthy_sub, pattern = "^RPL|^RPS") 116 | 117 | imm_healthy[["percent.ribo"]] <- PercentageFeatureSet(imm_healthy, pattern = "^RPL|^RPS") 118 | epi_healthy[["percent.ribo"]] <- PercentageFeatureSet(epi_healthy, pattern = "^RPL|^RPS") 119 | fib_healthy[["percent.ribo"]] <- PercentageFeatureSet(fib_healthy, pattern = "^RPL|^RPS") 120 | 121 | HC_2[["percent.ribo"]] <- PercentageFeatureSet(HC_2, pattern = "^RPL|^RPS") 122 | HC_6[["percent.ribo"]] <- PercentageFeatureSet(HC_6, pattern = "^RPL|^RPS") 123 | HC_9[["percent.ribo"]] <- PercentageFeatureSet(HC_9, pattern = "^RPL|^RPS") 124 | 125 | HC_3296[["percent.ribo"]] <- PercentageFeatureSet(HC_3296, pattern = "^RPL|^RPS") 126 | HC_3030[["percent.ribo"]] <- PercentageFeatureSet(HC_3030, pattern = "^RPL|^RPS") 127 | HC_3083[["percent.ribo"]] <- PercentageFeatureSet(HC_3083, pattern = "^RPL|^RPS") 128 | HC_3037[["percent.ribo"]] <- PercentageFeatureSet(HC_3037, pattern = "^RPL|^RPS") 129 | HC_3034[["percent.ribo"]] <- PercentageFeatureSet(HC_3034, pattern = "^RPL|^RPS") 130 | 131 | 132 | alldata_202002_integration_list <- list(HC_3296, HC_3030, HC_3083, HC_3034, HC_3037,HC_2, HC_6,HC_9, epi_healthy_sub,imm_healthy_sub,fib_healthy_sub) 133 | 134 | for (i in 1:length(alldata_202002_integration_list)){ 135 | alldata_202002_integration_list[[i]] <- SCTransform(alldata_202002_integration_list[[i]],vars.to.regress = c("percent.mt", "percent.ribo"), verbose = FALSE) 136 | } 137 | 138 | print("loaded") 139 | features <- SelectIntegrationFeatures(object.list = alldata_202002_integration_list, nfeatures = 3000) 140 | print("features_selected") 141 | alldata_202002_integration_list <- PrepSCTIntegration(object.list = alldata_202002_integration_list, anchor.features = features) 142 | print("integration_prepped") 143 | anchors <- FindIntegrationAnchors(object.list = alldata_202002_integration_list, anchor.features = features, normalization.method = "SCT") 144 | print("achors_found") 145 | rm(alldata_202002_integration_list) 146 | alldata.integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT") 147 | print("integrated") 148 | rm(anchors) 149 | alldata.integrated <- RunPCA(alldata.integrated, verbose = FALSE) 150 | print("pca2_ran") 151 | alldata.integrated <- RunUMAP(alldata.integrated, dims = 1:30) 152 | print("umap_ran") 153 | alldata.integrated <- FindNeighbors(alldata.integrated, dims = 1:30) 154 | alldata.integrated <- FindClusters(alldata.integrated, resolution = 0.5) 155 | 156 | DefaultAssay(alldata.integrated) <- "RNA" 157 | alldata.integrated <- NormalizeData(alldata.integrated, normalization.method = "LogNormalize", scale.factor = 10000, verbose = FALSE) 158 | 159 | 160 | for (i in 1:nrow(methodspaper@meta.data)){ 161 | if (methodspaper@meta.data[i,21] == "unassigned"){ 162 | methodspaper@meta.data[i,23]<-methodspaper@meta.data[i,22] 163 | } else { 164 | methodspaper@meta.data[i,23]<-methodspaper@meta.data[i,21] 165 | } 166 | } 167 | 168 | -------------------------------------------------------------------------------- /PSC/riskgene_expression_analysis.R: -------------------------------------------------------------------------------- 1 | #analysis UC and PSC risk/implicated genes and their differential expression in different health statuses 2 | 3 | data<- readRDS("~/PSC_processed.rds") 4 | DefaultAssay(data) = "RNA" 5 | UC_risk<-read.csv("~/ucputativeriskgenes.csv", sep=";") 6 | genes<-read.csv("~/genes_junctions.csv", sep=";") 7 | PSC_risk<-genes[genes$function. == "PSC_and_suggestive_risk_genes",] 8 | 9 | #identifyng risk gene expression 10 | Idents(data) <- "celltypes" 11 | celltypes <- levels(data$celltypes) 12 | table(data$celltypes, data$state) 13 | celltypes <- celltypes[-c(2,3,4,6,7,8,9,10,13,14,15,18,19,20,21,22,24,25,26,32,35,39,40,41,42)] #filter out celltypes with too few (<100) cells per group 14 | DefaultAssay(data)<-"RNA" 15 | 16 | ##### 17 | allgenes <- data.frame(matrix(ncol = 2, nrow = 33538)) 18 | colnames(allgenes)[1]<-"Gene" 19 | allgenes$Gene<-rownames(data@assays$RNA) 20 | PSC_risk_in_set<-merge(allgenes, PSC_risk, by="Gene") 21 | UC_risk_in_set<-merge(allgenes, UC_risk, by="Gene") 22 | write.csv(UC_risk_in_set, "~/ucputativeriskgenes_smillie_inset.csv") 23 | write.csv(PSC_risk_in_set, "~/PSCgenes_inset.csv") 24 | 25 | PSC_risk<-read.csv("~/PSC_genes.csv", sep=";") 26 | colnames(PSC_risk)[1]<-"Gene" 27 | 28 | DEgenes <- FindMarkers(data, subset.ident = celltypes[1], group.by = "state", test.use = "MAST", ident.1 = "PSC-I", ident.2 = "HC-NI") 29 | DEgenes$celltype <- celltypes[1] 30 | DEgenes$Gene <- rownames(DEgenes) 31 | PSC <- merge(DEgenes, PSC_risk, by = "Gene") 32 | 33 | for(i in 2:17){ 34 | DEgenes <- FindMarkers(data, subset.ident = celltypes[i], group.by = "state", test.use = "MAST", ident.1 = "PSC-I", ident.2 = "HC-NI") 35 | DEgenes$celltype <- celltypes[i] 36 | DEgenes$Gene <- rownames(DEgenes) 37 | DEgenes1 <- merge(DEgenes, PSC_risk, by = "Gene") 38 | PSC <- rbind(PSC, DEgenes1) 39 | } 40 | 41 | PSC_I<-PSC 42 | 43 | DEgenes <- FindMarkers(data, subset.ident = celltypes[1], group.by = "state", test.use = "MAST", ident.1 = "PSC-NI", ident.2 = "HC-NI") 44 | DEgenes$celltype <- celltypes[1] 45 | DEgenes$Gene <- rownames(DEgenes) 46 | PSC <- merge(DEgenes, PSC_risk, by = "Gene") 47 | 48 | for(i in 2:17){ 49 | DEgenes <- FindMarkers(data, subset.ident = celltypes[i], group.by = "state", test.use = "MAST", ident.1 = "PSC-NI", ident.2 = "HC-NI") 50 | DEgenes$celltype <- celltypes[i] 51 | DEgenes$Gene <- rownames(DEgenes) 52 | DEgenes1 <- merge(DEgenes, PSC_risk, by = "Gene") 53 | PSC <- rbind(PSC, DEgenes1) 54 | } 55 | 56 | PSC_NI<-PSC 57 | 58 | x<-merge(PSC_risk, allgenes, by="Gene", all=F) # 71 PSC risk genes in set 59 | PSC_I$p_val_adj<-PSC_I$p_val*71 60 | PSC_NI$p_val_adj<-PSC_NI$p_val*71 61 | 62 | write.csv(PSC_I, "~/PSC_I_bonferroni_allrisk_genes_celltypes.csv") 63 | write.csv(PSC_NI, "~/PSC_NI_onferroni_allrisk_genes_celltypes.csv") 64 | 65 | UC <- data.frame(matrix(ncol = 6, nrow = 0)) 66 | colnames(UC) <- c("p_val", "avg_logFC","pct.1","pct2","p_val_adj","celltype") 67 | for(i in 1:17){ 68 | DEgenes <- FindMarkers(data, subset.ident = celltypes[i], group.by = "state", test.use = "MAST", ident.1 = "UC-NI", ident.2 = "HC-NI") 69 | DEgenes$celltype <- celltypes[i] 70 | DEgenes$Gene <- rownames(DEgenes) 71 | DEgenes <- merge(DEgenes, UC_risk, by = "Gene") 72 | UC <- rbind(UC, DEgenes) 73 | } 74 | 75 | UC_NI<-UC 76 | 77 | UC <- data.frame(matrix(ncol = 6, nrow = 0)) 78 | colnames(UC) <- c("p_val", "avg_logFC","pct.1","pct2","p_val_adj","celltype") 79 | for(i in 1:17){ 80 | DEgenes <- FindMarkers(data, subset.ident = celltypes[i], group.by = "state", test.use = "MAST", ident.1 = "UC-I", ident.2 = "HC-NI") 81 | DEgenes$celltype <- celltypes[i] 82 | DEgenes$Gene <- rownames(DEgenes) 83 | DEgenes <- merge(DEgenes, UC_risk, by = "Gene") 84 | UC <- rbind(UC, DEgenes) 85 | } 86 | 87 | UC_I<-UC 88 | UC_significance_level_bonferroni<-0.05/54 #54 UCrisk genes in total in set 89 | 90 | UC_I$p_val_adj<-UC_I$p_val*54 91 | UC_NI$p_val_adj<-UC_NI$p_val*54 92 | 93 | write.csv(UC_I, "~/UC_I_bonferroni_risk_genes_celltypes.csv") 94 | write.csv(UC_NI, "~/UC_NI_bonferroni_risk_genes_celltypes.csv") 95 | 96 | # plot 97 | library(ggplot2) 98 | PSC_I <- filter(PSC_I, PSC_I$p_val_adj <= 0.05) 99 | UC_I <- filter(UC_I, UC_I$p_val_adj <= 0.05) 100 | UC_NI <- filter(UC_NI, UC_NI$p_val_adj <=0.05) 101 | PSC_NI <- filter(PSC_NI, PSC_NI$p_val_adj <=0.05) 102 | 103 | UC_I$Adj_p_value <- ifelse(UC_I$p_val_adj <= 0.001, "< 0.001", ifelse(UC_I$p_val_adj <= 0.01, "< 0.01", "< 0.05")) 104 | UC_I$Adj_p_value <- factor(UC_I$Adj_p_value, levels = c("< 0.05", "< 0.01","< 0.001")) 105 | UC_NI$Adj_p_value <- ifelse(UC_NI$p_val_adj <= 0.001, "< 0.001", ifelse(UC_NI$p_val_adj <= 0.01, "< 0.01", "< 0.05")) 106 | UC_NI$Adj_p_value <- factor(UC_NI$Adj_p_value, levels = c("< 0.05", "< 0.01","< 0.001")) 107 | PSC_I$Adj_p_value <- ifelse(PSC_I$p_val_adj <= 0.001, "< 0.001", ifelse(PSC_I$p_val_adj <= 0.01, "< 0.01", "< 0.05")) 108 | PSC_I$Adj_p_value <- factor(PSC_I$Adj_p_value, levels = c("< 0.05", "< 0.01","< 0.001")) 109 | PSC_NI$Adj_p_value <- ifelse(PSC_NI$p_val_adj <= 0.001, "< 0.001", ifelse(PSC_NI$p_val_adj <= 0.01, "< 0.01", "< 0.05")) 110 | PSC_NI$Adj_p_value <- factor(PSC_NI$Adj_p_value, levels = c("< 0.05", "< 0.01","< 0.001")) 111 | UC_I$direction <- ifelse(UC_I$avg_logFC > 0, "up", "down") 112 | UC_NI$direction <- ifelse(UC_NI$avg_logFC > 0, "up", "down") 113 | PSC_I$direction <- ifelse(PSC_I$avg_logFC > 0, "up", "down") 114 | PSC_NI$direction <- ifelse(PSC_NI$avg_logFC > 0, "up", "down") 115 | 116 | ggplot(PSC_I, aes(x = Gene, y = celltype, color = direction)) + 117 | geom_point(aes(alpha = Adj_p_value), size = 5) + 118 | scale_alpha_manual(values =c(0.2, 0.5, 1)) + 119 | scale_color_manual(values = c("#00AFBB", "#FC4E07")) + 120 | theme_bw() + 121 | labs(title = "PSC risk genes in PSC_I vs HC") 122 | 123 | ggsave("Results/Figures/Riskgenes_PSCI.pdf", height = 7) 124 | 125 | ggplot(PSC_NI, aes(x = Gene, y = celltype, color = direction)) + 126 | geom_point(aes(alpha = Adj_p_value), size = 5) + 127 | scale_alpha_manual(values =c(1)) + 128 | scale_color_manual(values = c("#00AFBB", "#FC4E07")) + 129 | theme_bw() + 130 | labs(title = "PSC risk genes in PSC_NI vs HC") 131 | 132 | ggsave("Results/Figures/Riskgenes_PSCNI.pdf", height = 7) 133 | 134 | ggplot(UC_I, aes(x = Gene, y = celltype, color = direction)) + 135 | geom_point(aes(alpha = Adj_p_value), size = 5) + 136 | scale_alpha_manual(values =c(0.2, 0.5, 1)) + 137 | scale_color_manual(values = c("#00AFBB", "#FC4E07")) + 138 | theme_bw()+ 139 | labs(title = "UC risk genes in UC_I vs HC") 140 | 141 | ggsave("Results/Figures/Riskgenes_UCI.pdf", height = 7) 142 | 143 | ggplot(UC_NI, aes(x = Gene, y = celltype, color = direction)) + 144 | geom_point(aes(alpha = Adj_p_value), size = 5) + 145 | scale_alpha_manual(values =c(0.5, 1)) + 146 | scale_color_manual(values = c("#00AFBB", "#FC4E07")) + 147 | theme_bw()+ 148 | labs(title = "UC risk genes in UC_NI vs HC") 149 | 150 | ggsave("Results/Figures/Riskgenes_UCNI.pdf", height = 7) 151 | 152 | # old 153 | library(viridis) 154 | library(ggplot2) 155 | 156 | ggplot(PSC_NI, aes(x=celltype, y = Gene, color = avg_logFC , size = -log(p_val))) + 157 | geom_point() + scale_color_viridis(option = "D") +theme(title = element_text()) + 158 | labs(title = "PSC risk genes in PSC_NI vs HC") # saved PSC_riskgenes_PSC_NI_vs_HC 159 | 160 | ggplot(PSC_I, aes(x=celltype, y = Gene, color = avg_logFC , size = -log(p_val))) + 161 | geom_point() + scale_color_viridis(option = "D") +theme(title = element_text()) + 162 | labs(title = "PSC risk genes in PSC_I vs HC") # saved PSC_riskgenes_PSC_I_vs_HC 163 | 164 | ggplot(UC_NI, aes(x=celltype, y = Gene, color = avg_logFC , size = -log(p_val))) + 165 | geom_point() + scale_color_viridis(option = "D") +theme(title = element_text()) + 166 | labs(title = "UC risk genes in UC_NI vs HC") # saved UC_riskgenes_UC_NI_vs_HC 167 | 168 | ggplot(UC_I, aes(x=celltype, y = Gene, color = avg_logFC , size = -log(p_val))) + 169 | geom_point() + scale_color_viridis(option = "D") +theme(title = element_text()) + 170 | labs(title = "UC risk genes in UC_I vs HC") # saved UC_riskgenes_UC_I_vs_HC 171 | 172 | -------------------------------------------------------------------------------- /DDTx/filtering_and_QC.R: -------------------------------------------------------------------------------- 1 | library(Seurat) 2 | 3 | #open Seuratfile 4 | ddtx<-readRDS("/source/ddtx_merged_demultiplexed_clustered_compartment_azi_elmentaiteadultileum_demuxlet.rds") 5 | dim(ddtx) 6 | 7 | # filter 60% mito 8 | data<-subset(ddtx, subset=percent.mt < 60) 9 | dim(data) 10 | # reduction of 90553 to 88167 cells 11 | 12 | # exclude T8. patient 3 (wrongly sampled, only recipient cells) 13 | data@meta.data$pt_project_lane<-paste(data@meta.data$lane,data@meta.data$donor_final, sep="_") 14 | data<-subset(data, subset=pt_project_lane!="220504_lane04_UMCGDDtx00005r") 15 | dim(data) 16 | # reduction to 85750 cells 17 | 18 | # exclude epithelial cells from recipients: this is spillover 19 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Enterocyte"),"compartment_final"]<-"Epithelial" 20 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Paneth"),"compartment_final"]<-"Epithelial" 21 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="TA"),"compartment_final"]<-"Epithelial" 22 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Tuft"),"compartment_final"]<-"Epithelial" 23 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stem cells"),"compartment_final"]<-"Epithelial" 24 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="BEST4+ epithelial"),"compartment_final"]<-"Epithelial" 25 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Goblet cell"),"compartment_final"]<-"Epithelial" 26 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="I cells (CCK+)"),"compartment_final"]<-"Epithelial" 27 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="K cells (GIP+)"),"compartment_final"]<-"Epithelial" 28 | 29 | 30 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Activated CD8 T"),"compartment_final"]<-"Immune" 31 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Activated CD4 T"),"compartment_final"]<-"Immune" 32 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Memory B"),"compartment_final"]<-"Immune" 33 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="IgA plasma cell"),"compartment_final"]<-"Immune" 34 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="cDC2"),"compartment_final"]<-"Immune" 35 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="LYVE1+ Macrophage"),"compartment_final"]<-"Immune" 36 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Macrophages"),"compartment_final"]<-"Immune" 37 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="IgG plasma cell"),"compartment_final"]<-"Immune" 38 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Monocytes"),"compartment_final"]<-"Immune" 39 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="TRGV2 gdT"),"compartment_final"]<-"Immune" 40 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Mast cell"),"compartment_final"]<-"Immune" 41 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Cycling B cell"),"compartment_final"]<-"Immune" 42 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Naive B"),"compartment_final"]<-"Immune" 43 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="ILC3"),"compartment_final"]<-"Immune" 44 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="cDC1"),"compartment_final"]<-"Immune" 45 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="TRGV5/7 gdT"),"compartment_final"]<-"Immune" 46 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="gdT"),"compartment_final"]<-"Immune" 47 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="CX3CR1+ CD8 Tmem"),"compartment_final"]<-"Immune" 48 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="TRGV4 gdT"),"compartment_final"]<-"Immune" 49 | 50 | 51 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 1 (ADAMDEC1+)"),"compartment_final"]<-"Stromal" 52 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="arterial capillary"),"compartment_final"]<-"Stromal" 53 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 1 (CCL11+)"),"compartment_final"]<-"Stromal" 54 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 2 (NPY+)"),"compartment_final"]<-"Stromal" 55 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Mature venous EC"),"compartment_final"]<-"Stromal" 56 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Contractile pericyte (PLN+)"),"compartment_final"]<-"Stromal" 57 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Adult Glia"),"compartment_final"]<-"Stromal" 58 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Mature arterial EC"),"compartment_final"]<-"Stromal" 59 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Transitional Stromal 3 (C3+)"),"compartment_final"]<-"Stromal" 60 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="LEC3 (ADGRG3+)"),"compartment_final"]<-"Stromal" 61 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="myofibroblast (RSPO2+)"),"compartment_final"]<-"Stromal" 62 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="LEC1 (ACKR4+)"),"compartment_final"]<-"Stromal" 63 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 3 (C7+)"),"compartment_final"]<-"Stromal" 64 | 65 | data@meta.data$patient_compartment <- paste(data@meta.data$donor_final,data@meta.data$compartment_final, sep="_") 66 | 67 | 68 | data@meta.data$major_celltype<- data@meta.data$compartment_final 69 | 70 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 1 (ADAMDEC1+)"),"major_celltype"]<-"Endothelial" 71 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="arterial capillary"),"major_celltype"]<-"Endothelial" 72 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 1 (CCL11+)"),"major_celltype"]<-"Endothelial" 73 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 2 (NPY+)"),"major_celltype"]<-"Endothelial" 74 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Mature venous EC"),"major_celltype"]<-"Endothelial" 75 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Contractile pericyte (PLN+)"),"major_celltype"]<-"Endothelial" 76 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Adult Glia"),"major_celltype"]<-"Endothelial" 77 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Mature arterial EC"),"major_celltype"]<-"Endothelial" 78 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Transitional Stromal 3 (C3+)"),"major_celltype"]<-"Endothelial" 79 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="LEC3 (ADGRG3+)"),"major_celltype"]<-"Endothelial" 80 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="LEC1 (ACKR4+)"),"major_celltype"]<-"Endothelial" 81 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 3 (C7+)"),"major_celltype"]<-"Endothelial" 82 | 83 | 84 | 85 | 86 | # subset the data 87 | unique(data@meta.data$patient_compartment) 88 | data_2 <- subset(data, patient_compartment != "UMCGDDtx00003r_Epithelial" & patient_compartment != "UMCGDDtx00004r_Epithelial" & patient_compartment !="UMCGDDtx00005r_Epithelial") 89 | unique(data_2@meta.data$patient_compartment) 90 | dim(data_2) 91 | #reduction to 85554 cells 92 | 93 | # save 94 | saveRDS(data_2, "/source/ddtx_merged_demultiplexed_clustered_compartment_azi_elmentaiteadultileum_below60pctmito_withoutEpithelialR_demuxlet.rds") 95 | -------------------------------------------------------------------------------- /PSC/Celltyping_202103: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(Seurat) 3 | library(patchwork) 4 | library(ggplot2) 5 | library(readr) 6 | library(tidyr) 7 | library(MAST) 8 | library(readxl) 9 | library(openxlsx) 10 | 11 | data<- readRDS("PSC_202002_integrated_v2_noribo.rds") 12 | data=UpdateSeuratObject(data) 13 | DimPlot(data, label = T) + NoLegend() 14 | 15 | data@meta.data$Final_HTO[data@meta.data$Final_HTO == "PSC-I-X019"]<-"PSC-NI-XXXX" 16 | data@meta.data$Final_HTO[data@meta.data$Final_HTO == "PSC-NI-X019"]<-"PSC-I-XXXX" 17 | table(data@meta.data$Final_HTO) 18 | data@meta.data$disease = sapply(strsplit(data$Final_HTO,"-"), `[`, 1) 19 | data@meta.data$inflammation = sapply(strsplit(data$Final_HTO,"-"), `[`, 2) 20 | data@meta.data$sample = sapply(strsplit(data$Final_HTO,"-"), `[`, 3) 21 | data@meta.data$state <- paste(data$disease, data$inflammation, sep='-') 22 | table(data@meta.data$sample) 23 | data = subset(data, subset = sample != "X296") 24 | table(data@meta.data$sample) 25 | 26 | # recluster subset 27 | # epi = average expression EPCAM > 5 28 | epi<-subset(data, idents = c("6","7","17","19","0","14","10","3","21")) 29 | DefaultAssay(epi)<-"integrated" 30 | epi<- RunPCA(epi) 31 | epi <- RunUMAP(epi, dims = 1:30) 32 | epi<-FindNeighbors(epi, dims = 1:30) 33 | epi<-FindClusters(epi, resolution = 0.4) 34 | DimPlot(epi, label=T) 35 | Idents(epi)<-"integrated_snn_res.0.4" 36 | DefaultAssay(epi)<-"RNA" 37 | #markers_epi <- FindAllMarkers(epi, only.pos = TRUE, min.pct = 0.25) 38 | #selected_markers_epi <- markers_epi %>% group_by(cluster) %>% top_n(n = 3, wt = avg_logFC) 39 | # stem (LEFTY, LGR5, ASCL2); enteroendocrine (ASCL2, CHGA, CCL7), Tuft (TRPM5, HCK); Goblet (MUC2, TFF1); DUOX (MUC2lo, TFF1lo, DUOX2); BEST4 (BEST4); Absorptive_enterocyte (SLC26A3hi, RBP, CA1, SLC26A2); Immature_enterocyte (SLC26A3, RBP, CA1, SLC26A2, FABP1); Cycling_TA (CDC25C); RIBO_TA (LEFTY); Immature_Goblet (MUC2, TFF1lo) 40 | new.ident=c("Immature_enterocyte", "Cycling_TA", "Absorptive_enterocyte", "Ribo_TA","Stem", "MT-Hi-enterocyte", "BEST4_enterocyte", "Immature_goblet", "DUOX2_enterocyte", "PLCG2_TA", "Absorptive_TA", "Tuft", "REG_TA","Enteroendocrine", "Goblet", "MAST") 41 | names(new.ident) <- levels(epi) 42 | epi=RenameIdents(epi,new.ident) 43 | DimPlot(epi, label=T) 44 | epi[["celltypes"]] <- Idents(object = epi) 45 | meta_epi<-epi@meta.data 46 | write.csv(meta_epi, "meta_epi.csv") 47 | 48 | # leuko = average expression PTPRC (CD45) > 5 49 | leuko<-subset(data, idents = c("2","11","18","24","27","15","4","26")) 50 | DefaultAssay(leuko)<-"integrated" 51 | leuko<- RunPCA(leuko) 52 | leuko <- RunUMAP(leuko, dims = 1:30) 53 | leuko<-FindNeighbors(leuko, dims = 1:30) 54 | leuko<-FindClusters(leuko, resolution = 0.8) 55 | Idents(leuko)<-"integrated_snn_res.0.8" 56 | DimPlot(leuko, label=T) 57 | DefaultAssay(leuko)<-"RNA" 58 | #markers_leuko <- FindAllMarkers(leuko, only.pos = TRUE, min.pct = 0.25) 59 | #selected_markers_leuko <- markers_leuko %>% group_by(cluster) %>% top_n(n = 3, wt = avg_logFC) 60 | # all B cells (CD19 and/or CD20), all T cells (CD3E, CD3D), non-T non-B is APC; Activated_B (CD74, CD83, FCRL5); MAIT (CCR7-, SELLlo, IL7R, CD4lo, KLRB1, CD44); Treg (TNFRSF4, CD4); Memory_CD4T (SELL, CCR7, CD4); Mt (mito); CD8pos (GNLY+/-, CD8A, NKG7) DC (CD1C, ITGAx, HLADRB1, STAT1); ; Cycling_T (TFRC+, CCL5, CD4lo, CD3); Mature_B (IGHD+/-, SELL, CD24, CD83+/-); Inflammatory_monocytes (OSM, S100A8, S100A9, ITGAX); Memory_B (CXCR5, TNFRSF13B, IGHD, POUZAF1lo); Cycling_B (CD74,AICDA, POUZAF1, ASF1B, CLSPN); Macrophages (CD14, HLADRB1); Cycling_T (CLSPN, GNLY, CD4) 61 | new.idents<-c("Mature_B", "MAIT", "Treg", "Memory_CD4T", "Mt","CD8T", "DC", "CD8T", "Cycling_T", "Mature_B", "Inflammatory_monocyte", "Mature_B", "Doublets", "Activated_B", "Mt", "Activated_cycling_B", "Macrophage", "Cycling_T", "Doublets") 62 | names(new.idents) <- levels(leuko) 63 | leuko=RenameIdents(leuko,new.idents) 64 | DimPlot(leuko, label=T) 65 | leuko[["celltypes"]] <- Idents(object = leuko) 66 | 67 | meta_leuko<-leuko@meta.data 68 | write.csv(meta_leuko, "meta_leuko.csv") 69 | 70 | # stromal = average expression THY1 > 0.4 (fibroblast), SOX10 > 2 (glia), MADCAM1 > 1 (endotheel) 71 | stromal<-subset(data, idents = c("9", "13", "20", "22", "25")) 72 | DefaultAssay(stromal)<-"integrated" 73 | stromal<- RunPCA(stromal) 74 | stromal <- RunUMAP(stromal, dims = 1:30) 75 | stromal <-FindNeighbors(stromal, dims = 1:30) 76 | stromal <-FindClusters(stromal, resolution = 0.4) 77 | DimPlot(stromal, label=T) 78 | Idents(stromal)<-"integrated_snn_res.0.4" 79 | DefaultAssay(stromal)<-"RNA" 80 | #markers_stromal <- FindAllMarkers(stromal, only.pos = TRUE) 81 | #fibroblasts (COL1A1, PDGFRA, THY1), endothelium (RBP7, CD34, PLVAP, CLDN5). inflammatory_fibroblast (IGFBP7, CHI3L1, NNMT); Pericytes (RGS5, COX4I2, HIGD1B); Myofibroblast (ACTA2, CNN1) 82 | new.idents<-c("WNT5B_fibroblast", "Inflammatory_fibroblast", "RSPO3_fibroblast", "WNT2B_fibroblast", "Endothelial", "Endothelial", "Mt-Hi_stromal", "Pericytes", "Glia", "Myofibroblasts", "Endothelial", "Endothelial") 83 | names(new.idents) <- levels(stromal) 84 | stromal=RenameIdents(stromal,new.idents) 85 | DimPlot(stromal, label=T) 86 | stromal[["celltypes"]] <- Idents(object = stromal) 87 | DimPlot(stromal) 88 | meta_stromal<-stromal@meta.data 89 | write.csv(meta_stromal, "meta_stromal.csv") 90 | 91 | # plasma = remaining cells 92 | plasma<-subset(data, idents = c("1", "16", "12", "23", "5", "8")) 93 | DefaultAssay(plasma)<-"integrated" 94 | plasma<- RunPCA(plasma) 95 | plasma <- RunUMAP(plasma, dims = 1:30) 96 | plasma <-FindNeighbors(plasma, dims = 1:30) 97 | plasma <-FindClusters(plasma, resolution = 0.25) 98 | Idents(plasma)<-"integrated_snn_res.0.25" 99 | DimPlot(plasma, label=T) 100 | DefaultAssay(plasma)<-"RNA" 101 | #markers_plasma <- FindAllMarkers(plasma, only.pos = TRUE) 102 | new.idents<-c("IgA_plasma", "IgA_plasma", "IgG_plasma", "IgA_plasma", "IgG_plasma", "MT-Hi_plasma", "IgM_plasma", "IgA_plasma", "IgM_plasma") 103 | names(new.idents) <- levels(plasma) 104 | plasma=RenameIdents(plasma,new.idents) 105 | DimPlot(plasma, label=T) 106 | plasma[["celltypes"]] <- Idents(object = plasma) 107 | DimPlot(plasma) 108 | meta_plasma<-plasma@meta.data 109 | write.csv(meta_plasma, "meta_plasma.csv") 110 | 111 | # create meta_all 112 | colnames(meta_stromal) 113 | colnames(meta_leuko) 114 | colnames(meta_plasma) 115 | colnames(meta_epi) 116 | meta_plasma<-meta_plasma[c(1,29)] 117 | meta_leuko<-meta_leuko[c(1,29)] 118 | meta_stromal<-meta_stromal[c(1,29)] 119 | meta_epi <- meta_epi[c(1,29)] 120 | x<-rbind(meta_stromal, meta_leuko) 121 | x<-rbind(x, meta_plasma) 122 | x<-rbind(x, meta_epi) 123 | write.csv(x, "Data/meta_all2.csv") 124 | 125 | # add celltypes 126 | x$NAME<-rownames(x) 127 | x<-x[,c(2,3)] 128 | row.names(x)<-NULL 129 | CellsMeta<-data.frame(data@meta.data) 130 | CellsMeta$NAME<-row.names(CellsMeta) 131 | row.names(CellsMeta)=NULL 132 | keeping.order <- function(data, fn, ...) { 133 | col <- ".sortColumn" 134 | data[,col] <- 1:nrow(data) 135 | out <- fn(data, ...) 136 | if (!col %in% colnames(out)) stop("Ordering column not preserved by function") 137 | out <- out[order(out[,col]),] 138 | out[,col] <- NULL 139 | out 140 | } 141 | CellsMeta<-keeping.order(CellsMeta, merge, y=x, by = "NAME") 142 | CellsMeta<-CellsMeta[,c(1,29)] 143 | rownames(CellsMeta)<-CellsMeta$NAME 144 | data<-AddMetaData(data, CellsMeta) 145 | Idents(data)=data$celltypes 146 | 147 | 148 | DimPlot(data, label = T, repel = T) 149 | CellsMeta<-data.frame(data@meta.data) 150 | CellsMeta$NAME<-row.names(CellsMeta) 151 | row.names(CellsMeta)=NULL 152 | 153 | severity<-read.csv("severity_patients_3_2021.csv", sep=";") 154 | severity<-severity[-6,] 155 | severity$Final_HTO<-droplevels(severity$Final_HTO) 156 | dim(CellsMeta) 157 | CellsMeta<-merge(CellsMeta, severity, by="Final_HTO") 158 | dim(CellsMeta) 159 | 160 | CellsMeta<-CellsMeta[,c(28,30,31)] 161 | rownames(CellsMeta)<-CellsMeta$NAME 162 | dim(data) 163 | data<-AddMetaData(data, CellsMeta) 164 | dim(data) 165 | Idents(data)=data$celltypes 166 | 167 | data = subset(data, subset = celltypes != "Mt") 168 | data = subset(data, subset = celltypes != "MT_Hi_T") 169 | data = subset(data, subset = celltypes != "Doublets") 170 | data = subset(data, subset = celltypes != "MT_Hi_B") 171 | data = subset(data, subset = celltypes != "MT-Hi_plasma") 172 | data = subset(data, subset = celltypes != "Mt-Hi_stromal") 173 | data = subset(data, subset = celltypes != "MT-Hi-enterocyte") 174 | 175 | Idents(data)=data$celltypes 176 | DimPlot(data, label = T, repel = T) 177 | 178 | DefaultAssay(data)<-"integrated" 179 | data <- RunUMAP(data, dims = 1:30) 180 | data <-FindNeighbors(data, dims = 1:30) 181 | data<-FindClusters(data, resolution = 0.4) 182 | dim(data) 183 | Idents(data)=data$celltypes 184 | DimPlot(data, label = T, repel = T, order = c("MAIT","WNT2B+", "Absorptive_enterocyte", "IgA_plasma", "WNT5B+", "Aborptive_TA")) + NoLegend() 185 | 186 | ggsave("dimplot_data2march2021.pdf", height=15, width=15) 187 | 188 | saveRDS(data, "Data/PSC_processed_march16_2021.rds") 189 | 190 | # create figures and proceed to DE analysis 191 | -------------------------------------------------------------------------------- /PSC/DE_analysis (PSC_I and UC_I per cell type)_01062022.R: -------------------------------------------------------------------------------- 1 | ####################################### 2 | # Generate DE list per cell type # 3 | # between PSC-I and UC-I # 4 | ####################################### 5 | 6 | 7 | 8 | ####################################### 9 | # library # 10 | ####################################### 11 | 12 | library(dplyr) 13 | library(Seurat) 14 | library(patchwork) 15 | library(ggplot2) 16 | library(readr) 17 | library(tidyr) 18 | library(MAST) 19 | library(readxl) 20 | library(openxlsx) 21 | library(enrichR) 22 | 23 | 24 | ####################################### 25 | # function # 26 | ####################################### 27 | 28 | DE_list <- function(celltype, cell_type, path1, path2){ 29 | celltype <- FindMarkers(PSC, subset.ident = cell_type, group.by = "state", test.use = "MAST", ident.1 = "PSC-I", ident.2 = "UC-I") 30 | celltype <- filter(celltype, celltype$p_val_adj < 0.05) 31 | # create a 'state' column 32 | celltype$state = NA 33 | celltype = as.data.frame(celltype) 34 | celltype$state = ifelse(celltype$avg_log2FC > 0,"PSC_I","UC_I") 35 | # create a 'gene' column 36 | celltype$gene = NA 37 | celltype$gene = rownames(celltype) 38 | # separate PSC_I up gene list and UC_I up gene list 39 | PSC_I_up_celltype = filter(celltype, celltype$state == "PSC_I") 40 | UC_I_up_celltype = filter(celltype, celltype$state == "PSC_I") 41 | write.csv(PSC_I_up_celltype, path1) 42 | write.csv(UC_I_up_celltype, path2) 43 | } 44 | 45 | 46 | ######################### 47 | # main code # 48 | ######################### 49 | 50 | PSC <- readRDS("/Applications/Chapters/4 PSC_Werna/PSC_processed_march16_2021.rds") 51 | 52 | # how many cell types in PSC dataset? 53 | unique(PSC$celltypes) 54 | 55 | #[1] CD8T Cycling_TA IgA_plasma Absorptive_enterocyte 56 | #[5] Immature_enterocyte Glia IgG_plasma MAIT 57 | #[9] Myofibroblasts DUOX2_enterocyte Mature_B PLCG2_TA 58 | #[13] REG_TA RSPO3_fibroblast Stem IgM_plasma 59 | #[17] DC Treg Activated_B Macrophage 60 | #[21] Tuft Enteroendocrine Memory_CD4T BEST4_enterocyte 61 | #[25] Endothelial WNT5B_fibroblast Pericytes Inflammatory_fibroblast 62 | #[29] WNT2B_fibroblast Activated_cycling_B Inflammatory_monocyte Absorptive_TA 63 | #[33] Ribo_TA Cycling_T Goblet Immature_goblet 64 | #[37] MAST 65 | 66 | # PSC-I vs UC-I 67 | Idents(PSC) <- "celltypes" 68 | 69 | # Apply each cell type to function 70 | 71 | CD8T <- DE_list(CD8T, "CD8T", "/Users/s.qs/Desktop/DE list/PSC_I_up_CD8T.csv", "/Users/s.qs/Desktop/DE list/UC_I_up_CD8T.csv") 72 | Cycling_TA <- DE_list(Cycling_TA, "Cycling_TA", "/Users/s.qs/Desktop/DE list/PSC_I_up_Cycling_TA.csv", "/Users/s.qs/Desktop/DE list/UC_I_up_Cycling_TA.csv") 73 | IgA_plasma <- DE_list(IgA_plasma, "IgA_plasma", "/Users/s.qs/Desktop/DE list/PSC_I_up_IgA_plasma.csv", "/Users/s.qs/Desktop/DE list/UC_I_up_IgA_plasma.csv") 74 | Absorptive_enterocyte <- DE_list(Absorptive_enterocyte, "Absorptive_enterocyte", "/Users/s.qs/Desktop/DE list/PSC_I_up_Absorptive_enterocyte.csv", "/Users/s.qs/Desktop/DE list/UC_I_up_Absorptive_enterocyte.csv") 75 | Immature_enterocyte <- DE_list(Immature_enterocyte, "Immature_enterocyte", "/Users/s.qs/Desktop/DE list/PSC_I_up_Immature_enterocyte.csv", "/Users/s.qs/Desktop/DE list/UC_I_up_Immature_enterocyte.csv") 76 | Glia <- DE_list(Glia, "Glia", "/Users/s.qs/Desktop/DE list/PSC_I_up_Glia.csv", "/Users/s.qs/Desktop/DE list/UC_I_up_Glia.csv") 77 | IgG_plasma <- DE_list(IgG_plasma, "IgG_plasma", "/Users/s.qs/Desktop/DE list/PSC_I_up_IgG_plasma.csv", "/Users/s.qs/Desktop/DE list/UC_I_up_IgG_plasma.csv") 78 | MAIT <- DE_list(MAIT, "MAIT", "/Users/s.qs/Desktop/DE list/MAIT/PSC_I_up_MAIT.csv", "/Users/s.qs/Desktop/DE list/MAIT/UC_I_up_MAIT.csv") 79 | Myofibroblasts <- DE_list(Myofibroblasts, "Myofibroblasts", "/Users/s.qs/Desktop/DE list/Myofibroblasts/PSC_I_up_Myofibroblasts.csv", "/Users/s.qs/Desktop/DE list/Myofibroblasts/UC_I_up_Myofibroblasts.csv") 80 | DUOX2_enterocyte <- DE_list(DUOX2_enterocyte, "DUOX2_enterocyte", "/Users/s.qs/Desktop/DE list/DUOX2_enterocyte/PSC_I_up_DUOX2_enterocyte.csv", "/Users/s.qs/Desktop/DE list/DUOX2_enterocyte/UC_I_up_DUOX2_enterocyte.csv") 81 | Mature_B <- DE_list(Mature_B, "Mature_B", "/Users/s.qs/Desktop/DE list/Mature_B/PSC_I_up_Mature_B.csv", "/Users/s.qs/Desktop/DE list/Mature_B/UC_I_up_Mature_B.csv") 82 | PLCG2_TA <- DE_list(PLCG2_TA, "PLCG2_TA", "/Users/s.qs/Desktop/DE list/PLCG2_TA/PSC_I_up_PLCG2_TA.csv", "/Users/s.qs/Desktop/DE list/PLCG2_TA/UC_I_up_PLCG2_TA.csv") 83 | REG_TA <- DE_list(REG_TA, "REG_TA", "/Users/s.qs/Desktop/DE list/REG_TA/PSC_I_up_REG_TA.csv", "/Users/s.qs/Desktop/DE list/REG_TA/UC_I_up_REG_TA.csv") 84 | RSPO3_fibroblast <- DE_list(RSPO3_fibroblast, "RSPO3_fibroblast", "/Users/s.qs/Desktop/DE list/RSPO3_fibroblast/PSC_I_up_RSPO3_fibroblast.csv", "/Users/s.qs/Desktop/DE list/RSPO3_fibroblast/UC_I_up_RSPO3_fibroblast.csv") 85 | Stem <- DE_list(Stem, "Stem", "/Users/s.qs/Desktop/DE list/Stem/PSC_I_up_Stem.csv", "/Users/s.qs/Desktop/DE list/Stem/UC_I_up_Stem.csv") 86 | IgM_plasma <- DE_list(IgM_plasma, "IgM_plasma", "/Users/s.qs/Desktop/DE list/IgM_plasma/PSC_I_up_IgM_plasma.csv", "/Users/s.qs/Desktop/DE list/IgM_plasma/UC_I_up_IgM_plasma.csv") 87 | DC <- DE_list(DC, "DC", "/Users/s.qs/Desktop/DE list/DC/PSC_I_up_DC.csv", "/Users/s.qs/Desktop/DE list/DC/UC_I_up_DC.csv") 88 | Treg <- DE_list(Treg, "Treg", "/Users/s.qs/Desktop/DE list/Treg/PSC_I_up_TregC.csv", "/Users/s.qs/Desktop/DE list/Treg/UC_I_up_Treg.csv") 89 | Activated_B <- DE_list(Activated_B, "Activated_B", "/Users/s.qs/Desktop/DE list/Activated_B/PSC_I_up_Activated_B.csv", "/Users/s.qs/Desktop/DE list/Activated_B/UC_I_up_Activated_B.csv") 90 | Macrophage <- DE_list(Macrophage, "Macrophage", "/Users/s.qs/Desktop/DE list/Macrophage/PSC_I_up_Macrophage.csv", "/Users/s.qs/Desktop/DE list/Macrophage/UC_I_up_Macrophage.csv") 91 | Tuft <- DE_list(Tuft, "Tuft", "/Users/s.qs/Desktop/DE list/Tuft/PSC_I_up_Tuft.csv", "/Users/s.qs/Desktop/DE list/Tuft/UC_I_up_Tuft.csv") 92 | Enteroendocrine <- DE_list(Enteroendocrine, "Enteroendocrine", "/Users/s.qs/Desktop/DE list/Enteroendocrine/PSC_I_up_Enteroendocrine.csv", "/Users/s.qs/Desktop/DE list/Enteroendocrine/UC_I_up_Enteroendocrine.csv") 93 | Memory_CD4T <- DE_list(Memory_CD4T, "Memory_CD4T", "/Users/s.qs/Desktop/DE list/Memory_CD4T/PSC_I_up_Memory_CD4T.csv", "/Users/s.qs/Desktop/DE list/Memory_CD4T/UC_I_up_Memory_CD4T.csv") 94 | BEST4_enterocyte <- DE_list(BEST4_enterocyte, "BEST4_enterocyte", "/Users/s.qs/Desktop/DE list/BEST4_enterocyte/PSC_I_up_BEST4_enterocyte.csv", "/Users/s.qs/Desktop/DE list/BEST4_enterocyte/UC_I_up_BEST4_enterocyte.csv") 95 | Endothelial <- DE_list(Endothelial, "Endothelial", "/Users/s.qs/Desktop/DE list/Endothelial/PSC_I_up_Endothelial.csv", "/Users/s.qs/Desktop/DE list/Endothelial/UC_I_up_Endothelial.csv") 96 | WNT5B_fibroblast <- DE_list(WNT5B_fibroblast, "WNT5B_fibroblast", "/Users/s.qs/Desktop/DE list/WNT5B_fibroblast/PSC_I_up_WNT5B_fibroblast.csv", "/Users/s.qs/Desktop/DE list/WNT5B_fibroblast/UC_I_up_WNT5B_fibroblast.csv") 97 | Pericytes <- DE_list(Pericytes, "Pericytes", "/Users/s.qs/Desktop/DE list/Pericytes/PSC_I_up_Pericytes.csv", "/Users/s.qs/Desktop/DE list/Pericytes/UC_I_up_Pericytes.csv") 98 | Inflammatory_fibroblast <- DE_list(Inflammatory_fibroblast, "Inflammatory_fibroblast", "/Users/s.qs/Desktop/DE list/Inflammatory_fibroblast/PSC_I_up_Inflammatory_fibroblast.csv", "/Users/s.qs/Desktop/DE list/Inflammatory_fibroblast/UC_I_up_Inflammatory_fibroblast.csv") 99 | WNT2B_fibroblast <- DE_list(WNT2B_fibroblast, "WNT2B_fibroblast", "/Users/s.qs/Desktop/DE list/WNT2B_fibroblast/PSC_I_up_WNT2B_fibroblast.csv", "/Users/s.qs/Desktop/DE list/WNT2B_fibroblast/UC_I_up_WNT2B_fibroblast.csv") 100 | Activated_cycling_B <- DE_list(Activated_cycling_B, "Activated_cycling_B", "/Users/s.qs/Desktop/DE list/Activated_cycling_B/PSC_I_up_Activated_cycling_B.csv", "/Users/s.qs/Desktop/DE list/Activated_cycling_B/UC_I_up_Activated_cycling_B.csv") 101 | Inflammatory_monocyte <- DE_list(Inflammatory_monocyte, "Inflammatory_monocyte", "/Users/s.qs/Desktop/DE list/Inflammatory_monocyte/PSC_I_up_Inflammatory_monocyte.csv", "/Users/s.qs/Desktop/DE list/Inflammatory_monocyte/UC_I_up_Inflammatory_monocyte.csv") 102 | Absorptive_TA <- DE_list(Absorptive_TA, "Absorptive_TA", "/Users/s.qs/Desktop/DE list/Absorptive_TA/PSC_I_up_Absorptive_TA.csv", "/Users/s.qs/Desktop/DE list/Absorptive_TA/UC_I_up_Absorptive_TA.csv") 103 | Ribo_TA <- DE_list(Ribo_TA, "Ribo_TA", "/Users/s.qs/Desktop/DE list/Ribo_TA/PSC_I_up_Ribo_TA.csv", "/Users/s.qs/Desktop/DE list/Ribo_TA/UC_I_up_Ribo_TA.csv") 104 | Cycling_T <- DE_list(Cycling_T, "Cycling_T", "/Users/s.qs/Desktop/DE list/Cycling_T/PSC_I_up_Cycling_T.csv", "/Users/s.qs/Desktop/DE list/Cycling_T/UC_I_up_Cycling_T.csv") 105 | Goblet <- DE_list(Goblet, "Goblet", "/Users/s.qs/Desktop/DE list/Goblet/PSC_I_up_Goblet.csv", "/Users/s.qs/Desktop/DE list/Goblet/UC_I_up_Goblet.csv") 106 | Immature_goblet <- DE_list(Immature_goblet, "Immature_goblet", "/Users/s.qs/Desktop/DE list/Immature_goblet/PSC_I_up_Immature_goblet.csv", "/Users/s.qs/Desktop/DE list/Immature_goblet/UC_I_up_Immature_goblet.csv") 107 | MAST <- DE_list(MAST, "MAST", "/Users/s.qs/Desktop/DE list/MAST/PSC_I_up_MAST.csv", "/Users/s.qs/Desktop/DE list/MAST/UC_I_up_MAST.csv") 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /DDTx/ddtx_compartment_assignment_demuxlet.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "ddtx_compartment_assignment_demuxlet" 3 | author: "adapted from Roy Oelen" 4 | date: "2023-06-16" 5 | output: html_document 6 | --- 7 | 8 | ```{r header, include=FALSE} 9 | ############################################################################################################################ 10 | # Authors: Roy Oelen 11 | # Name: ddtx_compartment_assignment.Rmd 12 | # Function: assign compartments to the cells 13 | ############################################################################################################################ 14 | ``` 15 | 16 | ```{r libraries, include=FALSE} 17 | knitr::opts_chunk$set(echo = FALSE) 18 | #################### 19 | # libraries # 20 | #################### 21 | 22 | # for the object containing meta and count data 23 | library(Seurat) 24 | # for plotting 25 | library(ggplot2) 26 | library(cowplot) 27 | 28 | ``` 29 | 30 | ```{r functions, include=FALSE} 31 | knitr::opts_chunk$set(echo = FALSE) 32 | #################### 33 | # Functions # 34 | #################### 35 | 36 | get_average_expression_per_group <- function(seurat_object, metadata_column, genes, use_sct=F){ 37 | # initialize the table 38 | expression_table <- NULL 39 | # check each group 40 | for(group in unique(seurat_object@meta.data[[metadata_column]])){ 41 | # we won't do NA, obviously 42 | if(!is.na(group)){ 43 | # subset to that group 44 | seurat_object_group <- seurat_object[, !is.na(seurat_object@meta.data[[metadata_column]]) & seurat_object@meta.data[[metadata_column]] == group] 45 | # put per gene in a list 46 | exp_per_list <- list() 47 | # check each gene 48 | for(gene in genes){ 49 | # get the mean expression 50 | mean_expression <- NULL 51 | # depending on the assay 52 | if(use_sct){ 53 | mean_expression <- mean(as.vector(unlist(seurat_object_group@assays$SCT@counts[gene, ]))) 54 | } 55 | else{ 56 | mean_expression <- mean(as.vector(unlist(seurat_object_group@assays$RNA@data[gene, ]))) 57 | } 58 | # put in the list 59 | exp_per_list[[gene]] <- mean_expression 60 | } 61 | # turn into a row 62 | exp_row <- data.frame(exp_per_list) 63 | # add the group 64 | exp_row[['group']] <- group 65 | # set the correct order 66 | exp_row <- exp_row[, c('group', genes)] 67 | # add to the big matrix 68 | if(is.null(expression_table)){ 69 | expression_table <- exp_row 70 | } 71 | else{ 72 | expression_table <- rbind(expression_table, exp_row) 73 | } 74 | } 75 | } 76 | return(expression_table) 77 | } 78 | 79 | 80 | ``` 81 | 82 | ```{r setup, include=FALSE} 83 | knitr::opts_chunk$set(echo = FALSE) 84 | #################### 85 | # Main Code # 86 | #################### 87 | 88 | # locations of the objects 89 | seurat_objects_loc <- '/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/' 90 | seurat_object_clustered_loc <- paste(seurat_objects_loc, 'ddtx_merged_demultiplexed_clustered_demuxlet.rds', sep = '') 91 | seurat_object_compartment_loc <- paste(seurat_objects_loc, 'ddtx_merged_demultiplexed_clustered_compartment_demuxlet.rds', sep = '') 92 | # location of the tables 93 | tables_loc <- '/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/cell-type-classification/marker_genes/tables/' 94 | 95 | ``` 96 | 97 | ```{r read_object, include=FALSE} 98 | knitr::opts_chunk$set(echo = FALSE) 99 | 100 | # read the object 101 | ddtx <- readRDS(seurat_object_clustered_loc) 102 | 103 | ``` 104 | 105 | ```{r avg_cluster_expression, include=FALSE} 106 | knitr::opts_chunk$set(echo = FALSE) 107 | 108 | # get the mean expression of marker genes per cluster 109 | avg_exp_cluster <- get_average_expression_per_group(ddtx, 'seurat_clusters', c('EPCAM', 'PTPRC', 'THY1', 'SOX10', 'MADCAM1', 'CD27')) 110 | # write this somewhere 111 | write.table(avg_exp_cluster, paste(tables_loc, 'cluster_mean_exp_markers.tsv', sep = ''), row.names = F, col.names = T) 112 | 113 | ``` 114 | 115 | ```{r create_features_plots, include=FALSE} 116 | knitr::opts_chunk$set(echo = FALSE) 117 | 118 | # switch to the RNA slot, that is the one we also used in previous studies for this 119 | DefaultAssay(ddtx) <- 'RNA' 120 | # create the features plots 121 | epcam_feature <- FeaturePlot(ddtx, reduction = 'umap', features = c('EPCAM')) 122 | cd45_feature <- FeaturePlot(ddtx, reduction = 'umap', features = c('PTPRC')) 123 | cd27_feature <- FeaturePlot(ddtx, reduction = 'umap', features = c('CD27')) 124 | thy1_feature <- FeaturePlot(ddtx, reduction = 'umap', features = c('THY1')) 125 | sox10_feature <- FeaturePlot(ddtx, reduction = 'umap', features = c('SOX10')) 126 | madcam1_feature <- FeaturePlot(ddtx, reduction = 'umap', features = c('MADCAM1')) 127 | 128 | ``` 129 | 130 | ## marker gene features 131 | ```{r show_features_plots, include=TRUE, fig.width=10, fig.height=10} 132 | knitr::opts_chunk$set(echo = FALSE) 133 | 134 | # show the features 135 | plot_grid(epcam_feature, cd45_feature, cd27_feature, thy1_feature, sox10_feature, madcam1_feature) 136 | 137 | ``` 138 | 139 | ```{r positivity_assignment, include=FALSE} 140 | knitr::opts_chunk$set(echo = FALSE) 141 | 142 | # set the positivity for the markers 143 | ddtx@meta.data$epcam_positive <- NA 144 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['EPCAM']] >= 1, 'group'], 'epcam_positive'] <- T 145 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['EPCAM']] < 1, 'group'], 'epcam_positive'] <- F 146 | ddtx@meta.data$cd45_positive <- NA 147 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['PTPRC']] >= 1, 'group'], 'cd45_positive'] <- T 148 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['PTPRC']] < 1, 'group'], 'cd45_positive'] <- F 149 | ddtx@meta.data$thy1_positive <- NA 150 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['THY1']] >= 0.5, 'group'], 'thy1_positive'] <- T 151 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['THY1']] < 0.5, 'group'], 'thy1_positive'] <- F 152 | ddtx@meta.data$sox10_positive <- NA 153 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['SOX10']] >= 0.5, 'group'], 'sox10_positive'] <- T 154 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['SOX10']] < 0.5, 'group'], 'sox10_positive'] <- F 155 | ddtx@meta.data$madcam1_positive <- NA 156 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['MADCAM1']] >= 0.2, 'group'], 'madcam1_positive'] <- T 157 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['MADCAM1']] < 0.2, 'group'], 'madcam1_positive'] <- F 158 | ddtx@meta.data$cd27_positive <- NA 159 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['CD27']] >= 0.3, 'group'], 'cd27_positive'] <- T 160 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['CD27']] < 0.3, 'group'], 'cd27_positive'] <- F 161 | 162 | # and create the plots 163 | epcam_positive <- DimPlot(ddtx, reduction = 'umap', group.by = 'epcam_positive') + scale_color_manual(values = c('gray', 'red')) 164 | cd45_positive <- DimPlot(ddtx, reduction = 'umap', group.by = 'cd45_positive') + scale_color_manual(values = c('gray', 'red')) 165 | cd27_positive <- DimPlot(ddtx, reduction = 'umap', group.by = 'cd27_positive') + scale_color_manual(values = c('gray', 'red')) 166 | thy1_positive <- DimPlot(ddtx, reduction = 'umap', group.by = 'thy1_positive') + scale_color_manual(values = c('gray', 'red')) 167 | sox10_positive <- DimPlot(ddtx, reduction = 'umap', group.by = 'sox10_positive') + scale_color_manual(values = c('gray', 'red')) 168 | madcam1_positive <- DimPlot(ddtx, reduction = 'umap', group.by = 'madcam1_positive') + scale_color_manual(values = c('gray', 'red')) 169 | 170 | ``` 171 | 172 | ### marker genes and the positive clusters 173 | ```{r show_positivity, include=TRUE, fig.width=10, fig.height=30} 174 | knitr::opts_chunk$set(echo = FALSE) 175 | 176 | # view everything 177 | plot_grid(epcam_feature, epcam_positive, 178 | cd45_feature, cd45_positive, 179 | cd27_feature, cd27_positive, 180 | thy1_feature, thy1_positive, 181 | sox10_feature, sox10_positive, 182 | madcam1_feature, madcam1_positive, 183 | ncol = 2, nrow = 6) 184 | 185 | ``` 186 | 187 | ```{r compartment_assignment, include=FALSE} 188 | knitr::opts_chunk$set(echo = FALSE) 189 | 190 | # assign the compartment 191 | ddtx@meta.data$compartment <- 'none' 192 | ddtx@meta.data[ddtx@meta.data$epcam_positive == T, 'compartment'] <- 'epithelial' 193 | ddtx@meta.data[ddtx@meta.data$cd45_positive == T | 194 | ddtx@meta.data$cd27_positive == T, 'compartment'] <- 'immune' 195 | ddtx@meta.data[ddtx@meta.data$thy1_positive == T | 196 | ddtx@meta.data$sox10_positive == T | 197 | ddtx@meta.data$madcam1_positive == T, 'compartment'] <- 'stromal' 198 | 199 | ``` 200 | 201 | ```{r create_compartment_plot, include=FALSE} 202 | knitr::opts_chunk$set(echo = FALSE) 203 | 204 | # create the plot of the compartment assignment 205 | p_compartment <- DimPlot(ddtx, reduction = 'umap', group.by = 'compartment', label = FALSE, label.size = 3, repel = TRUE, raster = FALSE) + ggtitle('compartment') 206 | 207 | ``` 208 | 209 | ### final compartment assignment 210 | 211 | ```{r show_plots, include = TRUE, fig.width=10, fig.height=10} 212 | knitr::opts_chunk$set(echo = FALSE) 213 | 214 | p_compartment 215 | ``` 216 | 217 | ```{r save, include=FALSE} 218 | # save the result 219 | saveRDS(ddtx, seurat_object_compartment_loc) 220 | ``` 221 | -------------------------------------------------------------------------------- /DDTx/cell_number_tables.py: -------------------------------------------------------------------------------- 1 | ### original script by R. Oelen 2 | 3 | import tensorflow as tf 4 | import pandas as pd 5 | import csv 6 | import pickle as pkl 7 | import matplotlib.pyplot as plt 8 | from sccoda.util import cell_composition_data as dat 9 | from sccoda.util import data_visualization as viz 10 | from sccoda.util import comp_ana as mod 11 | import os 12 | 13 | # create class we will use 14 | class ProportionAnalyzer: 15 | """ 16 | Object to load the count data, set parameters and fetch results 17 | """ 18 | def __init__(self, count_data_loc, treatment_column='donor_recipient', exclusion_columns='ACR_grade', covariates_columns=[], fdr=0.05, sep='\t', reference=None): 19 | """constructor 20 | 21 | Parameters 22 | ---------- 23 | count_data_loc : str 24 | The location of the count data to load 25 | treatment_column : str, optional 26 | The column that contains the data upon which to compare conditions or treatments 27 | exclusion_columns : list, optional 28 | The columns that are removed from the count data (for example covariates that are not used, or cell types to exclude) 29 | covariates_columns : list, optional 30 | The columns that contain the covariates to use 31 | fdr : float, optional 32 | The FDR cutoff to use 33 | sep : str, optional 34 | The separator for reading the count data 35 | 36 | """ 37 | # set up class variables from constructor 38 | self.__count_data_loc = count_data_loc 39 | self.__treatment_column = treatment_column 40 | self.__exclusion_columns = exclusion_columns 41 | self.__covariates_columns = covariates_columns 42 | self.__fdr = fdr 43 | self.__sep = sep 44 | # other variables that will be populated later 45 | self.__raw_counts = None 46 | self.__filtered_counts = None 47 | self.__sccoda = None 48 | self.__reference = reference 49 | # set up the count data 50 | self.__setup_count_data() 51 | # set up the reference 52 | if self.__reference is None: 53 | self.__setup_reference() 54 | # of course we will at some point have a result 55 | self.__has_run = False 56 | self.__formula = None 57 | self.__model = None 58 | self.__result = None 59 | # set up a seed so that the results are always the same 60 | tf.random.uniform([1], seed=7777) 61 | tf.random.set_seed(7777) 62 | 63 | def __setup_reference(self): 64 | """set up the reference for the model 65 | 66 | """ 67 | # get the unique entries possible 68 | levels = self.__filtered_counts[self.__treatment_column].unique() 69 | # the first level is the reference 70 | self.__reference = levels[0] 71 | 72 | 73 | def __make_columns_safe(self): 74 | """rename columns so that they don't contain quotes 75 | 76 | """ 77 | # get the current column names 78 | current_colnames = self.__raw_counts.columns 79 | # create a new list of columns 80 | new_colnames = [] 81 | # replace each occurence 82 | for old_column in current_colnames: 83 | # replace 84 | new_column = old_column.replace('"', '') 85 | # add to list 86 | new_colnames.append(new_column) 87 | 88 | # zip to create a mapping from the old to the new column names 89 | columnname_mapping = dict(zip(current_colnames, new_colnames)) 90 | # and do the actual renaming 91 | self.__raw_counts.rename(columnname_mapping, axis = 'columns', inplace = True) 92 | 93 | 94 | def __setup_count_data(self): 95 | """setup the count data to be used for the analysis 96 | 97 | """ 98 | # read the count data 99 | self.__raw_counts = pd.read_csv(self.__count_data_loc, sep = self.__sep, quoting = csv.QUOTE_NONE) 100 | # make the columns safe 101 | self.__make_columns_safe() 102 | # make a copy from which we will remove some columns 103 | self.__filtered_counts = self.__raw_counts.copy(deep = True) 104 | # remove the columns that we don't need 105 | self.__filtered_counts.drop(self.__exclusion_columns, axis = 'columns', inplace = True) 106 | # set up the sccoda object 107 | self.__sccoda = dat.from_pandas(self.__filtered_counts, covariate_columns = self.__covariates_columns + [self.__treatment_column]) 108 | 109 | 110 | def create_boxplot(self): 111 | """plot the counts 112 | 113 | """ 114 | # create the plot 115 | viz.boxplots(self.__sccoda, feature_name = self.__treatment_column) 116 | plt.tight_layout() 117 | 118 | 119 | def run_model(self): 120 | """run the model 121 | 122 | """ 123 | # set up the covariates 124 | covariates_formula = '' 125 | for covariate in self.__covariates_columns: 126 | covariates_formula = ''.join([covariates_formula, covariate, ' + ']) 127 | # set up treatment 128 | treatment_formula = "C(" + self.__treatment_column + ", Treatment('" + self.__reference + "'))" 129 | # create formula 130 | self.__formula = covariates_formula + treatment_formula 131 | # create the model 132 | self.__model = mod.CompositionalAnalysis(self.__sccoda, formula = self.__formula) 133 | # run the analysis 134 | self.__result = self.__model.sample_hmc() 135 | # we ran the analysis, so let's say so 136 | self.__has_run = True 137 | 138 | 139 | def get_result(self): 140 | """return the result of the model 141 | 142 | Returns 143 | ------- 144 | result 145 | The result of the compositional analysis 146 | """ 147 | # check if we have run the model 148 | if self.__has_run: 149 | return self.__result 150 | else: 151 | print('model has not been run, returning None') 152 | return None 153 | 154 | 155 | def get_summary(self, fdr=None): 156 | """get a summary of the results for a specific FDR 157 | 158 | Parameters 159 | ---------- 160 | fdr : float, optional 161 | The FDR cutoff to use 162 | 163 | Returns 164 | ------- 165 | result 166 | summary of the compositional analysis 167 | """ 168 | # see if an fdr was supplied 169 | fdr_to_use=None 170 | if fdr is None: 171 | fdr_to_use = self.__fdr 172 | else: 173 | fdr_to_use = fdr 174 | 175 | # check if we have run the model 176 | if self.__has_run: 177 | # if so, return a summary 178 | summary = self.__result.summary_prepare([fdr_to_use]) 179 | return(summary) 180 | else: 181 | print('model has not been run, returning None') 182 | return None 183 | 184 | 185 | def get_raw_counts(self): 186 | """get the raw counts 187 | 188 | Returns 189 | ------- 190 | pandas.DataFrame 191 | The raw counts before filtering columns 192 | 193 | """ 194 | return self.__raw_counts 195 | 196 | 197 | def get_counts(self): 198 | """get the cell counts 199 | 200 | Returns 201 | ------- 202 | pandas.Dataframe 203 | The filtered cell counts 204 | 205 | """ 206 | return self.__filtered_counts 207 | 208 | 209 | def get_sccoda(self): 210 | """get the sccoda input object 211 | 212 | Returns 213 | ------- 214 | sccoda 215 | The filtered cell counts 216 | 217 | """ 218 | return self.__sccoda 219 | 220 | 221 | def get_fdr(self): 222 | """get the fdr set 223 | 224 | Returns 225 | ------- 226 | float 227 | The fdr used 228 | 229 | """ 230 | return self.__fdr 231 | 232 | 233 | 234 | 235 | # the locations of the cell proportion files 236 | proportions_table_loc = '/xx/ddtx/ongoing/differential_proportion/cell_number_tables/donor_recipient/' 237 | # locations of the specific cell type tables 238 | proportions_table_compartment_loc = ''.join([proportions_table_loc, 'cell_numbers_compartment_donor_recipient.tsv']) 239 | proportions_table_adult_elmentaite_martin_immune_loc = ''.join([proportions_table_loc, 'cell_numbers_adult_elmentaite_martin_immune.tsv']) 240 | # the column of the sample 241 | sample_column = 'timepoint' 242 | # the column of the inflammation status 243 | inflammation_column = 'donor_recipient' 244 | # the FDR to use 245 | fdr = 0.05 246 | 247 | 248 | 249 | # perform the analysis for the compartment 250 | compartment_analysis = ProportionAnalyzer(count_data_loc = proportions_table_compartment_loc, exclusion_columns = ['sample_status_timepoint', 'ACR_grade'], covariates_columns = [], reference = '"d"') 251 | # show the plot 252 | compartment_analysis.create_boxplot() 253 | # do the analysis 254 | compartment_analysis.run_model() 255 | # get the result 256 | compartment_summary_005 = compartment_analysis.get_summary() 257 | 258 | compartment_summary_005[0] 259 | 260 | compartment_summary_005[1] 261 | 262 | # perform the analysis for the compartment 263 | celltype_analysis = ProportionAnalyzer(count_data_loc = proportions_table_adult_elmentaite_martin_immune_loc, exclusion_columns = ['sample_status_timepoint', 'ACR_grade'], covariates_columns = [], reference = '"d"') 264 | # show the plot 265 | celltype_analysis.create_boxplot() 266 | # do the analysis 267 | celltype_analysis.run_model() 268 | # get the result 269 | celltype_summary_005 = celltype_analysis.get_summary() 270 | 271 | celltype_summary_005[0] 272 | 273 | 274 | celltype_summary_005[1] 275 | 276 | # save the results 277 | 278 | result_output_loc = '/xx/ddtx/ongoing/differential_proportion/sccoda/' 279 | # specific files 280 | result_compartment_summary_005_general_summary_loc = ''.join([result_output_loc, 'ddtx_sccoda_compartment_005', '_general_summary.tsv']) 281 | result_compartment_summary_005_covariate_summary_loc = ''.join([result_output_loc, 'ddtx_sccoda_compartment_005', '_covariate_summary.tsv']) 282 | result_celltype_summary_005_general_summary_loc = ''.join([result_output_loc, 'ddtx_sccoda_adult_elmentaite_martin_immune_005', '_general_summary.tsv']) 283 | result_celltype_summary_005_covariate_summary_loc = ''.join([result_output_loc, 'ddtx_sccoda_adult_elmentaite_martin_immune_005', '_covariate_summary.tsv']) 284 | # write the results 285 | compartment_summary_005[0].to_csv(result_compartment_summary_005_general_summary_loc, sep = '\t') 286 | compartment_summary_005[1].to_csv(result_compartment_summary_005_covariate_summary_loc, sep = '\t') 287 | celltype_summary_005[0].to_csv(result_celltype_summary_005_general_summary_loc, sep = '\t') 288 | celltype_summary_005[1].to_csv(result_celltype_summary_005_covariate_summary_loc, sep = '\t') 289 | 290 | 291 | plt.rcParams['font.size'] = '5' 292 | celltype_analysis.create_boxplot() 293 | plt.xticks(rotation=45, ha='right') 294 | plt.savefig('ddtx_celltypes_donor_recipient.png',dpi=1000) 295 | 296 | -------------------------------------------------------------------------------- /Method_comparisons/rebuttal_20210916: -------------------------------------------------------------------------------- 1 | #### script rebuttal methodspaper 2 | 3 | fresh_frozen@meta.data$Predicted_all_categories<-fresh_frozen@meta.data$Predicted_all 4 | 5 | fresh_frozen@meta.data$Predicted_all_categories[fresh_frozen@meta.data$Predicted_all == "Bcell"]<-"immune" 6 | fresh_frozen@meta.data$Predicted_all_categories[fresh_frozen@meta.data$Predicted_all == "Tcell"]<-"immune" 7 | fresh_frozen@meta.data$Predicted_all_categories[fresh_frozen@meta.data$Predicted_all == "myeloid"]<-"immune" 8 | fresh_frozen@meta.data$Predicted_all_categories[fresh_frozen@meta.data$Predicted_all == "Lymphoid"]<-"immune" 9 | 10 | fresh_frozen@meta.data$Predicted_all_categories[fresh_frozen@meta.data$Predicted_all == "endothelial"]<-"stromal" 11 | fresh_frozen@meta.data$Predicted_all_categories[fresh_frozen@meta.data$Predicted_all == "Glia"]<-"stromal" 12 | fresh_frozen@meta.data$Predicted_all_categories[fresh_frozen@meta.data$Predicted_all == "fibroblast"]<-"stromal" 13 | 14 | 15 | VlnPlot(fresh_frozen, group.by="Predicted_all_categories", features="percent.mt", split.by="preservation") 16 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_mito_cell_categories.png", width = 20, height = 20, dpi = 600) 17 | 18 | VlnPlot(data, group.by="Predicted_all_categories", features="percent.mt", split.by="method") 19 | 20 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_mito_cell_categories_noNA.png", width = 20, height = 20, dpi = 600) 21 | 22 | plot1 <- FeatureScatter(fresh_frozen, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Predicted_all_categories") 23 | 24 | immune<-subset(fresh_frozen, subset=Predicted_all_categories=="immune") 25 | epithelium<-subset(fresh_frozen, subset=Predicted_all_categories=="epithelium") 26 | stromal<-subset(fresh_frozen, subset=Predicted_all_categories=="stromal") 27 | 28 | fresh<-subset(fresh_frozen, subset=preservation=="fresh") 29 | frozen<-subset(fresh_frozen, subset=preservation=="cryo") 30 | 31 | plot1 <- FeatureScatter(fresh, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,12000) 32 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_nCountRNA_nGene_celltypes.pdf", width = 10, height = 10, dpi = 600) 33 | 34 | 35 | plot1 <- FeatureScatter(frozen, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,12000) 36 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/frozen_nCountRNA_nGene_celltypes.pdf", width = 10, height = 10, dpi = 600) 37 | 38 | 39 | plot1 <- FeatureScatter(fresh, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,60) 40 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_nCountRNA_percentMt_celltypes.pdf", width = 10, height = 10, dpi = 600) 41 | 42 | 43 | plot1 <- FeatureScatter(frozen, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,60) 44 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/frozen_nCountRNA_percentMt_celltypes.pdf", width = 10, height = 10, dpi = 600) 45 | 46 | VlnPlot(frozen, feature="percent.mt", group.by="Predicted_all") 47 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/frozen_vpln_PercentMt_allcelltypes.pdf", width = 10, height = 10, dpi = 600) 48 | 49 | 50 | plot1 <- FeatureScatter(immune, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="preservation", pt.size=1) 51 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_nCountRNA_PercentMt_immune.png", width = 20, height = 20, dpi = 600) 52 | 53 | plot1 <- FeatureScatter(stromal, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="preservation", pt.size=1) 54 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_nCountRNA_PercentMt_stromal.png", width = 20, height = 20, dpi = 600) 55 | 56 | plot1 <- FeatureScatter(epithelium, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="preservation", pt.size=1) 57 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_nCountRNA_PercentMt_epithelium.png", width = 20, height = 20, dpi = 600) 58 | 59 | plot2 <- FeatureScatter(immune, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="preservation", pt.size=1) 60 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_nCountRNA_nFeature_immune.png", width = 20, height = 20, dpi = 600) 61 | 62 | plot2 <- FeatureScatter(stromal, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="preservation", pt.size=1) 63 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_nCountRNA_nFeature_stromal.png", width = 20, height = 20, dpi = 600) 64 | 65 | plot2 <- FeatureScatter(epithelium, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="preservation", pt.size=1) 66 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_nCountRNA_nFeature_epithelium.png", width = 20, height = 20, dpi = 600) 67 | 68 | sanger<-subset.. 69 | 70 | 71 | 72 | ##### 73 | wholecollagenase<-subset(methodspaper, subset=method=="wholecollagenase") 74 | splitcollagenase<-subset(methodspaper, subset=method=="splitcollagenase") 75 | splitprotease<-subset(methodspaper, subset=method=="splitprotease") 76 | splitprotease_epi<-subset(methodspaper, subset=method=="splitprotease_epi") 77 | 78 | plot1 <- FeatureScatter(wholecollagenase, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,12000) 79 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/wholecollagenase_nCountRNA_nGene_celltypes.pdf", width = 10, height = 10, dpi = 600) 80 | 81 | plot1 <- FeatureScatter(wholecollagenase, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,60) 82 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/wholecollagenase_nCountRNA_percentMt_celltypes.pdf", width = 10, height = 10, dpi = 600) 83 | 84 | plot1 <- FeatureScatter(splitcollagenase, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,12000) 85 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/splitcollagenase_nCountRNA_nGene_celltypes.pdf", width = 10, height = 10, dpi = 600) 86 | 87 | plot1 <- FeatureScatter(splitcollagenase, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,60) 88 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/splitcollagenase_nCountRNA_percentMt_celltypes.pdf", width = 10, height = 10, dpi = 600) 89 | 90 | 91 | plot1 <- FeatureScatter(splitprotease, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,12000) 92 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/splitprotease_nCountRNA_nGene_celltypes.pdf", width = 10, height = 10, dpi = 600) 93 | 94 | plot1 <- FeatureScatter(splitprotease, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,60) 95 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/splitprotease_nCountRNA_percentMt_celltypes.pdf", width = 10, height = 10, dpi = 600) 96 | 97 | plot1 <- FeatureScatter(splitprotease_epi, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,12000) 98 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/splitprotease_epi_nCountRNA_nGene_celltypes.pdf", width = 10, height = 10, dpi = 600) 99 | 100 | plot1 <- FeatureScatter(splitprotease_epi, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,60) 101 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/splitprotease_epi_nCountRNA_percentMt_celltypes.pdf", width = 10, height = 10, dpi = 600) 102 | 103 | 104 | #### 105 | #calculate genes expressed i 10% cells 106 | lp<-subset(smillie, subset=Location=="LP") 107 | dim(lp) 108 | epi<-subset(smillie, subset=Location=="Epi") 109 | dim(epi) 110 | x<-CreateSeuratObject(counts =lp@assays$RNA@counts, min.cells=262) 111 | dim(x) 112 | y<-CreateSeuratObject(counts =epi@assays$RNA@counts, min.cells=267) 113 | dim(y) 114 | genes_fresh<-rownames(x@assays$RNA@counts) 115 | head(genes_fresh) 116 | genes_fresh<-data.frame(rownames(x@assays$RNA@counts)) 117 | genes_frozen<-data.frame(rownames(y@assays$RNA@counts)) 118 | head(genes_fresh) 119 | colnames(genes_fresh)[1]<-"Gene" 120 | colnames(genes_frozen)[1]<-"Gene" 121 | z<-merge(genes_fresh,genes_frozen, by="Gene", all=T) 122 | dim(z) 123 | 124 | 125 | ###### selecting PCs 126 | 127 | methodspaper<-readRDS("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/Processed_data/SCT_100smillie_noNA.rds") 128 | smillie<-subset(methodspaper, subset=Predicted_all=="epithelium") 129 | smillie<-subset(smillie, subset=method=="splitcollagenase") 130 | 131 | DefaultAssay(smillie)<-"RNA" 132 | Idents(smillie)<-"Location" 133 | smillie<-subset(smillie, downsample=2671) 134 | smillie <- NormalizeData(smillie) 135 | all.genes <- rownames(smillie) 136 | smillie <- ScaleData(smillie, features = all.genes) 137 | smillie <- RunPCA(smillie, features = all.genes) 138 | #DimPlot(smillie, reduction = "pca") 139 | #DimPlot(smillie, reduction = "pca", group.by="Location") 140 | #ggsave("Methodspaper/PCA1_2_smillie_epi.png", width = 8, height = 8, dpi = 600) 141 | DE_epi<-FindMarkers(smillie, test.use="MAST", ident.1="epi", ident.2="lp", ) 142 | 143 | ### 144 | #patient specific DE fresh_frozen (not done yet) 145 | 146 | sanger<-subset(fresh_frozen,subset=orig.ident=="Sanger") 147 | takeda<-subset(fresh_frozen,subset=orig.ident=="Takeda") 148 | 149 | Tcells<-subset(sanger, subset = Predicted_all == "Tcell") 150 | Bcells<-subset(sanger, subset = Predicted_all == "Bcell") 151 | Fibroblasts<-subset(sanger, subset = Predicted_all == "fibroblast") 152 | Myeloid<-subset(sanger, subset = Predicted_all == "myeloid") 153 | Epithelium<-subset(sanger, subset = Predicted_all == "epithelium") 154 | Endothelial<-subset(sanger, subset = Predicted_all == "endothelial") 155 | Lymphoid<-subset(sanger, subset = Predicted_all == "Lymphoid") 156 | Glia<-subset(sanger, subset = Predicted_all == "Glia") 157 | 158 | Tcell_fresh_frozen_sanger<-FindMarkers(Tcells, test.use="MAST", ident.1="fresh", ident.2="cryo") 159 | write.csv(Tcell_fresh_frozen_sanger, "/groups/umcg-weersma/tmp01/singelcell/Methodspaper/Tcell_fresh_frozen_sanger.csv") 160 | 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /old_stuff/merge_ctl_10x_cd.md: -------------------------------------------------------------------------------- 1 | # **TITLE: Compare cytotoxic cell dataset from 10X, and compare with 'own' CTLs** 2 | 3 | Author: WTC 4 | Date: 20180629 5 | 6 | # Description: 7 | using this script in R, one can merge two datasets from different sources (i.e. a subset of our CD dataset and a publicly available 10X genomics dataset) and explore gene expression differences between those 8 | 9 | # Libraries 10 | library(Seurat) 11 | library(Matrix) 12 | #library(Matrix.utils) 13 | library(ggplot2) 14 | #library(pryr) 15 | 16 | 17 | # Functions 18 | 19 | # download Cytotoxic T cell gene/cell matrix (raw) from https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/cytotoxic_t 20 | CTL_10x<-Read10X(data.dir = "~/Downloads/matrices_mex/hg19/") 21 | CTL_10x<-CreateSeuratObject(raw.data = CTL_10x, min.cells = 3, project = "healthy") 22 | 23 | 24 | # extract blood Cytotoxic T cells from CD Seurat file 25 | #allcells_meta<-SetAllIdent(allcells_meta, "eight_cell_types") 26 | #CTL_CD<-WhichCells(allcells_meta, "Cytotoxic_Blood") 27 | #CTL_CD<-SubsetData(allcells_meta, CTL_CD) 28 | 29 | # filter mito genes 30 | mito.genes <- grep(pattern = "^MT-", x = rownames(x = CTL_10x@data), value = TRUE) 31 | percent.mito <- Matrix::colSums(CTL_10x@raw.data[mito.genes, ])/Matrix::colSums(CTL_10x@raw.data) 32 | CTL_10x <- AddMetaData(object = CTL_10x, metadata = percent.mito, col.name = "percent.mito") 33 | VlnPlot(object = CTL_10x, features.plot = c("nGene", "nUMI", "percent.mito"), nCol = 3) 34 | 35 | # for ngene: 200-2500, percent mito: <5% 36 | CTL_10x <- FilterCells(object = CTL_10x, subset.names = c("nGene", "percent.mito"), low.thresholds = c(200, -Inf), high.thresholds = c(2500, 0.05)) 37 | 38 | # log normalize 39 | CTL_10x <- NormalizeData(object = CTL_10x, normalization.method = "LogNormalize", scale.factor = 10000) 40 | 41 | CTL_10x <- ScaleData(object = CTL_10x, vars.to.regress = c("nUMI", "percent.mito")) 42 | 43 | ## same for CD 44 | mito.genes <- grep(pattern = "^MT\\.", x = rownames(x = CTL_CD@data), value = TRUE) 45 | percent.mito <- Matrix::colSums(CTL_CD@raw.data[mito.genes, ])/Matrix::colSums(CTL_CD@raw.data) 46 | CTL_CD <- AddMetaData(object = CTL_CD, metadata = percent.mito, col.name = "percent.mito") 47 | VlnPlot(object = CTL_CD, features.plot = c("nGene", "nUMI", "percent.mito"), nCol = 3) 48 | 49 | # for ngene: 200-2500, percent mito: <5% 50 | CTL_CD <- FilterCells(object = CTL_CD, subset.names = c("nGene", "percent.mito"), low.thresholds = c(200, -Inf), high.thresholds = c(2500, 0.05)) 51 | 52 | # log normalize 53 | CTL_CD <- NormalizeData(object = CTL_CD, normalization.method = "LogNormalize", scale.factor = 10000) 54 | 55 | CTL_CD <- ScaleData(object = CTL_CD, vars.to.regress = c("nUMI", "percent.mito")) 56 | 57 | 58 | 59 | CTL_10x@meta.data$dataset <- "ctrl" 60 | CTL_CD@meta.data$dataset <- "cd" 61 | 62 | save(CTL_10x, file="~/Desktop/Single_cell/CTL_10x_CD/CTL_10x.Rda") 63 | save(CTL_CD, file="~/Desktop/Single_cell/CTL_10x_CD/CTL_CD.Rda") 64 | 65 | 66 | ## Combine the cdulated and ctrlulated cells into a single object 67 | ctl.combined <- RunCCA(CTL_10x, CTL_CD, genes.use = intersect(rownames(CTL_10x@data), rownames(CTL_CD@data)), num.cc = 30, scale.data=T) 68 | 69 | 70 | # Process the data after combining the two datasets 71 | 72 | ctl.combined <- RunPCA(object = ctl.combined, pc.genes = ctl.combined@var.genes, do.print = TRUE, pcs.print = 1:5, genes.print = 5) 73 | ctl.combined = RunTSNE(ctl.combined, dims.use = 1:15, do.fast = T) 74 | 75 | ## The DimPlot can use either "pca", "tsne" or "cca" for it's reduction.use option and you can add any meta data to the group.by for the colouring 76 | DimPlot(object = ctl.combined, reduction.use = "cca", group.by = "dataset", pt.size = 0.5, do.return = TRUE) 77 | VlnPlot(object = ctl.combined, features.plot = "CC1", group.by = "dataset", do.return = TRUE) 78 | 79 | ## CCA equivalent of the PCElbowPlot 80 | MetageneBicorPlot(ctl.combined, grouping.var = "dataset", dims.eval = 1:30, display.progress = FALSE) 81 | 82 | ## Use the CCA reduction to better overlap the two original datasetes 83 | ctl.combined <- AlignSubspace(ctl.combined, reduction.type = "cca", grouping.var = "dataset", dims.align = 1:15) 84 | 85 | ctl.combined <- RunTSNE(ctl.combined, reduction.use = "cca.aligned", dims.use = 1:15, do.fast = T) 86 | ctl.combined <- FindClusters(ctl.combined, reduction.type = "cca.aligned", resolution = 0.6, dims.use = 1:15) 87 | 88 | setwd("~/Desktop/Single_cell/CTL_10x_CD/") 89 | save(ctl.combined, file="~/Desktop/Single_cell/CTL_10x_CD/ctl_combined_aligned.Rda") 90 | 91 | # Making several tSNE plots, coloured by different meta data 92 | 93 | TSNEPlot(ctl.combined, do.return = T, pt.size = 0.5, group.by = "dataset") 94 | TSNEPlot(ctl.combined, do.label = T, do.return = T, pt.size = 0.5) 95 | 96 | # Make feature plots for some marker genes 97 | 98 | genes.to.use <- c("MS4A1", "CD3E", "LYZ", "GNLY", "CD8A", "PF4") 99 | for (gene in genes.to.use){ 100 | cols <- colorRampPalette(c("lightgrey", "blue"))(100) 101 | ens.id <- gene 102 | expression <- ctl.combined@data[ens.id,] 103 | bins <- .bincode(expression, seq(min(expression)-0.00001,max(expression)+0.00001, length.out=100)) 104 | plot(ctl.combined@dr$tsne@cell.embeddings[,"tSNE_1"], ctl.combined@dr$tsne@cell.embeddings[,"tSNE_2"], col=alpha(cols[bins],0.3), xlab="tSNE_1", ylab="tSNE_2", main=paste0("Expression of ", gene), pch=20) 105 | } 106 | 107 | 108 | # Name: get.violin.data 109 | # Function: Get the data to make a multi-violin plot 110 | # Input: 111 | # Name Type Description 112 | # seurat Seurat The seurat object to take the data from 113 | # genes Character The genes for which to make the violin plots 114 | # HGNC.names Character The HGNC IDs for the genes 115 | # 116 | # Output: 117 | # The data to plot in the violin plots 118 | 119 | get.violin.data <- function(seurat, genes, HGNC.names) { 120 | output = data.frame(gene = character(0), value= numeric(0), ident = character(0)) 121 | for (i in 1:length(genes)) { 122 | data.use = data.frame(FetchData(seurat,genes[i])) 123 | data.use = t(data.use) 124 | data.melt=data.frame(gene=rep(genes[i], length(seurat@ident)), gene.HGNC=rep(HGNC.names[i], length(seurat@ident))) 125 | data.melt$value=as.numeric(data.use[1,1:length(seurat@ident)]) 126 | data.melt$id=names(data.use)[1:length(seurat@ident)] 127 | data.melt$ident=seurat@ident 128 | noise = rnorm(length(data.melt$value))/100000 129 | data.melt$value=as.numeric(as.character(data.melt$value))+noise 130 | output = rbind(output, data.melt) 131 | } 132 | return(output) 133 | } 134 | 135 | # Name: gg.color.hue 136 | # Function: Create ggplot colors 137 | # Input: 138 | # Name Type Description 139 | # n numeric The number of colours to make 140 | # 141 | # Output: 142 | # The ggplot colors 143 | 144 | gg.color.hue <- function(n) { 145 | hues = seq(15, 375, length = n + 1) 146 | hcl(h = hues, l = 65, c = 100)[1:n] 147 | } 148 | 149 | 150 | # Making the multi-violin plots, scipt from Dylan de Vries/ Harm Brugge 151 | 152 | ctl.combined<-SetAllIdent(ctl.combined, "dataset") 153 | identities <- ctl.combined@ident 154 | colors.x <- gg.color.hue(length(unique(identities))) 155 | plot.data <- get.violin.data(ctl.combined, c("CD14", "LYZ", "S100A9", "LYN", "ITGAX", "IFITM3", "CST3", "GZMB", "PRF1", "CD3D", "NKG7", "KLRC1", "CD79A", "SELL", "CCR7", "S100A4", "CD27", "CD8A", "CD8B", "MS4A1"), c("CD14", "LYZ", "S100A9", "LYN", "ITGAX", "IFITM3", "CST3", "GZMB", "PRF1", "CD3D", "NKG7", "KLRC1", "CD79A", "SELL", "CCR7", "S100A4", "CD27", "CD8A", "CD8B", "MS4A1")) 156 | 157 | ggplot(plot.data, aes(factor(ident),value)) + 158 | geom_violin(scale="width",adjust=1,trim=TRUE,aes(fill=factor(ident)),show.legend = F) + 159 | ylab("") + xlab("") + 160 | coord_flip() + 161 | facet_wrap(~ gene.HGNC,scales = "free_x", ncol = length(levels(plot.data$gene.HGNC))) + 162 | theme(strip.text.x = element_text(size=18, angle=90), 163 | axis.text.y = element_text(size=24), 164 | strip.background = element_blank(), 165 | panel.spacing.x = unit(c(-0.2), "lines"), 166 | axis.title.x=element_blank(), 167 | axis.text.x=element_blank(), 168 | axis.ticks.x=element_blank(), 169 | plot.margin = unit(c(0.5,0.4,0.5,1.3), "cm")) + 170 | scale_x_discrete(limits = rev(levels(plot.data$ident)), position = "top") + 171 | scale_fill_manual(values = colors.x) #+ theme_set(theme_gray(base_size = 28)) 172 | 173 | 174 | # Compare the new Seurat object with the old Seurat object 175 | 176 | 177 | # visualize 178 | # add metadata own data 179 | info<-allcells_meta@meta.data 180 | info<-info[,c(7:18,35,38:41)] 181 | info$cellen<-row.names(info) 182 | row.names(info)<-NULL 183 | keeping.order <- function(data, fn, ...) { 184 | col <- ".sortColumn" 185 | data[,col] <- 1:nrow(data) 186 | out <- fn(data, ...) 187 | if (!col %in% colnames(out)) stop("Ordering column not preserved by function") 188 | out <- out[order(out[,col]),] 189 | out[,col] <- NULL 190 | out 191 | } 192 | ## add location 193 | #extract file that is a copy from @meta.data 194 | CellsMeta = ctl.combined@meta.data 195 | head(CellsMeta) 196 | CellsMeta$cellen=row.names(CellsMeta) 197 | row.names(CellsMeta)=NULL 198 | dim(CellsMeta) 199 | info<-keeping.order(CellsMeta, merge, y=info, by = "cellen", all=FALSE) 200 | 201 | CellsMeta = ctl.combined@meta.data 202 | head(CellsMeta) 203 | CellsMeta$cellen=row.names(CellsMeta) 204 | row.names(CellsMeta)=NULL 205 | dim(CellsMeta) 206 | CellsMeta<-keeping.order(CellsMeta, merge, y=info, by = "cellen", all=T) 207 | row.names(CellsMeta)<-CellsMeta$cellen 208 | dim(CellsMeta) 209 | CellsMeta$cellen<-NULL 210 | head(CellsMeta) 211 | ctl.combined <- AddMetaData(ctl.combined, CellsMeta) 212 | 213 | ctl.combined<-SetAllIdent(ctl.combined, "dataset") 214 | cd_cells<-WhichCells(ctl.combined, "cd") 215 | healthy_cells<-WhichCells(ctl.combined, "ctrl") 216 | FeaturePlot(ctl.combined, "NKG7", cells.use = cd_cells, pt.size=3) 217 | FeaturePlot(ctl.combined, "NKG7", cells.use = healthy_cells, pt.size=3) 218 | 219 | # calculate DE 220 | # DE with genes >0.01%, MAST 221 | ctl.combined_DE_markers = FindAllMarkers(ctl.combined, min.pct = 0.001, only.pos = T, test.use = "MAST") 222 | write.table(ctl.combined_DE_markers, file = "ctl.blood.10x.cd_DEmarkers_0.1percMAST.txt") 223 | 224 | # and the significant ones (p<0.05) 225 | ctl.combined_DE_markers<-ctl.combined_DE_markers[ctl.combined_DE_markers$p_val_adj <0.05,] 226 | 227 | # top 10 of both sets 228 | library(dplyr) 229 | ctl.combined_DE_markers %>% group_by(cluster) %>% top_n(10, avg_logFC) 230 | top10 <- ctl.combined_DE_markers %>% group_by(cluster) %>% top_n(10, avg_logFC) 231 | # histogram of top 10 232 | DoHeatmap(object = ctl.combined, genes.use = top10$gene, slim.col.label = TRUE, remove.key = FALSE) 233 | --------------------------------------------------------------------------------