├── old_stuff
    ├── README.md
    ├── holgersen_mucosamarkers.R
    ├── subset_rawdatafiles_from_seuratfile.md
    ├── reactome_pathway_finding.R
    ├── Differential_expression_analysis.md
    ├── 251_10x_vs_251_10x.R
    ├── overlap_DE_patientregress_not_regressed.R
    ├── random_sampling_10x+cd.R
    ├── Import10X_gene-per-patient.R
    └── merge_ctl_10x_cd.md
├── PSC
    ├── PSC_2023
    │   ├── README.md
    │   ├── Final_analyses
    │   │   └── README.md
    │   ├── Step2_souporcell
    │   ├── Step1_cellranger
    │   ├── Step3_PSC_preprep.R
    │   └── Step4_SCT_integration_noribo_v2.R
    ├── PSC_version2022
    │   ├── PSC_preprep.R
    │   ├── postqc_filtering
    │   ├── README.md
    │   ├── DUOX2.R
    │   ├── DE_loop.R
    │   ├── SCT_integration_noribo_v2.R
    │   └── basicDE_2023.R
    ├── README.md
    ├── epithelium_markers.R
    ├── Proportion_analysis_202103.R
    ├── Proportion_analysis.R
    ├── Celltype_new.R
    ├── riskgene_expression_analysis.R
    ├── Celltyping_202103
    └── DE_analysis (PSC_I and UC_I per cell type)_01062022.R
├── README.md
├── DDTx
    ├── README.txt
    ├── Demuxlet_demultiplexing.py
    ├── ddtx_azimuth_elmentaiteadultileum_classification_demuxlet.Rmd
    ├── ddtx_filter_normalize_cluster_demuxlet.Rmd
    ├── demultiplexed_data_demuxlet.R
    ├── find_markers_and_analyze_proportions.R
    ├── filtering_and_QC.R
    ├── ddtx_compartment_assignment_demuxlet.Rmd
    └── cell_number_tables.py
├── Vedo2
    ├── step_1_Cellranger script
    ├── step_5_Vedo2_celltyping_pbmc_log.R
    ├── step_5_Vedo2_celltyping_epi_Smillie_match_markergenes.R
    ├── step_4_integration_pbmc_noribo_nonorm
    ├── step_2_Souporcell script
    ├── step_4_integration_biop_noribo_nonorm
    ├── step_5_Vedo2_celltyping_biop_log.R
    └── Vedo001_preprep.R
├── cellranger_on_gearshift
├── adding_cell_subtype_to_Seurat_dataframe.R
├── Integration.md
├── create_raw_datafile_from_seq_data.md
└── Method_comparisons
    ├── SCT_integration.R
    └── rebuttal_20210916


/old_stuff/README.md:
--------------------------------------------------------------------------------
1 | Amazing
2 | 


--------------------------------------------------------------------------------
/PSC/PSC_2023/README.md:
--------------------------------------------------------------------------------
1 | # README
2 | 


--------------------------------------------------------------------------------
/PSC/PSC_version2022/PSC_preprep.R:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/PSC/PSC_2023/Final_analyses/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/PSC/PSC_2023/Step2_souporcell:
--------------------------------------------------------------------------------
1 | # example script for running souporcell per lane
2 | 


--------------------------------------------------------------------------------
/PSC/PSC_2023/Step1_cellranger:
--------------------------------------------------------------------------------
1 | # Example script for running of cellranger per lane
2 | 


--------------------------------------------------------------------------------
/PSC/PSC_2023/Step3_PSC_preprep.R:
--------------------------------------------------------------------------------
1 | #Script used for sample calling, qc and pre-integration prepping
2 | 
3 | 
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # SingleCell
 2 | 
 3 | In this repository you will find scripts that help you filter, analyze and visualize single cell data using various R packages, such as 'Seurat', 'SCDE', 'scPred', 'harmony' etc.
 4 | 
 5 | The scripts are written for use with single cell UMI-based transcriptomic data, and were tested in datasets ranging from ~1000-150.000 gut mucosal (T-)cells from adapted smartseq2 protocols and 10x genomics v3 protocols.
 6 | 
 7 | See
 8 | 
 9 | http://satijalab.org/ # for sample data and tutorials
10 | 
11 | http://satijalab.org/seurat/visualization_vignette.html#interactive-plotting-features # for visualization tips
12 | 


--------------------------------------------------------------------------------
/DDTx/README.txt:
--------------------------------------------------------------------------------
1 | This project features a case series of 3 small bowel transplant patients.
2 | The aims of this project are to find out:
3 | 1. How immune cell repopulation (recipient cells--> donor organ) happens (over time)
4 | 2. Which cell composition (shifts) are related to rejection (grade)? And with which transcriptional changes
5 | 3. Which transcriptional/compositional changes happen before and after a rejection phase
6 | 
7 | We sampled small bowel graft biopsies over time, processed the biopsies for single cell sequencing according to the cryopreserved one-step collagenase protocol (Uniken Venema, Ramirez-Sanchez et al, Sci rep 2022), and made use of Souporcell (Heaton, Nature Methods 2020) to separate donor and recipient cells
8 | 


--------------------------------------------------------------------------------
/old_stuff/holgersen_mucosamarkers.R:
--------------------------------------------------------------------------------
 1 | # Author: WTC
 2 | # Date: July 2018
 3 | # Script that merges DE genes for CD vs controls from Holgersen paper (JCC 2015) with cell type markers of CD gut mucosacells
 4 | 
 5 | ## match holgersen with risk genes and DE genes
 6 | holgersen<-read.csv("~/Desktop/Single_cell/final_data_paper/holgersen/holgersen.csv", sep=";") # load only the 62 genes that are significant in CD 
 7 | mucosamarkers<-read.table("~/Desktop/Single_cell/final_data_paper/mucosacells_eightcelltypes_DEmarkers_1percMAST.txt") # load cell type markers within mucosa
 8 | mucosamarkers<-mucosamarkers[mucosamarkers$p_val_adj<0.05,]
 9 | holgersen_mucosamarkers<-merge(mucosamarkers, holgersen, by="gene", all=F)
10 | write.csv(holgersen_mucosamarkers,"~/Desktop/Single_cell/final_data_paper/holgersen/overlap_mucosamarkers_cd_holgersen.csv")
11 | 


--------------------------------------------------------------------------------
/DDTx/Demuxlet_demultiplexing.py:
--------------------------------------------------------------------------------
1 | singularity exec --bind /groups/umcg-weersma/tmp01/ /groups/umcg-weersma/tmp01/tools/sc-eqtlgen-consortium-pipeline/wg1/wg1-pipeline-20230308_2.simg popscle demuxlet \
2 |         --sam /groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/popscle_tools_filtered/alignment/filtered_alignment//ddtx_220504_laneX_genofiltered_possorted_genome_bam.bam \
3 |         --tag-group CB \
4 |         --tag-UMI UB \
5 |         --field GT \
6 |         --vcf /groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/popscle_tools_filtered/genotype/sorted_genotype/ddtx_bamsorted_220504_laneX_maf005.vcf.gz \
7 |         --out /groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_laneX \
8 |         --group-list /groups/umcg-weersma/tmp01/projects/ddtx/processed/alignment/cellranger_output/GRCh38/220504_laneX/outs/filtered_feature_bc_matrix/barcodes.tsv.gz
9 | 


--------------------------------------------------------------------------------
/Vedo2/step_1_Cellranger script:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH --job-name=200611_lane1
 3 | #SBATCH --output=/groups/umcg-weersma/tmp01/Emilia/output/200611_lane1.out
 4 | #SBATCH --error=/groups/umcg-weersma/tmp01/Emilia/output/200611_lane1.err
 5 | #SBATCH --time=6-23:59:00
 6 | #SBATCH --cpus-per-task=22
 7 | #SBATCH --mem=100gb
 8 | #SBATCH --nodes=1
 9 | #SBATCH --export=NONE
10 | #SBATCH --get-user-env=L
11 | #SBATCH --tmp=1000gb
12 | mkdir -p ${TMPDIR}/cellranger/200611_lane1
13 | cd ${TMPDIR}/cellranger/200611_lane1
14 | /groups/umcg-weersma/tmp01/singelcell/cellranger-3.1.0/cellranger count \
15 | --id=cellranger_200611_lane1 \
16 | --transcriptome=/groups/umcg-weersma/tmp01/Amber/refdata-cellranger-GRCh38-3.0.0 \
17 | --libraries=/groups/umcg-weersma/tmp01/Emilia/libraries/library_200611_lane1.csv  \
18 | --feature-ref=/groups/umcg-weersma/tmp01/Emilia/featurerefs/feature_ref_200611_lane1.csv \
19 | --localcores=22
20 | 
21 | mv ${TMPDIR}/cellranger/200611_lane1 /groups/umcg-weersma/tmp01/Emilia/final_output/
22 | 


--------------------------------------------------------------------------------
/cellranger_on_gearshift:
--------------------------------------------------------------------------------
 1 | ## job on gearshift
 2 | 
 3 | #!/bin/bash
 4 | #SBATCH --job-name=NAME
 5 | #SBATCH --output=/groups/umcg-weersma/tmp01/singelcell/NAME.out
 6 | #SBATCH --error=/groups/umcg-weersma/tmp01/singelcell/NAME.err
 7 | #SBATCH --time=6-23:59:00
 8 | #SBATCH --cpus-per-task=22
 9 | #SBATCH --mem=200gb
10 | #SBATCH --nodes=1
11 | #SBATCH --export=NONE
12 | #SBATCH --get-user-env=L
13 | #SBATCH --tmp=2700gb
14 | mkdir -p ${TMPDIR}/cellranger/NAME
15 | mkdir /groups/umcg-weersma/tmp01/singelcell/NAME/
16 | cd ${TMPDIR}/cellranger/NAME
17 | /groups/umcg-weersma/tmp01/umcg-hbrugge/cellranger-3.0.2/cellranger count \
18 | --id=NAME \
19 | --transcriptome=/groups/umcg-weersma/tmp01/umcg-hbrugge/refdata-cellranger-hg19-3.0.0 \
20 | --libraries=/groups/umcg-weersma/tmp01/singelcell/PSC_data_202001/library_files/library_NAME.csv \
21 | --feature-ref=/groups/umcg-weersma/tmp01/singelcell/PSC_data_202001/feature_refs/feature_ref_NAME.csv \
22 | --localcores=22 
23 | 
24 | mv ${TMPDIR}/cellranger/200108_lane1/ /groups/umcg-weersma/tmp01/singelcell/NAME/
25 | rm -rf ${TMPDIR}/cellranger/NAME/SC_RNA_COUNTER_CS
26 | 


--------------------------------------------------------------------------------
/PSC/PSC_version2022/postqc_filtering:
--------------------------------------------------------------------------------
 1 | ###########################
 2 | # Post-qc filtering       #  
 3 | # and processing          #
 4 | ###########################
 5 | 
 6 | library(dplyr)
 7 | library(Seurat)
 8 | library(patchwork)
 9 | library(ggplot2)
10 | library(readr)
11 | library(tidyr)
12 | library(MAST)
13 | library(readxl)
14 | library(openxlsx)
15 | 
16 | data<- readRDS("Data/PSC_202002_integrated_v2_noribo.rds")
17 | data=UpdateSeuratObject(data)
18 | DimPlot(data, label = T) + NoLegend()
19 | DefaultAssay(data) = "RNA"
20 | 
21 | table(data@meta.data$Final_HTO)
22 | data@meta.data$Final_HTO[data@meta.data$Final_HTO == "PSC-I-X019"]<-"PSC-NI-XXXX"
23 | data@meta.data$Final_HTO[data@meta.data$Final_HTO == "PSC-NI-X019"]<-"PSC-I-XXXX"
24 | table(data@meta.data$Final_HTO)
25 | data@meta.data$disease = sapply(strsplit(data$Final_HTO,"-"), `[`, 1)
26 | data@meta.data$inflammation = sapply(strsplit(data$Final_HTO,"-"), `[`, 2)
27 | data@meta.data$sample = sapply(strsplit(data$Final_HTO,"-"), `[`, 3)
28 | data@meta.data$state <- paste(data$disease, data$inflammation, sep='-')
29 | table(data@meta.data$sample)
30 | data = subset(data, subset = sample != "X296")
31 | table(data@meta.data$sample)
32 | DimPlot(data, label = T) + NoLegend()
33 | 
34 | saveRDS(data, "Data/PSC_2022_integrated_postqc_filtered.rds")
35 | 


--------------------------------------------------------------------------------
/PSC/PSC_version2022/README.md:
--------------------------------------------------------------------------------
 1 | This folder contains code and information on the processing and analysis of data for the PSC project. Information on wet lab processes can be found upon publication online. Find below the workflow of the computational analyses.
 2 | 
 3 | 1. Count matrices were made directly from raw data using cellranger v3.1.0 with cellranger-GRCh38-3.0.0 as alignment reference
 4 | 2. Demultiplexing was done using souporcell (https://github.com/wheaton5/souporcell)
 5 | 3. Each 10x lane was preprocessed (HTO demultiplexing, souporcell demultiplexing, QC filtering, SCtransformation) seperately following PSC_preprep.R
 6 | 4. Lanes were integrated into one set using SCT_integration_noribo_v2.R following standardized Seurat SCT integration workflow (https://satijalab.org/seurat/v3.0/integration.html)
 7 | 5. Post-qc filtering of samples was done following postqc_filtering.R
 8 | 6. Celltype annotation was done using Azimuth following XXX
 9 | 
10 | 
11 | Further analyses:
12 | 1. DE_loop.R can be used to generate DE genes and enriched GO processes
13 | 
14 | Addition 2023:
15 | 1. DUOX2.R is gebruikt voor de DUOX2 analyse
16 | 2. Figure1.R is gebruikt voor figuur 1.
17 | 3. basicDE_2023.R is gebruikt voor figuur 3
18 | 4. PSC_riskgenes.R is gebruikt voor figuur 4
19 | 5. DE_analysis_2023 is gebaseerd op DE_loop.R en is gebruikt om de DE en GO tabellen te genereren
20 | 


--------------------------------------------------------------------------------
/Vedo2/step_5_Vedo2_celltyping_pbmc_log.R:
--------------------------------------------------------------------------------
 1 | library(dplyr)
 2 | library(Seurat)
 3 | library(patchwork)
 4 | library(ggplot2)
 5 | library(readr)
 6 | library(tidyr)
 7 | library(MAST)
 8 | library(readxl)
 9 | library(openxlsx)
10 | 
11 | # Load in database
12 | data <- readRDS("Data/batch1_integrated/vedo2_batch1_PBMC_integrated_noribo.rds")
13 | data = UpdateSeuratObject(object = data)
14 | DefaultAssay(data) = "RNA"
15 | 
16 | #not normalized dataset
17 | DimPlot(data)
18 | FeaturePlot(data, features = c("CD3E", "CD14", "CD4", "CD8A", "MS4A1", "IL7R"))
19 | 
20 | #normalization
21 | data <- NormalizeData(data, verbose = FALSE)
22 | DimPlot(data, label = T) +NoLegend()
23 | 
24 | #add metadata
25 | data@meta.data$patient = sapply(strsplit(data$Final_HTO,"-"), `[`, 1)
26 | data@meta.data$timepoint = sapply(strsplit(data$Final_HTO,"-"), `[`, 2)
27 | 
28 | table(data$patient, data$Final_HTO)
29 | 
30 | #Setting cluster resolution (used on "integrated" assay)
31 | DefaultAssay(data) <- "integrated"
32 | data <- FindClusters(data, resolution = 0.7)
33 | DimPlot(data, label = T)
34 | 
35 | #Marker genes pbmc
36 | DefaultAssay(data) <- "RNA"
37 | markers_pbmc <- FindAllMarkers(data, only.pos = TRUE, min.pct = 0.25)
38 | write.csv(markers_pbmc, "Results/markergenes/markers_pbmc_log.csv")
39 | selected_markers_pbmc <- markers_pbmc %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
40 | write.csv(selected_markers_pbmc, "Results/markergenes/selected_markers_pbmc_log.csv")
41 | 


--------------------------------------------------------------------------------
/PSC/README.md:
--------------------------------------------------------------------------------
 1 | This folder contains code and information on the processing and analysis of data for the PSC project. 
 2 | Information on wet lab processes can be found upon publication online. Find below the workflow of the computational analyses. 
 3 | 
 4 | 1. Count matrices were made directly from raw data using cellranger v3.1.0 with cellranger-GRCh38-3.0.0 as alignment reference
 5 | 2. Demultiplexing was done using souporcell (https://github.com/wheaton5/souporcell) 
 6 | 3. Each 10x lane was preprocessed (HTO demultiplexing, souporcell demultiplexing, QC filtering, SCtransformation) seperately following preprocess.R
 7 | 4. Lanes were integrated into one set using SCT_integration_noribo_v2.R following standardized Seurat SCT integration workflow (https://satijalab.org/seurat/v3.0/integration.html)
 8 | 5. Cells were clustered and celltyped using celltype_new.R. This output was used for all subsequent analyses
 9 | 
10 | Analyses:
11 | - Proportion_analyses.R was used for differential abundance analysis
12 | - DE_analysis.R was used to identify differentially expressed genes for several comparisons, and GO terms associated
13 | - DE_analysis_final_hopelijk.R was used to identify differentially expressed genes, but corrected for number of cells using permutation analysis
14 | - Cellchat.R was used to perform cell-cell interaction analysis (https://github.com/sqjin/CellChat). This was done on a subset of cells for efficiency. 
15 | - riskgene_expression_analysis.R was used to assess PSC and UC risk genes differentially expressed in different health and disease states, for each of the different cell types
16 | 


--------------------------------------------------------------------------------
/adding_cell_subtype_to_Seurat_dataframe.R:
--------------------------------------------------------------------------------
 1 | # Adding new celltype from a subset of cells to a Seurat dataframe
 2 | 
 3 | colonocytes<-subset(PSC_dataset, idents = 16)
 4 | DefaultAssay(colonocytes)<-"integrated"
 5 | colonocytes<-FindNeighbors(colonocytes, dims = 1:30)
 6 | colonocytes<-FindClusters(colonocytes, resolution = 0.1)
 7 | DimPlot(colonocytes)
 8 | table(colonocytes@meta.data$integrated_snn_res.0.1)
 9 | 
10 | 
11 | colonocytes@meta.data$best4_sub[colonocytes@meta.data$seurat_clusters == "0"]<-"Best4_enterocytes" 
12 | colonocytes@meta.data$best4_sub[colonocytes@meta.data$seurat_clusters == "1"]<-"Best4_enterocytes"
13 | colonocytes@meta.data$best4_sub[colonocytes@meta.data$seurat_clusters == "2"]<-"M_cells" 
14 | 
15 | 
16 | CellsMeta<-data.frame(PSC_dataset@meta.data)
17 | x<-data.frame(colonocytes@meta.data)
18 | x$NAME<-row.names(x)
19 | CellsMeta$NAME<-row.names(CellsMeta)
20 | x<-x[,c(17,35)]
21 | row.names(CellsMeta)=NULL
22 | row.names(x)<-NULL
23 | keeping.order <- function(data, fn, ...) { 
24 |   col <- ".sortColumn"
25 |   data[,col] <- 1:nrow(data) 
26 |   out <- fn(data, ...) 
27 |   if (!col %in% colnames(out)) stop("Ordering column not preserved by function") 
28 |   out <- out[order(out[,col]),] 
29 |   out[,col] <- NULL 
30 |   out 
31 | } 
32 | CellsMeta<-keeping.order(CellsMeta, merge, y=x, by = "NAME", all=T)
33 | CellsMeta<-CellsMeta[,c(1,34)]
34 | rownames(CellsMeta)<-CellsMeta$NAME
35 | PSC_dataset<-AddMetaData(PSC_dataset, CellsMeta)
36 | 
37 | PSC_dataset@meta.data$celltype_subsets[PSC_dataset@meta.data$best4_sub == "Best4_enterocytes"]<-"Best4_enterocytes" 
38 | PSC_dataset@meta.data$celltype_subsets[PSC_dataset@meta.data$best4_sub == "M_cells"]<-"M_cells"
39 | 
40 | # check whether it worked
41 | table(PSC_dataset@meta.data$best4_sub)
42 | DimPlot(PSC_dataset, group.by="celltype_subsets", label=T, repel=T)
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/PSC/PSC_version2022/DUOX2.R:
--------------------------------------------------------------------------------
 1 | #######################################
 2 | # Generate DUOX2 markers     #
 3 | #######################################
 4 | 
 5 | library(dplyr)
 6 | library(Seurat)
 7 | library(patchwork)
 8 | library(ggplot2)
 9 | library(readr)
10 | library(tidyr)
11 | library(MAST)
12 | library(readxl)
13 | library(openxlsx)
14 | library(enrichR)
15 | 
16 | #load in datasets
17 | epi <- readRDS("Nieuw/epi_azimuth_duox2.rds")
18 | DefaultAssay(epi) = "RNA"
19 | Idents(epi) <- "celltype.final"
20 | 
21 | #Create DUOX2 marker pathways
22 | DUOX2markers <- FindMarkers(epi, group.by = "celltype.final", test.use = "MAST", ident.1 = "DUOX2 enterocytes")
23 | DUOX2markers$Gene <- rownames(DUOX2markers)
24 | DUOX2markers <- filter(DUOX2markers, DUOX2markers$p_val_adj < 0.05)
25 | DUOX2pathways <- enrichr(DUOX2markers$Gene, databases = "GO_Biological_Process_2018")$GO_Biological_Process
26 | DUOX2pathways <- filter(DUOX2pathways, DUOX2pathways$Adjusted.P.value < 0.05)
27 | 
28 | #Create enterocyte marker pathways
29 | Enteromarkers <- FindMarkers(epi, group.by = "celltype.final", test.use = "MAST", ident.1 = "Enterocytes")
30 | Enteromarkers$Gene <- rownames(Enteromarkers)
31 | Enteromarkers <- filter(Enteromarkers, Enteromarkers$p_val_adj < 0.05)
32 | Enteropathways <- enrichr(Enteromarkers$Gene, databases = "GO_Biological_Process_2018")$GO_Biological_Process
33 | Enteropathways <- filter(Enteropathways, Enteropathways$Adjusted.P.value < 0.05)
34 | 
35 | #Create specific marker pathways list and write df's
36 | DUOX2specific <- subset(DUOX2pathways,!(Term%in%Enteropathways$Term))
37 | DUOX2nonspecific <- subset(DUOX2pathways,(Term%in%Enteropathways$Term))
38 | write.csv(DUOX2specific, "Results/DE_2023/DUOX2specific.csv")
39 | write.csv(DUOX2nonspecific, "Results/DE_2023/DUOX2nonspecific.csv")
40 | write.csv(DUOX2pathways, "Results/DE_2023/DUOX2pathways.csv")
41 | write.csv(Enteropathways, "Results/DE_2023/Enteropathways.csv")
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/old_stuff/subset_rawdatafiles_from_seuratfile.md:
--------------------------------------------------------------------------------
 1 | **extract CD8 pos and neg bloodcell names from old seurat file and make new seurat file with raw data and these cells**
 2 | ---
 3 | 
 4 | Author: WTC
 5 | Date: 20171020
 6 | 
 7 | **take filtered dataset containing all cells raw data**
 8 | ```
 9 | datafile<-SetAllIdent(datafile, "tissue")
10 | ```
11 | identify in 'cells_blood' which cells are the bloodcells that are left after filtering of the allcells-dataset
12 | ```
13 | cells_blood<-WhichCells(datafile, "BLOOD")
14 | ```
15 | subset data with only bloodcells
16 | ```
17 | BLOODCELLS<-SubsetData(datafile, cells.use = cells_blood)
18 | ```
19 | identify in 'CD8*_BLOODCELLS' which cells are CD8-positive and which are not
20 | ```
21 | BLOODCELLS<-SetAllIdent(BLOODCELLS, "CD8ab.positive")
22 | CD8pos_BLOODCELLS<-WhichCells(BLOODCELLS, "1")
23 | CD8neg_BLOODCELLS<-WhichCells(BLOODCELLS, "0")
24 | ```
25 | create dataframes containing only raw data of CD8*_mucosacells
26 | ```
27 | raw_data<-data.frame(datafile@raw.data, check.names = F)
28 | raw_data_CD8pos_blood<-raw_data[,CD8pos_BLOODCELLS]
29 | raw_data_CD8neg_blood<-raw_data[,CD8neg_BLOODCELLS]
30 | ```
31 | 
32 | identify in 'cells_mucosa' which cells are the mucosacells that are left after filtering of the allcells-dataset
33 | ```
34 | cells_mucosa<-WhichCells(datafile, c("IEL", "LPL"))
35 | ```
36 | subset data with only mucosacells
37 | ```
38 | MUCOSACELLS<-SubsetData(datafile, cells.use = cells_mucosa)
39 | ```
40 | identify in 'CD8*_MUCOSACELLS' which cells are CD8-positive and which are not
41 | ```
42 | MUCOSACELLS<-SetAllIdent(MUCOSACELLS, "CD8ab.positive")
43 | CD8pos_MUCOSACELLS<-WhichCells(MUCOSACELLS, "1")
44 | CD8neg_MUCOSACELLS<-WhichCells(MUCOSACELLS, "0")
45 | ```
46 | create dataframes containing only war data of CD8*_mucosacells
47 | ```
48 | raw_data_CD8pos_mucosa<-raw_data[,CD8pos_MUCOSACELLS]
49 | raw_data_CD8neg_mucosa<-raw_data[,CD8neg_MUCOSACELLS]
50 | ```
51 | 
52 | **in the same way, you can subset IEL, LPL and BLOOD cells**
53 | 


--------------------------------------------------------------------------------
/old_stuff/reactome_pathway_finding.R:
--------------------------------------------------------------------------------
 1 | # Author: WTC
 2 | # Year: 2018
 3 | # Extract enriched pathways from DE genes, using reactomePA package
 4 | 
 5 | # open markergenes file
 6 | celltype_markergenes<-read.csv("~/Desktop/Single_cell/final_data_paper/DE/allcells_pos_neg_DE_markers.csv")
 7 | # select only significant genes
 8 | celltype_markergenes<-subset(celltype_markergenes, celltype_markergenes$p_val_adj < 0.05)
 9 | # select all upregultated genes
10 | markergenes_up<-celltype_markergenes[celltype_markergenes$avg_logFC >0,]
11 | markergenes_down<-celltype_markergenes[celltype_markergenes$avg_logFC <0,]
12 | write.csv(markergenes_down, "~/Desktop/Single_cell/final_data_paper/DE/downregulated_eight_cell_types_genes_1percMAST.csv")
13 | # select all downreagulated genes
14 | 
15 | # libraries
16 | library(reactome.db)
17 | library(clusterProfiler)
18 | library(ReactomePA)
19 | 
20 | # select TregQuiescent_mucosa_down genes
21 | TregQuiescent_mucosa_down_genes<-subset(markergenes_down, (markergenes_down$cluster == "Treg/Quiescent_mucosa")) 
22 | TregQuiescent_mucosa_down_genes<-TregQuiescent_mucosa_down_genes$gene
23 | 
24 | # convert gene symbols to entrezid for reactome
25 | TregQuiescent_mucosa_down_genes = bitr(TregQuiescent_mucosa_down_genes, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
26 | # take only Entrez-id column in list
27 | TregQuiescent_mucosa_down_entrez <- TregQuiescent_mucosa_down_genes$ENTREZID
28 | # do pathway analysis
29 | TregQuiescent_mucosa_down_pathways <- enrichPathway(gene=TregQuiescent_mucosa_down_entrez,pvalueCutoff=0.05, readable=T)
30 | # write dataframe with results
31 | y<-as.data.frame(TregQuiescent_mucosa_down_pathways)
32 | write.csv(y, "~/Desktop/Single_cell/final_data_paper/DE/TregQuiescent_mucosa_downregulated_pathways.csv")
33 | 
34 | # barplot pathways
35 | barplot(TregQuiescent_mucosa_down_pathways, showCategory=15)
36 | 
37 | # dotplot enrichment
38 | dotplot(TregQuiescent_mucosa_down_pathways, showCategory=15)
39 | 
40 | # enrichMap(x, layout=igraph::layout.kamada.kawai, vertex.label.cex = 1)
41 | cnetplot(TregQuiescent_mucosa_down_pathways, categorySize="pvalue", foldChange=geneList)
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/Vedo2/step_5_Vedo2_celltyping_epi_Smillie_match_markergenes.R:
--------------------------------------------------------------------------------
 1 | 
 2 | library(data.table)
 3 | library(reshape2)
 4 | library(readxl)
 5 | options(stringsAsFactors = FALSE)
 6 | 
 7 | #load the reference
 8 | reference <- read_xlsx("/Applications/Results/Article/6 main scRNAseq studies in human IBD/Smillie-2019-Intra--and-inter-cellular-rewiring-/Supplementary Table/Table S2.xlsx", sheet = 1, col_names = T)
 9 | reference <- reference[,1:2]
10 | 
11 | #grab the marker genes for each cell tyeps
12 | markergenes <- dcast(setDT(reference), ident~rowid(ident, prefix="gene"), value.var="gene")
13 | markergenes <- as.data.frame(t(markergenes))
14 | colnames(markergenes) <- markergenes[1,]
15 | markergenes <- markergenes[-1,]
16 | dim(markergenes)
17 | rownames(markergenes) <- paste0("gene", 1:202)
18 | rm(reference)
19 | markergenes[is.na(markergenes)] <- "+++"
20 | 
21 | 
22 | #Load input_clusters (markergenes of your clusters)
23 | input <- fread("/Applications/Results/Vedo2_batch1_biopsy_dataset/subclusters/epithelial/celltyping/markers_epi_log.csv",data.table = T)
24 | input <- input[,-c(1:6)]
25 | clusters <- dcast(setDT(input), cluster~rowid(cluster, prefix="gene"), value.var="gene")
26 | clusters <- as.data.frame(t(clusters))
27 | colnames(clusters) <- paste0("cluster", 0:13)
28 | clusters <- clusters[-1,]
29 | dim(clusters)
30 | rownames(clusters) <- paste0("gene", 1:993)
31 | rm(input)
32 | clusters[is.na(clusters)] <- "---"
33 | 
34 | 
35 | #matching genes
36 | output=as.data.frame(matrix(nrow = ncol(clusters),ncol = ncol(markergenes)))
37 | for(i in 1:ncol(clusters)){
38 |   tmp.cluster=colnames(clusters)[i]
39 |   tmp.cluster.data=clusters[,tmp.cluster]
40 |   
41 |   mm=c()
42 |   for(j in 1:ncol(markergenes)){
43 |     tmp.markergenes=colnames(markergenes)[j]
44 |     tmp.markergenes.data=markergenes[,tmp.markergenes]
45 |     aa=length(which(tmp.cluster.data %in% tmp.markergenes.data))
46 |     mm=append(mm,aa)
47 |   }
48 |   output[i,]=mm
49 | }
50 | colnames(output)=colnames(markergenes)
51 | rownames(output)=colnames(clusters)
52 | 
53 | write.csv(output, "/Applications/Results/Vedo2_batch1_biopsy_dataset/subclusters/epithelial/celltyping/output/Smillie_matching_markergenes.csv")
54 | 
55 | 


--------------------------------------------------------------------------------
/old_stuff/Differential_expression_analysis.md:
--------------------------------------------------------------------------------
 1 | 
 2 | **blood vs mucosa differential expression analysis using SCDE on cluster**
 3 | ====
 4 | Author: WTC
 5 | Date: 20171020
 6 | 
 7 | *takes a lot of time, so performing on cluster is advised*
 8 | 
 9 | ```
10 | library("methods")
11 | library(devtools)
12 | library(ggplot2)
13 | library(Matrix) # Sparse matrices
14 | library(dplyr) # Dataframe manipulation
15 | library(flexmix)
16 | library(scde) # Differential Expression
17 | ```
18 | **load data**
19 | ```
20 | cd<-read.csv("raw_data_seurat.csv", header=TRUE, row.names=1, check.names=FALSE)
21 | metadata<-read.csv("data_info_seurat.csv", header=TRUE, row.names=1)
22 | ```
23 | 
24 | filter raw data for cells<200 genes (expressed = >1 count) and genes<3 cells
25 | ```
26 | cd2 <- clean.counts(cd, min.lib.size=200, min.reads = 1, min.detected = 3)
27 | ```
28 | change metadatafile so that it contains only th filtered cells from datafile 'cd2'
29 | ```
30 | dim(metadata)
31 | dim(cd2)
32 | xx=colnames(cd2)
33 | metadata2<-metadata[xx,]
34 | dim(metadata2)
35 | dim(cd2)
36 | ```
37 | 
38 | make celltype file which will be used to define which cell belongs to which tissue category, using metadata2 file and converting IEL and LPL to MUCOSA
39 | ```
40 | celltype = as.factor(gsub("IEL|LPL","MUCOSA",metadata2$tissue))
41 | ```
42 | giving each value in the factor celltype the name of the corresponding cell
43 | ```
44 | names(celltype) <- colnames(cd2)
45 | ```
46 | calculate error models
47 | ---
48 | 
49 | ```
50 | err.mod <- scde.error.models(counts = cd2, groups = celltype, n.cores = 15,
51 |            threshold.segmentation = TRUE, save.crossfit.plots = FALSE,
52 |            save.model.plots = FALSE, verbose = 1)
53 | ```
54 | estimate gene expression prior
55 | ---
56 | 
57 | ```
58 | gene.ex.prior <- scde.expression.prior(models = err.mod, counts = cd2, length.out = 400,
59 |                                        show.plot = F)
60 | ```
61 | Testing for differential expression
62 | ---
63 | ```
64 | ediff <- scde.expression.difference(err.mod, cd2, gene.ex.prior, groups = celltype,
65 |                     n.randomizations = 100, n.cores = 15, verbose = 1)
66 | ```
67 | write out a table with all the results, showing most significantly different genes
68 | ```
69 | write.table(ediff[order(abs(ediff$Z), decreasing = TRUE), ], file = “..”, row.names = TRUE, col.names = TRUE, sep = "\t", quote = FALSE)
70 | ```
71 | 


--------------------------------------------------------------------------------
/Vedo2/step_4_integration_pbmc_noribo_nonorm:
--------------------------------------------------------------------------------
 1 | library(Seurat)
 2 | library(ggplot2)
 3 | 
 4 | options(future.globals.maxSize = 150000 * 1024^2)
 5 | 
 6 | lane18_1<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_new/200618_lane1_sct.rds")
 7 | DefaultAssay(lane18_1)<-"SCT"
 8 | lane18_2<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_new/200618_lane2_sct.rds")  
 9 | DefaultAssay(lane18_2)<-"SCT"
10 | lane18_3<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_new/200618_lane3_sct.rds")
11 | DefaultAssay(lane18_3)<-"SCT"
12 | lane18_4<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_new/200618_lane4_sct.rds")
13 | DefaultAssay(lane18_4)<-"SCT"
14 | 
15 | lane18_1[["percent.ribo"]] <- PercentageFeatureSet(lane18_1, pattern = "^RPL|^RPS")
16 | lane18_2[["percent.ribo"]] <- PercentageFeatureSet(lane18_2, pattern = "^RPL|^RPS")
17 | lane18_3[["percent.ribo"]] <- PercentageFeatureSet(lane18_3, pattern = "^RPL|^RPS")
18 | lane18_4[["percent.ribo"]] <- PercentageFeatureSet(lane18_4, pattern = "^RPL|^RPS")
19 | 
20 | alldata_vedo2_batch1_PBMC_integration_list <- list(lane18_1, lane18_2, lane18_3, lane18_4)
21 | 
22 | print("loaded")
23 | features <- SelectIntegrationFeatures(object.list = alldata_vedo2_batch1_PBMC_integration_list, nfeatures = 3000)
24 | print("features_selected")
25 | alldata_vedo2_batch1_PBMC_integration_list <- PrepSCTIntegration(object.list = alldata_vedo2_batch1_PBMC_integration_list, anchor.features = features)
26 | print("integration_prepped")
27 | anchors <- FindIntegrationAnchors(object.list = alldata_vedo2_batch1_PBMC_integration_list, anchor.features = features, normalization.method = "SCT")
28 | print("achors_found")
29 | rm(alldata_vedo2_batch1_PBMC_integration_list)
30 | alldata.integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT")
31 | print("integrated")
32 | rm(anchors)
33 | alldata.integrated <- RunPCA(alldata.integrated, verbose = FALSE)
34 | print("pca2_ran")
35 | alldata.integrated <- RunUMAP(alldata.integrated, dims = 1:30)
36 | print("umap_ran")
37 | alldata.integrated <- FindNeighbors(alldata.integrated, dims = 1:30)
38 | alldata.integrated <- FindClusters(alldata.integrated, resolution = 0.5)
39 | 
40 | DefaultAssay(alldata.integrated) <- "RNA"
41 | 
42 | saveRDS(alldata.integrated, file="/groups/umcg-weersma/tmp01/Emilia/batch1_integration/vedo2_batch1_PBMC_integrated_noribo_withmito.rds")
43 | 


--------------------------------------------------------------------------------
/PSC/PSC_version2022/DE_loop.R:
--------------------------------------------------------------------------------
 1 | #######################################
 2 | # Generate DE list per cell type      #
 3 | #######################################
 4 | 
 5 | library(dplyr)
 6 | library(Seurat)
 7 | library(patchwork)
 8 | library(ggplot2)
 9 | library(readr)
10 | library(tidyr)
11 | library(MAST)
12 | library(readxl)
13 | library(openxlsx)
14 | library(enrichR)
15 | 
16 | #load in datasets
17 | data <- readRDS("XXXX")
18 | data = UpdateSeuratObject(object = data)
19 | 
20 | #set idents to final celltyping and assay to RNA
21 | Idents(data) <- "XXXX"             
22 | DefaultAssay(data) = "RNA"
23 | 
24 | #for loop for DE analysis ##paths and celltype specification should be changed to your likings
25 | celltypes <- unique(data$celltypes_azi_final)
26 | for (x in celltypes) {
27 |   # safe pathnames
28 |   pathPSCI <- paste("/Users/amberbangma/Documents/R/PSC/Results/",x,"_PSC_I_up.csv",sep="")
29 |   pathUCI <- paste("/Users/amberbangma/Documents/R/PSC/Results/",x,"_UC_I_up.csv",sep="")
30 |   pathPSCI_GO <- paste("/Users/amberbangma/Documents/R/PSC/Results/",x,"_PSC_I_up_GO.csv",sep="")
31 |   pathUCI_GO <- paste("/Users/amberbangma/Documents/R/PSC/Results/",x,"_UC_I_up_GO.csv",sep="")
32 |   # find markers
33 |   x <- FindMarkers(data, subset.ident = x, group.by = "state", test.use = "MAST", ident.1 = "PSC-I", ident.2 = "UC-I")
34 |   x <- filter(x, x$p_val_adj < 0.05)
35 |   if(nrow(x) == 0)
36 |   {
37 |     #skip iteration because of zero significant genes
38 |     next
39 |   }
40 |   #rest of iteration for case of no error
41 |   # create a 'state' column
42 |   x$state = NA
43 |   x = as.data.frame(x)
44 |   x$state = ifelse(x$avg_log2FC > 0,"PSC_I","UC_I")
45 |   # create a 'gene' column
46 |   x$gene = NA
47 |   x$gene = rownames(x)
48 |   # separate PSC_I up gene list and UC_I up gene list
49 |   PSC_I_up_x = filter(x, x$state == "PSC_I")  
50 |   UC_I_up_x = filter(x, x$state == "UC_I")
51 |   # GO terms for each gene list 
52 |   GO_PSCIup_x <- enrichr(PSC_I_up_x$gene, databases = "GO_Biological_Process_2018")$GO_Biological_Process
53 |   GO_UCIup_x <- enrichr(UC_I_up_x$gene, databases = "GO_Biological_Process_2018")$GO_Biological_Process
54 |   # save dataframes
55 |   write.csv(PSC_I_up_x, pathPSCI)
56 |   write.csv(UC_I_up_x, pathUCI)
57 |   write.csv(GO_PSCIup_x, pathPSCI_GO)
58 |   write.csv(GO_UCIup_x, pathUCI_GO)
59 | }
60 | 
61 | ## Findmarkers parameters and script can be tweeked for different DE analyses (for example UC-I vs UC-NI)
62 | ## check DE_analysis_202103.R for examples
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 
77 | 
78 |   
79 | 


--------------------------------------------------------------------------------
/PSC/epithelium_markers.R:
--------------------------------------------------------------------------------
 1 | library(Seurat)
 2 | data<-readRDS("Data/PSC_processed_march_2021.rds")
 3 | 
 4 | 
 5 | data@meta.data$cell_cat<-"other"
 6 | 
 7 | data@meta.data$cell_cat[data@meta.data$celltypes == "Immature_enterocyte"]<-"epithelial"
 8 | data@meta.data$cell_cat[data@meta.data$celltypes == "Cycling_TA"]<-"epithelial"
 9 | data@meta.data$cell_cat[data@meta.data$celltypes == "Absorptive_enterocyte"]<-"epithelial"
10 | data@meta.data$cell_cat[data@meta.data$celltypes == "Ribo_TA"]<-"epithelial"
11 | data@meta.data$cell_cat[data@meta.data$celltypes == "Stem"]<-"epithelial"
12 | 
13 | data@meta.data$cell_cat[data@meta.data$celltypes == "Immature_goblet"]<-"epithelial"
14 | data@meta.data$cell_cat[data@meta.data$celltypes == "BEST4_enterocyte"]<-"epithelial"
15 | data@meta.data$cell_cat[data@meta.data$celltypes == "DUOX2_enterocyte"]<-"epithelial"
16 | data@meta.data$cell_cat[data@meta.data$celltypes == "PLCG2_TA"]<-"epithelial"
17 | data@meta.data$cell_cat[data@meta.data$celltypes == "Absorptive_TA"]<-"epithelial"
18 | data@meta.data$cell_cat[data@meta.data$celltypes == "Tuft"]<-"epithelial"
19 | data@meta.data$cell_cat[data@meta.data$celltypes == "REG_TA"]<-"epithelial"
20 | data@meta.data$cell_cat[data@meta.data$celltypes == "Enteroendocrine"]<-"epithelial"
21 | 
22 | data@meta.data$cell_cat[data@meta.data$celltypes == "Goblet"]<-"epithelial"
23 | 
24 | Idents(data)<-"cell_cat"
25 | DefaultAssay(data)<-"RNA"
26 | markers_epi<-FindMarkers(data, subset.ident = "epithelial", group.by="state", test.use = "MAST", ident.1 = "PSC-NI", ident.2 = "HC-NI")
27 | write.csv(markers_epi, "Results/DE/epithelium_pscNI_vs_HC.csv")
28 | 
29 | markers_epi<-FindMarkers(data, subset.ident = "epithelial", group.by="state", test.use = "MAST", ident.1 = "PSC-NI", ident.2 = "HC-NI")
30 | write.csv(markers_epi, "Results/DE/pithelium_pscNI_vs_HC.csv")
31 | 
32 | markers_epi<-FindMarkers(data, subset.ident = "epithelial", group.by="state", test.use = "MAST", ident.1 = "PSC-I", ident.2 = "HC-NI")
33 | write.csv(markers_epi, "Results/DE/epithelium_pscI_vs_HC.csv")
34 | 
35 | markers_epi<-FindMarkers(data, subset.ident = "epithelial", group.by="state", test.use = "MAST", ident.1 = "UC-NI", ident.2 = "HC-NI")
36 | write.csv(markers_epi, "Results/DE/epithelium_ucNI_vs_HC.csv")
37 | 
38 | markers_epi<-FindMarkers(data, subset.ident = "epithelial", group.by="state", test.use = "MAST", ident.1 = "UC-I", ident.2 = "HC-NI")
39 | write.csv(markers_epi, "Results/DE/epithelium_ucI_vs_HC.csv")
40 | 
41 | markers_epi<-FindMarkers(data, subset.ident = "epithelial", group.by="state", test.use = "MAST", ident.1 = "PSC-I", ident.2 = "PSC-NI")
42 | write.csv(markers_epi, "Results/DE/epithelium_pscI_vs_pscNI.csv")
43 | 
44 | markers_epi<-FindMarkers(data, subset.ident = "epithelial", group.by="state", test.use = "MAST", ident.1 = "UC-I", ident.2 = "UC-NI")
45 | write.csv(markers_epi, "Results/DE/epithelium_ucI_vs_ucNI.csv")
46 | 


--------------------------------------------------------------------------------
/Integration.md:
--------------------------------------------------------------------------------
 1 | ## Integrating datasets using SCTransform integration, Seurat V3 integration and harmony
 2 | ```
 3 | # first: load all QC-ed, SCTransformed and filtered datasets
 4 | ```
 5 | ### SCT integration
 6 | testing SCT integration, takes a long time and a lot of computational power to compute
 7 | ```
 8 | DefaultAssay(Day6_lane2)<-"SCT" # set all default assays to 'SCT'
 9 | alldata_202002_integration_list <- list(Day1_lane1, Day1_lane2, Day2_lane1, Day2_lane2, Day3_lane1, Day3_lane2,Day4_lane1, Day4_lane2,Day5_lane1, Day5_lane2,Day6_lane1, Day6_lane2)
10 | 
11 | print("loaded")
12 | features <- SelectIntegrationFeatures(object.list = alldata_202002_integration_list, nfeatures = 3000)
13 | print("features_selected")
14 | alldata_202002_integration_list <- PrepSCTIntegration(object.list = alldata_202002_integration_list, anchor.features = features)
15 | print("integration_prepped")
16 | anchors <- FindIntegrationAnchors(object.list = alldata_202002_integration_list, anchor.features = features, normalization.method = "SCT")
17 | print("achors_found")
18 | rm(alldata_202002_integration_list)
19 | alldata.integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT")
20 | print("integrated")
21 | rm(anchors)
22 | alldata.integrated <- RunPCA(alldata.integrated, verbose = FALSE)
23 | print("pca2_ran")
24 | alldata.integrated <- RunUMAP(alldata.integrated, dims = 1:30)
25 | print("umap_ran")
26 | alldata.integrated <- FindNeighbors(alldata.integrated, dims = 1:30)
27 | alldata.integrated <- FindClusters(alldata.integrated, resolution = 0.5)
28 | 
29 | DefaultAssay(alldata.integrated) <- "RNA"
30 | alldata.integrated <- NormalizeData(alldata.integrated, normalization.method = "LogNormalize", scale.factor = 10000, verbose = FALSE)
31 | saveRDS(alldata.integrated, file="/data_complete_sct_integrated.rds")
32 | 
33 | ```
34 | ### Seurat integration
35 | testing Seurat v3 integration
36 | ```
37 | options(future.globals.maxSize = 300000 * 1024^2) # set to max necessary
38 | alldata_202002_integration_anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
39 | alldata.integrated <- IntegrateData(anchorset = alldata_202002_integration_anchors, dims = 1:30)
40 | DefaultAssay(alldata.integrated) <- "integrated"
41 | alldata.integrated <- ScaleData(alldata.integrated, verbose = FALSE)
42 | alldata.integrated <- RunPCA(alldata.integrated, npcs = 30, verbose = FALSE)
43 | alldata.integrated <- RunUMAP(alldata.integrated, reduction = "pca", dims = 1:30)
44 | saveRDS(alldata.integrated, "/alldata_normal_integration.rds")
45 | ```
46 | ### harmony integration
47 | super fast integration
48 | ```
49 | # start with integrated dataset from above, or merge different datasets into one and start with that
50 | T_cells_harmony <- RunHarmony(T_cells, "dataset", assay.use="SCT")
51 | T_cells_harmony  <- RunUMAP(T_cells_harmony, reduction = "harmony", dims = 1:30)
52 | DimPlot(T_cells_harmony, group.by="Sample")
53 | DefaultAssay(T_cells_harmony)<-"SCT"
54 | T_cells_harmony<-FindNeighbors(T_cells_harmony, reduction = "harmony", dims = 1:30)
55 | T_cells_harmony<-FindClusters(T_cells_harmony,resolution = 0.5)
56 | saveRDS(T_cells_harmony, "/T_cells_harmony_integration.rds")
57 | ```
58 | 


--------------------------------------------------------------------------------
/Vedo2/step_2_Souporcell script:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH --job-name=souporcell_200612_lane1
 3 | #SBATCH --output=/groups/umcg-weersma/tmp01/Emilia/souporcell/souporcell_output/souporcell_200612_lane1.out
 4 | #SBATCH --error=/groups/umcg-weersma/tmp01/Emilia/souporcell/souporcell_output/souporcell_200612_lane1.err
 5 | #SBATCH --time=6-23:59:00
 6 | #SBATCH --cpus-per-task=12
 7 | #SBATCH --mem=100gb
 8 | #SBATCH --nodes=1
 9 | #SBATCH --export=NONE
10 | #SBATCH --get-user-env=L
11 | #SBATCH --tmp=1000gb
12 | 
13 | ml PythonPlus
14 | python /groups/umcg-weersma/tmp01/Amber/souporcell/souporcell/renamer_v2.py \
15 | --bam /groups/umcg-weersma/tmp01/Emilia/final_output/200612_lane1/cellranger_200612_lane1/outs/possorted_genome_bam.bam \
16 | --barcodes /groups/umcg-weersma/tmp01/Emilia/souporcell/barcodes/barcodes_200612_lane1.tsv \
17 | --out /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/200612_lane1_remap.fq.fq
18 | 
19 | /groups/umcg-weersma/tmp01/Amber/souporcell/minimap2-2.17_x64-linux/minimap2 -ax splice -t 8 -G50k -k 21 -w 11 --sr -A2 -B8 -O12,32 -E2,1 -r200 -p.5 -N2
20 | 0 -f1000,5000 -n2 -m20 -s40 -g2000 -2K50m \
21 | --secondary=no /groups/umcg-weersma/tmp01/Amber/refdata-cellranger-GRCh38-3.0.0/fasta/genome.fa \
22 | /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/200612_lane1_remap.fq.fq \
23 | > /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/minimap_200612_lane1.sam
24 | 
25 | python /groups/umcg-weersma/tmp01/Amber/souporcell/souporcell/retag_v2.py \
26 | --sam /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/minimap_200612_lane1.sam \
27 | --out /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/minitagged_200612_lane1.bam
28 | 
29 | ml SAMtools/1.9-foss-2018b
30 | samtools sort /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/minitagged_200612_lane1.bam \
31 | > /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/minitagged_sorted_200612_lane1.bam
32 | samtools index /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/minitagged_sorted_200612_lane1.bam
33 | 
34 | ml freebayes
35 | freebayes -f /groups/umcg-weersma/tmp01/Amber/refdata-cellranger-GRCh38-3.0.0/fasta/genome.fa -iXu -C 2 -q 20 -n 3 -E 1 -m 30 --min-coverage 6 -g 100000 \
36 | /groups/umcg-weersma/tmp01/Emilia/souporcell/remap/minitagged_sorted_200612_lane1.bam \
37 | > /groups/umcg-weersma/tmp01/Emilia/souporcell/variants/200612_lane1.vcf
38 | 
39 | /groups/umcg-weersma/tmp01/Amber/souporcell/vartrix-v1.1.4-x86_64-linux/vartrix \
40 | --umi \
41 | --mapq 30 \
42 | -b /groups/umcg-weersma/tmp01/Emilia/final_output/200612_lane1/cellranger_200612_lane1/outs/possorted_genome_bam.bam \
43 | -c /groups/umcg-weersma/tmp01/Emilia/souporcell/barcodes/barcodes_200612_lane1.tsv \
44 | --scoring-method coverage \
45 | --threads 8 \
46 | --ref-matrix /groups/umcg-weersma/tmp01/Emilia/souporcell/allele_counting/200612_lane1_wes_ref.mtx \
47 | --out-matrix /groups/umcg-weersma/tmp01/Emilia/souporcell/allele_counting/200612_lane1_wes_alt.mtx \
48 | -v /groups/umcg-weersma/tmp01/Emilia/souporcell/variants/200612_lane1.vcf \
49 | --fasta /groups/umcg-weersma/tmp01/Amber/refdata-cellranger-GRCh38-3.0.0/fasta/genome.fa
50 | 
51 | /groups/umcg-weersma/tmp01/Amber/souporcell/souporcell/souporcell/target/release/souporcell \
52 | --alt_matrix /groups/umcg-weersma/tmp01/Emilia/souporcell/allele_counting/200612_lane1_wes_alt.mtx \
53 | --ref_matrix /groups/umcg-weersma/tmp01/Emilia/souporcell/allele_counting/200612_lane1_wes_ref.mtx \
54 | --num_clusters 4 \
55 | --threads 8 \
56 | --barcodes /groups/umcg-weersma/tmp01/Emilia/souporcell/barcodes/barcodes_200612_lane1.tsv \
57 | > /groups/umcg-weersma/tmp01/Emilia/souporcell/final_output/clusters_200612_lane1_tmp.tsv
58 | 
59 | /groups/umcg-weersma/tmp01/Amber/souporcell/souporcell/troublet/target/release/troublet \
60 | -a /groups/umcg-weersma/tmp01/Emilia/souporcell/allele_counting/200612_lane1_wes_alt.mtx \
61 | -r /groups/umcg-weersma/tmp01/Emilia/souporcell/allele_counting/200612_lane1_wes_ref.mtx \
62 | --clusters /groups/umcg-weersma/tmp01/Emilia/souporcell/final_output/clusters_200612_lane1_tmp.tsv \
63 | > /groups/umcg-weersma/tmp01/Emilia/souporcell/final_output/clusters_200612_lane1.tsv
64 | 


--------------------------------------------------------------------------------
/PSC/PSC_version2022/SCT_integration_noribo_v2.R:
--------------------------------------------------------------------------------
 1 | library(Seurat)
 2 | library(ggplot2)
 3 | 
 4 | options(future.globals.maxSize = 150000 * 1024^2)
 5 | 
 6 | lane08_1 <-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200108_lane1_sct.rds")
 7 | DefaultAssay(lane08_1)<-"SCT"
 8 | lane08_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200108_lane2_sct.rds")  
 9 | DefaultAssay(lane08_2)<-"SCT"
10 | lane09_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200109_lane1_sct.rds")
11 | DefaultAssay(lane09_1)<-"SCT"
12 | lane09_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200109_lane2_sct.rds")
13 | DefaultAssay(lane09_2)<-"SCT"
14 | lane13_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200113_lane1_sct.rds")
15 | DefaultAssay(lane13_1)<-"SCT"
16 | lane13_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200113_lane2_sct.rds")
17 | DefaultAssay(lane13_2)<-"SCT"
18 | lane14_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200114_lane1_sct.rds")
19 | DefaultAssay(lane14_1)<-"SCT"
20 | lane14_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200114_lane2_sct.rds")
21 | DefaultAssay(lane14_2)<-"SCT"
22 | lane15_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200115_lane1_sct.rds")
23 | DefaultAssay(lane15_1)<-"SCT"
24 | lane15_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200115_lane2_sct.rds")
25 | DefaultAssay(lane15_2)<-"SCT"
26 | lane16_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200116_lane1_sct.rds")
27 | DefaultAssay(lane16_1)<-"SCT"
28 | lane16_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200116_lane2_sct.rds")
29 | DefaultAssay(lane16_2)<-"SCT"
30 | 
31 | lane08_1[["percent.ribo"]] <- PercentageFeatureSet(lane08_1, pattern = "^RPL|^RPS")
32 | lane08_2[["percent.ribo"]] <- PercentageFeatureSet(lane08_2, pattern = "^RPL|^RPS")
33 | lane09_1[["percent.ribo"]] <- PercentageFeatureSet(lane09_1, pattern = "^RPL|^RPS")
34 | lane09_2[["percent.ribo"]] <- PercentageFeatureSet(lane09_2, pattern = "^RPL|^RPS")
35 | lane13_1[["percent.ribo"]] <- PercentageFeatureSet(lane13_1, pattern = "^RPL|^RPS")
36 | lane13_2[["percent.ribo"]] <- PercentageFeatureSet(lane13_2, pattern = "^RPL|^RPS")
37 | lane14_1[["percent.ribo"]] <- PercentageFeatureSet(lane14_1, pattern = "^RPL|^RPS")
38 | lane14_2[["percent.ribo"]] <- PercentageFeatureSet(lane14_2, pattern = "^RPL|^RPS")
39 | lane15_1[["percent.ribo"]] <- PercentageFeatureSet(lane15_1, pattern = "^RPL|^RPS")
40 | lane15_2[["percent.ribo"]] <- PercentageFeatureSet(lane15_2, pattern = "^RPL|^RPS")
41 | lane16_1[["percent.ribo"]] <- PercentageFeatureSet(lane16_1, pattern = "^RPL|^RPS")
42 | lane16_2[["percent.ribo"]] <- PercentageFeatureSet(lane16_2, pattern = "^RPL|^RPS")
43 | 
44 | alldata_PSC_202002_integration_list <- list(lane08_1, lane08_2, lane09_1, lane09_2, lane13_1, lane13_2, lane14_1, lane14_2, lane15_1, lane15_2, lane16_1, lane16_2)
45 | 
46 | print("loaded")
47 | features <- SelectIntegrationFeatures(object.list = alldata_PSC_202002_integration_list, nfeatures = 3000)
48 | print("features_selected")
49 | alldata_PSC_202002_integration_list <- PrepSCTIntegration(object.list = alldata_PSC_202002_integration_list, anchor.features = features)
50 | print("integration_prepped")
51 | anchors <- FindIntegrationAnchors(object.list = alldata_PSC_202002_integration_list, anchor.features = features, normalization.method = "SCT")
52 | print("achors_found")
53 | rm(alldata_PSC_202002_integration_list)
54 | alldata.integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT")
55 | print("integrated")
56 | rm(anchors)
57 | alldata.integrated <- RunPCA(alldata.integrated, verbose = FALSE)
58 | print("pca2_ran")
59 | alldata.integrated <- RunUMAP(alldata.integrated, dims = 1:30)
60 | print("umap_ran")
61 | alldata.integrated <- FindNeighbors(alldata.integrated, dims = 1:30)
62 | alldata.integrated <- FindClusters(alldata.integrated, resolution = 0.5)
63 | 
64 | DefaultAssay(alldata.integrated) <- "RNA"
65 | alldata.integrated <- NormalizeData(alldata.integrated, normalization.method = "LogNormalize", scale.factor = 10000, verbose = FALSE)
66 | 
67 | saveRDS(alldata.integrated, file="/groups/umcg-weersma/tmp01/Amber/PSC_202002_integrated_v2_noribo.rds")
68 | 


--------------------------------------------------------------------------------
/PSC/PSC_2023/Step4_SCT_integration_noribo_v2.R:
--------------------------------------------------------------------------------
 1 | #Script used for integration of lanes and normalisation
 2 | 
 3 | library(Seurat)
 4 | library(ggplot2)
 5 | 
 6 | options(future.globals.maxSize = 150000 * 1024^2)
 7 | 
 8 | lane08_1 <-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200108_lane1_sct.rds")
 9 | DefaultAssay(lane08_1)<-"SCT"
10 | lane08_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200108_lane2_sct.rds")  
11 | DefaultAssay(lane08_2)<-"SCT"
12 | lane09_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200109_lane1_sct.rds")
13 | DefaultAssay(lane09_1)<-"SCT"
14 | lane09_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200109_lane2_sct.rds")
15 | DefaultAssay(lane09_2)<-"SCT"
16 | lane13_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200113_lane1_sct.rds")
17 | DefaultAssay(lane13_1)<-"SCT"
18 | lane13_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200113_lane2_sct.rds")
19 | DefaultAssay(lane13_2)<-"SCT"
20 | lane14_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200114_lane1_sct.rds")
21 | DefaultAssay(lane14_1)<-"SCT"
22 | lane14_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200114_lane2_sct.rds")
23 | DefaultAssay(lane14_2)<-"SCT"
24 | lane15_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200115_lane1_sct.rds")
25 | DefaultAssay(lane15_1)<-"SCT"
26 | lane15_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200115_lane2_sct.rds")
27 | DefaultAssay(lane15_2)<-"SCT"
28 | lane16_1<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200116_lane1_sct.rds")
29 | DefaultAssay(lane16_1)<-"SCT"
30 | lane16_2<-readRDS("/groups/umcg-weersma/tmp01/Amber/PSC_preprocessed_new/200116_lane2_sct.rds")
31 | DefaultAssay(lane16_2)<-"SCT"
32 | 
33 | lane08_1[["percent.ribo"]] <- PercentageFeatureSet(lane08_1, pattern = "^RPL|^RPS")
34 | lane08_2[["percent.ribo"]] <- PercentageFeatureSet(lane08_2, pattern = "^RPL|^RPS")
35 | lane09_1[["percent.ribo"]] <- PercentageFeatureSet(lane09_1, pattern = "^RPL|^RPS")
36 | lane09_2[["percent.ribo"]] <- PercentageFeatureSet(lane09_2, pattern = "^RPL|^RPS")
37 | lane13_1[["percent.ribo"]] <- PercentageFeatureSet(lane13_1, pattern = "^RPL|^RPS")
38 | lane13_2[["percent.ribo"]] <- PercentageFeatureSet(lane13_2, pattern = "^RPL|^RPS")
39 | lane14_1[["percent.ribo"]] <- PercentageFeatureSet(lane14_1, pattern = "^RPL|^RPS")
40 | lane14_2[["percent.ribo"]] <- PercentageFeatureSet(lane14_2, pattern = "^RPL|^RPS")
41 | lane15_1[["percent.ribo"]] <- PercentageFeatureSet(lane15_1, pattern = "^RPL|^RPS")
42 | lane15_2[["percent.ribo"]] <- PercentageFeatureSet(lane15_2, pattern = "^RPL|^RPS")
43 | lane16_1[["percent.ribo"]] <- PercentageFeatureSet(lane16_1, pattern = "^RPL|^RPS")
44 | lane16_2[["percent.ribo"]] <- PercentageFeatureSet(lane16_2, pattern = "^RPL|^RPS")
45 | 
46 | alldata_PSC_202002_integration_list <- list(lane08_1, lane08_2, lane09_1, lane09_2, lane13_1, lane13_2, lane14_1, lane14_2, lane15_1, lane15_2, lane16_1, lane16_2)
47 | 
48 | print("loaded")
49 | features <- SelectIntegrationFeatures(object.list = alldata_PSC_202002_integration_list, nfeatures = 3000)
50 | print("features_selected")
51 | alldata_PSC_202002_integration_list <- PrepSCTIntegration(object.list = alldata_PSC_202002_integration_list, anchor.features = features)
52 | print("integration_prepped")
53 | anchors <- FindIntegrationAnchors(object.list = alldata_PSC_202002_integration_list, anchor.features = features, normalization.method = "SCT")
54 | print("achors_found")
55 | rm(alldata_PSC_202002_integration_list)
56 | alldata.integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT")
57 | print("integrated")
58 | rm(anchors)
59 | alldata.integrated <- RunPCA(alldata.integrated, verbose = FALSE)
60 | print("pca2_ran")
61 | alldata.integrated <- RunUMAP(alldata.integrated, dims = 1:30)
62 | print("umap_ran")
63 | alldata.integrated <- FindNeighbors(alldata.integrated, dims = 1:30)
64 | alldata.integrated <- FindClusters(alldata.integrated, resolution = 0.5)
65 | 
66 | DefaultAssay(alldata.integrated) <- "RNA"
67 | alldata.integrated <- NormalizeData(alldata.integrated, normalization.method = "LogNormalize", scale.factor = 10000, verbose = FALSE)
68 | 
69 | saveRDS(alldata.integrated, file="/groups/umcg-weersma/tmp01/Amber/PSC_202002_integrated_v2_noribo.rds")
70 | 


--------------------------------------------------------------------------------
/old_stuff/251_10x_vs_251_10x.R:
--------------------------------------------------------------------------------
 1 | ###########################################################################################################################
 2 | # Author: WTC
 3 | # Date: July 2018
 4 | # Function: Sample 1000x 251 cells from 10x cytotoxic cell dataset, calculate DE with CD dataset and determine risk genes in DE genes
 5 | ###########################################################################################################################
 6 | #
 7 | # Libraries
 8 | #
 9 | ###########################################################################################################################
10 | library(Seurat)
11 | library(Matrix)
12 | #library(Matrix.utils)
13 | #library(ggplot2)
14 | #library(pryr)
15 | 
16 | ###########################################################################################################################
17 | #
18 | # Functions
19 | #
20 | ###########################################################################################################################
21 | 
22 | # download Cytotoxic T cell gene/cell matrix (raw) from https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/cytotoxic_t
23 | #CTL_10x<-Read10X(data.dir = "~/Downloads/matrices_mex 3/hg19/")
24 | #CTL_10x<-CreateSeuratObject(raw.data = CTL_10x, min.cells = 3, project = "healthy")
25 | # select subset of 251 cells
26 | #load("~/Desktop/Single_cell/CTL_10x_CD/CTL_10x.Rda")
27 | 
28 | setwd("~/randomised_permutation_10xsubset_cd/")
29 | load("~/Desktop/Single_cell/CTL_10x_CD/CTL_10x.Rda")
30 | load("~/Desktop/Single_cell/CTL_10x_CD/CTL_CD.Rda")
31 | risk_use<-read.csv("~/Desktop/Single_cell/final_data_paper/DE/risk_genes_in_cd_and_10x.csv")
32 | genes_use<-read.csv("~/Desktop/Single_cell/final_data_paper/DE/genes_in_cd_and_10x.csv")
33 | mkdir("~/nonrandomised_permutation_10xsubset_cd/")
34 | setwd("~/nonrandomised_permutation_10xsubset_cd/")
35 | a<-"number_DEgenes"
36 | b<-"number_DEriskgenes"
37 | number_DE_genes<-data.frame(a)
38 | number_DE_riskgenes<-data.frame(b)
39 | for (i in 1:1000){
40 | cat("iteration", i, "\n")
41 | samp_col_idxs  <- sample(ncol(CTL_10x@data), 251)
42 | samp_col_names <- colnames(CTL_10x@data) [samp_col_idxs]
43 | CTL_10x_subset<-CTL_10x@data[,samp_col_names]
44 | dim(CTL_10x_subset)
45 | CTL_10x_subset<-CreateSeuratObject(raw.data = CTL_10x_subset, min.cells = 3, project = "subset")
46 | mito.genes <- grep(pattern = "^MT-", x = rownames(x = CTL_10x_subset@data), value = TRUE)
47 | percent.mito <- Matrix::colSums(CTL_10x_subset@raw.data[mito.genes, ])/Matrix::colSums(CTL_10x_subset@raw.data)
48 | CTL_10x_subset <- AddMetaData(object = CTL_10x_subset, metadata = percent.mito, col.name = "percent.mito")
49 | 
50 | # for ngene: 200-2500, percent mito: <5%
51 | CTL_10x_subset <- FilterCells(object = CTL_10x_subset, subset.names = c("nGene", "percent.mito"), low.thresholds = c(200, -Inf), high.thresholds = c(2500, 0.05))
52 | 
53 | # log normalize
54 | CTL_10x_subset <- NormalizeData(object = CTL_10x_subset, normalization.method = "LogNormalize", scale.factor = 10000)
55 | CTL_10x_subset <- ScaleData(object = CTL_10x_subset, vars.to.regress = c("nUMI", "percent.mito"))
56 | CTL_10x_subset@meta.data$dataset <- "subset10x"
57 | 
58 | save(CTL_10x_subset, file=paste0("CTL_10x_subset",i,".Rda"))
59 | 
60 | ## assign name to CD dataset
61 | CTL_CD@meta.data$dataset <- "cd"
62 | 
63 | ## Combine the cdulated and ctrlulated cells into a single object
64 | CTL_subset.combined <- RunCCA(CTL_10x_subset, CTL_CD, genes.use = intersect(rownames(CTL_10x_subset@data), rownames(CTL_CD@data)), num.cc = 30, scale.data=T)
65 | 
66 | ###############################################################
67 | # Process the data after combining the two datasets
68 | ###############################################################
69 | CTL_subset.combined<-SetAllIdent(CTL_subset.combined, "dataset")
70 | subset10x_cells<-WhichCells(CTL_subset.combined, "subset10x")
71 | cd_cells<-WhichCells(CTL_subset.combined, "cd")
72 | 
73 | # calculate DE
74 | # DE with genes present in the two datasets, in >1%, MAST
75 | CTL_subset.combined_DE_markers_1perc = FindAllMarkers(CTL_subset.combined, only.pos = T, test.use = "MAST", genes.use = genes_use$gene)
76 | write.table(CTL_subset.combined_DE_markers_1perc, file = paste0("CTL_subset",i,".blood.10x._DEmarkers_MAST.txt"))
77 | 
78 | # and the significant ones (p<0.05)
79 | CTL_subset.combined_DE_markers_1perc<-CTL_subset.combined_DE_markers_1perc[CTL_subset.combined_DE_markers_1perc$p_val_adj <0.05,]
80 | number_DE_genes[i,a]<-nrow(CTL_subset.combined_DE_markers_1perc)
81 | 
82 | # merge all DE results with risk genes
83 | risk_all_filtered_subset<-merge(risk_use, CTL_subset.combined_DE_markers_1perc, by="gene", all=F)
84 | write.csv(risk_all_filtered_subset, paste0("riskgenes_subset",i,"_vs_cd_filtered.csv"))
85 | number_DE_riskgenes[i,b]<-nrow(risk_all_filtered_subset)}
86 | 
87 | write.csv(number_DE_genes, "number_DE_genes_cd_vs_10xsubsets.csv")
88 | write.csv(number_DE_riskgenes, "number_DE_riskgenes_cd_vs_10xsubsets.csv")
89 | 


--------------------------------------------------------------------------------
/DDTx/ddtx_azimuth_elmentaiteadultileum_classification_demuxlet.Rmd:
--------------------------------------------------------------------------------
  1 | title: "ddtx_azimuth_elmentaiteadultileum_classification_demuxlet"
  2 | author: "adapted from Roy Oelen"
  3 | date: "2023-06-15"
  4 | output: html_document
  5 | ---
  6 | 
  7 | ```{r header, include=FALSE}
  8 | ############################################################################################################################
  9 | # Authors: Roy Oelen
 10 | # Name: ddtx_azimuth_elmentaiteadultileum_classification.Rmd
 11 | # Function: assign compartments to the cells
 12 | ############################################################################################################################
 13 | ```
 14 | 
 15 | ```{r libraries, include=FALSE}
 16 | knitr::opts_chunk$set(echo = FALSE)
 17 | ####################
 18 | # libraries        #
 19 | ####################
 20 | 
 21 | # for the object containing meta and count data
 22 | library(Seurat)
 23 | # for plotting
 24 | library(ggplot2)
 25 | library(cowplot)
 26 | 
 27 | ```
 28 | 
 29 | ```{r functions, include=FALSE}
 30 | knitr::opts_chunk$set(echo = FALSE)
 31 | ####################
 32 | # Functions        #
 33 | ####################
 34 | 
 35 | ```
 36 | 
 37 | ```{r setup, include=FALSE}
 38 | knitr::opts_chunk$set(echo = FALSE)
 39 | ####################
 40 | # Main Code        #
 41 | ####################
 42 | 
 43 | # locations of the objects
 44 | seurat_objects_loc <- '/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/'
 45 | seurat_object_compartment_loc <- paste(seurat_objects_loc, 'ddtx_merged_demultiplexed_clustered_compartment_demuxlet.rds', sep = '')
 46 | result_loc <- paste(seurat_objects_loc, 'ddtx_merged_demultiplexed_clustered_compartment_azi_elmentaiteadultileum_demuxlet.rds', sep = '')
 47 | 
 48 | # location of reference
 49 | references_loc <- '/groups/umcg-weersma/tmp01/datasets/elmentaite_2021/'
 50 | reference_loc <- paste(references_loc, 'elmentaite_2021_all_sct_clus.rds', sep = '')
 51 | 
 52 | ```
 53 | 
 54 | ```{r read_objects, include=FALSE}
 55 | knitr::opts_chunk$set(echo = FALSE)
 56 | # read the objects
 57 | ddtx <- readRDS(seurat_object_compartment_loc)
 58 | elmentaite_full <- readRDS(reference_loc)
 59 | 
 60 | ```
 61 | 
 62 | ```{r process_reference, include=FALSE}
 63 | knitr::opts_chunk$set(echo = FALSE)
 64 | 
 65 | # setting seed for reproduceability
 66 | set.seed(7777)
 67 | 
 68 | # subset the elmentaite object to adults and ileum
 69 | elmentaite_adult_ileum <- elmentaite_full[, elmentaite_full@meta.data[['Region']] == 'SmallInt' & elmentaite_full@meta.data[['Diagnosis']] == 'Healthy adult']
 70 | # do PCA dimensional reduction
 71 | elmentaite_adult_ileum <- RunPCA(elmentaite_adult_ileum)
 72 | # do 2d UMAP dimensional reduction
 73 | elmentaite_adult_ileum <- RunUMAP(elmentaite_adult_ileum, dims = 1:30, return.model = T)
 74 | # create neighbourhood graph
 75 | elmentaite_adult_ileum <- FindNeighbors(elmentaite_adult_ileum, dims = 1:30, k.param = 20)
 76 | # find clusters
 77 | elmentaite_adult_ileum <- FindClusters(elmentaite_adult_ileum, resolution = 1.5)
 78 | ```
 79 | 
 80 | ```{r do_mapping, include=FALSE}
 81 | knitr::opts_chunk$set(echo = FALSE)
 82 | 
 83 | # find transfer anchors
 84 | anchors <- FindTransferAnchors(
 85 |   reference = elmentaite_adult_ileum,
 86 |   query = ddtx,
 87 |   normalization.method = "SCT",
 88 |   reference.reduction = "pca",
 89 |   dims = 1:50
 90 | )
 91 | # do the reference mapping
 92 | ddtx <- MapQuery(
 93 |   anchorset = anchors,
 94 |   query = ddtx,
 95 |   reference = elmentaite_adult_ileum,
 96 |   refdata = list(
 97 |     celltype.elmentaiteadultileum = "cell_type"
 98 |   ),
 99 |   reference.reduction = "pca",
100 |   reduction.model = "umap"
101 | )
102 | 
103 | ```
104 | 
105 | ```{r create_plots, include=FALSE}
106 | knitr::opts_chunk$set(echo = FALSE)
107 | 
108 | # check the reference mapping
109 | reference_umap <- DimPlot(object = elmentaite_adult_ileum, reduction = "umap", group.by = "cell_type", label = TRUE, label.size = 3, repel = TRUE) + NoLegend()
110 | # vs the prediction
111 | projected_umap <- DimPlot(object = ddtx, reduction = "ref.umap", group.by = "predicted.celltype.elmentaiteadultileum", label = TRUE, label.size = 3, repel = TRUE) + NoLegend()
112 | # check the clusters
113 | clusters_umap <- DimPlot(object = ddtx, reduction = "umap", group.by = "seurat_clusters", label = TRUE, label.size = 3, repel = TRUE) + NoLegend()
114 | # vs the predictions
115 | celltypes_umap <- DimPlot(object = ddtx, reduction = "umap", group.by = "predicted.celltype.elmentaiteadultileum", label = TRUE, label.size = 3, repel = TRUE) + NoLegend()
116 | 
117 | ```
118 | 
119 | ```{r show_plots, include = TRUE, fig.width=20, fig.height=20}
120 | knitr::opts_chunk$set(echo = FALSE)
121 | 
122 | # show result
123 | plot_grid(reference_umap, projected_umap, clusters_umap, celltypes_umap)
124 | ```
125 | 
126 | ```{r save, include=FALSE}
127 | knitr::opts_chunk$set(echo = FALSE)
128 | # save the result
129 | saveRDS(ddtx, result_loc)
130 | ```
131 | 


--------------------------------------------------------------------------------
/old_stuff/overlap_DE_patientregress_not_regressed.R:
--------------------------------------------------------------------------------
 1 | ## single cell sequencing in Crohn's 
 2 | 
 3 | # Author: Werna Uniken Venema
 4 | # year: 2018
 5 | 
 6 | ## looking into overlap DE analyses for different patients using two methods:
 7 | # 1. comparing DE with and without patient regressed
 8 | # 2. comparing DE per patient
 9 | 
10 | #### method 1 - patient regressed
11 | 
12 | # load DE file, not regressed for patient
13 | DE_allpts<-read.csv("~/Dropbox/scRNAseq paper/Results_paper/markergenes/markers/allcells/eight_celltypes_markers.csv", row.names=1)
14 | row.names(DE_allpts)<-NULL
15 | dim(DE_allpts)
16 | DE_allpts$gene_cluster<-paste0(DE_allpts$gene, sep="_", DE_allpts$cluster)
17 | 
18 | # use DE file, regressed for patient
19 | # DE with genes >1%, MAST
20 | seuratfile_allcells_patientregr<-SetAllIdent(seuratfile_allcells_patientregr, "eight_cell_types")
21 | allcells_patientregr_DE_markers = FindAllMarkers(seuratfile_allcells_patientregr, min.pct = 0.01, only.pos = T, test.use = "MAST")
22 | row.names(allcells_patientregr_DE_markers)<-NULL
23 | allcells_patientregr_DE_markers<-allcells_patientregr_DE_markers[allcells_patientregr_DE_markers$p_val_adj < 0.05,]
24 | dim(allcells_patientregr_DE_markers)
25 | colnames(allcells_patientregr_DE_markers)[1]<-"p_val_patientregr"
26 | colnames(allcells_patientregr_DE_markers)[2]<-"Av_logFC_patientregr"
27 | colnames(allcells_patientregr_DE_markers)[3]<-"pct.1_patientregr"
28 | colnames(allcells_patientregr_DE_markers)[4]<-"pct.2_patientregr"
29 | colnames(allcells_patientregr_DE_markers)[5]<-"p_val_adj_patientregr"
30 | colnames(allcells_patientregr_DE_markers)[6]<-"cluster_patientregr"
31 | allcells_patientregr_DE_markers$gene_cluster<-paste0(allcells_patientregr_DE_markers$gene, sep="_", allcells_patientregr_DE_markers$cluster_patientregr)
32 | overlap_patient_w_o_regress<-merge(allcells_patientregr_DE_markers, DE_allpts, by="gene_cluster", all=F)
33 | 
34 | write.csv(overlap_patient_w_o_regress, file="~/Desktop/Single_cell/final_data_paper/DE/overlap_patient_w_o_regress.csv")
35 | 
36 | ####method 2 - overlap patients
37 | 
38 | # use DE file, regressed for patient
39 | # DE with genes >1%, MAST
40 | seuratfile_pt1cells<-SetAllIdent(seuratfile_pt1cells, "eight_cell_types")
41 | pt1cells_DE_markers = FindAllMarkers(seuratfile_pt1cells, min.pct = 0.01, only.pos = T, test.use = "MAST")
42 | row.names(pt1cells_DE_markers)<-NULL
43 | pt1cells_DE_markers<-pt1cells_DE_markers[pt1cells_DE_markers$p_val_adj < 0.05,]
44 | dim(pt1cells_DE_markers)
45 | colnames(pt1cells_DE_markers)[1]<-"p_val_pt1cells"
46 | colnames(pt1cells_DE_markers)[2]<-"Av_logFC_pt1cells"
47 | colnames(pt1cells_DE_markers)[3]<-"pct.1_pt1cells"
48 | colnames(pt1cells_DE_markers)[4]<-"pct.2_pt1cells"
49 | colnames(pt1cells_DE_markers)[5]<-"p_val_adj_pt1cells"
50 | colnames(pt1cells_DE_markers)[6]<-"cluster_pt1cells"
51 | pt1cells_DE_markers$gene_cluster<-paste0(pt1cells_DE_markers$gene, sep="_", pt1cells_DE_markers$cluster_pt1cells)
52 | 
53 | # use DE file, regressed for patient
54 | # DE with genes >1%, MAST
55 | seuratfile_pt2cells<-SetAllIdent(seuratfile_pt2cells, "eight_cell_types")
56 | pt2cells_DE_markers = FindAllMarkers(seuratfile_pt2cells, min.pct = 0.01, only.pos = T, test.use = "MAST")
57 | row.names(pt2cells_DE_markers)<-NULL
58 | pt2cells_DE_markers<-pt2cells_DE_markers[pt2cells_DE_markers$p_val_adj < 0.05,]
59 | dim(pt2cells_DE_markers)
60 | colnames(pt2cells_DE_markers)[1]<-"p_val_pt2cells"
61 | colnames(pt2cells_DE_markers)[2]<-"Av_logFC_pt2cells"
62 | colnames(pt2cells_DE_markers)[3]<-"pct.1_pt2cells"
63 | colnames(pt2cells_DE_markers)[4]<-"pct.2_pt2cells"
64 | colnames(pt2cells_DE_markers)[5]<-"p_val_adj_pt2cells"
65 | colnames(pt2cells_DE_markers)[6]<-"cluster_pt2cells"
66 | pt2cells_DE_markers$gene_cluster<-paste0(pt2cells_DE_markers$gene, sep="_", pt2cells_DE_markers$cluster_pt2cells)
67 | 
68 | ## calculate overlap
69 | overlap_patient1_2<-merge(pt1cells_DE_markers, pt2cells_DE_markers, by="gene_cluster", all=F)
70 | 
71 | write.csv(overlap_patient1_2, file="~/Desktop/Single_cell/final_data_paper/DE/overlap_patient1_2.csv")
72 | 
73 | # use DE file, regressed for patient
74 | # DE with genes >1%, MAST
75 | seuratfile_pt3cells<-SetAllIdent(seuratfile_pt3cells, "eight_cell_types")
76 | pt3cells_DE_markers = FindAllMarkers(seuratfile_pt3cells, min.pct = 0.01, only.pos = T, test.use = "MAST")
77 | row.names(pt3cells_DE_markers)<-NULL
78 | pt3cells_DE_markers<-pt3cells_DE_markers[pt3cells_DE_markers$p_val_adj < 0.05,]
79 | dim(pt3cells_DE_markers)
80 | colnames(pt3cells_DE_markers)[1]<-"p_val_pt3cells"
81 | colnames(pt3cells_DE_markers)[2]<-"Av_logFC_pt3cells"
82 | colnames(pt3cells_DE_markers)[3]<-"pct.1_pt3cells"
83 | colnames(pt3cells_DE_markers)[4]<-"pct.2_pt3cells"
84 | colnames(pt3cells_DE_markers)[5]<-"p_val_adj_pt3cells"
85 | colnames(pt3cells_DE_markers)[6]<-"cluster_pt3cells"
86 | pt3cells_DE_markers$gene_cluster<-paste0(pt3cells_DE_markers$gene, sep="_", pt3cells_DE_markers$cluster_pt3cells)
87 | 
88 | ## calculate overlap
89 | overlap_patient1_2_3<-merge(overlap_patient1_2, pt3cells_DE_markers, by="gene_cluster", all=F)
90 | write.csv(overlap_patient1_2_3, file="~/Desktop/Single_cell/final_data_paper/DE/overlap_patient1_2_3.csv")
91 | 
92 | 


--------------------------------------------------------------------------------
/DDTx/ddtx_filter_normalize_cluster_demuxlet.Rmd:
--------------------------------------------------------------------------------
  1 | title: "ddtx_filter_normalize_cluster_demuxlet"
  2 | author: "adapted from Roy Oelen"
  3 | date: "2023-06-15"
  4 | output: html_document
  5 | ---
  6 | 
  7 | ```{r header, include=FALSE}
  8 | ############################################################################################################################
  9 | # Authors: Roy Oelen
 10 | # Name: ddtx_filter_normalize_cluster.Rmd
 11 | # Function: filter the object on doublets, normalize with SCT and cluster+UMAP
 12 | ############################################################################################################################
 13 | ```
 14 | 
 15 | ```{r libraries, include=FALSE}
 16 | knitr::opts_chunk$set(echo = FALSE)
 17 | ####################
 18 | # libraries        #
 19 | ####################
 20 | 
 21 | # for the object containing meta and count data
 22 | library(Seurat)
 23 | # for plotting
 24 | library(ggplot2)
 25 | 
 26 | ```
 27 | 
 28 | ```{r functions, include=FALSE}
 29 | knitr::opts_chunk$set(echo = FALSE)
 30 | ####################
 31 | # Functions        #
 32 | ####################
 33 | 
 34 | ```
 35 | 
 36 | ```{r setup, include=FALSE}
 37 | knitr::opts_chunk$set(echo = FALSE)
 38 | ####################
 39 | # Main Code        #
 40 | ####################
 41 | 
 42 | # locations of the objects
 43 | seurat_objects_loc <- '/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/'
 44 | seurat_object_demultiplexed_loc <- paste(seurat_objects_loc, 'ddtx_merged_demultiplexed_demuxlet.rds', sep = '')
 45 | seurat_object_clustered_loc <- paste(seurat_objects_loc, 'ddtx_merged_demultiplexed_clustered_demuxlet.rds', sep = '')
 46 | 
 47 | ```
 48 | 
 49 | ```{r read_object, include=FALSE}
 50 | knitr::opts_chunk$set(echo = FALSE)
 51 | 
 52 | # read the object
 53 | ddtx <- readRDS(seurat_object_demultiplexed_loc)
 54 | 
 55 | ```
 56 | 
 57 | ### before filtering
 58 | ```{r show_before_filter, include=TRUE, echo=TRUE}
 59 | # show the number of cells before filtering
 60 | ddtx
 61 | ```
 62 | 
 63 | ### filtering
 64 | 
 65 | The individuals that were sequenced, received a gut transplant. This means that there should be expression data of both the receipiant and the donor, as such the number of clusters from Souporcell, is twice the number of individuals sequenced. The only exception here, is 220504_lane10. Here there no donors. When filtering for doublets, we as such use the 2*n clusters for all lanes but that one.
 66 | 
 67 | ```{r doublet_filter, include=FALSE}
 68 | # filter on doublets
 69 | ddtx <- ddtx[, (
 70 |   ddtx@meta.data$lane %in% c(
 71 |     '220504_lane01',
 72 |     '220504_lane02',
 73 |     '220504_lane03',
 74 |     '220504_lane04',
 75 |     '220504_lane05',
 76 |     '220504_lane06',
 77 |     '220504_lane07',
 78 |     '220504_lane08',
 79 |     '220504_lane09',
 80 |     '220504_lane10'
 81 |     ) &
 82 |   ddtx@meta.data$DROPLET.TYPE == "SNG"
 83 |   )]
 84 | ```
 85 | 
 86 | ### after filtering
 87 | ```{r show_after_filter, include=TRUE, echo=TRUE}
 88 | # show the number of cells after filtering
 89 | ddtx
 90 | ```
 91 | 
 92 | ```{r normalize_rna, include=FALSE}
 93 | # do old-style normalization
 94 | ddtx <- NormalizeData(ddtx)
 95 | ```
 96 | 
 97 | ```{r normalize_sct, include=FALSE}
 98 | # do SCT normalization
 99 | ddtx <- SCTransform(ddtx)
100 | ```
101 | 
102 | ```{r pca_umap_cluster, include=FALSE}
103 | # set the seed for reproduceability
104 | set.seed(7777)
105 | # do PCA dimensional reduction
106 | ddtx <- RunPCA(ddtx)
107 | # do 2d UMAP dimensional reduction
108 | ddtx <- RunUMAP(ddtx, dims = 1:30, return.model = T)
109 | # create neighbourhood graph
110 | ddtx <- FindNeighbors(ddtx, dims = 1:30, k.param = 20)
111 | # find clusters
112 | ddtx <- FindClusters(ddtx, resolution = 1.5)
113 | ```
114 | 
115 | ```{r donors, include=FALSE}
116 | # add preliminary donor assignment
117 | ddtx@meta.data[['donor_final']] <- NA
118 | # we use the donor+receipiant for all lanes
119 | ddtx@meta.data[ddtx@meta.data$lane %in% c('220504_lane01', '220504_lane02', '220504_lane03', '220504_lane04', '220504_lane05', '220504_lane06', '220504_lane07', '220504_lane08',     '220504_lane09', '220504_lane10'), 'donor_final'] <- ddtx@meta.data[ddtx@meta.data$lane %in% c('220504_lane01', '220504_lane02', '220504_lane03', '220504_lane04', '220504_lane05', '220504_lane06', '220504_lane07', '220504_lane08', '220504_lane09', '220504_lane10'), 'SNG.BEST.GUESS']
120 | 
121 | ```
122 | 
123 | ```{r create_plots, include=FALSE}
124 | # plot the clusters
125 | p_clusters <- DimPlot(ddtx, reduction = 'umap', group.by = 'seurat_clusters', label = TRUE, label.size = 3, repel = TRUE, raster = FALSE) + NoLegend() + ggtitle('Leiden clusters')
126 | # plot the individuals
127 | p_individuals <- DimPlot(ddtx, reduction = 'umap', group.by = 'donor_final', label = FALSE, label.size = 3, repel = TRUE, raster = FALSE) + ggtitle('genotyped individuals')
128 | ```
129 | 
130 | ### after 2d UMAP reduction and clustering
131 | 
132 | ```{r show_plots, include = TRUE, fig.width=10, fig.height=10}
133 | p_clusters
134 | p_individuals
135 | ```
136 | 
137 | ```{r save, include=FALSE}
138 | # save the result
139 | saveRDS(ddtx, seurat_object_clustered_loc)
140 | ```
141 | 


--------------------------------------------------------------------------------
/old_stuff/random_sampling_10x+cd.R:
--------------------------------------------------------------------------------
  1 | # Author: WTC
  2 | # Date: July 2018
  3 | # Script for sampling 251 cells from 10x CTL dataset, 
  4 | #   combine those (using Seurat's CCA function) with 251 CTLs from CD dataset, randomly assign source IDs 
  5 | #   and perform differential expression analyses on these randomly made groups of cells, each existing of 251 cells. 
  6 | 
  7 | # download Cytotoxic T cell gene/cell matrix (raw) from https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/cytotoxic_t
  8 | load("~/Desktop/Single_cell/CTL_10x_CD/CTL_10x.Rda")
  9 | load("~/Desktop/Single_cell/CTL_10x_CD/CTL_CD.Rda") # import CD CTL dataset
 10 | risk_use<-read.csv("~/Desktop/Single_cell/final_data_paper/DE/risk_genes_in_cd_and_10x.csv")
 11 | genes_use<-read.csv("~/Desktop/Single_cell/final_data_paper/DE/genes_in_cd_and_10x.csv")
 12 | 
 13 | a<-"number_DEgenes"
 14 | b<-"number_DEriskgenes"
 15 | number_DE_genes<-data.frame(a)
 16 | number_DE_riskgenes<-data.frame(b)
 17 | for(i in (1:1000)){
 18 | cat("iteration", i, "\n")
 19 | # select subset of 251 cells
 20 | samp_col_idxs_1  <- sample(ncol(CTL_10x@data), 251)
 21 | samp_col_names_1 <- colnames(CTL_10x@data) [samp_col_idxs_1]
 22 | CTL_10x_subset<-CTL_10x@data[,samp_col_names_1]
 23 | dim(CTL_10x_subset)
 24 | CTL_10x_subset<-CreateSeuratObject(raw.data = CTL_10x_subset, min.cells = 3, project = "subset")
 25 | 
 26 | mito.genes <- grep(pattern = "^MT-", x = rownames(x = CTL_10x_subset@data), value = TRUE)
 27 | percent.mito <- Matrix::colSums(CTL_10x_subset@raw.data[mito.genes, ])/Matrix::colSums(CTL_10x_subset@raw.data)
 28 | CTL_10x_subset <- AddMetaData(object = CTL_10x_subset, metadata = percent.mito, col.name = "percent.mito")
 29 | 
 30 | # for ngene: 200-2500, percent mito: <5%
 31 | CTL_10x_subset <- FilterCells(object = CTL_10x_subset, subset.names = c("nGene", "percent.mito"), low.thresholds = c(200, -Inf), high.thresholds = c(2500, 0.05))
 32 | 
 33 | # log normalize
 34 | CTL_10x_subset <- NormalizeData(object = CTL_10x_subset, normalization.method = "LogNormalize", scale.factor = 10000)
 35 | CTL_10x_subset <- ScaleData(object = CTL_10x_subset, vars.to.regress = c("nUMI", "percent.mito"))
 36 | dim(CTL_10x_subset@data)
 37 | 
 38 | CTL_10x_subset@meta.data$dataset <- "subset10x"
 39 | 
 40 | save(CTL_10x_subset, file=paste0("CTL_10x_subset", i, ".Rda"))
 41 | 
 42 | 
 43 | ## same for CD
 44 | CTL_CD@meta.data$dataset <- "cd"
 45 | 
 46 | ## Combine the cdulated and ctrlulated cells into a single object
 47 | CTL_subset.combined <- RunCCA(CTL_10x_subset, CTL_CD, genes.use = intersect(rownames(CTL_10x_subset@data), rownames(CTL_CD@data)), num.cc = 30, scale.data=T)
 48 | 
 49 | ###############################################################
 50 | # Process the data after combining the two datasets
 51 | ###############################################################
 52 | 
 53 | #CTL_subset.combined <- RunPCA(object = CTL_subset.combined, pc.genes = CTL_subset.combined@var.genes, do.print = TRUE, pcs.print = 1:5, genes.print = 5)
 54 | #CTL_subset.combined = RunTSNE(CTL_subset.combined, dims.use = 1:15, do.fast = T)
 55 | 
 56 | ## CCA equivalent of the PCElbowPlot
 57 | #MetageneBicorPlot(CTL_subset.combined, grouping.var = "dataset", dims.eval = 1:30, display.progress = FALSE)
 58 | 
 59 | ## Use the CCA reduction to better overlap the two original datasetes
 60 | #CTL_subset.combined <- AlignSubspace(CTL_subset.combined, reduction.type = "cca", grouping.var = "dataset", dims.align = 1:15)
 61 | 
 62 | #CTL_subset.combined <- RunTSNE(CTL_subset.combined, reduction.use = "cca.aligned", dims.use = 1:15, do.fast = T)
 63 | #CTL_subset.combined <- FindClusters(CTL_subset.combined, reduction.type = "cca.aligned", resolution = 0.6, dims.use = 1:15)
 64 | #save(CTL_subset.combined, file=paste0("CTL_subset_", i, "combined_aligned.Rda"))
 65 | 
 66 | ### add metadata to randomize cell ids
 67 | 
 68 | 
 69 | ## add location
 70 | #extract file that is a copy from @data.info
 71 | CellsMeta = CTL_subset.combined@meta.data
 72 | CellsMeta$random_asssignment <- "A" 
 73 | CellsMeta$random_asssignment[sample(nrow(CellsMeta), abs(nrow(CellsMeta)/2), replace=FALSE)] <- "B"
 74 | CTL_subset.combined <- AddMetaData(CTL_subset.combined, CellsMeta)
 75 | 
 76 | CTL_subset.combined<-SetAllIdent(CTL_subset.combined, "random_asssignment")
 77 | subset_A<-WhichCells(CTL_subset.combined, "A")
 78 | subset_B<-WhichCells(CTL_subset.combined, "B")
 79 | 
 80 | # calculate DE
 81 | 
 82 | # genes present in the two datasets:
 83 | # DE with genes present in the two datasets, in >1%, MAST
 84 | CTL_subset.combined_DE_markers = FindAllMarkers(CTL_subset.combined, only.pos = T, test.use = "MAST", genes.use = genes_use$gene)
 85 | write.table(CTL_subset.combined_DE_markers, file = paste0("CTL_subset", i, "_DEmarkers_MAST.txt"))
 86 | 
 87 | # and the significant ones (p<0.05)
 88 | CTL_subset.combined_DE_markers<-CTL_subset.combined_DE_markers[CTL_subset.combined_DE_markers$p_val_adj <0.05,]
 89 | number_DE_genes[i,a]<-nrow(CTL_subset.combined_DE_markers)
 90 | 
 91 | # merge with risk genes
 92 | # merge all DE results with risk genes
 93 | risk_all_filtered_subset<-merge(risk_use, CTL_subset.combined_DE_markers, by="gene", all=F)
 94 | 
 95 | number_DE_riskgenes[i,b]<-nrow(risk_all_filtered_subset)}
 96 | 
 97 | write.csv(number_DE_genes, "number_DE_genes.csv")
 98 | write.csv(number_DE_riskgenes, "number_DE_riskgenes.csv")
 99 | 
100 | 
101 | 
102 | 


--------------------------------------------------------------------------------
/DDTx/demultiplexed_data_demuxlet.R:
--------------------------------------------------------------------------------
  1 | ## comparison demuxlet cell and doublet assignment results to souporcell results
  2 | # WTC june 2023
  3 | 
  4 | # prep demuxlet results for comparing to souporcell results
  5 | 
  6 | lane1<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane01.best", header=T)
  7 | lane1$barcode_lane = substr(lane1$BARCODE,1,nchar(lane1$BARCODE)-2)
  8 | lane1$barcode_lane = substr(lane1$BARCODE,1,nchar(lane1$BARCODE)-2)
  9 | lane1$barcode_lane<-paste0(lane1$barcode_lane, "_220504_lane01")
 10 | 
 11 | lane2<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane02.best", header=T)
 12 | lane2$barcode_lane = substr(lane2$BARCODE,1,nchar(lane2$BARCODE)-2)
 13 | lane2$barcode_lane = substr(lane2$BARCODE,1,nchar(lane2$BARCODE)-2)
 14 | lane2$barcode_lane<-paste0(lane2$barcode_lane, "_220504_lane02")
 15 | 
 16 | 
 17 | lane3<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane03.best", header=T)
 18 | lane3$barcode_lane = substr(lane3$BARCODE,1,nchar(lane3$BARCODE)-2)
 19 | lane3$barcode_lane = substr(lane3$BARCODE,1,nchar(lane3$BARCODE)-2)
 20 | lane3$barcode_lane<-paste0(lane3$barcode_lane, "_220504_lane03")
 21 | 
 22 | lane4<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane04.best", header=T)
 23 | lane4$barcode_lane = substr(lane4$BARCODE,1,nchar(lane4$BARCODE)-2)
 24 | lane4$barcode_lane = substr(lane4$BARCODE,1,nchar(lane4$BARCODE)-2)
 25 | lane4$barcode_lane<-paste0(lane4$barcode_lane, "_220504_lane04")
 26 | 
 27 | lane5<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane05.best", header=T)
 28 | lane5$barcode_lane = substr(lane5$BARCODE,1,nchar(lane5$BARCODE)-2)
 29 | lane5$barcode_lane = substr(lane5$BARCODE,1,nchar(lane5$BARCODE)-2)
 30 | lane5$barcode_lane<-paste0(lane5$barcode_lane, "_220504_lane05")
 31 | 
 32 | lane6<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane06.best", header=T)
 33 | lane6$barcode_lane = substr(lane6$BARCODE,1,nchar(lane6$BARCODE)-2)
 34 | lane6$barcode_lane = substr(lane6$BARCODE,1,nchar(lane6$BARCODE)-2)
 35 | lane6$barcode_lane<-paste0(lane6$barcode_lane, "_220504_lane06")
 36 | 
 37 | lane7<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane07.best", header=T)
 38 | lane7$barcode_lane = substr(lane7$BARCODE,1,nchar(lane7$BARCODE)-2)
 39 | lane7$barcode_lane = substr(lane7$BARCODE,1,nchar(lane7$BARCODE)-2)
 40 | lane7$barcode_lane<-paste0(lane7$barcode_lane, "_220504_lane07")
 41 | 
 42 | lane8<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane08.best", header=T)
 43 | lane8$barcode_lane = substr(lane8$BARCODE,1,nchar(lane8$BARCODE)-2)
 44 | lane8$barcode_lane = substr(lane8$BARCODE,1,nchar(lane8$BARCODE)-2)
 45 | lane8$barcode_lane<-paste0(lane8$barcode_lane, "_220504_lane08")
 46 | 
 47 | lane9<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane09.best", header=T)
 48 | lane9$barcode_lane = substr(lane9$BARCODE,1,nchar(lane9$BARCODE)-2)
 49 | lane9$barcode_lane = substr(lane9$BARCODE,1,nchar(lane9$BARCODE)-2)
 50 | lane9$barcode_lane<-paste0(lane9$barcode_lane, "_220504_lane09")
 51 | 
 52 | lane10<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/demultiplexing/demuxlet/output/220504_lane10.best", header=T)
 53 | lane10$barcode_lane = substr(lane10$BARCODE,1,nchar(lane10$BARCODE)-2)
 54 | lane10$barcode_lane = substr(lane10$BARCODE,1,nchar(lane10$BARCODE)-2)
 55 | lane10$barcode_lane<-paste0(lane10$barcode_lane, "_220504_lane10")
 56 | 
 57 | x<-rbind(lane1, lane2)
 58 | x<-rbind(x, lane3)
 59 | x<-rbind(x, lane4)
 60 | x<-rbind(x, lane5)
 61 | x<-rbind(x, lane6)
 62 | x<-rbind(x, lane7)
 63 | x<-rbind(x, lane8)
 64 | x<-rbind(x, lane9)
 65 | x<-rbind(x, lane10)
 66 | 
 67 | 
 68 | x<-x[c(5,13,15,21)]
 69 | dim(x)
 70 | 
 71 | # load mito, epi and mislabeled sample filtered souporcell results
 72 | metadata<-read.table("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/ddtx_merged_demultiplexed_clustered_compartment_azi_elmentaiteadultileum_below60pctmito_epifilter_meta.csv")
 73 | dim(metadata)
 74 | 
 75 | ## demuxlet assigned singlets largely overlap with souporcell assigned singlets 
 76 | merge<-merge(x, metadata, by="barcode_lane", all=F)
 77 | dim(merge)
 78 | merge_single<-merge[merge$DROPLET.TYPE=="SNG",]
 79 | table(merge_single$donor_final, merge_single$SNG.BEST.GUESS, merge_single$DROPLET.TYPE)
 80 | 
 81 | #read demultiplexed Seuratobject, not doublet filtered
 82 | data<-readDRS("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/objects/ddtx_merged_demultiplexed.rds")
 83 | 
 84 | #add demuxlet output to seuratfile
 85 | metadata<-data@meta.data
 86 | metadata<-merge(metadata, x,by="barcode_lane")
 87 | rownames(metadata)<-metadata$barcode_lane
 88 | metadata<-metadata[c(35,36,37)]
 89 | data <- AddMetaData(data, metadata)
 90 | 
 91 | # add clinical metadata
 92 | reference<-read.csv("/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/reference_2.csv", sep=";")
 93 | reference<-reference[-c(1,10)]
 94 | reference<-reference[-18,]
 95 | 
 96 | meta<-data@meta.data
 97 | dim(meta)
 98 | meta$pt_project_lane<-paste(meta$lane, meta$SNG.BEST.GUESS, sep="_")
 99 | meta<-merge(meta, reference, by="pt_project_lane", all=T)
100 | dim(meta)
101 | meta<-meta[c(7,39:47)]
102 | head(meta)
103 | rownames(meta)<-meta$barcode_lane
104 | meta<-meta[-1]
105 | data<-AddMetaData(data, meta)
106 | 
107 | # save
108 | saveRDS(data, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/ddtx_merged_demultiplexed_demuxlet.rds")
109 | 
110 | 
111 | 


--------------------------------------------------------------------------------
/PSC/PSC_version2022/basicDE_2023.R:
--------------------------------------------------------------------------------
  1 | 
  2 | ###################################################
  3 | # Title: basic DE analysis
  4 | # Date: 28-08-2023
  5 | ###################################################
  6 | 
  7 | 
  8 | ######################
  9 | # Library
 10 | ######################
 11 | 
 12 | library(ggplot2) 
 13 | library(dplyr) 
 14 | library(tidyr) 
 15 | library(stringr)
 16 | library(patchwork)
 17 | library(grid)
 18 | library(gridExtra) 
 19 | library(ggpubr)
 20 | library(RColorBrewer) 
 21 | library(Seurat)
 22 | library(readr)
 23 | library(MAST)
 24 | library(readxl)
 25 | library(openxlsx)
 26 | library(enrichR)
 27 | require(tidyverse)
 28 | library(packcircles)
 29 | 
 30 | ######################
 31 | # Main codes
 32 | ######################
 33 | 
 34 | # read in dataframes
 35 | epi <- readRDS("Nieuw/epi_azimuth_duox2.rds")
 36 | imm <- readRDS("Nieuw/imm_azimuth_with_plasma.rds")
 37 | str <- readRDS("Nieuw/stro_azimuth.rds")
 38 | 
 39 | DefaultAssay(epi) = "RNA"
 40 | DefaultAssay(imm) = "RNA"
 41 | DefaultAssay(str) = "RNA"
 42 | Idents(epi) <- "celltype.final"
 43 | Idents(imm) <- "predicted.cell_type.pred"
 44 | Idents(str) <- "predicted.cell_type.pred"
 45 | 
 46 | # Create a table of frequencies
 47 | epi_frequencies <- as.data.frame.matrix(table(epi$celltype.final, epi$disease))
 48 | str_frequencies <- as.data.frame.matrix(table(str$predicted.cell_type.pred, str$disease))
 49 | imm_frequencies <- as.data.frame.matrix(table(imm$predicted.cell_type.pred, imm$disease))
 50 | frequencies <- rbind(epi_frequencies, str_frequencies, imm_frequencies)
 51 | 
 52 | #write_csv(frequencies, "Results/DE_2023/frequencies.csv")
 53 | 
 54 | # Create a table with rows celltype and column disease/analysis, with number of DE genes per analysis for each celltype for UC and PSC
 55 | files <- list.files(path="Results/DE_2023/DE_PSCINI_all/", pattern="*.csv", full.names=TRUE, recursive=FALSE)
 56 | total_PSC <- data.frame()
 57 | lapply(files, function(x) {
 58 |   t <- read.csv(x, header=TRUE) # load file
 59 |   celltype <- tools::file_path_sans_ext(basename(x))
 60 |   t$celltype <- celltype
 61 |   out <- t[t$p_val_adj < 0.05,]
 62 |     # write to file
 63 |   total_PSC <<- rbind(total_PSC, out)
 64 | })
 65 | total_PSC <- total_PSC[,c(6,8,9)]
 66 | DEpercelltype_PSC <- total_PSC %>%
 67 |   group_by(celltype) %>%
 68 |   summarize(num_genes = n())
 69 | 
 70 | files <- list.files(path="Results/DE_2023/DE_UCINI_all/", pattern="*.csv", full.names=TRUE, recursive=FALSE)
 71 | total_UC <- data.frame()
 72 | lapply(files, function(x) {
 73 |   t <- read.csv(x, header=TRUE) # load file
 74 |   celltype <- tools::file_path_sans_ext(basename(x))
 75 |   t$celltype <- celltype
 76 |   out <- t[t$p_val_adj < 0.05,]
 77 |   # write to file
 78 |   total_UC <<- rbind(total_UC, out)
 79 | })
 80 | total_UC <- total_UC[,c(6,8,9)]
 81 | DEpercelltype_UC <- total_UC %>%
 82 |   group_by(celltype) %>%
 83 |   summarize(num_genes = n())
 84 | 
 85 | # Filter celltypes with n genes > 9
 86 | DEpercelltype_PSC_10 <- DEpercelltype_PSC[DEpercelltype_PSC$num_genes > 9,]
 87 | DEpercelltype_UC_10 <- DEpercelltype_UC[DEpercelltype_UC$num_genes > 9,]
 88 | 
 89 | # Make figure for PSC
 90 |   # Generate the layout. This function return a dataframe with one line per bubble. 
 91 |   # It gives its center (x and y) and its radius, proportional of the value
 92 | packing <- circleProgressiveLayout(DEpercelltype_PSC_10$num_genes, sizetype='area')
 93 | 
 94 |   # We can add these packing information to the initial data frame
 95 | data <- cbind(DEpercelltype_PSC_10, packing)
 96 | 
 97 |   # Check that radius is proportional to value. We don't want a linear relationship, since it is the AREA that must be proportional to the value
 98 | plot(data$radius, data$value)
 99 | 
100 |   # The next step is to go from one center + a radius to the coordinates of a circle that
101 |   # is drawn by a multitude of straight lines.
102 | dat.gg <- circleLayoutVertices(packing, npoints=50)
103 | 
104 | ggplot() + 
105 |   
106 |   # Make the bubbles
107 |   geom_polygon(data = dat.gg, aes(x, y, group = id, fill=as.factor(id)), colour = "black", alpha = 0.6) +
108 |   
109 |   # Add text in the center of each bubble + control its size
110 |   geom_text(data = data, aes(x, y, size=num_genes, label = celltype)) +
111 |   scale_size_continuous(range = c(1,4)) +
112 |   
113 |   # General theme:
114 |   theme_void() + 
115 |   theme(legend.position="none") +
116 |   coord_equal()
117 | 
118 | ggsave("/Users/amberbangma/Documents/R/PSC/Results/Circle_PSC.pdf")
119 | 
120 | # Make figure for UC
121 | # Generate the layout. This function return a dataframe with one line per bubble. 
122 | # It gives its center (x and y) and its radius, proportional of the value
123 | packing <- circleProgressiveLayout(DEpercelltype_UC_10$num_genes, sizetype='area')
124 | 
125 | # We can add these packing information to the initial data frame
126 | data <- cbind(DEpercelltype_UC_10, packing)
127 | 
128 | # Check that radius is proportional to value. We don't want a linear relationship, since it is the AREA that must be proportional to the value
129 | plot(data$radius, data$value)
130 | 
131 | # The next step is to go from one center + a radius to the coordinates of a circle that
132 | # is drawn by a multitude of straight lines.
133 | dat.gg <- circleLayoutVertices(packing, npoints=50)
134 | 
135 | 
136 | ggplot() + 
137 |   
138 |   # Make the bubbles
139 |   geom_polygon(data = dat.gg, aes(x, y, group = id, fill=as.factor(id)), colour = "black", alpha = 0.6) +
140 |   
141 |   # Add text in the center of each bubble + control its size
142 |   geom_text(data = data, aes(x, y, size=num_genes, label = celltype)) +
143 |   scale_size_continuous(range = c(1,4)) +
144 |   
145 |   # General theme:
146 |   theme_void() + 
147 |   theme(legend.position="none") +
148 |   coord_equal()
149 | 
150 | ggsave("/Users/amberbangma/Documents/R/PSC/Results/Circle_UC.pdf",  width=6, height=6)
151 | 


--------------------------------------------------------------------------------
/PSC/Proportion_analysis_202103.R:
--------------------------------------------------------------------------------
  1 | library(dplyr)
  2 | library(Seurat)
  3 | library(patchwork)
  4 | library(ggplot2)
  5 | library(readr)
  6 | library(tidyr)
  7 | library(MAST)
  8 | library(readxl)
  9 | library(openxlsx)
 10 | library(reshape2)
 11 | library(phyloseq)
 12 | library(fido)
 13 | library(compositions)
 14 | library(corrplot)
 15 | 
 16 | # load in dataset
 17 | data<- readRDS("PSC_processed_march_2021.rds")
 18 | DefaultAssay(data) = "RNA"
 19 | 
 20 | # extract counts to matrix
 21 | counts=as.data.frame.matrix(table(data$Final_HTO, data$celltypes))
 22 | 
 23 | # calculate proportions
 24 | total_row = apply(counts, 1, sum)
 25 | pcts = lapply(counts, function(x) {
 26 |   x / total_row
 27 | })
 28 | frequencies = as.data.frame(pcts)
 29 | rownames(counts) = rownames(frequencies)
 30 | 
 31 | # add state
 32 | frequencies$disease =  sapply(strsplit(rownames(frequencies),"-"), `[`, 1)
 33 | frequencies$inflammation =  sapply(strsplit(rownames(frequencies),"-"), `[`, 2)
 34 | frequencies$state =  apply( frequencies[ ,c(44,45)] , 1 , paste , collapse = "-" )
 35 | frequencies = frequencies[,-c(44,45)]
 36 | 
 37 | # remove doublets and MT-Hi
 38 | frequencies=frequencies[,-c(6,14,20,26,33)]
 39 | 
 40 | # plot frequencies
 41 | list_celltypes<-list(colnames(frequencies[-39]))
 42 | df = melt(frequencies)
 43 | df$state = factor(df$state, levels = c("HC-NI", "UC-NI", "UC-I", "PSC-NI", "PSC-I"))
 44 | df$variable = factor(df$variable, levels = list_celltypes[[1]])
 45 | ggplot(df, aes(x=state, y=value, fill=state)) + 
 46 |   geom_boxplot(outlier.size = 1) + 
 47 |   facet_wrap( ~ variable, scales="free") +
 48 |   theme_bw(base_size = 14) + 
 49 |   scale_fill_brewer(palette="Set1")
 50 | 
 51 | ggsave("Results/Figures/frequencies.pdf", width = 33, height = 19)
 52 | 
 53 | # fido's pibble model https://jsilve24.github.io/fido/articles/introduction-to-fido.html
 54 | # create counts matrix with state
 55 | counts=as.data.frame.matrix(table(data$Final_HTO, data$celltypes))
 56 | counts$disease =  sapply(strsplit(rownames(counts),"-"), `[`, 1)
 57 | counts$inflammation =  sapply(strsplit(rownames(counts),"-"), `[`, 2)
 58 | counts$state =  apply(counts[ ,c(44,45)] , 1 , paste , collapse = "-" )
 59 | counts = counts[,-c(44,45)]
 60 | counts$state = as.factor(counts$state)
 61 | #counts$state = relevel(counts$state, ref = "UC-I") #to set reference, otherwise HC-NI is default
 62 | 
 63 | # remove doublets and MT-Hi
 64 | counts=counts[,-c(6,14,20,26,33)]
 65 | 
 66 | # create priors 
 67 | X = t(model.matrix(~state, data=counts))
 68 | X[,1:5]
 69 | Y = t(counts[,-39])
 70 | Y[1:5,1:5]
 71 | upsilon <- nrow(Y)+3
 72 | theta <- matrix(0, nrow(Y)-1, nrow(X))
 73 | gamma <- diag(nrow(X))
 74 | G <- cbind(diag(nrow(Y)-1), -1)
 75 | Xi <- (upsilon-nrow(Y))*G%*%diag(nrow(Y))%*%t(G)
 76 | priors <- pibble(Y, X, upsilon, theta, gamma, Xi)
 77 | print(priors)
 78 | 
 79 | # change model to clr
 80 | priors <- to_clr(priors)
 81 | summary(priors, pars="Lambda")
 82 | names_covariates(priors) <- rownames(X)
 83 | 
 84 | #plot log probabilites for each state
 85 | plot(priors, par="Lambda") + ggplot2::xlim(c(-10, 10))
 86 | 
 87 | #check if model fits data (checked by Johannes)
 88 | priors$Y <- Y
 89 | posterior <- refit(priors, optim_method="adam")
 90 | ppc(posterior) + ggplot2::coord_cartesian(ylim=c(0, 500)) #I changed axis from 30000 to 500 for visualization
 91 | ppc_summary(posterior)
 92 | ppc(posterior, from_scratch=TRUE) +ggplot2::coord_cartesian(ylim=c(0, 500))
 93 | ppc_summary(posterior, from_scratch=TRUE)
 94 | 
 95 | # look at posterior distribution of regression parameters
 96 | posterior_summary <- summary(posterior, pars="Lambda")$Lambda
 97 | 
 98 | # focus on coordinates with non-zero effect ("significant") for UC-I 
 99 | posterior_summary <- filter(posterior_summary, covariate=="stateUC-I")
100 | focus <- posterior_summary[sign(posterior_summary$p2.5) == sign(posterior_summary$p97.5),]
101 | focus <- unique(focus$coord)
102 | plot(posterior, par="Lambda", focus.coord = focus, focus.cov = rownames(X)[4]) + geom_vline(xintercept=0, linetype="dashed", color="red")
103 | 
104 | # create list of significant celltypes for each state
105 | posterior_summary <- summary(posterior, pars="Lambda")$Lambda
106 | state_focus_taxa <- vector("list", length(rownames(X)[2:5]))
107 | names(state_focus_taxa) <- rownames(X)[2:5]
108 | for(state in rownames(X)[2:5]) {
109 |   post_summary <- filter(posterior_summary, covariate %in% state)
110 |   focus <- post_summary[sign(post_summary$p2.5) == sign(post_summary$p97.5),]
111 |   focus <- unique(focus$coord)
112 |   state_focus_taxa[[state]] <- focus
113 | }
114 | 
115 | # create square plot for differences
116 | mat_state <- diag(length(rownames(X)[2:5]))
117 | rownames(mat_state) <- colnames(mat_state) <- rownames(X)[2:5]
118 | state_pairs <- data.frame(t(combn(rownames(X)[2:5], 2)))
119 | for(pair in 1:nrow(state_pairs)){
120 |   print(pair)
121 |   mat_state[state_pairs[pair,]$X1, state_pairs[pair,]$X2] <- length(setdiff(state_focus_taxa[[state_pairs[pair,]$X1]], state_focus_taxa[[state_pairs[pair,]$X2]]))
122 |   mat_state[state_pairs[pair,]$X2, state_pairs[pair,]$X1] <- length(setdiff(state_focus_taxa[[state_pairs[pair,]$X2]], state_focus_taxa[[state_pairs[pair,]$X1]]))
123 | }
124 | corrplot::corrplot(mat_state, method="square", is.corr=F, addCoef.col = "black", diag = F, tl.col="black", cl.pos="n")
125 | 
126 | # create square plot for agreement
127 | mat_state <- diag(length(rownames(X)[2:5]))
128 | rownames(mat_state) <- colnames(mat_state) <- rownames(X)[2:5]
129 | state_pairs <- data.frame(t(combn(rownames(X)[2:5], 2)))
130 | for(pair in 1:nrow(state_pairs)){
131 |   print(pair)
132 |   mat_state[state_pairs[pair,]$X1, state_pairs[pair,]$X2] <- length(intersect(state_focus_taxa[[state_pairs[pair,]$X1]], state_focus_taxa[[state_pairs[pair,]$X2]]))
133 |   mat_state[state_pairs[pair,]$X2, state_pairs[pair,]$X1] <- length(intersect(state_focus_taxa[[state_pairs[pair,]$X2]], state_focus_taxa[[state_pairs[pair,]$X1]]))
134 | }
135 | corrplot::corrplot(mat_state, method="square", type = "lower", is.corr=F, addCoef.col = "black", diag = F, tl.col="black", cl.pos="n")
136 | 
137 | 


--------------------------------------------------------------------------------
/create_raw_datafile_from_seq_data.md:
--------------------------------------------------------------------------------
  1 | **Use in R**
  2 | ====
  3 | 
  4 | Autor: WTC
  5 | Date: 20172010
  6 | 
  7 | **preparing raw data per patient**
  8 | ----
  9 | ```
 10 | dir.create((".."))
 11 | ```
 12 | change folder accordingly
 13 | ```
 14 | setwd("..") 
 15 | ```
 16 | list all files from working directory with a similar pattern
 17 | ```
 18 | temp= list.files(pattern="UMIs_counts_per_gene_exon") ## lists input files per pool, change pattern for using either UMIs or TPMs/datacounts
 19 | ```
 20 | paste pre- and suffix to all colnames, being PATIENT_CELLTYPE_CELLBARCODEPOOLNUMBER, except for column "GENE" 
 21 | column GENE is renamed to ensembl_gene_id for merging with annotation file
 22 | unique name is assigned to pool
 23 | ```
 24 | number_iel=10
 25 | number_lpl=20
 26 | number_blood=1
 27 | for (i in 1:length(temp)) { ## paste suffix to all colnames except for column "GENE" and assigns name to file accordingly
 28 |   if (grepl("IEL", temp[i])){
 29 |     my_file=read.table(temp[i], header=TRUE) ## make a table of the first file in the list temp, with colnames in the header
 30 |     my_file=as.data.frame(my_file) ## makes a data.frame of this file
 31 |     colnames(my_file) <- paste("1_IEL_", colnames(my_file), number_iel,sep = "") # Hi_IEL_ as prefix to column names and poolnr as suffix
 32 |     colnames(my_file)[1]="ensembl_gene_id" # changes the first column back to GENE in order to be able to merge later
 33 |     assign(paste0("prePool",number_iel), my_file)
 34 |     rm(my_file)
 35 |     number_iel=number_iel+1 # makes sure the cells from the next prepool will be numbered accordingly
 36 |   } else if (grepl("LPL", temp[i])){ # takes all files in the list temp with LPL in the file name
 37 |     
 38 |     my_file=read.table(temp[i], header=TRUE) ## make a table of the first file in the list temp, with colnames in the header
 39 |     my_file=as.data.frame(my_file) ## makes a data.frame of this file
 40 |     colnames(my_file) <- paste("1_LPL_", colnames(my_file), number_lpl,sep = "") # pastes Hi_LPL_ as prefix to column names and poolnr as suffix
 41 |     colnames(my_file)[1]="ensembl_gene_id" # changes the first column back to ensembl_gene_id in order to be able to merge later
 42 |     assign(paste0("prePool",number_lpl), my_file) # assignes a unique poolname to each prePool file
 43 |     rm(my_file)
 44 |     number_lpl=number_lpl+1 # makes sure the cells from the next prepool will be numbered accordingly
 45 |   } else if (grepl("blood", temp[i])){ # takes all files in the list temp with blood in the file name
 46 |     my_file=read.table(temp[i], header=TRUE) ## make a table of the first file in the list temp, with colnames in the header
 47 |     my_file=as.data.frame(my_file) ## makes a data.frame of this file
 48 |     colnames(my_file) <- paste("1_BLOOD_", colnames(my_file), number_blood,sep = "") # pastes Hi_BLOOD_ as prefix to column names and poolnr as suffix
 49 |     colnames(my_file)[1]="ensembl_gene_id" # changes the first column back to ensembl_gene_id in order to be able to merge later
 50 |     assign(paste0("prePool",number_blood), my_file) # assignes a unique poolname to each prePool file
 51 |     rm(my_file)
 52 |     number_blood=number_blood+1 # makes sure the cells from the next prepool will be numbered accordingly
 53 |   } else {print ("Error!!!")}
 54 | }
 55 | ```
 56 | 
 57 | merge files named 'prePoolx' by ensembl_gene_id, keeping all genes, and change column ensembl_gene_id to rownames and store as numeric data matrix first_file
 58 | ```
 59 | my_count=1
 60 | for (i in mget(ls(pattern="prePool"))) { ## mget searches for an r-object with a given name, ls makes sure the search is within the global environment
 61 |   if (my_count==1){ ##names the first file i that is found hereabove 'first_file'
 62 |     first_file=i
 63 |     my_count=my_count+1 ## makes sure the second file i enters the 'else' part
 64 |   }
 65 |   else{  
 66 |     first_file=merge(first_file,i, by="ensembl_gene_id", all=TRUE) ## merges first file with all the other files in the global environment that have the patter prePool. set all=F for merging only genes expressed in all pools
 67 |   }
 68 |   first_file[is.na(first_file)]<-0 ## all NAs to zero
 69 |   rm(i)
 70 | }
 71 | ```
 72 | merge files keeping only genes expressed in all pools, creating first_file_reduced
 73 | ```
 74 | my_count=1
 75 | for (i in mget(ls(pattern="prePool"))) { ## mget searches for an r-object with a given name, ls makes sure the search is within the global environment
 76 |   if (my_count==1){ ##names the first file i that is found hereabove 'first_file'
 77 |     first_file_reduced=i
 78 |     my_count=my_count+1 ## makes sure the second file i enters the 'else' part
 79 |   }
 80 |   else{  
 81 |     first_file_reduced=merge(first_file_reduced,i, by="ensembl_gene_id", all=FALSE) ## merges second_file by all=F for merging only genes expressed in all pools
 82 |   }
 83 |   rm(i)
 84 | }
 85 | first_file_pt1<-first_file
 86 | ```
 87 | repeat these steps for all patients
 88 | 
 89 | **merge raw data from all patients**
 90 | ---
 91 | ```
 92 | second_file<-merge(first_file_pt1, first_file_pt2, by="ensembl_gene_id", all=TRUE)
 93 | second_file<-merge(second_file, first_file_pt3, by="ensembl_gene_id", all=TRUE)
 94 | second_file<-merge(second_file, first_file_pt3.1, by="ensembl_gene_id", all=TRUE)
 95 | ```
 96 | 
 97 | **enter gene annotation**
 98 | get annotation file
 99 | ```
100 | ensembl_geneName_mapping<-read.csv("..") 
101 | ```
102 | merge second_file with annotationfile
103 | ```
104 | second_file<-merge(ensembl_geneName_mapping, second_file, by="ensembl_gene_id") 
105 | ```
106 | paste ensembl gene, chromosome name and genesymbol
107 | ```
108 | second_file$ensembl_gene_id<-paste(second_file$ensembl_gene_id, second_file$NEW, sep="_") 
109 | row.names(second_file)=second_file$ensembl_gene_id
110 | third_file<-second_file # assign the second_file file to third_file
111 | ```
112 | delete non-numeric columns in third_file
113 | ```
114 | fourth_file<-third_file[,-c(1:10)] # remove all non-numeric columns
115 | View(fourth_file)
116 | fourth_file[is.na(fourth_file)]<-0 ## all NAs to zero
117 | fourth_file.data=fourth_file
118 | ```
119 | 
120 | **save raw data file containing all data**
121 | ```
122 | save(fourth_file, file="..")
123 | ```
124 | 


--------------------------------------------------------------------------------
/Vedo2/step_4_integration_biop_noribo_nonorm:
--------------------------------------------------------------------------------
 1 | library(Seurat)
 2 | library(ggplot2)
 3 | 
 4 | options(future.globals.maxSize = 150000 * 1024^2)
 5 | 
 6 | lane11_1<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200611_lane1_nomito_sct.rds")
 7 | DefaultAssay(lane11_1)<-"SCT"
 8 | lane11_2<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200611_lane2_nomito_sct.rds")  
 9 | DefaultAssay(lane11_2)<-"SCT"
10 | lane11_3<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200611_lane3_nomito_sct.rds")
11 | DefaultAssay(lane11_3)<-"SCT"
12 | lane11_4<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200611_lane4_nomito_sct.rds")
13 | DefaultAssay(lane11_4)<-"SCT"
14 | lane12_1<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200612_lane1_nomito_sct.rds")
15 | DefaultAssay(lane12_1)<-"SCT"
16 | lane12_2<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200612_lane2_nomito_sct.rds")
17 | DefaultAssay(lane12_2)<-"SCT"
18 | lane12_3<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200612_lane3_nomito_sct.rds")
19 | DefaultAssay(lane12_3)<-"SCT"
20 | lane12_4<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200612_lane4_nomito_sct.rds")
21 | DefaultAssay(lane12_4)<-"SCT"
22 | lane25_1<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200625_lane1_nomito_sct.rds")
23 | DefaultAssay(lane25_1)<-"SCT"
24 | lane25_2<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200625_lane2_nomito_sct.rds")
25 | DefaultAssay(lane25_2)<-"SCT"
26 | lane25_3<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200625_lane3_nomito_sct.rds")
27 | DefaultAssay(lane25_3)<-"SCT"
28 | lane25_4<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200625_lane4_nomito_sct.rds")
29 | DefaultAssay(lane25_4)<-"SCT"
30 | lane26_1<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200626_lane1_nomito_sct.rds")
31 | DefaultAssay(lane26_1)<-"SCT"
32 | lane26_2<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200626_lane2_nomito_sct.rds")
33 | DefaultAssay(lane26_2)<-"SCT"
34 | lane26_3<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200626_lane3_nomito_sct.rds")
35 | DefaultAssay(lane26_3)<-"SCT"
36 | lane26_4<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/preprocessed_lanes_nomito/200626_lane4_nomito_sct.rds")
37 | DefaultAssay(lane26_4)<-"SCT"
38 | Vedo01_T0_NI<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/VEDO01_T0_NI_sct.rds")
39 | DefaultAssay(Vedo01_T0_NI)<-"SCT"
40 | Vedo01_T0_I<-readRDS("/groups/umcg-weersma/tmp01/Emilia/batch1_integration/VEDO01_T0_I_sct.rds")
41 | DefaultAssay(Vedo01_T0_I)<-"SCT"
42 | 
43 | lane11_1[["percent.ribo"]] <- PercentageFeatureSet(lane11_1, pattern = "^RPL|^RPS")
44 | lane11_2[["percent.ribo"]] <- PercentageFeatureSet(lane11_2, pattern = "^RPL|^RPS")
45 | lane11_3[["percent.ribo"]] <- PercentageFeatureSet(lane11_3, pattern = "^RPL|^RPS")
46 | lane11_4[["percent.ribo"]] <- PercentageFeatureSet(lane11_4, pattern = "^RPL|^RPS")
47 | lane12_1[["percent.ribo"]] <- PercentageFeatureSet(lane12_1, pattern = "^RPL|^RPS")
48 | lane12_2[["percent.ribo"]] <- PercentageFeatureSet(lane12_2, pattern = "^RPL|^RPS")
49 | lane12_3[["percent.ribo"]] <- PercentageFeatureSet(lane12_3, pattern = "^RPL|^RPS")
50 | lane12_4[["percent.ribo"]] <- PercentageFeatureSet(lane12_4, pattern = "^RPL|^RPS")
51 | lane25_1[["percent.ribo"]] <- PercentageFeatureSet(lane25_1, pattern = "^RPL|^RPS")
52 | lane25_2[["percent.ribo"]] <- PercentageFeatureSet(lane25_2, pattern = "^RPL|^RPS")
53 | lane25_3[["percent.ribo"]] <- PercentageFeatureSet(lane25_3, pattern = "^RPL|^RPS")
54 | lane25_4[["percent.ribo"]] <- PercentageFeatureSet(lane25_4, pattern = "^RPL|^RPS")
55 | lane26_1[["percent.ribo"]] <- PercentageFeatureSet(lane26_1, pattern = "^RPL|^RPS")
56 | lane26_2[["percent.ribo"]] <- PercentageFeatureSet(lane26_2, pattern = "^RPL|^RPS")
57 | lane26_3[["percent.ribo"]] <- PercentageFeatureSet(lane26_3, pattern = "^RPL|^RPS")
58 | lane26_4[["percent.ribo"]] <- PercentageFeatureSet(lane26_4, pattern = "^RPL|^RPS")
59 | Vedo01_T0_NI[["percent.ribo"]] <- PercentageFeatureSet(Vedo01_T0_NI, pattern = "^RPL|^RPS")
60 | Vedo01_T0_I[["percent.ribo"]] <- PercentageFeatureSet(Vedo01_T0_I, pattern = "^RPL|^RPS")
61 | 
62 | alldata_vedo2_batch1_biop_integration_list <- list(lane11_1, lane11_2, lane11_3, lane11_4, lane12_1, lane12_2, lane12_3, lane12_4, lane25_1, lane25_2, lane25_3, lane25_4, lane26_1, lane26_2, lane26_3, lane26_4, Vedo01_T0_NI, Vedo01_T0_I)
63 | 
64 | print("loaded")
65 | features <- SelectIntegrationFeatures(object.list = alldata_vedo2_batch1_biop_integration_list, nfeatures = 3000)
66 | print("features_selected")
67 | alldata_vedo2_batch1_biop_integration_list <- PrepSCTIntegration(object.list = alldata_vedo2_batch1_biop_integration_list, anchor.features = features)
68 | print("integration_prepped")
69 | anchors <- FindIntegrationAnchors(object.list = alldata_vedo2_batch1_biop_integration_list, anchor.features = features, normalization.method = "SCT")
70 | 
71 | #when using reference lanes
72 | #anchors <- FindIntegrationAnchors(object.list = alldata_vedo2_batch1_biop_integration_list, anchor.features = features, normalization.method = "SCT", reference = c(1,4,7,8,16))
73 | 
74 | print("achors_found")
75 | rm(alldata_vedo2_batch1_biop_integration_list)
76 | alldata.integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT")
77 | print("integrated")
78 | rm(anchors)
79 | alldata.integrated <- RunPCA(alldata.integrated, verbose = FALSE)
80 | print("pca2_ran")
81 | alldata.integrated <- RunUMAP(alldata.integrated, dims = 1:30)
82 | print("umap_ran")
83 | alldata.integrated <- FindNeighbors(alldata.integrated, dims = 1:30)
84 | alldata.integrated <- FindClusters(alldata.integrated, resolution = 0.5)
85 | 
86 | DefaultAssay(alldata.integrated) <- "RNA"
87 | 
88 | saveRDS(alldata.integrated, file="/groups/umcg-weersma/tmp01/Emilia/batch1_integration/vedo2_batch1_biop_integrated_noribo_nonorm_withmito.rds")
89 | 


--------------------------------------------------------------------------------
/PSC/Proportion_analysis.R:
--------------------------------------------------------------------------------
  1 | library(dplyr)
  2 | library(Seurat)
  3 | library(patchwork)
  4 | library(ggplot2)
  5 | library(readr)
  6 | library(tidyr)
  7 | library(MAST)
  8 | library(readxl)
  9 | library(openxlsx)
 10 | library(reshape2)
 11 | library(phyloseq)
 12 | library(fido)
 13 | library(compositions)
 14 | 
 15 | # load in dataset
 16 | data<- readRDS("/Users/amberbangma/Documents/R/PSC/Data/PSC_processed_oct.rds")
 17 | DefaultAssay(data) = "RNA"
 18 | 
 19 | # extract counts to matrix
 20 | counts=as.data.frame.matrix(table(data$Final_HTO, data$celltypes))
 21 | 
 22 | # calculate proportions
 23 | total_row = apply(counts, 1, sum)
 24 | pcts = lapply(counts, function(x) {
 25 |   x / total_row
 26 | })
 27 | frequencies = as.data.frame(pcts)
 28 | rownames(counts) = rownames(frequencies)
 29 | 
 30 | # add state
 31 | frequencies$disease =  sapply(strsplit(rownames(frequencies),"-"), `[`, 1)
 32 | frequencies$inflammation =  sapply(strsplit(rownames(frequencies),"-"), `[`, 2)
 33 | frequencies$state =  apply( frequencies[ ,c(43,44)] , 1 , paste , collapse = "-" )
 34 | frequencies = frequencies[,-c(43,44)]
 35 | 
 36 | # remove doublets and MT-Hi
 37 | frequencies=frequencies[,-c(6,10,15,19,25,32)]
 38 | 
 39 | # plot frequencies
 40 | df = melt(frequencies)
 41 | df$state = factor(df$state, levels = c("HC-NI", "UC-NI", "UC-I", "PSC-NI", "PSC-I"))
 42 | df$variable = factor(df$variable, levels = c("Absorptive_enterocyte","Immature_enterocyte","BEST4_enterocyte","DUOX2_enterocyte","Absorptive_TA","PLCG2_TA","Ribo_TA","REG_TA","Cycling_TA","Stem","Enteroendocrine","Immature_goblet","Goblet","Tuft","MAST","glia","WNT2B.","WNT5B.","RSPO3.","Inflammatory_fibroblast","myofibroblasts","pericytes","endothelial","IgG_plasma","IgA_plasma","IgM_plasma","Cycling_B","GC","Follicular_B","CD8T","T_activated_FOS_low","T_activated_FOS_high","CD4_T_memory", "Treg","Cycling_T","APC"))
 43 | ggplot(df, aes(x=state, y=value, fill=state)) + 
 44 |   geom_boxplot(outlier.size = 1) + 
 45 |   facet_wrap( ~ variable, scales="free") +
 46 |   theme_bw(base_size = 14) + 
 47 |   scale_fill_brewer(palette="Set1")
 48 | 
 49 | #ggsave("Results/Figures/frequencies.pdf", width = 33, height = 19)
 50 | 
 51 | # fido's pibble model https://jsilve24.github.io/fido/articles/introduction-to-fido.html
 52 | # create counts matrix with state
 53 | counts=as.data.frame.matrix(table(data$Final_HTO, data$celltypes))
 54 | counts$disease =  sapply(strsplit(rownames(counts),"-"), `[`, 1)
 55 | counts$inflammation =  sapply(strsplit(rownames(counts),"-"), `[`, 2)
 56 | counts$state =  apply(counts[ ,c(43,44)] , 1 , paste , collapse = "-" )
 57 | counts = counts[,-c(43,44)]
 58 | counts$state = as.factor(counts$state)
 59 | #counts$state = relevel(counts$state, ref = "UC-I") #to set reference, otherwise HC-NI is default
 60 | 
 61 | # remove doublets and MT-Hi
 62 | counts=counts[,-c(6,10,15,19,25,32)]
 63 | 
 64 | # create priors 
 65 | X = t(model.matrix(~state, data=counts))
 66 | X[,1:5]
 67 | Y = t(counts[,-37])
 68 | Y[1:5,1:5]
 69 | upsilon <- nrow(Y)+3
 70 | theta <- matrix(0, nrow(Y)-1, nrow(X))
 71 | gamma <- diag(nrow(X))
 72 | G <- cbind(diag(nrow(Y)-1), -1)
 73 | Xi <- (upsilon-nrow(Y))*G%*%diag(nrow(Y))%*%t(G)
 74 | priors <- pibble(Y, X, upsilon, theta, gamma, Xi)
 75 | print(priors)
 76 | 
 77 | # change model to clr
 78 | priors <- to_clr(priors)
 79 | summary(priors, pars="Lambda")
 80 | names_covariates(priors) <- rownames(X)
 81 | 
 82 | #plot log probabilites for each state
 83 | plot(priors, par="Lambda") + ggplot2::xlim(c(-10, 10))
 84 | 
 85 | #check if model fits data (checked by Johannes)
 86 | priors$Y <- Y
 87 | posterior <- refit(priors, optim_method="adam")
 88 | ppc(posterior) + ggplot2::coord_cartesian(ylim=c(0, 500)) #I changed axis from 30000 to 500 for visualization
 89 | ppc_summary(posterior)
 90 | ppc(posterior, from_scratch=TRUE) +ggplot2::coord_cartesian(ylim=c(0, 500))
 91 | ppc_summary(posterior, from_scratch=TRUE)
 92 | 
 93 | # look at posterior distribution of regression parameters
 94 | posterior_summary <- summary(posterior, pars="Lambda")$Lambda
 95 | 
 96 | # focus on coordinates with non-zero effect ("significant") for UC-I 
 97 | posterior_summary <- filter(posterior_summary, covariate=="stateUC-I")
 98 | focus <- posterior_summary[sign(posterior_summary$p2.5) == sign(posterior_summary$p97.5),]
 99 | focus <- unique(focus$coord)
100 | plot(posterior, par="Lambda", focus.coord = focus, focus.cov = rownames(X)[4]) + geom_vline(xintercept=0, linetype="dashed", color="red")
101 | 
102 | # create list of significant celltypes for each state
103 | posterior_summary <- summary(posterior, pars="Lambda")$Lambda
104 | state_focus_taxa <- vector("list", length(rownames(X)[2:5]))
105 | names(state_focus_taxa) <- rownames(X)[2:5]
106 | for(state in rownames(X)[2:5]) {
107 |   post_summary <- filter(posterior_summary, covariate %in% state)
108 |   focus <- post_summary[sign(post_summary$p2.5) == sign(post_summary$p97.5),]
109 |   focus <- unique(focus$coord)
110 |   state_focus_taxa[[state]] <- focus
111 | }
112 | 
113 | # create square plot for differences
114 | mat_state <- diag(length(rownames(X)[2:5]))
115 | rownames(mat_state) <- colnames(mat_state) <- rownames(X)[2:5]
116 | state_pairs <- data.frame(t(combn(rownames(X)[2:5], 2)))
117 | for(pair in 1:nrow(state_pairs)){
118 |   print(pair)
119 |   mat_state[state_pairs[pair,]$X1, state_pairs[pair,]$X2] <- length(setdiff(state_focus_taxa[[state_pairs[pair,]$X1]], state_focus_taxa[[state_pairs[pair,]$X2]]))
120 |   mat_state[state_pairs[pair,]$X2, state_pairs[pair,]$X1] <- length(setdiff(state_focus_taxa[[state_pairs[pair,]$X2]], state_focus_taxa[[state_pairs[pair,]$X1]]))
121 | }
122 | corrplot::corrplot(mat_state, method="square", is.corr=F, addCoef.col = "black", diag = F, tl.col="black", cl.pos="n")
123 | 
124 | # create square plot for agreement
125 | mat_state <- diag(length(rownames(X)[2:5]))
126 | rownames(mat_state) <- colnames(mat_state) <- rownames(X)[2:5]
127 | state_pairs <- data.frame(t(combn(rownames(X)[2:5], 2)))
128 | for(pair in 1:nrow(state_pairs)){
129 |   print(pair)
130 |   mat_state[state_pairs[pair,]$X1, state_pairs[pair,]$X2] <- length(intersect(state_focus_taxa[[state_pairs[pair,]$X1]], state_focus_taxa[[state_pairs[pair,]$X2]]))
131 |   mat_state[state_pairs[pair,]$X2, state_pairs[pair,]$X1] <- length(intersect(state_focus_taxa[[state_pairs[pair,]$X2]], state_focus_taxa[[state_pairs[pair,]$X1]]))
132 | }
133 | corrplot::corrplot(mat_state, method="square", type = "lower", is.corr=F, addCoef.col = "black", diag = F, tl.col="black", cl.pos="n")
134 | 
135 | 
136 | 


--------------------------------------------------------------------------------
/PSC/Celltype_new.R:
--------------------------------------------------------------------------------
  1 | library(dplyr)
  2 | library(Seurat)
  3 | library(patchwork)
  4 | library(ggplot2)
  5 | library(readr)
  6 | library(tidyr)
  7 | library(MAST)
  8 | library(readxl)
  9 | library(openxlsx)
 10 | 
 11 | data<- readRDS("Data/PSC_202002_integrated_v2_noribo.rds")
 12 | data=UpdateSeuratObject(data)
 13 | DimPlot(data, label = T) + NoLegend()
 14 | 
 15 | data@meta.data$disease = sapply(strsplit(data$Final_HTO,"-"), `[`, 1)
 16 | data@meta.data$inflammation = sapply(strsplit(data$Final_HTO,"-"), `[`, 2)
 17 | data@meta.data$sample = sapply(strsplit(data$Final_HTO,"-"), `[`, 3)
 18 | data@meta.data$state <- paste(data$disease, data$inflammation, sep='-')
 19 | 
 20 | # recluster subset
 21 | # epi = average expression EPCAM > 5
 22 | epi<-subset(data, idents = c("6","7","17","19","0","14","10","3","21"))
 23 | DefaultAssay(epi)<-"integrated"
 24 | epi<- RunPCA(epi)
 25 | epi <- RunUMAP(epi, dims = 1:30)
 26 | epi<-FindNeighbors(epi, dims = 1:30)
 27 | epi<-FindClusters(epi, resolution = 0.4)
 28 | DimPlot(epi, label=T)
 29 | DefaultAssay(epi)<-"RNA"
 30 | #markers_epi <- FindAllMarkers(epi, only.pos = TRUE, min.pct = 0.25)
 31 | #selected_markers_epi <- markers_epi %>% group_by(cluster) %>% top_n(n = 3, wt = avg_logFC)
 32 | new.ident=c("Immature_enterocyte", "Cycling_TA", "Absorptive_enterocyte", "Ribo_TA","Stem", "MT-Hi-enterocyte", "BEST4_enterocyte", "Immature_goblet", "DUOX2_enterocyte", "PLCG2_TA", "Absorptive_TA", "Tuft", "REG_TA","Enteroendocrine", "Goblet", "MAST")
 33 | names(new.ident) <- levels(epi)
 34 | epi=RenameIdents(epi,new.ident)
 35 | DimPlot(epi, label=T)
 36 | epi[["celltypes"]] <- Idents(object = epi)
 37 | meta_epi<-epi@meta.data
 38 | 
 39 | # leuko = average expression PTPRC (CD45) > 5
 40 | leuko<-subset(data, idents = c("2","11","18","24","27","15","4","26"))
 41 | DefaultAssay(leuko)<-"integrated"
 42 | leuko<- RunPCA(leuko)
 43 | leuko <- RunUMAP(leuko, dims = 1:30)
 44 | leuko<-FindNeighbors(leuko, dims = 1:30)
 45 | leuko<-FindClusters(leuko, resolution = 0.8)
 46 | DimPlot(leuko, label=T)
 47 | DefaultAssay(leuko)<-"RNA"
 48 | #markers_leuko <- FindAllMarkers(leuko, only.pos = TRUE, min.pct = 0.25)
 49 | #selected_markers_leuko <- markers_leuko %>% group_by(cluster) %>% top_n(n = 3, wt = avg_logFC)
 50 | new.idents<-c("Follicular_B", "T_activated_FOS_low", "Treg", "CD4_T_memory", "MT_Hi_T", "CD8T", "APC", "CD8T", "T_activated_FOS_high", "Follicular_B", "APC", "MT_Hi_B", "Doublets", "GC", "MT_Hi_T", "Cycling_B", "APC", "Cycling_T", "Doublets")
 51 | names(new.idents) <- levels(leuko)
 52 | leuko=RenameIdents(leuko,new.idents)
 53 | DimPlot(leuko, label=T)
 54 | leuko[["celltypes"]] <- Idents(object = leuko)
 55 | meta_leuko<-leuko@meta.data
 56 | 
 57 | # stromal =  average expression THY1 > 0.4 (fibroblast), SOX10 > 2 (glia), MADCAM1 > 1 (endotheel)
 58 | stromal<-subset(data, idents = c("9", "13", "20", "22", "25"))
 59 | DefaultAssay(stromal)<-"integrated"
 60 | stromal<- RunPCA(stromal)
 61 | stromal <- RunUMAP(stromal, dims = 1:30)
 62 | stromal <-FindNeighbors(stromal, dims = 1:30)
 63 | stromal <-FindClusters(stromal, resolution = 0.4)
 64 | DimPlot(stromal, label=T)
 65 | DefaultAssay(stromal)<-"RNA"
 66 | #markers_stromal <- FindAllMarkers(stromal, only.pos = TRUE)
 67 | new.idents<-c("WNT5B+", "Inflammatory_fibroblast", "RSPO3+", "WNT2B+", "endothelial", "endothelial", "Mt-Hi_stromal", "pericytes", "glia", "myofibroblasts", "endothelial", "Doublets")
 68 | names(new.idents) <- levels(stromal)
 69 | stromal=RenameIdents(stromal,new.idents)
 70 | DimPlot(stromal, label=T)
 71 | stromal[["celltypes"]] <- Idents(object = stromal)
 72 | DimPlot(stromal)
 73 | meta_stromal<-stromal@meta.data
 74 | 
 75 | # plasma = remaining cells
 76 | plasma<-subset(data, idents = c("1", "16", "12", "23", "5", "8"))
 77 | DefaultAssay(plasma)<-"integrated"
 78 | plasma<- RunPCA(plasma)
 79 | plasma <- RunUMAP(plasma, dims = 1:30)
 80 | plasma <-FindNeighbors(plasma, dims = 1:30)
 81 | plasma <-FindClusters(plasma, resolution = 0.25)
 82 | DimPlot(plasma, label=T)
 83 | DefaultAssay(plasma)<-"RNA"
 84 | #markers_plasma <- FindAllMarkers(plasma, only.pos = TRUE)
 85 | new.idents<-c("IgA_plasma", "IgA_plasma", "IgG_plasma", "IgA_plasma", "IgG_plasma", "MT-Hi_plasma", "IgM_plasma", "IgA_plasma", "IgM_plasma")
 86 | names(new.idents) <- levels(plasma)
 87 | plasma=RenameIdents(plasma,new.idents)
 88 | DimPlot(plasma, label=T)
 89 | plasma[["celltypes"]] <- Idents(object = plasma)
 90 | DimPlot(plasma)
 91 | meta_plasma<-plasma@meta.data
 92 | 
 93 | # create meta_all
 94 | colnames(meta_stromal)
 95 | colnames(meta_leuko)
 96 | colnames(meta_plasma)
 97 | colnames(meta_epi)
 98 | meta_plasma<-meta_plasma[c(1,29)]
 99 | meta_leuko<-meta_leuko[c(1,29)]
100 | meta_stromal<-meta_stromal[c(1,29)]
101 | meta_epi <- meta_epi[c(1,29)]
102 | x<-rbind(meta_stromal, meta_leuko)
103 | x<-rbind(x, meta_plasma)
104 | x<-rbind(x, meta_epi)
105 | write.csv(x, "Data/meta_all.csv")
106 | 
107 | # add celltypes
108 | CellsMeta<-data.frame(data@meta.data)
109 | x$NAME<-rownames(x)
110 | CellsMeta$NAME<-row.names(CellsMeta)
111 | x<-x[,c(2,3)]
112 | row.names(CellsMeta)=NULL
113 | row.names(x)<-NULL
114 | keeping.order <- function(data, fn, ...) { 
115 |   col <- ".sortColumn"
116 |   data[,col] <- 1:nrow(data) 
117 |   out <- fn(data, ...) 
118 |   if (!col %in% colnames(out)) stop("Ordering column not preserved by function") 
119 |   out <- out[order(out[,col]),] 
120 |   out[,col] <- NULL 
121 |   out 
122 | } 
123 | CellsMeta<-keeping.order(CellsMeta, merge, y=x, by = "NAME", all=T)
124 | CellsMeta<-CellsMeta[,c(1,29)]
125 | rownames(CellsMeta)<-CellsMeta$NAME
126 | data<-AddMetaData(data, CellsMeta)
127 | Idents(data)=data$celltypes
128 | DimPlot(data, label = T, repel = T)
129 | 
130 | data = subset(data, subset = celltypes != "MT_Hi_T")
131 | data = subset(data, subset = celltypes != "Doublets")
132 | data = subset(data, subset = celltypes != "MT_Hi_B")
133 | data = subset(data, subset = celltypes != "MT-Hi_plasma")
134 | data = subset(data, subset = celltypes != "Mt-Hi_stromal")
135 | data = subset(data, subset = celltypes != "MT-Hi-enterocyte")
136 | 
137 | Idents(data)=data$celltypes
138 | DimPlot(data, label = T, repel = T)
139 | 
140 | DefaultAssay(data)<-"integrated"
141 | data <- RunUMAP(data, dims = 1:30)
142 | data <-FindNeighbors(data, dims = 1:30)
143 | data<-FindClusters(data, resolution = 0.4)
144 | 
145 | Idents(data)=data$celltypes
146 | DimPlot(data, label = T, repel = T, order = c("T_activated_FOS_low", "Absorptive_enterocyte", "IgA_plasma", "WNT5B+", "Aborptive_TA")) + NoLegend()
147 | 
148 | data = subset(data, subset = sample != "3296")
149 | 
150 | saveRDS(data, "Data/PSC_processed_oct.rds")
151 | 
152 | # create figures


--------------------------------------------------------------------------------
/Vedo2/step_5_Vedo2_celltyping_biop_log.R:
--------------------------------------------------------------------------------
  1 | # general stuff for sc data
  2 | 
  3 | library(dplyr)
  4 | library(Seurat)
  5 | library(patchwork)
  6 | library(ggplot2)
  7 | library(readr)
  8 | library(tidyr)
  9 | library(MAST)
 10 | library(readxl)
 11 | library(openxlsx)
 12 | 
 13 | # Load in database
 14 | data <- readRDS("Data/batch1_integrated/vedo2_batch1_biop_integrated_noribo_nonorm_withVEDO01.rds")
 15 | data = UpdateSeuratObject(object = data)
 16 | DefaultAssay(data) = "RNA"
 17 | 
 18 | #not normalized dataset
 19 | DimPlot(data)
 20 | 
 21 | #normalization
 22 | data <- NormalizeData(data, verbose = FALSE)
 23 | DimPlot(data, label = T) +NoLegend()
 24 | 
 25 | #add metadata
 26 | data$Final_HTO <- sub("V001_T0_ascendens_I", "V001-T0-ascendens-I-3191", data$Final_HTO)
 27 | data$Final_HTO <- sub("V001_T0_transversum_NI", "V001-T0-transversum-NI-3191", data$Final_HTO)
 28 | data$Final_HTO <- sub("V002-T0-colonasc-NI-3160", "V002-T0-ascendens-NI-3160", data$Final_HTO)
 29 | data$Final_HTO <- sub("V005-T0-transvers-NI-3228", "V005-T0-transversum-NI-3228", data$Final_HTO)
 30 | data$Final_HTO <- sub("V005-T0-sigmoid-NI-3228", "V005-T0-sigmoid-I-3228", data$Final_HTO)
 31 | data@meta.data$patient = sapply(strsplit(data$Final_HTO,"-"), `[`, 1)
 32 | data@meta.data$timepoint = sapply(strsplit(data$Final_HTO,"-"), `[`, 2)
 33 | data@meta.data$location = sapply(strsplit(data$Final_HTO,"-"), `[`, 3)
 34 |     #data@meta.data$state <- paste(data$disease, data$inflammation, sep='-')
 35 | data@meta.data$inflammation = sapply(strsplit(data$Final_HTO,"-"), `[`, 4)
 36 | data@meta.data$sample = sapply(strsplit(data$Final_HTO,"-"), `[`, 5)
 37 | 
 38 | table(data$inflammation, data$Final_HTO)
 39 | 
 40 | #Setting cluster resolution (used on "integrated" assay)
 41 | DefaultAssay(data) <- "integrated"
 42 | data <- FindClusters(data, resolution = 0.7)
 43 | DimPlot(data, label = T)
 44 | data <- RunTSNE(data, reduction = "pca", dims = 1:5, seed.use = 1, tsne.method = "Rtsne", dim.embed = 2, reduction.name = "tsne", reduction.key = "tSNE_")
 45 | DimPlot(data, reduction = "tsne", label = T)
 46 | 
 47 | # recluster subset
 48 | # epi = average expression EPCAM > 4
 49 | AverageExpression(data, features = "EPCAM", assays = "RNA")
 50 | epi<-subset(data, idents = c("3", "5", "7", "12", "14", "16", "19", "27", "28"))
 51 | DefaultAssay(epi)<-"integrated"
 52 | epi<- RunPCA(epi)
 53 | epi <- RunUMAP(epi, dims = 1:30)
 54 | epi<-FindNeighbors(epi, dims = 1:30)
 55 | epi<-FindClusters(epi, resolution = 0.6)
 56 | DimPlot(epi, label=T)
 57 | DefaultAssay(epi)<-"RNA"
 58 | FeaturePlot(epi, features = c("BEST4", "DUOX2", "MUC2"))
 59 | 
 60 | #epi markers
 61 | markers_epi <- FindAllMarkers(epi, only.pos = TRUE, min.pct = 0.25)
 62 | write.csv(markers_epi, "Results/markergenes/markers_epi_log.csv")
 63 | #to export only top (10/20/etc) gene markers
 64 | selected_markers_epi <- markers_epi %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
 65 | write.csv(selected_markers_epi, "Results/markergenes/selected_markers_epi_log.csv")
 66 | 
 67 | # leuko = average expression PTPRC (CD45) > 3
 68 | AverageExpression(data, features = "PTPRC", assays = "RNA")
 69 | leuko<-subset(data, idents = c("6", "8", "11", "13","15","17","22","23","24","31"))
 70 | DefaultAssay(leuko)<-"integrated"
 71 | leuko<- RunPCA(leuko)
 72 | leuko <- RunUMAP(leuko, dims = 1:30)
 73 | leuko<-FindNeighbors(leuko, dims = 1:30)
 74 | leuko<-FindClusters(leuko, resolution = 0.8)
 75 | DimPlot(leuko, label=T)
 76 | DefaultAssay(leuko)<-"RNA"
 77 | FeaturePlot(leuko, features = c("CTSG", "IL17A"))
 78 | 
 79 | #leuko markers
 80 | markers_leuko <- FindAllMarkers(leuko, only.pos = TRUE, min.pct = 0.25)
 81 | write.csv(markers_leuko, "Results/markergenes/markers_leuko_log.csv")
 82 | selected_markers_leuko <- markers_leuko %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
 83 | write.csv(selected_markers_leuko, "Results/markergenes/selected_markers_leuko_log.csv")
 84 | 
 85 | 
 86 | # stromal =  average expression THY1 > 1 (fibroblast), SOX10 > 2 (glia), MADCAM1 > 1 (endotheel)
 87 | AverageExpression(data, features = "THY1", assays = "RNA")
 88 | AverageExpression(data, features = "SOX10", assays = "RNA")
 89 | AverageExpression(data, features = "MADCAM1", assays = "RNA")
 90 | stromal<-subset(data, idents = c("1", "9", "20", "25", "26", "29"))
 91 | DefaultAssay(stromal)<-"integrated"
 92 | stromal<- RunPCA(stromal)
 93 | stromal <- RunUMAP(stromal, dims = 1:30)
 94 | stromal <-FindNeighbors(stromal, dims = 1:30)
 95 | stromal <-FindClusters(stromal, resolution = 0.4)
 96 | DimPlot(stromal, label=T)
 97 | DefaultAssay(stromal)<-"RNA"
 98 | FeaturePlot(stromal, features = c("WNT2B", "WNT5B", "MADCAM1", "RSPO3", "SOX6"))
 99 | 
100 | #subcluster again (only clusters 1,5 and 6)to find IAFs
101 | fibro <- subset(stromal, idents = c("1", "5", "6"))
102 | DefaultAssay(fibro)<-"integrated"
103 | fibro <- RunPCA(fibro)
104 | fibro <- RunUMAP(fibro, dims = 1:30)
105 | fibro <-FindNeighbors(fibro, dims = 1:30)
106 | fibro <-FindClusters(fibro, resolution = 0.4)
107 | DimPlot(fibro, label=T)
108 | DefaultAssay(fibro)<-"RNA"
109 | FeaturePlot(fibro, features = c("WNT2B", "WNT5B", "WNT5A", "SOX6", "RSPO3", "TNFRSF11B", "IL11", "IL13RA2", "CHI3L1", "IL6","CXCL3", "IL33", "MMP3", "CCL19"), cols = c("grey", "red"), min.cutoff = "q10", max.cutoff = "q90")
110 | FeaturePlot(fibro, features = c("IL11", "IL13RA2", "CHI3L1"), cols = c("grey", "red"), split.by = "inflammation",min.cutoff = "q10", max.cutoff = "q90")
111 | DotPlot(fibro, features = c("WNT2B", "WNT5B", "WNT5A", "SOX6", "RSPO3", "TNFRSF11B", "IL11", "IL13RA2", "CHI3L1", "IL6","CXCL3", "IL33", "MMP3", "CCL19"), cols = c("grey", "red"), dot.scale = 6) + RotatedAxis() + coord_flip()
112 | DotPlot(fibro, features = c("WNT2B", "WNT5B", "WNT5A", "SOX6", "RSPO3", "TNFRSF11B", "IL11", "IL13RA2", "CHI3L1", "IL6","CXCL3", "IL33", "MMP3", "CCL19"), cols = c("green", "blue"),split.by = "timepoint", dot.scale = 6) + RotatedAxis() + coord_flip()
113 | 
114 | #stromal markers
115 | markers_stromal <- FindAllMarkers(stromal, only.pos = TRUE, min.pct = 0.25)
116 | write.csv(markers_stromal, "Results/markergenes/markers_stromal_log.csv")
117 | selected_markers_stromal <- markers_stromal %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
118 | write.csv(selected_markers_stromal, "Results/markergenes/selected_markers_stromal_log.csv")
119 | 
120 | 
121 | # plasma = remaining cells
122 | plasma<-subset(data, idents = c("0", "2", "4", "10", "18", "21", "30"))
123 | DefaultAssay(plasma)<-"integrated"
124 | plasma<- RunPCA(plasma)
125 | plasma <- RunUMAP(plasma, dims = 1:30)
126 | plasma <-FindNeighbors(plasma, dims = 1:30)
127 | plasma <-FindClusters(plasma, resolution = 0.1)
128 | DimPlot(plasma, label=T)
129 | DefaultAssay(plasma)<-"RNA"
130 | FeaturePlot(plasma, features = c("IGHG1", "IGHA1", "IGHM"))
131 | 
132 | #plasma markers
133 | markers_plasma <- FindAllMarkers(plasma, only.pos = TRUE, min.pct = 0.25)
134 | write.csv(markers_plasma, "Results/markergenes//markers_plasma_log.csv")
135 | selected_markers_plasma <- markers_plasma %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
136 | write.csv(selected_markers_plasma, "Results/markergenes/selected_markers_plasma_log.csv")
137 | 
138 | 


--------------------------------------------------------------------------------
/DDTx/find_markers_and_analyze_proportions.R:
--------------------------------------------------------------------------------
  1 | ## script for marker finding and proportion analysis DDTx project
  2 | # May 16 2023
  3 | # WTC
  4 | 
  5 | # open filtered dataset
  6 | library(Seurat)
  7 | data<-readRDS("/source/ddtx_merged_demultiplexed_clustered_compartment_azi_elmentaiteadultileum_below60pctmito_withoutEpithelialR.rds")
  8 | 
  9 | #Vlnplots to check donor/recipient cellorigin
 10 | library(ggplot2)
 11 | VlnPlot(data, "CCR6", assay="RNA", group.by="compartment_final", split.by="donor_recipient") # CCR6 higher in donor immune cells (as expected)
 12 | ggsave("CCR6_donor_recipient.png", width = 20, height = 5, dpi = 600)
 13 | 
 14 | VlnPlot(data, "SELL", assay="RNA", group.by="compartment_final", split.by="donor_recipient") # CD62L higher in recipient immune cells (as expected)
 15 | ggsave("SELL_donor_recipient.png", width = 20, height = 5, dpi = 600)
 16 | 
 17 | #find general markers for donor and recipient, all cells and datapoints taken together
 18 | Idents(data)<-"donor_recipient"
 19 | markers_data <- FindAllMarkers(data, only.pos = TRUE, min.pct = 0.25)
 20 | write.csv(markers_data, "markers_donor_recipient_mito_epi_filtered_data.csv")
 21 | 
 22 | #find general markers per cell type, all participants and datapoints taken together
 23 | Idents(data)<-"predicted.celltype.elmentaiteadultileum"
 24 | markers_data <- FindAllMarkers(data, only.pos = TRUE, min.pct = 0.25)
 25 | write.csv(markers_data, "markers_allcelltypes_mito_epi_filtered_data.csv")
 26 | 
 27 | #downsampling to equal cell numbers per sample for cell proportion analysis
 28 | library(Seurat)
 29 | Idents(data)<-"sample"
 30 | data_1949<-subset(data, downsample=1949)
 31 | saveRDS(data_1949, "DDTX_sample_1949cells.rds")
 32 | 
 33 | #generate CD4 T subsets
 34 | 
 35 | ## find subsets T cells
 36 | Idents(data)<-"predicted.celltype.elmentaiteadultileum"
 37 | cd4T<-subset(data,idents="Activated CD4 T")
 38 | DimPlot(cd4T)
 39 | ggsave("cd4T_before_reclustering.png", width = 5, height = 5, dpi = 600)
 40 | 
 41 | DefaultAssay(object = cd4T) <- "RNA"
 42 | cd4T <- NormalizeData(cd4T, normalization.method = "LogNormalize", scale.factor = 10000)
 43 | cd4T <- NormalizeData(cd4T)
 44 | cd4T <- FindVariableFeatures(cd4T, selection.method = "vst", nfeatures = 2000)
 45 | 
 46 | # Identify the 10 most highly variable genes
 47 | top10 <- head(VariableFeatures(cd4T), 10)
 48 | all.genes <- rownames(cd4T)
 49 | cd4T <- ScaleData(cd4T, features = all.genes)
 50 | cd4T <- RunPCA(cd4T, features = VariableFeatures(object = cd4T))
 51 | ElbowPlot(cd4T)
 52 | ggsave("Elbowplot_cd4T_after_reclustering.png", width = 5, height = 5, dpi = 600)
 53 | 
 54 | cd4T <- FindNeighbors(cd4T, dims = 1:10)
 55 | cd4T <- FindClusters(cd4T, resolution = 0.4)
 56 | head(Idents(cd4T), 5)
 57 | cd4T <- RunUMAP(cd4T, dims = 1:10)
 58 | DimPlot(cd4T, reduction = "umap")
 59 | ggsave("cd4T_after_reclustering.png", width = 5, height = 5, dpi = 600)
 60 | markers_cd4T <- FindAllMarkers(cd4T, only.pos = TRUE, min.pct = 0.25)
 61 | write.csv(markers_cd4T, "markers_cd4T_mito_epi_filtered_data.csv")
 62 | FeaturePlot(cd4T, c("IL17A"))
 63 | ggsave("cd4T_after_reclustering_il17.png", width = 5, height = 5, dpi = 600)
 64 | 
 65 | ## markers for different timepoints epithelial cells in patient 1,2,3
 66 | Idents(data)<-"compartment_final"
 67 | epi<-subset(data, ident="Epithelial")
 68 | Idents(epi)<-"patient"
 69 | epi_pt3<-subset(epi, ident="UMCGDDtx00005")
 70 | Idents(epi_pt3)<-"Timepoint_days"
 71 | markers_epi_pt3<-FindAllMarkers(epi_pt3, only.pos=T) 
 72 | write.csv(markers_epi_pt3, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_epi_pt3_timepoints.csv")
 73 | 
 74 | epi_pt2<-subset(epi, ident="UMCGDDtx00004")
 75 | Idents(epi_pt2)<-"Timepoint_days"
 76 | markers_epi_pt2<-FindAllMarkers(epi_pt2, only.pos=T) 
 77 | write.csv(markers_epi_pt2, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_epi_pt2_timepoints.csv")
 78 | 
 79 | epi_pt1<-subset(epi, ident="UMCGDDtx00003")
 80 | Idents(epi_pt1)<-"Timepoint_days"
 81 | markers_epi_pt1<-FindAllMarkers(epi_pt1, only.pos=T) 
 82 | write.csv(markers_epi_pt1, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_epi_pt1_timepoints.csv")
 83 | 
 84 | ## find endothelial markers
 85 | Idents(data)<-"major_celltype"
 86 | endo<-subset(data, ident="Endothelial")
 87 | Idents(endo)<-"patient"
 88 | endo_pt3<-subset(endo, ident="UMCGDDtx00005")
 89 | Idents(endo_pt3)<-"Timepoint_days"
 90 | markers_endo_pt3<-FindAllMarkers(endo_pt3, only.pos=T) 
 91 | write.csv(markers_endo_pt3, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_endo_pt3_timepoints.csv")
 92 | 
 93 | endo_pt2<-subset(endo, ident="UMCGDDtx00004")
 94 | Idents(endo_pt2)<-"Timepoint_days"
 95 | markers_endo_pt2<-FindAllMarkers(endo_pt2, only.pos=T) 
 96 | write.csv(markers_endo_pt2, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_endo_pt2_timepoints.csv")
 97 | 
 98 | endo_pt1<-subset(endo, ident="UMCGDDtx00003")
 99 | Idents(endo_pt1)<-"Timepoint_days"
100 | markers_endo_pt1<-FindAllMarkers(endo_pt1, only.pos=T) 
101 | write.csv(markers_endo_pt1, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_endo_pt1_timepoints.csv")
102 | 
103 | ## find CD4T markers
104 | Idents(data)<-"predicted.celltype.elmentaiteadultileum"
105 | CD4<-subset(data, ident="Activated CD4 T")
106 | Idents(CD4)<-"patient"
107 | CD4_pt3<-subset(CD4, ident="UMCGDDtx00005")
108 | Idents(CD4_pt3)<-"Timepoint_days"
109 | markers_CD4_pt3<-FindAllMarkers(CD4_pt3, only.pos=T) 
110 | write.csv(markers_CD4_pt3, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_CD4_pt3_timepoints.csv")
111 | 
112 | CD4_pt2<-subset(CD4, ident="UMCGDDtx00004")
113 | Idents(CD4_pt2)<-"Timepoint_days"
114 | markers_CD4_pt2<-FindAllMarkers(CD4_pt2, only.pos=T) 
115 | write.csv(markers_CD4_pt2, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_CD4_pt2_timepoints.csv")
116 | 
117 | CD4_pt1<-subset(CD4, ident="UMCGDDtx00003")
118 | Idents(CD4_pt1)<-"Timepoint_days"
119 | markers_CD4_pt1<-FindAllMarkers(CD4_pt1, only.pos=T) 
120 | write.csv(markers_CD4_pt1, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_CD4_pt1_timepoints.csv")
121 | 
122 | ## find CD8T markers
123 | CD8<-subset(data, ident="Activated CD8 T")
124 | Idents(CD8)<-"patient"
125 | CD8_pt3<-subset(CD8, ident="UMCGDDtx00005")
126 | Idents(CD8_pt3)<-"Timepoint_days"
127 | markers_CD8_pt3<-FindAllMarkers(CD8_pt3, only.pos=T) 
128 | write.csv(markers_CD8_pt3, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_CD8_pt3_timepoints.csv")
129 | 
130 | CD8_pt2<-subset(CD8, ident="UMCGDDtx00004")
131 | Idents(CD8_pt2)<-"Timepoint_days"
132 | markers_CD8_pt2<-FindAllMarkers(CD8_pt2, only.pos=T) 
133 | write.csv(markers_CD8_pt2, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_CD8_pt2_timepoints.csv")
134 | 
135 | CD8_pt1<-subset(CD8, ident="UMCGDDtx00003")
136 | Idents(CD8_pt1)<-"Timepoint_days"
137 | markers_CD8_pt1<-FindAllMarkers(CD8_pt1, only.pos=T) 
138 | write.csv(markers_CD8_pt1, "/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/DE/markers_CD8_pt1_timepoints.csv")
139 | 
140 | 
141 | 
142 | 
143 | 
144 | # check markers on gutcellatlas
145 | 
146 | 
147 | 


--------------------------------------------------------------------------------
/old_stuff/Import10X_gene-per-patient.R:
--------------------------------------------------------------------------------
  1 | ## 10X import for Takeda RIPK2
  2 | ## Cytotoxic cells
  3 | ## 10X import for Takeda RIPK2
  4 | ## Cytotoxic cells
  5 | 
  6 | library(Seurat)
  7 | 
  8 | ## Cytotoxic T cells
  9 | 
 10 | Cytotox <- Read10X(data.dir = "/Users/festeneam/Downloads/filtered_matrices_mex_cytotox/hg19")
 11 | Cytotox_T <- CreateSeuratObject(raw.data = Cytotox, min.cells = 3,
 12 |                                 min.genes = 200,
 13 |                                 project = "10X Project",
 14 |                                 names.delim = "-",
 15 |                                 names.field = 2)
 16 | 
 17 | Cytotox_T <- SetIdent(Cytotox_T, ident.use = c("Cyto"))
 18 | 
 19 | mito.genes <- grep(pattern = "^MT-", x = rownames(x = Cytotox_T@data), value = TRUE) 
 20 | 
 21 | # Get TSS normalized mitochodrial counts
 22 | col.total.counts <- Matrix::colSums(expm1(Cytotox_T@data))
 23 | percent.mito <- Matrix::colSums(Cytotox_T@raw.data[mito.genes,])/Matrix::colSums(Cytotox_T@raw.data)
 24 | 
 25 | Cytotox_T <- AddMetaData(object = Cytotox_T, metadata = percent.mito, col.name = "percent.mito")
 26 | 
 27 | Cytotox_T <- NormalizeData(object = Cytotox_T, normalization.method = "LogNormalize",
 28 |                            scale.factor = 1e4)
 29 | 
 30 | Cytotox_T <- ScaleData(Cytotox_T)
 31 | Cytotox_T <- RunPCA(object = Cytotox_T, pc.genes=rownames(Cytotox_T@data),
 32 |                     do.print = TRUE,
 33 |                     pcs.print = 1:5,
 34 |                     genes.print = 5)
 35 | 
 36 | PrintPCA(object = Cytotox_T, pcs.print = 1:5, genes.print = 5, use.full = FALSE)
 37 | 
 38 | Cytotox_T <- RunTSNE(object = Cytotox_T, dims.use = 1:10, do.fast = TRUE) 
 39 | TSNEPlot(object = Cytotox_T)
 40 | 
 41 | VlnPlot(Cytotox_T, c("ZBTB7B","CD8B", "GZMA","RIPK2"), nCol = 4)
 42 | 
 43 | FeaturePlot(Cytotox_T, c("ZBTB7B","CD8B", "GZMA","RIPK2"), cols.use = c("lightgrey","deeppink"), 
 44 |             pt.size = 1)
 45 | 
 46 | Cytotox_T <- FindClusters(object = Cytotox_T, reduction.type = "pca",
 47 |                           dims.use = 1:10,
 48 |                           resolution = 0.6,
 49 |                           print.output = 0,
 50 |                           save.SNN = TRUE)
 51 | ## Monocytes
 52 | 
 53 | CD14 <- Read10X(data.dir = "/Users/festeneam/Downloads/filtered_matrices_mex_CD14/hg19")
 54 | CD14_mon <- CreateSeuratObject(raw.data = CD14, min.cells = 3,
 55 |                                min.genes = 200,
 56 |                                project = "10X Project",
 57 |                                names.delim = "-",
 58 |                                names.field = 2)
 59 | 
 60 | CD14_mon <- SetIdent(CD14_mon, ident.use = c("CD14"))
 61 | 
 62 | mito.genes <- grep(pattern = "^MT-", x = rownames(x = CD14_mon@data), value = TRUE) 
 63 | 
 64 | # Get TSS normalized mitochodrial counts
 65 | col.total.counts <- Matrix::colSums(expm1(CD14_mon@data))
 66 | percent.mito <- Matrix::colSums(CD14_mon@raw.data[mito.genes,])/Matrix::colSums(CD14_mon@raw.data)
 67 | 
 68 | CD14_mon <- AddMetaData(object = CD14_mon, metadata = percent.mito, col.name = "percent.mito")
 69 | 
 70 | CD14_mon <- NormalizeData(object = CD14_mon, normalization.method = "LogNormalize",
 71 |                           scale.factor = 1e4)
 72 | 
 73 | CD14_mon <- ScaleData(CD14_mon)
 74 | CD14_mon <- RunPCA(object = CD14_mon, pc.genes=rownames(CD14_mon@data),
 75 |                    do.print = TRUE,
 76 |                    pcs.print = 1:5,
 77 |                    genes.print = 5)
 78 | 
 79 | PrintPCA(object = CD14_mon, pcs.print = 1:5, genes.print = 5, use.full = FALSE)
 80 | 
 81 | CD14_mon <- RunTSNE(object = CD14_mon, dims.use = 1:10, do.fast = TRUE) 
 82 | TSNEPlot(object = CD14_mon)
 83 | 
 84 | VlnPlot(CD14_mon, c("CD14","SELL", "HLA-DRA","RIPK2"), nCol = 4)
 85 | 
 86 | FeaturePlot(CD14_mon, c("CD14","SELL", "HLA-DRA","RIPK2"), cols.use = c("lightgrey","deeppink"), 
 87 |             pt.size = 1)
 88 | 
 89 | CD14_mon <- FindClusters(object = CD14_mon, reduction.type = "pca",
 90 |                          dims.use = 1:10,
 91 |                          resolution = 0.6,
 92 |                          print.output = 0,
 93 |                          save.SNN = TRUE)
 94 | 
 95 | ## NK cells
 96 | 
 97 | CD56 <- Read10X(data.dir = "/Users/festeneam/Downloads/filtered_matrices_mex_CD56/hg19")
 98 | CD56_mon <- CreateSeuratObject(raw.data = CD56, min.cells = 3,
 99 |                                min.genes = 200,
100 |                                project = "10X Project",
101 |                                names.delim = "-",
102 |                                names.field = 2)
103 | 
104 | CD56_mon <- SetIdent(CD56_mon, ident.use = c("NK"))
105 | 
106 | mito.genes <- grep(pattern = "^MT-", x = rownames(x = CD56_mon@data), value = TRUE) 
107 | 
108 | # Get TSS normalized mitochodrial counts
109 | col.total.counts <- Matrix::colSums(expm1(CD56_mon@data))
110 | percent.mito <- Matrix::colSums(CD56_mon@raw.data[mito.genes,])/Matrix::colSums(CD56_mon@raw.data)
111 | 
112 | CD56_mon <- AddMetaData(object = CD56_mon, metadata = percent.mito, col.name = "percent.mito")
113 | 
114 | CD56_mon <- NormalizeData(object = CD56_mon, normalization.method = "LogNormalize",
115 |                           scale.factor = 1e4)
116 | 
117 | CD56_mon <- ScaleData(CD56_mon)
118 | CD56_mon <- RunPCA(object = CD56_mon, pc.genes=rownames(CD56_mon@data),
119 |                    do.print = TRUE,
120 |                    pcs.print = 1:5,
121 |                    genes.print = 5)
122 | 
123 | PrintPCA(object = CD56_mon, pcs.print = 1:5, genes.print = 5, use.full = FALSE)
124 | 
125 | CD56_mon <- RunTSNE(object = CD56_mon, dims.use = 1:10, do.fast = TRUE) 
126 | TSNEPlot(object = CD56_mon)
127 | 
128 | VlnPlot(CD56_mon, c("NCAM1","FCGR3A", "ITGAM","RIPK2"), nCol = 4)
129 | 
130 | FeaturePlot(CD56_mon, c("NCAM1","FCGR3A", "ITGAM","RIPK2"), cols.use = c("lightgrey","deeppink"), 
131 |             pt.size = 1)
132 | 
133 | CD56_mon <- FindClusters(object = CD56_mon, reduction.type = "pca",
134 |                          dims.use = 1:10,
135 |                          resolution = 0.6,
136 |                          print.output = 0,
137 |                          save.SNN = TRUE)
138 | 
139 | total <- MergeSeurat(NaiveCD4_T, CD14_mon, add.cell.id1 = "CD4_Naive", add.cell.id2 = "CD14")
140 | total2 <- MergeSeurat(total, CD56_mon, add.cell.id2 = "NK")
141 | TenX_cells <- MergeSeurat(total2, Cytotox_T, add.cell.id2 = "Cytotox")
142 | 
143 | TenX_cells <- ScaleData(TenX_cells)
144 | TenX_cells <- RunPCA(object = TenX_cells, pc.genes=rownames(TenX_cells@data),
145 |                    do.print = TRUE,
146 |                    pcs.print = 1:5,
147 |                    genes.print = 5)
148 | 
149 | PrintPCA(object = TenX_cells, pcs.print = 1:5, genes.print = 5, use.full = FALSE)
150 | 
151 | TenX_cells <- RunTSNE(object = TenX_cells, dims.use = 1:10, do.fast = TRUE) 
152 | TSNEPlot(object = TenX_cells)
153 | 
154 | VlnPlot(TenX_cells, c("OSM","NOD2","OPRL1","RIPK2"), nCol = 4)
155 | 
156 | FeaturePlot(TenX_cells, c("CCR7","CD160","GZMA","RIPK2"), cols.use = c("lightgrey","deeppink"), 
157 |             pt.size = 1)
158 | 
159 | ## RIPK2 positivity
160 | # Table positive cells for a gene per patient
161 | 
162 | RIPK2.pos <- TenX_cells@data["RIPK2",]>0
163 | RIPK2.pos <- 1*RIPK2.pos
164 | 
165 | TenX_cells <- AddMetaData(TenX_cells, metadata = RIPK2.pos, col.name = "RIPK2.pos")
166 | TenX_cells <- AddMetaData(TenX_cells, metadata = TenX_cells@ident, col.name = "ident")
167 | 
168 | 
169 | 


--------------------------------------------------------------------------------
/Vedo2/Vedo001_preprep.R:
--------------------------------------------------------------------------------
  1 | #Read dataframe 08 lane2
  2 | dataset_10x<-Read10X("../VEDO2/Data/08_lane2/")
  3 | dataset<-CreateSeuratObject(counts = dataset_10x$`Gene Expression`, project = "PSC")
  4 | dataset[['HTO']]<-CreateAssayObject(counts = dataset_10x$`Antibody Capture`)
  5 | dataset<-subset(dataset, nCount_RNA > 200)
  6 | dataset <- NormalizeData(dataset, assay = "HTO", normalization.method = "CLR")
  7 | dataset <- HTODemux(dataset, assay = "HTO", positive.quantile = 0.993)
  8 | table(dataset$HTO_classification.global)
  9 | table(dataset$HTO_maxID)
 10 | HTOHeatmap(dataset, assay = "HTO", ncells = 1000)
 11 | 
 12 | #demultiplex
 13 | HTO <- dataset@meta.data[["HTO_classification"]]
 14 | HTO_status <- dataset@meta.data[["HTO_classification.global"]]
 15 | barcode <- dataset@assays[["RNA"]]@data@Dimnames[[2]]
 16 | seurat_call <- data.frame(HTO, HTO_status, barcode)
 17 | souporcell <- read.table(file = '../VEDO2/Data/clusters_08_lane2.tsv', sep = '\t', header = TRUE)
 18 | souporcell <- select(souporcell, c("barcode", "status", "assignment"))
 19 | All <- merge(seurat_call, souporcell, by = "barcode", all = F)
 20 | rownames(All) <- All$barcode
 21 | table(All$HTO, All$assignment)
 22 | All$assignment[All$assignment == "0"]="UC-NI-3006"
 23 | All$assignment[All$assignment == "2"]="PSC-NI-3267"
 24 | All$assignment[All$assignment == "3"]="UC-I-3249"
 25 | All$assignment[All$assignment == "4"]="PSC-NI-3191"
 26 | All$status[All$status == "doublet"]="Doublet"
 27 | All$status[All$status == "singlet"]="Singlet"
 28 | All$status[All$status == "unassigned"]="Negative"
 29 | for (i in 1:nrow(All)){ 
 30 |   if (All[i,2] == "Negative" & All[i,5] != "1"){
 31 |     All[i,2]<-All[i,5] 
 32 |   } else {
 33 |     All[i,2]<-All[i,2]
 34 |   }
 35 | }
 36 | for (i in 1:nrow(All)){ 
 37 |   if (All[i,3] == "Negative" & All[i,5] != "1"){
 38 |     All[i,3]<-All[i,4] 
 39 |   } else {
 40 |     All[i,3]<-All[i,3]
 41 |   }
 42 | }
 43 | 
 44 | Final_HTO <- All$HTO
 45 | names(Final_HTO) <- rownames(All)
 46 | dataset <- AddMetaData(
 47 |   object = dataset,
 48 |   metadata = Final_HTO,
 49 |   col.name = 'Final_HTO')
 50 | Final_HTO_status <- All$HTO_status
 51 | names(Final_HTO_status) <- rownames(All)
 52 | dataset <- AddMetaData(
 53 |   object = dataset,
 54 |   metadata = Final_HTO_status,
 55 |   col.name = 'Final_HTO_status')
 56 | table(dataset@meta.data[["Final_HTO_status"]], dataset@meta.data[["hash.ID"]])
 57 | table(dataset@meta.data[["Final_HTO"]], dataset@meta.data[["hash.ID"]])
 58 | 
 59 | #qc and preprocess
 60 | DefaultAssay(dataset)<-"RNA"
 61 | dataset[["percent.mt"]] <- PercentageFeatureSet(dataset, pattern = "^MT-")
 62 | dataset<-subset(dataset, subset=percent.mt < 60)
 63 | HTO_number_08lane2 <- sum(dataset$HTO_classification.global == "Singlet")
 64 | souporcell_number_08lane2 <- sum(dataset$Final_HTO_status == "Singlet")
 65 | Idents(dataset) <- "Final_HTO"
 66 | dataset <- subset(dataset, idents = "PSC-NI-3191")
 67 | Idents(dataset) <- "Final_HTO_status"
 68 | dataset <- subset(dataset, idents = "Singlet")
 69 | table(dataset@meta.data[["Final_HTO"]])
 70 | dataset<-RenameCells(dataset, add.cell.id="VEDO01_T0_NI")
 71 | dataset@meta.data$lane<-"VEDO01_T0_NI"
 72 | dataset<-SCTransform(dataset,vars.to.regress = "percent.mt", verbose = F)
 73 | dataset <- RunPCA(dataset, verbose = FALSE)
 74 | dataset <- RunUMAP(dataset, dims = 1:30, verbose = FALSE)
 75 | dataset <- FindNeighbors(dataset, dims = 1:30, verbose = FALSE)
 76 | dataset <- FindClusters(dataset, resolution = 0.2, verbose = FALSE)
 77 | DimPlot(dataset)
 78 | FeaturePlot(dataset, "IGHA2")
 79 | FeaturePlot(dataset, "EPCAM")
 80 | FeaturePlot(dataset, "CD3E")
 81 | dataset@meta.data[["Final_HTO"]] <- "V001_T0_transversum_NI"
 82 | table(dataset@meta.data[["Final_HTO"]])
 83 | saveRDS(dataset, file="../VEDO2/Data/VEDO01_T0_NI_sct.rds")
 84 | 
 85 | #Read dataframe 09 lane1
 86 | dataset_10x<-Read10X("../VEDO2/Data/09_lane1/")
 87 | dataset<-CreateSeuratObject(counts = dataset_10x$`Gene Expression`, project = "PSC")
 88 | dataset[['HTO']]<-CreateAssayObject(counts = dataset_10x$`Antibody Capture`)
 89 | dataset<-subset(dataset, nCount_RNA > 200)
 90 | dataset <- NormalizeData(dataset, assay = "HTO", normalization.method = "CLR")
 91 | dataset <- HTODemux(dataset, assay = "HTO", positive.quantile = 0.91)
 92 | table(dataset$HTO_classification.global)
 93 | table(dataset$HTO_maxID)
 94 | HTOHeatmap(dataset, assay = "HTO", ncells = 1000)
 95 | 
 96 | #demultiplex
 97 | HTO <- dataset@meta.data[["HTO_classification"]]
 98 | HTO_status <- dataset@meta.data[["HTO_classification.global"]]
 99 | barcode <- dataset@assays[["RNA"]]@data@Dimnames[[2]]
100 | seurat_call <- data.frame(HTO, HTO_status, barcode)
101 | souporcell <- read.table(file = '../VEDO2/Data/clusters_09_lane1.tsv', sep = '\t', header = TRUE)
102 | souporcell <- select(souporcell, c("barcode", "status", "assignment"))
103 | All <- merge(seurat_call, souporcell, by = "barcode", all = F)
104 | rownames(All) <- All$barcode
105 | table(All$HTO, All$assignment)
106 | All$assignment[All$assignment == "0"]="UC-NI-3107"
107 | All$assignment[All$assignment == "1"]="HC-NI-3083"
108 | All$assignment[All$assignment == "2"]="UC-NI-3195"
109 | All$assignment[All$assignment == "3"]="PSC-I-3191"
110 | All$status[All$status == "doublet"]="Doublet"
111 | All$status[All$status == "singlet"]="Singlet"
112 | All$status[All$status == "unassigned"]="Negative"
113 | for (i in 1:nrow(All)){ 
114 |   if (All[i,2] == "Negative"){
115 |     All[i,2]<-All[i,5] 
116 |   } else {
117 |     All[i,2]<-All[i,2]
118 |   }
119 | }
120 | for (i in 1:nrow(All)){ 
121 |   if (All[i,3] == "Negative"){
122 |     All[i,3]<-All[i,4] 
123 |   } else {
124 |     All[i,3]<-All[i,3]
125 |   }
126 | }
127 | 
128 | Final_HTO <- All$HTO
129 | names(Final_HTO) <- rownames(All)
130 | dataset <- AddMetaData(
131 |   object = dataset,
132 |   metadata = Final_HTO,
133 |   col.name = 'Final_HTO')
134 | Final_HTO_status <- All$HTO_status
135 | names(Final_HTO_status) <- rownames(All)
136 | dataset <- AddMetaData(
137 |   object = dataset,
138 |   metadata = Final_HTO_status,
139 |   col.name = 'Final_HTO_status')
140 | table(dataset@meta.data[["Final_HTO_status"]], dataset@meta.data[["hash.ID"]])
141 | table(dataset@meta.data[["Final_HTO"]], dataset@meta.data[["hash.ID"]])
142 | 
143 | #qc and preprocess
144 | DefaultAssay(dataset)<-"RNA"
145 | dataset[["percent.mt"]] <- PercentageFeatureSet(dataset, pattern = "^MT-")
146 | dataset<-subset(dataset, subset=percent.mt < 60)
147 | HTO_number_09lane1 <- sum(dataset$HTO_classification.global == "Singlet")
148 | souporcell_number_09lane1 <- sum(dataset$Final_HTO_status == "Singlet")
149 | Idents(dataset) <- "Final_HTO"
150 | dataset <- subset(dataset, idents = "PSC-I-3191")
151 | Idents(dataset) <- "Final_HTO_status"
152 | dataset <- subset(dataset, idents = "Singlet")
153 | table(dataset@meta.data[["Final_HTO"]])
154 | dataset<-RenameCells(dataset, add.cell.id="VEDO01_T0_I")
155 | dataset@meta.data$lane<-"VEDO01_T0_I"
156 | dataset<-SCTransform(dataset,vars.to.regress = "percent.mt", verbose = F)
157 | dataset <- RunPCA(dataset, verbose = FALSE)
158 | dataset <- RunUMAP(dataset, dims = 1:30, verbose = FALSE)
159 | dataset <- FindNeighbors(dataset, dims = 1:30, verbose = FALSE)
160 | dataset <- FindClusters(dataset, resolution = 0.2, verbose = FALSE)
161 | DimPlot(dataset)
162 | FeaturePlot(dataset, "IGHG4")
163 | FeaturePlot(dataset, "EPCAM")
164 | dataset@meta.data[["Final_HTO"]] <- "V001_T0_ascendens_I"
165 | table(dataset@meta.data[["Final_HTO"]])
166 | saveRDS(dataset, file="../VEDO2/Data/VEDO01_T0_I_sct.rds")
167 | 
168 | 


--------------------------------------------------------------------------------
/Method_comparisons/SCT_integration.R:
--------------------------------------------------------------------------------
  1 | 
  2 | library(Seurat)
  3 | library(ggplot2)
  4 | 
  5 | options(future.globals.maxSize = 150000 * 1024^2)
  6 | 
  7 | 
  8 | 
  9 | HC_3296<-readRDS("/methodspaper_data/HC-NI-3296_sct.rds")
 10 | DefaultAssay(HC_3296)<-"SCT"
 11 | HC_3002<-readRDS("/methodspaper_data/HC-NI-3002_sct.rds")  
 12 | DefaultAssay(HC_3002)<-"SCT"
 13 | HC_3030<-readRDS("/methodspaper_data/HC-NI-3030_sct.rds")
 14 | DefaultAssay(HC_3030)<-"SCT"
 15 | HC_3049<-readRDS("/methodspaper_data/HC-NI-3049_sct.rds")
 16 | DefaultAssay(HC_3049)<-"SCT"
 17 | HC_3083<-readRDS("/methodspaper_data/HC-NI-3083_sct.rds")
 18 | DefaultAssay(HC_3083)<-"SCT"
 19 | HC_3037<-readRDS("/methodspaper_data/D-3037-HC_sct.rds")
 20 | DefaultAssay(HC_3037)<-"SCT"
 21 | HC_3034<-readRDS("/methodspaper_data/C-3034-HC_sct.rds")
 22 | DefaultAssay(HC_3034)<-"SCT"
 23 | 
 24 | HC_1<-readRDS("/methodspaper_data/HC_Sanger1_dataset_sct.rds")
 25 | HC_2<-readRDS("/methodspaper_data/HC_Sanger2_dataset_sct.rds")
 26 | HC_3<-readRDS("/methodspaper_data/HC_Sanger3_dataset_sct.rds")
 27 | HC_4<-readRDS("/methodspaper_data/HC_Sanger4_dataset_sct.rds")
 28 | HC_5<-readRDS("/methodspaper_data/HC_Sanger5_dataset_sct.rds")
 29 | HC_6<-readRDS("/methodspaper_data/HC_Sanger6_dataset_sct.rds")
 30 | HC_7<-readRDS("/methodspaper_data/HC_Sanger7_dataset_sct.rds")
 31 | HC_8<-readRDS("/methodspaper_data/HC_Sanger8_dataset_sct.rds")
 32 | HC_9<-readRDS("/methodspaper_data/HC_Sanger9_dataset_sct.rds")
 33 | DefaultAssay(HC_1)<-"SCT"
 34 | DefaultAssay(HC_2)<-"SCT"
 35 | DefaultAssay(HC_3)<-"SCT"
 36 | DefaultAssay(HC_4)<-"SCT"
 37 | DefaultAssay(HC_5)<-"SCT"
 38 | DefaultAssay(HC_6)<-"SCT"
 39 | DefaultAssay(HC_7)<-"SCT"
 40 | DefaultAssay(HC_8)<-"SCT"
 41 | DefaultAssay(HC_9)<-"SCT"
 42 | imm_healthy<-readRDS("/imm_healthy.rds")
 43 | epi_healthy<-readRDS("/epi_healthy.rds")
 44 | fib_healthy<-readRDS("/fib_healthy.rds")
 45 | 
 46 | HC_3296<-RenameCells(HC_3296, add.cell.id="HC3296")
 47 | HC_3002<-RenameCells(HC_3002, add.cell.id="HC3002")
 48 | 
 49 | HC_3030<-RenameCells(HC_3030, add.cell.id="HC3030")
 50 | HC_3049<-RenameCells(HC_3049, add.cell.id="HC3049")
 51 | HC_3083<-RenameCells(HC_3083, add.cell.id="HC3083")
 52 | 
 53 | HC_3034<-RenameCells(HC_3034, add.cell.id="HC3034")
 54 | HC_3037<-RenameCells(HC_3037, add.cell.id="HC3037")
 55 | 
 56 | HC_1<-RenameCells(HC_1, add.cell.id="HC1")
 57 | HC_2<-RenameCells(HC_2, add.cell.id="HC2")
 58 | HC_3<-RenameCells(HC_3, add.cell.id="HC3")
 59 | HC_4<-RenameCells(HC_4, add.cell.id="HC4")
 60 | HC_5<-RenameCells(HC_5, add.cell.id="HC5")
 61 | HC_6<-RenameCells(HC_6, add.cell.id="HC6")
 62 | HC_7<-RenameCells(HC_7, add.cell.id="HC7")
 63 | HC_8<-RenameCells(HC_8, add.cell.id="HC8")
 64 | HC_9<-RenameCells(HC_9, add.cell.id="HC9")
 65 | 
 66 | imm_healthy@meta.data$dataset<-"imm_healthy"
 67 | epi_healthy@meta.data$dataset<-"epi_healthy"
 68 | fib_healthy@meta.data$dataset<-"fib_healthy"
 69 | 
 70 | HC_3296@meta.data$dataset<-"HC_3296"
 71 | HC_3002@meta.data$dataset<-"HC_3002"
 72 | 
 73 | HC_3030@meta.data$dataset<-"HC_3030"
 74 | HC_3049@meta.data$dataset<-"HC_3049"
 75 | HC_3083@meta.data$dataset<-"HC_3083"
 76 | HC_3037@meta.data$dataset<-"HC_3037"
 77 | HC_3034@meta.data$dataset<-"HC_3034"
 78 | 
 79 | HC_1@meta.data$dataset<-"HC_1"
 80 | HC_2@meta.data$dataset<-"HC_2"
 81 | HC_3@meta.data$dataset<-"HC_3"
 82 | HC_4@meta.data$dataset<-"HC_4"
 83 | HC_5@meta.data$dataset<-"HC_5"
 84 | HC_6@meta.data$dataset<-"HC_6"
 85 | HC_7@meta.data$dataset<-"HC_7"
 86 | HC_8@meta.data$dataset<-"HC_8"
 87 | HC_9@meta.data$dataset<-"HC_9"
 88 | 
 89 | imm_healthy@meta.data$method<-"splitcollagenase"
 90 | epi_healthy@meta.data$method<-"splitcollagenase"
 91 | fib_healthy@meta.data$method<-"splitcollagenase"
 92 | 
 93 | HC_3296@meta.data$method<-"wholecollagenase"
 94 | HC_3002@meta.data$method<-"wholecollagenase"
 95 | 
 96 | HC_3030@meta.data$method<-"wholecollagenase"
 97 | HC_3049@meta.data$method<-"wholecollagenase"
 98 | HC_3083@meta.data$method<-"wholecollagenase"
 99 | HC_3037@meta.data$method<-"wholecollagenase"
100 | HC_3034@meta.data$method<-"wholecollagenase"
101 | 
102 | HC_1@meta.data$method<-"splitprotease"
103 | HC_2@meta.data$method<-"splitprotease"
104 | HC_3@meta.data$method<-"splitprotease"
105 | HC_4@meta.data$method<-"splitprotease"
106 | HC_5@meta.data$method<-"splitprotease"
107 | HC_6@meta.data$method<-"splitprotease"
108 | HC_7@meta.data$method<-"splitprotease"
109 | HC_8@meta.data$method<-"splitprotease"
110 | HC_9@meta.data$method<-"splitprotease"
111 | 
112 | 
113 | imm_healthy_sub[["percent.ribo"]] <- PercentageFeatureSet(imm_healthy_sub, pattern = "^RPL|^RPS")
114 | epi_healthy_sub[["percent.ribo"]] <- PercentageFeatureSet(epi_healthy_sub, pattern = "^RPL|^RPS")
115 | fib_healthy_sub[["percent.ribo"]] <- PercentageFeatureSet(fib_healthy_sub, pattern = "^RPL|^RPS")
116 | 
117 | imm_healthy[["percent.ribo"]] <- PercentageFeatureSet(imm_healthy, pattern = "^RPL|^RPS")
118 | epi_healthy[["percent.ribo"]] <- PercentageFeatureSet(epi_healthy, pattern = "^RPL|^RPS")
119 | fib_healthy[["percent.ribo"]] <- PercentageFeatureSet(fib_healthy, pattern = "^RPL|^RPS")
120 | 
121 | HC_2[["percent.ribo"]] <- PercentageFeatureSet(HC_2, pattern = "^RPL|^RPS")
122 | HC_6[["percent.ribo"]] <- PercentageFeatureSet(HC_6, pattern = "^RPL|^RPS")
123 | HC_9[["percent.ribo"]] <- PercentageFeatureSet(HC_9, pattern = "^RPL|^RPS")
124 | 
125 | HC_3296[["percent.ribo"]] <- PercentageFeatureSet(HC_3296, pattern = "^RPL|^RPS")
126 | HC_3030[["percent.ribo"]] <- PercentageFeatureSet(HC_3030, pattern = "^RPL|^RPS")
127 | HC_3083[["percent.ribo"]] <- PercentageFeatureSet(HC_3083, pattern = "^RPL|^RPS")
128 | HC_3037[["percent.ribo"]] <- PercentageFeatureSet(HC_3037, pattern = "^RPL|^RPS")
129 | HC_3034[["percent.ribo"]] <- PercentageFeatureSet(HC_3034, pattern = "^RPL|^RPS")
130 | 
131 | 
132 | alldata_202002_integration_list <- list(HC_3296, HC_3030, HC_3083, HC_3034, HC_3037,HC_2, HC_6,HC_9, epi_healthy_sub,imm_healthy_sub,fib_healthy_sub)
133 | 
134 | for (i in 1:length(alldata_202002_integration_list)){
135 | alldata_202002_integration_list[[i]] <- SCTransform(alldata_202002_integration_list[[i]],vars.to.regress = c("percent.mt", "percent.ribo"), verbose = FALSE)
136 | }
137 | 
138 | print("loaded")
139 | features <- SelectIntegrationFeatures(object.list = alldata_202002_integration_list, nfeatures = 3000)
140 | print("features_selected")
141 | alldata_202002_integration_list <- PrepSCTIntegration(object.list = alldata_202002_integration_list, anchor.features = features)
142 | print("integration_prepped")
143 | anchors <- FindIntegrationAnchors(object.list = alldata_202002_integration_list, anchor.features = features, normalization.method = "SCT")
144 | print("achors_found")
145 | rm(alldata_202002_integration_list)
146 | alldata.integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT")
147 | print("integrated")
148 | rm(anchors)
149 | alldata.integrated <- RunPCA(alldata.integrated, verbose = FALSE)
150 | print("pca2_ran")
151 | alldata.integrated <- RunUMAP(alldata.integrated, dims = 1:30)
152 | print("umap_ran")
153 | alldata.integrated <- FindNeighbors(alldata.integrated, dims = 1:30)
154 | alldata.integrated <- FindClusters(alldata.integrated, resolution = 0.5)
155 | 
156 | DefaultAssay(alldata.integrated) <- "RNA"
157 | alldata.integrated <- NormalizeData(alldata.integrated, normalization.method = "LogNormalize", scale.factor = 10000, verbose = FALSE)
158 | 
159 | 
160 | for (i in 1:nrow(methodspaper@meta.data)){ 
161 |   if (methodspaper@meta.data[i,21] == "unassigned"){
162 |     methodspaper@meta.data[i,23]<-methodspaper@meta.data[i,22] 
163 |   } else {
164 |     methodspaper@meta.data[i,23]<-methodspaper@meta.data[i,21]
165 |   }
166 | }
167 | 
168 | 


--------------------------------------------------------------------------------
/PSC/riskgene_expression_analysis.R:
--------------------------------------------------------------------------------
  1 | #analysis UC and PSC risk/implicated genes and their differential expression in different health statuses
  2 | 
  3 | data<- readRDS("~/PSC_processed.rds")
  4 | DefaultAssay(data) = "RNA"
  5 | UC_risk<-read.csv("~/ucputativeriskgenes.csv", sep=";")
  6 | genes<-read.csv("~/genes_junctions.csv", sep=";")
  7 | PSC_risk<-genes[genes$function. == "PSC_and_suggestive_risk_genes",]
  8 | 
  9 | #identifyng risk gene expression
 10 | Idents(data) <- "celltypes"
 11 | celltypes <- levels(data$celltypes)
 12 | table(data$celltypes, data$state)
 13 | celltypes <- celltypes[-c(2,3,4,6,7,8,9,10,13,14,15,18,19,20,21,22,24,25,26,32,35,39,40,41,42)] #filter out celltypes with too few (<100) cells per group
 14 | DefaultAssay(data)<-"RNA"
 15 | 
 16 | #####
 17 | allgenes <- data.frame(matrix(ncol = 2, nrow = 33538))
 18 | colnames(allgenes)[1]<-"Gene"
 19 | allgenes$Gene<-rownames(data@assays$RNA)
 20 | PSC_risk_in_set<-merge(allgenes, PSC_risk, by="Gene")
 21 | UC_risk_in_set<-merge(allgenes, UC_risk, by="Gene")
 22 | write.csv(UC_risk_in_set, "~/ucputativeriskgenes_smillie_inset.csv")
 23 | write.csv(PSC_risk_in_set, "~/PSCgenes_inset.csv")
 24 | 
 25 | PSC_risk<-read.csv("~/PSC_genes.csv", sep=";")
 26 | colnames(PSC_risk)[1]<-"Gene"
 27 | 
 28 | DEgenes <- FindMarkers(data, subset.ident = celltypes[1], group.by = "state", test.use = "MAST", ident.1 = "PSC-I", ident.2 = "HC-NI")
 29 | DEgenes$celltype <- celltypes[1]
 30 | DEgenes$Gene <- rownames(DEgenes)
 31 | PSC <- merge(DEgenes, PSC_risk, by = "Gene")
 32 | 
 33 | for(i in 2:17){
 34 |   DEgenes <- FindMarkers(data, subset.ident = celltypes[i], group.by = "state", test.use = "MAST", ident.1 = "PSC-I", ident.2 = "HC-NI")
 35 |   DEgenes$celltype <- celltypes[i]
 36 |   DEgenes$Gene <- rownames(DEgenes)
 37 |   DEgenes1 <- merge(DEgenes, PSC_risk, by = "Gene")
 38 |   PSC <- rbind(PSC, DEgenes1)
 39 | }
 40 | 
 41 | PSC_I<-PSC
 42 | 
 43 | DEgenes <- FindMarkers(data, subset.ident = celltypes[1], group.by = "state", test.use = "MAST", ident.1 = "PSC-NI", ident.2 = "HC-NI")
 44 | DEgenes$celltype <- celltypes[1]
 45 | DEgenes$Gene <- rownames(DEgenes)
 46 | PSC <- merge(DEgenes, PSC_risk, by = "Gene")
 47 | 
 48 | for(i in 2:17){
 49 |   DEgenes <- FindMarkers(data, subset.ident = celltypes[i], group.by = "state", test.use = "MAST", ident.1 = "PSC-NI", ident.2 = "HC-NI")
 50 |   DEgenes$celltype <- celltypes[i]
 51 |   DEgenes$Gene <- rownames(DEgenes)
 52 |   DEgenes1 <- merge(DEgenes, PSC_risk, by = "Gene")
 53 |   PSC <- rbind(PSC, DEgenes1)
 54 | }
 55 | 
 56 | PSC_NI<-PSC
 57 | 
 58 | x<-merge(PSC_risk, allgenes, by="Gene", all=F) # 71 PSC risk genes in set 
 59 | PSC_I$p_val_adj<-PSC_I$p_val*71
 60 | PSC_NI$p_val_adj<-PSC_NI$p_val*71
 61 | 
 62 | write.csv(PSC_I, "~/PSC_I_bonferroni_allrisk_genes_celltypes.csv")
 63 | write.csv(PSC_NI, "~/PSC_NI_onferroni_allrisk_genes_celltypes.csv")
 64 | 
 65 | UC <- data.frame(matrix(ncol = 6, nrow = 0))
 66 | colnames(UC) <- c("p_val", "avg_logFC","pct.1","pct2","p_val_adj","celltype")
 67 | for(i in 1:17){
 68 |   DEgenes <- FindMarkers(data, subset.ident = celltypes[i], group.by = "state", test.use = "MAST", ident.1 = "UC-NI", ident.2 = "HC-NI")
 69 |   DEgenes$celltype <- celltypes[i]
 70 |   DEgenes$Gene <- rownames(DEgenes)
 71 |   DEgenes <- merge(DEgenes, UC_risk, by = "Gene")
 72 |   UC <- rbind(UC, DEgenes)
 73 | }
 74 | 
 75 | UC_NI<-UC
 76 | 
 77 | UC <- data.frame(matrix(ncol = 6, nrow = 0))
 78 | colnames(UC) <- c("p_val", "avg_logFC","pct.1","pct2","p_val_adj","celltype")
 79 | for(i in 1:17){
 80 |   DEgenes <- FindMarkers(data, subset.ident = celltypes[i], group.by = "state", test.use = "MAST", ident.1 = "UC-I", ident.2 = "HC-NI")
 81 |   DEgenes$celltype <- celltypes[i]
 82 |   DEgenes$Gene <- rownames(DEgenes)
 83 |   DEgenes <- merge(DEgenes, UC_risk, by = "Gene")
 84 |   UC <- rbind(UC, DEgenes)
 85 | }
 86 | 
 87 | UC_I<-UC
 88 | UC_significance_level_bonferroni<-0.05/54 #54 UCrisk genes in total in set
 89 | 
 90 | UC_I$p_val_adj<-UC_I$p_val*54
 91 | UC_NI$p_val_adj<-UC_NI$p_val*54
 92 | 
 93 | write.csv(UC_I, "~/UC_I_bonferroni_risk_genes_celltypes.csv")
 94 | write.csv(UC_NI, "~/UC_NI_bonferroni_risk_genes_celltypes.csv")
 95 | 
 96 | # plot
 97 | library(ggplot2)
 98 | PSC_I <- filter(PSC_I, PSC_I$p_val_adj <= 0.05)
 99 | UC_I <- filter(UC_I, UC_I$p_val_adj <= 0.05)
100 | UC_NI <- filter(UC_NI, UC_NI$p_val_adj <=0.05)
101 | PSC_NI <- filter(PSC_NI, PSC_NI$p_val_adj <=0.05)
102 | 
103 | UC_I$Adj_p_value <- ifelse(UC_I$p_val_adj <= 0.001, "< 0.001", ifelse(UC_I$p_val_adj <= 0.01, "< 0.01", "< 0.05"))
104 | UC_I$Adj_p_value <- factor(UC_I$Adj_p_value, levels = c("< 0.05", "< 0.01","< 0.001"))
105 | UC_NI$Adj_p_value <- ifelse(UC_NI$p_val_adj <= 0.001, "< 0.001", ifelse(UC_NI$p_val_adj <= 0.01, "< 0.01", "< 0.05"))
106 | UC_NI$Adj_p_value <- factor(UC_NI$Adj_p_value, levels = c("< 0.05", "< 0.01","< 0.001"))
107 | PSC_I$Adj_p_value <- ifelse(PSC_I$p_val_adj <= 0.001, "< 0.001", ifelse(PSC_I$p_val_adj <= 0.01, "< 0.01", "< 0.05"))
108 | PSC_I$Adj_p_value <- factor(PSC_I$Adj_p_value, levels = c("< 0.05", "< 0.01","< 0.001"))
109 | PSC_NI$Adj_p_value <- ifelse(PSC_NI$p_val_adj <= 0.001, "< 0.001", ifelse(PSC_NI$p_val_adj <= 0.01, "< 0.01", "< 0.05"))
110 | PSC_NI$Adj_p_value <- factor(PSC_NI$Adj_p_value, levels = c("< 0.05", "< 0.01","< 0.001"))
111 | UC_I$direction <- ifelse(UC_I$avg_logFC > 0, "up", "down")
112 | UC_NI$direction <- ifelse(UC_NI$avg_logFC > 0, "up", "down")
113 | PSC_I$direction <- ifelse(PSC_I$avg_logFC > 0, "up", "down")
114 | PSC_NI$direction <- ifelse(PSC_NI$avg_logFC > 0, "up", "down")
115 | 
116 | ggplot(PSC_I, aes(x = Gene, y = celltype, color = direction)) + 
117 |   geom_point(aes(alpha = Adj_p_value), size = 5) +
118 |   scale_alpha_manual(values =c(0.2, 0.5, 1)) +
119 |   scale_color_manual(values = c("#00AFBB", "#FC4E07")) +
120 |   theme_bw() +
121 |   labs(title =  "PSC risk genes in PSC_I vs HC")
122 | 
123 | ggsave("Results/Figures/Riskgenes_PSCI.pdf", height = 7)
124 | 
125 | ggplot(PSC_NI, aes(x = Gene, y = celltype, color = direction)) + 
126 |   geom_point(aes(alpha = Adj_p_value), size = 5) +
127 |   scale_alpha_manual(values =c(1)) +
128 |   scale_color_manual(values = c("#00AFBB", "#FC4E07")) +
129 |   theme_bw() +
130 |   labs(title =  "PSC risk genes in PSC_NI vs HC")
131 | 
132 | ggsave("Results/Figures/Riskgenes_PSCNI.pdf", height = 7)
133 | 
134 | ggplot(UC_I, aes(x = Gene, y = celltype, color = direction)) + 
135 |   geom_point(aes(alpha = Adj_p_value), size = 5) +
136 |   scale_alpha_manual(values =c(0.2, 0.5, 1)) +
137 |   scale_color_manual(values = c("#00AFBB", "#FC4E07")) +
138 |   theme_bw()+
139 |   labs(title =  "UC risk genes in UC_I vs HC")
140 | 
141 | ggsave("Results/Figures/Riskgenes_UCI.pdf", height = 7)
142 | 
143 | ggplot(UC_NI, aes(x = Gene, y = celltype, color = direction)) + 
144 |   geom_point(aes(alpha = Adj_p_value), size = 5) +
145 |   scale_alpha_manual(values =c(0.5, 1)) +
146 |   scale_color_manual(values = c("#00AFBB", "#FC4E07")) +
147 |   theme_bw()+
148 |   labs(title =  "UC risk genes in UC_NI vs HC")
149 | 
150 | ggsave("Results/Figures/Riskgenes_UCNI.pdf", height = 7)
151 | 
152 | # old
153 | library(viridis)
154 | library(ggplot2)
155 | 
156 | ggplot(PSC_NI, aes(x=celltype, y = Gene, color = avg_logFC , size = -log(p_val))) + 
157 |   geom_point()  + scale_color_viridis(option = "D") +theme(title = element_text()) +
158 |   labs(title =  "PSC risk genes in PSC_NI vs HC") # saved PSC_riskgenes_PSC_NI_vs_HC
159 | 
160 | ggplot(PSC_I, aes(x=celltype, y = Gene, color = avg_logFC , size = -log(p_val))) + 
161 |   geom_point()  + scale_color_viridis(option = "D") +theme(title = element_text()) +
162 |   labs(title =  "PSC risk genes in PSC_I vs HC") # saved PSC_riskgenes_PSC_I_vs_HC
163 | 
164 | ggplot(UC_NI, aes(x=celltype, y = Gene, color = avg_logFC , size = -log(p_val))) + 
165 |   geom_point()  + scale_color_viridis(option = "D") +theme(title = element_text()) +
166 |   labs(title =  "UC risk genes in UC_NI vs HC") # saved UC_riskgenes_UC_NI_vs_HC
167 | 
168 | ggplot(UC_I, aes(x=celltype, y = Gene, color = avg_logFC , size = -log(p_val))) + 
169 |   geom_point()  + scale_color_viridis(option = "D") +theme(title = element_text()) +
170 |   labs(title =  "UC risk genes in UC_I vs HC") # saved UC_riskgenes_UC_I_vs_HC
171 | 
172 | 


--------------------------------------------------------------------------------
/DDTx/filtering_and_QC.R:
--------------------------------------------------------------------------------
 1 | library(Seurat)
 2 | 
 3 | #open Seuratfile
 4 | ddtx<-readRDS("/source/ddtx_merged_demultiplexed_clustered_compartment_azi_elmentaiteadultileum_demuxlet.rds")
 5 | dim(ddtx)
 6 | 
 7 | # filter 60% mito
 8 | data<-subset(ddtx, subset=percent.mt < 60)
 9 | dim(data)
10 | # reduction of 90553 to 88167 cells
11 | 
12 | # exclude T8. patient 3 (wrongly sampled, only recipient cells)
13 | data@meta.data$pt_project_lane<-paste(data@meta.data$lane,data@meta.data$donor_final, sep="_")
14 | data<-subset(data, subset=pt_project_lane!="220504_lane04_UMCGDDtx00005r")
15 | dim(data)
16 | # reduction to 85750 cells
17 | 
18 | # exclude epithelial cells from recipients: this is spillover
19 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Enterocyte"),"compartment_final"]<-"Epithelial"
20 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Paneth"),"compartment_final"]<-"Epithelial"
21 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="TA"),"compartment_final"]<-"Epithelial"
22 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Tuft"),"compartment_final"]<-"Epithelial"
23 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stem cells"),"compartment_final"]<-"Epithelial"
24 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="BEST4+ epithelial"),"compartment_final"]<-"Epithelial"
25 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Goblet cell"),"compartment_final"]<-"Epithelial"
26 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="I cells (CCK+)"),"compartment_final"]<-"Epithelial"
27 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="K cells (GIP+)"),"compartment_final"]<-"Epithelial"
28 | 
29 | 
30 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Activated CD8 T"),"compartment_final"]<-"Immune"
31 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Activated CD4 T"),"compartment_final"]<-"Immune"
32 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Memory B"),"compartment_final"]<-"Immune"
33 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="IgA plasma cell"),"compartment_final"]<-"Immune"
34 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="cDC2"),"compartment_final"]<-"Immune"
35 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="LYVE1+ Macrophage"),"compartment_final"]<-"Immune"
36 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Macrophages"),"compartment_final"]<-"Immune"
37 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="IgG plasma cell"),"compartment_final"]<-"Immune"
38 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Monocytes"),"compartment_final"]<-"Immune"
39 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="TRGV2 gdT"),"compartment_final"]<-"Immune"
40 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Mast cell"),"compartment_final"]<-"Immune"
41 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Cycling B cell"),"compartment_final"]<-"Immune"
42 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Naive B"),"compartment_final"]<-"Immune"
43 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="ILC3"),"compartment_final"]<-"Immune"
44 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="cDC1"),"compartment_final"]<-"Immune"
45 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="TRGV5/7 gdT"),"compartment_final"]<-"Immune"
46 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="gdT"),"compartment_final"]<-"Immune"
47 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="CX3CR1+ CD8 Tmem"),"compartment_final"]<-"Immune"
48 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="TRGV4 gdT"),"compartment_final"]<-"Immune"
49 | 
50 | 
51 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 1 (ADAMDEC1+)"),"compartment_final"]<-"Stromal"
52 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="arterial capillary"),"compartment_final"]<-"Stromal"
53 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 1 (CCL11+)"),"compartment_final"]<-"Stromal"
54 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 2 (NPY+)"),"compartment_final"]<-"Stromal"
55 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Mature venous EC"),"compartment_final"]<-"Stromal"
56 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Contractile pericyte (PLN+)"),"compartment_final"]<-"Stromal"
57 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Adult Glia"),"compartment_final"]<-"Stromal"
58 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Mature arterial EC"),"compartment_final"]<-"Stromal"
59 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Transitional Stromal 3 (C3+)"),"compartment_final"]<-"Stromal"
60 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="LEC3 (ADGRG3+)"),"compartment_final"]<-"Stromal"
61 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="myofibroblast (RSPO2+)"),"compartment_final"]<-"Stromal"
62 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="LEC1 (ACKR4+)"),"compartment_final"]<-"Stromal"
63 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 3 (C7+)"),"compartment_final"]<-"Stromal"
64 | 
65 | data@meta.data$patient_compartment <- paste(data@meta.data$donor_final,data@meta.data$compartment_final, sep="_")
66 | 
67 | 
68 | data@meta.data$major_celltype<- data@meta.data$compartment_final
69 | 
70 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 1 (ADAMDEC1+)"),"major_celltype"]<-"Endothelial"
71 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="arterial capillary"),"major_celltype"]<-"Endothelial"
72 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 1 (CCL11+)"),"major_celltype"]<-"Endothelial"
73 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 2 (NPY+)"),"major_celltype"]<-"Endothelial"
74 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Mature venous EC"),"major_celltype"]<-"Endothelial"
75 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Contractile pericyte (PLN+)"),"major_celltype"]<-"Endothelial"
76 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Adult Glia"),"major_celltype"]<-"Endothelial"
77 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Mature arterial EC"),"major_celltype"]<-"Endothelial"
78 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Transitional Stromal 3 (C3+)"),"major_celltype"]<-"Endothelial"
79 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="LEC3 (ADGRG3+)"),"major_celltype"]<-"Endothelial"
80 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="LEC1 (ACKR4+)"),"major_celltype"]<-"Endothelial"
81 | data@meta.data[which(data@meta.data$predicted.celltype.elmentaiteadultileum=="Stromal 3 (C7+)"),"major_celltype"]<-"Endothelial"
82 | 
83 | 
84 | 
85 | 
86 | # subset the data
87 | unique(data@meta.data$patient_compartment)
88 | data_2 <- subset(data, patient_compartment != "UMCGDDtx00003r_Epithelial" & patient_compartment != "UMCGDDtx00004r_Epithelial" & patient_compartment !="UMCGDDtx00005r_Epithelial")
89 | unique(data_2@meta.data$patient_compartment)
90 | dim(data_2)
91 | #reduction to 85554 cells
92 | 
93 | # save
94 | saveRDS(data_2, "/source/ddtx_merged_demultiplexed_clustered_compartment_azi_elmentaiteadultileum_below60pctmito_withoutEpithelialR_demuxlet.rds")
95 | 


--------------------------------------------------------------------------------
/PSC/Celltyping_202103:
--------------------------------------------------------------------------------
  1 | library(dplyr)
  2 | library(Seurat)
  3 | library(patchwork)
  4 | library(ggplot2)
  5 | library(readr)
  6 | library(tidyr)
  7 | library(MAST)
  8 | library(readxl)
  9 | library(openxlsx)
 10 | 
 11 | data<- readRDS("PSC_202002_integrated_v2_noribo.rds")
 12 | data=UpdateSeuratObject(data)
 13 | DimPlot(data, label = T) + NoLegend()
 14 | 
 15 | data@meta.data$Final_HTO[data@meta.data$Final_HTO == "PSC-I-X019"]<-"PSC-NI-XXXX"
 16 | data@meta.data$Final_HTO[data@meta.data$Final_HTO == "PSC-NI-X019"]<-"PSC-I-XXXX"
 17 | table(data@meta.data$Final_HTO)
 18 | data@meta.data$disease = sapply(strsplit(data$Final_HTO,"-"), `[`, 1)
 19 | data@meta.data$inflammation = sapply(strsplit(data$Final_HTO,"-"), `[`, 2)
 20 | data@meta.data$sample = sapply(strsplit(data$Final_HTO,"-"), `[`, 3)
 21 | data@meta.data$state <- paste(data$disease, data$inflammation, sep='-')
 22 | table(data@meta.data$sample)
 23 | data = subset(data, subset = sample != "X296")
 24 | table(data@meta.data$sample)
 25 | 
 26 | # recluster subset
 27 | # epi = average expression EPCAM > 5
 28 | epi<-subset(data, idents = c("6","7","17","19","0","14","10","3","21"))
 29 | DefaultAssay(epi)<-"integrated"
 30 | epi<- RunPCA(epi)
 31 | epi <- RunUMAP(epi, dims = 1:30)
 32 | epi<-FindNeighbors(epi, dims = 1:30)
 33 | epi<-FindClusters(epi, resolution = 0.4)
 34 | DimPlot(epi, label=T)
 35 | Idents(epi)<-"integrated_snn_res.0.4"
 36 | DefaultAssay(epi)<-"RNA"
 37 | #markers_epi <- FindAllMarkers(epi, only.pos = TRUE, min.pct = 0.25)
 38 | #selected_markers_epi <- markers_epi %>% group_by(cluster) %>% top_n(n = 3, wt = avg_logFC)
 39 | # stem (LEFTY, LGR5, ASCL2); enteroendocrine (ASCL2, CHGA, CCL7), Tuft (TRPM5, HCK); Goblet (MUC2, TFF1); DUOX (MUC2lo, TFF1lo, DUOX2); BEST4 (BEST4); Absorptive_enterocyte (SLC26A3hi, RBP, CA1, SLC26A2); Immature_enterocyte (SLC26A3, RBP, CA1, SLC26A2, FABP1); Cycling_TA (CDC25C); RIBO_TA (LEFTY); Immature_Goblet (MUC2, TFF1lo) 
 40 | new.ident=c("Immature_enterocyte", "Cycling_TA", "Absorptive_enterocyte", "Ribo_TA","Stem", "MT-Hi-enterocyte", "BEST4_enterocyte", "Immature_goblet", "DUOX2_enterocyte", "PLCG2_TA", "Absorptive_TA", "Tuft", "REG_TA","Enteroendocrine", "Goblet", "MAST")
 41 | names(new.ident) <- levels(epi)
 42 | epi=RenameIdents(epi,new.ident)
 43 | DimPlot(epi, label=T)
 44 | epi[["celltypes"]] <- Idents(object = epi)
 45 | meta_epi<-epi@meta.data
 46 | write.csv(meta_epi, "meta_epi.csv")
 47 | 
 48 | # leuko = average expression PTPRC (CD45) > 5
 49 | leuko<-subset(data, idents = c("2","11","18","24","27","15","4","26"))
 50 | DefaultAssay(leuko)<-"integrated"
 51 | leuko<- RunPCA(leuko)
 52 | leuko <- RunUMAP(leuko, dims = 1:30)
 53 | leuko<-FindNeighbors(leuko, dims = 1:30)
 54 | leuko<-FindClusters(leuko, resolution = 0.8)
 55 | Idents(leuko)<-"integrated_snn_res.0.8"
 56 | DimPlot(leuko, label=T)
 57 | DefaultAssay(leuko)<-"RNA"
 58 | #markers_leuko <- FindAllMarkers(leuko, only.pos = TRUE, min.pct = 0.25)
 59 | #selected_markers_leuko <- markers_leuko %>% group_by(cluster) %>% top_n(n = 3, wt = avg_logFC)
 60 | # all B cells (CD19 and/or CD20), all T cells (CD3E, CD3D), non-T non-B is APC; Activated_B (CD74, CD83, FCRL5); MAIT (CCR7-, SELLlo, IL7R, CD4lo, KLRB1, CD44); Treg (TNFRSF4, CD4); Memory_CD4T (SELL, CCR7, CD4); Mt (mito); CD8pos (GNLY+/-, CD8A, NKG7) DC (CD1C, ITGAx, HLADRB1, STAT1); ; Cycling_T (TFRC+, CCL5, CD4lo, CD3); Mature_B (IGHD+/-, SELL, CD24, CD83+/-); Inflammatory_monocytes (OSM, S100A8, S100A9, ITGAX); Memory_B (CXCR5, TNFRSF13B, IGHD, POUZAF1lo); Cycling_B (CD74,AICDA, POUZAF1, ASF1B, CLSPN); Macrophages (CD14, HLADRB1); Cycling_T (CLSPN, GNLY, CD4)
 61 | new.idents<-c("Mature_B", "MAIT", "Treg", "Memory_CD4T", "Mt","CD8T", "DC", "CD8T", "Cycling_T", "Mature_B", "Inflammatory_monocyte", "Mature_B", "Doublets", "Activated_B", "Mt", "Activated_cycling_B", "Macrophage", "Cycling_T", "Doublets")
 62 | names(new.idents) <- levels(leuko)
 63 | leuko=RenameIdents(leuko,new.idents)
 64 | DimPlot(leuko, label=T)
 65 | leuko[["celltypes"]] <- Idents(object = leuko)
 66 | 
 67 | meta_leuko<-leuko@meta.data
 68 | write.csv(meta_leuko, "meta_leuko.csv")
 69 | 
 70 | # stromal =  average expression THY1 > 0.4 (fibroblast), SOX10 > 2 (glia), MADCAM1 > 1 (endotheel)
 71 | stromal<-subset(data, idents = c("9", "13", "20", "22", "25"))
 72 | DefaultAssay(stromal)<-"integrated"
 73 | stromal<- RunPCA(stromal)
 74 | stromal <- RunUMAP(stromal, dims = 1:30)
 75 | stromal <-FindNeighbors(stromal, dims = 1:30)
 76 | stromal <-FindClusters(stromal, resolution = 0.4)
 77 | DimPlot(stromal, label=T)
 78 | Idents(stromal)<-"integrated_snn_res.0.4"
 79 | DefaultAssay(stromal)<-"RNA"
 80 | #markers_stromal <- FindAllMarkers(stromal, only.pos = TRUE)
 81 | #fibroblasts (COL1A1, PDGFRA, THY1), endothelium (RBP7, CD34, PLVAP, CLDN5). inflammatory_fibroblast (IGFBP7, CHI3L1, NNMT); Pericytes (RGS5, COX4I2, HIGD1B); Myofibroblast (ACTA2, CNN1)
 82 | new.idents<-c("WNT5B_fibroblast", "Inflammatory_fibroblast", "RSPO3_fibroblast", "WNT2B_fibroblast", "Endothelial", "Endothelial", "Mt-Hi_stromal", "Pericytes", "Glia", "Myofibroblasts", "Endothelial", "Endothelial")
 83 | names(new.idents) <- levels(stromal)
 84 | stromal=RenameIdents(stromal,new.idents)
 85 | DimPlot(stromal, label=T)
 86 | stromal[["celltypes"]] <- Idents(object = stromal)
 87 | DimPlot(stromal)
 88 | meta_stromal<-stromal@meta.data
 89 | write.csv(meta_stromal, "meta_stromal.csv")
 90 | 
 91 | # plasma = remaining cells
 92 | plasma<-subset(data, idents = c("1", "16", "12", "23", "5", "8"))
 93 | DefaultAssay(plasma)<-"integrated"
 94 | plasma<- RunPCA(plasma)
 95 | plasma <- RunUMAP(plasma, dims = 1:30)
 96 | plasma <-FindNeighbors(plasma, dims = 1:30)
 97 | plasma <-FindClusters(plasma, resolution = 0.25)
 98 | Idents(plasma)<-"integrated_snn_res.0.25"
 99 | DimPlot(plasma, label=T)
100 | DefaultAssay(plasma)<-"RNA"
101 | #markers_plasma <- FindAllMarkers(plasma, only.pos = TRUE)
102 | new.idents<-c("IgA_plasma", "IgA_plasma", "IgG_plasma", "IgA_plasma", "IgG_plasma", "MT-Hi_plasma", "IgM_plasma", "IgA_plasma", "IgM_plasma")
103 | names(new.idents) <- levels(plasma)
104 | plasma=RenameIdents(plasma,new.idents)
105 | DimPlot(plasma, label=T)
106 | plasma[["celltypes"]] <- Idents(object = plasma)
107 | DimPlot(plasma)
108 | meta_plasma<-plasma@meta.data
109 | write.csv(meta_plasma, "meta_plasma.csv")
110 | 
111 | # create meta_all
112 | colnames(meta_stromal)
113 | colnames(meta_leuko)
114 | colnames(meta_plasma)
115 | colnames(meta_epi)
116 | meta_plasma<-meta_plasma[c(1,29)]
117 | meta_leuko<-meta_leuko[c(1,29)]
118 | meta_stromal<-meta_stromal[c(1,29)]
119 | meta_epi <- meta_epi[c(1,29)]
120 | x<-rbind(meta_stromal, meta_leuko)
121 | x<-rbind(x, meta_plasma)
122 | x<-rbind(x, meta_epi)
123 | write.csv(x, "Data/meta_all2.csv")
124 | 
125 | # add celltypes
126 | x$NAME<-rownames(x)
127 | x<-x[,c(2,3)]
128 | row.names(x)<-NULL
129 | CellsMeta<-data.frame(data@meta.data)
130 | CellsMeta$NAME<-row.names(CellsMeta)
131 | row.names(CellsMeta)=NULL
132 | keeping.order <- function(data, fn, ...) { 
133 |   col <- ".sortColumn"
134 |   data[,col] <- 1:nrow(data) 
135 |   out <- fn(data, ...) 
136 |   if (!col %in% colnames(out)) stop("Ordering column not preserved by function") 
137 |   out <- out[order(out[,col]),] 
138 |   out[,col] <- NULL 
139 |   out 
140 | } 
141 | CellsMeta<-keeping.order(CellsMeta, merge, y=x, by = "NAME")
142 | CellsMeta<-CellsMeta[,c(1,29)]
143 | rownames(CellsMeta)<-CellsMeta$NAME
144 | data<-AddMetaData(data, CellsMeta)
145 | Idents(data)=data$celltypes
146 | 
147 | 
148 | DimPlot(data, label = T, repel = T)
149 | CellsMeta<-data.frame(data@meta.data)
150 | CellsMeta$NAME<-row.names(CellsMeta)
151 | row.names(CellsMeta)=NULL
152 | 
153 | severity<-read.csv("severity_patients_3_2021.csv", sep=";")
154 | severity<-severity[-6,]
155 | severity$Final_HTO<-droplevels(severity$Final_HTO)
156 | dim(CellsMeta)
157 | CellsMeta<-merge(CellsMeta, severity, by="Final_HTO")
158 | dim(CellsMeta)
159 | 
160 | CellsMeta<-CellsMeta[,c(28,30,31)]
161 | rownames(CellsMeta)<-CellsMeta$NAME
162 | dim(data)
163 | data<-AddMetaData(data, CellsMeta)
164 | dim(data)
165 | Idents(data)=data$celltypes
166 | 
167 | data = subset(data, subset = celltypes != "Mt")
168 | data = subset(data, subset = celltypes != "MT_Hi_T")
169 | data = subset(data, subset = celltypes != "Doublets")
170 | data = subset(data, subset = celltypes != "MT_Hi_B")
171 | data = subset(data, subset = celltypes != "MT-Hi_plasma")
172 | data = subset(data, subset = celltypes != "Mt-Hi_stromal")
173 | data = subset(data, subset = celltypes != "MT-Hi-enterocyte")
174 | 
175 | Idents(data)=data$celltypes
176 | DimPlot(data, label = T, repel = T)
177 | 
178 | DefaultAssay(data)<-"integrated"
179 | data <- RunUMAP(data, dims = 1:30)
180 | data <-FindNeighbors(data, dims = 1:30)
181 | data<-FindClusters(data, resolution = 0.4)
182 | dim(data)
183 | Idents(data)=data$celltypes
184 | DimPlot(data, label = T, repel = T, order = c("MAIT","WNT2B+", "Absorptive_enterocyte", "IgA_plasma", "WNT5B+", "Aborptive_TA")) + NoLegend()
185 | 
186 | ggsave("dimplot_data2march2021.pdf", height=15, width=15)
187 | 
188 | saveRDS(data, "Data/PSC_processed_march16_2021.rds")
189 | 
190 | # create figures and proceed to DE analysis
191 | 


--------------------------------------------------------------------------------
/PSC/DE_analysis (PSC_I and UC_I per cell type)_01062022.R:
--------------------------------------------------------------------------------
  1 | #######################################
  2 | # Generate DE list per cell type      #
  3 | # between PSC-I and UC-I              #
  4 | #######################################
  5 | 
  6 | 
  7 | 
  8 | #######################################
  9 | # library                             #
 10 | #######################################
 11 | 
 12 | library(dplyr)
 13 | library(Seurat)
 14 | library(patchwork)
 15 | library(ggplot2)
 16 | library(readr)
 17 | library(tidyr)
 18 | library(MAST)
 19 | library(readxl)
 20 | library(openxlsx)
 21 | library(enrichR)
 22 | 
 23 | 
 24 | #######################################
 25 | # function                            #
 26 | #######################################
 27 | 
 28 | DE_list <- function(celltype, cell_type, path1, path2){
 29 |   celltype <- FindMarkers(PSC, subset.ident = cell_type, group.by = "state", test.use = "MAST", ident.1 = "PSC-I", ident.2 = "UC-I")
 30 |   celltype <- filter(celltype, celltype$p_val_adj < 0.05)
 31 |   # create a 'state' column
 32 |   celltype$state = NA
 33 |   celltype = as.data.frame(celltype)
 34 |   celltype$state = ifelse(celltype$avg_log2FC > 0,"PSC_I","UC_I")
 35 |   # create a 'gene' column
 36 |   celltype$gene = NA
 37 |   celltype$gene = rownames(celltype)
 38 |   # separate PSC_I up gene list and UC_I up gene list
 39 |   PSC_I_up_celltype = filter(celltype, celltype$state == "PSC_I")  
 40 |   UC_I_up_celltype = filter(celltype, celltype$state == "PSC_I")
 41 |   write.csv(PSC_I_up_celltype, path1)
 42 |   write.csv(UC_I_up_celltype, path2)
 43 | }
 44 | 
 45 | 
 46 | #########################
 47 | # main code             #
 48 | #########################
 49 | 
 50 | PSC <- readRDS("/Applications/Chapters/4 PSC_Werna/PSC_processed_march16_2021.rds")
 51 | 
 52 | # how many cell types in PSC dataset?
 53 | unique(PSC$celltypes)
 54 | 
 55 | #[1] CD8T                    Cycling_TA              IgA_plasma              Absorptive_enterocyte  
 56 | #[5] Immature_enterocyte     Glia                    IgG_plasma              MAIT                   
 57 | #[9] Myofibroblasts          DUOX2_enterocyte        Mature_B                PLCG2_TA               
 58 | #[13] REG_TA                  RSPO3_fibroblast        Stem                    IgM_plasma             
 59 | #[17] DC                      Treg                    Activated_B             Macrophage             
 60 | #[21] Tuft                    Enteroendocrine         Memory_CD4T             BEST4_enterocyte       
 61 | #[25] Endothelial             WNT5B_fibroblast        Pericytes               Inflammatory_fibroblast
 62 | #[29] WNT2B_fibroblast        Activated_cycling_B     Inflammatory_monocyte   Absorptive_TA          
 63 | #[33] Ribo_TA                 Cycling_T               Goblet                  Immature_goblet        
 64 | #[37] MAST 
 65 | 
 66 | # PSC-I vs UC-I
 67 | Idents(PSC) <- "celltypes"
 68 | 
 69 | # Apply each cell type to function
 70 | 
 71 | CD8T <- DE_list(CD8T, "CD8T", "/Users/s.qs/Desktop/DE list/PSC_I_up_CD8T.csv", "/Users/s.qs/Desktop/DE list/UC_I_up_CD8T.csv")
 72 | Cycling_TA <- DE_list(Cycling_TA, "Cycling_TA", "/Users/s.qs/Desktop/DE list/PSC_I_up_Cycling_TA.csv", "/Users/s.qs/Desktop/DE list/UC_I_up_Cycling_TA.csv")
 73 | IgA_plasma <- DE_list(IgA_plasma, "IgA_plasma", "/Users/s.qs/Desktop/DE list/PSC_I_up_IgA_plasma.csv", "/Users/s.qs/Desktop/DE list/UC_I_up_IgA_plasma.csv")
 74 | Absorptive_enterocyte <- DE_list(Absorptive_enterocyte, "Absorptive_enterocyte", "/Users/s.qs/Desktop/DE list/PSC_I_up_Absorptive_enterocyte.csv", "/Users/s.qs/Desktop/DE list/UC_I_up_Absorptive_enterocyte.csv")
 75 | Immature_enterocyte <- DE_list(Immature_enterocyte, "Immature_enterocyte", "/Users/s.qs/Desktop/DE list/PSC_I_up_Immature_enterocyte.csv", "/Users/s.qs/Desktop/DE list/UC_I_up_Immature_enterocyte.csv")
 76 | Glia <- DE_list(Glia, "Glia", "/Users/s.qs/Desktop/DE list/PSC_I_up_Glia.csv", "/Users/s.qs/Desktop/DE list/UC_I_up_Glia.csv")
 77 | IgG_plasma <- DE_list(IgG_plasma, "IgG_plasma", "/Users/s.qs/Desktop/DE list/PSC_I_up_IgG_plasma.csv", "/Users/s.qs/Desktop/DE list/UC_I_up_IgG_plasma.csv")
 78 | MAIT <- DE_list(MAIT, "MAIT", "/Users/s.qs/Desktop/DE list/MAIT/PSC_I_up_MAIT.csv", "/Users/s.qs/Desktop/DE list/MAIT/UC_I_up_MAIT.csv")
 79 | Myofibroblasts <- DE_list(Myofibroblasts, "Myofibroblasts", "/Users/s.qs/Desktop/DE list/Myofibroblasts/PSC_I_up_Myofibroblasts.csv", "/Users/s.qs/Desktop/DE list/Myofibroblasts/UC_I_up_Myofibroblasts.csv")
 80 | DUOX2_enterocyte <- DE_list(DUOX2_enterocyte, "DUOX2_enterocyte", "/Users/s.qs/Desktop/DE list/DUOX2_enterocyte/PSC_I_up_DUOX2_enterocyte.csv", "/Users/s.qs/Desktop/DE list/DUOX2_enterocyte/UC_I_up_DUOX2_enterocyte.csv")
 81 | Mature_B <- DE_list(Mature_B, "Mature_B", "/Users/s.qs/Desktop/DE list/Mature_B/PSC_I_up_Mature_B.csv", "/Users/s.qs/Desktop/DE list/Mature_B/UC_I_up_Mature_B.csv")
 82 | PLCG2_TA <- DE_list(PLCG2_TA, "PLCG2_TA", "/Users/s.qs/Desktop/DE list/PLCG2_TA/PSC_I_up_PLCG2_TA.csv", "/Users/s.qs/Desktop/DE list/PLCG2_TA/UC_I_up_PLCG2_TA.csv")
 83 | REG_TA <- DE_list(REG_TA, "REG_TA", "/Users/s.qs/Desktop/DE list/REG_TA/PSC_I_up_REG_TA.csv", "/Users/s.qs/Desktop/DE list/REG_TA/UC_I_up_REG_TA.csv")
 84 | RSPO3_fibroblast <- DE_list(RSPO3_fibroblast, "RSPO3_fibroblast", "/Users/s.qs/Desktop/DE list/RSPO3_fibroblast/PSC_I_up_RSPO3_fibroblast.csv", "/Users/s.qs/Desktop/DE list/RSPO3_fibroblast/UC_I_up_RSPO3_fibroblast.csv")
 85 | Stem <- DE_list(Stem, "Stem", "/Users/s.qs/Desktop/DE list/Stem/PSC_I_up_Stem.csv", "/Users/s.qs/Desktop/DE list/Stem/UC_I_up_Stem.csv")
 86 | IgM_plasma <- DE_list(IgM_plasma, "IgM_plasma", "/Users/s.qs/Desktop/DE list/IgM_plasma/PSC_I_up_IgM_plasma.csv", "/Users/s.qs/Desktop/DE list/IgM_plasma/UC_I_up_IgM_plasma.csv")
 87 | DC <- DE_list(DC, "DC", "/Users/s.qs/Desktop/DE list/DC/PSC_I_up_DC.csv", "/Users/s.qs/Desktop/DE list/DC/UC_I_up_DC.csv")
 88 | Treg <- DE_list(Treg, "Treg", "/Users/s.qs/Desktop/DE list/Treg/PSC_I_up_TregC.csv", "/Users/s.qs/Desktop/DE list/Treg/UC_I_up_Treg.csv")
 89 | Activated_B <- DE_list(Activated_B, "Activated_B", "/Users/s.qs/Desktop/DE list/Activated_B/PSC_I_up_Activated_B.csv", "/Users/s.qs/Desktop/DE list/Activated_B/UC_I_up_Activated_B.csv")
 90 | Macrophage <- DE_list(Macrophage, "Macrophage", "/Users/s.qs/Desktop/DE list/Macrophage/PSC_I_up_Macrophage.csv", "/Users/s.qs/Desktop/DE list/Macrophage/UC_I_up_Macrophage.csv")
 91 | Tuft <- DE_list(Tuft, "Tuft", "/Users/s.qs/Desktop/DE list/Tuft/PSC_I_up_Tuft.csv", "/Users/s.qs/Desktop/DE list/Tuft/UC_I_up_Tuft.csv")
 92 | Enteroendocrine <- DE_list(Enteroendocrine, "Enteroendocrine", "/Users/s.qs/Desktop/DE list/Enteroendocrine/PSC_I_up_Enteroendocrine.csv", "/Users/s.qs/Desktop/DE list/Enteroendocrine/UC_I_up_Enteroendocrine.csv")
 93 | Memory_CD4T <- DE_list(Memory_CD4T, "Memory_CD4T", "/Users/s.qs/Desktop/DE list/Memory_CD4T/PSC_I_up_Memory_CD4T.csv", "/Users/s.qs/Desktop/DE list/Memory_CD4T/UC_I_up_Memory_CD4T.csv")
 94 | BEST4_enterocyte <- DE_list(BEST4_enterocyte, "BEST4_enterocyte", "/Users/s.qs/Desktop/DE list/BEST4_enterocyte/PSC_I_up_BEST4_enterocyte.csv", "/Users/s.qs/Desktop/DE list/BEST4_enterocyte/UC_I_up_BEST4_enterocyte.csv")
 95 | Endothelial <- DE_list(Endothelial, "Endothelial", "/Users/s.qs/Desktop/DE list/Endothelial/PSC_I_up_Endothelial.csv", "/Users/s.qs/Desktop/DE list/Endothelial/UC_I_up_Endothelial.csv")
 96 | WNT5B_fibroblast <- DE_list(WNT5B_fibroblast, "WNT5B_fibroblast", "/Users/s.qs/Desktop/DE list/WNT5B_fibroblast/PSC_I_up_WNT5B_fibroblast.csv", "/Users/s.qs/Desktop/DE list/WNT5B_fibroblast/UC_I_up_WNT5B_fibroblast.csv")
 97 | Pericytes <- DE_list(Pericytes, "Pericytes", "/Users/s.qs/Desktop/DE list/Pericytes/PSC_I_up_Pericytes.csv", "/Users/s.qs/Desktop/DE list/Pericytes/UC_I_up_Pericytes.csv")
 98 | Inflammatory_fibroblast <- DE_list(Inflammatory_fibroblast, "Inflammatory_fibroblast", "/Users/s.qs/Desktop/DE list/Inflammatory_fibroblast/PSC_I_up_Inflammatory_fibroblast.csv", "/Users/s.qs/Desktop/DE list/Inflammatory_fibroblast/UC_I_up_Inflammatory_fibroblast.csv")
 99 | WNT2B_fibroblast <- DE_list(WNT2B_fibroblast, "WNT2B_fibroblast", "/Users/s.qs/Desktop/DE list/WNT2B_fibroblast/PSC_I_up_WNT2B_fibroblast.csv", "/Users/s.qs/Desktop/DE list/WNT2B_fibroblast/UC_I_up_WNT2B_fibroblast.csv")
100 | Activated_cycling_B <- DE_list(Activated_cycling_B, "Activated_cycling_B", "/Users/s.qs/Desktop/DE list/Activated_cycling_B/PSC_I_up_Activated_cycling_B.csv", "/Users/s.qs/Desktop/DE list/Activated_cycling_B/UC_I_up_Activated_cycling_B.csv")
101 | Inflammatory_monocyte <- DE_list(Inflammatory_monocyte, "Inflammatory_monocyte", "/Users/s.qs/Desktop/DE list/Inflammatory_monocyte/PSC_I_up_Inflammatory_monocyte.csv", "/Users/s.qs/Desktop/DE list/Inflammatory_monocyte/UC_I_up_Inflammatory_monocyte.csv")
102 | Absorptive_TA <- DE_list(Absorptive_TA, "Absorptive_TA", "/Users/s.qs/Desktop/DE list/Absorptive_TA/PSC_I_up_Absorptive_TA.csv", "/Users/s.qs/Desktop/DE list/Absorptive_TA/UC_I_up_Absorptive_TA.csv")
103 | Ribo_TA <- DE_list(Ribo_TA, "Ribo_TA", "/Users/s.qs/Desktop/DE list/Ribo_TA/PSC_I_up_Ribo_TA.csv", "/Users/s.qs/Desktop/DE list/Ribo_TA/UC_I_up_Ribo_TA.csv")
104 | Cycling_T <- DE_list(Cycling_T, "Cycling_T", "/Users/s.qs/Desktop/DE list/Cycling_T/PSC_I_up_Cycling_T.csv", "/Users/s.qs/Desktop/DE list/Cycling_T/UC_I_up_Cycling_T.csv")
105 | Goblet <- DE_list(Goblet, "Goblet", "/Users/s.qs/Desktop/DE list/Goblet/PSC_I_up_Goblet.csv", "/Users/s.qs/Desktop/DE list/Goblet/UC_I_up_Goblet.csv")
106 | Immature_goblet <- DE_list(Immature_goblet, "Immature_goblet", "/Users/s.qs/Desktop/DE list/Immature_goblet/PSC_I_up_Immature_goblet.csv", "/Users/s.qs/Desktop/DE list/Immature_goblet/UC_I_up_Immature_goblet.csv")
107 | MAST <- DE_list(MAST, "MAST", "/Users/s.qs/Desktop/DE list/MAST/PSC_I_up_MAST.csv", "/Users/s.qs/Desktop/DE list/MAST/UC_I_up_MAST.csv")
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 
120 | 
121 | 
122 | 
123 | 
124 | 
125 | 
126 | 
127 | 
128 | 
129 | 


--------------------------------------------------------------------------------
/DDTx/ddtx_compartment_assignment_demuxlet.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "ddtx_compartment_assignment_demuxlet"
  3 | author: "adapted from Roy Oelen"
  4 | date: "2023-06-16"
  5 | output: html_document
  6 | ---
  7 | 
  8 | ```{r header, include=FALSE}
  9 | ############################################################################################################################
 10 | # Authors: Roy Oelen
 11 | # Name: ddtx_compartment_assignment.Rmd
 12 | # Function: assign compartments to the cells
 13 | ############################################################################################################################
 14 | ```
 15 | 
 16 | ```{r libraries, include=FALSE}
 17 | knitr::opts_chunk$set(echo = FALSE)
 18 | ####################
 19 | # libraries        #
 20 | ####################
 21 | 
 22 | # for the object containing meta and count data
 23 | library(Seurat)
 24 | # for plotting
 25 | library(ggplot2)
 26 | library(cowplot)
 27 | 
 28 | ```
 29 | 
 30 | ```{r functions, include=FALSE}
 31 | knitr::opts_chunk$set(echo = FALSE)
 32 | ####################
 33 | # Functions        #
 34 | ####################
 35 | 
 36 | get_average_expression_per_group <- function(seurat_object, metadata_column, genes, use_sct=F){
 37 |   # initialize the table
 38 |   expression_table <- NULL
 39 |   # check each group
 40 |   for(group in unique(seurat_object@meta.data[[metadata_column]])){
 41 |     # we won't do NA, obviously
 42 |     if(!is.na(group)){
 43 |       # subset to that group
 44 |       seurat_object_group <- seurat_object[, !is.na(seurat_object@meta.data[[metadata_column]]) & seurat_object@meta.data[[metadata_column]] == group]
 45 |       # put per gene in a list
 46 |       exp_per_list <- list()
 47 |       # check each gene
 48 |       for(gene in genes){
 49 |         # get the mean expression
 50 |         mean_expression <- NULL
 51 |         # depending on the assay
 52 |         if(use_sct){
 53 |           mean_expression <- mean(as.vector(unlist(seurat_object_group@assays$SCT@counts[gene, ])))
 54 |         }
 55 |         else{
 56 |           mean_expression <- mean(as.vector(unlist(seurat_object_group@assays$RNA@data[gene, ])))
 57 |         }
 58 |         # put in the list
 59 |         exp_per_list[[gene]] <- mean_expression
 60 |       }
 61 |       # turn into a row
 62 |       exp_row <- data.frame(exp_per_list)
 63 |       # add the group
 64 |       exp_row[['group']] <- group
 65 |       # set the correct order
 66 |       exp_row <- exp_row[, c('group', genes)]
 67 |       # add to the big matrix
 68 |       if(is.null(expression_table)){
 69 |         expression_table <- exp_row
 70 |       }
 71 |       else{
 72 |         expression_table <- rbind(expression_table, exp_row)
 73 |       }
 74 |     }
 75 |   }
 76 |   return(expression_table)
 77 | }
 78 | 
 79 | 
 80 | ```
 81 | 
 82 | ```{r setup, include=FALSE}
 83 | knitr::opts_chunk$set(echo = FALSE)
 84 | ####################
 85 | # Main Code        #
 86 | ####################
 87 | 
 88 | # locations of the objects
 89 | seurat_objects_loc <- '/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/seurat_preprocess_samples/'
 90 | seurat_object_clustered_loc <- paste(seurat_objects_loc, 'ddtx_merged_demultiplexed_clustered_demuxlet.rds', sep = '')
 91 | seurat_object_compartment_loc <- paste(seurat_objects_loc, 'ddtx_merged_demultiplexed_clustered_compartment_demuxlet.rds', sep = '')
 92 | # location of the tables
 93 | tables_loc <- '/groups/umcg-weersma/tmp01/projects/ddtx/ongoing/cell-type-classification/marker_genes/tables/'
 94 | 
 95 | ```
 96 | 
 97 | ```{r read_object, include=FALSE}
 98 | knitr::opts_chunk$set(echo = FALSE)
 99 | 
100 | # read the object
101 | ddtx <- readRDS(seurat_object_clustered_loc)
102 | 
103 | ```
104 | 
105 | ```{r avg_cluster_expression, include=FALSE}
106 | knitr::opts_chunk$set(echo = FALSE)
107 | 
108 | # get the mean expression of marker genes per cluster
109 | avg_exp_cluster <- get_average_expression_per_group(ddtx, 'seurat_clusters', c('EPCAM', 'PTPRC', 'THY1', 'SOX10', 'MADCAM1', 'CD27'))
110 | # write this somewhere
111 | write.table(avg_exp_cluster, paste(tables_loc, 'cluster_mean_exp_markers.tsv', sep = ''), row.names = F, col.names = T)
112 | 
113 | ```
114 | 
115 | ```{r create_features_plots, include=FALSE}
116 | knitr::opts_chunk$set(echo = FALSE)
117 | 
118 | # switch to the RNA slot, that is the one we also used in previous studies for this
119 | DefaultAssay(ddtx) <- 'RNA'
120 | # create the features plots
121 | epcam_feature <- FeaturePlot(ddtx, reduction = 'umap', features = c('EPCAM'))
122 | cd45_feature <- FeaturePlot(ddtx, reduction = 'umap', features = c('PTPRC'))
123 | cd27_feature <- FeaturePlot(ddtx, reduction = 'umap', features = c('CD27'))
124 | thy1_feature <- FeaturePlot(ddtx, reduction = 'umap', features = c('THY1'))
125 | sox10_feature <- FeaturePlot(ddtx, reduction = 'umap', features = c('SOX10'))
126 | madcam1_feature <- FeaturePlot(ddtx, reduction = 'umap', features = c('MADCAM1'))
127 | 
128 | ```
129 | 
130 | ## marker gene features
131 | ```{r show_features_plots, include=TRUE, fig.width=10, fig.height=10}
132 | knitr::opts_chunk$set(echo = FALSE)
133 | 
134 | # show the features
135 | plot_grid(epcam_feature, cd45_feature, cd27_feature, thy1_feature, sox10_feature, madcam1_feature)
136 | 
137 | ```
138 | 
139 | ```{r positivity_assignment, include=FALSE}
140 | knitr::opts_chunk$set(echo = FALSE)
141 | 
142 | # set the positivity for the markers
143 | ddtx@meta.data$epcam_positive <- NA
144 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['EPCAM']] >= 1, 'group'], 'epcam_positive'] <- T
145 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['EPCAM']] < 1, 'group'], 'epcam_positive'] <- F
146 | ddtx@meta.data$cd45_positive <- NA
147 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['PTPRC']] >= 1, 'group'], 'cd45_positive'] <- T
148 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['PTPRC']] < 1, 'group'], 'cd45_positive'] <- F
149 | ddtx@meta.data$thy1_positive <- NA
150 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['THY1']] >= 0.5, 'group'], 'thy1_positive'] <- T
151 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['THY1']] < 0.5, 'group'], 'thy1_positive'] <- F
152 | ddtx@meta.data$sox10_positive <- NA
153 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['SOX10']] >= 0.5, 'group'], 'sox10_positive'] <- T
154 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['SOX10']] < 0.5, 'group'], 'sox10_positive'] <- F
155 | ddtx@meta.data$madcam1_positive <- NA
156 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['MADCAM1']] >= 0.2, 'group'], 'madcam1_positive'] <- T
157 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['MADCAM1']] < 0.2, 'group'], 'madcam1_positive'] <- F
158 | ddtx@meta.data$cd27_positive <- NA
159 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['CD27']] >= 0.3, 'group'], 'cd27_positive'] <- T
160 | ddtx@meta.data[ddtx@meta.data[['seurat_clusters']] %in% avg_exp_cluster[avg_exp_cluster[['CD27']] < 0.3, 'group'], 'cd27_positive'] <- F
161 | 
162 | # and create the plots
163 | epcam_positive <- DimPlot(ddtx, reduction = 'umap', group.by = 'epcam_positive') + scale_color_manual(values = c('gray', 'red'))
164 | cd45_positive <- DimPlot(ddtx, reduction = 'umap', group.by = 'cd45_positive') + scale_color_manual(values = c('gray', 'red'))
165 | cd27_positive <- DimPlot(ddtx, reduction = 'umap', group.by = 'cd27_positive') + scale_color_manual(values = c('gray', 'red'))
166 | thy1_positive <- DimPlot(ddtx, reduction = 'umap', group.by = 'thy1_positive') + scale_color_manual(values = c('gray', 'red'))
167 | sox10_positive <- DimPlot(ddtx, reduction = 'umap', group.by = 'sox10_positive') + scale_color_manual(values = c('gray', 'red'))
168 | madcam1_positive <- DimPlot(ddtx, reduction = 'umap', group.by = 'madcam1_positive') + scale_color_manual(values = c('gray', 'red'))
169 | 
170 | ```
171 | 
172 | ### marker genes and the positive clusters
173 | ```{r show_positivity, include=TRUE, fig.width=10, fig.height=30}
174 | knitr::opts_chunk$set(echo = FALSE)
175 | 
176 | # view everything
177 | plot_grid(epcam_feature, epcam_positive,
178 |           cd45_feature, cd45_positive,
179 |           cd27_feature, cd27_positive,
180 |           thy1_feature, thy1_positive,
181 |           sox10_feature, sox10_positive,
182 |           madcam1_feature, madcam1_positive,
183 |           ncol = 2, nrow = 6)
184 | 
185 | ```
186 | 
187 | ```{r compartment_assignment, include=FALSE}
188 | knitr::opts_chunk$set(echo = FALSE)
189 | 
190 | # assign the compartment
191 | ddtx@meta.data$compartment <- 'none'
192 | ddtx@meta.data[ddtx@meta.data$epcam_positive == T, 'compartment'] <- 'epithelial'
193 | ddtx@meta.data[ddtx@meta.data$cd45_positive == T |
194 |                    ddtx@meta.data$cd27_positive == T, 'compartment'] <- 'immune'
195 | ddtx@meta.data[ddtx@meta.data$thy1_positive == T |
196 |                    ddtx@meta.data$sox10_positive == T |
197 |                    ddtx@meta.data$madcam1_positive == T, 'compartment'] <- 'stromal'
198 | 
199 | ```
200 | 
201 | ```{r create_compartment_plot, include=FALSE}
202 | knitr::opts_chunk$set(echo = FALSE)
203 | 
204 | # create the plot of the compartment assignment
205 | p_compartment <- DimPlot(ddtx, reduction = 'umap', group.by = 'compartment', label = FALSE, label.size = 3, repel = TRUE, raster = FALSE) + ggtitle('compartment')
206 | 
207 | ```
208 | 
209 | ### final compartment assignment
210 | 
211 | ```{r show_plots, include = TRUE, fig.width=10, fig.height=10}
212 | knitr::opts_chunk$set(echo = FALSE)
213 | 
214 | p_compartment
215 | ```
216 | 
217 | ```{r save, include=FALSE}
218 | # save the result
219 | saveRDS(ddtx, seurat_object_compartment_loc)
220 | ```
221 | 


--------------------------------------------------------------------------------
/DDTx/cell_number_tables.py:
--------------------------------------------------------------------------------
  1 | ### original script by R. Oelen
  2 | 
  3 | import tensorflow as tf
  4 | import pandas as pd
  5 | import csv
  6 | import pickle as pkl
  7 | import matplotlib.pyplot as plt
  8 | from sccoda.util import cell_composition_data as dat
  9 | from sccoda.util import data_visualization as viz
 10 | from sccoda.util import comp_ana as mod
 11 | import os
 12 | 
 13 | # create class we will use
 14 | class ProportionAnalyzer:
 15 |   """
 16 |     Object to load the count data, set parameters and fetch results
 17 |     """
 18 | def __init__(self, count_data_loc, treatment_column='donor_recipient', exclusion_columns='ACR_grade', covariates_columns=[], fdr=0.05, sep='\t', reference=None):
 19 |   """constructor
 20 |         
 21 |         Parameters
 22 |         ----------
 23 |         count_data_loc : str
 24 |             The location of the count data to load
 25 |         treatment_column : str, optional
 26 |             The column that contains the data upon which to compare conditions or treatments
 27 |         exclusion_columns : list, optional
 28 |             The columns that are removed from the count data (for example covariates that are not used, or cell types to exclude)
 29 |         covariates_columns : list, optional
 30 |             The columns that contain the covariates to use
 31 |         fdr : float, optional
 32 |             The FDR cutoff to use
 33 |         sep : str, optional
 34 |             The separator for reading the count data
 35 |         
 36 |         """
 37 | # set up class variables from constructor
 38 | self.__count_data_loc = count_data_loc
 39 | self.__treatment_column = treatment_column
 40 | self.__exclusion_columns = exclusion_columns
 41 | self.__covariates_columns = covariates_columns
 42 | self.__fdr = fdr
 43 | self.__sep = sep
 44 | # other variables that will be populated later
 45 | self.__raw_counts = None
 46 | self.__filtered_counts = None
 47 | self.__sccoda = None
 48 | self.__reference = reference
 49 | # set up the count data
 50 | self.__setup_count_data()
 51 | # set up the reference
 52 | if self.__reference is None:
 53 |   self.__setup_reference()
 54 | # of course we will at some point have a result
 55 | self.__has_run = False
 56 | self.__formula = None
 57 | self.__model = None
 58 | self.__result = None
 59 | # set up a seed so that the results are always the same
 60 | tf.random.uniform([1], seed=7777)
 61 | tf.random.set_seed(7777)
 62 | 
 63 | def __setup_reference(self):
 64 |   """set up the reference for the model
 65 |         
 66 |         """
 67 | # get the unique entries possible
 68 | levels = self.__filtered_counts[self.__treatment_column].unique()
 69 | # the first level is the reference
 70 | self.__reference = levels[0]
 71 | 
 72 | 
 73 | def __make_columns_safe(self):
 74 |   """rename columns so that they don't contain quotes
 75 |         
 76 |         """
 77 | # get the current column names
 78 | current_colnames = self.__raw_counts.columns
 79 | # create a new list of columns
 80 | new_colnames = []
 81 | # replace each occurence
 82 | for old_column in current_colnames:
 83 |   # replace
 84 |   new_column = old_column.replace('"', '')
 85 | # add to list
 86 | new_colnames.append(new_column)
 87 | 
 88 | # zip to create a mapping from the old to the new column names
 89 | columnname_mapping = dict(zip(current_colnames, new_colnames))
 90 | # and do the actual renaming
 91 | self.__raw_counts.rename(columnname_mapping, axis = 'columns', inplace = True)
 92 | 
 93 | 
 94 | def __setup_count_data(self):
 95 |   """setup the count data to be used for the analysis
 96 |         
 97 |         """
 98 | # read the count data
 99 | self.__raw_counts = pd.read_csv(self.__count_data_loc, sep = self.__sep, quoting = csv.QUOTE_NONE)
100 | # make the columns safe
101 | self.__make_columns_safe()
102 | # make a copy from which we will remove some columns
103 | self.__filtered_counts = self.__raw_counts.copy(deep = True)
104 | # remove the columns that we don't need
105 | self.__filtered_counts.drop(self.__exclusion_columns, axis = 'columns', inplace = True)
106 | # set up the sccoda object
107 | self.__sccoda = dat.from_pandas(self.__filtered_counts, covariate_columns = self.__covariates_columns + [self.__treatment_column])
108 | 
109 | 
110 | def create_boxplot(self):
111 |   """plot the counts
112 |         
113 |         """
114 | # create the plot
115 | viz.boxplots(self.__sccoda, feature_name = self.__treatment_column)
116 | plt.tight_layout()
117 | 
118 | 
119 | def run_model(self):
120 |   """run the model
121 |         
122 |         """
123 | # set up the covariates
124 | covariates_formula = ''
125 | for covariate in self.__covariates_columns:
126 |   covariates_formula = ''.join([covariates_formula, covariate, ' + '])
127 | # set up treatment
128 | treatment_formula = "C(" + self.__treatment_column + ", Treatment('" + self.__reference + "'))"
129 | # create formula
130 | self.__formula = covariates_formula + treatment_formula
131 | # create the model
132 | self.__model = mod.CompositionalAnalysis(self.__sccoda, formula = self.__formula)
133 | # run the analysis
134 | self.__result = self.__model.sample_hmc()
135 | # we ran the analysis, so let's say so
136 | self.__has_run = True
137 | 
138 | 
139 | def get_result(self):
140 |   """return the result of the model
141 |         
142 |         Returns
143 |         -------
144 |         result
145 |            The result of the compositional analysis
146 |         """
147 | # check if we have run the model
148 | if self.__has_run:
149 |   return self.__result
150 | else:
151 |   print('model has not been run, returning None')
152 | return None
153 | 
154 | 
155 | def get_summary(self, fdr=None):
156 |   """get a summary of the results for a specific FDR
157 |         
158 |         Parameters
159 |         ----------
160 |         fdr : float, optional
161 |             The FDR cutoff to use
162 |         
163 |         Returns
164 |         -------
165 |         result
166 |            summary of the compositional analysis
167 |         """
168 | # see if an fdr was supplied
169 | fdr_to_use=None
170 | if fdr is None:
171 |   fdr_to_use = self.__fdr
172 | else:
173 |   fdr_to_use = fdr
174 | 
175 | # check if we have run the model
176 | if self.__has_run:
177 |   # if so, return a summary
178 |   summary = self.__result.summary_prepare([fdr_to_use])
179 | return(summary)
180 | else:
181 |   print('model has not been run, returning None')
182 | return None
183 | 
184 | 
185 | def get_raw_counts(self):
186 |   """get the raw counts
187 |         
188 |         Returns
189 |         -------
190 |         pandas.DataFrame
191 |            The raw counts before filtering columns
192 |         
193 |         """
194 | return self.__raw_counts
195 | 
196 | 
197 | def get_counts(self):
198 |   """get the cell counts
199 |         
200 |         Returns
201 |         -------
202 |         pandas.Dataframe
203 |             The filtered cell counts
204 |         
205 |         """
206 | return self.__filtered_counts
207 | 
208 | 
209 | def get_sccoda(self):
210 |   """get the sccoda input object
211 |         
212 |         Returns
213 |         -------
214 |         sccoda
215 |             The filtered cell counts
216 |         
217 |         """
218 | return self.__sccoda
219 | 
220 | 
221 | def get_fdr(self):
222 |   """get the fdr set
223 |         
224 |         Returns
225 |         -------
226 |         float
227 |             The fdr used
228 |         
229 |         """
230 | return self.__fdr
231 | 
232 | 
233 | 
234 | 
235 | # the locations of the cell proportion files
236 | proportions_table_loc = '/xx/ddtx/ongoing/differential_proportion/cell_number_tables/donor_recipient/'
237 | # locations of the specific cell type tables
238 | proportions_table_compartment_loc = ''.join([proportions_table_loc, 'cell_numbers_compartment_donor_recipient.tsv'])
239 | proportions_table_adult_elmentaite_martin_immune_loc = ''.join([proportions_table_loc, 'cell_numbers_adult_elmentaite_martin_immune.tsv'])
240 | # the column of the sample
241 | sample_column = 'timepoint'
242 | # the column of the inflammation status
243 | inflammation_column = 'donor_recipient'
244 | # the FDR to use
245 | fdr = 0.05
246 | 
247 | 
248 | 
249 | # perform the analysis for the compartment
250 | compartment_analysis = ProportionAnalyzer(count_data_loc = proportions_table_compartment_loc, exclusion_columns = ['sample_status_timepoint', 'ACR_grade'], covariates_columns = [], reference = '"d"')
251 | # show the plot
252 | compartment_analysis.create_boxplot()
253 | # do the analysis
254 | compartment_analysis.run_model()
255 | # get the result
256 | compartment_summary_005 = compartment_analysis.get_summary()
257 | 
258 | compartment_summary_005[0]
259 | 
260 | compartment_summary_005[1]
261 | 
262 | # perform the analysis for the compartment
263 | celltype_analysis = ProportionAnalyzer(count_data_loc = proportions_table_adult_elmentaite_martin_immune_loc, exclusion_columns = ['sample_status_timepoint', 'ACR_grade'], covariates_columns = [], reference = '"d"')
264 | # show the plot
265 | celltype_analysis.create_boxplot()
266 | # do the analysis
267 | celltype_analysis.run_model()
268 | # get the result
269 | celltype_summary_005 = celltype_analysis.get_summary()
270 | 
271 | celltype_summary_005[0]
272 | 
273 | 
274 | celltype_summary_005[1]
275 | 
276 | # save the results
277 | 
278 | result_output_loc = '/xx/ddtx/ongoing/differential_proportion/sccoda/'
279 | # specific files
280 | result_compartment_summary_005_general_summary_loc = ''.join([result_output_loc, 'ddtx_sccoda_compartment_005', '_general_summary.tsv'])
281 | result_compartment_summary_005_covariate_summary_loc = ''.join([result_output_loc, 'ddtx_sccoda_compartment_005', '_covariate_summary.tsv'])
282 | result_celltype_summary_005_general_summary_loc = ''.join([result_output_loc, 'ddtx_sccoda_adult_elmentaite_martin_immune_005', '_general_summary.tsv'])
283 | result_celltype_summary_005_covariate_summary_loc = ''.join([result_output_loc, 'ddtx_sccoda_adult_elmentaite_martin_immune_005', '_covariate_summary.tsv'])
284 | # write the results
285 | compartment_summary_005[0].to_csv(result_compartment_summary_005_general_summary_loc, sep = '\t')
286 | compartment_summary_005[1].to_csv(result_compartment_summary_005_covariate_summary_loc, sep = '\t')
287 | celltype_summary_005[0].to_csv(result_celltype_summary_005_general_summary_loc, sep = '\t')
288 | celltype_summary_005[1].to_csv(result_celltype_summary_005_covariate_summary_loc, sep = '\t')
289 | 
290 | 
291 | plt.rcParams['font.size'] = '5'
292 | celltype_analysis.create_boxplot()
293 | plt.xticks(rotation=45, ha='right')
294 | plt.savefig('ddtx_celltypes_donor_recipient.png',dpi=1000)
295 | 
296 | 


--------------------------------------------------------------------------------
/Method_comparisons/rebuttal_20210916:
--------------------------------------------------------------------------------
  1 | #### script rebuttal methodspaper
  2 | 
  3 | fresh_frozen@meta.data$Predicted_all_categories<-fresh_frozen@meta.data$Predicted_all
  4 | 
  5 | fresh_frozen@meta.data$Predicted_all_categories[fresh_frozen@meta.data$Predicted_all == "Bcell"]<-"immune"
  6 | fresh_frozen@meta.data$Predicted_all_categories[fresh_frozen@meta.data$Predicted_all == "Tcell"]<-"immune"
  7 | fresh_frozen@meta.data$Predicted_all_categories[fresh_frozen@meta.data$Predicted_all == "myeloid"]<-"immune"
  8 | fresh_frozen@meta.data$Predicted_all_categories[fresh_frozen@meta.data$Predicted_all == "Lymphoid"]<-"immune"
  9 | 
 10 | fresh_frozen@meta.data$Predicted_all_categories[fresh_frozen@meta.data$Predicted_all == "endothelial"]<-"stromal"
 11 | fresh_frozen@meta.data$Predicted_all_categories[fresh_frozen@meta.data$Predicted_all == "Glia"]<-"stromal"
 12 | fresh_frozen@meta.data$Predicted_all_categories[fresh_frozen@meta.data$Predicted_all == "fibroblast"]<-"stromal"
 13 | 
 14 | 
 15 | VlnPlot(fresh_frozen, group.by="Predicted_all_categories", features="percent.mt", split.by="preservation")
 16 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_mito_cell_categories.png", width = 20, height = 20, dpi = 600)
 17 | 
 18 | VlnPlot(data, group.by="Predicted_all_categories", features="percent.mt", split.by="method")
 19 | 
 20 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_mito_cell_categories_noNA.png", width = 20, height = 20, dpi = 600)
 21 | 
 22 | plot1 <- FeatureScatter(fresh_frozen, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Predicted_all_categories")
 23 | 
 24 | immune<-subset(fresh_frozen, subset=Predicted_all_categories=="immune")
 25 | epithelium<-subset(fresh_frozen, subset=Predicted_all_categories=="epithelium")
 26 | stromal<-subset(fresh_frozen, subset=Predicted_all_categories=="stromal")
 27 | 
 28 | fresh<-subset(fresh_frozen, subset=preservation=="fresh")
 29 | frozen<-subset(fresh_frozen, subset=preservation=="cryo")
 30 | 
 31 | plot1 <- FeatureScatter(fresh, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,12000)
 32 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_nCountRNA_nGene_celltypes.pdf", width = 10, height = 10, dpi = 600)
 33 | 
 34 | 
 35 | plot1 <- FeatureScatter(frozen, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,12000)
 36 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/frozen_nCountRNA_nGene_celltypes.pdf", width = 10, height = 10, dpi = 600)
 37 | 
 38 | 
 39 | plot1 <- FeatureScatter(fresh, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,60)
 40 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_nCountRNA_percentMt_celltypes.pdf", width = 10, height = 10, dpi = 600)
 41 | 
 42 | 
 43 | plot1 <- FeatureScatter(frozen, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,60)
 44 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/frozen_nCountRNA_percentMt_celltypes.pdf", width = 10, height = 10, dpi = 600)
 45 | 
 46 | VlnPlot(frozen, feature="percent.mt", group.by="Predicted_all")
 47 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/frozen_vpln_PercentMt_allcelltypes.pdf", width = 10, height = 10, dpi = 600)
 48 | 
 49 | 
 50 | plot1 <- FeatureScatter(immune, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="preservation", pt.size=1)
 51 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_nCountRNA_PercentMt_immune.png", width = 20, height = 20, dpi = 600)
 52 | 
 53 | plot1 <- FeatureScatter(stromal, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="preservation", pt.size=1)
 54 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_nCountRNA_PercentMt_stromal.png", width = 20, height = 20, dpi = 600)
 55 | 
 56 | plot1 <- FeatureScatter(epithelium, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="preservation", pt.size=1)
 57 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_nCountRNA_PercentMt_epithelium.png", width = 20, height = 20, dpi = 600)
 58 | 
 59 | plot2 <- FeatureScatter(immune, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="preservation", pt.size=1)
 60 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_nCountRNA_nFeature_immune.png", width = 20, height = 20, dpi = 600)
 61 | 
 62 | plot2 <- FeatureScatter(stromal, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="preservation", pt.size=1)
 63 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_nCountRNA_nFeature_stromal.png", width = 20, height = 20, dpi = 600)
 64 | 
 65 | plot2 <- FeatureScatter(epithelium, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="preservation", pt.size=1)
 66 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/fresh_frozen_nCountRNA_nFeature_epithelium.png", width = 20, height = 20, dpi = 600)
 67 | 
 68 | sanger<-subset..
 69 | 
 70 | 
 71 | 
 72 | #####
 73 | wholecollagenase<-subset(methodspaper, subset=method=="wholecollagenase")
 74 | splitcollagenase<-subset(methodspaper, subset=method=="splitcollagenase")
 75 | splitprotease<-subset(methodspaper, subset=method=="splitprotease")
 76 | splitprotease_epi<-subset(methodspaper, subset=method=="splitprotease_epi")
 77 | 
 78 | plot1 <- FeatureScatter(wholecollagenase, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,12000)
 79 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/wholecollagenase_nCountRNA_nGene_celltypes.pdf", width = 10, height = 10, dpi = 600)
 80 | 
 81 | plot1 <- FeatureScatter(wholecollagenase, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,60)
 82 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/wholecollagenase_nCountRNA_percentMt_celltypes.pdf", width = 10, height = 10, dpi = 600)
 83 | 
 84 | plot1 <- FeatureScatter(splitcollagenase, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,12000)
 85 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/splitcollagenase_nCountRNA_nGene_celltypes.pdf", width = 10, height = 10, dpi = 600)
 86 | 
 87 | plot1 <- FeatureScatter(splitcollagenase, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,60)
 88 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/splitcollagenase_nCountRNA_percentMt_celltypes.pdf", width = 10, height = 10, dpi = 600)
 89 | 
 90 | 
 91 | plot1 <- FeatureScatter(splitprotease, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,12000)
 92 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/splitprotease_nCountRNA_nGene_celltypes.pdf", width = 10, height = 10, dpi = 600)
 93 | 
 94 | plot1 <- FeatureScatter(splitprotease, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,60)
 95 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/splitprotease_nCountRNA_percentMt_celltypes.pdf", width = 10, height = 10, dpi = 600)
 96 | 
 97 | plot1 <- FeatureScatter(splitprotease_epi, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,12000)
 98 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/splitprotease_epi_nCountRNA_nGene_celltypes.pdf", width = 10, height = 10, dpi = 600)
 99 | 
100 | plot1 <- FeatureScatter(splitprotease_epi, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Predicted_all_categories", pt.size=1)+xlim(-1,(3*10^5))+ylim(-1,60)
101 | ggsave("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/splitprotease_epi_nCountRNA_percentMt_celltypes.pdf", width = 10, height = 10, dpi = 600)
102 | 
103 | 
104 | ####
105 | #calculate genes expressed i 10% cells
106 | lp<-subset(smillie, subset=Location=="LP")
107 | dim(lp)
108 | epi<-subset(smillie, subset=Location=="Epi")
109 | dim(epi)
110 | x<-CreateSeuratObject(counts =lp@assays$RNA@counts, min.cells=262)
111 | dim(x)
112 | y<-CreateSeuratObject(counts =epi@assays$RNA@counts, min.cells=267)
113 | dim(y)
114 | genes_fresh<-rownames(x@assays$RNA@counts)
115 | head(genes_fresh)
116 | genes_fresh<-data.frame(rownames(x@assays$RNA@counts))
117 | genes_frozen<-data.frame(rownames(y@assays$RNA@counts))
118 | head(genes_fresh)
119 | colnames(genes_fresh)[1]<-"Gene"
120 | colnames(genes_frozen)[1]<-"Gene"
121 | z<-merge(genes_fresh,genes_frozen, by="Gene", all=T)
122 | dim(z)
123 | 
124 | 
125 | ###### selecting PCs
126 | 
127 | methodspaper<-readRDS("/groups/umcg-weersma/tmp01/singelcell/Methodspaper/Processed_data/SCT_100smillie_noNA.rds")
128 | smillie<-subset(methodspaper, subset=Predicted_all=="epithelium")
129 | smillie<-subset(smillie, subset=method=="splitcollagenase")
130 | 
131 | DefaultAssay(smillie)<-"RNA"
132 | Idents(smillie)<-"Location"
133 | smillie<-subset(smillie, downsample=2671)
134 | smillie <- NormalizeData(smillie)
135 | all.genes <- rownames(smillie)
136 | smillie <- ScaleData(smillie, features = all.genes)
137 | smillie <- RunPCA(smillie, features = all.genes)
138 | #DimPlot(smillie, reduction = "pca")
139 | #DimPlot(smillie, reduction = "pca", group.by="Location")
140 | #ggsave("Methodspaper/PCA1_2_smillie_epi.png", width = 8, height = 8, dpi = 600)
141 | DE_epi<-FindMarkers(smillie, test.use="MAST", ident.1="epi", ident.2="lp", 	)
142 | 
143 | ###
144 | #patient specific DE fresh_frozen (not done yet)
145 | 
146 | sanger<-subset(fresh_frozen,subset=orig.ident=="Sanger")
147 | takeda<-subset(fresh_frozen,subset=orig.ident=="Takeda")
148 | 
149 | Tcells<-subset(sanger, subset = Predicted_all == "Tcell")
150 | Bcells<-subset(sanger, subset = Predicted_all == "Bcell")
151 | Fibroblasts<-subset(sanger, subset = Predicted_all == "fibroblast")
152 | Myeloid<-subset(sanger, subset = Predicted_all == "myeloid")
153 | Epithelium<-subset(sanger, subset = Predicted_all == "epithelium")
154 | Endothelial<-subset(sanger, subset = Predicted_all == "endothelial")
155 | Lymphoid<-subset(sanger, subset = Predicted_all == "Lymphoid")
156 | Glia<-subset(sanger, subset = Predicted_all == "Glia")
157 | 
158 | Tcell_fresh_frozen_sanger<-FindMarkers(Tcells, test.use="MAST", ident.1="fresh", ident.2="cryo")
159 | write.csv(Tcell_fresh_frozen_sanger, "/groups/umcg-weersma/tmp01/singelcell/Methodspaper/Tcell_fresh_frozen_sanger.csv")
160 | 
161 | 
162 | 
163 | 
164 | 
165 | 


--------------------------------------------------------------------------------
/old_stuff/merge_ctl_10x_cd.md:
--------------------------------------------------------------------------------
  1 | # **TITLE: Compare cytotoxic cell dataset from 10X, and compare with 'own' CTLs**
  2 | 
  3 | Author: WTC
  4 | Date: 20180629
  5 | 
  6 | # Description:
  7 | using this script in R, one can merge two datasets from different sources (i.e. a subset of our CD dataset and a publicly available 10X genomics dataset) and explore gene expression differences between those
  8 | 
  9 | # Libraries
 10 | library(Seurat)
 11 | library(Matrix)
 12 | #library(Matrix.utils)
 13 | library(ggplot2)
 14 | #library(pryr)
 15 | 
 16 | 
 17 | # Functions
 18 | 
 19 | # download Cytotoxic T cell gene/cell matrix (raw) from https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/cytotoxic_t
 20 | CTL_10x<-Read10X(data.dir = "~/Downloads/matrices_mex/hg19/")
 21 | CTL_10x<-CreateSeuratObject(raw.data = CTL_10x, min.cells = 3, project = "healthy")
 22 | 
 23 | 
 24 | # extract blood Cytotoxic T cells from CD Seurat file
 25 | #allcells_meta<-SetAllIdent(allcells_meta, "eight_cell_types")
 26 | #CTL_CD<-WhichCells(allcells_meta, "Cytotoxic_Blood")
 27 | #CTL_CD<-SubsetData(allcells_meta, CTL_CD)
 28 | 
 29 | # filter mito genes
 30 | mito.genes <- grep(pattern = "^MT-", x = rownames(x = CTL_10x@data), value = TRUE)
 31 | percent.mito <- Matrix::colSums(CTL_10x@raw.data[mito.genes, ])/Matrix::colSums(CTL_10x@raw.data)
 32 | CTL_10x <- AddMetaData(object = CTL_10x, metadata = percent.mito, col.name = "percent.mito")
 33 | VlnPlot(object = CTL_10x, features.plot = c("nGene", "nUMI", "percent.mito"), nCol = 3)
 34 | 
 35 | # for ngene: 200-2500, percent mito: <5%
 36 | CTL_10x <- FilterCells(object = CTL_10x, subset.names = c("nGene", "percent.mito"), low.thresholds = c(200, -Inf), high.thresholds = c(2500, 0.05))
 37 | 
 38 | # log normalize
 39 | CTL_10x <- NormalizeData(object = CTL_10x, normalization.method = "LogNormalize", scale.factor = 10000)
 40 | 
 41 | CTL_10x <- ScaleData(object = CTL_10x, vars.to.regress = c("nUMI", "percent.mito"))
 42 | 
 43 | ## same for CD
 44 | mito.genes <- grep(pattern = "^MT\\.", x = rownames(x = CTL_CD@data), value = TRUE)
 45 | percent.mito <- Matrix::colSums(CTL_CD@raw.data[mito.genes, ])/Matrix::colSums(CTL_CD@raw.data)
 46 | CTL_CD <- AddMetaData(object = CTL_CD, metadata = percent.mito, col.name = "percent.mito")
 47 | VlnPlot(object = CTL_CD, features.plot = c("nGene", "nUMI", "percent.mito"), nCol = 3)
 48 | 
 49 | # for ngene: 200-2500, percent mito: <5%
 50 | CTL_CD <- FilterCells(object = CTL_CD, subset.names = c("nGene", "percent.mito"), low.thresholds = c(200, -Inf), high.thresholds = c(2500, 0.05))
 51 | 
 52 | # log normalize
 53 | CTL_CD <- NormalizeData(object = CTL_CD, normalization.method = "LogNormalize", scale.factor = 10000)
 54 | 
 55 | CTL_CD <- ScaleData(object = CTL_CD, vars.to.regress = c("nUMI", "percent.mito"))
 56 | 
 57 | 
 58 | 
 59 | CTL_10x@meta.data$dataset <- "ctrl"
 60 | CTL_CD@meta.data$dataset <- "cd"
 61 | 
 62 | save(CTL_10x, file="~/Desktop/Single_cell/CTL_10x_CD/CTL_10x.Rda")
 63 | save(CTL_CD, file="~/Desktop/Single_cell/CTL_10x_CD/CTL_CD.Rda")
 64 | 
 65 | 
 66 | ## Combine the cdulated and ctrlulated cells into a single object
 67 | ctl.combined <- RunCCA(CTL_10x, CTL_CD, genes.use = intersect(rownames(CTL_10x@data), rownames(CTL_CD@data)), num.cc = 30, scale.data=T)
 68 | 
 69 | 
 70 | # Process the data after combining the two datasets
 71 | 
 72 | ctl.combined <- RunPCA(object = ctl.combined, pc.genes = ctl.combined@var.genes, do.print = TRUE, pcs.print = 1:5, genes.print = 5)
 73 | ctl.combined = RunTSNE(ctl.combined, dims.use = 1:15, do.fast = T)
 74 | 
 75 | ## The DimPlot can use either "pca", "tsne" or "cca" for it's reduction.use option and you can add any meta data to the group.by for the colouring
 76 | DimPlot(object = ctl.combined, reduction.use = "cca", group.by = "dataset", pt.size = 0.5, do.return = TRUE)
 77 | VlnPlot(object = ctl.combined, features.plot = "CC1", group.by = "dataset", do.return = TRUE)
 78 | 
 79 | ## CCA equivalent of the PCElbowPlot
 80 | MetageneBicorPlot(ctl.combined, grouping.var = "dataset", dims.eval = 1:30, display.progress = FALSE)
 81 | 
 82 | ## Use the CCA reduction to better overlap the two original datasetes
 83 | ctl.combined <- AlignSubspace(ctl.combined, reduction.type = "cca", grouping.var = "dataset", dims.align = 1:15)
 84 | 
 85 | ctl.combined <- RunTSNE(ctl.combined, reduction.use = "cca.aligned", dims.use = 1:15, do.fast = T)
 86 | ctl.combined <- FindClusters(ctl.combined, reduction.type = "cca.aligned", resolution = 0.6, dims.use = 1:15)
 87 | 
 88 | setwd("~/Desktop/Single_cell/CTL_10x_CD/")
 89 | save(ctl.combined, file="~/Desktop/Single_cell/CTL_10x_CD/ctl_combined_aligned.Rda")
 90 | 
 91 | # Making several tSNE plots, coloured by different meta data
 92 | 
 93 | TSNEPlot(ctl.combined, do.return = T, pt.size = 0.5, group.by = "dataset")
 94 | TSNEPlot(ctl.combined, do.label = T, do.return = T, pt.size = 0.5)
 95 | 
 96 | # Make feature plots for some marker genes
 97 | 
 98 | genes.to.use <- c("MS4A1", "CD3E", "LYZ", "GNLY", "CD8A", "PF4")
 99 | for (gene in genes.to.use){
100 | 	cols <- colorRampPalette(c("lightgrey", "blue"))(100)
101 | 	ens.id <- gene
102 | 	expression <- ctl.combined@data[ens.id,]
103 | 	bins <- .bincode(expression, seq(min(expression)-0.00001,max(expression)+0.00001, length.out=100))
104 | 	plot(ctl.combined@dr$tsne@cell.embeddings[,"tSNE_1"], ctl.combined@dr$tsne@cell.embeddings[,"tSNE_2"], col=alpha(cols[bins],0.3), xlab="tSNE_1", ylab="tSNE_2", main=paste0("Expression of ", gene), pch=20)
105 | }
106 | 
107 | 
108 | # Name: get.violin.data
109 | # Function: Get the data to make a multi-violin plot
110 | # Input:
111 | #   Name 	            Type          Description
112 | #   seurat 		       	Seurat 	      The seurat object to take the data from
113 | #   genes 		       	Character     The genes for which to make the violin plots
114 | #   HGNC.names 			Character     The HGNC IDs for the genes
115 | #
116 | # Output:
117 | #   The data to plot in the violin plots
118 | 
119 | get.violin.data <- function(seurat, genes, HGNC.names) {
120 |   output = data.frame(gene = character(0), value= numeric(0), ident = character(0))
121 |   for (i in 1:length(genes)) {
122 |     data.use = data.frame(FetchData(seurat,genes[i]))
123 |     data.use = t(data.use)
124 |     data.melt=data.frame(gene=rep(genes[i], length(seurat@ident)), gene.HGNC=rep(HGNC.names[i], length(seurat@ident)))
125 |     data.melt$value=as.numeric(data.use[1,1:length(seurat@ident)])
126 |     data.melt$id=names(data.use)[1:length(seurat@ident)]
127 |     data.melt$ident=seurat@ident
128 |     noise = rnorm(length(data.melt$value))/100000
129 |     data.melt$value=as.numeric(as.character(data.melt$value))+noise
130 |     output = rbind(output, data.melt)
131 |   }
132 |   return(output)
133 | }
134 | 
135 | # Name: gg.color.hue
136 | # Function: Create ggplot colors
137 | # Input:
138 | #   Name 	            Type          Description
139 | #   n  				  	numeric 	  The number of colours to make
140 | #
141 | # Output:
142 | #   The ggplot colors
143 | 
144 | gg.color.hue <- function(n) {
145 |   hues = seq(15, 375, length = n + 1)
146 |   hcl(h = hues, l = 65, c = 100)[1:n]
147 | }
148 | 
149 | 
150 | # Making the multi-violin plots, scipt from Dylan de Vries/ Harm Brugge
151 | 
152 | ctl.combined<-SetAllIdent(ctl.combined, "dataset")
153 | identities <- ctl.combined@ident
154 | colors.x <- gg.color.hue(length(unique(identities)))
155 | plot.data <- get.violin.data(ctl.combined, c("CD14", "LYZ", "S100A9", "LYN", "ITGAX", "IFITM3",  "CST3", "GZMB", "PRF1", "CD3D", "NKG7", "KLRC1", "CD79A", "SELL", "CCR7", "S100A4", "CD27", "CD8A", "CD8B", "MS4A1"), c("CD14", "LYZ", "S100A9", "LYN", "ITGAX", "IFITM3", "CST3", "GZMB", "PRF1", "CD3D", "NKG7", "KLRC1", "CD79A", "SELL", "CCR7", "S100A4", "CD27", "CD8A", "CD8B", "MS4A1"))
156 | 
157 | ggplot(plot.data, aes(factor(ident),value)) +
158 |   geom_violin(scale="width",adjust=1,trim=TRUE,aes(fill=factor(ident)),show.legend = F) + 
159 |   ylab("") + xlab("") +
160 |   coord_flip() + 
161 |   facet_wrap(~ gene.HGNC,scales = "free_x", ncol = length(levels(plot.data$gene.HGNC))) + 
162 |   theme(strip.text.x = element_text(size=18, angle=90),
163 |         axis.text.y = element_text(size=24),
164 |         strip.background = element_blank(),
165 |         panel.spacing.x = unit(c(-0.2), "lines"),
166 |         axis.title.x=element_blank(),
167 |         axis.text.x=element_blank(),
168 |         axis.ticks.x=element_blank(),
169 |         plot.margin = unit(c(0.5,0.4,0.5,1.3), "cm")) +
170 |   scale_x_discrete(limits = rev(levels(plot.data$ident)), position = "top") +
171 |   scale_fill_manual(values = colors.x) #+ theme_set(theme_gray(base_size = 28))
172 | 
173 | 
174 | # Compare the new Seurat object with the old Seurat object
175 | 
176 | 
177 | # visualize
178 | # add metadata own data
179 | info<-allcells_meta@meta.data
180 | info<-info[,c(7:18,35,38:41)]
181 | info$cellen<-row.names(info)
182 | row.names(info)<-NULL
183 | keeping.order <- function(data, fn, ...) { 
184 |   col <- ".sortColumn"
185 |   data[,col] <- 1:nrow(data) 
186 |   out <- fn(data, ...) 
187 |   if (!col %in% colnames(out)) stop("Ordering column not preserved by function") 
188 |   out <- out[order(out[,col]),] 
189 |   out[,col] <- NULL 
190 |   out 
191 | } 
192 | ## add location
193 | #extract file that is a copy from @meta.data
194 | CellsMeta = ctl.combined@meta.data
195 | head(CellsMeta)
196 | CellsMeta$cellen=row.names(CellsMeta)
197 | row.names(CellsMeta)=NULL
198 | dim(CellsMeta)
199 | info<-keeping.order(CellsMeta, merge, y=info, by = "cellen", all=FALSE)
200 | 
201 | CellsMeta = ctl.combined@meta.data
202 | head(CellsMeta)
203 | CellsMeta$cellen=row.names(CellsMeta)
204 | row.names(CellsMeta)=NULL
205 | dim(CellsMeta)
206 | CellsMeta<-keeping.order(CellsMeta, merge, y=info, by = "cellen", all=T)
207 | row.names(CellsMeta)<-CellsMeta$cellen
208 | dim(CellsMeta)
209 | CellsMeta$cellen<-NULL
210 | head(CellsMeta)
211 | ctl.combined <- AddMetaData(ctl.combined, CellsMeta)
212 | 
213 | ctl.combined<-SetAllIdent(ctl.combined, "dataset")
214 | cd_cells<-WhichCells(ctl.combined, "cd")
215 | healthy_cells<-WhichCells(ctl.combined, "ctrl")
216 | FeaturePlot(ctl.combined, "NKG7", cells.use = cd_cells, pt.size=3)
217 | FeaturePlot(ctl.combined, "NKG7", cells.use = healthy_cells, pt.size=3)
218 | 
219 | # calculate DE
220 | # DE with genes >0.01%, MAST
221 | ctl.combined_DE_markers = FindAllMarkers(ctl.combined, min.pct = 0.001, only.pos = T, test.use = "MAST")
222 | write.table(ctl.combined_DE_markers, file = "ctl.blood.10x.cd_DEmarkers_0.1percMAST.txt")
223 | 
224 | # and the significant ones (p<0.05)
225 | ctl.combined_DE_markers<-ctl.combined_DE_markers[ctl.combined_DE_markers$p_val_adj <0.05,]
226 | 
227 | # top 10 of both sets
228 | library(dplyr)
229 | ctl.combined_DE_markers %>% group_by(cluster) %>% top_n(10, avg_logFC)
230 | top10 <- ctl.combined_DE_markers %>% group_by(cluster) %>% top_n(10, avg_logFC)
231 | # histogram of top 10
232 | DoHeatmap(object = ctl.combined, genes.use = top10$gene, slim.col.label = TRUE, remove.key = FALSE)
233 | 


--------------------------------------------------------------------------------