├── .Rbuildignore ├── .gitignore ├── .here ├── DESCRIPTION ├── Dockerfile ├── GBOX_BASE_NAME.txt ├── LICENSE ├── Makefile ├── NAMESPACE ├── R ├── CombinePvalue.R ├── DrugCombination.R ├── DrugScore.R ├── GetDrug.R ├── GetDrugRef.R ├── PrepareReference.R ├── SCplasticity.R ├── TopCombination.R ├── TopDrug.R ├── data_preprocess.R ├── get_CEGs.R ├── get_drug_pval.R └── get_gene_pval.R ├── README.md ├── VERSION.txt ├── asgard_pipeline.png ├── data ├── FDA_drug.rda └── L1000_meta.rda ├── man ├── DrugCombination.Rd ├── DrugScore.Rd ├── GetDrug.Rd ├── GetDrugRef.Rd ├── PrepareReference.Rd ├── SCplasticity.Rd ├── TopCombination.Rd └── TopDrug.Rd └── prep_files.sh /.Rbuildignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lanagarmire/Asgard/fc2b60855e90fe231b85723cb5fb9711bb588c66/.Rbuildignore -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Docker related 2 | /build/ 3 | 4 | # History files 5 | .Rhistory 6 | .Rapp.history 7 | 8 | # Session Data files 9 | .RData 10 | .RDataTmp 11 | 12 | # User-specific files 13 | .Ruserdata 14 | 15 | # Example code in package build process 16 | *-Ex.R 17 | 18 | # Output files from R CMD build 19 | /*.tar.gz 20 | 21 | # Output files from R CMD check 22 | /*.Rcheck/ 23 | 24 | # RStudio files 25 | .Rproj.user/ 26 | 27 | # produced vignettes 28 | vignettes/*.html 29 | vignettes/*.pdf 30 | 31 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 32 | .httr-oauth 33 | 34 | # knitr and R markdown default cache directories 35 | *_cache/ 36 | /cache/ 37 | 38 | # Temporary files created by R markdown 39 | *.utf8.md 40 | *.knit.md 41 | 42 | # R Environment Variables 43 | .Renviron 44 | 45 | # pkgdown site 46 | docs/ 47 | 48 | # translation temp files 49 | po/*~ 50 | 51 | # RStudio Connect folder 52 | rsconnect/ 
-------------------------------------------------------------------------------- /.here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lanagarmire/Asgard/fc2b60855e90fe231b85723cb5fb9711bb588c66/.here -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: Asgard 2 | Type: Package 3 | Title: A Single-cell Guided pipeline for Accurate Repurposing of Drugs 4 | Version: 1.0.0 5 | Author: Bing He [aut], Lana Garmire [aut, cre] 6 | Maintainer: Bing He 7 | Description: Asgard repurposes drugs for every single cell population and predicts personalized combination of drugs to address cellular heterogeneity of patients. 8 | Depends: R (>= 3.5.0) 9 | Imports: 10 | cmapR 11 | Suggests: 12 | Seurat, 13 | SeuratObject, 14 | SingleR, 15 | celldex, 16 | cowplot, 17 | dplyr 18 | License: PolyForm-Noncommercial 19 | Encoding: UTF-8 20 | LazyData: true 21 | RoxygenNote: 7.2.3 22 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rocker/rstudio:4.3.1 2 | 3 | RUN R -e 'install.packages("devtools")' 4 | 5 | RUN R -e 'install.packages("BiocManager")' 6 | RUN R -e 'install.packages("remotes")' 7 | 8 | RUN apt-get update 9 | RUN apt install -y zlib1g-dev 10 | RUN R -e 'BiocManager::install(c("SingleR","limma","cmapR","celldex"))' 11 | RUN R -e 'install.packages("Seurat")' 12 | 13 | WORKDIR /home/rstudio 14 | 15 | COPY . . 
16 | 17 | RUN R -e 'install.packages(".", repos = NULL, type = "source")' 18 | 19 | # WORKDIR /home/rstudio/build 20 | # RUN mkdir -p /home/rstudio/build/DrugReference 21 | # RUN R -e 'library("Asgard"); PrepareReference(cell.info="GSE70138_Broad_LINCS_cell_info_2017-04-28.txt", gene.info="GSE70138_Broad_LINCS_gene_info_2017-03-06.txt", GSE70138.sig.info = "GSE70138_Broad_LINCS_sig_info_2017-03-06.txt", GSE92742.sig.info = "GSE92742_Broad_LINCS_sig_info.txt", GSE70138.gctx = "GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx", GSE92742.gctx = "GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx", Output.Dir = "DrugReference/")' 22 | 23 | # RUN mv DrugReference /home/rstudio/. 24 | 25 | # WORKDIR /home/rstudio 26 | 27 | # RUN rm -rf /home/rstudio/build 28 | -------------------------------------------------------------------------------- /GBOX_BASE_NAME.txt: -------------------------------------------------------------------------------- 1 | lanagarmire/asgard 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | # PolyForm Noncommercial License 1.0.0 2 | 3 | 4 | 5 | ## Acceptance 6 | 7 | In order to get any license under these terms, you must agree 8 | to them as both strict obligations and conditions to all 9 | your licenses. 10 | 11 | ## Copyright License 12 | 13 | The licensor grants you a copyright license for the 14 | software to do everything you might do with the software 15 | that would otherwise infringe the licensor's copyright 16 | in it for any permitted purpose. However, you may 17 | only distribute the software according to [Distribution 18 | License](#distribution-license) and make changes or new works 19 | based on the software according to [Changes and New Works 20 | License](#changes-and-new-works-license). 
21 | 22 | ## Distribution License 23 | 24 | The licensor grants you an additional copyright license 25 | to distribute copies of the software. Your license 26 | to distribute covers distributing the software with 27 | changes and new works permitted by [Changes and New Works 28 | License](#changes-and-new-works-license). 29 | 30 | ## Notices 31 | 32 | You must ensure that anyone who gets a copy of any part of 33 | the software from you also gets a copy of these terms or the 34 | URL for them above, as well as copies of any plain-text lines 35 | beginning with `Required Notice:` that the licensor provided 36 | with the software. For example: 37 | 38 | > Required Notice: Copyright Yoyodyne, Inc. (http://example.com) 39 | 40 | ## Changes and New Works License 41 | 42 | The licensor grants you an additional copyright license to 43 | make changes and new works based on the software for any 44 | permitted purpose. 45 | 46 | ## Patent License 47 | 48 | The licensor grants you a patent license for the software that 49 | covers patent claims the licensor can license, or becomes able 50 | to license, that you would infringe by using the software. 51 | 52 | ## Noncommercial Purposes 53 | 54 | Any noncommercial purpose is a permitted purpose. 55 | 56 | ## Personal Uses 57 | 58 | Personal use for research, experiment, and testing for 59 | the benefit of public knowledge, personal study, private 60 | entertainment, hobby projects, amateur pursuits, or religious 61 | observance, without any anticipated commercial application, 62 | is use for a permitted purpose. 63 | 64 | ## Noncommercial Organizations 65 | 66 | Use by any charitable organization, educational institution, 67 | public research organization, public safety or health 68 | organization, environmental protection organization, 69 | or government institution is use for a permitted purpose 70 | regardless of the source of funding or obligations resulting 71 | from the funding. 
72 | 73 | ## Fair Use 74 | 75 | You may have "fair use" rights for the software under the 76 | law. These terms do not limit them. 77 | 78 | ## No Other Rights 79 | 80 | These terms do not allow you to sublicense or transfer any of 81 | your licenses to anyone else, or prevent the licensor from 82 | granting licenses to anyone else. These terms do not imply 83 | any other licenses. 84 | 85 | ## Patent Defense 86 | 87 | If you make any written claim that the software infringes or 88 | contributes to infringement of any patent, your patent license 89 | for the software granted under these terms ends immediately. If 90 | your company makes such a claim, your patent license ends 91 | immediately for work on behalf of your company. 92 | 93 | ## Violations 94 | 95 | The first time you are notified in writing that you have 96 | violated any of these terms, or done anything with the software 97 | not covered by your licenses, your licenses can nonetheless 98 | continue if you come into full compliance with these terms, 99 | and take practical steps to correct past violations, within 100 | 32 days of receiving notice. Otherwise, all your licenses 101 | end immediately. 102 | 103 | ## No Liability 104 | 105 | ***As far as the law allows, the software comes as is, without 106 | any warranty or condition, and the licensor will not be liable 107 | to you for any damages arising out of these terms or the use 108 | or nature of the software, under any kind of legal claim.*** 109 | 110 | ## Definitions 111 | 112 | The **licensor** is the individual or entity offering these 113 | terms, and the **software** is the software the licensor makes 114 | available under these terms. 115 | 116 | **You** refers to the individual or entity agreeing to these 117 | terms. 
118 | 119 | **Your company** is any legal entity, sole proprietorship, 120 | or other kind of organization that you work for, plus all 121 | organizations that have control over, are under the control of, 122 | or are under common control with that organization. **Control** 123 | means ownership of substantially all the assets of an entity, 124 | or the power to direct its management and policies by vote, 125 | contract, or otherwise. Control can be direct or indirect. 126 | 127 | **Your licenses** are all the licenses granted to you for the 128 | software under these terms. 129 | 130 | **Use** means anything you do with the software requiring one 131 | of your licenses. 132 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VERfile="VERSION.txt" 2 | GBOXfile="GBOX_BASE_NAME.txt" 3 | VER=`cat $(VERfile)` 4 | GBOX=`cat $(GBOXfile)`:$(VER) 5 | export 6 | 7 | docker: 8 | docker build -t $(GBOX) . 
9 |
10 | docker-push:
11 | docker push $(GBOX)
12 |
13 | server:
14 | docker run --rm -v `pwd`:/home/rstudio/Asgard -p 8787:8787 -it $(GBOX)
15 |
16 | shell:
17 | docker run --rm -it $(GBOX) /bin/bash
18 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(DrugCombination)
4 | export(DrugScore)
5 | export(GetDrug)
6 | export(GetDrugRef)
7 | export(PrepareReference)
8 | export(SCplasticity)
9 | export(TopCombination)
10 | export(TopDrug)
11 | import(cmapR)
12 |
--------------------------------------------------------------------------------
/R/CombinePvalue.R:
--------------------------------------------------------------------------------

## Combine p-values with Fisher's method.
##
## Arguments:
##   p  Numeric vector of p-values. Missing values and values outside (0, 1]
##      are discarded before combining.
##
## Returns:
##   A single numeric value: the upper-tail chi-squared probability of
##   -2 * sum(log(p)) on 2 * k degrees of freedom, where k is the number of
##   valid p-values. NA_real_ (with a warning) when fewer than two valid
##   p-values remain.
CombineP <- function(p) {
  # Without the is.na() guard an NA in `p` makes the comparison NA and the
  # if() below fails with "missing value where TRUE/FALSE needed".
  keep <- !is.na(p) & (p > 0) & (p <= 1)
  valid <- p[keep]

  if (length(valid) < 2) {
    warning("Must have at least two valid p values")
    # Same type as the success path, so callers that collect one number per
    # group (e.g. through tapply()) always receive a numeric scalar.
    return(NA_real_)
  }

  if (length(valid) != length(p)) {
    warning("Some studies omitted")
  }

  chisq <- -2 * sum(log(valid))
  df <- 2 * length(valid)
  pchisq(chisq, df, lower.tail = FALSE)
}

--------------------------------------------------------------------------------
/R/DrugCombination.R:
--------------------------------------------------------------------------------
1 | #' @title Treatment Efficacy of the Drug Combination.
2 | #' @description It evaluates treatment efficacy to identify drug combinations that can best reverse the target genes’ expression in diseased cells in case samples.
3 | #' @details This function evaluates treatment efficacy and ranks drug combinations using the therapeutic score, which integrates gene responses to multiple drugs, the proportion of genes, and cells treated by combined drugs. 4 | #' @param SC.integrated A Seurat object of aligned single cells from SCalignment function. 5 | #' @param Gene.data A list of differential gene expression profiles for every cell type. It's from GetGene function. 6 | #' @param Drug.data A list of mono-drugs for every cell type. It's from GetDrug function. 7 | #' @param Drug.FDR The FDR threshold to select drug. The default value is 0.1. 8 | #' @param FDA.drug.only logical; if TRUE, will only return FDA-approved drugs. 9 | #' @param Combined.drugs The number of drugs in a combination. The default value is 2. 10 | #' @param GSE92742.gctx The gctx file containing drug responses from the GSE92742 dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE92742). 11 | #' @param GSE70138.gctx The gctx file containing drug responses from the GSE70138 dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE70138). 12 | #' @param Case A vector containing the names of case samples. 13 | #' @param Tissue Reference tissue. If one used lung_rankMatrix.txt in GetDrugRef function, then the Reference tissue is lung. 14 | #' @return A data frame of drug combinations with therapeutic scores and FDR. 
15 | #' @export 16 | #' @import cmapR 17 | 18 | 19 | DrugCombination <- function(SC.integrated=SC.data, 20 | Gene.data=Gene.list, 21 | Drug.data=Drug.ident.res, 22 | Drug.FDR=0.1, 23 | FDA.drug.only=TRUE, 24 | Combined.drugs=2, 25 | GSE92742.gctx=NULL, 26 | GSE70138.gctx=NULL, 27 | Case=NULL, 28 | Tissue="breast" 29 | ){ 30 | ##Cell proportion 31 | cells <- SC.integrated@meta.data 32 | if(length(Case)>0){ 33 | cells <- subset(cells,sample %in% Case) 34 | } 35 | cells <- cells$celltype 36 | cell.count <- table(cells) 37 | cell.count <- cell.count[which(cell.count>3)] 38 | cells.freq <- round(100*cell.count/length(cells),2) 39 | 40 | ##Load drug data 41 | Drug.list <- data.frame() 42 | for(i in names(Drug.data)){ 43 | Cd <- Drug.data[[i]] 44 | Cd <- Cd[!duplicated(Cd$Drug.name),] 45 | #Cd <- subset(Cd, FDR0){ 51 | Cd <- subset(Cd, Drug.name %in% Drugs) 52 | FDRs <- Cd$FDR 53 | Pvalue <- Cd$P.value 54 | temp <- data.frame(Drug=Drugs,Cluster=i,Size=cells.freq[i],P.value=Pvalue,FDR=FDRs,row.names = NULL) 55 | Drug.list <- rbind(Drug.list,temp) 56 | } 57 | } 58 | Drug.list <- unique(Drug.list) 59 | Drug.list$w.size <- Drug.list$Size*(-log10(Drug.list$FDR)) 60 | Drug.list[is.na(Drug.list)] <- 0 61 | Drug.coverage <- tapply(Drug.list$w.size, Drug.list$Drug,sum) 62 | raw.raw.Drug.list <- Drug.list 63 | Drug.list <- subset(Drug.list, FDR0)] 73 | Selected.Drug.combinations.coverage <- label[which(label>0)] 74 | C.Drugs <- unique(as.vector(Selected.Drug.combinations)) 75 | 76 | ##Cell line information 77 | cells <- subset(cell_data,primary_site == Tissue)$cell_id 78 | 79 | ##Load experiment information 80 | data_infor1 <- col_meta_GSE92742[,c("sig_id","pert_iname")] 81 | row.names(data_infor1) <- data_infor1$sig_id 82 | idx <- which(col_meta_GSE92742$cell_id %in% cells & col_meta_GSE92742$pert_iname %in% C.Drugs) 83 | sig_ids <- col_meta_GSE92742$sig_id[idx] 84 | data_infor1 <- data_infor1[sig_ids,] 85 | 86 | ##Load drug response 87 | my_ds <- parse_gctx(GSE92742.gctx, 
cid=sig_ids) 88 | gene.data <- as.data.frame(my_ds@mat) 89 | gene.data$geneid <- row.names(gene.data) 90 | treatments <- colnames(gene.data) 91 | treatments <- setdiff(treatments,"geneid") 92 | data <- merge(gene.data,gene_meta,by.x="geneid",by.y="pr_gene_id") 93 | data1 <- data[,c("pr_gene_symbol",treatments)] 94 | 95 | ##Load experiment information 96 | data_infor2 <- col_meta_GSE70138[,c("sig_id","pert_iname")] 97 | row.names(data_infor2) <- data_infor2$sig_id 98 | idx <- which(col_meta_GSE70138$cell_id %in% cells & col_meta_GSE70138$pert_iname %in% C.Drugs) 99 | sig_ids <- col_meta_GSE70138$sig_id[idx] 100 | data_infor2 <- data_infor2[sig_ids,] 101 | 102 | ##Load drug response 103 | sig_ids <- col_meta_GSE70138$sig_id[idx] 104 | my_ds <- parse_gctx(GSE70138.gctx, cid=sig_ids) 105 | gene.data <- as.data.frame(my_ds@mat) 106 | gene.data$geneid <- row.names(gene.data) 107 | treatments <- colnames(gene.data) 108 | treatments <- setdiff(treatments,"geneid") 109 | data <- merge(gene.data,gene_meta,by.x="geneid",by.y="pr_gene_id") 110 | data2 <- data[,c("pr_gene_symbol",treatments)] 111 | data <- merge(data1,data2,by="pr_gene_symbol") 112 | row.names(data) <- data[,1] 113 | data <- data[,-1] 114 | data_infor <- rbind(data_infor1,data_infor2) 115 | 116 | ##Combination score 117 | D.genes <- list() 118 | for(i in names(Gene.data)){ 119 | Cd <- Gene.data[[i]] 120 | Cd <- subset(Cd, adj.P.Val<0.05) 121 | D.genes.temp <- list(temp=rownames(Cd)) 122 | D.genes <- cbind(D.genes,D.genes.temp) 123 | } 124 | D.genes <- Reduce(intersect,D.genes) 125 | Gene.expression <- data.frame() 126 | for(i in names(Gene.data)){ 127 | Cd <- Gene.data[[i]] 128 | if(nrow(Gene.expression)==0){ 129 | Gene.expression <- data.frame(Score=Cd[D.genes,"score"]) 130 | }else{ 131 | Gene.expression.temp <- data.frame(Score=Cd[D.genes,"score"]) 132 | Gene.expression <- cbind(Gene.expression,Gene.expression.temp) 133 | } 134 | } 135 | Gene.expression <- as.data.frame(Gene.expression) 136 | Gene.expression 
<- as.matrix(Gene.expression) 137 | row.names(Gene.expression) <- D.genes 138 | D.gene.expression <- apply(Gene.expression,1,mean) 139 | names(D.gene.expression) <- D.genes 140 | Single.treated.score.list <- NULL 141 | for(Drug in C.Drugs){ 142 | D.genes.treated <- NULL 143 | drug.treatments <- subset(data_infor,pert_iname == Drug)$sig_id 144 | drug.responses <- data[,drug.treatments] 145 | drug.responses.mean <- apply(drug.responses,1,mean) 146 | D.D.genes <- intersect(names(D.gene.expression),names(drug.responses.mean)) 147 | D.genes.treated <- -D.gene.expression[D.D.genes]*drug.responses.mean[D.D.genes] 148 | D.genes.treated <- D.genes.treated[which(D.genes.treated>0)] 149 | D.genes.treated <- D.genes.treated 150 | Mean.treated <- mean(D.genes.treated) 151 | Ratio.treated <- length(D.genes.treated)/length(D.D.genes) 152 | Coverage.treated <- Drug.coverage[Drug]/100 153 | Treated.score <- (Ratio.treated*Coverage.treated) 154 | Single.treated.score.list <- c(Single.treated.score.list,Treated.score) 155 | } 156 | Combination.treated.score <- function(Drugs){ 157 | D.genes.treated<-NULL 158 | for(drug in Drugs){ 159 | drug.treatments <- subset(data_infor,pert_iname == drug)$sig_id 160 | drug.responses <- data[,drug.treatments] 161 | drug.responses.mean <- apply(drug.responses,1,mean) 162 | D.D.genes <- intersect(names(D.gene.expression),names(drug.responses.mean)) 163 | D.genes.treated.temp <- -D.gene.expression[D.D.genes]*drug.responses.mean[D.D.genes] 164 | D.genes.treated <- cbind(D.genes.treated,D.genes.treated.temp) 165 | } 166 | remove <- which(rowSums(D.genes.treated<0)==length(Drugs)) 167 | D.genes.combination <- D.genes.treated[-remove,] 168 | scores <- apply(D.genes.combination,1,mean) 169 | temp.scores <- scores 170 | return(temp.scores) 171 | } 172 | Score.list <- apply(Selected.Drug.combinations, 2, Combination.treated.score) 173 | Combination.treated.ratio <- function(Drugs){ 174 | D.genes.treated<-NULL 175 | for(drug in Drugs){ 176 | drug.treatments 
<- subset(data_infor,pert_iname == drug)$sig_id 177 | drug.responses <- data[,drug.treatments] 178 | drug.responses.mean <- apply(drug.responses,1,mean) 179 | D.D.genes <- intersect(names(D.gene.expression),names(drug.responses.mean)) 180 | D.genes.treated.temp <- -D.gene.expression[D.D.genes]*drug.responses.mean[D.D.genes] 181 | D.genes.treated <- cbind(D.genes.treated,D.genes.treated.temp) 182 | } 183 | remove <- which(rowSums(D.genes.treated<0)==length(Drugs)) 184 | D.genes.combination <- D.genes.treated[-remove,] 185 | scores <- apply(D.genes.combination,1,mean) 186 | Ratio.treated <- length(which(scores>0))/length(D.D.genes) 187 | temp.scores <- Ratio.treated 188 | return(temp.scores) 189 | } 190 | Ratio.list <- apply(Selected.Drug.combinations, 2, Combination.treated.ratio) 191 | ref.score <- unlist(Score.list) 192 | P.value <- function(Score) { 193 | if(length(Score)>1 && length(ref.score)>1){ 194 | temp <- ks.test(Score, ref.score) 195 | p.value <- temp$p.value 196 | return(p.value) 197 | }else{ 198 | return(1) 199 | } 200 | } 201 | pvalues <- unlist(suppressWarnings(lapply(Score.list, P.value))) 202 | combination.scores <- unlist(suppressWarnings(lapply(Ratio.list,mean))) 203 | Combination.table <- as.data.frame(t(Selected.Drug.combinations)) 204 | for(d in 1:Combined.drugs){ 205 | Combination.table <- cbind(Combination.table, Single.treated.score.list[Combination.table[,d]]) 206 | } 207 | neg.combination.scores <- which(combination.scores<0) 208 | combination.scores[neg.combination.scores] <- -combination.scores[neg.combination.scores] 209 | Combination.table$Combination.therapeutic.score <- (Selected.Drug.combinations.coverage*combination.scores/100) 210 | Combination.table$Combination.therapeutic.score[neg.combination.scores] <- -Combination.table$Combination.therapeutic.score[neg.combination.scores] 211 | Combination.table$P.value <- pvalues 212 | Combination.table$FDR <- p.adjust(pvalues, method = "BH") 213 | 
colnames(Combination.table)[1:Combined.drugs] <- paste0("Drug",1:Combined.drugs) 214 | colnames(Combination.table)[(Combined.drugs+1):(2*Combined.drugs)] <- paste0("Drug",1:Combined.drugs,".therapeutic.score") 215 | return(Combination.table) 216 | } 217 | -------------------------------------------------------------------------------- /R/DrugScore.R: -------------------------------------------------------------------------------- 1 | #' @title Calculate drug score 2 | #' @description The drug score is a comprehensive estimation of drug therapeutic 3 | #' effects using all or a selected set of clusters. 4 | #' @details This function calculates drug score using cellular proportion of 5 | #' clusters, the significance of reversal in DEGs' expressions, and the ratio of 6 | #' the reversed genes. 7 | #' @param cell_metadata A data.frame of cell metadata. It must have a column 8 | #' named 'cluster' indicating which cluster cells belong, and a column named 9 | #' 'sample' indicating which sample cells belong. 10 | #' @param cluster_degs A list of differential gene expression profiles for 11 | #' each cluster. 12 | #' @param cluster_drugs Drug repurposing result from GetDrug function. 13 | #' @param tissue Reference tissue. If one used 'lung_rankMatrix.txt' in 14 | #' GetDrugRef function, then the Reference tissue is lung. Please use " " 15 | #' instead of "-" in tissue name. For example, while 16 | #' 'haematopoietic-and-lymphoid-tissue' is the prefix of the drug reference 17 | #' files, the corresponding tissue name is "haematopoietic and lymphoid tissue". 18 | #' @param gse70138_gctx_path The gctx file contains drug responses from GSE70138 19 | #' dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE70138). 20 | #' @param gse92742_gctx_path The gctx file contains drug responses from GSE92742 21 | #' dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE92742).. 
#' @param clusters Select which clusters (cell types) to be used for drug score
#' estimation. By default, it uses all clusters.
#' @param case A vector containing case sample names. When given,
#' 'cell_metadata' must also have a 'sample' column.
#' @param fda_drugs_only logical; if TRUE, will only return FDA-approved drugs,
#' else, will return all drugs/compounds.
#' @return A data frame with one row per drug and the columns
#' 'Drug.therapeutic.score', 'P.value' and 'FDR' (Benjamini-Hochberg adjusted
#' 'P.value').
#' @export
#' @import cmapR
DrugScore <- function(cell_metadata, cluster_degs, cluster_drugs, tissue,
                      gse70138_gctx_path, gse92742_gctx_path,
                      clusters = NULL, case = NULL, fda_drugs_only = TRUE) {

  if (!"cluster" %in% colnames(cell_metadata)) {
    stop("'cell_metadata' must have a column named 'cluster'.")
  }
  if (length(case) > 0 && !"sample" %in% colnames(cell_metadata)) {
    stop("'cell_metadata' must have a column named 'sample' when 'case' is given.")
  }

  # Restrict every input to the requested clusters (default: keep all).
  if (length(clusters) > 0) {
    clusters <- intersect(clusters, unique(cell_metadata$cluster))
    cell_metadata <- cell_metadata[cell_metadata$cluster %in% clusters, ,
                                   drop = FALSE]
    cluster_drugs <- cluster_drugs[clusters]
    cluster_degs <- cluster_degs[clusters]
  }

  # Cluster proportions (in percent) among the case cells. Clusters with
  # three or fewer cells are left out.
  if (length(case) > 0) {
    cell_metadata <- cell_metadata[cell_metadata$sample %in% case, ,
                                   drop = FALSE]
  }
  cluster_sizes <- table(cell_metadata$cluster)
  cluster_sizes <- cluster_sizes[which(cluster_sizes > 3)]
  # Stored as a plain named numeric vector: as.numeric() strips the 'table'
  # class, so data.frame() below stores it as one ordinary numeric column.
  cluster_prop <- as.numeric(round(100 * cluster_sizes / nrow(cell_metadata), 2))
  names(cluster_prop) <- names(cluster_sizes)

  # Stack the per-cluster drug tables into one long data frame.
  drug_list <- data.frame()
  for (i in names(cluster_drugs)) {
    ith_cluster_drugs <- cluster_drugs[[i]]
    ith_cluster_drugs <- ith_cluster_drugs[
      !duplicated(ith_cluster_drugs$Drug.name), , drop = FALSE]

    if (fda_drugs_only) {
      ith_cluster_drugs <- ith_cluster_drugs[
        ith_cluster_drugs$Drug.name %in% FDA.drug, , drop = FALSE]
    }

    if (nrow(ith_cluster_drugs) > 0) {
      # Names, p-values and FDRs all come from the same de-duplicated rows,
      # so the columns always have matching lengths.
      temp <- data.frame(
        drug = ith_cluster_drugs$Drug.name,
        cluster = i,
        cluster_prop = unname(cluster_prop[i]),  # NA for a dropped cluster
        p_value = ith_cluster_drugs$P.value,
        fdr = ith_cluster_drugs$FDR,
        row.names = NULL
      )
      drug_list <- rbind(drug_list, temp)
    }
  }
  if (nrow(drug_list) == 0) {
    stop("No drugs left to score; check 'cluster_drugs' and 'fda_drugs_only'.")
  }

  drug_list <- unique(drug_list)
  drug_list$weighted_prop <- drug_list$cluster_prop * (-log10(drug_list$fdr))
  # Dropped clusters carry an NA proportion; zero it so they add nothing.
  # Note that this also turns any NA p-value or FDR into 0.
  drug_list[is.na(drug_list)] <- 0

  drug_coverage <- tapply(drug_list$weighted_prop, drug_list$drug, sum)
  drugs <- rownames(drug_coverage)

  # Combine the cluster-specific p-values of each drug. CombineP() may hand
  # back a list (its 'p' element holds the result) when it cannot combine,
  # so unwrap to keep one number per drug.
  combine_p <- function(p) {
    res <- CombineP(p)
    if (is.list(res)) res$p else res
  }
  if (length(unique(names(cluster_drugs))) > 1) {
    combined_p_values <- tapply(drug_list$p_value, drug_list$drug, combine_p)
  } else {
    combined_p_values <- drug_list$p_value
    names(combined_p_values) <- drug_list$drug
  }

  # Cell lines that belong to the reference tissue.
  cell_lines <- cell_data$cell_id[which(cell_data$primary_site == tissue)]

  # Load signature metadata and drug responses of one LINCS dataset,
  # restricted to the tissue's cell lines and the drugs of interest.
  load_responses <- function(col_meta, gctx_path) {
    metadata <- col_meta[, c("sig_id", "pert_iname")]
    row.names(metadata) <- metadata$sig_id
    idx <- which(col_meta$cell_id %in% cell_lines &
                   col_meta$pert_iname %in% drugs)
    sig_ids <- col_meta$sig_id[idx]
    metadata <- metadata[sig_ids, ]

    exprs <- as.data.frame(parse_gctx(gctx_path, cid = sig_ids)@mat)
    treatments <- colnames(exprs)
    exprs$gene_id <- row.names(exprs)
    exprs <- merge(exprs, gene_meta, by.x = "gene_id", by.y = "pr_gene_id")
    list(
      metadata = metadata,
      responses = exprs[, c("pr_gene_symbol", treatments), drop = FALSE]
    )
  }
  gse92742 <- load_responses(col_meta_GSE92742, gse92742_gctx_path)
  gse70138 <- load_responses(col_meta_GSE70138, gse70138_gctx_path)

  drug_responses <- merge(gse92742$responses, gse70138$responses,
                          by = "pr_gene_symbol")
  row.names(drug_responses) <- drug_responses[, 1]
  # drop = FALSE keeps a data frame even when one signature column is left.
  drug_responses <- drug_responses[, -1, drop = FALSE]
  drug_metadata <- rbind(gse92742$metadata, gse70138$metadata)

  # Significant DEGs of every cluster; computed once, reused for every drug.
  significant_degs <- lapply(cluster_degs, function(degs) {
    degs[which(degs$adj.P.Val < 0.05), , drop = FALSE]
  })

  # Drug score: over clusters, sum of
  #   (cluster proportion / 100) * -log10(FDR) * share of reversed DEGs.
  drug_scores <- vapply(drugs, function(drug) {
    treatments <- as.character(
      drug_metadata$sig_id[which(drug_metadata$pert_iname == drug)])
    if (length(treatments) == 0) {
      return(0)
    }
    # rowMeans() on a drop = FALSE selection keeps the gene names even when
    # the drug has a single signature.
    mean_response <- rowMeans(drug_responses[, treatments, drop = FALSE])

    drug_stats <- drug_list[drug_list$drug == drug, , drop = FALSE]
    score <- 0
    for (i in names(cluster_degs)) {
      stats_i <- drug_stats[drug_stats$cluster == i, , drop = FALSE]
      if (nrow(stats_i) == 0) {
        next  # drug was not reported for this cluster
      }
      degs_i <- significant_degs[[i]]
      treatable_degs <- intersect(row.names(degs_i), names(mean_response))
      if (length(treatable_degs) == 0) {
        next
      }
      # A positive product means the drug response opposes the DEG score.
      reversal <- -degs_i[treatable_degs, "score"] * mean_response[treatable_degs]
      treated_degs_ratio <- sum(reversal > 0, na.rm = TRUE) / length(treatable_degs)
      score <- score +
        (stats_i$cluster_prop[1] / 100) * (-log10(stats_i$fdr[1])) * treated_degs_ratio
    }
    score
  }, numeric(1))

  # Row names are the syntactic form of the drug names (make.names), which
  # is what earlier versions produced through as.data.frame().
  out <- data.frame(
    Drug.therapeutic.score = unname(drug_scores),
    P.value = as.numeric(combined_p_values[drugs]),
    FDR = p.adjust(as.numeric(combined_p_values[drugs]), method = "BH"),
    row.names = make.names(drugs, unique = TRUE)
  )
  return(out)
}

--------------------------------------------------------------------------------
/R/GetDrug.R:
--------------------------------------------------------------------------------
1 | #' @title Mono-drug Repurposing.
2 | #' @description It identify mono-drug therapy for every cell type.
3 | #' @details This function allows the user to query the differential expression data of every case cell type against reference drug response profiles. This function is a revised version of drug.identification from the DrInsight package. 4 | #' @param drug.ref.profiles A list containing tissue-specific drug reference profiles from the GetDrugRef function. 5 | #' @param repurposing.unit Either "treatment" or "drug"; indicates whether the drug repurposing p value is tested at the treatment level or the drug level. The default is "drug"; "treatment" treats the drug data from different cell lines separately. 6 | #' @param CEG.threshold The p value threshold to select the consistently differentially expressed genes (CEGs). The default value is 0.05. 7 | #' @param connectivity The type of connectivity, either "negative" or "positive". Negative connectivity is used when the query data is the differential scores from disease data, and it will repurpose drugs that can potentially reverse the query disease phenotype. Positive connectivity is used when the query data is from a drug profile, and it will return the drugs that are similar to the query drug. The default value is "negative". 8 | #' @param drug.type Either "FDA", "compounds" or "all"; indicates whether the function identifies FDA-approved drugs, compounds, or both, respectively. The default value is "FDA". 9 | #' @return A list of mono-drugs for every cell type. 
#' @param gene.data A named list with one element per cell type (or cluster). Each element is a data frame whose row names are gene symbols and which has a numeric \code{score} column used to rank the genes.
#' @export


GetDrug = function(gene.data = NULL,
                   drug.ref.profiles = NULL,
                   repurposing.unit = "drug",
                   CEG.threshold = 0.05,
                   connectivity = "negative",
                   drug.type="FDA"){
  ##Validate the repurposing unit up front, before any expensive computation
  if(length(repurposing.unit) != 1 || !(repurposing.unit %in% c("drug","treatment"))){
    stop(simpleError("Please set the repurposing unit to either 'drug' or 'treatment'."))
  }

  ##Restrict the reference to FDA-approved drugs or to the remaining compounds.
  ##Any other drug.type value (e.g. "all") keeps the full reference.
  if(drug.type %in% c("FDA","compounds")){
    Drug.info <- drug.ref.profiles$drug.info
    ##The drug name is the part of cmap_name before the first underscore
    is.fda <- gsub("_.*","",Drug.info$cmap_name) %in% FDA.drug
    keep <- if(drug.type == "FDA") is.fda else !is.fda
    Drug.info <- Drug.info[keep, , drop = FALSE]
    ##drop = FALSE keeps the matrix two-dimensional when a single instance remains
    drug.ref.profiles$drug.rank.matrix <- drug.ref.profiles$drug.rank.matrix[, Drug.info$instance_id, drop = FALSE]
    drug.ref.profiles$drug.info <- Drug.info
  }

  e1 = simpleError("Did not find the column named 'geneSymbol' in query data that contains the gene symbols in it.")
  e2 = simpleError("Did not find the column named 'score' in query data that contains the test statistics or any values that you would like to rank the genes.")

  ##The grouping used for the drug-level test is the same for every cell type
  drug.info = drug.ref.profiles$drug.info
  drug.info$drug = if(repurposing.unit == "drug") drug.info$cmap_name else drug.info$treatment

  ##seq_along() iterates zero times for an empty list (1:length(...) would yield 1, 0)
  res.list <- vector("list", length(gene.data))
  for(ci in seq_along(gene.data)){
    ##data.frame() silently drops NULL columns, so a missing 'score' column or
    ##missing row names is detected by the column checks below
    query.data <- data.frame(geneSymbol=row.names(gene.data[[ci]]),score=gene.data[[ci]]$score)

    cat("\n")
    cat("\n")
    message("Data preprocessing ...\n")
    cat("\n")
    if(!("score" %in% colnames(query.data))){
      stop(e2)
    }
    if(!("geneSymbol" %in% colnames(query.data))){
      stop(e1)
    }
    tmp = data_preprocess(query.data, drug.ref.profiles$drug.rank.matrix, connectivity = connectivity)
    query.data = tmp[[1]]
    cmap.drug.rank = tmp[[2]]
    rm(tmp)

    message("Identifying drug instance CEGs...\n")
    cat("\n")
    p_min = get_gene_pval('min',cmap.drug.rank,query.data)
    p_max = get_gene_pval('max',cmap.drug.rank,query.data)

    ##Select the smallest p value (between 2 p values) as the p value of the gene
    p_score = pmin(p_min,p_max)
    z_score = qnorm(p_score,lower.tail = FALSE)
    CEG.pvals = get_CEGs(p_min, p_max, z_score,threshold = CEG.threshold)

    message("Calculating drug connectivity p values ...\n")
    cat("\n")
    drug.pvals = get_drug_pval(CEGsum = CEG.pvals$CEG.sumz.scores,drug.info = drug.info)

    ##Most significant drugs first; row names hold "<name>_<id>" style labels
    drug.pvals = drug.pvals[order(drug.pvals$pval), , drop = FALSE]
    drugs = rownames(drug.pvals)
    res.list[[ci]] <- data.frame(Drug.name = gsub("_BRD-.*","",drugs),
                                 Drug.id = gsub(".*_","",drugs),
                                 P.value = drug.pvals$pval,
                                 FDR = p.adjust(drug.pvals$pval,method = "fdr"),
                                 stringsAsFactors = FALSE)
  }
  names(res.list) <- names(gene.data)
  return(res.list)
}

# --------------------------------------------------------------------------------
# /R/GetDrugRef.R:
# --------------------------------------------------------------------------------
#' @title Load and Process Drug Reference Profiles.
#' @description This function allows user to load in the tissue specific drug rank matrix.
#' @details This function is a revised version of get.cmap.ref from DrInsight package.
#' The tissue specific drug rank matrix is transformed from L1000 data (GEO: GSE92742 and GSE70138) using PrepareReference function.
#' @param drug.response.path The local path and the name of the tissue specific drug rank matrix.
#' @param probe.to.genes A data.frame containing gene IDs (the IDs used in drug rank matrix) in column \code{ID} and official gene symbols in column \code{Gene.Symbol}. This file was automatically generated with the drug rank matrix.
#' @param drug.info A data.frame containing drug information. This file was automatically generated with the drug rank matrix.
#' @return A list with two elements: \code{drug.info} (the input, unchanged) and \code{drug.rank.matrix} (the rank matrix with gene symbols as row names).
#' @export


GetDrugRef = function(drug.response.path = NULL, probe.to.genes = NULL, drug.info = NULL){
  cat("\n")
  cat("\n")
  message("Loading CMap drug matrix. This may take some time ... \n")
  ##First column of the file holds the gene IDs and becomes the row names
  cmap.drug.rank = read.table(drug.response.path,row.names = 1, header = TRUE, check.names = FALSE)
  ##Look rows up by name: numeric or factor IDs would otherwise be used as row
  ##positions. drop = FALSE keeps a data frame when there is a single drug column.
  cmap.drug.rank = cmap.drug.rank[as.character(probe.to.genes$ID), , drop = FALSE]
  rownames(cmap.drug.rank) = probe.to.genes$Gene.Symbol
  cmap.ref.profiles = list(drug.info = drug.info, drug.rank.matrix = cmap.drug.rank)
  return(cmap.ref.profiles)
}

# --------------------------------------------------------------------------------
# /R/PrepareReference.R:
# --------------------------------------------------------------------------------
#' @title Prepare Drug Reference.
#' @description Prepare tissue specific drug reference Profiles from L1000 drug response data.
#' @details This function converts L1000 data to the tissue specific drug rank matrix.
#' @param cell.info The local path and the name of the cell.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_cell_info_2017-04-28.txt.gz .
#' @param gene.info The local path and the name of the gene.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_gene_info_2017-03-06.txt.gz .
#' @param GSE70138.sig.info The local path and the name of the GSE70138 sig.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_sig_info_2017-03-06.txt.gz .
#' @param GSE92742.sig.info The local path and the name of the GSE92742 sig.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_sig_info.txt.gz .
#' @param GSE70138.gctx The local path and the name of the GSE70138 gctx file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx.gz .
#' @param GSE92742.gctx The local path and the name of the GSE92742 gctx file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx.gz .
#' @param Output.Dir The output directory for the generated files. It is pasted in front of the output file names, so it should end with a path separator. The directory is created if it does not exist.
#' @export
#' @import cmapR

PrepareReference <- function(cell.info = NULL,
                             gene.info = NULL,
                             GSE70138.sig.info = NULL,
                             GSE92742.sig.info = NULL,
                             GSE70138.gctx = NULL,
                             GSE92742.gctx = NULL,
                             Output.Dir = "./"){

  ##Rank matrix of one dataset for the given cell lines; NULL when the dataset
  ##has no matching compound-treatment signature.
  rank_signatures <- function(gctx, col_meta, cell_names, tissue){
    keep <- col_meta$cell_id %in% cell_names & col_meta$pert_type == "trt_cp"
    if(tissue == "breast"){
      ##This perturbagen is excluded from the breast reference
      keep <- keep & col_meta$pert_id!="BRD-K18910433"
    }
    sig_ids <- col_meta$sig_id[which(keep)]
    rm.ids <- grep('REP\\.',sig_ids)
    if(length(rm.ids)>0){
      sig_ids <- sig_ids[-rm.ids]
    }
    if(length(sig_ids) == 0){
      return(NULL)
    }
    my_ds <- parse_gctx(gctx, cid=sig_ids)
    ##Rank 1 is given to the largest value of each signature
    myrank <- function(x){
      rank(-x,ties.method ="min")
    }
    as.data.frame(apply(my_ds@mat,2,myrank))
  }

  ##Drug annotation table of one dataset, keyed by the "_"-separated signature id.
  drug_annotation <- function(sig.info){
    sig_data <- read.table(file=sig.info,sep="\t",header = TRUE,quote = "")
    sig_data$sig_id <- gsub(":","_",sig_data$sig_id)
    data.frame(instance_id=sig_data$sig_id,
               cmap_name=paste(sig_data$pert_iname,sig_data$pert_id,sep="_"),
               concentration..M=sig_data$pert_idose,
               duration..h=sig_data$pert_itime,
               cell2=sig_data$cell_id,
               catalog_name=sig_data$pert_id,
               treatment=paste(sig_data$pert_iname,"_",sig_data$sig_id,sep = ""))
  }

  ##All annotation inputs are independent of the tissue, so they are read once
  cell_data <- read.table(file=cell.info,sep="\t",header = TRUE,quote = "")
  tissues <- unique(as.character(cell_data$primary_site))
  tissues <- tissues[which(tissues!="-666")]

  gene_data <- read.table(file=gene.info,sep="\t",header = TRUE,quote = "")
  my_gene_info <- gene_data[,1:2]
  colnames(my_gene_info) <- c("ID","Gene.Symbol")

  sources <- list(
    list(gctx = GSE70138.gctx, sig.info = GSE70138.sig.info),
    list(gctx = GSE92742.gctx, sig.info = GSE92742.sig.info)
  )
  sources <- lapply(sources, function(src){
    src$col_meta <- read.delim(src$sig.info, sep="\t", stringsAsFactors=FALSE)
    src$drug_info <- drug_annotation(src$sig.info)
    src
  })

  ##Output.Dir is used as a file-name prefix; make sure its directory exists
  out_dir <- dirname(paste(Output.Dir,"x",sep = ""))
  if(!dir.exists(out_dir)){
    dir.create(out_dir, recursive = TRUE)
  }

  for (tissue in tissues){
    print(tissue)
    cell_names <- cell_data$cell_id[which(cell_data$primary_site == tissue)]

    rank_parts <- lapply(sources, function(src){
      rank_signatures(src$gctx, src$col_meta, cell_names, tissue)
    })
    rank_parts <- rank_parts[!vapply(rank_parts, is.null, logical(1))]
    if(length(rank_parts) == 0){
      ##No signature for this tissue in either dataset: nothing to write
      next
    }
    rank_matrix <- if(length(rank_parts) == 1) rank_parts[[1]] else do.call(cbind, rank_parts)

    ##Signature ids are kept in cnames; the columns are renamed to 1..n
    cnames <- gsub(":","_",colnames(rank_matrix))
    colnames(rank_matrix) <- seq_along(cnames)
    dcnames <- colnames(rank_matrix)
    rank_matrix$probe_id <- row.names(rank_matrix)
    rank_matrix <- rank_matrix[,c('probe_id', dcnames)]

    out_prefix <- paste(Output.Dir,gsub(" ","-",tissue),sep = "")
    write.table(rank_matrix,file=paste(out_prefix,"_rankMatrix.txt",sep = ""),quote=FALSE,row.names = FALSE,sep = "\t")
    write.table(my_gene_info,file=paste(out_prefix,"_gene_info.txt",sep = ""),quote=FALSE,row.names = FALSE,sep = "\t")

    drug_parts <- lapply(sources, function(src){
      src$drug_info[src$drug_info$instance_id %in% cnames, , drop = FALSE]
    })
    my_drug_info <- do.call(rbind, drug_parts)
    ##Instance ids are renumbered 1..n, like the rank-matrix columns
    my_drug_info$instance_id <- seq_len(nrow(my_drug_info))
    write.table(my_drug_info,file=paste(out_prefix,"_drug_info.txt",sep = ""),quote=FALSE,row.names = FALSE,sep = "\t")
  }
}

# --------------------------------------------------------------------------------
# /R/SCplasticity.R:
# --------------------------------------------------------------------------------
#' @title Single-cell Plasticity.
#' @description It determines the plasticity of each cell type.
#' @details This function estimates the entropy of every cell in the case samples. For each cell type, it uses the median entropy value as the plasticity of that cell type.
#' @param SC.integrated A Seurat object of aligned single cells from SCalignment function.
#' @param Case A vector containing the names of case samples. When empty, all cells are used.
#' @return A data frame of plasticity, normalized plasticity and cell type coverage.
#' @export

SCplasticity <- function (SC.integrated = SC.data, Case=NULL)
{
  ##Keep only the cells of the case samples when they are given
  if(length(Case)>0){
    SC.integrated <- subset(SC.integrated, sample %in% Case)
  }
  SC.meta <- SC.integrated@meta.data
  expr.data <- as.matrix(SC.integrated@assays$RNA@counts)

  #Entropy-based Plasticity
  #Per-cell count fractions; cells with zero total counts give NaN and are set to 0
  probs <- t(t(expr.data)/colSums(expr.data))
  probs[is.na(probs)] <- 0
  log.probs <- log(probs)
  #log(0) is -Inf; zero fractions contribute nothing to the entropy
  log.probs[is.infinite(log.probs)] <- 0
  #Entropy of each cell, scaled by log(number of genes)
  SC.meta$cell.entropy <- -colSums(probs*log.probs/log(nrow(expr.data)))
  SC.entropy <- tapply(SC.meta$cell.entropy, SC.meta$celltype, median)
  SC.entropy <- data.frame(Cell.Type=row.names(SC.entropy),Plasticity=SC.entropy)
  rm(expr.data, log.probs, probs)

  #Normalize Plasticity (min-max scaling across cell types)
  min.plasticity <- min(SC.entropy$Plasticity)
  max.plasticity <- max(SC.entropy$Plasticity)
  SC.entropy$Normalized.Plasticity=(SC.entropy$Plasticity-min.plasticity)/(max.plasticity-min.plasticity)

  #Population Size (percentage of cells in each cell type)
  Cluster.cell.rate <- table(SC.meta$celltype)/nrow(SC.meta)
  SC.entropy$Coverage <- 100*Cluster.cell.rate[row.names(SC.entropy)]

  return(SC.entropy)
}

# --------------------------------------------------------------------------------
# /R/TopCombination.R:
# --------------------------------------------------------------------------------
#' @title Combination Drug Selection.
#' @description Select drug combinations by combination therapeutic score and FDR of combination therapeutic score.
#' @details Input raw drug combination result and return the top drug combinations, sorted by decreasing combination therapeutic score.
#' @param Drug.combination raw drug combination result from DrugCombination function.
#' @param Combination.FDR The FDR threshold to select drug combination; only combinations with an FDR below it are kept. The default value is 0.1.
#' @param Min.combination.score The Combination therapeutic score threshold to select drug combination; only combinations with a score above it are kept. The default value is 1.
#' @return A data frame of selected drug combinations.
#' @export


TopCombination <- function(Drug.combination=Drug.combinations,
                           Combination.FDR=0.1,
                           Min.combination.score=1
){
  ##Explicit column access: with subset(), a data column named like one of the
  ##threshold arguments would silently replace the threshold.
  ##which() drops rows whose score or FDR is NA.
  selected <- which(Drug.combination$Combination.therapeutic.score > Min.combination.score &
                      Drug.combination$FDR < Combination.FDR)
  Drug.combination <- Drug.combination[selected, , drop = FALSE]
  score.order <- order(Drug.combination$Combination.therapeutic.score, decreasing = TRUE)
  Drug.combination <- Drug.combination[score.order, , drop = FALSE]
  return(Drug.combination)
}

# --------------------------------------------------------------------------------
# /R/TopDrug.R:
# --------------------------------------------------------------------------------
#' @title Single Drug Selection for Individual Clusters.
#' @description Select single drugs for every cell population by FDR and drug type, and summarize cell coverage for selected drugs.
#' @details Input raw drug repurposing result and return the top drugs with summary of cell coverage.
#' @param SC.integrated A Seurat object of aligned single cells.
#' @param Drug.data Drug repurposing result from GetDrug function.
#' @param Drug.FDR The FDR threshold to select drug. The default value is 0.1.
#' @param FDA.drug.only logical; if TRUE, will only return FDA-approved drugs.
8 | #' @param Case An vector of case (diseased) samples.Only case sammples are involved in the calculation of coverage. 9 | #' @return A data frame of selected drugs with summary of cell coverage. 10 | #' @export 11 | 12 | 13 | TopDrug <- function(SC.integrated = SC.data, 14 | Drug.data = Drug.ident.res, 15 | Drug.FDR = 0.1, 16 | FDA.drug.only = TRUE, 17 | Case = NULL){ 18 | 19 | ##Cell proportion 20 | cells <- SC.integrated@meta.data 21 | if(length(Case)>0){ 22 | cells <- subset(cells,sample %in% Case) 23 | } 24 | cells <- cells$celltype 25 | cell.count <- table(cells) 26 | cell.count <- cell.count[which(cell.count>3)] 27 | cells.freq <- round(100*cell.count/length(cells),2) 28 | 29 | ##Load drug data 30 | Drug.list <- data.frame() 31 | for(i in names(Drug.data)){ 32 | Cd <- Drug.data[[i]] 33 | Cd <- subset(Cd, FDR0){ 39 | Cd <- subset(Cd, Drug.name %in% Drugs) 40 | temp <- data.frame(Drug=Cd$Drug.name,Cell.type=i,Cell.type.coverage=cells.freq[i],FDR=Cd$FDR,row.names = NULL) 41 | Drug.list <- rbind(Drug.list,temp) 42 | } 43 | } 44 | Drug.list <- Drug.list[order(Drug.list$FDR, decreasing = F),] 45 | Drug.list <- Drug.list[!duplicated(Drug.list),] 46 | Drug.coverage <- tapply(Drug.list$Cell.type.coverage, Drug.list$Drug,sum) 47 | temp.coverage <- Drug.coverage[Drug.list$Drug] 48 | Drug.list$Drug.coverage <- temp.coverage 49 | Drug.list <- Drug.list[,c(1:3,5,4)] 50 | Drug.list <- Drug.list[order(Drug.list$Drug.coverage, decreasing = T),] 51 | return(Drug.list) 52 | } 53 | -------------------------------------------------------------------------------- /R/data_preprocess.R: -------------------------------------------------------------------------------- 1 | 2 | ##Match common genes between users query data and cmap 3 | data_preprocess = function(query.data,cmap.drug.rank, connectivity){ 4 | common.genes = intersect(query.data$geneSymbol,rownames(cmap.drug.rank)) 5 | rownames(query.data) = query.data$geneSymbol 6 | query.data = query.data[common.genes,] 7 | 
if(connectivity == "negative"){ 8 | ##Rank query data gene statistic scores from smallest to largest, opposite to cmap gene rank 9 | query.data$geneRank = rank(query.data$score,ties.method = "first") 10 | } else if(connectivity == "positive"){ 11 | ##Rank query data gene statistic scores from largest to smallest, same with cmap gene rank 12 | query.data$geneRank = rank(-(query.data$score),ties.method = "first") 13 | } 14 | 15 | cmap.drug.rank = cmap.drug.rank[common.genes,] 16 | ##Re-rank drug rank matrix after excluding uncommon genes 17 | for(i in 1:ncol(cmap.drug.rank)){ 18 | cmap.drug.rank[,i] = rank(cmap.drug.rank[,i]) 19 | } 20 | return(list(query.data,cmap.drug.rank)) 21 | } 22 | 23 | -------------------------------------------------------------------------------- /R/get_CEGs.R: -------------------------------------------------------------------------------- 1 | 2 | ##Get CEGs and CEG's sumz scores 3 | get_CEGs = function(p_min, p_max, z_score,threshold){ 4 | CEG.pvals = list() 5 | CEG.pvals$down = p_min 6 | CEG.pvals$up = p_max 7 | CEGz= numeric(ncol(z_score)) 8 | for(i in 1:ncol(z_score)){ 9 | CEGz[i] = sum(z_score[which(z_score[,i] >= qnorm(threshold,lower.tail = F)),i]) 10 | } 11 | names(CEGz) = colnames(z_score) 12 | res = list(CEGz, CEG.pvals) 13 | names(res) = c("CEG.sumz.scores","CEG.pvals") 14 | return(res) 15 | } 16 | 17 | -------------------------------------------------------------------------------- /R/get_drug_pval.R: -------------------------------------------------------------------------------- 1 | 2 | ##Get drug p value based on CEGscore 3 | get_drug_pval = function(CEGsum,drug.info){ 4 | treat_drug_ks = matrix(0,ncol=1,nrow = length(unique(drug.info$drug))) 5 | treat_drug_ks = as.data.frame(treat_drug_ks) 6 | rownames(treat_drug_ks) = unique(drug.info$drug) 7 | colnames(treat_drug_ks) = 'pval' 8 | 9 | for(i in 1:nrow(treat_drug_ks)){ 10 | indiv_drug = drug.info[(drug.info$drug == rownames(treat_drug_ks)[i]),]$instance_id 11 | indiv_drug <- 
as.character(indiv_drug) 12 | indiv_drug_score = CEGsum[indiv_drug] 13 | rest_score = CEGsum[setdiff(names(CEGsum),indiv_drug)] 14 | 15 | ##k-s test: one drug drug.info v.s. other drug.info 16 | options(warn = -1) 17 | treat_drug_ks$pval[i] = (ks.test(indiv_drug_score,rest_score,alternative = 'less'))$p.value 18 | } 19 | treat_drug_ks$drug = sapply(rownames(treat_drug_ks),function(x){strsplit(x,split="_")[[1]][1]}) 20 | 21 | return(treat_drug_ks) 22 | } 23 | 24 | -------------------------------------------------------------------------------- /R/get_gene_pval.R: -------------------------------------------------------------------------------- 1 | 2 | ##Beta test for gene p values 3 | get_gene_pval = function(order, cmap.drug.rank,query.data){ 4 | geneRank = as.matrix(replicate(ncol(cmap.drug.rank),query.data$geneRank)) 5 | ##Keep min(x,y) to find the bottom ranked genes 6 | if (order == 'min'){ 7 | order_stat = pmin(as.matrix(cmap.drug.rank),geneRank) 8 | p_val = 1 - pbeta((order_stat-1)/nrow(order_stat),1,2,lower.tail = T) 9 | } 10 | else if(order == 'max'){ 11 | order_stat = pmax(as.matrix(cmap.drug.rank),geneRank) 12 | p_val = pbeta(order_stat/nrow(order_stat),2,1,lower.tail = T) 13 | } 14 | return(p_val) 15 | } 16 | 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # **Asgard: A Single-cell Guided pipeline to Aid Repurposing of Drugs** 2 | 3 | Using scRNA-seq data, Asgard repurposes drugs and predicts personalized drug 4 | combinations to address the cellular heterogeneity of patients. 5 | 6 | ![image](asgard_pipeline.png) 7 | 8 | ### **Citation** 9 | 10 | > He, B., Xiao, Y., Liang, H. et al. ASGARD is A Single-cell Guided Pipeline to 11 | Aid Repurposing of Drugs. *Nat Commun* 14, 993 (2023). 
12 | https://doi.org/10.1038/s41467-023-36637-3 13 | 14 | ## **System Requirements** 15 | 16 | ### **Hardware requirements** 17 | 18 | Asgard package requires only a standard computer with enough RAM (>64GB) to 19 | support the in-memory operations. 20 | 21 | ### **Software requirements** 22 | 23 | The package has been tested on the following systems: 24 | ``` 25 | Windows 10 26 | CentOS Linux 7 27 | ``` 28 | 29 | Required R packages: 30 | ``` 31 | Seurat 32 | limma 33 | cmapR 34 | SingleR 35 | celldex 36 | ``` 37 | ## Installation 38 | #### Install devtools if you don't have it 39 | ``` 40 | install.packages('devtools') 41 | ``` 42 | #### Install recommended packages 43 | ``` 44 | if (!requireNamespace("BiocManager", quietly = TRUE)) 45 | install.packages("BiocManager") 46 | 47 | BiocManager::install(c("SingleR","limma","cmapR","celldex")) 48 | 49 | install.packages('Seurat') 50 | 51 | #If you can't install a package with above commands, try to download the gz file and install it locally. 52 | 53 | #Take celldex package as an example: 54 | 55 | #Downlaod the source package of celldex in linux 56 | wget https://bioconductor.org/packages/release/data/experiment/src/contrib/celldex_1.0.0.tar.gz 57 | 58 | #Start R 59 | R 60 | 61 | #Install celldex from the local source package 62 | install.packages('celldex_1.0.0.tar.gz') 63 | 64 | #Note: some dependency packages require R version newer than 4.0 65 | 66 | ``` 67 | #### Install Asgard 68 | ``` 69 | devtools::install_github("lanagarmire/Asgard") 70 | ``` 71 | #### Load Asgard 72 | ``` 73 | library('Asgard') 74 | ``` 75 | #### Docker 76 | 77 | You can run Asgard via Docker. First, install Docker for your platform. 78 | 79 | ``` 80 | docker run --rm -v `pwd`:/home/rstudio/Asgard -p 8787:8787 -it lanagarmire/asgard:1.0.0 81 | ``` 82 | 83 | This will mount the directory that you are currently working in so it is accessible by the Docker container. 
84 | 85 | You can then open a browser and navigate to 127.0.0.1:8787, enter "rstudio" as the username and 86 | copy the password from the terminal. You will want to change the working directory to "/home/rstudio/Asgard". 87 | 88 | To build the DrugReference, you will need a large amount of RAM (64GB). 89 | 90 | Upon completion, you can press ^C in the terminal to quit the RStudio server. 91 | 92 | ## Prepare Drug Reference Library 93 | #### Step 1 94 | #### Download L1000 Connectivity Map perturbational profiles GSE70138 and GSE92742 from GEO 95 | 

Method 1: click file names below

96 | 97 | [GSE70138_Broad_LINCS_cell_info_2017-04-28.txt](https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_cell_info_2017-04-28.txt.gz) 98 | 99 | [GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx](https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx.gz) 100 | 101 | [GSE70138_Broad_LINCS_sig_info_2017-03-06.txt](https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_sig_info_2017-03-06.txt.gz) 102 | 103 | [GSE70138_Broad_LINCS_gene_info_2017-03-06.txt](https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_gene_info_2017-03-06.txt.gz) 104 | 105 | [GSE92742_Broad_LINCS_cell_info.txt](https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_cell_info.txt.gz) 106 | 107 | [GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx](https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx.gz) 108 | 109 | [GSE92742_Broad_LINCS_sig_info.txt](https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_sig_info.txt.gz) 110 | 111 | or Method 2: run following commands in linux 112 | ``` 113 | wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_cell_info_2017-04-28.txt.gz 114 | wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx.gz 115 | wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_sig_info_2017-03-06.txt.gz 116 | wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_gene_info_2017-03-06.txt.gz 117 | wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_cell_info.txt.gz 118 | wget 
https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx.gz 119 | wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_sig_info.txt.gz 120 | ``` 121 | #### Step 2 122 | #### Generate tissue specific drug references from GSE70138 and GSE92742 123 | Unzip downloaded files, revise the Your_local_path and run the following code: 124 | ``` 125 | library('Asgard') 126 | 127 | #Please replace Your_local_path with your real local folder 128 | 129 | PrepareReference(cell.info="GSE70138_Broad_LINCS_cell_info_2017-04-28.txt", 130 | gene.info="GSE70138_Broad_LINCS_gene_info_2017-03-06.txt", 131 | GSE70138.sig.info = "GSE70138_Broad_LINCS_sig_info_2017-03-06.txt", 132 | GSE92742.sig.info = "GSE92742_Broad_LINCS_sig_info.txt", 133 | GSE70138.gctx = "GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx", 134 | GSE92742.gctx = "GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx", 135 | Output.Dir = "DrugReference/" 136 | ) 137 | 138 | #Note: the file names here maybe different after unzipping. 139 | #Please note that it takes more than one hour to produce drug references in a standard computer with RAM>64GB. 140 | ``` 141 | Please use '?PrepareReference' for more help. 142 | 143 | ## **Drug Repurposing** 144 | ### **Step 1: Load single-cell RNA-seq data** 145 | 146 | Download datasets GSE113197 and GSE123926 from GEO before running this script. 
147 | 148 | Human Breast Cancer Epithelial Cells (GSE123926): 149 | [GSE123926_RAW.tar](https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE123926&format=file) 150 | 151 | Normal Human Breast Epithelial Cells (GSE113197): 152 | [GSE113197_RAW.tar](https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE113197&format=file) 153 | 154 | ```R 155 | library('Seurat') 156 | 157 | # Load cells' cell type annotations for GSE113197 158 | cell_types_file <- paste0( 159 | "https://raw.githubusercontent.com/lanagarmire/" 160 | "Single-cell-drug-repositioning/master/Drug/Normal_celltype.txt" 161 | ) 162 | cell_types <- read.table(file=celltypes, header=TRUE, check.names=FALSE) 163 | 164 | # Cell type of interest 165 | cell_types_names <- c( 166 | "Luminal_L2_epithelial_cells", "Luminal_L1.1_epithelial_cells", 167 | "Luminal_L1.2_epithelial_cells", "Basal_epithelial_cells" 168 | ) 169 | 170 | # Load normal sample Ind5 from GSE113197 dataset 171 | data <- read.table(file="GSM3099847_Ind5_Expression_Matrix.txt", 172 | header=TRUE, check.names=FALSE) 173 | row.names(data) <- data[, 1] 174 | data <- data[, -1] 175 | ind5_cells <- subset(cell_type, sample=="Ind5" & celltype %in% celltypes_names) 176 | common <- intersect(colnames(data), rownames(ind5_cells)) 177 | data <- data[, common] 178 | 179 | metadata = data.frame( 180 | ind5_celltypes, 181 | cell = colnames(data), 182 | type = "normal" 183 | ) 184 | epithelial2 <- CreateSeuratObject(counts=data, project="Epithelial", min.cells=3, 185 | min.features=200, meta.data=metada) 186 | 187 | #Load normal sample Ind6 from GSE113197 dataset 188 | data <- read.table(file="GSM3099848_Ind6_Expression_Matrix.txt", header=TRUE, 189 | check.names=FALSE) 190 | row.names(data) <- data[, 1] 191 | data <- data[, -1] 192 | ind6_cells <- subset(celltype,sample=="Ind6" & celltype %in% c("Luminal_L2_epithelial_cells","Luminal_L1.1_epithelial_cells", "Luminal_L1.2_epithelial_cells", "Basal_epithelial_cells")) 193 | common <- intersect(colnames(data), 
rownames(celltype3)) 194 | data<-data[,common] 195 | Epithelial3 <- CreateSeuratObject(counts = data, project = "Epithelial", min.cells = 3, min.features = 200,meta.data=data.frame(celltype3,cell=colnames(data),type="Normal")) 196 | 197 | #Load normal sample Ind7 from GSE113197 dataset 198 | data<-read.table(file="GSM3099849_Ind7_Expression_Matrix.txt",header = T,check.names=FALSE) 199 | row.names(data)<-data[,1] 200 | data<-data[,-1] 201 | celltype4<-subset(celltype,sample=="Ind7" & celltype %in% c("Luminal_L2_epithelial_cells","Luminal_L1.1_epithelial_cells", "Luminal_L1.2_epithelial_cells", "Basal_epithelial_cells")) 202 | common <- intersect(colnames(data), rownames(celltype4)) 203 | data<-data[,common] 204 | Epithelial4 <- CreateSeuratObject(counts = data, project = "Epithelial", min.cells = 3, min.features = 200,meta.data=data.frame(celltype4,cell=colnames(data),type="Normal")) 205 | 206 | #Load cancer sample PDX110 from GSE123926 dataset 207 | TNBC_PDX.data<- Read10X(data.dir = "GSM3516947_PDX110") 208 | TNBC.PDX2 <- CreateSeuratObject(counts = TNBC_PDX.data, project = "TNBC", min.cells = 3, min.features = 200, meta.data=data.frame(row.names=colnames(TNBC_PDX.data), cell=colnames(TNBC_PDX.data), sample="PDX-110",type="TNBC.PDX")) 209 | 210 | #Load cancer sample PDX322 from GSE123926 dataset 211 | TNBC_PDX.data<- Read10X(data.dir = "GSM3516948_PDX322") 212 | TNBC.PDX3 <- CreateSeuratObject(counts = TNBC_PDX.data, project = "TNBC", min.cells = 3, min.features = 200, meta.data=data.frame(row.names=colnames(TNBC_PDX.data), cell=colnames(TNBC_PDX.data), sample="PDX-332",type="TNBC.PDX")) 213 | 214 | 215 | ``` 216 | 217 | #### Step 2 218 | #### Single-cell alignment 219 | ```R 220 | SC.list <- list( 221 | TNBC.PDX2 = TNBC.PDX2, 222 | TNBC.PDX3 = TNBC.PDX3, 223 | Epithelial2 = Epithelial2, 224 | Epithelial3 = Epithelial3, 225 | Epithelial4 = Epithelial4 226 | ) 227 | CellCycle = TRUE #Set it TRUE if you want to do Cell Cycle Regression 228 | anchor.features=2000 
229 | 230 | for (i in 1:length(SC.list)) { 231 | SC.list[[i]] <- NormalizeData(SC.list[[i]], verbose = FALSE) 232 | SC.list[[i]] <- FindVariableFeatures(SC.list[[i]], selection.method = "vst", 233 | nfeatures = anchor.features, verbose = FALSE) 234 | } 235 | SC.anchors <- FindIntegrationAnchors(object.list = SC.list,anchor.features = anchor.features, dims = 1:15) 236 | SC.integrated <- IntegrateData(anchorset = SC.anchors, dims = 1:15) 237 | DefaultAssay(SC.integrated) <- "integrated" 238 | if (CellCycle) { 239 | ##Cell Cycle Regression 240 | s.genes <- cc.genes$s.genes 241 | g2m.genes <- cc.genes$g2m.genes 242 | SC.integrated <- CellCycleScoring(SC.integrated, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE) 243 | SC.integrated <- ScaleData(SC.integrated, vars.to.regress = c("S.Score", "G2M.Score"), features = rownames(SC.integrated)) 244 | SC.integrated <- RunPCA(SC.integrated, npcs = 15, verbose = FALSE) 245 | } 246 | else { 247 | ##Run the standard workflow for visualization and clustering 248 | SC.integrated <- ScaleData(SC.integrated, verbose = FALSE) 249 | SC.integrated <- RunPCA(SC.integrated, npcs = 15, verbose = FALSE) 250 | } 251 | ##t-SNE and Clustering 252 | SC.integrated <- RunUMAP(SC.integrated, reduction = "pca", dims = 1:15) 253 | SC.integrated <- FindNeighbors(SC.integrated, reduction = "pca", dims = 1:15) 254 | SC.integrated <- FindClusters(SC.integrated, algorithm = 1, resolution = 0.4) 255 | 256 | ##Cell Type Annotation, set by.CellType=TRUE if you want to annotate cell type. 
257 | by.CellType=FALSE 258 | if(by.CellType == TRUE){ 259 | data <- as.matrix(SC.integrated@assays$RNA@data) 260 | hpca.se <- HumanPrimaryCellAtlasData() 261 | pred.hpca <- SingleR(test = data, ref = hpca.se, assay.type.test=1, labels = hpca.se$label.main) 262 | cell.label <- data.frame(row.names = row.names(pred.hpca),celltype=pred.hpca$labels) 263 | if(length(SC.integrated@meta.data$celltype)>0){ 264 | SC.integrated@meta.data$celltype <- cell.label$celltype 265 | }else{ 266 | SC.integrated@meta.data <- cbind(SC.integrated@meta.data,cell.label) 267 | } 268 | new.cells <- data.frame() 269 | for(i in unique(SC.integrated$seurat_clusters)){ 270 | sub.data <- subset(SC.integrated,seurat_clusters==i) 271 | temp <- table(sub.data@meta.data$celltype) 272 | best.cell <- names(which(temp==temp[which.max(temp)])) 273 | cells.temp <- data.frame(cell.id=row.names(sub.data@meta.data),celltype=best.cell) 274 | new.cells <- rbind(new.cells,cells.temp) 275 | } 276 | cell.meta <- SC.integrated@meta.data 277 | cell.id <- rownames(cell.meta) 278 | row.names(new.cells) <- new.cells[,1] 279 | new.cells <- new.cells[cell.id,] 280 | SC.integrated@meta.data$celltype <- new.cells$celltype 281 | }else{ 282 | SC.integrated@meta.data$celltype <- paste0("C",as.numeric(SC.integrated@meta.data$seurat_clusters)) 283 | } 284 | 285 | #Change sample names 286 | sample<-SC.integrated@meta.data$sample 287 | sample[which(sample=="Ind5")]<-"Normal1" 288 | sample[which(sample=="Ind6")]<-"Normal2" 289 | sample[which(sample=="Ind7")]<-"Normal3" 290 | SC.integrated@meta.data$sample<-sample 291 | 292 | #Visualize alignment result 293 | DimPlot(SC.integrated, reduction = "umap", split.by = "sample",group.by = "celltype") 294 | ``` 295 | #### Step 3 296 | #### Single-cell comparison 297 | ``` 298 | #Case sample names 299 | Case=c("PDX-110","PDX-332") 300 | 301 | #Control sample names 302 | Control=c("Normal1","Normal2","Normal3") 303 | 304 | min.cells=3 # The minimum number of cells for a cell type. A cell type is omitted if it has less cells than the minimum number. 305 | #Get differential gene expression profiles for every cell
type (or cluster if without annotation) from Limma 306 | library('limma') 307 | DefaultAssay(SC.integrated) <- "RNA" 308 | set.seed(123456) 309 | Gene.list <- list() 310 | C_names <- NULL 311 | for(i in unique(SC.integrated@meta.data$celltype)){ 312 | Idents(SC.integrated) <- "celltype" 313 | c_cells <- subset(SC.integrated, celltype == i) 314 | Idents(c_cells) <- "type" 315 | Samples=c_cells@meta.data 316 | Controlsample <- row.names(subset(Samples,sample %in% Control)) 317 | Casesample <- row.names(subset(Samples,sample %in% Case)) 318 | if(length(Controlsample)>min.cells & length(Casesample)>min.cells){ 319 | expr <- as.matrix(c_cells@assays$RNA@data) 320 | new_expr <- as.matrix(expr[,c(Casesample,Controlsample)]) 321 | new_sample <- data.frame(Samples=c(Casesample,Controlsample),type=c(rep("Case",length(Casesample)),rep("Control",length(Controlsample)))) 322 | row.names(new_sample) <- paste(new_sample$Samples,row.names(new_sample),sep="_") 323 | expr <- new_expr 324 | keep <- rowSums(expr>0)>=3 # keep genes detected in at least 3 cells; a logical mask stays correct when no gene is filtered out 325 | expr <- expr[keep,,drop=FALSE] 326 | mm <- model.matrix(~0 + type, data = new_sample) 327 | fit <- lmFit(expr, mm) 328 | contr <- makeContrasts(typeCase - typeControl, levels = colnames(coef(fit))) 329 | tmp <- contrasts.fit(fit, contrasts = contr) 330 | tmp <- eBayes(tmp) 331 | C_data <- topTable(tmp, sort.by = "P",n = nrow(tmp)) 332 | C_data_for_drug <- data.frame(row.names=row.names(C_data),score=C_data$t,adj.P.Val=C_data$adj.P.Val,P.Value=C_data$P.Value) 333 | Gene.list[[i]] <- C_data_for_drug 334 | C_names <- c(C_names,i) 335 | } 336 | } 337 | names(Gene.list) <- C_names 338 | 339 | #Get differential genes from Seurat (Wilcoxon Rank Sum test) 340 | library('Seurat') 341 | DefaultAssay(SC.integrated) <- "RNA" 342 | set.seed(123456) 343 | Gene.list <- list() 344 | C_names <- NULL 345 | for(i in unique(SC.integrated@meta.data$celltype)){ 346 | Idents(SC.integrated) <- "celltype" 347 | c_cells <- subset(SC.integrated, celltype == i) 348 | Idents(c_cells) <- "type" 349 |
C_data <- FindMarkers(c_cells, ident.1 = "TNBC.PDX", ident.2 = "Normal") 350 | C_data_for_drug <- data.frame(row.names=row.names(C_data),score=C_data$avg_logFC,adj.P.Val=C_data$p_val_adj,P.Value=C_data$p_val) ##for Seurat version > 4.0, please use avg_log2FC instead of avg_logFC 351 | Gene.list[[i]] <- C_data_for_drug 352 | C_names <- c(C_names,i) 353 | } 354 | names(Gene.list) <- C_names 355 | 356 | #Get differential genes from DESeq2 method 357 | library('Seurat') 358 | DefaultAssay(SC.integrated) <- "RNA" 359 | set.seed(123456) 360 | Gene.list <- list() 361 | C_names <- NULL 362 | for(i in unique(SC.integrated@meta.data$celltype)){ 363 | Idents(SC.integrated) <- "celltype" 364 | c_cells <- subset(SC.integrated, celltype == i) 365 | Idents(c_cells) <- "type" 366 | C_data <- FindMarkers(c_cells, ident.1 = "TNBC.PDX", ident.2 = "Normal", test.use = "DESeq2") 367 | C_data_for_drug <- data.frame(row.names=row.names(C_data),score=C_data$avg_logFC,adj.P.Val=C_data$p_val_adj,P.Value=C_data$p_val) ##for Seurat version > 4.0, please use avg_log2FC instead of avg_logFC 368 | Gene.list[[i]] <- C_data_for_drug 369 | C_names <- c(C_names,i) 370 | } 371 | names(Gene.list) <- C_names 372 | 373 | #Get differential genes from EdgeR 374 | library('edgeR') 375 | Case=c("PDX-110","PDX-332") 376 | Control=c("Normal1","Normal2","Normal3") 377 | DefaultAssay(SC.integrated) <- "RNA" 378 | set.seed(123456) 379 | min.cells=3 # The minimum number of cells for a cell type. A cell type is omitted if it has less cells than the minimum number.
380 | Gene.list <- list() 381 | C_names <- NULL 382 | for(i in unique(SC.integrated@meta.data$celltype)){ 383 | Idents(SC.integrated) <- "celltype" 384 | c_cells <- subset(SC.integrated, celltype == i) 385 | Idents(c_cells) <- "type" 386 | Samples=c_cells@meta.data 387 | Controlsample <- row.names(subset(Samples,sample %in% Control)) 388 | Casesample <- row.names(subset(Samples,sample %in% Case)) 389 | if(length(Controlsample)>min.cells & length(Casesample)>min.cells){ 390 | expr <- as.matrix(c_cells@assays$RNA@data) 391 | new_expr <- as.matrix(expr[,c(Casesample,Controlsample)]) 392 | new_sample <- data.frame(Samples=c(Casesample,Controlsample),type=c(rep("Case",length(Casesample)),rep("Control",length(Controlsample)))) 393 | row.names(new_sample) <- paste(new_sample$Samples,row.names(new_sample),sep="_") 394 | expr <- new_expr 395 | keep <- rowSums(expr>0)>=3 # keep genes detected in at least 3 cells; a logical mask stays correct when no gene is filtered out 396 | expr <- expr[keep,,drop=FALSE] 397 | group <- new_sample$type 398 | dge <- DGEList(counts=expr, group=group) 399 | group_edgeR <- factor(group,levels = c("Control","Case")) 400 | design <- model.matrix(~ group_edgeR) 401 | dge <- estimateDisp(dge, design = design) 402 | fit <- glmFit(dge, design) 403 | res <- glmLRT(fit) 404 | C_data <- res$table 405 | C_data_for_drug <- data.frame(row.names=row.names(C_data),score=C_data$logFC,adj.P.Val=p.adjust(C_data$PValue,method = "BH"),P.Value=C_data$PValue) 406 | Gene.list[[i]] <- C_data_for_drug 407 | C_names <- c(C_names,i) 408 | } 409 | } 410 | names(Gene.list) <- C_names 411 | ``` 412 | 413 | #### Step 4 414 | #### Mono-drug repurposing for every cell type 415 | ```R 416 | library('Asgard') 417 | 418 | #Load tissue specific drug reference produced by PrepareReference function as mentioned above. Please select proper tissue according to the disease.
419 | my_gene_info<-read.table(file="DrugReference/breast_gene_info.txt",sep="\t",header = T,quote = "") 420 | my_drug_info<-read.table(file="DrugReference/breast_drug_info.txt",sep="\t",header = T,quote = "") 421 | drug.ref.profiles = GetDrugRef(drug.response.path = 'DrugReference/breast_rankMatrix.txt', 422 | probe.to.genes = my_gene_info, 423 | drug.info = my_drug_info) 424 | 425 | #Repurpose mono-drugs for every cell type 426 | Drug.ident.res = GetDrug(gene.data = Gene.list, 427 | drug.ref.profiles = drug.ref.profiles, 428 | repurposing.unit = "drug", 429 | connectivity = "negative", 430 | drug.type = "FDA") 431 | 432 | ``` 433 | Use '?GetDrug' for more help 434 | 435 | #### Step 5: Estimation of drug score 436 | 437 | Calculate drug score using information from all or a subset of clusters. Use 438 | `?DrugScore` for more help. 439 | 440 | ```R 441 | library('Asgard') 442 | library('Seurat') 443 | 444 | # Change the following two lines with the paths on your computer 445 | gse92742_gctx_path <- "GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx" 446 | gse70138_gctx_path <- "GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx" 447 | 448 | cell_metadata <- SC.integrated@meta.data 449 | cell_metadata$cluster <- SC.integrated@meta.data$celltype 450 | 451 | Drug.score <- DrugScore(cell_metadata, cluster_degs = Gene.list, 452 | cluster_drugs = Drug.ident.res, tissue = "breast", 453 | case = Case, gse92742_gctx_path = gse92742_gctx_path, 454 | gse70138_gctx_path = gse70138_gctx_path) 455 | ``` 456 | 457 | #### Step 6: Select mono-drug therapies 458 | ``` 459 | library('Asgard') 460 | library('Seurat') 461 | 462 | #Select drug using drug score 463 | library(Hmisc) 464 | Final.drugs<-subset(Drug.score,Drug.therapeutic.score>quantile(Drug.score$Drug.therapeutic.score, 0.99,na.rm=T) & FDR <0.05) 465 | 466 | 467 | #Select drug for individual clusters 468 | Final.drugs<-TopDrug(SC.integrated=SC.integrated, 469 | Drug.data=Drug.ident.res, 470 |
Drug.FDR=0.1, 471 | FDA.drug.only=TRUE, 472 | Case=Case, 473 | DrugScore=FALSE 474 | ) 475 | 476 | ``` 477 | #### Step 7 (optional) 478 | #### Drug combination analysis 479 | ``` 480 | library('Asgard') 481 | library('Seurat') 482 | 483 | GSE92742.gctx.path="GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx" 484 | GSE70138.gctx.path="GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx" 485 | Drug.combinations<-DrugCombination(SC.integrated=SC.integrated, 486 | Gene.data=Gene.list, 487 | Drug.data=Drug.ident.res, 488 | Drug.FDR=0.1, 489 | FDA.drug.only=TRUE, 490 | Combined.drugs=2, 491 | Case=Case, 492 | Tissue="breast", 493 | GSE92742.gctx=GSE92742.gctx.path, 494 | GSE70138.gctx=GSE70138.gctx.path) 495 | ``` 496 | Please use '?DrugCombination' for more help. 497 | 498 | #### Select drug combination therapies 499 | ``` 500 | library('Asgard') 501 | Final.combinations<-TopCombination(Drug.combination=Drug.combinations, 502 | Combination.FDR=0.1, 503 | Min.combination.score=1 504 | ) 505 | ``` 506 | Demo codes using real datasets are available at: https://github.com/lanagarmire/Single-cell-drug-repositioning 507 | 508 | If you have further questions or comments, please contact Dr.Bing He: hbing@umich.edu or hebinghb@gmail.com 509 | -------------------------------------------------------------------------------- /VERSION.txt: -------------------------------------------------------------------------------- 1 | 1.0.0 2 | -------------------------------------------------------------------------------- /asgard_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lanagarmire/Asgard/fc2b60855e90fe231b85723cb5fb9711bb588c66/asgard_pipeline.png -------------------------------------------------------------------------------- /data/FDA_drug.rda: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanagarmire/Asgard/fc2b60855e90fe231b85723cb5fb9711bb588c66/data/FDA_drug.rda -------------------------------------------------------------------------------- /data/L1000_meta.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lanagarmire/Asgard/fc2b60855e90fe231b85723cb5fb9711bb588c66/data/L1000_meta.rda -------------------------------------------------------------------------------- /man/DrugCombination.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DrugCombination.R 3 | \name{DrugCombination} 4 | \alias{DrugCombination} 5 | \title{Treatment Efficacy of the Drug Combination.} 6 | \usage{ 7 | DrugCombination( 8 | SC.integrated = SC.data, 9 | Gene.data = Gene.list, 10 | Drug.data = Drug.ident.res, 11 | Drug.FDR = 0.1, 12 | FDA.drug.only = TRUE, 13 | Combined.drugs = 2, 14 | GSE92742.gctx = NULL, 15 | GSE70138.gctx = NULL, 16 | Case = NULL, 17 | Tissue = "breast" 18 | ) 19 | } 20 | \arguments{ 21 | \item{SC.integrated}{A Seurat object of aligned single cells from SCalignment function.} 22 | 23 | \item{Gene.data}{A list of differnential gene expression profiles for every cell type. It's from GetGene function.} 24 | 25 | \item{Drug.data}{A list of mono-drugs for every cell type. It's from GetDrug function.} 26 | 27 | \item{Drug.FDR}{The FDR threshold to select drug. The default value is 0.1.} 28 | 29 | \item{FDA.drug.only}{logical; if TRUE, will only return FDA-approved drugs.} 30 | 31 | \item{Combined.drugs}{The number of drugs in a combination. 
The default value is 2.} 32 | 33 | \item{GSE92742.gctx}{The gctx file contains drug responses from GSE92742 dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE92742).} 34 | 35 | \item{GSE70138.gctx}{The gctx file contains drug responses from GSE70138 dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE70138).} 36 | 37 | \item{Case}{A vector contains names of case samples.} 38 | 39 | \item{Tissue}{Reference tissue. If one used lung_rankMatrix.txt in GetDrugRef function, then the Reference tissue is lung.} 40 | } 41 | \value{ 42 | A data frame of drug combinations with therapeutics scores and FDR. 43 | } 44 | \description{ 45 | It evaluates treatment efficacy to identify drug combinations that can best reverse the target genes’ expression in diseased cells in case samples. 46 | } 47 | \details{ 48 | This function evaluates treatment efficacy and ranks drug combinations using therapeutics score, which integrates gene responses to multiple drugs, the proportion of genes, and cells treated by combined drugs. 49 | } 50 | -------------------------------------------------------------------------------- /man/DrugScore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DrugScore.R 3 | \name{DrugScore} 4 | \alias{DrugScore} 5 | \title{Calculate drug score} 6 | \usage{ 7 | DrugScore( 8 | cell_metadata, 9 | cluster_degs, 10 | cluster_drugs, 11 | tissue, 12 | gse70138_gctx_path, 13 | gse92742_gctx_path, 14 | clusters = NULL, 15 | case = NULL, 16 | fda_drugs_only = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{cell_metadata}{A data.frame of cell metadata. 
It must have a column 21 | named 'cluster' indicating which cluster cells belong, and a column named 22 | 'sample' indicating which sample cells belong.} 23 | 24 | \item{cluster_degs}{A list of differential gene expression profiles for 25 | each cluster.} 26 | 27 | \item{cluster_drugs}{Drug repurposing result from GetDrug function.} 28 | 29 | \item{tissue}{Reference tissue. If one used 'lung_rankMatrix.txt' in 30 | GetDrugRef function, then the Reference tissue is lung. Please use " " 31 | instead of "-" in tissue name. For example, while 32 | 'haematopoietic-and-lymphoid-tissue' is the prefix of the drug reference 33 | files, the corresponding tissue name is "haematopoietic and lymphoid tissue".} 34 | 35 | \item{gse70138_gctx_path}{The gctx file contains drug responses from GSE70138 36 | dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE70138).} 37 | 38 | \item{gse92742_gctx_path}{The gctx file contains drug responses from GSE92742 39 | dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE92742)..} 40 | 41 | \item{clusters}{Select which clusters (cell types) to be used for drug score 42 | estimation. By default, it uses all clusters.} 43 | 44 | \item{case}{A vector containing case sample names.} 45 | 46 | \item{fda_drugs_only}{logical; if TRUE, will only return FDA-approved drugs, 47 | else, will return all drugs/compounds.} 48 | } 49 | \value{ 50 | A data frame of drug score, P-value and FDR. 51 | } 52 | \description{ 53 | The drug score is a comprehensive estimation of drug therapeutic 54 | effects using all or a selected set of clusters. 55 | } 56 | \details{ 57 | This function calculates drug score using cellular proportion of 58 | clusters, the significance of reversal in DEGs' expressions, and the ratio of 59 | the reversed genes. 
60 | } 61 | -------------------------------------------------------------------------------- /man/GetDrug.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GetDrug.R 3 | \name{GetDrug} 4 | \alias{GetDrug} 5 | \title{Mono-drug Repurposing.} 6 | \usage{ 7 | GetDrug( 8 | gene.data = NULL, 9 | drug.ref.profiles = NULL, 10 | repurposing.unit = "drug", 11 | CEG.threshold = 0.05, 12 | connectivity = "negative", 13 | drug.type = "FDA" 14 | ) 15 | } 16 | \arguments{ 17 | \item{drug.ref.profiles}{A list containing tissue-specific drug reference profiles from the GetDrugRef function.} 18 | 19 | \item{repurposing.unit}{Either "treatment" or "drug", which indicates whether the function tests the drug repurposing p value at the treatment level or at the drug level. The default is "drug"; "treatment" treats the drug data from different cell lines separately.} 20 | 21 | \item{CEG.threshold}{The p value threshold to select the consistently differentially expressed genes (CEGs). The default value is 0.05.} 22 | 23 | \item{connectivity}{The type of connectivity, either "negative" or "positive". Negative connectivity is used when the query data is the differential scores from disease data, and it will repurpose drugs that can potentially reverse the query disease phenotype. Positive connectivity is used when the query data is from a drug profile, and it will return the drugs that are similar to the query drug. The default value is "negative".} 24 | 25 | \item{drug.type}{One of "FDA", "compounds" or "all", which indicates whether the function identifies FDA-approved drugs, compounds, or both, respectively. The default value is "FDA".} 26 | } 27 | \value{ 28 | A list of mono-drugs for every cell type. 29 | } 30 | \description{ 31 | It identifies mono-drug therapy for every cell type.
32 | } 33 | \details{ 34 | This function allows the user to use the differential expression data of every case cell type to query against reference drug response profiles. This function is a revised version of drug.identification from the DrInsight package. 35 | } 36 | -------------------------------------------------------------------------------- /man/GetDrugRef.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GetDrugRef.R 3 | \name{GetDrugRef} 4 | \alias{GetDrugRef} 5 | \title{Load and Process Drug Reference Profiles.} 6 | \usage{ 7 | GetDrugRef(drug.response.path = NULL, probe.to.genes = NULL, drug.info = NULL) 8 | } 9 | \arguments{ 10 | \item{drug.response.path}{The local path and the name of the tissue specific drug rank matrix.} 11 | 12 | \item{probe.to.genes}{A data.frame containing gene IDs (the IDs used in the drug rank matrix) and official gene symbols. This file was automatically generated with the drug rank matrix.} 13 | 14 | \item{drug.info}{A data.frame containing drug information. This file was automatically generated with the drug rank matrix.} 15 | } 16 | \description{ 17 | This function allows the user to load in the tissue specific drug rank matrix. 18 | } 19 | \details{ 20 | This function is a revised version of get.cmap.ref from the DrInsight package. The tissue specific drug rank matrix is transformed from L1000 data (GEO: GSE92742 and GSE70138) using the PrepareReference function.
21 | } 22 | -------------------------------------------------------------------------------- /man/PrepareReference.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PrepareReference.R 3 | \name{PrepareReference} 4 | \alias{PrepareReference} 5 | \title{Prepare Drug Reference.} 6 | \usage{ 7 | PrepareReference( 8 | cell.info = NULL, 9 | gene.info = NULL, 10 | GSE70138.sig.info = NULL, 11 | GSE92742.sig.info = NULL, 12 | GSE70138.gctx = NULL, 13 | GSE92742.gctx = NULL, 14 | Output.Dir = "./" 15 | ) 16 | } 17 | \arguments{ 18 | \item{cell.info}{The local path and the name of the cell.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_cell_info_2017-04-28.txt.gz .} 19 | 20 | \item{gene.info}{The local path and the name of the gene.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_gene_info_2017-03-06.txt.gz .} 21 | 22 | \item{GSE70138.sig.info}{The local path and the name of the GSE70138 sig.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_sig_info_2017-03-06.txt.gz .} 23 | 24 | \item{GSE92742.sig.info}{The local path and the name of the GSE92742 sig.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_sig_info.txt.gz .} 25 | 26 | \item{GSE70138.gctx}{The local path and the name of the GSE70138 gctx file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx.gz .} 27 | 28 | \item{GSE92742.gctx}{The local path and the name of the GSE92742 gctx file.
It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx.gz .} 29 | 30 | \item{Output.Dir}{The output directory for the generated files.} 31 | } 32 | \description{ 33 | Prepare tissue specific drug reference profiles from L1000 drug response data. 34 | } 35 | \details{ 36 | This function converts L1000 data to the tissue specific drug rank matrix. 37 | } 38 | -------------------------------------------------------------------------------- /man/SCplasticity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SCplasticity.R 3 | \name{SCplasticity} 4 | \alias{SCplasticity} 5 | \title{Single-cell Plasticity.} 6 | \usage{ 7 | SCplasticity(SC.integrated = SC.data, Case = NULL) 8 | } 9 | \arguments{ 10 | \item{SC.integrated}{A Seurat object of aligned single cells from SCalignment function.} 11 | 12 | \item{Case}{A vector containing names of case samples.} 13 | } 14 | \value{ 15 | A data frame of plasticity, normalized plasticity and cell type coverage. 16 | } 17 | \description{ 18 | It determines the plasticity of each cell type. 19 | } 20 | \details{ 21 | This function estimates the entropy of every cell in the case samples. For each cell type, it uses the median entropy value as the plasticity of that cell type.
22 | } 23 | -------------------------------------------------------------------------------- /man/TopCombination.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TopCombination.R 3 | \name{TopCombination} 4 | \alias{TopCombination} 5 | \title{Combination Drug Selection.} 6 | \usage{ 7 | TopCombination( 8 | Drug.combination = Drug.combinations, 9 | Combination.FDR = 0.1, 10 | Min.combination.score = 1 11 | ) 12 | } 13 | \arguments{ 14 | \item{Drug.combination}{raw drug combination result from DrugCombination function.} 15 | 16 | \item{Combination.FDR}{The FDR threshold to select drug combination. The default value is 0.1.} 17 | 18 | \item{Min.combination.score}{The Combination therapeutic score threshold to select drug combination. The default value is 1.} 19 | } 20 | \value{ 21 | A data frame of selected drug combinations. 22 | } 23 | \description{ 24 | Select drug combinations by combination therapeutic score and FDR of combination therapeutic score. 25 | } 26 | \details{ 27 | Input raw drug combination result and return the top drug combinations. 28 | } 29 | -------------------------------------------------------------------------------- /man/TopDrug.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TopDrug.R 3 | \name{TopDrug} 4 | \alias{TopDrug} 5 | \title{Single Drug Selection for Individual Clusters.} 6 | \usage{ 7 | TopDrug( 8 | SC.integrated = SC.data, 9 | Drug.data = Drug.ident.res, 10 | Drug.FDR = 0.1, 11 | FDA.drug.only = TRUE, 12 | Case = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{SC.integrated}{A Seurat object of aligned single cells.} 17 | 18 | \item{Drug.data}{Drug repurosing result from GetDrug function.} 19 | 20 | \item{Drug.FDR}{The FDR threshold to select drug. 
The default value is 0.1.} 21 | 22 | \item{FDA.drug.only}{logical; if TRUE, will only return FDA-approved drugs.} 23 | 24 | \item{Case}{An vector of case (diseased) samples.Only case sammples are involved in the calculation of coverage.} 25 | } 26 | \value{ 27 | A data frame of selected drugs with summary of cell coverage. 28 | } 29 | \description{ 30 | Select single drugs for every cell population by FDR and drug type, and summarize cell coverage for selected drugs. 31 | } 32 | \details{ 33 | Input raw drug repurosing result and return the top drugs with summary of cell coverage. 34 | } 35 | -------------------------------------------------------------------------------- /prep_files.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail # abort on the first failed command (mkdir, cd, wget, gunzip) instead of continuing 3 | # Check if the target directory is provided as an argument 4 | if [ "$#" -ne 1 ]; then 5 | echo "Usage: $0 /path/to/target/directory" >&2 6 | exit 1 7 | fi 8 | 9 | # Define the directory where you want to download and unzip the files 10 | TARGET_DIR="$1" 11 | 12 | # Create the directory if it doesn't exist 13 | mkdir -p "$TARGET_DIR" 14 | 15 | # Change to the target directory; never download into the caller's directory if this fails 16 | cd "$TARGET_DIR" || exit 1 17 | 18 | # URL prefix 19 | URL_PREFIX="https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/" 20 | 21 | # List of files to download 22 | FILES=( 23 | "GSE70138_Broad_LINCS_cell_info_2017-04-28.txt" 24 | "GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx" 25 | "GSE70138_Broad_LINCS_sig_info_2017-03-06.txt" 26 | "GSE70138_Broad_LINCS_gene_info_2017-03-06.txt" 27 | ) 28 | 29 | # Download and unzip each file (-O overwrites a partial .gz left by an interrupted run instead of saving a ".1" copy) 30 | for file in "${FILES[@]}"; do 31 | # Check if the file already exists 32 | if [[ ! -f "$file" ]]; then 33 | wget -O "${file}.gz" "${URL_PREFIX}${file}.gz" 34 | gunzip "${file}.gz" 35 | else 36 | echo "File $file already exists. Skipping download."
37 | fi 38 | done 39 | 40 | URL_PREFIX="https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/" 41 | FILES=( 42 | "GSE92742_Broad_LINCS_cell_info.txt" 43 | "GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx" 44 | "GSE92742_Broad_LINCS_sig_info.txt" 45 | ) 46 | 47 | # Download and unzip each file (-O overwrites a partial .gz left by an interrupted run instead of saving a ".1" copy) 48 | for file in "${FILES[@]}"; do 49 | # Check if the file already exists 50 | if [[ ! -f "$file" ]]; then 51 | wget -O "${file}.gz" "${URL_PREFIX}${file}.gz" 52 | gunzip "${file}.gz" 53 | else 54 | echo "File $file already exists. Skipping download." 55 | fi 56 | done 57 | --------------------------------------------------------------------------------