├── .Rbuildignore
├── .gitignore
├── .here
├── DESCRIPTION
├── Dockerfile
├── GBOX_BASE_NAME.txt
├── LICENSE
├── Makefile
├── NAMESPACE
├── R
    ├── CombinePvalue.R
    ├── DrugCombination.R
    ├── DrugScore.R
    ├── GetDrug.R
    ├── GetDrugRef.R
    ├── PrepareReference.R
    ├── SCplasticity.R
    ├── TopCombination.R
    ├── TopDrug.R
    ├── data_preprocess.R
    ├── get_CEGs.R
    ├── get_drug_pval.R
    └── get_gene_pval.R
├── README.md
├── VERSION.txt
├── asgard_pipeline.png
├── data
    ├── FDA_drug.rda
    └── L1000_meta.rda
├── man
    ├── DrugCombination.Rd
    ├── DrugScore.Rd
    ├── GetDrug.Rd
    ├── GetDrugRef.Rd
    ├── PrepareReference.Rd
    ├── SCplasticity.Rd
    ├── TopCombination.Rd
    └── TopDrug.Rd
└── prep_files.sh


/.Rbuildignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanagarmire/Asgard/fc2b60855e90fe231b85723cb5fb9711bb588c66/.Rbuildignore


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Docker related
 2 | /build/
 3 | 
 4 | # History files
 5 | .Rhistory
 6 | .Rapp.history
 7 | 
 8 | # Session Data files
 9 | .RData
10 | .RDataTmp
11 | 
12 | # User-specific files
13 | .Ruserdata
14 | 
15 | # Example code in package build process
16 | *-Ex.R
17 | 
18 | # Output files from R CMD build
19 | /*.tar.gz
20 | 
21 | # Output files from R CMD check
22 | /*.Rcheck/
23 | 
24 | # RStudio files
25 | .Rproj.user/
26 | 
27 | # produced vignettes
28 | vignettes/*.html
29 | vignettes/*.pdf
30 | 
31 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
32 | .httr-oauth
33 | 
34 | # knitr and R markdown default cache directories
35 | *_cache/
36 | /cache/
37 | 
38 | # Temporary files created by R markdown
39 | *.utf8.md
40 | *.knit.md
41 | 
42 | # R Environment Variables
43 | .Renviron
44 | 
45 | # pkgdown site
46 | docs/
47 | 
48 | # translation temp files
49 | po/*~
50 | 
51 | # RStudio Connect folder
52 | rsconnect/


--------------------------------------------------------------------------------
/.here:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanagarmire/Asgard/fc2b60855e90fe231b85723cb5fb9711bb588c66/.here


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: Asgard
 2 | Type: Package
 3 | Title: A Single-cell Guided pipeline for Accurate Repurposing of Drugs
 4 | Version: 1.0.0
 5 | Author: Bing He [aut], Lana Garmire [aut, cre]
 6 | Maintainer: Bing He <hbing@med.umich.edu>
 7 | Description: Asgard repurposes drugs for every single cell population and predicts personalized combination of drugs to address cellular heterogeneity of patients. 
 8 | Depends: R (>= 3.5.0)
 9 | Imports: 
10 |     cmapR
11 | Suggests: 
12 |     Seurat,
13 |     SeuratObject,
14 |     SingleR,
15 |     celldex,
16 |     cowplot,
17 |     dplyr
18 | License: PolyForm-Noncommercial
19 | Encoding: UTF-8
20 | LazyData: true
21 | RoxygenNote: 7.2.3
22 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM rocker/rstudio:4.3.1
 2 | 
 3 | RUN R -e 'install.packages("devtools")'
 4 | 
 5 | RUN R -e 'install.packages("BiocManager")'
 6 | RUN R -e 'install.packages("remotes")'
 7 | 
 8 | # Refresh the package index and install in the SAME layer: a separate
 9 | # "apt-get update" layer can be served from the build cache and leave the
10 | # install step working against a stale index. The index is removed afterwards
11 | # to keep the image small.
12 | RUN apt-get update \
13 |     && apt-get install -y --no-install-recommends zlib1g-dev \
14 |     && rm -rf /var/lib/apt/lists/*
15 | RUN R -e 'BiocManager::install(c("SingleR","limma","cmapR","celldex"))'
16 | RUN R -e 'install.packages("Seurat")'
17 | 
18 | WORKDIR /home/rstudio
19 | 
20 | # Copy the package sources into the image and install Asgard from them.
21 | COPY . .
22 | 
23 | RUN R -e 'install.packages(".", repos = NULL, type = "source")'
24 | 
25 | # Optional (disabled): build the drug reference inside the image.
26 | # WORKDIR /home/rstudio/build
27 | # RUN mkdir -p /home/rstudio/build/DrugReference
28 | # RUN R -e 'library("Asgard"); PrepareReference(cell.info="GSE70138_Broad_LINCS_cell_info_2017-04-28.txt", gene.info="GSE70138_Broad_LINCS_gene_info_2017-03-06.txt", GSE70138.sig.info = "GSE70138_Broad_LINCS_sig_info_2017-03-06.txt", GSE92742.sig.info = "GSE92742_Broad_LINCS_sig_info.txt", GSE70138.gctx = "GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx", GSE92742.gctx = "GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx", Output.Dir = "DrugReference/")'
29 | 
30 | # RUN mv DrugReference /home/rstudio/.
31 | 
32 | # WORKDIR /home/rstudio
33 | 
34 | # RUN rm -rf /home/rstudio/build
35 | 


--------------------------------------------------------------------------------
/GBOX_BASE_NAME.txt:
--------------------------------------------------------------------------------
1 | lanagarmire/asgard
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | # PolyForm Noncommercial License 1.0.0
  2 | 
  3 | <https://polyformproject.org/licenses/noncommercial/1.0.0>
  4 | 
  5 | ## Acceptance
  6 | 
  7 | In order to get any license under these terms, you must agree
  8 | to them as both strict obligations and conditions to all
  9 | your licenses.
 10 | 
 11 | ## Copyright License
 12 | 
 13 | The licensor grants you a copyright license for the
 14 | software to do everything you might do with the software
 15 | that would otherwise infringe the licensor's copyright
 16 | in it for any permitted purpose.  However, you may
 17 | only distribute the software according to [Distribution
 18 | License](#distribution-license) and make changes or new works
 19 | based on the software according to [Changes and New Works
 20 | License](#changes-and-new-works-license).
 21 | 
 22 | ## Distribution License
 23 | 
 24 | The licensor grants you an additional copyright license
 25 | to distribute copies of the software.  Your license
 26 | to distribute covers distributing the software with
 27 | changes and new works permitted by [Changes and New Works
 28 | License](#changes-and-new-works-license).
 29 | 
 30 | ## Notices
 31 | 
 32 | You must ensure that anyone who gets a copy of any part of
 33 | the software from you also gets a copy of these terms or the
 34 | URL for them above, as well as copies of any plain-text lines
 35 | beginning with `Required Notice:` that the licensor provided
 36 | with the software.  For example:
 37 | 
 38 | > Required Notice: Copyright Yoyodyne, Inc. (http://example.com)
 39 | 
 40 | ## Changes and New Works License
 41 | 
 42 | The licensor grants you an additional copyright license to
 43 | make changes and new works based on the software for any
 44 | permitted purpose.
 45 | 
 46 | ## Patent License
 47 | 
 48 | The licensor grants you a patent license for the software that
 49 | covers patent claims the licensor can license, or becomes able
 50 | to license, that you would infringe by using the software.
 51 | 
 52 | ## Noncommercial Purposes
 53 | 
 54 | Any noncommercial purpose is a permitted purpose.
 55 | 
 56 | ## Personal Uses
 57 | 
 58 | Personal use for research, experiment, and testing for
 59 | the benefit of public knowledge, personal study, private
 60 | entertainment, hobby projects, amateur pursuits, or religious
 61 | observance, without any anticipated commercial application,
 62 | is use for a permitted purpose.
 63 | 
 64 | ## Noncommercial Organizations
 65 | 
 66 | Use by any charitable organization, educational institution,
 67 | public research organization, public safety or health
 68 | organization, environmental protection organization,
 69 | or government institution is use for a permitted purpose
 70 | regardless of the source of funding or obligations resulting
 71 | from the funding.
 72 | 
 73 | ## Fair Use
 74 | 
 75 | You may have "fair use" rights for the software under the
 76 | law. These terms do not limit them.
 77 | 
 78 | ## No Other Rights
 79 | 
 80 | These terms do not allow you to sublicense or transfer any of
 81 | your licenses to anyone else, or prevent the licensor from
 82 | granting licenses to anyone else.  These terms do not imply
 83 | any other licenses.
 84 | 
 85 | ## Patent Defense
 86 | 
 87 | If you make any written claim that the software infringes or
 88 | contributes to infringement of any patent, your patent license
 89 | for the software granted under these terms ends immediately. If
 90 | your company makes such a claim, your patent license ends
 91 | immediately for work on behalf of your company.
 92 | 
 93 | ## Violations
 94 | 
 95 | The first time you are notified in writing that you have
 96 | violated any of these terms, or done anything with the software
 97 | not covered by your licenses, your licenses can nonetheless
 98 | continue if you come into full compliance with these terms,
 99 | and take practical steps to correct past violations, within
100 | 32 days of receiving notice.  Otherwise, all your licenses
101 | end immediately.
102 | 
103 | ## No Liability
104 | 
105 | ***As far as the law allows, the software comes as is, without
106 | any warranty or condition, and the licensor will not be liable
107 | to you for any damages arising out of these terms or the use
108 | or nature of the software, under any kind of legal claim.***
109 | 
110 | ## Definitions
111 | 
112 | The **licensor** is the individual or entity offering these
113 | terms, and the **software** is the software the licensor makes
114 | available under these terms.
115 | 
116 | **You** refers to the individual or entity agreeing to these
117 | terms.
118 | 
119 | **Your company** is any legal entity, sole proprietorship,
120 | or other kind of organization that you work for, plus all
121 | organizations that have control over, are under the control of,
122 | or are under common control with that organization.  **Control**
123 | means ownership of substantially all the assets of an entity,
124 | or the power to direct its management and policies by vote,
125 | contract, or otherwise.  Control can be direct or indirect.
126 | 
127 | **Your licenses** are all the licenses granted to you for the
128 | software under these terms.
129 | 
130 | **Use** means anything you do with the software requiring one
131 | of your licenses.
132 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Image name and tag are read from text files kept next to this Makefile.
 2 | VERfile="VERSION.txt"
 3 | GBOXfile="GBOX_BASE_NAME.txt"
 4 | # The backticks are expanded by the shell when a recipe runs, not by make.
 5 | VER=`cat $(VERfile)`
 6 | GBOX=`cat $(GBOXfile)`:$(VER)
 7 | export
 8 | 
 9 | # None of these targets creates a file named after itself; declaring them
10 | # phony keeps them runnable even if such a file or directory exists.
11 | .PHONY: docker docker-push server shell
12 | 
13 | # Build the image from the Dockerfile in this directory.
14 | docker:
15 | 	docker build -t $(GBOX) .
16 | 
17 | # Push the tagged image to the registry.
18 | docker-push:
19 | 	docker push $(GBOX)
20 | 
21 | # Start the container with the working tree mounted and port 8787 published.
22 | server:
23 | 	docker run --rm -v `pwd`:/home/rstudio/Asgard -p 8787:8787 -it $(GBOX)
24 | 
25 | # Open an interactive shell in a throw-away container.
26 | shell:
27 | 	docker run --rm -it $(GBOX) /bin/bash
28 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(DrugCombination)
 4 | export(DrugScore)
 5 | export(GetDrug)
 6 | export(GetDrugRef)
 7 | export(PrepareReference)
 8 | export(SCplasticity)
 9 | export(TopCombination)
10 | export(TopDrug)
11 | import(cmapR)
12 | 


--------------------------------------------------------------------------------
/R/CombinePvalue.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ## Combine P-values with Fisher's method
 3 | ##
 4 | ## p: numeric vector of p-values. Only entries with 0 < p <= 1 are used;
 5 | ##    NA and out-of-range entries are dropped (with a warning).
 6 | ## Returns a single numeric value: the upper-tail chi-square probability of
 7 | ##    -2 * sum(log(p)) on 2 * k degrees of freedom, where k is the number of
 8 | ##    valid p-values. Returns NA_real_ (with a warning) when fewer than two
 9 | ##    p-values are valid.
10 | CombineP = function (p){
11 |   # Treat NA as invalid so the checks below never evaluate an NA condition
12 |   keep <- !is.na(p) & (p > 0) & (p <= 1)
13 |   valid <- p[keep]
14 |   if (length(valid) < 2) {
15 |     warning("Must have at least two valid p values")
16 |     # Always return a numeric scalar so vectorised callers such as tapply()
17 |     # receive an atomic result instead of a list
18 |     return(NA_real_)
19 |   }
20 |   if (length(valid) != length(p)) {
21 |     warning("Some studies omitted")
22 |   }
23 |   chisq <- (-2) * sum(log(valid))
24 |   df <- 2 * length(valid)
25 |   return(pchisq(chisq, df, lower.tail = FALSE))
26 | }
27 | 
28 | 


--------------------------------------------------------------------------------
/R/DrugCombination.R:
--------------------------------------------------------------------------------
  1 | #' @title Treatment Efficacy of the Drug Combination.
  2 | #' @description It evaluates treatment efficacy to identify drug combinations that can best reverse the target genes’ expression in diseased cells in case samples.
  3 | #' @details This function evaluates treatment efficacy and ranks drug combinations using therapeutics score, which integrates gene responses to multiple drugs, the proportion of genes, and cells treated by combined drugs.
  4 | #' @param SC.integrated A Seurat object of aligned single cells from SCalignment function.
  5 | #' @param Gene.data A list of differential gene expression profiles for every cell type. It's from GetGene function.
  6 | #' @param Drug.data A list of mono-drugs for every cell type. It's from GetDrug function.
  7 | #' @param Drug.FDR The FDR threshold to select drug. The default value is 0.1.
  8 | #' @param FDA.drug.only logical; if TRUE, will only return FDA-approved drugs.
  9 | #' @param Combined.drugs The number of drugs in a combination. The default value is 2.
 10 | #' @param GSE92742.gctx The gctx file contains drug responses from GSE92742 dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE92742).
 11 | #' @param GSE70138.gctx The gctx file contains drug responses from GSE70138 dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE70138).
 12 | #' @param Case A vector contains names of case samples.
 13 | #' @param Tissue Reference tissue. If one used lung_rankMatrix.txt in GetDrugRef function, then the Reference tissue is lung.
 14 | #' @return A data frame of drug combinations with therapeutics scores and FDR.
 15 | #' @export
 16 | #' @import cmapR
 17 | DrugCombination <- function(SC.integrated=SC.data,
 18 |                             Gene.data=Gene.list,
 19 |                             Drug.data=Drug.ident.res,
 20 |                             Drug.FDR=0.1,
 21 |                             FDA.drug.only=TRUE,
 22 |                             Combined.drugs=2,
 23 |                             GSE92742.gctx=NULL,
 24 |                             GSE70138.gctx=NULL,
 25 |                             Case=NULL,
 26 |                             Tissue="breast"
 27 | ){
 28 |     # NOTE(review): FDA.drug, cell_data, gene_meta, col_meta_GSE92742 and
 29 |     # col_meta_GSE70138 are not defined in this function; presumably they
 30 |     # come from the package data files (data/*.rda) - confirm.
 31 | 
 32 |     ##Cell proportion: percentage of (case) cells per cell type, ignoring
 33 |     ##cell types with 3 or fewer cells
 34 |     cells <- SC.integrated@meta.data
 35 |     if(length(Case)>0){
 36 |       cells <- subset(cells,sample %in% Case)
 37 |     }
 38 |     cells <- cells$celltype
 39 |     cell.count <- table(cells)
 40 |     cell.count <- cell.count[which(cell.count>3)]
 41 |     cells.freq <- round(100*cell.count/length(cells),2)
 42 | 
 43 |     ##Load drug data: one row per (drug, cell type) pair
 44 |     Drug.list <- data.frame()
 45 |     for(i in names(Drug.data)){
 46 |       Cd <- Drug.data[[i]]
 47 |       Cd <- Cd[!duplicated(Cd$Drug.name),]
 48 |       Drugs <- Cd$Drug.name
 49 |       if(FDA.drug.only==TRUE){
 50 |         Drugs <- intersect(Drugs,FDA.drug)
 51 |       }
 52 |       if(length(Drugs)>0){
 53 |         Cd <- subset(Cd, Drug.name %in% Drugs)
 54 |         temp <- data.frame(Drug=Drugs,Cluster=i,Size=cells.freq[i],P.value=Cd$P.value,FDR=Cd$FDR,row.names = NULL)
 55 |         Drug.list <- rbind(Drug.list,temp)
 56 |       }
 57 |     }
 58 |     Drug.list <- unique(Drug.list)
 59 |     ##Weight each cell type's proportion by the drug's significance in it
 60 |     Drug.list$w.size <- Drug.list$Size*(-log10(Drug.list$FDR))
 61 |     Drug.list[is.na(Drug.list)] <- 0
 62 |     Drug.coverage <- tapply(Drug.list$w.size, Drug.list$Drug,sum)
 63 |     raw.raw.Drug.list <- Drug.list
 64 |     Drug.list <- subset(Drug.list, FDR<Drug.FDR)
 65 | 
 66 |     ##Enumerate candidate combinations of the drugs passing the FDR threshold
 67 |     candidate.drugs <- unique(as.character(Drug.list$Drug))
 68 |     if(length(candidate.drugs)<Combined.drugs){
 69 |       stop("Fewer drugs pass the Drug.FDR threshold than requested by Combined.drugs")
 70 |     }
 71 |     Drug.combinations <- combn(candidate.drugs,Combined.drugs)
 72 |     ##Summed weighted coverage of all drugs in one combination
 73 |     Select.combnation <- function(x){
 74 |       temp.list <- subset(raw.raw.Drug.list,Drug %in% x)
 75 |       return(sum(temp.list$w.size))
 76 |     }
 77 |     label <- apply(Drug.combinations,2,Select.combnation)
 78 |     selected <- which(label>0)
 79 |     ##drop=FALSE keeps a matrix even when a single combination is selected
 80 |     Selected.Drug.combinations <- Drug.combinations[,selected,drop=FALSE]
 81 |     Selected.Drug.combinations.coverage <- label[selected]
 82 |     C.Drugs <- unique(as.vector(Selected.Drug.combinations))
 83 | 
 84 |     ##Cell line information
 85 |     cell.lines <- subset(cell_data,primary_site == Tissue)$cell_id
 86 | 
 87 |     ##Load experiment information and drug responses of one LINCS dataset,
 88 |     ##restricted to the reference tissue's cell lines and the selected drugs
 89 |     Load.responses <- function(col_meta,gctx.path){
 90 |       infor <- col_meta[,c("sig_id","pert_iname")]
 91 |       row.names(infor) <- infor$sig_id
 92 |       idx <- which(col_meta$cell_id %in% cell.lines & col_meta$pert_iname %in% C.Drugs)
 93 |       sig_ids <- col_meta$sig_id[idx]
 94 |       my_ds <- parse_gctx(gctx.path, cid=sig_ids)
 95 |       gene.data <- as.data.frame(my_ds@mat)
 96 |       treatments <- colnames(gene.data)
 97 |       gene.data$geneid <- row.names(gene.data)
 98 |       merged <- merge(gene.data,gene_meta,by.x="geneid",by.y="pr_gene_id")
 99 |       return(list(infor=infor[sig_ids,],responses=merged[,c("pr_gene_symbol",treatments)]))
100 |     }
101 |     set1 <- Load.responses(col_meta_GSE92742,GSE92742.gctx)
102 |     set2 <- Load.responses(col_meta_GSE70138,GSE70138.gctx)
103 |     data <- merge(set1$responses,set2$responses,by="pr_gene_symbol")
104 |     row.names(data) <- data[,1]
105 |     data <- data[,-1,drop=FALSE]
106 |     data_infor <- rbind(set1$infor,set2$infor)
107 | 
108 |     ##Combination score
109 |     ##Genes differentially expressed (adj.P.Val<0.05) in every cell type
110 |     D.genes <- list()
111 |     for(i in names(Gene.data)){
112 |       Cd <- subset(Gene.data[[i]], adj.P.Val<0.05)
113 |       D.genes <- c(D.genes,list(rownames(Cd)))
114 |     }
115 |     D.genes <- Reduce(intersect,D.genes)
116 |     ##Mean disease score of every shared gene across cell types
117 |     Gene.expression <- NULL
118 |     for(i in names(Gene.data)){
119 |       Gene.expression <- cbind(Gene.expression,Gene.data[[i]][D.genes,"score"])
120 |     }
121 |     D.gene.expression <- apply(Gene.expression,1,mean)
122 |     names(D.gene.expression) <- D.genes
123 | 
124 |     ##Reversal score of every shared gene under one drug: positive when the
125 |     ##mean drug response has the opposite sign of the disease score
126 |     Reversal.scores <- function(drug.name){
127 |       drug.treatments <- as.character(data_infor$sig_id[which(data_infor$pert_iname == drug.name)])
128 |       ##drop=FALSE so a drug with a single signature still yields a table
129 |       drug.responses <- data[,drug.treatments,drop=FALSE]
130 |       drug.responses.mean <- apply(drug.responses,1,mean)
131 |       D.D.genes <- intersect(names(D.gene.expression),names(drug.responses.mean))
132 |       return(-D.gene.expression[D.D.genes]*drug.responses.mean[D.D.genes])
133 |     }
134 | 
135 |     ##Single-drug score: ratio of reversed genes times the drug's coverage
136 |     Single.treated.score.list <- NULL
137 |     for(drug.name in C.Drugs){
138 |       D.genes.treated <- Reversal.scores(drug.name)
139 |       Ratio.treated <- length(which(D.genes.treated>0))/length(D.genes.treated)
140 |       Coverage.treated <- Drug.coverage[drug.name]/100
141 |       Single.treated.score.list[drug.name] <- as.numeric(Ratio.treated*Coverage.treated)
142 |     }
143 | 
144 |     ##Per-gene scores and ratio of reversed genes for one combination
145 |     Combination.treated <- function(Drugs){
146 |       D.genes.treated <- NULL
147 |       for(drug.name in Drugs){
148 |         D.genes.treated <- cbind(D.genes.treated,Reversal.scores(drug.name))
149 |       }
150 |       ##Discard genes with a negative score under every drug. Index only when
151 |       ##there is something to remove: x[-integer(0),] would drop all rows
152 |       remove <- which(rowSums(D.genes.treated<0)==length(Drugs))
153 |       D.genes.combination <- D.genes.treated
154 |       if(length(remove)>0){
155 |         D.genes.combination <- D.genes.treated[-remove,,drop=FALSE]
156 |       }
157 |       scores <- apply(D.genes.combination,1,mean)
158 |       ratio <- length(which(scores>0))/nrow(D.genes.treated)
159 |       return(list(scores=scores,ratio=ratio))
160 |     }
161 |     ##lapply (not apply) so the per-combination score vectors are never
162 |     ##simplified into a matrix when they happen to have equal lengths
163 |     Combination.results <- lapply(seq_len(ncol(Selected.Drug.combinations)),function(k){
164 |       Combination.treated(Selected.Drug.combinations[,k])
165 |     })
166 |     Score.list <- lapply(Combination.results,function(r) r$scores)
167 |     Ratio.list <- vapply(Combination.results,function(r) r$ratio,numeric(1))
168 | 
169 |     ##P-value: KS test of one combination's scores against all combinations
170 |     ref.score <- unlist(Score.list)
171 |     P.value <- function(Score){
172 |       if(length(Score)>1 && length(ref.score)>1){
173 |         return(ks.test(Score, ref.score)$p.value)
174 |       }
175 |       return(1)
176 |     }
177 |     pvalues <- suppressWarnings(vapply(Score.list,P.value,numeric(1)))
178 | 
179 |     ##Result table: drugs, single-drug scores, combination score, P and FDR
180 |     Combination.table <- as.data.frame(t(Selected.Drug.combinations),stringsAsFactors=FALSE)
181 |     colnames(Combination.table) <- paste0("Drug",seq_len(Combined.drugs))
182 |     for(d in seq_len(Combined.drugs)){
183 |       single.scores <- Single.treated.score.list[as.character(Combination.table[,d])]
184 |       Combination.table[[paste0("Drug",d,".therapeutic.score")]] <- unname(single.scores)
185 |     }
186 |     Combination.table$Combination.therapeutic.score <- (Selected.Drug.combinations.coverage*Ratio.list/100)
187 |     Combination.table$P.value <- pvalues
188 |     Combination.table$FDR <- p.adjust(pvalues, method = "BH")
189 |     return(Combination.table)
190 | }
191 | 


--------------------------------------------------------------------------------
/R/DrugScore.R:
--------------------------------------------------------------------------------
  1 | #' @title Calculate drug score
  2 | #' @description The drug score is a comprehensive estimation of drug therapeutic 
  3 | #' effects using all or a selected set of clusters. 
  4 | #' @details This function calculates drug score using cellular proportion of 
  5 | #' clusters, the significance of reversal in DEGs' expressions, and the ratio of 
  6 | #' the reversed genes. 
  7 | #' @param cell_metadata A data.frame of cell metadata. It must have a column 
  8 | #' named 'cluster' indicating which cluster cells belong, and a column named 
  9 | #' 'sample' indicating which sample cells belong. 
 10 | #' @param cluster_degs A list of differential gene expression profiles for 
 11 | #' each cluster.
 12 | #' @param cluster_drugs Drug repurposing result from GetDrug function.
 13 | #' @param tissue Reference tissue. If one used 'lung_rankMatrix.txt' in 
 14 | #' GetDrugRef function, then the Reference tissue is lung. Please use " " 
 15 | #' instead of "-" in tissue name. For example, while 
 16 | #' 'haematopoietic-and-lymphoid-tissue' is the prefix of the drug reference 
 17 | #' files, the corresponding tissue name is "haematopoietic and lymphoid tissue".
 18 | #' @param gse70138_gctx_path The gctx file contains drug responses from GSE70138 
 19 | #' dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE70138).
 20 | #' @param gse92742_gctx_path The gctx file contains drug responses from GSE92742 
 21 | #' dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE92742)..
 22 | #' @param clusters Select which clusters (cell types) to be used for drug score 
 23 | #' estimation. By default, it uses all clusters.
 24 | #' @param case A vector containing case sample names.
 25 | #' @param fda_drugs_only logical; if TRUE, will only return FDA-approved drugs, 
 26 | #' else, will return all drugs/compounds.
 27 | #' @return A data frame of drug score, P-value and FDR.
 28 | #' @export
 29 | #' @import cmapR
 30 | DrugScore <- function(cell_metadata, cluster_degs, cluster_drugs, tissue,
 31 | 					  gse70138_gctx_path, gse92742_gctx_path, 
 32 | 					  clusters = NULL, case = NULL, fda_drugs_only = TRUE) {
 33 | 
 34 | 	# Subset input data to the set of clusters we are interested in 
 35 |     if (length(clusters) > 0) {
 36 |     	clusters = intersect(clusters, unique(cell_metada$cluster))
 37 |       	cell_metadata = subset(cell_metadata, cluster %in% clusters)
 38 |       	cluster_drugs = cluster_drugs[clusters]
 39 |       	cluster_degs = cluster_degs[clusters]
 40 |     }
 41 | 
 42 |     # Calculate cluster proportions in diseased tissue
 43 |     if (length(case) > 0) {
 44 |       	cell_metadata <- subset(cell_metadata, sample %in% case)
 45 |     }
 46 |     clustering <- cell_metadata$cluster
 47 |     cluster_sizes <- table(clustering)
 48 |     cluster_sizes <- cluster_sizes[which(cluster_sizes > 3)]
 49 |     cluster_prop <- round(100*cluster_sizes/nrow(cell_metadata), 2) 
 50 | 
 51 |     # Combine cluster drugs into a single data frame
 52 |     drug_list <- data.frame()
 53 |     for (i in names(cluster_drugs)) {
 54 |     	ith_cluster_drugs <- cluster_drugs[[i]]
 55 | 		drug_names <- ith_cluster_drugs$Drug.name
 56 |       	ith_cluster_drugs <- ith_cluster_drugs[!duplicated(drug_names), ]
 57 | 
 58 | 		# Subset to FDA drugs
 59 |       	if (fda_drugs_only) {
 60 |       		drug_names <- intersect(drug_names, FDA.drug)
 61 |       	}
 62 | 
 63 |       	if (length(drug_names)>0) {
 64 |       		ith_cluster_drugs <- subset(ith_cluster_drugs, Drug.name %in% drug_names)
 65 |       		fdrs <- ith_cluster_drugs$FDR
 66 |       		p_values <- ith_cluster_drugs$P.value
 67 |       		
 68 | 			temp <- data.frame(
 69 | 				drug = drug_names, 
 70 | 				cluster = i,
 71 | 				cluster_prop = cluster_prop[i],
 72 | 				p_value = p_values,
 73 | 				fdr = fdrs,
 74 | 				row.names = NULL
 75 | 			)
 76 |       		drug_list <- rbind(drug_list, temp)
 77 |       	}
 78 |     }
 79 |     drug_list <- unique(drug_list)
 80 |     drug_list$weighted_prop <- drug_list$cluster_prop*(-log10(drug_list$fdr))
 81 |     drug_list[is.na(drug_list)] <- 0
 82 | 
 83 |     drug_coverage <- tapply(drug_list$weighted_prop, drug_list$drug, sum)
 84 |     drugs <- rownames(drug_coverage)
 85 | 
 86 |     # Combine cluster spesific p-values of drugs
 87 |     if(length(unique(names(cluster_drugs)))>1){
 88 |        	combined_p_values <- tapply(drug_list$p_value, drug_list$drug, CombineP)
 89 |     }else{
 90 |       	combined_p_values <- drug_list$p_value
 91 |       	names(combined_p_values) <- drug_list$drug
 92 |     }
 93 |   
 94 | 	# Cell line information
 95 |     cell_lines <- subset(cell_data, primary_site == tissue)$cell_id
 96 | 
 97 |     # Load drugs metadata for GSE92742 and subset it to tissue of interest and 
 98 | 	# drugs of interest
 99 |     drug_metadata_92742 <- col_meta_GSE92742[, c("sig_id", "pert_iname")]
100 |     row.names(drug_metadata_92742) <- drug_metadata_92742$sig_id
101 |     idx <- which(col_meta_GSE92742$cell_id %in% cell_lines & 
102 | 				 col_meta_GSE92742$pert_iname %in% drugs)
103 |     sig_ids <- col_meta_GSE92742$sig_id[idx]
104 |     drug_metadata_92742 <- drug_metadata_92742[sig_ids, ]
105 | 
106 |     # Load drug response for GSE92742
107 |     exprs <- as.data.frame(parse_gctx(gse92742_gctx_path, cid=sig_ids)@mat)
108 |     treatments <- colnames(exprs)
109 | 	exprs$gene_id <- row.names(exprs)
110 |     tmp <- merge(exprs, gene_meta, by.x="gene_id", by.y="pr_gene_id")
111 |     drug_responses_92742 <- tmp[, c("pr_gene_symbol", treatments)]
112 | 
113 |     # Load drugs metadata for GSE70138 and subset it to tissue of interest and 
114 | 	# drugs of interest
115 |     drug_metadata_70138 <- col_meta_GSE70138[, c("sig_id", "pert_iname")]
116 |     row.names(drug_metadata_70138) <- drug_metadata_70138$sig_id
117 |     idx <- which(col_meta_GSE70138$cell_id %in% cell_lines & 
118 | 				 col_meta_GSE70138$pert_iname %in% drugs)
119 |     sig_ids <- col_meta_GSE70138$sig_id[idx]
120 |     drug_metadata_70138 <- drug_metadata_70138[sig_ids, ]
121 | 
122 |     # Load drug response for GSE70138
123 |     exprs <- as.data.frame(parse_gctx(gse70138_gctx_path, cid=sig_ids)@mat)
124 |     treatments <- colnames(exprs)
125 |     exprs$gene_id <- row.names(exprs)
126 | 	tmp <- merge(exprs, gene_meta, by.x="gene_id", by.y="pr_gene_id")
127 |     drug_responses_70138 <- tmp[, c("pr_gene_symbol", treatments)]
128 | 
129 |     drug_responses <- merge(drug_responses_92742, drug_responses_70138, 
130 | 							by="pr_gene_symbol")
131 |     row.names(drug_responses) <- drug_responses[, 1]
132 |     drug_responses <- drug_responses[, -1]
133 |     drug_metadata <- rbind(drug_metadata_92742, drug_metadata_70138)
134 | 
135 | 	# Find DEGs that are common to all clusters
136 |     common_degs <- list()
137 |     for (i in names(cluster_degs)) {
138 |     	ith_cluster_degs <- cluster_degs[[i]]
139 |       	ith_cluster_degs <- subset(ith_cluster_degs, adj.P.Val < 0.05)
140 | 		if (length(ith_cluster_degs) > 0) {
141 | 	    	common_degs[[i]] <- rownames(ith_cluster_degs)
142 | 		}
143 |     }
144 | 	common_degs <- Reduce(intersect, common_degs)
145 | 
146 | 	# Combine cluster specific DEG scores into a matrix
147 | 	deg_scores <- data.frame()
148 |     for (i in names(cluster_degs)) {
149 |     	ith_cluster_degs <- cluster_degs[[i]]
150 |     	if (nrow(deg_scores) == 0) {
151 |     		deg_scores <- data.frame(score = ith_cluster_degs[common_degs, "score"])
152 |     	} else {
153 |     	    tmp <- data.frame(score = ith_cluster_degs[common_degs,"score"])
154 |         	deg_scores <- cbind(deg_scores, tmp)
155 |        }
156 |     }
157 |     deg_scores <- as.matrix(deg_scores)
158 |     row.names(deg_scores) <- common_degs
159 | 
160 |     deg_scores_mean <- apply(deg_scores, 1, mean)
161 |     names(deg_scores_mean) <- common_degs
162 | 
163 | 	# Calculate drug score
164 |     drug_scores <- list()
165 |     for (drug in drugs) {
166 | 		# Get response from CMap
167 | 		treatments <- subset(drug_metadata, pert_iname == drug)$sig_id
168 | 		if (length(treatments) > 1) {
169 | 			curr_drug_response <- drug_responses[, treatments]
170 | 			mean_response <- apply(curr_drug_response, 1, mean)
171 | 		} else {
172 | 			curr_drug_response <- drug_responses[, treatments]
173 | 			mean_response <- curr_drug_response
174 | 		}
175 | 
176 | 		drug_stats <- drug_list[drug_list$drug == drug, ]
177 | 		drug_score <- 0
178 | 		for (i in names(cluster_degs)) {
179 | 			cluster_prop <- drug_stats[drug_stats$cluster == i, "cluster_prop"]
180 | 			fdr <- drug_stats[drug_stats$cluster == i, "fdr"]
181 | 			p_value <- drug_stats[drug_stats$cluster == i, "p_value"]
182 | 
183 | 			ith_cluster_degs <- cluster_degs[[i]]
184 |       		ith_cluster_degs <- subset(ith_cluster_degs, adj.P.Val < 0.05)
185 | 
186 | 			treatable_degs <- intersect(row.names(ith_cluster_degs), names(mean_response))
187 | 			if (length(treatable_degs > 0)) {
188 | 				deg_scores <- ith_cluster_degs[treatable_degs, "score"]
189 | 
190 | 				treated_degs <- -deg_scores*mean_response[treatable_degs]
191 | 				treated_degs <- treated_degs[which(treated_degs > 0)]
192 | 
193 | 				treated_degs_ratio <- length(treated_degs)/length(treatable_degs)
194 | 				drug_score <- drug_score +
195 | 					(cluster_prop/100)*(-log10(fdr))*treated_degs_ratio
196 | 			}
197 | 	    }
198 | 		
199 | 		drug_scores[[drug]] <- drug_score
200 |     }
201 | 	drug_scores <- t(as.data.frame(drug_scores))
202 | 
203 |     out <- data.frame(
204 | 		Drug.therapeutic.score = drug_scores,
205 | 		P.value = combined_p_values[drugs],
206 | 		FDR = p.adjust(combined_p_values[drugs], method = "BH")
207 | 	)
208 |     return(out)
209 | 
210 | }
211 | 


--------------------------------------------------------------------------------
/R/GetDrug.R:
--------------------------------------------------------------------------------
 1 | #' @title Mono-drug Repurposing.
 2 | #' @description  It identify mono-drug therapy for every cell type.
 3 | #' @details This function allows user to use the differential expression data of every case cell type to query against reference drug response profiles.This function is a reverised version of drug.identification from DrInsight package.
 4 | #' @param drug.ref.profiles A list contains tissue specific drug reference Profiles from GetDrugRef function.
 5 | #' @param repurposing.unit The parameter of either "treatment" or "drug", which indicates if user want the function to test drug repurposing p value at treatment level or drug level. The default is "treatment", which treats the drug data from different cell lines separately.
 6 | #' @param CEG.threshold The p value threshold to select the consistently differential expressed genes (CEGs). The default value is 0.05.
 7 | #' @param connectivity The type of connectivity, either "negative" or "positive". Negative connectivity is used when the query data is the differential scores from disease data, and it will repurpose drugs that can potentially reverse the query disease phenotype. Positive connectivity is used when the query data is from a drug profile, and it will return the drugs that are similar to the query drug. The default value is "negative".
 8 | #' @param drug.type The parameter of either "FDA" or "compounds" or "all", which indicates if user want the function to identify FDA-approved drugs or compounds or both, respectively.The default value is "FDA".
 9 | #' @return A list of mono-drugs for every cell type.
10 | #' @export
11 | 
12 | 
13 | GetDrug = function(gene.data = NULL,
14 |                     drug.ref.profiles = NULL,
15 |                     repurposing.unit = "drug",
16 |                     CEG.threshold = 0.05,
17 |                     connectivity = "negative",
18 |                     drug.type="FDA"){
19 |   if(drug.type=="FDA"){
20 |     Drug.info <- drug.ref.profiles$drug.info
21 |     Drug.info$temp_name <- gsub("_.*","",Drug.info$cmap_name)
22 |     Drug.info <- subset(Drug.info, temp_name %in% FDA.drug)
23 |     Drug.info <- Drug.info[,colnames(drug.ref.profiles$drug.info)]
24 |     drug.ref.profiles$drug.rank.matrix <- drug.ref.profiles$drug.rank.matrix[,Drug.info$instance_id]
25 |     drug.ref.profiles$drug.info <- Drug.info
26 |   }else if(drug.type=="compounds"){
27 |     Drug.info <- drug.ref.profiles$drug.info
28 |     Drug.info$temp_name <- gsub("_.*","",Drug.info$cmap_name)
29 |     Drug.info <- subset(Drug.info, !(temp_name %in% FDA.drug))
30 |     Drug.info <- Drug.info[,colnames(drug.ref.profiles$drug.info)]
31 |     drug.ref.profiles$drug.rank.matrix <- drug.ref.profiles$drug.rank.matrix[,Drug.info$instance_id]
32 |     drug.ref.profiles$drug.info <- Drug.info
33 |   }
34 |   res.list <- list()
35 |   for(ci in 1:length(names(gene.data))){
36 |       query.data <- data.frame(geneSymbol=row.names(gene.data[[ci]]),score=gene.data[[ci]]$score)
37 |       cmap.drug.rank = drug.ref.profiles$drug.rank.matrix
38 |       e1 = simpleError("Did not find the column named 'geneSymbol' in query data that contains the gene symbols in it.")
39 |       e2 = simpleError("Did not find the column named 'score' in query data that contains the test statistics or any values that you would like to rank the genes.")
40 | 
41 |       cat("\n")
42 |       cat("\n")
43 |       message("Data preprocessing ...\n")
44 |       cat("\n")
45 |       if("score" %in% colnames(query.data)){
46 |         if("geneSymbol" %in% colnames(query.data)){
47 |           tmp = data_preprocess(query.data, cmap.drug.rank,connectivity = connectivity)
48 |           query.data = tmp[[1]]
49 |           cmap.drug.rank = tmp[[2]]
50 |           rm(tmp)
51 |         } else{
52 |           stop(e1)
53 |         }
54 |       } else{
55 |         stop(e2)
56 |       }
57 | 
58 |       message("Identifying drug instance CEGs...\n")
59 |       cat("\n")
60 |       p_min = get_gene_pval('min',cmap.drug.rank,query.data)
61 |       p_max = get_gene_pval('max',cmap.drug.rank,query.data)
62 | 
63 |       ##Select the smallest p value (between 2 p values) as the p value of the gene
64 |       p_score = pmin(p_min,p_max)
65 |       z_score = qnorm(p_score,lower.tail = F)
66 |       CEG.pvals = get_CEGs(p_min, p_max, z_score,threshold = CEG.threshold)
67 | 
68 |       message("Calculating drug connectivity p values ...\n")
69 |       cat("\n")
70 |       drug.info = drug.ref.profiles$drug.info
71 |       if(repurposing.unit == "drug"){
72 |         drug.info$drug = drug.info$cmap_name
73 |       } else if(repurposing.unit == "treatment"){
74 |         drug.info$drug = drug.info$treatment
75 |       } else{
76 |         stop(simpleError("Please set the repurposing unit to either 'drug' or 'treatment'."))
77 |       }
78 | 
79 |       drug.pvals = get_drug_pval(CEGsum = CEG.pvals$CEG.sumz.scores,drug.info = drug.info)
80 | 
81 |       drug.pvals = drug.pvals[order(drug.pvals$pval),]
82 |       drugs = rownames(drug.pvals)
83 |       drug.pvals$Drug.name = gsub("_BRD-.*","",drugs)
84 |       drug.pvals$Drug.id = gsub(".*_","",drugs)
85 |       rownames(drug.pvals) = NULL
86 |       drug.pvals = drug.pvals[,c(3,4,1)]
87 |       drug.pvals$FDR = p.adjust(drug.pvals$pval,method = "fdr")
88 |       colnames(drug.pvals)[3] = "P.value"
89 | 
90 |       res = list(drug.pvals,drug.info,CEG.pvals$CEG.pvals)
91 |       names(res) = c("drug.pvals","drug.info","CEG.pvals")
92 |       res.list[[ci]] <- drug.pvals
93 |   }
94 |   names(res.list) <- names(gene.data)
95 |   return(res.list)
96 | }
97 | 


--------------------------------------------------------------------------------
/R/GetDrugRef.R:
--------------------------------------------------------------------------------
 1 | #' @title Load and Process Drug Reference Profiles.
 2 | #' @description  This function allows user to load in the tissue specific drug rank matrix.
 3 | #' @details This function is a reverised version of get.cmap.ref from DrInsight package. The tissue specific drug rank matrix is tranformed from L1000data (GEO: GSE92742 and GSE70138) using PrepareReference function.
 4 | #' @param drug.response.path The local path and the name of the tissue specific drug rank matrix.
 5 | #' @param probe.to.genes A data.frame contains gene IDs (the IDs used in drug rank matrix) and official gene symbol. This files was automately generated with drug rank matrix.
 6 | #' @param drug.info A data.frame contains drug information. This file was automately generated with drug rank matrix.
 7 | #' @export
 8 | 
 9 | 
10 | GetDrugRef = function(drug.response.path = NULL, probe.to.genes = NULL, drug.info = NULL){
11 |   cat("\n")
12 |   cat("\n")
13 |   message("Loading CMap drug matrix. This may take some time ... \n")
14 |   cmap.drug.rank = read.table(drug.response.path,row.names = 1, header = T, check.names = FALSE)
15 |   cmap.drug.rank = cmap.drug.rank[probe.to.genes$ID,]
16 |   rownames(cmap.drug.rank) = probe.to.genes$Gene.Symbol
17 |   cmap.ref.profiles = list(drug.info = drug.info, drug.rank.matrix = cmap.drug.rank)
18 |   return(cmap.ref.profiles)
19 | }
20 | 
21 | 


--------------------------------------------------------------------------------
/R/PrepareReference.R:
--------------------------------------------------------------------------------
  1 | #' @title Prepare Drug Reference.
  2 | #' @description  Prepare tissue specific drug reference Profiles from L1000 drug response data.
  3 | #' @details This function converts L1000 data to the tissue specific drug rank matrix.
  4 | #' @param cell.info The local path and the name of the cell.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_cell_info_2017-04-28.txt.gz .
  5 | #' @param gene.info The local path and the name of the gene.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_gene_info_2017-03-06.txt.gz .
  6 | #' @param GSE70138.sig.info The local path and the name of the cell.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_sig_info_2017-03-06.txt.gz .
  7 | #' @param GSE92742.sig.info The local path and the name of the cell.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_sig_info.txt.gz .
  8 | #' @param GSE70138.gctx The local path and the name of the cell.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx.gz .
  9 | #' @param GSE92742.gctx The local path and the name of the cell.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx.gz .
 10 | #' @param Output.Dir The output directory for the generated files.
 11 | #' @export
 12 | #' @import cmapR
 13 | 
 14 | PrepareReference <- function(cell.info = NULL,
 15 |                     gene.info = NULL,
 16 |                     GSE70138.sig.info = NULL,
 17 |                     GSE92742.sig.info = NULL,
 18 |                     GSE70138.gctx = NULL,
 19 |                     GSE92742.gctx = NULL,
 20 |                     Output.Dir = "./"){
 21 |       cell_data<-read.table(file=cell.info,sep="\t",header = T,quote = "")
 22 |       tissues<-unique(as.character(cell_data$primary_site))
 23 |       tissues<-tissues[which(tissues!="-666")]
 24 |       for (tissue in tissues){
 25 |           print(tissue)
 26 |           cell_data<-read.table(file=cell.info,sep="\t",header = T,quote = "")
 27 |           cell_ids<-which(cell_data$primary_site == tissue)
 28 |           cell_names <- cell_data$cell_id[cell_ids]
 29 |           ds_path <- GSE70138.gctx
 30 |           col_meta_path <- GSE70138.sig.info
 31 |           col_meta <- read.delim(col_meta_path, sep="\t", stringsAsFactors=F)
 32 |           if(tissue == "breast"){
 33 |           idx <- which(col_meta$cell_id %in% cell_names & col_meta$pert_type == "trt_cp" & col_meta$pert_id!="BRD-K18910433")
 34 |           }else{
 35 |           idx <- which(col_meta$cell_id %in% cell_names & col_meta$pert_type == "trt_cp")
 36 |           }
 37 |           sig_ids <- col_meta$sig_id[idx]
 38 |           rm.ids <- grep('REP\\.',sig_ids)
 39 |           if(length(rm.ids)>0){
 40 |             sig_ids <- sig_ids[-rm.ids]
 41 |           }
 42 |           length1<-length(sig_ids)
 43 |           if(length1 > 0){
 44 |           my_ds <- parse_gctx(ds_path, cid=sig_ids)
 45 |           myrank <- function(x){
 46 |             temp<-rank(-x,ties.method ="min")
 47 |             return(temp)
 48 |           }
 49 |           rank_matrix1<-apply(my_ds@mat,2,myrank)
 50 |           rank_matrix1<-as.data.frame(rank_matrix1)
 51 |           }
 52 |           cell_data<-read.table(file=cell.info,sep="\t",header = T,quote = "")
 53 |           cell_ids<-which(cell_data$primary_site == tissue)
 54 |           cell_names <- cell_data$cell_id[cell_ids]
 55 |           ds_path <- GSE92742.gctx
 56 |           col_meta_path <- GSE92742.sig.info
 57 |           col_meta <- read.delim(col_meta_path, sep="\t", stringsAsFactors=F)
 58 |           if(tissue == "breast"){
 59 |           idx <- which(col_meta$cell_id %in% cell_names & col_meta$pert_type == "trt_cp" & col_meta$pert_id!="BRD-K18910433")
 60 |           }else{
 61 |           idx <- which(col_meta$cell_id %in% cell_names & col_meta$pert_type == "trt_cp")
 62 |           }
 63 |           sig_ids <- col_meta$sig_id[idx]
 64 | 		  rm.ids <- grep('REP\\.',sig_ids)
 65 |           if(length(rm.ids)>0){
 66 |              sig_ids <- sig_ids[-rm.ids]
 67 |             }
 68 |           length2<-length(sig_ids)
 69 |           if(length2 > 0){
 70 |           my_ds <- parse_gctx(ds_path, cid=sig_ids)
 71 |           myrank <- function(x){
 72 |             temp<-rank(-x,ties.method ="min")
 73 |             return(temp)
 74 |            }
 75 |           rank_matrix2<-apply(my_ds@mat,2,myrank)
 76 |           rank_matrix2<-as.data.frame(rank_matrix2)
 77 |           }
 78 | 
 79 |           if(length1 > 0 & length2 > 0){
 80 |           rank_matrix<-cbind(rank_matrix1,rank_matrix2)
 81 |           }else if(length1 > 0 & length2 == 0){
 82 |           rank_matrix<-rank_matrix1
 83 |           }else if(length1 == 0 & length2 > 0){
 84 |           rank_matrix<-rank_matrix2
 85 |           }
 86 |           if(length1 > 0 |  length2 > 0){
 87 |               colnames(rank_matrix)<-gsub(":","_",colnames(rank_matrix))
 88 |               cnames<-colnames(rank_matrix)
 89 |               colnames(rank_matrix)<-1:length(cnames)
 90 |               dcnames<-colnames(rank_matrix)
 91 |               rank_matrix$probe_id<-row.names(rank_matrix)
 92 |               rank_matrix <- rank_matrix[,c('probe_id', dcnames)]
 93 |               filename<-paste(Output.Dir,gsub(" ","-",tissue),"_rankMatrix.txt",sep = "")
 94 |               write.table(rank_matrix,file=filename,quote=FALSE,row.names = FALSE,sep = "\t")
 95 | 
 96 |               gene_data<-read.table(file=gene.info,sep="\t",header = T,quote = "")
 97 |               my_gene_info<-gene_data[,1:2]
 98 |               colnames(my_gene_info)<-c("ID","Gene.Symbol")
 99 |               filename<-paste(Output.Dir,gsub(" ","-",tissue),"_gene_info.txt",sep = "")
100 |               write.table(my_gene_info,file=filename,quote=FALSE,row.names = FALSE,sep = "\t")
101 | 
102 |               sig_data<-read.table(file=GSE70138.sig.info,sep="\t",header = T,quote = "")
103 |               sig_data$sig_id<-gsub(":","_",sig_data$sig_id)
104 |               my_drug_info<-data.frame(instance_id=sig_data$sig_id,cmap_name=paste(sig_data$pert_iname,sig_data$pert_id,sep="_"),concentration..M=sig_data$pert_idose,duration..h=sig_data$pert_itime,cell2=sig_data$cell_id,catalog_name=sig_data$pert_id,treatment=paste(sig_data$pert_iname,"_",sig_data$sig_id,sep = ""))
105 |               my_drug_info1<-subset(my_drug_info,instance_id %in% cnames)
106 | 
107 |               sig_data<-read.table(file=GSE92742.sig.info,sep="\t",header = T,quote = "")
108 |               sig_data$sig_id<-gsub(":","_",sig_data$sig_id)
109 |               my_drug_info<-data.frame(instance_id=sig_data$sig_id,cmap_name=paste(sig_data$pert_iname,sig_data$pert_id,sep="_"),concentration..M=sig_data$pert_idose,duration..h=sig_data$pert_itime,cell2=sig_data$cell_id,catalog_name=sig_data$pert_id,treatment=paste(sig_data$pert_iname,"_",sig_data$sig_id,sep = ""))
110 |               my_drug_info2<-subset(my_drug_info,instance_id %in% cnames)
111 | 
112 |               my_drug_info<-rbind(my_drug_info1,my_drug_info2)
113 |               my_drug_info$instance_id<-1:length(my_drug_info$instance_id)
114 |               filename<-paste(Output.Dir,gsub(" ","-",tissue),"_drug_info.txt",sep = "")
115 |               write.table(my_drug_info,file=filename,quote=FALSE,row.names = FALSE,sep = "\t")
116 |           }
117 | 
118 |     }
119 | }
120 | 


--------------------------------------------------------------------------------
/R/SCplasticity.R:
--------------------------------------------------------------------------------
 1 | #' @title Sinlge-cell Plasticity.
 2 | #' @description  It determines the plasticity of each cell type.
 3 | #' @details This function estimate the entropy of every cell in the case samples. For each cell type, it use the median entropy value as the plasticity of each cell type.
 4 | #' @param SC.integrated A Seurat object of aligned single cells from SCalignment function.
 5 | #' @param Case A vector contains names of case samples.
 6 | #' @return A data frame of plasticity, normailized plasticity and cell type coverage.
 7 | #' @export
 8 | 
 9 | SCplasticity <- function (SC.integrated = SC.data, Case=NULL) 
10 | {
11 |   if(length(Case)>0){
12 |     SC.integrated <- subset(SC.integrated, sample %in% Case)
13 |   }else{
14 |     SC.integrated <- SC.integrated
15 |   }
16 |   SC.meta <- SC.integrated@meta.data
17 |   expr.data <- as.matrix(SC.integrated@assays$RNA@counts)
18 |   
19 |   #Entorpy-based Plasticity
20 |   probs   <- t(t(expr.data)/apply(expr.data,2,sum))
21 |   probs[is.na(probs)] <- 0
22 |   log.probs <- log(probs)
23 |   log.probs[which(is.infinite(log.probs))] <- 0
24 |   SC.meta$cell.entropy <- -apply(probs*log.probs/log(nrow(expr.data)),2,sum)
25 |   SC.entropy <- tapply(SC.meta$cell.entropy, SC.meta$celltype, median)
26 |   SC.entropy <- data.frame(Cell.Type=row.names(SC.entropy),Plasticity=SC.entropy)
27 |   rm(expr.data)
28 |   rm(log.probs)
29 |   rm(probs)
30 |   
31 |   #Normalize Plasticity
32 |   SC.entropy$Normalized.Plasticity=(SC.entropy$Plasticity-min(SC.entropy$Plasticity))/(max(SC.entropy$Plasticity)-min(SC.entropy$Plasticity))
33 |   
34 |   #Population Size
35 |   Cluster.cell.rate <- table(SC.meta$celltype)/nrow(SC.meta)
36 |   SC.entropy$Coverage <- 100*Cluster.cell.rate[row.names(SC.entropy)]
37 |   
38 |   return(SC.entropy)
39 | }
40 | 
41 | 


--------------------------------------------------------------------------------
/R/TopCombination.R:
--------------------------------------------------------------------------------
 1 | #' @title Combination Drug Selection.
 2 | #' @description  Select drug combinations by combination therapeutic score and FDR of combination therapeutic score.
 3 | #' @details Input raw drug combination result and return the top drug combinations.
 4 | #' @param Drug.combination raw drug combination result from DrugCombination function.
 5 | #' @param Combination.FDR The FDR threshold to select drug combination. The default value is 0.1.
 6 | #' @param Min.combination.score The Combination therapeutic score threshold to select drug combination. The default value is 1.
 7 | #' @return A data frame of selected drug combinations.
 8 | #' @export
 9 | 
10 | 
11 | TopCombination <- function(Drug.combination=Drug.combinations,
12 |                            Combination.FDR=0.1,
13 |                            Min.combination.score=1
14 | ){
15 |   Drug.combination <- subset(Drug.combination, Combination.therapeutic.score > Min.combination.score & FDR < Combination.FDR)
16 |   Drug.combination <- Drug.combination[order(Drug.combination$Combination.therapeutic.score, decreasing = T),]
17 |   return(Drug.combination)
18 | }
19 | 


--------------------------------------------------------------------------------
/R/TopDrug.R:
--------------------------------------------------------------------------------
 1 | #' @title Single Drug Selection for Individual Clusters.
 2 | #' @description  Select single drugs for every cell population by FDR and drug type, and summarize cell coverage for selected drugs.
 3 | #' @details Input raw drug repurosing result and return the top drugs with summary of cell coverage.
 4 | #' @param SC.integrated A Seurat object of aligned single cells.
 5 | #' @param Drug.data Drug repurosing result from GetDrug function.
 6 | #' @param Drug.FDR The FDR threshold to select drug. The default value is 0.1.
 7 | #' @param FDA.drug.only logical; if TRUE, will only return FDA-approved drugs.
 8 | #' @param Case An vector of case (diseased) samples.Only case sammples are involved in the calculation of coverage.
 9 | #' @return A data frame of selected drugs with summary of cell coverage.
10 | #' @export
11 | 
12 | 
13 | TopDrug <- function(SC.integrated = SC.data,
14 |                     Drug.data = Drug.ident.res,
15 |                     Drug.FDR = 0.1,
16 |                     FDA.drug.only = TRUE,
17 |                     Case = NULL){
18 | 
19 |   ##Cell proportion
20 |   cells <- SC.integrated@meta.data
21 |   if(length(Case)>0){
22 |   cells <- subset(cells,sample %in% Case)
23 |   }
24 |   cells <- cells$celltype
25 |   cell.count <- table(cells)
26 |   cell.count <- cell.count[which(cell.count>3)]
27 |   cells.freq <- round(100*cell.count/length(cells),2)
28 | 
29 |   ##Load drug data
30 |   Drug.list <- data.frame()
31 |   for(i in names(Drug.data)){
32 |     Cd <- Drug.data[[i]]
33 |     Cd <- subset(Cd, FDR<Drug.FDR)
34 |     Drugs <- Cd$Drug.name
35 |     if(FDA.drug.only==TRUE){
36 |       Drugs <- intersect(Drugs,FDA.drug)
37 |     }
38 |     if(length(Drugs)>0){
39 |       Cd <- subset(Cd, Drug.name %in% Drugs)
40 |       temp <- data.frame(Drug=Cd$Drug.name,Cell.type=i,Cell.type.coverage=cells.freq[i],FDR=Cd$FDR,row.names = NULL)
41 |       Drug.list <- rbind(Drug.list,temp)
42 |     }
43 |   }
44 |   Drug.list <- Drug.list[order(Drug.list$FDR, decreasing = F),]
45 |   Drug.list <- Drug.list[!duplicated(Drug.list),]
46 |   Drug.coverage <- tapply(Drug.list$Cell.type.coverage, Drug.list$Drug,sum)
47 |   temp.coverage <- Drug.coverage[Drug.list$Drug]
48 |   Drug.list$Drug.coverage <- temp.coverage
49 |   Drug.list <- Drug.list[,c(1:3,5,4)]
50 |   Drug.list <- Drug.list[order(Drug.list$Drug.coverage, decreasing = T),]
51 |   return(Drug.list)
52 | }
53 | 


--------------------------------------------------------------------------------
/R/data_preprocess.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ##Match common genes between users query data and cmap
 3 | data_preprocess = function(query.data,cmap.drug.rank, connectivity){
 4 |   common.genes = intersect(query.data$geneSymbol,rownames(cmap.drug.rank))
 5 |   rownames(query.data) = query.data$geneSymbol
 6 |   query.data = query.data[common.genes,]
 7 |   if(connectivity == "negative"){
 8 |     ##Rank query data gene statistic scores from smallest to largest, opposite to cmap gene rank
 9 |     query.data$geneRank = rank(query.data$score,ties.method = "first")
10 |   } else if(connectivity == "positive"){
11 |     ##Rank query data gene statistic scores from largest to smallest, same with cmap gene rank
12 |     query.data$geneRank = rank(-(query.data$score),ties.method = "first")
13 |   }
14 | 
15 |   cmap.drug.rank = cmap.drug.rank[common.genes,]
16 |   ##Re-rank drug rank matrix after excluding uncommon genes
17 |   for(i in 1:ncol(cmap.drug.rank)){
18 |     cmap.drug.rank[,i] = rank(cmap.drug.rank[,i])
19 |   }
20 |   return(list(query.data,cmap.drug.rank))
21 | }
22 | 
23 | 


--------------------------------------------------------------------------------
/R/get_CEGs.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ##Get CEGs and CEG's sumz scores
 3 | get_CEGs = function(p_min, p_max, z_score,threshold){
 4 |   CEG.pvals = list()
 5 |   CEG.pvals$down = p_min
 6 |   CEG.pvals$up = p_max
 7 |   CEGz= numeric(ncol(z_score))
 8 |   for(i in 1:ncol(z_score)){
 9 |     CEGz[i] = sum(z_score[which(z_score[,i] >= qnorm(threshold,lower.tail = F)),i])
10 |   }
11 |   names(CEGz) = colnames(z_score)
12 |   res = list(CEGz, CEG.pvals)
13 |   names(res) = c("CEG.sumz.scores","CEG.pvals")
14 |   return(res)
15 | }
16 | 
17 | 


--------------------------------------------------------------------------------
/R/get_drug_pval.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ##Get drug p value based on CEGscore
 3 | get_drug_pval = function(CEGsum,drug.info){
 4 |   treat_drug_ks = matrix(0,ncol=1,nrow = length(unique(drug.info$drug)))
 5 |   treat_drug_ks = as.data.frame(treat_drug_ks)
 6 |   rownames(treat_drug_ks) = unique(drug.info$drug)
 7 |   colnames(treat_drug_ks) = 'pval'
 8 | 
 9 |   for(i in 1:nrow(treat_drug_ks)){
10 |     indiv_drug = drug.info[(drug.info$drug == rownames(treat_drug_ks)[i]),]$instance_id
11 |     indiv_drug  <-  as.character(indiv_drug)
12 |     indiv_drug_score = CEGsum[indiv_drug]
13 |     rest_score = CEGsum[setdiff(names(CEGsum),indiv_drug)]
14 | 
15 |     ##k-s test: one drug drug.info v.s. other drug.info
16 |     options(warn = -1)
17 |     treat_drug_ks$pval[i] = (ks.test(indiv_drug_score,rest_score,alternative = 'less'))$p.value
18 |   }
19 |   treat_drug_ks$drug = sapply(rownames(treat_drug_ks),function(x){strsplit(x,split="_")[[1]][1]})
20 | 
21 |   return(treat_drug_ks)
22 | }
23 | 
24 | 


--------------------------------------------------------------------------------
/R/get_gene_pval.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ##Beta test for gene p values
 3 | get_gene_pval = function(order, cmap.drug.rank,query.data){
 4 |   geneRank = as.matrix(replicate(ncol(cmap.drug.rank),query.data$geneRank))
 5 |   ##Keep min(x,y) to find the bottom ranked genes
 6 |   if (order == 'min'){
 7 |     order_stat = pmin(as.matrix(cmap.drug.rank),geneRank)
 8 |     p_val = 1 - pbeta((order_stat-1)/nrow(order_stat),1,2,lower.tail = T)
 9 |   }
10 |   else if(order == 'max'){
11 |     order_stat = pmax(as.matrix(cmap.drug.rank),geneRank)
12 |     p_val = pbeta(order_stat/nrow(order_stat),2,1,lower.tail = T)
13 |   }
14 |   return(p_val)
15 | }
16 | 
17 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # **Asgard: A Single-cell Guided pipeline to Aid Repurposing of Drugs**
  2 | 
  3 | Using scRNA-seq data, Asgard repurposes drugs and predicts personalized drug
  4 | combinations to address the cellular heterogeneity of patients. 
  5 | 
  6 | ![image](asgard_pipeline.png)
  7 | 
  8 | ### **Citation**
  9 | 
 10 | > He, B., Xiao, Y., Liang, H. et al. ASGARD is A Single-cell Guided Pipeline to
 11 | Aid Repurposing of Drugs. *Nat Commun* 14, 993 (2023).
 12 | https://doi.org/10.1038/s41467-023-36637-3
 13 | 
 14 | ## **System Requirements**
 15 | 
 16 | ### **Hardware requirements**
 17 | 
 18 | Asgard package requires only a standard computer with enough RAM (>64GB) to
 19 | support the in-memory operations.
 20 | 
 21 | ### **Software requirements**
 22 | 
 23 | The package has been tested on the following systems:
 24 | ```
 25 | Windows 10
 26 | CentOS Linux 7
 27 | ```
 28 | 
 29 | Required R packages:
 30 | ```
 31 | Seurat
 32 | limma
 33 | cmapR
 34 | SingleR
 35 | celldex
 36 | ```
 37 | ## Installation
 38 | #### Install devtools if you don't have it
 39 | ```
 40 | install.packages('devtools')
 41 | ```
 42 | #### Install recommended packages
 43 | ```
 44 | if (!requireNamespace("BiocManager", quietly = TRUE))
 45 |     install.packages("BiocManager")
 46 |     
 47 | BiocManager::install(c("SingleR","limma","cmapR","celldex"))
 48 | 
 49 | install.packages('Seurat')
 50 | 
 51 | #If you can't install a package with above commands, try to download the gz file and install it locally.
 52 | 
 53 | #Take celldex package as an example:
 54 | 
 55 | #Download the source package of celldex in linux
 56 | wget https://bioconductor.org/packages/release/data/experiment/src/contrib/celldex_1.0.0.tar.gz
 57 | 
 58 | #Start R
 59 | R
 60 | 
 61 | #Install celldex from the local source package
 62 | install.packages('celldex_1.0.0.tar.gz')
 63 | 
 64 | #Note: some dependency packages require R version newer than 4.0
 65 | 
 66 | ```
 67 | #### Install Asgard
 68 | ```
 69 | devtools::install_github("lanagarmire/Asgard")
 70 | ```
 71 | #### Load Asgard
 72 | ```
 73 | library('Asgard')
 74 | ```
 75 | #### Docker
 76 | 
 77 | You can run Asgard via Docker. First, install Docker for your platform.
 78 | 
 79 | ```
 80 | docker run --rm -v `pwd`:/home/rstudio/Asgard -p 8787:8787 -it lanagarmire/asgard:1.0.0
 81 | ```
 82 | 
 83 | This will mount the directory that you are currently working in so it is accessible by the Docker container.
 84 | 
 85 | You can then open a browser and navigate to 127.0.0.1:8787, put in "rstudio" as the username and 
 86 | copy the password from the terminal. You will want to change the working directory to "/home/rstudio/Asgard".
 87 | 
 88 | To build the DrugReference, you will need a large amount of RAM (64GB).
 89 | 
 90 | Upon completion, you can press ^C in the terminal to quit the rstudio server.
 91 | 
 92 | ## Prepare Drug Reference Library
 93 | #### Step 1
 94 | #### Download L1000 Connectivity Map perturbational profiles GSE70138 and GSE92742 from GEO
 95 | <p>Method 1: click file names below </p>
 96 | 
 97 | [GSE70138_Broad_LINCS_cell_info_2017-04-28.txt](https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_cell_info_2017-04-28.txt.gz)
 98 | 
 99 | [GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx](https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx.gz)
100 | 
101 | [GSE70138_Broad_LINCS_sig_info_2017-03-06.txt](https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_sig_info_2017-03-06.txt.gz)
102 | 
103 | [GSE70138_Broad_LINCS_gene_info_2017-03-06.txt](https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_gene_info_2017-03-06.txt.gz)
104 | 
105 | [GSE92742_Broad_LINCS_cell_info.txt](https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_cell_info.txt.gz)
106 | 
107 | [GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx](https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx.gz)
108 | 
109 | [GSE92742_Broad_LINCS_sig_info.txt](https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_sig_info.txt.gz)
110 | 
111 | or Method 2: run the following commands in Linux
112 | ```
113 | wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_cell_info_2017-04-28.txt.gz
114 | wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx.gz
115 | wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_sig_info_2017-03-06.txt.gz
116 | wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_gene_info_2017-03-06.txt.gz
117 | wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_cell_info.txt.gz
118 | wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx.gz
119 | wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_sig_info.txt.gz
120 | ```
121 | #### Step 2 
122 | #### Generate tissue specific drug references from GSE70138 and GSE92742
123 | Unzip the downloaded files, point the file paths below at their location on your machine, and run the following code:
124 | ```
125 | library('Asgard')
126 | 
127 | #Please prepend your real local folder to the file names below if the files are not in the working directory
128 | 
129 | PrepareReference(cell.info="GSE70138_Broad_LINCS_cell_info_2017-04-28.txt",
130 |                  gene.info="GSE70138_Broad_LINCS_gene_info_2017-03-06.txt",
131 |                  GSE70138.sig.info = "GSE70138_Broad_LINCS_sig_info_2017-03-06.txt",
132 |                  GSE92742.sig.info = "GSE92742_Broad_LINCS_sig_info.txt",
133 |                  GSE70138.gctx = "GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx",
134 |                  GSE92742.gctx = "GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx",
135 |                  Output.Dir = "DrugReference/"
136 | )
137 | 
138 | #Note: the file names here may be different after unzipping.
139 | #Please note that it takes more than one hour to produce drug references on a standard computer with RAM>64GB.
140 | ```
141 | Please use '?PrepareReference' for more help.
142 | 
143 | ## **Drug Repurposing**
144 | ### **Step 1: Load single-cell RNA-seq data**
145 | 
146 | Download datasets GSE113197 and GSE123926 from GEO before running this script.
147 | 
148 | Human Breast Cancer Epithelial Cells (GSE123926):
149 | [GSE123926_RAW.tar](https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE123926&format=file) 
150 | 
151 | Normal Human Breast Epithelial Cells (GSE113197):
152 | [GSE113197_RAW.tar](https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE113197&format=file) 
153 | 
154 | ```R
155 | library('Seurat')
156 | 
157 | # Load cells' cell type annotations for GSE113197
158 | cell_types_file <- paste0(
159 | 	"https://raw.githubusercontent.com/lanagarmire/",
160 | 	"Single-cell-drug-repositioning/master/Drug/Normal_celltype.txt"
161 | )
162 | cell_types <- read.table(file=cell_types_file, header=TRUE, check.names=FALSE)
163 | 
164 | # Cell type of interest
165 | cell_types_names <- c(
166 |   	"Luminal_L2_epithelial_cells", "Luminal_L1.1_epithelial_cells", 
167 |     "Luminal_L1.2_epithelial_cells", "Basal_epithelial_cells"
168 | )
169 | 
170 | # Load normal sample Ind5 from GSE113197 dataset 
171 | data <- read.table(file="GSM3099847_Ind5_Expression_Matrix.txt", 
172 |                    header=TRUE, check.names=FALSE)
173 | row.names(data) <- data[, 1]
174 | data <- data[, -1]
175 | ind5_cells <- subset(cell_types, sample=="Ind5" & celltype %in% cell_types_names)
176 | common <- intersect(colnames(data), rownames(ind5_cells))
177 | data <- data[, common]
178 | 
179 | metadata = data.frame(
180 | 	ind5_cells,
181 | 	cell = colnames(data),
182 | 	type = "Normal"
183 | )
184 | Epithelial2 <- CreateSeuratObject(counts=data, project="Epithelial", min.cells=3, 
185 | 								  min.features=200, meta.data=metadata)
186 | 
187 | #Load normal sample Ind6 from GSE113197 dataset
188 | data <- read.table(file="GSM3099848_Ind6_Expression_Matrix.txt", header=TRUE,
189 | 				   check.names=FALSE)
190 | row.names(data) <- data[, 1]
191 | data <- data[, -1]
192 | ind6_cells <- subset(cell_types,sample=="Ind6" & celltype %in% c("Luminal_L2_epithelial_cells","Luminal_L1.1_epithelial_cells", "Luminal_L1.2_epithelial_cells", "Basal_epithelial_cells"))
193 | common <- intersect(colnames(data), rownames(ind6_cells))
194 | data<-data[,common]
195 | Epithelial3 <- CreateSeuratObject(counts = data, project = "Epithelial", min.cells = 3, min.features = 200,meta.data=data.frame(ind6_cells,cell=colnames(data),type="Normal"))
196 | 
197 | #Load normal sample Ind7 from GSE113197 dataset
198 | data<-read.table(file="GSM3099849_Ind7_Expression_Matrix.txt",header = T,check.names=FALSE)
199 | row.names(data)<-data[,1]
200 | data<-data[,-1]
201 | celltype4<-subset(cell_types,sample=="Ind7" & celltype %in% c("Luminal_L2_epithelial_cells","Luminal_L1.1_epithelial_cells", "Luminal_L1.2_epithelial_cells", "Basal_epithelial_cells"))
202 | common <- intersect(colnames(data), rownames(celltype4))
203 | data<-data[,common]
204 | Epithelial4 <- CreateSeuratObject(counts = data, project = "Epithelial", min.cells = 3, min.features = 200,meta.data=data.frame(celltype4,cell=colnames(data),type="Normal"))
205 | 
206 | #Load cancer sample PDX110 from GSE123926 dataset
207 | TNBC_PDX.data<- Read10X(data.dir = "GSM3516947_PDX110")
208 | TNBC.PDX2 <- CreateSeuratObject(counts = TNBC_PDX.data, project = "TNBC", min.cells = 3, min.features = 200, meta.data=data.frame(row.names=colnames(TNBC_PDX.data), cell=colnames(TNBC_PDX.data), sample="PDX-110",type="TNBC.PDX"))
209 | 
210 | #Load cancer sample PDX322 from GSE123926 dataset
211 | TNBC_PDX.data<- Read10X(data.dir = "GSM3516948_PDX322")
212 | TNBC.PDX3 <- CreateSeuratObject(counts = TNBC_PDX.data, project = "TNBC", min.cells = 3, min.features = 200, meta.data=data.frame(row.names=colnames(TNBC_PDX.data), cell=colnames(TNBC_PDX.data), sample="PDX-332",type="TNBC.PDX"))
213 | 
214 | 
215 | ```
216 | 
217 | #### Step 2
218 | #### Single-cell alignment
219 | ```R 
220 | SC.list <- list(
221 | 	TNBC.PDX2 = TNBC.PDX2,
222 | 	TNBC.PDX3 = TNBC.PDX3,
223 | 	Epithelial2 = Epithelial2,
224 | 	Epithelial3 = Epithelial3,
225 | 	Epithelial4 = Epithelial4
226 | )
227 | CellCycle = TRUE #Set it TRUE if you want to do Cell Cycle Regression
228 | anchor.features=2000
229 | 
230 | for (i in 1:length(SC.list)) {
231 |     SC.list[[i]] <- NormalizeData(SC.list[[i]], verbose = FALSE)
232 |     SC.list[[i]] <- FindVariableFeatures(SC.list[[i]], selection.method = "vst",
233 |                            nfeatures = anchor.features, verbose = FALSE)
234 | }
235 |     SC.anchors <- FindIntegrationAnchors(object.list = SC.list,anchor.features = anchor.features, dims = 1:15)
236 |     SC.integrated <- IntegrateData(anchorset = SC.anchors, dims = 1:15)
237 |     DefaultAssay(SC.integrated) <- "integrated"
238 |     if (CellCycle) {
239 | 		##Cell Cycle Regression
240 | 		s.genes <- cc.genes$s.genes
241 | 		g2m.genes <- cc.genes$g2m.genes
242 | 		SC.integrated <- CellCycleScoring(SC.integrated, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
243 | 		SC.integrated <- ScaleData(SC.integrated, vars.to.regress = c("S.Score", "G2M.Score"), features = rownames(SC.integrated))
244 | 		SC.integrated <- RunPCA(SC.integrated, npcs = 15, verbose = FALSE)
245 |     }
246 | 	else {
247 | 		##Run the standard workflow for visualization and clustering
248 | 		SC.integrated <- ScaleData(SC.integrated, verbose = FALSE)
249 | 		SC.integrated <- RunPCA(SC.integrated, npcs = 15, verbose = FALSE)
250 |     }
251 |     ##UMAP and Clustering
252 |     SC.integrated <- RunUMAP(SC.integrated, reduction = "pca", dims = 1:15)
253 |     SC.integrated <- FindNeighbors(SC.integrated, reduction = "pca", dims = 1:15)
254 |     SC.integrated <- FindClusters(SC.integrated, algorithm = 1, resolution = 0.4)
255 | 
256 |     ##Cell Type Annotation, set by.CellType=TRUE if you want to annotate cell  type.
257 |     by.CellType=FALSE
258 |     if(by.CellType == TRUE){
259 |      data <- as.matrix(SC.integrated@assays$RNA@data)
260 |      hpca.se <- HumanPrimaryCellAtlasData()
261 |      pred.hpca <- SingleR(test = data, ref = hpca.se, assay.type.test=1, labels = hpca.se$label.main)
262 |      cell.label <- data.frame(row.names = row.names(pred.hpca),celltype=pred.hpca$labels)
263 |      if(length(SC.integrated@meta.data$celltype)>0){
264 |       SC.integrated@meta.data$celltype <- cell.label$celltype
265 |      }else{
266 |        SC.integrated@meta.data <- cbind(SC.integrated@meta.data,cell.label)
267 |      }
268 |      new.cells <- data.frame()
269 |      for(i in unique(SC.integrated$seurat_clusters)){
270 |       sub.data <- subset(SC.integrated,seurat_clusters==i)
271 |       temp <- table(sub.data@meta.data$celltype)
272 |       best.cell <- names(which(temp==temp[which.max(temp)]))
273 |       cells.temp <- data.frame(cell.id=row.names(sub.data@meta.data),celltype=best.cell)
274 |       new.cells <- rbind(new.cells,cells.temp)
275 |      }
276 |      cell.meta <- SC.integrated@meta.data
277 |      cell.id <- rownames(cell.meta)
278 |      row.names(new.cells) <- new.cells[,1]
279 |      new.cells <- new.cells[cell.id,]
280 |      SC.integrated@meta.data$celltype <- new.cells$celltype
281 |     }else{
282 |      SC.integrated@meta.data$celltype <- paste0("C",as.numeric(SC.integrated@meta.data$seurat_clusters))
283 |     }
284 | 
285 | #Change sample names
286 | sample<-SC.integrated@meta.data$sample
287 | sample[which(sample=="Ind5")]<-"Normal1"
288 | sample[which(sample=="Ind6")]<-"Normal2"
289 | sample[which(sample=="Ind7")]<-"Normal3"
290 | SC.integrated@meta.data$sample<-sample
291 | 
292 | #Visualize alignment result
293 | DimPlot(SC.integrated, reduction = "umap", split.by = "sample",group.by = "celltype")
294 | ```
295 | #### Step 3
296 | #### Single-cell comparison
297 | ```
298 | #Case sample names
299 | Case=c("PDX-110","PDX-332")
300 | 
301 | #Control sample names
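302 | Control=c("Normal1","Normal2","Normal3")
303 | #The minimum number of cells for a cell type. A cell type is omitted if it has fewer cells than the minimum number.
304 | min.cells=3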
305 | #Get differential gene expression profiles for every cell type (or cluster if without annotation) from Limma
306 | library('limma')
307 | DefaultAssay(SC.integrated) <- "RNA"
308 | set.seed(123456)
309 | Gene.list <- list()
310 | C_names <- NULL
311 | for(i in unique(SC.integrated@meta.data$celltype)){
312 |      Idents(SC.integrated) <- "celltype"
313 |      c_cells <- subset(SC.integrated, celltype == i)
314 |      Idents(c_cells) <- "type"
315 |      Samples=c_cells@meta.data
316 |      Controlsample <- row.names(subset(Samples,sample %in% Control))
317 |      Casesample <- row.names(subset(Samples,sample %in% Case))
318 |      if(length(Controlsample)>min.cells & length(Casesample)>min.cells){
319 |       expr <- as.matrix(c_cells@assays$RNA@data)
320 |       new_expr <- as.matrix(expr[,c(Casesample,Controlsample)])
321 |       new_sample <- data.frame(Samples=c(Casesample,Controlsample),type=c(rep("Case",length(Casesample)),rep("Control",length(Controlsample))))
322 |       row.names(new_sample) <- paste(new_sample$Samples,row.names(new_sample),sep="_")
323 |       expr <- new_expr
324 |       bad <- which(rowSums(expr>0)<3)
325 |       expr <- expr[-bad,]
326 |       mm <- model.matrix(~0 + type, data = new_sample)
327 |       fit <- lmFit(expr, mm)
328 |       contr <- makeContrasts(typeCase - typeControl, levels = colnames(coef(fit)))
329 |       tmp <- contrasts.fit(fit, contrasts = contr)
330 |       tmp <- eBayes(tmp)
331 |       C_data <- topTable(tmp, sort.by = "P",n = nrow(tmp))
332 |       C_data_for_drug <- data.frame(row.names=row.names(C_data),score=C_data$t,adj.P.Val=C_data$adj.P.Val,P.Value=C_data$P.Value)
333 |       Gene.list[[i]] <- C_data_for_drug
334 |       C_names <- c(C_names,i)
335 |      }
336 | }
337 | names(Gene.list) <- C_names
338 | 
339 | #Get differential genes from Seurat (Wilcoxon Rank Sum test)
340 | library('Seurat')
341 | DefaultAssay(SC.integrated) <- "RNA"
342 | set.seed(123456)
343 | Gene.list <- list()
344 | C_names <- NULL
345 | for(i in unique(SC.integrated@meta.data$celltype)){
346 |   Idents(SC.integrated) <- "celltype"
347 |   c_cells <- subset(SC.integrated, celltype == i)
348 |   Idents(c_cells) <- "type"
349 |   C_data <- FindMarkers(c_cells, ident.1 = "TNBC.PDX", ident.2 = "Normal")
350 |   C_data_for_drug <- data.frame(row.names=row.names(C_data),score=C_data$avg_logFC,adj.P.Val=C_data$p_val_adj,P.Value=C_data$p_val) ##for Seurat version > 4.0, please use avg_log2FC instead of avg_logFC
351 |   Gene.list[[i]] <- C_data_for_drug
352 |   C_names <- c(C_names,i)
353 | }
354 | names(Gene.list) <- C_names
355 | 
356 | #Get differential genes from DESeq2 method
357 | library('Seurat')
358 | DefaultAssay(SC.integrated) <- "RNA"
359 | set.seed(123456)
360 | Gene.list <- list()
361 | C_names <- NULL
362 | for(i in unique(SC.integrated@meta.data$celltype)){
363 |   Idents(SC.integrated) <- "celltype"
364 |   c_cells <- subset(SC.integrated, celltype == i)
365 |   Idents(c_cells) <- "type"
366 |   C_data <- FindMarkers(c_cells, ident.1 = "TNBC.PDX", ident.2 = "Normal", test.use = "DESeq2")
367 |   C_data_for_drug <- data.frame(row.names=row.names(C_data),score=C_data$avg_logFC,adj.P.Val=C_data$p_val_adj,P.Value=C_data$p_val) ##for Seurat version > 4.0, please use avg_log2FC instead of avg_logFC
368 |   Gene.list[[i]] <- C_data_for_drug
369 |   C_names <- c(C_names,i)
370 | }
371 | names(Gene.list) <- C_names
372 | 
373 | #Get differential genes from EdgeR
374 | library('edgeR')
375 | Case=c("PDX-110","PDX-332")
376 | Control=c("Normal1","Normal2","Normal3")
377 | DefaultAssay(SC.integrated) <- "RNA"
378 | set.seed(123456)
379 | min.cells=3 # The minimum number of cells for a cell type. A cell type is omitted if it has less cells than the minimum number.
380 | Gene.list <- list()
381 | C_names <- NULL
382 | for(i in unique(SC.integrated@meta.data$celltype)){
383 |   Idents(SC.integrated) <- "celltype"
384 |   c_cells <- subset(SC.integrated, celltype == i)
385 |   Idents(c_cells) <- "type"
386 |   Samples=c_cells@meta.data
387 |   Controlsample <- row.names(subset(Samples,sample %in% Control))
388 |   Casesample <- row.names(subset(Samples,sample %in% Case))
389 |   if(length(Controlsample)>min.cells & length(Casesample)>min.cells){
390 |     expr <- as.matrix(c_cells@assays$RNA@data)
391 |     new_expr <- as.matrix(expr[,c(Casesample,Controlsample)])
392 |     new_sample <- data.frame(Samples=c(Casesample,Controlsample),type=c(rep("Case",length(Casesample)),rep("Control",length(Controlsample))))
393 |     row.names(new_sample) <- paste(new_sample$Samples,row.names(new_sample),sep="_")
394 |     expr <- new_expr
395 |     bad <- which(rowSums(expr>0)<3)
396 |     expr <- expr[-bad,]
397 |     group <- new_sample$type
398 |     dge <- DGEList(counts=expr, group=group)
399 |     group_edgeR <- factor(group,levels = c("Control","Case"))
400 |     design <- model.matrix(~ group_edgeR)
401 |     dge <- estimateDisp(dge, design = design)
402 |     fit <- glmFit(dge, design)
403 |     res <- glmLRT(fit)
404 |     C_data <- res$table
405 |     C_data_for_drug <- data.frame(row.names=row.names(C_data),score=C_data$logFC,adj.P.Val=p.adjust(C_data$PValue,method = "BH"),P.Value=C_data$PValue)
406 |     Gene.list[[i]] <- C_data_for_drug
407 |     C_names <- c(C_names,i)
408 |   }
409 | }
410 | names(Gene.list) <- C_names
411 | ```
412 | 
413 | #### Step 4
414 | #### Mono-drug repurposing for every cell type
415 | ```R
416 | library('Asgard')
417 | 
418 | #Load the tissue-specific drug reference produced by the PrepareReference function as described above. Please select the proper tissue according to the disease.
419 | my_gene_info<-read.table(file="DrugReference/breast_gene_info.txt",sep="\t",header = T,quote = "")
420 | my_drug_info<-read.table(file="DrugReference/breast_drug_info.txt",sep="\t",header = T,quote = "")
421 | drug.ref.profiles = GetDrugRef(drug.response.path = 'DrugReference/breast_rankMatrix.txt',
422 |                                probe.to.genes = my_gene_info, 
423 |                                drug.info = my_drug_info)
424 | 
425 | #Repurpose mono-drugs for every cell type                               
426 | Drug.ident.res = GetDrug(gene.data = Gene.list, 
427 |                         drug.ref.profiles = drug.ref.profiles, 
428 |                         repurposing.unit = "drug", 
429 |                         connectivity = "negative", 
430 |                         drug.type = "FDA")
431 |                        
432 | ```
433 | Use '?GetDrug' for more help
434 | 
435 | #### Step 5: Estimation of drug score
436 | 
437 | Calculate drug score using information from all or a subset of clusters. Use 
438 | `?DrugScore` for more help.
439 | 
440 | ```R
441 | library('Asgard')
442 | library('Seurat')
443 | 
444 | # Change the following two lines with the paths on your computer
445 | gse92742_gctx_path <- "GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx"
446 | gse70138_gctx_path <- "GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx"
447 | 
448 | cell_metadata <- SC.integrated@meta.data
449 | cell_metadata$cluster <- SC.integrated@meta.data$celltype
450 | 
451 | Drug.score <- DrugScore(cell_metadata, cluster_degs = Gene.list, 
452 |                         cluster_drugs = Drug.ident.res, tissue = "breast", 
453 |                         case = Case, gse92742_gctx_path = gse92742_gctx_path, 
454 |                         gse70138_gctx_path = gse70138_gctx_path)
455 | ```
456 | 
457 | #### Step 6: Select mono-drug therapies
458 | ```
459 | library('Asgard')
460 | library('Seurat')
461 | 
462 | #Select drug using drug score
463 | library(Hmisc)
464 | Final.drugs<-subset(Drug.score,Drug.therapeutic.score>quantile(Drug.score$Drug.therapeutic.score, 0.99,na.rm=T) & FDR <0.05)
465 | 
466 | 
467 | #Select drug for individual clusters
468 | Final.drugs<-TopDrug(SC.integrated=SC.integrated,
469 |                    Drug.data=Drug.ident.res,
470 |                    Drug.FDR=0.1,
471 |                    FDA.drug.only=TRUE,
472 |                    Case=Case,
473 |                    DrugScore=FALSE
474 | )
475 | 
476 | ```
477 | #### Step 7 (optional)
478 | #### Drug combination analysis
479 | ```
480 | library('Asgard')
481 | library('Seurat')
482 | 
483 | GSE92742.gctx.path="GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx"
484 | GSE70138.gctx.path="GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx"
485 | Drug.combinations<-DrugCombination(SC.integrated=SC.integrated,
486 |                       Gene.data=Gene.list,
487 |                       Drug.data=Drug.ident.res,
488 |                       Drug.FDR=0.1,
489 |                       FDA.drug.only=TRUE,
490 |                       Combined.drugs=2,
491 |                       Case=Case,
492 |                       Tissue="breast",
493 |                       GSE92742.gctx=GSE92742.gctx.path,
494 |                       GSE70138.gctx=GSE70138.gctx.path)
495 | ```
496 | Please use '?DrugCombination' for more help.
497 | 
498 | #### Select drug combination therapies
499 | ```
500 | library('Asgard')
501 | Final.combinations<-TopCombination(Drug.combination=Drug.combinations,
502 |                    Combination.FDR=0.1,
503 |                    Min.combination.score=1
504 | )
505 | ```
506 | Demo code using real datasets is available at: https://github.com/lanagarmire/Single-cell-drug-repositioning
507 | 
508 | If you have further questions or comments, please contact Dr.Bing He: hbing@umich.edu or hebinghb@gmail.com
509 | 


--------------------------------------------------------------------------------
/VERSION.txt:
--------------------------------------------------------------------------------
1 | 1.0.0
2 | 


--------------------------------------------------------------------------------
/asgard_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanagarmire/Asgard/fc2b60855e90fe231b85723cb5fb9711bb588c66/asgard_pipeline.png


--------------------------------------------------------------------------------
/data/FDA_drug.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanagarmire/Asgard/fc2b60855e90fe231b85723cb5fb9711bb588c66/data/FDA_drug.rda


--------------------------------------------------------------------------------
/data/L1000_meta.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanagarmire/Asgard/fc2b60855e90fe231b85723cb5fb9711bb588c66/data/L1000_meta.rda


--------------------------------------------------------------------------------
/man/DrugCombination.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/DrugCombination.R
 3 | \name{DrugCombination}
 4 | \alias{DrugCombination}
 5 | \title{Treatment Efficacy of the Drug Combination.}
 6 | \usage{
 7 | DrugCombination(
 8 |   SC.integrated = SC.data,
 9 |   Gene.data = Gene.list,
10 |   Drug.data = Drug.ident.res,
11 |   Drug.FDR = 0.1,
12 |   FDA.drug.only = TRUE,
13 |   Combined.drugs = 2,
14 |   GSE92742.gctx = NULL,
15 |   GSE70138.gctx = NULL,
16 |   Case = NULL,
17 |   Tissue = "breast"
18 | )
19 | }
20 | \arguments{
21 | \item{SC.integrated}{A Seurat object of aligned single cells from SCalignment function.}
22 | 
 23 | \item{Gene.data}{A list of differential gene expression profiles for every cell type. It's from GetGene function.}
24 | 
25 | \item{Drug.data}{A list of mono-drugs for every cell type. It's from GetDrug function.}
26 | 
27 | \item{Drug.FDR}{The FDR threshold to select drug. The default value is 0.1.}
28 | 
29 | \item{FDA.drug.only}{logical; if TRUE, will only return FDA-approved drugs.}
30 | 
31 | \item{Combined.drugs}{The number of drugs in a combination. The default value is 2.}
32 | 
33 | \item{GSE92742.gctx}{The gctx file contains drug responses from GSE92742 dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE92742).}
34 | 
35 | \item{GSE70138.gctx}{The gctx file contains drug responses from GSE70138 dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE70138).}
36 | 
37 | \item{Case}{A vector contains names of case samples.}
38 | 
39 | \item{Tissue}{Reference tissue. If one used lung_rankMatrix.txt in GetDrugRef function, then the Reference tissue is lung.}
40 | }
41 | \value{
42 | A data frame of drug combinations with therapeutics scores and FDR.
43 | }
44 | \description{
45 | It evaluates treatment efficacy to identify drug combinations that can best reverse the target genes’ expression in diseased cells in case samples.
46 | }
47 | \details{
48 | This function evaluates treatment efficacy and ranks drug combinations using therapeutics score, which integrates gene responses to multiple drugs, the proportion of genes, and cells treated by combined drugs.
49 | }
50 | 


--------------------------------------------------------------------------------
/man/DrugScore.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/DrugScore.R
 3 | \name{DrugScore}
 4 | \alias{DrugScore}
 5 | \title{Calculate drug score}
 6 | \usage{
 7 | DrugScore(
 8 |   cell_metadata,
 9 |   cluster_degs,
10 |   cluster_drugs,
11 |   tissue,
12 |   gse70138_gctx_path,
13 |   gse92742_gctx_path,
14 |   clusters = NULL,
15 |   case = NULL,
16 |   fda_drugs_only = TRUE
17 | )
18 | }
19 | \arguments{
20 | \item{cell_metadata}{A data.frame of cell metadata. It must have a column 
21 | named 'cluster' indicating which cluster cells belong, and a column named 
22 | 'sample' indicating which sample cells belong.}
23 | 
24 | \item{cluster_degs}{A list of differential gene expression profiles for 
25 | each cluster.}
26 | 
27 | \item{cluster_drugs}{Drug repurposing result from GetDrug function.}
28 | 
29 | \item{tissue}{Reference tissue. If one used 'lung_rankMatrix.txt' in 
30 | GetDrugRef function, then the Reference tissue is lung. Please use " " 
31 | instead of "-" in tissue name. For example, while 
32 | 'haematopoietic-and-lymphoid-tissue' is the prefix of the drug reference 
33 | files, the corresponding tissue name is "haematopoietic and lymphoid tissue".}
34 | 
35 | \item{gse70138_gctx_path}{The gctx file contains drug responses from GSE70138 
36 | dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE70138).}
37 | 
38 | \item{gse92742_gctx_path}{The gctx file contains drug responses from GSE92742 
 39 | dataset (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE92742).}
40 | 
41 | \item{clusters}{Select which clusters (cell types) to be used for drug score 
42 | estimation. By default, it uses all clusters.}
43 | 
44 | \item{case}{A vector containing case sample names.}
45 | 
46 | \item{fda_drugs_only}{logical; if TRUE, will only return FDA-approved drugs, 
47 | else, will return all drugs/compounds.}
48 | }
49 | \value{
50 | A data frame of drug score, P-value and FDR.
51 | }
52 | \description{
53 | The drug score is a comprehensive estimation of drug therapeutic 
54 | effects using all or a selected set of clusters.
55 | }
56 | \details{
57 | This function calculates drug score using cellular proportion of 
58 | clusters, the significance of reversal in DEGs' expressions, and the ratio of 
59 | the reversed genes.
60 | }
61 | 


--------------------------------------------------------------------------------
/man/GetDrug.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/GetDrug.R
 3 | \name{GetDrug}
 4 | \alias{GetDrug}
 5 | \title{Mono-drug Repurposing.}
 6 | \usage{
 7 | GetDrug(
 8 |   gene.data = NULL,
 9 |   drug.ref.profiles = NULL,
10 |   repurposing.unit = "drug",
11 |   CEG.threshold = 0.05,
12 |   connectivity = "negative",
13 |   drug.type = "FDA"
14 | )
15 | }
16 | \arguments{
17 | \item{drug.ref.profiles}{A list contains tissue specific drug reference Profiles from GetDrugRef function.}
18 | 
 19 | \item{repurposing.unit}{The parameter of either "treatment" or "drug", which indicates whether the user wants the function to test the drug repurposing p value at the treatment level or the drug level. The "treatment" level treats the drug data from different cell lines separately. The default is "drug".}
20 | 
21 | \item{CEG.threshold}{The p value threshold to select the consistently differential expressed genes (CEGs). The default value is 0.05.}
22 | 
23 | \item{connectivity}{The type of connectivity, either "negative" or "positive". Negative connectivity is used when the query data is the differential scores from disease data, and it will repurpose drugs that can potentially reverse the query disease phenotype. Positive connectivity is used when the query data is from a drug profile, and it will return the drugs that are similar to the query drug. The default value is "negative".}
24 | 
25 | \item{drug.type}{The parameter of either "FDA" or "compounds" or "all", which indicates if user want the function to identify FDA-approved drugs or compounds or both, respectively.The default value is "FDA".}
26 | }
27 | \value{
28 | A list of mono-drugs for every cell type.
29 | }
30 | \description{
 31 | It identifies mono-drug therapy for every cell type.
32 | }
33 | \details{
 34 | This function allows the user to use the differential expression data of every case cell type to query against reference drug response profiles. This function is a revised version of drug.identification from the DrInsight package.
35 | }
36 | 


--------------------------------------------------------------------------------
/man/GetDrugRef.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/GetDrugRef.R
 3 | \name{GetDrugRef}
 4 | \alias{GetDrugRef}
 5 | \title{Load and Process Drug Reference Profiles.}
 6 | \usage{
 7 | GetDrugRef(drug.response.path = NULL, probe.to.genes = NULL, drug.info = NULL)
 8 | }
 9 | \arguments{
10 | \item{drug.response.path}{The local path and the name of the tissue specific drug rank matrix.}
11 | 
 12 | \item{probe.to.genes}{A data.frame contains gene IDs (the IDs used in drug rank matrix) and official gene symbol. This file was automatically generated with the drug rank matrix.}
 13 | 
 14 | \item{drug.info}{A data.frame contains drug information. This file was automatically generated with the drug rank matrix.}
15 | }
16 | \description{
17 | This function allows user to load in the tissue specific drug rank matrix.
18 | }
19 | \details{
 20 | This function is a revised version of get.cmap.ref from the DrInsight package. The tissue specific drug rank matrix is transformed from L1000 data (GEO: GSE92742 and GSE70138) using the PrepareReference function.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/PrepareReference.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/PrepareReference.R
 3 | \name{PrepareReference}
 4 | \alias{PrepareReference}
 5 | \title{Prepare Drug Reference.}
 6 | \usage{
 7 | PrepareReference(
 8 |   cell.info = NULL,
 9 |   gene.info = NULL,
10 |   GSE70138.sig.info = NULL,
11 |   GSE92742.sig.info = NULL,
12 |   GSE70138.gctx = NULL,
13 |   GSE92742.gctx = NULL,
14 |   Output.Dir = "./"
15 | )
16 | }
17 | \arguments{
18 | \item{cell.info}{The local path and the name of the cell.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_cell_info_2017-04-28.txt.gz .}
19 | 
20 | \item{gene.info}{The local path and the name of the gene.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_gene_info_2017-03-06.txt.gz .}
21 | 
 22 | \item{GSE70138.sig.info}{The local path and the name of the GSE70138 sig.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_sig_info_2017-03-06.txt.gz .}
 23 | 
 24 | \item{GSE92742.sig.info}{The local path and the name of the GSE92742 sig.info text file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_sig_info.txt.gz .}
 25 | 
 26 | \item{GSE70138.gctx}{The local path and the name of the GSE70138 gctx file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx.gz .}
 27 | 
 28 | \item{GSE92742.gctx}{The local path and the name of the GSE92742 gctx file. It's downloaded from https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx.gz .}
29 | 
30 | \item{Output.Dir}{The output directory for the generated files.}
31 | }
32 | \description{
33 | Prepare tissue specific drug reference Profiles from L1000 drug response data.
34 | }
35 | \details{
36 | This function converts L1000 data to the tissue specific drug rank matrix.
37 | }
38 | 


--------------------------------------------------------------------------------
/man/SCplasticity.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SCplasticity.R
 3 | \name{SCplasticity}
 4 | \alias{SCplasticity}
 5 | \title{Single-cell Plasticity.}
 6 | \usage{
 7 | SCplasticity(SC.integrated = SC.data, Case = NULL)
 8 | }
 9 | \arguments{
10 | \item{SC.integrated}{A Seurat object of aligned single cells from SCalignment function.}
11 | 
12 | \item{Case}{A vector containing the names of the case samples.}
13 | }
14 | \value{
15 | A data frame of plasticity, normalized plasticity and cell type coverage.
16 | }
17 | \description{
18 | It determines the plasticity of each cell type.
19 | }
20 | \details{
21 | This function estimates the entropy of every cell in the case samples. For each cell type, it uses the median entropy value as the plasticity of that cell type.
22 | }
23 | 


--------------------------------------------------------------------------------
/man/TopCombination.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/TopCombination.R
 3 | \name{TopCombination}
 4 | \alias{TopCombination}
 5 | \title{Combination Drug Selection.}
 6 | \usage{
 7 | TopCombination(
 8 |   Drug.combination = Drug.combinations,
 9 |   Combination.FDR = 0.1,
10 |   Min.combination.score = 1
11 | )
12 | }
13 | \arguments{
14 | \item{Drug.combination}{raw drug combination result from DrugCombination function.}
15 | 
16 | \item{Combination.FDR}{The FDR threshold to select drug combination. The default value is 0.1.}
17 | 
18 | \item{Min.combination.score}{The Combination therapeutic score threshold to select drug combination. The default value is 1.}
19 | }
20 | \value{
21 | A data frame of selected drug combinations.
22 | }
23 | \description{
24 | Select drug combinations by combination therapeutic score and FDR of combination therapeutic score.
25 | }
26 | \details{
27 | Input raw drug combination result and return the top drug combinations.
28 | }
29 | 


--------------------------------------------------------------------------------
/man/TopDrug.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/TopDrug.R
 3 | \name{TopDrug}
 4 | \alias{TopDrug}
 5 | \title{Single Drug Selection for Individual Clusters.}
 6 | \usage{
 7 | TopDrug(
 8 |   SC.integrated = SC.data,
 9 |   Drug.data = Drug.ident.res,
10 |   Drug.FDR = 0.1,
11 |   FDA.drug.only = TRUE,
12 |   Case = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{SC.integrated}{A Seurat object of aligned single cells.}
17 | 
18 | \item{Drug.data}{Drug repurposing result from GetDrug function.}
19 | 
20 | \item{Drug.FDR}{The FDR threshold to select drug. The default value is 0.1.}
21 | 
22 | \item{FDA.drug.only}{logical; if TRUE, will only return FDA-approved drugs.}
23 | 
24 | \item{Case}{A vector of case (diseased) samples. Only case samples are involved in the calculation of coverage.}
25 | }
26 | \value{
27 | A data frame of selected drugs with summary of cell coverage.
28 | }
29 | \description{
30 | Select single drugs for every cell population by FDR and drug type, and summarize cell coverage for selected drugs.
31 | }
32 | \details{
33 | Input raw drug repurposing result and return the top drugs with summary of cell coverage.
34 | }
35 | 


--------------------------------------------------------------------------------
/prep_files.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Check if the target directory is provided as an argument
 4 | if [ "$#" -ne 1 ]; then
 5 |     echo "Usage: $0 /path/to/target/directory"
 6 |     exit 1
 7 | fi
 8 | 
 9 | # Define the directory where you want to download and unzip the files
10 | TARGET_DIR="$1"
11 | 
12 | # Create the directory if it doesn't exist
13 | mkdir -p "$TARGET_DIR"
14 | 
15 | # Change to the target directory
16 | cd "$TARGET_DIR"
17 | 
18 | # URL prefix
19 | URL_PREFIX="https://ftp.ncbi.nlm.nih.gov/geo/series/GSE70nnn/GSE70138/suppl/"
20 | 
21 | # List of files to download
22 | FILES=(
23 |     "GSE70138_Broad_LINCS_cell_info_2017-04-28.txt"
24 |     "GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328_2017-03-06.gctx"
25 |     "GSE70138_Broad_LINCS_sig_info_2017-03-06.txt"
26 |     "GSE70138_Broad_LINCS_gene_info_2017-03-06.txt"
27 | )
28 | 
29 | # Download and unzip each file
30 | for file in "${FILES[@]}"; do
31 |     # Check if the file already exists
32 |     if [[ ! -f "$file" ]]; then
33 |         wget "${URL_PREFIX}${file}.gz"
34 |         gunzip "$(basename "$file")"
35 |     else
36 |         echo "File $file already exists. Skipping download."
37 |     fi
38 | done
39 | 
40 | URL_PREFIX="https://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92742/suppl/"
41 | FILES=(
42 |     "GSE92742_Broad_LINCS_cell_info.txt"
43 |     "GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx"
44 |     "GSE92742_Broad_LINCS_sig_info.txt"
45 | )
46 | 
47 | # Download and unzip each file
48 | for file in "${FILES[@]}"; do
49 |     # Check if the file already exists
50 |     if [[ ! -f "$file" ]]; then
51 |         wget "${URL_PREFIX}${file}.gz"
52 |         gunzip "$(basename "$file")"
53 |     else
54 |         echo "File $file already exists. Skipping download."
55 |     fi
56 | done
57 | 


--------------------------------------------------------------------------------