├── DESCRIPTION ├── NAMESPACE ├── R ├── coexpr.R ├── comparecor.R ├── data.R └── diffcoexp.R ├── README.md ├── data └── gse4158part.RData ├── inst ├── NEWS └── unitTests │ └── test_diffcoexp.R ├── man ├── coexpr.Rd ├── comparecor.Rd ├── diffcoexp.Rd ├── exprs.1.Rd └── exprs.2.Rd ├── tests └── runTests.R └── vignettes └── diffcoexp.Rnw /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: diffcoexp 2 | Title: Differential Co-expression Analysis 3 | Version: 1.21.1 4 | Date: 2023-08-09 5 | Author: Wenbin Wei, Sandeep Amberkar, Winston Hide 6 | Description: A tool for the identification of differentially coexpressed links (DCLs) and differentially coexpressed genes (DCGs). DCLs are gene pairs with significantly different correlation coefficients under two conditions. DCGs are genes with significantly more DCLs than by chance. 7 | Maintainer: Wenbin Wei 8 | Depends: R (>= 3.5), WGCNA, SummarizedExperiment 9 | Imports: stats, DiffCorr, psych, igraph, BiocGenerics 10 | Suggests: GEOquery, RUnit 11 | URL: https://github.com/hidelab/diffcoexp 12 | biocViews: GeneExpression, DifferentialExpression, Transcription, Microarray, OneChannel, TwoChannel, RNASeq, Sequencing, Coverage, ImmunoOncology 13 | License: GPL (>2) 14 | Encoding: UTF-8 15 | LazyData: true 16 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | import(BiocGenerics) 2 | export(coexpr) 3 | export(comparecor) 4 | export(diffcoexp) 5 | importFrom("stats", "p.adjust", "pbinom", "pt") 6 | importFrom(DiffCorr,compcorr) 7 | importFrom(SummarizedExperiment, assays) 8 | importFrom(WGCNA,cor) 9 | importFrom(igraph,graph.data.frame) 10 | importFrom(psych,count.pairwise) 11 | -------------------------------------------------------------------------------- /R/coexpr.R: 
-------------------------------------------------------------------------------- 1 | #' Identification of gene pairs coexpressed in at least one of two conditions 2 | #' 3 | #' This function identifies gene pairs coexpressed in at least one of two 4 | #' conditions. 5 | #' @param exprs.1 a SummarizedExperiment, data frame or matrix 6 | #' for condition 1, with gene IDs as rownames and sample IDs as column names. 7 | #' @param exprs.2 a SummarizedExperiment, data frame or matrix 8 | #' for condition 2, with gene IDs as rownames and sample IDs as column names. 9 | #' @param rth the cutoff of r; must be within [0,1]. 10 | #' @param qth the cutoff of q-value; must be within [0,1]. 11 | #' @param r.method a character string specifying the method to be used to 12 | #' calculate correlation coefficients. 13 | #' @param q.method a character string specifying the method for adjusting p values. 14 | #' @keywords coexpression 15 | #' @importFrom stats p.adjust pbinom pt 16 | #' @export 17 | #' @return a data frame containing gene pairs that are coexpressed in at least 18 | #' one of the conditions with the criteria that absolute value of 19 | #' correlation coefficient is greater than rth and q value less than qth. 
It 20 | #' has the following columns: 21 | #' \item{\code{Gene.1}}{Gene ID} 22 | #' \item{\code{Gene.2}}{Gene ID} 23 | #' \item{\code{cor.1}}{correlation coefficients under condition 1} 24 | #' \item{\code{cor.2}}{correlation coefficients under condition 2} 25 | #' \item{\code{cor.diff}}{difference between correlation coefficients under 26 | #' condition 2 and condition 1} 27 | #' \item{\code{p.1}}{p value under null hypothesis that correlation 28 | #' coefficient under condition 1 equals to zero} 29 | #' \item{\code{p.2}}{p value under null hypothesis that correlation 30 | #' coefficient under condition 2 equals to zero} 31 | #' \item{\code{p.diffcor}}{p value under null hypothesis that difference 32 | #' between two correlation coefficients under two conditions equals to zero 33 | #' using Fisher's r-to-Z transformation} 34 | #' \item{\code{q.1}}{adjusted p value under null hypothesis that correlation 35 | #' coefficient under condition 1 equals to zero} 36 | #' \item{\code{q.2}}{adjusted p value under null hypothesis that correlation 37 | #' coefficient under condition 2 equals to zero} 38 | #' \item{\code{q.diffcor}}{adjusted p value under null hypothesis that the 39 | #' difference between two correlation coefficients under two conditions equals 40 | #' to zero using Fisher's r-to-Z transformation} 41 | #' @examples 42 | #' data(gse4158part) 43 | #' allowWGCNAThreads() 44 | #' res=coexpr(exprs.1 = exprs.1, exprs.2 = exprs.2, r.method = "spearman") 45 | #' #The result is a data frames. 
#' str(res)
"coexpr" <- function(exprs.1, exprs.2, r.method = c("pearson", "spearman")[1],
                     q.method = c(
                       "BH", "holm", "hochberg", "hommel", "bonferroni", "BY",
                       "fdr", "none"
                     )[1], rth = 0.5, qth = 0.1) {
  # SummarizedExperiment input: only the first assay is used.
  if (is(exprs.1, "SummarizedExperiment")) {
    exprs.1 <- assays(exprs.1)[[1]]
  }
  if (is(exprs.2, "SummarizedExperiment")) {
    exprs.2 <- assays(exprs.2)[[1]]
  }

  # Remove rows whose gene ID is NA or empty. drop = FALSE keeps the
  # two-dimensional shape even when a single row survives the filter.
  ids.1 <- rownames(exprs.1)
  exprs.1 <- exprs.1[!is.na(ids.1) & ids.1 != "", , drop = FALSE]
  ids.2 <- rownames(exprs.2)
  exprs.2 <- exprs.2[!is.na(ids.2) & ids.2 != "", , drop = FALSE]

  # Compare lengths first: `==` alone recycles the shorter vector and can
  # report a match for inputs with different numbers of genes.
  ids.1 <- rownames(exprs.1)
  ids.2 <- rownames(exprs.2)
  if (length(ids.1) != length(ids.2) || !all(ids.1 == ids.2)) {
    stop("Rownames of two expression matrices must be the same!")
  }

  # Forward q.method as well; it was previously dropped here, so the
  # q values were always adjusted with comparecor's default method.
  x <- comparecor(exprs.1, exprs.2, r.method = r.method, q.method = q.method)
  if (!is.null(x)) {
    message("Finished running comparecor.")
  }

  # Keep links that pass both the correlation and q-value cutoffs in at least
  # one condition. which() discards rows whose test evaluates to NA.
  pass.1 <- abs(x$cor.1) > rth & x$q.1 < qth
  pass.2 <- abs(x$cor.2) > rth & x$q.2 < qth
  x[which(pass.1 | pass.2), , drop = FALSE]
}

# --------------------------------------------------------------------------------
# /R/comparecor.R:
# --------------------------------------------------------------------------------
#' Compare gene-gene correlation coefficients under two conditions
#'
#' This function calculates correlation coefficients of all gene pairs under
#' two conditions and compare them using Fisher's Z-transformation.
#' @param exprs.1 a SummarizedExperiment, data frame or matrix
#' for condition 1, with gene IDs as rownames and sample IDs as column names.
#' @param exprs.2 a SummarizedExperiment, data frame or matrix
#' for condition 2, with gene IDs as rownames and sample IDs as column names.
#' @param r.method a character string specifying the method to be used to
#' calculate correlation coefficients.
#' @param q.method a character string specifying the method for adjusting p values.
12 | #' @keywords coexpression 13 | #' @importFrom DiffCorr compcorr 14 | #' @importFrom WGCNA cor 15 | #' @importFrom psych count.pairwise 16 | #' @return a data frame containing the differences between the correlation 17 | #' coefficients under two conditions and their p values. It has the following 18 | #' columns: 19 | #' \item{\code{Gene.1}}{Gene ID} 20 | #' \item{\code{Gene.2}}{Gene ID} 21 | #' \item{\code{cor.1}}{correlation coefficients under condition 1} 22 | #' \item{\code{cor.2}}{correlation coefficients under condition 2} 23 | #' \item{\code{cor.diff}}{difference between correlation coefficients under 24 | #' condition 2 and condition 1} 25 | #' \item{\code{p.1}}{p value under null hypothesis that correlation 26 | #' coefficient under condition 1 equals to zero} 27 | #' \item{\code{p.2}}{p value under null hypothesis that correlation 28 | #' coefficient under condition 2 equals to zero} 29 | #' \item{\code{p.diffcor}}{p value under null hypothesis that difference 30 | #' between two correlation coefficients under two conditions equals to zero 31 | #' using Fisher's r-to-Z transformation} 32 | #' \item{\code{q.1}}{adjusted p value under null hypothesis that correlation 33 | #' coefficient under condition 1 equals to zero} 34 | #' \item{\code{q.2}}{adjusted p value under null hypothesis that correlation 35 | #' coefficient under condition 2 equals to zero} 36 | #' \item{\code{q.diffcor}}{adjusted p value under null hypothesis that the 37 | #' difference between two correlation coefficients under two conditions equals 38 | #' to zero using Fisher's r-to-Z transformation} 39 | #' @export 40 | #' @examples 41 | #' data(gse4158part) 42 | #' allowWGCNAThreads() 43 | #' res=comparecor(exprs.1 = exprs.1, exprs.2 = exprs.2, r.method = "spearman") 44 | #' #The result is a data frame.
#' str(res)
"comparecor" <- function(exprs.1, exprs.2,
                         r.method = c("pearson", "spearman")[1],
                         q.method = c(
                           "BH", "holm", "hochberg", "hommel", "bonferroni",
                           "BY", "fdr", "none"
                         )[1]) {
  # SummarizedExperiment input: only the first assay is used.
  if (is(exprs.1, "SummarizedExperiment")) {
    exprs.1 <- assays(exprs.1)[[1]]
  }
  if (is(exprs.2, "SummarizedExperiment")) {
    exprs.2 <- assays(exprs.2)[[1]]
  }

  # Remove rows whose gene ID is NA or empty. drop = FALSE keeps the
  # two-dimensional shape even when a single row survives the filter.
  ids.1 <- rownames(exprs.1)
  exprs.1 <- exprs.1[!is.na(ids.1) & ids.1 != "", , drop = FALSE]
  ids.2 <- rownames(exprs.2)
  exprs.2 <- exprs.2[!is.na(ids.2) & ids.2 != "", , drop = FALSE]

  # Compare lengths first: `==` alone recycles the shorter vector and can
  # report a match for inputs with different numbers of genes.
  genes <- rownames(exprs.1)
  if (length(genes) != length(rownames(exprs.2)) ||
    !all(genes == rownames(exprs.2))) {
    stop("Rownames of two expression matrices must be the same!")
  }

  # Gene-by-gene correlation matrix plus the sample count behind it. Without
  # missing values the count is the number of columns; otherwise it is the
  # per-pair number of complete observations (lower triangle, column-major).
  cor.with.n <- function(mat) {
    mat <- as.matrix(mat)
    if (!anyNA(mat)) {
      r <- cor(t(mat), method = r.method, use = "all.obs")
      n <- ncol(mat)
    } else {
      r <- cor(t(mat), method = r.method, use = "pairwise.complete.obs")
      n <- count.pairwise(t(mat))
      n <- n[lower.tri(n, diag = FALSE)]
    }
    list(r = r, n = n)
  }
  stat.1 <- cor.with.n(exprs.1)
  stat.2 <- cor.with.n(exprs.2)
  rm(exprs.1, exprs.2)

  # One lower-triangle mask drives every per-pair vector, so correlations,
  # counts and gene labels share the same column-major pair order.
  lower <- lower.tri(stat.1$r, diag = FALSE)
  cor.1 <- stat.1$r[lower]
  cor.2 <- stat.2$r[lower]
  n.1 <- stat.1$n
  n.2 <- stat.2$n

  # Label pairs through row/column indices instead of materialising
  # genes-by-genes character matrices. Gene.1 is the row gene and Gene.2 the
  # column gene, as before.
  pair.idx <- which(lower, arr.ind = TRUE)
  rm(stat.1, stat.2, lower)
  Gene.1 <- genes[pair.idx[, 1]]
  Gene.2 <- genes[pair.idx[, 2]]
  # The names become the row names of the result ("geneA,geneB").
  names(Gene.1) <- names(Gene.2) <- paste(Gene.1, Gene.2, sep = ",")
  rm(pair.idx)

  p.1 <- r2p(cor.1, n.1)
  p.2 <- r2p(cor.2, n.2)

  # Fisher r-to-Z comparison of the two correlation coefficients.
  dc <- compcorr(n.1, cor.1, n.2, cor.2)
  res <- data.frame(
    Gene.1 = Gene.1, Gene.2 = Gene.2, cor.1 = cor.1, cor.2 = cor.2,
    cor.diff = cor.2 - cor.1, p.1 = p.1, p.2 = p.2, p.diffcor = dc$pval,
    stringsAsFactors = FALSE
  )
  res$q.1 <- p.adjust(res$p.1, method = q.method)
  res$q.2 <- p.adjust(res$p.2, method = q.method)
  res$q.diffcor <- p.adjust(res$p.diffcor, method = q.method)
  res
}

# Two-sided p value for a correlation coefficient r estimated from n
# observations, from the t statistic r * sqrt((n - 2) / (1 - r^2)) with
# n - 2 degrees of freedom. Vectorised over r and n.
r2p <- function(r, n) {
  t.stat <- r * sqrt((n - 2) / (1 - r^2))
  2 * pt(-abs(t.stat), n - 2)
}

# --------------------------------------------------------------------------------
# /R/data.R:
# --------------------------------------------------------------------------------
#' exprs.1
#'
#' expression of 400 genes in 14 samples (GSM94988 to GSM95001) of yeast after pulses
#' 2 g/l glucose, \url{https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE4158}.
#'
#' @format A data frame with 400 genes and 14 samples.
"exprs.1"

# NOTE(review): the description below says 14 samples (GSM94988 to GSM95001),
# the same range as exprs.1, while @format says 12 samples. One of the two is
# presumably a copy-paste leftover; confirm against the GEO series.
#' exprs.2
#'
#' expression of 400 genes in 14 samples (GSM94988 to GSM95001) of yeast after pulses
#' 0.2 g/l glucose, \url{https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE4158}.
#'
#' @format A data frame with 400 genes and 12 samples.
"exprs.2"

# --------------------------------------------------------------------------------
# /R/diffcoexp.R:
# --------------------------------------------------------------------------------
#modified from DCe function of DCGL package
#' Differential co-expression analysis
#'
#' This function identifies differentially coexpressed links (DCLs) and
#' differentially coexpressed genes (DCGs).
6 | #' @param exprs.1 a SummarizedExperiment, data frame or matrix 7 | #' for condition 1, with gene IDs as rownames and sample IDs as column names. 8 | #' @param exprs.2 a SummarizedExperiment, data frame or matrix 9 | #' for condition 2, with gene IDs as rownames and sample IDs as column names. 10 | #' @param rth the cutoff of r; must be within [0,1]. 11 | #' @param qth the cutoff of q-value (adjusted p value); must be within [0,1]. 12 | #' @param r.diffth the cutoff of absolute value of the difference between the 13 | #' correlation coefficients of the two conditions; must be within [0,1]. 14 | #' @param q.diffth the cutoff of q-value (adjusted p value) of the difference 15 | #' between the correlation coefficients of the two conditions; must be 16 | #' within [0,1]. 17 | #' @param q.dcgth the cutoff of q-value (adjusted p value) of the genes 18 | #' enriched in the differentially correlated gene pairs between the two 19 | #' conditions; must be within [0,1]. 20 | #' @param r.method a character string specifying the method to be used to 21 | #' calculate correlation coefficients. 22 | #' @param q.method a character string specifying the method for adjusting p 23 | #' values. 24 | #' @keywords coexpression 25 | #' @importFrom igraph graph.data.frame 26 | #' @export 27 | #' @return a list of two data frames. 28 | #' 29 | #' The DCGs data frame contains genes that contribute to differentially 30 | #' correlated links (gene pairs) with q value less than q.dcgth.
31 | #' It has the following columns: 32 | #' \item{\code{Gene}}{Gene ID} 33 | #' \item{\code{CLs}}{Number of links with absolute correlation 34 | #' coefficient greater than rth and q value less than qth in at least one 35 | #' condition} 36 | #' \item{\code{DCLs}}{Number of links that meet the criteria for CLs and the 37 | #' criteria that the absolute value of the difference between the correlation coefficients 38 | #' in the two condition is greater than r.diffth and q value less than q.diffth} 39 | #' \item{\code{DCL.same}}{Number of subset of DCLs with same signed 40 | #' correlation coefficients in both conditions} 41 | #' \item{\code{DCL.diff}}{Number of subset of DCLs with oppositely signed 42 | #' correlation coefficients under two conditions but only one of them has 43 | #' absolute correlation coefficient greater than rth and q value less than qth} 44 | #' \item{\code{DCL.switch}}{Number of subset of DCLs with oppositely signed 45 | #' correlation coefficients under two conditions and both of them have 46 | #' absolute correlation coefficient greater than rth and q value less than qth} 47 | #' \item{\code{p}}{p value of having >=DCLs given CLs} 48 | #' \item{\code{q}}{adjusted p value} 49 | #' 50 | #' The DCLs data frame contains the differentially correlated links (gene pairs) 51 | #' that meet the criteria that at least one of their correlation coefficients 52 | #' (cor.1 and/or cor.2) is greater than rth with q value (q.1 and/or q.2) less 53 | #' than qth and the absolute value of the difference between the correlation 54 | #' coefficients under two conditions (cor.diff) is greater than r.diffth with 55 | #' q.diffcor less than q.diffth. 
It has the following columns: 56 | #' \item{\code{Gene.1}}{Gene ID} 57 | #' \item{\code{Gene.2}}{Gene ID} 58 | #' \item{\code{cor.1}}{correlation coefficients under condition 1} 59 | #' \item{\code{cor.2}}{correlation coefficients under condition 2} 60 | #' \item{\code{cor.diff}}{difference between correlation coefficients under 61 | #' condition 2 and condition 1} 62 | #' \item{\code{p.1}}{p value under null hypothesis that correlation 63 | #' coefficient under condition 1 equals to zero} 64 | #' \item{\code{p.2}}{p value under null hypothesis that correlation 65 | #' coefficient under condition 2 equals to zero} 66 | #' \item{\code{p.diffcor}}{p value under null hypothesis that difference 67 | #' between two correlation coefficients under two conditions equals to zero 68 | #' using Fisher's r-to-Z transformation} 69 | #' \item{\code{q.1}}{adjusted p value under null hypothesis that correlation 70 | #' coefficient under condition 1 equals to zero} 71 | #' \item{\code{q.2}}{adjusted p value under null hypothesis that correlation 72 | #' coefficient under condition 2 equals to zero} 73 | #' \item{\code{q.diffcor}}{adjusted p value under null hypothesis that the 74 | #' difference between two correlation coefficients under two conditions equals 75 | #' to zero using Fisher's r-to-Z transformation} 76 | #' \item{\code{type}}{can have value "same signed", "diff signed", or 77 | #' "switched opposites". "same signed" indicates that the gene pair has same 78 | #' signed correlation coefficients under both conditions. "diff signed" 79 | #' indicates that the gene pair has oppositely signed correlation coefficients 80 | #' under two conditions and only one of them meets the criteria that 81 | #' absolute correlation coefficient is greater than rth and q value less than qth. 
82 | #' "switched opposites" indicates that the gene pair has oppositely signed 83 | #' correlation coefficients under two conditions and both of them meet the 84 | #' criteria that absolute correlation coefficient is greater than rth and q 85 | #' value less than qth.} 86 | #' @details diffcoexp function identifies differentially coexpressed links 87 | #' (DCLs) and differentially coexpressed genes (DCGs). DCLs are gene pairs with 88 | #' significantly different correlation coefficients under two conditions (de la 89 | #' Fuente 2010, Jiang et al., 2016). DCGs are genes with significantly more DCLs 90 | #' than by chance (Yu et al., 2011, Jiang et al., 2016). It takes two gene 91 | #' expression matrices or data frames under two conditions as input, calculates 92 | #' gene-gene correlations under two conditions and compare them with Fisher's Z 93 | #' transformation, filter the correlation with the rth and qth and the 94 | #' correlation changes with r.diffth and q.diffth. It identifies DCGs using 95 | #' binomial probability model (Jiang et al., 2016). 96 | #' 97 | #' The main steps are as follows: 98 | #' 99 | #' a). Correlation coefficients and p values of all gene pairs under two 100 | #' conditions are calculated. 101 | #' 102 | #' b). The difference between the correlation coefficients under two conditions 103 | #' are calculated and the p value is calculated using Fisher's Z-transformation. 104 | #' 105 | #' c). p values are adjusted. 106 | #' 107 | #' d). Gene pairs (links) coexpressed in at least one condition are identified 108 | #' using the criteria that at least one of the correlation coefficients under 109 | #' two conditions has absolute value greater than the threshold rth and 110 | #' adjusted p value less than the threshold qth. The links that meet the 111 | #' criteria are included in CLs. 112 | #' 113 | #' e). 
Differentially coexpressed gene pairs (links) are identified from CLs 114 | #' using the criteria that the absolute value of the difference between the two 115 | #' correlation coefficients is greater than the threshold r.diffth and the 116 | #' adjusted p value is less than the threshold q.diffth. The links that meet 117 | #' the criteria are included in DCLs. 118 | #' 119 | #' f). The DCLs are classified into three categories: "same signed", 120 | #' "diff signed", or "switched opposites". "same signed" indicates that the gene 121 | #' pair has same signed correlation coefficients under both conditions. 122 | #' "diff signed" indicates that the gene pair has oppositely signed correlation 123 | #' coefficients under two conditions and only one of them meets the criteria 124 | #' that absolute correlation coefficient is greater than the threshold rth 125 | #' and adjusted p value less than the threshold qth. "switched opposites" 126 | #' indicates that the gene pair has oppositely signed correlation coefficients 127 | #' under two conditions and both of them meet the criteria that absolute 128 | #' correlation coefficient is greater than the threshold rth and adjusted p 129 | #' value less than the threshold qth. 130 | #' 131 | #' g). All the genes in DCLs are tested for their enrichment of DCLs, i.e, 132 | #' whether they have more DCLs than by chance using binomial probability model 133 | #' (Jiang et al., 2016). Those with adjusted p value less than the threshold 134 | #' q.dcgth are included in DCGs. 135 | #' @author Wenbin Wei 136 | #' @references 137 | #' 1. de la Fuente A. From "differential expression" to "differential networking" 138 | #' - identification of dysfunctional regulatory networks in diseases. Trends in 139 | #' Genetics. 2010 Jul;26(7):326-33. 140 | #' 141 | #' 2. Jiang Z, Dong X, Li Z-G, He F, Zhang Z. 
#' Differential Coexpression Analysis
#' Reveals Extensive Rewiring of Arabidopsis Gene Coexpression in Response to
#' Pseudomonas syringae Infection. Scientific Reports. 2016 Dec;6(1):35064.
#'
#' 3. Yu H, Liu B-H, Ye Z-Q, Li C, Li Y-X, Li Y-Y. Link-based quantitative
#' methods to identify differentially coexpressed genes and gene pairs. BMC
#' bioinformatics. 2011;12(1):315.
#' @examples
#' data(gse4158part)
#' allowWGCNAThreads()
#' res=diffcoexp(exprs.1 = exprs.1, exprs.2 = exprs.2, r.method = "spearman")
#' #The results are a list of two data frames, one for differentially co-expressed
#' #links (DCLs, gene pairs) and one for differentially co-expressed genes (DCGs).
#' str(res)
"diffcoexp" <-
  function(exprs.1, exprs.2, r.method = c("pearson", "kendall", "spearman")[1],
           q.method = c(
             "BH", "holm", "hochberg", "hommel", "bonferroni", "BY", "fdr",
             "none"
           )[1], rth = 0.5, qth = 0.1, r.diffth = 0.5, q.diffth = 0.1,
           q.dcgth = 0.1) {
    # SummarizedExperiment input: only the first assay is used.
    if (is(exprs.1, "SummarizedExperiment")) {
      exprs.1 <- assays(exprs.1)[[1]]
    }
    if (is(exprs.2, "SummarizedExperiment")) {
      exprs.2 <- assays(exprs.2)[[1]]
    }

    # Remove rows whose gene ID is NA or empty. drop = FALSE keeps the
    # two-dimensional shape even when a single row survives the filter.
    ids.1 <- rownames(exprs.1)
    exprs.1 <- exprs.1[!is.na(ids.1) & ids.1 != "", , drop = FALSE]
    ids.2 <- rownames(exprs.2)
    exprs.2 <- exprs.2[!is.na(ids.2) & ids.2 != "", , drop = FALSE]

    # Compare lengths first: `==` alone recycles the shorter vector and can
    # report a match for inputs with different numbers of genes.
    genes <- rownames(exprs.1)
    if (length(genes) != length(rownames(exprs.2)) ||
      !all(genes == rownames(exprs.2))) {
      stop("Rownames of two expression matrices must be the same!")
    }
    if (length(genes) == 0 || length(rownames(exprs.2)) == 0) {
      stop("The expression matrices must have row names specifying the gene names.")
    }
    n.samples <- min(ncol(exprs.1), ncol(exprs.2))
    if (n.samples < 3) {
      stop("Each expression matrix must have at least three or more columns.")
    } else if (n.samples < 5) {
      # Single-line message; the literal used to span two source lines and
      # carried the line break and indentation into the warning text.
      warning(
        "The minimum number of columns is less than five and the result ",
        "may not be reliable."
      )
    }

    m <- nrow(exprs.1)

    # Links coexpressed in at least one condition. q.method is forwarded so
    # the link-level q values follow the adjustment method the caller chose.
    colinks <- coexpr(exprs.1, exprs.2,
      r.method = r.method, q.method = q.method, rth = rth, qth = qth
    )
    if (!is.null(colinks)) {
      message("Finished running coexpr.")
    }
    if (nrow(colinks) == 0) {
      return(emptyresult())
    }

    #############################################################
    ## Split the coexpressed links into three disjoint sets by sign pattern.
    #############################################################
    # A product that is NA counts as "same signed"; a product of exactly zero
    # falls into no set, as before.
    sign.prod <- colinks$cor.1 * colinks$cor.2
    idx.same <- sign.prod > 0
    idx.same[is.na(idx.same)] <- TRUE
    idx.diff <- sign.prod < 0
    idx.diff[is.na(idx.diff)] <- FALSE
    # "Switched": opposite signs and BOTH conditions pass the coexpression
    # cutoffs. Strict `>` matches the cutoff used by coexpr and the
    # documentation ("greater than rth").
    idx.switched <- idx.diff &
      abs(colinks$cor.1) > rth & abs(colinks$cor.2) > rth &
      colinks$q.1 < qth & colinks$q.2 < qth
    idx.switched[is.na(idx.switched)] <- FALSE

    # Links whose correlation change passes both differential cutoffs.
    # which() drops rows where the test is NA; plain logical indexing would
    # insert all-NA rows for them.
    select.dcl <- function(links) {
      pass <- links$q.diffcor < q.diffth & abs(links$cor.diff) > r.diffth
      links[which(pass), , drop = FALSE]
    }
    # A set with exactly one candidate link is evaluated too (the former
    # `nrow(...) > 1` guards skipped it).
    DCL.same <- select.dcl(colinks[idx.same, , drop = FALSE])
    DCL.diff <- select.dcl(colinks[idx.diff & !idx.switched, , drop = FALSE])
    DCL.switched <- select.dcl(colinks[idx.switched, , drop = FALSE])

    n.DCL <- nrow(DCL.same) + nrow(DCL.diff) + nrow(DCL.switched)
    message(nrow(colinks), " gene pairs remain after half thresholding.")
    if (n.DCL == 0) {
      message("No DCL meets the thresholds!")
      return(emptyresult())
    } else {
      message(n.DCL, " DCLs identified.")
    }

    #############################################################
    ## Assemble the DCL table, tagging every link with its category.
    #############################################################
    tag.links <- function(links, label) {
      if (nrow(links) == 0) {
        return(NULL)
      }
      data.frame(links, type = label, stringsAsFactors = FALSE)
    }
    DCLs <- rbind(
      tag.links(DCL.same, "same signed"),
      tag.links(DCL.diff, "diff signed"),
      tag.links(DCL.switched, "switched opposites")
    )
    DCLs$Gene.1 <- as.character(DCLs$Gene.1)
    DCLs$Gene.2 <- as.character(DCLs$Gene.2)

    #############################################################
    ## Per-gene link counts (graph degree) for every link set.
    #############################################################
    link.degree <- function(links) {
      g <- igraph::graph.data.frame(links[, c("Gene.1", "Gene.2")])
      deg <- as.numeric(igraph::degree(g))
      names(deg) <- igraph::V(g)$name
      deg
    }
    link.sets <- list(
      CLs = colinks, DCLs = DCLs, DCL.same = DCL.same, DCL.diff = DCL.diff,
      DCL.switch = DCL.switched
    )
    degree.bind <- data.frame(
      matrix(0, m, length(link.sets)),
      stringsAsFactors = FALSE
    )
    row.names(degree.bind) <- genes
    colnames(degree.bind) <- names(link.sets)
    for (set.name in names(link.sets)) {
      links <- link.sets[[set.name]]
      if (nrow(links) > 0) {
        deg <- link.degree(links)
        degree.bind[names(deg), set.name] <- deg
      }
    }

    ########################################################
    ## DCGs Identification
    ########################################################
    # Binomial model: probability of seeing at least the observed number of
    # DCLs among a gene's CLs, given the overall DCL/CL ratio.
    prob <- nrow(DCLs) / nrow(colinks)
    p.value <- pbinom(degree.bind[, "DCLs"] - 1, degree.bind[, "CLs"], prob,
      lower.tail = FALSE, log.p = FALSE
    )
    q.value <- p.adjust(p.value, method = q.method)
    degree.bind <- cbind(degree.bind, p = p.value, q = q.value)

    DCGs <- degree.bind[which(degree.bind$q < q.dcgth), , drop = FALSE]
    DCGs <- cbind(Gene = as.character(rownames(DCGs)), DCGs)
    DCGs$Gene <- as.character(DCGs$Gene)
    DCGs <- DCGs[order(DCGs$p), ]
    message(length(DCGs$Gene), " DCGs identified.")

    list(DCGs = DCGs, DCLs = DCLs)
  }

# Zero-row result returned when no coexpressed or differentially coexpressed
# link is found. Column names and order mirror the non-empty result of
# diffcoexp, so callers can rely on one schema.
"emptyresult" <- function() {
  DCGs <- data.frame(
    Gene = character(0), CLs = numeric(0), DCLs = numeric(0),
    DCL.same = numeric(0), DCL.diff = numeric(0), DCL.switch = numeric(0),
    p = numeric(0), q = numeric(0),
    stringsAsFactors = FALSE
  )
  DCLs <- data.frame(
    Gene.1 = character(0), Gene.2 = character(0), cor.1 = numeric(0),
    cor.2 = numeric(0), cor.diff = numeric(0), p.1 = numeric(0),
    p.2 = numeric(0), p.diffcor = numeric(0), q.1 = numeric(0),
    q.2 = numeric(0), q.diffcor = numeric(0), type = character(0),
    stringsAsFactors = FALSE
  )
  list(DCGs = DCGs, DCLs = DCLs)
}

#The results of diffcoexp can be further analysed using DRsort function of
#DCGL package

# --------------------------------------------------------------------------------
# /README.md:
# --------------------------------------------------------------------------------
# diffcoexp
# =========
# Differential coexpression analysis
#
# ##### Wenbin Wei, Sandeep Amberkar, Winston Hide, Aug 9, 2023
#
# ## 1. Description
#
# This package identifies differentially coexpressed links (DCLs) and differentially coexpressed genes (DCGs). DCLs are gene pairs with significantly different correlation coefficients under two conditions (de la Fuente 2010, Jiang et al., 2016). DCGs are genes with significantly more DCLs than by chance (Yu et al., 2011, Jiang et al., 2016). It takes two gene expression matrices or data frames under two conditions as input, calculates gene-gene correlations under two conditions and compares them with Fisher's Z transformation(Fisher 1915 and Fisher 1921). It filters gene pairs with the thresholds for correlation coefficients and their adjusted p value as well as the thresholds for the difference between the two correlation coefficients and its adjusted p value. It identifies DCGs using binomial probability model (Jiang et al., 2016).
10 | 11 | The main steps are as follows: 12 | 13 | a). Correlation coefficients and p values of all gene pairs under two conditions are calculated. 14 | 15 | b). The differences between the correlation coefficients under two conditions are calculated and their p values are calculated using Fisher's Z-transformation. 16 | 17 | c). p values are adjusted. 18 | 19 | d). Gene pairs (links) coexpressed in at least one condition are identified using the criteria that at least one of the correlation coefficients under two conditions has absolute value greater than the threshold *rth* and adjusted p value less than the threshold *qth*. The links that meet the criteria are included in co-expressed links (CLs). 20 | 21 | e). Differentially coexpressed links (gene pairs) are identified from CLs using the criteria that the absolute value of the difference between the two correlation coefficients is greater than the threshold *r.diffth* and its adjusted p value is less than the threshold *q.diffth*. The links that meet the criteria are included in DCLs. 22 | 23 | f). The DCLs are classified into three categories: *same signed*, *diff signed*, or *switched opposites*. *same signed* indicates that the gene pair has same signed correlation coefficients under both conditions. *diff signed* indicates that the gene pair has oppositely signed correlation coefficients under two conditions and only one of them meets the criteria that absolute correlation coefficient is greater than the threshold *rth* and adjusted p value less than the threshold *qth*. *switched opposites* indicates that the gene pair has oppositely signed correlation coefficients under two conditions and both of them meet the criteria that absolute correlation coefficient is greater than the threshold *rth* and adjusted p value less than the threshold *qth*. 24 | 25 | g). 
All the genes in DCLs are tested for their enrichment of DCLs, i.e., whether they have more DCLs than by chance using binomial probability model (Jiang et al., 2016). Those with adjusted p value less than the threshold *q.dcgth* are included in DCGs. 26 | 27 | ## 2. Installation and removal 28 | This package is available from Bioconductor and can be 29 | installed within R as follows: 30 | ```R 31 | ## try http:// if https:// URLs are not supported 32 | if (!requireNamespace("BiocManager", quietly=TRUE)) 33 | install.packages("BiocManager") 34 | BiocManager::install("diffcoexp") 35 | ``` 36 | To install this package from GitHub, start R and enter: 37 | ```R 38 | library(devtools) 39 | devtools::install_git("https://github.com/hidelab/diffcoexp.git", branch = "master") 40 | ``` 41 | The above method does not build and install vignette. To install the package with vignette, enter the following from command line: 42 | ``` 43 | git clone https://github.com/hidelab/diffcoexp.git 44 | R CMD build diffcoexp 45 | R CMD check diffcoexp_1.21.1.tar.gz 46 | R CMD INSTALL diffcoexp_1.21.1.tar.gz 47 | ``` 48 | To remove this package, start R and enter: 49 | ```R 50 | remove.packages("diffcoexp") 51 | ``` 52 | 53 | ## 3. Input and output of *diffcoexp* function 54 | The main function of this package is *diffcoexp* function. The first two arguments, *exprs.1* and *exprs.2*, are normalized gene expression data under two conditions with rows as genes and columns as samples. They should be objects of classes *SummarizedExperiment*, *data.frame* or *matrix*. Both should have the same number of genes in the same order.
The third argument *r.method* is passed to the *cor* function of the *WGCNA* package as argument *method*, details of which can be found by typing 55 | ```R 56 | help(cor, WGCNA) 57 | ``` 58 | The fourth argument *q.method* is passed to the *p.adjust* function of the *stats* package as argument *method*, details of which can be found by typing 59 | ```R 60 | help(p.adjust, stats) 61 | ``` 62 | Details of other arguments of *diffcoexp* function can be found by typing 63 | ```R 64 | help(diffcoexp, diffcoexp) 65 | ``` 66 | The output of *diffcoexp* function is a list of two data frames, one for differentially co-expressed links (DCLs), the other for differentially co-expressed genes (DCGs). Further details of the output can be seen on the help page. 67 | 68 | ## 4. Analysis and interpretation of DCGs and DCLs 69 | DCGs are a list of genes and therefore can be further analysed using other tools such as FGNet (https://bioconductor.org/packages/release/bioc/html/FGNet.html), clusterProfiler (https://bioconductor.org/packages/release/bioc/html/clusterProfiler.html) and enrichr (http://amp.pharm.mssm.edu/Enrichr/). DCLs are a list of differentially co-expressed gene pairs and can be assembled into a differential coexpression network. The network is scale-free but not small-world (Hsu et al., 2015). The network can be visualized and analyzed using igraph (https://cran.r-project.org/web/packages/igraph/index.html). DCLs can also be further analyzed to identify upstream causal regulators using other tools such as DCGL v2.0 (Yang et al., 2013). 70 | 71 | ## 5. Example 72 | 73 | This example illustrates the workflow of downloading gene expression data from GEO and identifying differentially coexpressed links (DCLs) and differentially coexpressed genes (DCGs).
74 | 75 | ```R 76 | library(GEOquery) 77 | gse4158 <- getGEO("GSE4158") 78 | exprs<-exprs(gse4158[[1]]) 79 | keep<-rowSums(is.na(exprs)) < ncol(exprs)/5 80 | exprs<-exprs[keep,] 81 | dim(exprs) 82 | GPL3415<-getGEO("GPL3415") 83 | exprs<-data.frame(ID=rownames(exprs), exprs) 84 | exprs<-merge(GPL3415@dataTable@table, exprs, by.x="ID", by.y="ID") 85 | colnames(exprs) 86 | exprs<-exprs[, c(7, 11:36)] 87 | exprs<-aggregate(exprs[, -1], by=list(Gene=exprs$ORF), FUN=mean, na.action = na.omit) 88 | rownames(exprs)<-exprs$Gene 89 | exprs<-exprs[, -1] 90 | ``` 91 | Analysis of all the genes (6104) will take about 20 minutes on a computer with 8 cores and 16GB RAM. 92 | ```R 93 | exprs.1<-exprs[, c(1:14)] 94 | exprs.2<-exprs[, c(15:26)] 95 | library(diffcoexp) 96 | allowWGCNAThreads() 97 | res=diffcoexp(exprs.1 = exprs.1, exprs.2 = exprs.2, r.method = "spearman" ) 98 | ``` 99 | The results are a list of two data frames, one for differentially co-expressed links (DCLs, gene pairs), the other for differentially co-expressed genes (DCGs). 100 | ```R 101 | str(res) 102 | sessionInfo() 103 | ``` 104 | ## References 105 | de la Fuente A (2010). From “differential expression” to “differential networking” – 106 | identification of dysfunctional regulatory networks in diseases. *Trends in Genetics*, 26(7):326-33. 107 | 108 | Fisher, R. A. (1915). Frequency distribution of the values of the correlation coefficient in samples of an indefinitely large population. *Biometrika*, 10 (4): 507–521. 109 | 110 | Fisher, R. A. (1921). On the 'probable error' of a coefficient of correlation deduced from a small sample. *Metron*, 1: 3–32. 111 | 112 | Hsu C-L, Juan H-F, Huang H-C (2015). Functional analysis and characterization of differential coexpression networks. *Scientific Reports*, 5: 13295 113 | 114 | Jiang Z, Dong X, Li Z-G, He F, Zhang Z (2016). Differential coexpression analysis reveals extensive rewiring of Arabidopsis gene coexpression in response to Pseudomonas syringae infection. 
*Scientific Reports*, 6(1):35064. 115 | 116 | Yang J, Yu H, Liu B-H, Zhao Z, Liu L, Ma L-X, et al. (2013) DCGL v2.0: An R package for unveiling differential regulation from differential co-expression. *PLoS ONE*, 8(11):e79729. 117 | 118 | Yu H, Liu B-H, Ye Z-Q, Li C, Li Y-X, Li Y-Y (2011). Link-based quantitative methods to identify differentially coexpressed genes and gene pairs. *BMC bioinformatics*, 12(1):315. 119 | -------------------------------------------------------------------------------- /data/gse4158part.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hidelab/diffcoexp/5d8427d7e8c09351a12c9c652f6676d686ba8914/data/gse4158part.RData -------------------------------------------------------------------------------- /inst/NEWS: -------------------------------------------------------------------------------- 1 | CHANGES IN VERSION 0.99.2 2 | ------------------------- 3 | 4 | SIGNIFICANT USER-VISIBLE CHANGES 5 | 6 | o example data exprs.1 and exprs.2 are represented as matrices 7 | 8 | BUG FIXES 9 | 10 | o this package imports rather than depends on the following packages: stats, DiffCorr, psych, igraph, BiocGenerics 11 | o messages are generated using message() instead of print() function. 12 | o use is() function to test inheritance relationships between an object and a class. 13 | o format NEWS file so that utils::news() parses the file. 14 | 15 | CHANGES IN VERSION 0.99.1 16 | ------------------------- 17 | 18 | SIGNIFICANT USER-VISIBLE CHANGES 19 | 20 | o diffcoexp(), coexpr(), and comparecor() accept SummarizedExperiment objects. 21 | 22 | CHANGES IN VERSION 0.99.0 23 | ------------------------- 24 | 25 | SIGNIFICANT USER-VISIBLE CHANGES 26 | 27 | o this package was given version number 0.99.0 and submitted to Bioconductor. 
28 | -------------------------------------------------------------------------------- /inst/unitTests/test_diffcoexp.R: -------------------------------------------------------------------------------- 1 | #library(SummarizedExperiment) 2 | #library(diffcoexp) 3 | #library(RUnit) 4 | test_diffcoexp <- function() { # runs diffcoexp() (Spearman) on the bundled gse4158part data wrapped as SummarizedExperiment objects; expects 15 DCGs x 8 columns and 363 DCLs x 12 columns 5 | data(gse4158part) 6 | allowWGCNAThreads() 7 | exprs.1<-SummarizedExperiment(list(exprs.1=as.matrix(exprs.1))) 8 | exprs.2<-SummarizedExperiment(list(exprs.1=as.matrix(exprs.2))) # NOTE(review): the assay for condition 2 is also named exprs.1 - presumably the assay name is not used downstream; confirm 9 | res=diffcoexp(exprs.1 = exprs.1, exprs.2 = exprs.2, r.method = "spearman") 10 | checkEquals(nrow(res$DCGs), 15) 11 | checkEquals(ncol(res$DCGs), 8) 12 | checkEquals(nrow(res$DCLs), 363) 13 | checkEquals(ncol(res$DCLs), 12) 14 | } 15 | 16 | test_coexpr <- function() { # runs coexpr() (Spearman) on the same data; expects 13179 rows x 11 columns 17 | data(gse4158part) 18 | allowWGCNAThreads() 19 | exprs.1<-SummarizedExperiment(list(exprs.1=as.matrix(exprs.1))) 20 | exprs.2<-SummarizedExperiment(list(exprs.1=as.matrix(exprs.2))) 21 | res=coexpr(exprs.1 = exprs.1, exprs.2 = exprs.2, r.method = "spearman") 22 | checkEquals(nrow(res), 13179) 23 | checkEquals(ncol(res), 11) 24 | } 25 | 26 | test_comparecor <- function() { # runs comparecor() (Spearman) on the same data; expects 79800 rows x 11 columns (79800 = choose(400, 2), presumably one row per gene pair) 27 | data(gse4158part) 28 | allowWGCNAThreads() 29 | exprs.1<-SummarizedExperiment(list(exprs.1=as.matrix(exprs.1))) 30 | exprs.2<-SummarizedExperiment(list(exprs.1=as.matrix(exprs.2))) 31 | res=comparecor(exprs.1 = exprs.1, exprs.2 = exprs.2, r.method = "spearman") 32 | checkEquals(nrow(res), 79800) 33 | checkEquals(ncol(res), 11) 34 | } 35 | -------------------------------------------------------------------------------- /man/coexpr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/coexpr.R 3 | \name{coexpr} 4 | \alias{coexpr} 5 | \title{Identification of gene pairs coexpressed in at least one of two conditions} 6 | \usage{ 7 | coexpr(exprs.1, exprs.2, r.method = c("pearson", "spearman")[1], 8 | q.method = c("BH",
"holm", "hochberg", "hommel", "bonferroni", "BY", "fdr", 9 | "none")[1], rth = 0.5, qth = 0.1) 10 | } 11 | \arguments{ 12 | \item{exprs.1}{a SummarizedExperiment, data frame or matrix for condition 1, with gene IDs as rownames and sample IDs as column names.} 13 | \item{exprs.2}{a SummarizedExperiment, data frame or matrix for condition 2, with gene IDs as rownames and sample IDs as column names.} 14 | \item{r.method}{a character string specifying the method to be used to calculate correlation coefficients. It is passed to the cor function of the WGCNA package.} 15 | \item{q.method}{a character string specifying the method for adjusting p values. It is passed to the p.adjust function of the stats package.} 16 | \item{rth}{the cutoff of absolute value of correlation coefficients; must be within [0,1].} 17 | \item{qth}{the cutoff of q-value (adjusted p value); must be within [0,1].} 18 | } 19 | \value{ 20 | a data frame containing gene pairs that are coexpressed in at least one of the conditions with the criteria that absolute value of correlation coefficient is greater than rth and q value less than qth. 
It has the following columns: 21 | \item{\code{Gene.1}}{Gene ID} 22 | \item{\code{Gene.2}}{Gene ID} 23 | \item{\code{cor.1}}{correlation coefficients under condition 1} 24 | \item{\code{cor.2}}{correlation coefficients under condition 2} 25 | \item{\code{cor.diff}}{difference between correlation coefficients under condition 2 and condition 1} 26 | \item{\code{p.1}}{p value under null hypothesis that correlation coefficient under condition 1 equals to zero} 27 | \item{\code{p.2}}{p value under null hypothesis that correlation coefficient under condition 2 equals to zero} 28 | \item{\code{p.diffcor}}{p value under null hypothesis that difference between two correlation coefficients under two conditions equals to zero using Fisher's r-to-Z transformation} 29 | \item{\code{q.1}}{adjusted p value under null hypothesis that correlation coefficient under condition 1 equals to zero} 30 | \item{\code{q.2}}{adjusted p value under null hypothesis that correlation coefficient under condition 2 equals to zero} 31 | \item{\code{q.diffcor}}{adjusted p value under null hypothesis that the difference between two correlation coefficients under two conditions equals to zero using Fisher's r-to-Z transformation} 32 | } 33 | \description{ 34 | This function identifies gene pairs coexpressed in at least one of two conditions. 35 | } 36 | \examples{ 37 | data(gse4158part) 38 | allowWGCNAThreads() 39 | res=coexpr(exprs.1 = exprs.1, exprs.2 = exprs.2, r.method = "spearman") 40 | #The result is a data frame.
41 | str(res) 42 | } 43 | \keyword{coexpression} 44 | -------------------------------------------------------------------------------- /man/comparecor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/comparecor.R 3 | \name{comparecor} 4 | \alias{comparecor} 5 | \title{Compare gene-gene correlation coefficients under two conditions} 6 | \usage{ 7 | comparecor(exprs.1, exprs.2, r.method = c("pearson", "spearman")[1], 8 | q.method = c("BH", "holm", "hochberg", "hommel", "bonferroni", "BY", "fdr", 9 | "none")[1]) 10 | } 11 | \arguments{ 12 | \item{exprs.1}{a SummarizedExperiment, data frame or matrix for condition 1, with gene IDs as rownames and sample IDs as column names.} 13 | \item{exprs.2}{a SummarizedExperiment, data frame or matrix for condition 2, with gene IDs as rownames and sample IDs as column names.} 14 | \item{r.method}{a character string specifying the method to be used to calculate correlation coefficients. It is passed to the cor function of the WGCNA package.} 15 | \item{q.method}{a character string specifying the method for adjusting p values. It is passed to the p.adjust function of the stats package.} 16 | } 17 | \value{ 18 | a data frame containing the differences between the correlation coefficients under two conditions and their p values.
It has the following columns: 19 | \item{\code{Gene.1}}{Gene ID} 20 | \item{\code{Gene.2}}{Gene ID} 21 | \item{\code{cor.1}}{correlation coefficients under condition 1} 22 | \item{\code{cor.2}}{correlation coefficients under condition 2} 23 | \item{\code{cor.diff}}{difference between correlation coefficients under condition 2 and condition 1} 24 | \item{\code{p.1}}{p value under null hypothesis that correlation coefficient under condition 1 equals to zero} 25 | \item{\code{p.2}}{p value under null hypothesis that correlation coefficient under condition 2 equals to zero} 26 | \item{\code{p.diffcor}}{p value under null hypothesis that difference between two correlation coefficients under two conditions equals to zero using Fisher's r-to-Z transformation} 27 | \item{\code{q.1}}{adjusted p value under null hypothesis that correlation coefficient under condition 1 equals to zero} 28 | \item{\code{q.2}}{adjusted p value under null hypothesis that correlation coefficient under condition 2 equals to zero} 29 | \item{\code{q.diffcor}}{adjusted p value under null hypothesis that the difference between two correlation coefficients under two conditions equals to zero using Fisher's r-to-Z transformation} 30 | } 31 | \description{ 32 | This function calculates correlation coefficients of all gene pairs under two conditions and compares them using Fisher's Z-transformation. 33 | } 34 | \examples{ 35 | data(gse4158part) 36 | allowWGCNAThreads() 37 | res=comparecor(exprs.1 = exprs.1, exprs.2 = exprs.2, r.method = "spearman") 38 | #The result is a data frame.
39 | str(res) 40 | } 41 | \keyword{coexpression} 42 | -------------------------------------------------------------------------------- /man/diffcoexp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/diffcoexp.R 3 | \name{diffcoexp} 4 | \alias{diffcoexp} 5 | \title{Differential co-expression analysis} 6 | \usage{ 7 | diffcoexp(exprs.1, exprs.2, r.method = c("pearson", "kendall", "spearman")[1], 8 | q.method = c("BH", "holm", "hochberg", "hommel", "bonferroni", "BY", "fdr", 9 | "none")[1], rth = 0.5, qth = 0.1, r.diffth = 0.5, q.diffth = 0.1, 10 | q.dcgth = 0.1) 11 | } 12 | \arguments{ 13 | \item{exprs.1}{a SummarizedExperiment, data frame or matrix for condition 1, with gene IDs as rownames and sample IDs as column names.} 14 | \item{exprs.2}{a SummarizedExperiment, data frame or matrix for condition 2, with gene IDs as rownames and sample IDs as column names.} 15 | \item{r.method}{a character string specifying the method to be used to calculate correlation coefficients. It is passed to the cor function of the WGCNA package.} 16 | \item{q.method}{a character string specifying the method for adjusting p values. 
It is passed to the p.adjust function of the stats package.} 17 | \item{rth}{the cutoff of absolute value of correlation coefficients; must be within [0,1].} 18 | \item{qth}{the cutoff of q-value (adjusted p value); must be within [0,1].} 19 | \item{r.diffth}{the cutoff of absolute value of the difference between the correlation coefficients of the two conditions; must be within [0,1].} 20 | \item{q.diffth}{the cutoff of q-value (adjusted p value) of the difference between the correlation coefficients of the two conditions; must be within [0,1].} 21 | \item{q.dcgth}{the cutoff of q-value (adjusted p value) of the genes enriched in the differentially correlated gene pairs between the two conditions; must be within [0,1].} 22 | } 23 | \value{ 24 | a list of two data frames. 25 | 26 | The DCGs data frame contains genes that contribute to differentially correlated links (gene pairs) with q value less than q.dcgth. It has the following columns: 27 | \item{\code{Gene}}{Gene ID} 28 | \item{\code{CLs}}{Number of links with absolute correlation coefficient greater than rth and q value less than qth in at least one condition} 29 | \item{\code{DCLs}}{Number of links that meet the criteria for CLs and the criteria that absolute difference between the correlation coefficients of the two conditions is greater than r.diffth and q value less than q.diffth} 30 | \item{\code{DCL.same}}{Number of subset of DCLs with same signed correlation coefficients in both conditions} 31 | \item{\code{DCL.diff}}{Number of subset of DCLs with oppositely signed correlation coefficients under two conditions but only one of them has absolute correlation coefficient greater than rth and q value less than qth} 32 | \item{\code{DCL.switch}}{Number of subset of DCLs with oppositely signed correlation coefficients under two conditions and both of them have absolute correlation coefficient greater than rth and q value less than qth} 33 | \item{\code{p}}{p value of having >=DCLs given CLs} 34 |
\item{\code{q}}{adjusted p value} 35 | 36 | The DCLs data frame contains the differentially correlated links (gene pairs) that meet the criteria that at least one of their correlation coefficients (cor.1 and/or cor.2) is greater than rth with q value (q.1 and/or q.2) less than qth and the absolute value of the difference between the correlation coefficients under two conditions (cor.diff) is greater than r.diffth with q.diffcor less than q.diffth. It has the following columns: 37 | \item{\code{Gene.1}}{Gene ID} 38 | \item{\code{Gene.2}}{Gene ID} 39 | \item{\code{cor.1}}{correlation coefficients under condition 1} 40 | \item{\code{cor.2}}{correlation coefficients under condition 2} 41 | \item{\code{cor.diff}}{difference between correlation coefficients under condition 2 and condition 1} 42 | \item{\code{p.1}}{p value under null hypothesis that correlation coefficient under condition 1 equals to zero} 43 | \item{\code{p.2}}{p value under null hypothesis that correlation coefficient under condition 2 equals to zero} 44 | \item{\code{p.diffcor}}{p value under null hypothesis that difference between two correlation coefficients under two conditions equals to zero using Fisher's r-to-Z transformation} 45 | \item{\code{q.1}}{adjusted p value under null hypothesis that correlation coefficient under condition 1 equals to zero} 46 | \item{\code{q.2}}{adjusted p value under null hypothesis that correlation coefficient under condition 2 equals to zero} 47 | \item{\code{q.diffcor}}{adjusted p value under null hypothesis that the difference between two correlation coefficients under two conditions equals to zero using Fisher's r-to-Z transformation} 48 | \item{\code{type}}{can have value "same signed", "diff signed", or "switched opposites". "same signed" indicates that the gene pair has same signed correlation coefficients under both conditions. 
"diff signed" indicates that the gene pair has oppositely signed correlation coefficients under two conditions and only one of them meets the criteria that absolute correlation coefficient is greater than rth and q value less than qth. "switched opposites" indicates that the gene pair has oppositely signed correlation coefficients under two conditions and both of them meet the criteria that absolute correlation coefficient is greater than rth and q value less than qth.} 49 | } 50 | \description{ 51 | This function identifies differentially coexpressed links (DCLs) and differentially coexpressed genes (DCGs). 52 | } 53 | \details{ 54 | diffcoexp function identifies differentially coexpressed links (DCLs) and differentially coexpressed genes (DCGs). DCLs are gene pairs with significantly different correlation coefficients under two conditions (de la Fuente 2010, Jiang et al., 2016). DCGs are genes with significantly more DCLs than by chance (Yu et al., 2011, Jiang et al., 2016). It takes two gene expression matrices or data frames under two conditions as input, calculates gene-gene correlations under two conditions and compare them with Fisher's Z transformation, filter the correlation with the rth and qth and the correlation changes with r.diffth and q.diffth. It identifies DCGs using binomial probability model (Jiang et al., 2016). 55 | 56 | The main steps are as follows: 57 | 58 | a). Correlation coefficients and p values of all gene pairs under two conditions are calculated. 59 | 60 | b). The difference between the correlation coefficients under two conditions are calculated and the p value is calculated using Fisher's Z-transformation. 61 | 62 | c). p values are adjusted. 63 | 64 | d). Gene pairs (links) coexpressed in at least one condition are identified using the criteria that at least one of the correlation coefficients under two conditions has absolute value greater than the threshold rth and adjusted p value less than the threshold qth. 
The links that meet the criteria are included in CLs. 65 | 66 | e). Differentially coexpressed gene pairs (links) are identified from CLs using the criteria that the absolute value of the difference between the two correlation coefficients is greater than the threshold r.diffth and its adjusted p value is less than the threshold q.diffth. The links that meet the criteria are included in DCLs. 67 | 68 | f). The DCLs are classified into three categories: "same signed", "diff signed", or "switched opposites". "same signed" indicates that the gene pair has same signed correlation coefficients under both conditions. "diff signed" indicates that the gene pair has oppositely signed correlation coefficients under two conditions and only one of them meets the criteria that absolute correlation coefficient is greater than the threshold rth and adjusted p value less than the threshold qth. "switched opposites" indicates that the gene pair has oppositely signed correlation coefficients under two conditions and both of them meet the criteria that absolute correlation coefficient is greater than the threshold rth and adjusted p value less than the threshold qth. 69 | 70 | g). All the genes in DCLs are tested for their enrichment of DCLs, i.e., whether they have more DCLs than by chance using binomial probability model (Jiang et al., 2016). Those with adjusted p value less than the threshold q.dcgth are included in DCGs. 71 | } 72 | \examples{ 73 | data(gse4158part) 74 | allowWGCNAThreads() 75 | res=diffcoexp(exprs.1 = exprs.1, exprs.2 = exprs.2, r.method = "spearman") 76 | #The results are a list of two data frames, one for differentially co-expressed 77 | #links (DCLs, gene pairs) and one for differentially co-expressed genes (DCGs). 78 | str(res) 79 | } 80 | \references{ 81 | 1. de la Fuente A. From "differential expression" to "differential networking" - identification of dysfunctional regulatory networks in diseases. Trends in Genetics. 2010 Jul;26(7):326-33. 82 | 83 | 2.
Jiang Z, Dong X, Li Z-G, He F, Zhang Z. Differential Coexpression Analysis Reveals Extensive Rewiring of Arabidopsis Gene Coexpression in Response to Pseudomonas syringae Infection. Scientific Reports. 2016 Dec;6(1):35064. 84 | 85 | 3. Yu H, Liu B-H, Ye Z-Q, Li C, Li Y-X, Li Y-Y. Link-based quantitative methods to identify differentially coexpressed genes and gene pairs. BMC bioinformatics. 2011;12(1):315. 86 | } 87 | \author{ 88 | Wenbin Wei 89 | } 90 | \keyword{coexpression} 91 | -------------------------------------------------------------------------------- /man/exprs.1.Rd: -------------------------------------------------------------------------------- 1 | \docType{data} 2 | \name{exprs.1} 3 | \alias{exprs.1} 4 | \title{exprs.1} 5 | \format{A matrix with 400 genes and 14 samples.} 6 | \usage{ 7 | exprs.1 8 | } 9 | \description{ 10 | expression of 400 genes in 14 samples (GSM94988 to GSM95001) of yeast after pulses 11 | 2 g/l glucose, \url{https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE4158}. 12 | } 13 | \keyword{datasets} 14 | -------------------------------------------------------------------------------- /man/exprs.2.Rd: -------------------------------------------------------------------------------- 1 | \docType{data} 2 | \name{exprs.2} 3 | \alias{exprs.2} 4 | \title{exprs.2} 5 | \format{A matrix with 400 genes and 12 samples.} 6 | \usage{ 7 | exprs.2 8 | } 9 | \description{ 10 | expression of 400 genes in 14 samples (GSM94988 to GSM95001) of yeast after pulses 11 | 0.2 g/l glucose, \url{https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE4158}. 
12 | } 13 | \keyword{datasets} 14 | -------------------------------------------------------------------------------- /tests/runTests.R: -------------------------------------------------------------------------------- 1 | BiocGenerics:::testPackage("diffcoexp") 2 | -------------------------------------------------------------------------------- /vignettes/diffcoexp.Rnw: -------------------------------------------------------------------------------- 1 | %\VignetteIndexEntry{About diffcoexp} 2 | \documentclass{article} 3 | \usepackage{authblk} 4 | \title{About \emph{diffcoexp}} 5 | \date{November 23, 2020} 6 | \author{Wenbin Wei, Sandeep Amberkar, Winston Hide} 7 | \affil{Sheffield Institute of Translational Neuroscience, University of Sheffield, Sheffield, United Kingdom} 8 | \usepackage[utf8]{inputenc} 9 | \begin{document} 10 | \SweaveOpts{concordance=TRUE} 11 | \maketitle 12 | \tableofcontents 13 | \section{Description} 14 | This package identifies differentially coexpressed links (DCLs) and differentially 15 | coexpressed genes (DCGs). DCLs are gene pairs with significantly different correlation 16 | coefficients under two conditions (de la Fuente 2010, Jiang et al., 2016). DCGs are genes 17 | with significantly more DCLs than by chance (Yu et al., 2011, Jiang et al., 2016). It 18 | takes two gene expression matrices or data frames under two conditions as input, 19 | calculates gene-gene correlations under two conditions and compares them with Fisher's Z 20 | transformation(Fisher 1915 and Fisher 1921). It filters gene pairs with the thresholds for correlation coefficients 21 | and their adjusted p value as well as the thresholds for the difference between the two 22 | correlation coefficients and its adjusted p value. It identifies DCGs using binomial 23 | probability model (Jiang et al., 2016).\smallskip 24 | 25 | \noindent 26 | The main steps are as follows: 27 | 28 | a). 
Correlation coefficients and p values of all gene pairs under two conditions are 29 | calculated. 30 | 31 | b). The differences between the correlation coefficients under two conditions are 32 | calculated and their p values are calculated using Fisher's Z-transformation. 33 | 34 | c). p values are adjusted. 35 | 36 | d). Gene pairs (links) coexpressed in at least one condition are identified using the 37 | criteria that at least one of the correlation coefficients under two conditions has 38 | absolute value greater than the threshold \emph{rth} and adjusted p value less than the 39 | threshold \emph{qth}. The links that meet the criteria are included in co-expressed links(CLs). 40 | 41 | e). Differentially coexpressed links (gene pairs) are identified from CLs using the 42 | criteria that the absolute value of the difference between the two correlation 43 | coefficients is greater than the threshold \emph{r.diffth} and adjusted p value is less than 44 | the threshold \emph{q.diffth}. The links that meet the criteria are included in differentially coexpressed links (DCLs). 45 | 46 | f). The DCLs are classified into three categories: \emph{same signed}, \emph{diff signed}, or 47 | \emph{switched opposites}. \emph{same signed} indicates that the gene pair has same signed 48 | correlation coefficients under both conditions. \emph{diff signed} indicates that the gene 49 | pair has oppositely signed correlation coefficients under two conditions and only one of 50 | them meets the criteria that absolute correlation coefficient is greater than the 51 | threshold \emph{rth} and adjusted p value less than the threshold \emph{qth}. \emph{switched opposites} 52 | indicates that the gene pair has oppositely signed correlation coefficients under two 53 | conditions and both of them meet the criteria that absolute correlation coefficient 54 | is greater than the threshold \emph{rth} and adjusted p value less than the threshold \emph{qth}. 55 | 56 | g). 
All the genes in DCLs are tested for their enrichment of DCLs, i.e, whether they have 57 | more DCLs than by chance using binomial probability model (Jiang et al., 2016). Those 58 | with adjusted p value less than the threshold \emph{q.dcgth} are included in DCGs. 59 | \section{Installation and removal} 60 | \noindent 61 | This package is available from Bioconductor and can be installed within R as follows: \par 62 | \#\# try http:// if https:// URLs are not supported \par 63 | if (!requireNamespace("BiocManager", quietly=TRUE)) \par 64 | install.packages("BiocManager") \par 65 | BiocManager::install("diffcoexp") \smallskip 66 | \par 67 | \noindent 68 | To install this package from GitHub, start R and enter: \par 69 | library(devtools) \par 70 | devtools::install\_git("git://github.com/hidelab/diffcoexp.git", \par 71 | branch = "master") \smallskip 72 | \par 73 | \noindent 74 | The above method does not build and install vignette. To install the package with 75 | vignette, enter the following from command line: \par 76 | \par 77 | git clone https://github.com/hidelab/diffcoexp.git \par 78 | R CMD build diffcoexp \par 79 | R CMD check diffcoexp\_1.11.1.tar.gz \par 80 | R CMD INSTALL diffcoexp\_1.11.1.tar.gz \smallskip 81 | \par 82 | \noindent 83 | To remove this package, start R and enter: \par 84 | \par 85 | @ 86 | remove.packages("diffcoexp") 87 | \section{Input and output of \emph{diffcoexp} function} 88 | The main function of this package is \emph{diffcoexp} function. The first two arguments, \emph{exprs.1} and \emph{exprs.2}, are normalized gene expression data under two conditions with rows as genes and columns as samples. They should be objects of classes \emph{SummarizedExperiment}, \emph{data.frame} or \emph{matrix}. Both should have the same number of genes in the same order. 
The third argument \emph{r.method} is passed to the \emph{cor} function of the \emph{WGCNA} package as argument \emph{method}, details of which can be found by typing 89 | <>= 90 | help(cor, WGCNA) 91 | @ 92 | The fourth argument \emph{q.method} is passed to the \emph{p.adjust} function of the \emph{stats} package as argument \emph{method}, details of which can be found by typing 93 | <>= 94 | help(p.adjust, stats) 95 | @ 96 | Details of other arguments of \emph{diffcoexp} function can be found by typing 97 | <>= 98 | help(diffcoexp, diffcoexp) 99 | @ 100 | The output of \emph{diffcoexp} function is a list of two data frames, one for differentially co-expressed links (DCLs), the other for differentially co-expressed genes (DCGs). Further details of the output can be seen on the help page. 101 | \section{Analysis and interpretation of DCGs and DCLs} 102 | \noindent 103 | DCGs are a list of genes and therefore can be further analysed using other tools such as FGNet (https://bioconductor.org/packages/release/bioc/html/FGNet.html), clusterProfiler (https://bioconductor.org/packages/release/bioc/html/clusterProfiler.html) and enrichr (http://amp.pharm.mssm.edu/Enrichr/). DCLs are a list of differentially co-expressed gene pairs and can be assembled into a differential coexpression network. The network is scale-free but not small-world (Hsu et al., 2015). The network can be visualized and analyzed using igraph (https://cran.r-project.org/web/packages/igraph/index.html). DCLs can also be further analyzed to identify upstream causal regulators using other tools such as DCGL v2.0 (Yang et al., 2013). 104 | \section{Example} 105 | This example illustrates the identification of differentially coexpressed links (gene 106 | pairs) and differentially coexpressed genes of yeast after pulses of 2 g/l and 0.2 g/l 107 | glucose separately. The data were downloaded from GEO (GSE4158). Only 400 genes were 108 | analysed in this example. 
Analysis of all the genes (6104) will take about 20 minutes on 109 | a computer with 8 cores and 16GB RAM. 110 | <>= 111 | library(diffcoexp) 112 | data(gse4158part) 113 | @ 114 | <<>>= 115 | allowWGCNAThreads() 116 | res=diffcoexp(exprs.1 = exprs.1, exprs.2 = exprs.2, r.method = "spearman" ) 117 | @ 118 | The results are a list of two data frames, one for differentially co-expressed links 119 | (DCLs), the other for differentially co-expressed genes (DCGs). 120 | <<>>= 121 | str(res) 122 | @ 123 | \section{References} 124 | de la Fuente A (2010). From “differential expression” to “differential networking” – 125 | identification of dysfunctional regulatory networks in diseases. \emph{Trends in Genetics}, 26(7):326-33.\smallskip 126 | \par 127 | \noindent 128 | Fisher, R. A. (1915). Frequency distribution of the values of the correlation coefficient in samples of an indefinitely large population. \emph{Biometrika}, 10 (4): 507–521. \smallskip 129 | \par 130 | \noindent 131 | Fisher, R. A. (1921). On the 'probable error' of a coefficient of correlation deduced from a small sample. \emph{Metron}, 1: 3–32.\smallskip 132 | \par 133 | \noindent 134 | Hsu C-L, Juan H-F, Huang H-C (2015). Functional analysis and characterization of differential coexpression networks. \emph{Scientific Reports}, 5: 13295\smallskip 135 | \par 136 | \noindent 137 | Jiang Z, Dong X, Li Z-G, He F, Zhang Z (2016). Differential coexpression analysis reveals extensive rewiring of Arabidopsis gene coexpression in response to Pseudomonas syringae infection. \emph{Scientific Reports}, 6(1):35064.\smallskip 138 | \par 139 | \noindent 140 | Yang J, Yu H, Liu B-H, Zhao Z, Liu L, Ma L-X, et al. (2013) DCGL v2.0: An R package for unveiling differential regulation from differential co-expression. \emph{PLoS ONE}, 8(11):e79729.\smallskip 141 | \par 142 | \noindent 143 | Yu H, Liu B-H, Ye Z-Q, Li C, Li Y-X, Li Y-Y (2011). 
Link-based quantitative methods to identify differentially coexpressed genes and gene pairs. \emph{BMC Bioinformatics}, 12(1):315. 144 | 145 | \end{document} 146 | --------------------------------------------------------------------------------