├── MTL.result.collapsed.R
├── README.md
├── add_nearest_gene.R
├── add_rna.R
├── correlation_anova_kmeans.R
├── distance_to_ensembleTSS.R
├── distance_to_ensembleTSS.sh
├── get_csv.R
├── get_venn.R
├── main.sh
├── pairwise_comparison.R
├── peak_size.R
├── shared_peaks.R
└── summarize_AUC.R

/MTL.result.collapsed.R:
--------------------------------------------------------------------------------
1 | ## add CTCF/Rad21 within 5kb
2 | ## Each flag file holds one 0/1 value per multiMTL.bed row (written by main.sh), so it
3 | ## must line up row-for-row with the result table.
4 | d <- read.csv('MTL.result.log2.csv', header = TRUE, check.names = FALSE)
5 | has_ctcf <- read.table('MTL.hasCTCF_5kb.bed', header = FALSE)
6 | has_rad21 <- read.table('MTL.hasRad21_5kb.bed', header = FALSE)
7 | ## A flag vector whose length merely divides nrow(d) would be recycled silently and
8 | ## label the wrong peaks, so require an exact match.
9 | stopifnot(nrow(has_ctcf) == nrow(d), nrow(has_rad21) == nrow(d))
10 | d$hasCTCF.5kb <- has_ctcf$V1
11 | d$hasRad21.5kb <- has_rad21$V1
12 | write.csv(d, 'MTL.result.log2.csv', row.names = FALSE)
13 | 
14 | ## collapse MTL table so that each transcript appears once (record only the nearest peak to its TSS)
15 | ## sort by distance so that !duplicated() keeps the closest peak of every gene ...
16 | d <- d[order(d$Distance, decreasing = FALSE), ]
17 | d2 <- d[!duplicated(d$Genes), ]
18 | ## ... then restore the original row order (row names are the original row numbers)
19 | d2 <- d2[order(as.numeric(rownames(d2)), decreasing = FALSE), ]
20 | write.csv(d2, 'MTL.result.log2.collapsed.csv', row.names = FALSE)
21 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ADEpigenetics
2 | Main codes for AD epigenetics paper by Nativio et al.
3 | 
4 | Follow main.sh to run the scripts in order.
5 | 
--------------------------------------------------------------------------------
/add_nearest_gene.R:
--------------------------------------------------------------------------------
1 | library(plyr)
2 | library(data.table)
3 | 
4 | mtl <- read.csv('MTL.result.csv', stringsAsFactors = FALSE)
5 | 
6 | ## annotation of multiMTL.bed written by annotatePeaks.pl (see main.sh)
7 | anno <- read.table('multiMTL.anno.txt', header = TRUE, sep = "\t", comment.char = "", quote = "", stringsAsFactors = FALSE)
8 | ## keep columns 2-4 (peak coordinates), 10 and 16 (distance to / name of the nearest gene)
9 | anno <- anno[, c(2, 3, 4, 10, 16)]
10 | colnames(anno) <- c("chr", "start", "end", "dist.to.nearest.gene", "nearest.gene")
11 | ## shift start by -1 so it matches the start column of MTL.result.csv
12 | ## NOTE(review): presumably the annotation start is 1-based and the BED start 0-based - confirm
13 | anno$start <- anno$start - 1
14 | n.peaks <- nrow(mtl)
15 | mtl <- join(mtl, anno, by = c("chr", "start", "end"))
16 | ## later scripts attach per-peak vectors by row position, so the join must not add rows
17 | stopifnot(nrow(mtl) == n.peaks)
18 | 
19 | ## merge coordinate 3-columns to 1 column and move it, together with the two gene
20 | ## columns, to the front; all other columns keep their order
21 | gene.cols <- c("dist.to.nearest.gene", "nearest.gene")
22 | other.cols <- setdiff(colnames(mtl), c("chr", "start", "end", gene.cols))
23 | mtl$coordinate <- paste0(mtl$chr, ":", mtl$start, "-", mtl$end)
24 | mtl <- mtl[, c("coordinate", gene.cols, other.cols)]
25 | 
26 | ##
27 | write.csv(mtl, 'MTL.result.csv', row.names = FALSE)
28 | 
--------------------------------------------------------------------------------
/add_rna.R:
--------------------------------------------------------------------------------
1 | library(plyr)
2 | library(data.table)
3 | 
4 | mtl <- read.csv('MTL.result.csv', check.names = FALSE, stringsAsFactors = FALSE)
5 | 
6 | ## Columns are renamed by position: 4 annotation columns, the In* columns for
7 | ## 6 marks x 3 groups, the H* columns for 6 marks x 3 groups, and then
8 | ## P / Q / difference columns for 3 acetylation marks x 3 pairwise comparisons.
9 | all.marks <- c("CTCF", "Rad21", "H3K27ac", "H3K9ac", "H3K122ac", "H3K4me1")
10 | acetyl.marks <- c("H3K27ac", "H3K9ac", "H3K122ac")
11 | comparisons <- c("HO-HY", "HA-HY", "HA-HO")
12 | ## mark-major expansion: mark1.s1, mark1.s2, ..., mark2.s1, ...
13 | expand.names <- function(marks, suffixes) {
14 |   paste(rep(marks, each = length(suffixes)), suffixes, sep = ".")
15 | }
16 | ## per comparison: "P<cmp>", "Q<cmp>", "<cmp>"
17 | stat.suffixes <- as.vector(rbind(paste0("P", comparisons), paste0("Q", comparisons), comparisons))
18 | new.names <- c("Locus", "Distance", "Genes", "ID",
19 |                expand.names(all.marks, c("InA", "InO", "InY")),
20 |                expand.names(all.marks, c("HY", "HO", "HA")),
21 |                expand.names(acetyl.marks, stat.suffixes))
22 | ## a shorter name vector would silently leave NA column names behind
23 | stopifnot(ncol(mtl) == length(new.names))
24 | colnames(mtl) <- new.names
25 | 
26 | mtl$Distance <- abs(mtl$Distance)
27 | mtl$Distance.group <- cut(mtl$Distance, breaks = c(0, 1000, 50000, 100000, Inf), include.lowest = TRUE,
28 |                           labels = c("within.1kb", "1-50kb", "50-100kb", "beyond.100kb"))
29 | 
30 | ## overwrite each difference column "<mark>.<a>-<b>" with <mark>.<a> - <mark>.<b>
31 | for (mark in acetyl.marks) {
32 |   for (cmp in comparisons) {
33 |     sides <- strsplit(cmp, "-", fixed = TRUE)[[1]]
34 |     mtl[[paste0(mark, ".", cmp)]] <- mtl[[paste0(mark, ".", sides[1])]] - mtl[[paste0(mark, ".", sides[2])]]
35 |   }
36 | }
37 | 
38 | ## annotation columns first, everything else in its current order
39 | lead.cols <- c("Locus", "Genes", "Distance", "Distance.group", "ID")
40 | mtl <- mtl[, c(lead.cols, setdiff(colnames(mtl), lead.cols))]
41 | 
42 | rna <- read.csv('brain.rna.csv', check.names = FALSE, stringsAsFactors = FALSE)
43 | colnames(rna)[1] <- "Genes"
44 | 
45 | n.peaks <- nrow(mtl)
46 | mtl <- join(mtl, rna, by = "Genes")
47 | ## MTL.result.collapsed.R attaches per-peak flags by row position; a gene listed more
48 | ## than once in brain.rna.csv would duplicate peaks here and shift every later row
49 | stopifnot(nrow(mtl) == n.peaks)
50 | 
51 | write.csv(mtl, 'MTL.result.log2.csv', row.names = FALSE)
52 | 
--------------------------------------------------------------------------------
/correlation_anova_kmeans.R:
--------------------------------------------------------------------------------
1 | library(corrplot)
2 | library(ggplot2)
3 | library(reshape2)
4 | library(plyr)
5 | library(data.table)
6 | 
7 | auc <- read.table('AUC.txt', sep = "\t", header = TRUE, stringsAsFactors = FALSE)
8 | mtl <- read.table('MTL.txt', sep = "\t", header = FALSE, stringsAsFactors = FALSE)
9 | mtl$ID <- paste0("MTL_", rownames(mtl))
10 | rownames(auc) <- mtl$ID
11 | colnames(mtl)[1:6] <- c("chr", "start", "end", "peak.in.AD", "peak.in.O", "peak.in.Y")
12 | 
13 | ## one row per AUC column; the group is the part of the sample name before the first "."
14 | sample.list <- as.data.frame(colnames(auc))
15 | colnames(sample.list) <- "sample"
16 | sample.list$group <- as.factor(gsub("\\..*$", "", sample.list$sample))
17 | 
18 | NeuFrac <- read.table('NeuFrac.txt', sep = "\t", header = TRUE, stringsAsFactors = FALSE)
19 | NeuFrac$sampleID <- gsub("-", ".", NeuFrac$sampleID)
20 | ## Every correlation below pairs NeuFrac$percentage with the AUC columns by position,
21 | ## so put NeuFrac into AUC column order whenever the sample IDs allow it.
22 | neu.order <- match(colnames(auc), NeuFrac$sampleID)
23 | if (anyNA(neu.order)) {
24 |   warning("NeuFrac sample IDs do not all match the AUC column names; ",
25 |           "assuming NeuFrac.txt is already in AUC column order")
26 | } else {
27 |   NeuFrac <- NeuFrac[neu.order, ]
28 | }
29 | 
30 | ## Draw a PC1-vs-PC2 plot of the samples (columns of `mat`) on the open device.
31 | ##   mat        peaks x samples table
32 | ##   title      plot title
33 | ##   groups     one group label per sample, used for colouring
34 | ##   neu.frac   optional numeric vector, one value per sample; when given, its
35 | ##              correlation with each of the first five PCs is printed
36 | ##   cor.method correlation method used for that printout
37 | ## Returns the prcomp object invisibly.
38 | plot_pca <- function(mat, title, groups, neu.frac = NULL, cor.method = "pearson") {
39 |   pc <- prcomp(t(mat), scale. = TRUE)
40 |   scores <- as.data.frame(pc$x)
41 |   pct <- as.integer(100 * pc$sdev^2 / sum(pc$sdev^2))
42 |   if (!is.null(neu.frac)) {
43 |     for (i in seq_len(min(5, ncol(scores)))) {
44 |       print(paste0("PC", i, " (", pct[i], "%): ", cor(neu.frac, scores[, i], method = cor.method)))
45 |     }
46 |   }
47 |   ## explicit print(): ggplot objects are not auto-printed inside a function
48 |   print(
49 |     ggplot(data = scores, aes(x = PC1, y = PC2, label = rownames(scores), col = groups)) +
50 |       geom_hline(yintercept = 0, colour = "gray65") +
51 |       geom_vline(xintercept = 0, colour = "gray65") +
52 |       geom_text(alpha = 0.8, size = 2) +
53 |       theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
54 |       theme(legend.position = "none") +
55 |       ggtitle(title) +
56 |       xlab(paste0("PC1 (", pct[1], "%)")) +
57 |       ylab(paste0("PC2 (", pct[2], "%)"))
58 |   )
59 |   invisible(pc)
60 | }
61 | 
62 | ## the (up to) 10000 rows with the largest row sums; head() avoids the NA rows
63 | ## that x[1:10000, ] would create when fewer rows are available
64 | top_rows <- function(mat, n = 10000) {
65 |   head(mat[order(rowSums(mat), decreasing = TRUE), ], n)
66 | }
67 | 
68 | ###############################
69 | 
70 | ## spearman's correlation
71 | pdf("correlation.pdf", height = 8, width = 7)
72 | sample.cor <- cor(auc, method = "spearman")
73 | palette.fn <- colorRampPalette(c("blue", "grey90", "red", "grey90", "blue"))
74 | corrplot(sample.cor, type = "lower", method = "number", order = "hclust", hclust.method = "ward.D", mar = c(0, 3, 0, 0),
75 |          tl.cex = 0.5, tl.col = "black", number.cex = 0.4, addshade = "all",
76 |          bg = "white", cl.lim = c(0, 1), col = palette.fn(100))
77 | dev.off()
78 | rm(sample.cor, palette.fn)
79 | 
80 | ###############################
81 | 
82 | ## mask peaks with 10% highest pearson corr with NeuFrac
83 | neu.cor <- abs(apply(auc, 1, function(x) cor(NeuFrac$percentage, x, method = "pearson")))
84 | mtl$mask <- as.numeric(neu.cor > quantile(neu.cor, probs = 0.9))
85 | rm(neu.cor)
86 | 
87 | auc.filter <- auc[mtl$mask == 0, ]
88 | 
89 | ## PCA plot of all/top10000 MTLs (masked)
90 | pdf("PCA.mask.pdf", height = 8, width = 8)
91 | plot_pca(auc.filter, "PCA plot of all MTLs", sample.list$group, NeuFrac$percentage, "pearson")
92 | plot_pca(top_rows(auc.filter), "PCA plot of top 10000 MTLs", sample.list$group, NeuFrac$percentage, "pearson")
93 | dev.off()
94 | 
95 | ###############################
96 | 
97 | ## PCA plot of all/top10000 MTLs
98 | pdf("PCA.pdf", height = 8, width = 8)
99 | plot_pca(auc, "PCA plot of all MTLs", sample.list$group, NeuFrac$percentage, "spearman")
100 | plot_pca(top_rows(auc), "PCA plot of top 10000 MTLs", sample.list$group, NeuFrac$percentage, "spearman")
101 | dev.off()
102 | 
103 | ###############################
104 | 
105 | ## anova: per peak, p-value of the group term in lm(auc ~ group)
106 | auc.mat <- as.matrix(auc)
107 | mtl$anova <- vapply(seq_len(nrow(auc.mat)), function(i) {
108 |   anova(lm(auc.mat[i, ] ~ sample.list$group))$`Pr(>F)`[1]
109 | }, numeric(1))
110 | rm(auc.mat)
111 | 
112 | ## PCA plot of significant MTLs
113 | ## which() drops peaks whose p-value is NA instead of turning them into all-NA rows
114 | auc.sig <- auc[which(mtl$anova < 0.05), ]
115 | pdf("PCA.sig.pdf", height = 8, width = 8)
116 | plot_pca(auc.sig, "PCA plot of significant MTLs", sample.list$group)
117 | dev.off()
118 | 
119 | ## k-means for anova.sig.MTLs
120 | pdf("k-means_decide.pdf", height = 4, width = 6)
121 | set.seed(1)  # k-means starts from random centres; seed so the elbow plot is reproducible
122 | wss <- (nrow(auc.sig) - 1) * sum(apply(auc.sig, 2, var))
123 | for (k in 2:15) wss[k] <- sum(kmeans(auc.sig, centers = k)$withinss)
124 | plot(1:15, wss, type = "b", xlab = "Number of Clusters", ylab = "Within groups sum of squares")
125 | dev.off()
126 | rm(wss, k)
127 | 
128 | bestK <- 10
129 | 
130 | set.seed(1)
131 | km <- kmeans(auc.sig, centers = bestK)
132 | clusters <- data.frame(k.means = km$cluster, ID = names(km$cluster), stringsAsFactors = FALSE)
133 | mtl <- join(mtl, clusters, by = "ID")
134 | mtl$k.means[is.na(mtl$k.means)] <- 0 #non-sig MTLs were assigned to cluster0
135 | rm(clusters, km)
136 | 
137 | ###############################
138 | 
139 | ## Per-group mean AUC. These columns must exist before the per-cluster tests below.
140 | ## Samples are selected by their exact group label rather than by an unanchored regex.
141 | group_mean <- function(grp) rowMeans(auc[, sample.list$group == grp, drop = FALSE])
142 | mtl$AD.auc <- group_mean("AD")
143 | mtl$O.auc <- group_mean("O")
144 | mtl$Y.auc <- group_mean("Y")
145 | 
146 | ## per cluster: Wilcoxon p-values for Y vs O, Y vs AD and O vs AD
147 | for (k in seq_len(bestK)) {
148 |   in.cluster <- mtl[mtl$k.means == k, ]
149 |   print(c(wilcox.test(in.cluster$Y.auc, in.cluster$O.auc)$p.value,
150 |           wilcox.test(in.cluster$Y.auc, in.cluster$AD.auc)$p.value,
151 |           wilcox.test(in.cluster$O.auc, in.cluster$AD.auc)$p.value))
152 | }
153 | rm(k, in.cluster)
154 | 
155 | ###############################
156 | 
157 | ## measure.vars is explicit so that other columns (e.g. mask) are not melted in as
158 | ## an extra "group"
159 | d <- melt(mtl, id.vars = c("chr", "start", "end", "peak.in.AD", "peak.in.O", "peak.in.Y", "ID", "anova", "k.means"),
160 |           measure.vars = c("AD.auc", "O.auc", "Y.auc"),
161 |           variable.name = "group", value.name = "ave.AUC")
162 | d <- d[d$k.means != 0, ]
163 | d$group <- sub("\\.auc$", "", d$group)
164 | d$group <- factor(d$group, c("Y", "O", "AD"))
165 | pdf("k-means_boxplot.pdf", height = 4, width = 8)
166 | ggplot(d, aes(group, ave.AUC, fill = group)) + geom_boxplot() +
167 |   facet_wrap(~k.means, nrow = 2, scales = "free") +
168 |   theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
169 |   theme(legend.position = "none") + xlab("")
170 | dev.off()
171 | table(mtl$k.means)
172 | rm(d)
173 | 
174 | ###############################
175 | 
176 | write.csv(mtl, "MTL.result.csv", row.names = FALSE)
177 | 
--------------------------------------------------------------------------------
/distance_to_ensembleTSS.R:
--------------------------------------------------------------------------------
1 | ## t1 holds one distance per line (column 7 of the bedtools closest output), with
2 | ## -1 recoded to 1000000 by distance_to_ensembleTSS.sh
3 | mtl <- read.table('t1', header = FALSE)
4 | colnames(mtl) <- 'Distance'
5 | mtl$Distance.group <- cut(mtl$Distance, breaks = c(0, 1000, 50000, 100000, Inf), include.lowest = TRUE,
6 |                           labels = c("within.1kb", "1-50kb", "50-100kb", "beyond.100kb"))
7 | write.csv(mtl, 'distance_to_ensembleTSS.csv', row.names = FALSE, quote = FALSE)
8 | 
--------------------------------------------------------------------------------
/distance_to_ensembleTSS.sh:
--------------------------------------------------------------------------------
1 | # One zero-length interval per transcript: field 2 on "+" strand, field 3 otherwise.
2 | # FS/OFS are set in BEGIN so that the first record is split on tabs as well.
3 | awk 'BEGIN{FS=OFS="\t"}{if($6=="+")print $1,$2,$2;else print $1,$3,$3}' /project/ibilab/library/annotation/hg19/hg19_ensembl.bed12 | sort -k1,1 -k2,2n | uniq > hg19_TSS.bed
4 | # Field 7 is the distance appended by -d; only a value that is exactly -1 is recoded.
5 | bedtools closest -a multiMTL.bed -b hg19_TSS.bed -d -t first | cut -f7 | sed "s|^-1$|1000000|" > t1
6 | R --no-save < log/distance_to_ensembleTSS.R
7 | rm t1 hg19_TSS.bed
8 | 
--------------------------------------------------------------------------------
/get_csv.R:
--------------------------------------------------------------------------------
1 | library(corrplot)
2 | library(ggplot2)
3 | library(reshape2)
4 | library(plyr)
5 | library(data.table)
6 | 
7 | mtl <- read.table('multiMTL.bed', sep = "\t", header = FALSE, stringsAsFactors = FALSE)
8 | mtl$ID <- paste0("MTL_", rownames(mtl))
9 | colnames(mtl) <- c("chr", "start", "end", "ID")
10 | 
11 | in.mtl <- read.table('InMTL.txt', sep = "\t", header = TRUE, stringsAsFactors = FALSE)
12 | mtl <- cbind(mtl, in.mtl)
13 | rm(in.mtl)
14 | 
15 | auc <- read.table('AUC.txt', sep = "\t", header = TRUE, stringsAsFactors = FALSE)
16 | rownames(auc) <- mtl$ID
17 | 
18 | ## group = sample name up to the first ".", marker = sample name after the last "."
19 | sample.list <- as.data.frame(colnames(auc))
20 | colnames(sample.list) <- "sample"
21 | sample.list$group <- as.factor(gsub("\\..*$", "", sample.list$sample))
22 | sample.list$marker <- as.factor(gsub("^.*\\.", "", sample.list$sample))
23 | 
24 | ## log2(mean AUC + 1) per mark and group, written as <mark>.<Y|O|A>.auc.
25 | ## The column order (mark-major; Y, O, A within a mark) must not change:
26 | ## add_rna.R renames these columns by position.
27 | marks <- c("CTCF", "Rad21", "H3K27ac", "H3K9ac", "H3K122ac", "H3K4me1")
28 | group.prefix <- c(Y = "Y", O = "O", A = "AD")  # column code -> sample-name prefix
29 | for (mark in marks) {
30 |   for (code in names(group.prefix)) {
31 |     ## exact group label plus literal mark name, instead of an unanchored regex in
32 |     ## which "." matches any character
33 |     keep <- sample.list$group == group.prefix[[code]] & grepl(mark, colnames(auc), fixed = TRUE)
34 |     if (!any(keep)) {
35 |       warning("no AUC column for group ", group.prefix[[code]], " and mark ", mark,
36 |               "; ", mark, ".", code, ".auc will be NaN")
37 |     }
38 |     ## drop = FALSE keeps a data frame even when a single sample matches
39 |     mtl[[paste0(mark, ".", code, ".auc")]] <- log2(rowMeans(auc[, keep, drop = FALSE]) + 1)
40 |   }
41 | }
42 | rm(mark, code, keep)
43 | 
44 | write.csv(mtl, "MTL.result.csv", row.names = FALSE)
45 | 
--------------------------------------------------------------------------------
/get_venn.R:
--------------------------------------------------------------------------------
1 | library(plyr)
2 | library(data.table)
3 | library(venneuler)
4 | 
5 | mtl <- read.csv('MTL.result.csv', stringsAsFactors = FALSE)
6 | 
7 | ###########################
8 | ## replace with ensembleTSS
9 | #d <- read.csv('distance_to_ensembleTSS.csv',header=T)
10 | #mtl$dist.to.nearest.gene <- d$Distance
11 | #rm(d)
12 | ###########################
13 | 
14 | ## a peak without a gene distance (NA) is not "within 1kb" of a gene
15 | mtl$group1 <- ">1kb"
16 | mtl$group1[which(abs(mtl$dist.to.nearest.gene) <= 1000)] <- "within.1kb"
17 | mtl$group2 <- "no.H3K4me1"
18 | mtl$group2[which((mtl$In.H3K4me1.Y + mtl$In.H3K4me1.O + mtl$In.H3K4me1.A) > 0)] <- "has.H3K4me1"
19 | table(subset(mtl, select = c("group1", "group2")))
20 | 
21 | #################
22 | 
23 | acetyl.marks <- c("H3K27ac", "H3K9ac", "H3K122ac")
24 | ## presence pattern "a-b-c": 1/0 for H3K27ac, H3K9ac, H3K122ac, in that order
25 | patterns <- c("0-0-0", "0-0-1", "0-1-0", "0-1-1", "1-0-0", "1-0-1", "1-1-0", "1-1-1")
26 | 
27 | ## Number of rows per presence pattern for a three-column table of counts.
28 | ## All eight patterns are always reported (0 when absent), so the per-group
29 | ## columns line up and no pattern is lost.
30 | count_patterns <- function(present) {
31 |   code <- paste0(as.numeric(present[, 1] > 0), "-", as.numeric(present[, 2] > 0), "-", as.numeric(present[, 3] > 0))
32 |   as.vector(table(factor(code, levels = patterns)))
33 | }
34 | 
35 | ## Pattern counts for one subset of peaks: per group (Y, O, A) and for "any",
36 | ## where a mark counts as present if it is present in at least one group.
37 | venn_counts <- function(part) {
38 |   per.group <- lapply(c(Y = "Y", O = "O", A = "A"), function(g) {
39 |     count_patterns(part[, paste0("In.", acetyl.marks, ".", g), drop = FALSE])
40 |   })
41 |   pooled <- as.data.frame(lapply(acetyl.marks, function(m) {
42 |     part[[paste0("In.", m, ".Y")]] + part[[paste0("In.", m, ".O")]] + part[[paste0("In.", m, ".A")]]
43 |   }))
44 |   data.frame(group = patterns, Y = per.group$Y, O = per.group$O, A = per.group$A,
45 |              any = count_patterns(pooled), stringsAsFactors = FALSE)
46 | }
47 | 
48 | ## one table per (distance class, H3K4me1 class); printed so that a
49 | ## non-interactive run keeps the results
50 | venn.tables <- list()
51 | for (g1 in c("within.1kb", ">1kb")) {
52 |   for (g2 in c("has.H3K4me1", "no.H3K4me1")) {
53 |     key <- paste(g1, g2, sep = " / ")
54 |     venn.tables[[key]] <- venn_counts(mtl[mtl$group1 == g1 & mtl$group2 == g2, ])
55 |     cat("\n##", key, "\n")
56 |     print(venn.tables[[key]])
57 |   }
58 | }
59 | rm(g1, g2, key)
60 | 
--------------------------------------------------------------------------------
/main.sh:
-------------------------------------------------------------------------------- 1 | ## get multiMTL table1 with H3K27ac/H3K9ac/H3K122ac 2 | for i in `ls ../maskedMTL/MTL.* | grep -v "gain\|loss\|Y\|O\|A"`; do ln -s $i .;done 3 | cat MTL.H3K27ac.bed MTL.H3K9ac.bed MTL.H3K122ac.bed | sort -k1,1 -k2,2n | bedtools merge > multiMTL.bed 4 | R --no-save < log/peak_size.R 5 | rm MTL.*.bed 6 | 7 | ## get presence/absence in each 8 | for i in `ls ../maskedMTL/MTL.* | grep "Y.bed\|O.bed\|A.bed"`; do ln -s $i .;done 9 | for i in `ls MTL.*.bed|sed "s|.bed||g"`; do bedtools intersect -a multiMTL.bed -b $i.bed -c | cut -f4 > In.$i;done 10 | for i in `ls In.*`; do echo "$i" | cat - $i > $i.tmp; done 11 | paste In.MTL.CTCF.*.tmp In.MTL.Rad21.*.tmp In.MTL.H3K27ac.*.tmp In.MTL.H3K9ac.*.tmp In.MTL.H3K122ac.*.tmp In.MTL.H3K4me1.*.tmp | sed "s|In.MTL.|In.|g" > InMTL.txt 12 | rm MTL.*.bed In.MTL.* 13 | 14 | ## parse AUC from individual-mark-MTL AUC.txt files 15 | # H3K27ac 16 | echo -e "#chr\tstart\tend" |cat - ../run_H3K27ac/peak_calling.v2/MTL.bed > t1 17 | paste t1 ../run_H3K27ac/peak_calling.v2/AUC.txt > t2 18 | bedtools intersect -a multiMTL.bed -b t2 -wa -wb > t3 19 | R --no-save < log/summarize_AUC.R 20 | mv AUC.txt AUC.H3K27ac.txt 21 | rm t1 t2 t3 22 | # H3K9ac 23 | echo -e "#chr\tstart\tend" |cat - ../run_H3K9ac/peak_calling.v2/MTL.bed > t1 24 | paste t1 ../run_H3K9ac/peak_calling.v2/AUC.txt > t2 25 | bedtools intersect -a multiMTL.bed -b t2 -wa -wb > t3 26 | R --no-save < log/summarize_AUC.R 27 | mv AUC.txt AUC.H3K9ac.txt 28 | rm t1 t2 t3 29 | # H3K122ac 30 | echo -e "#chr\tstart\tend" |cat - ../run_H3K122ac/peak_calling.v2/MTL.bed > t1 31 | paste t1 ../run_H3K122ac/peak_calling.v2/AUC.txt > t2 32 | bedtools intersect -a multiMTL.bed -b t2 -wa -wb > t3 33 | R --no-save < log/summarize_AUC.R 34 | mv AUC.txt AUC.H3K122ac.txt 35 | rm t1 t2 t3 36 | # 37 | paste AUC.H3K27ac.txt AUC.H3K9ac.txt AUC.H3K122ac.txt > AUC.txt 38 | rm AUC.H3K27ac.txt AUC.H3K9ac.txt AUC.H3K122ac.txt 39 | 40 
| ## get MTL.result.csv 41 | R --no-save < log/get_csv.R 42 | 43 | ## add result of pairwise comparison 44 | R --no-save < log/pairwise_comparison.R 45 | 46 | ## add nearest gene 47 | annotatePeaks.pl multiMTL.bed hg19 > multiMTL.anno.txt 48 | R --no-save < log/add_nearest_gene.R 49 | 50 | ## add rna result 51 | ln -s ../brain.rna/brain.rna.csv . 52 | R --no-save < log/add_rna.R 53 | 54 | ## has CTCF/Rad21 within 5kb 55 | bedtools closest -a multiMTL.bed -b ../run_CTCF/peak_calling.v2/MTL.bed -t "first" -d | awk '{print($7<=5000)?1:0}' > MTL.hasCTCF_5kb.bed 56 | bedtools closest -a multiMTL.bed -b ../run_Rad21/peak_calling.v2/MTL.bed -t "first" -d | awk '{print($7<=5000)?1:0}' > MTL.hasRad21_5kb.bed 57 | 58 | ## collapse MTL table so that each transcript appear once (record only the nearest peak to its TSS) 59 | R --no-save < log/MTL.result.collapsed.R 60 | 61 | ## count multiMTLs for venn diagram 62 | R --no-save < log/get_venn.R 63 | 64 | ## overlap with PrefrontalCortex K9ac peaks 65 | bedtools intersect -a multiMTL.bed -b ../run_ROSMAP_PrefrontalCortex_H3K9ac/differential/Again.bed -c | awk '{print ($4>0)?"gain":"-"}' > t1 66 | bedtools intersect -a multiMTL.bed -b ../run_ROSMAP_PrefrontalCortex_H3K9ac/differential/Aloss.bed -c | awk '{print ($4>0)?"loss":"-"}' > t2 67 | paste -d- t1 t2 | sed "s|---|nonsig|g" | sed "s|--||g" > PrefrontalCortex.sig.txt 68 | rm t1 t2 69 | 70 | ## overlap with PrefrontalCortex K9ac Tau-burden peaks 71 | bedtools intersect -a multiMTL.bed -b ../run_ROSMAP_PrefrontalCortex_H3K9ac/ROSMAP/peaks_correlated_with_tau.pos.bed -c | awk '{print ($4>0)?"gain":"-"}' > t1 72 | bedtools intersect -a multiMTL.bed -b ../run_ROSMAP_PrefrontalCortex_H3K9ac/ROSMAP/peaks_correlated_with_tau.neg.bed -c | awk '{print ($4>0)?"loss":"-"}' > t2 73 | paste -d- t1 t2 | sed "s|---|nonsig|g" | sed "s|--||g" > PrefrontalCortex_TauBurden.sig.txt 74 | rm t1 t2 75 | 76 | -------------------------------------------------------------------------------- 
/pairwise_comparison.R:
--------------------------------------------------------------------------------
1 | library(corrplot)
2 | library(ggplot2)
3 | library(reshape2)
4 | library(plyr)
5 | library(data.table)
6 | 
7 | MTL <- read.csv('MTL.result.csv')
8 | AUC <- read.table('AUC.txt', sep = "\t", header = TRUE, stringsAsFactors = FALSE)
9 | 
10 | ## Row-wise two-sample Wilcoxon test of x1 against x2 (same rows, different samples).
11 | ## Returns one row per input row with
12 | ##   pval  Wilcoxon p-value
13 | ##   qval  FDR-adjusted p-value (adjusted over all rows)
14 | ##   diff  mean(x2) - mean(x1)
15 | compare_groups <- function(x1, x2) {
16 |   ## matrices make the per-row extraction cheap compared with data-frame rows
17 |   m1 <- as.matrix(x1)
18 |   m2 <- as.matrix(x2)
19 |   pval <- vapply(seq_len(nrow(m1)), function(i) {
20 |     wilcox.test(as.numeric(m1[i, ]), as.numeric(m2[i, ]))$p.value
21 |   }, numeric(1))
22 |   data.frame(pval = pval,
23 |              qval = p.adjust(pval, method = "fdr"),
24 |              diff = rowMeans(m2) - rowMeans(m1))
25 | }
26 | 
27 | ## All three pairwise comparisons for one mark. Samples are the AUC columns whose
28 | ## name contains `mark`; the group is the part of the name before the first ".".
29 | ## Columns are named <mark>.<first>.vs.<second>.<pval|qval|diff>.
30 | pairwise_for_mark <- function(mark) {
31 |   auc <- AUC[, colnames(AUC) %like% mark, drop = FALSE]
32 |   group <- gsub("\\..*$", "", colnames(auc))
33 |   pairs <- list(Y.vs.O = c("Y", "O"), Y.vs.AD = c("Y", "AD"), O.vs.AD = c("O", "AD"))
34 |   results <- lapply(names(pairs), function(pair.name) {
35 |     first <- auc[, group == pairs[[pair.name]][1], drop = FALSE]
36 |     second <- auc[, group == pairs[[pair.name]][2], drop = FALSE]
37 |     out <- compare_groups(first, second)
38 |     colnames(out) <- paste0(mark, ".", pair.name, ".", colnames(out))
39 |     out
40 |   })
41 |   do.call(cbind, results)
42 | }
43 | 
44 | ## The order of marks and comparisons fixes the column order of MTL.result.csv,
45 | ## which add_rna.R relies on when it renames columns by position.
46 | for (mark in c("H3K27ac", "H3K9ac", "H3K122ac")) {
47 |   MTL <- cbind(MTL, pairwise_for_mark(mark))
48 | }
49 | rm(mark)
50 | 
51 | ################################
52 | 
53 | write.csv(MTL, 'MTL.result.csv', row.names = FALSE)
54 | 
--------------------------------------------------------------------------------
/peak_size.R:
--------------------------------------------------------------------------------
1 | library(ggplot2)
2 | 
3 | ## peak widths (column 3 - column 2) of one BED file, labelled with `label`
4 | peak_widths <- function(bed.file, label) {
5 |   bed <- read.table(bed.file, header = FALSE, sep = "\t")
6 |   data.frame(peakwidth = bed[, 3] - bed[, 2],
7 |              group = rep(label, nrow(bed)),
8 |              stringsAsFactors = FALSE)
9 | }
10 | 
11 | ## label -> file; the order here is also the order of the factor levels
12 | peak.sets <- c(multiMTL = "multiMTL.bed",
13 |                MTL.H3K27ac = "MTL.H3K27ac.bed",
14 |                MTL.H3K9ac = "MTL.H3K9ac.bed",
15 |                MTL.H3K122ac = "MTL.H3K122ac.bed")
16 | 
17 | d <- do.call(rbind, Map(peak_widths, peak.sets, names(peak.sets)))
18 | d$group <- factor(d$group, names(peak.sets))
19 | 
20 | pdf('peak_size.pdf', height = 4, width = 6)
21 | ggplot(d, aes(log10(peakwidth), col = group)) +
22 |   #geom_density() +
23 |   geom_freqpoly() +
24 |   theme_bw(base_size = 16) +
25 |   theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
26 |   geom_hline(yintercept = 0)
27 | dev.off()
28 | 
--------------------------------------------------------------------------------
/shared_peaks.R:
--------------------------------------------------------------------------------
1 | d <- read.csv('MTL.result.log2.csv',check.names=F)
2 | 
3 | 
4 | tmp <- 
read.table('EntorhinalCortex.sig.txt',header=T) 5 | d$entor <- tmp[,1] 6 | tmp <- read.table('PrefrontalCortex.sig.txt',header=F) 7 | d$prefr <- tmp[,1] 8 | tmp <- read.table('PrefrontalCortex_TauBurden.sig.txt',header=F) 9 | d$prefr.tau <- tmp[,1] 10 | rm(tmp) 11 | 12 | tmp <- subset(d,select=c("H3K122ac.category","prefr.tau")) 13 | tmp[,3] <- "others" 14 | tmp[tmp[,1]=="DD_Gain" ,3] <- "DD_Gain" 15 | tmp[tmp[,1]=="DD_Loss" ,3] <- "DD_Loss" 16 | as.data.frame(table(paste0(tmp[,3],":",tmp[,2]))) 17 | 18 | 19 | 20 | write.table(subset(d,H3K27ac.category=="DD_Gain" & entor=="gain",select="Locus"),'~/tmp/DD_gain_EntorGain.bed',row.names=F,col.names=F,quote=F) 21 | 22 | write.table(subset(d,H3K27ac.category=="DD_Loss" & entor=="loss",select="Locus"),'~/tmp/DD_loss_EntorLoss.bed',row.names=F,col.names=F,quote=F) 23 | 24 | write.table(subset(d,H3K9ac.category=="DD_Gain" & prefr.tau=="gain",select="Locus"),'~/tmp/DD_gain_PrefrTauPos.bed',row.names=F,col.names=F,quote=F) 25 | 26 | write.table(subset(d,H3K9ac.category=="DD_Loss" & prefr.tau=="loss",select="Locus"),'~/tmp/DD_loss_PrefrTauNeg.bed',row.names=F,col.names=F,quote=F) 27 | 28 | -------------------------------------------------------------------------------- /summarize_AUC.R: -------------------------------------------------------------------------------- 1 | library(plyr) 2 | library(data.table) 3 | 4 | ## read AUC.txt from single mark MTL 5 | d <- read.table('t3',header=F,sep="\t") 6 | # multiMTL 7 | dA <- d[,1:3] 8 | colnames(dA) <- c("chr","start","end") 9 | # MTL 10 | dB <- d[,4:6] 11 | colnames(dB) <- c("chr","start","end") 12 | # AUC 13 | dC <- d[,7:dim(d)[2]] 14 | h <- read.table('t2',header=T,sep="\t",comment.char="",check.names=F) 15 | colnames(dC) <- colnames(h)[4:dim(h)[2]] 16 | rm(h) 17 | 18 | # use AUC*width to aggregate 19 | dA$coordinate <- paste0(dA$chr,":",dA$start,"-",dA$end) 20 | dB$width <- dB$end-dB$start 21 | dC <- dC * dB$width/1000 22 | d <- cbind( subset(dA,select="coordinate") , 
subset(dB,select="width") , dC ) 23 | D <- aggregate(d[,-1] ,by=list(d$coordinate),FUN="sum") 24 | colnames(D)[1] <- "coordinate" 25 | 26 | ## divide aggregated sum by aggregated width 27 | DA <- D[,1:2] 28 | DA$chr <- gsub(":.*$","",DA$coordinate) 29 | DA$start <- gsub("-.*$","",gsub(".*:","",DA$coordinate)) 30 | DA$end <- gsub(".*-","",DA$coordinate) 31 | DB <- D[,3:dim(D)[2]] 32 | DB <- DB*1000/DA$width 33 | D <- cbind( subset(DA,select=c("coordinate")) ,DB) 34 | # 35 | 36 | ## join to multiMTL table and replace zeros 37 | mtl <- read.table('multiMTL.bed',header=F,sep="\t") 38 | colnames(mtl) <- c("chr","start","end") 39 | mtl$coordinate <- paste0(mtl$chr,":",mtl$start,"-",mtl$end) 40 | mtl <- join(mtl,D) 41 | D <- mtl[,c(-1:-4)] 42 | D[is.na(D)] <- 0 43 | 44 | write.table(D,'AUC.txt',sep="\t",row.names=F,quote=F) 45 | 46 | 47 | --------------------------------------------------------------------------------