├── README ├── SQL_sheet_code.sql ├── phyper.r ├── list2DF.r ├── write.GOhyper.r ├── source_https.r ├── plot_bar.r ├── pubmed_trend.pl ├── get.ppiNCBI.r ├── pubmed_trend.r ├── GO_over.r ├── population.plot.r ├── SNPSpD.r ├── slopegraph.r ├── table.graph.r ├── Affy_QA.r └── hapmap.LD.r /README: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SQL_sheet_code.sql: -------------------------------------------------------------------------------- 1 | -- create column and fill it based on value from another. 2 | ALTER TABLE first_pain_icd_primary ADD COLUMN genderBin INT(5) NOT NULL; 3 | UPDATE first_pain_icd_primary SET genderBin = CASE WHEN gender = "M" THEN -1 ELSE 1 END 4 | -------------------------------------------------------------------------------- /phyper.r: -------------------------------------------------------------------------------- 1 | # of white balls drawn w/o replacement 2 | q=6-1 3 | # of white ball in the urn 4 | m=28 5 | # of black balls 6 | n=15006-28 7 | # of ball drawn from the urn 8 | k=2426 9 | phyper(q, m, n, k, lower.tail = FALSE) 10 | # [1] 0.2935694 11 | rm(q,m,n,k) 12 | 13 | -------------------------------------------------------------------------------- /list2DF.r: -------------------------------------------------------------------------------- 1 | ## Transform a list of vector of the same length to a data.frame 2 | list2DF <- function(list){ 3 | l <- as.vector(unlist(lapply(list, length))) 4 | if(length(unique(l))>1) stop("list elements not some length. Cannot transform to data.frame") 5 | df <- as.data.frame(matrix(as.vector(unlist(list)), ncol=l[1])) 6 | return(df) 7 | } -------------------------------------------------------------------------------- /write.GOhyper.r: -------------------------------------------------------------------------------- 1 | # 2 | # write.GOhyper.r 3 | # 4 | # Created by David Ruau on 2012-01-23. 5 | # Dept. of Pediatrics/Div. Systems Medicine, Stanford University. 6 | # 7 | # 8 | ##################### USAGE ######################### 9 | # 10 | # write.GOhyper(mfhyper, filename="results.xlsx") 11 | # 12 | ##################################################### 13 | 14 | write.GOhyper <- function(mfhyper, filename='GO_results.xlsx') { 15 | require(GOstats) 16 | require(multtest) 17 | require(xlsx) 18 | 19 | gogo <- summary(mfhyper) 20 | gogo$adjPvalue <- mt.rawp2adjp(gogo$Pvalue, proc="BH")$adjp[,2] 21 | gogo <- gogo[,c(1:2,8,3:7)] 22 | gogo <- gogo[order(gogo$OddsRatio),] 23 | write.xlsx(gogo, file=filename) 24 | print(paste('Results written in', filename)) 25 | return(gogo) 26 | } 27 | -------------------------------------------------------------------------------- /source_https.r: -------------------------------------------------------------------------------- 1 | # 2 | # source_https.r 3 | # 4 | # 2012-02-14. 5 | # Adapted from from http://tonybreyal.wordpress.com/2011/11/24/source_https-sourcing-an-r-script-from-github/ 6 | # 7 | ##################### USAGE ######################### 8 | # 9 | # source_https("https://github.com/bobthecat/codebox/blob/master/GO_over.r") 10 | # 11 | ##################################################### 12 | 13 | source_https <- function(url, ...) { 14 | # load package 15 | require(RCurl) 16 | # using the raw option 17 | if(length(grep('https://github.com', url))==1) stop("github RAW source required") 18 | # parse and evaluate each .R script 19 | sapply(c(url, ...), function(u) { 20 | eval(parse(text = getURL(u, followlocation = TRUE, cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl"))), envir = .GlobalEnv) 21 | }) 22 | } 23 | -------------------------------------------------------------------------------- /plot_bar.r: -------------------------------------------------------------------------------- 1 | # 2 | # plot_bar.r 3 | # 4 | # Created by David Ruau on 2011-03-29. 5 | # Department of Pediatrics/Div. System Medicine Stanford University. 6 | # 7 | ##################### USAGE ######################### 8 | # source('sex_diff_pain.r') 9 | # sex.pub <- sex_diff_pain(search.str = 'Sex+Characteristics[mh] AND Pain[mh]', year.span=1970:2011) 10 | # par(las=1) 11 | # plot_bar(x=sex.pub[1:41], linecol='royalblue', cols="Reds", myTitle='Number of publication per year for\n"sex differences" and "pain"') 12 | # 13 | ##################################################### 14 | 15 | plot_bar <- function(x=sex.pub, linecol="royalblue", cols, addArg=TRUE) { 16 | require("RColorBrewer") 17 | # colorfunction = colorRampPalette(brewer.pal(9, cols)) 18 | # mycolors = colorfunction(length(x)) 19 | ## This draw the bar plot and save it in an object... 20 | # bp <- barplot(x, col=mycolors) 21 | bp <- barplot(x, col=cols, add=addArg) 22 | fit <- stats::lowess(x, f=1/3) 23 | lines(x=bp, fit$y, col=linecol, lwd=3) 24 | } 25 | -------------------------------------------------------------------------------- /pubmed_trend.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/perl -w 2 | # 3 | # sex_diff_pain.pl 4 | # 5 | # Created by David Ruau on 2011-02-17. 6 | # Department of Pediatrics/Div. System Medicine Stanford University. 7 | # 8 | ##################### USAGE ######################### 9 | # 10 | # Query PubMed with Eutils tools 11 | # 12 | ##################################################### 13 | use Bio::TGen::EUtils; 14 | 15 | use strict; 16 | 17 | my $queryString = $ARGV[0]; 18 | 19 | ## query info 20 | my $eu = Bio::TGen::EUtils->new( 'tool' => 'pubmed_trend.pl', 21 | 'email' => 'REPLACE_ME@gmail.com' ); 22 | 23 | ## EFetch 24 | my $query = $eu->esearch( db => 'pubmed', 25 | term => $queryString, 26 | usehistory => 'n' ); 27 | 28 | $query->write_raw( file => 'tempfile.xml' ); 29 | 30 | if (-z 'tempfile.xml') { 31 | # one more time 32 | my $query = $eu->esearch( db => 'pubmed', 33 | term => $queryString, 34 | usehistory => 'n' ); 35 | 36 | $query->write_raw( file => 'tempfile.xml' ); 37 | if (-z 'tempfile.xml') { 38 | open (FILE, '>', 'tempfile.xml') or die 'Could not open file, $!'; 39 | 40 | print FILE "hello world"; 41 | close (FILE); 42 | } 43 | } -------------------------------------------------------------------------------- /get.ppiNCBI.r: -------------------------------------------------------------------------------- 1 | # 2 | # get.ppiNCBI.r 3 | # 4 | # Created by David Ruau on 2012-06-05. 5 | # Dept. of Pediatrics/Div. Systems Medicine, Stanford University. 6 | # 7 | # 8 | ##################### USAGE ######################### 9 | # see http://brainchronicle.blogspot.com/2012/05/obtain-protein-protein-interaction-from.html 10 | # ppi <- get.ppiNCBI(c("777", "57619")) 11 | # 12 | ##################################################### 13 | 14 | get.ppiNCBI <- function(g.n) { 15 | require(XML) 16 | ppi <- data.frame() 17 | for(i in 1:length(g.n)){ 18 | o <- htmlParse(paste("http://www.ncbi.nlm.nih.gov/gene/", g.n[i], sep='')) 19 | # check if interaction table exists 20 | exist <- length(getNodeSet(o, "//table//th[@id='inter-prod']"))>0 21 | if(exist){ 22 | p <- getNodeSet(o, "//table") 23 | ## need to know which table is the good one 24 | for(j in 1:length(p)){ 25 | int <- readHTMLTable(p[[j]]) 26 | if(colnames(int)[2]=="Interactant"){break} 27 | } 28 | ppi <- rbind(ppi, data.frame(egID=g.n[i], intSymbol=int$`Other Gene`)) 29 | } 30 | # play nice! and avoid being kicked out from NCBI servers 31 | Sys.sleep(1) 32 | } 33 | if(dim(ppi)[1]>0){ 34 | ppi <- unique(ppi) 35 | print(paste(dim(ppi)[1], "interactions found")) 36 | return(ppi) 37 | } else{ 38 | print("No interaction found") 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /pubmed_trend.r: -------------------------------------------------------------------------------- 1 | # 2 | # sex_diff_pain.r 3 | # 4 | # Created by David Ruau on 2011-02-17. 5 | # Department of Pediatrics/Div. System Medicine Stanford University. 6 | # 7 | ##################### USAGE ######################### 8 | # 9 | # source('pubmed_trend.r') 10 | # sex.pub <- pubmed_trend(search.str = 'Sex+Characteristics[mh] AND Pain[mh]', year.span=1970:2011) 11 | # 12 | ##################################################### 13 | 14 | pubmed_trend <- function(search.str = 'Sex+Characteristics[mh] AND Pain[mh]', year.span=1970:2011) { 15 | require(XML) 16 | require(RCurl) 17 | 18 | results <- NULL 19 | tmpf <- "./tempfile.xml" 20 | ## clean before 21 | system(paste("rm", tmpf)) 22 | 23 | for(i in year.span){ 24 | queryString <- paste(search.str, ' AND ', i, '[dp]', sep="") 25 | print(paste('queryString:', queryString)) 26 | sysString <- paste('./pubmed_trend.pl "', queryString,'"', sep="") 27 | system(sysString) 28 | 29 | xml <- xmlTreeParse(tmpf, useInternalNodes=TRUE) 30 | pubTerm <- as.numeric(xmlValue(getNodeSet(xml, "//Count")[[1]])) 31 | print(paste("#______num pub for",i,":",pubTerm)) 32 | rm(xml) 33 | results <- append(results, pubTerm) 34 | ## avoid being kicked out! 35 | Sys.sleep(1) 36 | } 37 | names(results) <- year.span 38 | ## clean after 39 | system(paste("rm", tmpf)) 40 | 41 | return(results) 42 | } -------------------------------------------------------------------------------- /GO_over.r: -------------------------------------------------------------------------------- 1 | # 2 | # GO_over.r 3 | # 4 | # Created by David Ruau on 2012-01-23. 5 | # Dept. of Pediatrics/Div. Systems Medicine, Stanford University. 6 | # 7 | # 8 | ##################### USAGE ######################### 9 | # 10 | # mfhyper <- GO_over(entrezUniverse, glist=glist, annot='org.Hs.eg.db') 11 | # geneMappedCount(mfhyper) 12 | # htmlReport(mfhyper, file="results.html", label="ceci n'est pas une pipe") 13 | # 14 | ##################################################### 15 | 16 | GO_over <- function(universe, glist, annot='HsAgilentDesign026652.db', ontology='BP', cutoff=0.001) { 17 | require(GOstats) 18 | universe <- unique(as.character(universe)) 19 | glist <- unique(as.character(glist)) 20 | 21 | # conditional hypergeometric test 22 | # "uses the structure of the GO graph to estimate for each term whether or 23 | # not there is evidence beyond that which is provided by the term’s children 24 | # to call the term in question statistically overrepresented." - GOstatsHyperG vignette 25 | params <- new("GOHyperGParams", 26 | geneIds=glist, 27 | universeGeneIds=universe, # this is a gene list of all the genes tested on the microarray 28 | annotation=annot, # annotation pacakge for the microarray in question 29 | ontology=ontology, # either BP, CC, or MF 30 | pvalueCutoff=cutoff, # for uncorrected pvalues 31 | conditional=TRUE, # Use conditional algorithms or standard Hypergeometric test 32 | testDirection="over") 33 | 34 | mfhyper = hyperGTest(params) 35 | return(mfhyper) 36 | } 37 | 38 | 39 | -------------------------------------------------------------------------------- /population.plot.r: -------------------------------------------------------------------------------- 1 | # 2 | # population.plot.r 3 | # 4 | # Created by David Ruau on 2011-07-09. 5 | # 2011 Dept. of Pediatrics/Div. Systems Medicine 6 | # Stanford University. 7 | # 8 | # 9 | ##################### USAGE ######################### 10 | # 11 | # m <- c(2300, 2500, 2400, 2000, 1900, 1800, 1000) 12 | # f <- c(2400, 2200, 2500, 3000, 2200, 2000, 1500) 13 | # population.plot(m, f, labels=c('0-18', '19-25', '26-40', '40-50', '51-65', '66-77', '78-99')) 14 | # 15 | ##################################################### 16 | 17 | population.plot <- function(male.count, female.count, sub.title="count", labels=NULL, mycex.axis=1) { 18 | xmax <- max(c(male.count, female.count)) 19 | op <- par() 20 | par(mfrow=c(1,2), mar=c(4,2,4,2)) 21 | 22 | barplot(-male.count, axes=F, axisnames=F, space=0, xlim=c(-xmax, 0), horiz=T) 23 | mtext('Male', side=3, at=0, line=1, cex=1.5, adj=1) 24 | mtext(sub.title, side=1, line=2, at=-xmax/4, adj=1) 25 | grid(nx=NULL, ny=NA, col='white', lty="solid") 26 | # axis(side=1, at=c(0,-1000, -2000, -3000), labels=c(0,1000, 2000, 3000)) 27 | axis(side=1, cex.axis=mycex.axis) 28 | if(!is.null(labels)){ 29 | axis(side=4, at=seq(0.5, length(labels)-0.5, 1), labels=labels ,las=1, tick=F, adj=0.5) 30 | } 31 | else{stop("Labels Y-axis needed")} 32 | 33 | # par(mar=c(4,2,4,2)) 34 | barplot(female.count, axes=F, axisnames=F, space=0, xlim=c(0,xmax), horiz=T) 35 | mtext('Female', side=3, at=0, line=1, cex=1.5, adj = 0) 36 | mtext(sub.title, side=1, line=2, at=xmax/4, adj=0) 37 | grid(nx=NULL, ny=NA, col='white', lty="solid") 38 | axis(side=1, cex.axis=mycex.axis) 39 | # par reset 40 | par(op) 41 | } -------------------------------------------------------------------------------- /SNPSpD.r: -------------------------------------------------------------------------------- 1 | ## Method for computing the effective number of variable (SNPs) based on the 2 | # spectral decomposition (SpD) of matrices of pairwise LD between SNPs 3 | # See Nyholt et al. 2004 (PMID:14997420) 4 | library(snpMatrix) 5 | library(NCBI2R) 6 | library(annotate) 7 | 8 | # First you need to download the chromosome file from hapmap 9 | # To know the chromosome use: 10 | GetSNPInfo("rs12345")$chr 11 | 12 | chrURL <- "ftp://ftp.ncbi.nlm.nih.gov/hapmap/genotypes/2010-08_phaseII+III/forward/genotypes_chr8_CEU_r28_nr.b36_fwd.txt.gz" 13 | chr <- read.HapMap.data(chrURL) 14 | 15 | # I am usually interested in only a subset of the SNPs. Here subSNP is a vector 16 | # of rsxxxx SNP IDs 17 | # Suppress suppress the following line if you want to do it for all the SNPs on 18 | # the chromosome 19 | chr$snp.data@.Data <- chr$snp.data@.Data[,subSNP] 20 | ldinfo <- ld.snp(chr$snp.data, depth=dim(chr$snp.data)[2]) 21 | plot(ldinfo, filename='ld_plot.eps') 22 | 23 | # matrix massage to make it square 24 | ldinfo$rsq2 <- ldinfo$rsq2[,dim(ldinfo$rsq2)[2]:1] 25 | ldinfo$rsq2 <- cbind(0, ldinfo$rsq2) 26 | ldinfo$rsq2 <- rbind(ldinfo$rsq2, 0) 27 | ldinfo$rsq2 <- t(ldinfo$rsq2) 28 | # fill diagonal with 1 29 | for(i in 1:dim(ldinfo$rsq2)[1]){ 30 | ldinfo$rsq2[i,i]=1 31 | } 32 | 33 | # EIGEN values extraction 34 | e <- eigen(ldinfo$rsq2, symmetric=TRUE, only.values=TRUE) 35 | 36 | # The effective number of variable (Meff) 37 | Meff <- function(lambda){ 38 | return(1 + (length(lambda) - 1)*(1 - (var(lambda)/length(lambda)))) 39 | } 40 | 41 | Meff(e$values) 42 | 43 | ## final notes 44 | # reduction are not huge so don't be surprise 45 | # a few example 131 SNP were reduced to 122.4 46 | # 16 -> 11.3; 47 | # 51-> 38.7... 48 | -------------------------------------------------------------------------------- /slopegraph.r: -------------------------------------------------------------------------------- 1 | # 2 | # slopegraph.r 3 | # 4 | # Created by David Ruau on 2011-07-18. 5 | # 2011 Dept. of Pediatrics/Div. Systems Medicine 6 | # Stanford University. 7 | # 8 | # 9 | ##################### USAGE ######################### 10 | # data: data.frame in the same shape as the slopegraph is wanted 11 | # label.cex: magnification for numeric line labels from 0 to 1 12 | # axis.cex: magnificatoin for axis titles from 0 to 1 13 | # digits: number of significant digits to report 14 | # rounding.method: can be NULL, round or signif 15 | # ...: supplementary arguments supplied to par, usually margins 16 | # 17 | # EXAMPLE: 18 | # source('slopegraph.r') 19 | # pdf('slopegraph.pdf', height=7, width=8) 20 | # slopegraph(data = t(WorldPhones[,1:3]), mymain = "YEARS", mar=c(2, 5, 5, 5), label.cex=0.8, axis.cex=0.9) 21 | # dev.off() 22 | # 23 | # Tips: when values overlap try first to extend the height of you plot and if this does not work 24 | # round your value using the option rounding.method = 'round' and digits=0 25 | # 26 | ##################################################### 27 | 28 | slopegraph <- function(data, label.cex=0.8, axis.cex=0.9, digits = 2, rounding.method = NULL, mymain = "slopegraph", ...) { 29 | require(plotrix) 30 | if(!is.data.frame(data)){ 31 | data <- as.data.frame(data) 32 | } 33 | 34 | if(!is.null(rounding.method)){ 35 | data.temp <- .rd.method(rounding.method, width, digits) 36 | data.temp <- as.numeric(sprintf(fmt, as.matrix(data))) 37 | data <- as.data.frame(matrix(data.temp, nrow=nrow(data), ncol=ncol(data), dimnames=list(rownames(data), colnames(data)))) 38 | } 39 | 40 | old.par <- par(no.readonly = TRUE) 41 | par(...) 42 | matplot(t(data), type='b', pch=NA, axes=FALSE, xlab='', ylab='', lty='solid', col="grey", ...) 43 | for(i in 1:ncol(data)){ 44 | for(j in 1:nrow(data)){ 45 | boxed.labels(i, data[j,i], labels=data[j,i], bg='white', border = FALSE, cex=label.cex) 46 | } 47 | } 48 | mtext(text = rownames(data), side = 2, at=data[,1], line = 0.5, las=1, cex=axis.cex) 49 | mtext(text = colnames(data), side = 3, at=1:ncol(data), line = 1, cex=axis.cex) 50 | mtext(text = rownames(data), side = 4, at=data[,ncol(data)], line = 0.5, las=1, cex=axis.cex) 51 | title(main = mymain, line=3) 52 | par(old.par) 53 | } 54 | 55 | 56 | .rd.method <- function(rounding.method, width, digits){ 57 | rounding.character <- switch(match(rounding.method, c("round", "signif")), "f", "g") 58 | fmt = paste("%.", digits, rounding.character, sep = "") 59 | return(fmt) 60 | } 61 | 62 | 63 | -------------------------------------------------------------------------------- /table.graph.r: -------------------------------------------------------------------------------- 1 | # 2 | # table.graph.r 3 | # 4 | # Created by David Ruau on 2011-06-23. 5 | # Department of Pediatrics/Systems Medicine, 6 | # Stanford University. 7 | # 8 | # 9 | ##################### USAGE ######################### 10 | # Tufte table-graphic or slopegraph shown in the Tufte book for paired numeric data 11 | # "the visual display of quantitative information" p. 158 12 | # 13 | # Column names and row names will be used to label the plot 14 | # Depending on the length of your rownames the margins might have to be adjusted 15 | # 16 | # df: data frame with 2 column 17 | # line.col: vector length 2 with colors for the lines. Default to grey for 18 | # value going up and black for value going down 19 | # label.cex: magnification for x and y labels from 0 to 1 20 | # title.cex: magnificatoin for titles from 0 to 1 21 | # digits: number of significant digits to report 22 | # ...: supplementary arguments supplied to par, usually margins 23 | # 24 | # EXAMPLE: 25 | # source("table.graph.r") 26 | # table.graph(WorldPhones[,1:2], label.cex=0.7, title.cex=1.2, mar=c(5, 5, 1, 5)) 27 | # 28 | ##################################################### 29 | 30 | table.graph <- function(df, line.col=c("grey", "black"), label.cex=1, title.cex=1, width = 6, digits = 2, rounding.method = NULL, ...) { 31 | xmin <- min(df) 32 | xmax <- max(df) 33 | X1 <- as.numeric(as.vector(df[,1])) 34 | X2 <- as.numeric(as.vector(df[,2])) 35 | # original settings 36 | old.par <- par(no.readonly = TRUE) 37 | # par settings usually margins 38 | par(...) 39 | # rounding 40 | fmt <- .rd.method(rounding.method, width, digits) 41 | # left 42 | plot(rep(0, nrow(df)), X1, xlim=c(0,1), ylim=c(xmin, xmax), 43 | axes=FALSE, xlab='', ylab='', type='n') 44 | mtext(text=paste(rownames(df), sprintf(fmt, X1), sep=' '), side=2, at=X1, las=1, cex=label.cex) 45 | par(new=TRUE) 46 | # right 47 | plot(rep(1, nrow(df)), X2, xlim=c(0,1), ylim=c(xmin, xmax), 48 | axes=FALSE, xlab='', ylab='', type='n') 49 | mtext(text=paste(sprintf(fmt, X2), rownames(df), sep=' '), side=4, at=X2, las=1, cex=label.cex) 50 | # class label 51 | mtext(colnames(df)[1], side=3, at=0, cex=title.cex) 52 | mtext(colnames(df)[2], side=3, at=1, cex=title.cex) 53 | # lines 54 | segments(x0 = rep(0, nrow(df)), y0 = X1, x1 = rep(1, nrow(df)), y1 = X2, 55 | col=ifelse({X1 - X2} < 0, line.col[1], line.col[2])) 56 | # restore original settings 57 | par(old.par) 58 | } 59 | 60 | .rd.method <- function(rounding.method, width, digits){ 61 | if(is.null(rounding.method)){ 62 | fmt = "%s" 63 | } 64 | else{ 65 | rounding.character <- switch(match(rounding.method, c("round", "signif")), "f", "g") 66 | fmt = paste("%", width, ".", digits, rounding.character, sep = "") 67 | } 68 | return(fmt) 69 | } 70 | -------------------------------------------------------------------------------- /Affy_QA.r: -------------------------------------------------------------------------------- 1 | # 2 | # Affy_QA.r 3 | # 4 | # Created by David Ruau on 2011-07-26. 5 | # 2011 Dept. of Pediatrics/Div. Systems Medicine, Stanford University. 6 | # 7 | # 8 | ##################### USAGE ######################### 9 | # R function to perform automatically all the quality control test describe 10 | # in the book "Bioinformatics and Computational Biology Solutions Using R and Bioconductor" 11 | # from Gentleman, Carey, Huber, Irizarry and Dudoit; Springer. 12 | # argument: an affybatch 13 | # 14 | # EXAMPLE: 15 | # library(affydata) 16 | # data(Dilution) 17 | # Affy_QA(Dilution) 18 | # 19 | ##################################################### 20 | 21 | Affy_QA <- function(abatch) { 22 | require(affy) 23 | require(simpleaffy) 24 | require(RColorBrewer) 25 | require(affyPLM) 26 | 27 | if (class(abatch)!= 'AffyBatch') { 28 | stop("argument must be AffyBatch!") 29 | } 30 | 31 | # colors 32 | cols <- brewer.pal(12, "Set3") 33 | 34 | # Boxplot 35 | pdf(file='qa_boxplot.pdf', height=8, width=10) 36 | boxplot(abatch, col=cols, main="Unprocessed log scale probe-level data", xlab="If discrepancy, they are not conclusive\n Difference can be reduce by normalization") 37 | dev.off() 38 | 39 | # Histogram 40 | pdf(file='qa_histogram.pdf', height=8, width=8) 41 | hist(abatch, col=cols, lwd=2, xlab="Log(base2) intensities; Bimodal distribution indicate spatial artifact\n Second mode is the result of array(s) having abnormally high value") 42 | legend("topright", sampleNames(abatch), lty=1, lwd=2,col=cols) 43 | dev.off() 44 | 45 | #RNA degradation 46 | pdf(file="qa_RNAdeg.pdf", height=8, width=8) 47 | RNAdeg <- AffyRNAdeg(abatch) 48 | plotAffyRNAdeg(RNAdeg, cols=cols) 49 | legend("topleft", sampleNames(abatch), lty=1, lwd=2, col=cols) 50 | box() 51 | dev.off() 52 | 53 | # simpleaffy graph 54 | abatch.qc <- qc(abatch) 55 | pdf(file="qa_QC-simpleaffy.pdf", height=8, width=10) 56 | plot(abatch.qc) 57 | dev.off() 58 | 59 | pset <- fitPLM(abatch) 60 | 61 | # false color image control 62 | for (n in 1:length(abatch)) { 63 | filename <- paste("qa_QC",as.vector(sampleNames(abatch))[n],".png") 64 | png(file=filename, height=900, width=800) 65 | .img.Test(abatch,pset,n) 66 | dev.off() 67 | } 68 | 69 | # RLE plot 70 | pdf(file="qa_RLE.pdf", height=8, width= 8) 71 | Mbox(pset, col = cols, main ="RLE (Relative Log Expression)", xlab="Assuming that the majority of the gene are not changing\n Ideally these boxes would have small spread and be centered at M=0") 72 | dev.off() 73 | 74 | # NUSE plot 75 | pdf(file="qa_NUSE.pdf", height=8, width= 8) 76 | boxplot(pset, col=cols, main= "NUSE (Normalized Unscaled Standard Error)", xlab="High values of median NUSE are indicative of a problematic array") 77 | dev.off() 78 | } 79 | 80 | .img.Test <- function(batch,pset,x) { 81 | par(mfrow = c(2,2)) 82 | affy::image(batch[,x]) 83 | affy::image(pset, type = "weights", which = x) 84 | affy::image(pset, type = "resids", which = x) 85 | affy::image(pset, type = "sign.resids", which = x) 86 | } -------------------------------------------------------------------------------- /hapmap.LD.r: -------------------------------------------------------------------------------- 1 | # 2 | # hapmap.LD.r 3 | # 4 | # Created by David Ruau on 2011-04-23. 5 | # Department of Pediatrics/Div. Systems Medicine 6 | # Stanford University. 7 | # 8 | # 9 | ##################### USAGE ######################### 10 | # Input: 11 | # geneName: Entrez gene ID [if provided subSNP is ignored] 12 | # subSNP: [required if geneID is NULL] character vector; the rs SNP ID you want to 13 | # plot otherwise try to plot the entire chromosome 14 | # LD map (will fail) 15 | # chr: integer; chromosome number 16 | # hapmap.file: file location of HapMap chromosome file 17 | # downloaded using readHapMap.data from snpMatrix 18 | # package 19 | # ...: further argument for ld.snp function from snpMatrix 20 | # 21 | # USAGE 22 | # > library(org.Hs.eg.db) 23 | # > (id <- as.vector(unlist(mget("POU5F1", org.Hs.egSYMBOL2EG)))) 24 | # > hapmap.LD(geneID=id) 25 | # 26 | # output the heatmap into a EPS file 27 | # 28 | ##################################################### 29 | 30 | hapmap.LD <- function(geneID=NULL, subSNP=NULL, chr=NULL, hapmap.file=NULL, ...) { 31 | require(snpMatrix) 32 | require(annotate) 33 | require(org.Hs.eg.db) 34 | require(foreach) 35 | require(doMC) 36 | require(multicore) 37 | ncore = multicore:::detectCores() 38 | registerDoMC(cores = ncore) 39 | 40 | if(is.null(subSNP) && is.null(geneID)){ 41 | stop("subSNP and geneID are NULL. Selection of SNP to plot is required.\nAn entire chromosome cannot be plotted") 42 | } 43 | 44 | if(!is.null(geneID)){ 45 | start <- mget(geneID, env=org.Hs.egCHRLOC) 46 | end <- mget(geneID, env=org.Hs.egCHRLOCEND) 47 | chr <- mget(geneID, env=org.Hs.egCHR) 48 | subSNP <- NULL 49 | } 50 | 51 | print('Getting LD info from HapMap') 52 | if(is.null(hapmap.file)){ 53 | if(is.null(chr)){ 54 | stop("Chromosome number has to be provided if no HapMap file is given") 55 | } 56 | hapmap.file <- paste("ftp://ftp.ncbi.nlm.nih.gov/hapmap/genotypes/2010-08_phaseII+III/forward/genotypes_chr",chr,"_CEU_r28_nr.b36_fwd.txt.gz", sep='') 57 | } 58 | else{ 59 | hapmap.file <- paste("file://", hapmap.file, sep="") 60 | hapmap <- read.HapMap.data(hapmap.file) 61 | } 62 | 63 | 64 | if(is.null(subSNP)){ 65 | xx <- hapmap$snp.support[as.numeric(as.vector(hapmap$snp.support$Position))>= start,] 66 | xx <- xx[as.numeric(as.vector(xx$Position)) <= end,] 67 | subSNP <- rownames(xx) 68 | idx.cols <- which(colnames(hapmap$snp.data@.Data) %in% subSNP) 69 | hapmap$snp.data@.Data <- hapmap$snp.data@.Data[,idx.cols] 70 | } 71 | else{ 72 | idx.cols <- which(colnames(hapmap$snp.data@.Data) %in% subSNP) 73 | hapmap$snp.data@.Data <- hapmap$snp.data@.Data[,idx.cols] 74 | } 75 | 76 | # remove grey lines SNP with no info for the selected region 77 | idx <- foreach(i = 1:ncol(hapmap$snp.data@.Data), .combine=c) %dopar% { 78 | if(length(unique(as.numeric(hapmap$snp.data@.Data[,i]))) <= 2){ 79 | i 80 | } 81 | } 82 | 83 | if(length(idx)>0){ 84 | hapmap$snp.data@.Data <- hapmap$snp.data@.Data[,-idx] 85 | } 86 | print('a') 87 | ldinfo <- ld.snp(hapmap$snp.data, depth=dim(hapmap$snp.data)[2], ...) 88 | print('b') 89 | fName <- paste('ld_plot',".eps", sep='') 90 | plot.snp.dprime(ldinfo, start=a, end=b, filename=fName) 91 | } --------------------------------------------------------------------------------