├── README
├── SQL_sheet_code.sql
├── phyper.r
├── list2DF.r
├── write.GOhyper.r
├── source_https.r
├── plot_bar.r
├── pubmed_trend.pl
├── get.ppiNCBI.r
├── pubmed_trend.r
├── GO_over.r
├── population.plot.r
├── SNPSpD.r
├── slopegraph.r
├── table.graph.r
├── Affy_QA.r
└── hapmap.LD.r


/README:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/SQL_sheet_code.sql:
--------------------------------------------------------------------------------
1 | -- create column and fill it based on value from another.
2 | ALTER TABLE first_pain_icd_primary ADD COLUMN genderBin INT(5) NOT NULL;
3 | UPDATE first_pain_icd_primary SET genderBin = CASE WHEN gender = "M" THEN -1 ELSE 1 END
4 | 


--------------------------------------------------------------------------------
/phyper.r:
--------------------------------------------------------------------------------
 1 | # of white balls drawn w/o replacement
 2 | q=6-1
 3 | # of white ball in the urn
 4 | m=28
 5 | # of black balls
 6 | n=15006-28
 7 | # of ball drawn from the urn
 8 | k=2426
 9 | phyper(q, m, n, k, lower.tail = FALSE)
10 | # [1] 0.2935694
11 | rm(q,m,n,k)
12 | 
13 | 


--------------------------------------------------------------------------------
/list2DF.r:
--------------------------------------------------------------------------------
1 | ## Transform a list of vector of the same length to a data.frame
2 | list2DF <- function(list){
3 | 	l <- as.vector(unlist(lapply(list, length)))
4 | 	if(length(unique(l))>1) stop("list elements not some length. Cannot transform to data.frame")
5 | 	df <- as.data.frame(matrix(as.vector(unlist(list)), ncol=l[1]))
6 | 	return(df)
7 | }


--------------------------------------------------------------------------------
/write.GOhyper.r:
--------------------------------------------------------------------------------
 1 | #
 2 | # write.GOhyper.r
 3 | #
 4 | # Created by David Ruau on 2012-01-23.  
 5 | # Dept. of Pediatrics/Div. Systems Medicine, Stanford University.
 6 | # 
 7 | #
 8 | ##################### USAGE #########################
 9 | # 
10 | # write.GOhyper(mfhyper, filename="results.xlsx")
11 | # 
12 | #####################################################
13 | 
14 | write.GOhyper <- function(mfhyper, filename='GO_results.xlsx') {
15 |   require(GOstats)
16 |   require(multtest)
17 |   require(xlsx)
18 |   
19 |   gogo <- summary(mfhyper)
20 |   gogo$adjPvalue <- mt.rawp2adjp(gogo$Pvalue, proc="BH")$adjp[,2]
21 |   gogo <- gogo[,c(1:2,8,3:7)]
22 | 	gogo <- gogo[order(gogo$OddsRatio),]
23 |   write.xlsx(gogo, file=filename)
24 |   print(paste('Results written in', filename))
25 |   return(gogo)
26 | }
27 | 


--------------------------------------------------------------------------------
/source_https.r:
--------------------------------------------------------------------------------
 1 | #
 2 | # source_https.r
 3 | #
 4 | # 2012-02-14.  
 5 | # Adapted from from http://tonybreyal.wordpress.com/2011/11/24/source_https-sourcing-an-r-script-from-github/
 6 | #
 7 | ##################### USAGE #########################
 8 | # 
 9 | # source_https("https://github.com/bobthecat/codebox/blob/master/GO_over.r")
10 | # 
11 | #####################################################
12 | 
13 | source_https <- function(url, ...) {
14 |   # load package
15 |   require(RCurl)
16 |   # using the raw option
17 |   if(length(grep('https://github.com', url))==1) stop("github RAW source required")
18 |   # parse and evaluate each .R script
19 |   sapply(c(url, ...), function(u) {
20 |     eval(parse(text = getURL(u, followlocation = TRUE, cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl"))), envir = .GlobalEnv)
21 |   })
22 | }
23 | 


--------------------------------------------------------------------------------
/plot_bar.r:
--------------------------------------------------------------------------------
 1 | #
 2 | # plot_bar.r
 3 | #
 4 | # Created by David Ruau on 2011-03-29.  
 5 | # Department of Pediatrics/Div. System Medicine Stanford University.
 6 | # 
 7 | ##################### USAGE #########################
 8 | # source('sex_diff_pain.r')
 9 | # sex.pub <- sex_diff_pain(search.str = 'Sex+Characteristics[mh] AND Pain[mh]', year.span=1970:2011)
10 | # par(las=1)
11 | # plot_bar(x=sex.pub[1:41], linecol='royalblue', cols="Reds", myTitle='Number of publication per year for\n"sex differences" and "pain"')
12 | # 
13 | #####################################################
14 | 
15 | plot_bar <- function(x=sex.pub, linecol="royalblue", cols, addArg=TRUE) {
16 |   require("RColorBrewer")
17 |   # colorfunction = colorRampPalette(brewer.pal(9, cols))
18 |   # mycolors = colorfunction(length(x))
19 |   ## This draw the bar plot and save it in an object...
20 |   # bp <- barplot(x, col=mycolors)
21 |   bp <- barplot(x, col=cols, add=addArg)
22 |   fit <- stats::lowess(x, f=1/3)
23 |   lines(x=bp, fit$y, col=linecol, lwd=3)  
24 | }
25 | 


--------------------------------------------------------------------------------
/pubmed_trend.pl:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl -w
 2 | #
 3 | # sex_diff_pain.pl
 4 | #
 5 | # Created by David Ruau on 2011-02-17.  
 6 | # Department of Pediatrics/Div. System Medicine Stanford University.
 7 | # 
 8 | ##################### USAGE #########################
 9 | # 
10 | # Query PubMed with Eutils tools
11 | # 
12 | #####################################################
13 | use Bio::TGen::EUtils;
14 | 
15 | use strict;
16 | 
17 | my $queryString = $ARGV[0];
18 | 
19 | ## query info
20 | my $eu = Bio::TGen::EUtils->new( 'tool'  => 'pubmed_trend.pl',
21 |                                  'email' => 'REPLACE_ME@gmail.com' );
22 | 
23 | ## EFetch
24 | my $query = $eu->esearch( db   => 'pubmed',
25 |                           term => $queryString,
26 |                           usehistory => 'n' );
27 | 
28 | $query->write_raw( file => 'tempfile.xml' );
29 | 
30 | if (-z 'tempfile.xml') {
31 |   # one more time
32 |   my $query = $eu->esearch( db   => 'pubmed',
33 |                             term => $queryString,
34 |                             usehistory => 'n' );
35 | 
36 |   $query->write_raw( file => 'tempfile.xml' );
37 |   if (-z 'tempfile.xml') {
38 |     open (FILE, '>', 'tempfile.xml') or die 'Could not open file, $!';
39 |     
40 |     print FILE "<begin>hello world</begin>";
41 |     close (FILE);
42 |   }
43 | }


--------------------------------------------------------------------------------
/get.ppiNCBI.r:
--------------------------------------------------------------------------------
 1 | #
 2 | # get.ppiNCBI.r
 3 | #
 4 | # Created by David Ruau on 2012-06-05.
 5 | # Dept. of Pediatrics/Div. Systems Medicine, Stanford University.
 6 | # 
 7 | #
 8 | ##################### USAGE #########################
 9 | # see http://brainchronicle.blogspot.com/2012/05/obtain-protein-protein-interaction-from.html
10 | # ppi <- get.ppiNCBI(c("777", "57619"))
11 | # 
12 | #####################################################
13 | 
14 | get.ppiNCBI <- function(g.n) {
15 |   require(XML)
16 |   ppi <- data.frame()
17 |   for(i in 1:length(g.n)){
18 |     o <- htmlParse(paste("http://www.ncbi.nlm.nih.gov/gene/", g.n[i], sep=''))
19 |     # check if interaction table exists
20 |     exist <- length(getNodeSet(o, "//table//th[@id='inter-prod']"))>0
21 |     if(exist){
22 |       p <- getNodeSet(o, "//table")
23 |       ## need to know which table is the good one
24 |       for(j in 1:length(p)){
25 |         int <- readHTMLTable(p[[j]])
26 |         if(colnames(int)[2]=="Interactant"){break}
27 |       }
28 |       ppi <- rbind(ppi, data.frame(egID=g.n[i], intSymbol=int$`Other Gene`))
29 |     }
30 |     # play nice! and avoid being kicked out from NCBI servers
31 |     Sys.sleep(1)
32 |   }
33 |   if(dim(ppi)[1]>0){
34 |     ppi <- unique(ppi)
35 |     print(paste(dim(ppi)[1], "interactions found"))
36 |     return(ppi)
37 |   } else{
38 |     print("No interaction found")
39 |   }
40 | }
41 | 


--------------------------------------------------------------------------------
/pubmed_trend.r:
--------------------------------------------------------------------------------
 1 | #
 2 | # sex_diff_pain.r
 3 | #
 4 | # Created by David Ruau on 2011-02-17.  
 5 | # Department of Pediatrics/Div. System Medicine Stanford University.
 6 | # 
 7 | ##################### USAGE #########################
 8 | # 
 9 | # source('pubmed_trend.r')
10 | # sex.pub <- pubmed_trend(search.str = 'Sex+Characteristics[mh] AND Pain[mh]', year.span=1970:2011)
11 | # 
12 | #####################################################
13 | 
14 | pubmed_trend <- function(search.str = 'Sex+Characteristics[mh] AND Pain[mh]', year.span=1970:2011) {
15 |   require(XML)
16 |   require(RCurl)
17 |   
18 |   results <- NULL
19 |   tmpf <- "./tempfile.xml"
20 |   ## clean before
21 |   system(paste("rm", tmpf))
22 | 
23 |   for(i in year.span){
24 |     queryString <- paste(search.str, ' AND ', i, '[dp]', sep="")
25 |     print(paste('queryString:', queryString))
26 |     sysString <- paste('./pubmed_trend.pl "', queryString,'"', sep="")
27 |     system(sysString)
28 | 
29 |     xml <- xmlTreeParse(tmpf, useInternalNodes=TRUE)
30 |     pubTerm <- as.numeric(xmlValue(getNodeSet(xml, "//Count")[[1]]))
31 |     print(paste("#______num pub for",i,":",pubTerm))
32 |     rm(xml)
33 |     results <- append(results, pubTerm)
34 |     ## avoid being kicked out!
35 |     Sys.sleep(1)
36 |   }
37 |   names(results) <- year.span
38 |   ## clean after
39 |   system(paste("rm", tmpf))
40 |   
41 |   return(results)
42 | }


--------------------------------------------------------------------------------
/GO_over.r:
--------------------------------------------------------------------------------
 1 | #
 2 | # GO_over.r
 3 | #
 4 | # Created by David Ruau on 2012-01-23.
 5 | # Dept. of Pediatrics/Div. Systems Medicine, Stanford University.
 6 | # 
 7 | #
 8 | ##################### USAGE #########################
 9 | # 
10 | # mfhyper <- GO_over(entrezUniverse, glist=glist, annot='org.Hs.eg.db')
11 | # geneMappedCount(mfhyper)
12 | # htmlReport(mfhyper, file="results.html", label="ceci n'est pas une pipe")
13 | # 
14 | #####################################################
15 | 
16 | GO_over <- function(universe, glist, annot='HsAgilentDesign026652.db', ontology='BP', cutoff=0.001) {
17 |   require(GOstats)
18 |   universe <- unique(as.character(universe))
19 |   glist <- unique(as.character(glist))
20 |   
21 |   # conditional hypergeometric test
22 |   # "uses the structure of the GO graph to estimate for each term whether or
23 |   # not there is evidence beyond that which is provided by the term’s children
24 |   # to call the term in question statistically overrepresented." - GOstatsHyperG vignette
25 |   params <-  new("GOHyperGParams", 
26 |   geneIds=glist,
27 |   universeGeneIds=universe, # this is a gene list of all the genes tested on the microarray
28 |   annotation=annot, # annotation pacakge for the microarray in question
29 |   ontology=ontology, # either BP, CC, or MF
30 |   pvalueCutoff=cutoff, # for uncorrected pvalues
31 |   conditional=TRUE, # Use conditional algorithms or standard Hypergeometric test
32 |   testDirection="over")
33 |   
34 |   mfhyper = hyperGTest(params)
35 |   return(mfhyper)
36 | }
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/population.plot.r:
--------------------------------------------------------------------------------
 1 | #
 2 | # population.plot.r
 3 | #
 4 | # Created by David Ruau on 2011-07-09.  
 5 | # 2011 Dept. of Pediatrics/Div. Systems Medicine
 6 | # Stanford University.
 7 | # 
 8 | #
 9 | ##################### USAGE #########################
10 | # 
11 | # m <- c(2300, 2500, 2400, 2000, 1900, 1800, 1000)
12 | # f <- c(2400, 2200, 2500, 3000, 2200, 2000, 1500)
13 | # population.plot(m, f, labels=c('0-18', '19-25', '26-40', '40-50', '51-65', '66-77', '78-99'))
14 | # 
15 | #####################################################
16 | 
17 | population.plot <- function(male.count, female.count, sub.title="count", labels=NULL, mycex.axis=1) {
18 |   xmax <- max(c(male.count, female.count))
19 |   op <- par()
20 |   par(mfrow=c(1,2), mar=c(4,2,4,2))
21 |   
22 |   barplot(-male.count, axes=F, axisnames=F, space=0, xlim=c(-xmax, 0), horiz=T)
23 |   mtext('Male', side=3, at=0, line=1, cex=1.5, adj=1)
24 |   mtext(sub.title, side=1, line=2, at=-xmax/4, adj=1)
25 |   grid(nx=NULL, ny=NA, col='white', lty="solid")
26 |   # axis(side=1, at=c(0,-1000, -2000, -3000), labels=c(0,1000, 2000, 3000))
27 |   axis(side=1, cex.axis=mycex.axis)
28 |   if(!is.null(labels)){
29 |     axis(side=4, at=seq(0.5, length(labels)-0.5, 1), labels=labels ,las=1, tick=F, adj=0.5)
30 |   }
31 |   else{stop("Labels Y-axis needed")}
32 |   
33 |   # par(mar=c(4,2,4,2))
34 |   barplot(female.count, axes=F, axisnames=F, space=0, xlim=c(0,xmax), horiz=T)
35 |   mtext('Female', side=3, at=0, line=1, cex=1.5, adj = 0)
36 |   mtext(sub.title, side=1, line=2, at=xmax/4, adj=0)
37 |   grid(nx=NULL, ny=NA, col='white', lty="solid")
38 |   axis(side=1, cex.axis=mycex.axis)
39 |   # par reset
40 |   par(op)
41 | }


--------------------------------------------------------------------------------
/SNPSpD.r:
--------------------------------------------------------------------------------
 1 | ## Method for computing the effective number of variable (SNPs) based on the 
 2 | # spectral decomposition (SpD) of matrices of pairwise LD between SNPs
 3 | # See Nyholt et al. 2004 (PMID:14997420)
 4 | library(snpMatrix)
 5 | library(NCBI2R)
 6 | library(annotate)
 7 | 
 8 | # First you need to download the chromosome file from hapmap
 9 | # To know the chromosome use:
10 | GetSNPInfo("rs12345")$chr
11 |  
12 | chrURL <- "ftp://ftp.ncbi.nlm.nih.gov/hapmap/genotypes/2010-08_phaseII+III/forward/genotypes_chr8_CEU_r28_nr.b36_fwd.txt.gz"
13 | chr <- read.HapMap.data(chrURL)
14 | 
15 | # I am usually interested in only a subset of the SNPs. Here subSNP is a vector
16 | # of rsxxxx SNP IDs
17 | # Suppress suppress the following line if you want to do it for all the SNPs on
18 | # the chromosome
19 | chr$snp.data@.Data <- chr$snp.data@.Data[,subSNP]
20 | ldinfo <- ld.snp(chr$snp.data, depth=dim(chr$snp.data)[2])
21 | plot(ldinfo, filename='ld_plot.eps')
22 | 
23 | # matrix massage to make it square
24 | ldinfo$rsq2 <- ldinfo$rsq2[,dim(ldinfo$rsq2)[2]:1]
25 | ldinfo$rsq2 <- cbind(0, ldinfo$rsq2)
26 | ldinfo$rsq2 <- rbind(ldinfo$rsq2, 0)
27 | ldinfo$rsq2 <- t(ldinfo$rsq2)
28 | # fill diagonal with 1
29 | for(i in 1:dim(ldinfo$rsq2)[1]){
30 |   ldinfo$rsq2[i,i]=1
31 | }
32 | 
33 | # EIGEN values extraction
34 | e <- eigen(ldinfo$rsq2, symmetric=TRUE, only.values=TRUE)
35 | 
36 | # The effective number of variable (Meff)
37 | Meff <- function(lambda){
38 |   return(1 + (length(lambda) - 1)*(1 - (var(lambda)/length(lambda))))
39 | }
40 | 
41 | Meff(e$values)
42 | 
43 | ## final notes
44 | # reduction are not huge so don't be surprise
45 | # a few example 131 SNP were reduced to 122.4
46 | # 16 -> 11.3;
47 | # 51-> 38.7...
48 | 


--------------------------------------------------------------------------------
/slopegraph.r:
--------------------------------------------------------------------------------
 1 | #
 2 | # slopegraph.r
 3 | #
 4 | # Created by David Ruau on 2011-07-18.  
 5 | # 2011 Dept. of Pediatrics/Div. Systems Medicine
 6 | # Stanford University.
 7 | # 
 8 | #
 9 | ##################### USAGE #########################
10 | # data: data.frame in the same shape as the slopegraph is wanted
11 | # label.cex: magnification for numeric line labels from 0 to 1
12 | # axis.cex: magnificatoin for axis titles from 0 to 1
13 | # digits: number of significant digits to report
14 | # rounding.method: can be NULL, round or signif
15 | # ...: supplementary arguments supplied to par, usually margins
16 | #
17 | # EXAMPLE:
18 | # source('slopegraph.r')
19 | # pdf('slopegraph.pdf', height=7, width=8)
20 | # slopegraph(data = t(WorldPhones[,1:3]), mymain = "YEARS", mar=c(2, 5, 5, 5), label.cex=0.8, axis.cex=0.9)
21 | # dev.off()
22 | #
23 | # Tips: when values overlap try first to extend the height of you plot and if this does not work
24 | # round your value using the option rounding.method = 'round' and digits=0
25 | # 
26 | #####################################################
27 | 
28 | slopegraph <- function(data, label.cex=0.8, axis.cex=0.9, digits = 2, rounding.method = NULL, mymain = "slopegraph", ...) {
29 |   require(plotrix)
30 |   if(!is.data.frame(data)){
31 |     data <- as.data.frame(data)
32 |   }
33 |   
34 |   if(!is.null(rounding.method)){
35 |     data.temp <- .rd.method(rounding.method, width, digits)
36 |     data.temp <- as.numeric(sprintf(fmt, as.matrix(data)))
37 |     data <- as.data.frame(matrix(data.temp, nrow=nrow(data), ncol=ncol(data), dimnames=list(rownames(data), colnames(data))))
38 |   }
39 |   
40 |   old.par <- par(no.readonly = TRUE)
41 |   par(...)
42 |   matplot(t(data), type='b', pch=NA, axes=FALSE, xlab='', ylab='', lty='solid', col="grey", ...)
43 |   for(i in 1:ncol(data)){
44 |     for(j in 1:nrow(data)){
45 |       boxed.labels(i, data[j,i], labels=data[j,i], bg='white', border = FALSE, cex=label.cex)
46 |     }
47 |   }
48 |   mtext(text = rownames(data), side = 2, at=data[,1], line = 0.5, las=1, cex=axis.cex)
49 |   mtext(text = colnames(data), side = 3, at=1:ncol(data), line = 1, cex=axis.cex)
50 |   mtext(text = rownames(data), side = 4, at=data[,ncol(data)], line = 0.5, las=1, cex=axis.cex)
51 |   title(main = mymain, line=3)
52 |   par(old.par)
53 | }
54 | 
55 | 
56 | .rd.method <- function(rounding.method, width, digits){
57 |   rounding.character <- switch(match(rounding.method, c("round", "signif")), "f", "g")
58 |   fmt = paste("%.", digits, rounding.character, sep = "")
59 |   return(fmt)
60 | }
61 | 
62 | 
63 | 


--------------------------------------------------------------------------------
/table.graph.r:
--------------------------------------------------------------------------------
 1 | #
 2 | # table.graph.r
 3 | #
 4 | # Created by David Ruau on 2011-06-23.  
 5 | # Department of Pediatrics/Systems Medicine,
 6 | # Stanford University.
 7 | # 
 8 | #
 9 | ##################### USAGE #########################
10 | # Tufte table-graphic or slopegraph shown in the Tufte book for paired numeric data
11 | # "the visual display of quantitative information" p. 158
12 | #
13 | # Column names and row names will be used to label the plot
14 | # Depending on the length of your rownames the margins might have to be adjusted
15 | #
16 | # df: data frame with 2 column
17 | # line.col: vector length 2 with colors for the lines. Default to grey for
18 | #   value going up and black for value going down
19 | # label.cex: magnification for x and y labels from 0 to 1
20 | # title.cex: magnificatoin for titles from 0 to 1
21 | # digits: number of significant digits to report
22 | # ...: supplementary arguments supplied to par, usually margins
23 | # 
24 | # EXAMPLE:
25 | # source("table.graph.r")
26 | # table.graph(WorldPhones[,1:2], label.cex=0.7, title.cex=1.2, mar=c(5, 5, 1, 5))
27 | # 
28 | #####################################################
29 | 
30 | table.graph <- function(df, line.col=c("grey", "black"), label.cex=1, title.cex=1, width = 6, digits = 2, rounding.method = NULL, ...) {
31 |   xmin <- min(df)
32 |   xmax <- max(df)
33 |   X1 <- as.numeric(as.vector(df[,1]))
34 |   X2 <- as.numeric(as.vector(df[,2]))
35 |   # original settings
36 |   old.par <- par(no.readonly = TRUE)
37 |   # par settings usually margins
38 |   par(...)
39 |   # rounding
40 |   fmt <- .rd.method(rounding.method, width, digits)
41 |   # left
42 |   plot(rep(0, nrow(df)), X1, xlim=c(0,1), ylim=c(xmin, xmax), 
43 |     axes=FALSE, xlab='', ylab='', type='n')
44 |   mtext(text=paste(rownames(df), sprintf(fmt, X1), sep='  '), side=2, at=X1, las=1, cex=label.cex)
45 |   par(new=TRUE)
46 |   # right
47 |   plot(rep(1, nrow(df)), X2, xlim=c(0,1), ylim=c(xmin, xmax), 
48 |     axes=FALSE, xlab='', ylab='', type='n')
49 |   mtext(text=paste(sprintf(fmt, X2), rownames(df), sep='  '), side=4, at=X2, las=1, cex=label.cex)
50 |   # class label
51 |   mtext(colnames(df)[1], side=3, at=0, cex=title.cex)
52 |   mtext(colnames(df)[2], side=3, at=1, cex=title.cex)
53 |   # lines
54 |   segments(x0 = rep(0, nrow(df)), y0 = X1, x1 = rep(1, nrow(df)), y1 = X2,
55 |    col=ifelse({X1 - X2} < 0, line.col[1], line.col[2]))
56 |   # restore original settings
57 |   par(old.par)
58 | }
59 | 
60 | .rd.method <- function(rounding.method, width, digits){
61 |   if(is.null(rounding.method)){
62 |     fmt = "%s"
63 |   } 
64 |   else{
65 |     rounding.character <- switch(match(rounding.method, c("round", "signif")), "f", "g")
66 |     fmt = paste("%", width, ".", digits, rounding.character, sep = "")
67 |   }
68 |   return(fmt)
69 | }
70 | 


--------------------------------------------------------------------------------
/Affy_QA.r:
--------------------------------------------------------------------------------
 1 | #
 2 | # Affy_QA.r
 3 | #
 4 | # Created by David Ruau on 2011-07-26.  
 5 | # 2011 Dept. of Pediatrics/Div. Systems Medicine, Stanford University.
 6 | # 
 7 | #
 8 | ##################### USAGE #########################
 9 | # R function to perform automatically all the quality control test describe
10 | # in the book "Bioinformatics and Computational Biology Solutions Using R and Bioconductor"
11 | # from Gentleman, Carey, Huber, Irizarry and Dudoit; Springer.
12 | # argument: an affybatch
13 | # 
14 | # EXAMPLE:
15 | # library(affydata)
16 | # data(Dilution)
17 | # Affy_QA(Dilution)
18 | # 
19 | #####################################################
20 | 
21 | Affy_QA <- function(abatch) {
22 |   require(affy)
23 |   require(simpleaffy)
24 |   require(RColorBrewer)
25 |   require(affyPLM)
26 | 
27 |   if (class(abatch)!= 'AffyBatch') {
28 |     stop("argument must be AffyBatch!")
29 |   }
30 | 
31 |   # colors
32 |   cols <- brewer.pal(12, "Set3")
33 | 
34 |   # Boxplot
35 |   pdf(file='qa_boxplot.pdf', height=8, width=10)
36 |   boxplot(abatch, col=cols, main="Unprocessed log scale probe-level data", xlab="If discrepancy, they are not conclusive\n Difference can be reduce by normalization")
37 |   dev.off()
38 | 
39 |   # Histogram
40 |   pdf(file='qa_histogram.pdf', height=8, width=8)
41 |   hist(abatch, col=cols, lwd=2, xlab="Log(base2) intensities; Bimodal distribution indicate spatial artifact\n Second mode is the result of array(s) having abnormally high value")
42 |   legend("topright", sampleNames(abatch), lty=1, lwd=2,col=cols)
43 |   dev.off()
44 | 
45 |   #RNA degradation
46 |   pdf(file="qa_RNAdeg.pdf", height=8, width=8)
47 |   RNAdeg <- AffyRNAdeg(abatch)
48 |   plotAffyRNAdeg(RNAdeg, cols=cols)
49 |   legend("topleft", sampleNames(abatch), lty=1, lwd=2, col=cols)
50 |   box()
51 |   dev.off()
52 | 
53 |   # simpleaffy graph
54 |   abatch.qc <- qc(abatch)
55 |   pdf(file="qa_QC-simpleaffy.pdf", height=8, width=10)
56 |   plot(abatch.qc)
57 |   dev.off()
58 | 
59 |   pset <- fitPLM(abatch)
60 | 
61 |   # false color image control
62 |   for (n in 1:length(abatch)) {
63 |     filename <- paste("qa_QC",as.vector(sampleNames(abatch))[n],".png")
64 |     png(file=filename, height=900, width=800)
65 |     .img.Test(abatch,pset,n)
66 |     dev.off()
67 |   }
68 | 
69 |   # RLE plot
70 |   pdf(file="qa_RLE.pdf", height=8, width= 8)
71 |   Mbox(pset, col = cols, main ="RLE (Relative Log Expression)", xlab="Assuming that the majority of the gene are not changing\n Ideally these boxes would have small spread and be centered at M=0")
72 |   dev.off()
73 | 
74 |   # NUSE plot
75 |   pdf(file="qa_NUSE.pdf", height=8, width= 8)
76 |   boxplot(pset, col=cols, main= "NUSE (Normalized Unscaled Standard Error)", xlab="High values of median NUSE are indicative of a problematic array")
77 |   dev.off()
78 | }
79 | 
80 | .img.Test <- function(batch,pset,x) {
81 | par(mfrow = c(2,2))
82 | affy::image(batch[,x])
83 | affy::image(pset, type = "weights", which = x)
84 | affy::image(pset, type = "resids", which = x)
85 | affy::image(pset, type = "sign.resids", which = x)
86 | }


--------------------------------------------------------------------------------
/hapmap.LD.r:
--------------------------------------------------------------------------------
 1 | #
 2 | # hapmap.LD.r
 3 | #
 4 | # Created by David Ruau on 2011-04-23.  
 5 | # Department of Pediatrics/Div. Systems Medicine
 6 | # Stanford University.
 7 | # 
 8 | #
 9 | ##################### USAGE #########################
10 | # Input:
11 | # geneName: Entrez gene ID [if provided subSNP is ignored]
12 | # subSNP: [required if geneID is NULL] character vector; the rs SNP ID you want to 
13 | #   plot otherwise try to plot the entire chromosome 
14 | #   LD map (will fail)
15 | # chr: integer; chromosome number
16 | # hapmap.file: file location of HapMap chromosome file 
17 | #   downloaded using readHapMap.data from snpMatrix 
18 | #   package
19 | # ...: further argument for ld.snp function from snpMatrix
20 | #
21 | # USAGE
22 | # > library(org.Hs.eg.db)
23 | # > (id <- as.vector(unlist(mget("POU5F1", org.Hs.egSYMBOL2EG))))
24 | # > hapmap.LD(geneID=id)
25 | # 
26 | # output the heatmap into a EPS file
27 | # 
28 | #####################################################
29 | 
30 | hapmap.LD <- function(geneID=NULL, subSNP=NULL, chr=NULL, hapmap.file=NULL, ...) {
31 |   require(snpMatrix)
32 |   require(annotate)
33 |   require(org.Hs.eg.db)
34 |   require(foreach)
35 |   require(doMC)
36 |   require(multicore)
37 |   ncore = multicore:::detectCores()
38 |   registerDoMC(cores = ncore)
39 |   
40 |   if(is.null(subSNP) && is.null(geneID)){
41 |     stop("subSNP and geneID are NULL. Selection of SNP to plot is required.\nAn entire chromosome cannot be plotted")
42 |   }
43 |   
44 |   if(!is.null(geneID)){
45 |     start <- mget(geneID, env=org.Hs.egCHRLOC)
46 |     end <- mget(geneID, env=org.Hs.egCHRLOCEND)
47 |     chr <- mget(geneID, env=org.Hs.egCHR)
48 |     subSNP <- NULL
49 |   }
50 |   
51 |   print('Getting LD info from HapMap')
52 |   if(is.null(hapmap.file)){
53 |     if(is.null(chr)){
54 |       stop("Chromosome number has to be provided if no HapMap file is given")
55 |     }
56 |     hapmap.file <- paste("ftp://ftp.ncbi.nlm.nih.gov/hapmap/genotypes/2010-08_phaseII+III/forward/genotypes_chr",chr,"_CEU_r28_nr.b36_fwd.txt.gz", sep='')
57 |   }
58 |   else{
59 |     hapmap.file <- paste("file://", hapmap.file, sep="")
60 |     hapmap <- read.HapMap.data(hapmap.file)
61 |   }
62 |   
63 |   
64 |   if(is.null(subSNP)){
65 |     xx <- hapmap$snp.support[as.numeric(as.vector(hapmap$snp.support$Position))>= start,]
66 |     xx <- xx[as.numeric(as.vector(xx$Position)) <= end,]
67 |     subSNP <- rownames(xx)
68 |     idx.cols <- which(colnames(hapmap$snp.data@.Data) %in% subSNP)
69 |     hapmap$snp.data@.Data <- hapmap$snp.data@.Data[,idx.cols]
70 |   }
71 |   else{
72 |     idx.cols <- which(colnames(hapmap$snp.data@.Data) %in% subSNP)
73 |     hapmap$snp.data@.Data <- hapmap$snp.data@.Data[,idx.cols]
74 |   }
75 |   
76 |   # remove grey lines SNP with no info for the selected region
77 |   idx <- foreach(i = 1:ncol(hapmap$snp.data@.Data), .combine=c) %dopar% {
78 |     if(length(unique(as.numeric(hapmap$snp.data@.Data[,i]))) <= 2){
79 |       i
80 |     }
81 |   }
82 |   
83 |   if(length(idx)>0){
84 |     hapmap$snp.data@.Data <- hapmap$snp.data@.Data[,-idx]
85 |   }
86 |   print('a')
87 |   ldinfo <- ld.snp(hapmap$snp.data, depth=dim(hapmap$snp.data)[2], ...)
88 |   print('b')
89 |   fName <- paste('ld_plot',".eps", sep='')
90 |   plot.snp.dprime(ldinfo, start=a, end=b, filename=fName)
91 | }


--------------------------------------------------------------------------------