├── .Rprofile
├── .gitignore
├── Makefile
├── R
│   ├── README.md
│   ├── nullalthist.R
│   ├── plot_FDReg_hist.R
│   └── set_plot_colors.R
├── README.md
├── analysis
│   ├── CompareEEvsETmodel.Rmd
│   ├── CompareEEvsETmodel.html
│   ├── IPvsEM.Rmd
│   ├── IPvsEM.html
│   ├── Makefile
│   ├── adaptive_shrinkage.Rmd
│   ├── adaptive_shrinkage.html
│   ├── ash_npmle.Rmd
│   ├── ash_npmle.html
│   ├── checkIP.Rmd
│   ├── checkIP.html
│   ├── check_mixfdr_lownoise.Rmd
│   ├── check_mixfdr_lownoise.html
│   ├── chunk-options.R
│   ├── efron.fcr.Rmd
│   ├── efron.fcr.html
│   ├── estimate_mode.Rmd
│   ├── estimate_mode.html
│   ├── figure
│   │   ├── IPvsEM.Rmd
│   │   │   ├── unnamed-chunk-2-1.png
│   │   │   ├── unnamed-chunk-3-1.png
│   │   │   └── unnamed-chunk-4-1.png
│   │   ├── adaptive_shrinkage.Rmd
│   │   │   ├── plotting lfsr-1.png
│   │   │   ├── plotting-1.png
│   │   │   ├── run-ash-ET-1.png
│   │   │   ├── run-ash-ET-2.png
│   │   │   ├── sehist-1.png
│   │   │   ├── unnamed-chunk-1-1.png
│   │   │   └── volcano-1.png
│   │   ├── ash_npmle.Rmd
│   │   │   ├── unnamed-chunk-1-1.png
│   │   │   ├── unnamed-chunk-1-2.png
│   │   │   └── unnamed-chunk-2-1.png
│   │   ├── checkIP.Rmd
│   │   │   ├── unnamed-chunk-1-1.png
│   │   │   └── unnamed-chunk-2-1.png
│   │   ├── check_mixfdr_lownoise.Rmd
│   │   │   ├── unnamed-chunk-1-1.png
│   │   │   └── unnamed-chunk-2-1.png
│   │   ├── efron.fcr.Rmd
│   │   │   ├── unnamed-chunk-2-1.png
│   │   │   ├── unnamed-chunk-3-1.png
│   │   │   ├── unnamed-chunk-3-2.png
│   │   │   ├── unnamed-chunk-4-1.png
│   │   │   ├── unnamed-chunk-4-2.png
│   │   │   ├── unnamed-chunk-4-3.png
│   │   │   ├── unnamed-chunk-5-1.png
│   │   │   ├── unnamed-chunk-5-2.png
│   │   │   ├── unnamed-chunk-5-3.png
│   │   │   ├── unnamed-chunk-6-1.png
│   │   │   ├── unnamed-chunk-7-1.png
│   │   │   └── unnamed-chunk-8-1.png
│   │   ├── investigate_hu_badcoverage.Rmd
│   │   │   ├── unnamed-chunk-4-1.png
│   │   │   ├── unnamed-chunk-4-2.png
│   │   │   ├── unnamed-chunk-6-1.png
│   │   │   └── unnamed-chunk-6-2.png
│   │   ├── make_GOODPOOR_figs.Rmd
│   │   │   ├── GOODPOOReg_hist-1.pdf
│   │   │   ├── GOODPOOReg_scatter-1.pdf
│   │   │   ├── lfsr_vs_pval_GOODPOOR-1.pdf
│   │   │   ├── lfsr_vs_pval_GOODPOOR-2.pdf
│   │   │   ├── lfsr_vs_pval_GOODPOOR_single-1.pdf
│   │   │   ├── lfsr_vs_pval_GOODPOOR_single-2.pdf
│   │   │   ├── lfsr_vs_pval_GOODPOOR_single-3.pdf
│   │   │   ├── lfsr_vs_pval_GOODPOOR_single-4.pdf
│   │   │   ├── roc-curve-1.pdf
│   │   │   ├── tp_vs_fp-1.pdf
│   │   │   └── unnamed-chunk-3-1.png
│   │   ├── makefig_FDReg.Rmd
│   │   │   ├── decomp_ZA-1.pdf
│   │   │   ├── decomp_ZA_poster-1.pdf
│   │   │   ├── unnamed-chunk-1-1.png
│   │   │   └── unnamed-chunk-2-1.png
│   │   ├── plot_cdf_eg.Rmd
│   │   │   ├── egcdf-1.pdf
│   │   │   ├── egcdf-reduce-1.pdf
│   │   │   ├── egcdf-reduce-with-npmle-1.pdf
│   │   │   ├── egcdf-reduce-with-npmle-2.pdf
│   │   │   ├── mean_cdf-1.pdf
│   │   │   ├── mean_cdf-reduce-1.pdf
│   │   │   ├── mean_cdf_nopen-1.pdf
│   │   │   └── mean_cdf_nopen-reduce-1.pdf
│   │   ├── plot_egdens.Rmd
│   │   │   └── scenario_density-1.pdf
│   │   ├── plot_lfsr.Rmd
│   │   │   ├── plot_lfdr-1.pdf
│   │   │   ├── plot_lfdr-1.png
│   │   │   ├── plot_lfsr-1.pdf
│   │   │   ├── plot_lfsr-1.png
│   │   │   ├── plot_lfsr_s-1.pdf
│   │   │   ├── plot_lfsr_s-1.png
│   │   │   ├── plot_lfsr_s_nn-1.pdf
│   │   │   └── plot_lfsr_s_nn-1.png
│   │   ├── plot_pi0est.Rmd
│   │   │   └── plot_pi0est-1.pdf
│   │   ├── referee.response.Rmd
│   │   │   ├── unnamed-chunk-1-1.png
│   │   │   └── unnamed-chunk-1-2.png
│   │   ├── referee_uaza.Rmd
│   │   │   ├── unnamed-chunk-2-1.png
│   │   │   ├── unnamed-chunk-3-1.png
│   │   │   ├── unnamed-chunk-4-1.png
│   │   │   └── unnamed-chunk-5-1.png
│   │   └── summarize_dsc_znull.Rmd
│   │       ├── unnamed-chunk-2-1.png
│   │       ├── unnamed-chunk-2-2.png
│   │       └── unnamed-chunk-2-3.png
│   ├── index.Rmd
│   ├── index.html
│   ├── investigate_hu_badcoverage.Rmd
│   ├── investigate_hu_badcoverage.html
│   ├── make_GOODPOOR_figs.Rmd
│   ├── make_GOODPOOR_figs.html
│   ├── makefig_FDReg.Rmd
│   ├── makefig_FDReg.html
│   ├── metaplot_examples.Rmd
│   ├── metaplot_examples.html
│   ├── plot_cdf_eg.Rmd
│   ├── plot_cdf_eg.html
│   ├── plot_egdens.Rmd
│   ├── plot_egdens.html
│   ├── plot_egdens2.Rmd
│   ├── plot_egdens2.html
│   ├── plot_lfsr.Rmd
│   ├── plot_lfsr.html
│   ├── plot_pi0est.Rmd
│   ├── plot_pi0est.html
│   ├── referee.response.Rmd
│   ├── referee.response.html
│   ├── referee_uaza.Rmd
│   ├── referee_uaza.html
│   ├── summarize_coverage.Rmd
│   ├── summarize_coverage.html
│   ├── summarize_dsc_znull.Rmd
│   ├── summarize_dsc_znull.html
│   ├── table
│   │   ├── README.md
│   │   ├── coverage_all.tex
│   │   ├── coverage_all_nopen.tex
│   │   ├── coverage_neg.tex
│   │   ├── coverage_neg_nopen.tex
│   │   ├── coverage_pos.tex
│   │   ├── coverage_pos_nopen.tex
│   │   ├── scoverage_all.tex
│   │   ├── scoverage_all_nopen.tex
│   │   ├── scoverage_neg.tex
│   │   ├── scoverage_neg_nopen.tex
│   │   ├── scoverage_pos.tex
│   │   └── scoverage_pos_nopen.tex
│   ├── template.Rmd
│   └── template.html
├── ash-packrat.Rproj
├── code
│   ├── Makefile
│   ├── README.md
│   ├── dsc-opt
│   │   ├── methods
│   │   │   └── ash.multiopt.wrapper.R
│   │   └── run_dsc_opt.R
│   └── dsc-shrink
│       ├── add_methods.R
│       ├── add_methods.null.R
│       ├── add_named_scenarios.R
│       ├── add_scenarios.null.R
│       ├── datamakers
│       │   ├── datamaker.R
│       │   ├── null_t_datamaker.R
│       │   └── null_z_datamaker.R
│       ├── methods
│       │   ├── ash.wrapper.R
│       │   ├── locfdr.wrapper.R
│       │   ├── mixfdr.wrapper.R
│       │   └── qvalue.wrapper.R
│       ├── run_dsc.R
│       ├── run_dsc_mini.R
│       ├── run_dsc_znull.R
│       └── score.R
├── data
│   └── README.md
├── output
│   └── README.md
├── packrat
│   ├── init.R
│   ├── packrat.lock
│   ├── packrat.opts
│   └── src
│       ├── mixfdr
│       │   └── mixfdr_1.0.tar.gz
│       └── packrat
│           └── packrat_0.4.4.tar.gz
├── paper
│   ├── Makefile
│   ├── README.md
│   ├── main.bbl
│   ├── main.blg
│   ├── main.dvi
│   ├── main.fff
│   ├── main.lof
│   ├── main.lot
│   ├── main.pdf
│   ├── main.tex
│   └── main.ttt
└── talks
    └── README.md

--------------------------------------------------------------------------------
/.Rprofile:
--------------------------------------------------------------------------------
#### -- Packrat Autoloader (version 0.4.4) -- ####
source("packrat/init.R")
#### -- End Packrat Autoloader -- ####

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.Rout
paper/*.aux
paper/*.log
output/*
packrat/lib*/
figure/*/*.pdf
*.png
.Rhistory
.Rproj.user
packrat/src/
.DS_Store

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
.PHONY: all clean output analysis paper

all: output analysis paper

clean:
	cd analysis && $(MAKE) clean

output:
	cd code && $(MAKE)

analysis:
	cd analysis && $(MAKE)

paper:
	cd paper && $(MAKE)

--------------------------------------------------------------------------------
/R/README.md:
--------------------------------------------------------------------------------
Directory for R scripts that may be used in analysis

--------------------------------------------------------------------------------
/R/nullalthist.R:
--------------------------------------------------------------------------------
#plot a histogram of z scores, highlighting the alternative distribution
#of z scores that is implied by localfdr values lfdr.
nullalthist = function(z,lfdr,nullcol="blue",altcol="cyan",ncz=100,...){
  h=hist(z, freq=FALSE,col=nullcol,nclass=ncz,...)
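  # Average the lfdr within each histogram bin; scaling each bin's density
  # by (1 - average lfdr) gives the implied alternative component, which is
  # then over-plotted on top of the full histogram.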
  avlfdr = unlist(lapply(split(lfdr,cut(z,h$breaks),drop=FALSE),mean))
  h$density = (1-avlfdr) * h$density
  plot(h,add=TRUE,col=altcol,freq=FALSE)
}

#this one puts the null on the bottom
altnullhist = function(z,lfdr,nullcol="blue",altcol="cyan",ncz=100,...){
  h=hist(z, freq=FALSE,col=altcol,nclass=ncz,...)
  avlfdr = unlist(lapply(split(lfdr,cut(z,h$breaks),drop=FALSE),mean))
  h$density = avlfdr * h$density
  plot(h,add=TRUE,col=nullcol,freq=FALSE)
}

plotall_hist=function(sim,iter=1,histfun=nullalthist){
  hh.zscore=sim$zscore[[iter]]
  par(mfcol=c(2,2))
  histfun(hh.zscore,sim$betahat.fdrtool[[iter]]$lfdr,main="fdrtool")
  histfun(hh.zscore,sim$betahat.locfdr[[iter]]$fdr,main="locfdr")
  histfun(hh.zscore,sim$betahat.mixfdr[[iter]]$fdr,main="mixfdr")
  histfun(hh.zscore,sim$betahat.ash.n[[iter]]$lfdr,main="ash")
  par(mfcol=c(1,1))
}

--------------------------------------------------------------------------------
/R/plot_FDReg_hist.R:
--------------------------------------------------------------------------------
plot_FDReg_hist = function(hh.pval,pi0,nc=40,nullcol="blue",altcol="cyan",type=4,textsize=1.2,title="Distribution of p values",...){
  hh.hist=hist(hh.pval,freq=FALSE,xlab="p value",main=title,nclass=nc,col=altcol,...)
  if(type>1){
    abline(h=pi0,col=nullcol,lwd=2)

    hh.hist$density=rep(pi0,length(hh.hist$density))
    #hh.hist$counts=rep(hh.q$pi0*length(hh.pval)/nc,length(hh.hist$counts))
    plot(hh.hist,add=TRUE,col=nullcol,freq=FALSE)
  }
  if(type>2){
    abline(v=0.1,lwd=2,col=2)
  }
  if(type>3){
    text(0.05,1.2,labels="A",col=2,cex=1.2)
    text(0.05,0.4,labels="B",col=2,cex=1.2)
    text(0.6,3,labels=paste0("FDR = B/(A+B) = ",round(pi0*0.1*length(hh.pval)/sum(hh.pval<0.1),2)),cex=textsize)
  }
}

--------------------------------------------------------------------------------
/R/set_plot_colors.R:
--------------------------------------------------------------------------------
library("RColorBrewer")
library("ggplot2")
myColors <- brewer.pal(8,"Set1")
names(myColors) <- c("truth","ash.hu","ash.n","ash.u","qvalue","locfdr","mixfdr.tnull","NPMLE")
colScale <- scale_colour_manual(name = "method",values = myColors)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Introduction

This repo is intended to contain code to reproduce results
from the paper, *False Discovery Rates: A New Deal*.

The hardest part may be making sure R is set up with all the right
packages installed. I have used the R package `packrat` to try to
make this easier, but on some systems it may be easier to install all
the packages you need by hand. If things go wrong, look also at
"If things go wrong" below.

### Step-by-step instructions

1. Preliminaries: I made the paper with `R v3.2.3`, so you might start
by installing this version. Or more recent versions should (!) also
work. Install `pandoc v1.12.3` or higher. You will also need a working
`pdflatex` installation to make the paper. The `Rmosek` package needs
[`mosek`](https://www.mosek.com/resources/downloads) to be installed,
so do that now, too. Don't forget to follow the instructions
regarding the license file.
Possibly you will come across other
dependencies as you run the following steps.

2. Clone (or download and unzip) this repository.

3. Install the `R` packages you need. I have tried to use the
[`packrat`](https://rstudio.github.io/packrat/) package to automate
this process, with some measure of success. To do it this way, start up
`R` (e.g. from the command line) within the repository directory. The
first time you enter `R` the hidden `.Rprofile` file will cause `R` to
try to install all the packages you need to a local library in the
`packrat` subdirectory. (Specifically, it should create a `packrat/lib`
directory with more files in a subdirectory whose name will depend on
your architecture.)

**To do:** Mention that the data generation scripts use the `dscr`
package; see [here](http://github.com/stephens999/dscr).

If this does not work the first time - e.g. because you don't have some
dependencies installed - then install the dependencies and try again.
This time, on entering `R`, you will have to tell `packrat` to try again
by typing `packrat::restore()`. If this still does not work
for you, or you already have the packages you need installed, then you
may prefer to remove the packrat subdirectory and install the packages
you need yourself. Quit `R`.

4. Within the repository directory type `make clean`. This will remove
figures and other files that I have already included in the repository.

5. Within the repository directory type `make`. This will try to:

i) Run all the code for the simulation studies. It will take a
while (hours), so you might want to run it overnight. This should
create a bunch of output files in the `output` directory. In particular,
you will know that it worked iff you can find the files
`dsc-shrink-files/res.RData` and `dsc-robust-files/dsc_robust.RData`.

ii) Build/render the .Rmd files in the `analysis` directory. If
successful you should have a file `analysis/index.html` that you
can open to see a list of all the rendered files.

iii) `pdflatex` the paper.

If you have problems (more than likely!) you might like to try each of
these steps in turn, by sequentially typing `make output`, `make
analysis`, and `make paper`.

# If things go wrong

- If you have trouble installing Rmosek, maybe
[this](http://r-forge.r-project.org/scm/viewvc.php/*checkout*/pkg/inst/doc/userguide.pdf?root=rmosek)
will help.

- Ultimately you don't need Rmosek to make things run - if you don't
have Rmosek installed then the ashr package will use an EM algorithm
instead. The results from this method are very similar to those from
using the interior point method (but the interior point method is
faster and provides better convergence).

- If things go wrong in making the output files, try looking at the
`.Rout` files created in the appropriate output subdirectory
(`output/dsc-shrink/`, `output/dsc-znull` or `output/dsc-robust`) to
see what went wrong.

- If things go wrong in making the analysis files, try looking at the
`.html` files produced to see what went wrong.
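- If the automatic package installation in step 3 keeps failing, the
manual retry looks like this from an `R` session started inside the
repository directory (a minimal sketch; it assumes only that `packrat`
itself can be installed from CRAN):

```r
# Install packrat itself if the autoloader could not bootstrap it.
if (!requireNamespace("packrat", quietly = TRUE))
  install.packages("packrat")

# Re-attempt installation of the locked package versions into packrat/lib.
packrat::restore()
```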
# Directory Structure

The directory structure here, and features of the `analysis`
subdirectory (including the `Makefile`), are based on
[https://github.com/jdblischak/singleCellSeq](https://github.com/jdblischak/singleCellSeq). Here's
a brief summary of the directory structure.

+ analysis: Rmd files for investigations; will generate figures in
`figure/` subdirectory

+ R: R scripts/functions used in analysis; code that is actually
run does not go here

+ output: largish files of data, typically created by code
(e.g. post-processed data, simulations)

+ code: code used for preprocessing, generation of output files, etc.;
may be long-running

+ data: datasets that generally will not change once deposited

+ paper: the paper

+ packrat: a directory that contains information about all the R packages used.
See the R package `packrat` for more details.

+ talks: any presentations

--------------------------------------------------------------------------------
/analysis/CompareEEvsETmodel.Rmd:
--------------------------------------------------------------------------------
---
title: "Comparing Exchangeable Effects (EE) and Exchangeable Standardized Effects (ET) models"
output: html_document
---

The ashr package implements a shrinkage-based Empirical Bayes method for
estimating the values of effects $\beta_j$ based on estimates of the effects
($\hat{\beta}_j$) and their standard errors ($s_j$).

The default model is that the effects $\beta_j$ are identically distributed
from a unimodal distribution $g$. In particular, ashr assumes that $\beta_j$
is independent of the standard error $s_j$. That is,

$$\beta_j | s_j \sim g(\cdot) \quad (*).$$

Here we consider the more general model:

$$\beta_j/s_j^\alpha | s_j \sim g(\cdot) \quad (**).$$

The case $\alpha = 0$ is the model (*) above. The case $\alpha = 1$ assumes
that the *t* statistics $\beta_j/s_j$ are identically distributed from
a unimodal distribution. Under this assumption the expected size of the
*unstandardized* effect $\beta_j$ depends on the standard error $s_j$; the
larger $s_j$ is, the larger (in absolute value) $\beta_j$ is expected to be.

By analogy with Wen and Stephens (AoAS), we refer to the model (*) as the
EE model ("exchangeable effects"), and the model (**) as the ET model
("exchangeable T statistics model").

What might motivate the ET model? We can provide two distinct motivations.
First, suppose for concreteness that the effects $\beta_j$ reflect differences
in gene expression between two conditions. One factor that affects $s_j$ is
the variance of gene $j$ within each condition. One could imagine that, perhaps,
genes with a larger variance within each condition are less tightly regulated,
and therefore more likely to show a large difference between conditions
(*i.e.* large $\beta_j$) than genes with a small variance. This provides
a biological motivation for the possibility that larger $s_j$ might
correlate with larger $\beta_j$ (although of course not for the exact
functional form above).

A second motivation is more statistical: it turns out that this assumption
is, in some sense, the assumption made implicitly by existing approaches to
FDR analysis based on p values.
More specifically, under this alternative assumption, when
attempting to identify "significant" effects, the empirical Bayes approach
will rank the genes in the same way as the usual $p$ values computed from
$\hat{\beta}_j/s_j$.

It is straightforward to use the ashr package to perform analysis under the
ET model: simply specify alpha = 1. (Internally, this causes ash to replace
betahat with the standardized betahat, $\hat{\beta}_j/s_j$, and the standard
errors for these standardized betahat values with 1; there is some
bookkeeping to be done to make sure we return the right likelihoods and
posteriors for the original beta, and not for these standardized values...
ash takes care of this.) It is also straightforward to compare
the two competing modelling assumptions (EE vs ET) by computing the
log-likelihood ratio,

$$\log\{p_{EE}(\hat{\beta} | s, \hat{g}_{EE}) / p_{ET}(\hat{\beta} | s, \hat{g}_{ET})\}.$$

We now illustrate by a simulated example. We assume that the standard errors
come from a gamma distribution, and then generate the effects $\beta_j$ under
the ET model so that genes with bigger standard errors tend to have bigger
effects.

```{r generate_data}
set.seed(1234)
nsamp = 1000
betahat.se = rgamma(nsamp,1,1)
beta = betahat.se * rnorm(nsamp)
betahat = rnorm(nsamp,beta,betahat.se)
zscore = betahat/betahat.se
pval = pchisq(zscore^2,df=1,lower.tail=FALSE)
plot(betahat,-log(pval),pch = 20)
```

Here is the EE analysis.

```{r analysis_ee}
library(ashr)
ashEE.res = ash(betahat,betahat.se,method = "fdr",alpha = 0)
```

And here is how we can perform the ET analysis.

```{r analysis_et}
ashET.res = ash(betahat,betahat.se,method = "fdr",alpha = 1)
```

Now we compare the EE vs ET models:
```{r compare_analyses}
print(ashEE.res$loglik - ashET.res$loglik)
```

Then the log-likelihood ratio is loglikEE - loglikET =
`r ashEE.res$loglik - ashET.res$loglik`. This highly negative log-likelihood
ratio indicates that the data strongly favor the ET model, which is expected
because the data were generated under this model. (One might be tempted to ask
whether the log-likelihood ratio is "significant". We don't know how to address
this question, but suggest that in practice it doesn't matter: if the
log-likelihood ratio is positive then use EE; if it is negative then use ET.)

The above illustrates these ideas on simulations from the ET model. For
comparison, we now provide results for simulations under the EE model.

```{r generate_data_from_ee_model}
set.seed(1234)
nsamp = 1000
betahat.se = rgamma(nsamp,1,1)
beta = rnorm(nsamp)
betahat = rnorm(nsamp,beta,betahat.se)
zscore = betahat/betahat.se
pval = pchisq(zscore^2,df = 1,lower.tail = FALSE)
ashEE.res = ash(betahat,betahat.se,method = "fdr",alpha = 0)
ashET.res = ash(betahat,betahat.se,method = "fdr",alpha = 1)
print(ashEE.res$loglik - ashET.res$loglik)
```

So here the log-likelihood ratio is positive, indicating that the EE model
is preferred (which is expected since the data were generated under that
model).
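The same comparison can be packaged into a small helper. This is a minimal
sketch, not part of the original analysis (the function name is
illustrative); it uses only the `ash` arguments and the `loglik` output
shown above:

```r
library(ashr)

# Fit both models and report which one the log-likelihood ratio favors.
# A positive ratio favors EE (alpha = 0); a negative one favors ET (alpha = 1).
compare_EE_ET = function(betahat, sebetahat) {
  fit.ee = ash(betahat, sebetahat, method = "fdr", alpha = 0)
  fit.et = ash(betahat, sebetahat, method = "fdr", alpha = 1)
  llr = fit.ee$loglik - fit.et$loglik
  list(llr = llr, preferred = if (llr > 0) "EE" else "ET")
}
```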
--------------------------------------------------------------------------------
/analysis/IPvsEM.Rmd:
--------------------------------------------------------------------------------
---
title: "A brief comparison of speed of Interior Point and EM algorithms"
author: "Matthew Stephens"
date: 2016-01-26
---

**Last updated:** `r Sys.Date()`

**Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`

First, we load the necessary libraries.

```{r packages}
library(REBayes)
library(ashr)
library(ggplot2)
```

```{r chunk_options, include=FALSE}
# Specify settings for displaying the plots in the rendered document.
source("chunk-options.R")
```

## Set up simulation

The following simulation assumes $\beta \sim N(0,\sigma_{\sf betasd}^2)$, with
$\hat{\beta}$ having standard error 1.

```{r simulation_setup}
timed_sims = function(ash.args,nsim = 20,nmin = 100,nmax = 1e3,betasd = 1) {
  n = 10^seq(log10(nmin),log10(nmax),length = nsim)
  elapsed.time = rep(0,nsim)
  for (i in 1:nsim) {
    set.seed(i)
    with(ash.args,cat(sprintf("%6s %13s %3d\n",method,optmethod,i)))
    betahat = rnorm(n[i],sd = sqrt(1 + betasd^2))
    elapsed.time[i] =
      system.time(do.call(ash,args = modifyList(ash.args,
                  list(betahat = betahat, sebetahat = 1))))[3]
  }
  return(data.frame(elapsed.time = elapsed.time,seed = 1:nsim,n = n))
}
```

Now run a simulation study, for $n$ (number of tests; $p$ in the paper) in
the range 10 to 100,000. Note that the warnings are generated by the EM
algorithm on big problems due to lack of convergence.

```{r model_fitting, results='hide'}
df = data.frame()
cat("method --optmethod-- sim\n")
for(method in c("fdr","shrink")) {
  for(optmethod in c("mixIP","cxxMixSquarem")) {
    df = rbind(df,data.frame(method = method,optmethod = optmethod,
                timed_sims(list(method = method,optmethod = optmethod),
                           nsim = 50,nmin = 10,nmax = 1e5)))
  }
}
cat("\n")
```

Summarize the model fitting results in a table.

```{r model_fitting_summary}
cat("Average computation time (in seconds):\n")
print(as.table(by(df,df[c("method","optmethod")],
                  function (x) mean(x$elapsed.time))))
```

## Results

Now plot time as a function of $n$:

```{r plot_results}
qplot(x = n,y = elapsed.time,data = df,col = optmethod,facets = .~method,
      ylab = "elapsed time (s)")
```

Zoom in on "small" problems with $n < 5000$.

```{r plot_results_magnified}
qplot(x = n,y = elapsed.time, data = subset(df,n < 5000),
      col = optmethod,facets = .~method,ylab = "elapsed time (s)")
```

## Summary

The IP method clearly scales to large problems much better than EM. It is
faster, and also more reliable (it sometimes reaches a higher log-likelihood
than EM, never a lower one); see [checkIP.html](checkIP.html).

However, for small problems (n < 5000) the EM is adequate for many practical
purposes, solving within a few seconds. This is particularly true with the
penalty term (method = fdr), which helps the EM converge, presumably because
it is helping identifiability, removing the large flat parts of the likelihood
objective function.
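As a rough rule of thumb suggested by these timings, one could choose the
optimization method from the problem size. A minimal sketch (the helper name
and the n = 5000 cutoff are illustrative, taken from the observation above):

```r
library(ashr)

# Use the accelerated EM for small problems and the interior point
# method for large ones, per the timing comparison above.
fit_ash_auto = function(betahat, sebetahat, method = "fdr") {
  opt = if (length(betahat) < 5000) "cxxMixSquarem" else "mixIP"
  ash(betahat, sebetahat, method = method, optmethod = opt)
}
```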
Indeed, for small problems with the penalty term (n < 2000), EM with squarem
acceleration is a little faster than IP in these comparisons.

## Session information

```{r info}
sessionInfo()
```

--------------------------------------------------------------------------------
/analysis/Makefile:
--------------------------------------------------------------------------------
HTML_FILES := $(patsubst %.Rmd, %.html ,$(wildcard *.Rmd)) \
              $(patsubst %.md, %.html ,$(wildcard *.md))

all: html


html: $(HTML_FILES)

%.html: %.Rmd
	export R_LIBS_USER=../packrat/lib/*/*; R --slave -e "set.seed(100);rmarkdown::render('$<')"

%.html: %.md
	export R_LIBS_USER=../packrat/lib/*/*; R --slave -e "set.seed(100);rmarkdown::render('$<')"

.PHONY: clean
clean:
	$(RM) $(HTML_FILES)
	$(RM) -r figure/*
	$(RM) table/*.tex

--------------------------------------------------------------------------------
/analysis/adaptive_shrinkage.Rmd:
--------------------------------------------------------------------------------
---
title: "Illustration of Adaptive Shrinkage"
author: "Matthew Stephens"
date: 2016-09-29
---

**Last updated:** `r Sys.Date()`

**Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`

The goal here is to illustrate the "adaptive" nature of the adaptive
shrinkage. The shrinkage is adaptive in two senses. First, the amount
of shrinkage depends on the distribution $g$ of the true effects,
which is learned from the data: when $g$ is very peaked about zero
then ash learns this and deduces that signals should be more strongly
shrunk towards zero than when $g$ is less peaked about zero. Second,
the amount of shrinkage of each observation depends on its standard
error: the smaller the standard error, the more informative the data,
and so the less shrinkage that occurs. From an Empirical Bayesian
perspective both of these points are entirely natural: the posterior
depends on both the prior and the likelihood; the prior, $g$, is
learned from the data, and the likelihood incorporates the standard
error of each observation.

First, we load the necessary libraries.

```{r packages}
library(REBayes)
library(ashr)
library(ggplot2)
library(scales)
```

```{r chunk_options, include=FALSE}
# Specify settings for displaying the plots in the rendered document.
source("chunk-options.R")
```

We simulate from two scenarios: in the first scenario, the effects are more
peaked about zero (**sim.spiky**); in the second scenario, the effects are
less peaked at zero (**sim.bignormal**). A summary of the two data sets is
printed at the end of this chunk.
```{r initialize}
set.seed(100)
source("../code/dsc-shrink/datamakers/datamaker.R")

NSAMP = 1000
s = 1/rgamma(NSAMP,5,5)

sim.spiky =
  rnormmix_datamaker(args = list(g = normalmix(c(0.4,0.2,0.2,0.2),
                                               c(0,0,0,0),
                                               c(0.25,0.5,1,2)),
                                 min_pi0 = 0,
                                 max_pi0 = 0,
                                 nsamp = NSAMP,
                                 betahatsd = s))

sim.bignormal =
  rnormmix_datamaker(args = list(g = normalmix(1,0,4),
                                 min_pi0 = 0,
                                 max_pi0 = 0,
                                 nsamp = NSAMP,
                                 betahatsd = s))

cat("Summary of observed beta-hats:\n")
print(rbind(spiky = quantile(sim.spiky$input$betahat,seq(0,1,0.1)),
            bignormal = quantile(sim.bignormal$input$betahat,seq(0,1,0.1))),
      digits = 3)
```

Now we run ash on both data sets.

```{r run_ash}
beta.spiky.ash = ash(sim.spiky$input$betahat,s)
beta.bignormal.ash = ash(sim.bignormal$input$betahat,s)
```

Next we plot the shrunken estimates against the observed values, colored
according to the (square root of) precision: precise estimates being colored
red, and less precise estimates being blue. Two key features of the plots
illustrate the ideas of adaptive shrinkage: i) the estimates under the spiky
scenario are shrunk more strongly, illustrating that shrinkage adapts to the
underlying distribution of beta; ii) in both cases, estimates with large
standard error (blue) are shrunk more than estimates with small standard
error (red), illustrating that shrinkage adapts to measurement precision.

```{r plot_shrunk_vs_obs}
make_df_for_ashplot =
  function (sim1, sim2, ash1, ash2, name1 = "spiky", name2 = "big-normal") {
    n = length(sim1$input$betahat)
    x = c(get_lfsr(ash1),get_lfsr(ash2))
    return(data.frame(betahat = c(sim1$input$betahat,sim2$input$betahat),
                      beta_est = c(get_pm(ash1),get_pm(ash2)),
                      lfsr = x,
                      s = c(sim1$input$sebetahat,sim2$input$sebetahat),
                      scenario = c(rep(name1,n),rep(name2,n)),
                      signif = x < 0.05))
  }

ashplot = function(df,xlab="Observed beta-hat",ylab="Shrunken beta estimate")
  ggplot(df,aes(x = betahat,y = beta_est,color = 1/s)) +
    xlab(xlab) + ylab(ylab) + geom_point() +
    facet_grid(.~scenario) +
    geom_abline(intercept = 0,slope = 1,linetype = "dotted") +
    scale_colour_gradient2(midpoint = median(1/s),low = "blue",
                           mid = "white",high = "red",space = "Lab") +
    coord_fixed(ratio = 1)

df = make_df_for_ashplot(sim.spiky,sim.bignormal,beta.spiky.ash,
                         beta.bignormal.ash)
print(ashplot(df))
```

Now plot lfsr against z scores, colored according to the (square root of)
precision.

```{r plot_lfsr}
z_lfsr_plot = function(df,ylab = "Observed Z score",xlab = "lfsr")
  ggplot(df,aes(x = lfsr,y = betahat/s,color = 1/s)) +
    xlab(xlab) + ylab(ylab) + geom_point() + facet_grid(.~scenario) +
    scale_colour_gradient2(midpoint = median(1/s),low = "blue",
                           mid = "white",high = "red",space = "Lab")

print(z_lfsr_plot(df))
```

A related consequence is that the significance of each observation is no
longer monotonic in the $p$ value.
```{r plot_pvalues}
pval_plot = function (df)
  ggplot(df,aes(x = pnorm(-abs(betahat/s)),y = lfsr,color = log(s))) +
    geom_point() + facet_grid(.~scenario) + xlim(c(0,0.025)) +
    xlab("p value") + ylab("lfsr") +
    scale_colour_gradient2(midpoint = 0,low = "red",
                           mid = "white",high = "blue")

print(pval_plot(df))
```

Let's see how these are affected by changing the modelling assumptions so that
the *standardized* beta are exchangeable (rather than the beta being
exchangeable).

```{r run_ash_ET}
beta.bignormal.ash.ET =
  ash(sim.bignormal$input$betahat,s,alpha = 1,mixcompdist = "normal")
beta.spiky.ash.ET =
  ash(sim.spiky$input$betahat,s,alpha = 1,mixcompdist = "normal")
df.ET = make_df_for_ashplot(sim.spiky,sim.bignormal,beta.spiky.ash.ET,
                            beta.bignormal.ash.ET)
ashplot(df.ET,ylab = "Shrunken beta estimate (ET model)")
pval_plot(df.ET)
```

This is a "volcano plot" showing effect size against p value. The blue points
are "significant" in that they have lfsr < 0.05.

```{r volcano}
print(ggplot(df,aes(x = betahat,y = -log10(2*pnorm(-abs(betahat/s))),
                    col = signif)) +
        geom_point(alpha = 1,size = 1.75) + facet_grid(.~scenario) +
        theme(legend.position = "none") + xlim(c(-10,10)) + ylim(c(0,15)) +
        xlab("Effect (beta)") + ylab("-log10 p-value"))
```

In this case the significance by lfsr is not quite the same as cutting off
at a given p value (you can see that the decision boundary is not quite the
same as drawing a horizontal line), but it is also not that different,
presumably because the standard errors, although varying across observations,
do not vary greatly.

```{r sehist}
hist(s,main = "histogram of standard errors")
print(summary(s))
```

## Session information

```{r info}
print(sessionInfo())
```

--------------------------------------------------------------------------------
/analysis/ash_npmle.Rmd:
--------------------------------------------------------------------------------
---
title: "Using ash for NPMLE"
author: "Matthew Stephens"
date: 2016-05-31
---

**Last updated:** `r Sys.Date()`

**Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`

First, we load the necessary libraries.

```{r packages}
library(REBayes)
library(ashr)
```

```{r chunk_options, include=FALSE}
# Specify settings for displaying the plots in the rendered document.
source("chunk-options.R")
```

## The NPMLE

Here we show how we can use ash to (approximately) compute the unconstrained
NPMLE - that is, estimate the underlying distribution $g$ by maximizing the
likelihood without the unimodal constraint. See Koenker and Mizera, JASA 2014,
for background.

Within ash one can approximate the NPMLE as a mixture of uniforms on a dense
grid of non-overlapping intervals - this results in a piecewise constant
density with changes in the density only at the grid points.

The following example comes from the REBayes vignette by Koenker and Gu. The
underlying $g$ is a mixture of a point mass (weight 0.8) at 0 and a normal,
$N(2,1)$, component (weight 0.2).
```{r fit_glmix_model}
set.seed(102)
y <- c(rep(0,800),rnorm(200,2)) + rnorm(1000)
z <- GLmix(y)
```

Now we fit the NPMLE using `ashr` and compare with the REBayes solution.

```{r fit_ash_model}
grid = seq(from = min(z$x),to = max(z$x),length = 1000)
k = length(grid)
y.ash.npmle =
  ash(y,1,g = unimix(pi = rep(1/(k-1),(k-1)),a = grid[-k],b = grid[-1]),
      method = "shrink")

# Plot the model fitted using GLmix against the fitted ash model.
plot(z$x,cumsum(z$y)/sum(z$y),col = 2,main = "Estimated cdf",type = "l")
lines(cdf.ash(y.ash.npmle,x = z$x),type = "l",lty = "dashed",lwd = 2)

# Compare the likelihood given the GLmix model and given the ash model.
print(z$logLik,digits = 12)
print(y.ash.npmle$loglik,digits = 12)
```

## Session information

```{r info}
sessionInfo()
```

--------------------------------------------------------------------------------
/analysis/checkIP.Rmd:
--------------------------------------------------------------------------------
---
title: "Check interior point method"
author: "Matthew Stephens"
date: 2015-11-18
---

**Last updated:** `r Sys.Date()`

**Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`

Here we compare the likelihoods achieved by the newly implemented
interior point method against the EM algorithm.

**Note:** the last code chunk ("compare_lfsr") may take several minutes to
render in R or RStudio because it uses ggplot to plot more than a million data
points. Also, it may not be possible to run the last code chunk
interactively in RStudio.

First, we load the necessary libraries.

```{r packages}
library(ggplot2)
library(dplyr)
library(reshape2)
```

```{r chunk_options, include=FALSE}
# Specify settings for displaying the plots in the rendered document.
source("chunk-options.R")
```

## Log-likelihood comparison

The 'dsc-opt' script simply runs both methods (IP and EM) on the same
data sets, and compares the difference in the log-likelihoods. We can
see that in >90% of cases the difference is negligible. Then there is
a tail of cases where the IP log-likelihood is higher - presumably
where the EM doesn't really converge so well.

```{r compare_likelihoods}
load("../output/dsc-opt-files/dsc_opt.RData")
plot(ecdf(dsc_opt$res$diff1),
     main = "ECDF of log-likelihood difference (IP - EM)")
```

## lfsr comparison

However, what we really care about is inference quantities, such as
the lfsr, rather than the log-likelihood. Here we compare the lfsr for
the simulations where we ran both the IP method and the EM method (nocxx)
in dsc-shrink.

```{r compare_lfsr}
load("../output/dsc-shrink-files/res.RData")

# Select out the lfsr estimates for each method.
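# For each method, drop the timing columns and melt the per-test lfsr
# estimates into long format; the two methods are then matched up, via
# inner_join, on scenario, seed and test (the "variable" column).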
df1 = res$lfsr %>% filter(method %in% c("ash.hu.nocxx")) %>%
  select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>%
  melt(id.vars = c("method","scenario","seed",".id"),
       value.name = "lfsr.nocxx")
df2 = res$lfsr %>% filter(method %in% c("ash.hu")) %>%
  select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>%
  melt(id.vars = c("method","scenario","seed",".id"),
       value.name = "lfsr.IP")
df = inner_join(df1,df2,by = c("scenario","seed","variable"))
df = transform(df,scenario = factor(scenario))
print(ggplot(df,aes(lfsr.nocxx,lfsr.IP)) +
        geom_point(shape = 1) +
        facet_wrap(~ scenario,nrow = 2) +
        geom_abline(colour = "black") +
        xlab("EM algorithm (no cxx)") +
        ylab("IP method"))
```

## Session information

```{r info}
sessionInfo()
```

--------------------------------------------------------------------------------
/analysis/check_mixfdr_lownoise.Rmd:
--------------------------------------------------------------------------------
---
title: "Check mixfdr in low noise simulation"
author: "Matthew Stephens"
date: 2015-10-26
---

**Last updated:** `r Sys.Date()`

**Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`

Here we check that mixfdr actually does OK at fitting the bimodal distribution
when the noise is small relative to the separation between the modes.

```{r chunk_options, include=FALSE}
# Specify settings for displaying the plots in the rendered document.
source("chunk-options.R")
```

Simulate bimodal data with well-separated peaks. The histogram makes the
multi-modal nature of the underlying g clear.

```{r sim_data}
source("../code/dsc-shrink/datamakers/datamaker.R")
sim.bimodal =
  rnormmix_datamaker(args = list(g = normalmix(c(0.5,0.5),c(-5,5),c(1,1)),
                                 min_pi0 = 0.4,
                                 max_pi0 = 0.4,
                                 nsamp = 1000,
                                 betahatsd = 1))
hist(sim.bimodal$input$betahat,nclass = 20,xlim = c(-10,10),prob = TRUE,
     main="histogram of simulated betahat values")
```

Now run mixfdr on the simulated data. Note that the fit captures the
multi-modal nature of g, although mixfdr continues to overestimate pi0.
```{r fit_models}
source("../code/dsc-shrink/methods/mixfdr.wrapper.R")
sim.bimodal.mixfdroutput =
  mixfdr.wrapper(sim.bimodal$input,args = list(theonull = TRUE))
g = mixfdr2fitted.g(sim.bimodal.mixfdroutput)$fitted.g
x = seq(-10,10,length = 100)
par(cex.main = 1,font.main = 1)
plot(ecdf(sim.bimodal$meta$beta),cex = 0.6,
     main = "CDF of true beta: empirical (black) and fitted (red)")
lines(x,mixcdf(g,x),lwd = 2,lty = "dotted",col = "orangered")
```

## Session information

```{r info}
sessionInfo()
```

--------------------------------------------------------------------------------
/analysis/chunk-options.R:
--------------------------------------------------------------------------------
# For more information on available chunk options, see
# http://yihui.name/knitr/options#chunk_options

library("knitr")
opts_chunk$set(tidy = FALSE,
               comment = NA,
               fig.align = "center",
               fig.path = paste0("figure/", current_input(), "/"))

knit_hooks$set(crop = hook_pdfcrop)

--------------------------------------------------------------------------------
/analysis/efron.fcr.Rmd:
--------------------------------------------------------------------------------
---
title: "Post-selection CI example, asymmetric, not unimodal at 0"
author: "Matthew Stephens"
date: 2016-05-09
---

**Last updated:** `r Sys.Date()`

**Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`

First, we load the necessary libraries.

```{r packages}
library(ashr)
library(qvalue)
library(locfdr)
```

```{r chunk_options, include=FALSE}
# Specify settings for displaying the plots in the rendered document.
source("chunk-options.R")
```

## Introduction

This example comes from Efron (2008), p. 16, examining the false coverage rate
(FCR). I selected this example because the distribution of the non-zero effects
is highly asymmetric and not at all unimodal at zero, both issues a referee
asked me to elaborate on. Specifically, the distribution of the non-zero
effects is N(-3,1). Here I simulate data, and apply ash (with the "halfuniform"
option to allow for asymmetric g).

```{r fit_halfuniform_model}
set.seed(10)
nsamp = 1e4
altmean = -3
mu0 = rep(0,nsamp)
mu1 = rnorm(nsamp,altmean,1)
comp = rbinom(nsamp,1,0.1)
mu = ifelse(comp == 0,mu0,mu1)
z = rnorm(nsamp,mu,1)

# Fit the model.
res.ash = ash(z,1,mixcompdist = "halfuniform")
```

We can also run ash with the "true" g, to allow us to compare the lfsr, lfdr,
etc.

```{r fit_normalmix_model}
true.g = normalmix(c(0.9,0.1),c(0,-3),c(0,1))
res.ash.true = ash(z,1,g = true.g,fixg = TRUE)
```

Here we can see how the partition of $z$ scores compares with the truth. Note
that the effect of the unimodal assumption is to extend the inferred alternative
distribution toward 0. Here, `nullalthist` plots a histogram of z scores,
highlighting the alternative distribution of z scores that is implied by
the local fdr values lfdr.
```{r plot_zscore_hist}
source("../R/nullalthist.R")
par(mfcol = c(2,1),cex.main = 1,font.main = 1)
nullalthist(z,lfdr = get_lfdr(res.ash.true),main = "true partition (res.ash.true)")
nullalthist(z,lfdr = get_lfdr(res.ash),main = "inferred partition (res.ash)")
```

Comparing the inferred posterior means, lfdr, and lfsr with the true values of
these quantities, we find reassuringly good correspondence.

```{r scatterplots}
par(mfrow = c(1,3))
plot(get_pm(res.ash.true),get_pm(res.ash),xlab = "Truth",ylab = "ash.hu",
     main = "Posterior Mean (inferred vs truth)",pch = 20,cex = 0.6,
     xlim = c(-6,1),ylim = c(-6,1))
abline(a = 0,b = 1,col = "orangered",lwd = 2,lty = "dotted")

plot(get_lfdr(res.ash.true),get_lfdr(res.ash),xlab = "Truth", ylab = "ash.hu",
     main = "lfdr (inferred vs truth)",pch = 20,cex = 0.6,
     xlim = c(0,1),ylim = c(0,1))
abline(a = 0,b = 1,col = "orangered",lwd = 2,lty = "dotted")

plot(get_lfsr(res.ash.true),get_lfsr(res.ash),xlab = "Truth", ylab = "ash.hu",
     main = "lfsr (inferred vs truth)",pch = 20,cex = 0.6,
     xlim = c(0,1),ylim = c(0,1))
abline(a = 0,b = 1,col = "orangered",lwd = 2,lty = "dotted")
```

## Comparison with qvalue and locfdr

We can also run qvalue and locfdr. We see that locfdr perhaps performs a bit
better than ash for the decomposition here, but the estimated local fdrs are
pretty similar. Here qvalue does less well because of the asymmetry, which it
does not take into account.

```{r comparison_to_other_methods}
res.locfdr = locfdr(z,nulltype = 0)
res.qvalue = qvalue(p = pchisq(z^2,df = 1,lower.tail = FALSE))

par(mfrow = c(1,3))
plot(get_lfdr(res.ash.true),get_lfdr(res.ash),pch = 20,cex = 0.6,
     xlab = "Truth (lfdr)",ylab = "ash.hu",main = "ash.hu",
     xlim = c(0,1),ylim = c(0,1))
abline(a = 0,b = 1,col = "orangered",lty = "dotted",lwd = 2)

plot(get_lfdr(res.ash.true),res.locfdr$fdr,pch = 20,cex = 0.6,
     xlab = "Truth (lfdr)",ylab = "Estimate",main = "locfdr",
     xlim = c(0,1),ylim = c(0,1))
abline(a = 0,b = 1,lwd = 2,col = "orangered",lty = "dotted")

plot(get_lfdr(res.ash.true),res.qvalue$lfdr,pch = 20,cex = 0.6,
     xlab = "Truth (lfdr)",ylab = "Estimate",main = "qvalue",
     xlim = c(0,1),ylim = c(0,1))
abline(a = 0,b = 1,lwd = 2,col = "orangered",lty = "dotted")
```

## Uniform tail curtails credible intervals

The following plot compares the (symmetric-tail) 95% CIs from ash (red) for
the "significant" observations with Bayes rule (green), similar to Figure 8
of Efron. Note that the lower (2.5%) endpoints are pretty accurate, but the
upper (97.5%) endpoints are curtailed - presumably due, at least in part, to
the short tails of the uniform mixture.

```{r plot_credints}
CImatrix = ashci(res.ash,level = 0.95)
BayesComparePlot = function(CImatrix, altmean = -3,...) {
  plot(z,mu,xlim = c(-8,0),pch = 20,cex = 0.6,...)
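  # Overlay the endpoints of the ash 95% CIs in red; then compute, under
  # the true two-groups model, the local fdr for each z and trace the
  # Bayes-rule interval endpoints in green for the selected observations.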
  points(z,CImatrix[,1],col = "orangered",cex = 0.6,pch = 20)
  points(z,CImatrix[,2],col = "orangered",cex = 0.6,pch = 20)

  fdr = 0.9*dnorm(z)/(0.9*dnorm(z) + 0.1*dnorm(z,altmean,sqrt(2)))
  o = order(z)
  upper = ifelse(fdr[o] < 0.025,
                 (z[o] + altmean)/2 + qnorm(0.975 + fdr[o])/sqrt(2),0)
  lines(z[o],upper,col = "limegreen",lwd = 2)
  lines(z[o],(z[o] + altmean)/2 - qnorm(0.975)/sqrt(2),col = "limegreen",lwd = 2)
  abline(v = max(z[fdr < 0.05]))
}
par(mfrow = c(1,1))
BayesComparePlot(CImatrix,
                 main = "CIs for highly asymmetric and non-unimodal-at-zero data")
```

## Variational version

Although not a focus of the paper, ash does have an option to do variational
inference for the mixture components (with a Dirichlet prior). In practice
this approach usually ends up spreading the posterior mass more among the
mixture components. It seemed plausible that this might lead to slightly
less extreme tail behaviour than above (because the model will put a little
more weight on the uniforms with larger variance, whose weights are essentially
set to zero in the above).

```{r vb_model_fitting}
res.ash.VB = ash(z,1,mixcompdist = "halfuniform",optmethod = "mixVBEM")
CImatrix.VB = ashci(res.ash.VB,level = 0.95)
```

Again, we can compare results with Bayes rule:

```{r plot_vb_credints}
BayesComparePlot(CImatrix.VB,
                 main = paste("CIs for highly asymmetric and non-unimodal-at-zero data\n",
                              "Variational Version"))
```

## Session information

```{r info}
sessionInfo()
```

--------------------------------------------------------------------------------
/analysis/estimate_mode.Rmd:
--------------------------------------------------------------------------------
---
title: "Estimate Non-zero mode"
author: "Matthew Stephens"
date: 2016-02-01
---

**Last updated:** `r Sys.Date()`

**Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`

The purpose of this analysis is to check the performance of the non-zero mode
option.

First, we load the necessary libraries.

```{r packages}
library(ashr)
```

```{r chunk_options, include=FALSE}
# Specify settings for displaying the plots in the rendered document.
source("chunk-options.R")
```

## Simple simulation

I conjecture that the mean of the posterior means should be close to the
optimum for the mode... maybe even equal to it. (That is, this would constitute
a fixed point for the update. We aren't explicitly using that directly in the
current implementation; the uniform mixture uses `optim` to do it numerically;
the normal mixture uses a true EM, I think...)

```{r evaluate_mode_estimates}
check_mode = function(betahat, sebetahat, mixcompdist) {
  z.ash = ash(betahat,sebetahat,mixcompdist = mixcompdist,mode = "estimate")
  average.posteriormean = mean(get_pm(z.ash))
  fitted.mode = comp_mean(get_fitted_g(z.ash))[1]

  # Refit to get g.
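  # (The refit subtracts the fitted mode; the loglik.down/loglik.up values
  # below then probe the log-likelihood at small shifts of +/- 0.01 around
  # the fitted mode, to check that it is a local optimum, while
  # loglik.posteriormean evaluates the conjectured fixed point.)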
  z.ash1 = ash(betahat - fitted.mode,sebetahat,mixcompdist = mixcompdist)
  g = get_fitted_g(z.ash1)
  loglik = get_loglik(z.ash1)

  loglik.down = ash(betahat - fitted.mode - 0.01,sebetahat,g = g)$loglik
  loglik.up = ash(betahat - fitted.mode + 0.01,sebetahat,g = g)$loglik
  loglik.posteriormean = ash(betahat - average.posteriormean,sebetahat,g = g)$loglik

  return(list(fitted.mode = fitted.mode,
              average.posteriormean = average.posteriormean,
              loglik = c(loglik,loglik.down,loglik.up,loglik.posteriormean)))
}

set.seed(100)
z = rnorm(1000) + 3
print(check_mode(z,1,mixcompdist = "uniform"))
print(check_mode(z,1,mixcompdist = "normal"))
print(check_mode(z,1,mixcompdist = "halfuniform"))
```

An additional experiment:

```{r}
set.seed(100)
beta = rexp(1000)
betahat = beta + rnorm(1000,0,0.1)
z.ash.hu = ash(betahat,0.1,mixcompdist="halfuniform",outputlevel=4,method="shrink")
z.ash.pu = ash(betahat,0.1,mixcompdist="+uniform",outputlevel=4,method="shrink")

z.ash.hu2 = ash(betahat - 0.2,0.1,mixcompdist = "halfuniform",outputlevel = 4,
                method = "shrink")
z.ash.pu2 = ash(betahat - 0.1,0.1,mixcompdist = "+uniform",outputlevel = 4,
                method = "shrink")
z.ash.hu.nzm = ash(betahat,0.1,mixcompdist = "halfuniform",mode = "estimate",
                   method = "shrink")
print(z.ash.hu$loglik,digits = 8)
print(z.ash.hu.nzm$loglik,digits = 8)
```

*Note to self:* check that the normal version works too; check
nonzeromodeEMobj as it doesn't seem needed.

## Session information

```{r info}
sessionInfo()
```

--------------------------------------------------------------------------------
/analysis/figure/ (binary image files):
--------------------------------------------------------------------------------
[The PNG and PDF plot files under analysis/figure/, listed in the directory
tree above, are binary images generated by rendering the Rmd analyses. Each
one is stored in the repository at
https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/<path-to-file>.]
-------------------------------------------------------------------------------- /analysis/figure/make_GOODPOOR_figs.Rmd/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/make_GOODPOOR_figs.Rmd/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /analysis/figure/makefig_FDReg.Rmd/decomp_ZA-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/makefig_FDReg.Rmd/decomp_ZA-1.pdf -------------------------------------------------------------------------------- /analysis/figure/makefig_FDReg.Rmd/decomp_ZA_poster-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/makefig_FDReg.Rmd/decomp_ZA_poster-1.pdf -------------------------------------------------------------------------------- /analysis/figure/makefig_FDReg.Rmd/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/makefig_FDReg.Rmd/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /analysis/figure/makefig_FDReg.Rmd/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/makefig_FDReg.Rmd/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/egcdf-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/egcdf-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/egcdf-reduce-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/egcdf-reduce-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/egcdf-reduce-with-npmle-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/egcdf-reduce-with-npmle-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/egcdf-reduce-with-npmle-2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/egcdf-reduce-with-npmle-2.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/mean_cdf-1.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/mean_cdf-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/mean_cdf-reduce-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/mean_cdf-reduce-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/mean_cdf_nopen-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/mean_cdf_nopen-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/mean_cdf_nopen-reduce-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/mean_cdf_nopen-reduce-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_egdens.Rmd/scenario_density-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_egdens.Rmd/scenario_density-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfdr-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfdr-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfdr-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfdr-1.png -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfsr-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfsr-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfsr-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfsr-1.png -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfsr_s-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfsr_s-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfsr_s-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfsr_s-1.png -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfsr_s_nn-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfsr_s_nn-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfsr_s_nn-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfsr_s_nn-1.png -------------------------------------------------------------------------------- /analysis/figure/plot_pi0est.Rmd/plot_pi0est-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_pi0est.Rmd/plot_pi0est-1.pdf -------------------------------------------------------------------------------- /analysis/figure/referee.response.Rmd/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/referee.response.Rmd/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /analysis/figure/referee.response.Rmd/unnamed-chunk-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/referee.response.Rmd/unnamed-chunk-1-2.png -------------------------------------------------------------------------------- /analysis/figure/referee_uaza.Rmd/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/referee_uaza.Rmd/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /analysis/figure/referee_uaza.Rmd/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/referee_uaza.Rmd/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /analysis/figure/referee_uaza.Rmd/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/referee_uaza.Rmd/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /analysis/figure/referee_uaza.Rmd/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/referee_uaza.Rmd/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- 
/analysis/index.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Home"
3 | ---
4 |
5 | **Last updated:** `r Sys.Date()`
6 |
7 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
8 |
9 |
10 | ## Simple illustrations
11 |
12 | * [Figures illustrating FDR](makefig_FDReg.html)
13 | * [Illustrate the dilution effect of low-precision observations](make_GOODPOOR_figs.html)
14 | * [Adaptive Shrinkage](adaptive_shrinkage.html)
15 | * [Adaptive Shrinkage: shrunken interval estimates](metaplot_examples.html)
16 |
17 | * [Comparing models: EE vs ET models](CompareEEvsETmodel.html)
18 | * [Checking how qvalue behaves in a simulation suggested by a referee](referee_uaza.html)
19 | * [Comparison of methods, and an example of credible interval behaviour when the UA does not hold](efron.fcr.html)
20 | * [Using ash to fit the NPMLE; comparison with REBayes](ash_npmle.html)
21 |
22 | ## DSC Shrink (the main simulation study in the paper)
23 |
24 | * [Plot densities of simulations](plot_egdens.html)
25 | * [Plot estimates of pi0](plot_pi0est.html)
26 | * [Plot local false sign rates and lfdr vs truth](plot_lfsr.html)
27 | * [Plot CDFs](plot_cdf_eg.html)
28 |
29 |
30 | * [Investigate coverage of the hu method without penalty](investigate_hu_badcoverage.html)
31 | * [Run mixfdr on a low-noise bimodal situation](check_mixfdr_lownoise.html)
32 | * [Make coverage tables](summarize_coverage.html)
33 |
34 |
35 | ## Optimization method: EM vs Interior Point
36 | * [Compare loglik achieved by EM vs Interior Point](checkIP.html)
37 | * [Compare speed of EM vs Interior Point](IPvsEM.html)
38 |
39 | ## DSC Znull (assess behaviour of the ash LR under the null)
40 |
41 | * [Summarize results of null simulations](summarize_dsc_znull.html)
42 |
43 | ## Check how estimating the mode performs
44 |
45 | * [Basic check of the non-zero mode option](estimate_mode.html)
46 | --------------------------------------------------------------------------------
/analysis/investigate_hu_badcoverage.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Investigate poor coverage for ash.hu"
3 | author: "Matthew Stephens"
4 | date: 2015-10-26
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | First, we load the necessary libraries.
12 |
13 | ```{r packages}
14 | library(ashr)
15 | library(reshape2)
16 | library(ggplot2)
17 | library(dplyr)
18 | library(dscr)
19 | ```
20 |
21 | ```{r chunk_options, include=FALSE}
22 | # Specify settings for displaying the plots in the rendered document.
23 | source("chunk-options.R")
24 | ```
25 |
26 | Compile the tables which we will use to examine the results of the simulation
27 | experiments.
28 |
29 | ```{r compile_tables}
30 | load("../output/dsc-shrink-files/res.RData")
31 |
32 | neglong =
33 |   res$negprob %>%
34 |   select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>%
35 |   melt(id.vars = c("method","scenario","seed",".id"),value.name = "negprob") %>%
36 |   filter(negprob > 0.95)
37 |
38 | poslong =
39 |   res$posprob %>%
40 |   select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>%
41 |   melt(id.vars = c("method","scenario","seed",".id"),value.name = "posprob") %>%
42 |   filter(posprob > 0.95)
43 |
44 | reslong =
45 |   res$cdf_score %>%
46 |   select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>%
47 |   melt(id.vars = c("method","scenario","seed",".id"))
48 |
49 | reslong.pos = inner_join(reslong,poslong)
50 | reslong.neg = inner_join(reslong,neglong)
51 |
52 | ash.hu.ft = reslong.pos %>% filter(method == "ash.hu.s" &
53 |                                    scenario == "flat-top")
54 | ```
55 |
56 | Extract an example to illustrate the coverage of ash.hu.
57 |
58 | ```{r show_example}
59 | eg = load_example(dsc_shrink,42,"flat-top",methodname = "ash.hu.s",
60 |                   homedir = "../code/dsc-shrink")
61 | out <- eg$output$fitted_g
62 | class(out) <- "list"
63 | out <- as.data.frame(out)
64 | print(subset(out,pi > 0.01))
65 | ```
66 |
67 | Notice how almost all of the inferred weight is on a small positive component.
68 | As a result, the false sign rate will be small, and there will be a strong tendency
69 | to overestimate zero effects. This leads to the coverage problems observed.
70 |
71 | Now let's look at an example with the ash.u.s method and the spiky scenario,
72 | which seems to be somewhat badly calibrated for the negative discoveries.
73 |
74 | ```{r show_more_examples}
75 | ash.u.s.spiky = reslong.neg %>% filter(method == "ash.u.s" &
76 |                                        scenario == "spiky")
77 | ash.n.s.spiky = reslong.neg %>% filter(method == "ash.n.s" &
78 |                                        scenario == "spiky")
79 | hist(ash.u.s.spiky$value,nclass = 100,
80 |      main = "histogram of quantile where observation falls in its CI")
81 | hist(ash.n.s.spiky$value,nclass = 100,
82 |      main = "histogram of quantile where observation falls in its CI")
83 | ```
84 |
85 | So what seems to be happening here is that the uniform tail is too short;
86 | when an observation falls outside of this tail it gets a zero quantile in its
87 | posterior interval.
88 |
89 | Can we find an example illustrating this trend? (Note: the next chunk below
90 | did not work for me, so I set `eval=FALSE, include=FALSE`. Perhaps this can be
91 | fixed at some point. -Peter)
92 |
93 | ```{r example_short_tail, eval=FALSE, include=FALSE}
94 | table((ash.u.s.spiky %>% filter(value == 0))$seed)
95 | eg = load_example(dsc_shrink,seed = 39,scenario = "spiky",
96 |                   methodname = "ash.u.s","../code/dsc-shrink")
97 | ```
98 |
99 | For comparison, here are the positive discoveries; here they are not too bad.
100 |
101 | ```{r histograms_pos}
102 | ash.u.s.spiky = reslong.pos %>% filter(method == "ash.u.s" & scenario == "spiky")
103 | ash.n.s.spiky = reslong.pos %>% filter(method == "ash.n.s" & scenario == "spiky")
104 | hist(ash.n.s.spiky$value)
105 | hist(ash.u.s.spiky$value)
106 | ```
107 |
108 | ## Session information
109 |
110 | ```{r info}
111 | sessionInfo()
112 | ```
113 | --------------------------------------------------------------------------------
/analysis/make_GOODPOOR_figs.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Make figures for simple simulation with good and poor precision observations"
3 | author: "Matthew Stephens"
4 | date: 2015-10-26
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | First, we load the necessary libraries and function definitions.
12 |
13 | ```{r packages}
14 | library(ashr)
15 | library(qvalue)
16 | library(locfdr)
17 | library(mixfdr)
18 | library(ggplot2)
19 | source("../R/plot_FDReg_hist.R")
20 | ```
21 |
22 | ```{r chunk_options, include=FALSE}
23 | # Specify settings for displaying the plots in the rendered document.
24 | source("chunk-options.R")
25 | ```
26 |
27 | Create a simple simulated data set to illustrate observations with good and poor
28 | precision. True values of beta are simulated from $0.5\,N(0,1) + 0.5\,\delta_0$ (see `sd = 1` below).
29 |
30 | ```{r sim_data}
31 | ntest = 10000
32 | set.seed(112)
33 |
34 | null_alt = rbinom(ntest,1,0.5)
35 | beta = rnorm(ntest,0,sd = 1)
36 | beta = ifelse(null_alt == 1,beta,0)
37 | GOOD = 1:(ntest/2)
38 | sebetahat = rep(1,ntest)
39 | sebetahat[-GOOD] = 10
40 |
41 | betahat = beta + rnorm(ntest, 0, sebetahat)
42 | zscore = betahat/sebetahat
43 | pval = pchisq(zscore^2,df = 1,lower.tail = FALSE)
44 | ```
45 |
46 | Show how poor-precision observations dilute good ones.
47 |
48 | ```{r GOODPOOReg_hist, dev='pdf', fig.width=6.5, fig.height=3}
49 | par(mai = c(0.3,0.3,0.2,0.2),mgp = c(3, 0.5, 0))
50 | layout(matrix(1:3,ncol = 3,byrow = TRUE))
51 | plot_FDReg_hist(pval[GOOD],1,type = 1,title = "Good-precision observations",
52 |                 ylab = "",nc = 20,cex.axis = 1,cex.main = 1.2,ylim = c(0,2.5))
53 | plot_FDReg_hist(pval[-GOOD],1,type = 1,
54 |                 title = "Poor-precision observations",ylab = "",nc = 20,
55 |                 yaxt = 'n',cex.axis = 1,cex.main = 1.2,ylim = c(0,2.5))
56 | axis(side = 2, labels = FALSE,tck = -0.01)
57 | plot_FDReg_hist(pval,1,type = 1,title = "Combined",yaxt = 'n',ylab = "",
58 |                 nc = 20,cex.axis = 1,cex.main = 1.2,ylim = c(0,2.5))
59 | axis(side = 2, labels = FALSE,tck = -0.01)
60 | ```
61 |
62 | Apply alternative methods to the same data set:
63 |
64 | ```{r other_methods}
65 | res.qvalue = qvalue(pval)
66 | res.locfdr = locfdr(zscore,nulltype = 0,plot = 0)
67 | res.ash = ash(betahat,sebetahat,method = "fdr",outputlevel = 4)
68 |
69 | res.qvalue.good = qvalue(pval[GOOD])
70 | res.locfdr.good = locfdr(zscore[GOOD],nulltype = 0,plot = 0)
71 | res.ash.good = ash(betahat[GOOD],sebetahat[GOOD],method = "fdr")
72 | ```
73 |
74 | Compare ash's accuracy against the alternative approaches.
75 |
76 | ```{r GOODPOOReg_scatter, dev='pdf', fig.width=6.5, fig.height=3}
77 | res = rbind(data.frame(x = res.qvalue.good$qvalues,
78 |                        y = res.qvalue$qvalues[GOOD],
79 |                        type = "qvalue"),
80 |             data.frame(x = res.locfdr.good$fdr,
81 |                        y = res.locfdr$fdr[GOOD],
82 |                        type = 'locfdr'),
83 |             data.frame(x = get_lfsr(res.ash.good),
84 |                        y = get_lfsr(res.ash)[GOOD],
85 |                        type = "ashr"))
86 |
87 | pp = ggplot(data = res,aes(x,y)) + geom_point(shape = 1) +
88 |   facet_grid(. ~ type) +
89 |   geom_abline(colour = "red") +
90 |   xlab("Analysing good-precision data only") +
91 |   ylab("Analysing combined data")
92 | print(pp + scale_y_continuous(limits = c(0,1)) +
93 |       scale_x_continuous(limits = c(0,1)) +
94 |       coord_equal(ratio = 1))
95 | ```
96 |
97 | Compare the LFSR against the p-values with different prior choices, and at
98 | different levels of precision in the observations.
99 |
100 | ```{r lfsr_vs_pval_GOODPOOR, dev='pdf', fig.width=6.5, fig.height=3}
101 | make_df_for_pval = function(ash,method = "default")
102 |   data.frame(p = pnorm(-abs(ash$data$x/ash$data$s)),
103 |              lfsr = get_lfsr(ash),
104 |              s = ash$data$s,
105 |              method = method)
106 |
107 | plot_pval_vs_lfsr = function(df,plot.it = TRUE){
108 |   if (length(unique(df$s)) > 2) {
109 |     df$s = log(df$s)
110 |   } else {
111 |     df$s = as.factor(df$s)
112 |   }
113 |
114 |   p = ggplot(df,aes(x = p,y = lfsr,color = s)) + geom_point() +
115 |     facet_grid(. ~ method) + xlim(c(0, 0.025)) + xlab("p value") +
116 |     ylab("lfsr")
117 |
118 |   if (length(unique(df$s)) > 2)
119 |     p = p + scale_colour_gradient2(midpoint = 1,low = "blue",mid = "white",
120 |                                    high = "red",space = "Lab")
121 |   if (plot.it)
122 |     print(p)
123 |   return(p)
124 | }
125 |
126 | res.ash.ET = ash(betahat,sebetahat,method = "fdr",alpha = 1,
127 |                  mixcompdist = "normal",outputlevel = 4)
128 | p = plot_pval_vs_lfsr(rbind(
129 |   make_df_for_pval(res.ash,method = "Default prior (alpha=0)"),
130 |   make_df_for_pval(res.ash.ET,method = "p-value prior (alpha=1)")))
131 | print(p + theme(axis.text.x = element_text(size = 8,angle = 45)) +
132 |       scale_colour_manual(values = c("blue","red")))
133 | ```
134 |
135 | Now plot them one at a time.
136 |
137 | ```{r lfsr_vs_pval_GOODPOOR_single, dev='pdf', fig.width=4.5, fig.height=3}
138 | p = plot_pval_vs_lfsr(make_df_for_pval(res.ash,method = "ash"))
139 | print(p + theme(axis.text.x = element_text(size = 8,angle = 45)))
140 |
141 | p = plot_pval_vs_lfsr(make_df_for_pval(res.ash.ET,method = "ash (p-value prior)"))
142 | print(p + theme(axis.text.x = element_text(size = 8,angle = 45)))
143 | ```
144 |
145 | Compare the log-likelihoods:
146 |
147 | ```{r compare_ash_likelihoods}
148 | res.ash.ET$logLR
149 | res.ash$logLR
150 | ```
151 |
152 | Plot the number of true positives against false positives (an "ROC curve"):
153 |
154 | ```{r tp_vs_fp, dev='pdf', fig.width=4.5, fig.height=3}
155 | df0 = cbind(make_df_for_pval(res.ash,method = "Default prior (alpha=0)"),
156 |             beta = beta)
157 | df1 = cbind(make_df_for_pval(res.ash.ET,method = "p-value prior (alpha=1)"),
158 |             beta = beta)
159 |
160 | # False positives (fp) and true positives (tp).
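# The helpers below rank observations by lfsr (most significant first) and
# then use cumsum to count, at each possible cutoff, how many true nulls
# (false positives) and true non-nulls (true positives) have been declared.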
161 | fp = function(df) cumsum((df$beta == 0)[order(df$lfsr,decreasing = FALSE)])
162 | tp = function(df) cumsum((df$beta != 0)[order(df$lfsr,decreasing = FALSE)])
163 | df.fptp = rbind(data.frame(method = "ash lfsr",tp = tp(df0),fp = fp(df0)),
164 |                 data.frame(method = "p values",tp = tp(df1),fp = fp(df1)))
165 | ggplot(df.fptp,aes(x = fp,y = tp,col = method)) + geom_line() +
166 |   xlab("False positives") + ylab("True positives") + xlim(c(0,100)) +
167 |   ylim(c(0,400)) + scale_colour_manual(values = c("black","green"))
168 | ```
169 |
170 | ## Session information
171 |
172 | ```{r info}
173 | sessionInfo()
174 | ```
175 | --------------------------------------------------------------------------------
/analysis/makefig_FDReg.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Some figures to illustrate basic ideas in FDR"
3 | author: "Matthew Stephens"
4 | date: 2015-10-26
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | This file makes two figures:
12 |
13 | 1. Illustrates the basic idea of how qvalue does FDR analysis in p-value space.
14 |
15 | 2. Compares the methods in how they decompose p values and z scores into
16 | two groups.
17 |
18 | First, we load the necessary libraries and function definitions.
19 |
20 | ```{r packages}
21 | library(ashr)
22 | library(qvalue)
23 | library(locfdr)
24 | library(mixfdr)
25 | source("../R/plot_FDReg_hist.R")
26 | source("../R/nullalthist.R")
27 | ```
28 |
29 | ```{r chunk_options, include=FALSE}
30 | # Specify settings for displaying the plots in the rendered document.
31 | source("chunk-options.R")
32 | ```
33 |
34 | ## Simple simulated example
35 |
36 | Generate a small data set, and run the different methods on this data set.
37 |
38 | ```{r qvalue_analysis}
39 |
40 | # ncz is the number of bins in the z score histograms.
41 | ncz = 100
42 | ntest = 10000
43 | set.seed(111)
44 |
45 | # Simulate (with all tests alternative, true effects $\beta \sim N(0,1)$).
46 | beta = rnorm(ntest)
47 | sebetahat = 1
48 | betahat = beta + rnorm(ntest,0,sebetahat)
49 | zscore = betahat/sebetahat
50 | pval = pchisq(zscore^2,df = 1,lower.tail = FALSE)
51 |
52 | # Apply the different methods.
53 | res.qvalue = qvalue(pval)
54 | res.locfdr = locfdr(zscore,nulltype = 0,plot = 0)
55 | res.mixfdr = mixFdr(zscore,noiseSD = 1,theonull = TRUE,plot = FALSE)
56 | res.ash = ash(betahat,1,method = "fdr")
57 |
58 | # Roughly compute a local fdr for qvalue (to aid the plotting of the
59 | # decomposition in z-score space) in each bin of the histogram. The fdr
60 | # is capped at 1 so that it is never larger than 1.
61 | temp = hist(pval,nclass = 50)
62 | bin_fdr = res.qvalue$pi0/temp$density
63 | qval_fdr = bin_fdr[as.numeric(cut(pval,temp$breaks))]
64 | qval_fdr = pmin(1,qval_fdr)
65 | qval_fdr = qvalue::lfdr(pval) # use qvalue's own lfdr in place of the binned approximation above
66 | ```
67 |
68 | Here's an example of how qvalue decomposes $p$ values into null and alternative components. First the $p$ values:
69 |
70 | ```{r plot_pvalues}
71 | plot_FDReg_hist(pval,res.qvalue$pi0,type = 1,title = "p values",
72 |                 cex.axis = 0.8,cex.main = 0.8)
73 | ```
74 |
75 | Now plot the decomposition:
76 |
77 | ```{r decompose_pvalues}
78 | plot_FDReg_hist(pval,res.qvalue$pi0,type = 4,cex.axis = 0.8,yaxt = 'n',
79 |                 textsize = 0.9,cex.main = 0.8,
80 |                 title = "Decomposition into null/alternative")
81 | axis(side = 2,labels = FALSE,tck = -0.01)
82 | ```
83 |
84 | Show the distribution of the p-values and the z-scores, stratified by their
85 | classification into "null" and "alternative".
86 |
87 | ```{r decomp_ZA, dev='pdf', fig.width=6, fig.height=6, crop=TRUE}
88 | layout(matrix(1:12,ncol = 3,byrow = FALSE))
89 | plotlabel = function(label,cex = 1.5){
90 |   plot(0,0,type = "n",axes = FALSE,xlab = "",ylab = "")
91 |   text(0,0,label,cex = cex)
92 | }
93 | plotlabel("qvalue")
94 | plotlabel("locfdr")
95 | plotlabel("mixfdr")
96 | plotlabel("ash")
97 |
98 | par(mar = c(1.3,2,1.5,0.2),mgp = c(3, 0.5, 0))
99 |
100 | # p value histograms
101 | altnullhist(pval,qval_fdr,main = "p values",ncz = 50,xaxt = 'n',cex.axis = 0.8)
102 | axis(side = 1, labels = FALSE,tck = -0.01)
103 | altnullhist(pval,res.locfdr$fdr,main = "",ncz = 50,xaxt = 'n',cex.axis = 0.8)
104 | axis(side = 1, labels = FALSE,tck = -0.01)
105 | altnullhist(pval,res.mixfdr$fdr,main = "",ncz = 50,xaxt = 'n',cex.axis = 0.8)
106 | axis(side = 1, labels = FALSE,tck = -0.01)
107 | altnullhist(pval,get_lfdr(res.ash),main = "",ncz = 50,xaxt = 'n',cex.axis = 0.8)
108 | axis(side = 1, labels = TRUE,tck = -0.01,cex.axis = 0.8)
109 |
110 | # z score histograms
111 | nullalthist(zscore,qval_fdr,main = "z scores",ncz = 50,xaxt = 'n',
112 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
113 | axis(side = 1,labels = FALSE,tck = -0.01)
114 | nullalthist(zscore,res.locfdr$fdr,main = "",ncz = 50,xaxt = 'n',
115 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
116 | axis(side = 1,labels = FALSE,tck = -0.01)
117 | nullalthist(zscore,res.mixfdr$fdr,main = "",ncz = 50,xaxt = 'n',
118 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
119 | axis(side = 1,labels = FALSE,tck = -0.01)
120 | nullalthist(zscore,get_lfdr(res.ash),main = "",ncz = 50,xaxt = 'n',
121 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
122 | axis(side = 1,labels = TRUE,tck = -0.01,cex.axis = 0.8)
123 | ```
124 |
125 | This one uses a different layout (4 columns, 2 rows) for my Tukey poster.
126 |
127 | ```{r decomp_ZA_poster, dev='pdf', fig.width=6, fig.height=2, crop=TRUE}
128 | # pdf("../figures/decomp_ZA_poster.pdf",width=6,height=2)
129 | layout(matrix(1:8,ncol = 4,byrow = TRUE))
130 | par(mar = c(1.3,2,1.5,0.2),mgp = c(3, 0.5, 0))
131 | ncz = 25
132 |
133 | # p value histograms
134 | altnullhist(pval,qval_fdr,main = "p values: qvalue",ncz = ncz,
135 |             xaxt = 'n',cex.axis = 0.8)
136 | # plot_FDReg_hist(pval,res.qvalue$pi0,type = 2,title = "p values",
137 | #                 xaxt = 'n',cex.axis = 0.8)
138 | axis(side = 1,labels = FALSE,tck = -0.01)
139 | # mtext(side = 3,"p values",line = 1)
140 | altnullhist(pval,res.locfdr$fdr,main = "locfdr",ncz = ncz,xaxt = 'n',
141 |             cex.axis = 0.8)
142 | axis(side = 1,labels = FALSE,tck = -0.01)
143 | altnullhist(pval,res.mixfdr$fdr,main = "mixfdr",ncz = ncz,xaxt = 'n',
144 |             cex.axis = 0.8)
145 | axis(side = 1,labels = FALSE,tck = -0.01)
146 | altnullhist(pval,get_lfdr(res.ash),main = "ash",ncz = ncz,xaxt = 'n',
147 |             cex.axis = 0.8)
148 | axis(side = 1,labels = TRUE,tck = -0.01,cex.axis = 0.8)
149 | # mtext(side = 1,"p values",line = 2)
150 |
151 | # z score histograms
152 | nullalthist(zscore,qval_fdr,main = "z scores: qvalue",ncz = ncz,xaxt = 'n',
153 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
154 | axis(side = 1,labels = FALSE,tck = -0.01)
155 | # mtext(side = 3,"z scores",line = 1)
156 | nullalthist(zscore,res.locfdr$fdr,main = "locfdr",ncz = ncz,xaxt = 'n',
157 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
158 | axis(side = 1,labels = FALSE,tck = -0.01)
159 | nullalthist(zscore,res.mixfdr$fdr,main = "mixfdr",ncz = ncz,xaxt = 'n',
160 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
161 | axis(side = 1,labels = FALSE,tck = -0.01)
162 | nullalthist(zscore,get_lfdr(res.ash),main = "ash",ncz = ncz,xaxt = 'n',
163 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
164 | axis(side = 1,labels = TRUE,tck = -0.01,cex.axis = 0.8)
165 | # mtext(side = 1,"z scores",line = 2)
166 | # dev.off()
167 | ```
168 |
169 | ## Session information
170 |
171 | ```{r info}
172 | sessionInfo()
173 | ```
174 | --------------------------------------------------------------------------------
/analysis/metaplot_examples.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "DDD plots"
3 | author: "Matthew Stephens"
4 | date: 2016-10-25
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | This file produces "meta-analysis"-style plots that illustrate the shrinkage behaviour of ash on some simple simulated data.
12 |
13 | First, we load the necessary libraries.
14 |
15 | ```{r packages}
16 | library(knitr)
17 | library(rmeta)
18 | library(ashr)
19 | ```
20 |
21 | ```{r chunk_options, include=FALSE}
22 | # Specify settings for displaying the plots in the rendered document.
23 | source("chunk-options.R")
24 | opts_chunk$set(echo = TRUE)
25 | ```
26 |
27 | ## Simulate Data
28 |
29 | We simulate 50 effects ($\beta$), half of which are truly "null" (i.e. 0) and
30 | the other half are $N(0,2^2)$. The observed effects ($\hat\beta$) all have
31 | standard error 1 here.
32 |
33 | ```{r chunk_label}
34 | set.seed(1)
35 | n = 50
36 | beta = c(rep(0,n/2),rnorm(n/2,0,2))
37 | betahat = beta + rnorm(n)
38 | beta.ash = ash(betahat,1)
39 | ```
40 |
41 | ## Meta Plots
42 |
43 | Here we plot the standard Confidence Intervals, ordered by the observed effect
44 | sizes. The red colors indicate observations corresponding to effects that were
45 | truly 0.
46 |
47 | ```{r plot_unshrunk}
48 | i = order(betahat)
49 | metaplot(betahat[i],rep(1,n),ylab = "",xlab = "",xlim = c(-6,6),
50 |          colors = meta.colors(box = c(rep("red",n/2),rep("black",n/2))[i]))
51 | ```
52 |
53 | Here we plot the *shrunken* Confidence Intervals (more formally, posterior
54 | Credible Intervals), in the same order, with the same color code. (We use the
55 | approximation of the posterior mean $\pm$ 2 posterior standard deviations;
56 | really we should compute the posterior intervals using `ashci`, but this
57 | simple approximation suffices for illustrating the idea.)
58 |
59 | Notice how the CIs are strongly shrunk towards 0, except for those that
60 | correspond to large effects, where the shrinkage is more modest. What is
61 | happening here is that ash recognizes that although many effects are null,
62 | others are quite big, so it avoids over-shrinking the intervals whose
63 | data are sufficiently strong to convince it that they are non-null.
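A minimal sketch of the exact computation mentioned above (not evaluated here;
it assumes the `level` argument of `ashci` as documented in ashr, and can be
slow for many observations):

```{r ashci_sketch, eval=FALSE}
# Exact 95% posterior credible intervals, rather than the mean +/- 2 sd
# approximation used in the plot below.
ci = ashci(beta.ash,level = 0.95)
head(ci)
```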
64 |
65 | ```{r plot_shrunk}
66 | metaplot(get_pm(beta.ash)[i],get_psd(beta.ash)[i],
67 |          ylab = "",xlab = "",xlim = c(-6,6),
68 |          colors = meta.colors(box = c(rep("red",n/2),rep("black",n/2))[i]))
69 | ```
70 | --------------------------------------------------------------------------------
/analysis/plot_cdf_eg.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Various examples of cdf plots"
3 | author: "Matthew Stephens"
4 | date: '2015-10-26'
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | First, we load the necessary libraries and useful function definitions.
12 |
13 | ```{r packages}
14 | library(plyr)
15 | library(dplyr)
16 | library(magrittr)
17 | library(ashr)
18 | library(ggplot2)
19 | library(dscr)
20 | library(REBayes)
21 |
22 | source("../code/dsc-shrink/methods/mixfdr.wrapper.R")
23 | source("../R/set_plot_colors.R")
24 | ```
25 |
26 | ```{r chunk_options, include=FALSE}
27 | # Specify settings for displaying the plots in the rendered document.
28 | source("chunk-options.R")
29 | ```
30 |
31 | Define a couple of functions that compile the CDF data in such a way that it
32 | is easy to plot the CDFs using ggplot2.
33 |
34 | ```{r chunk_label}
35 | load("../output/dsc-shrink-files/res.RData")
36 |
37 | # df is a list with components method, seed and scenario.
38 | # The cdf is evaluated at x.
39 | # Returns a data frame of x, y and pi0, where the cdf values are in y.
40 | get_cdf = function (df,dsc = dsc_shrink,x = seq(-6,6,length = 100),
41 |                     homedir = "../code/dsc-shrink") {
42 |   m = df$method
43 |   if (m == "truth")
44 |     m = "ash.n"
45 |   temp = load_example(dsc,df$seed,df$scenario,m,homedir)
46 |   pi0 = temp$meta$pi0
47 |
48 |   if (df$method == "truth") {
49 |     s = dsc$scenarios[[df$scenario]]
50 |     galt = s$args$g
51 |     g = normalmix(c(pi0,(1 - pi0) * mixprop(galt)),
52 |                   c(0,comp_mean(galt)),
53 |                   c(0,comp_sd(galt)))
54 |   } else {
55 |     if (grepl("mixfdr",df$method)) {
56 |       temp$output$fitted_g = mixfdr2fitted.g(temp$output)$fitted.g
57 |     }
58 |     g = temp$output$fitted_g
59 |   }
60 |   return(data.frame(x = x,y = as.numeric(mixcdf(g,x)),pi0 = pi0))
61 | }
62 |
63 | plot_mean_cdf =
64 |   function(SEEDS,
65 |            PLOTMETHODS = c("ash.n","ash.u","ash.hu","truth","mixfdr.tnull"),
66 |            PLOTSCENARIOS = c("spiky","near-normal","flat-top","skew","bimodal"),
67 |            pi0filter = FALSE,...) {
68 |   PLOTNAMES = PLOTSCENARIOS
69 |
70 |   # Set up a dataframe with the cdf for all methods and all datasets.
71 |   df = expand.grid(seed = SEEDS,scenario = PLOTSCENARIOS,
72 |                    method = PLOTMETHODS,stringsAsFactors = FALSE)
73 |   df.cdf = ddply(df,.(seed,scenario,method),get_cdf)
74 |
75 |   if (pi0filter)
76 |     df.cdf %<>% filter(pi0 < 0.55 & pi0 > 0.45)
77 |   if (length(SEEDS) > 1)
78 |     df.cdf %<>% group_by(x,method,scenario) %>% summarise(y = mean(y))
79 |
80 |   df.cdf$scenario = factor(df.cdf$scenario,levels = PLOTSCENARIOS)
81 |   levels(df.cdf$scenario) = PLOTNAMES
82 |
83 |   return(ggplot(df.cdf,aes(x = x,y = y,color = method),...) + colScale +
84 |          geom_line(lwd = 1.5,alpha = 0.7) + facet_grid(.~scenario) +
85 |          theme(legend.position = "bottom"))
86 | }
87 | ```
88 |
89 | Show the CDFs for all methods and all scenarios, based on a single data set.
90 |
91 | ```{r egcdf, fig.height=3, fig.width=9}
92 | # These chunk options were used to create the figure for the paper:
93 | # dev='pdf', fig.height=3, fig.width=9, crop=TRUE
94 | plot_mean_cdf(1)
95 | ```
96 |
97 | Show the CDFs for all scenarios, this time averaged over 100 simulated data
98 | sets.
99 |
100 | ```{r mean_cdf, fig.height=3, fig.width=9}
101 | # These chunk options were used to create the figure for the paper:
102 | # dev='pdf', fig.height=3, fig.width=9, crop=TRUE
103 | plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n","ash.u","ash.hu","truth"),
104 |               pi0filter = TRUE)
105 | ```
106 |
107 | Same as above, but with custom colours.
108 |
109 | ```{r mean_cdf_nopen, fig.height=3, fig.width=9}
110 | # These chunk options were used to create the figure for the paper:
111 | # dev='pdf', fig.height=3, fig.width=9, crop=TRUE
112 | names(myColors) <- c("truth","ash.hu.s","ash.n.s","ash.u.s","qvalue",
113 |                      "locfdr","mixfdr.tnull")
114 | colScale <- scale_colour_manual(name = "method",values = myColors)
115 | plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n.s","ash.u.s","ash.hu.s","truth"),
116 |               pi0filter = TRUE)
117 |
118 | # Reset color scale for other plots.
119 | source("../R/set_plot_colors.R")
120 | ```
121 |
122 | The following plots show fewer methods and scenarios, for more clarity.
123 |
124 | ```{r egcdf-reduce, fig.height=3.5, fig.width=7}
125 | # These chunk options were used to create the figure for the paper:
126 | # fig.height=3, fig.width=6, crop=TRUE, dev='pdf'
127 | plot_mean_cdf(1,PLOTMETHODS = c("ash.n","truth"),
128 |               PLOTSCENARIOS = c("spiky","near-normal","bimodal"))
129 | ```
130 |
131 | Same as above, but this time the CDFs are averaged over 100 simulated data sets.
132 |
133 | ```{r mean_cdf-reduce, fig.height=3.5, fig.width=7}
134 | # These chunk options were used to create the figure for the paper:
135 | # dev='pdf', fig.height=3, fig.width=6, crop=TRUE
136 | plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n","truth"),
137 |               PLOTSCENARIOS = c("spiky","near-normal","bimodal"),
138 |               pi0filter = TRUE)
139 | ```
140 |
141 | Now with custom colours.
142 |
143 | ```{r mean_cdf_nopen-reduce, fig.height=3.5, fig.width=7}
144 | # These chunk options were used to create the figure for the paper:
145 | # fig.height=3, fig.width=6, crop=TRUE, dev='pdf'
146 | names(myColors) <- c("truth","ash.hu.s","ash.n.s","ash.u.s","qvalue","locfdr",
147 |                      "mixfdr.tnull")
148 | colScale <- scale_colour_manual(name = "method",values = myColors)
149 | plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n.s","truth"),
150 |               PLOTSCENARIOS = c("spiky","near-normal","bimodal"),
151 |               pi0filter = TRUE)
152 | source("../R/set_plot_colors.R")
153 | ```
154 |
155 | I wanted to add the npmle method (fit using the GLmix function) to the plot...
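For orientation, a minimal sketch of the idea (this assumes, as in the chunk
below, that `GLmix` from the REBayes package loaded above returns a grid of
support points in `$x` and the corresponding estimated mixture masses in `$y`;
it is not evaluated here, since GLmix also requires the Rmosek solver):

```{r glmix_sketch, eval=FALSE}
z = GLmix(rnorm(1000))            # toy data; the real analysis uses betahat
npmle.cdf = cumsum(z$y)/sum(z$y)  # normalize the masses to obtain a CDF on z$x
plot(z$x,npmle.cdf,type = "s",xlab = "beta",ylab = "CDF")
```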
156 |
157 | ```{r egcdf-reduce-with-npmle, fig.height=5.25, fig.width=7}
158 | # These chunk options were used to create the figure for the paper:
159 | # dev='pdf', fig.height=4.5, fig.width=6, crop=TRUE
160 | run_npmle = function (SEED = 1,PLOTSCENARIOS = c("spiky","near-normal",
161 |                                                  "flat-top","skew","bimodal")) {
162 |   df = data.frame(seed = NULL,scenario = NULL,method = NULL,x = NULL,
163 |                   y = NULL,pi0 = NULL)
164 |   for (SCENARIO in PLOTSCENARIOS) {
165 |     temp = load_example(dsc_shrink,SEED,SCENARIO,"ash.n","../output/dsc-shrink-files/")
166 |     z = GLmix(temp$input$betahat)
167 |     df = rbind(df,data.frame(seed = SEED,scenario = SCENARIO,method = "NPMLE",
168 |                              x = z$x,y = cumsum(z$y)/sum(z$y),pi0 = NA))
169 |   }
170 |   return(df)
171 | }
172 |
173 | plot_mean_cdf_with_npmle =
174 |   function(SEED = 1,
175 |            PLOTMETHODS = c("ash.n","ash.u","ash.hu","truth","mixfdr.tnull"),
176 |            PLOTSCENARIOS = c("spiky","near-normal","flat-top","skew","bimodal"),
177 |            pi0filter = FALSE,...) {
178 |   if (length(SEED) > 1)
179 |     stop("plot with npmle only implemented for a single seed")
180 |   PLOTNAMES = PLOTSCENARIOS
181 |
182 |   # Set up dataframe with cdf for all methods and all datasets.
183 |   df = expand.grid(seed = SEED,scenario = PLOTSCENARIOS,method = PLOTMETHODS,
184 |                    stringsAsFactors = FALSE)
185 |   df.cdf = ddply(df,.(seed,scenario,method),get_cdf)
186 |
187 |   df.cdf$scenario = factor(df.cdf$scenario,levels = PLOTSCENARIOS)
188 |   levels(df.cdf$scenario) = PLOTNAMES
189 |
190 |   df.npmle = run_npmle(SEED,PLOTSCENARIOS)
191 |   df.cdf = rbind(df.cdf,df.npmle)
192 |
193 |   p = ggplot(df.cdf,aes(x = x,y = y,color = method),...) +
194 |     xlim(c(-4,4)) + colScale + geom_line(lwd = 1.2,alpha = 0.7) +
195 |     theme(legend.position = "bottom")
196 |   return(p)
197 | }
198 |
199 | plot_mean_cdf_with_npmle(1,PLOTMETHODS = c("ash.hu","truth"),
200 |                          PLOTSCENARIOS = c("spiky","near-normal","flat-top",
201 |                                            "skew","bimodal")) +
202 |   facet_wrap(~ scenario, nrow = 2)
203 | ```
204 |
205 | ## Session information
206 |
207 | ```{r info}
208 | sessionInfo()
209 | ```
210 | --------------------------------------------------------------------------------
/analysis/plot_egdens.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Plot densities for simulation studies"
3 | author: "Matthew Stephens"
4 | date: 2015-10-26
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | First, we load the necessary libraries.
12 |
13 | ```{r packages}
14 | library(ashr)
15 | library(ggplot2)
16 | library(dplyr)
17 | ```
18 |
19 | ```{r chunk_options, include=FALSE}
20 | # Specify settings for displaying the plots in the rendered document.
21 | source("chunk-options.R")
22 | ```
23 |
24 | Load the results of the simulation experiments, and generate the density
25 | data for all the simulation scenarios in a single data frame ("df").
26 |
27 | ```{r retrieve_sim_results}
28 | load("../output/dsc-shrink-files/res.RData")
29 | PLOTSCENARIOS = c("spiky","near-normal","flat-top","skew","bimodal")
30 | PLOTNAMES = PLOTSCENARIOS
31 |
32 | df = data.frame()
33 | for(i in PLOTSCENARIOS) {
34 |   s = dsc_shrink$scenarios[[i]]
35 |   g = s$args$g
36 |   x = seq(-6,6,length = 100)
37 |   y = as.numeric(dens(g,x))
38 |   df = rbind(df,data.frame(x = x,y = y,scenario = i))
39 | }
40 |
41 | df$scenario = factor(df$scenario,levels = PLOTSCENARIOS)
42 | levels(df$scenario) = PLOTNAMES
43 | ```
44 |
45 | Generate density plots using ggplot.
46 |
47 | ```{r scenario_density, dev='pdf',fig.height=3, fig.width=9, crop=TRUE}
48 | ggplot(df,aes(x = x,y = y)) +
49 |   geom_line(size = 1.2,linetype = 1) +
50 |   facet_grid(.~scenario) +
51 |   ylab("density")
52 | ```
53 |
54 | ## Session information
55 |
56 | ```{r info}
57 | sessionInfo()
58 | ```
59 |
60 | --------------------------------------------------------------------------------
/analysis/plot_egdens2.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Plot two densities for strong vs weak shrinkage"
3 | author: "Matthew Stephens"
4 | date: 2016-10-25
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | ```{r chunk_options, include=FALSE}
12 | # Specify settings for displaying the plots in the rendered document.
13 | source("chunk-options.R")
14 | ```
15 |
16 | ```{r}
17 | library("ashr")
18 | library("ggplot2")
19 | library("dplyr")
20 | library("dscr")
21 | load("../output/dsc-shrink-files/res.RData")
22 | PLOTSCENARIOS = c("spiky","big-normal")
23 | PLOTNAMES = c("Strong Shrinkage","Weak Shrinkage")
24 |
25 | df = data.frame()
26 |
27 | for(i in PLOTSCENARIOS){
28 |   s = dsc_shrink$scenarios[[i]]
29 |   g = s$args$g
30 |   x = seq(-6,6,length = 100)
31 |   y = as.numeric(dens(g,x))
32 |   df = rbind(df,data.frame(x = x,y = y,scenario = i))
33 | }
34 |
35 |
36 | df$scenario = factor(df$scenario,levels = PLOTSCENARIOS)
37 | levels(df$scenario) = PLOTNAMES
38 | ```
39 |
40 | ```{r scenario_density_2, dev='pdf',fig.height=3, fig.width=9, crop=TRUE}
41 | #pdf(PLOTFILE,height=3,width=9)
42 | ggplot(df, aes(x = x,y = y)) + geom_line(size = 1.2,linetype = 1) + facet_grid(.~scenario) + ylab("density")
43 | #dev.off()
44 | ```
45 |
46 |
47 | ## Session information
48 |
49 | ```{r info}
50 | sessionInfo()
51 | ```
52 |
53 | --------------------------------------------------------------------------------
/analysis/plot_lfsr.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Plot local false sign rate results"
3 | author: "Matthew Stephens"
4 | date: 2015-10-26
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | First, we load the necessary libraries and other useful function definitions.
12 |
13 | ```{r packages}
14 | library(magrittr); library(dplyr) # magrittr provides the %<>% operator used below
15 | library(reshape2)
16 | library(ggplot2)
17 | source("../R/set_plot_colors.R")
18 | ```
19 |
20 | ```{r chunk_options, include=FALSE}
21 | # Specify settings for displaying the plots in the rendered document.
22 | source("chunk-options.R")
23 | ```
24 |
25 | Load the results of the simulation experiments generated by
26 | code/dsc-shrink/run_dsc.R, and prepare the posterior statistics for
27 | plotting.
28 |
29 | ```{r chunk_label}
30 | load("../output/dsc-shrink-files/res.RData")
31 |
32 | #' @param df dataframe of scores for many methods/scenarios etc
33 | #' @return tall dataframe with columns of scores for each method and the "goldmethod" against which the plot is to be made
34 | process_score_for_plotting_against_gold =
35 |   function(df,
36 |            PLOTMETHODS = c("ash.n","ash.u","ash.hu"),
37 |            GOLDMETHOD = "bayes",PLOTSEEDS = 1:100,
38 |            PLOTSCENARIOS = c("spiky","near-normal","flat-top","skew",
39 |                              "big-normal","bimodal"),
40 |            PLOTNAMES = PLOTSCENARIOS) {
41 |   df %<>% filter(seed %in% PLOTSEEDS) %>%
42 |     filter(scenario %in% PLOTSCENARIOS) %>%
43 |     filter(method %in% c(PLOTMETHODS,GOLDMETHOD))
44 |   df$scenario = factor(df$scenario,levels = PLOTSCENARIOS)
45 |   levels(df$scenario) = PLOTNAMES
46 |
47 |   # Create "tall" version of the dataframe.
48 |   df %<>% select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>%
49 |     melt(id.vars = c("method","scenario","seed",".id"),value.name = "val")
50 |
51 |   # Separate bayes from the remainder.
52 |   df.bayes = df %>% filter(method == GOLDMETHOD)
53 |   df.rest = df %>% filter(method != GOLDMETHOD)
54 |
55 |   # Join bayes with others, so each line has both the bayes and the
56 |   # non-bayes version.
57 |   return(inner_join(df.bayes,df.rest,by = c("scenario","seed","variable")))
58 | }
59 |
60 | plot_lfsr = function(lfsr,xlab = "True lfsr",ylab = "Estimated lfsr",
61 |                      xlim = c(0,0.2),ylim = c(0,0.2),
62 |                      legend.position = "bottom")
63 |   ggplot(lfsr,aes(val.x,val.y,colour = method.y)) +
64 |     facet_grid(. ~ scenario) +
65 |     guides(alpha = FALSE) +
66 |     geom_abline(colour = "black") +
67 |     geom_abline(colour = "red",slope = 2) +
68 |     xlab(xlab) + ylab(ylab) +
69 |     geom_point(shape = 1,size = 0.1,alpha = 0.2) +
70 |     scale_y_continuous(limits = ylim) +
71 |     scale_x_continuous(limits = xlim)
72 |
73 | lfsr = process_score_for_plotting_against_gold(res$lfsr,PLOTSEEDS = 1:100,
74 |                                                PLOTMETHODS = "ash.n")
75 | lfdr = process_score_for_plotting_against_gold(res$lfdr,PLOTSEEDS = 1:100,
76 |                                                PLOTMETHODS = "ash.n")
77 |
78 | p1 = plot_lfsr(lfsr,ylim = c(0,1),xlim = c(0,0.2))
79 | p2 = plot_lfsr(lfdr,ylim = c(0,1),xlim = c(0,0.2),
80 |                xlab = "True lfdr",ylab = "Estimated lfdr")
81 | ```
82 |
83 | Separately for each scenario, create a scatterplot comparing the estimated LFSR
84 | against the "gold-standard" LFSR.
85 |
86 | ```{r plot_lfsr, dev=c('png','pdf'), fig.width=9, fig.height=3, crop=TRUE}
87 | print(p1 + theme(legend.position = "none",
88 |                  axis.text.x = element_text(size = 8,angle = 45)) +
89 |       coord_equal(ratio = 1/5) + colScale)
90 | ```
91 |
92 | Separately for each scenario, create a scatterplot comparing the estimated LFDR
93 | against the "gold-standard" LFDR.
94 |
95 | ```{r plot_lfdr, dev=c('png','pdf'), fig.width=9, fig.height=3, crop=TRUE}
96 | print(p2 + theme(legend.position = "none",
97 |                  axis.text.x = element_text(size = 8,angle = 45)) +
98 |       coord_equal(ratio = 1/5) + colScale)
99 | ```
100 |
101 | Separately for each scenario, create a scatterplot comparing the LFSR estimated
102 | by the ash.n.s method against the "gold-standard" LFSR.
103 |
104 | ```{r plot_lfsr_s, dev=c('png','pdf'), fig.width=9, fig.height=3, crop=TRUE}
105 | lfsr.s = process_score_for_plotting_against_gold(res$lfsr,PLOTSEEDS = 1:100,
106 |                                                  PLOTMETHODS = "ash.n.s")
107 | p1.s = plot_lfsr(lfsr.s,ylim = c(0,1),xlim = c(0,0.2))
108 | print(p1.s + theme(legend.position = "none",
109 |                    axis.text.x = element_text(size = 8,angle = 45)) +
110 |       coord_equal(ratio = 1/5))
111 | ```
112 |
113 | Separately for each scenario, create a scatterplot comparing the LFSR estimated
114 | by the ash.n.s method on the -nn data sets against the
115 | "gold-standard" LFSR.
116 |
117 | ```{r plot_lfsr_s_nn, dev=c('png','pdf'), fig.width=9, fig.height=3, crop=TRUE}
118 | lfsr.s.nn =
119 |   process_score_for_plotting_against_gold(res$lfsr,PLOTSEEDS = 1:100,
120 |     PLOTMETHODS = "ash.n.s",
121 |     PLOTSCENARIOS = paste0(c("spiky","near-normal","flat-top","skew",
122 |                              "big-normal","bimodal"),"-nn"))
123 | p1.s.nn = plot_lfsr(lfsr.s.nn,ylim = c(0,1),xlim = c(0,0.2))
124 | print(p1.s.nn + theme(legend.position = "none",
125 |                       axis.text.x = element_text(size = 8,angle = 45)) +
126 |       coord_equal(ratio = 1/5))
127 | ```
128 |
129 | ## Session information
130 |
131 | ```{r info}
132 | print(sessionInfo())
133 | ```
134 | --------------------------------------------------------------------------------
/analysis/plot_pi0est.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Plot estimates of pi0"
3 | author: "Matthew Stephens"
4 | date: 2015-10-26
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | First, we load the necessary libraries, and specify settings for
12 | displaying the plots in the rendered document.
13 |
14 | ```{r packages}
15 | library(magrittr)
16 | library(dplyr); library(ggplot2) # ggplot2 provides ggplot(), used below
17 | source("../R/set_plot_colors.R")
18 | ```
19 |
20 | ```{r chunk_options, include=FALSE}
21 | # Specify settings for displaying the plots in the rendered document.
22 | source("chunk-options.R")
23 | ```
24 |
25 | Generate scatterplots comparing the estimate of pi0 against the
26 | ground-truth value, colored by the method used, and split by the scenario
27 | type.
28 |
29 | ```{r plot_pi0est, dev='pdf', crop=TRUE}
30 | load("../output/dsc-shrink-files/res.RData")
31 |
32 | PLOTMETHODS = c("mixfdr.tnull","ash.n","ash.u","qvalue","locfdr")
33 | PLOTSCENARIOS = c("spiky","near-normal","flat-top","skew","bimodal")
34 | PLOTNAMES = PLOTSCENARIOS
35 | ALPHALEVEL = 0.8 # controls transparency
36 |
37 | df = res$pi0_score %>%
38 |   filter(scenario %in% PLOTSCENARIOS) %>%
39 |   filter(method %in% PLOTMETHODS)
40 | df$scenario = factor(df$scenario,levels = PLOTSCENARIOS)
41 | levels(df$scenario) = PLOTNAMES
42 |
43 | pi0_plot = ggplot(df,aes(pi0,pi0_est,colour = method,alpha = ALPHALEVEL)) +
44 |   geom_point(shape = 1) +
45 |   facet_grid(. ~ scenario) +
~ scenario) + 46 | guides(alpha = FALSE) + 47 | geom_abline(colour = "black") + 48 | xlab("True pi0") + 49 | ylab("Estimated pi0") 50 | print(pi0_plot + scale_y_continuous(limits = c(0,1.01)) + 51 | scale_x_continuous(limits = c(0,1.01)) + 52 | coord_equal(ratio = 1) + 53 | colScale + 54 | theme(legend.position = "top", 55 | axis.text.x = element_text(size = 8,angle = 45))) 56 | ``` 57 | 58 | ## Session information 59 | 60 | ```{r info} 61 | sessionInfo() 62 | ``` 63 | -------------------------------------------------------------------------------- /analysis/referee.response.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Response to referees" 3 | author: "Matthew Stephens" 4 | date: 2016-03-21 5 | --- 6 | 7 | **Last updated:** `r Sys.Date()` 8 | 9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)` 10 | 11 | ```{r chunk_options, include=FALSE} 12 | # Specify settings for displaying the plots in the rendered document. 13 | source("chunk-options.R") 14 | ``` 15 | 16 | ## Referee 1 17 | 18 | I agree with a lot of what the referee says, although there are some 19 | points of disagreement. The most fundamental disagreement regards 20 | Figure 1. This Figure shows not the detectable effects (as the referee 21 | believed) but the decomposition of *all* effects into null and 22 | alternative components. Since this seems to be an important 23 | misunderstanding, I have revised the text to try to make it clearer how 24 | this figure was derived. Specifically, the revised text says: 25 | 26 | “We used each of the methods qvalue, locfdr, mixfdr and ash to 27 | decompose the $z$ scores ($z_j = \hat{\beta}_j$), or their 28 | corresponding p values, into null and alternative components. Here we 29 | are using the fact that these methods all provide an estimate of the 30 | lfdr for each observation, which implies such a decomposition; 31 | specifically the average lfdr within each histogram bin estimates the 32 | fraction of observations in that bin that come from the null vs the 33 | alternative component.” 34 | 35 | Since the referee objected to the presentation of the ZA and UA as 36 | opposing assumptions, I have removed all explicit reference to the 37 | ZA. (This also solves the issue that I had defined the ZA differently 38 | than Efron’s original definition — see below — which may have created 39 | confusion.) Instead, as the referee suggested, I have presented the UA 40 | as an additional assumption “not made by other methods”, and — as the 41 | editor suggested — I have focussed on contrasting the behavior of ash 42 | with the behavior of the other methods (which, as Figure 1 43 | demonstrates, is to create a hole at 0 in the distribution of the 44 | alternative Z scores) rather than contrasting their assumptions. 45 | 46 | However, it seems unsatisfactory to merely point out this behavioral 47 | difference between methods without giving any explanation for the 48 | reason behind it. So I have provided my best attempt to give the 49 | reason. Essentially the reason is that the existing methods (locfdr 50 | and qvalue), when estimating $\pi_0$, make the assumption that *all* p 51 | values near 1 are null, or *all* z scores near 0 are null. (This was 52 | how I defined the ZA in the original paper; however this is stronger 53 | than Efron’s original statement of the ZA, which the referee - 54 | understandably - uses in his/her report, that “most” z scores near 0 55 | are null.
So I don’t refer to this as the ZA any more.) This 56 | assumption necessarily creates a hole in the implied distribution of z 57 | scores under the alternative, and explains the behavior. 58 | 59 | To underscore that this behavior is general, and not just a feature of 60 | the particular simulation I did in Figure 1, I provide results using 61 | qvalue under the setting suggested by the referee; see 62 | http://stephenslab.github.io/ash/analysis/referee_uaza.html. 63 | 64 | ### Minor Concerns 65 | 66 | - I added the citation, thank you. 67 | 68 | - I took up the suggestion to define FSR-hat and the s-value, thank 69 | you. 70 | 71 | - I agree that returning the length J results as a dataframe would 72 | have been better, but unfortunately changing this would break other 73 | packages that depend on ashr. So instead I have provided a method 74 | `as.data.frame` to extract the most important components of the ash 75 | object into a dataframe. 76 | 77 | - I fixed the typo, thank you. 78 | 79 | ## Referee 2 80 | 81 | ### Major Concern 82 | 83 | The referee says: "it would be interesting to include distributions 84 | that favor other models, such as a bimodal distribution with no mass 85 | at zero, an assymmetrical distribution, ... The unimodality is a 86 | strong assumption so it would be good to see how anti-conservative the 87 | results would get under different alternative distributions." 88 | 89 | I found this puzzling since both an asymmetrical distribution and a 90 | strongly bimodal distribution are already included in the simulation 91 | results (Figure 2, Figure 3, Table 1). I believe these existing 92 | results address the referee's comment. I also want to emphasise that 93 | the unimodal assumption is actually less strong than the assumptions 94 | usually made in variable selection for regression, which is directly 95 | analogous to the fdr context. For example, it is common to assume that 96 | effects are a mixture of a point mass at zero and a single normal, 97 | which is considerably more restrictive than the general unimodal 98 | assumption. 99 | 100 | However, to help reassure the referee, I have added another example to 101 | illustrate an asymmetric situation where the unimodal-at-zero 102 | assumption is badly contradicted 103 | [here](http://stephenslab.github.io/ash/analysis/efron.fcr.html). As 104 | the referee will see, the resulting estimates of the lfdr from ashr 105 | are reasonably close to the true values (and also to those from 106 | locfdr). 107 | 108 | ### Minor Concern 109 | 110 | *Is there a way to model the distribution as a mixture of both 111 | uniforms and normals? If not: why?* 112 | 113 | This is straightforward in principle. However, it would require 114 | non-trivial code modifications because of the way I implemented the 115 | mixture class (assuming that all components come from the same 116 | family). Another way to achieve an asymmetric distribution without the 117 | "hard" tails of the uniform might be to use a mixture of truncated 118 | normals. I have a student investigating this option. However, the 119 | uniform mixture has the advantage of allowing not only the asymmetry, 120 | but also the $t$ likelihood. Although the paper does not focus on 121 | this, I believe this is likely to be an important issue in 122 | practice. So I suspect that the mixture of uniforms is often going to 123 | be the method of choice in practice.
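For concreteness, here is a minimal sketch (simulated data, purely illustrative and not one of the paper's analyses) of the point above: the half-uniform mixture already gives an asymmetric unimodal $g$, and the same fit can be combined with a $t$ likelihood via the `df` argument.

```{r asymmetric_halfuniform_sketch, eval=FALSE}
library(ashr)
set.seed(1)
# Asymmetric unimodal-at-zero effects: all non-null effects are positive.
beta    = c(rep(0,500), abs(rnorm(500,0,2)))
betahat = beta + rnorm(1000)
# Asymmetric mixture of (half-)uniforms with a normal likelihood:
fit.hu   = ash(betahat, 1, mixcompdist = "halfuniform")
# The same asymmetric mixture combined with a t likelihood:
fit.hu.t = ash(betahat, 1, mixcompdist = "halfuniform", df = 4)
get_fitted_g(fit.hu)  # fitted g should place its non-null mass on [0,a] components
```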
124 | 125 | *How would the model perform with moderate correlations?* 126 | 127 | There seems to be no reason to think that moderate correlations among 128 | tests will have a substantial detrimental effect. But I do not want to 129 | focus on this because I think it will give the reader the wrong 130 | message: I think that users of both ashr and other fdr methods should 131 | very much worry about correlations in practice (which I believe will 132 | often be large, and not moderate). 133 | 134 | *In equation 3, the null component (I think) is represented as delta0, 135 | but delta0 hasn’t been defined. What is assumed for the distribution 136 | of delta0?* 137 | 138 | Thank you for noticing this. I now define $\delta_0$ (which represents 139 | a point mass on 0). 140 | 141 | *Figure 3: the truth is hard to distinguish from the different 142 | methods. And in general this figure is hard to read.* 143 | 144 | I agree. I have completely rethought this figure, which now has fewer, 145 | larger panels, and compares just two methods (ash.hu and the NPMLE) 146 | against the truth on a few examples. The less important point - the 147 | effect of the penalty term on pi0 - is now illustrated in an additional 148 | figure included in the appendix. 149 | 150 | *I particularly like the idea that the ordering of the tests differs 151 | from the classical p-values. Which influence does this have on the 152 | balance between sensitivity and specificity (for example with ROC 153 | curves)? It should change, but does it get better?* 154 | 155 | Thanks for the question. This is now addressed in the new Figure 4d. 156 | 157 | ### Typos and grammar 158 | 159 | *page 14, line 20. The first mentioned pi0 and lfdr should have a hat.* 160 | 161 | Thanks - fixed. 162 | 163 | ## Session information 164 | 165 | ```{r info} 166 | sessionInfo() 167 | ``` 168 | -------------------------------------------------------------------------------- /analysis/referee_uaza.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Assessment of referee comment regarding q value and zero assumption" 3 | author: "Matthew Stephens" 4 | date: 2016-03-21 5 | --- 6 | 7 | **Last updated:** `r Sys.Date()` 8 | 9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)` 10 | 11 | A referee reported a simulation of 10,000 hypotheses in which a fraction 0.8 12 | are null ($\mu_i = 0$) and a fraction 0.2 are alternative with effects 13 | $\mu_i \sim N(0,1)$. For each effect $i$ there are $n=30$ observations 14 | from $x_{i,j} \sim N(\mu_i,1)$ and a one-sample $t$ test is used to 15 | compute a $p$ value. The referee correctly notes that these 16 | simulations i) obey the unimodal assumption (UA), and ii) have the 17 | property that "most" $z$ scores near 0 are null (Efron's "Zero 18 | Assumption"; ZA). The motivation is to demonstrate that the UA and the 19 | ZA are not contradictory. 20 | 21 | I agree with this, if we define the ZA to mean that "most" $z$ scores near 0 are 22 | null. However, in practice the existing methods effectively make a 23 | stronger assumption (or at least, behave as if they do): that *all* 24 | $z$ scores near 0 are null. Consequently they end up creating a hole 25 | in the distribution of alternative $z$ scores at 0. Here we illustrate 26 | this by applying qvalue to this scenario. 27 | 28 | ```{r chunk_options, include=FALSE} 29 | # Specify settings for displaying the plots in the rendered document.
30 | source("chunk-options.R") 31 | ``` 32 | 33 | ## Simulation 34 | 35 | First, we generate a small data set. 36 | 37 | ```{r sim_data} 38 | set.seed(100) 39 | mu = c(rep(0,8000),rnorm(2000)) 40 | x = matrix(rnorm(30*10000),nrow=10000,ncol=30)+mu 41 | ttest.pval = function(x){return(t.test(x)$p.value)} 42 | x.p = apply(x,1,ttest.pval) 43 | ``` 44 | 45 | Here is how qvalue (arguably, implicitly) decomposes the distribution of p values into null and alternative 46 | ```{r} 47 | source("../R/nullalthist.R") 48 | lfdr.qv = qvalue::qvalue(x.p)$lfdr 49 | altnullhist(x.p,lfdr.qv,main="p values: qvalue",ncz=40,xlab="p value",cex.axis=0.8) 50 | ``` 51 | 52 | 53 | And here is the corresponding plot in z score space. Note the "hole" in the distribution of $z$ scores at 0 under the alternative. 54 | ```{r} 55 | ttest.est = function(x){return(t.test(x)$estimate)} 56 | x.est = apply(x,1,ttest.est) 57 | zscore = x.est/(1/sqrt(30)) 58 | nullalthist(zscore,lfdr.qv,main="qvalue's implicit partition of z \n into null and alternative",ncz=60,cex.axis=0.8,ylim=c(0,0.3),xlim=c(-6,6),xlab="z score") 59 | ``` 60 | 61 | For comparison here is the decomposition, using ash, of $z$ scores into null and alternative components. 62 | ```{r} 63 | library(ashr) 64 | lfdr.ash=get_lfdr(ash(x.est,1/sqrt(30))) 65 | nullalthist(zscore,lfdr.ash,main="ash partition of z \n into null and alternative",ncz=60,cex.axis=0.8,ylim=c(0,0.3),xlim=c(-6,6),xlab="z score") 66 | ``` 67 | 68 | For completeness here is (roughly) the ``true" decomposition of $z$ 69 | scores into null and alternative components. 70 | 71 | ```{r} 72 | trueg = ashr::normalmix(c(0.8,0.2),c(0,0),c(0,1)) 73 | lfdr.true=get_lfdr(ash(x.est,1/sqrt(30),g=trueg,fixg=TRUE)) 74 | nullalthist(zscore,lfdr.true,main="true partition of z \n into null and alternative",ncz=60,cex.axis=0.8,ylim=c(0,0.3),xlim=c(-6,6),xlab="z score") 75 | ``` 76 | 77 | ## Explanation 78 | 79 | In estimating $\pi_0$ qvalue assumes that *all* p values near 1 are 80 | null. This is equivalent to assuming that *all* z scores near 0 are 81 | null. Thus, despite the fact that qvalue does not explicitly model the 82 | distribution of $p$ values or $z$ scores under the alternative, its 83 | way of estimating $\pi_0$ necessarily creates a hole in the $z$ score 84 | distribution at 0 under the alternative. 85 | 86 | ## Session information 87 | 88 | ```{r info} 89 | sessionInfo() 90 | ``` 91 | -------------------------------------------------------------------------------- /analysis/summarize_coverage.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Summarize coverage" 3 | author: "Matthew Stephens" 4 | date: 2015-10-26 5 | --- 6 | 7 | **Last updated:** `r Sys.Date()` 8 | 9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)` 10 | 11 | Note that rendering this RMarkdown document may take a few minutes because 12 | it involves loading and processing tables with millions of rows. 13 | 14 | First, we load the necessary libraries. 15 | 16 | ```{r packages} 17 | library(dscr) 18 | library(ashr) 19 | library(reshape2) 20 | library(ggplot2) 21 | library(magrittr) 22 | library(dplyr) 23 | library(xtable) 24 | ``` 25 | 26 | ```{r chunk_options, include=FALSE} 27 | # Specify settings for displaying the plots in the rendered document. 28 | source("chunk-options.R") 29 | ``` 30 | 31 | Compile the results on the simulated data sets for the summaries below. 
32 | 33 | ```{r compile_tables} 34 | load("../output/dsc-shrink-files/res.RData") 35 | coverthresh = 0.05 # threshold at which we look at coverage 36 | findthresh = 0.95 # threshold at which we define a discovery as significant 37 | 38 | neglong = 39 | res$negprob %>% 40 | select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>% 41 | melt(id.vars = c("method","scenario","seed",".id"), 42 | value.name = "negprob") %>% 43 | filter(negprob > findthresh) 44 | 45 | poslong = 46 | res$posprob %>% 47 | select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>% 48 | melt(id.vars = c("method","scenario","seed",".id"), 49 | value.name = "posprob") %>% 50 | filter(posprob > findthresh) 51 | 52 | reslong = 53 | res$cdf_score %>% 54 | select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>% 55 | melt(id.vars = c("method","scenario","seed",".id")) 56 | 57 | reslong.pos = inner_join(reslong,poslong) 58 | reslong.neg = inner_join(reslong,neglong) 59 | ``` 60 | 61 | Overall proportion of negative findings is `r nrow(reslong.neg)/nrow(reslong)`. 62 | 63 | Overall proportion of positive findings is `r nrow(reslong.pos)/nrow(reslong)`. 64 | 65 | Table of lower tail for all observations: 66 | 67 | ```{r table_all_obs} 68 | print(xtabs(lt ~ method + scenario, 69 | reslong %>% group_by(scenario,method) %>% 70 | summarize(lt = mean(value < coverthresh))) %>% round(2)) 71 | ``` 72 | 73 | Table of lower tail of positive findings. Because of the unimodal assumption 74 | and the favoritism toward the null, this should assess problems with 75 | "over-shrinkage" toward 0. 76 | 77 | ```{r table_pos} 78 | print(xtabs(lt ~ method + scenario, 79 | reslong.pos %>% group_by(scenario,method) %>% 80 | summarize(lt = mean(value < coverthresh))) %>% round(2)) 81 | ``` 82 | 83 | Table of lower tail of negative findings. This should indicate problems with 84 | the tail behaviour of $g$. The uniform methods tend to over-shrink. 85 | 86 | ```{r table_neg} 87 | print(xtabs(lt ~ method + scenario,reslong.neg %>% 88 | group_by(scenario,method) %>% 89 | summarize(lt = mean(value < coverthresh))) %>% round(2)) 90 | ``` 91 | 92 | Compile some more summary tables in LaTeX format.
93 | 94 | ```{r write_latex_tables} 95 | save_latex_coverage_table = 96 | function(df, methodnames, filename, 97 | SCENARIONAMES = c("spiky","near-normal","flat-top","skew", 98 | "big-normal","bimodal"), 99 | switch = FALSE) { 100 | df$method <- factor(df$method,levels = methodnames) 101 | df$scenario <- factor(df$scenario,levels = SCENARIONAMES) 102 | mat <- as.matrix(xtabs(lt~method+scenario,df)) 103 | if (switch) 104 | mat <- 1 - mat 105 | mat <- xtable(mat,digits = rep(2,ncol(mat) + 1)) 106 | write(print(mat,sanitize.text.function = function (x) x, 107 | floating = FALSE,hline.after = NULL, 108 | add.to.row = list(pos = list(-1,0,nrow(mat)), 109 | command = c('\\toprule ', 110 | '\\midrule ', 111 | '\\bottomrule '))),file = filename) 112 | } 113 | 114 | save_latex_coverage_table(reslong.neg %>% group_by(scenario,method) %>% 115 | summarize(lt = mean(value < coverthresh)), 116 | c("ash.n","ash.u","ash.hu"), 117 | "table/coverage_neg.tex") 118 | save_latex_coverage_table(reslong.pos %>% group_by(scenario,method) %>% 119 | summarize(lt = mean(value < coverthresh)), 120 | c("ash.n","ash.u","ash.hu"), 121 | "table/coverage_pos.tex") 122 | save_latex_coverage_table(reslong %>% group_by(scenario,method) %>% 123 | summarize(lt = mean(value < coverthresh)), 124 | c("ash.n","ash.u","ash.hu"), 125 | "table/coverage_all.tex") 126 | save_latex_coverage_table(reslong.neg %>% group_by(scenario,method) %>% 127 | summarize(lt = mean(value < coverthresh)), 128 | c("ash.n.s","ash.u.s","ash.hu.s"), 129 | "table/coverage_neg_nopen.tex") 130 | save_latex_coverage_table(reslong.pos %>% group_by(scenario,method) %>% 131 | summarize(lt = mean(value < coverthresh)), 132 | c("ash.n.s","ash.u.s","ash.hu.s"), 133 | "table/coverage_pos_nopen.tex") 134 | save_latex_coverage_table(reslong %>% group_by(scenario,method) %>% 135 | summarize(lt = mean(value < coverthresh)), 136 | c("ash.n.s","ash.u.s","ash.hu.s"), 137 | "table/coverage_all_nopen.tex") 138 | ``` 139 | 140 | These tables show the right tail instead of the left tail (obtained by passing `switch = TRUE`, which reports one minus the left-tail proportion).
141 | 142 | ```{r write_latex_tables_right} 143 | save_latex_coverage_table(reslong.neg %>% group_by(scenario,method) %>% 144 | summarize(lt = mean(value < coverthresh)), 145 | c("ash.n","ash.u","ash.hu"), 146 | "table/scoverage_neg.tex",switch = TRUE) 147 | save_latex_coverage_table(reslong.pos %>% group_by(scenario,method) %>% 148 | summarize(lt = mean(value < coverthresh)), 149 | c("ash.n","ash.u","ash.hu"), 150 | "table/scoverage_pos.tex",switch = TRUE) 151 | save_latex_coverage_table(reslong %>% group_by(scenario,method) %>% 152 | summarize(lt = mean(value < coverthresh)), 153 | c("ash.n","ash.u","ash.hu"), 154 | "table/scoverage_all.tex",switch = TRUE) 155 | 156 | save_latex_coverage_table(reslong.neg %>% group_by(scenario,method) %>% 157 | summarize(lt = mean(value < coverthresh)), 158 | c("ash.n.s","ash.u.s","ash.hu.s"), 159 | "table/scoverage_neg_nopen.tex",switch = TRUE) 160 | save_latex_coverage_table(reslong.pos %>% group_by(scenario,method) %>% 161 | summarize(lt = mean(value < coverthresh)), 162 | c("ash.n.s","ash.u.s","ash.hu.s"), 163 | "table/scoverage_pos_nopen.tex",switch = TRUE) 164 | save_latex_coverage_table(reslong %>% group_by(scenario,method) %>% 165 | summarize(lt = mean(value < coverthresh)), 166 | c("ash.n.s","ash.u.s","ash.hu.s"), 167 | "table/scoverage_all_nopen.tex",switch = TRUE) 168 | ``` 169 | 170 | ## Session information 171 | 172 | ```{r info} 173 | sessionInfo() 174 | ``` 175 | -------------------------------------------------------------------------------- /analysis/summarize_dsc_znull.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Summarize distribution of LR statistic under null" 3 | author: "Matthew Stephens" 4 | date: '2015-10-26' 5 | --- 6 | 7 | **Last updated:** `r Sys.Date()` 8 | 9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)` 10 | 11 | First, we load the necessary libraries. 12 | 13 | ```{r packages} 14 | library(ggplot2) 15 | library(dplyr) 16 | ``` 17 | 18 | ```{r chunk_options, include=FALSE} 19 | # Specify settings for displaying the plots in the rendered document. 20 | source("chunk-options.R") 21 | ``` 22 | 23 | Load and summarize the results of the data simulations. 24 | 25 | ```{r load_sim_results} 26 | load("../output/dsc-znull-files/res.znull.RData") 27 | out <- ungroup(res %>% group_by(scenario,method) %>% 28 | summarise(gt0 = mean(logLR > 0))) 29 | xtabs(gt0 ~ method + scenario,out) 30 | ``` 31 | 32 | Note that ash.n.s conforms closely to the null expectation of 33 | $0.5\chi^2_0 + 0.5\chi^2_1$ from Stram and Lee (1994, *Biometrics*). 34 | 35 | *Conjecture:* the uniform mixture has the same asymptotic behaviour, and the 36 | half-uniform behaves like the sum of two of these? 37 | 38 | From this, a 95% procedure would need to check for logLR > `r qchisq(0.9,df=1)/2`.
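To make the arithmetic behind that threshold explicit, here is a small sanity check (a sketch; the 0.9 quantile arises because the point mass at zero absorbs half of the tail probability):

```{r logLR_threshold_check}
# Under the boundary null, 2*logLR ~ 0.5*chisq_0 + 0.5*chisq_1 (Stram and Lee),
# so P(logLR > c) = 0.5 * P(chisq_1 > 2c).  Setting this equal to 0.05 gives
# P(chisq_1 > 2c) = 0.1, i.e. 2c = qchisq(0.9, df = 1), hence:
qchisq(0.9, df = 1) / 2  # approximately 1.35
```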
39 | 40 | ```{r scatterplots} 41 | ggplot(res %>% filter(logLR > 0) %>% filter(grepl("ash.hu",method)), 42 | aes(sample = logLR)) + 43 | facet_grid(scenario ~ method) + 44 | stat_qq(distribution = qchisq,dparams = list(df=1)) + 45 | geom_abline() + 46 | ggtitle("qqplot under null vs chisq-1; line slope=1") 47 | 48 | ggplot(res %>% filter(logLR > 0) %>% filter(grepl("ash.u",method)), 49 | aes(sample = logLR)) + 50 | facet_grid(scenario~method) + 51 | stat_qq(distribution = qchisq,dparams = list(df = 1)) + 52 | geom_abline(intercept = 0,slope = 0.5) + 53 | ggtitle("qqplot under null vs chisq-1; line slope=0.5") 54 | 55 | ggplot(res %>% filter(logLR > 0) %>% filter(grepl("ash.n",method)), 56 | aes(sample = logLR)) + 57 | facet_grid(scenario ~ method) + 58 | stat_qq(distribution = qchisq,dparams = list(df = 1)) + 59 | geom_abline(intercept = 0,slope = 0.5) + 60 | ggtitle("qqplot under null vs chisq-1; line slope=0.5") 61 | ``` 62 | 63 | ## Session information 64 | 65 | ```{r info} 66 | sessionInfo() 67 | ``` 68 | -------------------------------------------------------------------------------- /analysis/table/README.md: -------------------------------------------------------------------------------- 1 | This directory is used to store LaTeX tables created by summarize_coverage.Rmd 2 | -------------------------------------------------------------------------------- /analysis/table/coverage_all.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:31 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n & 0.10 & 0.06 & 0.05 & 0.06 & 0.00 & 0.04 \\ 6 | ash.u & 0.13 & 0.07 & 0.06 & 0.07 & 0.00 & 0.04 \\ 7 | ash.hu & 0.12 & 0.07 & 0.06 & 0.06 & 0.00 & 0.04 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/coverage_all_nopen.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:34 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n.s & 0.05 & 0.05 & 0.05 & 0.05 & 0.00 & 0.04 \\ 6 | ash.u.s & 0.06 & 0.05 & 0.05 & 0.06 & 0.00 & 0.04 \\ 7 | ash.hu.s & 0.12 & 0.08 & 0.08 & 0.08 & 0.00 & 0.07 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/coverage_neg.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:28 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n & 0.07 & 0.06 & 0.00 & 0.06 & 0.00 & 0.02 \\ 6 | ash.u & 0.12 & 0.10 & 0.07 & 0.09 & 0.00 & 0.06 \\ 7 | ash.hu & 0.13 & 0.13 & 0.08 & 0.07 & 0.00 & 0.06 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/coverage_neg_nopen.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:31 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5
| \midrule ash.n.s & 0.05 & 0.05 & 0.02 & 0.07 & 0.00 & 0.03 \\ 6 | ash.u.s & 0.11 & 0.08 & 0.10 & 0.08 & 0.00 & 0.06 \\ 7 | ash.hu.s & 0.11 & 0.08 & 0.09 & 0.06 & 0.00 & 0.06 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/coverage_pos.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:29 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n & 0.06 & 0.06 & 0.06 & 0.14 & 0.00 & 0.04 \\ 6 | ash.u & 0.07 & 0.07 & 0.07 & 0.16 & 0.00 & 0.05 \\ 7 | ash.hu & 0.08 & 0.08 & 0.07 & 0.08 & 0.00 & 0.05 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/coverage_pos_nopen.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:31 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n.s & 0.06 & 0.06 & 0.08 & 0.12 & 0.00 & 0.06 \\ 6 | ash.u.s & 0.07 & 0.07 & 0.08 & 0.12 & 0.00 & 0.05 \\ 7 | ash.hu.s & 0.66 & 0.40 & 0.48 & 0.46 & 0.00 & 0.18 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/scoverage_all.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:37 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n & 0.90 & 0.94 & 0.95 & 0.94 & 1.00 & 0.96 \\ 6 | ash.u & 0.87 & 0.93 & 0.94 & 0.93 & 1.00 & 0.96 \\ 7 | ash.hu & 0.88 & 0.93 & 0.94 & 0.94 & 1.00 & 0.96 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/scoverage_all_nopen.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:40 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n.s & 0.95 & 0.95 & 0.95 & 0.95 & 1.00 & 0.96 \\ 6 | ash.u.s & 0.94 & 0.95 & 0.95 & 0.94 & 1.00 & 0.96 \\ 7 | ash.hu.s & 0.88 & 0.92 & 0.92 & 0.92 & 1.00 & 0.93 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/scoverage_neg.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:34 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n & 0.93 & 0.94 & 1.00 & 0.94 & 1.00 & 0.98 \\ 6 | ash.u & 0.88 & 0.90 & 0.93 & 0.91 & 1.00 & 0.94 \\ 7 | ash.hu & 0.87 & 0.87 & 0.92 & 0.93 & 1.00 & 0.94 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/scoverage_neg_nopen.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by 
xtable 1.8-2 package 2 | % Sun Jan 15 17:52:37 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n.s & 0.95 & 0.95 & 0.98 & 0.93 & 1.00 & 0.97 \\ 6 | ash.u.s & 0.89 & 0.92 & 0.90 & 0.92 & 1.00 & 0.94 \\ 7 | ash.hu.s & 0.89 & 0.92 & 0.91 & 0.94 & 1.00 & 0.94 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/scoverage_pos.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:34 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n & 0.94 & 0.94 & 0.94 & 0.86 & 1.00 & 0.96 \\ 6 | ash.u & 0.93 & 0.93 & 0.93 & 0.84 & 1.00 & 0.95 \\ 7 | ash.hu & 0.92 & 0.92 & 0.93 & 0.92 & 1.00 & 0.95 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/scoverage_pos_nopen.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:37 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n.s & 0.94 & 0.94 & 0.92 & 0.88 & 1.00 & 0.94 \\ 6 | ash.u.s & 0.93 & 0.93 & 0.92 & 0.88 & 1.00 & 0.95 \\ 7 | ash.hu.s & 0.34 & 0.60 & 0.52 & 0.54 & 1.00 & 0.82 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/template.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Untitled" 3 | author: "First Last" 4 | date: YYYY-MM-DD 5 | --- 6 | 7 | **Last updated:** `r Sys.Date()` 8 | 9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)` 10 | 11 | First, we load the necessary libraries. 12 | 13 | ```{r packages} 14 | library(ggplot2) # For illustration only. 15 | ``` 16 | 17 | ```{r chunk_options, include=FALSE} 18 | source("chunk-options.R") 19 | ``` 20 | 21 | This is a template for writing reports with R Markdown, copied from @jdblischak. 22 | 23 | ## Section title 24 | 25 | You can create a new file from the template using the command `cp`. 
26 | 27 | ```bash 28 | cp analysis/template.Rmd analysis/newfile.Rmd 29 | ``` 30 | 31 | ## Session information 32 | 33 | ```{r info} 34 | sessionInfo() 35 | ``` 36 | -------------------------------------------------------------------------------- /ash-packrat.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Makefile 16 | -------------------------------------------------------------------------------- /code/Makefile: -------------------------------------------------------------------------------- 1 | all: dsc-shrink dsc-null dsc-opt 2 | 3 | dsc-shrink: ../output/dsc-shrink-files/res.RData 4 | dsc-null: ../output/dsc-znull-files/res.znull.RData 5 | dsc-opt: ../output/dsc-opt-files/dsc_opt.RData 6 | 7 | ../output/dsc-shrink-files/res.RData: 8 | export R_LIBS_USER=../../packrat/lib/*/*; cd dsc-shrink; R CMD BATCH run_dsc.R 9 | 10 | ../output/dsc-znull-files/res.znull.RData: 11 | export R_LIBS_USER=../../packrat/lib/*/*; cd dsc-shrink; R CMD BATCH run_dsc_znull.R 12 | 13 | ../output/dsc-opt-files/dsc_opt.RData: 14 | export R_LIBS_USER=../../packrat/lib/*/*; cd dsc-opt; R CMD BATCH run_dsc_opt.R 15 | -------------------------------------------------------------------------------- /code/README.md: -------------------------------------------------------------------------------- 1 | This directory is intended for code, possibly long-running, that 2 | creates output in `output`. 3 | -------------------------------------------------------------------------------- /code/dsc-opt/methods/ash.multiopt.wrapper.R: -------------------------------------------------------------------------------- 1 | #' @title wrapper for ash for optimization DSC 2 | #' 3 | #' @description Runs ash twice on the same data, once with the interior-point optimizer and once with EM, and records the log-likelihood from each run 4 | #' @details None 5 | #' 6 | #' @param input a list with elements betahat and sebetahat 7 | #' @param args a list containing other additional arguments to ash 8 | #' 9 | #' @return output a list containing a vector of loglikelihoods from multiple runs of ash 10 | #' 11 | library(ashr) 12 | 13 | ash.multiopt.wrapper=function(input,args=NULL){ 14 | if(is.null(args)){ #set shrink so that the likelihood is unpenalized 15 | args=list(mixcompdist="halfuniform",method="shrink") 16 | } 17 | loglik=rep(0,2) 18 | 19 | #first run is with interior point 20 | res=do.call(ash, args= c(list(betahat=input$betahat,sebetahat=input$sebetahat,optmethod="mixIP"),args)) 21 | loglik[1]= get_loglik(res) 22 | 23 | #now with EM 24 | res=do.call(ash, args= c(list(betahat=input$betahat,sebetahat=input$sebetahat,optmethod="cxxMixSquarem"),args)) 25 | loglik[2]= get_loglik(res) 26 | 27 | 28 | #first run is with non-random start (ie default start) 29 | #res=do.call(ash, args= c(list(betahat=input$betahat,sebetahat=input$sebetahat,randomstart=FALSE),args)) 30 | #loglik[1]= get_loglik(res) 31 | 32 | #for(i in 2:11){ 33 | # res = do.call(ash, args= c(list(betahat=input$betahat,sebetahat=input$sebetahat,randomstart=TRUE),args)) 34 | # loglik[i]= get_loglik(res) 35 | #} 36 | return(list(loglik=loglik)) 37 | } 38 | -------------------------------------------------------------------------------- /code/dsc-opt/run_dsc_opt.R: -------------------------------------------------------------------------------- 1 | library(dscr) 2 |
dscr::source_dir("methods") 3 | 4 | ###### Initialize ####### 5 | 6 | dsc_opt=new_dsc("opt","../../output/dsc-opt-files") 7 | 8 | ###### Add Scenarios ##### 9 | dscr::source_dir("../dsc-shrink/datamakers") 10 | source("../dsc-shrink/add_named_scenarios.R") 11 | add_named_scenarios(dsc_opt,c("spiky","near-normal","flat-top","skew", 12 | "big-normal","bimodal")) 13 | 14 | ###### Add Methods ##### 15 | 16 | add_method(dsc_opt,"ash.u",ash.multiopt.wrapper, 17 | args = list(mixcompdist = "uniform",method = "shrink")) 18 | add_method(dsc_opt,"ash.hu",ash.multiopt.wrapper, 19 | args = list(mixcompdist = "halfuniform",method = "shrink")) 20 | add_method(dsc_opt,"ash.n",ash.multiopt.wrapper, 21 | args = list(mixcompdist = "normal",method = "shrink")) 22 | 23 | ####### Define Score and Add it ####### 24 | 25 | score = function(data, output) { 26 | # This loglik is a vector of length 2 with logliks from two 27 | # different optimizations. 28 | x = output$loglik 29 | return(list(diff1 = x[1] - x[2])) 30 | } 31 | 32 | add_score(dsc_opt,score) 33 | 34 | ######## Run the DSC ################# 35 | 36 | res_opt = run_dsc(dsc_opt) 37 | save(dsc_opt,file = "../../output/dsc-opt-files/dsc_opt.RData") 38 | -------------------------------------------------------------------------------- /code/dsc-shrink/add_methods.R: -------------------------------------------------------------------------------- 1 | dscr::source_dir("methods") 2 | 3 | add_method(dsc_shrink,name="ash.hu.nocxx",fn =ash.wrapper,args=list(mixcompdist="halfunif"),outputtype = "ash_output") 4 | add_method(dsc_shrink,name="ash.hu",fn =ash.wrapper,args=list(mixcompdist="halfunif"),outputtype = "ash_output") 5 | add_method(dsc_shrink,name="ash.u",fn =ash.wrapper,args=list(mixcompdist="unif"),outputtype = "ash_output") 6 | add_method(dsc_shrink,name="ash.n",fn =ash.wrapper,args=list(mixcompdist="normal"),outputtype = "ash_output") 7 | 8 | 9 | add_method(dsc_shrink,name="bayes",fn=bayes.wrapper,gold_flag=TRUE,outputtype = "ash_output") 10 | 11 | # try smaller null weight 12 | add_method(dsc_shrink,name="ash.hu.nw2",fn =ash.wrapper,args=list(mixcompdist="halfunif",nullweight=2),outputtype = "ash_output") 13 | add_method(dsc_shrink,name="ash.u.nw2",fn =ash.wrapper,args=list(mixcompdist="unif",nullweight=2),outputtype = "ash_output") 14 | add_method(dsc_shrink,name="ash.n.nw2",fn =ash.wrapper,args=list(mixcompdist="normal",nullweight=2),outputtype = "ash_output") 15 | 16 | 17 | add_method(dsc_shrink,name="ash.hu.s",fn =ash.wrapper,args=list(mixcompdist="halfunif",method="shrink"),outputtype = "ash_output") 18 | add_method(dsc_shrink,name="ash.u.s",fn =ash.wrapper,args=list(mixcompdist="unif",method="shrink"),outputtype = "ash_output") 19 | add_method(dsc_shrink,name="ash.n.s",fn =ash.wrapper,args=list(mixcompdist="normal",method="shrink"),outputtype = "ash_output") 20 | 21 | 22 | add_method(dsc_shrink,name="mixfdr.tnull", fn=mixfdr.wrapper, args = list(theonull=TRUE),outputtype = "mixfdr_output") 23 | add_method(dsc_shrink,name="mixfdr.enull", fn=mixfdr.wrapper, args = list(theonull=FALSE),outputtype = "mixfdr_output") 24 | 25 | add_method(dsc_shrink,name="locfdr", fn=locfdr.wrapper,outputtype = "locfdr_output") 26 | add_method(dsc_shrink,name="qvalue", fn=qvalue.wrapper,outputtype = "qvalue_output") 27 | 28 | #add_method(dsc_shrink,name="mixfdr.tnull.J10", fn=mixfdr.wrapper, args = list(theonull=TRUE,J=10),outputtype = "mixfdr_output") 29 | #add_method(dsc_shrink,name="mixfdr.enull.J10", fn=mixfdr.wrapper, args = list(theonull=FALSE,J=10),outputtype = 
"mixfdr_output") 30 | #add_method(dsc_shrink,name="mixfdr.tnull.J10P0", fn=mixfdr.wrapper, args = list(theonull=TRUE,J=10,P=0),outputtype = "mixfdr_output") 31 | -------------------------------------------------------------------------------- /code/dsc-shrink/add_methods.null.R: -------------------------------------------------------------------------------- 1 | dscr::source_dir("methods") 2 | 3 | add_method(dsc_znull,name="ash.hu",fn =ash.wrapper,args=list(mixcompdist="halfunif"),outputtype = "ash_output") 4 | add_method(dsc_znull,name="ash.u",fn =ash.wrapper,args=list(mixcompdist="unif"),outputtype = "ash_output") 5 | add_method(dsc_znull,name="ash.n",fn =ash.wrapper,args=list(mixcompdist="normal"),outputtype = "ash_output") 6 | 7 | add_method(dsc_znull,name="ash.hu.s",fn =ash.wrapper,args=list(mixcompdist="halfunif",method="shrink"),outputtype = "ash_output") 8 | add_method(dsc_znull,name="ash.u.s",fn =ash.wrapper,args=list(mixcompdist="unif",method="shrink"),outputtype = "ash_output") 9 | add_method(dsc_znull,name="ash.n.s",fn =ash.wrapper,args=list(mixcompdist="normal",method="shrink"),outputtype = "ash_output") 10 | 11 | add_method(dsc_znull,name="ash.hu.nw2",fn =ash.wrapper,args=list(mixcompdist="halfunif",nullweight=2),outputtype = "ash_output") 12 | add_method(dsc_znull,name="ash.u.nw2",fn =ash.wrapper,args=list(mixcompdist="unif",nullweight=2),outputtype = "ash_output") 13 | add_method(dsc_znull,name="ash.n.nw2",fn =ash.wrapper,args=list(mixcompdist="normal",nullweight=2),outputtype = "ash_output") 14 | 15 | 16 | add_method(dsc_znull,name="ash.hu.s.gmfine",fn =ash.wrapper,args=list(mixcompdist="halfunif",method="shrink",gridmult=2^.25),outputtype = "ash_output") 17 | add_method(dsc_znull,name="ash.u.s.gmfine",fn =ash.wrapper,args=list(mixcompdist="unif",method="shrink",gridmult=2^.25),outputtype = "ash_output") 18 | add_method(dsc_znull,name="ash.n.s.gmfine",fn =ash.wrapper,args=list(mixcompdist="normal",method="shrink",gridmult=2^.25),outputtype = "ash_output") 19 | 20 | 21 | -------------------------------------------------------------------------------- /code/dsc-shrink/add_named_scenarios.R: -------------------------------------------------------------------------------- 1 | dscr::source_dir("datamakers") 2 | 3 | gdef = list(spiky=normalmix(c(.4,.2,.2,.2),c(0,0,0,0),c(.25,.5,1,2)), 4 | skew=normalmix(c(1/4,1/4,1/3,1/6),c(-2,-1,0,1),c(2,1.5,1,1)), 5 | bignormal=normalmix(c(1),c(0),c(4)), 6 | bimodal=normalmix(c(0.5,0.5),c(-2,2),c(1,1)), 7 | flat_top=normalmix(rep(1/7,7),c(-1.5,-1,-0.5,0,0.5,1,1.5),rep(0.5,7)), 8 | near_normal=normalmix(c(2/3,1/3),c(0,0),c(1,2)) 9 | ) 10 | 11 | add_named_scenarios = function(dsc,names,nsamp=1000,min_pi0=0,max_pi0=1,suffix=""){ 12 | 13 | if("near-normal" %in% names){ 14 | add_scenario(dsc,name=paste0("near-normal",suffix), 15 | fn=rnormmix_datamaker, 16 | args=list( 17 | g=gdef$near_normal, 18 | min_pi0=min_pi0, 19 | max_pi0=max_pi0, 20 | nsamp=nsamp, 21 | betahatsd=1 22 | ), 23 | seed=1:100) 24 | } 25 | 26 | if("flat-top" %in% names){ 27 | add_scenario(dsc,name=paste0("flat-top",suffix), 28 | fn=rnormmix_datamaker, 29 | args=list( 30 | g=gdef$flat_top, 31 | min_pi0=min_pi0, 32 | max_pi0=max_pi0, 33 | nsamp=nsamp, 34 | betahatsd=1 35 | ), 36 | seed=1:100) 37 | } 38 | 39 | if("skew" %in% names){ 40 | add_scenario(dsc,name=paste0("skew",suffix), 41 | fn=rnormmix_datamaker, 42 | args=list( 43 | g=gdef$skew, 44 | min_pi0=min_pi0, 45 | max_pi0=max_pi0, 46 | nsamp=nsamp, 47 | betahatsd=1 48 | ), 49 | seed=1:100) 50 | } 51 | 52 | if("spiky" %in% 
names){ 53 | add_scenario(dsc,name=paste0("spiky",suffix), 54 | fn=rnormmix_datamaker, 55 | args=list( 56 | g=gdef$spiky, 57 | min_pi0=min_pi0, 58 | max_pi0=max_pi0, 59 | nsamp=nsamp, 60 | betahatsd=1 61 | ), 62 | seed=1:100) 63 | } 64 | 65 | if("big-normal" %in% names){ 66 | add_scenario(dsc,name=paste0("big-normal",suffix), 67 | fn=rnormmix_datamaker, 68 | args=list( 69 | g=gdef$bignormal, 70 | min_pi0=min_pi0, 71 | max_pi0=max_pi0, 72 | nsamp=nsamp, 73 | betahatsd=1 74 | ), 75 | seed=1:100) 76 | } 77 | 78 | if("bimodal" %in% names){ 79 | add_scenario(dsc,name=paste0("bimodal",suffix), 80 | fn=rnormmix_datamaker, 81 | args=list( 82 | g=gdef$bimodal, 83 | min_pi0=min_pi0, 84 | max_pi0=max_pi0, 85 | nsamp=nsamp, 86 | betahatsd=1 87 | ), 88 | seed=1:100) 89 | } 90 | 91 | } 92 | -------------------------------------------------------------------------------- /code/dsc-shrink/add_scenarios.null.R: -------------------------------------------------------------------------------- 1 | dscr::source_dir("datamakers") 2 | 3 | 4 | #Now, for each scenario create an element of scenarios of the following form 5 | add_scenario(dsc_znull,name="znull.1000", 6 | fn=null_z_datamaker, 7 | args=list( 8 | nsamp=1000 9 | ), 10 | seed=1:500) 11 | 12 | add_scenario(dsc_znull,name="znull.100", 13 | fn=null_z_datamaker, 14 | args=list( 15 | nsamp=100 16 | ), 17 | seed=1:500) 18 | -------------------------------------------------------------------------------- /code/dsc-shrink/datamakers/datamaker.R: -------------------------------------------------------------------------------- 1 | library(ashr) 2 | 3 | #' @title datamaker for FDR DSC 4 | #' 5 | #' @description Simulates data for a DSC for methods to compute FDR/q values 6 | #' @details None 7 | #' 8 | #' @param args A list of the remaining arguments, which in this case is 9 | #' \item{nsamp}{The number of samples to create} 10 | #' \item{g}{An object of class normalmix specifying the mixture distribution from 11 | #' which non-null beta values are to be simulated} 12 | #' \item{min_pi0}{The minimum value of pi0, the proportion of true nulls} 13 | #' \item{max_pi0}{The maximum value of pi0, the proportion of true null} 14 | #' \item{betahatsd}{The standard deviation of betahat to use} 15 | #' 16 | #' @return a list with the following elements 17 | #' \item{meta}{A list containing the meta data. 
In this case beta} 18 | #' \item{input}{A list containing the input data; in this case the set of betahat values and their standard errors} 19 | #' 20 | rnormmix_datamaker = function(args){ 21 | #here is the meat of the function that needs to be defined for each dsc to be done 22 | pi0 = runif(1,args$min_pi0,args$max_pi0) #generate the proportion of true nulls randomly 23 | 24 | k = ncomp(args$g) 25 | comp = sample(1:k,args$nsamp,mixprop(args$g),replace=TRUE) #randomly draw a component 26 | isnull = (runif(args$nsamp,0,1) < pi0) 27 | beta = ifelse(isnull, 0,rnorm(args$nsamp,comp_mean(args$g)[comp],comp_sd(args$g)[comp])) 28 | sebetahat = args$betahatsd 29 | betahat = beta + rnorm(args$nsamp,0,sebetahat) 30 | meta=list(g1=args$g,beta=beta,pi0=pi0) 31 | input=list(betahat=betahat,sebetahat=sebetahat,df=NULL) 32 | 33 | #end of meat of function 34 | 35 | data = list(meta=meta,input=input) 36 | 37 | return(data) 38 | 39 | } 40 | -------------------------------------------------------------------------------- /code/dsc-shrink/datamakers/null_t_datamaker.R: -------------------------------------------------------------------------------- 1 | null_t_datamaker = function(args){ 2 | 3 | #assuming the true standard deviation is 1, the estimated s.e. satisfies sebetahat^2 ~ chisq(df)/df 4 | sebetahat = sqrt(rchisq(args$nsamp, args$df)/args$df) 5 | 6 | beta = rep(0,args$nsamp) 7 | betahat = beta + rnorm(args$nsamp,0,1) 8 | 9 | 10 | meta=list(beta=beta,pi0=1) 11 | input=list(betahat=betahat,sebetahat=sebetahat,df=args$df) 12 | 13 | #end of meat of function 14 | 15 | data = list(meta=meta,input=input) 16 | 17 | return(data) 18 | 19 | } -------------------------------------------------------------------------------- /code/dsc-shrink/datamakers/null_z_datamaker.R: -------------------------------------------------------------------------------- 1 | null_z_datamaker = function(args){ 2 | 3 | #all standard errors are set to 1, so betahat values are z scores 4 | sebetahat = rep(1,args$nsamp) 5 | 6 | beta = rep(0,args$nsamp) 7 | betahat = beta + rnorm(args$nsamp,0,1) 8 | 9 | 10 | meta=list(beta=beta,pi0=1) 11 | input=list(betahat=betahat,sebetahat=sebetahat,df=NULL) 12 | 13 | #end of meat of function 14 | 15 | data = list(meta=meta,input=input) 16 | 17 | return(data) 18 | 19 | } -------------------------------------------------------------------------------- /code/dsc-shrink/methods/ash.wrapper.R: -------------------------------------------------------------------------------- 1 | #' @title wrapper for ash for shrinkage DSC 2 | #' 3 | #' @description Runs ash on the betahat values 4 | #' @details None 5 | #' 6 | #' @param input a list with elements betahat and sebetahat 7 | #' @param args a list containing other additional arguments to ash 8 | #' 9 | #' @return output a list with the following elements 10 | #' \item{beta_est}{vector containing point estimates for beta} 11 | #' 12 | library(ashr) 13 | 14 | ash.wrapper=function(input,args=NULL){ 15 | if(is.null(args)){ 16 | args=list(mixcompdist="halfuniform",method="fdr") 17 | } 18 | res = do.call(ash, args= c(list(betahat=input$betahat,sebetahat=input$sebetahat,df=input$df),args)) 19 | return(res) 20 | } 21 | 22 | #uses ash function to compute bayes rule by running it with true g 23 | bayes.wrapper = function(input, meta, args=NULL){ 24 | pi0 = meta$pi0 25 | g1 = meta$g1 26 | g=g1 27 | #create g by adding null component to g1 28 | g$pi= c(pi0, (1-pi0)*g1$pi) 29 | g$mean = c(0,g1$mean) 30 | g$sd = c(0,g1$sd) 31 | #do computations for bayes rule by running ash with the true g held fixed (fixg=TRUE) 32 | res
= do.call(ash, args= list(betahat=input$betahat,sebetahat=input$sebetahat,df=input$df,g=g,fixg=TRUE)) 33 | return(res) 34 | } 35 | 36 | ash2beta_est =function(output){ 37 | return (list(beta_est=get_pm(output))) 38 | } 39 | 40 | ash2pi0_est =function(output){ 41 | return (list(pi0_est=get_pi0(output))) 42 | } 43 | 44 | ash2fitted.g = function(output){ 45 | return (list(fitted.g=get_fitted_g(output))) 46 | } 47 | -------------------------------------------------------------------------------- /code/dsc-shrink/methods/locfdr.wrapper.R: -------------------------------------------------------------------------------- 1 | #' @title wrapper for locfdr 2 | #' 3 | #' @description Runs locfdr on z scores 4 | #' @details None 5 | #' 6 | #' @param input a list with elements betahat and sebetahat 7 | #' @param args a list containing other additional arguments to locfdr 8 | #' 9 | #' @return output a list 10 | #' 11 | library(locfdr) 12 | 13 | locfdr.wrapper=function(input,args=NULL){ 14 | res = try(do.call(locfdr, args= c(list(zz=input$betahat/input$sebetahat, nulltype=0, plot=0),args))) 15 | if(inherits(res,"try-error")){res=list(fdr=rep(NA,length(input$betahat)),fp0=matrix(NA,nrow=6,ncol=3))} 16 | return(res) 17 | } 18 | 19 | 20 | locfdr2pi0_est = function(output){ 21 | return (list(pi0_est=output$fp0[1,3])) 22 | } 23 | 24 | 25 | -------------------------------------------------------------------------------- /code/dsc-shrink/methods/mixfdr.wrapper.R: -------------------------------------------------------------------------------- 1 | #' @title wrapper for mixfdr for shrinkage DSC 2 | #' 3 | #' @description Runs mixfdr to compute effect estimates from z scores (computed from betahat and sebetahat) 4 | #' @details None 5 | #' 6 | #' @param input a list with elements betahat and sebetahat 7 | #' @param args a list containing other additional arguments to ash 8 | #' 9 | #' @return output a list with the following elements 10 | #' \item{beta_est}{vector containing point estimates for beta} 11 | #' \item{pi0_est}{scalar estimate of pi0} 12 | #' 13 | library(mixfdr) 14 | 15 | mixfdr.wrapper=function(input,args=NULL){ 16 | res = try(do.call(mixFdr, args= c(list(x=input$betahat/input$sebetahat, noiseSD=1, plots=FALSE),args))) 17 | if(inherits(res,"try-error")){res=list(effectSize=rep(NA,length(input$betahat)),pi0_est=NA,pi=NA,mu=NA,sigma=NA,noiseSD=NA)} 18 | return(list(res=res,input=input)) 19 | } 20 | 21 | mixfdr2beta_est = function(output){ 22 | return (list(beta_est=output$res$effectSize*output$input$sebetahat)) 23 | } 24 | 25 | mixfdr2pi0_est = function(output){ 26 | return (list(pi0_est=output$res$pi[1])) 27 | } 28 | 29 | mixfdr2fitted.g = function(output){ 30 | return (list(fitted.g= 31 | normalmix(output$res$pi,output$res$mu, 32 | sqrt(output$res$sigma^2-output$res$noiseSD^2)))) 33 | } 34 | 35 | -------------------------------------------------------------------------------- /code/dsc-shrink/methods/qvalue.wrapper.R: -------------------------------------------------------------------------------- 1 | #' @title wrapper for qvalue for FDR DSC 2 | #' 3 | #' @description Runs qvalue to compute FDR/q values 4 | #' @details None 5 | #' 6 | #' @param input a list with elements betahat and sebetahat 7 | #' @param args a list with additional arguments to qvalue 8 | #' 9 | #' @return output a list with the following elements 10 | #' \item{qvalue}{vector of qvalues, with jth element being the q value corresponding to (betahat_j,sebetahat_j)} 11 | #' 12 | #' 13 | library(qvalue) 14 | 15 | qvalue.wrapper =
function(input,args=NULL){ 16 | zscore = input$betahat/input$sebetahat 17 | pvalue = 2*pnorm(-abs(zscore),lower.tail=TRUE) 18 | res = qvalue(pvalue) 19 | return(res) 20 | } 21 | 22 | qvalue2pi0_est = function(output){ 23 | if(!is.list(output)){return(list(pi0_est=NA))} #deal with case where ERROR thrown 24 | else{ 25 | return (list(pi0_est=output$pi0)) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /code/dsc-shrink/run_dsc.R: -------------------------------------------------------------------------------- 1 | library(dscr) 2 | library(ashr) 3 | sessionInfo() 4 | 5 | dsc_shrink=new_dsc("shrink","../../output/dsc-shrink-files") 6 | source("add_named_scenarios.R") 7 | add_named_scenarios(dsc_shrink,c("spiky","near-normal","flat-top","skew","big-normal","bimodal","near-normal-nn")) 8 | add_named_scenarios(dsc_shrink,c("spiky","near-normal","flat-top","skew","big-normal","bimodal"),min_pi0=0,max_pi0=0,suffix="-nn") 9 | 10 | source("add_methods.R") 11 | 12 | add_output_parser(dsc_shrink,"ash2beta",ash2beta_est,"ash_output", 13 | "beta_est_output") 14 | add_output_parser(dsc_shrink,"mixfdr2beta",mixfdr2beta_est,"mixfdr_output","beta_est_output") 15 | 16 | add_output_parser(dsc_shrink,"ash2pi0",ash2pi0_est,"ash_output","pi0_est_output") 17 | add_output_parser(dsc_shrink,"mixfdr2pi0",mixfdr2pi0_est,"mixfdr_output","pi0_est_output") 18 | add_output_parser(dsc_shrink,"locfdr2pi0",locfdr2pi0_est,"locfdr_output","pi0_est_output") 19 | add_output_parser(dsc_shrink,"qvalue2pi0",qvalue2pi0_est,"qvalue_output","pi0_est_output") 20 | 21 | add_output_parser(dsc_shrink,"ash2fitted.g",ash2fitted.g,"ash_output","g_output") 22 | add_output_parser(dsc_shrink,"mixfdr2fitted.g",mixfdr2fitted.g,"mixfdr_output","g_output") 23 | 24 | source("score.R") 25 | add_score(dsc_shrink,score,"beta_err","beta_est_output") 26 | add_score(dsc_shrink,score2,"pi0_score","pi0_est_output") 27 | add_score(dsc_shrink,score3,"cdf_score","g_output") 28 | add_score(dsc_shrink,score_neg,"negprob","ash_output") #just extracts the negativeprobs 29 | add_score(dsc_shrink,score_pos,"posprob","ash_output") #just extracts the positiveprobs 30 | add_score(dsc_shrink,score_fdr,"fdr","mixfdr_output") #just extracts the fdr 31 | add_score(dsc_shrink,score_betahat,"betahat","mixfdr_output") #just extracts betahat 32 | add_score(dsc_shrink,score_lfsr,"lfsr","ash_output") #just extracts the lfsr 33 | add_score(dsc_shrink,score_lfdr,"lfdr","ash_output") #just extracts the lfdr 34 | 35 | res=run_dsc(dsc_shrink) 36 | save(res,dsc_shrink,file="../../output/dsc-shrink-files/res.RData") 37 | -------------------------------------------------------------------------------- /code/dsc-shrink/run_dsc_mini.R: -------------------------------------------------------------------------------- 1 | library(dscr) 2 | library(ashr) 3 | sessionInfo() 4 | 5 | dsc_shrink_mini=new_dsc("shrink_mini","../../output/dsc-shrink-mini-files") 6 | source("add_named_scenarios.R") 7 | add_named_scenarios(dsc_shrink_mini,c("spiky")) 8 | dscr::source_dir("methods") 9 | add_method(dsc_shrink_mini,name="ash.n",fn =ash.wrapper,args=list(mixcompdist="normal"),outputtype = "ash_output") 10 | add_method(dsc_shrink_mini,name="mixfdr.tnull", fn=mixfdr.wrapper, args = list(theonull=TRUE),outputtype = "mixfdr_output") 11 | 12 | add_output_parser(dsc_shrink_mini,"ash2beta",ash2beta_est,"ash_output","beta_est_output") 13 | add_output_parser(dsc_shrink_mini,"mixfdr2beta",mixfdr2beta_est,"mixfdr_output","beta_est_output") 14 | 15 |
add_output_parser(dsc_shrink_mini,"ash2pi0",ash2pi0_est,"ash_output","pi0_est_output") 16 | #add_output_parser(dsc_shrink_mini,"mixfdr2pi0",mixfdr2pi0_est,"mixfdr_output","pi0_est_output") 17 | #add_output_parser(dsc_shrink_mini,"locfdr2pi0",locfdr2pi0_est,"locfdr_output","pi0_est_output") 18 | #add_output_parser(dsc_shrink_mini,"qvalue2pi0",qvalue2pi0_est,"qvalue_output","pi0_est_output") 19 | 20 | #add_output_parser(dsc_shrink_mini,"ash2fitted.g",ash2fitted.g,"ash_output","g_output") 21 | #add_output_parser(dsc_shrink_mini,"mixfdr2fitted.g",mixfdr2fitted.g,"mixfdr_output","g_output") 22 | 23 | 24 | 25 | source("score.R") 26 | add_score(dsc_shrink_mini,score,"beta_err","beta_est_output") 27 | # add_score(dsc_shrink,score2,"pi0_score","pi0_est_output") 28 | # add_score(dsc_shrink,score3,"cdf_score","g_output") 29 | # add_score(dsc_shrink,score_neg,"negprob","ash_output") #just extract the negativeprobs 30 | # add_score(dsc_shrink,score_pos,"posprob","ash_output") #just extracts the positiveprobs 31 | # add_score(dsc_shrink,score_fdr,"fdr","mixfdr_output") #just extracts the fdr 32 | # add_score(dsc_shrink,score_betahat,"betahat","mixfdr_output") #just extracts the fdr 33 | # add_score(dsc_shrink,score_lfsr,"lfsr","ash_output") #just extracts the lfsr 34 | # add_score(dsc_shrink,score_lfdr,"lfdr","ash_output") #just extracts the lfdr 35 | 36 | 37 | res=run_dsc(dsc_shrink_mini) 38 | save(res,dsc_shrink_mini,file="../../output/dsc-shrink-files/res.mini.RData") 39 | 40 | 41 | -------------------------------------------------------------------------------- /code/dsc-shrink/run_dsc_znull.R: -------------------------------------------------------------------------------- 1 | #the aim of this dsc is to assess the distribution of the logLR 2 | #value for ash under the null that all effects are 0 3 | 4 | library(dscr) 5 | 6 | dsc_znull=new_dsc("znull","../../output/dsc-znull-files") 7 | source("add_scenarios.null.R") 8 | source("add_methods.null.R") 9 | 10 | source("score.R") 11 | add_score(dsc_znull,score_logLR,"logLR","ash_output") 12 | 13 | res=run_dsc(dsc_znull) 14 | save(res,dsc_znull,file="../../output/dsc-znull-files/res.znull.RData") 15 | 16 | -------------------------------------------------------------------------------- /code/dsc-shrink/score.R: -------------------------------------------------------------------------------- 1 | #' @title compute score for shrinkage DSC 2 | #' 3 | #' @description Outputs the RMSE and MAE of estimated beta values 4 | #' @details None 5 | #' 6 | #' @param data 7 | #' @param output 8 | #' 9 | #' @return score a list with 10 | #' \item{RMSE}{root mean squared error of estimated beta values} 11 | #' \item{MAE}{Median absolute error of estimated beta values} 12 | #' 13 | #' 14 | score = function(data, output){ 15 | return(list(RMSE=sqrt(mean((data$meta$beta-output$beta_est)^2)), 16 | MAE = median(abs(data$meta$beta-output$beta_est)))) 17 | } 18 | 19 | score2 = function(data, output){ 20 | return(list(pi0 = data$meta$pi0, 21 | pi0_est = output$pi0_est)) 22 | } 23 | 24 | score3 = function(data, output){ 25 | return(c(S=pcdf_post(output$fitted.g,data$meta$beta, 26 | set_data(data$input$betahat, 27 | data$input$sebetahat)))) 28 | } 29 | 30 | score_neg = function(data, output){ 31 | return(c(S=get_np(output))) 32 | } 33 | 34 | score_pos = function(data, output){ 35 | return(c(S=get_pp(output))) 36 | } 37 | 38 | score_fdr = function(data, output){ #for mixfdr output 39 | return(c(S=output$fdr)) 40 | } 41 | 42 | score_lfsr = function(data, output){ 43 | 
return(c(S=get_lfsr(output))) 44 | } 45 | 46 | score_lfdr = function(data, output){ 47 | return(c(S=get_lfdr(output))) 48 | } 49 | 50 | score_betahat = function(data, output){ #note this ignores output: it extracts the observed betahat from the input data 51 | return(c(S=data$input$betahat)) 52 | } 53 | 54 | score_logLR = function(data,output){ 55 | return(c(logLR = ashr::calc_logLR(output,set_data(data$input$betahat, 56 | data$input$sebetahat)))) 57 | } 58 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | Directory for data sets that typically will not change after being deposited. Use output/ for postprocessed data. 2 | -------------------------------------------------------------------------------- /output/README.md: -------------------------------------------------------------------------------- 1 | This directory is intended for output from code/ (e.g., postprocessed data). 2 | -------------------------------------------------------------------------------- /packrat/init.R: -------------------------------------------------------------------------------- 1 | local({ 2 | 3 | libDir <- file.path('packrat', 'lib', R.version$platform, getRversion()) 4 | 5 | ## Escape hatch to allow RStudio to handle initialization 6 | if (!is.na(Sys.getenv("RSTUDIO", unset = NA)) && 7 | is.na(Sys.getenv("RSTUDIO_PACKRAT_BOOTSTRAP", unset = NA))) { 8 | Sys.setenv("RSTUDIO_PACKRAT_BOOTSTRAP" = "1") 9 | setHook("rstudio.sessionInit", function(...) { 10 | # Ensure that, on sourcing 'packrat/init.R', we are 11 | # within the project root directory 12 | if (exists(".rs.getProjectDirectory")) { 13 | owd <- getwd() 14 | setwd(.rs.getProjectDirectory()) 15 | on.exit(setwd(owd), add = TRUE) 16 | } 17 | source("packrat/init.R") 18 | }) 19 | return(invisible(NULL)) 20 | } 21 | 22 | ## Unload packrat in case it's loaded -- this ensures packrat _must_ be 23 | ## loaded from the private library. Note that `requireNamespace` will 24 | ## succeed if the package is already loaded, regardless of lib.loc! 25 | if ("packrat" %in% loadedNamespaces()) 26 | try(unloadNamespace("packrat"), silent = TRUE) 27 | 28 | if (suppressWarnings(requireNamespace("packrat", quietly = TRUE, lib.loc = libDir))) { 29 | 30 | # Check 'print.banner.on.startup' -- when NA and RStudio, don't print 31 | print.banner <- packrat::get_opts("print.banner.on.startup") 32 | if (print.banner == "auto" && is.na(Sys.getenv("RSTUDIO", unset = NA))) { 33 | print.banner <- TRUE 34 | } else { 35 | print.banner <- FALSE 36 | } 37 | return(packrat::on(print.banner = print.banner)) 38 | } 39 | 40 | ## Bootstrapping -- only performed in interactive contexts, 41 | ## or when explicitly asked for on the command line 42 | if (interactive() || "--bootstrap-packrat" %in% commandArgs(TRUE)) { 43 | 44 | message("Packrat is not installed in the local library -- ", 45 | "attempting to bootstrap an installation...") 46 | 47 | ## We need utils for the following to succeed -- there are calls to functions 48 | ## in 'restore' that are contained within utils. 
utils gets loaded at the 49 | ## end of start-up anyhow, so this should be fine 50 | library("utils", character.only = TRUE) 51 | 52 | ## Install packrat into local project library 53 | packratSrcPath <- list.files(full.names = TRUE, 54 | file.path("packrat", "src", "packrat") 55 | ) 56 | 57 | ## No packrat tarballs available locally -- try some other means of installation 58 | if (!length(packratSrcPath)) { 59 | 60 | message("> No source tarball of packrat available locally") 61 | 62 | ## There are no packrat sources available -- try using a version of 63 | ## packrat installed in the user library to bootstrap 64 | if (requireNamespace("packrat", quietly = TRUE) && packageVersion("packrat") >= "0.2.0.99") { 65 | message("> Using user-library packrat (", 66 | packageVersion("packrat"), 67 | ") to bootstrap this project") 68 | } 69 | 70 | ## Couldn't find a user-local packrat -- try finding and using devtools 71 | ## to install 72 | else if (requireNamespace("devtools", quietly = TRUE)) { 73 | message("> Attempting to use devtools::install_github to install ", 74 | "a temporary version of packrat") 75 | library(stats) ## for setNames 76 | devtools::install_github("rstudio/packrat") 77 | } 78 | 79 | ## Try downloading packrat from CRAN if available 80 | else if ("packrat" %in% rownames(available.packages())) { 81 | message("> Installing packrat from CRAN") 82 | install.packages("packrat") 83 | } 84 | 85 | ## Fail -- couldn't find an appropriate means of installing packrat 86 | else { 87 | stop("Could not automatically bootstrap packrat -- try running ", 88 | "\"'install.packages('devtools'); devtools::install_github('rstudio/packrat')\"", 89 | "and restarting R to bootstrap packrat.") 90 | } 91 | 92 | # Restore the project, unload the temporary packrat, and load the private packrat 93 | packrat::restore(prompt = FALSE, restart = TRUE) 94 | 95 | ## This code path only reached if we didn't restart earlier 96 | unloadNamespace("packrat") 97 | requireNamespace("packrat", lib.loc = libDir, quietly = TRUE) 98 | return(packrat::on()) 99 | 100 | } 101 | 102 | ## Multiple packrat tarballs available locally -- try to choose one 103 | ## TODO: read lock file and infer most appropriate from there; low priority because 104 | ## after bootstrapping packrat a restore should do the right thing 105 | if (length(packratSrcPath) > 1) { 106 | warning("Multiple versions of packrat available in the source directory;", 107 | "using packrat source:\n- ", shQuote(packratSrcPath)) 108 | packratSrcPath <- packratSrcPath[[1]] 109 | } 110 | 111 | 112 | lib <- file.path("packrat", "lib", R.version$platform, getRversion()) 113 | if (!file.exists(lib)) { 114 | dir.create(lib, recursive = TRUE) 115 | } 116 | lib <- normalizePath(lib, winslash = "/") 117 | 118 | message("> Installing packrat into project private library:") 119 | message("- ", shQuote(lib)) 120 | 121 | surround <- function(x, with) { 122 | if (!length(x)) return(character()) 123 | paste0(with, x, with) 124 | } 125 | 126 | ## The following is performed because a regular install.packages call can fail 127 | peq <- function(x, y) paste(x, y, sep = " = ") 128 | installArgs <- c( 129 | peq("pkgs", surround(packratSrcPath, with = "'")), 130 | peq("lib", surround(lib, with = "'")), 131 | peq("repos", "NULL"), 132 | peq("type", surround("source", with = "'")) 133 | ) 134 | installCmd <- paste(sep = "", 135 | "utils::install.packages(", 136 | paste(installArgs, collapse = ", "), 137 | ")") 138 | 139 | fullCmd <- paste( 140 | surround(file.path(R.home("bin"), "R"), 
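                       # fullCmd assembles a shell command of the form
                       #   "<R_HOME>/bin/R" --vanilla --slave -e "utils::install.packages(...)"
                       # with both the R binary path and the -e expression double-quoted (via
                       # surround) so that paths containing spaces survive the shell.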
with = "\""), 141 | "--vanilla", 142 | "--slave", 143 | "-e", 144 | surround(installCmd, with = "\"") 145 | ) 146 | system(fullCmd) 147 | 148 | ## Tag the installed packrat so we know it's managed by packrat 149 | ## TODO: should this be taking information from the lockfile? this is a bit awkward 150 | ## because we're taking an un-annotated packrat source tarball and simply assuming it's now 151 | ## an 'installed from source' version 152 | 153 | ## -- InstallAgent -- ## 154 | installAgent <- 'InstallAgent: packrat 0.4.4' 155 | 156 | ## -- InstallSource -- ## 157 | installSource <- 'InstallSource: source' 158 | 159 | packratDescPath <- file.path(lib, "packrat", "DESCRIPTION") 160 | DESCRIPTION <- readLines(packratDescPath) 161 | DESCRIPTION <- c(DESCRIPTION, installAgent, installSource) 162 | cat(DESCRIPTION, file = packratDescPath, sep = "\n") 163 | 164 | # Otherwise, continue on as normal 165 | message("> Attaching packrat") 166 | library("packrat", character.only = TRUE, lib.loc = lib) 167 | 168 | message("> Restoring library") 169 | restore(restart = FALSE) 170 | 171 | # If the environment allows us to restart, do so with a call to restore 172 | restart <- getOption("restart") 173 | if (!is.null(restart)) { 174 | message("> Packrat bootstrap successfully completed. ", 175 | "Restarting R and entering packrat mode...") 176 | return(restart()) 177 | } 178 | 179 | # Callers (source-erers) can define this hidden variable to make sure we don't enter packrat mode 180 | # Primarily useful for testing 181 | if (!exists(".__DONT_ENTER_PACKRAT_MODE__.") && interactive()) { 182 | message("> Packrat bootstrap successfully completed. Entering packrat mode...") 183 | packrat::on() 184 | } 185 | 186 | Sys.unsetenv("RSTUDIO_PACKRAT_BOOTSTRAP") 187 | 188 | } 189 | 190 | }) 191 | -------------------------------------------------------------------------------- /packrat/packrat.lock: -------------------------------------------------------------------------------- 1 | PackratFormat: 1.4 2 | PackratVersion: 0.4.6.9 3 | RVersion: 3.3.1 4 | Repos: BioCsoft=http://bioconductor.org/packages/3.1/bioc, 5 | BioCann=http://bioconductor.org/packages/3.1/data/annotation, 6 | BioCexp=http://bioconductor.org/packages/3.1/data/experiment, 7 | BioCextra=http://bioconductor.org/packages/3.1/extra, 8 | CRAN=http://cran.rstudio.com 9 | 10 | Package: BBmisc 11 | Source: CRAN 12 | Version: 1.9 13 | Hash: 0f4232b1208da9b625c1412c6ff328e6 14 | Requires: checkmate 15 | 16 | Package: BH 17 | Source: CRAN 18 | Version: 1.58.0-1 19 | Hash: 4f6db3b5311bb006f536a2e67278d058 20 | 21 | Package: BatchJobs 22 | Source: CRAN 23 | Version: 1.6 24 | Hash: 408fbd27762a92e1597f1a5f306fa248 25 | Requires: DBI, brew, checkmate, digest, RSQLite, sendmailR, BBmisc, 26 | stringr, fail 27 | 28 | Package: DBI 29 | Source: CRAN 30 | Version: 0.3.1 31 | Hash: 096699d1ac1cf530acfc646a0c90ee5d 32 | 33 | Package: MASS 34 | Source: CRAN 35 | Version: 7.3-44 36 | Hash: 3a901d1f70036f5b1e11bcdf8fdb66d9 37 | 38 | Package: R6 39 | Source: CRAN 40 | Version: 2.1.1 41 | Hash: 20a88b2c9c84aecff2702789a4d102f5 42 | 43 | Package: RColorBrewer 44 | Source: CRAN 45 | Version: 1.1-2 46 | Hash: c0d56cd15034f395874c870141870c25 47 | 48 | Package: RSQLite 49 | Source: CRAN 50 | Version: 1.0.0 51 | Hash: 3df52a54766f420d257debad962146c0 52 | Requires: DBI 53 | 54 | Package: Rcpp 55 | Source: CRAN 56 | Version: 0.12.3 57 | Hash: 11ace6a9a186c17a42e0fa2c49af1223 58 | 59 | Package: SQUAREM 60 | Source: CRAN 61 | Version: 2014.8-1 62 | Hash: e2282cd322c70a2d16a35880428f84ec 
63 | 64 | Package: ashr 65 | Source: github 66 | Version: 1.0.5 67 | Hash: 7b3d5f894f9b2907597b4c8b1089d8a4 68 | Requires: Rcpp, SQUAREM, assertthat, truncnorm, plyr, pscl, doParallel, 69 | testthat 70 | GithubRepo: ashr 71 | GithubUsername: stephens999 72 | GithubRef: master 73 | GithubSha1: d8c7743fcb9e03c369ff7a7db2271aad85a53d69 74 | 75 | Package: assertthat 76 | Source: CRAN 77 | Version: 0.1 78 | Hash: 0afb92b59b02593c70ff8046700ba9d3 79 | 80 | Package: base64enc 81 | Source: CRAN 82 | Version: 0.1-3 83 | Hash: c590d29e555926af053055e23ee79efb 84 | 85 | Package: bitops 86 | Source: CRAN 87 | Version: 1.0-6 88 | Hash: 67d0775189fd0041d95abca618c5c07e 89 | 90 | Package: brew 91 | Source: CRAN 92 | Version: 1.0-6 93 | Hash: 931f9972deae0f205e1c78a51f33149b 94 | 95 | Package: caTools 96 | Source: CRAN 97 | Version: 1.17.1 98 | Hash: 97cb6f6293cd18d17df77a6383cc6763 99 | Requires: bitops 100 | 101 | Package: checkmate 102 | Source: CRAN 103 | Version: 1.6.3 104 | Hash: 90b6c60390d3001934a4a0f70b975fc0 105 | 106 | Package: colorspace 107 | Source: CRAN 108 | Version: 1.2-6 109 | Hash: 00bb12245cd975c450cc4a960884fa15 110 | 111 | Package: crayon 112 | Source: CRAN 113 | Version: 1.3.1 114 | Hash: b61d34886cf0f4b4fc4e4f52ea249390 115 | Requires: memoise 116 | 117 | Package: dichromat 118 | Source: CRAN 119 | Version: 2.0-0 120 | Hash: 08eed0c80510af29bb15f840ccfe37ce 121 | 122 | Package: digest 123 | Source: CRAN 124 | Version: 0.6.9 125 | Hash: fd55d5a024f160fc001a5ece1e27782d 126 | 127 | Package: doParallel 128 | Source: CRAN 129 | Version: 1.0.10 130 | Hash: df91a7abfa938c06ad87b9a2b9269adb 131 | Requires: iterators, foreach 132 | 133 | Package: dplyr 134 | Source: CRAN 135 | Version: 0.4.3 136 | Hash: 466e853fc8da049e7812a9e56b9ad70f 137 | Requires: BH, DBI, R6, Rcpp, assertthat, lazyeval, magrittr 138 | 139 | Package: dscr 140 | Source: github 141 | Version: 0.1.1 142 | Hash: 2300895537986fa6886a5c5d033f9587 143 | Requires: assertthat, magrittr, plyr, dplyr, psych, shiny, reshape2, 144 | BatchJobs, knitr, ggplot2 145 | GithubRepo: dscr 146 | GithubUsername: stephens999 147 | GithubRef: master 148 | GithubSha1: 2610af83ade1a44cb7f966d829279191b9ba3630 149 | 150 | Package: evaluate 151 | Source: CRAN 152 | Version: 0.8 153 | Hash: aac00bd789bac10970b50e3b7e0cab04 154 | Requires: stringr 155 | 156 | Package: fail 157 | Source: CRAN 158 | Version: 1.3 159 | Hash: 6f916304b570a0dbe224092ba60f3498 160 | Requires: checkmate, BBmisc 161 | 162 | Package: foreach 163 | Source: CRAN 164 | Version: 1.4.3 165 | Hash: cd53ef4cf29dc59ce3f8c5c1af735fd1 166 | Requires: iterators 167 | 168 | Package: formatR 169 | Source: CRAN 170 | Version: 1.2.1 171 | Hash: 54c730c712edd6087972ecf99bf87c55 172 | 173 | Package: ggplot2 174 | Source: CRAN 175 | Version: 1.0.1 176 | Hash: 4aff1b269135bd2cdea378f4f2f2895e 177 | Requires: MASS, digest, gtable, proto, plyr, scales, reshape2 178 | 179 | Package: gtable 180 | Source: CRAN 181 | Version: 0.1.2 182 | Hash: 38a0f066373a60824cd3ade2362af363 183 | 184 | Package: highr 185 | Source: CRAN 186 | Version: 0.5.1 187 | Hash: 114ef5abcf58bebbf6ac083b9cacbbd8 188 | 189 | Package: htmltools 190 | Source: CRAN 191 | Version: 0.2.6 192 | Hash: 595e7dea94f5f049dfc1ed739f5161af 193 | Requires: digest 194 | 195 | Package: httpuv 196 | Source: CRAN 197 | Version: 1.3.3 198 | Hash: d440b2e539ccef77b9105051291a7628 199 | Requires: Rcpp 200 | 201 | Package: iterators 202 | Source: CRAN 203 | Version: 1.0.8 204 | Hash: 488b93c2a4166db0d15f1e8d882cb1d4 205 | 206 | Package: jsonlite 
207 | Source: CRAN 208 | Version: 0.9.17 209 | Hash: d44b8f8c89ff794d6ec8a07dde103d24 210 | 211 | Package: knitr 212 | Source: CRAN 213 | Version: 1.11 214 | Hash: 0a6d1006425040af05e29504af948535 215 | Requires: digest, formatR, highr, yaml, markdown, stringr, evaluate 216 | 217 | Package: labeling 218 | Source: CRAN 219 | Version: 0.3 220 | Hash: ecf589b42cd284b03a4beb9665482d3e 221 | 222 | Package: lazyeval 223 | Source: CRAN 224 | Version: 0.1.10 225 | Hash: 9679f1ac7f6bc07bc79755f34cd15e1f 226 | 227 | Package: locfdr 228 | Source: CRAN 229 | Version: 1.1-8 230 | Hash: d267ae300fd73b9cb6686175519402c3 231 | 232 | Package: magrittr 233 | Source: CRAN 234 | Version: 1.5 235 | Hash: bdc4d48c3135e8f3b399536ddf160df4 236 | 237 | Package: markdown 238 | Source: CRAN 239 | Version: 0.7.7 240 | Hash: fea2343a1119d61b0cc5c0a950d103a3 241 | Requires: mime 242 | 243 | Package: memoise 244 | Source: CRAN 245 | Version: 0.2.1 246 | Hash: 812e6a1dd77a0ca4da41f3239de8e447 247 | Requires: digest 248 | 249 | Package: mime 250 | Source: CRAN 251 | Version: 0.4 252 | Hash: b08c52dae92a0a11e64a4deea032ec33 253 | 254 | Package: mixfdr 255 | Source: CRAN 256 | Version: 1.0 257 | Hash: 8aa5ecb4434d568fd75010689b69bf2c 258 | 259 | Package: mnormt 260 | Source: CRAN 261 | Version: 1.5-3 262 | Hash: d23b1cdede7a20437b68aa11f51958b8 263 | 264 | Package: munsell 265 | Source: CRAN 266 | Version: 0.4.2 267 | Hash: 72d343a6664778029f49a7ad36de1cab 268 | Requires: colorspace 269 | 270 | Package: packrat 271 | Source: github 272 | Version: 0.4.6-9 273 | Hash: b34872d6ae7c3b4d3191490e4c3d384b 274 | GithubRepo: packrat 275 | GithubUsername: rstudio 276 | GithubRef: master 277 | GithubSha1: dfa32291eb8ae9fad2868d0fc1ffd5516aa3a402 278 | 279 | Package: plyr 280 | Source: CRAN 281 | Version: 1.8.3 282 | Hash: e38fe93fa50135fac98421abb43bbfea 283 | Requires: Rcpp 284 | 285 | Package: praise 286 | Source: CRAN 287 | Version: 1.0.0 288 | Hash: 77da8f1df873a4b91e5c4a68fe2fb1b6 289 | 290 | Package: proto 291 | Source: CRAN 292 | Version: 0.3-10 293 | Hash: 05d7ad64db29a39d1b02c4b9f1f4edb2 294 | 295 | Package: pscl 296 | Source: CRAN 297 | Version: 1.4.9 298 | Hash: b89db41fa250ee766e0be58c8b4227b0 299 | Requires: MASS 300 | 301 | Package: psych 302 | Source: CRAN 303 | Version: 1.5.8 304 | Hash: 68c4e1212b52e8bf12ef1bfa1e950409 305 | Requires: mnormt 306 | 307 | Package: qvalue 308 | Source: github 309 | Version: 2.1.1 310 | Hash: ad35373b6d521d6bcf802b06acf1b386 311 | Requires: reshape2, ggplot2 312 | GithubRepo: qvalue 313 | GithubUsername: jdstorey 314 | GithubRef: master 315 | GithubSha1: b3e906542ce4b56ee131503b44236cea75b9f6cc 316 | 317 | Package: reshape2 318 | Source: CRAN 319 | Version: 1.4.1 320 | Hash: 55963009094746c3aef64bfbf9eb4731 321 | Requires: Rcpp, plyr, stringr 322 | 323 | Package: rmarkdown 324 | Source: CRAN 325 | Version: 0.8.1 326 | Hash: 13ce5c86c765918f857c602d97d1ffb9 327 | Requires: yaml, caTools, htmltools, knitr 328 | 329 | Package: scales 330 | Source: CRAN 331 | Version: 0.3.0 332 | Hash: 9c140be1bda3cf690f8714cfc810aa45 333 | Requires: RColorBrewer, Rcpp, dichromat, labeling, plyr, munsell 334 | 335 | Package: sendmailR 336 | Source: CRAN 337 | Version: 1.2-1 338 | Hash: 26561ceb5871f3983b40c2bfdf90ed09 339 | Requires: base64enc 340 | 341 | Package: shiny 342 | Source: CRAN 343 | Version: 0.12.2 344 | Hash: 31cd1fe3028ac7ef846adacf8ed5bfd2 345 | Requires: R6, digest, jsonlite, mime, xtable, httpuv, htmltools 346 | 347 | Package: stringi 348 | Source: CRAN 349 | Version: 1.0-1 350 | Hash: 
cf342bc407bd5daec77ed1009d5244e1 351 | 352 | Package: stringr 353 | Source: CRAN 354 | Version: 1.0.0 355 | Hash: 2676dd5f88890910962b733b0f9540e1 356 | Requires: magrittr, stringi 357 | 358 | Package: testthat 359 | Source: CRAN 360 | Version: 0.11.0 361 | Hash: e10882241e569f584fb1f8c19599a13f 362 | Requires: digest, praise, crayon 363 | 364 | Package: truncnorm 365 | Source: CRAN 366 | Version: 1.0-7 367 | Hash: eefedc58b087a7f09ae770b6e159acb7 368 | 369 | Package: xtable 370 | Source: CRAN 371 | Version: 1.7-4 372 | Hash: 30ed90ebdd90a529aab31fd6792a8d61 373 | 374 | Package: yaml 375 | Source: CRAN 376 | Version: 2.1.13 377 | Hash: 4854ccabebc225e8a7309fb4a74980de 378 | -------------------------------------------------------------------------------- /packrat/packrat.opts: -------------------------------------------------------------------------------- 1 | auto.snapshot: TRUE 2 | use.cache: FALSE 3 | print.banner.on.startup: auto 4 | vcs.ignore.lib: TRUE 5 | vcs.ignore.src: TRUE 6 | external.packages: 7 | local.repos: ~/Downloads/ 8 | load.external.packages.on.startup: TRUE 9 | ignored.packages: 10 | -------------------------------------------------------------------------------- /packrat/src/mixfdr/mixfdr_1.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/packrat/src/mixfdr/mixfdr_1.0.tar.gz -------------------------------------------------------------------------------- /packrat/src/packrat/packrat_0.4.4.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/packrat/src/packrat/packrat_0.4.4.tar.gz -------------------------------------------------------------------------------- /paper/Makefile: -------------------------------------------------------------------------------- 1 | %.png: %.pdf 2 | convert -density 300 $< $@ 3 | 4 | 5 | main.pdf: main.tex ../analysis/figure/plot_lfsr.Rmd/plot_lfdr-1.png ../analysis/figure/plot_lfsr.Rmd/plot_lfsr-1.png ../analysis/figure/plot_lfsr.Rmd/plot_lfsr_s-1.png ../analysis/figure/plot_lfsr.Rmd/plot_lfsr_s_nn-1.png 6 | pdflatex main.tex; bibtex main; pdflatex main.tex; pdflatex main.tex 7 | -------------------------------------------------------------------------------- /paper/README.md: -------------------------------------------------------------------------------- 1 | Directory for paper 2 | -------------------------------------------------------------------------------- /paper/main.bbl: -------------------------------------------------------------------------------- 1 | \begin{thebibliography}{10} 2 | \providecommand{\url}[1]{\texttt{#1}} 3 | \providecommand{\urlprefix}{URL } 4 | \expandafter\ifx\csname urlstyle\endcsname\relax 5 | \providecommand{\doi}[1]{doi:\discretionary{}{}{}#1}\else 6 | \providecommand{\doi}{doi:\discretionary{}{}{}\begingroup 7 | \urlstyle{rm}\Url}\fi 8 | \providecommand{\bibAnnoteFile}[1]{% 9 | \IfFileExists{#1}{\begin{quotation}\noindent\textsc{Key:} #1\\ 10 | \textsc{Annotation:}\ \input{#1}\end{quotation}}{}} 11 | \providecommand{\bibAnnote}[2]{% 12 | \begin{quotation}\noindent\textsc{Key:} #1\\ 13 | \textsc{Annotation:}\ #2\end{quotation}} 14 | \providecommand{\eprint}[2][]{\url{#2}} 15 | 16 | \bibitem{benjamini1995controlling} 17 | Benjamini Y, Hochberg Y (1995) Controlling the false discovery rate: a 18 | practical and powerful approach to multiple testing. 
19 | \newblock Journal of the Royal Statistical Society Series B (Methodological) : 20 | 289--300. 21 | \bibAnnoteFile{benjamini1995controlling} 22 | 23 | \bibitem{greenland1991empirical} 24 | Greenland S, Robins JM (1991) Empirical-bayes adjustments for multiple 25 | comparisons are sometimes useful. 26 | \newblock Epidemiology : 244--251. 27 | \bibAnnoteFile{greenland1991empirical} 28 | 29 | \bibitem{efron2001empirical} 30 | Efron B, Tibshirani R, Storey JD, Tusher V (2001) Empirical bayes analysis of a 31 | microarray experiment. 32 | \newblock Journal of the American statistical association 96: 1151--1160. 33 | \bibAnnoteFile{efron2001empirical} 34 | 35 | \bibitem{efron2002empirical} 36 | Efron B, Tibshirani R (2002) Empirical bayes methods and false discovery rates 37 | for microarrays. 38 | \newblock Genetic epidemiology 23: 70--86. 39 | \bibAnnoteFile{efron2002empirical} 40 | 41 | \bibitem{efron2003robbins} 42 | Efron B, et~al. (2003) Robbins, empirical bayes and microarrays. 43 | \newblock The annals of Statistics 31: 366--378. 44 | \bibAnnoteFile{efron2003robbins} 45 | 46 | \bibitem{efron2008microarrays} 47 | Efron B (2008) Microarrays, empirical bayes and the two-groups model. 48 | \newblock Statistical Science 23: 1--22. 49 | \bibAnnoteFile{efron2008microarrays} 50 | 51 | \bibitem{efron2010large} 52 | Efron B (2010) Large-scale inference: empirical Bayes methods for estimation, 53 | testing, and prediction, volume~1. 54 | \newblock Cambridge University Press. 55 | \bibAnnoteFile{efron2010large} 56 | 57 | \bibitem{kendziorski2003parametric} 58 | Kendziorski C, Newton M, Lan H, Gould M (2003) On parametric empirical bayes 59 | methods for comparing multiple groups using replicated gene expression 60 | profiles. 61 | \newblock Statistics in medicine 22: 3899--3914. 62 | \bibAnnoteFile{kendziorski2003parametric} 63 | 64 | \bibitem{newton2004detecting} 65 | Newton MA, Noueiry A, Sarkar D, Ahlquist P (2004) Detecting differential gene 66 | expression with a semiparametric hierarchical mixture method. 67 | \newblock Biostatistics 5: 155--176. 68 | \bibAnnoteFile{newton2004detecting} 69 | 70 | \bibitem{datta2005empirical} 71 | Datta S, Datta S (2005) Empirical bayes screening of many p-values with 72 | applications to microarray studies. 73 | \newblock Bioinformatics 21: 1987--1994. 74 | \bibAnnoteFile{datta2005empirical} 75 | 76 | \bibitem{muralidharan2010empirical} 77 | Muralidharan O (2010) An empirical bayes mixture method for effect size and 78 | false discovery rate estimation. 79 | \newblock The Annals of Applied Statistics : 422--438. 80 | \bibAnnoteFile{muralidharan2010empirical} 81 | 82 | \bibitem{ploner2006multidimensional} 83 | Ploner A, Calza S, Gusnanto A, Pawitan Y (2006) Multidimensional local false 84 | discovery rate for microarray studies. 85 | \newblock Bioinformatics 22: 556--565. 86 | \bibAnnoteFile{ploner2006multidimensional} 87 | 88 | \bibitem{benjamini2005false} 89 | Benjamini Y, Yekutieli D (2005) False discovery rate--adjusted multiple 90 | confidence intervals for selected parameters. 91 | \newblock Journal of the American Statistical Association 100: 71--81. 92 | \bibAnnoteFile{benjamini2005false} 93 | 94 | \bibitem{zhao2012empirical} 95 | Zhao Z, Gene~Hwang J (2012) Empirical bayes false coverage rate controlling 96 | confidence intervals. 97 | \newblock Journal of the Royal Statistical Society: Series B (Statistical 98 | Methodology) 74: 871--891. 
99 | \bibAnnoteFile{zhao2012empirical} 100 | 101 | \bibitem{gelman2012we} 102 | Gelman A, Hill J, Yajima M (2012) Why we (usually) don't have to worry about 103 | multiple comparisons. 104 | \newblock Journal of Research on Educational Effectiveness 5: 189--211. 105 | \bibAnnoteFile{gelman2012we} 106 | 107 | \bibitem{donoho:1995} 108 | Donoho D, Johnstone I (1995) Adapting to unknown smoothness via wavelet 109 | shrinkage. 110 | \newblock Journal of the American Statistical Association 90: 1200--1224. 111 | \bibAnnoteFile{donoho:1995} 112 | 113 | \bibitem{xing2016smoothing} 114 | Xing Z, Stephens M (2016) Smoothing via adaptive shrinkage (smash): denoising 115 | poisson and heteroskedastic gaussian signals. 116 | \newblock arXiv preprint arXiv:1605.07787 . 117 | \bibAnnoteFile{xing2016smoothing} 118 | 119 | \bibitem{lu2016variance} 120 | Lu M, Stephens M (2016) Variance adaptive shrinkage (vash): Flexible empirical 121 | bayes estimation of variances. 122 | \newblock bioRxiv : 048660. 123 | \bibAnnoteFile{lu2016variance} 124 | 125 | \bibitem{johnstone2004needles} 126 | Johnstone IM, Silverman BW (2004) Needles and straw in haystacks: Empirical 127 | bayes estimates of possibly sparse sequences. 128 | \newblock Annals of Statistics : 1594--1649. 129 | \bibAnnoteFile{johnstone2004needles} 130 | 131 | \bibitem{efron2007correlation} 132 | Efron B (2007) Correlation and large-scale simultaneous significance testing. 133 | \newblock Journal of the American Statistical Association 102. 134 | \bibAnnoteFile{efron2007correlation} 135 | 136 | \bibitem{leek:2007} 137 | Leek JT, Storey JD (2007) Capturing heterogeneity in gene expression studies by 138 | surrogate variable analysis. 139 | \newblock PLoS Genet 3: 1724-35. 140 | \bibAnnoteFile{leek:2007} 141 | 142 | \bibitem{boyd2004convex} 143 | Boyd S, Vandenberghe L (2004) Convex optimization. 144 | \newblock Cambridge university press. 145 | \bibAnnoteFile{boyd2004convex} 146 | 147 | \bibitem{koenker2013convex} 148 | Koenker R, Mizera I (2013) Convex optimization in {R}. 149 | \newblock Journal of Statistical Software . 150 | \bibAnnoteFile{koenker2013convex} 151 | 152 | \bibitem{tukey1991philosophy} 153 | Tukey JW (1991) The philosophy of multiple comparisons. 154 | \newblock Statistical science : 100--116. 155 | \bibAnnoteFile{tukey1991philosophy} 156 | 157 | \bibitem{tukey1962future} 158 | Tukey JW (1962) The future of data analysis. 159 | \newblock The Annals of Mathematical Statistics : 1--67. 160 | \bibAnnoteFile{tukey1962future} 161 | 162 | \bibitem{gelman2000type} 163 | Gelman A, Tuerlinckx F (2000) Type s error rates for classical and bayesian 164 | single and multiple comparison procedures. 165 | \newblock Computational Statistics 15: 373--390. 166 | \bibAnnoteFile{gelman2000type} 167 | 168 | \bibitem{storey.03} 169 | Storey J (2003) The positive false discovery rate: A {Bayesian} interpretation 170 | and the q-value. 171 | \newblock The Annals of Statistics 31: 2013--2035. 172 | \bibAnnoteFile{storey.03} 173 | 174 | \bibitem{wakefield:2009} 175 | Wakefield J (2009) Bayes factors for genome-wide association studies: 176 | comparison with p-values. 177 | \newblock Genet Epidemiol 33: 79-86. 178 | \bibAnnoteFile{wakefield:2009} 179 | 180 | \bibitem{efron1993bayes} 181 | Efron B (1993) Bayes and likelihood calculations from confidence intervals. 182 | \newblock Biometrika 80: 3--26. 
183 | \bibAnnoteFile{efron1993bayes} 184 | 185 | \bibitem{carvalho2010horseshoe} 186 | Carvalho CM, Polson NG, Scott JG (2010) The horseshoe estimator for sparse 187 | signals. 188 | \newblock Biometrika : asq017. 189 | \bibAnnoteFile{carvalho2010horseshoe} 190 | 191 | \bibitem{moser:2015} 192 | Moser G, Lee SH, Hayes BJ, Goddard ME, Wray NR, et~al. (2015) Simultaneous 193 | discovery, estimation and prediction analysis of complex traits using a 194 | bayesian mixture model. 195 | \newblock PLoS Genet 11: e1004969. 196 | \bibAnnoteFile{moser:2015} 197 | 198 | \bibitem{cordy1997deconvolution} 199 | Cordy CB, Thomas DR (1997) Deconvolution of a distribution function. 200 | \newblock Journal of the American Statistical Association 92: 1459--1465. 201 | \bibAnnoteFile{cordy1997deconvolution} 202 | 203 | \bibitem{xie2012sure} 204 | Xie X, Kou S, Brown LD (2012) Sure estimates for a heteroscedastic hierarchical 205 | model. 206 | \newblock Journal of the American Statistical Association 107: 1465--1479. 207 | \bibAnnoteFile{xie2012sure} 208 | 209 | \bibitem{sarkar:2014} 210 | Sarkar A, Mallick BK, Staudenmayer J, Pati D, Carroll RJ (2014) Bayesian 211 | semiparametric density deconvolution in the presence of conditionally 212 | heteroscedastic measurement errors. 213 | \newblock J Comput Graph Stat 23: 1101-1125. 214 | \bibAnnoteFile{sarkar:2014} 215 | 216 | \bibitem{koenker2014convex} 217 | Koenker R, Mizera I (2014) Convex optimization, shape constraints, compound 218 | decisions, and empirical bayes rules. 219 | \newblock Journal of the American Statistical Association 109: 674--685. 220 | \bibAnnoteFile{koenker2014convex} 221 | 222 | \bibitem{efron2016empirical} 223 | Efron B (2016) Empirical bayes deconvolution estimates. 224 | \newblock Biometrika 103: 1--20. 225 | \bibAnnoteFile{efron2016empirical} 226 | 227 | \bibitem{jiang2009general} 228 | Jiang W, Zhang CH (2009) General maximum likelihood empirical bayes estimation 229 | of normal means. 230 | \newblock The Annals of Statistics 37: 1647--1684. 231 | \bibAnnoteFile{jiang2009general} 232 | 233 | \bibitem{guan.stephens.08} 234 | Guan Y, Stephens M (2008) Practical issues in imputation-based association 235 | mapping. 236 | \newblock PLoS Genet 4. 237 | \bibAnnoteFile{guan.stephens.08} 238 | 239 | \bibitem{devlin1999genomic} 240 | Devlin B, Roeder K (1999) Genomic control for association studies. 241 | \newblock Biometrics 55: 997--1004. 242 | \bibAnnoteFile{devlin1999genomic} 243 | 244 | \bibitem{efron2004large} 245 | Efron B (2004) Large-scale simultaneous hypothesis testing: the choice of a 246 | null hypothesis. 247 | \newblock Journal of the American Statistical Association 99: 96--104. 248 | \bibAnnoteFile{efron2004large} 249 | 250 | \bibitem{pritchard.stephens.rosenberg.donnelly.00} 251 | Pritchard JK, Stephens M, Rosenberg NA, Donnelly P (2000) Association mapping 252 | in structured populations. 253 | \newblock American Journal of Human Genetics 67: 170--181. 254 | \bibAnnoteFile{pritchard.stephens.rosenberg.donnelly.00} 255 | 256 | \bibitem{price:2006} 257 | Price AL, Patterson NJ, Plenge RM, Weinblatt ME, Shadick NA, et~al. (2006) 258 | Principal components analysis corrects for stratification in genome-wide 259 | association studies. 260 | \newblock Nat Genet 38: 904-9. 261 | \bibAnnoteFile{price:2006} 262 | 263 | \bibitem{gagnon2012using} 264 | Gagnon-Bartsch JA, Speed TP (2012) Using control genes to correct for unwanted 265 | variation in microarray data. 266 | \newblock Biostatistics 13: 539--552. 
267 | \bibAnnoteFile{gagnon2012using} 268 | 269 | \bibitem{smyth:2004} 270 | Smyth GK (2004) Linear models and empirical bayes methods for assessing 271 | differential expression in microarray experiments. 272 | \newblock Stat Appl Genet Mol Biol 3: Article3. 273 | \bibAnnoteFile{smyth:2004} 274 | 275 | \bibitem{khintchine1938unimodal} 276 | Khintchine AY (1938) On unimodal distributions. 277 | \newblock Izv Nauchno-Isled Inst Mat Mech Tomsk Gos Univ 2: 1--7. 278 | \bibAnnoteFile{khintchine1938unimodal} 279 | 280 | \bibitem{shepp1962symmetric} 281 | Shepp L (1962) Symmetric random walk. 282 | \newblock Transactions of the American Mathematical Society : 144--153. 283 | \bibAnnoteFile{shepp1962symmetric} 284 | 285 | \bibitem{feller1971introduction} 286 | Feller W (1971) An introduction to probability and its applications, vol. ii. 287 | \newblock Wiley, New York . 288 | \bibAnnoteFile{feller1971introduction} 289 | 290 | \bibitem{REBayes} 291 | Koenker R (2015) REBayes: Empirical Bayes Estimation and Inference in R. 292 | \newblock \urlprefix\url{http://CRAN.R-project.org/package=REBayes}. 293 | \newblock R package version 0.58. 294 | \bibAnnoteFile{REBayes} 295 | 296 | \bibitem{Rmosek} 297 | MOSEK ApS. Rmosek: The R to MOSEK Optimization Interface. 298 | \newblock \urlprefix\url{http://rmosek.r-forge.r-project.org/, 299 | http://www.mosek.com/}. 300 | \newblock R package version 7.1.2. 301 | \bibAnnoteFile{Rmosek} 302 | 303 | \bibitem{dempster77} 304 | Dempster AP, Laird NM, Rubin DB (1977) Maximum likelihood estimation from 305 | incomplete data via the {EM} algorithm (with discussion). 306 | \newblock Journal of the Royal Statistical Society, series B 39: 1--38. 307 | \bibAnnoteFile{dempster77} 308 | 309 | \bibitem{varadhan2008simple} 310 | Varadhan R, Roland C (2008) Simple and globally convergent methods for 311 | accelerating the convergence of any em algorithm. 312 | \newblock Scandinavian Journal of Statistics 35: 335--353. 313 | \bibAnnoteFile{varadhan2008simple} 314 | 315 | \bibitem{Rcore:2012} 316 | {R Core Team} (2012) R: A Language and Environment for Statistical Computing. 317 | \newblock R Foundation for Statistical Computing, Vienna, Austria. 318 | \newblock \urlprefix\url{http://www.R-project.org/}. 319 | \newblock Accessed June 3, 2013. 320 | \bibAnnoteFile{Rcore:2012} 321 | 322 | \bibitem{ggplot2} 323 | Wickham H (2009) ggplot2: elegant graphics for data analysis. 324 | \newblock Springer New York. 325 | \bibAnnoteFile{ggplot2} 326 | 327 | \bibitem{xie2013dynamic} 328 | Xie Y (2013) Dynamic Documents with R and knitr, volume~29. 329 | \newblock CRC Press. 330 | \bibAnnoteFile{xie2013dynamic} 331 | 332 | \end{thebibliography} 333 | -------------------------------------------------------------------------------- /paper/main.blg: -------------------------------------------------------------------------------- 1 | This is BibTeX, Version 0.99d (TeX Live 2015) 2 | Capacity: max_strings=35307, hash_size=35307, hash_prime=30011 3 | The top-level auxiliary file: main.aux 4 | The style file: /Users/stephens/Dropbox/Documents/stylefiles/plos2009.bst 5 | Database file #1: /Users/stephens/Dropbox/Documents/mainbib.bib 6 | Reallocated singl_function (elt_size=4) to 100 items from 50. 
7 | You've used 54 entries, 8 | 2441 wiz_defined-function locations, 9 | 882 strings with 13046 characters, 10 | and the built_in function-call counts, 38631 in all, are: 11 | = -- 3640 12 | > -- 452 13 | < -- 2 14 | + -- 226 15 | - -- 113 16 | * -- 3280 17 | := -- 4635 18 | add.period$ -- 112 19 | call.type$ -- 54 20 | change.case$ -- 50 21 | chr.to.int$ -- 0 22 | cite$ -- 108 23 | duplicate$ -- 3896 24 | empty$ -- 4113 25 | format.name$ -- 147 26 | if$ -- 8598 27 | int.to.chr$ -- 0 28 | int.to.str$ -- 54 29 | missing$ -- 486 30 | newline$ -- 289 31 | num.names$ -- 54 32 | pop$ -- 578 33 | preamble$ -- 1 34 | purify$ -- 0 35 | quote$ -- 0 36 | skip$ -- 259 37 | stack$ -- 0 38 | substring$ -- 5643 39 | swap$ -- 928 40 | text.length$ -- 2 41 | text.prefix$ -- 0 42 | top$ -- 0 43 | type$ -- 0 44 | warning$ -- 0 45 | while$ -- 256 46 | width$ -- 56 47 | write$ -- 599 48 | -------------------------------------------------------------------------------- /paper/main.dvi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/paper/main.dvi -------------------------------------------------------------------------------- /paper/main.fff: -------------------------------------------------------------------------------- 1 | \begin{figure} 2 | \center \includegraphics[height=6in]{../analysis/figure/makefig_FDReg.Rmd/decomp_ZA-1.pdf} 3 | \caption{Illustration that the unimodal assumption (UA) in \ashr can produce very different results from existing methods. 4 | The figure shows, for a single simulated dataset, the way different methods decompose $p$ values (left) and $z$ scores (right) into a null component (dark blue) and an alternative component (cyan). In the $z$ score space the alternative distribution is placed on the bottom to highlight the differences in its shape among methods. 5 | The three existing methods (\qvalue, \locfdr, \mixfdr) all effectively make the Zero Assumption, which results in a ``hole" in the alternative $z$ score distribution around 0. 6 | In contrast the method introduced here (\ashr) makes the Unimodal Assumption -- that the effect sizes, and thus the $z$ scores, have a unimodal distribution about 0 -- which yields a very different decomposition. (In this case the \ashr decomposition is closer to the truth: the data were simulated under a model where all of the effects are non-zero, so the ``true" decomposition would make everything cyan.)} \label{fig:ZA} 7 | \end{figure} 8 | \efloatseparator 9 | 10 | \begin{figure} 11 | \begin{center} 12 | \begin{subfigure}{\textwidth} 13 | \includegraphics[width=\textwidth]{../analysis/figure/plot_egdens.Rmd/scenario_density-1.pdf} 14 | \caption{Densities of non-zero effects, $g_1$, used in simulations.} \label{fig:altdens} 15 | \end{subfigure} 16 | \begin{subfigure}{\textwidth} 17 | \includegraphics[width=\textwidth]{../analysis/figure/plot_pi0est.Rmd/plot_pi0est-1.pdf} 18 | \caption{Comparison of true and estimated values of $\pi_0$. When the UA holds all methods yield conservative (over-)estimates for $\pi_0$, with \ashr being least conservative, and hence most accurate. When the UA does not hold (``bimodal" scenario) the \ashr estimates are slightly anti-conservative.} \label{fig:pi0sims} 19 | \end{subfigure} 20 | \begin{subfigure}{\textwidth} 21 | \includegraphics[width=\textwidth]{../analysis/figure/plot_lfsr.Rmd/plot_lfdr-1.pdf} 22 | \caption{Comparison of true and estimated $\lfdr$ from \ashr (ash.n). 
Black line is $y=x$ and red line is $y=2x$. Estimates of $\lfdr$ are conservative when UA holds, due to conservative estimates of $\pi_0$.} \label{fig:lfdr} 23 | \end{subfigure} 24 | \begin{subfigure}{\textwidth} 25 | \includegraphics[width=\textwidth]{../analysis/figure/plot_lfsr.Rmd/plot_lfsr-1.pdf} 26 | \caption{As in c), but for $\lfsr$ instead of $\lfdr$. Estimates of $\lfsr$ are consistently less conservative than $\lfdr$ when UA holds, and also less anti-conservative in bimodal scenario.} \label{fig:lfsr} 27 | \end{subfigure} 28 | \end{center} 29 | \caption{Results of simulation studies (constant precision $s_j=1$).} 30 | \end{figure} 31 | \efloatseparator 32 | 33 | \begin{figure}[h!] 34 | \begin{subfigure}{\textwidth} 35 | \includegraphics[height=2in]{../analysis/figure/plot_cdf_eg.Rmd/egcdf-1.pdf} 36 | \caption{Example estimated cdfs for single data sets compared with truth. The unimodal assumption made by the ash methods effectively regularizes estimates compared with \mixfdr.} 37 | \end{subfigure} 38 | \begin{subfigure}{\textwidth} 39 | \includegraphics[height=2in]{../analysis/figure/plot_cdf_eg.Rmd/mean_cdf-1.pdf} 40 | \caption{Average estimated cdfs across $\sim10$ data sets compared with truth; methods here use penalty (\ref{eqn:penalty}) so $\pi_0$ is systematically overestimated.} 41 | \end{subfigure} 42 | \begin{subfigure}{\textwidth} 43 | \includegraphics[height=2in]{../analysis/figure/plot_cdf_eg.Rmd/mean_cdf_nopen-1.pdf} 44 | \caption{Average estimated cdfs across $\sim10$ data sets compared with truth; methods here do not use penalty (\ref{eqn:penalty}) so $\pi_0$ is not systematically overestimated. Systematic differences from the truth in ``skew" and ``bimodal" scenarios highlight the effects of model mis-specification.} 45 | \end{subfigure} 46 | \caption{Comparisons of estimated cdfs of $g$ and true cdf of $g$. See Figure \ref{fig:pi0sims} for simulation scenarios.} \label{fig:egcdf} 47 | \end{figure} 48 | \efloatseparator 49 | 50 | \begin{figure} 51 | \begin{subfigure}{\textwidth} 52 | \centering\includegraphics[width=4in]{../analysis/figure/make_GOODPOOR_figs.Rmd/GOODPOOReg_hist-1.pdf} 53 | \label{fig:goodpoor-hist} 54 | \caption{Density histograms of $p$ values for good-precision, poor-precision, and combined observations} 55 | \end{subfigure} 56 | \begin{subfigure}{\textwidth} 57 | \centering\includegraphics[width=4in]{../analysis/figure/make_GOODPOOR_figs.Rmd/GOODPOOReg_scatter-1.pdf} 58 | \caption{Comparison of results of different methods applied to good-precision observations only ($x$ axis) and combined data ($y$ axis). Each point shows the ``significance" ($q$ values from \qvalue; $\lfdr$ for \locfdr; $\lfsr$ for \ashr) of a good-precision observation under the two different analyses.} \label{fig:goodpoor-scatter} 59 | \end{subfigure} 60 | \caption{Simulation illustrating how, for existing FDR methods, 61 | poor-precision observations can contaminate signal from good-precision observations. The top panel (a) illustrates that when 62 | $p$ values from good-precision observations (left) and from poor-precision observations (center) are combined (right), they produce 63 | a distribution of $p$ values with less overall signal - and so, by conventional methods, will give a higher estimated FDR at any given threshold. 
64 | The bottom panel (b) illustrates this behavior directly for the methods \qvalue and \locfdr: the $q$-values from \qvalue and the $\lfdr$ estimates from \locfdr are higher when applied to all data than when applied to good-precision observations only. In contrast the methods described here (\ashr) produce effectively the same results (here, the lfsr) in the good-precision and combined data analyses.} \label{fig:goodpoor} 65 | \end{figure} 66 | \efloatseparator 67 | 68 | \begin{figure} 69 | \centering\includegraphics[width=4in]{../analysis/figure/make_GOODPOOR_figs.Rmd/lfsr_vs_pval_GOODPOOR-1.pdf} 70 | \caption{Figure illustrating effects of prior assumptions on re-ordering of significance. Left panel shows results under our ``default prior" which assumes that effects $\beta_j$ are identically distributed, independent of $s_j$. Right panel shows results under the ``$p$-value prior", which assumes that $z$ scores $\beta_j/s_j$ are identically distributed, independent of $s_j$.} \label{fig:lfsr_pval} 71 | \end{figure} 72 | \efloatseparator 73 | 74 | \begin{figure} 75 | \begin{center} 76 | \begin{subfigure}{\textwidth} 77 | \includegraphics[width=\textwidth]{../analysis/figure/plot_lfsr.Rmd/plot_lfsr_s_nn-1.pdf} 78 | \caption{Comparison of true and estimated $\lfsr$ when data are simulated with no point mass at zero ($\pi_0=0$), and also analyzed by \ashr with no point mass on 0 (and mixture of normal components for $g$). Black line is $y=x$ and red line is $y=2x$. The results illustrate how estimates of $\lfsr$ can be more accurate in this case. That is, assuming there is no point mass can be beneficial if that is indeed true.} \label{fig:lfsr-nn} 79 | \end{subfigure} 80 | \begin{subfigure}{\textwidth} 81 | \includegraphics[width=\textwidth]{../analysis/figure/plot_lfsr.Rmd/plot_lfsr_s-1.pdf} 82 | \caption{Comparison of true and estimated $\lfsr$ when data are simulated with point mass at zero (drawn uniformly from [0,1] in each simulation), but analyzed by \ashr with no point mass on 0 (and mixture of normal components for $g$). Black line is $y=x$ and red line is $y=2x$. The results illustrate how estimates of $\lfsr$ can be anti-conservative if we assume there is no point mass when the truth is that there is a point mass.} \label{fig:lfsr-s} 83 | \end{subfigure} 84 | \end{center} 85 | \caption{Illustration of effects of excluding a point mass from the analysis.} \label{fig:lfsr-nopointmass} 86 | \end{figure} 87 | \efloatseparator 88 | 89 | -------------------------------------------------------------------------------- /paper/main.lof: -------------------------------------------------------------------------------- 1 | \contentsline {figure}{\numberline {1}{\ignorespaces Illustration that the unimodal assumption (UA) in {\tt ashr}\xspace can produce very different results from existing methods. The figure shows, for a single simulated dataset, the way different methods decompose $p$ values (left) and $z$ scores (right) into a null component (dark blue) and an alternative component (cyan). In the $z$ score space the alternative distribution is placed on the bottom to highlight the differences in its shape among methods. The three existing methods ({\tt qvalue}\xspace , {\tt locfdr}\xspace , {\tt mixfdr}\xspace ) all effectively make the Zero Assumption, which results in a ``hole" in the alternative $z$ score distribution around 0. 
In contrast the method introduced here ({\tt ashr}\xspace ) makes the Unimodal Assumption -- that the effect sizes, and thus the $z$ scores, have a unimodal distribution about 0 -- which yields a very different decomposition. (In this case the {\tt ashr}\xspace decomposition is closer to the truth: the data were simulated under a model where all of the effects are non-zero, so the ``true" decomposition would make everything cyan.)\relax }}{38} 2 | \contentsline {figure}{\numberline {2}{\ignorespaces Results of simulation studies (constant precision $s_j=1$).\relax }}{39} 3 | \contentsline {figure}{\numberline {3}{\ignorespaces Comparisons of estimated cdfs of $g$ and true cdf of $g$. See Figure 2b\hbox {} for simulation scenarios.\relax }}{40} 4 | \contentsline {figure}{\numberline {4}{\ignorespaces Simulation illustrating how, for existing FDR methods, poor-precision observations can contaminate signal from good-precision observations. The top panel (a) illustrates that when $p$ values from good-precision observations (left) and from poor-precision observations (center) are combined (right), they produce a distribution of $p$ values with less overall signal - and so, by conventional methods, will give a higher estimated FDR at any given threshold. The bottom panel (b) illustrates this behavior directly for the methods {\tt qvalue}\xspace and {\tt locfdr}\xspace : the $q$-values from {\tt qvalue}\xspace and the $\textit {lfdr}$ estimates from {\tt locfdr}\xspace are higher when applied to all data than when applied to good-precision observations only. In contrast the methods described here ({\tt ashr}\xspace ) produce effectively the same results (here, the lfsr) in the good-precision and combined data analyses.\relax }}{41} 5 | \contentsline {figure}{\numberline {5}{\ignorespaces Figure illustrating effects of prior assumptions on re-ordering of significance. Left panel shows results under our ``default prior" which assumes that effects $\beta _j$ are identically distributed, independent of $s_j$. Right panel shows results under the ``$p$-value prior", which assumes that $z$ scores $\beta _j/s_j$ are identically distributed, independent of $s_j$.\relax }}{42} 6 | \contentsline {figure}{\numberline {6}{\ignorespaces Illustration of effects of excluding a point mass from the analysis.\relax }}{43} 7 | -------------------------------------------------------------------------------- /paper/main.lot: -------------------------------------------------------------------------------- 1 | \contentsline {table}{\numberline {1}{\ignorespaces Table of empirical coverage for nominal 95\% lower credible bounds\relax }}{45} 2 | \contentsline {table}{\numberline {2}{\ignorespaces Summary of simulation scenarios considered\relax }}{46} 3 | \contentsline {table}{\numberline {3}{\ignorespaces Table of empirical coverage for nominal 95\% lower credible bounds for methods {\it without} the penalty term. 
\relax }}{47} 4 | -------------------------------------------------------------------------------- /paper/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/paper/main.pdf -------------------------------------------------------------------------------- /paper/main.ttt: -------------------------------------------------------------------------------- 1 | \begin{table}[!ht] 2 | \begin{subtable}{\textwidth} 3 | \centering\input{../analysis/table/scoverage_all.tex} 4 | \caption{All observations. Coverage rates are generally satisfactory, except for the extreme ``spiky" scenario. This is due to the penalty term (\ref{eqn:penalty}) which tends to cause over-shrinking towards zero. Removing this penalty term produces coverage rates closer to the nominal levels for uniform and normal methods (Table \ref{tab:nopen}). Removing the penalty in the half-uniform case is not recommended (see Appendix).} 5 | \end{subtable} 6 | 7 | \begin{subtable}{\textwidth} 8 | \centering\input{../analysis/table/scoverage_neg.tex} 9 | \caption{``Significant" negative discoveries. Coverage rates are generally satisfactory, except for the uniform-based methods in the spiky and near-normal scenarios, 10 | and the normal-based method in the flat-top scenario. These results likely reflect inaccurate estimates of the tails of $g$ due to a disconnect between the tail of $g$ and the component distributions in these cases. For example, the uniform methods sometimes substantially underestimate the length of the tail of $g$ in these long-tailed scenarios, 11 | causing over-shrinkage of the tail toward 0.} 12 | \end{subtable} 13 | 14 | \begin{subtable}{\textwidth} 15 | \centering\input{../analysis/table/scoverage_pos.tex} 16 | \caption{``Significant" positive discoveries. Coverage rates are generally satisfactory, except for the symmetric methods under the asymmetric (``skew") scenario. 
} 17 | \end{subtable} 18 | 19 | \caption{Table of empirical coverage for nominal 95\% lower credible bounds} \label{tab:coverage} 20 | \end{table} 21 | \efloatseparator 22 | 23 | \begin{table}[!ht] 24 | \centering\begin{tabular}{c c } \toprule 25 | Scenario & Alternative distribution, $g_1$ \\ \midrule 26 | spiky & $0.4 N(0,0.25^2) + 0.2 N(0,0.5^2) + 0.2 N(0,1^2) + 0.2 N(0,2^2) $\\ 27 | near normal & $2/3 N(0,1^2) + 1/3 N(0,2^2)$ \\ 28 | flattop & $(1/7) [N(-1.5,.5^2) + N(-1,.5^2) + N(-.5,.5^2) +$ \\ 29 | & $N(0,.5^2) + N(0.5,.5^2) + N(1.0,.5^2) + N(1.5,.5^2)]$ \\ 30 | skew & $(1/4) N(-2,2^2) + (1/4) N(-1,1.5^2) + (1/3) N(0,1^2) + (1/6) N(1,1^2) $\\ 31 | big-normal & $N(0,4^2)$ \\ 32 | bimodal & $0.5 N(-2,1^2) + 0.5 N(2,1^2)$ \\ \bottomrule 33 | \end{tabular} 34 | \caption{Summary of simulation scenarios considered} \label{table:scenarios} 35 | \end{table} 36 | \efloatseparator 37 | 38 | \begin{table}[!ht] 39 | \begin{subtable}{\textwidth} 40 | \centering\input{../analysis/table/scoverage_all_nopen.tex} 41 | \caption{All observations} 42 | \end{subtable} 43 | 44 | \begin{subtable}{\textwidth} 45 | \centering\input{../analysis/table/scoverage_neg_nopen.tex} 46 | \caption{``Significant" negative discoveries.} 47 | \end{subtable} 48 | 49 | \begin{subtable}{\textwidth} 50 | \centering\input{../analysis/table/scoverage_pos_nopen.tex} 51 | \caption{``Significant" positive discoveries.} 52 | \end{subtable} 53 | 54 | \caption{Table of empirical coverage for nominal 95\% lower credible bounds for methods {\it without} the penalty term.} \label{tab:nopen} 55 | \end{table} 56 | \efloatseparator 57 | 58 | -------------------------------------------------------------------------------- /talks/README.md: -------------------------------------------------------------------------------- 1 | Directory for talks 2 | --------------------------------------------------------------------------------