├── .Rprofile
├── .gitignore
├── Makefile
├── R
│   ├── README.md
│   ├── nullalthist.R
│   ├── plot_FDReg_hist.R
│   └── set_plot_colors.R
├── README.md
├── analysis
│   ├── CompareEEvsETmodel.Rmd
│   ├── CompareEEvsETmodel.html
│   ├── IPvsEM.Rmd
│   ├── IPvsEM.html
│   ├── Makefile
│   ├── adaptive_shrinkage.Rmd
│   ├── adaptive_shrinkage.html
│   ├── ash_npmle.Rmd
│   ├── ash_npmle.html
│   ├── checkIP.Rmd
│   ├── checkIP.html
│   ├── check_mixfdr_lownoise.Rmd
│   ├── check_mixfdr_lownoise.html
│   ├── chunk-options.R
│   ├── efron.fcr.Rmd
│   ├── efron.fcr.html
│   ├── estimate_mode.Rmd
│   ├── estimate_mode.html
│   ├── figure
│   │   ├── IPvsEM.Rmd
│   │   │   ├── unnamed-chunk-2-1.png
│   │   │   ├── unnamed-chunk-3-1.png
│   │   │   └── unnamed-chunk-4-1.png
│   │   ├── adaptive_shrinkage.Rmd
│   │   │   ├── plotting lfsr-1.png
│   │   │   ├── plotting-1.png
│   │   │   ├── run-ash-ET-1.png
│   │   │   ├── run-ash-ET-2.png
│   │   │   ├── sehist-1.png
│   │   │   ├── unnamed-chunk-1-1.png
│   │   │   └── volcano-1.png
│   │   ├── ash_npmle.Rmd
│   │   │   ├── unnamed-chunk-1-1.png
│   │   │   ├── unnamed-chunk-1-2.png
│   │   │   └── unnamed-chunk-2-1.png
│   │   ├── checkIP.Rmd
│   │   │   ├── unnamed-chunk-1-1.png
│   │   │   └── unnamed-chunk-2-1.png
│   │   ├── check_mixfdr_lownoise.Rmd
│   │   │   ├── unnamed-chunk-1-1.png
│   │   │   └── unnamed-chunk-2-1.png
│   │   ├── efron.fcr.Rmd
│   │   │   ├── unnamed-chunk-2-1.png
│   │   │   ├── unnamed-chunk-3-1.png
│   │   │   ├── unnamed-chunk-3-2.png
│   │   │   ├── unnamed-chunk-4-1.png
│   │   │   ├── unnamed-chunk-4-2.png
│   │   │   ├── unnamed-chunk-4-3.png
│   │   │   ├── unnamed-chunk-5-1.png
│   │   │   ├── unnamed-chunk-5-2.png
│   │   │   ├── unnamed-chunk-5-3.png
│   │   │   ├── unnamed-chunk-6-1.png
│   │   │   ├── unnamed-chunk-7-1.png
│   │   │   └── unnamed-chunk-8-1.png
│   │   ├── investigate_hu_badcoverage.Rmd
│   │   │   ├── unnamed-chunk-4-1.png
│   │   │   ├── unnamed-chunk-4-2.png
│   │   │   ├── unnamed-chunk-6-1.png
│   │   │   └── unnamed-chunk-6-2.png
│   │   ├── make_GOODPOOR_figs.Rmd
│   │   │   ├── GOODPOOReg_hist-1.pdf
│   │   │   ├── GOODPOOReg_scatter-1.pdf
│   │   │   ├── lfsr_vs_pval_GOODPOOR-1.pdf
│   │   │   ├── lfsr_vs_pval_GOODPOOR-2.pdf
│   │   │   ├── lfsr_vs_pval_GOODPOOR_single-1.pdf
│   │   │   ├── lfsr_vs_pval_GOODPOOR_single-2.pdf
│   │   │   ├── lfsr_vs_pval_GOODPOOR_single-3.pdf
│   │   │   ├── lfsr_vs_pval_GOODPOOR_single-4.pdf
│   │   │   ├── roc-curve-1.pdf
│   │   │   ├── tp_vs_fp-1.pdf
│   │   │   └── unnamed-chunk-3-1.png
│   │   ├── makefig_FDReg.Rmd
│   │   │   ├── decomp_ZA-1.pdf
│   │   │   ├── decomp_ZA_poster-1.pdf
│   │   │   ├── unnamed-chunk-1-1.png
│   │   │   └── unnamed-chunk-2-1.png
│   │   ├── plot_cdf_eg.Rmd
│   │   │   ├── egcdf-1.pdf
│   │   │   ├── egcdf-reduce-1.pdf
│   │   │   ├── egcdf-reduce-with-npmle-1.pdf
│   │   │   ├── egcdf-reduce-with-npmle-2.pdf
│   │   │   ├── mean_cdf-1.pdf
│   │   │   ├── mean_cdf-reduce-1.pdf
│   │   │   ├── mean_cdf_nopen-1.pdf
│   │   │   └── mean_cdf_nopen-reduce-1.pdf
│   │   ├── plot_egdens.Rmd
│   │   │   └── scenario_density-1.pdf
│   │   ├── plot_lfsr.Rmd
│   │   │   ├── plot_lfdr-1.pdf
│   │   │   ├── plot_lfdr-1.png
│   │   │   ├── plot_lfsr-1.pdf
│   │   │   ├── plot_lfsr-1.png
│   │   │   ├── plot_lfsr_s-1.pdf
│   │   │   ├── plot_lfsr_s-1.png
│   │   │   ├── plot_lfsr_s_nn-1.pdf
│   │   │   └── plot_lfsr_s_nn-1.png
│   │   ├── plot_pi0est.Rmd
│   │   │   └── plot_pi0est-1.pdf
│   │   ├── referee.response.Rmd
│   │   │   ├── unnamed-chunk-1-1.png
│   │   │   └── unnamed-chunk-1-2.png
│   │   ├── referee_uaza.Rmd
│   │   │   ├── unnamed-chunk-2-1.png
│   │   │   ├── unnamed-chunk-3-1.png
│   │   │   ├── unnamed-chunk-4-1.png
│   │   │   └── unnamed-chunk-5-1.png
│   │   └── summarize_dsc_znull.Rmd
│   │       ├── unnamed-chunk-2-1.png
│   │       ├── unnamed-chunk-2-2.png
│   │       └── unnamed-chunk-2-3.png
│   ├── index.Rmd
│   ├── index.html
│   ├── investigate_hu_badcoverage.Rmd
│   ├── investigate_hu_badcoverage.html
│   ├── make_GOODPOOR_figs.Rmd
│   ├── make_GOODPOOR_figs.html
│   ├── makefig_FDReg.Rmd
│   ├── makefig_FDReg.html
│   ├── metaplot_examples.Rmd
│   ├── metaplot_examples.html
│   ├── plot_cdf_eg.Rmd
│   ├── plot_cdf_eg.html
│   ├── plot_egdens.Rmd
│   ├── plot_egdens.html
│   ├── plot_egdens2.Rmd
│   ├── plot_egdens2.html
│   ├── plot_lfsr.Rmd
│   ├── plot_lfsr.html
│   ├── plot_pi0est.Rmd
│   ├── plot_pi0est.html
│   ├── referee.response.Rmd
│   ├── referee.response.html
│   ├── referee_uaza.Rmd
│   ├── referee_uaza.html
│   ├── summarize_coverage.Rmd
│   ├── summarize_coverage.html
│   ├── summarize_dsc_znull.Rmd
│   ├── summarize_dsc_znull.html
│   ├── table
│   │   ├── README.md
│   │   ├── coverage_all.tex
│   │   ├── coverage_all_nopen.tex
│   │   ├── coverage_neg.tex
│   │   ├── coverage_neg_nopen.tex
│   │   ├── coverage_pos.tex
│   │   ├── coverage_pos_nopen.tex
│   │   ├── scoverage_all.tex
│   │   ├── scoverage_all_nopen.tex
│   │   ├── scoverage_neg.tex
│   │   ├── scoverage_neg_nopen.tex
│   │   ├── scoverage_pos.tex
│   │   └── scoverage_pos_nopen.tex
│   ├── template.Rmd
│   └── template.html
├── ash-packrat.Rproj
├── code
│   ├── Makefile
│   ├── README.md
│   ├── dsc-opt
│   │   ├── methods
│   │   │   └── ash.multiopt.wrapper.R
│   │   └── run_dsc_opt.R
│   └── dsc-shrink
│       ├── add_methods.R
│       ├── add_methods.null.R
│       ├── add_named_scenarios.R
│       ├── add_scenarios.null.R
│       ├── datamakers
│       │   ├── datamaker.R
│       │   ├── null_t_datamaker.R
│       │   └── null_z_datamaker.R
│       ├── methods
│       │   ├── ash.wrapper.R
│       │   ├── locfdr.wrapper.R
│       │   ├── mixfdr.wrapper.R
│       │   └── qvalue.wrapper.R
│       ├── run_dsc.R
│       ├── run_dsc_mini.R
│       ├── run_dsc_znull.R
│       └── score.R
├── data
│   └── README.md
├── output
│   └── README.md
├── packrat
│   ├── init.R
│   ├── packrat.lock
│   ├── packrat.opts
│   └── src
│       ├── mixfdr
│       │   └── mixfdr_1.0.tar.gz
│       └── packrat
│           └── packrat_0.4.4.tar.gz
├── paper
│   ├── Makefile
│   ├── README.md
│   ├── main.bbl
│   ├── main.blg
│   ├── main.dvi
│   ├── main.fff
│   ├── main.lof
│   ├── main.lot
│   ├── main.pdf
│   ├── main.tex
│   └── main.ttt
└── talks
    └── README.md

--------------------------------------------------------------------------------
/.Rprofile:
--------------------------------------------------------------------------------
#### -- Packrat Autoloader (version 0.4.4) -- ####
source("packrat/init.R")
#### -- End Packrat Autoloader -- ####

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.Rout
paper/*.aux
paper/*.log
output/*
packrat/lib*/
figure/*/*.pdf
*.png
.Rhistory
.Rproj.user
packrat/src/
.DS_Store

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
.PHONY: all clean output analysis paper

all: output analysis paper

clean:
	cd analysis && $(MAKE) clean

output:
	cd code && $(MAKE)

analysis:
	cd analysis && $(MAKE)

paper:
	cd paper && $(MAKE)

--------------------------------------------------------------------------------
/R/README.md:
--------------------------------------------------------------------------------
Directory for R scripts that may be used in analysis

--------------------------------------------------------------------------------
/R/nullalthist.R:
--------------------------------------------------------------------------------
#plot a histogram of z scores, highlighting the alternative distribution
#of z scores that is implied by localfdr values lfdr.
nullalthist = function(z,lfdr,nullcol="blue",altcol="cyan",ncz=100,...){
  h=hist(z, freq=FALSE,col=nullcol,nclass=ncz,...)
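  # Average the lfdr within each histogram bin; scaling each bin's density
  # by (1 - average lfdr) gives the implied alternative component, which is
  # then over-plotted on top of the full histogram.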
  avlfdr = unlist(lapply(split(lfdr,cut(z,h$breaks),drop=FALSE),mean))
  h$density = (1-avlfdr) * h$density
  plot(h,add=TRUE,col=altcol,freq=FALSE)
}

#this one puts the null on the bottom
altnullhist = function(z,lfdr,nullcol="blue",altcol="cyan",ncz=100,...){
  h=hist(z, freq=FALSE,col=altcol,nclass=ncz,...)
  avlfdr = unlist(lapply(split(lfdr,cut(z,h$breaks),drop=FALSE),mean))
  h$density = avlfdr * h$density
  plot(h,add=TRUE,col=nullcol,freq=FALSE)
}

plotall_hist=function(sim,iter=1,histfun=nullalthist){
  hh.zscore=sim$zscore[[iter]]
  par(mfcol=c(2,2))
  histfun(hh.zscore,sim$betahat.fdrtool[[iter]]$lfdr,main="fdrtool")
  histfun(hh.zscore,sim$betahat.locfdr[[iter]]$fdr,main="locfdr")
  histfun(hh.zscore,sim$betahat.mixfdr[[iter]]$fdr,main="mixfdr")
  histfun(hh.zscore,sim$betahat.ash.n[[iter]]$lfdr,main="ash")
  par(mfcol=c(1,1))
}

--------------------------------------------------------------------------------
/R/plot_FDReg_hist.R:
--------------------------------------------------------------------------------
plot_FDReg_hist = function(hh.pval,pi0,nc=40,nullcol="blue",altcol="cyan",type=4,textsize=1.2,title="Distribution of p values",...){
  hh.hist=hist(hh.pval,freq=FALSE,xlab="p value",main=title,nclass=nc,col=altcol,...)
  if(type>1){
    abline(h=pi0,col=nullcol,lwd=2)

    hh.hist$density=rep(pi0,length(hh.hist$density))
    #hh.hist$counts=rep(hh.q$pi0*length(hh.pval)/nc,length(hh.hist$counts))
    plot(hh.hist,add=TRUE,col=nullcol,freq=FALSE)
  }
  if(type>2){
    abline(v=0.1,lwd=2,col=2)
  }
  if(type>3){
    text(0.05,1.2,labels="A",col=2,cex=1.2)
    text(0.05,0.4,labels="B",col=2,cex=1.2)
    text(0.6,3,labels=paste0("FDR = B/(A+B) = ",round(pi0*0.1*length(hh.pval)/sum(hh.pval<0.1),2)),cex=textsize)
  }
}

--------------------------------------------------------------------------------
/R/set_plot_colors.R:
--------------------------------------------------------------------------------
library("RColorBrewer")
library("ggplot2")
myColors <- brewer.pal(8,"Set1")
names(myColors) <- c("truth","ash.hu","ash.n","ash.u","qvalue","locfdr","mixfdr.tnull","NPMLE")
colScale <- scale_colour_manual(name = "method",values = myColors)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Introduction

This repo is intended to contain code to reproduce results
from the paper, *False Discovery Rates: A New Deal*.

The hardest part may be making sure R is set up with all the right
packages installed. I have used the R package `packrat` to try to
make this easier, but on some systems it may be easier to install all
the packages you need by hand. If things go wrong, look also at
"If things go wrong" below.

### Step-by-step instructions

1. Preliminaries: I made the paper with `R v3.2.3`, so you might start
by installing this version. Or more recent versions should (!) also
work. Install `pandoc v1.12.3` or higher. You will also need a working
`pdflatex` installation to make the paper. The `Rmosek` package needs
[`mosek`](https://www.mosek.com/resources/downloads) to be installed,
so do that now, too. Don't forget to follow the instructions
regarding the license file.
Possibly you will come across other
dependencies as you run the following steps.

2. Clone (or download and unzip) this repository.

3. Install the `R` packages you need. I have tried to use the
[`packrat`](https://rstudio.github.io/packrat/) package to automate
this process, with some measure of success. To do it this way, start up
`R` (e.g. from the command line) within the repository directory. The
first time you enter `R` the hidden `.Rprofile` file will cause `R` to
try to install all the packages you need to a local library in the
`packrat` subdirectory. (Specifically, it should create a `packrat/lib`
directory with more files in a subdirectory whose name will depend on
your architecture.)

**To do:** Mention that the data generation scripts use the `dscr`
package; see [here](http://github.com/stephens999/dscr).

If this does not work the first time - e.g. because you don't have some
dependencies installed - then install the dependencies and try again.
This time, on entering `R`, you will have to tell `packrat` to try again
by typing `packrat::restore()`. If this still does not work
for you, or you already have the packages you need installed, then you
may prefer to remove the packrat subdirectory and install the packages
you need yourself. Quit `R`.

4. Within the repository directory type `make clean`. This will remove
figures and other files that I have already included in the repository.

5. Within the repository directory type `make`. This will try to:

i) Run all the code for the simulation studies. It will take a
while (hours), so you might want to run it overnight. This should
create a bunch of output files in the `output` directory. In particular,
you will know that it worked iff you can find the files
`dsc-shrink-files/res.RData` and `dsc-robust-files/dsc_robust.RData`.

ii) Build/render the .Rmd files in the `analysis` directory. If
successful you should have a file `analysis/index.html` that you
can open to see a list of all the rendered files.

iii) `pdflatex` the paper.

If you have problems (more than likely!) you might like to try each of
these steps in turn, by sequentially typing `make output`, `make
analysis`, and `make paper`.

# If things go wrong

- If you have trouble installing Rmosek, maybe
[this](http://r-forge.r-project.org/scm/viewvc.php/*checkout*/pkg/inst/doc/userguide.pdf?root=rmosek)
will help.

- Ultimately you don't need Rmosek to make things run - if you don't
have Rmosek installed then the ashr package will use an EM algorithm
instead. The results from this method are very similar to those from
using the interior point method (but the interior point method is
faster and provides better convergence).

- If things go wrong in making the output files, try looking at the
`.Rout` files created in the appropriate output subdirectory
(`output/dsc-shrink/`, `output/dsc-znull` or `output/dsc-robust`) to
see what went wrong.

- If things go wrong in making the analysis files, try looking at the
`.html` files produced to see what went wrong.
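- If the automatic package installation in step 3 keeps failing, the
manual retry looks like this from an `R` session started inside the
repository directory (a minimal sketch; it assumes only that `packrat`
itself can be installed from CRAN):

```r
# Install packrat itself if the autoloader could not bootstrap it.
if (!requireNamespace("packrat", quietly = TRUE))
  install.packages("packrat")

# Re-attempt installation of the locked package versions into packrat/lib.
packrat::restore()
```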
# Directory Structure

The directory structure here, and features of the `analysis`
subdirectory (including the `Makefile`), are based on
[https://github.com/jdblischak/singleCellSeq](https://github.com/jdblischak/singleCellSeq). Here's
a brief summary of the directory structure.

+ analysis: Rmd files for investigations; will generate figures in
`figure/` subdirectory

+ R: R scripts/functions used in analysis; code that is actually
run does not go here

+ output: largish files of data, typically created by code
(e.g. post-processed data, simulations)

+ code: code used for preprocessing, generation of output files, etc.;
may be long-running

+ data: datasets that generally will not change once deposited

+ paper: the paper

+ packrat: a directory that contains information about all the R packages used.
See the R package `packrat` for more details.

+ talks: any presentations

--------------------------------------------------------------------------------
/analysis/CompareEEvsETmodel.Rmd:
--------------------------------------------------------------------------------
---
title: "Comparing Exchangeable Effects (EE) and Exchangeable Standardized Effects (ET) models"
output: html_document
---

The ashr package implements a shrinkage-based Empirical Bayes method for
estimating the values of effects $\beta_j$ based on estimates of the effects
($\hat{\beta}_j$) and their standard errors ($s_j$).

The default model is that the effects $\beta_j$ are identically distributed
from a unimodal distribution $g$. In particular, ashr assumes that $\beta_j$
is independent of the standard error $s_j$. That is,

$$\beta_j | s_j \sim g(\cdot) \quad (*).$$

Here we consider the more general model:

$$\beta_j/s_j^\alpha | s_j \sim g(\cdot) \quad (**).$$

The case $\alpha = 0$ is the model (*) above. The case $\alpha = 1$ assumes
that the *t* statistics $\beta_j/s_j$ are identically distributed from
a unimodal distribution. Under this assumption the expected size of the
*unstandardized* effect $\beta_j$ depends on the standard error $s_j$; the
larger $s_j$ is, the larger (in absolute value) $\beta_j$ is expected to be.

By analogy with Wen and Stephens (AoAS), we refer to the model (*) as the
EE model ("exchangeable effects"), and the model (**) as the ET model
("exchangeable T statistics model").

What might motivate the ET model? We can provide two distinct motivations.
First, suppose for concreteness that the effects $\beta_j$ reflect differences
in gene expression between two conditions. One factor that affects $s_j$ is
the variance of gene $j$ within each condition. One could imagine that, perhaps,
genes with a larger variance within each condition are less tightly regulated,
and therefore more likely to show a large difference between conditions
(*i.e.* large $\beta_j$) than genes with a small variance. This provides
a biological motivation for the possibility that larger $s_j$ might
correlate with larger $\beta_j$ (although of course not for the exact
functional form above).

A second motivation is more statistical: it turns out that this assumption
is, in some sense, the assumption made implicitly by existing approaches to
FDR analysis based on p values.
More specifically, under this alternative assumption, when
attempting to identify "significant" effects, the empirical Bayes approach
will rank the genes in the same way as the usual $p$ values computed from
$\hat{\beta}_j/s_j$.

It is straightforward to use the ashr package to perform analysis under the
ET model: simply specify alpha = 1. (Internally, this causes ash to replace
betahat with the standardized betahat, $\hat{\beta}_j/s_j$, and the standard
errors for these standardized betahat values with 1; there is some
bookkeeping to be done to make sure we return the right likelihoods and
posteriors for the original beta, and not for these standardized values...
ash takes care of this.) It is also straightforward to compare
the two competing modelling assumptions (EE vs ET) by computing the
log-likelihood ratio,

$$\log\{p_{EE}(\hat{\beta} | s, \hat{g}_{EE}) / p_{ET}(\hat{\beta} | s, \hat{g}_{ET})\}.$$

We now illustrate by a simulated example. We assume that the standard errors
come from a gamma distribution, and then generate the effects $\beta_j$ under
the ET model so that genes with bigger standard errors tend to have bigger
effects.

```{r generate_data}
set.seed(1234)
nsamp = 1000
betahat.se = rgamma(nsamp,1,1)
beta = betahat.se * rnorm(nsamp)
betahat = rnorm(nsamp,beta,betahat.se)
zscore = betahat/betahat.se
pval = pchisq(zscore^2,df=1,lower.tail=FALSE)
plot(betahat,-log(pval),pch = 20)
```

Here is the EE analysis.

```{r analysis_ee}
library(ashr)
ashEE.res = ash(betahat,betahat.se,method = "fdr",alpha = 0)
```

And here is how we can perform the ET analysis.

```{r analysis_et}
ashET.res = ash(betahat,betahat.se,method = "fdr",alpha = 1)
```

Now we compare the EE vs ET models:
```{r compare_analyses}
print(ashEE.res$loglik - ashET.res$loglik)
```

Then the log-likelihood ratio is loglikEE - loglikET =
`r ashEE.res$loglik - ashET.res$loglik`. This highly negative log-likelihood
ratio indicates that the data strongly favor the ET model, which is expected
because the data were generated under this model. (One might be tempted to ask
whether the log-likelihood ratio is "significant". We don't know how to address
this question, but suggest that in practice it doesn't matter: if the
log-likelihood ratio is positive then use EE; if it is negative then use ET.)

The above illustrates these ideas on simulations from the ET model. For
comparison, we now provide results for simulations under the EE model.

```{r generate_data_from_ee_model}
set.seed(1234)
nsamp = 1000
betahat.se = rgamma(nsamp,1,1)
beta = rnorm(nsamp)
betahat = rnorm(nsamp,beta,betahat.se)
zscore = betahat/betahat.se
pval = pchisq(zscore^2,df = 1,lower.tail = FALSE)
ashEE.res = ash(betahat,betahat.se,method = "fdr",alpha = 0)
ashET.res = ash(betahat,betahat.se,method = "fdr",alpha = 1)
print(ashEE.res$loglik - ashET.res$loglik)
```

So here the log-likelihood ratio is positive, indicating that the EE model
is preferred (which is expected since the data were generated under that
model).
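The same comparison can be packaged into a small helper. This is a minimal
sketch, not part of the original analysis (the function name is
illustrative); it uses only the `ash` arguments and the `loglik` output
shown above:

```r
library(ashr)

# Fit both models and report which one the log-likelihood ratio favors.
# A positive ratio favors EE (alpha = 0); a negative one favors ET (alpha = 1).
compare_EE_ET = function(betahat, sebetahat) {
  fit.ee = ash(betahat, sebetahat, method = "fdr", alpha = 0)
  fit.et = ash(betahat, sebetahat, method = "fdr", alpha = 1)
  llr = fit.ee$loglik - fit.et$loglik
  list(llr = llr, preferred = if (llr > 0) "EE" else "ET")
}
```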
--------------------------------------------------------------------------------
/analysis/IPvsEM.Rmd:
--------------------------------------------------------------------------------
---
title: "A brief comparison of speed of Interior Point and EM algorithms"
author: "Matthew Stephens"
date: 2016-01-26
---

**Last updated:** `r Sys.Date()`

**Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`

First, we load the necessary libraries.

```{r packages}
library(REBayes)
library(ashr)
library(ggplot2)
```

```{r chunk_options, include=FALSE}
# Specify settings for displaying the plots in the rendered document.
source("chunk-options.R")
```

## Set up simulation

The following simulation assumes $\beta \sim N(0,\sigma_{\sf betasd}^2)$, with
$\hat{\beta}$ having standard error 1.

```{r simulation_setup}
timed_sims = function(ash.args,nsim = 20,nmin = 100,nmax = 1e3,betasd = 1) {
  n = 10^seq(log10(nmin),log10(nmax),length = nsim)
  elapsed.time = rep(0,nsim)
  for (i in 1:nsim) {
    set.seed(i)
    with(ash.args,cat(sprintf("%6s %13s %3d\n",method,optmethod,i)))
    betahat = rnorm(n[i],sd = sqrt(1 + betasd^2))
    elapsed.time[i] =
      system.time(do.call(ash,args = modifyList(ash.args,
                  list(betahat = betahat, sebetahat = 1))))[3]
  }
  return(data.frame(elapsed.time = elapsed.time,seed = 1:nsim,n = n))
}
```

Now run a simulation study, for $n$ (number of tests; $p$ in the paper) in
the range 10 to 100,000. Note that the warnings are generated by the EM
algorithm on big problems due to lack of convergence.

```{r model_fitting, results='hide'}
df = data.frame()
cat("method --optmethod-- sim\n")
for(method in c("fdr","shrink")) {
  for(optmethod in c("mixIP","cxxMixSquarem")) {
    df = rbind(df,data.frame(method = method,optmethod = optmethod,
                timed_sims(list(method = method,optmethod = optmethod),
                           nsim = 50,nmin = 10,nmax = 1e5)))
  }
}
cat("\n")
```

Summarize the model fitting results in a table.

```{r model_fitting_summary}
cat("Average computation time (in seconds):\n")
print(as.table(by(df,df[c("method","optmethod")],
                  function (x) mean(x$elapsed.time))))
```

## Results

Now plot time as a function of $n$:

```{r plot_results}
qplot(x = n,y = elapsed.time,data = df,col = optmethod,facets = .~method,
      ylab = "elapsed time (s)")
```

Zoom in on "small" problems with $n < 5000$.

```{r plot_results_magnified}
qplot(x = n,y = elapsed.time, data = subset(df,n < 5000),
      col = optmethod,facets = .~method,ylab = "elapsed time (s)")
```

## Summary

The IP method clearly scales to large problems much better than EM. It is
faster, and also more reliable (it sometimes reaches a higher log-likelihood
than EM, never a lower one); see [checkIP.html](checkIP.html).

However, for small problems (n < 5000) the EM is adequate for many practical
purposes, solving within a few seconds. This is particularly true with the
penalty term (method = fdr), which helps the EM converge, presumably because
it is helping identifiability, removing the large flat parts of the likelihood
objective function.
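As a rough rule of thumb suggested by these timings, one could choose the
optimization method from the problem size. A minimal sketch (the helper name
and the n = 5000 cutoff are illustrative, taken from the observation above):

```r
library(ashr)

# Use the accelerated EM for small problems and the interior point
# method for large ones, per the timing comparison above.
fit_ash_auto = function(betahat, sebetahat, method = "fdr") {
  opt = if (length(betahat) < 5000) "cxxMixSquarem" else "mixIP"
  ash(betahat, sebetahat, method = method, optmethod = opt)
}
```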
Indeed, for small problems with the penalty term (n < 2000), EM with squarem
acceleration is a little faster than IP in these comparisons.

## Session information

```{r info}
sessionInfo()
```

--------------------------------------------------------------------------------
/analysis/Makefile:
--------------------------------------------------------------------------------
HTML_FILES := $(patsubst %.Rmd, %.html ,$(wildcard *.Rmd)) \
              $(patsubst %.md, %.html ,$(wildcard *.md))

all: html


html: $(HTML_FILES)

%.html: %.Rmd
	export R_LIBS_USER=../packrat/lib/*/*; R --slave -e "set.seed(100);rmarkdown::render('$<')"

%.html: %.md
	export R_LIBS_USER=../packrat/lib/*/*; R --slave -e "set.seed(100);rmarkdown::render('$<')"

.PHONY: clean
clean:
	$(RM) $(HTML_FILES)
	$(RM) -r figure/*
	$(RM) table/*.tex

--------------------------------------------------------------------------------
/analysis/adaptive_shrinkage.Rmd:
--------------------------------------------------------------------------------
---
title: "Illustration of Adaptive Shrinkage"
author: "Matthew Stephens"
date: 2016-09-29
---

**Last updated:** `r Sys.Date()`

**Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`

The goal here is to illustrate the "adaptive" nature of the adaptive
shrinkage. The shrinkage is adaptive in two senses. First, the amount
of shrinkage depends on the distribution $g$ of the true effects,
which is learned from the data: when $g$ is very peaked about zero
then ash learns this and deduces that signals should be more strongly
shrunk towards zero than when $g$ is less peaked about zero. Second,
the amount of shrinkage of each observation depends on its standard
error: the smaller the standard error, the more informative the data,
and so the less shrinkage that occurs. From an Empirical Bayesian
perspective both of these points are entirely natural: the posterior
depends on both the prior and the likelihood; the prior, $g$, is
learned from the data, and the likelihood incorporates the standard
error of each observation.

First, we load the necessary libraries.

```{r packages}
library(REBayes)
library(ashr)
library(ggplot2)
library(scales)
```

```{r chunk_options, include=FALSE}
# Specify settings for displaying the plots in the rendered document.
source("chunk-options.R")
```

We simulate from two scenarios: in the first scenario, the effects are more
peaked about zero (**sim.spiky**); in the second scenario, the effects are
less peaked at zero (**sim.bignormal**). A summary of the two data sets is
printed at the end of this chunk.
```{r initialize}
set.seed(100)
source("../code/dsc-shrink/datamakers/datamaker.R")

NSAMP = 1000
s = 1/rgamma(NSAMP,5,5)

sim.spiky =
  rnormmix_datamaker(args = list(g = normalmix(c(0.4,0.2,0.2,0.2),
                                               c(0,0,0,0),
                                               c(0.25,0.5,1,2)),
                                 min_pi0 = 0,
                                 max_pi0 = 0,
                                 nsamp = NSAMP,
                                 betahatsd = s))

sim.bignormal =
  rnormmix_datamaker(args = list(g = normalmix(1,0,4),
                                 min_pi0 = 0,
                                 max_pi0 = 0,
                                 nsamp = NSAMP,
                                 betahatsd = s))

cat("Summary of observed beta-hats:\n")
print(rbind(spiky = quantile(sim.spiky$input$betahat,seq(0,1,0.1)),
            bignormal = quantile(sim.bignormal$input$betahat,seq(0,1,0.1))),
      digits = 3)
```

Now we run ash on both data sets.

```{r run_ash}
beta.spiky.ash = ash(sim.spiky$input$betahat,s)
beta.bignormal.ash = ash(sim.bignormal$input$betahat,s)
```

Next we plot the shrunken estimates against the observed values, colored
according to the (square root of) precision: precise estimates being colored
red, and less precise estimates being blue. Two key features of the plots
illustrate the ideas of adaptive shrinkage: i) the estimates under the spiky
scenario are shrunk more strongly, illustrating that shrinkage adapts to the
underlying distribution of beta; ii) in both cases, estimates with large
standard error (blue) are shrunk more than estimates with small standard
error (red), illustrating that shrinkage adapts to measurement precision.

```{r plot_shrunk_vs_obs}
make_df_for_ashplot =
  function (sim1, sim2, ash1, ash2, name1 = "spiky", name2 = "big-normal") {
    n = length(sim1$input$betahat)
    x = c(get_lfsr(ash1),get_lfsr(ash2))
    return(data.frame(betahat = c(sim1$input$betahat,sim2$input$betahat),
                      beta_est = c(get_pm(ash1),get_pm(ash2)),
                      lfsr = x,
                      s = c(sim1$input$sebetahat,sim2$input$sebetahat),
                      scenario = c(rep(name1,n),rep(name2,n)),
                      signif = x < 0.05))
  }

ashplot = function(df,xlab="Observed beta-hat",ylab="Shrunken beta estimate")
  ggplot(df,aes(x = betahat,y = beta_est,color = 1/s)) +
    xlab(xlab) + ylab(ylab) + geom_point() +
    facet_grid(.~scenario) +
    geom_abline(intercept = 0,slope = 1,linetype = "dotted") +
    scale_colour_gradient2(midpoint = median(1/s),low = "blue",
                           mid = "white",high = "red",space = "Lab") +
    coord_fixed(ratio = 1)

df = make_df_for_ashplot(sim.spiky,sim.bignormal,beta.spiky.ash,
                         beta.bignormal.ash)
print(ashplot(df))
```

Now plot lfsr against z scores, colored according to the (square root of)
precision.

```{r plot_lfsr}
z_lfsr_plot = function(df,ylab = "Observed Z score",xlab = "lfsr")
  ggplot(df,aes(x = lfsr,y = betahat/s,color = 1/s)) +
    xlab(xlab) + ylab(ylab) + geom_point() + facet_grid(.~scenario) +
    scale_colour_gradient2(midpoint = median(1/s),low = "blue",
                           mid = "white",high = "red",space = "Lab")

print(z_lfsr_plot(df))
```

A related consequence is that the significance of each observation is no
longer monotonic in the $p$ value.
```{r plot_pvalues}
pval_plot = function (df)
  ggplot(df,aes(x = pnorm(-abs(betahat/s)),y = lfsr,color = log(s))) +
    geom_point() + facet_grid(.~scenario) + xlim(c(0,0.025)) +
    xlab("p value") + ylab("lfsr") +
    scale_colour_gradient2(midpoint = 0,low = "red",
                           mid = "white",high = "blue")

print(pval_plot(df))
```

Let's see how these are affected by changing the modelling assumptions so that
the *standardized* beta are exchangeable (rather than the beta being
exchangeable).

```{r run_ash_ET}
beta.bignormal.ash.ET =
  ash(sim.bignormal$input$betahat,s,alpha = 1,mixcompdist = "normal")
beta.spiky.ash.ET =
  ash(sim.spiky$input$betahat,s,alpha = 1,mixcompdist = "normal")
df.ET = make_df_for_ashplot(sim.spiky,sim.bignormal,beta.spiky.ash.ET,
                            beta.bignormal.ash.ET)
ashplot(df.ET,ylab = "Shrunken beta estimate (ET model)")
pval_plot(df.ET)
```

This is a "volcano plot" showing effect size against p value. The blue points
are "significant" in that they have lfsr < 0.05.

```{r volcano}
print(ggplot(df,aes(x = betahat,y = -log10(2*pnorm(-abs(betahat/s))),
                    col = signif)) +
        geom_point(alpha = 1,size = 1.75) + facet_grid(.~scenario) +
        theme(legend.position = "none") + xlim(c(-10,10)) + ylim(c(0,15)) +
        xlab("Effect (beta)") + ylab("-log10 p-value"))
```

In this case the significance by lfsr is not quite the same as cutting off
at a given p value (you can see that the decision boundary is not quite the
same as drawing a horizontal line), but it is also not that different,
presumably because the standard errors, although varying across observations,
do not vary greatly.

```{r sehist}
hist(s,main = "histogram of standard errors")
print(summary(s))
```

## Session information

```{r info}
print(sessionInfo())
```

--------------------------------------------------------------------------------
/analysis/ash_npmle.Rmd:
--------------------------------------------------------------------------------
---
title: "Using ash for NPMLE"
author: "Matthew Stephens"
date: 2016-05-31
---

**Last updated:** `r Sys.Date()`

**Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`

First, we load the necessary libraries.

```{r packages}
library(REBayes)
library(ashr)
```

```{r chunk_options, include=FALSE}
# Specify settings for displaying the plots in the rendered document.
source("chunk-options.R")
```

## The NPMLE

Here we show how we can use ash to (approximately) compute the unconstrained
NPMLE - that is, estimate the underlying distribution $g$ by maximizing the
likelihood without the unimodal constraint. See Koenker and Mizera, JASA 2014,
for background.

Within ash one can approximate the NPMLE as a mixture of uniforms on a dense
grid of non-overlapping intervals - this results in a piecewise constant
density with changes in the density only at the grid points.

The following example comes from the REBayes vignette by Koenker and Gu. The
underlying $g$ is a mixture of a point mass (weight 0.8) at 0 and a normal,
$N(2,1)$, component (weight 0.2).
```{r fit_glmix_model}
set.seed(102)
y <- c(rep(0,800),rnorm(200,2)) + rnorm(1000)
z <- GLmix(y)
```

Now we fit the NPMLE using `ashr` and compare with the REBayes solution.

```{r fit_ash_model}
grid = seq(from = min(z$x),to = max(z$x),length = 1000)
k = length(grid)
y.ash.npmle =
  ash(y,1,g = unimix(pi = rep(1/(k-1),(k-1)),a = grid[-k],b = grid[-1]),
      method = "shrink")

# Plot the model fitted using GLmix against the fitted ash model.
plot(z$x,cumsum(z$y)/sum(z$y),col = 2,main = "Estimated cdf",type = "l")
lines(cdf.ash(y.ash.npmle,x = z$x),type = "l",lty = "dashed",lwd = 2)

# Compare the likelihood given the GLmix model and given the ash model.
print(z$logLik,digits = 12)
print(y.ash.npmle$loglik,digits = 12)
```

## Session information

```{r info}
sessionInfo()
```

--------------------------------------------------------------------------------
/analysis/checkIP.Rmd:
--------------------------------------------------------------------------------
---
title: "Check interior point method"
author: "Matthew Stephens"
date: 2015-11-18
---

**Last updated:** `r Sys.Date()`

**Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`

Here we compare the likelihoods achieved by the newly implemented
interior point method against the EM algorithm.

**Note:** the last code chunk ("compare_lfsr") may take several minutes to
render in R or RStudio because it uses ggplot to plot more than a million data
points. Also, it may not be possible to run the last code chunk
interactively in RStudio.

First, we load the necessary libraries.

```{r packages}
library(ggplot2)
library(dplyr)
library(reshape2)
```

```{r chunk_options, include=FALSE}
# Specify settings for displaying the plots in the rendered document.
source("chunk-options.R")
```

## Log-likelihood comparison

The 'dsc-opt' script simply runs both methods (IP and EM) on the same
data sets, and compares the difference in the log-likelihoods. We can
see that in >90% of cases the difference is negligible. Then there is
a tail of cases where the IP log-likelihood is higher - presumably
where the EM doesn't really converge so well.

```{r compare_likelihoods}
load("../output/dsc-opt-files/dsc_opt.RData")
plot(ecdf(dsc_opt$res$diff1),
     main = "ECDF of log-likelihood difference (IP - EM)")
```

## lfsr comparison

However, what we really care about is inference quantities, such as
the lfsr, rather than the log-likelihood. Here we compare the lfsr for
the simulations where we ran both the IP method and the EM method (nocxx)
in dsc-shrink.

```{r compare_lfsr}
load("../output/dsc-shrink-files/res.RData")

# Select out the lfsr estimates for each method.
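# For each method, drop the timing columns and melt the per-test lfsr
# estimates into long format; the two methods are then matched up, via
# inner_join, on scenario, seed and test (the "variable" column).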
df1 = res$lfsr %>% filter(method %in% c("ash.hu.nocxx")) %>%
  select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>%
  melt(id.vars = c("method","scenario","seed",".id"),
       value.name = "lfsr.nocxx")
df2 = res$lfsr %>% filter(method %in% c("ash.hu")) %>%
  select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>%
  melt(id.vars = c("method","scenario","seed",".id"),
       value.name = "lfsr.IP")
df = inner_join(df1,df2,by = c("scenario","seed","variable"))
df = transform(df,scenario = factor(scenario))
print(ggplot(df,aes(lfsr.nocxx,lfsr.IP)) +
        geom_point(shape = 1) +
        facet_wrap(~ scenario,nrow = 2) +
        geom_abline(colour = "black") +
        xlab("EM algorithm (no cxx)") +
        ylab("IP method"))
```

## Session information

```{r info}
sessionInfo()
```

--------------------------------------------------------------------------------
/analysis/check_mixfdr_lownoise.Rmd:
--------------------------------------------------------------------------------
---
title: "Check mixfdr in low noise simulation"
author: "Matthew Stephens"
date: 2015-10-26
---

**Last updated:** `r Sys.Date()`

**Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`

Here we check that mixfdr actually does OK at fitting the bimodal distribution
when the noise is small relative to the separation between the modes.

```{r chunk_options, include=FALSE}
# Specify settings for displaying the plots in the rendered document.
source("chunk-options.R")
```

Simulate bimodal data with well-separated peaks. The histogram makes the
multi-modal nature of the underlying g clear.

```{r sim_data}
source("../code/dsc-shrink/datamakers/datamaker.R")
sim.bimodal =
  rnormmix_datamaker(args = list(g = normalmix(c(0.5,0.5),c(-5,5),c(1,1)),
                                 min_pi0 = 0.4,
                                 max_pi0 = 0.4,
                                 nsamp = 1000,
                                 betahatsd = 1))
hist(sim.bimodal$input$betahat,nclass = 20,xlim = c(-10,10),prob = TRUE,
     main="histogram of simulated betahat values")
```

Now run mixfdr on the simulated data. Note that the fit captures the
multi-modal nature of g, although mixfdr continues to overestimate pi0.
```{r fit_models}
source("../code/dsc-shrink/methods/mixfdr.wrapper.R")
sim.bimodal.mixfdroutput =
  mixfdr.wrapper(sim.bimodal$input,args = list(theonull = TRUE))
g = mixfdr2fitted.g(sim.bimodal.mixfdroutput)$fitted.g
x = seq(-10,10,length = 100)
par(cex.main = 1,font.main = 1)
plot(ecdf(sim.bimodal$meta$beta),cex = 0.6,
     main = "CDF of true beta: empirical (black) and fitted (red)")
lines(x,mixcdf(g,x),lwd = 2,lty = "dotted",col = "orangered")
```

## Session information

```{r info}
sessionInfo()
```

--------------------------------------------------------------------------------
/analysis/chunk-options.R:
--------------------------------------------------------------------------------
# For more information on available chunk options, see
# http://yihui.name/knitr/options#chunk_options

library("knitr")
opts_chunk$set(tidy = FALSE,
               comment = NA,
               fig.align = "center",
               fig.path = paste0("figure/", current_input(), "/"))

knit_hooks$set(crop = hook_pdfcrop)

--------------------------------------------------------------------------------
/analysis/efron.fcr.Rmd:
--------------------------------------------------------------------------------
---
title: "Post-selection CI example, asymmetric, not unimodal at 0"
author: "Matthew Stephens"
date: 2016-05-09
---

**Last updated:** `r Sys.Date()`

**Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`

First, we load the necessary libraries.

```{r packages}
library(ashr)
library(qvalue)
library(locfdr)
```

```{r chunk_options, include=FALSE}
# Specify settings for displaying the plots in the rendered document.
source("chunk-options.R")
```

## Introduction

This example comes from Efron (2008), p. 16, examining the false coverage rate
(FCR). I selected this example because the distribution of the non-zero effects
is highly asymmetric and not at all unimodal at zero, both issues a referee
asked me to elaborate on. Specifically, the distribution of the non-zero
effects is N(-3,1). Here I simulate data, and apply ash (with the "halfuniform"
option to allow for asymmetric g).

```{r fit_halfuniform_model}
set.seed(10)
nsamp = 1e4
altmean = -3
mu0 = rep(0,nsamp)
mu1 = rnorm(nsamp,altmean,1)
comp = rbinom(nsamp,1,0.1)
mu = ifelse(comp == 0,mu0,mu1)
z = rnorm(nsamp,mu,1)

# Fit the model.
res.ash = ash(z,1,mixcompdist = "halfuniform")
```

We can also run ash with the "true" g, to allow us to compare the lfsr, lfdr,
etc.

```{r fit_normalmix_model}
true.g = normalmix(c(0.9,0.1),c(0,-3),c(0,1))
res.ash.true = ash(z,1,g = true.g,fixg = TRUE)
```

Here we can see how the partition of $z$ scores compares with the truth. Note
that the effect of the unimodal assumption is to extend the inferred alternative
distribution toward 0. Here, `nullalthist` plots a histogram of z scores,
highlighting the alternative distribution of z scores that is implied by
the local fdr values lfdr.
```{r plot_zscore_hist}
source("../R/nullalthist.R")
par(mfcol = c(2,1),cex.main = 1,font.main = 1)
nullalthist(z,lfdr = get_lfdr(res.ash.true),main = "true partition (res.ash.true)")
nullalthist(z,lfdr = get_lfdr(res.ash),main = "inferred partition (res.ash)")
```

Comparing the inferred posterior means, lfdr, and lfsr with the true values of
these quantities, we find reassuringly good correspondence.

```{r scatterplots}
par(mfrow = c(1,3))
plot(get_pm(res.ash.true),get_pm(res.ash),xlab = "Truth",ylab = "ash.hu",
     main = "Posterior Mean (inferred vs truth)",pch = 20,cex = 0.6,
     xlim = c(-6,1),ylim = c(-6,1))
abline(a = 0,b = 1,col = "orangered",lwd = 2,lty = "dotted")

plot(get_lfdr(res.ash.true),get_lfdr(res.ash),xlab = "Truth", ylab = "ash.hu",
     main = "lfdr (inferred vs truth)",pch = 20,cex = 0.6,
     xlim = c(0,1),ylim = c(0,1))
abline(a = 0,b = 1,col = "orangered",lwd = 2,lty = "dotted")

plot(get_lfsr(res.ash.true),get_lfsr(res.ash),xlab = "Truth", ylab = "ash.hu",
     main = "lfsr (inferred vs truth)",pch = 20,cex = 0.6,
     xlim = c(0,1),ylim = c(0,1))
abline(a = 0,b = 1,col = "orangered",lwd = 2,lty = "dotted")
```

## Comparison with qvalue and locfdr

We can also run qvalue and locfdr. We see that locfdr perhaps performs a bit
better than ash for the decomposition here, but the estimated local fdrs are
pretty similar. Here qvalue does less well because of the asymmetry, which it
does not take into account.

```{r comparison_to_other_methods}
res.locfdr = locfdr(z,nulltype = 0)
res.qvalue = qvalue(p = pchisq(z^2,df = 1,lower.tail = FALSE))

par(mfrow = c(1,3))
plot(get_lfdr(res.ash.true),get_lfdr(res.ash),pch = 20,cex = 0.6,
     xlab = "Truth (lfdr)",ylab = "ash.hu",main = "ash.hu",
     xlim = c(0,1),ylim = c(0,1))
abline(a = 0,b = 1,col = "orangered",lty = "dotted",lwd = 2)

plot(get_lfdr(res.ash.true),res.locfdr$fdr,pch = 20,cex = 0.6,
     xlab = "Truth (lfdr)",ylab = "Estimate",main = "locfdr",
     xlim = c(0,1),ylim = c(0,1))
abline(a = 0,b = 1,lwd = 2,col = "orangered",lty = "dotted")

plot(get_lfdr(res.ash.true),res.qvalue$lfdr,pch = 20,cex = 0.6,
     xlab = "Truth (lfdr)",ylab = "Estimate",main = "qvalue",
     xlim = c(0,1),ylim = c(0,1))
abline(a = 0,b = 1,lwd = 2,col = "orangered",lty = "dotted")
```

## Uniform tail curtails credible intervals

The following plot compares the (symmetric-tail) 95% CIs from ash (red) for
the "significant" observations with Bayes rule (green), similar to Figure 8
of Efron. Note that the lower (2.5%) endpoints are pretty accurate, but the
upper (97.5%) endpoints are curtailed - presumably due, at least in part, to
the short tails of the uniform mixture.

```{r plot_credints}
CImatrix = ashci(res.ash,level = 0.95)
BayesComparePlot = function(CImatrix, altmean = -3,...) {
  plot(z,mu,xlim = c(-8,0),pch = 20,cex = 0.6,...)
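  # Overlay the endpoints of the ash 95% CIs in red; then compute, under
  # the true two-groups model, the local fdr for each z and trace the
  # Bayes-rule interval endpoints in green for the selected observations.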
  points(z,CImatrix[,1],col = "orangered",cex = 0.6,pch = 20)
  points(z,CImatrix[,2],col = "orangered",cex = 0.6,pch = 20)

  fdr = 0.9*dnorm(z)/(0.9*dnorm(z) + 0.1*dnorm(z,altmean,sqrt(2)))
  o = order(z)
  upper = ifelse(fdr[o] < 0.025,
                 (z[o] + altmean)/2 + qnorm(0.975 + fdr[o])/sqrt(2),0)
  lines(z[o],upper,col = "limegreen",lwd = 2)
  lines(z[o],(z[o] + altmean)/2 - qnorm(0.975)/sqrt(2),col = "limegreen",lwd = 2)
  abline(v = max(z[fdr < 0.05]))
}
par(mfrow = c(1,1))
BayesComparePlot(CImatrix,
                 main = "CIs for highly asymmetric and non-unimodal-at-zero data")
```

## Variational version

Although not a focus of the paper, ash does have an option to do variational
inference for the mixture components (with a Dirichlet prior). In practice
this approach usually ends up spreading the posterior mass more among the
mixture components. It seemed plausible that this might lead to slightly
less extreme tail behaviour than above (because the model will put a little
more weight on the uniforms with larger variance, whose weights are essentially
set to zero in the above).

```{r vb_model_fitting}
res.ash.VB = ash(z,1,mixcompdist = "halfuniform",optmethod = "mixVBEM")
CImatrix.VB = ashci(res.ash.VB,level = 0.95)
```

Again, we can compare results with Bayes rule:

```{r plot_vb_credints}
BayesComparePlot(CImatrix.VB,
                 main = paste("CIs for highly asymmetric and non-unimodal-at-zero data\n",
                              "Variational Version"))
```

## Session information

```{r info}
sessionInfo()
```

--------------------------------------------------------------------------------
/analysis/estimate_mode.Rmd:
--------------------------------------------------------------------------------
---
title: "Estimate Non-zero mode"
author: "Matthew Stephens"
date: 2016-02-01
---

**Last updated:** `r Sys.Date()`

**Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`

The purpose of this analysis is to check the performance of the non-zero mode
option.

First, we load the necessary libraries.

```{r packages}
library(ashr)
```

```{r chunk_options, include=FALSE}
# Specify settings for displaying the plots in the rendered document.
source("chunk-options.R")
```

## Simple simulation

I conjecture that the mean of the posterior means should be close to the
optimum for the mode... maybe even equal to it. (That is, this would constitute
a fixed point for the update. We aren't explicitly using that directly in the
current implementation; the uniform mixture uses `optim` to do it numerically;
the normal mixture uses a true EM, I think...)

```{r evaluate_mode_estimates}
check_mode = function(betahat, sebetahat, mixcompdist) {
  z.ash = ash(betahat,sebetahat,mixcompdist = mixcompdist,mode = "estimate")
  average.posteriormean = mean(get_pm(z.ash))
  fitted.mode = comp_mean(get_fitted_g(z.ash))[1]

  # Refit to get g.
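  # (The refit subtracts the fitted mode; the loglik.down/loglik.up values
  # below then probe the log-likelihood at small shifts of +/- 0.01 around
  # the fitted mode, to check that it is a local optimum, while
  # loglik.posteriormean evaluates the conjectured fixed point.)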
  z.ash1 = ash(betahat - fitted.mode,sebetahat,mixcompdist = mixcompdist)
  g = get_fitted_g(z.ash1)
  loglik = get_loglik(z.ash1)

  loglik.down = ash(betahat - fitted.mode - 0.01,sebetahat,g = g)$loglik
  loglik.up = ash(betahat - fitted.mode + 0.01,sebetahat,g = g)$loglik
  loglik.posteriormean = ash(betahat - average.posteriormean,sebetahat,g = g)$loglik

  return(list(fitted.mode = fitted.mode,
              average.posteriormean = average.posteriormean,
              loglik = c(loglik,loglik.down,loglik.up,loglik.posteriormean)))
}

set.seed(100)
z = rnorm(1000) + 3
print(check_mode(z,1,mixcompdist = "uniform"))
print(check_mode(z,1,mixcompdist = "normal"))
print(check_mode(z,1,mixcompdist = "halfuniform"))
```

An additional experiment:

```{r}
set.seed(100)
beta = rexp(1000)
betahat = beta + rnorm(1000,0,0.1)
z.ash.hu = ash(betahat,0.1,mixcompdist="halfuniform",outputlevel=4,method="shrink")
z.ash.pu = ash(betahat,0.1,mixcompdist="+uniform",outputlevel=4,method="shrink")

z.ash.hu2 = ash(betahat - 0.2,0.1,mixcompdist = "halfuniform",outputlevel = 4,
                method = "shrink")
z.ash.pu2 = ash(betahat - 0.1,0.1,mixcompdist = "+uniform",outputlevel = 4,
                method = "shrink")
z.ash.hu.nzm = ash(betahat,0.1,mixcompdist = "halfuniform",mode = "estimate",
                   method = "shrink")
print(z.ash.hu$loglik,digits = 8)
print(z.ash.hu.nzm$loglik,digits = 8)
```

*Note to self:* check that the normal version works too; check
nonzeromodeEMobj as it doesn't seem needed.

## Session information

```{r info}
sessionInfo()
```

--------------------------------------------------------------------------------
/analysis/figure/ (binary image files):
--------------------------------------------------------------------------------
[The PNG and PDF plot files under analysis/figure/, listed in the directory
tree above, are binary images generated by rendering the Rmd analyses. Each
one is stored in the repository at
https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/<path-to-file>.]
-------------------------------------------------------------------------------- /analysis/figure/make_GOODPOOR_figs.Rmd/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/make_GOODPOOR_figs.Rmd/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /analysis/figure/makefig_FDReg.Rmd/decomp_ZA-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/makefig_FDReg.Rmd/decomp_ZA-1.pdf -------------------------------------------------------------------------------- /analysis/figure/makefig_FDReg.Rmd/decomp_ZA_poster-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/makefig_FDReg.Rmd/decomp_ZA_poster-1.pdf -------------------------------------------------------------------------------- /analysis/figure/makefig_FDReg.Rmd/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/makefig_FDReg.Rmd/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /analysis/figure/makefig_FDReg.Rmd/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/makefig_FDReg.Rmd/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/egcdf-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/egcdf-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/egcdf-reduce-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/egcdf-reduce-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/egcdf-reduce-with-npmle-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/egcdf-reduce-with-npmle-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/egcdf-reduce-with-npmle-2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/egcdf-reduce-with-npmle-2.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/mean_cdf-1.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/mean_cdf-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/mean_cdf-reduce-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/mean_cdf-reduce-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/mean_cdf_nopen-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/mean_cdf_nopen-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_cdf_eg.Rmd/mean_cdf_nopen-reduce-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_cdf_eg.Rmd/mean_cdf_nopen-reduce-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_egdens.Rmd/scenario_density-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_egdens.Rmd/scenario_density-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfdr-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfdr-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfdr-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfdr-1.png -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfsr-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfsr-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfsr-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfsr-1.png -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfsr_s-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfsr_s-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfsr_s-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfsr_s-1.png -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfsr_s_nn-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfsr_s_nn-1.pdf -------------------------------------------------------------------------------- /analysis/figure/plot_lfsr.Rmd/plot_lfsr_s_nn-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_lfsr.Rmd/plot_lfsr_s_nn-1.png -------------------------------------------------------------------------------- /analysis/figure/plot_pi0est.Rmd/plot_pi0est-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/plot_pi0est.Rmd/plot_pi0est-1.pdf -------------------------------------------------------------------------------- /analysis/figure/referee.response.Rmd/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/referee.response.Rmd/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /analysis/figure/referee.response.Rmd/unnamed-chunk-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/referee.response.Rmd/unnamed-chunk-1-2.png -------------------------------------------------------------------------------- /analysis/figure/referee_uaza.Rmd/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/referee_uaza.Rmd/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /analysis/figure/referee_uaza.Rmd/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/referee_uaza.Rmd/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /analysis/figure/referee_uaza.Rmd/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/referee_uaza.Rmd/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /analysis/figure/referee_uaza.Rmd/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/analysis/figure/referee_uaza.Rmd/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- 
/analysis/index.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Home"
3 | ---
4 |
5 | **Last updated:** `r Sys.Date()`
6 |
7 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
8 |
9 |
10 | ## Simple illustrations
11 |
12 | * [Figures illustrating FDR](makefig_FDReg.html)
13 | * [Illustrate the dilution effect of low-precision observations](make_GOODPOOR_figs.html)
14 | * [Adaptive Shrinkage](adaptive_shrinkage.html)
15 | * [Adaptive Shrinkage: shrunken interval estimates](metaplot_examples.html)
16 |
17 | * [Comparing models: EE vs ET models](CompareEEvsETmodel.html)
18 | * [Checking how qvalue behaves in a simulation suggested by a referee](referee_uaza.html)
19 | * [Comparison of methods, and an example of credible interval behaviour when the UA does not hold](efron.fcr.html)
20 | * [Using ash to fit the NPMLE; comparison with REBayes](ash_npmle.html)
21 |
22 | ## DSC Shrink (the main simulation study in the paper)
23 |
24 | * [Plot densities of simulations](plot_egdens.html)
25 | * [Plot estimates of pi0](plot_pi0est.html)
26 | * [Plot local false sign rates and lfdr vs truth](plot_lfsr.html)
27 | * [Plot CDFs](plot_cdf_eg.html)
28 |
29 |
30 | * [Investigate coverage of the hu method without penalty](investigate_hu_badcoverage.html)
31 | * [Run mixfdr on a low-noise bimodal situation](check_mixfdr_lownoise.html)
32 | * [Make coverage tables](summarize_coverage.html)
33 |
34 |
35 | ## Optimization method: EM vs Interior Point
36 | * [Compare loglik achieved by EM vs Interior Point](checkIP.html)
37 | * [Compare speed of EM vs Interior Point](IPvsEM.html)
38 |
39 | ## DSC Znull (assess behaviour of the ash LR under the null)
40 |
41 | * [Summarize results of null simulations](summarize_dsc_znull.html)
42 |
43 | ## Check how estimating the mode performs
44 |
45 | * [Basic check of the non-zero mode option](estimate_mode.html)
46 | --------------------------------------------------------------------------------
/analysis/investigate_hu_badcoverage.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Investigate poor coverage for ash.hu"
3 | author: "Matthew Stephens"
4 | date: 2015-10-26
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | First, we load the necessary libraries.
12 |
13 | ```{r packages}
14 | library(ashr)
15 | library(reshape2)
16 | library(ggplot2)
17 | library(dplyr)
18 | library(dscr)
19 | ```
20 |
21 | ```{r chunk_options, include=FALSE}
22 | # Specify settings for displaying the plots in the rendered document.
23 | source("chunk-options.R")
24 | ```
25 |
26 | Compile the tables which we will use to examine the results of the simulation
27 | experiments.
28 |
29 | ```{r compile_tables}
30 | load("../output/dsc-shrink-files/res.RData")
31 |
32 | neglong =
33 |   res$negprob %>%
34 |   select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>%
35 |   melt(id.vars = c("method","scenario","seed",".id"),value.name = "negprob") %>%
36 |   filter(negprob > 0.95)
37 |
38 | poslong =
39 |   res$posprob %>%
40 |   select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>%
41 |   melt(id.vars = c("method","scenario","seed",".id"),value.name = "posprob") %>%
42 |   filter(posprob > 0.95)
43 |
44 | reslong =
45 |   res$cdf_score %>%
46 |   select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>%
47 |   melt(id.vars = c("method","scenario","seed",".id"))
48 |
49 | reslong.pos = inner_join(reslong,poslong)
50 | reslong.neg = inner_join(reslong,neglong)
51 |
52 | ash.hu.ft = reslong.pos %>% filter(method == "ash.hu.s" &
53 |                                    scenario == "flat-top")
54 | ```
55 |
56 | Extract an example to illustrate the coverage of ash.hu.
57 |
58 | ```{r show_example}
59 | eg = load_example(dsc_shrink,42,"flat-top",methodname = "ash.hu.s",
60 |                   homedir = "../code/dsc-shrink")
61 | out <- eg$output$fitted_g
62 | class(out) <- "list"
63 | out <- as.data.frame(out)
64 | print(subset(out,pi > 0.01))
65 | ```
66 |
67 | Notice how almost all of the inferred weight is on a small positive component.
68 | As a result, the false sign rate will be small, and there will be a strong tendency
69 | to overestimate zero effects. This leads to the coverage problems observed.
70 |
71 | Now let's look at an example with the ash.u.s method and the spiky scenario,
72 | which seems to be somewhat badly calibrated for the negative discoveries.
73 |
74 | ```{r show_more_examples}
75 | ash.u.s.spiky = reslong.neg %>% filter(method == "ash.u.s" &
76 |                                        scenario == "spiky")
77 | ash.n.s.spiky = reslong.neg %>% filter(method == "ash.n.s" &
78 |                                        scenario == "spiky")
79 | hist(ash.u.s.spiky$value,nclass = 100,
80 |      main = "histogram of quantile where observation falls in its CI")
81 | hist(ash.n.s.spiky$value,nclass = 100,
82 |      main = "histogram of quantile where observation falls in its CI")
83 | ```
84 |
85 | So what seems to be happening here is that the uniform tail is too short;
86 | when an observation falls outside of this tail it gets a zero quantile in its
87 | posterior interval.
88 |
89 | Can we find an example illustrating this trend? (Note: the next chunk below
90 | did not work for me, so I set `eval=FALSE, include=FALSE`. Perhaps this can be
91 | fixed at some point. -Peter)
92 |
93 | ```{r example_short_tail, eval=FALSE, include=FALSE}
94 | table((ash.u.s.spiky %>% filter(value == 0))$seed)
95 | eg = load_example(dsc_shrink,seed = 39,scenario = "spiky",
96 |                   methodname = "ash.u.s","../code/dsc-shrink")
97 | ```
98 |
99 | For comparison, here are the positive discoveries; here they are not too bad.
100 |
101 | ```{r histograms_pos}
102 | ash.u.s.spiky = reslong.pos %>% filter(method == "ash.u.s" & scenario == "spiky")
103 | ash.n.s.spiky = reslong.pos %>% filter(method == "ash.n.s" & scenario == "spiky")
104 | hist(ash.n.s.spiky$value)
105 | hist(ash.u.s.spiky$value)
106 | ```
107 |
108 | ## Session information
109 |
110 | ```{r info}
111 | sessionInfo()
112 | ```
113 | --------------------------------------------------------------------------------
/analysis/make_GOODPOOR_figs.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Make figures for simple simulation with good and poor precision observations"
3 | author: "Matthew Stephens"
4 | date: 2015-10-26
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | First, we load the necessary libraries and function definitions.
12 |
13 | ```{r packages}
14 | library(ashr)
15 | library(qvalue)
16 | library(locfdr)
17 | library(mixfdr)
18 | library(ggplot2)
19 | source("../R/plot_FDReg_hist.R")
20 | ```
21 |
22 | ```{r chunk_options, include=FALSE}
23 | # Specify settings for displaying the plots in the rendered document.
24 | source("chunk-options.R")
25 | ```
26 |
27 | Create a simple simulated data set to illustrate observations with good and poor
28 | precision. True values of beta are simulated from $0.5\,N(0,1) + 0.5\,\delta_0$ (see `sd = 1` below).
29 |
30 | ```{r sim_data}
31 | ntest = 10000
32 | set.seed(112)
33 |
34 | null_alt = rbinom(ntest,1,0.5)
35 | beta = rnorm(ntest,0,sd = 1)
36 | beta = ifelse(null_alt == 1,beta,0)
37 | GOOD = 1:(ntest/2)
38 | sebetahat = rep(1,ntest)
39 | sebetahat[-GOOD] = 10
40 |
41 | betahat = beta + rnorm(ntest, 0, sebetahat)
42 | zscore = betahat/sebetahat
43 | pval = pchisq(zscore^2,df = 1,lower.tail = FALSE)
44 | ```
45 |
46 | Show how poor-precision observations dilute good ones.
47 |
48 | ```{r GOODPOOReg_hist, dev='pdf', fig.width=6.5, fig.height=3}
49 | par(mai = c(0.3,0.3,0.2,0.2),mgp = c(3, 0.5, 0))
50 | layout(matrix(1:3,ncol = 3,byrow = TRUE))
51 | plot_FDReg_hist(pval[GOOD],1,type = 1,title = "Good-precision observations",
52 |                 ylab = "",nc = 20,cex.axis = 1,cex.main = 1.2,ylim = c(0,2.5))
53 | plot_FDReg_hist(pval[-GOOD],1,type = 1,
54 |                 title = "Poor-precision observations",ylab = "",nc = 20,
55 |                 yaxt = 'n',cex.axis = 1,cex.main = 1.2,ylim = c(0,2.5))
56 | axis(side = 2, labels = FALSE,tck = -0.01)
57 | plot_FDReg_hist(pval,1,type = 1,title = "Combined",yaxt = 'n',ylab = "",
58 |                 nc = 20,cex.axis = 1,cex.main = 1.2,ylim = c(0,2.5))
59 | axis(side = 2, labels = FALSE,tck = -0.01)
60 | ```
61 |
62 | Apply alternative methods to the same data set:
63 |
64 | ```{r other_methods}
65 | res.qvalue = qvalue(pval)
66 | res.locfdr = locfdr(zscore,nulltype = 0,plot = 0)
67 | res.ash = ash(betahat,sebetahat,method = "fdr",outputlevel = 4)
68 |
69 | res.qvalue.good = qvalue(pval[GOOD])
70 | res.locfdr.good = locfdr(zscore[GOOD],nulltype = 0,plot = 0)
71 | res.ash.good = ash(betahat[GOOD],sebetahat[GOOD],method = "fdr")
72 | ```
73 |
74 | Compare ash's accuracy against the alternative approaches.
75 |
76 | ```{r GOODPOOReg_scatter, dev='pdf', fig.width=6.5, fig.height=3}
77 | res = rbind(data.frame(x = res.qvalue.good$qvalues,
78 |                        y = res.qvalue$qvalues[GOOD],
79 |                        type = "qvalue"),
80 |             data.frame(x = res.locfdr.good$fdr,
81 |                        y = res.locfdr$fdr[GOOD],
82 |                        type = 'locfdr'),
83 |             data.frame(x = get_lfsr(res.ash.good),
84 |                        y = get_lfsr(res.ash)[GOOD],
85 |                        type = "ashr"))
86 |
87 | pp = ggplot(data = res,aes(x,y)) + geom_point(shape = 1) +
88 |   facet_grid(. ~ type) +
89 |   geom_abline(colour = "red") +
90 |   xlab("Analysing good-precision data only") +
91 |   ylab("Analysing combined data")
92 | print(pp + scale_y_continuous(limits = c(0,1)) +
93 |       scale_x_continuous(limits = c(0,1)) +
94 |       coord_equal(ratio = 1))
95 | ```
96 |
97 | Compare the LFSR against the p-values with different prior choices, and at
98 | different levels of precision in the observations.
99 |
100 | ```{r lfsr_vs_pval_GOODPOOR, dev='pdf', fig.width=6.5, fig.height=3}
101 | make_df_for_pval = function(ash,method = "default")
102 |   data.frame(p = pnorm(-abs(ash$data$x/ash$data$s)),
103 |              lfsr = get_lfsr(ash),
104 |              s = ash$data$s,
105 |              method = method)
106 |
107 | plot_pval_vs_lfsr = function(df,plot.it = TRUE){
108 |   if (length(unique(df$s)) > 2) {
109 |     df$s = log(df$s)
110 |   } else {
111 |     df$s = as.factor(df$s)
112 |   }
113 |
114 |   p = ggplot(df,aes(x = p,y = lfsr,color = s)) + geom_point() +
115 |     facet_grid(. ~ method) + xlim(c(0, 0.025)) + xlab("p value") +
116 |     ylab("lfsr")
117 |
118 |   if (length(unique(df$s)) > 2)
119 |     p = p + scale_colour_gradient2(midpoint = 1,low = "blue",mid = "white",
120 |                                    high = "red",space = "Lab")
121 |   if (plot.it)
122 |     print(p)
123 |   return(p)
124 | }
125 |
126 | res.ash.ET = ash(betahat,sebetahat,method = "fdr",alpha = 1,
127 |                  mixcompdist = "normal",outputlevel = 4)
128 | p = plot_pval_vs_lfsr(rbind(
129 |   make_df_for_pval(res.ash,method = "Default prior (alpha=0)"),
130 |   make_df_for_pval(res.ash.ET,method = "p-value prior (alpha=1)")))
131 | print(p + theme(axis.text.x = element_text(size = 8,angle = 45)) +
132 |       scale_colour_manual(values = c("blue","red")))
133 | ```
134 |
135 | Now plot them one at a time.
136 |
137 | ```{r lfsr_vs_pval_GOODPOOR_single, dev='pdf', fig.width=4.5, fig.height=3}
138 | p = plot_pval_vs_lfsr(make_df_for_pval(res.ash,method = "ash"))
139 | print(p + theme(axis.text.x = element_text(size = 8,angle = 45)))
140 |
141 | p = plot_pval_vs_lfsr(make_df_for_pval(res.ash.ET,method = "ash (p-value prior)"))
142 | print(p + theme(axis.text.x = element_text(size = 8,angle = 45)))
143 | ```
144 |
145 | Compare the log-likelihoods:
146 |
147 | ```{r compare_ash_likelihoods}
148 | res.ash.ET$logLR
149 | res.ash$logLR
150 | ```
151 |
152 | Plot the number of true positives against false positives (an "ROC curve"):
153 |
154 | ```{r tp_vs_fp, dev='pdf', fig.width=4.5, fig.height=3}
155 | df0 = cbind(make_df_for_pval(res.ash,method = "Default prior (alpha=0)"),
156 |             beta = beta)
157 | df1 = cbind(make_df_for_pval(res.ash.ET,method = "p-value prior (alpha=1)"),
158 |             beta = beta)
159 |
160 | # False positives (fp) and true positives (tp).
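# The helpers below rank observations by lfsr (most significant first) and
# then use cumsum to count, at each possible cutoff, how many true nulls
# (false positives) and true non-nulls (true positives) have been declared.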
161 | fp = function(df) cumsum((df$beta == 0)[order(df$lfsr,decreasing = FALSE)])
162 | tp = function(df) cumsum((df$beta != 0)[order(df$lfsr,decreasing = FALSE)])
163 | df.fptp = rbind(data.frame(method = "ash lfsr",tp = tp(df0),fp = fp(df0)),
164 |                 data.frame(method = "p values",tp = tp(df1),fp = fp(df1)))
165 | ggplot(df.fptp,aes(x = fp,y = tp,col = method)) + geom_line() +
166 |   xlab("False positives") + ylab("True positives") + xlim(c(0,100)) +
167 |   ylim(c(0,400)) + scale_colour_manual(values = c("black","green"))
168 | ```
169 |
170 | ## Session information
171 |
172 | ```{r info}
173 | sessionInfo()
174 | ```
175 | --------------------------------------------------------------------------------
/analysis/makefig_FDReg.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Some figures to illustrate basic ideas in FDR"
3 | author: "Matthew Stephens"
4 | date: 2015-10-26
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | This file makes two figures:
12 |
13 | 1. Illustrates the basic idea of how qvalue does FDR analysis in p-value space.
14 |
15 | 2. Compares the methods in how they decompose p values and z scores into
16 | two groups.
17 |
18 | First, we load the necessary libraries and function definitions.
19 |
20 | ```{r packages}
21 | library(ashr)
22 | library(qvalue)
23 | library(locfdr)
24 | library(mixfdr)
25 | source("../R/plot_FDReg_hist.R")
26 | source("../R/nullalthist.R")
27 | ```
28 |
29 | ```{r chunk_options, include=FALSE}
30 | # Specify settings for displaying the plots in the rendered document.
31 | source("chunk-options.R")
32 | ```
33 |
34 | ## Simple simulated example
35 |
36 | Generate a small data set, and run the different methods on this data set.
37 |
38 | ```{r qvalue_analysis}
39 |
40 | # ncz is the number of bins in the z score histograms.
41 | ncz = 100
42 | ntest = 10000
43 | set.seed(111)
44 |
45 | # Simulate (with all tests alternative, true effects $\beta \sim N(0,1)$).
46 | beta = rnorm(ntest)
47 | sebetahat = 1
48 | betahat = beta + rnorm(ntest,0,sebetahat)
49 | zscore = betahat/sebetahat
50 | pval = pchisq(zscore^2,df = 1,lower.tail = FALSE)
51 |
52 | # Apply the different methods.
53 | res.qvalue = qvalue(pval)
54 | res.locfdr = locfdr(zscore,nulltype = 0,plot = 0)
55 | res.mixfdr = mixFdr(zscore,noiseSD = 1,theonull = TRUE,plot = FALSE)
56 | res.ash = ash(betahat,1,method = "fdr")
57 |
58 | # Roughly compute a local fdr for qvalue (to aid the plotting of the
59 | # decomposition in z-score space) in each bin of the histogram. The fdr
60 | # is capped at 1 so that it is never larger than 1.
61 | temp = hist(pval,nclass = 50)
62 | bin_fdr = res.qvalue$pi0/temp$density
63 | qval_fdr = bin_fdr[as.numeric(cut(pval,temp$breaks))]
64 | qval_fdr = pmin(1,qval_fdr)
65 | qval_fdr = qvalue::lfdr(pval) # use qvalue's own lfdr in place of the binned approximation above
66 | ```
67 |
68 | Here's an example of how qvalue decomposes $p$ values into null and alternative components. First the $p$ values:
69 |
70 | ```{r plot_pvalues}
71 | plot_FDReg_hist(pval,res.qvalue$pi0,type = 1,title = "p values",
72 |                 cex.axis = 0.8,cex.main = 0.8)
73 | ```
74 |
75 | Now plot the decomposition:
76 |
77 | ```{r decompose_pvalues}
78 | plot_FDReg_hist(pval,res.qvalue$pi0,type = 4,cex.axis = 0.8,yaxt = 'n',
79 |                 textsize = 0.9,cex.main = 0.8,
80 |                 title = "Decomposition into null/alternative")
81 | axis(side = 2,labels = FALSE,tck = -0.01)
82 | ```
83 |
84 | Show the distribution of the p-values and the z-scores, stratified by their
85 | classification into "null" and "alternative".
86 |
87 | ```{r decomp_ZA, dev='pdf', fig.width=6, fig.height=6, crop=TRUE}
88 | layout(matrix(1:12,ncol = 3,byrow = FALSE))
89 | plotlabel = function(label,cex = 1.5){
90 |   plot(0,0,type = "n",axes = FALSE,xlab = "",ylab = "")
91 |   text(0,0,label,cex = cex)
92 | }
93 | plotlabel("qvalue")
94 | plotlabel("locfdr")
95 | plotlabel("mixfdr")
96 | plotlabel("ash")
97 |
98 | par(mar = c(1.3,2,1.5,0.2),mgp = c(3, 0.5, 0))
99 |
100 | # p value histograms
101 | altnullhist(pval,qval_fdr,main = "p values",ncz = 50,xaxt = 'n',cex.axis = 0.8)
102 | axis(side = 1, labels = FALSE,tck = -0.01)
103 | altnullhist(pval,res.locfdr$fdr,main = "",ncz = 50,xaxt = 'n',cex.axis = 0.8)
104 | axis(side = 1, labels = FALSE,tck = -0.01)
105 | altnullhist(pval,res.mixfdr$fdr,main = "",ncz = 50,xaxt = 'n',cex.axis = 0.8)
106 | axis(side = 1, labels = FALSE,tck = -0.01)
107 | altnullhist(pval,get_lfdr(res.ash),main = "",ncz = 50,xaxt = 'n',cex.axis = 0.8)
108 | axis(side = 1, labels = TRUE,tck = -0.01,cex.axis = 0.8)
109 |
110 | # z score histograms
111 | nullalthist(zscore,qval_fdr,main = "z scores",ncz = 50,xaxt = 'n',
112 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
113 | axis(side = 1,labels = FALSE,tck = -0.01)
114 | nullalthist(zscore,res.locfdr$fdr,main = "",ncz = 50,xaxt = 'n',
115 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
116 | axis(side = 1,labels = FALSE,tck = -0.01)
117 | nullalthist(zscore,res.mixfdr$fdr,main = "",ncz = 50,xaxt = 'n',
118 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
119 | axis(side = 1,labels = FALSE,tck = -0.01)
120 | nullalthist(zscore,get_lfdr(res.ash),main = "",ncz = 50,xaxt = 'n',
121 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
122 | axis(side = 1,labels = TRUE,tck = -0.01,cex.axis = 0.8)
123 | ```
124 |
125 | This one uses a different layout (4 columns, 2 rows) for my Tukey poster.
126 |
127 | ```{r decomp_ZA_poster, dev='pdf', fig.width=6, fig.height=2, crop=TRUE}
128 | # pdf("../figures/decomp_ZA_poster.pdf",width=6,height=2)
129 | layout(matrix(1:8,ncol = 4,byrow = TRUE))
130 | par(mar = c(1.3,2,1.5,0.2),mgp = c(3, 0.5, 0))
131 | ncz = 25
132 |
133 | # p value histograms
134 | altnullhist(pval,qval_fdr,main = "p values: qvalue",ncz = ncz,
135 |             xaxt = 'n',cex.axis = 0.8)
136 | # plot_FDReg_hist(pval,res.qvalue$pi0,type = 2,title = "p values",
137 | #                 xaxt = 'n',cex.axis = 0.8)
138 | axis(side = 1,labels = FALSE,tck = -0.01)
139 | # mtext(side = 3,"p values",line = 1)
140 | altnullhist(pval,res.locfdr$fdr,main = "locfdr",ncz = ncz,xaxt = 'n',
141 |             cex.axis = 0.8)
142 | axis(side = 1,labels = FALSE,tck = -0.01)
143 | altnullhist(pval,res.mixfdr$fdr,main = "mixfdr",ncz = ncz,xaxt = 'n',
144 |             cex.axis = 0.8)
145 | axis(side = 1,labels = FALSE,tck = -0.01)
146 | altnullhist(pval,get_lfdr(res.ash),main = "ash",ncz = ncz,xaxt = 'n',
147 |             cex.axis = 0.8)
148 | axis(side = 1,labels = TRUE,tck = -0.01,cex.axis = 0.8)
149 | # mtext(side = 1,"p values",line = 2)
150 |
151 | # z score histograms
152 | nullalthist(zscore,qval_fdr,main = "z scores: qvalue",ncz = ncz,xaxt = 'n',
153 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
154 | axis(side = 1,labels = FALSE,tck = -0.01)
155 | # mtext(side = 3,"z scores",line = 1)
156 | nullalthist(zscore,res.locfdr$fdr,main = "locfdr",ncz = ncz,xaxt = 'n',
157 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
158 | axis(side = 1,labels = FALSE,tck = -0.01)
159 | nullalthist(zscore,res.mixfdr$fdr,main = "mixfdr",ncz = ncz,xaxt = 'n',
160 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
161 | axis(side = 1,labels = FALSE,tck = -0.01)
162 | nullalthist(zscore,get_lfdr(res.ash),main = "ash",ncz = ncz,xaxt = 'n',
163 |             cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
164 | axis(side = 1,labels = TRUE,tck = -0.01,cex.axis = 0.8)
165 | # mtext(side = 1,"z scores",line = 2)
166 | # dev.off()
167 | ```
168 |
169 | ## Session information
170 |
171 | ```{r info}
172 | sessionInfo()
173 | ```
174 | --------------------------------------------------------------------------------
/analysis/metaplot_examples.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "DDD plots"
3 | author: "Matthew Stephens"
4 | date: 2016-10-25
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | This file produces "meta-analysis"-style plots that illustrate the shrinkage behaviour of ash on some simple simulated data.
12 |
13 | First, we load the necessary libraries.
14 |
15 | ```{r packages}
16 | library(knitr)
17 | library(rmeta)
18 | library(ashr)
19 | ```
20 |
21 | ```{r chunk_options, include=FALSE}
22 | # Specify settings for displaying the plots in the rendered document.
23 | source("chunk-options.R")
24 | opts_chunk$set(echo = TRUE)
25 | ```
26 |
27 | ## Simulate Data
28 |
29 | We simulate 50 effects ($\beta$), half of which are truly "null" (i.e. 0) and
30 | the other half are $N(0,2^2)$. The observed effects ($\hat\beta$) all have
31 | standard error 1 here.
32 |
33 | ```{r chunk_label}
34 | set.seed(1)
35 | n = 50
36 | beta = c(rep(0,n/2),rnorm(n/2,0,2))
37 | betahat = beta + rnorm(n)
38 | beta.ash = ash(betahat,1)
39 | ```
40 |
41 | ## Meta Plots
42 |
43 | Here we plot the standard Confidence Intervals, ordered by the observed effect
44 | sizes. The red colors indicate observations corresponding to effects that were
45 | truly 0.
46 |
47 | ```{r plot_unshrunk}
48 | i = order(betahat)
49 | metaplot(betahat[i],rep(1,n),ylab = "",xlab = "",xlim = c(-6,6),
50 |          colors = meta.colors(box = c(rep("red",n/2),rep("black",n/2))[i]))
51 | ```
52 |
53 | Here we plot the *shrunken* Confidence Intervals (more formally, posterior
54 | Credible Intervals), in the same order, with the same color code. (We use the
55 | approximation of the posterior mean $\pm$ 2 posterior standard deviations;
56 | really we should compute the posterior intervals using `ashci`, but this
57 | simple approximation suffices for illustrating the idea.)
58 |
59 | Notice how the CIs are strongly shrunk towards 0, except for those that
60 | correspond to large effects, where the shrinkage is more modest. What is
61 | happening here is that ash recognizes that although many effects are null,
62 | others are quite big, so it avoids over-shrinking the intervals whose
63 | data are sufficiently strong to convince it that they are non-null.
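A minimal sketch of the exact computation mentioned above (not evaluated here;
it assumes the `level` argument of `ashci` as documented in ashr, and can be
slow for many observations):

```{r ashci_sketch, eval=FALSE}
# Exact 95% posterior credible intervals, rather than the mean +/- 2 sd
# approximation used in the plot below.
ci = ashci(beta.ash,level = 0.95)
head(ci)
```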
64 |
65 | ```{r plot_shrunk}
66 | metaplot(get_pm(beta.ash)[i],get_psd(beta.ash)[i],
67 |          ylab = "",xlab = "",xlim = c(-6,6),
68 |          colors = meta.colors(box = c(rep("red",n/2),rep("black",n/2))[i]))
69 | ```
70 | --------------------------------------------------------------------------------
/analysis/plot_cdf_eg.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Various examples of cdf plots"
3 | author: "Matthew Stephens"
4 | date: '2015-10-26'
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | First, we load the necessary libraries and useful function definitions.
12 |
13 | ```{r packages}
14 | library(plyr)
15 | library(dplyr)
16 | library(magrittr)
17 | library(ashr)
18 | library(ggplot2)
19 | library(dscr)
20 | library(REBayes)
21 |
22 | source("../code/dsc-shrink/methods/mixfdr.wrapper.R")
23 | source("../R/set_plot_colors.R")
24 | ```
25 |
26 | ```{r chunk_options, include=FALSE}
27 | # Specify settings for displaying the plots in the rendered document.
28 | source("chunk-options.R")
29 | ```
30 |
31 | Define a couple of functions that compile the CDF data in such a way that it
32 | is easy to plot the CDFs using ggplot2.
33 |
34 | ```{r chunk_label}
35 | load("../output/dsc-shrink-files/res.RData")
36 |
37 | # df is a list with components method, seed and scenario.
38 | # The cdf is evaluated at x.
39 | # Returns a data frame of x, y and pi0, where the cdf values are in y.
40 | get_cdf = function (df,dsc = dsc_shrink,x = seq(-6,6,length = 100),
41 |                     homedir = "../code/dsc-shrink") {
42 |   m = df$method
43 |   if (m == "truth")
44 |     m = "ash.n"
45 |   temp = load_example(dsc,df$seed,df$scenario,m,homedir)
46 |   pi0 = temp$meta$pi0
47 |
48 |   if (df$method == "truth") {
49 |     s = dsc$scenarios[[df$scenario]]
50 |     galt = s$args$g
51 |     g = normalmix(c(pi0,(1 - pi0) * mixprop(galt)),
52 |                   c(0,comp_mean(galt)),
53 |                   c(0,comp_sd(galt)))
54 |   } else {
55 |     if (grepl("mixfdr",df$method)) {
56 |       temp$output$fitted_g = mixfdr2fitted.g(temp$output)$fitted.g
57 |     }
58 |     g = temp$output$fitted_g
59 |   }
60 |   return(data.frame(x = x,y = as.numeric(mixcdf(g,x)),pi0 = pi0))
61 | }
62 |
63 | plot_mean_cdf =
64 |   function(SEEDS,
65 |            PLOTMETHODS = c("ash.n","ash.u","ash.hu","truth","mixfdr.tnull"),
66 |            PLOTSCENARIOS = c("spiky","near-normal","flat-top","skew","bimodal"),
67 |            pi0filter = FALSE,...) {
68 |   PLOTNAMES = PLOTSCENARIOS
69 |
70 |   # Set up a dataframe with the cdf for all methods and all datasets.
71 |   df = expand.grid(seed = SEEDS,scenario = PLOTSCENARIOS,
72 |                    method = PLOTMETHODS,stringsAsFactors = FALSE)
73 |   df.cdf = ddply(df,.(seed,scenario,method),get_cdf)
74 |
75 |   if (pi0filter)
76 |     df.cdf %<>% filter(pi0 < 0.55 & pi0 > 0.45)
77 |   if (length(SEEDS) > 1)
78 |     df.cdf %<>% group_by(x,method,scenario) %>% summarise(y = mean(y))
79 |
80 |   df.cdf$scenario = factor(df.cdf$scenario,levels = PLOTSCENARIOS)
81 |   levels(df.cdf$scenario) = PLOTNAMES
82 |
83 |   return(ggplot(df.cdf,aes(x = x,y = y,color = method),...) + colScale +
84 |          geom_line(lwd = 1.5,alpha = 0.7) + facet_grid(.~scenario) +
85 |          theme(legend.position = "bottom"))
86 | }
87 | ```
88 |
89 | Show the CDFs for all methods and all scenarios, based on a single data set.
90 |
91 | ```{r egcdf, fig.height=3, fig.width=9}
92 | # These chunk options were used to create the figure for the paper:
93 | # dev='pdf', fig.height=3, fig.width=9, crop=TRUE
94 | plot_mean_cdf(1)
95 | ```
96 |
97 | Show the CDFs for all scenarios, this time averaged over 100 simulated data
98 | sets.
99 |
100 | ```{r mean_cdf, fig.height=3, fig.width=9}
101 | # These chunk options were used to create the figure for the paper:
102 | # dev='pdf', fig.height=3, fig.width=9, crop=TRUE
103 | plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n","ash.u","ash.hu","truth"),
104 |               pi0filter = TRUE)
105 | ```
106 |
107 | Same as above, but with custom colours.
108 |
109 | ```{r mean_cdf_nopen, fig.height=3, fig.width=9}
110 | # These chunk options were used to create the figure for the paper:
111 | # dev='pdf', fig.height=3, fig.width=9, crop=TRUE
112 | names(myColors) <- c("truth","ash.hu.s","ash.n.s","ash.u.s","qvalue",
113 |                      "locfdr","mixfdr.tnull")
114 | colScale <- scale_colour_manual(name = "method",values = myColors)
115 | plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n.s","ash.u.s","ash.hu.s","truth"),
116 |               pi0filter = TRUE)
117 |
118 | # Reset color scale for other plots.
119 | source("../R/set_plot_colors.R")
120 | ```
121 |
122 | The following plots show fewer methods and scenarios, for more clarity.
123 |
124 | ```{r egcdf-reduce, fig.height=3.5, fig.width=7}
125 | # These chunk options were used to create the figure for the paper:
126 | # fig.height=3, fig.width=6, crop=TRUE, dev='pdf'
127 | plot_mean_cdf(1,PLOTMETHODS = c("ash.n","truth"),
128 |               PLOTSCENARIOS = c("spiky","near-normal","bimodal"))
129 | ```
130 |
131 | Same as above, but this time the CDFs are averaged over 100 simulated data sets.
132 |
133 | ```{r mean_cdf-reduce, fig.height=3.5, fig.width=7}
134 | # These chunk options were used to create the figure for the paper:
135 | # dev='pdf', fig.height=3, fig.width=6, crop=TRUE
136 | plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n","truth"),
137 |               PLOTSCENARIOS = c("spiky","near-normal","bimodal"),
138 |               pi0filter = TRUE)
139 | ```
140 |
141 | Now with custom colours.
142 |
143 | ```{r mean_cdf_nopen-reduce, fig.height=3.5, fig.width=7}
144 | # These chunk options were used to create the figure for the paper:
145 | # fig.height=3, fig.width=6, crop=TRUE, dev='pdf'
146 | names(myColors) <- c("truth","ash.hu.s","ash.n.s","ash.u.s","qvalue","locfdr",
147 |                      "mixfdr.tnull")
148 | colScale <- scale_colour_manual(name = "method",values = myColors)
149 | plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n.s","truth"),
150 |               PLOTSCENARIOS = c("spiky","near-normal","bimodal"),
151 |               pi0filter = TRUE)
152 | source("../R/set_plot_colors.R")
153 | ```
154 |
155 | I wanted to add the npmle method (fit using the GLmix function) to the plot...
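For orientation, a minimal sketch of the idea (this assumes, as in the chunk
below, that `GLmix` from the REBayes package loaded above returns a grid of
support points in `$x` and the corresponding estimated mixture masses in `$y`;
it is not evaluated here, since GLmix also requires the Rmosek solver):

```{r glmix_sketch, eval=FALSE}
z = GLmix(rnorm(1000))            # toy data; the real analysis uses betahat
npmle.cdf = cumsum(z$y)/sum(z$y)  # normalize the masses to obtain a CDF on z$x
plot(z$x,npmle.cdf,type = "s",xlab = "beta",ylab = "CDF")
```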
156 |
157 | ```{r egcdf-reduce-with-npmle, fig.height=5.25, fig.width=7}
158 | # These chunk options were used to create the figure for the paper:
159 | # dev='pdf', fig.height=4.5, fig.width=6, crop=TRUE
160 | run_npmle = function (SEED = 1,PLOTSCENARIOS = c("spiky","near-normal",
161 |                                                  "flat-top","skew","bimodal")) {
162 |   df = data.frame(seed = NULL,scenario = NULL,method = NULL,x = NULL,
163 |                   y = NULL,pi0 = NULL)
164 |   for (SCENARIO in PLOTSCENARIOS) {
165 |     temp = load_example(dsc_shrink,SEED,SCENARIO,"ash.n","../output/dsc-shrink-files/")
166 |     z = GLmix(temp$input$betahat)
167 |     df = rbind(df,data.frame(seed = SEED,scenario = SCENARIO,method = "NPMLE",
168 |                              x = z$x,y = cumsum(z$y)/sum(z$y),pi0 = NA))
169 |   }
170 |   return(df)
171 | }
172 |
173 | plot_mean_cdf_with_npmle =
174 |   function(SEED = 1,
175 |            PLOTMETHODS = c("ash.n","ash.u","ash.hu","truth","mixfdr.tnull"),
176 |            PLOTSCENARIOS = c("spiky","near-normal","flat-top","skew","bimodal"),
177 |            pi0filter = FALSE,...) {
178 |   if (length(SEED) > 1)
179 |     stop("plot with npmle only implemented for a single seed")
180 |   PLOTNAMES = PLOTSCENARIOS
181 |
182 |   # Set up dataframe with cdf for all methods and all datasets.
183 |   df = expand.grid(seed = SEED,scenario = PLOTSCENARIOS,method = PLOTMETHODS,
184 |                    stringsAsFactors = FALSE)
185 |   df.cdf = ddply(df,.(seed,scenario,method),get_cdf)
186 |
187 |   df.cdf$scenario = factor(df.cdf$scenario,levels = PLOTSCENARIOS)
188 |   levels(df.cdf$scenario) = PLOTNAMES
189 |
190 |   df.npmle = run_npmle(SEED,PLOTSCENARIOS)
191 |   df.cdf = rbind(df.cdf,df.npmle)
192 |
193 |   p = ggplot(df.cdf,aes(x = x,y = y,color = method),...) +
194 |     xlim(c(-4,4)) + colScale + geom_line(lwd = 1.2,alpha = 0.7) +
195 |     theme(legend.position = "bottom")
196 |   return(p)
197 | }
198 |
199 | plot_mean_cdf_with_npmle(1,PLOTMETHODS = c("ash.hu","truth"),
200 |                          PLOTSCENARIOS = c("spiky","near-normal","flat-top",
201 |                                            "skew","bimodal")) +
202 |   facet_wrap(~ scenario, nrow = 2)
203 | ```
204 |
205 | ## Session information
206 |
207 | ```{r info}
208 | sessionInfo()
209 | ```
210 | --------------------------------------------------------------------------------
/analysis/plot_egdens.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Plot densities for simulation studies"
3 | author: "Matthew Stephens"
4 | date: 2015-10-26
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | First, we load the necessary libraries.
12 |
13 | ```{r packages}
14 | library(ashr)
15 | library(ggplot2)
16 | library(dplyr)
17 | ```
18 |
19 | ```{r chunk_options, include=FALSE}
20 | # Specify settings for displaying the plots in the rendered document.
21 | source("chunk-options.R")
22 | ```
23 |
24 | Load the results of the simulation experiments, and generate the density
25 | data for all the simulation scenarios in a single data frame ("df").
26 |
27 | ```{r retrieve_sim_results}
28 | load("../output/dsc-shrink-files/res.RData")
29 | PLOTSCENARIOS = c("spiky","near-normal","flat-top","skew","bimodal")
30 | PLOTNAMES = PLOTSCENARIOS
31 |
32 | df = data.frame()
33 | for(i in PLOTSCENARIOS) {
34 |   s = dsc_shrink$scenarios[[i]]
35 |   g = s$args$g
36 |   x = seq(-6,6,length = 100)
37 |   y = as.numeric(dens(g,x))
38 |   df = rbind(df,data.frame(x = x,y = y,scenario = i))
39 | }
40 |
41 | df$scenario = factor(df$scenario,levels = PLOTSCENARIOS)
42 | levels(df$scenario) = PLOTNAMES
43 | ```
44 |
45 | Generate density plots using ggplot.
46 |
47 | ```{r scenario_density, dev='pdf',fig.height=3, fig.width=9, crop=TRUE}
48 | ggplot(df,aes(x = x,y = y)) +
49 |   geom_line(size = 1.2,linetype = 1) +
50 |   facet_grid(.~scenario) +
51 |   ylab("density")
52 | ```
53 |
54 | ## Session information
55 |
56 | ```{r info}
57 | sessionInfo()
58 | ```
59 |
60 | --------------------------------------------------------------------------------
/analysis/plot_egdens2.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Plot two densities for strong vs weak shrinkage"
3 | author: "Matthew Stephens"
4 | date: 2016-10-25
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | ```{r chunk_options, include=FALSE}
12 | # Specify settings for displaying the plots in the rendered document.
13 | source("chunk-options.R")
14 | ```
15 |
16 | ```{r}
17 | library("ashr")
18 | library("ggplot2")
19 | library("dplyr")
20 | library("dscr")
21 | load("../output/dsc-shrink-files/res.RData")
22 | PLOTSCENARIOS = c("spiky","big-normal")
23 | PLOTNAMES = c("Strong Shrinkage","Weak Shrinkage")
24 |
25 | df = data.frame()
26 |
27 | for(i in PLOTSCENARIOS){
28 |   s = dsc_shrink$scenarios[[i]]
29 |   g = s$args$g
30 |   x = seq(-6,6,length = 100)
31 |   y = as.numeric(dens(g,x))
32 |   df = rbind(df,data.frame(x = x,y = y,scenario = i))
33 | }
34 |
35 |
36 | df$scenario = factor(df$scenario,levels = PLOTSCENARIOS)
37 | levels(df$scenario) = PLOTNAMES
38 | ```
39 |
40 | ```{r scenario_density_2, dev='pdf',fig.height=3, fig.width=9, crop=TRUE}
41 | #pdf(PLOTFILE,height=3,width=9)
42 | ggplot(df, aes(x = x,y = y)) + geom_line(size = 1.2,linetype = 1) + facet_grid(.~scenario) + ylab("density")
43 | #dev.off()
44 | ```
45 |
46 |
47 | ## Session information
48 |
49 | ```{r info}
50 | sessionInfo()
51 | ```
52 |
53 | --------------------------------------------------------------------------------
/analysis/plot_lfsr.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Plot local false sign rate results"
3 | author: "Matthew Stephens"
4 | date: 2015-10-26
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | First, we load the necessary libraries and other useful function definitions.
12 |
13 | ```{r packages}
14 | library(magrittr); library(dplyr) # magrittr provides the %<>% operator used below
15 | library(reshape2)
16 | library(ggplot2)
17 | source("../R/set_plot_colors.R")
18 | ```
19 |
20 | ```{r chunk_options, include=FALSE}
21 | # Specify settings for displaying the plots in the rendered document.
22 | source("chunk-options.R")
23 | ```
24 |
25 | Load the results of the simulation experiments generated by
26 | code/dsc-shrink/run_dsc.R, and prepare the posterior statistics for
27 | plotting.
28 |
29 | ```{r chunk_label}
30 | load("../output/dsc-shrink-files/res.RData")
31 |
32 | #' @param df dataframe of scores for many methods/scenarios etc
33 | #' @return tall dataframe with columns of scores for each method and the "goldmethod" against which the plot is to be made
34 | process_score_for_plotting_against_gold =
35 |   function(df,
36 |            PLOTMETHODS = c("ash.n","ash.u","ash.hu"),
37 |            GOLDMETHOD = "bayes",PLOTSEEDS = 1:100,
38 |            PLOTSCENARIOS = c("spiky","near-normal","flat-top","skew",
39 |                              "big-normal","bimodal"),
40 |            PLOTNAMES = PLOTSCENARIOS) {
41 |   df %<>% filter(seed %in% PLOTSEEDS) %>%
42 |     filter(scenario %in% PLOTSCENARIOS) %>%
43 |     filter(method %in% c(PLOTMETHODS,GOLDMETHOD))
44 |   df$scenario = factor(df$scenario,levels = PLOTSCENARIOS)
45 |   levels(df$scenario) = PLOTNAMES
46 |
47 |   # Create "tall" version of the dataframe.
48 |   df %<>% select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>%
49 |     melt(id.vars = c("method","scenario","seed",".id"),value.name = "val")
50 |
51 |   # Separate bayes from the remainder.
52 |   df.bayes = df %>% filter(method == GOLDMETHOD)
53 |   df.rest = df %>% filter(method != GOLDMETHOD)
54 |
55 |   # Join bayes with others, so each line has both the bayes and the
56 |   # non-bayes version.
57 |   return(inner_join(df.bayes,df.rest,by = c("scenario","seed","variable")))
58 | }
59 |
60 | plot_lfsr = function(lfsr,xlab = "True lfsr",ylab = "Estimated lfsr",
61 |                      xlim = c(0,0.2),ylim = c(0,0.2),
62 |                      legend.position = "bottom")
63 |   ggplot(lfsr,aes(val.x,val.y,colour = method.y)) +
64 |     facet_grid(. ~ scenario) +
65 |     guides(alpha = FALSE) +
66 |     geom_abline(colour = "black") +
67 |     geom_abline(colour = "red",slope = 2) +
68 |     xlab(xlab) + ylab(ylab) +
69 |     geom_point(shape = 1,size = 0.1,alpha = 0.2) +
70 |     scale_y_continuous(limits = ylim) +
71 |     scale_x_continuous(limits = xlim)
72 |
73 | lfsr = process_score_for_plotting_against_gold(res$lfsr,PLOTSEEDS = 1:100,
74 |                                                PLOTMETHODS = "ash.n")
75 | lfdr = process_score_for_plotting_against_gold(res$lfdr,PLOTSEEDS = 1:100,
76 |                                                PLOTMETHODS = "ash.n")
77 |
78 | p1 = plot_lfsr(lfsr,ylim = c(0,1),xlim = c(0,0.2))
79 | p2 = plot_lfsr(lfdr,ylim = c(0,1),xlim = c(0,0.2),
80 |                xlab = "True lfdr",ylab = "Estimated lfdr")
81 | ```
82 |
83 | Separately for each scenario, create a scatterplot comparing the estimated LFSR
84 | against the "gold-standard" LFSR.
85 |
86 | ```{r plot_lfsr, dev=c('png','pdf'), fig.width=9, fig.height=3, crop=TRUE}
87 | print(p1 + theme(legend.position = "none",
88 |                  axis.text.x = element_text(size = 8,angle = 45)) +
89 |       coord_equal(ratio = 1/5) + colScale)
90 | ```
91 |
92 | Separately for each scenario, create a scatterplot comparing the estimated LFDR
93 | against the "gold-standard" LFDR.
94 |
95 | ```{r plot_lfdr, dev=c('png','pdf'), fig.width=9, fig.height=3, crop=TRUE}
96 | print(p2 + theme(legend.position = "none",
97 |                  axis.text.x = element_text(size = 8,angle = 45)) +
98 |       coord_equal(ratio = 1/5) + colScale)
99 | ```
100 |
101 | Separately for each scenario, create a scatterplot comparing the LFSR estimated
102 | by the ash.n.s method against the "gold-standard" LFSR.
103 |
104 | ```{r plot_lfsr_s, dev=c('png','pdf'), fig.width=9, fig.height=3, crop=TRUE}
105 | lfsr.s = process_score_for_plotting_against_gold(res$lfsr,PLOTSEEDS = 1:100,
106 |                                                  PLOTMETHODS = "ash.n.s")
107 | p1.s = plot_lfsr(lfsr.s,ylim = c(0,1),xlim = c(0,0.2))
108 | print(p1.s + theme(legend.position = "none",
109 |                    axis.text.x = element_text(size = 8,angle = 45)) +
110 |       coord_equal(ratio = 1/5))
111 | ```
112 |
113 | Separately for each scenario, create a scatterplot comparing the LFSR estimated
114 | by the ash.n.s method on the -nn data sets against the
115 | "gold-standard" LFSR.
116 |
117 | ```{r plot_lfsr_s_nn, dev=c('png','pdf'), fig.width=9, fig.height=3, crop=TRUE}
118 | lfsr.s.nn =
119 |   process_score_for_plotting_against_gold(res$lfsr,PLOTSEEDS = 1:100,
120 |     PLOTMETHODS = "ash.n.s",
121 |     PLOTSCENARIOS = paste0(c("spiky","near-normal","flat-top","skew",
122 |                              "big-normal","bimodal"),"-nn"))
123 | p1.s.nn = plot_lfsr(lfsr.s.nn,ylim = c(0,1),xlim = c(0,0.2))
124 | print(p1.s.nn + theme(legend.position = "none",
125 |                       axis.text.x = element_text(size = 8,angle = 45)) +
126 |       coord_equal(ratio = 1/5))
127 | ```
128 |
129 | ## Session information
130 |
131 | ```{r info}
132 | print(sessionInfo())
133 | ```
134 | --------------------------------------------------------------------------------
/analysis/plot_pi0est.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Plot estimates of pi0"
3 | author: "Matthew Stephens"
4 | date: 2015-10-26
5 | ---
6 |
7 | **Last updated:** `r Sys.Date()`
8 |
9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)`
10 |
11 | First, we load the necessary libraries, and specify settings for
12 | displaying the plots in the rendered document.
13 |
14 | ```{r packages}
15 | library(magrittr)
16 | library(dplyr); library(ggplot2) # ggplot2 provides ggplot(), used below
17 | source("../R/set_plot_colors.R")
18 | ```
19 |
20 | ```{r chunk_options, include=FALSE}
21 | # Specify settings for displaying the plots in the rendered document.
22 | source("chunk-options.R")
23 | ```
24 |
25 | Generate scatterplots comparing the estimate of pi0 against the
26 | ground-truth value, colored by the method used, and split by the scenario
27 | type.
28 |
29 | ```{r plot_pi0est, dev='pdf', crop=TRUE}
30 | load("../output/dsc-shrink-files/res.RData")
31 |
32 | PLOTMETHODS = c("mixfdr.tnull","ash.n","ash.u","qvalue","locfdr")
33 | PLOTSCENARIOS = c("spiky","near-normal","flat-top","skew","bimodal")
34 | PLOTNAMES = PLOTSCENARIOS
35 | ALPHALEVEL = 0.8 # controls transparency
36 |
37 | df = res$pi0_score %>%
38 |   filter(scenario %in% PLOTSCENARIOS) %>%
39 |   filter(method %in% PLOTMETHODS)
40 | df$scenario = factor(df$scenario,levels = PLOTSCENARIOS)
41 | levels(df$scenario) = PLOTNAMES
42 |
43 | pi0_plot = ggplot(df,aes(pi0,pi0_est,colour = method,alpha = ALPHALEVEL)) +
44 |   geom_point(shape = 1) +
45 |   facet_grid(. ~ scenario) +
~ scenario) + 46 | guides(alpha = FALSE) + 47 | geom_abline(colour = "black") + 48 | xlab("True pi0") + 49 | ylab("Estimated pi0") 50 | print(pi0_plot + scale_y_continuous(limits = c(0,1.01)) + 51 | scale_x_continuous(limits = c(0,1.01)) + 52 | coord_equal(ratio = 1) + 53 | colScale + 54 | theme(legend.position = "top", 55 | axis.text.x = element_text(size = 8,angle = 45))) 56 | ``` 57 | 58 | ## Session information 59 | 60 | ```{r info} 61 | sessionInfo() 62 | ``` 63 | -------------------------------------------------------------------------------- /analysis/referee.response.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Response to referees" 3 | author: "Matthew Stephens" 4 | date: 2016-03-21 5 | --- 6 | 7 | **Last updated:** `r Sys.Date()` 8 | 9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)` 10 | 11 | ```{r chunk_options, include=FALSE} 12 | # Specify settings for displaying the plots in the rendered document. 13 | source("chunk-options.R") 14 | ``` 15 | 16 | ## Referee 1 17 | 18 | I agree with a lot of what the referee says, although there are some 19 | points of disagreement. The most fundamental disagreement regards 20 | Figure 1. This Figure shows not the detectable effects (as the referee 21 | believed) but the decomposition of *all* effects into null and 22 | alternative components. Since this seems to be an important 23 | misunderstanding, I have revised the text to try to make it clearer how 24 | this figure was derived. Specifically, the revised text says: 25 | 26 | “We used each of the methods qvalue, locfdr, mixfdr and ash to 27 | decompose the $z$ scores ($z_j = \hat{\beta}_j$), or their 28 | corresponding p values, into null and alternative components. Here we 29 | are using the fact that these methods all provide an estimate of the 30 | lfdr for each observation, which implies such a decomposition; 31 | specifically the average lfdr within each histogram bin estimates the 32 | fraction of observations in that bin that come from the null vs the 33 | alternative component.” 34 | 35 | Since the referee objected to the presentation of the ZA and UA as 36 | opposing assumptions, I have removed all explicit reference to the 37 | ZA. (This also solves the issue that I had defined the ZA differently 38 | than Efron’s original definition — see below — which may have created 39 | confusion.) Instead, as the referee suggested, I have presented the UA 40 | as an additional assumption “not made by other methods”, and — as the 41 | editor suggested — I have focussed on contrasting the behavior of ash 42 | with the behavior of the other methods (which, as Figure 1 43 | demonstrates, is to create a hole at 0 in the distribution of the 44 | alternative Z scores) rather than contrasting their assumptions. 45 | 46 | However, it seems unsatisfactory to merely point out this behavioral 47 | difference between methods without giving any explanation for the 48 | reason behind it. So I have provided my best attempt to give the 49 | reason. Essentially the reason is that the existing methods (locfdr 50 | and qvalue), when estimating $\pi_0$, make the assumption that *all* p 51 | values near 1 are null, or *all* z scores near 0 are null. (This was 52 | how I defined the ZA in the original paper; however this is stronger 53 | than Efron’s original statement of the ZA, which the referee - 54 | understandably - uses in his/her report, that “most” z scores near 0 55 | are null.
So I don’t refer to this as the ZA any more.) This 56 | assumption necessarily creates a hole in the implied distribution of z 57 | scores under the alternative, and explains the behavior. 58 | 59 | To underscore that this behavior is general, and not just a feature of 60 | the particular simulation I did in Figure 1, I provide results using 61 | qvalue under the setting suggested by the referee; see 62 | http://stephenslab.github.io/ash/analysis/referee_uaza.html. 63 | 64 | ### Minor Concerns 65 | 66 | - I added the citation, thank you. 67 | 68 | - I took up the suggestion to define FSR-hat and the s-value, thank 69 | you. 70 | 71 | - I agree that returning the length J results as a dataframe would 72 | have been better, but unfortunately changing this would break other 73 | packages that depend on ashr. So instead I have provided a method 74 | `as.data.frame` to extract the most important components of the ash 75 | object into a dataframe. 76 | 77 | - I fixed the typo, thank you. 78 | 79 | ## Referee 2 80 | 81 | ### Major Concern 82 | 83 | The referee says: "it would be interesting to include distributions 84 | that favor other models, such as a bimodal distribution with no mass 85 | at zero, an assymmetrical distribution, ... The unimodality is a 86 | strong assumption so it would be good to see how anti-conservative the 87 | results would get under different alternative distributions." 88 | 89 | I found this puzzling since both an asymmetrical distribution and a 90 | strongly bimodal distribution are already included in the simulation 91 | results (Figure 2, Figure 3, Table 1). I believe these existing 92 | results address the referee's comment. I also want to emphasise that 93 | the unimodal assumption is actually less strong than the assumptions 94 | usually made in variable selection for regression, which is directly 95 | analogous to the fdr context. For example, it is common to assume that 96 | effects are a mixture of a point mass at zero and a single normal, 97 | which is considerably more restrictive than the general unimodal 98 | assumption. 99 | 100 | However, to help reassure the referee, I have added another example to 101 | illustrate an asymmetric situation where the unimodal-at-zero 102 | assumption is badly contradicted 103 | [here](http://stephenslab.github.io/ash/analysis/efron.fcr.html). As 104 | the referee will see, the resulting estimates of the lfdr from ashr 105 | are reasonably close to the true values (and also to those from 106 | locfdr). 107 | 108 | ### Minor Concern 109 | 110 | *Is there a way to model the distribution as a mixture of both 111 | uniforms and normals? If not: why?* 112 | 113 | This is straightforward in principle. However, it would require 114 | non-trivial code modifications because of the way I implemented the 115 | mixture class (assuming that all components come from the same 116 | family). Another way to achieve an asymmetric distribution without the 117 | "hard" tails of the uniform might be to use a mixture of truncated 118 | normals. I have a student investigating this option. However, the 119 | uniform mixture has the advantage of allowing not only the asymmetry, 120 | but also the $t$ likelihood. Although the paper does not focus on 121 | this, I believe this is likely to be an important issue in 122 | practice. So I suspect that the mixture of uniforms is often going to 123 | be the method of choice in practice.
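For concreteness, here is a minimal sketch (simulated data, purely illustrative and not one of the paper's analyses) of the point above: the half-uniform mixture already gives an asymmetric unimodal $g$, and the same fit can be combined with a $t$ likelihood via the `df` argument.

```{r asymmetric_halfuniform_sketch, eval=FALSE}
library(ashr)
set.seed(1)
# Asymmetric unimodal-at-zero effects: all non-null effects are positive.
beta    = c(rep(0,500), abs(rnorm(500,0,2)))
betahat = beta + rnorm(1000)
# Asymmetric mixture of (half-)uniforms with a normal likelihood:
fit.hu   = ash(betahat, 1, mixcompdist = "halfuniform")
# The same asymmetric mixture combined with a t likelihood:
fit.hu.t = ash(betahat, 1, mixcompdist = "halfuniform", df = 4)
get_fitted_g(fit.hu)  # fitted g should place its non-null mass on [0,a] components
```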
124 | 125 | *How would the model perform with moderate correlations?* 126 | 127 | There seems to be no reason to think that moderate correlations among 128 | tests will have a substantial detrimental effect. But I do not want to 129 | focus on this because I think it will give the reader the wrong 130 | message: I think that users of both ashr and other fdr methods should 131 | very much worry about correlations in practice (which I believe will 132 | often be large, and not moderate). 133 | 134 | *In equation 3, the null component (I think) is represented as delta0, 135 | but delta0 hasn’t been defined. What is assumed for the distribution 136 | of delta0?* 137 | 138 | Thank you for noticing this. I now define $\delta_0$ (which represents 139 | a point mass on 0). 140 | 141 | *Figure 3: the truth is hard to distinguish from the different 142 | methods. And in general this figure is hard to read.* 143 | 144 | I agree. I have completely rethought this figure, which now has fewer, 145 | larger panels, and compares just two methods (ash.hu and the NPMLE) 146 | against the truth on a few examples. The less important point - the 147 | effect of the penalty term on pi0 - is now illustrated in an additional 148 | figure included in the appendix. 149 | 150 | *I particularly like the idea that the ordering of the tests differs 151 | from the classical p-values. Which influence does this have on the 152 | balance between sensitivity and specificity (for example with ROC 153 | curves)? It should change, but does it get better?* 154 | 155 | Thanks for the question. This is now addressed in the new Figure 4d. 156 | 157 | ### Typos and grammar 158 | 159 | *page 14, line 20. The first mentioned pi0 and lfdr should have a hat.* 160 | 161 | Thanks - fixed. 162 | 163 | ## Session information 164 | 165 | ```{r info} 166 | sessionInfo() 167 | ``` 168 | -------------------------------------------------------------------------------- /analysis/referee_uaza.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Assessment of referee comment regarding q value and zero assumption" 3 | author: "Matthew Stephens" 4 | date: 2016-03-21 5 | --- 6 | 7 | **Last updated:** `r Sys.Date()` 8 | 9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)` 10 | 11 | A referee reported a simulation of 10,000 hypotheses in which a fraction 0.8 12 | are null ($\mu_i = 0$) and a fraction 0.2 are alternative with effects 13 | $\mu_i \sim N(0,1)$. For each effect $i$ there are $n=30$ observations 14 | from $x_{i,j} \sim N(\mu_i,1)$ and a one-sample $t$ test is used to 15 | compute a $p$ value. The referee correctly notes that these 16 | simulations i) obey the unimodal assumption (UA), and ii) have the 17 | property that "most" $z$ scores near 0 are null (Efron's "Zero 18 | Assumption"; ZA). The motivation is to demonstrate that the UA and the 19 | ZA are not contradictory. 20 | 21 | I agree with this, if we define the ZA to mean that "most" $z$ scores near 0 are 22 | null. However, in practice the existing methods effectively make a 23 | stronger assumption (or at least, behave as if they do): that *all* 24 | $z$ scores near 0 are null. Consequently they end up creating a hole 25 | in the distribution of alternative $z$ scores at 0. Here we illustrate 26 | this by applying qvalue to this scenario. 27 | 28 | ```{r chunk_options, include=FALSE} 29 | # Specify settings for displaying the plots in the rendered document.
30 | source("chunk-options.R") 31 | ``` 32 | 33 | ## Simulation 34 | 35 | First, we generate a small data set. 36 | 37 | ```{r sim_data} 38 | set.seed(100) 39 | mu = c(rep(0,8000),rnorm(2000)) 40 | x = matrix(rnorm(30*10000),nrow=10000,ncol=30)+mu 41 | ttest.pval = function(x){return(t.test(x)$p.value)} 42 | x.p = apply(x,1,ttest.pval) 43 | ``` 44 | 45 | Here is how qvalue (arguably, implicitly) decomposes the distribution of p values into null and alternative 46 | ```{r} 47 | source("../R/nullalthist.R") 48 | lfdr.qv = qvalue::qvalue(x.p)$lfdr 49 | altnullhist(x.p,lfdr.qv,main="p values: qvalue",ncz=40,xlab="p value",cex.axis=0.8) 50 | ``` 51 | 52 | 53 | And here is the corresponding plot in z score space. Note the "hole" in the distribution of $z$ scores at 0 under the alternative. 54 | ```{r} 55 | ttest.est = function(x){return(t.test(x)$estimate)} 56 | x.est = apply(x,1,ttest.est) 57 | zscore = x.est/(1/sqrt(30)) 58 | nullalthist(zscore,lfdr.qv,main="qvalue's implicit partition of z \n into null and alternative",ncz=60,cex.axis=0.8,ylim=c(0,0.3),xlim=c(-6,6),xlab="z score") 59 | ``` 60 | 61 | For comparison here is the decomposition, using ash, of $z$ scores into null and alternative components. 62 | ```{r} 63 | library(ashr) 64 | lfdr.ash=get_lfdr(ash(x.est,1/sqrt(30))) 65 | nullalthist(zscore,lfdr.ash,main="ash partition of z \n into null and alternative",ncz=60,cex.axis=0.8,ylim=c(0,0.3),xlim=c(-6,6),xlab="z score") 66 | ``` 67 | 68 | For completeness here is (roughly) the ``true" decomposition of $z$ 69 | scores into null and alternative components. 70 | 71 | ```{r} 72 | trueg = ashr::normalmix(c(0.8,0.2),c(0,0),c(0,1)) 73 | lfdr.true=get_lfdr(ash(x.est,1/sqrt(30),g=trueg,fixg=TRUE)) 74 | nullalthist(zscore,lfdr.true,main="true partition of z \n into null and alternative",ncz=60,cex.axis=0.8,ylim=c(0,0.3),xlim=c(-6,6),xlab="z score") 75 | ``` 76 | 77 | ## Explanation 78 | 79 | In estimating $\pi_0$ qvalue assumes that *all* p values near 1 are 80 | null. This is equivalent to assuming that *all* z scores near 0 are 81 | null. Thus, despite the fact that qvalue does not explicitly model the 82 | distribution of $p$ values or $z$ scores under the alternative, its 83 | way of estimating $\pi_0$ necessarily creates a hole in the $z$ score 84 | distribution at 0 under the alternative. 85 | 86 | ## Session information 87 | 88 | ```{r info} 89 | sessionInfo() 90 | ``` 91 | -------------------------------------------------------------------------------- /analysis/summarize_coverage.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Summarize coverage" 3 | author: "Matthew Stephens" 4 | date: 2015-10-26 5 | --- 6 | 7 | **Last updated:** `r Sys.Date()` 8 | 9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)` 10 | 11 | Note that rendering this RMarkdown document may take a few minutes because 12 | it involves loading and processing tables with millions of rows. 13 | 14 | First, we load the necessary libraries. 15 | 16 | ```{r packages} 17 | library(dscr) 18 | library(ashr) 19 | library(reshape2) 20 | library(ggplot2) 21 | library(magrittr) 22 | library(dplyr) 23 | library(xtable) 24 | ``` 25 | 26 | ```{r chunk_options, include=FALSE} 27 | # Specify settings for displaying the plots in the rendered document. 28 | source("chunk-options.R") 29 | ``` 30 | 31 | Compile the results on the simulated data sets for the summaries below. 
32 | 33 | ```{r compile_tables} 34 | load("../output/dsc-shrink-files/res.RData") 35 | coverthresh = 0.05 # threshold at which we look at coverage 36 | findthresh = 0.95 # threshold at which we define a discovery as significant 37 | 38 | neglong = 39 | res$negprob %>% 40 | select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>% 41 | melt(id.vars = c("method","scenario","seed",".id"), 42 | value.name = "negprob") %>% 43 | filter(negprob > findthresh) 44 | 45 | poslong = 46 | res$posprob %>% 47 | select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>% 48 | melt(id.vars = c("method","scenario","seed",".id"), 49 | value.name = "posprob") %>% 50 | filter(posprob > findthresh) 51 | 52 | reslong = 53 | res$cdf_score %>% 54 | select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>% 55 | melt(id.vars = c("method","scenario","seed",".id")) 56 | 57 | reslong.pos = inner_join(reslong,poslong) 58 | reslong.neg = inner_join(reslong,neglong) 59 | ``` 60 | 61 | Overall proportion of negative findings is `r nrow(reslong.neg)/nrow(reslong)`. 62 | 63 | Overall proportion of positive findings is `r nrow(reslong.pos)/nrow(reslong)`. 64 | 65 | Table of lower tail for all observations: 66 | 67 | ```{r table_all_obs} 68 | print(xtabs(lt ~ method + scenario, 69 | reslong %>% group_by(scenario,method) %>% 70 | summarize(lt = mean(value < coverthresh))) %>% round(2)) 71 | ``` 72 | 73 | Table of lower tail of positive findings. Because of the unimodal assumption 74 | and the favoritism toward the null, this should assess problems with 75 | "over-shrinkage" toward 0. 76 | 77 | ```{r table_pos} 78 | print(xtabs(lt ~ method + scenario, 79 | reslong.pos %>% group_by(scenario,method) %>% 80 | summarize(lt = mean(value < coverthresh))) %>% round(2)) 81 | ``` 82 | 83 | Table of lower tail of negative findings. This should indicate problems with 84 | the tail behaviour of $g$. The uniform methods tend to over-shrink. 85 | 86 | ```{r table_neg} 87 | print(xtabs(lt ~ method + scenario,reslong.neg %>% 88 | group_by(scenario,method) %>% 89 | summarize(lt = mean(value < coverthresh))) %>% round(2)) 90 | ``` 91 | 92 | Compile some more summary tables in LaTeX format.
93 | 94 | ```{r write_latex_tables} 95 | save_latex_coverage_table = 96 | function(df, methodnames, filename, 97 | SCENARIONAMES = c("spiky","near-normal","flat-top","skew", 98 | "big-normal","bimodal"), 99 | switch = FALSE) { 100 | df$method <- factor(df$method,levels = methodnames) 101 | df$scenario <- factor(df$scenario,levels = SCENARIONAMES) 102 | mat <- as.matrix(xtabs(lt~method+scenario,df)) 103 | if (switch) 104 | mat <- 1 - mat 105 | mat <- xtable(mat,digits = rep(2,ncol(mat) + 1)) 106 | write(print(mat,sanitize.text.function = function (x) x, 107 | floating = FALSE,hline.after = NULL, 108 | add.to.row = list(pos = list(-1,0,nrow(mat)), 109 | command = c('\\toprule ', 110 | '\\midrule ', 111 | '\\bottomrule '))),file = filename) 112 | } 113 | 114 | save_latex_coverage_table(reslong.neg %>% group_by(scenario,method) %>% 115 | summarize(lt = mean(value < coverthresh)), 116 | c("ash.n","ash.u","ash.hu"), 117 | "table/coverage_neg.tex") 118 | save_latex_coverage_table(reslong.pos %>% group_by(scenario,method) %>% 119 | summarize(lt = mean(value < coverthresh)), 120 | c("ash.n","ash.u","ash.hu"), 121 | "table/coverage_pos.tex") 122 | save_latex_coverage_table(reslong %>% group_by(scenario,method) %>% 123 | summarize(lt = mean(value < coverthresh)), 124 | c("ash.n","ash.u","ash.hu"), 125 | "table/coverage_all.tex") 126 | save_latex_coverage_table(reslong.neg %>% group_by(scenario,method) %>% 127 | summarize(lt = mean(value < coverthresh)), 128 | c("ash.n.s","ash.u.s","ash.hu.s"), 129 | "table/coverage_neg_nopen.tex") 130 | save_latex_coverage_table(reslong.pos %>% group_by(scenario,method) %>% 131 | summarize(lt = mean(value < coverthresh)), 132 | c("ash.n.s","ash.u.s","ash.hu.s"), 133 | "table/coverage_pos_nopen.tex") 134 | save_latex_coverage_table(reslong %>% group_by(scenario,method) %>% 135 | summarize(lt = mean(value < coverthresh)), 136 | c("ash.n.s","ash.u.s","ash.hu.s"), 137 | "table/coverage_all_nopen.tex") 138 | ``` 139 | 140 | These tables show the right tail instead of the left tail (obtained by passing `switch = TRUE`, which reports one minus the left-tail proportion).
141 | 142 | ```{r write_latex_tables_right} 143 | save_latex_coverage_table(reslong.neg %>% group_by(scenario,method) %>% 144 | summarize(lt = mean(value < coverthresh)), 145 | c("ash.n","ash.u","ash.hu"), 146 | "table/scoverage_neg.tex",switch = TRUE) 147 | save_latex_coverage_table(reslong.pos %>% group_by(scenario,method) %>% 148 | summarize(lt = mean(value < coverthresh)), 149 | c("ash.n","ash.u","ash.hu"), 150 | "table/scoverage_pos.tex",switch = TRUE) 151 | save_latex_coverage_table(reslong %>% group_by(scenario,method) %>% 152 | summarize(lt = mean(value < coverthresh)), 153 | c("ash.n","ash.u","ash.hu"), 154 | "table/scoverage_all.tex",switch = TRUE) 155 | 156 | save_latex_coverage_table(reslong.neg %>% group_by(scenario,method) %>% 157 | summarize(lt = mean(value < coverthresh)), 158 | c("ash.n.s","ash.u.s","ash.hu.s"), 159 | "table/scoverage_neg_nopen.tex",switch = TRUE) 160 | save_latex_coverage_table(reslong.pos %>% group_by(scenario,method) %>% 161 | summarize(lt = mean(value < coverthresh)), 162 | c("ash.n.s","ash.u.s","ash.hu.s"), 163 | "table/scoverage_pos_nopen.tex",switch = TRUE) 164 | save_latex_coverage_table(reslong %>% group_by(scenario,method) %>% 165 | summarize(lt = mean(value < coverthresh)), 166 | c("ash.n.s","ash.u.s","ash.hu.s"), 167 | "table/scoverage_all_nopen.tex",switch = TRUE) 168 | ``` 169 | 170 | ## Session information 171 | 172 | ```{r info} 173 | sessionInfo() 174 | ``` 175 | -------------------------------------------------------------------------------- /analysis/summarize_dsc_znull.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Summarize distribution of LR statistic under null" 3 | author: "Matthew Stephens" 4 | date: '2015-10-26' 5 | --- 6 | 7 | **Last updated:** `r Sys.Date()` 8 | 9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)` 10 | 11 | First, we load the necessary libraries. 12 | 13 | ```{r packages} 14 | library(ggplot2) 15 | library(dplyr) 16 | ``` 17 | 18 | ```{r chunk_options, include=FALSE} 19 | # Specify settings for displaying the plots in the rendered document. 20 | source("chunk-options.R") 21 | ``` 22 | 23 | Load and summarize the results of the data simulations. 24 | 25 | ```{r load_sim_results} 26 | load("../output/dsc-znull-files/res.znull.RData") 27 | out <- ungroup(res %>% group_by(scenario,method) %>% 28 | summarise(gt0 = mean(logLR > 0))) 29 | xtabs(gt0 ~ method + scenario,out) 30 | ``` 31 | 32 | Note that ash.n.s conforms closely to the null expectation of 33 | $0.5\chi^2_0 + 0.5\chi^2_1$ from Stram and Lee (1994, *Biometrics*). 34 | 35 | *Conjecture:* the uniform mixture has the same asymptotic behaviour, and the 36 | half-uniform behaves like the sum of two of these? 37 | 38 | From this, a 95% procedure would need to check for logLR > `r qchisq(0.9,df=1)/2`.
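To make the arithmetic behind that threshold explicit, here is a small sanity check (a sketch; the 0.9 quantile arises because the point mass at zero absorbs half of the tail probability):

```{r logLR_threshold_check}
# Under the boundary null, 2*logLR ~ 0.5*chisq_0 + 0.5*chisq_1 (Stram and Lee),
# so P(logLR > c) = 0.5 * P(chisq_1 > 2c).  Setting this equal to 0.05 gives
# P(chisq_1 > 2c) = 0.1, i.e. 2c = qchisq(0.9, df = 1), hence:
qchisq(0.9, df = 1) / 2  # approximately 1.35
```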
39 | 40 | ```{r scatterplots} 41 | ggplot(res %>% filter(logLR > 0) %>% filter(grepl("ash.hu",method)), 42 | aes(sample = logLR)) + 43 | facet_grid(scenario ~ method) + 44 | stat_qq(distribution = qchisq,dparams = list(df=1)) + 45 | geom_abline() + 46 | ggtitle("qqplot under null vs chisq-1; line slope=1") 47 | 48 | ggplot(res %>% filter(logLR > 0) %>% filter(grepl("ash.u",method)), 49 | aes(sample = logLR)) + 50 | facet_grid(scenario~method) + 51 | stat_qq(distribution = qchisq,dparams = list(df = 1)) + 52 | geom_abline(intercept = 0,slope = 0.5) + 53 | ggtitle("qqplot under null vs chisq-1; line slope=0.5") 54 | 55 | ggplot(res %>% filter(logLR > 0) %>% filter(grepl("ash.n",method)), 56 | aes(sample = logLR)) + 57 | facet_grid(scenario ~ method) + 58 | stat_qq(distribution = qchisq,dparams = list(df = 1)) + 59 | geom_abline(intercept = 0,slope = 0.5) + 60 | ggtitle("qqplot under null vs chisq-1; line slope=0.5") 61 | ``` 62 | 63 | ## Session information 64 | 65 | ```{r info} 66 | sessionInfo() 67 | ``` 68 | -------------------------------------------------------------------------------- /analysis/table/README.md: -------------------------------------------------------------------------------- 1 | This directory is used to store LaTeX tables created by summarize_coverage.Rmd 2 | -------------------------------------------------------------------------------- /analysis/table/coverage_all.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:31 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n & 0.10 & 0.06 & 0.05 & 0.06 & 0.00 & 0.04 \\ 6 | ash.u & 0.13 & 0.07 & 0.06 & 0.07 & 0.00 & 0.04 \\ 7 | ash.hu & 0.12 & 0.07 & 0.06 & 0.06 & 0.00 & 0.04 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/coverage_all_nopen.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:34 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n.s & 0.05 & 0.05 & 0.05 & 0.05 & 0.00 & 0.04 \\ 6 | ash.u.s & 0.06 & 0.05 & 0.05 & 0.06 & 0.00 & 0.04 \\ 7 | ash.hu.s & 0.12 & 0.08 & 0.08 & 0.08 & 0.00 & 0.07 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/coverage_neg.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:28 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n & 0.07 & 0.06 & 0.00 & 0.06 & 0.00 & 0.02 \\ 6 | ash.u & 0.12 & 0.10 & 0.07 & 0.09 & 0.00 & 0.06 \\ 7 | ash.hu & 0.13 & 0.13 & 0.08 & 0.07 & 0.00 & 0.06 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/coverage_neg_nopen.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:31 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5
| \midrule ash.n.s & 0.05 & 0.05 & 0.02 & 0.07 & 0.00 & 0.03 \\ 6 | ash.u.s & 0.11 & 0.08 & 0.10 & 0.08 & 0.00 & 0.06 \\ 7 | ash.hu.s & 0.11 & 0.08 & 0.09 & 0.06 & 0.00 & 0.06 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/coverage_pos.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:29 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n & 0.06 & 0.06 & 0.06 & 0.14 & 0.00 & 0.04 \\ 6 | ash.u & 0.07 & 0.07 & 0.07 & 0.16 & 0.00 & 0.05 \\ 7 | ash.hu & 0.08 & 0.08 & 0.07 & 0.08 & 0.00 & 0.05 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/coverage_pos_nopen.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:31 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n.s & 0.06 & 0.06 & 0.08 & 0.12 & 0.00 & 0.06 \\ 6 | ash.u.s & 0.07 & 0.07 & 0.08 & 0.12 & 0.00 & 0.05 \\ 7 | ash.hu.s & 0.66 & 0.40 & 0.48 & 0.46 & 0.00 & 0.18 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/scoverage_all.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:37 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n & 0.90 & 0.94 & 0.95 & 0.94 & 1.00 & 0.96 \\ 6 | ash.u & 0.87 & 0.93 & 0.94 & 0.93 & 1.00 & 0.96 \\ 7 | ash.hu & 0.88 & 0.93 & 0.94 & 0.94 & 1.00 & 0.96 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/scoverage_all_nopen.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:40 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n.s & 0.95 & 0.95 & 0.95 & 0.95 & 1.00 & 0.96 \\ 6 | ash.u.s & 0.94 & 0.95 & 0.95 & 0.94 & 1.00 & 0.96 \\ 7 | ash.hu.s & 0.88 & 0.92 & 0.92 & 0.92 & 1.00 & 0.93 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/scoverage_neg.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:34 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n & 0.93 & 0.94 & 1.00 & 0.94 & 1.00 & 0.98 \\ 6 | ash.u & 0.88 & 0.90 & 0.93 & 0.91 & 1.00 & 0.94 \\ 7 | ash.hu & 0.87 & 0.87 & 0.92 & 0.93 & 1.00 & 0.94 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/scoverage_neg_nopen.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by 
xtable 1.8-2 package 2 | % Sun Jan 15 17:52:37 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n.s & 0.95 & 0.95 & 0.98 & 0.93 & 1.00 & 0.97 \\ 6 | ash.u.s & 0.89 & 0.92 & 0.90 & 0.92 & 1.00 & 0.94 \\ 7 | ash.hu.s & 0.89 & 0.92 & 0.91 & 0.94 & 1.00 & 0.94 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/scoverage_pos.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:34 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n & 0.94 & 0.94 & 0.94 & 0.86 & 1.00 & 0.96 \\ 6 | ash.u & 0.93 & 0.93 & 0.93 & 0.84 & 1.00 & 0.95 \\ 7 | ash.hu & 0.92 & 0.92 & 0.93 & 0.92 & 1.00 & 0.95 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/table/scoverage_pos_nopen.tex: -------------------------------------------------------------------------------- 1 | % latex table generated in R 3.3.2 by xtable 1.8-2 package 2 | % Sun Jan 15 17:52:37 2017 3 | \begin{tabular}{rrrrrrr} 4 | \toprule & spiky & near-normal & flat-top & skew & big-normal & bimodal \\ 5 | \midrule ash.n.s & 0.94 & 0.94 & 0.92 & 0.88 & 1.00 & 0.94 \\ 6 | ash.u.s & 0.93 & 0.93 & 0.92 & 0.88 & 1.00 & 0.95 \\ 7 | ash.hu.s & 0.34 & 0.60 & 0.52 & 0.54 & 1.00 & 0.82 \\ 8 | \bottomrule \end{tabular} 9 | 10 | -------------------------------------------------------------------------------- /analysis/template.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Untitled" 3 | author: "First Last" 4 | date: YYYY-MM-DD 5 | --- 6 | 7 | **Last updated:** `r Sys.Date()` 8 | 9 | **Code version:** `r system("git log -1 --format='%H'", intern = TRUE)` 10 | 11 | First, we load the necessary libraries. 12 | 13 | ```{r packages} 14 | library(ggplot2) # For illustration only. 15 | ``` 16 | 17 | ```{r chunk_options, include=FALSE} 18 | source("chunk-options.R") 19 | ``` 20 | 21 | This is a template for writing reports with R Markdown, copied from @jdblischak. 22 | 23 | ## Section title 24 | 25 | You can create a new file from the template using the command `cp`. 
26 | 27 | ```bash 28 | cp analysis/template.Rmd analysis/newfile.Rmd 29 | ``` 30 | 31 | ## Session information 32 | 33 | ```{r info} 34 | sessionInfo() 35 | ``` 36 | -------------------------------------------------------------------------------- /ash-packrat.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Makefile 16 | -------------------------------------------------------------------------------- /code/Makefile: -------------------------------------------------------------------------------- 1 | all: dsc-shrink dsc-null dsc-opt 2 | 3 | dsc-shrink: ../output/dsc-shrink-files/res.RData 4 | dsc-null: ../output/dsc-znull-files/res.znull.RData 5 | dsc-opt: ../output/dsc-opt-files/dsc_opt.RData 6 | 7 | ../output/dsc-shrink-files/res.RData: 8 | export R_LIBS_USER=../../packrat/lib/*/*; cd dsc-shrink; R CMD BATCH run_dsc.R 9 | 10 | ../output/dsc-znull-files/res.znull.RData: 11 | export R_LIBS_USER=../../packrat/lib/*/*; cd dsc-shrink; R CMD BATCH run_dsc_znull.R 12 | 13 | ../output/dsc-opt-files/dsc_opt.RData: 14 | export R_LIBS_USER=../../packrat/lib/*/*; cd dsc-opt; R CMD BATCH run_dsc_opt.R 15 | -------------------------------------------------------------------------------- /code/README.md: -------------------------------------------------------------------------------- 1 | This directory is intended for code, possibly long-running, that 2 | creates output in `output`. 3 | -------------------------------------------------------------------------------- /code/dsc-opt/methods/ash.multiopt.wrapper.R: -------------------------------------------------------------------------------- 1 | #' @title wrapper for ash for optimization DSC 2 | #' 3 | #' @description Runs ash twice on the same data, once with the interior-point optimizer and once with EM, and records the log-likelihood from each run 4 | #' @details None 5 | #' 6 | #' @param input a list with elements betahat and sebetahat 7 | #' @param args a list containing other additional arguments to ash 8 | #' 9 | #' @return output a list containing a vector of loglikelihoods from multiple runs of ash 10 | #' 11 | library(ashr) 12 | 13 | ash.multiopt.wrapper=function(input,args=NULL){ 14 | if(is.null(args)){ #set shrink so that the likelihood is unpenalized 15 | args=list(mixcompdist="halfuniform",method="shrink") 16 | } 17 | loglik=rep(0,2) 18 | 19 | #first run is with interior point 20 | res=do.call(ash, args= c(list(betahat=input$betahat,sebetahat=input$sebetahat,optmethod="mixIP"),args)) 21 | loglik[1]= get_loglik(res) 22 | 23 | #now with EM 24 | res=do.call(ash, args= c(list(betahat=input$betahat,sebetahat=input$sebetahat,optmethod="cxxMixSquarem"),args)) 25 | loglik[2]= get_loglik(res) 26 | 27 | 28 | #first run is with non-random start (ie default start) 29 | #res=do.call(ash, args= c(list(betahat=input$betahat,sebetahat=input$sebetahat,randomstart=FALSE),args)) 30 | #loglik[1]= get_loglik(res) 31 | 32 | #for(i in 2:11){ 33 | # res = do.call(ash, args= c(list(betahat=input$betahat,sebetahat=input$sebetahat,randomstart=TRUE),args)) 34 | # loglik[i]= get_loglik(res) 35 | #} 36 | return(list(loglik=loglik)) 37 | } 38 | -------------------------------------------------------------------------------- /code/dsc-opt/run_dsc_opt.R: -------------------------------------------------------------------------------- 1 | library(dscr) 2 |
dscr::source_dir("methods") 3 | 4 | ###### Initialize ####### 5 | 6 | dsc_opt=new_dsc("opt","../../output/dsc-opt-files") 7 | 8 | ###### Add Scenarios ##### 9 | dscr::source_dir("../dsc-shrink/datamakers") 10 | source("../dsc-shrink/add_named_scenarios.R") 11 | add_named_scenarios(dsc_opt,c("spiky","near-normal","flat-top","skew", 12 | "big-normal","bimodal")) 13 | 14 | ###### Add Methods ##### 15 | 16 | add_method(dsc_opt,"ash.u",ash.multiopt.wrapper, 17 | args = list(mixcompdist = "uniform",method = "shrink")) 18 | add_method(dsc_opt,"ash.hu",ash.multiopt.wrapper, 19 | args = list(mixcompdist = "halfuniform",method = "shrink")) 20 | add_method(dsc_opt,"ash.n",ash.multiopt.wrapper, 21 | args = list(mixcompdist = "normal",method = "shrink")) 22 | 23 | ####### Define Score and Add it ####### 24 | 25 | score = function(data, output) { 26 | # This loglik is a vector of length 2 with logliks from two 27 | # different optimizations. 28 | x = output$loglik 29 | return(list(diff1 = x[1] - x[2])) 30 | } 31 | 32 | add_score(dsc_opt,score) 33 | 34 | ######## Run the DSC ################# 35 | 36 | res_opt = run_dsc(dsc_opt) 37 | save(dsc_opt,file = "../../output/dsc-opt-files/dsc_opt.RData") 38 | -------------------------------------------------------------------------------- /code/dsc-shrink/add_methods.R: -------------------------------------------------------------------------------- 1 | dscr::source_dir("methods") 2 | 3 | add_method(dsc_shrink,name="ash.hu.nocxx",fn =ash.wrapper,args=list(mixcompdist="halfunif"),outputtype = "ash_output") 4 | add_method(dsc_shrink,name="ash.hu",fn =ash.wrapper,args=list(mixcompdist="halfunif"),outputtype = "ash_output") 5 | add_method(dsc_shrink,name="ash.u",fn =ash.wrapper,args=list(mixcompdist="unif"),outputtype = "ash_output") 6 | add_method(dsc_shrink,name="ash.n",fn =ash.wrapper,args=list(mixcompdist="normal"),outputtype = "ash_output") 7 | 8 | 9 | add_method(dsc_shrink,name="bayes",fn=bayes.wrapper,gold_flag=TRUE,outputtype = "ash_output") 10 | 11 | # try smaller null weight 12 | add_method(dsc_shrink,name="ash.hu.nw2",fn =ash.wrapper,args=list(mixcompdist="halfunif",nullweight=2),outputtype = "ash_output") 13 | add_method(dsc_shrink,name="ash.u.nw2",fn =ash.wrapper,args=list(mixcompdist="unif",nullweight=2),outputtype = "ash_output") 14 | add_method(dsc_shrink,name="ash.n.nw2",fn =ash.wrapper,args=list(mixcompdist="normal",nullweight=2),outputtype = "ash_output") 15 | 16 | 17 | add_method(dsc_shrink,name="ash.hu.s",fn =ash.wrapper,args=list(mixcompdist="halfunif",method="shrink"),outputtype = "ash_output") 18 | add_method(dsc_shrink,name="ash.u.s",fn =ash.wrapper,args=list(mixcompdist="unif",method="shrink"),outputtype = "ash_output") 19 | add_method(dsc_shrink,name="ash.n.s",fn =ash.wrapper,args=list(mixcompdist="normal",method="shrink"),outputtype = "ash_output") 20 | 21 | 22 | add_method(dsc_shrink,name="mixfdr.tnull", fn=mixfdr.wrapper, args = list(theonull=TRUE),outputtype = "mixfdr_output") 23 | add_method(dsc_shrink,name="mixfdr.enull", fn=mixfdr.wrapper, args = list(theonull=FALSE),outputtype = "mixfdr_output") 24 | 25 | add_method(dsc_shrink,name="locfdr", fn=locfdr.wrapper,outputtype = "locfdr_output") 26 | add_method(dsc_shrink,name="qvalue", fn=qvalue.wrapper,outputtype = "qvalue_output") 27 | 28 | #add_method(dsc_shrink,name="mixfdr.tnull.J10", fn=mixfdr.wrapper, args = list(theonull=TRUE,J=10),outputtype = "mixfdr_output") 29 | #add_method(dsc_shrink,name="mixfdr.enull.J10", fn=mixfdr.wrapper, args = list(theonull=FALSE,J=10),outputtype = 
"mixfdr_output") 30 | #add_method(dsc_shrink,name="mixfdr.tnull.J10P0", fn=mixfdr.wrapper, args = list(theonull=TRUE,J=10,P=0),outputtype = "mixfdr_output") 31 | -------------------------------------------------------------------------------- /code/dsc-shrink/add_methods.null.R: -------------------------------------------------------------------------------- 1 | dscr::source_dir("methods") 2 | 3 | add_method(dsc_znull,name="ash.hu",fn =ash.wrapper,args=list(mixcompdist="halfunif"),outputtype = "ash_output") 4 | add_method(dsc_znull,name="ash.u",fn =ash.wrapper,args=list(mixcompdist="unif"),outputtype = "ash_output") 5 | add_method(dsc_znull,name="ash.n",fn =ash.wrapper,args=list(mixcompdist="normal"),outputtype = "ash_output") 6 | 7 | add_method(dsc_znull,name="ash.hu.s",fn =ash.wrapper,args=list(mixcompdist="halfunif",method="shrink"),outputtype = "ash_output") 8 | add_method(dsc_znull,name="ash.u.s",fn =ash.wrapper,args=list(mixcompdist="unif",method="shrink"),outputtype = "ash_output") 9 | add_method(dsc_znull,name="ash.n.s",fn =ash.wrapper,args=list(mixcompdist="normal",method="shrink"),outputtype = "ash_output") 10 | 11 | add_method(dsc_znull,name="ash.hu.nw2",fn =ash.wrapper,args=list(mixcompdist="halfunif",nullweight=2),outputtype = "ash_output") 12 | add_method(dsc_znull,name="ash.u.nw2",fn =ash.wrapper,args=list(mixcompdist="unif",nullweight=2),outputtype = "ash_output") 13 | add_method(dsc_znull,name="ash.n.nw2",fn =ash.wrapper,args=list(mixcompdist="normal",nullweight=2),outputtype = "ash_output") 14 | 15 | 16 | add_method(dsc_znull,name="ash.hu.s.gmfine",fn =ash.wrapper,args=list(mixcompdist="halfunif",method="shrink",gridmult=2^.25),outputtype = "ash_output") 17 | add_method(dsc_znull,name="ash.u.s.gmfine",fn =ash.wrapper,args=list(mixcompdist="unif",method="shrink",gridmult=2^.25),outputtype = "ash_output") 18 | add_method(dsc_znull,name="ash.n.s.gmfine",fn =ash.wrapper,args=list(mixcompdist="normal",method="shrink",gridmult=2^.25),outputtype = "ash_output") 19 | 20 | 21 | -------------------------------------------------------------------------------- /code/dsc-shrink/add_named_scenarios.R: -------------------------------------------------------------------------------- 1 | dscr::source_dir("datamakers") 2 | 3 | gdef = list(spiky=normalmix(c(.4,.2,.2,.2),c(0,0,0,0),c(.25,.5,1,2)), 4 | skew=normalmix(c(1/4,1/4,1/3,1/6),c(-2,-1,0,1),c(2,1.5,1,1)), 5 | bignormal=normalmix(c(1),c(0),c(4)), 6 | bimodal=normalmix(c(0.5,0.5),c(-2,2),c(1,1)), 7 | flat_top=normalmix(rep(1/7,7),c(-1.5,-1,-0.5,0,0.5,1,1.5),rep(0.5,7)), 8 | near_normal=normalmix(c(2/3,1/3),c(0,0),c(1,2)) 9 | ) 10 | 11 | add_named_scenarios = function(dsc,names,nsamp=1000,min_pi0=0,max_pi0=1,suffix=""){ 12 | 13 | if("near-normal" %in% names){ 14 | add_scenario(dsc,name=paste0("near-normal",suffix), 15 | fn=rnormmix_datamaker, 16 | args=list( 17 | g=gdef$near_normal, 18 | min_pi0=min_pi0, 19 | max_pi0=max_pi0, 20 | nsamp=nsamp, 21 | betahatsd=1 22 | ), 23 | seed=1:100) 24 | } 25 | 26 | if("flat-top" %in% names){ 27 | add_scenario(dsc,name=paste0("flat-top",suffix), 28 | fn=rnormmix_datamaker, 29 | args=list( 30 | g=gdef$flat_top, 31 | min_pi0=min_pi0, 32 | max_pi0=max_pi0, 33 | nsamp=nsamp, 34 | betahatsd=1 35 | ), 36 | seed=1:100) 37 | } 38 | 39 | if("skew" %in% names){ 40 | add_scenario(dsc,name=paste0("skew",suffix), 41 | fn=rnormmix_datamaker, 42 | args=list( 43 | g=gdef$skew, 44 | min_pi0=min_pi0, 45 | max_pi0=max_pi0, 46 | nsamp=nsamp, 47 | betahatsd=1 48 | ), 49 | seed=1:100) 50 | } 51 | 52 | if("spiky" %in% 
names){ 53 | add_scenario(dsc,name=paste0("spiky",suffix), 54 | fn=rnormmix_datamaker, 55 | args=list( 56 | g=gdef$spiky, 57 | min_pi0=min_pi0, 58 | max_pi0=max_pi0, 59 | nsamp=nsamp, 60 | betahatsd=1 61 | ), 62 | seed=1:100) 63 | } 64 | 65 | if("big-normal" %in% names){ 66 | add_scenario(dsc,name=paste0("big-normal",suffix), 67 | fn=rnormmix_datamaker, 68 | args=list( 69 | g=gdef$bignormal, 70 | min_pi0=min_pi0, 71 | max_pi0=max_pi0, 72 | nsamp=nsamp, 73 | betahatsd=1 74 | ), 75 | seed=1:100) 76 | } 77 | 78 | if("bimodal" %in% names){ 79 | add_scenario(dsc,name=paste0("bimodal",suffix), 80 | fn=rnormmix_datamaker, 81 | args=list( 82 | g=gdef$bimodal, 83 | min_pi0=min_pi0, 84 | max_pi0=max_pi0, 85 | nsamp=nsamp, 86 | betahatsd=1 87 | ), 88 | seed=1:100) 89 | } 90 | 91 | } 92 | -------------------------------------------------------------------------------- /code/dsc-shrink/add_scenarios.null.R: -------------------------------------------------------------------------------- 1 | dscr::source_dir("datamakers") 2 | 3 | 4 | #Now, for each scenario create an element of scenarios of the following form 5 | add_scenario(dsc_znull,name="znull.1000", 6 | fn=null_z_datamaker, 7 | args=list( 8 | nsamp=1000 9 | ), 10 | seed=1:500) 11 | 12 | add_scenario(dsc_znull,name="znull.100", 13 | fn=null_z_datamaker, 14 | args=list( 15 | nsamp=100 16 | ), 17 | seed=1:500) 18 | -------------------------------------------------------------------------------- /code/dsc-shrink/datamakers/datamaker.R: -------------------------------------------------------------------------------- 1 | library(ashr) 2 | 3 | #' @title datamaker for FDR DSC 4 | #' 5 | #' @description Simulates data for a DSC for methods to compute FDR/q values 6 | #' @details None 7 | #' 8 | #' @param args A list of the remaining arguments, which in this case is 9 | #' \item{nsamp}{The number of samples to create} 10 | #' \item{g}{An object of class normalmix specifying the mixture distribution from 11 | #' which non-null beta values are to be simulated} 12 | #' \item{min_pi0}{The minimum value of pi0, the proportion of true nulls} 13 | #' \item{max_pi0}{The maximum value of pi0, the proportion of true null} 14 | #' \item{betahatsd}{The standard deviation of betahat to use} 15 | #' 16 | #' @return a list with the following elements 17 | #' \item{meta}{A list containing the meta data. 
In this case beta} 18 | #' \item{input}{A list containing the input data; in this case the set of betahat values and their standard errors} 19 | #' 20 | rnormmix_datamaker = function(args){ 21 | #here is the meat of the function that needs to be defined for each dsc to be done 22 | pi0 = runif(1,args$min_pi0,args$max_pi0) #generate the proportion of true nulls randomly 23 | 24 | k = ncomp(args$g) 25 | comp = sample(1:k,args$nsamp,mixprop(args$g),replace=TRUE) #randomly draw a component 26 | isnull = (runif(args$nsamp,0,1) < pi0) 27 | beta = ifelse(isnull, 0,rnorm(args$nsamp,comp_mean(args$g)[comp],comp_sd(args$g)[comp])) 28 | sebetahat = args$betahatsd 29 | betahat = beta + rnorm(args$nsamp,0,sebetahat) 30 | meta=list(g1=args$g,beta=beta,pi0=pi0) 31 | input=list(betahat=betahat,sebetahat=sebetahat,df=NULL) 32 | 33 | #end of meat of function 34 | 35 | data = list(meta=meta,input=input) 36 | 37 | return(data) 38 | 39 | } 40 | -------------------------------------------------------------------------------- /code/dsc-shrink/datamakers/null_t_datamaker.R: -------------------------------------------------------------------------------- 1 | null_t_datamaker = function(args){ 2 | 3 | #assuming the true standard deviation is 1, the estimated s.e. satisfies sebetahat^2 ~ chisq(df)/df 4 | sebetahat = sqrt(rchisq(args$nsamp, args$df)/args$df) 5 | 6 | beta = rep(0,args$nsamp) 7 | betahat = beta + rnorm(args$nsamp,0,1) 8 | 9 | 10 | meta=list(beta=beta,pi0=1) 11 | input=list(betahat=betahat,sebetahat=sebetahat,df=args$df) 12 | 13 | #end of meat of function 14 | 15 | data = list(meta=meta,input=input) 16 | 17 | return(data) 18 | 19 | } -------------------------------------------------------------------------------- /code/dsc-shrink/datamakers/null_z_datamaker.R: -------------------------------------------------------------------------------- 1 | null_z_datamaker = function(args){ 2 | 3 | #all standard errors are set to 1, so betahat values are z scores 4 | sebetahat = rep(1,args$nsamp) 5 | 6 | beta = rep(0,args$nsamp) 7 | betahat = beta + rnorm(args$nsamp,0,1) 8 | 9 | 10 | meta=list(beta=beta,pi0=1) 11 | input=list(betahat=betahat,sebetahat=sebetahat,df=NULL) 12 | 13 | #end of meat of function 14 | 15 | data = list(meta=meta,input=input) 16 | 17 | return(data) 18 | 19 | } -------------------------------------------------------------------------------- /code/dsc-shrink/methods/ash.wrapper.R: -------------------------------------------------------------------------------- 1 | #' @title wrapper for ash for shrinkage DSC 2 | #' 3 | #' @description Runs ash on the betahat values 4 | #' @details None 5 | #' 6 | #' @param input a list with elements betahat and sebetahat 7 | #' @param args a list containing other additional arguments to ash 8 | #' 9 | #' @return output a list with the following elements 10 | #' \item{beta_est}{vector containing point estimates for beta} 11 | #' 12 | library(ashr) 13 | 14 | ash.wrapper=function(input,args=NULL){ 15 | if(is.null(args)){ 16 | args=list(mixcompdist="halfuniform",method="fdr") 17 | } 18 | res = do.call(ash, args= c(list(betahat=input$betahat,sebetahat=input$sebetahat,df=input$df),args)) 19 | return(res) 20 | } 21 | 22 | #uses ash function to compute bayes rule by running it with true g 23 | bayes.wrapper = function(input, meta, args=NULL){ 24 | pi0 = meta$pi0 25 | g1 = meta$g1 26 | g=g1 27 | #create g by adding null component to g1 28 | g$pi= c(pi0, (1-pi0)*g1$pi) 29 | g$mean = c(0,g1$mean) 30 | g$sd = c(0,g1$sd) 31 | #do computations for bayes rule by running ash with the true g held fixed (fixg=TRUE) 32 | res
= do.call(ash, args= list(betahat=input$betahat,sebetahat=input$sebetahat,df=input$df,g=g,fixg=TRUE)) 33 | return(res) 34 | } 35 | 36 | ash2beta_est =function(output){ 37 | return (list(beta_est=get_pm(output))) 38 | } 39 | 40 | ash2pi0_est =function(output){ 41 | return (list(pi0_est=get_pi0(output))) 42 | } 43 | 44 | ash2fitted.g = function(output){ 45 | return (list(fitted.g=get_fitted_g(output))) 46 | } 47 | -------------------------------------------------------------------------------- /code/dsc-shrink/methods/locfdr.wrapper.R: -------------------------------------------------------------------------------- 1 | #' @title wrapper for locfdr 2 | #' 3 | #' @description Runs locfdr on z scores 4 | #' @details None 5 | #' 6 | #' @param input a list with elements betahat and sebetahat 7 | #' @param args a list containing other additional arguments to locfdr 8 | #' 9 | #' @return output a list 10 | #' 11 | library(locfdr) 12 | 13 | locfdr.wrapper=function(input,args=NULL){ 14 | res = try(do.call(locfdr, args= c(list(zz=input$betahat/input$sebetahat, nulltype=0, plot=0),args))) 15 | if(inherits(res,"try-error")){res=list(fdr=rep(NA,length(input$betahat)),fp0=matrix(NA,nrow=6,ncol=3))} 16 | return(res) 17 | } 18 | 19 | 20 | locfdr2pi0_est = function(output){ 21 | return (list(pi0_est=output$fp0[1,3])) 22 | } 23 | 24 | 25 | -------------------------------------------------------------------------------- /code/dsc-shrink/methods/mixfdr.wrapper.R: -------------------------------------------------------------------------------- 1 | #' @title wrapper for mixfdr for shrinkage DSC 2 | #' 3 | #' @description Runs mixfdr to compute effect estimates from z scores (computed from betahat and sebetahat) 4 | #' @details None 5 | #' 6 | #' @param input a list with elements betahat and sebetahat 7 | #' @param args a list containing other additional arguments to ash 8 | #' 9 | #' @return output a list with the following elements 10 | #' \item{beta_est}{vector containing point estimates for beta} 11 | #' \item{pi0_est}{scalar estimate of pi0} 12 | #' 13 | library(mixfdr) 14 | 15 | mixfdr.wrapper=function(input,args=NULL){ 16 | res = try(do.call(mixFdr, args= c(list(x=input$betahat/input$sebetahat, noiseSD=1, plots=FALSE),args))) 17 | if(inherits(res,"try-error")){res=list(effectSize=rep(NA,length(input$betahat)),pi0_est=NA,pi=NA,mu=NA,sigma=NA,noiseSD=NA)} 18 | return(list(res=res,input=input)) 19 | } 20 | 21 | mixfdr2beta_est = function(output){ 22 | return (list(beta_est=output$res$effectSize*output$input$sebetahat)) 23 | } 24 | 25 | mixfdr2pi0_est = function(output){ 26 | return (list(pi0_est=output$res$pi[1])) 27 | } 28 | 29 | mixfdr2fitted.g = function(output){ 30 | return (list(fitted.g= 31 | normalmix(output$res$pi,output$res$mu, 32 | sqrt(output$res$sigma^2-output$res$noiseSD^2)))) 33 | } 34 | 35 | -------------------------------------------------------------------------------- /code/dsc-shrink/methods/qvalue.wrapper.R: -------------------------------------------------------------------------------- 1 | #' @title wrapper for qvalue for FDR DSC 2 | #' 3 | #' @description Runs qvalue to compute FDR/q values 4 | #' @details None 5 | #' 6 | #' @param input a list with elements betahat and sebetahat 7 | #' @param args a list with additional arguments to qvalue 8 | #' 9 | #' @return output a list with the following elements 10 | #' \item{qvalue}{vector of qvalues, with jth element being the q value corresponding to (betahat_j,sebetahat_j)} 11 | #' 12 | #' 13 | library(qvalue) 14 | 15 | qvalue.wrapper =
function(input,args=NULL){ 16 | zscore = input$betahat/input$sebetahat 17 | pvalue = 2*pnorm(-abs(zscore),lower.tail=TRUE) 18 | res = qvalue(pvalue) 19 | return(res) 20 | } 21 | 22 | qvalue2pi0_est = function(output){ 23 | if(!is.list(output)){return(list(pi0_est=NA))} #deal with case where ERROR thrown 24 | else{ 25 | return (list(pi0_est=output$pi0)) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /code/dsc-shrink/run_dsc.R: -------------------------------------------------------------------------------- 1 | library(dscr) 2 | library(ashr) 3 | sessionInfo() 4 | 5 | dsc_shrink=new_dsc("shrink","../../output/dsc-shrink-files") 6 | source("add_named_scenarios.R") 7 | add_named_scenarios(dsc_shrink,c("spiky","near-normal","flat-top","skew","big-normal","bimodal","near-normal-nn")) 8 | add_named_scenarios(dsc_shrink,c("spiky","near-normal","flat-top","skew","big-normal","bimodal"),min_pi0=0,max_pi0=0,suffix="-nn") 9 | 10 | source("add_methods.R") 11 | 12 | add_output_parser(dsc_shrink,"ash2beta",ash2beta_est,"ash_output", 13 | "beta_est_output") 14 | add_output_parser(dsc_shrink,"mixfdr2beta",mixfdr2beta_est,"mixfdr_output","beta_est_output") 15 | 16 | add_output_parser(dsc_shrink,"ash2pi0",ash2pi0_est,"ash_output","pi0_est_output") 17 | add_output_parser(dsc_shrink,"mixfdr2pi0",mixfdr2pi0_est,"mixfdr_output","pi0_est_output") 18 | add_output_parser(dsc_shrink,"locfdr2pi0",locfdr2pi0_est,"locfdr_output","pi0_est_output") 19 | add_output_parser(dsc_shrink,"qvalue2pi0",qvalue2pi0_est,"qvalue_output","pi0_est_output") 20 | 21 | add_output_parser(dsc_shrink,"ash2fitted.g",ash2fitted.g,"ash_output","g_output") 22 | add_output_parser(dsc_shrink,"mixfdr2fitted.g",mixfdr2fitted.g,"mixfdr_output","g_output") 23 | 24 | source("score.R") 25 | add_score(dsc_shrink,score,"beta_err","beta_est_output") 26 | add_score(dsc_shrink,score2,"pi0_score","pi0_est_output") 27 | add_score(dsc_shrink,score3,"cdf_score","g_output") 28 | add_score(dsc_shrink,score_neg,"negprob","ash_output") #just extracts the negativeprobs 29 | add_score(dsc_shrink,score_pos,"posprob","ash_output") #just extracts the positiveprobs 30 | add_score(dsc_shrink,score_fdr,"fdr","mixfdr_output") #just extracts the fdr 31 | add_score(dsc_shrink,score_betahat,"betahat","mixfdr_output") #just extracts betahat 32 | add_score(dsc_shrink,score_lfsr,"lfsr","ash_output") #just extracts the lfsr 33 | add_score(dsc_shrink,score_lfdr,"lfdr","ash_output") #just extracts the lfdr 34 | 35 | res=run_dsc(dsc_shrink) 36 | save(res,dsc_shrink,file="../../output/dsc-shrink-files/res.RData") 37 | -------------------------------------------------------------------------------- /code/dsc-shrink/run_dsc_mini.R: -------------------------------------------------------------------------------- 1 | library(dscr) 2 | library(ashr) 3 | sessionInfo() 4 | 5 | dsc_shrink_mini=new_dsc("shrink_mini","../../output/dsc-shrink-mini-files") 6 | source("add_named_scenarios.R") 7 | add_named_scenarios(dsc_shrink_mini,c("spiky")) 8 | dscr::source_dir("methods") 9 | add_method(dsc_shrink_mini,name="ash.n",fn =ash.wrapper,args=list(mixcompdist="normal"),outputtype = "ash_output") 10 | add_method(dsc_shrink_mini,name="mixfdr.tnull", fn=mixfdr.wrapper, args = list(theonull=TRUE),outputtype = "mixfdr_output") 11 | 12 | add_output_parser(dsc_shrink_mini,"ash2beta",ash2beta_est,"ash_output","beta_est_output") 13 | add_output_parser(dsc_shrink_mini,"mixfdr2beta",mixfdr2beta_est,"mixfdr_output","beta_est_output") 14 | 15 |
add_output_parser(dsc_shrink_mini,"ash2pi0",ash2pi0_est,"ash_output","pi0_est_output") 16 | #add_output_parser(dsc_shrink_mini,"mixfdr2pi0",mixfdr2pi0_est,"mixfdr_output","pi0_est_output") 17 | #add_output_parser(dsc_shrink_mini,"locfdr2pi0",locfdr2pi0_est,"locfdr_output","pi0_est_output") 18 | #add_output_parser(dsc_shrink_mini,"qvalue2pi0",qvalue2pi0_est,"qvalue_output","pi0_est_output") 19 | 20 | #add_output_parser(dsc_shrink_mini,"ash2fitted.g",ash2fitted.g,"ash_output","g_output") 21 | #add_output_parser(dsc_shrink_mini,"mixfdr2fitted.g",mixfdr2fitted.g,"mixfdr_output","g_output") 22 | 23 | 24 | 25 | source("score.R") 26 | add_score(dsc_shrink_mini,score,"beta_err","beta_est_output") 27 | # add_score(dsc_shrink,score2,"pi0_score","pi0_est_output") 28 | # add_score(dsc_shrink,score3,"cdf_score","g_output") 29 | # add_score(dsc_shrink,score_neg,"negprob","ash_output") #just extract the negativeprobs 30 | # add_score(dsc_shrink,score_pos,"posprob","ash_output") #just extracts the positiveprobs 31 | # add_score(dsc_shrink,score_fdr,"fdr","mixfdr_output") #just extracts the fdr 32 | # add_score(dsc_shrink,score_betahat,"betahat","mixfdr_output") #just extracts the fdr 33 | # add_score(dsc_shrink,score_lfsr,"lfsr","ash_output") #just extracts the lfsr 34 | # add_score(dsc_shrink,score_lfdr,"lfdr","ash_output") #just extracts the lfdr 35 | 36 | 37 | res=run_dsc(dsc_shrink_mini) 38 | save(res,dsc_shrink_mini,file="../../output/dsc-shrink-files/res.mini.RData") 39 | 40 | 41 | -------------------------------------------------------------------------------- /code/dsc-shrink/run_dsc_znull.R: -------------------------------------------------------------------------------- 1 | #the aim of this dsc is to assess the distribution of the logLR 2 | #value for ash under the null that all effects are 0 3 | 4 | library(dscr) 5 | 6 | dsc_znull=new_dsc("znull","../../output/dsc-znull-files") 7 | source("add_scenarios.null.R") 8 | source("add_methods.null.R") 9 | 10 | source("score.R") 11 | add_score(dsc_znull,score_logLR,"logLR","ash_output") 12 | 13 | res=run_dsc(dsc_znull) 14 | save(res,dsc_znull,file="../../output/dsc-znull-files/res.znull.RData") 15 | 16 | -------------------------------------------------------------------------------- /code/dsc-shrink/score.R: -------------------------------------------------------------------------------- 1 | #' @title compute score for shrinkage DSC 2 | #' 3 | #' @description Outputs the RMSE and MAE of estimated beta values 4 | #' @details None 5 | #' 6 | #' @param data 7 | #' @param output 8 | #' 9 | #' @return score a list with 10 | #' \item{RMSE}{root mean squared error of estimated beta values} 11 | #' \item{MAE}{Median absolute error of estimated beta values} 12 | #' 13 | #' 14 | score = function(data, output){ 15 | return(list(RMSE=sqrt(mean((data$meta$beta-output$beta_est)^2)), 16 | MAE = median(abs(data$meta$beta-output$beta_est)))) 17 | } 18 | 19 | score2 = function(data, output){ 20 | return(list(pi0 = data$meta$pi0, 21 | pi0_est = output$pi0_est)) 22 | } 23 | 24 | score3 = function(data, output){ 25 | return(c(S=pcdf_post(output$fitted.g,data$meta$beta, 26 | set_data(data$input$betahat, 27 | data$input$sebetahat)))) 28 | } 29 | 30 | score_neg = function(data, output){ 31 | return(c(S=get_np(output))) 32 | } 33 | 34 | score_pos = function(data, output){ 35 | return(c(S=get_pp(output))) 36 | } 37 | 38 | score_fdr = function(data, output){ #for mixfdr output 39 | return(c(S=output$fdr)) 40 | } 41 | 42 | score_lfsr = function(data, output){ 43 | 
return(c(S=get_lfsr(output))) 44 | } 45 | 46 | score_lfdr = function(data, output){ 47 | return(c(S=get_lfdr(output))) 48 | } 49 | 50 | score_betahat = function(data, output){ #note this ignores output: it extracts the observed betahat from the input data 51 | return(c(S=data$input$betahat)) 52 | } 53 | 54 | score_logLR = function(data,output){ 55 | return(c(logLR = ashr::calc_logLR(output,set_data(data$input$betahat, 56 | data$input$sebetahat)))) 57 | } 58 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | Directory for data sets that typically will not change after being deposited. Use output/ for postprocessed data. 2 | -------------------------------------------------------------------------------- /output/README.md: -------------------------------------------------------------------------------- 1 | This directory is intended for output from code/ (e.g., postprocessed data). 2 | -------------------------------------------------------------------------------- /packrat/init.R: -------------------------------------------------------------------------------- 1 | local({ 2 | 3 | libDir <- file.path('packrat', 'lib', R.version$platform, getRversion()) 4 | 5 | ## Escape hatch to allow RStudio to handle initialization 6 | if (!is.na(Sys.getenv("RSTUDIO", unset = NA)) && 7 | is.na(Sys.getenv("RSTUDIO_PACKRAT_BOOTSTRAP", unset = NA))) { 8 | Sys.setenv("RSTUDIO_PACKRAT_BOOTSTRAP" = "1") 9 | setHook("rstudio.sessionInit", function(...) { 10 | # Ensure that, on sourcing 'packrat/init.R', we are 11 | # within the project root directory 12 | if (exists(".rs.getProjectDirectory")) { 13 | owd <- getwd() 14 | setwd(.rs.getProjectDirectory()) 15 | on.exit(setwd(owd), add = TRUE) 16 | } 17 | source("packrat/init.R") 18 | }) 19 | return(invisible(NULL)) 20 | } 21 | 22 | ## Unload packrat in case it's loaded -- this ensures packrat _must_ be 23 | ## loaded from the private library. Note that `requireNamespace` will 24 | ## succeed if the package is already loaded, regardless of lib.loc! 25 | if ("packrat" %in% loadedNamespaces()) 26 | try(unloadNamespace("packrat"), silent = TRUE) 27 | 28 | if (suppressWarnings(requireNamespace("packrat", quietly = TRUE, lib.loc = libDir))) { 29 | 30 | # Check 'print.banner.on.startup' -- when NA and RStudio, don't print 31 | print.banner <- packrat::get_opts("print.banner.on.startup") 32 | if (print.banner == "auto" && is.na(Sys.getenv("RSTUDIO", unset = NA))) { 33 | print.banner <- TRUE 34 | } else { 35 | print.banner <- FALSE 36 | } 37 | return(packrat::on(print.banner = print.banner)) 38 | } 39 | 40 | ## Bootstrapping -- only performed in interactive contexts, 41 | ## or when explicitly asked for on the command line 42 | if (interactive() || "--bootstrap-packrat" %in% commandArgs(TRUE)) { 43 | 44 | message("Packrat is not installed in the local library -- ", 45 | "attempting to bootstrap an installation...") 46 | 47 | ## We need utils for the following to succeed -- there are calls to functions 48 | ## in 'restore' that are contained within utils. 
utils gets loaded at the 49 | ## end of start-up anyhow, so this should be fine 50 | library("utils", character.only = TRUE) 51 | 52 | ## Install packrat into local project library 53 | packratSrcPath <- list.files(full.names = TRUE, 54 | file.path("packrat", "src", "packrat") 55 | ) 56 | 57 | ## No packrat tarballs available locally -- try some other means of installation 58 | if (!length(packratSrcPath)) { 59 | 60 | message("> No source tarball of packrat available locally") 61 | 62 | ## There are no packrat sources available -- try using a version of 63 | ## packrat installed in the user library to bootstrap 64 | if (requireNamespace("packrat", quietly = TRUE) && packageVersion("packrat") >= "0.2.0.99") { 65 | message("> Using user-library packrat (", 66 | packageVersion("packrat"), 67 | ") to bootstrap this project") 68 | } 69 | 70 | ## Couldn't find a user-local packrat -- try finding and using devtools 71 | ## to install 72 | else if (requireNamespace("devtools", quietly = TRUE)) { 73 | message("> Attempting to use devtools::install_github to install ", 74 | "a temporary version of packrat") 75 | library(stats) ## for setNames 76 | devtools::install_github("rstudio/packrat") 77 | } 78 | 79 | ## Try downloading packrat from CRAN if available 80 | else if ("packrat" %in% rownames(available.packages())) { 81 | message("> Installing packrat from CRAN") 82 | install.packages("packrat") 83 | } 84 | 85 | ## Fail -- couldn't find an appropriate means of installing packrat 86 | else { 87 | stop("Could not automatically bootstrap packrat -- try running ", 88 | "\"'install.packages('devtools'); devtools::install_github('rstudio/packrat')\"", 89 | "and restarting R to bootstrap packrat.") 90 | } 91 | 92 | # Restore the project, unload the temporary packrat, and load the private packrat 93 | packrat::restore(prompt = FALSE, restart = TRUE) 94 | 95 | ## This code path only reached if we didn't restart earlier 96 | unloadNamespace("packrat") 97 | requireNamespace("packrat", lib.loc = libDir, quietly = TRUE) 98 | return(packrat::on()) 99 | 100 | } 101 | 102 | ## Multiple packrat tarballs available locally -- try to choose one 103 | ## TODO: read lock file and infer most appropriate from there; low priority because 104 | ## after bootstrapping packrat a restore should do the right thing 105 | if (length(packratSrcPath) > 1) { 106 | warning("Multiple versions of packrat available in the source directory;", 107 | "using packrat source:\n- ", shQuote(packratSrcPath)) 108 | packratSrcPath <- packratSrcPath[[1]] 109 | } 110 | 111 | 112 | lib <- file.path("packrat", "lib", R.version$platform, getRversion()) 113 | if (!file.exists(lib)) { 114 | dir.create(lib, recursive = TRUE) 115 | } 116 | lib <- normalizePath(lib, winslash = "/") 117 | 118 | message("> Installing packrat into project private library:") 119 | message("- ", shQuote(lib)) 120 | 121 | surround <- function(x, with) { 122 | if (!length(x)) return(character()) 123 | paste0(with, x, with) 124 | } 125 | 126 | ## The following is performed because a regular install.packages call can fail 127 | peq <- function(x, y) paste(x, y, sep = " = ") 128 | installArgs <- c( 129 | peq("pkgs", surround(packratSrcPath, with = "'")), 130 | peq("lib", surround(lib, with = "'")), 131 | peq("repos", "NULL"), 132 | peq("type", surround("source", with = "'")) 133 | ) 134 | installCmd <- paste(sep = "", 135 | "utils::install.packages(", 136 | paste(installArgs, collapse = ", "), 137 | ")") 138 | 139 | fullCmd <- paste( 140 | surround(file.path(R.home("bin"), "R"), 
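                       # fullCmd assembles a shell command of the form
                       #   "<R_HOME>/bin/R" --vanilla --slave -e "utils::install.packages(...)"
                       # with both the R binary path and the -e expression double-quoted (via
                       # surround) so that paths containing spaces survive the shell.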
with = "\""), 141 | "--vanilla", 142 | "--slave", 143 | "-e", 144 | surround(installCmd, with = "\"") 145 | ) 146 | system(fullCmd) 147 | 148 | ## Tag the installed packrat so we know it's managed by packrat 149 | ## TODO: should this be taking information from the lockfile? this is a bit awkward 150 | ## because we're taking an un-annotated packrat source tarball and simply assuming it's now 151 | ## an 'installed from source' version 152 | 153 | ## -- InstallAgent -- ## 154 | installAgent <- 'InstallAgent: packrat 0.4.4' 155 | 156 | ## -- InstallSource -- ## 157 | installSource <- 'InstallSource: source' 158 | 159 | packratDescPath <- file.path(lib, "packrat", "DESCRIPTION") 160 | DESCRIPTION <- readLines(packratDescPath) 161 | DESCRIPTION <- c(DESCRIPTION, installAgent, installSource) 162 | cat(DESCRIPTION, file = packratDescPath, sep = "\n") 163 | 164 | # Otherwise, continue on as normal 165 | message("> Attaching packrat") 166 | library("packrat", character.only = TRUE, lib.loc = lib) 167 | 168 | message("> Restoring library") 169 | restore(restart = FALSE) 170 | 171 | # If the environment allows us to restart, do so with a call to restore 172 | restart <- getOption("restart") 173 | if (!is.null(restart)) { 174 | message("> Packrat bootstrap successfully completed. ", 175 | "Restarting R and entering packrat mode...") 176 | return(restart()) 177 | } 178 | 179 | # Callers (source-erers) can define this hidden variable to make sure we don't enter packrat mode 180 | # Primarily useful for testing 181 | if (!exists(".__DONT_ENTER_PACKRAT_MODE__.") && interactive()) { 182 | message("> Packrat bootstrap successfully completed. Entering packrat mode...") 183 | packrat::on() 184 | } 185 | 186 | Sys.unsetenv("RSTUDIO_PACKRAT_BOOTSTRAP") 187 | 188 | } 189 | 190 | }) 191 | -------------------------------------------------------------------------------- /packrat/packrat.lock: -------------------------------------------------------------------------------- 1 | PackratFormat: 1.4 2 | PackratVersion: 0.4.6.9 3 | RVersion: 3.3.1 4 | Repos: BioCsoft=http://bioconductor.org/packages/3.1/bioc, 5 | BioCann=http://bioconductor.org/packages/3.1/data/annotation, 6 | BioCexp=http://bioconductor.org/packages/3.1/data/experiment, 7 | BioCextra=http://bioconductor.org/packages/3.1/extra, 8 | CRAN=http://cran.rstudio.com 9 | 10 | Package: BBmisc 11 | Source: CRAN 12 | Version: 1.9 13 | Hash: 0f4232b1208da9b625c1412c6ff328e6 14 | Requires: checkmate 15 | 16 | Package: BH 17 | Source: CRAN 18 | Version: 1.58.0-1 19 | Hash: 4f6db3b5311bb006f536a2e67278d058 20 | 21 | Package: BatchJobs 22 | Source: CRAN 23 | Version: 1.6 24 | Hash: 408fbd27762a92e1597f1a5f306fa248 25 | Requires: DBI, brew, checkmate, digest, RSQLite, sendmailR, BBmisc, 26 | stringr, fail 27 | 28 | Package: DBI 29 | Source: CRAN 30 | Version: 0.3.1 31 | Hash: 096699d1ac1cf530acfc646a0c90ee5d 32 | 33 | Package: MASS 34 | Source: CRAN 35 | Version: 7.3-44 36 | Hash: 3a901d1f70036f5b1e11bcdf8fdb66d9 37 | 38 | Package: R6 39 | Source: CRAN 40 | Version: 2.1.1 41 | Hash: 20a88b2c9c84aecff2702789a4d102f5 42 | 43 | Package: RColorBrewer 44 | Source: CRAN 45 | Version: 1.1-2 46 | Hash: c0d56cd15034f395874c870141870c25 47 | 48 | Package: RSQLite 49 | Source: CRAN 50 | Version: 1.0.0 51 | Hash: 3df52a54766f420d257debad962146c0 52 | Requires: DBI 53 | 54 | Package: Rcpp 55 | Source: CRAN 56 | Version: 0.12.3 57 | Hash: 11ace6a9a186c17a42e0fa2c49af1223 58 | 59 | Package: SQUAREM 60 | Source: CRAN 61 | Version: 2014.8-1 62 | Hash: e2282cd322c70a2d16a35880428f84ec 
63 | 64 | Package: ashr 65 | Source: github 66 | Version: 1.0.5 67 | Hash: 7b3d5f894f9b2907597b4c8b1089d8a4 68 | Requires: Rcpp, SQUAREM, assertthat, truncnorm, plyr, pscl, doParallel, 69 | testthat 70 | GithubRepo: ashr 71 | GithubUsername: stephens999 72 | GithubRef: master 73 | GithubSha1: d8c7743fcb9e03c369ff7a7db2271aad85a53d69 74 | 75 | Package: assertthat 76 | Source: CRAN 77 | Version: 0.1 78 | Hash: 0afb92b59b02593c70ff8046700ba9d3 79 | 80 | Package: base64enc 81 | Source: CRAN 82 | Version: 0.1-3 83 | Hash: c590d29e555926af053055e23ee79efb 84 | 85 | Package: bitops 86 | Source: CRAN 87 | Version: 1.0-6 88 | Hash: 67d0775189fd0041d95abca618c5c07e 89 | 90 | Package: brew 91 | Source: CRAN 92 | Version: 1.0-6 93 | Hash: 931f9972deae0f205e1c78a51f33149b 94 | 95 | Package: caTools 96 | Source: CRAN 97 | Version: 1.17.1 98 | Hash: 97cb6f6293cd18d17df77a6383cc6763 99 | Requires: bitops 100 | 101 | Package: checkmate 102 | Source: CRAN 103 | Version: 1.6.3 104 | Hash: 90b6c60390d3001934a4a0f70b975fc0 105 | 106 | Package: colorspace 107 | Source: CRAN 108 | Version: 1.2-6 109 | Hash: 00bb12245cd975c450cc4a960884fa15 110 | 111 | Package: crayon 112 | Source: CRAN 113 | Version: 1.3.1 114 | Hash: b61d34886cf0f4b4fc4e4f52ea249390 115 | Requires: memoise 116 | 117 | Package: dichromat 118 | Source: CRAN 119 | Version: 2.0-0 120 | Hash: 08eed0c80510af29bb15f840ccfe37ce 121 | 122 | Package: digest 123 | Source: CRAN 124 | Version: 0.6.9 125 | Hash: fd55d5a024f160fc001a5ece1e27782d 126 | 127 | Package: doParallel 128 | Source: CRAN 129 | Version: 1.0.10 130 | Hash: df91a7abfa938c06ad87b9a2b9269adb 131 | Requires: iterators, foreach 132 | 133 | Package: dplyr 134 | Source: CRAN 135 | Version: 0.4.3 136 | Hash: 466e853fc8da049e7812a9e56b9ad70f 137 | Requires: BH, DBI, R6, Rcpp, assertthat, lazyeval, magrittr 138 | 139 | Package: dscr 140 | Source: github 141 | Version: 0.1.1 142 | Hash: 2300895537986fa6886a5c5d033f9587 143 | Requires: assertthat, magrittr, plyr, dplyr, psych, shiny, reshape2, 144 | BatchJobs, knitr, ggplot2 145 | GithubRepo: dscr 146 | GithubUsername: stephens999 147 | GithubRef: master 148 | GithubSha1: 2610af83ade1a44cb7f966d829279191b9ba3630 149 | 150 | Package: evaluate 151 | Source: CRAN 152 | Version: 0.8 153 | Hash: aac00bd789bac10970b50e3b7e0cab04 154 | Requires: stringr 155 | 156 | Package: fail 157 | Source: CRAN 158 | Version: 1.3 159 | Hash: 6f916304b570a0dbe224092ba60f3498 160 | Requires: checkmate, BBmisc 161 | 162 | Package: foreach 163 | Source: CRAN 164 | Version: 1.4.3 165 | Hash: cd53ef4cf29dc59ce3f8c5c1af735fd1 166 | Requires: iterators 167 | 168 | Package: formatR 169 | Source: CRAN 170 | Version: 1.2.1 171 | Hash: 54c730c712edd6087972ecf99bf87c55 172 | 173 | Package: ggplot2 174 | Source: CRAN 175 | Version: 1.0.1 176 | Hash: 4aff1b269135bd2cdea378f4f2f2895e 177 | Requires: MASS, digest, gtable, proto, plyr, scales, reshape2 178 | 179 | Package: gtable 180 | Source: CRAN 181 | Version: 0.1.2 182 | Hash: 38a0f066373a60824cd3ade2362af363 183 | 184 | Package: highr 185 | Source: CRAN 186 | Version: 0.5.1 187 | Hash: 114ef5abcf58bebbf6ac083b9cacbbd8 188 | 189 | Package: htmltools 190 | Source: CRAN 191 | Version: 0.2.6 192 | Hash: 595e7dea94f5f049dfc1ed739f5161af 193 | Requires: digest 194 | 195 | Package: httpuv 196 | Source: CRAN 197 | Version: 1.3.3 198 | Hash: d440b2e539ccef77b9105051291a7628 199 | Requires: Rcpp 200 | 201 | Package: iterators 202 | Source: CRAN 203 | Version: 1.0.8 204 | Hash: 488b93c2a4166db0d15f1e8d882cb1d4 205 | 206 | Package: jsonlite 
207 | Source: CRAN 208 | Version: 0.9.17 209 | Hash: d44b8f8c89ff794d6ec8a07dde103d24 210 | 211 | Package: knitr 212 | Source: CRAN 213 | Version: 1.11 214 | Hash: 0a6d1006425040af05e29504af948535 215 | Requires: digest, formatR, highr, yaml, markdown, stringr, evaluate 216 | 217 | Package: labeling 218 | Source: CRAN 219 | Version: 0.3 220 | Hash: ecf589b42cd284b03a4beb9665482d3e 221 | 222 | Package: lazyeval 223 | Source: CRAN 224 | Version: 0.1.10 225 | Hash: 9679f1ac7f6bc07bc79755f34cd15e1f 226 | 227 | Package: locfdr 228 | Source: CRAN 229 | Version: 1.1-8 230 | Hash: d267ae300fd73b9cb6686175519402c3 231 | 232 | Package: magrittr 233 | Source: CRAN 234 | Version: 1.5 235 | Hash: bdc4d48c3135e8f3b399536ddf160df4 236 | 237 | Package: markdown 238 | Source: CRAN 239 | Version: 0.7.7 240 | Hash: fea2343a1119d61b0cc5c0a950d103a3 241 | Requires: mime 242 | 243 | Package: memoise 244 | Source: CRAN 245 | Version: 0.2.1 246 | Hash: 812e6a1dd77a0ca4da41f3239de8e447 247 | Requires: digest 248 | 249 | Package: mime 250 | Source: CRAN 251 | Version: 0.4 252 | Hash: b08c52dae92a0a11e64a4deea032ec33 253 | 254 | Package: mixfdr 255 | Source: CRAN 256 | Version: 1.0 257 | Hash: 8aa5ecb4434d568fd75010689b69bf2c 258 | 259 | Package: mnormt 260 | Source: CRAN 261 | Version: 1.5-3 262 | Hash: d23b1cdede7a20437b68aa11f51958b8 263 | 264 | Package: munsell 265 | Source: CRAN 266 | Version: 0.4.2 267 | Hash: 72d343a6664778029f49a7ad36de1cab 268 | Requires: colorspace 269 | 270 | Package: packrat 271 | Source: github 272 | Version: 0.4.6-9 273 | Hash: b34872d6ae7c3b4d3191490e4c3d384b 274 | GithubRepo: packrat 275 | GithubUsername: rstudio 276 | GithubRef: master 277 | GithubSha1: dfa32291eb8ae9fad2868d0fc1ffd5516aa3a402 278 | 279 | Package: plyr 280 | Source: CRAN 281 | Version: 1.8.3 282 | Hash: e38fe93fa50135fac98421abb43bbfea 283 | Requires: Rcpp 284 | 285 | Package: praise 286 | Source: CRAN 287 | Version: 1.0.0 288 | Hash: 77da8f1df873a4b91e5c4a68fe2fb1b6 289 | 290 | Package: proto 291 | Source: CRAN 292 | Version: 0.3-10 293 | Hash: 05d7ad64db29a39d1b02c4b9f1f4edb2 294 | 295 | Package: pscl 296 | Source: CRAN 297 | Version: 1.4.9 298 | Hash: b89db41fa250ee766e0be58c8b4227b0 299 | Requires: MASS 300 | 301 | Package: psych 302 | Source: CRAN 303 | Version: 1.5.8 304 | Hash: 68c4e1212b52e8bf12ef1bfa1e950409 305 | Requires: mnormt 306 | 307 | Package: qvalue 308 | Source: github 309 | Version: 2.1.1 310 | Hash: ad35373b6d521d6bcf802b06acf1b386 311 | Requires: reshape2, ggplot2 312 | GithubRepo: qvalue 313 | GithubUsername: jdstorey 314 | GithubRef: master 315 | GithubSha1: b3e906542ce4b56ee131503b44236cea75b9f6cc 316 | 317 | Package: reshape2 318 | Source: CRAN 319 | Version: 1.4.1 320 | Hash: 55963009094746c3aef64bfbf9eb4731 321 | Requires: Rcpp, plyr, stringr 322 | 323 | Package: rmarkdown 324 | Source: CRAN 325 | Version: 0.8.1 326 | Hash: 13ce5c86c765918f857c602d97d1ffb9 327 | Requires: yaml, caTools, htmltools, knitr 328 | 329 | Package: scales 330 | Source: CRAN 331 | Version: 0.3.0 332 | Hash: 9c140be1bda3cf690f8714cfc810aa45 333 | Requires: RColorBrewer, Rcpp, dichromat, labeling, plyr, munsell 334 | 335 | Package: sendmailR 336 | Source: CRAN 337 | Version: 1.2-1 338 | Hash: 26561ceb5871f3983b40c2bfdf90ed09 339 | Requires: base64enc 340 | 341 | Package: shiny 342 | Source: CRAN 343 | Version: 0.12.2 344 | Hash: 31cd1fe3028ac7ef846adacf8ed5bfd2 345 | Requires: R6, digest, jsonlite, mime, xtable, httpuv, htmltools 346 | 347 | Package: stringi 348 | Source: CRAN 349 | Version: 1.0-1 350 | Hash: 
cf342bc407bd5daec77ed1009d5244e1 351 | 352 | Package: stringr 353 | Source: CRAN 354 | Version: 1.0.0 355 | Hash: 2676dd5f88890910962b733b0f9540e1 356 | Requires: magrittr, stringi 357 | 358 | Package: testthat 359 | Source: CRAN 360 | Version: 0.11.0 361 | Hash: e10882241e569f584fb1f8c19599a13f 362 | Requires: digest, praise, crayon 363 | 364 | Package: truncnorm 365 | Source: CRAN 366 | Version: 1.0-7 367 | Hash: eefedc58b087a7f09ae770b6e159acb7 368 | 369 | Package: xtable 370 | Source: CRAN 371 | Version: 1.7-4 372 | Hash: 30ed90ebdd90a529aab31fd6792a8d61 373 | 374 | Package: yaml 375 | Source: CRAN 376 | Version: 2.1.13 377 | Hash: 4854ccabebc225e8a7309fb4a74980de 378 | -------------------------------------------------------------------------------- /packrat/packrat.opts: -------------------------------------------------------------------------------- 1 | auto.snapshot: TRUE 2 | use.cache: FALSE 3 | print.banner.on.startup: auto 4 | vcs.ignore.lib: TRUE 5 | vcs.ignore.src: TRUE 6 | external.packages: 7 | local.repos: ~/Downloads/ 8 | load.external.packages.on.startup: TRUE 9 | ignored.packages: 10 | -------------------------------------------------------------------------------- /packrat/src/mixfdr/mixfdr_1.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/packrat/src/mixfdr/mixfdr_1.0.tar.gz -------------------------------------------------------------------------------- /packrat/src/packrat/packrat_0.4.4.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/packrat/src/packrat/packrat_0.4.4.tar.gz -------------------------------------------------------------------------------- /paper/Makefile: -------------------------------------------------------------------------------- 1 | %.png: %.pdf 2 | convert -density 300 $< $@ 3 | 4 | 5 | main.pdf: main.tex ../analysis/figure/plot_lfsr.Rmd/plot_lfdr-1.png ../analysis/figure/plot_lfsr.Rmd/plot_lfsr-1.png ../analysis/figure/plot_lfsr.Rmd/plot_lfsr_s-1.png ../analysis/figure/plot_lfsr.Rmd/plot_lfsr_s_nn-1.png 6 | pdflatex main.tex; bibtex main; pdflatex main.tex; pdflatex main.tex 7 | -------------------------------------------------------------------------------- /paper/README.md: -------------------------------------------------------------------------------- 1 | Directory for paper 2 | -------------------------------------------------------------------------------- /paper/main.bbl: -------------------------------------------------------------------------------- 1 | \begin{thebibliography}{10} 2 | \providecommand{\url}[1]{\texttt{#1}} 3 | \providecommand{\urlprefix}{URL } 4 | \expandafter\ifx\csname urlstyle\endcsname\relax 5 | \providecommand{\doi}[1]{doi:\discretionary{}{}{}#1}\else 6 | \providecommand{\doi}{doi:\discretionary{}{}{}\begingroup 7 | \urlstyle{rm}\Url}\fi 8 | \providecommand{\bibAnnoteFile}[1]{% 9 | \IfFileExists{#1}{\begin{quotation}\noindent\textsc{Key:} #1\\ 10 | \textsc{Annotation:}\ \input{#1}\end{quotation}}{}} 11 | \providecommand{\bibAnnote}[2]{% 12 | \begin{quotation}\noindent\textsc{Key:} #1\\ 13 | \textsc{Annotation:}\ #2\end{quotation}} 14 | \providecommand{\eprint}[2][]{\url{#2}} 15 | 16 | \bibitem{benjamini1995controlling} 17 | Benjamini Y, Hochberg Y (1995) Controlling the false discovery rate: a 18 | practical and powerful approach to multiple testing. 
19 | \newblock Journal of the Royal Statistical Society Series B (Methodological) : 20 | 289--300. 21 | \bibAnnoteFile{benjamini1995controlling} 22 | 23 | \bibitem{greenland1991empirical} 24 | Greenland S, Robins JM (1991) Empirical-bayes adjustments for multiple 25 | comparisons are sometimes useful. 26 | \newblock Epidemiology : 244--251. 27 | \bibAnnoteFile{greenland1991empirical} 28 | 29 | \bibitem{efron2001empirical} 30 | Efron B, Tibshirani R, Storey JD, Tusher V (2001) Empirical bayes analysis of a 31 | microarray experiment. 32 | \newblock Journal of the American statistical association 96: 1151--1160. 33 | \bibAnnoteFile{efron2001empirical} 34 | 35 | \bibitem{efron2002empirical} 36 | Efron B, Tibshirani R (2002) Empirical bayes methods and false discovery rates 37 | for microarrays. 38 | \newblock Genetic epidemiology 23: 70--86. 39 | \bibAnnoteFile{efron2002empirical} 40 | 41 | \bibitem{efron2003robbins} 42 | Efron B, et~al. (2003) Robbins, empirical bayes and microarrays. 43 | \newblock The annals of Statistics 31: 366--378. 44 | \bibAnnoteFile{efron2003robbins} 45 | 46 | \bibitem{efron2008microarrays} 47 | Efron B (2008) Microarrays, empirical bayes and the two-groups model. 48 | \newblock Statistical Science 23: 1--22. 49 | \bibAnnoteFile{efron2008microarrays} 50 | 51 | \bibitem{efron2010large} 52 | Efron B (2010) Large-scale inference: empirical Bayes methods for estimation, 53 | testing, and prediction, volume~1. 54 | \newblock Cambridge University Press. 55 | \bibAnnoteFile{efron2010large} 56 | 57 | \bibitem{kendziorski2003parametric} 58 | Kendziorski C, Newton M, Lan H, Gould M (2003) On parametric empirical bayes 59 | methods for comparing multiple groups using replicated gene expression 60 | profiles. 61 | \newblock Statistics in medicine 22: 3899--3914. 62 | \bibAnnoteFile{kendziorski2003parametric} 63 | 64 | \bibitem{newton2004detecting} 65 | Newton MA, Noueiry A, Sarkar D, Ahlquist P (2004) Detecting differential gene 66 | expression with a semiparametric hierarchical mixture method. 67 | \newblock Biostatistics 5: 155--176. 68 | \bibAnnoteFile{newton2004detecting} 69 | 70 | \bibitem{datta2005empirical} 71 | Datta S, Datta S (2005) Empirical bayes screening of many p-values with 72 | applications to microarray studies. 73 | \newblock Bioinformatics 21: 1987--1994. 74 | \bibAnnoteFile{datta2005empirical} 75 | 76 | \bibitem{muralidharan2010empirical} 77 | Muralidharan O (2010) An empirical bayes mixture method for effect size and 78 | false discovery rate estimation. 79 | \newblock The Annals of Applied Statistics : 422--438. 80 | \bibAnnoteFile{muralidharan2010empirical} 81 | 82 | \bibitem{ploner2006multidimensional} 83 | Ploner A, Calza S, Gusnanto A, Pawitan Y (2006) Multidimensional local false 84 | discovery rate for microarray studies. 85 | \newblock Bioinformatics 22: 556--565. 86 | \bibAnnoteFile{ploner2006multidimensional} 87 | 88 | \bibitem{benjamini2005false} 89 | Benjamini Y, Yekutieli D (2005) False discovery rate--adjusted multiple 90 | confidence intervals for selected parameters. 91 | \newblock Journal of the American Statistical Association 100: 71--81. 92 | \bibAnnoteFile{benjamini2005false} 93 | 94 | \bibitem{zhao2012empirical} 95 | Zhao Z, Gene~Hwang J (2012) Empirical bayes false coverage rate controlling 96 | confidence intervals. 97 | \newblock Journal of the Royal Statistical Society: Series B (Statistical 98 | Methodology) 74: 871--891. 
99 | \bibAnnoteFile{zhao2012empirical} 100 | 101 | \bibitem{gelman2012we} 102 | Gelman A, Hill J, Yajima M (2012) Why we (usually) don't have to worry about 103 | multiple comparisons. 104 | \newblock Journal of Research on Educational Effectiveness 5: 189--211. 105 | \bibAnnoteFile{gelman2012we} 106 | 107 | \bibitem{donoho:1995} 108 | Donoho D, Johnstone I (1995) Adapting to unknown smoothness via wavelet 109 | shrinkage. 110 | \newblock Journal of the American Statistical Association 90: 1200--1224. 111 | \bibAnnoteFile{donoho:1995} 112 | 113 | \bibitem{xing2016smoothing} 114 | Xing Z, Stephens M (2016) Smoothing via adaptive shrinkage (smash): denoising 115 | poisson and heteroskedastic gaussian signals. 116 | \newblock arXiv preprint arXiv:1605.07787 . 117 | \bibAnnoteFile{xing2016smoothing} 118 | 119 | \bibitem{lu2016variance} 120 | Lu M, Stephens M (2016) Variance adaptive shrinkage (vash): Flexible empirical 121 | bayes estimation of variances. 122 | \newblock bioRxiv : 048660. 123 | \bibAnnoteFile{lu2016variance} 124 | 125 | \bibitem{johnstone2004needles} 126 | Johnstone IM, Silverman BW (2004) Needles and straw in haystacks: Empirical 127 | bayes estimates of possibly sparse sequences. 128 | \newblock Annals of Statistics : 1594--1649. 129 | \bibAnnoteFile{johnstone2004needles} 130 | 131 | \bibitem{efron2007correlation} 132 | Efron B (2007) Correlation and large-scale simultaneous significance testing. 133 | \newblock Journal of the American Statistical Association 102. 134 | \bibAnnoteFile{efron2007correlation} 135 | 136 | \bibitem{leek:2007} 137 | Leek JT, Storey JD (2007) Capturing heterogeneity in gene expression studies by 138 | surrogate variable analysis. 139 | \newblock PLoS Genet 3: 1724-35. 140 | \bibAnnoteFile{leek:2007} 141 | 142 | \bibitem{boyd2004convex} 143 | Boyd S, Vandenberghe L (2004) Convex optimization. 144 | \newblock Cambridge university press. 145 | \bibAnnoteFile{boyd2004convex} 146 | 147 | \bibitem{koenker2013convex} 148 | Koenker R, Mizera I (2013) Convex optimization in {R}. 149 | \newblock Journal of Statistical Software . 150 | \bibAnnoteFile{koenker2013convex} 151 | 152 | \bibitem{tukey1991philosophy} 153 | Tukey JW (1991) The philosophy of multiple comparisons. 154 | \newblock Statistical science : 100--116. 155 | \bibAnnoteFile{tukey1991philosophy} 156 | 157 | \bibitem{tukey1962future} 158 | Tukey JW (1962) The future of data analysis. 159 | \newblock The Annals of Mathematical Statistics : 1--67. 160 | \bibAnnoteFile{tukey1962future} 161 | 162 | \bibitem{gelman2000type} 163 | Gelman A, Tuerlinckx F (2000) Type s error rates for classical and bayesian 164 | single and multiple comparison procedures. 165 | \newblock Computational Statistics 15: 373--390. 166 | \bibAnnoteFile{gelman2000type} 167 | 168 | \bibitem{storey.03} 169 | Storey J (2003) The positive false discovery rate: A {Bayesian} interpretation 170 | and the q-value. 171 | \newblock The Annals of Statistics 31: 2013--2035. 172 | \bibAnnoteFile{storey.03} 173 | 174 | \bibitem{wakefield:2009} 175 | Wakefield J (2009) Bayes factors for genome-wide association studies: 176 | comparison with p-values. 177 | \newblock Genet Epidemiol 33: 79-86. 178 | \bibAnnoteFile{wakefield:2009} 179 | 180 | \bibitem{efron1993bayes} 181 | Efron B (1993) Bayes and likelihood calculations from confidence intervals. 182 | \newblock Biometrika 80: 3--26. 
183 | \bibAnnoteFile{efron1993bayes} 184 | 185 | \bibitem{carvalho2010horseshoe} 186 | Carvalho CM, Polson NG, Scott JG (2010) The horseshoe estimator for sparse 187 | signals. 188 | \newblock Biometrika : asq017. 189 | \bibAnnoteFile{carvalho2010horseshoe} 190 | 191 | \bibitem{moser:2015} 192 | Moser G, Lee SH, Hayes BJ, Goddard ME, Wray NR, et~al. (2015) Simultaneous 193 | discovery, estimation and prediction analysis of complex traits using a 194 | bayesian mixture model. 195 | \newblock PLoS Genet 11: e1004969. 196 | \bibAnnoteFile{moser:2015} 197 | 198 | \bibitem{cordy1997deconvolution} 199 | Cordy CB, Thomas DR (1997) Deconvolution of a distribution function. 200 | \newblock Journal of the American Statistical Association 92: 1459--1465. 201 | \bibAnnoteFile{cordy1997deconvolution} 202 | 203 | \bibitem{xie2012sure} 204 | Xie X, Kou S, Brown LD (2012) Sure estimates for a heteroscedastic hierarchical 205 | model. 206 | \newblock Journal of the American Statistical Association 107: 1465--1479. 207 | \bibAnnoteFile{xie2012sure} 208 | 209 | \bibitem{sarkar:2014} 210 | Sarkar A, Mallick BK, Staudenmayer J, Pati D, Carroll RJ (2014) Bayesian 211 | semiparametric density deconvolution in the presence of conditionally 212 | heteroscedastic measurement errors. 213 | \newblock J Comput Graph Stat 23: 1101-1125. 214 | \bibAnnoteFile{sarkar:2014} 215 | 216 | \bibitem{koenker2014convex} 217 | Koenker R, Mizera I (2014) Convex optimization, shape constraints, compound 218 | decisions, and empirical bayes rules. 219 | \newblock Journal of the American Statistical Association 109: 674--685. 220 | \bibAnnoteFile{koenker2014convex} 221 | 222 | \bibitem{efron2016empirical} 223 | Efron B (2016) Empirical bayes deconvolution estimates. 224 | \newblock Biometrika 103: 1--20. 225 | \bibAnnoteFile{efron2016empirical} 226 | 227 | \bibitem{jiang2009general} 228 | Jiang W, Zhang CH (2009) General maximum likelihood empirical bayes estimation 229 | of normal means. 230 | \newblock The Annals of Statistics 37: 1647--1684. 231 | \bibAnnoteFile{jiang2009general} 232 | 233 | \bibitem{guan.stephens.08} 234 | Guan Y, Stephens M (2008) Practical issues in imputation-based association 235 | mapping. 236 | \newblock PLoS Genet 4. 237 | \bibAnnoteFile{guan.stephens.08} 238 | 239 | \bibitem{devlin1999genomic} 240 | Devlin B, Roeder K (1999) Genomic control for association studies. 241 | \newblock Biometrics 55: 997--1004. 242 | \bibAnnoteFile{devlin1999genomic} 243 | 244 | \bibitem{efron2004large} 245 | Efron B (2004) Large-scale simultaneous hypothesis testing: the choice of a 246 | null hypothesis. 247 | \newblock Journal of the American Statistical Association 99: 96--104. 248 | \bibAnnoteFile{efron2004large} 249 | 250 | \bibitem{pritchard.stephens.rosenberg.donnelly.00} 251 | Pritchard JK, Stephens M, Rosenberg NA, Donnelly P (2000) Association mapping 252 | in structured populations. 253 | \newblock American Journal of Human Genetics 67: 170--181. 254 | \bibAnnoteFile{pritchard.stephens.rosenberg.donnelly.00} 255 | 256 | \bibitem{price:2006} 257 | Price AL, Patterson NJ, Plenge RM, Weinblatt ME, Shadick NA, et~al. (2006) 258 | Principal components analysis corrects for stratification in genome-wide 259 | association studies. 260 | \newblock Nat Genet 38: 904-9. 261 | \bibAnnoteFile{price:2006} 262 | 263 | \bibitem{gagnon2012using} 264 | Gagnon-Bartsch JA, Speed TP (2012) Using control genes to correct for unwanted 265 | variation in microarray data. 266 | \newblock Biostatistics 13: 539--552. 
267 | \bibAnnoteFile{gagnon2012using} 268 | 269 | \bibitem{smyth:2004} 270 | Smyth GK (2004) Linear models and empirical bayes methods for assessing 271 | differential expression in microarray experiments. 272 | \newblock Stat Appl Genet Mol Biol 3: Article3. 273 | \bibAnnoteFile{smyth:2004} 274 | 275 | \bibitem{khintchine1938unimodal} 276 | Khintchine AY (1938) On unimodal distributions. 277 | \newblock Izv Nauchno-Isled Inst Mat Mech Tomsk Gos Univ 2: 1--7. 278 | \bibAnnoteFile{khintchine1938unimodal} 279 | 280 | \bibitem{shepp1962symmetric} 281 | Shepp L (1962) Symmetric random walk. 282 | \newblock Transactions of the American Mathematical Society : 144--153. 283 | \bibAnnoteFile{shepp1962symmetric} 284 | 285 | \bibitem{feller1971introduction} 286 | Feller W (1971) An introduction to probability and its applications, vol. ii. 287 | \newblock Wiley, New York . 288 | \bibAnnoteFile{feller1971introduction} 289 | 290 | \bibitem{REBayes} 291 | Koenker R (2015) REBayes: Empirical Bayes Estimation and Inference in R. 292 | \newblock \urlprefix\url{http://CRAN.R-project.org/package=REBayes}. 293 | \newblock R package version 0.58. 294 | \bibAnnoteFile{REBayes} 295 | 296 | \bibitem{Rmosek} 297 | MOSEK ApS. Rmosek: The R to MOSEK Optimization Interface. 298 | \newblock \urlprefix\url{http://rmosek.r-forge.r-project.org/, 299 | http://www.mosek.com/}. 300 | \newblock R package version 7.1.2. 301 | \bibAnnoteFile{Rmosek} 302 | 303 | \bibitem{dempster77} 304 | Dempster AP, Laird NM, Rubin DB (1977) Maximum likelihood estimation from 305 | incomplete data via the {EM} algorithm (with discussion). 306 | \newblock Journal of the Royal Statistical Society, series B 39: 1--38. 307 | \bibAnnoteFile{dempster77} 308 | 309 | \bibitem{varadhan2008simple} 310 | Varadhan R, Roland C (2008) Simple and globally convergent methods for 311 | accelerating the convergence of any em algorithm. 312 | \newblock Scandinavian Journal of Statistics 35: 335--353. 313 | \bibAnnoteFile{varadhan2008simple} 314 | 315 | \bibitem{Rcore:2012} 316 | {R Core Team} (2012) R: A Language and Environment for Statistical Computing. 317 | \newblock R Foundation for Statistical Computing, Vienna, Austria. 318 | \newblock \urlprefix\url{http://www.R-project.org/}. 319 | \newblock Accessed June 3, 2013. 320 | \bibAnnoteFile{Rcore:2012} 321 | 322 | \bibitem{ggplot2} 323 | Wickham H (2009) ggplot2: elegant graphics for data analysis. 324 | \newblock Springer New York. 325 | \bibAnnoteFile{ggplot2} 326 | 327 | \bibitem{xie2013dynamic} 328 | Xie Y (2013) Dynamic Documents with R and knitr, volume~29. 329 | \newblock CRC Press. 330 | \bibAnnoteFile{xie2013dynamic} 331 | 332 | \end{thebibliography} 333 | -------------------------------------------------------------------------------- /paper/main.blg: -------------------------------------------------------------------------------- 1 | This is BibTeX, Version 0.99d (TeX Live 2015) 2 | Capacity: max_strings=35307, hash_size=35307, hash_prime=30011 3 | The top-level auxiliary file: main.aux 4 | The style file: /Users/stephens/Dropbox/Documents/stylefiles/plos2009.bst 5 | Database file #1: /Users/stephens/Dropbox/Documents/mainbib.bib 6 | Reallocated singl_function (elt_size=4) to 100 items from 50. 
7 | You've used 54 entries, 8 | 2441 wiz_defined-function locations, 9 | 882 strings with 13046 characters, 10 | and the built_in function-call counts, 38631 in all, are: 11 | = -- 3640 12 | > -- 452 13 | < -- 2 14 | + -- 226 15 | - -- 113 16 | * -- 3280 17 | := -- 4635 18 | add.period$ -- 112 19 | call.type$ -- 54 20 | change.case$ -- 50 21 | chr.to.int$ -- 0 22 | cite$ -- 108 23 | duplicate$ -- 3896 24 | empty$ -- 4113 25 | format.name$ -- 147 26 | if$ -- 8598 27 | int.to.chr$ -- 0 28 | int.to.str$ -- 54 29 | missing$ -- 486 30 | newline$ -- 289 31 | num.names$ -- 54 32 | pop$ -- 578 33 | preamble$ -- 1 34 | purify$ -- 0 35 | quote$ -- 0 36 | skip$ -- 259 37 | stack$ -- 0 38 | substring$ -- 5643 39 | swap$ -- 928 40 | text.length$ -- 2 41 | text.prefix$ -- 0 42 | top$ -- 0 43 | type$ -- 0 44 | warning$ -- 0 45 | while$ -- 256 46 | width$ -- 56 47 | write$ -- 599 48 | -------------------------------------------------------------------------------- /paper/main.dvi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/paper/main.dvi -------------------------------------------------------------------------------- /paper/main.fff: -------------------------------------------------------------------------------- 1 | \begin{figure} 2 | \center \includegraphics[height=6in]{../analysis/figure/makefig_FDReg.Rmd/decomp_ZA-1.pdf} 3 | \caption{Illustration that the unimodal assumption (UA) in \ashr can produce very different results from existing methods. 4 | The figure shows, for a single simulated dataset, the way different methods decompose $p$ values (left) and $z$ scores (right) into a null component (dark blue) and an alternative component (cyan). In the $z$ score space the alternative distribution is placed on the bottom to highlight the differences in its shape among methods. 5 | The three existing methods (\qvalue, \locfdr, \mixfdr) all effectively make the Zero Assumption, which results in a ``hole" in the alternative $z$ score distribution around 0. 6 | In contrast the method introduced here (\ashr) makes the Unimodal Assumption -- that the effect sizes, and thus the $z$ scores, have a unimodal distribution about 0 -- which yields a very different decomposition. (In this case the \ashr decomposition is closer to the truth: the data were simulated under a model where all of the effects are non-zero, so the ``true" decomposition would make everything cyan.)} \label{fig:ZA} 7 | \end{figure} 8 | \efloatseparator 9 | 10 | \begin{figure} 11 | \begin{center} 12 | \begin{subfigure}{\textwidth} 13 | \includegraphics[width=\textwidth]{../analysis/figure/plot_egdens.Rmd/scenario_density-1.pdf} 14 | \caption{Densities of non-zero effects, $g_1$, used in simulations.} \label{fig:altdens} 15 | \end{subfigure} 16 | \begin{subfigure}{\textwidth} 17 | \includegraphics[width=\textwidth]{../analysis/figure/plot_pi0est.Rmd/plot_pi0est-1.pdf} 18 | \caption{Comparison of true and estimated values of $\pi_0$. When the UA holds all methods yield conservative (over-)estimates for $\pi_0$, with \ashr being least conservative, and hence most accurate. When the UA does not hold (``bimodal" scenario) the \ashr estimates are slightly anti-conservative.} \label{fig:pi0sims} 19 | \end{subfigure} 20 | \begin{subfigure}{\textwidth} 21 | \includegraphics[width=\textwidth]{../analysis/figure/plot_lfsr.Rmd/plot_lfdr-1.pdf} 22 | \caption{Comparison of true and estimated $\lfdr$ from \ashr (ash.n). 
Black line is $y=x$ and red line is $y=2x$. Estimates of $\lfdr$ are conservative when UA holds, due to conservative estimates of $\pi_0$.} \label{fig:lfdr} 23 | \end{subfigure} 24 | \begin{subfigure}{\textwidth} 25 | \includegraphics[width=\textwidth]{../analysis/figure/plot_lfsr.Rmd/plot_lfsr-1.pdf} 26 | \caption{As in c), but for $\lfsr$ instead of $\lfdr$. Estimates of $\lfsr$ are consistently less conservative than $\lfdr$ when UA holds, and also less anti-conservative in bimodal scenario.} \label{fig:lfsr} 27 | \end{subfigure} 28 | \end{center} 29 | \caption{Results of simulation studies (constant precision $s_j=1$).} 30 | \end{figure} 31 | \efloatseparator 32 | 33 | \begin{figure}[h!] 34 | \begin{subfigure}{\textwidth} 35 | \includegraphics[height=2in]{../analysis/figure/plot_cdf_eg.Rmd/egcdf-1.pdf} 36 | \caption{Example estimated cdfs for single data sets compared with truth. The unimodal assumption made by the ash methods effectively regularizes estimates compared with \mixfdr.} 37 | \end{subfigure} 38 | \begin{subfigure}{\textwidth} 39 | \includegraphics[height=2in]{../analysis/figure/plot_cdf_eg.Rmd/mean_cdf-1.pdf} 40 | \caption{Average estimated cdfs across $\sim10$ data sets compared with truth; methods here use penalty (\ref{eqn:penalty}) so $\pi_0$ is systematically overestimated.} 41 | \end{subfigure} 42 | \begin{subfigure}{\textwidth} 43 | \includegraphics[height=2in]{../analysis/figure/plot_cdf_eg.Rmd/mean_cdf_nopen-1.pdf} 44 | \caption{Average estimated cdfs across $\sim10$ data sets compared with truth; methods here do not use penalty (\ref{eqn:penalty}) so $\pi_0$ is not systematically overestimated. Systematic differences from the truth in ``skew" and ``bimodal" scenarios highlight the effects of model mis-specification.} 45 | \end{subfigure} 46 | \caption{Comparisons of estimated cdfs of $g$ and true cdf of $g$. See Figure \ref{fig:pi0sims} for simulation scenarios.} \label{fig:egcdf} 47 | \end{figure} 48 | \efloatseparator 49 | 50 | \begin{figure} 51 | \begin{subfigure}{\textwidth} 52 | \centering\includegraphics[width=4in]{../analysis/figure/make_GOODPOOR_figs.Rmd/GOODPOOReg_hist-1.pdf} 53 | \label{fig:goodpoor-hist} 54 | \caption{Density histograms of $p$ values for good-precision, poor-precision, and combined observations} 55 | \end{subfigure} 56 | \begin{subfigure}{\textwidth} 57 | \centering\includegraphics[width=4in]{../analysis/figure/make_GOODPOOR_figs.Rmd/GOODPOOReg_scatter-1.pdf} 58 | \caption{Comparison of results of different methods applied to good-precision observations only ($x$ axis) and combined data ($y$ axis). Each point shows the ``significance" ($q$ values from \qvalue; $\lfdr$ for \locfdr; $\lfsr$ for \ashr) of a good-precision observation under the two different analyses.} \label{fig:goodpoor-scatter} 59 | \end{subfigure} 60 | \caption{Simulation illustrating how, for existing FDR methods, 61 | poor-precision observations can contaminate signal from good-precision observations. The top panel (a) illustrates that when 62 | $p$ values from good-precision observations (left) and from poor-precision observations (center) are combined (right), they produce 63 | a distribution of $p$ values with less overall signal - and so, by conventional methods, will give a higher estimated FDR at any given threshold. 
64 | The bottom panel (b) illustrates this behavior directly for the methods \qvalue and \locfdr: the $q$-values from \qvalue and the $\lfdr$ estimates from \locfdr are higher when applied to all data than when applied to good-precision observations only. In contrast the methods described here (\ashr) produce effectively the same results (here, the lfsr) in the good-precision and combined data analyses.} \label{fig:goodpoor} 65 | \end{figure} 66 | \efloatseparator 67 | 68 | \begin{figure} 69 | \centering\includegraphics[width=4in]{../analysis/figure/make_GOODPOOR_figs.Rmd/lfsr_vs_pval_GOODPOOR-1.pdf} 70 | \caption{Figure illustrating effects of prior assumptions on re-ordering of significance. Left panel shows results under our ``default prior" which assumes that effects $\beta_j$ are identically distributed, independent of $s_j$. Right panel shows results under the ``$p$-value prior", which assumes that $z$ scores $\beta_j/s_j$ are identically distributed, independent of $s_j$.} \label{fig:lfsr_pval} 71 | \end{figure} 72 | \efloatseparator 73 | 74 | \begin{figure} 75 | \begin{center} 76 | \begin{subfigure}{\textwidth} 77 | \includegraphics[width=\textwidth]{../analysis/figure/plot_lfsr.Rmd/plot_lfsr_s_nn-1.pdf} 78 | \caption{Comparison of true and estimated $\lfsr$ when data are simulated with no point mass at zero ($\pi_0=0$), and also analyzed by \ashr with no point mass on 0 (and mixture of normal components for $g$). Black line is $y=x$ and red line is $y=2x$. The results illustrate how estimates of $\lfsr$ can be more accurate in this case. That is, assuming there is no point mass can be beneficial if that is indeed true.} \label{fig:lfsr-nn} 79 | \end{subfigure} 80 | \begin{subfigure}{\textwidth} 81 | \includegraphics[width=\textwidth]{../analysis/figure/plot_lfsr.Rmd/plot_lfsr_s-1.pdf} 82 | \caption{Comparison of true and estimated $\lfsr$ when data are simulated with point mass at zero (drawn uniformly from [0,1] in each simulation), but analyzed by \ashr with no point mass on 0 (and mixture of normal components for $g$). Black line is $y=x$ and red line is $y=2x$. The results illustrate how estimates of $\lfsr$ can be anti-conservative if we assume there is no point mass when the truth is that there is a point mass.} \label{fig:lfsr-s} 83 | \end{subfigure} 84 | \end{center} 85 | \caption{Illustration of effects of excluding a point mass from the analysis.} \label{fig:lfsr-nopointmass} 86 | \end{figure} 87 | \efloatseparator 88 | 89 | -------------------------------------------------------------------------------- /paper/main.lof: -------------------------------------------------------------------------------- 1 | \contentsline {figure}{\numberline {1}{\ignorespaces Illustration that the unimodal assumption (UA) in {\tt ashr}\xspace can produce very different results from existing methods. The figure shows, for a single simulated dataset, the way different methods decompose $p$ values (left) and $z$ scores (right) into a null component (dark blue) and an alternative component (cyan). In the $z$ score space the alternative distribution is placed on the bottom to highlight the differences in its shape among methods. The three existing methods ({\tt qvalue}\xspace , {\tt locfdr}\xspace , {\tt mixfdr}\xspace ) all effectively make the Zero Assumption, which results in a ``hole" in the alternative $z$ score distribution around 0. 
In contrast the method introduced here ({\tt ashr}\xspace ) makes the Unimodal Assumption -- that the effect sizes, and thus the $z$ scores, have a unimodal distribution about 0 -- which yields a very different decomposition. (In this case the {\tt ashr}\xspace decomposition is closer to the truth: the data were simulated under a model where all of the effects are non-zero, so the ``true" decomposition would make everything cyan.)\relax }}{38} 2 | \contentsline {figure}{\numberline {2}{\ignorespaces Results of simulation studies (constant precision $s_j=1$).\relax }}{39} 3 | \contentsline {figure}{\numberline {3}{\ignorespaces Comparisons of estimated cdfs of $g$ and true cdf of $g$. See Figure 2b\hbox {} for simulation scenarios.\relax }}{40} 4 | \contentsline {figure}{\numberline {4}{\ignorespaces Simulation illustrating how, for existing FDR methods, poor-precision observations can contaminate signal from good-precision observations. The top panel (a) illustrates that when $p$ values from good-precision observations (left) and from poor-precision observations (center) are combined (right), they produce a distribution of $p$ values with less overall signal - and so, by conventional methods, will give a higher estimated FDR at any given threshold. The bottom panel (b) illustrates this behavior directly for the methods {\tt qvalue}\xspace and {\tt locfdr}\xspace : the $q$-values from {\tt qvalue}\xspace and the $\textit {lfdr}$ estimates from {\tt locfdr}\xspace are higher when applied to all data than when applied to good-precision observations only. In contrast the methods described here ({\tt ashr}\xspace ) produce effectively the same results (here, the lfsr) in the good-precision and combined data analyses.\relax }}{41} 5 | \contentsline {figure}{\numberline {5}{\ignorespaces Figure illustrating effects of prior assumptions on re-ordering of significance. Left panel shows results under our ``default prior" which assumes that effects $\beta _j$ are identically distributed, independent of $s_j$. Right panel shows results under the ``$p$-value prior", which assumes that $z$ scores $\beta _j/s_j$ are identically distributed, independent of $s_j$.\relax }}{42} 6 | \contentsline {figure}{\numberline {6}{\ignorespaces Illustration of effects of excluding a point mass from the analysis.\relax }}{43} 7 | -------------------------------------------------------------------------------- /paper/main.lot: -------------------------------------------------------------------------------- 1 | \contentsline {table}{\numberline {1}{\ignorespaces Table of empirical coverage for nominal 95\% lower credible bounds\relax }}{45} 2 | \contentsline {table}{\numberline {2}{\ignorespaces Summary of simulation scenarios considered\relax }}{46} 3 | \contentsline {table}{\numberline {3}{\ignorespaces Table of empirical coverage for nominal 95\% lower credible bounds for methods {\it without} the penalty term. 
\relax }}{47} 4 | -------------------------------------------------------------------------------- /paper/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenslab/ash/d06047e47fd78922e47b183a380dc0581db571bc/paper/main.pdf -------------------------------------------------------------------------------- /paper/main.ttt: -------------------------------------------------------------------------------- 1 | \begin{table}[!ht] 2 | \begin{subtable}{\textwidth} 3 | \centering\input{../analysis/table/scoverage_all.tex} 4 | \caption{All observations. Coverage rates are generally satisfactory, except for the extreme ``spiky" scenario. This is due to the penalty term (\ref{eqn:penalty}) which tends to cause over-shrinking towards zero. Removing this penalty term produces coverage rates closer to the nominal levels for uniform and normal methods (Table \ref{tab:nopen}). Removing the penalty in the half-uniform case is not recommended (see Appendix).} 5 | \end{subtable} 6 | 7 | \begin{subtable}{\textwidth} 8 | \centering\input{../analysis/table/scoverage_neg.tex} 9 | \caption{``Significant" negative discoveries. Coverage rates are generally satisfactory, except for the uniform-based methods in the spiky and near-normal scenarios, 10 | and the normal-based method in the flat-top scenario. These results likely reflect inaccurate estimates of the tails of $g$ due to a disconnect between the tail of $g$ and the component distributions in these cases. For example, the uniform methods sometimes substantially underestimate the length of the tail of $g$ in these long-tailed scenarios, 11 | causing over-shrinkage of the tail toward 0.} 12 | \end{subtable} 13 | 14 | \begin{subtable}{\textwidth} 15 | \centering\input{../analysis/table/scoverage_pos.tex} 16 | \caption{``Significant" positive discoveries. Coverage rates are generally satisfactory, except for the symmetric methods under the asymmetric (``skew") scenario. 
} 17 | \end{subtable} 18 | 19 | \caption{Table of empirical coverage for nominal 95\% lower credible bounds} \label{tab:coverage} 20 | \end{table} 21 | \efloatseparator 22 | 23 | \begin{table}[!ht] 24 | \centering\begin{tabular}{c c } \toprule 25 | Scenario & Alternative distribution, $g_1$ \\ \midrule 26 | spiky & $0.4 N(0,0.25^2) + 0.2 N(0,0.5^2) + 0.2 N(0,1^2) + 0.2 N(0,2^2) $\\ 27 | near normal & $2/3 N(0,1^2) + 1/3 N(0,2^2)$ \\ 28 | flattop & $(1/7) [N(-1.5,.5^2) + N(-1,.5^2) + N(-.5,.5^2) +$ \\ 29 | & $N(0,.5^2) + N(0.5,.5^2) + N(1.0,.5^2) + N(1.5,.5^2)]$ \\ 30 | skew & $(1/4) N(-2,2^2) + (1/4) N(-1,1.5^2) + (1/3) N(0,1^2) + (1/6) N(1,1^2) $\\ 31 | big-normal & $N(0,4^2)$ \\ 32 | bimodal & $0.5 N(-2,1^2) + 0.5 N(2,1^2)$ \\ \bottomrule 33 | \end{tabular} 34 | \caption{Summary of simulation scenarios considered} \label{table:scenarios} 35 | \end{table} 36 | \efloatseparator 37 | 38 | \begin{table}[!ht] 39 | \begin{subtable}{\textwidth} 40 | \centering\input{../analysis/table/scoverage_all_nopen.tex} 41 | \caption{All observations} 42 | \end{subtable} 43 | 44 | \begin{subtable}{\textwidth} 45 | \centering\input{../analysis/table/scoverage_neg_nopen.tex} 46 | \caption{``Significant" negative discoveries.} 47 | \end{subtable} 48 | 49 | \begin{subtable}{\textwidth} 50 | \centering\input{../analysis/table/scoverage_pos_nopen.tex} 51 | \caption{``Significant" positive discoveries.} 52 | \end{subtable} 53 | 54 | \caption{Table of empirical coverage for nominal 95\% lower credible bounds for methods {\it without} the penalty term.} \label{tab:nopen} 55 | \end{table} 56 | \efloatseparator 57 | 58 | -------------------------------------------------------------------------------- /talks/README.md: -------------------------------------------------------------------------------- 1 | Directory for talks 2 | --------------------------------------------------------------------------------