├── figs ├── convex.pdf ├── ciBivar.pdf ├── fullvred.pdf ├── fig_lasso1.pdf ├── fig_lasso1.png ├── new_convex.pdf ├── umpuUnivar.pdf ├── fullvredNulls.pdf ├── tradeoff1_will.pdf ├── tradeoff2_will.pdf ├── informationBivar.pdf ├── informationUnivar.pdf ├── convex.pdf_tex ├── convex.pdf_tex_edit └── convex.svg ├── .gitignore ├── code ├── twodim_example │ ├── Rplot.R │ ├── equal_tailed_lengths.csv │ ├── power.py │ ├── interval.py │ └── interval_umau.py ├── lasso_example │ ├── tables │ │ ├── T5.tex │ │ └── gaussian.tex │ ├── df_5 │ │ ├── makeRplots.r │ │ └── summary.csv │ ├── gaussian │ │ ├── makeRplots.r │ │ └── summary.csv │ ├── summary_table.csv │ ├── tables.py │ ├── data_carving.py │ └── batch.py └── misc_plots │ ├── constants.py │ ├── twodim_regression.r │ ├── fisherInfo.r │ ├── full_data.py │ ├── figs_onesparse.py │ ├── figs_lasso.py │ ├── umpuUnivar.r │ └── sample_splitting.py ├── README.md ├── mycommands.sty └── biblio.bib /figs/convex.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfithian/optimal-inference/HEAD/figs/convex.pdf -------------------------------------------------------------------------------- /figs/ciBivar.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfithian/optimal-inference/HEAD/figs/ciBivar.pdf -------------------------------------------------------------------------------- /figs/fullvred.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfithian/optimal-inference/HEAD/figs/fullvred.pdf -------------------------------------------------------------------------------- /figs/fig_lasso1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfithian/optimal-inference/HEAD/figs/fig_lasso1.pdf -------------------------------------------------------------------------------- /figs/fig_lasso1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfithian/optimal-inference/HEAD/figs/fig_lasso1.png -------------------------------------------------------------------------------- /figs/new_convex.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfithian/optimal-inference/HEAD/figs/new_convex.pdf -------------------------------------------------------------------------------- /figs/umpuUnivar.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfithian/optimal-inference/HEAD/figs/umpuUnivar.pdf -------------------------------------------------------------------------------- /figs/fullvredNulls.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfithian/optimal-inference/HEAD/figs/fullvredNulls.pdf -------------------------------------------------------------------------------- /figs/tradeoff1_will.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfithian/optimal-inference/HEAD/figs/tradeoff1_will.pdf -------------------------------------------------------------------------------- /figs/tradeoff2_will.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wfithian/optimal-inference/HEAD/figs/tradeoff2_will.pdf -------------------------------------------------------------------------------- /figs/informationBivar.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfithian/optimal-inference/HEAD/figs/informationBivar.pdf -------------------------------------------------------------------------------- /figs/informationUnivar.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfithian/optimal-inference/HEAD/figs/informationUnivar.pdf -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | lasso_example/*npz 2 | twodim_example/*npz 3 | ignore/* 4 | *.pdf 5 | *.aux 6 | *.log 7 | *.bbl 8 | *.blg 9 | *.nav 10 | *.out 11 | *.snm 12 | *.toc 13 | *~ 14 | .DS_Store 15 | .Rhistory -------------------------------------------------------------------------------- /code/twodim_example/Rplot.R: -------------------------------------------------------------------------------- 1 | D = read.table('equal_tailed_lengths.csv', sep=',') 2 | mu.vals = D[,1] 3 | ci.length = D[,2] 4 | 5 | pdf("ciBivar.pdf",height=4,width=4) 6 | par(mar=c(4.1,4.1,3.1,1.1)) 7 | plot(mu.vals,ci.length,type="l",ylim=c(0,4.5), 8 | xlab=expression(mu), 9 | ylab="Interval Length", 10 | main="Expected CI Length", 11 | yaxt="n") 12 | axis(2,0:4) 13 | abline(2*qnorm(0.975),0,lty=2) 14 | legend("bottomleft",legend=c("Data Splitting","Data Carving"),lty=2:1,bty="n") 15 | dev.off() -------------------------------------------------------------------------------- /code/lasso_example/tables/T5.tex: -------------------------------------------------------------------------------- 1 | \begin{tabular}{|l|c|c|c|c|c|c|c|} 2 | \hline 3 | Algorithm & $p_{\text{screen}}$ & $\mathbb{E}[V]$ & $\mathbb{E}[R-V]$ & FDR & Power & Level \\ 4 | \hline 5 | $\text{Carve}_{100}$ & 0.97 & 8.11 & 6.97 & 0.54 & 0.80 & 0.04 \\ 6 | $\text{Split}_{50}$ & 0.09 & 9.20 & 4.77 & 0.66 & 0.93 & 0.05 \\ 7 | $\text{Carve}_{50}$ & 0.09 & 9.20 & 4.77 & 0.66 & 0.99 & 0.06 \\ 8 | \hline 9 | \end{tabular} 10 | -------------------------------------------------------------------------------- /code/misc_plots/constants.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from selection.covtest import covtest 3 | 4 | def parameters(n, rho, pos): 5 | """ 6 | design matrix and mu vector 7 | """ 8 | 9 | cov = ((1-rho) * np.identity(n) + rho * np.ones((n,n))) 10 | X = np.linalg.cholesky(cov).T 11 | beta = np.zeros(n) 12 | beta[pos] = 1. 
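    # Note (editorial aside): X is the transposed Cholesky factor of the
    # equicorrelated matrix cov = (1-rho)*I + rho*11', so X'X = cov exactly:
    # every column of X has unit norm and any two distinct columns have inner
    # product rho.  beta is 1-sparse with a single unit coefficient at `pos`,
    # so the mean vector mu = X beta computed next is simply column `pos` of X.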
13 | mu = np.dot(X, beta) 14 | return X, mu, beta 15 | 16 | def constraints(X, pos): 17 | n, p = X.shape 18 | while True: 19 | Y = np.random.standard_normal(n) 20 | con, _, idx, sign = covtest(X, Y, sigma=1) 21 | if idx == pos and sign == +1: 22 | initial = Y.copy() 23 | break 24 | return con, initial 25 | -------------------------------------------------------------------------------- /code/lasso_example/tables/gaussian.tex: -------------------------------------------------------------------------------- 1 | \begin{tabular}{|l|c|c|c|c|c|c|c|} 2 | \hline 3 | Algorithm & $p_{\text{screen}}$ & $\mathbb{E}[V]$ & $\mathbb{E}[R-V]$ & FDR & Power & Level \\ 4 | \hline 5 | $\text{Carve}_{100}$ & 0.99 & 8.13 & 6.99 & 0.54 & 0.80 & 0.05 \\ 6 | $\text{Split}_{50}$ & 0.09 & 9.13 & 4.74 & 0.66 & 0.93 & 0.06 \\ 7 | $\text{Carve}_{50}$ & 0.09 & 9.13 & 4.74 & 0.66 & 0.99 & 0.06 \\ 8 | $\text{Split}_{75}$ & 0.68 & 9.24 & 6.59 & 0.58 & 0.47 & 0.05 \\ 9 | $\text{Carve}_{75}$ & 0.68 & 9.24 & 6.59 & 0.58 & 0.97 & 0.06 \\ 10 | \hline 11 | \end{tabular} 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Optimal Inference After Model Selection 2 | ========== 3 | 4 | The main files in the top-level directory are `paper.tex` (the LaTeX source), `biblio.bib` (bibliography), and `mycommands.sty` (LaTeX style file). 5 | 6 | Figures directory 7 | ------ 8 | 9 | Figures appearing in paper are in `figs/`. 10 | 11 | Code directory 12 | ------ 13 | 14 | The `code/` directory contains source code for generating figures and running simulations. Each figure in `paper.tex` is accompanied by a comment giving a path to the file that generated it. There are three subdirectories: 15 | 16 | - `code/lasso_example/`: Source code for the simulation in Section 7. 17 | 18 | - `code/twodim_example/`: Source code for generating the expected confidence interval lengths for data splitting and data carving in Figure 4b. 19 | 20 | - `code/misc_plots/`: Source code for generating other plots interspersed throughout the paper. 
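Reproducing the figures requires the `selection` package from the `jonathan-taylor/selective-inference` repository (the simulation driver `code/lasso_example/batch.py` records the exact commit it was run against), together with the usual scientific Python stack (numpy, scipy, pandas, matplotlib, statsmodels, scikit-learn). The accompanying R plotting scripts are run in batch mode, e.g. via `R CMD BATCH makeRplots.r`, as done in `code/lasso_example/data_carving.py`.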
21 | -------------------------------------------------------------------------------- /code/lasso_example/df_5/makeRplots.r: -------------------------------------------------------------------------------- 1 | 2 | data <- read.table("summary.csv",sep=",",header=T) 3 | 4 | pdf("tradeoff1_will.pdf",height=3.5,width=3.5) 5 | par(yaxs="i",mar=c(4.1,4.1,0.7,0.2)) 6 | matplot(100*data$split,data[,c(6,5,4)],type="l", 7 | main="",xlab="# data points used for selection",ylab="Probability", 8 | lty=3:1,col=1,xlim=c(0,100),ylim=c(0,1)) 9 | legend(-7,.85,legend=c("Screening","Power, Carving","Power, Splitting"),bty="n",lty=c(3,1,2), 10 | cex=1) 11 | dev.off() 12 | 13 | pdf("tradeoff2_will.pdf",height=3.5,width=3.5) 14 | par(yaxs="i",mar=c(4.1,4.1,0.7,0.2)) 15 | matplot(100*data$split,data[,6]*data[,c(5,4)],type="l", 16 | main="",xlab="# data points used for selection",ylab="Screening x Power", 17 | lty=2:1,col=1,xlim=c(0,100),ylim=c(0,1)) 18 | legend("topleft",legend=c("Data Carving","Data Splitting"),bty="n",lty=c(1,2), 19 | cex=1) 20 | dev.off() 21 | -------------------------------------------------------------------------------- /code/lasso_example/gaussian/makeRplots.r: -------------------------------------------------------------------------------- 1 | 2 | data <- read.table("summary.csv",sep=",",header=T) 3 | 4 | pdf("tradeoff1_will.pdf",height=3.5,width=3.5) 5 | par(yaxs="i",mar=c(4.1,4.1,0.7,0.2)) 6 | matplot(100*data$split,data[,c(6,5,4)],type="l", 7 | main="",xlab="# data points used for selection",ylab="Probability", 8 | lty=3:1,col=1,xlim=c(0,100),ylim=c(0,1)) 9 | legend(-7,.85,legend=c("Screening","Power, Carving","Power, Splitting"),bty="n",lty=c(3,1,2), 10 | cex=1) 11 | dev.off() 12 | 13 | pdf("tradeoff2_will.pdf",height=3.5,width=3.5) 14 | par(yaxs="i",mar=c(4.1,4.1,0.7,0.2)) 15 | matplot(100*data$split,data[,6]*data[,c(5,4)],type="l", 16 | main="",xlab="# data points used for selection",ylab="Screening x Power", 17 | lty=2:1,col=1,xlim=c(0,100),ylim=c(0,1)) 18 | legend("topleft",legend=c("Data Carving","Data Splitting"),bty="n",lty=c(1,2), 19 | cex=1) 20 | dev.off() 21 | -------------------------------------------------------------------------------- /code/twodim_example/equal_tailed_lengths.csv: -------------------------------------------------------------------------------- 1 | -2.000000000000000000e+00,3.886672609909204823e+00 2 | -1.500000000000000000e+00,3.865518890337183322e+00 3 | -1.000000000000000000e+00,3.859897900638205925e+00 4 | -5.000000000000000000e-01,3.836323840078153857e+00 5 | 0.000000000000000000e+00,3.807789039872688797e+00 6 | 5.000000000000000000e-01,3.760151454429654816e+00 7 | 1.000000000000000000e+00,3.719816528539057643e+00 8 | 1.500000000000000000e+00,3.643873767977436451e+00 9 | 2.000000000000000000e+00,3.580979711553412770e+00 10 | 2.500000000000000000e+00,3.481963516900840716e+00 11 | 3.000000000000000000e+00,3.361485211461119782e+00 12 | 3.500000000000000000e+00,3.244551657846729498e+00 13 | 4.000000000000000000e+00,3.126008283667582432e+00 14 | 4.500000000000000000e+00,3.020252938780552121e+00 15 | 5.000000000000000000e+00,2.931887609951345119e+00 16 | 5.500000000000000000e+00,2.870217558545478997e+00 17 | 6.000000000000000000e+00,2.824779221760226111e+00 18 | 6.500000000000000000e+00,2.802637149836213482e+00 19 | 7.000000000000000000e+00,2.801438879751963817e+00 20 | 7.500000000000000000e+00,2.797096152183685902e+00 21 | 8.000000000000000000e+00,2.789429304359240636e+00 22 | 8.500000000000000000e+00,2.793058743261815025e+00 23 | 
9.000000000000000000e+00,2.796901044622824628e+00 24 | -------------------------------------------------------------------------------- /code/twodim_example/power.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from selection import affine 5 | from selection.discrete_family import discrete_family 6 | from scipy.stats import norm as ndist 7 | 8 | cutoff = ndist.ppf(0.95) 9 | 10 | null_constraint = affine.constraints(np.array([[-1,0.]]), np.array([-cutoff])) 11 | null_sample = affine.sample_from_constraints(null_constraint, np.array([4,2.]), 12 | ndraw=100000).sum(1) 13 | null_dbn = discrete_family(null_sample, np.ones_like(null_sample)) 14 | 15 | def power(mu, ndraw=100000, keep_every=100): 16 | constraint = affine.constraints(np.array([[-1,0.]]), np.array([-cutoff])) 17 | constraint.mean = np.array([mu,mu]) 18 | sample = affine.sample_from_constraints(constraint, np.array([4,2.]), 19 | ndraw=ndraw)[::keep_every] 20 | print sample.mean(0) 21 | sample = sample.sum(1) 22 | decisions = [] 23 | for s in sample: 24 | decisions.append(null_dbn.one_sided_test(0, s, alternative='greater')) 25 | print np.mean(decisions) 26 | return np.mean(decisions) 27 | 28 | if not os.path.exists('power_curve.npy'): 29 | muvals = np.linspace(0, 5, 21) 30 | P = [power(mu, ndraw=100000, keep_every=25) for mu in muvals] 31 | np.save('power_curve.npy', np.vstack([muvals, P])) 32 | else: 33 | muvals, P = np.load('power_curve.npy') 34 | 35 | plt.clf() 36 | plt.plot(muvals, P, 'k', linewidth=2, label='Selective $z$ test') 37 | plt.plot(muvals, [ndist.sf(ndist.ppf(0.95) - mu) for mu in muvals], 38 | c='red', label='Sample splitting', linewidth=2) 39 | ax = plt.gca() 40 | ax.set_xlabel(r'$\mu$', fontsize=20) 41 | ax.set_ylabel(r'Power($\mu$)', fontsize=20) 42 | ax.legend(loc='lower right') 43 | f = plt.gcf() 44 | f.savefig('figure_c.pdf') 45 | -------------------------------------------------------------------------------- /figs/convex.pdf_tex: -------------------------------------------------------------------------------- 1 | %% Creator: Inkscape inkscape 0.48.2, www.inkscape.org 2 | %% PDF/EPS/PS + LaTeX output extension by Johan Engelen, 2010 3 | %% Accompanies image file 'convex.pdf' (pdf, eps, ps) 4 | %% 5 | %% To include the image in your LaTeX document, write 6 | %% \input{.pdf_tex} 7 | %% instead of 8 | %% \includegraphics{.pdf} 9 | %% To scale the image, write 10 | %% \def\svgwidth{} 11 | %% \input{.pdf_tex} 12 | %% instead of 13 | %% \includegraphics[width=]{.pdf} 14 | %% 15 | %% Images with a different path to the parent latex file can 16 | %% be accessed with the `import' package (which may need to be 17 | %% installed) using 18 | %% \usepackage{import} 19 | %% in the preamble, and then including the image with 20 | %% \import{}{.pdf_tex} 21 | %% Alternatively, one can specify 22 | %% \graphicspath{{/}} 23 | %% 24 | %% For more information, please see info/svg-inkscape on CTAN: 25 | %% http://tug.ctan.org/tex-archive/info/svg-inkscape 26 | %% 27 | \begingroup% 28 | \makeatletter% 29 | \providecommand\color[2][]{% 30 | \errmessage{(Inkscape) Color is used for the text in Inkscape, but the package 'color.sty' is not loaded}% 31 | \renewcommand\color[2][]{}% 32 | }% 33 | \providecommand\transparent[1]{% 34 | \errmessage{(Inkscape) Transparency is used (non-zero) for the text in Inkscape, but the package 'transparent.sty' is not loaded}% 35 | \renewcommand\transparent[1]{}% 36 | }% 37 | 
\providecommand\rotatebox[2]{#2}% 38 | \ifx\svgwidth\undefined% 39 | \setlength{\unitlength}{400bp}% 40 | \ifx\svgscale\undefined% 41 | \relax% 42 | \else% 43 | \setlength{\unitlength}{\unitlength * \real{\svgscale}}% 44 | \fi% 45 | \else% 46 | \setlength{\unitlength}{\svgwidth}% 47 | \fi% 48 | \global\let\svgwidth\undefined% 49 | \global\let\svgscale\undefined% 50 | \makeatother% 51 | \begin{picture}(1,1.02)% 52 | \put(0,0){\includegraphics[width=\unitlength]{convex.pdf}}% 53 | \put(0.14066225,0.13805743){\color[rgb]{0,0,0}\makebox(0,0)[lb]{\smash{EVENT}}}% 54 | \end{picture}% 55 | \endgroup% 56 | -------------------------------------------------------------------------------- /figs/convex.pdf_tex_edit: -------------------------------------------------------------------------------- 1 | %% Creator: Inkscape inkscape 0.48.2, www.inkscape.org 2 | %% PDF/EPS/PS + LaTeX output extension by Johan Engelen, 2010 3 | %% Accompanies image file 'convex.pdf' (pdf, eps, ps) 4 | %% 5 | %% To include the image in your LaTeX document, write 6 | %% \input{.pdf_tex} 7 | %% instead of 8 | %% \includegraphics{.pdf} 9 | %% To scale the image, write 10 | %% \def\svgwidth{} 11 | %% \input{.pdf_tex} 12 | %% instead of 13 | %% \includegraphics[width=]{.pdf} 14 | %% 15 | %% Images with a different path to the parent latex file can 16 | %% be accessed with the `import' package (which may need to be 17 | %% installed) using 18 | %% \usepackage{import} 19 | %% in the preamble, and then including the image with 20 | %% \import{}{.pdf_tex} 21 | %% Alternatively, one can specify 22 | %% \graphicspath{{/}} 23 | %% 24 | %% For more information, please see info/svg-inkscape on CTAN: 25 | %% http://tug.ctan.org/tex-archive/info/svg-inkscape 26 | %% 27 | \begingroup% 28 | \makeatletter% 29 | \providecommand\color[2][]{% 30 | \errmessage{(Inkscape) Color is used for the text in Inkscape, but the package 'color.sty' is not loaded}% 31 | \renewcommand\color[2][]{}% 32 | }% 33 | \providecommand\transparent[1]{% 34 | \errmessage{(Inkscape) Transparency is used (non-zero) for the text in Inkscape, but the package 'transparent.sty' is not loaded}% 35 | \renewcommand\transparent[1]{}% 36 | }% 37 | \providecommand\rotatebox[2]{#2}% 38 | \ifx\svgwidth\undefined% 39 | \setlength{\unitlength}{400bp}% 40 | \ifx\svgscale\undefined% 41 | \relax% 42 | \else% 43 | \setlength{\unitlength}{\unitlength * \real{\svgscale}}% 44 | \fi% 45 | \else% 46 | \setlength{\unitlength}{\svgwidth}% 47 | \fi% 48 | \global\let\svgwidth\undefined% 49 | \global\let\svgscale\undefined% 50 | \makeatother% 51 | \begin{picture}(1,1.02)% 52 | \put(0,0){\includegraphics[width=\unitlength]{figs/convex.pdf}}% 53 | \put(0.14066225,0.13805743){\color[rgb]{0,0,0}\makebox(0,0)[lb]{\smash{}}}% 54 | \end{picture}% 55 | \endgroup% 56 | -------------------------------------------------------------------------------- /code/misc_plots/twodim_regression.r: -------------------------------------------------------------------------------- 1 | 2 | # Fig 5a 3 | pdf("fullvred.pdf",height=4.3,width=4) 4 | par(mar=c(4.1,4.1,3.1,0.1)) 5 | y <- t(c(2.9,2.5)) 6 | plot(y,xlim=c(-5,5),ylim=c(-5,5),xlab=expression(Y[1]),ylab=expression(Y[2]), 7 | main="Conditioning Sets")#"Full vs. 
Reduced Model: First Step",asp=1) 8 | polygon(c(0,10,10),c(0,10,-10),lty=2,col="#F4E918") 9 | polygon(c(0,-10,-10),c(0,10,-10),lty=2,col="#F4E918") 10 | abline(h=0) 11 | abline(v=0) 12 | text(2,.5,"A") 13 | text(y+c(.3,-.4),labels="Y") 14 | lines(c(y[2],10),c(y[2],y[2]),lwd=2,col="brown") 15 | lines(c(-y[2],-10),c(y[2],y[2]),lwd=2,col="brown") 16 | points(y,pch=16) 17 | dev.off() 18 | 19 | unnorm.cond.dens <- function(x) (1-2*pnorm(-abs(x)))*dnorm(x) #p(|Y2| < |Y1| = x) 20 | cond.dens <- function(x) unnorm.cond.dens(x) / 2 / integrate(function(u) unnorm.cond.dens(u),0,10)$value 21 | 22 | # Fig 5b 23 | pdf("fullvredNulls.pdf",height=4.3,width=4) 24 | par(mar=c(4.1,4.1,3.1,0.1),yaxs="i") 25 | x <- seq(-6,6,.01) 26 | plot(x,(abs(x)>2.5)*dnorm(x)/2/pnorm(-2.5),ylim=c(0,1.4),lty=1, 27 | col="brown",type="l", 28 | main="Conditional Null Distributions", 29 | ylab="Density",xlab=expression(Y[1])) 30 | polygon(c(x,0),c(cond.dens(x),0),lty=2,col="#F4E918") 31 | lines(x,(abs(x)>2.5)*dnorm(x)/2/pnorm(-2.5),col="brown") 32 | legend("topleft",legend=c("Saturated Model","Selected Model","Observed Value"),lty=1:3,bg="white", col=c("brown","black","black")) 33 | #norm.y <- sqrt(sum(y^2)) 34 | #curve((abs(x)>2.5)*dbeta((x/norm.y)^2,.5,.5)*abs(x/norm.y)/norm.y/2,-norm.y,norm.y,add=T) 35 | abline(v=2.9,lty=3) 36 | dev.off() 37 | 38 | # p-values for selected and saturated models 39 | 2*integrate(function(x) cond.dens(x), 2.9,10)$value 40 | pnorm(-2.9)/pnorm(-2.5) 41 | 42 | B <- 10000 43 | mu <- c(5,5) 44 | pvals <- NULL 45 | for(b in 1:B) { 46 | y <- mu + rnorm(2) 47 | if(abs(y[1]) > abs(y[2])) { 48 | pvals <- rbind(pvals, c( 49 | 2*integrate(function(x) cond.dens(x), abs(y[1]),10)$value, 50 | pnorm(-abs(y[1]))/pnorm(-abs(y[2])) 51 | )) 52 | } 53 | } 54 | mean(pvals[,1]<.05) 55 | mean(pvals[,2]<.05) 56 | 57 | #hist(cos(2*pi*runif(1000000)),freq=F,breaks=seq(-1,1,.025)) 58 | #curve(dbeta(x^2,.5,.5)*abs(x),-1,1,add=T) 59 | 60 | pdf("fullvredXty.pdf",height=4.3,width=4) 61 | par(mar=c(4.1,4.1,2.1,0.1)) 62 | y <- t(c(2.9,2.5)) 63 | plot(y,xlim=c(-5,5),ylim=c(-5,5), 64 | xlab=expression(paste(X[1],"' Y",sep="")), 65 | ylab=expression(paste(X[2],"' Y",sep="")), 66 | main="Full vs. 
Reduced Model: First Step",asp=1) 67 | polygon(c(0,10,10),c(0,10,-10),lty=2,col="#F0F0FF") 68 | polygon(c(0,-10,-10),c(0,10,-10),lty=2,col="#F0F0FF") 69 | abline(h=0) 70 | abline(v=0) 71 | text(2,.5,"A") 72 | text(y+c(.3,-.4),labels="X'Y") 73 | lines(c(y[2],10),c(y[2],y[2]),lwd=2,col="blue") 74 | lines(c(-y[2],-10),c(y[2],y[2]),lwd=2,col="blue") 75 | points(y) 76 | dev.off() 77 | -------------------------------------------------------------------------------- /code/misc_plots/fisherInfo.r: -------------------------------------------------------------------------------- 1 | 2 | cut.vals <- seq(-6,6,.1) 3 | 4 | tail.prob <- EY <- VarY <- NULL 5 | i <- 40 6 | for(i in 1:length(cut.vals)) { 7 | cut <- cut.vals[i] 8 | tail.prob[i] <- integrate(dnorm,cut,Inf)$value 9 | EY[i] <- integrate(function(y) y*dnorm(y)/tail.prob[i],cut,Inf)$value 10 | VarY[i] <- integrate(function(y) (y-EY[i])^2*dnorm(y)/tail.prob[i],cut,Inf)$value 11 | } 12 | 13 | def.par <- par(no.readonly=TRUE) 14 | 15 | pdf("informationUnivar.pdf",height=4,width=4) 16 | par(mar=c(4.1,4.1,3.1,1.1)) 17 | plot(3-cut.vals,VarY,type="l",xlab=expression(mu), 18 | ylab="Information", 19 | main="Leftover Fisher Information", 20 | yaxt="n") 21 | axis(2,c(0,.5,1)) 22 | dev.off() 23 | 24 | pdf("informationBivar.pdf",height=4,width=4) 25 | par(mar=c(4.1,4.1,3.1,1.1)) 26 | plot(3-cut.vals,1+VarY,type="l",ylim=c(0,2), 27 | xlab=expression(mu), 28 | ylab="Information", 29 | main="Leftover Fisher Information", 30 | yaxt="n") 31 | axis(2,0:2) 32 | abline(1,0,lty=2) 33 | legend("bottomleft",legend=c("Data Splitting","Data Carving"),lty=2:1,bty="n") 34 | dev.off() 35 | 36 | 37 | pdf("information.pdf",height=4,width=7) 38 | layout(matrix(c(1,1,2,3),2,2,byrow=TRUE),heights=c(1,7)) 39 | #par(mfrow=c(1,2),mar=c(2.1,3.1,3.1,0.1)) 40 | par(mar = c(0,0,0,0)) 41 | plot(0:1,0:1,type="n",xaxt="n",yaxt="n",ann = F, bty = 'n') 42 | text(.5,.5,"Leftover Information",cex=1.5) 43 | par(mar = c(4.1,4.1,3.1,0.1)) 44 | plot(3-cut.vals,VarY,type="l",xlab=expression(mu),ylab="Fisher Information", 45 | main=expression(paste(N(mu,1),"1{Y>3}")), 46 | yaxt="n") 47 | axis(2,c(0,.5,1)) 48 | par(mar=c(4.1,3.1,3.1,1.1)) 49 | plot(3-cut.vals,1+VarY,type="l",ylim=c(0,2), 50 | xlab=expression(mu),ylab="Fisher Information", 51 | main=expression(paste(N(mu*1[2],I[2])*1,group("{",Y[1]>3,"}"))), 52 | yaxt="n") 53 | axis(2,0:2) 54 | abline(1,0,lty=2) 55 | legend("bottomleft",legend=c("Sample Splitting","Sample Carving"),lty=2:1,bty="n") 56 | dev.off() 57 | 58 | 59 | rt.ar <- function() arrows(.95,.5,1,.5,.1) 60 | lt.ar <- function() arrows(.05,.5,0,.5,.1) 61 | up.ar <- function() arrows(.5,.9,.5,1,.1) 62 | dn.ar <- function() arrows(.5,.05*14/8.5,.5,0,.1) 63 | num <- function(n) 64 | text(0,.9, 65 | paste(n, 66 | c(". Draw ",". 
Describe ")[n %% 2 + 1], 67 | ifelse(n>1,n-1,"a scene"), 68 | sep=""), 69 | pos=4) 70 | 71 | panel <- function(n) { 72 | plot(0:1,0:1,type="n",xaxt="n",yaxt="n",ann = F) 73 | num(n) 74 | if(n < 10) { 75 | if(n %% 4 == 1) rt.ar() 76 | if(n %% 2 == 0) dn.ar() 77 | if(n %% 4 == 3) lt.ar() 78 | } 79 | } 80 | 81 | 82 | pdf("pictionary.pdf",height=14,width=8.5) 83 | layout(matrix( 84 | c(1,2, 85 | 4,3, 86 | 5,6, 87 | 8,7, 88 | 9,10),5,byrow=TRUE)) 89 | op <- par(mar=c(0,0,0,0)) 90 | for(n in 1:10) panel(n) 91 | dev.off() 92 | -------------------------------------------------------------------------------- /code/lasso_example/summary_table.csv: -------------------------------------------------------------------------------- 1 | split_frac,burnin,ndraw,lam_frac,rho,tdf,snr,nactive,md5 2 | 0.82,10000,40000,2.0,0.3,inf,7.0,7,030cde95c0263a4a5b8fe23028eec5fe 3 | 0.96,10000,40000,2.0,0.3,inf,7.0,7,0809f7a6c20d4524357dc7d3d70706b0 4 | 0.87,10000,40000,2.0,0.3,inf,7.0,7,0c91e58e2d6d1685568e0a8e352c5cdf 5 | 0.5,10000,90000,2.0,0.3,inf,5.0,7,0d9cfdb95626785e99ad07f6548333d2 6 | 0.5,10000,20000,1.0,0.3,inf,7.0,7,16465ce9f1ab32f75bf007efbc7b6c22 7 | 0.75,10000,20000,2.0,0.3,inf,7.0,7,196ec19bfb36706187054963ffdfb2f2 8 | 0.6,10000,40000,2.0,0.3,inf,7.0,7,21b43752cea763d71ae75abb319d88c7 9 | 0.5,10000,40000,2.0,0.3,inf,5.0,7,28be66d4aad6e459f28cc3d6313cf44b 10 | 0.5,20000,80000,2.0,0.3,inf,9.0,7,2d4cd728a41e9f8ecbdb163216023814 11 | 0.5,10000,90000,0.5,0.3,inf,7.0,7,3469d228f23f424ebd36eae0ed9e0840 12 | 0.5,20000,80000,2.0,0.3,inf,7.0,7,36bfdb653c83da2f03f37adfa8bf42af 13 | 0.75,10000,40000,2.0,0.3,inf,7.0,7,37aee3cf7ebab51982a404cf956d5681 14 | 0.8,10000,40000,2.0,0.3,inf,7.0,7,45f378825e9ab64adde1b90a57696543 15 | 0.5,10000,20000,0.5,0.3,inf,7.0,7,4d2bab0ae7d1ea7ea91b41cc05d28fc5 16 | 0.5,10000,90000,2.0,0.3,5.0,7.0,7,5105db19113d40d7c19fa4ca94b5a051 17 | 0.9,10000,40000,2.0,0.3,inf,7.0,7,5411371e24eb1d4cb200157f51249451 18 | 0.5,10000,40000,2.0,0.3,inf,10.0,7,579f730626a506a31f3d8750571d67ff 19 | 0.99,10000,40000,2.0,0.3,inf,7.0,7,5b7bd04494a5e83e035f7edb4044e145 20 | 0.75,20000,80000,2.0,0.3,inf,9.0,7,5ec6b43c04971f3957a755d9d90c78f5 21 | 0.5,10000,20000,2.0,0.3,inf,9.0,7,62cebee13bdc803b4ba9905e855dd97c 22 | 0.4,10000,40000,2.0,0.3,inf,7.0,7,6fbdaae853a4acd38098a53fd5e0296c 23 | 0.98,10000,40000,2.0,0.3,inf,7.0,7,723b84789efe55f0908376b56399c7aa 24 | 0.5,10000,20000,2.0,0.3,inf,7.0,7,7973aceb05d14d8c9964c051963cb555 25 | 0.5,10000,40000,2.0,0.3,5.0,7.0,7,7bfdf12db625ac36573248a069bd2d7a 26 | 0.5,10000,90000,2.0,0.3,inf,9.0,7,7fced40b20995ececb226b2e8a8e717d 27 | 0.835,10000,40000,2.0,0.3,inf,7.0,7,8127fc58496d491fce9447a80c4d35c5 28 | 0.5,10000,40000,2.0,0.3,inf,7.0,7,83a135475b4462ee9725501363b29dff 29 | 0.5,10000,40000,1.0,0.3,inf,7.0,7,83f7ed6aee498394551e71bfae221911 30 | 0.3,10000,40000,2.0,0.3,inf,7.0,7,8437deb983a0af4aa9caef73569c572b 31 | 0.2,10000,40000,2.0,0.3,inf,7.0,7,973ff1c75a4f462ed08df4e926f980e1 32 | 0.5,10000,20000,2.0,0.3,inf,5.0,7,9af62bcc11f08d30394f8bacb944d75a 33 | 0.7,10000,40000,2.0,0.3,inf,7.0,7,a3cd2d77bd9a03f9613b877ce446c2ed 34 | 0.85,10000,40000,2.0,0.3,inf,7.0,7,bdb90346c2afb7f527bf1bdec8dd3814 35 | 0.5,10000,20000,2.0,0.3,5.0,7.0,7,be28f0888c7b4a17d2f094f4275e6e3c 36 | 0.5,10000,40000,2.0,0.3,inf,9.0,7,c5b750d7489734efadac978d128a8296 37 | 0.5,10000,20000,1.0,0.3,inf,5.0,7,d1e4d82f017ce33c2a28cc609441aaa2 38 | 0.5,10000,20000,2.0,0.3,inf,10.0,7,d6c99540ce040fec44722071dd480223 39 | 0.5,10000,90000,2.0,0.3,inf,7.0,7,d9727d462dc7b41801e0ab34a841c136 40 | 
0.95,10000,40000,2.0,0.3,inf,7.0,7,e0c08feee8f2e27f386ddcfae97b8b79 41 | 0.5,10000,90000,2.0,0.3,inf,10.0,7,e0c3da17b36afc3d599c78b4816c0484 42 | 0.75,10000,90000,2.0,0.3,inf,7.0,7,e53875375cbc4064bde9005352c9c4bd 43 | 0.75,20000,80000,2.0,0.3,inf,7.0,7,f30c96c94b50a4200de00692c626c70f 44 | -------------------------------------------------------------------------------- /mycommands.sty: -------------------------------------------------------------------------------- 1 | \newcommand{\vspa}{\vskip+1em\pause} 2 | 3 | \renewcommand{\t}{\textrm} 4 | \renewcommand{\th}{\textrm{th}} 5 | \renewcommand{\sec}{\textsection} 6 | 7 | \newcommand{\Lq}[2]{\left|\left|#2\right|\right|_{#1}} 8 | \newcommand{\Ltwo}[1]{\Lq{2}{#1}} 9 | 10 | \renewcommand{\to}{\rightarrow} 11 | \newcommand{\toDist}{\xrightarrow[]{\mathcal{D}}} 12 | \newcommand{\toAS}{\xrightarrow[]{a.s.}} 13 | \newcommand{\toWeak}{\xrightarrow[]{w}} 14 | %\newcommand{\ToWeak}{\overset{w}{\Rightarrow}} 15 | \newcommand{\ToWeak}{\Rightarrow} 16 | \newcommand{\toVague}{\xrightarrow[]{v}} 17 | \newcommand{\toProb}{\xrightarrow[]{p}} 18 | \newcommand{\toTV}{\xrightarrow[]{t.v.}} 19 | \newcommand{\toLp}[1]{\xrightarrow[]{L^#1}} 20 | \newcommand{\toLone}{\toLp{1}} 21 | \newcommand{\toLtwo}{\toLp{2}} 22 | 23 | \newcommand{\eqAS}{\overset{\textrm{a.s.}}{=}} 24 | \newcommand{\eqDist}{\overset{\mathcal{D}}{=}} 25 | \newcommand{\simiid}{\overset{\textrm{i.i.d.}}{\sim}} 26 | \newcommand{\simind}{\overset{\textrm{ind.}}{\sim}} 27 | 28 | \newcommand{\imp}{\Rightarrow} 29 | \newcommand{\nimp}{\nRightarrow} 30 | \newcommand{\bimp}{\Leftarrow} 31 | \newcommand{\nbimp}{\nLeftarrow} 32 | \newcommand{\dimp}{\Leftrightarrow} 33 | \newcommand{\ndimp}{\nLeftrightarrow} 34 | \newcommand{\upto}{\uparrow} 35 | \newcommand{\downto}{\downarrow} 36 | \newcommand{\sub}{\subseteq} 37 | \newcommand{\Perp}{\perp \! \! \! 
\perp} 38 | \newcommand{\indep}{\Perp} 39 | \newcommand{\gv}{\,|\,} 40 | \newcommand{\1}{\mathbf{1}} 41 | 42 | \newcommand{\re}{\t{re}} 43 | \newcommand{\im}{\t{im}} 44 | \newcommand{\ep}{\varepsilon} 45 | 46 | \newcommand{\Z}{\mathbb{Z}} 47 | \newcommand{\N}{\mathbb{N}} 48 | \newcommand{\R}{\mathbb{R}} 49 | \newcommand{\Q}{\mathbb{Q}} 50 | \renewcommand{\P}{\mathbb{P}} 51 | \newcommand{\E}{\mathbb{E}} 52 | 53 | \newcommand{\Var}{\textrm{Var}} 54 | \newcommand{\Cov}{\textrm{Cov}} 55 | \newcommand{\sgn}{\textrm{sgn}} 56 | \newcommand{\Tr}{\textrm{Tr}} 57 | 58 | \newcommand{\Bin}{\textrm{Bin}} 59 | \newcommand{\Beta}{\textrm{Beta}} 60 | \newcommand{\Bern}{\textrm{Bern}} 61 | 62 | %\ means mathcal for A,B,C, F,G,H, L, O, X,Y 63 | \newcommand{\A}{\mathcal{A}} 64 | \newcommand{\B}{\mathcal{B}} 65 | \newcommand{\C}{\mathcal{C}} 66 | \newcommand{\F}{\mathcal{F}} 67 | \newcommand{\G}{\mathcal{G}} 68 | \renewcommand{\H}{\mathcal{H}} 69 | \renewcommand{\L}{\mathcal{L}} 70 | \renewcommand{\O}{\mathcal{O}} 71 | \newcommand{\X}{\mathcal{X}} 72 | \newcommand{\Y}{\mathcal{Y}} 73 | 74 | \newcommand{\bbS}{\mathbb{S}} 75 | \newcommand{\bbT}{\mathbb{T}} 76 | \newcommand{\bbI}{\mathbb{I}} 77 | 78 | \newcommand{\cP}{\mathcal{P}} 79 | \newcommand{\cS}{\mathcal{S}} 80 | 81 | \newcommand{\EF}{\E^\F} 82 | 83 | \renewcommand{\b}{\textbf} 84 | \renewcommand{\bar}{\overline} 85 | 86 | \newcommand{\abs}[1]{\left| #1 \right|} 87 | \newcommand{\floor}[1]{\lfloor #1 \rfloor} 88 | \newcommand{\ceiling}[1]{\lceil #1 \rceil} 89 | \newcommand{\grad}{\nabla} 90 | \newcommand{\pardd}[2]{\frac{\partial #1}{\partial #2}} 91 | \newcommand{\parddtwo}[2]{\frac{\partial^2 #1}{\partial #2^2}} 92 | 93 | \newcommand{\rank}{\textrm{rank}} 94 | 95 | \DeclareMathOperator*{\argmin}{arg\,min} 96 | \DeclareMathOperator*{\argmax}{arg\,max} 97 | \newcommand{\logit}{\textrm{logit}} 98 | \newcommand{\logistic}{\textrm{logistic}} 99 | \newcommand{\diag}{\textrm{diag}} 100 | 101 | \newcommand{\Sn}[2][i]{\sum_{#1=1}^n{#2}} 102 | \newcommand{\SI}[2][n]{\sum_{#1=1}^\infty{#2}} 103 | \newcommand{\limn}[1]{\lim_{n\to\infty}{#1}} 104 | \newcommand{\liminfn}[1]{\liminf_{n\to\infty}{#1}} 105 | \newcommand{\limsupn}[1]{\limsup_{n\to\infty}{#1}} 106 | \newcommand{\lgst}[1]{\frac{e^{#1}}{1+e^{#1}}} 107 | 108 | 109 | \newcommand{\Indic}{\mathbb{I}} 110 | 111 | \newcommand{\TypeI}{\textrm{Type I Error}} 112 | \newcommand{\TypeII}{\textrm{Type II Error}} 113 | 114 | %GRAPHICS 115 | \newcommand{\addplot}[1]{\includegraphics[width=.5\textwidth]{#1}} 116 | \newcommand{\addbigplot}[1]{\includegraphics[width=1\textwidth]{#1}} 117 | 118 | %STYLE 119 | \newcommand{\red}{\textcolor{red}} 120 | 121 | -------------------------------------------------------------------------------- /code/lasso_example/gaussian/summary.csv: -------------------------------------------------------------------------------- 1 | split,level_carve,level_split,power_carve,power_split,p_screen,count_null,count_alt,ntrial,fp,tp,fdp 2 | 0.29999999999999999,0.050000000000000003,0.050000000000000003,1.0,1.0,0.0,-1,-1,-1,-1.0,-1.0,-1.0 3 | 0.40000000000000002,0.059121621621621621,0.042229729729729729,0.99470899470899465,0.96523053665910807,0.01623850846292637,2368,2646,189,8.552109287739496,3.6072686656929291,0.70333444033831949 4 | 0.5,0.063202791779759598,0.060876308646762314,0.98596201486374901,0.92815854665565645,0.093387314439946023,5158,4844,346,9.1336032388663959,4.7441295546558706,0.65814808331874664 5 | 
0.55000000000000004,0.062918340026773767,0.053165041116848347,0.98876592890677395,0.89616867873910122,0.17552533992583438,52290,47712,3408,9.3622270292542229,5.2614853728883393,0.6402086400360647 6 | 0.59999999999999998,0.057114870210601666,0.048298986550126211,0.98780747751204334,0.83527305282005371,0.29315020558131399,53086,46914,3351,9.3807190971918466,5.7140232700551135,0.62145605860398268 7 | 0.65000000000000002,0.061674664285979987,0.052784287467916524,0.98283465928744385,0.75475613974403322,0.43548174509028603,53766,46256,3304,9.3259522868063787,6.1117701331224463,0.60410156583508356 8 | 0.69999999999999996,0.059166144431746968,0.05083496147749475,0.9765764978368221,0.63003102740025352,0.56537530266343827,54254,45766,3269,9.3047388446904193,6.3770321687997233,0.59334745014999124 9 | 0.75,0.059844995247495793,0.047561599766030561,0.97413831148770913,0.47455757094311313,0.68377693282636243,54708,45318,3237,9.2376425855513311,6.586607520067596,0.58376495087569424 10 | 0.80000000000000004,0.056509351256575101,0.04913062536528346,0.96888260254596892,0.28792432814710045,0.79822178315633485,54752,45248,3232,8.9925907631513962,6.7574709804890096,0.57095590541303398 11 | 0.84999999999999998,0.053683671977781025,0.050687034059348046,0.96551115036431878,0.1185250607198057,0.87669376693766932,54728,45290,3235,8.7796747967479671,6.8544715447154472,0.56157046281851275 12 | 0.87,0.054264419723170684,0.049344641480339242,0.96360523311967683,0.07489683027482659,0.89493949394939498,54474,45556,3254,8.7046204620462042,6.8831133113311331,0.55842758085290323 13 | 0.88,0.056013179571663921,0.050814570748672891,0.96284215630096526,0.069423017587164459,0.90758891066928027,54630,45374,3241,8.6858022962755523,6.8963875665079808,0.55741858960534829 14 | 0.90000000000000002,0.052509992299512302,0.04961314216567049,0.96011959723871076,0.053027305104867431,0.93308443423319931,54542,45486,3249,8.5907524411257903,6.9290637564618036,0.55353441894892674 15 | 0.91000000000000003,0.050907626994583517,0.050724637681159424,0.95790540838365579,0.048353682725790097,0.93185738930419781,54648,45374,3241,8.617883841288096,6.9249568717653824,0.5544600244182174 16 | 0.92000000000000004,0.052509992299512302,0.049943163066994242,0.95455145408948916,0.052136037661137752,0.94252539912917266,54542,45458,3247,8.5886792452830196,6.9349782293178519,0.55326389797864584 17 | 0.93000000000000005,0.048447893569844788,0.050332594235033258,0.9533251982916412,0.049289636537958682,0.95097185958804753,54120,45892,3278,8.4168842471714527,6.9480707861908906,0.54779751902271401 18 | 0.93999999999999995,0.053179360817917694,0.049133904600787023,0.94754601226993862,0.051621384750219108,0.95882352941176474,54382,45640,3260,8.4526470588235298,6.9564705882352937,0.54854841480406935 19 | 0.94999999999999996,0.052976716769820222,0.04955054524020041,0.94239349138308115,0.052226401889598463,0.95833333333333337,54288,45724,3266,8.445422535211268,6.955105633802817,0.54838525292940843 20 | 0.95999999999999996,0.053255745433117264,0.049278137890394813,0.93644957983193278,0.049982492997198882,0.97027348394768143,54304,45696,3264,8.4063614744351955,6.9693816884661119,0.54672879127677665 21 | 0.96999999999999997,0.048171407462135205,0.050424824528998893,0.92458258860455989,0.046427481581585946,0.97675111773472423,54140,45878,3277,8.3385991058122197,6.9758569299552908,0.54449202024133903 22 | 
0.97999999999999998,0.052870653064263903,0.050636681808027405,0.90768499719210338,0.0510173225625297,0.97236107027344898,53716,46298,3307,8.2119964716259926,6.9705968832696268,0.54088233015725462 23 | 0.98999999999999999,0.052732431735269189,0.050263477908390759,0.8622152258515895,0.048712230530412348,0.98167067307692313,54274,45738,3267,8.3740985576923084,6.98046875,0.54538160469667318 24 | 1.0,0.050518231302661996,0.050443665647602713,0.79756875592723508,0.049271488921458745,0.98689696247766534,53644,46396,3314,8.1331149493746278,6.9863013698630141,0.53792519351598356 25 | -------------------------------------------------------------------------------- /code/lasso_example/df_5/summary.csv: -------------------------------------------------------------------------------- 1 | split,level_carve,level_split,power_carve,power_split,p_screen,count_null,count_alt,ntrial,fp,tp,fdp 2 | 0.29999999999999999,0.050000000000000003,0.050000000000000003,1.0,1.0,0.0,-1,-1,-1,-1.0,-1.0,-1.0 3 | 0.40000000000000002,0.06721991701244813,0.049792531120331947,0.99382239382239379,0.97374517374517378,0.015405112832042636,2410,2590,185,8.540011657923225,3.6267799150637021,0.70191156038901936 4 | 0.5,0.059029126213592235,0.051262135922330095,0.98847262247838619,0.92548373816385343,0.093254501478097288,5150,4858,347,9.1977962913195377,4.7710292932007521,0.65845165262226335 5 | 0.55000000000000004,0.059615980179622177,0.052880148652833692,0.9892877822814129,0.89506989825461158,0.18587880744806803,51664,48356,3454,9.2987299537186523,5.3010440210956844,0.6369091720022263 6 | 0.59999999999999998,0.059811320754716985,0.05067924528301887,0.98723730111460906,0.83714796222241128,0.29552054915075243,53000,47012,3358,9.3776291472322448,5.7254246237789319,0.62090947231027416 7 | 0.65000000000000002,0.060598111227701992,0.049692699745165644,0.98400308787579882,0.76051807693957196,0.43605183924597457,53368,46634,3331,9.2985992931011907,6.0905877732687523,0.60422940165705441 8 | 0.69999999999999996,0.058059277304100688,0.050197467980659209,0.97865654052638473,0.63624459866439698,0.5587231136906794,54186,45822,3273,9.2504267668146127,6.3627517924206209,0.59247556362205067 9 | 0.75,0.061984687868080098,0.050390164899882212,0.97394236664622935,0.47893492160812823,0.68949482139082641,54336,45668,3262,9.1126611709997878,6.5977594588881843,0.58003928638699775 10 | 0.80000000000000004,0.058707051446591614,0.051483260606505078,0.96943173821252637,0.28786945812807879,0.78170878459687132,54542,45472,3248,8.9966305655836347,6.7287605294825514,0.57210854160608515 11 | 0.84999999999999998,0.054040181844845284,0.049457398445519871,0.96500021984786533,0.11515631183221212,0.87315237839290516,54552,45486,3249,8.7820478366030645,6.8460091373286751,0.56194111982390982 12 | 0.87,0.050754910583406629,0.047749926707710347,0.96442879154743566,0.080783623156504517,0.89246424642464239,54576,45430,3245,8.7079207920792072,6.8776127612761275,0.55871817042827643 13 | 0.88,0.050276871172393559,0.049176720818511863,0.96257037297677694,0.062236101337086562,0.90222222222222215,54538,45472,3248,8.6858333333333331,6.8869444444444445,0.5577574827869145 14 | 0.90000000000000002,0.055009242144177448,0.048539741219963031,0.96085688161274874,0.051987634432011147,0.92736009044657997,54100,45934,3281,8.4821933295647263,6.9157716223855283,0.55086456918389082 15 | 0.91000000000000003,0.053502339806182984,0.048970116806072439,0.95456731820368179,0.051292142201233112,0.92944523470839258,54278,45738,3267,8.5544807965860592,6.9229018492176388,0.55270849034060621 16 | 
0.92000000000000004,0.049420849420849421,0.050156278727707296,0.95623958607384019,0.049285275804612824,0.93620689655172418,54390,45612,3258,8.5267241379310352,6.9321839080459773,0.551573507816421 17 | 0.93000000000000005,0.051449918627015832,0.05000739754401539,0.95438321580917562,0.048620179333159222,0.94800693240901224,54072,45948,3282,8.4376083188908151,6.942518775274408,0.54860459001615147 18 | 0.93999999999999995,0.054447339039821359,0.049646445850390769,0.95016209206829483,0.04923276421007132,0.94862227324913884,53740,46270,3305,8.317164179104477,6.9443168771526977,0.544977525342762 19 | 0.94999999999999996,0.05289384320094493,0.051380481322899747,0.94198473282442752,0.050817884405670664,0.95564633790487297,54184,45850,3275,8.4012255617157869,6.9501021301429819,0.54726377615997268 20 | 0.95999999999999996,0.051729212656364973,0.049816041206769683,0.93148464537608977,0.049064704078503524,0.96109637488947819,54360,45654,3261,8.4694960212201593,6.9581491305629237,0.54898177511175639 21 | 0.96999999999999997,0.055475103041105048,0.04990531357914671,0.92315360610263519,0.048240291262135922,0.96742001761080121,53862,46144,3296,8.2929263281479315,6.9638978573525092,0.54355521354367065 22 | 0.97999999999999998,0.051263056522705387,0.053522483146899773,0.90169064279195099,0.0495892911469425,0.96280023432923256,53996,46018,3287,8.3898652606912716,6.9598711189220852,0.54658041370887722 23 | 0.98999999999999999,0.048235469670969186,0.047675893456688802,0.86839382705405643,0.051556168635227176,0.97384660593593897,53612,46396,3314,8.200411401704379,6.9729650308551276,0.54044737096930373 24 | 1.0,0.043492919757248817,0.049112160035963137,0.80273537986623222,0.051363402503858686,0.97313084112149539,53388,46648,3332,8.1121495327102799,6.9713785046728969,0.53781512605042014 25 | -------------------------------------------------------------------------------- /code/twodim_example/interval.py: -------------------------------------------------------------------------------- 1 | import os 2 | from glob import glob 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from selection import affine 6 | from selection.discrete_family import discrete_family 7 | from scipy.stats import norm as ndist 8 | from sklearn.isotonic import IsotonicRegression 9 | 10 | cutoff = 3. 
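# --- Editorial aside (not part of the original script) -----------------------
# Both branches of draw_sample() below target the same conditional law:
# (Y1, Y2) ~ N((mu, mu), I_2) restricted to the selection event {Y1 > cutoff},
# summarized through the sufficient statistic Y1 + Y2.  A minimal pure-numpy
# sketch of the rejection-sampling branch (practical only when the event is
# not too rare, which is why the script switches to the constrained sampler
# for small mu); the helper name is hypothetical:
import numpy as np

def _carving_sufficient_stat_sketch(mu, cutoff, nsample=10000):
    """Draw Y1 + Y2 with (Y1, Y2) ~ N((mu, mu), I_2) given Y1 > cutoff."""
    out = []
    while len(out) < nsample:
        y1 = np.random.standard_normal(100000) + mu
        y1 = y1[y1 > cutoff]                              # keep draws inside the selection event
        y2 = np.random.standard_normal(y1.shape[0]) + mu  # Y2 is unaffected by selection
        out.extend(y1 + y2)
    return np.array(out[:nsample])
# ------------------------------------------------------------------------------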
11 | null_constraint = affine.constraints(np.array([[-1,0.]]), np.array([-cutoff])) 12 | null_sample = affine.sample_from_constraints(null_constraint, np.array([4,2.]), 13 | ndraw=100000).sum(1) 14 | null_dbn = discrete_family(null_sample, np.ones_like(null_sample)) 15 | 16 | def draw_sample(mu, cutoff, nsample=10000): 17 | if mu >= cutoff - 4: 18 | sample = [] 19 | while True: 20 | candidate = np.random.standard_normal(1000000) + mu 21 | candidate = candidate[candidate > cutoff] 22 | sample.extend(candidate) 23 | if len(sample) > nsample: 24 | break 25 | sample = np.array(sample) 26 | sample += np.random.standard_normal(sample.shape) + mu 27 | else: 28 | constraint = affine.constraints(np.array([[-1,0.]]), np.array([-cutoff])) 29 | constraint.mean = np.array([mu,mu]) 30 | sample = affine.sample_from_constraints(constraint, np.array([cutoff + 0.1,0]), 31 | ndraw=2000000, 32 | direction_of_interest=np.array([1,1.])) 33 | sample = sample.sum(1)[::(2000000/nsample)] 34 | return sample[:nsample] 35 | 36 | def interval(mu, ndraw=100000, keep_every=100): 37 | 38 | if not os.path.exists('lengths%0.2f.npz' % mu): 39 | lengths = [] 40 | else: 41 | lengths = list(np.load('lengths%0.2f.npz' % mu)['lengths']) 42 | 43 | big_sample = draw_sample(mu, cutoff, nsample=50000)[:50000] 44 | mean, scale = big_sample.mean(), big_sample.std() 45 | big_sample -= mean 46 | big_sample /= scale 47 | 48 | dbn = discrete_family(big_sample, np.ones_like(big_sample)) 49 | dbn.theta = 0. 50 | new_sample = draw_sample(mu, cutoff, nsample=2500)[:2500] 51 | for i, s in enumerate(new_sample): 52 | try: 53 | _interval = dbn.equal_tailed_interval((s - mean) / scale) 54 | lengths.append(np.fabs(_interval[1] - _interval[0]) / scale) 55 | except: 56 | print 'exception raised' 57 | if i % 20 == 0 and i > 0: 58 | print np.median(lengths), np.mean(lengths) 59 | np.savez('lengths%0.2f' % mu, **{'lengths':lengths,'mu':mu}) 60 | if i % 1000 == 0 and i > 0: 61 | big_sample = draw_sample(mu, cutoff, nsample=50000)[:50000] 62 | mean, scale = big_sample.mean(), big_sample.std() 63 | big_sample -= mean 64 | big_sample /= scale 65 | dbn.theta = 0. 66 | print i 67 | return (np.mean(lengths), np.std(lengths), np.median(lengths)) 68 | 69 | def main(): 70 | muvals = np.linspace(-2, 9, 23)[::-1] 71 | L = [] 72 | np.random.shuffle(muvals) 73 | for mu in muvals: 74 | print 'trying %0.2f' % mu 75 | for f in glob('lengths*npz'): 76 | d = np.load(f) 77 | if d['mu'] == mu and d['lengths'].shape[0] > 50000: 78 | print '%0.2f already done' % mu 79 | else: 80 | interval(mu) 81 | 82 | def plot(): 83 | 84 | results = [] 85 | for f in glob('lengths*npz'): 86 | d = np.load(f) 87 | l = d['lengths'] 88 | l = l[l>0.] 
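        # (editorial note) interval() stores one CI length per Monte Carlo draw in
        # lengths<mu>.npz; zero-length entries are dropped here, and the per-mu mean
        # computed below becomes the "Data Carving" curve written to
        # equal_tailed_lengths.csv and plotted by twodim_example/Rplot.R.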
89 | print d['mu'], l.shape 90 | results.append([d['mu'], l.mean()]) 91 | 92 | results = sorted(results) 93 | results = np.array(results).T 94 | muvals, mean_length = results 95 | f = plt.figure() 96 | f.clf() 97 | ax = f.gca() 98 | iso = IsotonicRegression(increasing=False) 99 | mean_length_iso = iso.fit_transform(np.arange(mean_length.shape[0]), mean_length) 100 | ax.plot(muvals, mean_length, 'k', linewidth=2, label='UMAU') 101 | ax.plot([muvals.min(), muvals.max()], [2*ndist.ppf(0.975)]*2, c='red', label='Sample splitting', linewidth=2) 102 | ax.plot([muvals.min(), muvals.max()], [np.sqrt(2)*ndist.ppf(0.975)]*2, 'k--') 103 | ax.set_xlabel(r'$\mu$', fontsize=20) 104 | ax.set_ylabel(r'E(|CI($\mu$)|)', fontsize=20) 105 | ax.legend(loc='lower right') 106 | ax.set_ylim([0,4]) 107 | ax.set_xlim([-2,9]) 108 | f.savefig('figure_b.pdf') 109 | output = np.array(zip(muvals, mean_length)) 110 | np.savetxt('equal_tailed_lengths.csv', output, delimiter=',') 111 | 112 | if __name__ == '__main__': 113 | # main() 114 | pass 115 | -------------------------------------------------------------------------------- /code/lasso_example/tables.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd, numpy as np 3 | 4 | from data_carving import summary 5 | 6 | # Gaussian first 7 | 8 | summary(np.inf) 9 | gaussian = pd.read_csv('gaussian/summary.csv') 10 | 11 | gaussian_df = pd.DataFrame({'Algorithm': [r'$\text{Carve}_{100}$', 12 | r'$\text{Split}_{50}$', 13 | r'$\text{Carve}_{50}$', 14 | r'$\text{Split}_{75}$', 15 | r'$\text{Carve}_{75}$'], 16 | 'power_names':['power_carve', 17 | 'power_split', 18 | 'power_carve', 19 | 'power_split', 20 | 'power_carve'], 21 | 'level_names':['level_carve', 22 | 'level_split', 23 | 'level_carve', 24 | 'level_split', 25 | 'level_carve'], 26 | 'split':[1., 0.5, 0.5, 0.75, 0.75]}) 27 | 28 | level = [] 29 | power = [] 30 | FP = [] 31 | TP = [] 32 | FDP = [] 33 | screen = [] 34 | 35 | for lw, pw, frac in zip(gaussian_df['level_names'], 36 | gaussian_df['power_names'], 37 | gaussian_df['split']): 38 | level.append(np.mean(gaussian[lw][gaussian['split'] == frac])) 39 | power.append(np.mean(gaussian[pw][gaussian['split'] == frac])) 40 | FP.append(np.mean(gaussian['fp'][gaussian['split'] == frac])) 41 | TP.append(np.mean(gaussian['tp'][gaussian['split'] == frac])) 42 | FDP.append(np.mean(gaussian['fdp'][gaussian['split'] == frac])) 43 | screen.append(np.mean(gaussian['p_screen'][gaussian['split'] == frac])) 44 | 45 | gaussian_df['Power'] = power 46 | gaussian_df['Level'] = level 47 | gaussian_df[r'$\mathbb{E}[V]$'] = FP 48 | gaussian_df[r'$\mathbb{E}[R-V]$'] = TP 49 | gaussian_df['FDR'] = FDP 50 | gaussian_df[r'$p_{\text{screen}}$'] = screen 51 | 52 | del(gaussian_df['power_names']) 53 | del(gaussian_df['level_names']) 54 | del(gaussian_df['split']) 55 | 56 | gaussian_df = gaussian_df.reindex_axis([ 57 | 'Algorithm', 58 | r'$p_{\text{screen}}$', 59 | r'$\mathbb{E}[V]$', 60 | r'$\mathbb{E}[R-V]$', 61 | 'FDR', 62 | 'Power', 63 | 'Level'], axis=1) 64 | 65 | file('tables/gaussian.tex', 'w').write(gaussian_df.to_latex(index=False, float_format=lambda x: "%0.02f" % x).replace("\\_", "_")) 66 | 67 | 68 | # now to T_5 69 | 70 | summary(5) 71 | T5 = pd.read_csv('df_5/summary.csv') 72 | T5_df = pd.DataFrame({'Algorithm': [r'$\text{Carve}_{100}$', 73 | r'$\text{Split}_{50}$', 74 | r'$\text{Carve}_{50}$'], 75 | 'power_names':['power_carve', 76 | 'power_split', 77 | 'power_carve'], 78 | 'level_names':['level_carve', 79 | 'level_split', 80 | 
'level_carve'], 81 | 'split':[1., 0.5, 0.5]}) 82 | 83 | level = [] 84 | power = [] 85 | FP = [] 86 | TP = [] 87 | FDP = [] 88 | screen = [] 89 | 90 | for lw, pw, frac in zip(T5_df['level_names'], 91 | T5_df['power_names'], 92 | T5_df['split']): 93 | level.append(np.mean(T5[lw][T5['split'] == frac])) 94 | power.append(np.mean(T5[pw][T5['split'] == frac])) 95 | FP.append(np.mean(T5['fp'][T5['split'] == frac])) 96 | TP.append(np.mean(T5['tp'][T5['split'] == frac])) 97 | FDP.append(np.mean(T5['fdp'][T5['split'] == frac])) 98 | screen.append(np.mean(T5['p_screen'][T5['split'] == frac])) 99 | 100 | T5_df['Power'] = power 101 | T5_df['Level'] = level 102 | T5_df[r'$\mathbb{E}[V]$'] = FP 103 | T5_df[r'$\mathbb{E}[R-V]$'] = TP 104 | T5_df['FDR'] = FDP 105 | T5_df[r'$p_{\text{screen}}$'] = screen 106 | 107 | del(T5_df['power_names']) 108 | del(T5_df['level_names']) 109 | del(T5_df['split']) 110 | 111 | T5_df = T5_df.reindex_axis([ 112 | 'Algorithm', 113 | r'$p_{\text{screen}}$', 114 | r'$\mathbb{E}[V]$', 115 | r'$\mathbb{E}[R-V]$', 116 | 'FDR', 117 | 'Power', 118 | 'Level'], axis=1) 119 | 120 | file('tables/T5.tex', 'w').write(T5_df.to_latex(index=False, float_format=lambda x: "%0.02f" % x).replace("\\_", "_")) 121 | -------------------------------------------------------------------------------- /code/lasso_example/data_carving.py: -------------------------------------------------------------------------------- 1 | import numpy as np, os 2 | import matplotlib.pyplot as plt 3 | from matplotlib.mlab import rec2csv 4 | 5 | split_vals = ([0.4, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.87, 0.88, 0.9] + 6 | [0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 7 | 0.97, 0.98, 0.99, 1.00, 0.87, 0.88]*4) 8 | vals = [('n', 100), 9 | ('p', 200), 10 | ('s', 7), 11 | ('sigma', 5.), 12 | ('rho', 0.3), 13 | ('snr', 7.) 14 | ] 15 | opts = dict(vals) 16 | 17 | df = 5 18 | if df < np.inf: 19 | dname = 'df_%d' % df 20 | else: 21 | dname = 'gaussian' 22 | 23 | def summary(df, save=True): 24 | 25 | if df < np.inf: 26 | dname = 'df_%d' % df 27 | else: 28 | dname = 'gaussian' 29 | 30 | # put in point at 0.3 31 | 32 | (split_frac, level_carve, level_split, 33 | power_carve, power_split, p_screen) = \ 34 | (0.3, 0.05, 0.05, 1., 1., 0.) 
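    # (editorial note) the split = 0.3 entry is a hard-coded anchor point rather
    # than a simulation summary: level is set to the nominal 0.05, power to 1,
    # p_screen to 0, and the count/FP/TP/FDP fields to -1, which matches the first
    # data row of gaussian/summary.csv and df_5/summary.csv.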
35 | results = [(split_frac, level_carve, level_split, power_carve, 36 | power_split, p_screen, -1, -1, -1, -1, -1, -1)] 37 | 38 | if True: # try: 39 | screen = np.load('%s/screening.npy' % dname) 40 | disc = np.load('%s/discovery_rates.npy' % dname) 41 | 42 | for split_frac in sorted(np.unique(split_vals)): 43 | fname = '%s/results_split_%0.2f.npy' % (dname, split_frac) 44 | data = np.load(fname) 45 | null_carve = np.array([d['pval'] for d in data if d['method'] == 'carve' 46 | and d['null'] == True]) 47 | null_split = np.array([d['pval'] for d in data if d['method'] == 'split' 48 | and d['null'] == True]) 49 | alt_carve = np.array([d['pval'] for d in data if d['method'] == 'carve' 50 | and d['null'] == False]) 51 | alt_split = np.array([d['pval'] for d in data if d['method'] == 'split' 52 | and d['null'] == False]) 53 | 54 | disc_rate = disc[disc['split_frac'] == split_frac] 55 | FP = np.nanmean(disc_rate['FP']) # V 56 | TP = np.nanmean(disc_rate['TP']) # R-V 57 | 58 | R = FP + TP 59 | FDP = np.nanmean(FP / np.maximum(R, 1)) 60 | 61 | # for _, trial in df.groupby('uuid'): 62 | # FP_carve.append(((trial['method'] == 'carve') * (trial['null'] == True)).sum()) 63 | # FP_split.append(((trial['method'] == 'split') * (trial['null'] == True)).sum()) 64 | # TP_carve.append(((trial['method'] == 'carve') * (trial['null'] == False)).sum()) 65 | # TP_split.append(((trial['method'] == 'split') * (trial['null'] == False)).sum()) 66 | 67 | # FP_carve = np.array(FP_carve) 68 | # FP_split = np.array(FP_split) 69 | # TP_carve = np.array(TP_carve) 70 | # TP_split = np.array(TP_split) 71 | 72 | # FDP_carve.append(FP_carve * 1. / (FP_carve + TP_carve)) 73 | # FDP_split.append(FP_split * 1. / (FP_split + TP_split)) 74 | 75 | power_carve = np.nanmean(alt_carve < 0.05) 76 | power_split = np.nanmean(alt_split < 0.05) 77 | level_carve = np.nanmean(null_carve < 0.05) 78 | level_split = np.nanmean(null_split < 0.05) 79 | 80 | p_screen = 1. 
/ np.nanmean(screen[screen['split'] == split_frac]['counter']) 81 | result = (split_frac, 82 | level_carve, 83 | level_split, 84 | power_carve, 85 | power_split, 86 | p_screen, 87 | (data['null'] == True).sum(), 88 | (data['null'] == False).sum(), 89 | len(set(data['uuid'])), 90 | FP, 91 | TP, 92 | FDP, 93 | ) 94 | results.append(result) 95 | print split_frac 96 | 97 | R = np.array(results, np.dtype([ \ 98 | ('split', np.float), 99 | ('level_carve', np.float), 100 | ('level_split', np.float), 101 | ('power_carve', np.float), 102 | ('power_split', np.float), 103 | ('p_screen', np.float), 104 | ('count_null', np.int), 105 | ('count_alt', np.int), 106 | ('ntrial', np.int), 107 | ('fp', np.float), 108 | ('tp', np.float), 109 | ('fdp', np.float) 110 | ])) 111 | 112 | if save: 113 | np.save('%s/summary.npy' % dname, R) 114 | rec2csv(R, '%s/summary.csv' % dname) 115 | os.system('cd %s; R CMD BATCH makeRplots.r' % dname) 116 | 117 | plt.clf() 118 | plt.plot(R['split'], R['p_screen']) 119 | plt.plot(R['split'], R['power_split']) 120 | plt.plot(R['split'], R['power_carve']) 121 | plt.plot(R['split'], R['level_split']) 122 | plt.plot(R['split'], R['level_carve']) 123 | plt.plot([0.3,1],[0.05,0.05], 'k--') 124 | plt.savefig('%s/summary.pdf' % dname) 125 | 126 | else: #except: 127 | print 'no results yet' 128 | pass 129 | -------------------------------------------------------------------------------- /code/lasso_example/batch.py: -------------------------------------------------------------------------------- 1 | import numpy as np, csv, hashlib, os.path 2 | import pandas as pd 3 | import uuid 4 | from selection.algorithms.tests.test_lasso import test_data_carving 5 | 6 | from data_carving import split_vals, vals, df, dname 7 | 8 | # run on commit with SHA b91c434e74ad9d623d256db9f20e66c643504239 on jonathan-taylor/selective-inference 9 | 10 | np.random.seed(0) 11 | 12 | # how many points do we want for each fraction 13 | 14 | min_sample_size = 100000 15 | 16 | vals = vals + [('lam_frac', 2.), 17 | ('split_frac', 0.9)] 18 | 19 | dtype = np.dtype([('n', np.int), 20 | ('p', np.int), 21 | ('s', np.int), 22 | ('sigma', np.int), 23 | ('rho', np.float), 24 | ('snr', np.float), 25 | ('lam_frac', np.float), 26 | ('split_frac', np.float), 27 | ('method', 'S5'), 28 | ('null', np.bool), 29 | ('pval', np.float), 30 | ('uuid', 'S40')]) 31 | 32 | num_except = 0 33 | 34 | for i in range(5000): 35 | for split_frac in split_vals[::-1]: 36 | opts = dict(vals) 37 | opts['split_frac'] = split_frac 38 | identifier = str(uuid.uuid1()) 39 | fname = '%s/results_split_%0.2f.npy' % (dname, split_frac) 40 | opts['df'] = df # degrees of freedom for noise 41 | opts['compute_intervals'] = False 42 | opts['ndraw'] = 8000 43 | opts['burnin'] = 2000 44 | 45 | test = not os.path.exists(fname) 46 | if not test: 47 | prev_results = np.load(fname) 48 | if split_frac not in [0.3, 0.4, 0.5]: 49 | test = prev_results.shape[0] < min_sample_size 50 | elif split_frac in [0.3, 0.4]: 51 | test = prev_results.shape[0] < 5000 52 | else: 53 | test = prev_results.shape[0] < 10000 54 | if test: 55 | try: 56 | results = test_data_carving(**opts) 57 | (null_carve, 58 | null_split, 59 | alt_carve, 60 | alt_split, 61 | counter) = results[-1][:-4] 62 | 63 | FP_cur = [result[-1] for result in results] 64 | TP_cur = [result[-2] for result in results] 65 | 66 | print FP_cur, TP_cur 67 | params = [v for _, v in vals] 68 | params[-1] = split_frac 69 | results = [] 70 | 71 | if os.path.exists("%s/discovery_rates.npy" % dname): 72 | prev_results = 
np.load('%s/discovery_rates.npy' % dname) 73 | disc = np.empty(prev_results.shape[0] + len(FP_cur), 74 | prev_results.dtype) 75 | disc[:-len(FP_cur)] = prev_results 76 | disc[-len(FP_cur):]['split_frac'] = split_frac 77 | disc[-len(FP_cur):]['FP'] = FP_cur 78 | disc[-len(FP_cur):]['TP'] = TP_cur 79 | np.save('%s/discovery_rates.npy' % dname, disc) 80 | 81 | else: 82 | dtype_disc = np.dtype([('split_frac', np.float), 83 | ('FP', np.int), 84 | ('TP', np.int)]) 85 | disc = np.empty(len(FP_cur), dtype_disc) 86 | disc['FP'] = FP_cur 87 | disc['TP'] = TP_cur 88 | disc['split_frac'][:] = split_frac 89 | np.save('%s/discovery_rates.npy' % dname, disc) 90 | 91 | if os.path.exists('%s/screening.npy' % dname): 92 | prev_results = np.load('%s/screening.npy' % dname) 93 | screening = np.empty(prev_results.shape[0]+1, 94 | prev_results.dtype) 95 | screening[:-1] = prev_results 96 | screening[-1] = (split_frac, counter, identifier) 97 | np.save('%s/screening.npy' % dname, screening) 98 | else: 99 | dtype_screen = np.dtype([('split', np.float), 100 | ('counter', np.float), 101 | ('uuid', 'S40')]) 102 | screening = np.array([(split_frac, counter, identifier)], 103 | dtype_screen) 104 | np.save('%s/screening.npy' % dname, screening) 105 | 106 | results.extend([tuple(params) + ('carve', True, p, identifier) 107 | for p in null_carve]) 108 | results.extend([tuple(params) + ('split', True, p, identifier) 109 | for p in null_split]) 110 | results.extend([tuple(params) + ('carve', False, p, identifier) 111 | for p in alt_carve]) 112 | results.extend([tuple(params) + ('split', False, p, identifier) 113 | for p in alt_split]) 114 | 115 | rec_results = np.array(results, dtype) 116 | if os.path.exists(fname): 117 | prev_results = np.load(fname) 118 | rec_results = np.hstack([prev_results, rec_results]) 119 | np.save(fname, rec_results) 120 | print rec_results.shape, 1. / screening[screening['split'] == split_frac]['counter'].mean(), fname 121 | except: 122 | num_except += 1 123 | print("exception raised: %d" % num_except) 124 | pass 125 | print "num exception: %d" % num_except 126 | -------------------------------------------------------------------------------- /code/misc_plots/full_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import norm as ndist 3 | from statsmodels.distributions import ECDF 4 | from selection.covtest import covtest, reduced_covtest 5 | from selection.affine import constraints, sample_from_constraints 6 | from selection.discrete_family import discrete_family 7 | import matplotlib.pyplot as plt 8 | 9 | from constants import parameters, constraints 10 | 11 | def simulation(n, snr, pos, rho=0.25, nsim=5000, sigma=1.5): 12 | 13 | # Design, mean vector and parameter vector 14 | 15 | X, mu, beta = parameters(n, rho, pos) 16 | 17 | Pcov = [] 18 | Pexact = [] 19 | Pu = [] 20 | Pr = [] 21 | Pfixed = [] 22 | Pmax = [] 23 | hypotheses = [] 24 | 25 | 26 | # Set seed 27 | 28 | np.random.seed(0) 29 | 30 | # Max test 31 | 32 | max_stat = np.fabs(np.dot(X.T, np.random.standard_normal((n, 10000)))).max(0) * sigma 33 | max_fam = discrete_family(max_stat, np.ones(max_stat.shape)) 34 | max_fam.theta = 0 35 | 36 | for i in range(nsim): 37 | Y = (snr * mu + np.random.standard_normal(n)) * sigma 38 | Z = np.dot(X.T, Y) 39 | 40 | # did this find the correct position and sign? 
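        # (editorial note) `correct` flags the event that Z[pos] = X_pos' Y is
        # nonnegative and the largest of all |Z_j|, i.e. that the first selection
        # step screens the truly nonzero variable with the correct sign; the
        # conditional ECDF plot (fig2) below restricts to this `hypotheses` mask.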
41 | correct = np.all(np.less_equal(np.fabs(Z), Z[pos])) 42 | hypotheses.append(correct) 43 | 44 | Pcov.append(covtest(X, Y, sigma=sigma, exact=False)[1]) 45 | Pexact.append(covtest(X, Y, sigma=sigma, exact=True)[1]) 46 | Pfixed.append(2 * ndist.sf(np.fabs(np.dot(X.T, Y))[pos] / sigma)) 47 | Pu.append(reduced_covtest(X, Y, burnin=500, ndraw=5000)[1]) 48 | Pr.append(reduced_covtest(X, Y, burnin=500, ndraw=5000, sigma=sigma)[1]) 49 | p = max_fam.ccdf(0, np.fabs(np.dot(X.T, Y)).max()) 50 | Pmax.append(p) 51 | 52 | Ugrid = np.linspace(0,1,101) 53 | 54 | Pcov = np.array(Pcov) 55 | Pexact = np.array(Pexact) 56 | Pu = np.array(Pu) 57 | Pr = np.array(Pr) 58 | Pfixed = np.array(Pfixed) 59 | Pmax = np.array(Pmax) 60 | 61 | # plot of marginal distribution of p-values 62 | 63 | fig1 = plt.figure(figsize=(8,8)) 64 | ax1 = fig1.gca() 65 | ax1.plot(Ugrid, ECDF(Pcov)(Ugrid), label='Full (exact)', c='red', linewidth=5, alpha=0.5) 66 | ax1.plot(Ugrid, ECDF(Pexact)(Ugrid), label='Full (asymptotic)', c='k', linewidth=5, alpha=0.5) 67 | ax1.plot(Ugrid, ECDF(Pmax)(Ugrid), label='Max test', c='cyan', linewidth=5, alpha=0.5) 68 | ax1.plot(Ugrid, ECDF(Pu)(Ugrid), label=r'Selected 1-sparse, $\sigma$ unknown', c='blue', linewidth=5, alpha=0.5) 69 | ax1.plot(Ugrid, ECDF(Pr)(Ugrid), label=r'Selected 1-sparse, $\sigma$ known', c='green', linewidth=5, alpha=0.5) 70 | ax1.plot(Ugrid, ECDF(Pfixed)(Ugrid), label=r'Fixed 1-sparse, $\sigma$ known', c='yellow', linewidth=5, alpha=0.5) 71 | ax1.set_xlabel('P-value, $p$', fontsize=20) 72 | ax1.set_ylabel('ECDF($p$)', fontsize=20) 73 | ax1.plot([0.05,0.05],[0,1], 'k--') 74 | ax1.legend(loc='lower right') 75 | 76 | # conditional distribution of p-values 77 | # conditioned on selection choosing correct position and sign 78 | 79 | fig2 = plt.figure(figsize=(8,8)) 80 | hypotheses = np.array(hypotheses, np.bool) 81 | ax2 = fig2.gca() 82 | ax2.plot(Ugrid, ECDF(Pcov[hypotheses])(Ugrid), label='Full (exact)', c='red', linewidth=5, alpha=0.5) 83 | ax2.plot(Ugrid, ECDF(Pexact[hypotheses])(Ugrid), label='Full (asymptotic)', c='k', linewidth=5, alpha=0.5) 84 | ax2.plot(Ugrid, ECDF(Pu[hypotheses])(Ugrid), label=r'Selected 1-sparse, $\sigma$ unknown', c='blue', linewidth=5, alpha=0.5) 85 | ax2.plot(Ugrid, ECDF(Pr[hypotheses])(Ugrid), label=r'Selected 1-sparse, $\sigma$ known', c='green', linewidth=5, alpha=0.5) 86 | ax2.set_xlabel('P-value, $p$', fontsize=20) 87 | ax2.set_ylabel('ECDF($p$)', fontsize=20) 88 | ax2.plot([0.05,0.05],[0,1], 'k--') 89 | ax2.legend(loc='lower right') 90 | 91 | dbn1 = {} 92 | dbn1['exact'] = Pexact 93 | dbn1['covtest'] = Pcov 94 | dbn1['unknown'] = Pu 95 | dbn1['known'] = Pr 96 | dbn1['fixed'] = Pfixed 97 | dbn1['max'] = Pmax 98 | dbn1['hypotheses'] = hypotheses 99 | 100 | return fig1, fig2, dbn1 101 | 102 | def power(n, snr, pos, rho=0.25, 103 | muval = np.linspace(0,5,51)): 104 | 105 | X, mu, beta = parameters(n, rho, pos) 106 | 107 | # form the correct constraints 108 | 109 | con, initial = constraints(X, pos) 110 | 111 | Z_selection = sample_from_constraints(con, initial, ndraw=4000000, burnin=100000) 112 | S0 = np.dot(X.T, Z_selection.T).T 113 | W0 = np.ones(S0.shape[0]) 114 | dfam0 = discrete_family(S0[:,pos], W0) 115 | 116 | one_sided_acceptance_region = dfam0.one_sided_acceptance(0) 117 | def one_sided_power(mu): 118 | L, U = one_sided_acceptance_region 119 | return 1 - (dfam0.cdf(mu,U) - dfam0.cdf(mu, L)) 120 | 121 | power_fig = plt.figure(figsize=(8,8)) 122 | power_ax = power_fig.gca() 123 | power_ax.set_ylabel('Power', fontsize=20) 124 | 
power_ax.legend(loc='lower right') 125 | power_ax.set_xlabel('Effect size $\mu$', fontsize=20) 126 | full_power = np.array([one_sided_power(m) for m in muval]) 127 | print full_power 128 | power_ax.plot(muval, full_power, label='Reduced model UMPU', linewidth=7, alpha=0.5) 129 | power_ax.legend(loc='lower right') 130 | power_ax.set_xlim([0,5]) 131 | power_ax.plot([snr,snr],[0,1], 'k--') 132 | print one_sided_power(snr) 133 | return power_fig, {'full':full_power} 134 | 135 | def main(): 136 | 137 | power_fig = power(20, 3., 3)[0] 138 | power_fig.savefig('full_data_power.pdf') 139 | 140 | fig1, fig2, dbn1 = simulation(20, 3., 3, nsim=1000) 141 | fig1.savefig('reduced_1sparse_20.pdf') 142 | fig2.savefig('reduced_1sparse_20_cond.pdf') 143 | np.savez('pval_20.npz', **dbn1) 144 | 145 | fig1, fig2, dbn1 = simulation(100, 3., 3, nsim=1000) 146 | fig1.savefig('reduced_1sparse_100.pdf') 147 | fig2.savefig('reduced_1sparse_100_cond.pdf') 148 | np.savez('pval_100.npz', **dbn1) 149 | 150 | if __name__ == "__main__": 151 | main() 152 | -------------------------------------------------------------------------------- /code/twodim_example/interval_umau.py: -------------------------------------------------------------------------------- 1 | import os 2 | from glob import glob 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from selection import affine 6 | from selection.discrete_family import discrete_family 7 | from scipy.stats import norm as ndist 8 | from sklearn.isotonic import IsotonicRegression 9 | 10 | cutoff = 3. 11 | null_constraint = affine.constraints(np.array([[-1,0.]]), np.array([-cutoff])) 12 | null_sample = affine.sample_from_constraints(null_constraint, np.array([4,2.]), 13 | ndraw=100000).sum(1) 14 | null_dbn = discrete_family(null_sample, np.ones_like(null_sample)) 15 | 16 | def draw_sample(mu, cutoff, nsample=10000): 17 | if mu >= cutoff - 4: 18 | sample = [] 19 | while True: 20 | candidate = np.random.standard_normal(1000000) + mu 21 | candidate = candidate[candidate > cutoff] 22 | sample.extend(candidate) 23 | if len(sample) > nsample: 24 | break 25 | sample = np.array(sample) 26 | sample += np.random.standard_normal(sample.shape) + mu 27 | else: 28 | constraint = affine.constraints(np.array([[-1,0.]]), np.array([-cutoff])) 29 | constraint.mean = np.array([mu,mu]) 30 | sample = affine.sample_from_constraints(constraint, np.array([cutoff + 0.1,0]), 31 | ndraw=2000000, 32 | direction_of_interest=np.array([1,1.])) 33 | sample = sample.sum(1)[::(2000000/nsample)] 34 | return sample 35 | 36 | def form_samples(nsample=10000): 37 | samples = {} 38 | for mu in range(-6, 13): 39 | label = 'mu%d' % mu 40 | print label 41 | samples[label] = draw_sample(mu, cutoff, nsample=nsample)[:nsample] 42 | return samples 43 | 44 | def form_dbn(mu, samples): 45 | pts = np.arange(-6,13) 46 | keep = np.fabs(pts - mu) <= 2 47 | pts = pts[keep] 48 | _samples = np.hstack([samples['mu%d' % l] for l in pts]) 49 | _log_weights = np.hstack([(mu-l)*samples['mu%d' % l] for l in pts]) 50 | _weights = np.exp(_log_weights) 51 | dbn = discrete_family(_samples, _weights) 52 | dbn.theta = 0. 
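    # the draws pooled from nearby integer means l are importance-weighted by
    # exp((mu - l) * s); this exponential tilt makes `dbn` an approximation to the
    # truncated law of the sufficient statistic at the target mean mu.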
53 | return dbn 54 | 55 | def interval(mu, ndraw=100000, keep_every=100): 56 | #dbn = form_dbn(mu, samples) 57 | 58 | if not os.path.exists('umau_lengths%0.2f.npz' % mu): 59 | lengths = [] 60 | else: 61 | lengths = list(np.load('umau_lengths%0.2f.npz' % mu)['lengths']) 62 | 63 | if mu < 10: 64 | big_sample = draw_sample(mu, cutoff, nsample=50000)[:50000] 65 | mean, scale = big_sample.mean(), big_sample.std() 66 | big_sample -= mean 67 | big_sample /= scale 68 | 69 | dbn = discrete_family(big_sample, np.ones_like(big_sample)) 70 | dbn.theta = 0. 71 | new_sample = draw_sample(mu, cutoff, nsample=2500)[:2500] 72 | for i, s in enumerate(new_sample): 73 | try: 74 | _interval = dbn.interval((s - mean) / scale) 75 | lengths.append(np.fabs(_interval[1] - _interval[0]) / scale) 76 | except: 77 | print 'exception raised' 78 | if i % 20 == 0 and i > 0: 79 | print np.median(lengths), np.mean(lengths) 80 | np.savez('umau_lengths%0.2f' % mu, **{'lengths':lengths,'mu':mu}) 81 | if i % 1000 == 0 and i > 0: 82 | big_sample = draw_sample(mu, cutoff, nsample=50000)[:50000] 83 | mean, scale = big_sample.mean(), big_sample.std() 84 | big_sample -= mean 85 | big_sample /= scale 86 | print i 87 | else: 88 | for i in range(2500): 89 | big_sample = draw_sample(mu, cutoff, nsample=50000)[:50000] 90 | s = big_sample[-1] 91 | big_sample = big_sample[:-1] 92 | mean, scale = big_sample.mean(), big_sample.std() 93 | big_sample -= mean 94 | big_sample /= scale 95 | s = (s - mean) / scale 96 | dbn = discrete_family(big_sample, np.ones_like(big_sample)) 97 | try: 98 | _interval = dbn.interval(s) 99 | lengths.append(np.fabs(_interval[1] - _interval[0]) / scale) 100 | except: 101 | print 'exception raised' 102 | print i 103 | if i % 10 == 0 and i > 0: 104 | print np.median(lengths), np.mean(lengths) 105 | np.savez('umau_lengths%0.2f' % mu, **{'lengths':lengths,'mu':mu}) 106 | 107 | print 'final', np.mean(lengths) 108 | return (np.mean(lengths), np.std(lengths), np.median(lengths)) 109 | 110 | if not os.path.exists('interval_samples.npz'): 111 | samples = form_samples() 112 | np.savez('interval_samples.npz', **samples) 113 | else: 114 | samples = np.load('interval_samples.npz') 115 | 116 | def main(): 117 | muvals = np.linspace(-2, 9, 23)[::-1] 118 | L = [] 119 | np.random.shuffle(muvals) 120 | for mu in muvals: 121 | print 'trying %0.2f' % mu 122 | for f in glob('umau_lengths*npz'): 123 | d = np.load(f) 124 | if d['mu'] == mu and d['lengths'].shape[0] > 5000: 125 | print '%0.2f already done' % mu 126 | else: 127 | interval(mu) 128 | 129 | def plot(): 130 | 131 | results = [] 132 | for f in glob('umau_lengths*npz'): 133 | d = np.load(f) 134 | l = d['lengths'] 135 | l = l[~np.isnan(l)] 136 | l = l[np.isfinite(l)] 137 | l = l[l>0] 138 | results.append([d['mu'], l.mean()]) 139 | for f in glob('miller/lengths*npz'): 140 | d = np.load(f) 141 | if d['mu'] not in [r[0] for r in results]: 142 | l = d['lengths'] 143 | l = l[np.isfinite(l)] 144 | l = l[~np.isnan(l)] 145 | l = l[l>0] 146 | results.append([d['mu'], l.mean()]) 147 | else: 148 | idx = [r[0] for r in results].index(d['mu']) 149 | l = d['lengths'] 150 | l = l[np.isfinite(l)] 151 | l = l[~np.isnan(l)] 152 | l = l[l>0] 153 | results[idx][1] = 0.5 * (results[idx][1] + l.mean()) 154 | results = sorted(results) 155 | results = np.array(results).T 156 | muvals, mean_length = results 157 | f = plt.figure() 158 | f.clf() 159 | ax = f.gca() 160 | iso = IsotonicRegression(increasing=False) 161 | mean_length_iso = iso.fit_transform(np.arange(mean_length.shape[0]), mean_length) 162 | 
ax.plot(muvals, mean_length, 'k', linewidth=2, label='UMAU') 163 | ax.plot([muvals.min(), muvals.max()], [2*ndist.ppf(0.975)]*2, c='red', label='Sample splitting', linewidth=2) 164 | ax.plot([muvals.min(), muvals.max()], [np.sqrt(2)*ndist.ppf(0.975)]*2, 'k--') 165 | ax.set_xlabel(r'$\mu$', fontsize=20) 166 | ax.set_ylabel(r'E(|CI($\mu$)|)', fontsize=20) 167 | ax.legend(loc='lower right') 168 | ax.set_ylim([0,4]) 169 | ax.set_xlim([-2,9]) 170 | f.savefig('figure_b_umau.pdf') 171 | 172 | if __name__ == '__main__': 173 | # main() 174 | pass 175 | -------------------------------------------------------------------------------- /code/misc_plots/figs_onesparse.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import numpy as np 3 | from scipy.spatial import (voronoi_plot_2d, Voronoi, 4 | ConvexHull, convex_hull_plot_2d) 5 | from scipy.spatial._plotutils import _adjust_bounds 6 | from matplotlib import pyplot 7 | from selection.affine import constraints, sample_from_constraints 8 | import os 9 | from scipy.stats import norm 10 | 11 | figl, figw = 9.5, 9.5 12 | # data point 13 | Y = [-.45, .85] 14 | 15 | def hull_without_points(hull, ax=None): 16 | """ 17 | modified from scipy.spatial.convex_hull_plot_2d 18 | """ 19 | if ax is None: 20 | ax = pyplot.gcf().gca() 21 | 22 | if hull.points.shape[1] != 2: 23 | raise ValueError("Convex hull is not 2-D") 24 | 25 | for simplex in hull.simplices: 26 | ax.plot(hull.points[simplex,0], hull.points[simplex,1], 'k-') 27 | 28 | _adjust_bounds(ax, hull.points) 29 | 30 | return ax.figure 31 | 32 | def angle(x): 33 | """ 34 | recover angle from a 2 vector 35 | """ 36 | theta = np.arccos(x[0] / np.linalg.norm(x)) 37 | if x[1] < 0: 38 | theta = 2 * np.pi - theta 39 | return theta 40 | 41 | def just_hull(W, fill=True, fill_args={}, label=None, ax=None, ptlabel=None): 42 | 43 | """ 44 | Draw the hull without points 45 | """ 46 | hull = ConvexHull(W) 47 | f = hull_without_points(hull, ax=ax) 48 | a = f.gca() 49 | a.set_xticks([]) 50 | a.set_yticks([]) 51 | 52 | A, b, pairs, angles, perimeter, ai = extract_constraints(hull) 53 | if fill: 54 | perimeter_vertices = np.array([v for _, v in perimeter]) 55 | 56 | pyplot.scatter(perimeter_vertices[:,0], 57 | perimeter_vertices[:,1], c='gray', s=100, label=ptlabel) 58 | pyplot.fill(perimeter_vertices[:,0], perimeter_vertices[:,1], 59 | label=label, **fill_args) 60 | a.scatter(0,0, marker='+', c='k', s=50) 61 | return f, A, b, pairs, angles, perimeter, ai 62 | 63 | def extract_constraints(hull): 64 | """ 65 | given a convex hull, extract 66 | 67 | (A,b) such that 68 | 69 | $$hull = \{x: Ax+b \geq 0 \}$$ 70 | 71 | also, return rays of the normal cone associated to each vertex as `pairs` 72 | 73 | """ 74 | A = [] 75 | b = [] 76 | pairs = [] 77 | angles = [] 78 | 79 | perimeter = [] 80 | 81 | angle_intersection = [] 82 | 83 | for simplex1, simplex2 in itertools.combinations(hull.simplices, 2): 84 | intersect = set(simplex1).intersection(simplex2) 85 | 86 | for p in simplex1: 87 | perimeter.append((angle(hull.points[p]), list(hull.points[p]))) 88 | 89 | for p in simplex2: 90 | perimeter.append((angle(hull.points[p]), list(hull.points[p]))) 91 | 92 | if intersect: 93 | v1, v2 = hull.points[[simplex1[0], simplex1[1]]] 94 | diff = v1-v2 95 | normal1 = np.array([diff[1],-diff[0]]) 96 | 97 | # find a point not in the simplex 98 | i = 0 99 | while True: 100 | s = hull.points[i] 101 | if i not in simplex1: 102 | break 103 | i += 1 104 | if np.dot(normal1, 
s-hull.points[simplex1[0]]) > 0: 105 | normal1 = -normal1 106 | 107 | v1, v2 = hull.points[[simplex2[0], simplex2[1]]] 108 | diff = v1-v2 109 | normal2 = np.array([diff[1],-diff[0]]) 110 | 111 | # find a point not in the simplex 112 | i = 0 113 | while True: 114 | s = hull.points[i] 115 | if i not in simplex2: 116 | break 117 | i += 1 118 | 119 | if np.dot(normal2, s-hull.points[simplex2[0]]) > 0: 120 | normal2 = -normal2 121 | 122 | dual_basis = np.vstack([normal1, normal2]) 123 | angles.extend([angle(normal1), angle(normal2)]) 124 | angle_intersection.append([angle(normal1), angle(normal2), intersect]) 125 | pairs.append((hull.points[list(intersect)[0]], dual_basis)) 126 | 127 | for simplex in hull.simplices: 128 | v1, v2 = hull.points[[simplex[0], simplex[1]]] 129 | diff = v1-v2 130 | normal = np.array([diff[1],-diff[0]]) 131 | offset = -np.dot(normal, v1) 132 | scale = np.linalg.norm(normal) 133 | if offset < 0: 134 | scale *= -1 135 | normal /= scale 136 | offset /= scale 137 | A.append(normal) 138 | b.append(offset) 139 | 140 | # crude rounding 141 | angles = np.array(angles) 142 | angles *= 50000 143 | angles = np.unique(angles.astype(np.int)) 144 | angles = angles / 50000. 145 | 146 | return np.array(A), np.array(b), pairs, angles, sorted(perimeter), angle_intersection 147 | 148 | symmetric = False 149 | np.random.seed(10) 150 | W = np.array([(np.cos(a), np.sin(a)) for a in sorted([0.4,3.3,4.0, 5.1])]) 151 | if symmetric: 152 | W = np.vstack([W,-W]) 153 | hull = ConvexHull(W) 154 | f, A, b, pairs, angles, perimeter, ai = just_hull(W, 155 | fill_args={'facecolor':'gray', 'alpha':0.2}, 156 | label=r'$K$') 157 | 158 | def cone_rays(angles, ai, hull, which=None, ax=None, fill_args={}, 159 | plot=True): 160 | """ 161 | 162 | Plot the given Voronoi diagram in 2-D based on a set of directions 163 | 164 | Parameters 165 | ---------- 166 | vor : scipy.spatial.Voronoi instance 167 | Diagram to plot 168 | ax : matplotlib.axes.Axes instance, optional 169 | Axes to plot on 170 | 171 | Returns 172 | ------- 173 | fig : matplotlib.figure.Figure instance 174 | Figure for the plot 175 | 176 | See Also 177 | -------- 178 | Voronoi 179 | 180 | Notes 181 | ----- 182 | Requires Matplotlib. 
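    Note: despite the scipy-style description above, this helper plots the fan
    of normal-cone rays determined by `angles`; when `which` is given it also
    returns the polygon of the selected cone together with its dual rays.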
183 | 184 | """ 185 | angles = np.sort(angles) 186 | points = np.array([np.cos(angles), np.sin(angles)]).T 187 | 188 | vor = Voronoi(points) 189 | 190 | if vor.points.shape[1] != 2: 191 | raise ValueError("Voronoi diagram is not 2-D") 192 | 193 | if ax is None: 194 | ax = pyplot.gca() 195 | 196 | rays = np.array([(np.cos(_angle), np.sin(_angle)) for _angle in angles]) 197 | for i in range(rays.shape[0]): 198 | rays[i] /= np.linalg.norm(rays[i]) 199 | rays *= 100 200 | 201 | if plot: 202 | for ray in rays: 203 | ax.plot([0,ray[0]],[0,ray[1]], 'k--') 204 | 205 | if which is not None: 206 | if which < rays.shape[0]-1: 207 | active_rays = [rays[which], rays[which+1]] 208 | else: 209 | active_rays = [rays[0], rays[-1]] 210 | poly = np.vstack([active_rays[0], np.zeros(2), active_rays[1], 100*(active_rays[0]+active_rays[1])]) 211 | dual_rays = np.linalg.pinv(np.array(active_rays)) 212 | angle1 = angle(dual_rays[:,0]) 213 | angle2 = angle(dual_rays[:,1]) 214 | 215 | else: 216 | poly = None 217 | active_rays = None 218 | dual_rays = None 219 | 220 | _adjust_bounds(ax, vor.points) 221 | 222 | ax.set_xticks([]) 223 | ax.set_yticks([]) 224 | return ax, poly, dual_rays, np.array(active_rays) 225 | 226 | def all_dual_rays(W): 227 | 228 | f, A, b, pairs, angles, perimeter, ai = just_hull(W) 229 | 230 | angles = np.sort(angles) 231 | points = np.array([np.cos(angles), np.sin(angles)]).T 232 | 233 | vor = Voronoi(points) 234 | if vor.points.shape[1] != 2: 235 | raise ValueError("Voronoi diagram is not 2-D") 236 | 237 | rays = np.array([(np.cos(_angle), np.sin(_angle)) for _angle in angles]) 238 | for i in range(rays.shape[0]): 239 | rays[i] /= np.linalg.norm(rays[i]) 240 | rays *= 100 241 | 242 | active_rays = [] 243 | dual_rays = [] 244 | for i in range(rays.shape[0]): 245 | if i < rays.shape[0] - 1: 246 | active_rays.append(np.array([rays[i], rays[i+1]])) 247 | else: 248 | active_rays.append(np.array([rays[0], rays[-1]])) 249 | dual_rays.append(np.linalg.pinv(active_rays[-1])) 250 | return angles, active_rays, dual_rays, rays 251 | 252 | angles, A, D = all_dual_rays(W)[:3] 253 | signs = np.array([np.sign(np.dot(d.T, Y)) for d in D]) 254 | region = np.argmax(signs.sum(1)) 255 | 256 | def cone_with_slice(angles, ai, hull, which, fill_args={}, ax=None, label=None, 257 | suffix='', 258 | Y=None): 259 | 260 | ax, poly, constraint, rays = cone_rays(angles, ai, hull, which, ax=ax, fill_args=fill_args) 261 | eta_idx = np.argmax(np.dot(hull.points, Y)) 262 | eta = 40 * hull.points[eta_idx] 263 | 264 | representation = constraints(-constraint.T, np.zeros(2)) 265 | 266 | if Y is None: 267 | Y = sample_from_constraints(representation) 268 | 269 | ax.fill(poly[:,0], poly[:,1], label=r'$A_{(M,H_0)}$', **fill_args) 270 | if symmetric: 271 | ax.fill(-poly[:,0], -poly[:,1], **fill_args) 272 | 273 | legend_args = {'scatterpoints':1, 'fontsize':30, 'loc':'lower left'} 274 | ax.legend(**legend_args) 275 | ax.figure.savefig('fig_onesparse1.png', dpi=300) 276 | 277 | ax.scatter(Y[0], Y[1], c='k', s=150, label=label) 278 | 279 | Vp, _, Vm = representation.bounds(eta, Y)[:3] 280 | 281 | Yperp = Y - (np.dot(eta, Y) / 282 | np.linalg.norm(eta)**2 * eta) 283 | 284 | if Vm == np.inf: 285 | Vm = 10000 286 | 287 | width_points = np.array([(Yperp + Vp*eta / 288 | np.linalg.norm(eta)**2), 289 | (Yperp + Vm*eta / 290 | np.linalg.norm(eta)**2)]) 291 | 292 | ax.plot(width_points[:,0], width_points[:,1], '-', c='k', linewidth=4) 293 | legend_args = {'scatterpoints':1, 'fontsize':30, 'loc':'lower left'} 294 | ax.legend(**legend_args) 295 | 
ax.figure.savefig('fig_onesparse2.png', dpi=300) 296 | 297 | return ax, poly, constraint, rays 298 | 299 | f = pyplot.figure(figsize=(figl,figw)) 300 | ax = f.gca() 301 | ax = cone_with_slice(angles, 302 | ai, 303 | hull, 304 | region, 305 | ax=ax, 306 | label=r'$y$', 307 | Y=Y, 308 | fill_args=\ 309 | {'facecolor':'gray', 'alpha':0.2})[0] 310 | restriction = ax.figure 311 | restriction.savefig('cone_with_slice.png') 312 | -------------------------------------------------------------------------------- /code/misc_plots/figs_lasso.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import numpy as np 3 | from scipy.spatial import (voronoi_plot_2d, Voronoi, 4 | ConvexHull, convex_hull_plot_2d) 5 | from scipy.spatial._plotutils import _adjust_bounds 6 | from matplotlib import pyplot 7 | import matplotlib.colors 8 | from selection.affine import constraints, sample_from_constraints 9 | import os 10 | from scipy.stats import norm 11 | from selection.lasso import lasso 12 | 13 | figl, figw = 9.5, 9.5 14 | # data point 15 | Y = [1.3,2] 16 | 17 | yellow = matplotlib.colors.colorConverter.to_rgb("#f4e918") 18 | 19 | def hull_without_points(hull, ax=None): 20 | """ 21 | modified from scipy.spatial.convex_hull_plot_2d 22 | """ 23 | if ax is None: 24 | ax = pyplot.gcf().gca() 25 | 26 | if hull.points.shape[1] != 2: 27 | raise ValueError("Convex hull is not 2-D") 28 | 29 | for simplex in hull.simplices: 30 | ax.plot(hull.points[simplex,0], hull.points[simplex,1], 'k--') 31 | 32 | _adjust_bounds(ax, hull.points) 33 | 34 | return ax.figure 35 | 36 | def angle(x): 37 | """ 38 | recover angle from a 2 vector 39 | """ 40 | theta = np.arccos(x[0] / np.linalg.norm(x)) 41 | if x[1] < 0: 42 | theta = 2 * np.pi - theta 43 | return theta 44 | 45 | def just_hull(W, fill_args={}, label=None, ax=None, ptlabel=None, 46 | vertices=True, fill=True): 47 | 48 | """ 49 | Draw the hull without points 50 | """ 51 | hull = ConvexHull(W) 52 | f = hull_without_points(hull, ax=ax) 53 | a = f.gca() 54 | a.set_xticks([]) 55 | a.set_yticks([]) 56 | 57 | A, b, pairs, angles, perimeter, ai = extract_constraints(hull) 58 | if fill: 59 | perimeter_vertices = np.array([v for _, v in perimeter]) 60 | 61 | if fill: 62 | pyplot.fill(perimeter_vertices[:,0], perimeter_vertices[:,1], 63 | label=label, **fill_args) 64 | if vertices: 65 | pyplot.scatter(perimeter_vertices[:,0], 66 | perimeter_vertices[:,1], c=yellow, s=100, label=ptlabel) 67 | 68 | return f, A, b, pairs, angles, perimeter, ai 69 | 70 | def extract_constraints(hull): 71 | """ 72 | given a convex hull, extract 73 | 74 | (A,b) such that 75 | 76 | $$hull = \{x: Ax+b \geq 0 \}$$ 77 | 78 | also, return rays of the normal cone associated to each vertex as `pairs` 79 | 80 | """ 81 | A = [] 82 | b = [] 83 | pairs = [] 84 | angles = [] 85 | 86 | perimeter = [] 87 | 88 | angle_intersection = [] 89 | 90 | for simplex1, simplex2 in itertools.combinations(hull.simplices, 2): 91 | intersect = set(simplex1).intersection(simplex2) 92 | 93 | for p in simplex1: 94 | perimeter.append((angle(hull.points[p]), list(hull.points[p]))) 95 | 96 | for p in simplex2: 97 | perimeter.append((angle(hull.points[p]), list(hull.points[p]))) 98 | 99 | if intersect: 100 | v1, v2 = hull.points[[simplex1[0], simplex1[1]]] 101 | diff = v1-v2 102 | normal1 = np.array([diff[1],-diff[0]]) 103 | 104 | # find a point not in the simplex 105 | i = 0 106 | while True: 107 | s = hull.points[i] 108 | if i not in simplex1: 109 | break 110 | i += 1 111 | if 
np.dot(normal1, s-hull.points[simplex1[0]]) > 0: 112 | normal1 = -normal1 113 | 114 | v1, v2 = hull.points[[simplex2[0], simplex2[1]]] 115 | diff = v1-v2 116 | normal2 = np.array([diff[1],-diff[0]]) 117 | 118 | # find a point not in the simplex 119 | i = 0 120 | while True: 121 | s = hull.points[i] 122 | if i not in simplex2: 123 | break 124 | i += 1 125 | 126 | if np.dot(normal2, s-hull.points[simplex2[0]]) > 0: 127 | normal2 = -normal2 128 | 129 | dual_basis = np.vstack([normal1, normal2]) 130 | angles.extend([angle(normal1), angle(normal2)]) 131 | angle_intersection.append([angle(normal1), angle(normal2), intersect]) 132 | pairs.append((hull.points[list(intersect)[0]], dual_basis)) 133 | 134 | for simplex in hull.simplices: 135 | v1, v2 = hull.points[[simplex[0], simplex[1]]] 136 | diff = v1-v2 137 | normal = np.array([diff[1],-diff[0]]) 138 | offset = -np.dot(normal, v1) 139 | scale = np.linalg.norm(normal) 140 | if offset < 0: 141 | scale *= -1 142 | normal /= scale 143 | offset /= scale 144 | A.append(normal) 145 | b.append(offset) 146 | 147 | # crude rounding 148 | angles = np.array(angles) 149 | angles *= 50000 150 | angles = np.unique(angles.astype(np.int)) 151 | angles = angles / 50000. 152 | 153 | return np.array(A), np.array(b), pairs, angles, sorted(perimeter), angle_intersection 154 | 155 | symmetric = True 156 | np.random.seed(10) 157 | W = np.array([(np.cos(a), np.sin(a)) for a in (np.arange(0, np.pi, np.pi/3) + np.random.sample(3) * 0.6)]) 158 | if symmetric: 159 | W = np.vstack([W,-W]) 160 | hull = ConvexHull(W) 161 | 162 | def cone_rays(angles, ai, hull, which=None, ax=None, fill_args={}, 163 | plot=True): 164 | """ 165 | draw the cone rays 166 | """ 167 | angles = np.sort(angles) 168 | points = np.array([np.cos(angles), np.sin(angles)]).T 169 | 170 | vor = Voronoi(points) 171 | 172 | if vor.points.shape[1] != 2: 173 | raise ValueError("Voronoi diagram is not 2-D") 174 | 175 | if ax is None: 176 | ax = pyplot.gca() 177 | 178 | rays = np.array([(np.cos(_angle), np.sin(_angle)) for _angle in angles]) 179 | for i in range(rays.shape[0]): 180 | rays[i] /= np.linalg.norm(rays[i]) 181 | 182 | if plot: 183 | for ray in rays: 184 | ax.plot([0,100*ray[0]],[0,100*ray[1]], 'k--') 185 | 186 | if which is not None: 187 | if which < rays.shape[0]-1: 188 | active_rays = [100*rays[which], 100*rays[which+1]] 189 | else: 190 | active_rays = [100*rays[0], 100*rays[-1]] 191 | poly = np.vstack([active_rays[0], np.zeros(2), active_rays[1], 100*(active_rays[0]+active_rays[1])]) 192 | dual_rays = np.linalg.pinv(np.array(active_rays)) 193 | angle1 = angle(dual_rays[:,0]) 194 | angle2 = angle(dual_rays[:,1]) 195 | 196 | else: 197 | poly = None 198 | active_rays = None 199 | dual_rays = None 200 | 201 | _adjust_bounds(ax, vor.points) 202 | 203 | ax.set_xticks([]) 204 | ax.set_yticks([]) 205 | return ax, poly, dual_rays, np.array(active_rays) 206 | 207 | def all_dual_rays(W): 208 | 209 | f, A, b, pairs, angles, perimeter, ai = just_hull(W) 210 | 211 | angles = np.sort(angles) 212 | points = np.array([np.cos(angles), np.sin(angles)]).T 213 | 214 | vor = Voronoi(points) 215 | if vor.points.shape[1] != 2: 216 | raise ValueError("Voronoi diagram is not 2-D") 217 | 218 | rays = np.array([(np.cos(_angle), np.sin(_angle)) for _angle in angles]) 219 | for i in range(rays.shape[0]): 220 | rays[i] /= np.linalg.norm(rays[i]) 221 | 222 | active_rays = [] 223 | dual_rays = [] 224 | for i in range(rays.shape[0]): 225 | if i < rays.shape[0] - 1: 226 | active_rays.append(np.array([rays[i], rays[i+1]])) 227 | else: 
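            # wrap-around case: pair the last ray with the first, so that every
            # adjacent pair of rays bounds exactly one normal cone.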
228 | active_rays.append(np.array([rays[0], rays[-1]])) 229 | dual_rays.append(np.linalg.pinv(active_rays[-1])) 230 | return angles, active_rays, dual_rays, rays, A, b 231 | 232 | angles, _, Dual, rays, A, b = all_dual_rays(W) 233 | signs = np.array([np.sign(np.dot(d.T, Y)) for d in Dual]) 234 | 235 | # this part finds hyperplanes that can be used to form design 236 | # X for LASSO. 237 | 238 | # there is no guarantee that the [0,2,3] will work for other (Y,W) 239 | # it was done by inspection here 240 | 241 | A /= b[:,None] 242 | X = -A[[0,2,3]].T 243 | 244 | region = np.argmax(signs.sum(1)) 245 | 246 | def hull_with_rays(W, fill=True, fill_args={}, label=None, ax=None, 247 | Y=None, vertices=True, which=region): 248 | f, A, b, pairs, angles, perimeter, ai = just_hull(W, 249 | fill=fill, 250 | label=label, 251 | ax=ax, 252 | fill_args=fill_args, 253 | vertices=vertices) 254 | 255 | 256 | ax, poly, constraint, rays = cone_rays(angles, ai, hull, which, ax=ax, plot=False, fill_args=fill_args) 257 | 258 | if Y is not None: 259 | L = lasso(Y, X, lam=1.) 260 | L.fit(min_its=200, tol=1.e-14) 261 | representation = L.constraints 262 | eta = L._XEinv[1] 263 | 264 | vtx_idx = np.argmax(np.dot(hull.points, Y)) 265 | vtx = hull.points[vtx_idx] 266 | 267 | for i in range(len(pairs)): 268 | v, D = pairs[i] 269 | ax.plot([v[0],v[0]+10000*D[0,0]],[v[1],v[1]+10000*D[0,1]], 'k--') 270 | ax.plot([v[0],v[0]+10000*D[1,0]],[v[1],v[1]+10000*D[1,1]], 'k--') 271 | 272 | ax.set_xlim(3*np.array(ax.get_xlim())) 273 | ax.set_ylim(3*np.array(ax.get_ylim())) 274 | legend_args = {'scatterpoints':1, 'fontsize':25, 'loc':'lower left'} 275 | ax.legend(**legend_args) 276 | 277 | for i in range(3): 278 | ax.arrow(0,0,X[0,i]/3.,X[1,i]/3., linewidth=3, head_width=0.02, fc='k') 279 | 280 | Vp, V, Vm = representation.bounds(eta, Y)[:3] 281 | 282 | Yperp = Y - (np.dot(eta, Y) / 283 | np.linalg.norm(eta)**2 * eta) 284 | 285 | if Vm == np.inf: 286 | Vm = 10000 287 | 288 | slice_points = np.array([(Yperp + Vp*eta / 289 | np.linalg.norm(eta)**2), 290 | (Yperp + Vm*eta / 291 | np.linalg.norm(eta)**2)]) 292 | 293 | 294 | ax.legend(**legend_args) 295 | 296 | ax.text(0.01,0.48, r'$X_3$', fontsize=25) 297 | ax.text(0.39,0.15, r'$X_1$', fontsize=25) 298 | ax.text(-0.57,-0.30, r'$X_2$', fontsize=25) 299 | 300 | f.savefig('fig_lasso3.pdf') 301 | f.savefig('fig_lasso3.png') 302 | 303 | ax.fill(poly[:,0] + vtx[0], poly[:,1] + vtx[1], label=r'$\{1,3\}$ selected', **fill_args) 304 | ax.fill(-poly[:,0] - vtx[0], -poly[:,1] - vtx[1], **fill_args) 305 | 306 | f.savefig('fig_lasso0.pdf') 307 | f.savefig('fig_lasso0.png') 308 | ax.text(1.4,2.1, r'$Y$', fontsize=25) 309 | ax.add_patch(pyplot.Circle(Y, radius=.08, facecolor='k')) 310 | f.savefig('fig_lasso1.pdf') 311 | f.savefig('fig_lasso1.png') 312 | 313 | ax.plot(slice_points[:,0] - 0*V, slice_points[:,1] - 0*V, '-', c='k', linewidth=4) 314 | ax.plot([Y[0]]*2, [Y[1]]*2, c='k') 315 | f.savefig('fig_lasso2.pdf') 316 | f.savefig('fig_lasso2.png') 317 | 318 | return f, A, b, pairs, angles, perimeter 319 | 320 | f = pyplot.figure(figsize=(figl,figw)) 321 | f.clf() 322 | ax = f.gca() 323 | polytope_with_cones = hull_with_rays(W, 324 | fill_args=( 325 | {'facecolor':yellow, 'alpha':0.8}), 326 | ax=f.gca(), 327 | label=r'$K$', 328 | fill=False, 329 | vertices=False, 330 | Y=Y)[0] 331 | 332 | 333 | -------------------------------------------------------------------------------- /code/misc_plots/umpuUnivar.r: -------------------------------------------------------------------------------- 1 | ### Construct 
UMAU intervals for a truncated Gaussian 2 | 3 | 4 | 5 | 6 | ## Simple functions for specifying and checking a "cutoffs" object, which 7 | ## is just a matrix, each of whose rows is a non-empty real interval (infinite endpoints allowed) 8 | 9 | check.cutoffs <- function(cutoffs) { 10 | if(!is.matrix(cutoffs) || dim(cutoffs)[2] != 2) stop("cutoffs should be a matrix with 2 columns") 11 | if(sum(cutoffs[,2] <= cutoffs[,1]) > 0) stop("all right endpoints should be > left endpoints") 12 | if(sum(diff(c(t(cutoffs))) <= 0) > 0) stop("endpoints should be strictly increasing") 13 | } 14 | 15 | negate.cutoffs <- function(cutoffs) { 16 | -cutoffs[nrow(cutoffs):1,2:1,drop=FALSE] 17 | } 18 | 19 | negate.cutoffs(rbind(c(-Inf,-4),c(-3,-2),c(7,Inf))) 20 | 21 | two.sided.cutoff <- function(x) rbind(neg=c(-Inf,-abs(x)),pos=c(abs(x),Inf)) 22 | 23 | two.sided.cutoff(3) 24 | 25 | ## Compute Phi(b-mu) - Phi(a-mu) in a numerically robust way 26 | ## mu can be a vector 27 | pnorm.interval <- function(mu, ab) { 28 | ifelse(mean(ab) - mu < 0, 29 | pnorm(ab[2] - mu) - pnorm(ab[1] - mu), 30 | pnorm(mu - ab[1]) - pnorm(mu - ab[2])) 31 | } 32 | 33 | ## Compute Phi(b-mu) - Phi(a-mu) for each [a,b] in S 34 | pnorm.cutoffs <- function(mu, cutoffs) { 35 | ret <- apply(cutoffs, 1, function(cut) pnorm.interval(mu, cut)) 36 | if(!is.matrix(ret)) ret <- t(ret) 37 | dimnames(ret) <- list(as.character(mu),row.names(cutoffs)) 38 | ret 39 | } 40 | 41 | pnorm.cutoffs(-1:1,two.sided.cutoff(3)) 42 | pnorm.cutoffs(-1,two.sided.cutoff(3)) 43 | 44 | 45 | ## Compute phi(b-mu) - phi(a-mu) for each [a,b] in S 46 | ## mu can be a vector 47 | dnorm.cutoffs <- function(mu, cutoffs) { 48 | ret <- apply(cutoffs, 1, function(cut) dnorm(cut[2] - mu) - dnorm(cut[1] - mu)) 49 | if(!is.matrix(ret)) ret <- t(ret) 50 | dimnames(ret) <- list(as.character(mu),row.names(cutoffs)) 51 | ret 52 | } 53 | 54 | dnorm.cutoffs(-1:1,two.sided.cutoff(3)) 55 | dnorm.cutoffs(-1,two.sided.cutoff(3)) 56 | 57 | ## Compute P_mu(X= cutoffs[,1] & x <= cutoffs[,2]) 65 | stopifnot(length(k)==1) 66 | (sum(pk[(1:K) < k]) + pnorm.interval(mu, c(cutoffs[k,1], x)) ) / p 67 | } 68 | 69 | F.mu(-10.01,0,two.sided.cutoff(10)) 70 | F.mu(-9,0,two.sided.cutoff(10)) 71 | F.mu(10.2,0,two.sided.cutoff(10)) 72 | 73 | ## Compute the inverse of the previous function 74 | ## mu CANNOT be a vector, pk is a one-row matrix or vector 75 | F.inv.mu <- function(F, mu, cutoffs, pk=pnorm.cutoffs(mu, cutoffs)) { 76 | stopifnot(length(mu)==1, nrow(pk)==1 || !is.matrix(pk)) 77 | p <- sum(pk) 78 | k <- max(which(c(0,cumsum(pk))/p < F)) 79 | pnorm.increment <- p*F - c(0,cumsum(pk))[k] 80 | if(mean(cutoffs[k,]) < 0) 81 | mu + qnorm(pnorm(cutoffs[k,1]-mu) + pnorm.increment) 82 | else 83 | mu + qnorm(pnorm(cutoffs[k,1]-mu,lower.tail=FALSE) - pnorm.increment,lower.tail=FALSE) 84 | } 85 | 86 | ## Compute c2(c1) for a single c1 87 | ## mu CANNOT be a vector, pk is a one-row matrix or vector 88 | c2.single <- function(c1, mu, alpha, cutoffs, pk=pnorm.cutoffs(mu, cutoffs)) { 89 | stopifnot(length(mu)==1, nrow(pk)==1 || !is.matrix(pk)) 90 | K <- length(pk) 91 | alpha1 <- F.mu(c1, mu, cutoffs, pk) 92 | if(alpha1 > alpha) return(NA) 93 | 94 | alpha2 <- alpha-alpha1 95 | return(F.inv.mu(1-alpha2, mu, cutoffs, pk)) 96 | } 97 | 98 | c2.single(-10.3, 0, .05, two.sided.cutoff(10)) 99 | F.mu(-10.3, 0, two.sided.cutoff(10)) 100 | 101 | ## Do the same, for a vector of c1 and mu 102 | c2 <- function(c1, mu, alpha, cutoffs, pk=pnorm.cutoffs(mu, cutoffs)) { 103 | sapply(1:length(c1),function(i) 104 | c2.single(c1[i], mu[i], 
alpha, cutoffs, 105 | pk[i,,drop=FALSE])) 106 | } 107 | c2(-10.3, 0, .05, two.sided.cutoff(10)) 108 | 109 | 110 | ## Compute g_mu(c1) for a single mu and c1 (see LaTeX documentation) 111 | ## c1 and mu CANNOT be vectors, pk is NOT a matrix 112 | g.mu.single <- function(c1, mu, alpha, cutoffs, 113 | pk=pnorm.cutoffs(mu, cutoffs), dk=dnorm.cutoffs(mu, cutoffs)) { 114 | const <- (1-alpha) * (sum(-dk) + mu * sum(pk)) 115 | cc2 <- c2(c1, mu, alpha, cutoffs, pk) 116 | if(is.na(cc2)) return(Inf) 117 | 118 | K <- length(pk) 119 | p <- sum(pk) 120 | k1 <- which(c1 >= cutoffs[,1] & c1 <= cutoffs[,2]) 121 | stopifnot(length(k1)==1) 122 | k2 <- which(cc2 >= cutoffs[,1] & cc2 <= cutoffs[,2]) 123 | stopifnot(length(k2)==1) 124 | 125 | if(k1 < k2) { 126 | sum(-dk[(1:K) > k1 & (1:K) < k2]) + mu * sum(pk[(1:K) > k1 & (1:K) < k2]) + 127 | - dnorm(cutoffs[k1,2] - mu) + dnorm(c1 - mu) - dnorm(cc2 - mu) + dnorm(cutoffs[k2,1] - mu) + 128 | mu * (pnorm.interval(mu,c(c1,cutoffs[k1,2])) + pnorm.interval(mu,c(cutoffs[k2,1], cc2))) - 129 | const 130 | } else { 131 | - dnorm(cc2 - mu) + dnorm(c1 - mu) + mu * pnorm.interval(mu,c(c1,cc2)) - const 132 | } 133 | } 134 | 135 | ## Compute g_mu(c1) for a vector of mu and c1 (see LaTeX documentation) 136 | g.mu <- function(c1, mu, alpha, cutoffs, 137 | pk=pnorm.cutoffs(mu, cutoffs), dk=dnorm.cutoffs(mu, cutoffs)) { 138 | sapply(1:length(c1),function(i) 139 | g.mu.single(c1[i], mu[i], alpha, cutoffs, 140 | pk[i,,drop=FALSE], dk[i,,drop=FALSE])) 141 | } 142 | 143 | dnorm.cutoffs(c(0,0),two.sided.cutoff(10)) 144 | c.vals <- seq(-10.3,-10.28,.001) 145 | plot(c.vals,g.mu(c.vals,rep(0,length(c.vals)), .05, two.sided.cutoff(10))) 146 | 147 | g.mu(-10.2925, 0, .05, two.sided.cutoff(10)) 148 | 149 | ## Compute g_mu'(c1) 150 | dg.mu <- function(c1, mu, alpha, cutoffs, 151 | pk=pnorm.cutoffs(mu, cutoffs)) { 152 | (c2(c1, mu, alpha, cutoffs, pk) - c1) * dnorm(c1 - mu) 153 | } 154 | 155 | points(c.vals, g.mu(c.vals-.001, rep(0,length(c.vals)), .05, two.sided.cutoff(10)) + 156 | dg.mu(c.vals-.001, rep(0,length(c.vals)), .05, two.sided.cutoff(10)) * .001,col="red",pch=3) 157 | 158 | mu.vals <- seq(-10,15,.1) 159 | plot(mu.vals, g.mu(rep(-10.2925,length(mu.vals)), mu.vals, .05, two.sided.cutoff(10))) 160 | mu.vals <- seq(-.001,.001,.0001) 161 | plot(mu.vals, g.mu(rep(-10.2925,length(mu.vals)), mu.vals, .05, two.sided.cutoff(10))) 162 | 163 | 164 | ## Compute upper CI endpoint, for a single x, when sigma=1 165 | umau.normal.unit.var.upper.single <- function(x, cutoffs, alpha=.05, mu.lo=x, mu.hi=x+2, tol=1E-8) { 166 | check.cutoffs(cutoffs) 167 | 168 | mu.too.low <- function(mu) { 169 | g.mu(x,mu,alpha,cutoffs) > 0 170 | } 171 | mu.too.high <- function(mu) { 172 | g.mu(x,mu,alpha,cutoffs) < 0 173 | } 174 | 175 | while(mu.too.high(mu.lo)) { 176 | mu.hi <- mu.lo 177 | mu.lo <- mu.lo - 2 178 | } 179 | while(mu.too.low(mu.hi)) { 180 | mu.lo <- mu.hi 181 | mu.hi <- mu.hi + 2 182 | } 183 | while(mu.hi - mu.lo > tol) { 184 | mu.avg <- (mu.lo + mu.hi) / 2 185 | if(mu.too.high(mu.avg)) { 186 | mu.hi <- mu.avg 187 | } else { 188 | mu.lo <- mu.avg 189 | } 190 | } 191 | mu.avg 192 | } 193 | a <- try 194 | a <- ifelse(try((1:2) %*% matrix(0,4,4)),1,2) 195 | 196 | umau.normal.unit.var.upper.single(-10.29, two.sided.cutoff(10), mu.lo=-1, mu.hi=5) 197 | umau.normal.unit.var.upper.single(-10.2925, two.sided.cutoff(10), mu.lo=-1, mu.hi=5) 198 | 199 | ## Compute both CI endpoints, for a single x 200 | umau.normal.single <- function(x, cutoffs, sigma=1, alpha=.05, mu.lo=x, mu.hi=x+2, tol=1E-8) { 201 | mu.upper 
<- mu.lower <- NA 202 | try(mu.upper <- sigma * umau.normal.unit.var.upper.single(x/sigma, cutoffs/sigma, alpha, 203 | mu.lo/sigma, mu.hi/sigma, tol)) 204 | try(mu.lower <- -sigma * umau.normal.unit.var.upper.single(-x/sigma, negate.cutoffs(cutoffs)/sigma, alpha, 205 | -mu.hi/sigma, -mu.lo/sigma, tol)) 206 | return(c(mu.lower, mu.upper)) 207 | } 208 | 209 | umau.normal.single(10.29, two.sided.cutoff(10)) 210 | umau.normal.single(1+10.29, 1+two.sided.cutoff(10)) 211 | umau.normal.single(-10.29, two.sided.cutoff(10)) 212 | umau.normal.single(1-10.29, 1+two.sided.cutoff(10)) 213 | 214 | umau.normal.single(10.005, two.sided.cutoff(10)) 215 | umau.normal.single(10.005, two.sided.cutoff(10)) 216 | 217 | umau.normal.single(-13, two.sided.cutoff(10)) 218 | 219 | #debug(c2.single) 220 | umau.normal(10.01, t(c(10,Inf)), tol=1E-2) 221 | 222 | 223 | ## Compute both CI endpoints, for a vector of x 224 | umau.normal <- function(x, cutoffs, sigma=1, alpha=.05, tol=1E-8) { 225 | sapply(1:length(x), function(i) umau.normal.single(x[i], cutoffs, sigma, alpha, tol=tol)) 226 | } 227 | 228 | 229 | ## Make UMAU CIs for two different S's: 230 | ## first, (-Inf, -10) U (10, Inf) 231 | ## second, (-Inf, -10) U (-0.1, 0.1) U (10, Inf) 232 | ## We see that even a small sliver of additional support keeps the 233 | ## CIs from "reaching back" too far 234 | x.vals <- c(seq(-20,-14,1),seq(-13,-10.4,.1),seq(-10.3,-10.06,.02),seq(-10.05,-10.01,.01),seq(-10.01,-10,.002)) 235 | length(x.vals) 236 | CIs10 <- umau.normal(x.vals, two.sided.cutoff(10)) 237 | CIsMid <- umau.normal(x.vals, rbind(c(-Inf,-10),c(-.1,.1),c(10,Inf))) 238 | 239 | x.vals1side <- c(seq(10.14,10.3,.01),seq(10.4,11,.1),12:15) 240 | x.vals1side <- -7-x.vals 241 | CIs1side <- umau.normal(x.vals1side, t(c(3,Inf))) 242 | 243 | quants <- optim(par = c(.1,2), 244 | fn = function(x) { (exp(-x[1])-exp(-x[2]) - .95)^2 + (x[1]*exp(-x[1]) - x[2]*exp(-x[2]))^2 })$par 245 | 246 | pdf("umpuUnivar.pdf",height=4,width=4) 247 | par(mar=c(4.1,4.1,3.1,1.1)) 248 | matplot(x.vals1side[1:ncol(CIs1side)],t(CIs1side),type="l",xlim=c(0,15),ylim=c(-5,10),lty=1,col=1, 249 | xlab="Observed Y", ylab=expression(mu), 250 | main="Selective Confidence Interval") 251 | #curve(3-quants[1]/(x-3),3.001,3.5,col="red",add=T) 252 | #curve(3-quants[2]/(x-3),3.001,3.5,col="red",add=T) 253 | abline(h=0,lty=3) 254 | abline(v=3,lty=3) 255 | abline(v=0,lty=3) 256 | #abline(0,1,lty=3) 257 | abline(1.96,1,lty=2) 258 | abline(-1.96,1,lty=2) 259 | legend("bottomright",bg="white",legend=c("Selective CI", "Nominal CI"),lty=c(1,2),bty="n") 260 | dev.off() 261 | 262 | exp(-quants[1]) 263 | log(.025) 264 | quants[1]*exp(-quants[1]) 265 | quants[2]*exp(-quants[2]) 266 | curve(x*exp(-x),0,3) 267 | 268 | 269 | pdf("UMAU.pdf",width=16) 270 | par(mfrow=c(1,3)) 271 | 272 | matplot(x.vals1side,t(CIs1side),type="l",xlim=c(-20,20),ylim=c(-23,23),lty=1,col=1, 273 | xlab="Observed X", ylab="UMAU Confidence Interval", 274 | main=expression(S==(10*","*infinity))) 275 | abline(h=0,lty=2) 276 | abline(v=10,lty=3) 277 | abline(0,1,lty=3) 278 | 279 | matplot(x.vals,t(CIs10),type="l",xlim=c(-20,20),ylim=c(-23,23),lty=1,col=1, 280 | xlab="Observed X", ylab="UMAU Confidence Interval", 281 | main=expression(S==(-infinity*","*10)~U~(10*","*infinity))) 282 | matplot(-x.vals,t(-CIs10),type="l",lty=1,col=1,add=TRUE) 283 | abline(h=0,lty=2) 284 | abline(v=c(-10,10),lty=3) 285 | abline(0,1,lty=3) 286 | 287 | matplot(x.vals,t(CIsMid),type="l",xlim=c(-20,20),ylim=c(-23,23),lty=1,col=1, 288 | xlab="Observed X", ylab="UMAU Confidence 
Interval", 289 | main=expression(S==(-infinity*","*10)~U~(-.1*","*.1)~U~(10*","*infinity))) 290 | matplot(-x.vals,-t(CIsMid),type="l",lty=1,col=1,add=TRUE) 291 | abline(h=0,lty=2) 292 | abline(v=c(-10,10),lty=3) 293 | abline(0,1,lty=3) 294 | dev.off() 295 | 296 | exp(-1000) 297 | 298 | 299 | 300 | 301 | -------------------------------------------------------------------------------- /code/misc_plots/sample_splitting.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import norm as ndist 3 | from statsmodels.distributions import ECDF 4 | from selection.covtest import covtest, reduced_covtest 5 | from selection.affine import constraints, sample_from_constraints 6 | from selection.discrete_family import discrete_family 7 | import matplotlib.pyplot as plt 8 | 9 | from constants import parameters, constraints 10 | from full_data import power as power_full 11 | 12 | def simulation(n, snr, pos, rho=0.25, ndraw=5000, burnin=1000): 13 | 14 | X, mu, beta = parameters(n, rho, pos) 15 | con, initial = constraints(X, pos) 16 | 17 | con.mean = snr * mu / np.sqrt(2) 18 | Z_selection = sample_from_constraints(con, initial, ndraw=ndraw, burnin=burnin) 19 | Z_inference_pos = np.random.standard_normal(Z_selection.shape[0]) + snr / np.sqrt(2) 20 | return (np.dot(X.T, Z_selection.T)[pos] + Z_inference_pos) / np.sqrt(2) 21 | 22 | def power_onesided(n, snr, pos, rho=0.25, ndraw=10000, 23 | muval = np.linspace(0,5,51), burnin=1000): 24 | 25 | S0 = simulation(n, 0, pos, rho=rho, ndraw=ndraw, burnin=burnin) 26 | W0 = np.ones(S0.shape) 27 | dfam0 = discrete_family(S0, W0) 28 | 29 | cutoff = dfam0.one_sided_acceptance(0, alternative='greater')[1] 30 | 31 | def UMPU_power_onesided(mu): 32 | return dfam0.ccdf(mu, cutoff) 33 | 34 | def sample_split_onesided(mu, alpha=0.05): 35 | cutoff = ndist.ppf(1 - alpha) 36 | if np.any(mu < 0): 37 | raise ValueError('mu is negative: in null hypothesis') 38 | power = 1 - ndist.cdf(cutoff - mu / np.sqrt(2)) 39 | return np.squeeze(power) 40 | 41 | power_fig = plt.figure(figsize=(8,8)) 42 | P_split = np.array(sample_split_onesided(muval)) 43 | plt.plot(muval, P_split, label='Sample splitting', c='red', linewidth=5, alpha=0.5) 44 | power_ax = power_fig.gca() 45 | power_ax.set_ylabel('Power', fontsize=20) 46 | power_ax.legend(loc='lower right') 47 | power_ax.set_xlabel('Effect size $\mu$', fontsize=20) 48 | P_UMPU = np.array([UMPU_power_onesided(m) for m in muval]) 49 | power_ax.plot(muval, P_UMPU, label=r'Selected using $i^*(Z_S)$', linewidth=5, alpha=0.5) 50 | P_full = power_full(n, snr, pos, rho=rho, muval=muval)[1]['full'] 51 | power_ax.plot(muval, P_full, label=r'Selected using $i^*(Z)$', color='blue', linewidth=5, alpha=0.5) 52 | print UMPU_power_onesided(snr) 53 | power_ax.legend(loc='lower right') 54 | power_ax.set_xlim([0,5]) 55 | power_ax.plot([snr,snr], [0,1], 'k--') 56 | return power_fig, {'umpu':P_UMPU, 'split':P_split} 57 | 58 | def marginal(n, snr, pos, rho=0.25, ndraw=5000, 59 | burnin=1000, nsim=5000, sigma=1.): 60 | 61 | X, mu, beta = parameters(n, rho, pos) 62 | 63 | Psplit = [] 64 | Pselect = [] 65 | hypotheses = [] 66 | 67 | 68 | for _ in range(nsim): 69 | Y_select = (snr * mu / np.sqrt(2) + np.random.standard_normal(n)) * sigma 70 | con, _, select_pos, sign = covtest(X, Y_select, sigma=sigma, exact=True) 71 | 72 | cond_ncp = snr * np.dot(X.T[select_pos], mu) / np.sqrt(2) * sign 73 | 74 | correct = (sign == +1) and (pos == select_pos) 75 | hypotheses.append(correct) 76 | Y_null = 
sample_from_constraints(con, Y_select, ndraw=ndraw, burnin=burnin) 77 | Z_null = (np.dot(X.T[select_pos], Y_null.T) + sigma * np.random.standard_normal(ndraw)) / np.sqrt(2) 78 | Z_inference = sigma * (cond_ncp + np.random.standard_normal()) 79 | Z_observed = (np.dot(X.T[select_pos], Y_select) * sign + Z_inference) / np.sqrt(2) 80 | dfam = discrete_family(Z_null, np.ones(Z_null.shape)) 81 | Pselect.append(dfam.ccdf(0, Z_observed)) 82 | if sign == +1: 83 | Psplit.append(ndist.sf(Z_inference / sigma)) 84 | else: 85 | Psplit.append(ndist.cdf(Z_inference / sigma)) 86 | 87 | Ugrid = np.linspace(0,1,101) 88 | 89 | Psplit = np.array(Psplit) 90 | Pselect = np.array(Pselect) 91 | hypotheses = np.array(hypotheses, np.bool) 92 | 93 | # plot of marginal distribution of p-values 94 | 95 | fig1 = plt.figure(figsize=(8,8)) 96 | ax1 = fig1.gca() 97 | ax1.plot(Ugrid, ECDF(Psplit)(Ugrid), label='Sample splitting', c='red', linewidth=5, alpha=0.5) 98 | ax1.plot(Ugrid, ECDF(Pselect)(Ugrid), label='Selected using $i^*(Z_S)$', c='blue', linewidth=5, alpha=0.5) 99 | ax1.set_xlabel('P-value, $p$', fontsize=20) 100 | ax1.set_ylabel('ECDF($p$)', fontsize=20) 101 | ax1.plot([0.05,0.05],[0,1], 'k--') 102 | ax1.legend(loc='lower right') 103 | 104 | # conditional distribution of p-values 105 | # conditioned on selection choosing correct position and sign 106 | 107 | fig2 = plt.figure(figsize=(8,8)) 108 | ax2 = fig2.gca() 109 | ax2.plot(Ugrid, ECDF(Psplit[hypotheses])(Ugrid), label='Sample splitting', c='red', linewidth=5, alpha=0.5) 110 | ax2.plot(Ugrid, ECDF(Pselect[hypotheses])(Ugrid), label='Selected using $i^*(Z_S)$', c='blue', linewidth=5, alpha=0.5) 111 | ax2.set_xlabel('P-value, $p$', fontsize=20) 112 | ax2.set_ylabel('ECDF($p$)', fontsize=20) 113 | ax2.plot([0.05,0.05],[0,1], 'k--') 114 | ax2.legend(loc='lower right') 115 | 116 | dbn1 = {} 117 | dbn1['split'] = Psplit 118 | dbn1['select'] = Pselect 119 | dbn1['hypotheses'] = hypotheses 120 | 121 | return fig1, fig2, dbn1 122 | 123 | # # ## Selection intervals 124 | # # 125 | # # To create the selection intervals, it helps to have data sampled near our observation. 126 | # # We will store these, then form two intervals one based on data to the left, the other 127 | # # on data to the right. In this simple example, we just average the endpoints based on where 128 | # # the observation falls into the interval. A better way would be to pool the sufficient statistics 129 | # # and use importance weights. 
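# A minimal sketch (not from the original notebook) of the pooling idea mentioned
# above: rather than averaging interval endpoints from the two neighboring
# reference means, pool all stored draws of the sufficient statistic, tilt them
# to a common reference mean with importance weights, and invert once.  It
# assumes `samples` is a dict mapping an integer mean i to draws of the
# sufficient statistic generated under that mean (as in the commented cells
# below), and that the selective law is an exponential family in that
# statistic, so a draw s from mean i can be re-weighted to mean `ref` by
# exp((ref - i) * s); `pooled_interval` itself is a hypothetical helper name.

def pooled_interval(z, samples, ref=0.):
    pooled = np.hstack([samples[i] for i in samples])
    log_w = np.hstack([(ref - i) * samples[i] for i in samples])
    log_w -= log_w.max()                      # stabilize before exponentiating
    dbn = discrete_family(pooled, np.exp(log_w))
    dbn.theta = 0.
    # same randomized inversion used elsewhere in this repository
    return dbn.interval(z, randomize=True, auxVar=0.5)

# (If the cells below were re-enabled, one could call, e.g., pooled_interval(3.0, S).)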
130 | 131 | # # In[ ]: 132 | 133 | # S = {} 134 | # dfam = {} 135 | # for i in range(7): 136 | # S[i] = sample(i, ndraw=100000) 137 | # W = np.ones(S[i].shape) 138 | # dfam[i] = discrete_family(S[i], W) 139 | # dfam[i].theta = 0 140 | 141 | 142 | # # In[ ]: 143 | 144 | # [dfam0.interval(3, randomize=False) for _ in range(4)] 145 | 146 | 147 | # # In[ ]: 148 | 149 | # Zvals = np.linspace(0,6,101) 150 | # UMPU_intervals = [] 151 | # twotailed_intervals = [] 152 | # for z in Zvals: 153 | # z1, z2 = np.floor(z), np.ceil(z) 154 | # # weight for convex combination 155 | # w = (z - z1) 156 | 157 | # u1, l1 = dfam[z1].interval(z, randomize=True, auxVar=0.5) 158 | # u2, l2 = dfam[z2].interval(z, randomize=True, auxVar=0.5) 159 | # u1, l1 = u1 + z1, l1 + z1 160 | # u2, l2 = u2 + z2, l2 + z2 161 | # u, l = ((1-w)*u1+w*u2), ((1-w)*l1+w*l2) 162 | # UMPU_intervals.append((u,l)) 163 | 164 | # u1, l1 = dfam[z1].equal_tailed_interval(z, auxVar=0.5) 165 | # u2, l2 = dfam[z2].equal_tailed_interval(z, auxVar=0.5) 166 | # u1, l1 = u1 + z1, l1 + z1 167 | # u2, l2 = u2 + z2, l2 + z2 168 | # u, l = ((1-w)*u1+w*u2), ((1-w)*l1+w*l2) 169 | # twotailed_intervals.append((u,l)) 170 | 171 | 172 | # # In[ ]: 173 | 174 | # from scipy.stats import norm as ndist 175 | 176 | # UMPU_intervals = np.array(UMPU_intervals) 177 | # twotailed_intervals = np.array(twotailed_intervals) 178 | # interval_fig = plt.figure(figsize=(8,8)) 179 | # interval_ax = interval_fig.gca() 180 | # interval_ax.plot(Zvals, UMPU_intervals[:,0], c="green", linewidth=3, label='Reduced model UMAU') 181 | # interval_ax.plot(Zvals, UMPU_intervals[:,1], c='green', linewidth=3) 182 | # interval_ax.plot(Zvals, twotailed_intervals[:,0], c="purple", linewidth=3, label='Reduced model equal-tailed') 183 | # interval_ax.plot(Zvals, twotailed_intervals[:,1], c='purple', linewidth=3) 184 | 185 | # interval_ax.plot(Zvals, Zvals - ndist.ppf(0.975) * np.sqrt(2), c='red', linewidth=3, label='Sample splitting') 186 | # interval_ax.plot(Zvals, Zvals + ndist.ppf(0.975) * np.sqrt(2), c='red', linewidth=3) 187 | # interval_ax.plot(Zvals, Zvals - ndist.ppf(0.975), '--', c='blue', linewidth=1, label='Nominal') 188 | # interval_ax.plot(Zvals, Zvals + ndist.ppf(0.975), '--', c='blue', linewidth=1) 189 | # interval_ax.set_xlabel(r'Observed statistic: $T_{i^*}$ for reduced, $\sqrt{2} \cdot T_{i^*,2}$ for splitting)', fontsize=20) 190 | # interval_ax.legend(loc='upper left') 191 | # interval_ax.plot([np.sqrt(2*np.log(n))]*2,[-6,10], 'k--') 192 | # interval_fig.savefig('sample_splitting_intervals.pdf') 193 | 194 | 195 | 196 | def main(): 197 | 198 | fig1, fig2, dbn = marginal(20, 3., 3, nsim=1000) 199 | full = np.load('pval_20.npz') 200 | Ugrid = np.linspace(0,1,101) 201 | 202 | ax1 = fig1.gca() 203 | ax1.plot(Ugrid, ECDF(full['known'])(Ugrid), label=r'Selected using $i^*(Z)$', c='green', linewidth=5, alpha=0.5) 204 | ax1.legend(loc='lower right') 205 | 206 | ax2 = fig2.gca() 207 | ax2.plot(Ugrid, ECDF(full['known'][full['hypotheses']])(Ugrid), label=r'Selected using $i^*(Z)$', c='green', linewidth=5, alpha=0.5) 208 | ax2.legend(loc='lower right') 209 | 210 | fig1.savefig('splitting_marginal_1sparse.pdf') 211 | fig2.savefig('splitting_conditional_1sparse.pdf') 212 | 213 | #power_one = power_onesided(20, 3., 3, ndraw=4000000, burnin=100000)[0] 214 | #power_one.savefig('splitting_onesided_power.pdf') 215 | 216 | if __name__ == '__main__': 217 | main() 218 | 219 | # # ## Data for illustrative purposes 220 | # # 221 | # # At an effect size of 3, sample splitting has power roughly 60% of 
rejecting (conditonal on the first position) 222 | # # being largest. Let's sample some data from this distribution to use as illustration. 223 | 224 | # # In[ ]: 225 | 226 | # np.random.seed(10) 227 | # con.mean = np.zeros(n+1) 228 | # snr = 3 229 | # con.mean = snr * mu_vec / np.sqrt(2) 230 | # data_selection = sample_from_constraints(con, initial, ndraw=80000, burnin=5000)[-1] 231 | # data_inference = np.dot(X.T, np.random.standard_normal(n)) + snr * np.dot(X.T, mu_vec) / np.sqrt(2) 232 | # data_fig = plt.figure(figsize=(8,8)) 233 | # data_ax = data_fig.gca() 234 | # data_ax.scatter(np.arange(n), data_inference, c='b', marker='o', s=100) 235 | # data_ax.scatter(np.arange(n), data_selection, c='r', marker='+', s=100) 236 | # data_ax.plot(np.arange(n), data_inference, c='b', label=r'Inference: $Z_I$', alpha=0.5, linewidth=3) 237 | # data_ax.plot(np.arange(n), data_selection, c='r', label=r'Selection: $Z_S$', alpha=0.5, linewidth=3) 238 | # data_ax.set_xlim([-0.5,20.5]) 239 | # data_ax.legend(fontsize=20) 240 | # data_ax.set_xticks([4,9,14,19]) 241 | # data_ax.set_xticklabels([5,10,15,20], fontsize=20) 242 | # data_ax.set_xlabel('Index', fontsize=20) 243 | # data_ax.set_ylabel('Observed data', fontsize=20) 244 | # data_ax.plot([3,3],[-3,5], 'k--') 245 | # data_ax.set_ylim([-3,4]) 246 | # data_fig.savefig('data_instance.pdf') 247 | 248 | 249 | # # In[ ]: 250 | 251 | # get_ipython().magic(u'load_ext rmagic') 252 | # get_ipython().magic(u'R -i X,data_selection,data_inference') 253 | 254 | 255 | # # In[ ]: 256 | 257 | # print np.linalg.norm(X[:,3]) 258 | 259 | 260 | # # In[ ]: 261 | 262 | # get_ipython().run_cell_magic(u'R', u'', u'Z = (data_selection + data_inference) / sqrt(2)\nclassical_model = lm(Z ~ X[,1] + X[,2] + X[,3] + X[,4] + X[,5] + X[,6] + X[,7] - 1)\nanova(classical_model)') 263 | 264 | 265 | # # In[ ]: 266 | 267 | # get_ipython().run_cell_magic(u'R', u'', u"selection_model = lm(data_selection ~ X[,1] + X[,2] + X[,3] + X[,4] + X[,5] + X[,6] + X[,7] + X[,8] + X[,9] +\n X[,10] + X[,11] + X[,12] + X[,13] + X[,14] + X[,15] + X[,16] + X[,17] + X[,18] + X[,19] + X[,20] - 1)\nstep(lm(data_selection ~ -1), list(upper=~ X[,1] + X[,2] + X[,3] + X[,4] + X[,5] + X[,6] + X[,7] + X[,8] + X[,9] +\n X[,10] + X[,11] + X[,12] + X[,13] + X[,14] + X[,15] + X[,16] + X[,17] + X[,18] + X[,19] + X[,20] - 1),\n steps=1, direction='forward')\n\ninference_model = glm(data_inference ~ X[,4] - 1)\nprint(vcov(inference_model))\nprint(summary(inference_model))\neffect = sum(X[,4]*data_inference)\nlower = effect - qnorm(0.975)\nupper = effect + qnorm(0.975)\nprint(data.frame(effect, lower, upper))") 268 | 269 | 270 | # # ## Non-adaptive inference 271 | 272 | # # In[ ]: 273 | 274 | # Zmax = np.max(np.fabs(np.dot(X.T, np.random.standard_normal((X.shape[0], 10000)))), 0) 275 | # nonadapt_fig = plt.figure(figsize=(8,8)) 276 | # nonadapt_ax = nonadapt_fig.gca() 277 | # Zgrid = np.linspace(0,5,201) 278 | # from scipy.stats import norm as ndist 279 | # pmax = 2 * ndist.sf(Zmax) 280 | # nonadapt_ax.plot(grid, ECDF(pmax)(grid), linewidth=3, alpha=0.5) 281 | # nonadapt_ax.set_xlabel('P-value, $p$', fontsize=20) 282 | # nonadapt_ax.set_ylabel('ECDF($p$)', fontsize=20) 283 | # nonadapt_fig.savefig('nonadapt_pvalue.pdf') 284 | 285 | 286 | -------------------------------------------------------------------------------- /figs/convex.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 37 | 39 | 46 | 52 | 53 | 60 | 63 | 68 | 73 | 78 | 83 | 88 | 93 | 94 | 95 | 102 | 108 | 109 
| 116 | 122 | 123 | 126 | 130 | 131 | 138 | 144 | 145 | 152 | 158 | 159 | 162 | 170 | 171 | 174 | 182 | 183 | 186 | 194 | 195 | 198 | 206 | 207 | 210 | 218 | 219 | 222 | 230 | 231 | 234 | 242 | 243 | 246 | 254 | 255 | 258 | 266 | 267 | 270 | 278 | 279 | 282 | 290 | 291 | 294 | 302 | 303 | 306 | 314 | 315 | 318 | 326 | 327 | 328 | 330 | 331 | 333 | image/svg+xml 334 | 336 | 337 | 338 | 339 | 340 | 344 | 352 | EVENT 363 | 374 | 375 | 380 | 387 | 394 | 403 | 410 | 417 | 421 | 438 | 455 | 459 | 476 | 480 | 501 | 505 | 508 | 529 | 530 | 534 | 559 | 563 | 592 | 604 | 605 | 606 | -------------------------------------------------------------------------------- /biblio.bib: -------------------------------------------------------------------------------- 1 | @article{akaike1974new, 2 | title={A new look at the statistical model identification}, 3 | author={Akaike, Hirotugu}, 4 | journal={Automatic Control, IEEE Transactions on}, 5 | volume={19}, 6 | number={6}, 7 | pages={716--723}, 8 | year={1974}, 9 | publisher={Ieee} 10 | } 11 | 12 | @book{hastie2009elements, 13 | title={The elements of statistical learning}, 14 | author={Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome and Hastie, T and Friedman, J and Tibshirani, R}, 15 | volume={2}, 16 | year={2009}, 17 | publisher={Springer} 18 | } 19 | 20 | @article{spacings, 21 | title = {Post-selection adaptive inference for Least Angle Regression and the Lasso}, 22 | url = {http://arxiv.org/abs/1401.3889}, 23 | abstract = {We propose inference tools for least angle regression and the lasso, from the joint distribution of suitably normalized spacings of the {LARS} algorithm. From this we extend the results of the asymptotic null distribution of the "covariance test" of Lockhart et al (2013). But we go much further, deriving exact finite sample results for a new asymptotically equivalent procedure called the "spacing test". This provides exact conditional tests at any step of the {LAR} algorithm as well as "selection intervals" for the appropriate true underlying regression parameter. Remarkably, these tests and intervals account correctly for the adaptive selection done by {LARS}.}, 24 | urldate = {2014-03-14}, 25 | journal = {{arXiv}:1401.3889 [stat]}, 26 | author = {Taylor, Jonathan and Lockhart, Richard and Tibshirani, Ryan J. and Tibshirani, Robert}, 27 | month = jan, 28 | year = {2014}, 29 | keywords = {62F03, 62G15, {JT}2014, Statistics - Methodology}, 30 | annote = {Comment: 22 pages, 6 figures}, 31 | file = {1401.3889 PDF:/Users/jonathantaylor/Library/Application Support/Zotero/Profiles/z42x53bj.default/zotero/storage/BAT6HEQB/Taylor et al. - 2014 - Post-selection adaptive inference for Least Angle .pdf:application/pdf;arXiv.org Snapshot:/Users/jonathantaylor/Library/Application Support/Zotero/Profiles/z42x53bj.default/zotero/storage/ZDFTTIMV/1401.html:text/html} 32 | } 33 | 34 | @article{negahban_unified, 35 | title = {A Unified Framework for High-Dimensional Analysis of {MM}-Estimators with Decomposable Regularizers}, 36 | volume = {27}, 37 | issn = {0883-4237}, 38 | url = {http://projecteuclid.org/euclid.ss/1356098555}, 39 | doi = {10.1214/12-STS400}, 40 | abstract = {High-dimensional statistical inference deals with models in which the the number of parameters pp is comparable to or larger than the sample size nn. 
Since it is usually impossible to obtain consistent procedures unless $p/n \rightarrow 0$, a line of recent work has studied models with various types of low-dimensional structure, including sparse vectors, sparse and structured matrices, low-rank matrices and combinations thereof. In such settings, a general approach to estimation is to solve a regularized optimization problem, which combines a loss function measuring how well the model fits the data with some regularization function that encourages the assumed structure. This paper provides a unified framework for establishing consistency and convergence rates for such regularized {M}-estimators under high-dimensional scaling. We state one main theorem and show how it can be used to re-derive some existing results, and also to obtain a number of new results on consistency and convergence rates, in both $\ell_2$-error and related norms. Our analysis also identifies two key properties of loss and regularization functions, referred to as restricted strong convexity and decomposability, that ensure corresponding regularized {M}-estimators have fast convergence rates and which are optimal in many well-studied cases.}, 41 | language = {{EN}}, 42 | number = {4}, 43 | urldate = {2013-01-31}, 44 | journal = {Statistical Science}, 45 | author = {Negahban, Sahand N. and Ravikumar, Pradeep and Wainwright, Martin J. and Yu, Bin}, 46 | month = nov, 47 | year = {2012}, 48 | pages = {538--557} 49 | } 50 | 51 | @article{yong2012replication, 52 | title={Replication studies: Bad copy}, 53 | author={Yong, Ed}, 54 | journal={Nature}, 55 | volume={485}, 56 | number={7398}, 57 | year={2012}, 58 | pages={298--300}, 59 | } 60 | 61 | @article{barber2015controlling, 62 | title={Controlling the false discovery rate via knockoffs}, 63 | author={Barber, Rina Foygel and Cand{\`e}s, Emmanuel J}, 64 | journal={The Annals of Statistics}, 65 | volume={43}, 66 | number={5}, 67 | pages={2055--2085}, 68 | year={2015}, 69 | publisher={Institute of Mathematical Statistics} 70 | } 71 | 72 | 73 | @article{efron2011tweedie, 74 | title={Tweedie's formula and selection bias}, 75 | author={Efron, Bradley}, 76 | journal={Journal of the American Statistical Association}, 77 | volume={106}, 78 | number={496}, 79 | pages={1602--1614}, 80 | year={2011}, 81 | publisher={Taylor \& Francis} 82 | } 83 | 84 | 85 | @article{johnson2014new, 86 | title={New truths that only one can see}, 87 | author={Johnson, George}, 88 | journal={The New York Times}, 89 | year={2014} 90 | } 91 | 92 | @article{cox1975note, 93 | title={A note on data-splitting for the evaluation of significance levels}, 94 | author={Cox, DR}, 95 | journal={Biometrika}, 96 | volume={62}, 97 | number={2}, 98 | pages={441--444}, 99 | year={1975}, 100 | publisher={Biometrika Trust} 101 | } 102 | 103 | @article{hurvich1990impact, 104 | title={The impact of model selection on inference in linear regression}, 105 | author={Hurvich, Clifford M and Tsai, Chih-Ling}, 106 | journal={The American Statistician}, 107 | volume={44}, 108 | number={3}, 109 | pages={214--217}, 110 | year={1990}, 111 | publisher={Taylor \& Francis Group} 112 | } 113 | 114 | @article{sladek2007genome, 115 | Author = {Sladek, Robert and Rocheleau, Ghislain and Rung, Johan and Dina, Christian and Shen, Lishuang and Serre, David and Boutin, Philippe and Vincent, Daniel and Belisle, Alexandre and Hadjadj, Samy and others}, 116 | Date-Added = {2014-09-12 00:33:34 +0000}, 117 | Date-Modified = {2014-09-12 00:35:38 +0000}, 118 |
Journal = {Nature}, 119 | Number = {7130}, 120 | Pages = {881-885}, 121 | Title = {A genome-wide association study identifies novel risk loci for type 2 diabetes}, 122 | Volume = {445}, 123 | Year = {2007}} 124 | 125 | @article{fraser2004ancillaries, 126 | Author = {DAS Fraser}, 127 | Date-Added = {2014-09-11 04:16:01 +0000}, 128 | Date-Modified = {2014-09-11 04:16:26 +0000}, 129 | Journal = {Statistical Science}, 130 | Number = {2}, 131 | Pages = {333-369}, 132 | Title = {Ancillaries and conditional inference}, 133 | Volume = {19}, 134 | Year = {2004}} 135 | 136 | @article{wasserman2014discussion, 137 | Author = {Larry Wasserman}, 138 | Date-Added = {2014-09-11 03:52:31 +0000}, 139 | Date-Modified = {2014-09-11 03:54:39 +0000}, 140 | Journal = {Annals of Statistics}, 141 | Number = {2}, 142 | Pages = {501-508}, 143 | Title = {Discussion: "A significance test for the lasso"}, 144 | Volume = {42}, 145 | Year = {2014}} 146 | 147 | 148 | @article{fisher1956test, 149 | title={On a test of significance in Pearson's Biometrika Tables (No. 11)}, 150 | author={Fisher, Ronald}, 151 | journal={Journal of the Royal Statistical Society. Series B (Methodological)}, 152 | pages={56--60}, 153 | year={1956}, 154 | publisher={JSTOR} 155 | } 156 | 157 | @article{olshen1973conditional, 158 | title={The Conditional Level of the F—Test}, 159 | author={Olshen, Richard A}, 160 | journal={Journal of the American Statistical Association}, 161 | volume={68}, 162 | number={343}, 163 | pages={692--698}, 164 | year={1973}, 165 | publisher={Taylor \& Francis} 166 | } 167 | 168 | @article{efron1996using, 169 | title={Using specially designed exponential families for density estimation}, 170 | author={Efron, Bradley and Tibshirani, Robert and others}, 171 | journal={The Annals of Statistics}, 172 | volume={24}, 173 | number={6}, 174 | pages={2431--2461}, 175 | year={1996}, 176 | publisher={Institute of Mathematical Statistics} 177 | } 178 | 179 | @article{birnbaum1955characterizations, 180 | title={Characterizations of complete classes of tests of some multiparametric hypotheses, with applications to likelihood ratio tests}, 181 | author={Birnbaum, Allan}, 182 | journal={The Annals of Mathematical Statistics}, 183 | pages={21--36}, 184 | year={1955}, 185 | publisher={JSTOR} 186 | } 187 | 188 | @article{robinson1979conditional, 189 | Author = {G. K. Robinson}, 190 | Date-Added = {2014-09-11 02:57:45 +0000}, 191 | Date-Modified = {2014-09-11 02:58:22 +0000}, 192 | Journal = {Annals of Statistics}, 193 | Number = {4}, 194 | Pages = {742-755}, 195 | Title = {Conditional properties of statistical procedures}, 196 | Volume = {7}, 197 | Year = {1979}} 198 | 199 | @article{fisher1956test, 200 | Author = {R. A. Fisher}, 201 | Date-Added = {2014-09-11 01:59:06 +0000}, 202 | Date-Modified = {2014-09-11 02:01:11 +0000}, 203 | Journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, 204 | Number = {1}, 205 | Pages = {56-60}, 206 | Title = {On a test of significance in Pearson's Biometrika tables}, 207 | Volume = {18}, 208 | Year = {1956}} 209 | 210 | @article{birnbaum1962, 211 | Author = {Allan Birnbaum}, 212 | Date-Added = {2014-09-09 00:23:02 +0000}, 213 | Date-Modified = {2014-09-09 00:23:55 +0000}, 214 | Journal = {Journal of the American Statistical Association}, 215 | Number = {298}, 216 | Pages = {269--306}, 217 | Title = {On the foundations of statistical inference}, 218 | Volume = {57}, 219 | Year = {1962}} 220 | 221 | @article{fisher1934, 222 | Author = {R. A. 
Fisher}, 223 | Date-Added = {2014-09-09 00:13:06 +0000}, 224 | Date-Modified = {2014-09-09 00:24:36 +0000}, 225 | Journal = {Proceedings of the Royal Society of London, Series A.}, 226 | Number = {852}, 227 | Pages = {285--307}, 228 | Title = {Two new properties of mathematical likelihood}, 229 | Volume = {144}, 230 | Year = {1934}} 231 | 232 | @article{franco14, 233 | Author = {Franco, Annie and Malhotra, Neil and Simonovits, Gabor}, 234 | Date-Added = {2014-09-08 22:13:11 +0000}, 235 | Date-Modified = {2014-09-09 00:24:53 +0000}, 236 | Journal = {Science}, 237 | Title = {Publication bias in the social sciences: unlocking the file drawer}, 238 | Year = {2014}} 239 | 240 | @article{owen2007infinitely, 241 | Author = {Owen, A.B.}, 242 | Journal = {The Journal of Machine Learning Research}, 243 | Pages = {761--773}, 244 | Publisher = {JMLR. org}, 245 | Title = {Infinitely imbalanced logistic regression}, 246 | Volume = {8}, 247 | Year = {2007}} 248 | 249 | @article{LARS, 250 | Author = {Efron, B. and Hastie, T. and Johnstone, I. and Tibshirani, R.}, 251 | Journal = {The Annals of statistics}, 252 | Number = {2}, 253 | Pages = {407--499}, 254 | Publisher = {Institute of Mathematical Statistics}, 255 | Title = {Least angle regression}, 256 | Volume = {32}, 257 | Year = {2004}} 258 | 259 | @article{VIFRegression, 260 | Author = {Lin, D. and Foster, D.P. and Ungar, L.H.}, 261 | Journal = {Journal of the American Statistical Association}, 262 | Number = {493}, 263 | Pages = {232--247}, 264 | Publisher = {ASA}, 265 | Title = {VIF regression: A fast regression algorithm for large data}, 266 | Volume = {106}, 267 | Year = {2011}} 268 | 269 | @book{agresti, 270 | Author = {Agresti, A.}, 271 | Publisher = {New York: John Wiley and Sons}, 272 | Title = {Categorical Data Analysis}, 273 | Year = {2002}} 274 | 275 | @article{bottou2008tradeoffs, 276 | Author = {Bottou, L. and Bousquet, O.}, 277 | Journal = {Advances in neural information processing systems}, 278 | Pages = {161--168}, 279 | Publisher = {Cambridge, MA: MIT Press}, 280 | Title = {The tradeoffs of large scale learning}, 281 | Volume = {20}, 282 | Year = {2008}} 283 | 284 | @article{bartlett2006convexity, 285 | Author = {Bartlett, P.L. and Jordan, M.I. and McAuliffe, J.D.}, 286 | Journal = {Journal of the American Statistical Association}, 287 | Number = {473}, 288 | Pages = {138--156}, 289 | Publisher = {ASA}, 290 | Title = {Convexity, classification, and risk bounds}, 291 | Volume = {101}, 292 | Year = {2006}} 293 | 294 | @inproceedings{le2004large, 295 | Author = {Le Cun, L.B.Y.}, 296 | Booktitle = {Advances in neural information processing systems 16: proceedings of the 2003 conference}, 297 | Organization = {The MIT Press}, 298 | Pages = {217}, 299 | Title = {Large Scale Online Learning.}, 300 | Volume = {16}, 301 | Year = {2004}} 302 | 303 | @inproceedings{massart2000some, 304 | Author = {Massart, P.}, 305 | Booktitle = {ANNALES-FACULTE DES SCIENCES TOULOUSE MATHEMATIQUES}, 306 | Number = {2}, 307 | Organization = {Universit{\'e} Paul Sabatier}, 308 | Pages = {245--303}, 309 | Title = {Some applications of concentration inequalities to statistics}, 310 | Volume = {9}, 311 | Year = {2000}} 312 | 313 | @article{murata1998statistical, 314 | Author = {Murata, N.}, 315 | Journal = {Online Learning and Neural Networks. 
Cambridge University Press, Cambridge, UK}, 316 | Title = {A statistical study of on-line learning}, 317 | Year = {1998}} 318 | 319 | @article{horvitz1952generalization, 320 | Author = {Horvitz, Daniel G and Thompson, Donovan J}, 321 | Journal = {Journal of the American Statistical Association}, 322 | Number = {260}, 323 | Pages = {663--685}, 324 | Publisher = {Taylor \& Francis Group}, 325 | Title = {A generalization of sampling without replacement from a finite universe}, 326 | Volume = {47}, 327 | Year = {1952}} 328 | 329 | @book{huber2011robust, 330 | Author = {Huber, Peter J}, 331 | Publisher = {Springer}, 332 | Title = {Robust statistics}, 333 | Year = {2011}} 334 | 335 | @article{RoyleEtAlWillowTit, 336 | Author = {Royle, J. Andrew and Nichols, James D. and K\'{e}ry, Marc}, 337 | Doi = {10.1111/j.0030-1299.2005.13534.x}, 338 | Issn = {1600-0706}, 339 | Journal = {Oikos}, 340 | Number = {2}, 341 | Pages = {353--359}, 342 | Publisher = {Munksgaard International Publishers}, 343 | Title = {Modelling occurrence and abundance of species when detection is imperfect}, 344 | Url = {http://dx.doi.org/10.1111/j.0030-1299.2005.13534.x}, 345 | Volume = {110}, 346 | Year = {2005}, 347 | Bdsk-Url-1 = {http://dx.doi.org/10.1111/j.0030-1299.2005.13534.x}} 348 | 349 | @book{gaetan2009spatial, 350 | Author = {Gaetan, C. and Guyon, X.}, 351 | Publisher = {Springer Verlag}, 352 | Title = {Spatial statistics and modeling}, 353 | Year = {2009}} 354 | 355 | @article{WartonIPP, 356 | Author = {Warton, D.I. and Shepherd, L.C.}, 357 | Journal = {The Annals of Applied Statistics}, 358 | Number = {3}, 359 | Pages = {1383--1402}, 360 | Publisher = {Institute of Mathematical Statistics}, 361 | Title = {Poisson point process models solve the "pseudo-absence problem" for presence-only data in ecology}, 362 | Volume = {4}, 363 | Year = {2010}} 364 | 365 | @inproceedings{Phillipsetal2004, 366 | Author = {Phillips, S.J. and Dud{\'\i}k, M. and Schapire, R.E.}, 367 | Booktitle = {Proceedings of the Twenty-First International Conference on Machine Learning}, 368 | Organization = {ACM}, 369 | Pages = {83}, 370 | Title = {A maximum entropy approach to species distribution modeling}, 371 | Year = {2004}} 372 | 373 | @article{AartsIPP, 374 | Author = {Aarts, G. and Fieberg, J. and Matthiopoulos, J.}, 375 | Journal = {Methods in Ecology and Evolution}, 376 | Number = {1}, 377 | Pages = {177--187}, 378 | Publisher = {Wiley Online Library}, 379 | Title = {Comparative interpretation of count, presence--absence and point methods for species distribution models}, 380 | Volume = {3}, 381 | Year = {2012}} 382 | 383 | @article{renner2013equivalence, 384 | Author = {Renner, Ian W and Warton, David I}, 385 | Journal = {Biometrics}, 386 | Publisher = {Wiley Online Library}, 387 | Title = {Equivalence of MAXENT and poisson point process models for species distribution modeling in ecology}, 388 | Year = {2013}} 389 | 390 | @article{Phillipsetal2006, 391 | Author = {Phillips, S.J. and Anderson, R.P. and Schapire, R.E.}, 392 | Journal = {Ecological Modelling}, 393 | Number = {3}, 394 | Pages = {231--259}, 395 | Publisher = {Elsevier}, 396 | Title = {Maximum entropy modeling of species geographic distributions}, 397 | Volume = {190}, 398 | Year = {2006}} 399 | 400 | @article{Phillipsetal2008, 401 | Author = {Phillips, S.J. 
and Dud{\'\i}k, M.}, 402 | Journal = {Ecography}, 403 | Number = {2}, 404 | Pages = {161--175}, 405 | Publisher = {Wiley Online Library}, 406 | Title = {Modeling of species distributions with Maxent: new extensions and a comprehensive evaluation}, 407 | Volume = {31}, 408 | Year = {2008}} 409 | 410 | @article{ward2009em, 411 | Author = {Ward, G. and Hastie, T. and Barry, S. and Elith, J. and Leathwick, J.R.}, 412 | Journal = {Biometrics}, 413 | Number = {2}, 414 | Pages = {554--563}, 415 | Publisher = {Wiley Online Library}, 416 | Title = {Presence-only data and the EM algorithm}, 417 | Volume = {65}, 418 | Year = {2009}} 419 | 420 | @article{me2012local, 421 | Author = {Fithian, W. and Hastie, T.}, 422 | Title = {Local Case-Control Sampling}, 423 | Year = {2012}} 424 | 425 | @article{phillips2009sample, 426 | Author = {Phillips, S.J. and Dud{\'\i}k, M. and Elith, J. and Graham, C.H. and Lehmann, A. and Leathwick, J. and Ferrier, S.}, 427 | Journal = {Ecological Applications}, 428 | Number = {1}, 429 | Pages = {181--197}, 430 | Publisher = {Eco Soc America}, 431 | Title = {Sample selection bias and presence-only distribution models: implications for background and pseudo-absence data}, 432 | Volume = {19}, 433 | Year = {2009}} 434 | 435 | @article{hefley2013nondetection, 436 | Author = {Hefley, Trevor J and Tyre, Andrew J and Baasch, David M and Blankenship, Erin E}, 437 | Journal = {Ecology and Evolution}, 438 | Publisher = {Wiley Online Library}, 439 | Title = {Nondetection sampling bias in marked presence-only data}, 440 | Year = {2013}} 441 | 442 | @book{ESL, 443 | Author = {Hastie, T. and Tibshirani, R. and Friedman, J.}, 444 | Publisher = {Springer Series in Statistics}, 445 | Title = {The elements of statistical learning}, 446 | Year = {2009}} 447 | 448 | @article{royle2012maxlike, 449 | Author = {Royle, J.A. and Chandler, R.B. and Yackulic, C. 
and Nichols, J.D.}, 450 | Journal = {Methods in Ecology and Evolution}, 451 | Publisher = {Wiley Online Library}, 452 | Title = {Likelihood analysis of species occurrence probability from presence-only data for modelling species distributions}, 453 | Year = {2012}} 454 | 455 | @book{cressie1993, 456 | Author = {Cressie, N.A.C.}, 457 | Publisher = {Wiley, New York}, 458 | Title = {Statistics for Spatial Data, revised edition}, 459 | Volume = {928}, 460 | Year = {1993}} 461 | 462 | @article{baddeley2000practical, 463 | Author = {Baddeley, Adrian and Turner, Rolf}, 464 | Journal = {Australian \& New Zealand Journal of Statistics}, 465 | Number = {3}, 466 | Pages = {283--322}, 467 | Publisher = {Wiley Online Library}, 468 | Title = {Practical maximum pseudolikelihood for spatial point patterns}, 469 | Volume = {42}, 470 | Year = {2000}} 471 | 472 | @article{warton2013model, 473 | Author = {Warton, David I and Renner, Ian W and Ramp, Daniel}, 474 | Journal = {PloS one}, 475 | Number = {11}, 476 | Pages = {e79168}, 477 | Publisher = {Public Library of Science}, 478 | Title = {Model-Based Control of Observer Bias for the Analysis of Presence-Only Data in Ecology}, 479 | Volume = {8}, 480 | Year = {2013}} 481 | 482 | @article{baddeley2010spatial, 483 | Author = {Baddeley, A and Berman, M and Fisher, NI and Hardegen, A and Milne, RK and Schuhmacher, D and Shah, R and Turner, R}, 484 | Journal = {Electronic Journal of Statistics}, 485 | Pages = {1151--1201}, 486 | Publisher = {Institute of Mathematical Statistics}, 487 | Title = {Spatial logistic regression and change-of-support in Poisson point processes}, 488 | Volume = {4}, 489 | Year = {2010}} 490 | 491 | @article{lele2006weighted, 492 | Author = {Lele, Subhash R and Keim, Jonah L}, 493 | Journal = {Ecology}, 494 | Number = {12}, 495 | Pages = {3021--3028}, 496 | Publisher = {Eco Soc America}, 497 | Title = {Weighted distributions and estimation of resource selection probability functions}, 498 | Volume = {87}, 499 | Year = {2006}} 500 | 501 | @article{chakraborty2011point, 502 | Author = {Chakraborty, Avishek and Gelfand, Alan E and Wilson, Adam M and Latimer, Andrew M and Silander, John A}, 503 | Journal = {Journal of the Royal Statistical Society: Series C (Applied Statistics)}, 504 | Number = {5}, 505 | Pages = {757--776}, 506 | Publisher = {Wiley Online Library}, 507 | Title = {Point pattern modelling for degraded presence-only data over large regions}, 508 | Volume = {60}, 509 | Year = {2011}} 510 | 511 | @article{margules1994biological, 512 | Author = {Margules, CR and Austin, MP and Mollison, D. and Smith, F.}, 513 | Journal = {Philosophical Transactions of the Royal Society of London. Series B: Biological Sciences}, 514 | Number = {1307}, 515 | Pages = {69--75}, 516 | Publisher = {The Royal Society}, 517 | Title = {Biological Models for Monitoring Species Decline: The Construction and Use of Data Bases [and Discussion]}, 518 | Volume = {344}, 519 | Year = {1994}} 520 | 521 | @article{berman1992, 522 | Author = {Berman, M. and Turner, T.R.}, 523 | Journal = {Applied Statistics}, 524 | Pages = {31--38}, 525 | Publisher = {JSTOR}, 526 | Title = {Approximating point process likelihoods with GLIM}, 527 | Year = {1992}} 528 | 529 | @article{elith2011statistical, 530 | Author = {Elith, J. and Phillips, S.J. and Hastie, T. and Dud{\'\i}k, M. and Chee, Y.E. 
and Yates, C.J.}, 531 | Journal = {Diversity and Distributions}, 532 | Publisher = {Wiley Online Library}, 533 | Title = {A statistical explanation of MaxEnt for ecologists}, 534 | Year = {2011}} 535 | 536 | @article{elith2006novel, 537 | Author = {Elith, J. and Graham, C.H. and Anderson, R.P. and Dudik, M. and Ferrier, S. and Guisan, A. and Hijmans, R.J. and Huettmann, F. and Leathwick, J.R. and Lehmann, A. and others}, 538 | Journal = {Ecography}, 539 | Number = {2}, 540 | Pages = {129--151}, 541 | Publisher = {Wiley Online Library}, 542 | Title = {Novel methods improve prediction of species' distributions from occurrence data}, 543 | Volume = {29}, 544 | Year = {2006}} 545 | 546 | @book{manly2002resource, 547 | Author = {Manly, BFJ and McDonald, LL and Thomas, DL and McDonald, TL and Erickson, WP}, 548 | Publisher = {Nordrecht, Netherlands: Kluwer}, 549 | Title = {Resource selection by animals: statistical analysis and design for field studies}, 550 | Year = {2002}} 551 | 552 | @article{dorazio2012predicting, 553 | Author = {Dorazio, Robert M}, 554 | Journal = {Biometrics}, 555 | Number = {4}, 556 | Pages = {1303--1312}, 557 | Publisher = {Wiley Online Library}, 558 | Title = {Predicting the Geographic Distribution of a Species from Presence-Only Data Subject to Detection Errors}, 559 | Volume = {68}, 560 | Year = {2012}} 561 | 562 | @article{lee2006fitting, 563 | Author = {Lee, AJ and Scott, AJ and Wild, CJ}, 564 | Journal = {Biometrika}, 565 | Number = {2}, 566 | Pages = {385--397}, 567 | Publisher = {Biometrika Trust}, 568 | Title = {Fitting binary regression models with case-augmented samples}, 569 | Volume = {93}, 570 | Year = {2006}} 571 | 572 | @book{mackenzie2006occupancy, 573 | Author = {MacKenzie, Darryl I}, 574 | Publisher = {Academic Press}, 575 | Title = {Occupancy estimation and modeling: inferring patterns and dynamics of species occurrence}, 576 | Year = {2006}} 577 | 578 | @article{phillips2009sample, 579 | Author = {Phillips, Steven J and Dud{\'\i}k, Miroslav and Elith, Jane and Graham, Catherine H and Lehmann, Anthony and Leathwick, John and Ferrier, Simon}, 580 | Journal = {Ecological Applications}, 581 | Number = {1}, 582 | Pages = {181--197}, 583 | Publisher = {Eco Soc America}, 584 | Title = {Sample selection bias and presence-only distribution models: implications for background and pseudo-absence data}, 585 | Volume = {19}, 586 | Year = {2009}} 587 | 588 | @article{phillips2013estimating, 589 | Author = {Phillips, Steven J and Elith, Jane}, 590 | Journal = {Ecology}, 591 | Publisher = {Eco Soc America}, 592 | Title = {On estimating probability of presence from use-availability or presence-background data}, 593 | Year = {2013}} 594 | 595 | @article{johnson2006resource, 596 | Author = {Johnson, Chris J and Nielsen, Scott E and Merrill, Evelyn H and McDonald, Trent L and Boyce, Mark S}, 597 | Journal = {Journal of Wildlife Management}, 598 | Number = {2}, 599 | Pages = {347--357}, 600 | Publisher = {BioOne}, 601 | Title = {Resource selection functions based on use-availability data: theoretical motivation and evaluation methods}, 602 | Volume = {70}, 603 | Year = {2006}} 604 | 605 | @article{fithian2013finite, 606 | Author = {Fithian, William and Hastie, Trevor}, 607 | Journal = {The Annals of Applied Statistics}, 608 | Number = {4}, 609 | Pages = {1917--1939}, 610 | Publisher = {Institute of Mathematical Statistics}, 611 | Title = {Finite-sample equivalence in statistical models for presence-only data}, 612 | Volume = {7}, 613 | Year = {2013}} 614 | 615 | 
@article{fithian2013local, 616 | Author = {Fithian, William and Hastie, Trevor}, 617 | Journal = {arXiv preprint arXiv:1306.3706}, 618 | Title = {Local Case-Control Sampling: Efficient Subsampling in Imbalanced Data Sets}, 619 | Year = {2013}} 620 | 621 | @article{hastie2013inference, 622 | Author = {Hastie, Trevor and Fithian, Will}, 623 | Journal = {Ecography}, 624 | Number = {8}, 625 | Pages = {864--867}, 626 | Publisher = {Wiley Online Library}, 627 | Title = {Inference from presence-only data; the ongoing controversy}, 628 | Volume = {36}, 629 | Year = {2013}} 630 | 631 | @article{lancaster1996case, 632 | Author = {Lancaster, Tony and Imbens, Guido}, 633 | Journal = {Journal of Econometrics}, 634 | Number = {1}, 635 | Pages = {145--160}, 636 | Publisher = {Elsevier}, 637 | Title = {Case-control studies with contaminated controls}, 638 | Volume = {71}, 639 | Year = {1996}} 640 | 641 | @book{andersen1973conditional, 642 | Author = {Andersen, Erling B}, 643 | Publisher = {Mentalhygiejnisk forlag}, 644 | Title = {Conditional inference and models for measuring}, 645 | Volume = {5}, 646 | Year = {1973}} 647 | 648 | @article{cox1975partial, 649 | Author = {Cox, David R}, 650 | Journal = {Biometrika}, 651 | Number = {2}, 652 | Pages = {269--276}, 653 | Publisher = {Biometrika Trust}, 654 | Title = {Partial likelihood}, 655 | Volume = {62}, 656 | Year = {1975}} 657 | 658 | @article{geyer1992constrained, 659 | Author = {Geyer, Charles J and Thompson, Elizabeth A}, 660 | Journal = {Journal of the Royal Statistical Society. Series B (Methodological)}, 661 | Pages = {657--699}, 662 | Publisher = {JSTOR}, 663 | Title = {Constrained Monte Carlo maximum likelihood for dependent data}, 664 | Year = {1992}} 665 | 666 | @article{chen1997statistical, 667 | Author = {Chen, Sean X and Liu, Jun S}, 668 | Journal = {Statistica Sinica}, 669 | Pages = {875--892}, 670 | Title = {Statistical applications of the Poisson-binomial and conditional Bernoulli distributions}, 671 | Volume = {7}, 672 | Year = {1997}} 673 | 674 | @article{chen1994weighted, 675 | Author = {Chen, Xiang-Hui and Dempster, Arthur P and Liu, Jun S}, 676 | Journal = {Biometrika}, 677 | Number = {3}, 678 | Pages = {457--469}, 679 | Publisher = {Biometrika Trust}, 680 | Title = {Weighted finite population sampling to maximize entropy}, 681 | Volume = {81}, 682 | Year = {1994}} 683 | 684 | @book{breslow1980statistical, 685 | Author = {Breslow, Norman E and Day, Nicholas E and others}, 686 | Number = {32}, 687 | Publisher = {Distributed for IARC by WHO, Geneva, Switzerland}, 688 | Title = {Statistical methods in cancer research. Vol. 1. 
The analysis of case-control studies.}, 689 | Volume = {1}, 690 | Year = {1980}} 691 | 692 | @article{mantel1959statistical, 693 | Author = {Mantel, Nathan and Haenszel, William}, 694 | Journal = {Journal of the National Cancer Institute}, 695 | Number = {4}, 696 | Pages = {719--748}, 697 | Publisher = {Oxford University Press}, 698 | Title = {Statistical Aspects of the Analysis of Data From Retrospective Studies of Disease}, 699 | Volume = {22}, 700 | Year = {1959}} 701 | 702 | @article{chawla2004editorial, 703 | Author = {Chawla, Nitesh V and Japkowicz, Nathalie and Kotcz, Aleksander}, 704 | Journal = {ACM SIGKDD Explorations Newsletter}, 705 | Number = {1}, 706 | Pages = {1--6}, 707 | Publisher = {ACM}, 708 | Title = {Editorial: special issue on learning from imbalanced data sets}, 709 | Volume = {6}, 710 | Year = {2004}} 711 | 712 | @article{he2009learning, 713 | Author = {He, Haibo and Garcia, Edwardo A}, 714 | Journal = {Knowledge and Data Engineering, IEEE Transactions on}, 715 | Number = {9}, 716 | Pages = {1263--1284}, 717 | Publisher = {IEEE}, 718 | Title = {Learning from imbalanced data}, 719 | Volume = {21}, 720 | Year = {2009}} 721 | 722 | @inproceedings{mani2003knn, 723 | Author = {Mani, Inderjeet and Zhang, I}, 724 | Booktitle = {Proceedings of Workshop on Learning from Imbalanced Datasets}, 725 | Title = {kNN approach to unbalanced data distributions: a case study involving information extraction}, 726 | Year = {2003}} 727 | 728 | @article{breslow1988logistic, 729 | Author = {Breslow, NE and Cain, KC}, 730 | Journal = {Biometrika}, 731 | Number = {1}, 732 | Pages = {11--20}, 733 | Publisher = {Biometrika Trust}, 734 | Title = {Logistic regression for two-stage case-control data}, 735 | Volume = {75}, 736 | Year = {1988}} 737 | 738 | @article{weinberg1990design, 739 | Author = {Weinberg, Clarice R and Wacholder, Sholom}, 740 | Journal = {Biometrics}, 741 | Pages = {963--975}, 742 | Publisher = {JSTOR}, 743 | Title = {The design and analysis of case-control studies with biased sampling}, 744 | Year = {1990}} 745 | 746 | @article{friedman2000additive, 747 | Author = {Friedman, Jerome and Hastie, Trevor and Tibshirani, Robert}, 748 | Journal = {The annals of statistics}, 749 | Number = {2}, 750 | Pages = {337--407}, 751 | Publisher = {Institute of Mathematical Statistics}, 752 | Title = {Additive logistic regression: a statistical view of boosting (With discussion and a rejoinder by the authors)}, 753 | Volume = {28}, 754 | Year = {2000}} 755 | 756 | @article{freund1997decision, 757 | Author = {Freund, Yoav and Schapire, Robert E}, 758 | Journal = {Journal of computer and system sciences}, 759 | Number = {1}, 760 | Pages = {119--139}, 761 | Publisher = {Elsevier}, 762 | Title = {A decision-theoretic generalization of on-line learning and an application to boosting}, 763 | Volume = {55}, 764 | Year = {1997}} 765 | 766 | @inproceedings{webb2006introducing, 767 | Author = {Webb, Steve and Caverlee, James and Pu, Calton}, 768 | Booktitle = {Proceedings of the Third Conference on Email and Anti-Spam (CEAS)}, 769 | Title = {Introducing the webb spam corpus: Using email spam to identify web spam automatically}, 770 | Year = {2006}} 771 | 772 | @article{weiss2004mining, 773 | Author = {Weiss, Gary M}, 774 | Journal = {ACM SIGKDD Explorations Newsletter}, 775 | Number = {1}, 776 | Pages = {7--19}, 777 | Publisher = {ACM}, 778 | Title = {Mining with rarity: a unifying framework}, 779 | Volume = {6}, 780 | Year = {2004}} 781 | 782 | @article{scott1991fitting, 783 | Author = {Scott, AJ and 
Wild, CJ}, 784 | Journal = {Biometrics}, 785 | Pages = {497--510}, 786 | Publisher = {JSTOR}, 787 | Title = {Fitting logistic regression models in stratified case-control studies}, 788 | Year = {1991}} 789 | 790 | @article{fears1986logistic, 791 | Author = {Fears, Thomas R and Brown, Charles C}, 792 | Journal = {Biometrics}, 793 | Pages = {955--960}, 794 | Publisher = {JSTOR}, 795 | Title = {Logistic regression methods for retrospective case-control studies using complex sampling procedures}, 796 | Year = {1986}} 797 | 798 | @article{scott1997fitting, 799 | Author = {Scott, Alastair J and Wild, Chris J}, 800 | Journal = {Biometrika}, 801 | Number = {1}, 802 | Pages = {57--71}, 803 | Publisher = {Biometrika Trust}, 804 | Title = {Fitting regression models to case-control data by maximum likelihood}, 805 | Volume = {84}, 806 | Year = {1997}} 807 | 808 | @article{scott1986fitting, 809 | Author = {Scott, Alastair J and Wild, CJ}, 810 | Journal = {Journal of the Royal Statistical Society. Series B (Methodological)}, 811 | Pages = {170--182}, 812 | Publisher = {JSTOR}, 813 | Title = {Fitting logistic models under case-control or choice based sampling}, 814 | Year = {1986}} 815 | 816 | @article{scott2002robustness, 817 | Author = {Scott, Alastair and Wild, Chris}, 818 | Journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, 819 | Number = {2}, 820 | Pages = {207--219}, 821 | Publisher = {Wiley Online Library}, 822 | Title = {On the robustness of weighted methods for fitting models to case--control data}, 823 | Volume = {64}, 824 | Year = {2002}} 825 | 826 | @article{lumley2011connections, 827 | Author = {Lumley, Thomas and Shaw, Pamela A and Dai, James Y}, 828 | Journal = {International Statistical Review}, 829 | Number = {2}, 830 | Pages = {200--220}, 831 | Publisher = {Wiley Online Library}, 832 | Title = {Connections between survey calibration estimators and semiparametric models for incomplete data}, 833 | Volume = {79}, 834 | Year = {2011}} 835 | 836 | @article{manski1989estimation, 837 | Author = {Manski, Charles F and Thompson, T Scott}, 838 | Journal = {Journal of Econometrics}, 839 | Number = {1}, 840 | Pages = {97--123}, 841 | Publisher = {Elsevier}, 842 | Title = {Estimation of best predictors of binary response}, 843 | Volume = {40}, 844 | Year = {1989}} 845 | 846 | @article{xie1989logit, 847 | Author = {Xie, Yu and Manski, Charles F}, 848 | Journal = {Sociological Methods \& Research}, 849 | Number = {3}, 850 | Pages = {283--302}, 851 | Publisher = {Sage Publications}, 852 | Title = {The logit model and response-based samples}, 853 | Volume = {17}, 854 | Year = {1989}} 855 | 856 | @article{prentice1979logistic, 857 | Author = {Prentice, Ross L and Pyke, Ronald}, 858 | Journal = {Biometrika}, 859 | Number = {3}, 860 | Pages = {403--411}, 861 | Publisher = {Biometrika Trust}, 862 | Title = {Logistic disease incidence models and case-control studies}, 863 | Volume = {66}, 864 | Year = {1979}} 865 | 866 | @article{anderson1972separate, 867 | Author = {Anderson, James A}, 868 | Journal = {Biometrika}, 869 | Number = {1}, 870 | Pages = {19--35}, 871 | Publisher = {Biometrika Trust}, 872 | Title = {Separate sample logistic discrimination}, 873 | Volume = {59}, 874 | Year = {1972}} 875 | 876 | @book{mardia1980multivariate, 877 | Author = {Mardia, Kantilal Varichand and Kent, John T and Bibby, John M}, 878 | Publisher = {Academic press}, 879 | Title = {Multivariate analysis}, 880 | Year = {1980}} 881 | 882 | @book{benzecri1973analyse, 883 | Author = 
{Benz{\'e}cri, Jean-Paul}, 884 | Publisher = {Dunod}, 885 | Title = {L'analyse des donn{\'e}es: l'analyse des correspondances}, 886 | Volume = {2}, 887 | Year = {1973}} 888 | 889 | @phdthesis{srebro2004learning, 890 | Author = {Srebro, Nathan}, 891 | School = {Massachusetts Institute of Technology}, 892 | Title = {Learning with Matrix Factorizations}, 893 | Year = {2004}} 894 | 895 | @inproceedings{angst2011generalized, 896 | Author = {Angst, Roland and Zach, Christopher and Pollefeys, Marc}, 897 | Booktitle = {Computer Vision (ICCV), 2011 IEEE International Conference on}, 898 | Organization = {IEEE}, 899 | Pages = {2502--2509}, 900 | Title = {The generalized trace-norm and its application to structure-from-motion problems}, 901 | Year = {2011}} 902 | 903 | @article{bach2008consistency, 904 | Author = {Bach, Francis R}, 905 | Journal = {The Journal of Machine Learning Research}, 906 | Pages = {1019--1048}, 907 | Publisher = {JMLR. org}, 908 | Title = {Consistency of trace norm minimization}, 909 | Volume = {9}, 910 | Year = {2008}} 911 | 912 | @article{abernethy2009new, 913 | Author = {Abernethy, Jacob and Bach, Francis and Evgeniou, Theodoros and Vert, Jean-Philippe}, 914 | Journal = {The Journal of Machine Learning Research}, 915 | Pages = {803--826}, 916 | Publisher = {JMLR. org}, 917 | Title = {A new approach to collaborative filtering: Operator estimation with spectral regularization}, 918 | Volume = {10}, 919 | Year = {2009}} 920 | 921 | @article{salakhutdinov2008probabilistic, 922 | Author = {Salakhutdinov, Ruslan and Mnih, Andriy}, 923 | Journal = {Advances in neural information processing systems}, 924 | Pages = {1257--1264}, 925 | Title = {Probabilistic matrix factorization}, 926 | Volume = {20}, 927 | Year = {2008}} 928 | 929 | @inproceedings{agarwal2009regression, 930 | Author = {Agarwal, Deepak and Chen, Bee-Chung}, 931 | Booktitle = {Proceedings of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining}, 932 | Organization = {ACM}, 933 | Pages = {19--28}, 934 | Title = {Regression-based latent factor models}, 935 | Year = {2009}} 936 | 937 | @inproceedings{collins2001generalization, 938 | Author = {Collins, Michael and Dasgupta, Sanjoy and Schapire, Robert E}, 939 | Booktitle = {NIPS 2001}, 940 | Title = {A generalization of principal component analysis to the exponential family}, 941 | Year = {2001}} 942 | 943 | @inproceedings{menon2010log, 944 | Author = {Menon, Aditya Krishna and Elkan, Charles}, 945 | Booktitle = {Data Mining (ICDM), 2010 IEEE 10th International Conference on}, 946 | Organization = {IEEE}, 947 | Pages = {364--373}, 948 | Title = {A log-linear model with latent features for dyadic prediction}, 949 | Year = {2010}} 950 | 951 | @article{salakhutdinov2010collaborative, 952 | Author = {Salakhutdinov, Ruslan and Srebro, Nathan}, 953 | Journal = {arXiv preprint arXiv:1002.2780}, 954 | Title = {Collaborative filtering in a non-uniform world: Learning with the weighted trace norm}, 955 | Year = {2010}} 956 | 957 | @article{rahul2013unpublished, 958 | Author = {Mazumder, Rahul and Hastie, Trevor}, 959 | Journal = {Unpublished manuscript}, 960 | Title = {Warm-Started Singular Value Decompositions and the Nuclear Norm}, 961 | Year = {2013}} 962 | 963 | @article{srebro2005maximum, 964 | Author = {Srebro, Nathan and Rennie, Jason DM and Jaakkola, Tommi}, 965 | Journal = {Advances in neural information processing systems}, 966 | Number = {5}, 967 | Pages = {1329--1336}, 968 | Publisher = {Citeseer}, 969 | Title = {Maximum-margin matrix 
factorization}, 970 | Volume = {17}, 971 | Year = {2005}} 972 | 973 | @phdthesis{fazel2002matrix, 974 | Author = {Fazel, Maryam}, 975 | School = {PhD thesis, Stanford University}, 976 | Title = {Matrix rank minimization with applications}, 977 | Year = {2002}} 978 | 979 | @article{james2003clustering, 980 | Author = {James, Gareth M and Sugar, Catherine A}, 981 | Journal = {Journal of the American Statistical Association}, 982 | Number = {462}, 983 | Pages = {397--408}, 984 | Publisher = {American Statistical Association}, 985 | Title = {Clustering for sparsely sampled functional data}, 986 | Volume = {98}, 987 | Year = {2003}} 988 | 989 | @article{yao2005functional, 990 | Author = {Yao, Fang and M{\"u}ller, Hans-Georg and Wang, Jane-Ling}, 991 | Journal = {Journal of the American Statistical Association}, 992 | Number = {470}, 993 | Pages = {577--590}, 994 | Publisher = {Taylor \& Francis}, 995 | Title = {Functional data analysis for sparse longitudinal data}, 996 | Volume = {100}, 997 | Year = {2005}} 998 | 999 | @article{james2000principal, 1000 | Author = {James, Gareth M and Hastie, Trevor J and Sugar, Catherine A}, 1001 | Journal = {Biometrika}, 1002 | Number = {3}, 1003 | Pages = {587--602}, 1004 | Publisher = {Biometrika Trust}, 1005 | Title = {Principal component models for sparse functional data}, 1006 | Volume = {87}, 1007 | Year = {2000}} 1008 | 1009 | @article{hastie1995penalized, 1010 | Author = {Hastie, Trevor and Buja, Andreas and Tibshirani, Robert}, 1011 | Journal = {The Annals of Statistics}, 1012 | Pages = {73--102}, 1013 | Publisher = {JSTOR}, 1014 | Title = {Penalized discriminant analysis}, 1015 | Year = {1995}} 1016 | 1017 | @article{martin2002dynamic, 1018 | Author = {Martin, Andrew D and Quinn, Kevin M}, 1019 | Journal = {Political Analysis}, 1020 | Number = {2}, 1021 | Pages = {134--153}, 1022 | Publisher = {SPM-PMSAPSA}, 1023 | Title = {Dynamic ideal point estimation via Markov chain Monte Carlo for the US Supreme Court, 1953--1999}, 1024 | Volume = {10}, 1025 | Year = {2002}} 1026 | 1027 | @article{candes2009robust, 1028 | Author = {Cand{\`e}s, Emmanuel J and Li, Xiaodong and Ma, Yi and Wright, John}, 1029 | Journal = {arXiv preprint arXiv:0912.3599}, 1030 | Title = {Robust principal component analysis?}, 1031 | Year = {2009}} 1032 | 1033 | @article{yee2003reduced, 1034 | Author = {Yee, Thomas W and Hastie, Trevor J}, 1035 | Journal = {Statistical modelling}, 1036 | Number = {1}, 1037 | Pages = {15--41}, 1038 | Publisher = {SAGE Publications}, 1039 | Title = {Reduced-rank vector generalized linear models}, 1040 | Volume = {3}, 1041 | Year = {2003}} 1042 | 1043 | @article{mazumder2010spectral, 1044 | Author = {Mazumder, Rahul and Hastie, Trevor and Tibshirani, Robert}, 1045 | Journal = {The Journal of Machine Learning Research}, 1046 | Pages = {2287--2322}, 1047 | Publisher = {MIT Press}, 1048 | Title = {Spectral regularization algorithms for learning large incomplete matrices}, 1049 | Volume = {99}, 1050 | Year = {2010}} 1051 | 1052 | @article{argyriou2008convex, 1053 | Author = {Argyriou, Andreas and Evgeniou, Theodoros and Pontil, Massimiliano}, 1054 | Journal = {Machine Learning}, 1055 | Number = {3}, 1056 | Pages = {243--272}, 1057 | Publisher = {Springer}, 1058 | Title = {Convex multi-task feature learning}, 1059 | Volume = {73}, 1060 | Year = {2008}} 1061 | 1062 | @article{Jaggi:2010tz, 1063 | Author = {Jaggi, Martin and Sulovsk{\'{y}}, Marek}, 1064 | Citeulike-Article-Id = {9502074}, 1065 | Citeulike-Linkout-0 = {http://www.icml2010.org/papers/196.pdf}, 
1066 | Journal = {ICML 2010: Proceedings of the 27th international conference on Machine learning}, 1067 | Posted-At = {2011-07-04 10:29:20}, 1068 | Priority = {2}, 1069 | Title = {{A Simple Algorithm for Nuclear Norm Regularized Problems}}, 1070 | Url = {http://www.icml2010.org/papers/196.pdf}, 1071 | Year = {2010}, 1072 | Bdsk-Url-1 = {http://www.icml2010.org/papers/196.pdf}} 1073 | 1074 | @article{tfocs, 1075 | Author = {Becker, StephenR. and Cand{\`e}s, EmmanuelJ. and Grant, MichaelC.}, 1076 | Doi = {10.1007/s12532-011-0029-5}, 1077 | Issn = {1867-2949}, 1078 | Journal = {Mathematical Programming Computation}, 1079 | Keywords = {Optimal first-order methods; Nesterov's accelerated descent algorithms; Proximal algorithms; Conic duality; Smoothing by conjugation; The Dantzig selector; The LASSO; Nuclear-norm minimization; 90C05; 90C06; 90C25; 62J077}, 1080 | Language = {English}, 1081 | Number = {3}, 1082 | Pages = {165-218}, 1083 | Publisher = {Springer-Verlag}, 1084 | Title = {Templates for convex cone problems with applications to sparse signal recovery}, 1085 | Url = {http://dx.doi.org/10.1007/s12532-011-0029-5}, 1086 | Volume = {3}, 1087 | Year = {2011}, 1088 | Bdsk-Url-1 = {http://dx.doi.org/10.1007/s12532-011-0029-5}} 1089 | 1090 | @inproceedings{ICML2013_jaggi13, 1091 | Abstract = {We provide stronger and more general primal-dual convergence results for Frank-Wolfe-type algorithms (a.k.a. conditional gradient) for constrained convex optimization, enabled by a simple framework of duality gap certificates. Our analysis also holds if the linear subproblems are only solved approximately (as well as if the gradients are inexact), and is proven to be worst-case optimal in the sparsity of the obtained solutions. On the application side, this allows us to unify a large variety of existing sparse greedy methods, in particular for optimization over convex hulls of an atomic set, even if those sets can only be approximated, including sparse (or structured sparse) vectors or matrices, low-rank matrices, permutation matrices, or max-norm bounded matrices. We present a new general framework for convex optimization over matrix factorizations, where every Frank-Wolfe iteration will consist of a low-rank update, and discuss the broad application areas of this approach.}, 1092 | Author = {Martin Jaggi}, 1093 | Booktitle = {Proceedings of the 30th International Conference on Machine Learning (ICML-13)}, 1094 | Editor = {Sanjoy Dasgupta and David Mcallester}, 1095 | Number = {1}, 1096 | Pages = {427-435}, 1097 | Publisher = {JMLR Workshop and Conference Proceedings}, 1098 | Title = {Revisiting {Frank-Wolfe}: Projection-Free Sparse Convex Optimization}, 1099 | Url = {http://jmlr.csail.mit.edu/proceedings/papers/v28/jaggi13.pdf}, 1100 | Volume = {28}, 1101 | Year = {2013}, 1102 | Bdsk-Url-1 = {http://jmlr.csail.mit.edu/proceedings/papers/v28/jaggi13.pdf}} 1103 | 1104 | @techreport{nest-07, 1105 | Author = {Y. Nesterov}, 1106 | Institution = {Center for Operations Research and Econometrics (CORE), Catholic University of Louvain}, 1107 | Note = {Tech. Rep, 76}, 1108 | Title = {Gradient methods for minimizing composite objective function}, 1109 | Year = {2007}} 1110 | 1111 | @article{nest_03, 1112 | Author = {Nesterov, Y.}, 1113 | Journal = {Kluwer, Boston}, 1114 | Title = {Introductory lectures on convex optimization: Basic course}, 1115 | Year = {2003}} 1116 | 1117 | @book{GVL83, 1118 | Author = {G. Golub and C. 
{Van Loan}}, 1119 | Publisher = {Johns Hopkins University Press, Baltimore.}, 1120 | Title = {Matrix Computations}, 1121 | Year = {1983}} 1122 | 1123 | @article{cox1958some, 1124 | Author = {Cox, David R}, 1125 | Journal = {The Annals of Mathematical Statistics}, 1126 | Pages = {357--372}, 1127 | Publisher = {JSTOR}, 1128 | Title = {Some problems connected with statistical inference}, 1129 | Year = {1958}} 1130 | 1131 | @misc{goodstein1989feynman, 1132 | Author = {Goodstein, DL}, 1133 | Journal = {Physics Today}, 1134 | Number = {2}, 1135 | Pages = {70--75}, 1136 | Publisher = {American Institute of Physics}, 1137 | Title = {Richard P. Feynman, Teacher}, 1138 | Volume = {42}, 1139 | Year = {1989}} 1140 | 1141 | @article{lehrer2010truth, 1142 | Author = {Lehrer, Jonah}, 1143 | Journal = {The New Yorker}, 1144 | Pages = {52}, 1145 | Title = {The truth wears off}, 1146 | Volume = {13}, 1147 | Year = {2010}} 1148 | 1149 | @book{tukey1994collected, 1150 | Author = {Tukey, John Wilder}, 1151 | Publisher = {Chapman \& Hall/CRC}, 1152 | Title = {The collected works of John W. Tukey: Multiple comparisons, 1948-1983}, 1153 | Volume = {8}, 1154 | Year = {1994}} 1155 | 1156 | @article{benjamini1995controlling, 1157 | Author = {Benjamini, Yoav and Hochberg, Yosef}, 1158 | Journal = {Journal of the Royal Statistical Society. Series B (Methodological)}, 1159 | Pages = {289--300}, 1160 | Publisher = {JSTOR}, 1161 | Title = {Controlling the false discovery rate: a practical and powerful approach to multiple testing}, 1162 | Year = {1995}} 1163 | 1164 | @article{gelman2013garden, 1165 | Author = {Gelman, Andrew and Loken, Eric}, 1166 | Journal = {Downloaded January}, 1167 | Pages = {2014}, 1168 | Title = {The garden of forking paths: Why multiple comparisons can be a problem, even when there is no ``fishing expedition'' or ``p-hacking'' and the research hypothesis was posited ahead of time}, 1169 | Volume = {30}, 1170 | Year = {2013}} 1171 | 1172 | @article{benjamini2005false, 1173 | Author = {Benjamini, Yoav and Yekutieli, Daniel}, 1174 | Journal = {Journal of the American Statistical Association}, 1175 | Number = {469}, 1176 | Pages = {71--81}, 1177 | Publisher = {Taylor \& Francis}, 1178 | Title = {False discovery rate--adjusted multiple confidence intervals for selected parameters}, 1179 | Volume = {100}, 1180 | Year = {2005}} 1181 | 1182 | @article{rosenblatt2014selective, 1183 | title={Selective correlations; not voodoo}, 1184 | author={Rosenblatt, JD and Benjamini, Yoav}, 1185 | journal={NeuroImage}, 1186 | volume={103}, 1187 | pages={401--410}, 1188 | year={2014}, 1189 | publisher={Elsevier} 1190 | } 1191 | 1192 | @article{benjamini2010simultaneous, 1193 | title={Simultaneous and selective inference: current successes and future challenges}, 1194 | author={Benjamini, Yoav}, 1195 | journal={Biometrical Journal}, 1196 | volume={52}, 1197 | number={6}, 1198 | pages={708--721}, 1199 | year={2010}, 1200 | publisher={Wiley Online Library} 1201 | } 1202 | 1203 | @article{zhong2008bias, 1204 | title={Bias-reduced estimators and confidence intervals for odds ratios in genome-wide association studies}, 1205 | author={Zhong, Hua and Prentice, Ross L}, 1206 | journal={Biostatistics}, 1207 | volume={9}, 1208 | number={4}, 1209 | pages={621--634}, 1210 | year={2008}, 1211 | publisher={Biometrika Trust} 1212 | } 1213 | 1214 | @article{zollner2007overcoming, 1215 | title={Overcoming the winner’s curse: estimating penetrance parameters from case-control data}, 1216 | author={Z{\"o}llner, Sebastian and Pritchard, 
Jonathan K}, 1217 | journal={The American Journal of Human Genetics}, 1218 | volume={80}, 1219 | number={4}, 1220 | pages={605--615}, 1221 | year={2007}, 1222 | publisher={Elsevier} 1223 | } 1224 | 1225 | @article{cohen1989two, 1226 | title={Two stage conditionally unbiased estimators of the selected mean}, 1227 | author={Cohen, Arthur and Sackrowitz, Harold B}, 1228 | journal={Statistics \& Probability Letters}, 1229 | volume={8}, 1230 | number={3}, 1231 | pages={273--278}, 1232 | year={1989}, 1233 | publisher={Elsevier} 1234 | } 1235 | 1236 | @article{sampson2005drop, 1237 | title={Drop-the-Losers Design: Normal Case}, 1238 | author={Sampson, Allan R and Sill, Michael W}, 1239 | journal={Biometrical Journal}, 1240 | volume={47}, 1241 | number={3}, 1242 | pages={257--268}, 1243 | year={2005}, 1244 | publisher={Wiley Online Library} 1245 | } 1246 | 1247 | @article{sill2009drop, 1248 | title={Drop-the-losers design: Binomial case}, 1249 | author={Sill, Michael W and Sampson, Allan R}, 1250 | journal={Computational statistics \& data analysis}, 1251 | volume={53}, 1252 | number={3}, 1253 | pages={586--595}, 1254 | year={2009}, 1255 | publisher={Elsevier} 1256 | } 1257 | 1258 | @article{yekutieli2012adjusted, 1259 | title={Adjusted Bayesian inference for selected parameters}, 1260 | author={Yekutieli, Daniel}, 1261 | journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, 1262 | volume={74}, 1263 | number={3}, 1264 | pages={515--541}, 1265 | year={2012}, 1266 | publisher={Wiley Online Library} 1267 | } 1268 | 1269 | @article{dawid1994selection, 1270 | title={Selection paradoxes of Bayesian inference}, 1271 | author={Dawid, AP}, 1272 | journal={Lecture Notes-Monograph Series}, 1273 | pages={211--220}, 1274 | year={1994}, 1275 | publisher={JSTOR} 1276 | } 1277 | 1278 | @article{pakman2014exact, 1279 | Author = {Pakman, Ari and Paninski, Liam}, 1280 | Journal = {Journal of Computational and Graphical Statistics}, 1281 | Number = {2}, 1282 | Pages = {518--542}, 1283 | Publisher = {Taylor \& Francis}, 1284 | Title = {Exact hamiltonian monte carlo for truncated multivariate gaussians}, 1285 | Volume = {23}, 1286 | Year = {2014}} 1287 | 1288 | @inproceedings{lovasz2006fast, 1289 | title={Fast algorithms for logconcave functions: Sampling, rounding, integration and optimization}, 1290 | author={Lov{\'a}sz, L{\'a}szl{\'o} and Vempala, Santosh}, 1291 | booktitle={Foundations of Computer Science, 2006. FOCS'06. 47th Annual IEEE Symposium on}, 1292 | pages={57--68}, 1293 | year={2006}, 1294 | organization={IEEE} 1295 | } 1296 | 1297 | @article{berk2013valid, 1298 | Author = {Berk, Richard and Brown, Lawrence and Buja, Andreas and Zhang, Kai and Zhao, Linda}, 1299 | Journal = {The Annals of Statistics}, 1300 | Number = {2}, 1301 | Pages = {802--837}, 1302 | Publisher = {Institute of Mathematical Statistics}, 1303 | Title = {Valid post-selection inference}, 1304 | Volume = {41}, 1305 | Year = {2013}} 1306 | 1307 | @article{tibshirani1996regression, 1308 | title={Regression shrinkage and selection via the lasso}, 1309 | author={Tibshirani, Robert}, 1310 | journal={Journal of the Royal Statistical Society. 
Series B (Methodological)}, 1311 | pages={267--288}, 1312 | year={1996}, 1313 | publisher={JSTOR} 1314 | } 1315 | 1316 | @article{dezeure2015high, 1317 | title={High-Dimensional Inference: Confidence Intervals, $ p $-Values and R-Software hdi}, 1318 | author={Dezeure, Ruben and B{\"u}hlmann, Peter and Meier, Lukas and Meinshausen, Nicolai and others}, 1319 | journal={Statistical Science}, 1320 | volume={30}, 1321 | number={4}, 1322 | pages={533--558}, 1323 | year={2015}, 1324 | publisher={Institute of Mathematical Statistics} 1325 | } 1326 | 1327 | @article{van2014asymptotically, 1328 | title={On asymptotically optimal confidence regions and tests for high-dimensional models}, 1329 | author={Van de Geer, Sara and B{\"u}hlmann, Peter and Ritov, Ya’acov and Dezeure, Ruben and others}, 1330 | journal={The Annals of Statistics}, 1331 | volume={42}, 1332 | number={3}, 1333 | pages={1166--1202}, 1334 | year={2014}, 1335 | publisher={Institute of Mathematical Statistics} 1336 | } 1337 | 1338 | @article{javanmard2014hypothesis, 1339 | title={Hypothesis testing in high-dimensional regression under the gaussian random design model: Asymptotic theory}, 1340 | author={Javanmard, Adel and Montanari, Andrea}, 1341 | journal={IEEE Transactions on Information Theory}, 1342 | volume={60}, 1343 | number={10}, 1344 | pages={6522--6554}, 1345 | year={2014}, 1346 | publisher={IEEE} 1347 | } 1348 | @article{ioannidis2005most, 1349 | Author = {Ioannidis, John PA}, 1350 | Journal = {PLoS medicine}, 1351 | Number = {8}, 1352 | Pages = {e124}, 1353 | Publisher = {Public Library of Science}, 1354 | Title = {Why most published research findings are false}, 1355 | Volume = {2}, 1356 | Year = {2005}} 1357 | 1358 | @article{weinstein2013selection, 1359 | Author = {Weinstein, Asaf and Fithian, William and Benjamini, Yoav}, 1360 | Journal = {Journal of the American Statistical Association}, 1361 | Number = {501}, 1362 | Pages = {165--176}, 1363 | Publisher = {Taylor \& Francis Group}, 1364 | Title = {Selection adjusted confidence intervals with more power to determine the sign}, 1365 | Volume = {108}, 1366 | Year = {2013}} 1367 | 1368 | @article{taylor2013tests, 1369 | Author = {Taylor, Jonathan and Loftus, Joshua and Tibshirani, Ryan and Tibshirani, Rob}, 1370 | Journal = {arXiv preprint arXiv:1308.3020}, 1371 | Title = {Tests in adaptive regression via the Kac-Rice formula}, 1372 | Year = {2013}} 1373 | 1374 | @article{tian2017asymptotics, 1375 | title={Asymptotics of selective inference}, 1376 | author={Tian, Xiaoying and Taylor, Jonathan}, 1377 | journal={Scandinavian Journal of Statistics}, 1378 | year={2017}, 1379 | publisher={Wiley Online Library} 1380 | } 1381 | 1382 | @article{rivera2013optimal, 1383 | Author = {Rivera, Camilo and Walther, Guenther}, 1384 | Journal = {Scandinavian Journal of Statistics}, 1385 | Number = {4}, 1386 | Pages = {752--769}, 1387 | Publisher = {Wiley Online Library}, 1388 | Title = {Optimal detection of a jump in the intensity of a Poisson process or in a density with likelihood ratio statistics}, 1389 | Volume = {40}, 1390 | Year = {2013}} 1391 | 1392 | @article{lee2016exact, 1393 | title={Exact post-selection inference, with application to the lasso}, 1394 | author={Lee, Jason D and Sun, Dennis L and Sun, Yuekai and Taylor, Jonathan E}, 1395 | journal={The Annals of Statistics}, 1396 | volume={44}, 1397 | number={3}, 1398 | pages={907--927}, 1399 | year={2016}, 1400 | publisher={Institute of Mathematical Statistics} 1401 | } 1402 | 1403 | @inproceedings{lee2014marginal, 1404 | 
title={Exact post model selection inference for marginal screening}, 1405 | author={Lee, Jason D and Taylor, Jonathan E}, 1406 | booktitle={Advances in Neural Information Processing Systems}, 1407 | pages={136--144}, 1408 | year={2014} 1409 | } 1410 | 1411 | @article{loftus2014significance, 1412 | title={A significance test for forward stepwise model selection}, 1413 | author={Loftus, Joshua R and Taylor, Jonathan E}, 1414 | journal={arXiv preprint arXiv:1405.3920}, 1415 | year={2014} 1416 | } 1417 | 1418 | @article{lockhart2014significance, 1419 | Author = {Lockhart, Richard and Taylor, Jonathan and Tibshirani, Ryan J and Tibshirani, Robert}, 1420 | Journal = {The Annals of Statistics}, 1421 | Number = {2}, 1422 | Pages = {413--468}, 1423 | Publisher = {Institute of Mathematical Statistics}, 1424 | Title = {A significance test for the lasso (with discussion)}, 1425 | Volume = {42}, 1426 | Year = {2014}} 1427 | 1428 | @article{tibshirani2014exact, 1429 | title={Exact post-selection inference for sequential regression procedures}, 1430 | author={Tibshirani, Ryan J and Taylor, Jonathan and Lockhart, Richard and Tibshirani, Robert}, 1431 | journal={arXiv preprint arXiv:1401.3889}, 1432 | year={2014} 1433 | } 1434 | @article{g2013adaptive, 1435 | Author = {G'Sell, Max Grazier and Taylor, Jonathan and Tibshirani, Robert}, 1436 | Journal = {arXiv preprint arXiv:1307.4765}, 1437 | Title = {Adaptive testing for the graphical lasso}, 1438 | Year = {2013}} 1439 | 1440 | @article{simon2013estimating, 1441 | Author = {Simon, Noah and Simon, Richard}, 1442 | Journal = {arXiv preprint arXiv:1311.3709}, 1443 | Title = {On Estimating Many Means, Selection Bias, and the Bootstrap}, 1444 | Year = {2013}} 1445 | 1446 | @article{g2016sequential, 1447 | title={Sequential selection procedures and false discovery rate control}, 1448 | author={G'Sell, Max Grazier and Wager, Stefan and Chouldechova, Alexandra and Tibshirani, Robert}, 1449 | journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, 1450 | volume={78}, 1451 | number={2}, 1452 | pages={423--444}, 1453 | year={2016}, 1454 | publisher={Wiley Online Library} 1455 | } 1456 | @article{brown1986fundamentals, 1457 | Author = {Brown, Lawrence D}, 1458 | Journal = {Lecture Notes-monograph series}, 1459 | Pages = {i--279}, 1460 | Publisher = {JSTOR}, 1461 | Title = {Fundamentals of statistical exponential families with applications in statistical decision theory}, 1462 | Year = {1986}} 1463 | 1464 | @article{wasserman2009high, 1465 | title={High dimensional variable selection}, 1466 | author={Wasserman, Larry and Roeder, Kathryn}, 1467 | journal={Annals of statistics}, 1468 | volume={37}, 1469 | number={5A}, 1470 | pages={2178}, 1471 | year={2009}, 1472 | publisher={NIH Public Access} 1473 | } 1474 | 1475 | @article{meinshausen2009p, 1476 | title={P-values for high-dimensional regression}, 1477 | author={Meinshausen, Nicolai and Meier, Lukas and B{\"u}hlmann, Peter}, 1478 | journal={Journal of the American Statistical Association}, 1479 | volume={104}, 1480 | number={488}, 1481 | year={2009} 1482 | } 1483 | 1484 | 1485 | @article{buhlmann2013statistical, 1486 | Author = {B{\"u}hlmann, Peter and others}, 1487 | Journal = {Bernoulli}, 1488 | Number = {4}, 1489 | Pages = {1212--1242}, 1490 | Publisher = {Bernoulli Society for Mathematical Statistics and Probability}, 1491 | Title = {Statistical significance in high-dimensional linear models}, 1492 | Volume = {19}, 1493 | Year = {2013}} 1494 | 1495 | @article{yuan2007model, 1496 | 
Author = {Yuan, Ming and Lin, Yi}, 1497 | Journal = {Biometrika}, 1498 | Number = {1}, 1499 | Pages = {19--35}, 1500 | Publisher = {Biometrika Trust}, 1501 | Title = {Model selection and estimation in the Gaussian graphical model}, 1502 | Volume = {94}, 1503 | Year = {2007}} 1504 | 1505 | @article{friedman2008sparse, 1506 | Author = {Friedman, Jerome and Hastie, Trevor and Tibshirani, Robert}, 1507 | Journal = {Biostatistics}, 1508 | Number = {3}, 1509 | Pages = {432--441}, 1510 | Publisher = {Biometrika Trust}, 1511 | Title = {Sparse inverse covariance estimation with the graphical lasso}, 1512 | Volume = {9}, 1513 | Year = {2008}} 1514 | 1515 | @article{barnard1963discussion, 1516 | title={Discussion of Professor Bartlett's paper}, 1517 | author={Barnard, GA}, 1518 | journal={Journal of the Royal Statistical Society}, 1519 | year={1963} 1520 | } 1521 | 1522 | @article{jockel1986finite, 1523 | title={Finite sample properties and asymptotic efficiency of Monte Carlo tests}, 1524 | author={Jockel, Karl-Heinz}, 1525 | journal={The annals of Statistics}, 1526 | pages={336--347}, 1527 | year={1986}, 1528 | publisher={JSTOR} 1529 | } 1530 | 1531 | @article{besag1989generalized, 1532 | Author = {Besag, Julian and Clifford, Peter}, 1533 | Journal = {Biometrika}, 1534 | Number = {4}, 1535 | Pages = {633--642}, 1536 | Publisher = {Biometrika Trust}, 1537 | Title = {Generalized monte carlo significance tests}, 1538 | Volume = {76}, 1539 | Year = {1989}} 1540 | 1541 | @article{besag2001markov, 1542 | title={Markov chain Monte Carlo for statistical inference}, 1543 | author={Besag, Julian}, 1544 | journal={Center for Statistics and the Social Sciences}, 1545 | year={2001}, 1546 | publisher={Citeseer} 1547 | } 1548 | 1549 | @article{forster1996monte, 1550 | Author = {Forster, Jonathan J and McDonald, John W and Smith, Peter WF}, 1551 | Journal = {Journal of the Royal Statistical Society. Series B (Methodological)}, 1552 | Pages = {445--453}, 1553 | Publisher = {JSTOR}, 1554 | Title = {Monte Carlo exact conditional tests for log-linear and logistic models}, 1555 | Year = {1996}} 1556 | 1557 | @article{mehta2000efficient, 1558 | Author = {Mehta, Cyrus R and Patel, Nitin R and Senchaudhuri, Pralay}, 1559 | Journal = {Journal of The American Statistical Association}, 1560 | Number = {449}, 1561 | Pages = {99--108}, 1562 | Publisher = {Taylor \& Francis Group}, 1563 | Title = {Efficient Monte Carlo methods for conditional logistic regression}, 1564 | Volume = {95}, 1565 | Year = {2000}} 1566 | 1567 | @article{lehmann1955completeness, 1568 | Author = {Lehmann, EL and Scheff{\'e}, Henry}, 1569 | Journal = {Sankhy{\=a}: The Indian Journal of Statistics (1933-1960)}, 1570 | Number = {3}, 1571 | Pages = {219--236}, 1572 | Publisher = {JSTOR}, 1573 | Title = {Completeness, similar regions, and unbiased estimation: Part II}, 1574 | Volume = {15}, 1575 | Year = {1955}} 1576 | 1577 | @article{matthes1967tests, 1578 | Author = {Matthes, Ted K and Truax, Donald R}, 1579 | Journal = {The Annals of Mathematical Statistics}, 1580 | Pages = {681--697}, 1581 | Publisher = {JSTOR}, 1582 | Title = {Tests of composite hypotheses for the multivariate exponential family}, 1583 | Year = {1967}} 1584 | 1585 | @book{van2000asymptotic, 1586 | Author = {Van der Vaart, Aad W}, 1587 | Publisher = {Cambridge university press}, 1588 | Title = {Asymptotic statistics}, 1589 | Volume = {3}, 1590 | Year = {2000}} 1591 | 1592 | @book{lehmann2005testing, 1593 | Author = {Lehmann, EL and Romano, Joseph P}, 1594 | Publisher = {New York:. 
Springer}, 1595 | Title = {Testing statistical hypotheses}, 1596 | Year = {2005}} 1597 | 1598 | @book{efron1993introduction, 1599 | Author = {Efron, Bradley and Tibshirani, Robert}, 1600 | Publisher = {CRC Press}, 1601 | Title = {An introduction to the bootstrap}, 1602 | Volume = {57}, 1603 | Year = {1993}} 1604 | 1605 | @article{janssen2000global, 1606 | Author = {Janssen, Arnold}, 1607 | Journal = {Annals of Statistics}, 1608 | Pages = {239--253}, 1609 | Publisher = {JSTOR}, 1610 | Title = {Global power functions of goodness of fit tests}, 1611 | Year = {2000}} 1612 | 1613 | @article{aronszajn1950theory, 1614 | Author = {Aronszajn, Nachman}, 1615 | Journal = {Transactions of the American Mathematical Society}, 1616 | Pages = {337--404}, 1617 | Publisher = {JSTOR}, 1618 | Title = {Theory of reproducing kernels}, 1619 | Year = {1950}} 1620 | 1621 | @article{vapnik1997support, 1622 | Author = {Vapnik, Vladimir and Golowich, Steven E and Smola, Alex}, 1623 | Journal = {Advances in Neural Information Processing Systems}, 1624 | Pages = {281--287}, 1625 | Publisher = {Morgan Kaufmann Publishers}, 1626 | Title = {Support vector method for function approximation, regression estimation, and signal processing}, 1627 | Year = {1997}} 1628 | 1629 | @article{wahba1999support, 1630 | Author = {Wahba, Grace and others}, 1631 | Journal = {Advances in Kernel Methods-Support Vector Learning}, 1632 | Pages = {69--87}, 1633 | Publisher = {Citeseer}, 1634 | Title = {Support vector machines, reproducing kernel Hilbert spaces and the randomized GACV}, 1635 | Volume = {6}, 1636 | Year = {1999}} 1637 | 1638 | @article{fukumizu2004dimensionality, 1639 | Author = {Fukumizu, Kenji and Bach, Francis R and Jordan, Michael I}, 1640 | Journal = {The Journal of Machine Learning Research}, 1641 | Pages = {73--99}, 1642 | Publisher = {JMLR.
org}, 1643 | Title = {Dimensionality reduction for supervised learning with reproducing kernel Hilbert spaces}, 1644 | Volume = {5}, 1645 | Year = {2004}} 1646 | 1647 | @article{hedges1992modeling, 1648 | title={Modeling publication selection effects in meta-analysis}, 1649 | author={Hedges, Larry V}, 1650 | journal={Statistical Science}, 1651 | pages={246--255}, 1652 | year={1992}, 1653 | publisher={JSTOR} 1654 | } 1655 | 1656 | @article{hedges1984estimation, 1657 | title={Estimation of effect size under nonrandom sampling: The effects of censoring studies yielding statistically insignificant mean differences}, 1658 | author={Hedges, Larry V}, 1659 | journal={Journal of Educational and Behavioral Statistics}, 1660 | volume={9}, 1661 | number={1}, 1662 | pages={61--85}, 1663 | year={1984}, 1664 | publisher={Sage Publications} 1665 | } 1666 | 1667 | 1668 | @misc{harris2014visualizing, 1669 | author = {Harris, Naftali}, 1670 | title = {Visualizing lasso polytope geometry}, 1671 | month=jun, 1672 | year = {2014}, 1673 | url = {http://www.naftaliharris.com/blog/lasso-polytope-geometry/} 1674 | } 1675 | 1676 | @article{kiefer1977conditional, 1677 | title={Conditional confidence statements and confidence estimators}, 1678 | author={Kiefer, Jack}, 1679 | journal={Journal of the American Statistical Association}, 1680 | volume={72}, 1681 | number={360a}, 1682 | pages={789--808}, 1683 | year={1977}, 1684 | publisher={Taylor \& Francis} 1685 | } 1686 | 1687 | @article{kiefer1976admissibility, 1688 | title={Admissibility of conditional confidence procedures}, 1689 | author={Kiefer, Jack}, 1690 | journal={The Annals of Statistics}, 1691 | pages={836--865}, 1692 | year={1976}, 1693 | publisher={JSTOR} 1694 | } 1695 | 1696 | @article{brownie1977ideas, 1697 | title={The ideas of conditional confidence in the simplest setting}, 1698 | author={Brownie, C and Kiefer, J}, 1699 | journal={Communications in Statistics-Theory and Methods}, 1700 | volume={6}, 1701 | number={8}, 1702 | pages={691--751}, 1703 | year={1977}, 1704 | publisher={Taylor \& Francis} 1705 | } 1706 | 1707 | @article{brown1978contribution, 1708 | title={A contribution to Kiefer's theory of conditional confidence procedures}, 1709 | author={Brown, Lawrence D}, 1710 | journal={The Annals of Statistics}, 1711 | pages={59--71}, 1712 | year={1978}, 1713 | publisher={JSTOR} 1714 | } 1715 | 1716 | @article{berger1994unified, 1717 | title={A unified conditional frequentist and Bayesian test for fixed and sequential simple hypothesis testing}, 1718 | author={Berger, James O and Brown, Lawrence D and Wolpert, Robert L}, 1719 | journal={The Annals of Statistics}, 1720 | pages={1787--1807}, 1721 | year={1994}, 1722 | publisher={JSTOR} 1723 | } 1724 | 1725 | @article{leeb2005model, 1726 | title={Model selection and inference: Facts and fiction}, 1727 | author={Leeb, Hannes and P{\"o}tscher, Benedikt M}, 1728 | journal={Econometric Theory}, 1729 | volume={21}, 1730 | number={01}, 1731 | pages={21--59}, 1732 | year={2005}, 1733 | publisher={Cambridge Univ Press} 1734 | } 1735 | 1736 | @article{leeb2006can, 1737 | title={Can one estimate the conditional distribution of post-model-selection estimators?}, 1738 | author={Leeb, Hannes and P{\"o}tscher, Benedikt M}, 1739 | journal={The Annals of Statistics}, 1740 | pages={2554--2591}, 1741 | year={2006}, 1742 | publisher={JSTOR} 1743 | } 1744 | 1745 | @article{leeb2008can, 1746 | title={Can one estimate the unconditional distribution of post-model-selection estimators?}, 1747 | author={Leeb, Hannes and 
P{\"o}tscher, Benedikt M}, 1748 | journal={Econometric Theory}, 1749 | volume={24}, 1750 | number={02}, 1751 | pages={338--376}, 1752 | year={2008}, 1753 | publisher={Cambridge Univ Press} 1754 | } 1755 | 1756 | @article{belloni2011inference, 1757 | title={Inference for high-dimensional sparse econometric models}, 1758 | author={Belloni, Alexandre and Chernozhukov, Victor and Hansen, Christian}, 1759 | journal={arXiv preprint arXiv:1201.0220}, 1760 | year={2011} 1761 | } 1762 | 1763 | @article{belloni2014inference, 1764 | title={Inference on treatment effects after selection among high-dimensional controls}, 1765 | author={Belloni, Alexandre and Chernozhukov, Victor and Hansen, Christian}, 1766 | journal={The Review of Economic Studies}, 1767 | volume={81}, 1768 | number={2}, 1769 | pages={608--650}, 1770 | year={2014}, 1771 | publisher={Oxford University Press} 1772 | } 1773 | 1774 | @article{zhang2014confidence, 1775 | title={Confidence intervals for low dimensional parameters in high dimensional linear models}, 1776 | author={Zhang, Cun-Hui and Zhang, Stephanie S}, 1777 | journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, 1778 | volume={76}, 1779 | number={1}, 1780 | pages={217--242}, 1781 | year={2014}, 1782 | publisher={Wiley Online Library} 1783 | } 1784 | 1785 | @article{scheffe1953method, 1786 | title={A method for judging all contrasts in the analysis of variance}, 1787 | author={Scheffe, Henry}, 1788 | journal={Biometrika}, 1789 | volume={40}, 1790 | number={1-2}, 1791 | pages={87--110}, 1792 | year={1953}, 1793 | publisher={Biometrika Trust} 1794 | } 1795 | 1796 | @article{hung2016rank, 1797 | title={Rank Verification for Exponential Families}, 1798 | author={Hung, Kenneth and Fithian, William}, 1799 | journal={arXiv preprint arXiv:1610.03944}, 1800 | year={2016} 1801 | } 1802 | 1803 | @article{fithian2015adaptive, 1804 | title={Adaptive sequential model selection}, 1805 | author={Fithian, William and Taylor, Jonathan and Tibshirani, Robert and Tibshirani, Ryan}, 1806 | journal={arXiv preprint arXiv:1512.02565}, 1807 | year={2015} 1808 | } 1809 | 1810 | @article{taylor2016post, 1811 | title={Post-selection inference for l1-penalized likelihood models}, 1812 | author={Taylor, Jonathan and Tibshirani, Robert}, 1813 | journal={arXiv preprint arXiv:1602.07358}, 1814 | year={2016} 1815 | } 1816 | 1817 | @article{taylor2015statistical, 1818 | title={Statistical learning and selective inference}, 1819 | author={Taylor, Jonathan and Tibshirani, Robert J}, 1820 | journal={Proceedings of the National Academy of Sciences}, 1821 | volume={112}, 1822 | number={25}, 1823 | pages={7629--7634}, 1824 | year={2015}, 1825 | publisher={National Acad Sciences} 1826 | } 1827 | 1828 | @article{tian2015selective, 1829 | title={Selective inference with a randomized response}, 1830 | author={Tian, Xiaoying and Taylor, Jonathan E}, 1831 | journal={arXiv preprint arXiv:1507.06739}, 1832 | year={2015} 1833 | } 1834 | 1835 | @article{tibshirani2015uniform, 1836 | title={Uniform asymptotic inference and the bootstrap after model selection}, 1837 | author={Tibshirani, Ryan J and Rinaldo, Alessandro and Tibshirani, Robert and Wasserman, Larry}, 1838 | journal={arXiv preprint arXiv:1506.06266}, 1839 | year={2015} 1840 | } 1841 | 1842 | @inproceedings{orchard1972missing, 1843 | title={A missing information principle: theory and applications}, 1844 | author={Orchard, Terence and Woodbury, Max A and others}, 1845 | booktitle={Proceedings of the 6th Berkeley Symposium on 
Mathematical Statistics and Probability}, 1846 | volume={1}, 1847 | pages={697--715}, 1848 | year={1972}, 1849 | organization={University of California Press, Berkeley, CA} 1850 | } --------------------------------------------------------------------------------