├── .coveragerc ├── .gitignore ├── .gitmodules ├── .readthedocs.yml ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.rst ├── THANKS ├── TODO ├── _config.yml ├── appveyor.yml ├── constraints.txt ├── core.py ├── cythexts.py ├── dev-requirements.txt ├── doc-requirements.txt ├── doc ├── Makefile ├── adjusted_MLE │ ├── __init__.py │ ├── sampler_based_quantiles.py │ └── tests │ │ ├── __init__.py │ │ ├── comparison_metrics.py │ │ ├── risk_comparisons.py │ │ ├── test_compare_sampler_mle.py │ │ ├── test_cv_MLE_inference.py │ │ └── test_risk.py ├── examples │ ├── compute_coverages.rst │ ├── conditional_sampling.py │ ├── hiv_approx_ci.py │ └── power_comparison.py ├── learning_examples │ ├── BH │ │ ├── gbm_targets_BH.py │ │ ├── gbm_targets_BH_larger.py │ │ ├── gbm_targets_BH_smallB.py │ │ ├── keras_targets_BH.py │ │ ├── keras_targets_BH_marginal.py │ │ ├── logit_targets_BH.py │ │ ├── logit_targets_BH_marginal.py │ │ ├── logit_targets_BH_single.py │ │ └── random_forest_targets_BH.py │ ├── HIV │ │ ├── CV.py │ │ ├── HIV_scale_CV.py │ │ ├── NRTI_DATA.txt │ │ ├── fixed.py │ │ ├── lambda_1se.py │ │ ├── stability_CV.py │ │ ├── stability_CV_6000.py │ │ ├── stability_CV_6000_null.py │ │ └── stability_selection.py │ ├── bootstrap │ │ ├── test_boot.py │ │ └── test_boot_scale1.py │ ├── calibration │ │ └── lasso_calibration.py │ ├── cross_inference │ │ └── cross_inference.py │ ├── keras │ │ ├── keras_example.py │ │ ├── keras_targets.py │ │ ├── keras_targets_BH_strong.py │ │ ├── keras_targets_BH_weak.py │ │ ├── keras_targets_medium.py │ │ └── keras_targets_small.py │ ├── knockoffs │ │ ├── knockoff_followup.py │ │ ├── knockoff_kernel.py │ │ └── knockoff_kernel_multi.py │ ├── lasso │ │ └── lasso_example.py │ ├── lasso_CV │ │ ├── followup.py │ │ ├── lasso_exact_CV_null.py │ │ └── lasso_example_CV.py │ ├── multi_target │ │ ├── additive_targets.py │ │ ├── additive_targets_small.py │ │ ├── followup_multi.py │ │ ├── gbm2.py │ │ ├── gbm_targets.py │ │ ├── gbm_targets_small.py │ │ ├── lasso_example_multi.py │ │ ├── lasso_example_multi_CV.py │ │ ├── lasso_example_multi_CV_random.py │ │ ├── lasso_example_multi_CV_stronger.py │ │ ├── lasso_example_multi_bigger.py │ │ ├── lasso_example_multi_gbm.py │ │ ├── lasso_example_multi_gbm_sk.py │ │ ├── lasso_example_multi_random.py │ │ ├── lasso_example_multi_random_gbm.py │ │ ├── lasso_example_multi_random_rf.py │ │ ├── lasso_example_multi_rf.py │ │ ├── lasso_example_multi_rf_sk.py │ │ ├── lee_multi.py │ │ ├── lee_multi_500.py │ │ └── lee_multi_bigger.py │ ├── parametric │ │ ├── lasso_selected.py │ │ ├── lasso_selected_resid.py │ │ ├── probit_step.py │ │ └── probit_step_both.py │ ├── riboflavin │ │ ├── CV.py │ │ └── CV_smaller.py │ ├── stability │ │ ├── stability_selection.py │ │ ├── stability_selection_harder.py │ │ └── stability_selection_harder_big.py │ └── standalone │ │ ├── basic_example.py │ │ ├── cleaner_basic_example.py │ │ ├── full_model_example.py │ │ ├── regression_example.py │ │ └── replicate_basic_example.py ├── license.rst ├── notebooks │ ├── Group LASSO Jacobian.Rmd │ ├── Group LASSO Jacobian.ipynb │ ├── UMPU.ipynb │ ├── isotonic.ipynb │ ├── lasso.ipynb │ ├── learning │ │ ├── Different pivots.ipynb │ │ ├── Multiple events in algorithm.ipynb │ │ ├── Multiple events not monotone.ipynb │ │ ├── Multiple randomization with fitting.ipynb │ │ ├── Multiple randomization with fitting_boot.ipynb │ │ ├── Multiple randomization.ipynb │ │ ├── Non convex region II.ipynb │ │ ├── Non convex region.ipynb │ │ ├── simple_example_pivots.pdf │ │ └── simple_example_sel_prob.pdf │ ├── 
pca_rank1.ipynb │ ├── quadratic_decisions.ipynb │ ├── reduced_covtest.ipynb │ ├── screening.ipynb │ ├── selection_objects.ipynb │ └── spacings.ipynb └── source │ ├── _static │ ├── logo.png │ └── selection.css │ ├── _templates │ └── layout.html │ ├── algorithms │ ├── covtest.Rmd │ ├── covtest.ipynb │ ├── index.rst │ ├── spacings.rst │ └── spacings_files │ │ ├── spacings_23_0.png │ │ ├── spacings_25_0.png │ │ ├── spacings_27_0.png │ │ ├── spacings_29_0.png │ │ ├── spacings_31_0.png │ │ ├── spacings_3_0.png │ │ ├── spacings_4_0.png │ │ ├── spacings_5_0.png │ │ ├── spacings_6_0.png │ │ ├── spacings_7_0.png │ │ └── spacings_9_0.png │ ├── conf.py │ ├── docattribute.rst │ ├── documentation.rst │ ├── download.rst │ ├── index.rst │ ├── learning │ ├── Learning1.Rmd │ ├── Learning1.ipynb │ ├── Learning2.Rmd │ ├── Learning2.ipynb │ └── index.rst │ ├── license.rst │ ├── links_names.txt │ ├── randomized │ ├── index.rst │ ├── lasso.Rmd │ └── lasso.ipynb │ └── sphinxext │ └── math_dollar.py ├── figs ├── pictures.r └── voronoi_figs.py ├── lasso_example_null_CV.py ├── requirements.txt ├── sandbox ├── SPRT.ipynb ├── absurd.py ├── bayesian │ ├── __init__.py │ ├── crime_data_attempt.py │ ├── crime_data_set.py │ ├── dual_bayesian.py │ ├── dual_lasso_test.py │ ├── hiv_inference.py │ ├── lasso_selection.py │ ├── logistic_bayesian.py │ ├── mixed_model.py │ ├── ms_lasso_2stage.py │ ├── random_reduced_lasso_bayesian_model.py │ ├── random_reduced_lasso_test.py │ ├── random_reduced_logistic_test.py │ ├── read_file.py │ ├── reduced_forward_stepwise_test.py │ ├── reduced_lasso_bayesian_model.py │ └── reduced_marginal_screening.py ├── inference_hiv_data.py ├── isotonic.py ├── kmeans.py ├── multi_forward_step.py ├── multistep.ipynb ├── randomized2.py ├── randomized_tests │ ├── test_estimation.py │ ├── test_greedy_step.py │ ├── test_marginalize_subgrad.py │ ├── test_multiple_queries.py │ ├── test_multiple_queries_CI.py │ ├── test_nonrandomized.py │ ├── test_randomization_to_zero.py │ ├── test_reconstruction.py │ ├── test_scaling.py │ ├── test_threshold_score.py │ └── test_without_screening.py ├── sample_splitting.ipynb ├── sample_splitting.py ├── sample_splitting_alex.py ├── sample_splitting_alex_null.py ├── tensorflow_test.py ├── test_cover.py ├── test_isotonic.py ├── test_variance.py └── variance_estimation.py ├── selectinf ├── __init__.py ├── _version.py ├── algorithms │ ├── __init__.py │ ├── api.py │ ├── change_point.py │ ├── covtest.py │ ├── cv.py │ ├── cv_glmnet.py │ ├── debiased_lasso.py │ ├── debiased_lasso_utils.pyx │ ├── forward_step.py │ ├── lasso.py │ ├── pca.py │ ├── screening.py │ ├── softmax.py │ ├── sqrt_lasso.py │ ├── stopping_rules.py │ └── tests │ │ ├── __init__.py │ │ ├── test_IC.py │ │ ├── test_ROSI.py │ │ ├── test_change_point.py │ │ ├── test_compareR.py │ │ ├── test_covtest.py │ │ ├── test_data_carving.py │ │ ├── test_debiased_lasso.py │ │ ├── test_forward_step.py │ │ ├── test_lasso.py │ │ ├── test_screening.py │ │ ├── test_softmax.py │ │ └── test_sqrt_lasso.py ├── api.py ├── base.py ├── constraints │ ├── __init__.py │ ├── affine.py │ ├── api.py │ ├── base.py │ ├── estimation.py │ ├── intervals.py │ ├── quadratic.py │ ├── quasi_affine.py │ └── tests │ │ ├── __init__.py │ │ ├── test_affine.py │ │ ├── test_estimation.py │ │ ├── test_quadratic_tests.py │ │ ├── test_quasi.py │ │ └── test_unknown_sigma.py ├── distributions │ ├── __init__.py │ ├── api.py │ ├── chain.py │ ├── chisq.py │ ├── discrete_family.py │ ├── discrete_multiparameter.py │ ├── intervals.py │ ├── pvalue.py │ └── tests │ │ ├── __init__.py 
│ │ ├── test_chains.py │ │ ├── test_discreteExFam.py │ │ └── test_multiparameter.py ├── glm.py ├── info.py ├── learning │ ├── Rfitters.py │ ├── Rutils.py │ ├── __init__.py │ ├── core.py │ ├── fitters.py │ ├── keras_fit.py │ ├── learners.py │ ├── samplers.py │ └── utils.py ├── randomized │ ├── __init__.py │ ├── api.py │ ├── cv_view.py │ ├── group_lasso.py │ ├── lasso.py │ ├── modelQ.py │ ├── query.py │ ├── randomization.py │ ├── sandbox │ │ ├── M_estimator_group_lasso.py │ │ ├── M_estimator_nonrandom.py │ │ ├── convenience.py │ │ ├── general_lasso.py │ │ ├── greedy_step.py │ │ ├── group_lasso.py │ │ └── lasso_iv.py │ ├── screening.py │ ├── selective_MLE_utils.pyx │ ├── slope.py │ └── tests │ │ ├── __init__.py │ │ ├── sandbox │ │ ├── test_Mest.py │ │ ├── test_convenience.py │ │ ├── test_cv.py │ │ ├── test_cv_corrected_nonrandomized_lasso.py │ │ ├── test_cv_glmnet.py │ │ ├── test_cv_lee_et_al.py │ │ ├── test_decompose_subgrad.py │ │ ├── test_fixedX.py │ │ ├── test_full_lasso.py │ │ ├── test_general_lasso.py │ │ ├── test_general_lasso_pval.py │ │ ├── test_intervals.py │ │ ├── test_lasso_iv.py │ │ ├── test_multiple_splits.py │ │ ├── test_opt_weighted_intervals.py │ │ ├── test_optimization_sampler.py │ │ ├── test_sampling.py │ │ ├── test_split.py │ │ ├── test_split_compare.py │ │ └── test_sqrt_lasso.py │ │ ├── test_BH.py │ │ ├── test_group_lasso.py │ │ ├── test_lasso.py │ │ ├── test_marginal_screening.py │ │ ├── test_modelQ.py │ │ ├── test_multiple_queries.py │ │ ├── test_naive.py │ │ ├── test_randomization.py │ │ ├── test_selective_MLE.py │ │ ├── test_selective_MLE_high.py │ │ ├── test_selective_MLE_onedim.py │ │ ├── test_slope.py │ │ ├── test_slope_subgrad.py │ │ ├── test_split_lasso.py │ │ └── test_topK.py ├── reduced_optimization │ └── tests │ │ └── __init__.py ├── sampling │ ├── __init__.py │ ├── api.py │ ├── langevin.py │ ├── sequential.py │ ├── sqrt_lasso.pyx │ ├── tests │ │ ├── __init__.py │ │ ├── plots_fs.py │ │ ├── test_fstep_langevin.py │ │ ├── test_kfstep.py │ │ ├── test_pca_langevin.py │ │ ├── test_sample_sphere.py │ │ └── test_sequential.py │ ├── truncnorm.pyx │ └── truncnorm_quadratic.pyx ├── sandbox │ ├── approx_ci │ │ ├── __init__.py │ │ ├── ci_approx_density.py │ │ ├── ci_approx_greedy_step.py │ │ ├── selection_map.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── test_glm.py │ │ │ ├── test_greedy_step.py │ │ │ └── test_threshold_score.py │ └── bayesian │ │ ├── __init__.py │ │ ├── barrier.py │ │ ├── credible_intervals.py │ │ ├── dual_lasso.py │ │ ├── estimator.py │ │ ├── forward_stepwise_reduced.py │ │ ├── initial_soln.py │ │ ├── lasso_reduced.py │ │ ├── marginal_screening_reduced.py │ │ ├── ms_lasso_2stage_reduced.py │ │ ├── par_carved_reduced.py │ │ ├── par_random_lasso_reduced.py │ │ ├── random_lasso_reduced.py │ │ └── tests │ │ ├── __init__.py │ │ ├── test_carved_lasso.py │ │ ├── test_dual_lasso.py │ │ ├── test_fs.py │ │ ├── test_lasso.py │ │ └── test_ms_lasso_2stage.py ├── src_C │ ├── #sample_preparation.pyx# │ ├── HmcSampler.cpp │ ├── HmcSampler.h │ ├── logfile.txt │ ├── preparation_Eig_Vect.cpp │ ├── preparation_Eig_Vect.h │ ├── sample_preparation.cpp │ ├── sample_preparation.pyx │ └── setup.py ├── tests │ ├── __init__.py │ ├── decorators.py │ ├── flags.py │ ├── instance.py │ ├── test_instance.py │ └── tests.py ├── truncated │ ├── F.py │ ├── T.py │ ├── __init__.py │ ├── api.py │ ├── base.py │ ├── chi.py │ ├── gaussian.py │ └── tests │ │ ├── __init__.py │ │ ├── test_truncated.py │ │ └── test_truncatedFT.py └── utils │ ├── __init__.py │ └── tools.py ├── setup.cfg ├── 
setup.py ├── setup_helpers.py ├── tools ├── apigen.py ├── build_modref_templates.py ├── gitwash_dumper.py ├── nbtools.py ├── noseall_with_coverage └── strip_notebook.py ├── umpu ├── UMAU.pdf ├── umpu.r └── umpuWriteup.tex └── versioneer.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = selection 4 | include = */selection/* 5 | omit = 6 | */setup.py 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | */*pyc 2 | */*~ 3 | */*/*~ 4 | */*/*/*~ 5 | */*.out 6 | */*.aux 7 | */*.bbl 8 | */*.blg 9 | */*.vrb 10 | */*.synctex* 11 | */*.toc 12 | */*.snm 13 | */*.odt 14 | */*.ps 15 | */*.eps 16 | */*.dvi 17 | */*.log 18 | */*.nav 19 | */*.bak 20 | */*.vrb 21 | */*.pyc 22 | */*/*.pyc 23 | *.pyc 24 | selectinf/*.so 25 | selectinf/*.c 26 | selectinf/*/*.so 27 | selectinf/*/*.c 28 | build 29 | *ipynb_checkpoints 30 | */*ipynb_checkpoints 31 | .idea/* 32 | */.idea/* 33 | */*/.idea/* 34 | *.log 35 | *~ 36 | .*sw* 37 | */*~ 38 | *pyc 39 | */*pyc 40 | *.pdf 41 | *.csv 42 | doc/source/api/generated/* 43 | docs/source/api/generated/* 44 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "travis-tools"] 2 | path = travis-tools 3 | url = https://github.com/matthew-brett/travis-tools.git 4 | [submodule "C-software"] 5 | path = C-software 6 | url = https://github.com/selective-inference/C-software.git 7 | 8 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Build documentation in the docs/ directory with Sphinx 9 | sphinx: 10 | builder: html 11 | configuration: doc/source/conf.py 12 | 13 | # Build documentation with MkDocs 14 | #mkdocs: 15 | # configuration: mkdocs.yml 16 | 17 | # Optionally build your docs in additional formats such as PDF and ePub 18 | #formats: all 19 | 20 | # Optionally set the version of Python and requirements required to build your docs 21 | python: 22 | version: 3.6 23 | install: 24 | - requirements: requirements.txt 25 | - requirements: doc-requirements.txt 26 | - method: setuptools 27 | path: . 28 | 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Selective Inference development team 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 
15 | 16 | * The names of any contributors to this software 17 | may not be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHOR LICENSE Makefile* MANIFEST.in setup* README.* 2 | include Changelog TODO 3 | recursive-include doc * 4 | recursive-include tools * 5 | # setup utilities 6 | include setup_helpers.py 7 | include cythexts.py 8 | recursive-include fake_pyrex * 9 | include versioneer.py 10 | include selection/_version.py 11 | include C-software/src/*.h -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 2 | The selection project 3 | ===================== 4 | 5 | This project contains software for selective inference, with an emphasis on 6 | selective inference in regression. 7 | 8 | Some key references 9 | ------------------- 10 | 11 | - ``A significance test for the lasso``: http://arxiv.org/abs/1301.7161 12 | - ``Tests in adaptive regression via the Kac-Rice formula``: 13 | http://arxiv.org/abs/1308.3020 14 | - ``Post-selection adaptive inference for Least Angle Regression and the Lasso``: 15 | http://arxiv.org/abs/1401.3889 16 | - ``Exact post-selection inference with the lasso``: 17 | http://arxiv.org/abs/1311.6238 18 | - ``Exact Post Model Selection Inference for Marginal Screening``: 19 | http://arxiv.org/abs/1402.5596 20 | 21 | Install 22 | ------- 23 | 24 | .. code:: bash 25 | 26 | git submodule init # travis-tools and C-software 27 | git submodule update 28 | pip install -r requirements.txt 29 | python setup.py install 30 | 31 | Potential speedups 32 | ------------------ 33 | 34 | - We can condition on “parts” of each draw of the sampler; in 35 | particular, if we condition on the projection of the rejection 36 | ``sample - center`` onto a direction, then resampling along the ray can be 37 | sped up for some procedures, such as the LASSO, possibly at some cost in power. 38 | 39 | - Learning a higher-dimensional function could perhaps save some time, though 40 | the proper conditioning has to be checked.
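Minimal usage sketch
--------------------

The scripts under ``doc/learning_examples`` all follow the same pattern: simulate (or load) data, wrap the sufficient statistic in a sampler, run a selection algorithm on draws from that sampler, and hand everything to ``full_model_inference``. The sketch below condenses ``doc/learning_examples/lasso/lasso_example.py`` (reproduced later in this repository); treat it as an illustration of the pattern rather than a reference implementation. Note that the package directory here is ``selectinf`` while these older examples import from ``selection``; adjust the import root to whichever is installed.

.. code:: python

    import functools
    import numpy as np
    import regreg.api as rr

    from selection.tests.instance import gaussian_instance
    from selection.learning.utils import full_model_inference
    from selection.learning.core import normal_sampler, logit_fit

    # a sparse Gaussian regression instance: design X, response y, true effects
    n, p, sigma = 200, 100, 2
    X, y, truth = gaussian_instance(n=n, p=p, s=10, signal=(0.5, 1),
                                    equicorrelated=False, rho=0.5, sigma=sigma,
                                    random_signs=True, scale=False)[:3]

    # Gaussian model for the sufficient statistic X^T y
    S = X.T.dot(y)
    sampler = normal_sampler(S, sigma**2 * X.T.dot(X))

    def meta_algorithm(XTX, lam, sampler):
        # LASSO at a fixed lambda, fit to a draw of the sufficient statistic
        p = XTX.shape[0]
        loss = rr.quadratic_loss((p,), Q=XTX)
        loss.quadratic = rr.identity_quadratic(0, 0, -sampler(scale=0.), 0)
        problem = rr.simple_problem(loss, rr.l1norm(p, lagrange=lam))
        soln = problem.solve(max_its=100, tol=1.e-10)
        return set(np.nonzero(soln != 0)[0])

    selection_algorithm = functools.partial(meta_algorithm, X.T.dot(X),
                                            4. * np.sqrt(n))

    # learn the selection probability and form selective p-values/intervals
    df = full_model_inference(X, y, truth, selection_algorithm, sampler,
                              success_params=(1, 1), B=2000,
                              fit_probability=logit_fit, fit_args={'df': 20},
                              how_many=1)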
41 | 42 | -------------------------------------------------------------------------------- /THANKS: -------------------------------------------------------------------------------- 1 | Selective Inference Team 2 | ------------------------ 3 | 4 | Contributors to this project include: 5 | 6 | Yuval Benjamini 7 | Leonard Blier 8 | Will Fithian 9 | Jason Lee 10 | Joshua Loftus 11 | Stephen Reid 12 | Dennis Sun 13 | Yuekai Sun 14 | Jonathan Taylor 15 | Xiaoying Tian 16 | Ryan Tibshirani 17 | Robert Tibshirani 18 | 19 | 20 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | - Marginalize group LASSO 2 | - SLOPE: randomized and non-randomized 3 | - selective debiased LASSO 4 | - randomized sqrt LASSO 5 | - alternate randomization 6 | - user's choice of model for non-randomized -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-slate -------------------------------------------------------------------------------- /constraints.txt: -------------------------------------------------------------------------------- 1 | rpy2<2.9 2 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | # Requirements for developing regreg 2 | # Check these dependencies against regreg/info.py 3 | -r requirements.txt 4 | nose 5 | -------------------------------------------------------------------------------- /doc-requirements.txt: -------------------------------------------------------------------------------- 1 | # Requirements for building docs 2 | # Check these dependencies against doc/conf.py 3 | -r dev-requirements.txt 4 | sphinx>=1.4 5 | numpydoc 6 | matplotlib 7 | texext 8 | nb2plots 9 | rpy2 10 | seaborn 11 | statsmodels 12 | tensorflow 13 | keras 14 | nbsphinx 15 | -------------------------------------------------------------------------------- /doc/adjusted_MLE/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/adjusted_MLE/__init__.py -------------------------------------------------------------------------------- /doc/adjusted_MLE/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/adjusted_MLE/tests/__init__.py -------------------------------------------------------------------------------- /doc/adjusted_MLE/tests/test_risk.py: -------------------------------------------------------------------------------- 1 | import numpy as np, os, itertools 2 | import pandas as pd 3 | 4 | from rpy2 import robjects 5 | import rpy2.robjects.numpy2ri 6 | rpy2.robjects.numpy2ri.activate() 7 | import rpy2.robjects.pandas2ri 8 | from rpy2.robjects.packages import importr 9 | 10 | from .comparison_metrics import (sim_xy, 11 | glmnet_lasso, 12 | relative_risk) 13 | from .risk_comparisons import risk_comparison 14 | 15 | def output_file(n=200, 16 | p=500, 17 | rho=0.35, 18 | s=5, 19 | beta_type=1, 20 | snr_values=np.array([0.10, 0.15, 0.20, 0.25, 0.30, 21 | 0.35, 0.42, 0.71, 1.22, 2.07]), 22 | 
tuning_nonrand="lambda.1se", 23 | tuning_rand="lambda.1se", 24 | randomizing_scale=np.sqrt(0.50), 25 | ndraw=50, 26 | outpath = None): 27 | 28 | df_risk = pd.DataFrame() 29 | if n > p: 30 | full_dispersion = True 31 | else: 32 | full_dispersion = False 33 | 34 | snr_list = [] 35 | for snr in snr_values: 36 | snr_list.append(snr) 37 | relative_risk = np.squeeze(risk_comparison(n=n, 38 | p=p, 39 | nval=n, 40 | rho=rho, 41 | s=s, 42 | beta_type=beta_type, 43 | snr=snr, 44 | randomizer_scale=randomizing_scale, 45 | full_dispersion=full_dispersion, 46 | tuning_nonrand =tuning_nonrand, 47 | tuning_rand=tuning_rand, ndraw = ndraw)) 48 | 49 | df_risk = df_risk.append(pd.DataFrame(data=relative_risk.reshape((1, 6)), columns=['sel-MLE', 'ind-est', 'rand-LASSO', 50 | 'rel-rand-LASSO', 'rel-LASSO','LASSO']), ignore_index=True) 51 | 52 | df_risk['n'] = n 53 | df_risk['p'] = p 54 | df_risk['s'] = s 55 | df_risk['rho'] = rho 56 | df_risk['beta-type'] = beta_type 57 | df_risk['snr'] = pd.Series(np.asarray(snr_list)) 58 | df_risk['target'] = "selected" 59 | 60 | if outpath is None: 61 | outpath = os.path.dirname(__file__) 62 | 63 | outfile_risk_csv = os.path.join(outpath, "dims_" + str(n) + "_" + str(p) + "_risk_betatype" + str(beta_type) + "_rho_" + str(rho) + ".csv") 64 | outfile_risk_html = os.path.join(outpath, "dims_" + str(n) + "_" + str(p) + "_risk_betatype" + str(beta_type) + "_rho_" + str(rho) + ".html") 65 | df_risk.to_csv(outfile_risk_csv, index=False) 66 | df_risk.to_html(outfile_risk_html) 67 | 68 | -------------------------------------------------------------------------------- /doc/examples/conditional_sampling.py: -------------------------------------------------------------------------------- 1 | """ 2 | We demonstrate that our optimization variables have 3 | the correct distribution given the data. 
4 | """ 5 | 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from statsmodels.distributions import ECDF 9 | 10 | from selection.randomized.tests.test_sampling import test_conditional_law 11 | 12 | def main(ndraw=50000, burnin=5000, remove_atom=False, unpenalized=True, stepsize=1.e-2): 13 | 14 | fig_idx = 0 15 | for (rand, 16 | mcmc_opt, 17 | mcmc_omega, 18 | truncated_opt, 19 | truncated_omega) in test_conditional_law(ndraw=ndraw, burnin=burnin, stepsize=stepsize, unpenalized=unpenalized): 20 | 21 | fig_idx += 1 22 | fig = plt.figure(num=fig_idx, figsize=(8,8)) 23 | 24 | plt.clf() 25 | idx = 0 26 | for i in range(mcmc_opt.shape[1]): 27 | plt.subplot(3,3,idx+1) 28 | 29 | mcmc_ = mcmc_opt[:, i] 30 | truncated_ = truncated_opt[:, i] 31 | 32 | xval = np.linspace(min(mcmc_.min(), truncated_.min()), 33 | max(mcmc_.max(), truncated_.max()), 34 | 200) 35 | 36 | if remove_atom: 37 | mcmc_ = mcmc_[mcmc_ < np.max(mcmc_)] 38 | mcmc_ = mcmc_[mcmc_ > np.min(mcmc_)] 39 | 40 | plt.plot(xval, ECDF(mcmc_)(xval), label='MCMC') 41 | plt.plot(xval, ECDF(truncated_)(xval), label='truncated') 42 | idx += 1 43 | if idx == 1: 44 | plt.legend(loc='lower right') 45 | 46 | fig.suptitle(' '.join([rand, "opt"])) 47 | 48 | fig_idx += 1 49 | fig = plt.figure(num=fig_idx, figsize=(8,8)) 50 | plt.clf() 51 | idx = 0 52 | for i in range(mcmc_opt.shape[1]): 53 | plt.subplot(3,3,idx+1) 54 | 55 | mcmc_ = mcmc_omega[:, i] 56 | truncated_ = truncated_omega[:, i] 57 | 58 | xval = np.linspace(min(mcmc_.min(), truncated_.min()), 59 | max(mcmc_.max(), truncated_.max()), 60 | 200) 61 | 62 | if remove_atom: 63 | mcmc_ = mcmc_[mcmc_ < np.max(mcmc_)] 64 | mcmc_ = mcmc_[mcmc_ > np.min(mcmc_)] 65 | plt.plot(xval, ECDF(mcmc_)(xval), label='MCMC') 66 | plt.plot(xval, ECDF(truncated_)(xval), label='truncated') 67 | idx += 1 68 | if idx == 1: 69 | plt.legend(loc='lower right') 70 | 71 | fig.suptitle(' '.join([rand, "omega"])) 72 | plt.show() 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /doc/learning_examples/calibration/lasso_calibration.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import normal_sampler, logit_fit 12 | 13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): 14 | 15 | # description of statistical problem 16 | 17 | X, y, truth = gaussian_instance(n=n, 18 | p=p, 19 | s=s, 20 | equicorrelated=False, 21 | rho=0.5, 22 | sigma=sigma, 23 | signal=signal, 24 | random_signs=True, 25 | scale=False)[:3] 26 | 27 | dispersion = sigma**2 28 | 29 | S = X.T.dot(y) 30 | covS = dispersion * X.T.dot(X) 31 | smooth_sampler = normal_sampler(S, covS) 32 | 33 | def meta_algorithm(XTX, XTXi, lam, sampler): 34 | 35 | p = XTX.shape[0] 36 | success = np.zeros(p) 37 | 38 | loss = rr.quadratic_loss((p,), Q=XTX) 39 | pen = rr.l1norm(p, lagrange=lam) 40 | 41 | scale = 0. 
42 | noisy_S = sampler(scale=scale) 43 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) 44 | problem = rr.simple_problem(loss, pen) 45 | soln = problem.solve(max_its=50, tol=1.e-6) 46 | success += soln != 0 47 | return set(np.nonzero(success)[0]) 48 | 49 | XTX = X.T.dot(X) 50 | XTXi = np.linalg.inv(XTX) 51 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 52 | dispersion = np.linalg.norm(resid)**2 / (n-p) 53 | 54 | lam = 4. * np.sqrt(n) 55 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) 56 | 57 | # run selection algorithm 58 | 59 | 60 | return full_model_inference(X, 61 | y, 62 | truth, 63 | selection_algorithm, 64 | smooth_sampler, 65 | success_params=(1, 1), 66 | B=B, 67 | fit_probability=logit_fit, 68 | fit_args={'df':20}) 69 | 70 | if __name__ == "__main__": 71 | import statsmodels.api as sm 72 | import matplotlib.pyplot as plt 73 | import pandas as pd 74 | 75 | csvfile = 'lasso_calibration.csv' 76 | outbase = csvfile[:-4] 77 | 78 | for i in range(2000): 79 | for B in np.random.choice([50, 100, 500, 1000, 1500, 2000], 1, replace=True): 80 | df = simulate(B=B) 81 | 82 | if df is not None and i > 0: 83 | 84 | try: # concatenate to disk 85 | df = pd.concat([df, pd.read_csv(csvfile)]) 86 | except FileNotFoundError: 87 | pass 88 | df.to_csv(csvfile, index=False) 89 | 90 | if len(df['pivot']) > 0: 91 | pivot_ax, length_ax = pivot_plot(df, outbase) 92 | 93 | -------------------------------------------------------------------------------- /doc/learning_examples/cross_inference/cross_inference.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from selection.learning.core import cross_inference 4 | from selection.learning.keras_fit import keras_fit 5 | 6 | data = np.load('lasso_multi_learning.npz') 7 | learning_data = (data['T'][:2000], data['Y'][:2000]) 8 | 9 | result = cross_inference(learning_data, 10 | data['nuisance'], 11 | data['direction'], 12 | keras_fit, 13 | fit_args={'epochs':3, 'sizes':[10]*5, 'dropout':0., 'activation':'relu'}) 14 | -------------------------------------------------------------------------------- /doc/learning_examples/keras/keras_targets.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import split_sampler, keras_fit 12 | from selection.learning.learners import mixture_learner 13 | mixture_learner.scales = [1]*10 + [1.5,2,3,4,5,10] 14 | 15 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): 16 | 17 | # description of statistical problem 18 | 19 | X, y, truth = gaussian_instance(n=n, 20 | p=p, 21 | s=s, 22 | equicorrelated=False, 23 | rho=0.5, 24 | sigma=sigma, 25 | signal=signal, 26 | random_signs=True, 27 | scale=False)[:3] 28 | 29 | XTX = X.T.dot(X) 30 | XTXi = np.linalg.inv(XTX) 31 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 32 | dispersion = np.linalg.norm(resid)**2 / (n-p) 33 | 34 | S = X.T.dot(y) 35 | covS = dispersion * X.T.dot(X) 36 | splitting_sampler = split_sampler(X * y[:, None], covS) 37 | 38 | def meta_algorithm(XTX, XTXi, dispersion, lam, sampler): 39 | 40 | p = XTX.shape[0] 41 | success = np.zeros(p) 42 | 43 | loss = rr.quadratic_loss((p,), Q=XTX) 44 | pen = rr.l1norm(p, lagrange=lam) 45 | 46 | scale = 
0.5 47 | noisy_S = sampler(scale=scale) 48 | soln = XTXi.dot(noisy_S) 49 | solnZ = soln / (np.sqrt(np.diag(XTXi)) * np.sqrt(dispersion)) 50 | return set(np.nonzero(np.fabs(solnZ) > 2.1)[0]) 51 | 52 | lam = 4. * np.sqrt(n) 53 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, dispersion, lam) 54 | 55 | # run selection algorithm 56 | 57 | return full_model_inference(X, 58 | y, 59 | truth, 60 | selection_algorithm, 61 | splitting_sampler, 62 | success_params=(5, 7), 63 | B=B, 64 | fit_probability=keras_fit, 65 | fit_args={'epochs':30, 'sizes':[100, 100], 'activation':'relu'}) 66 | 67 | 68 | if __name__ == "__main__": 69 | import statsmodels.api as sm 70 | import matplotlib.pyplot as plt 71 | import pandas as pd 72 | 73 | for i in range(500): 74 | df = simulate(B=10000) 75 | csvfile = 'keras_targets.csv' 76 | outbase = csvfile[:-4] 77 | 78 | if df is not None and i > 0: 79 | 80 | try: # concatenate to disk 81 | df = pd.concat([df, pd.read_csv(csvfile)]) 82 | except FileNotFoundError: 83 | pass 84 | df.to_csv(csvfile, index=False) 85 | 86 | if len(df['pivot']) > 0: 87 | pivot_ax, length_ax = pivot_plot(df, outbase) 88 | 89 | 90 | -------------------------------------------------------------------------------- /doc/learning_examples/keras/keras_targets_medium.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import split_sampler, keras_fit 12 | from selection.learning.learners import mixture_learner 13 | mixture_learner.scales = [1]*10 + [1.5,2,3,4,5,10] 14 | 15 | def simulate(n=200, p=50, s=5, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): 16 | 17 | # description of statistical problem 18 | 19 | X, y, truth = gaussian_instance(n=n, 20 | p=p, 21 | s=s, 22 | equicorrelated=False, 23 | rho=0.5, 24 | sigma=sigma, 25 | signal=signal, 26 | random_signs=True, 27 | scale=False)[:3] 28 | 29 | XTX = X.T.dot(X) 30 | XTXi = np.linalg.inv(XTX) 31 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 32 | dispersion = np.linalg.norm(resid)**2 / (n-p) 33 | 34 | S = X.T.dot(y) 35 | covS = dispersion * X.T.dot(X) 36 | splitting_sampler = split_sampler(X * y[:, None], covS) 37 | 38 | def meta_algorithm(XTX, XTXi, dispersion, lam, sampler): 39 | 40 | p = XTX.shape[0] 41 | success = np.zeros(p) 42 | 43 | loss = rr.quadratic_loss((p,), Q=XTX) 44 | pen = rr.l1norm(p, lagrange=lam) 45 | 46 | scale = 0.5 47 | noisy_S = sampler(scale=scale) 48 | soln = XTXi.dot(noisy_S) 49 | solnZ = soln / (np.sqrt(np.diag(XTXi)) * np.sqrt(dispersion)) 50 | return set(np.nonzero(np.fabs(solnZ) > 2.1)[0]) 51 | 52 | lam = 4. 
* np.sqrt(n) 53 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, dispersion, lam) 54 | 55 | # run selection algorithm 56 | 57 | return full_model_inference(X, 58 | y, 59 | truth, 60 | selection_algorithm, 61 | splitting_sampler, 62 | success_params=(5, 7), 63 | B=B, 64 | fit_probability=keras_fit, 65 | fit_args={'epochs':30, 'sizes':[100, 100], 'activation':'relu'}) 66 | 67 | 68 | if __name__ == "__main__": 69 | import statsmodels.api as sm 70 | import matplotlib.pyplot as plt 71 | import pandas as pd 72 | 73 | for i in range(500): 74 | df = simulate(B=10000) 75 | csvfile = 'keras_targets_medium.csv' 76 | outbase = csvfile[:-4] 77 | 78 | if df is not None and i > 0: 79 | 80 | try: # concatenate to disk 81 | df = pd.concat([df, pd.read_csv(csvfile)]) 82 | except FileNotFoundError: 83 | pass 84 | df.to_csv(csvfile, index=False) 85 | 86 | if len(df['pivot']) > 0: 87 | pivot_ax, length_ax = pivot_plot(df, outbase) 88 | -------------------------------------------------------------------------------- /doc/learning_examples/keras/keras_targets_small.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import split_sampler, keras_fit 12 | from selection.learning.learners import mixture_learner 13 | mixture_learner.scales = [1]*10 + [1.5,2,3,4,5,10] 14 | 15 | def simulate(n=100, p=10, s=5, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): 16 | 17 | # description of statistical problem 18 | 19 | X, y, truth = gaussian_instance(n=n, 20 | p=p, 21 | s=s, 22 | equicorrelated=False, 23 | rho=0.5, 24 | sigma=sigma, 25 | signal=signal, 26 | random_signs=True, 27 | scale=False)[:3] 28 | 29 | XTX = X.T.dot(X) 30 | XTXi = np.linalg.inv(XTX) 31 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 32 | dispersion = np.linalg.norm(resid)**2 / (n-p) 33 | 34 | S = X.T.dot(y) 35 | covS = dispersion * X.T.dot(X) 36 | splitting_sampler = split_sampler(X * y[:, None], covS) 37 | 38 | def meta_algorithm(XTX, XTXi, dispersion, lam, sampler): 39 | 40 | p = XTX.shape[0] 41 | success = np.zeros(p) 42 | 43 | loss = rr.quadratic_loss((p,), Q=XTX) 44 | pen = rr.l1norm(p, lagrange=lam) 45 | 46 | scale = 0.5 47 | noisy_S = sampler(scale=scale) 48 | soln = XTXi.dot(noisy_S) 49 | solnZ = soln / (np.sqrt(np.diag(XTXi)) * np.sqrt(dispersion)) 50 | return set(np.nonzero(np.fabs(solnZ) > 2.1)[0]) 51 | 52 | lam = 4. 
* np.sqrt(n) 53 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, dispersion, lam) 54 | 55 | # run selection algorithm 56 | 57 | return full_model_inference(X, 58 | y, 59 | truth, 60 | selection_algorithm, 61 | splitting_sampler, 62 | success_params=(5, 7), 63 | B=B, 64 | fit_probability=keras_fit, 65 | fit_args={'epochs':30, 'sizes':[100, 100], 'activation':'relu'}) 66 | 67 | 68 | if __name__ == "__main__": 69 | import statsmodels.api as sm 70 | import matplotlib.pyplot as plt 71 | import pandas as pd 72 | 73 | for i in range(500): 74 | df = simulate(B=10000) 75 | csvfile = 'keras_targets_small.csv' 76 | outbase = csvfile[:-4] 77 | 78 | if df is not None and i > 0: 79 | 80 | try: # concatenate to disk 81 | df = pd.concat([df, pd.read_csv(csvfile)]) 82 | except FileNotFoundError: 83 | pass 84 | df.to_csv(csvfile, index=False) 85 | 86 | if len(df['pivot']) > 0: 87 | pivot_ax, length_ax = pivot_plot(df, outbase) 88 | -------------------------------------------------------------------------------- /doc/learning_examples/knockoffs/knockoff_kernel.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import normal_sampler, logit_fit 12 | 13 | def simulate(n=1000, p=50, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B=1000): 14 | 15 | # description of statistical problem 16 | 17 | np.random.seed(seed) 18 | X, y, truth = gaussian_instance(n=n, 19 | p=p, 20 | s=s, 21 | equicorrelated=False, 22 | rho=0.5, 23 | sigma=sigma, 24 | signal=signal, 25 | random_signs=True, 26 | scale=False, 27 | center=False)[:3] 28 | 29 | dispersion = sigma**2 30 | 31 | S = X.T.dot(y) 32 | covS = dispersion * X.T.dot(X) 33 | smooth_sampler = normal_sampler(S, covS) 34 | 35 | def meta_algorithm(X, XTXi, resid, sampler): 36 | 37 | n, p = X.shape 38 | 39 | rho = 0.8 40 | S = sampler(scale=0.) 
# deterministic with scale=0 41 | ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X 42 | Xnew = rho * X + np.sqrt(1 - rho**2) * np.random.standard_normal(X.shape) 43 | 44 | X_full = np.hstack([X, Xnew]) 45 | beta_full = np.linalg.pinv(X_full).dot(ynew) 46 | winners = np.fabs(beta_full)[:p] > np.fabs(beta_full)[p:] 47 | return set(np.nonzero(winners)[0]) 48 | 49 | XTX = X.T.dot(X) 50 | XTXi = np.linalg.inv(XTX) 51 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 52 | dispersion = np.linalg.norm(resid)**2 / (n-p) 53 | 54 | selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) 55 | 56 | # run selection algorithm 57 | 58 | return full_model_inference(X, 59 | y, 60 | truth, 61 | selection_algorithm, 62 | smooth_sampler, 63 | success_params=(8, 10), 64 | B=B, 65 | fit_probability=logit_fit, 66 | fit_args={'df':20}, 67 | how_many=1) 68 | 69 | if __name__ == "__main__": 70 | import statsmodels.api as sm 71 | import matplotlib.pyplot as plt 72 | import pandas as pd 73 | 74 | iseed = int(np.fabs(np.random.standard_normal() * 50000)) 75 | for i in range(500): 76 | df = simulate(seed=i + iseed, B=2000) 77 | csvfile = 'knockoff_kernel.csv' 78 | outbase = csvfile[:-4] 79 | 80 | if df is not None and i > 0: 81 | 82 | try: # concatenate to disk 83 | df = pd.concat([df, pd.read_csv(csvfile)]) 84 | except FileNotFoundError: 85 | pass 86 | df.to_csv(csvfile, index=False) 87 | 88 | if len(df['pivot']) > 0: 89 | pivot_ax, length_ax = pivot_plot(df, outbase) 90 | -------------------------------------------------------------------------------- /doc/learning_examples/lasso/lasso_example.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import normal_sampler, logit_fit 12 | 13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000): 14 | 15 | # description of statistical problem 16 | 17 | X, y, truth = gaussian_instance(n=n, 18 | p=p, 19 | s=s, 20 | equicorrelated=False, 21 | rho=0.5, 22 | sigma=sigma, 23 | signal=signal, 24 | random_signs=True, 25 | scale=False)[:3] 26 | 27 | dispersion = sigma**2 28 | 29 | S = X.T.dot(y) 30 | covS = dispersion * X.T.dot(X) 31 | sampler = normal_sampler(S, covS) 32 | 33 | def meta_algorithm(XTX, XTXi, lam, sampler): 34 | 35 | p = XTX.shape[0] 36 | success = np.zeros(p) 37 | 38 | loss = rr.quadratic_loss((p,), Q=XTX) 39 | pen = rr.l1norm(p, lagrange=lam) 40 | 41 | scale = 0. 42 | noisy_S = sampler(scale=scale) 43 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) 44 | problem = rr.simple_problem(loss, pen) 45 | soln = problem.solve(max_its=100, tol=1.e-10) 46 | success += soln != 0 47 | return set(np.nonzero(success)[0]) 48 | 49 | XTX = X.T.dot(X) 50 | XTXi = np.linalg.inv(XTX) 51 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 52 | dispersion = np.linalg.norm(resid)**2 / (n-p) 53 | 54 | lam = 4. 
* np.sqrt(n) 55 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) 56 | 57 | # run selection algorithm 58 | 59 | 60 | return full_model_inference(X, 61 | y, 62 | truth, 63 | selection_algorithm, 64 | sampler, 65 | success_params=(1, 1), 66 | B=B, 67 | fit_probability=logit_fit, 68 | fit_args={'df':20}, 69 | how_many=1) 70 | 71 | 72 | if __name__ == "__main__": 73 | import statsmodels.api as sm 74 | import matplotlib.pyplot as plt 75 | import pandas as pd 76 | 77 | for i in range(500): 78 | df = simulate() 79 | csvfile = 'lasso_exact.csv' 80 | outbase = csvfile[:-4] 81 | 82 | if df is not None and i > 0: 83 | 84 | try: # concatenate to disk 85 | df = pd.concat([df, pd.read_csv(csvfile)]) 86 | except FileNotFoundError: 87 | pass 88 | df.to_csv(csvfile, index=False) 89 | 90 | if len(df['pivot']) > 0: 91 | pivot_ax, length_ax = pivot_plot(df, outbase) 92 | 93 | -------------------------------------------------------------------------------- /doc/learning_examples/lasso_CV/lasso_exact_CV_null.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import split_sampler, probit_fit 12 | from selection.learning.Rutils import lasso_glmnet 13 | 14 | def simulate(n=200, p=100, s=10, signal=(0, 0), sigma=2, alpha=0.1): 15 | 16 | # description of statistical problem 17 | 18 | X, y, truth = gaussian_instance(n=n, 19 | p=p, 20 | s=s, 21 | equicorrelated=False, 22 | rho=0.5, 23 | sigma=sigma, 24 | signal=signal, 25 | random_signs=True, 26 | scale=False)[:3] 27 | 28 | XTX = X.T.dot(X) 29 | XTXi = np.linalg.inv(XTX) 30 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 31 | dispersion = np.linalg.norm(resid)**2 / (n-p) 32 | 33 | S = X.T.dot(y) 34 | covS = dispersion * X.T.dot(X) 35 | splitting_sampler = split_sampler(X * y[:, None], covS) 36 | 37 | def meta_algorithm(X, XTXi, resid, sampler): 38 | 39 | S = sampler(scale=0.) 
# deterministic with scale=0 40 | ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X 41 | G = lasso_glmnet(X, ynew, *[None]*4) 42 | select = G.select() 43 | return set(list(select[0])) 44 | 45 | selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) 46 | 47 | # run selection algorithm 48 | 49 | return full_model_inference(X, 50 | y, 51 | truth, 52 | selection_algorithm, 53 | splitting_sampler, 54 | success_params=(1, 1), 55 | B=2000, 56 | fit_probability=probit_fit, 57 | fit_args={'df':20}, 58 | how_many=1) 59 | 60 | if __name__ == "__main__": 61 | import statsmodels.api as sm 62 | import matplotlib.pyplot as plt 63 | import pandas as pd 64 | 65 | for i in range(500): 66 | df = simulate() 67 | csvfile = 'lasso_exact_CV_null.csv' 68 | outbase = csvfile[:-4] 69 | 70 | if df is not None and i > 0: 71 | 72 | try: # concatenate to disk 73 | df = pd.concat([df, pd.read_csv(csvfile)]) 74 | except FileNotFoundError: 75 | pass 76 | df.to_csv(csvfile, index=False) 77 | 78 | if len(df['pivot']) > 0: 79 | pivot_ax, length_ax = pivot_plot(df, outbase) 80 | -------------------------------------------------------------------------------- /doc/learning_examples/lasso_CV/lasso_example_CV.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import split_sampler, probit_fit 12 | from selection.learning.Rutils import lasso_glmnet 13 | 14 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1): 15 | 16 | # description of statistical problem 17 | 18 | X, y, truth = gaussian_instance(n=n, 19 | p=p, 20 | s=s, 21 | equicorrelated=False, 22 | rho=0.5, 23 | sigma=sigma, 24 | signal=signal, 25 | random_signs=True, 26 | scale=False)[:3] 27 | 28 | dispersion = sigma**2 29 | 30 | S = X.T.dot(y) 31 | covS = dispersion * X.T.dot(X) 32 | splitting_sampler = split_sampler(X * y[:, None], covS) 33 | 34 | 35 | def meta_algorithm(X, XTXi, resid, sampler): 36 | 37 | S = sampler(scale=0.) 
# deterministic with scale=0 38 | ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X 39 | G = lasso_glmnet(X, ynew, *[None]*4) 40 | select = G.select() 41 | return set(list(select[0])) 42 | 43 | XTX = X.T.dot(X) 44 | XTXi = np.linalg.inv(XTX) 45 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 46 | dispersion = np.linalg.norm(resid)**2 / (n-p) 47 | 48 | selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) 49 | 50 | # run selection algorithm 51 | 52 | return full_model_inference(X, 53 | y, 54 | truth, 55 | selection_algorithm, 56 | splitting_sampler, 57 | success_params=(1, 1), 58 | B=2000, 59 | fit_probability=probit_fit, 60 | fit_args={'df':20}, 61 | how_many=1) 62 | 63 | if __name__ == "__main__": 64 | import statsmodels.api as sm 65 | import matplotlib.pyplot as plt 66 | import pandas as pd 67 | 68 | for i in range(500): 69 | df = simulate() 70 | csvfile = 'lasso_exact_CV.csv' 71 | outbase = csvfile[:-4] 72 | 73 | if df is not None and i > 0: 74 | 75 | try: # concatenate to disk 76 | df = pd.concat([df, pd.read_csv(csvfile)]) 77 | except FileNotFoundError: 78 | pass 79 | df.to_csv(csvfile, index=False) 80 | 81 | if len(df['pivot']) > 0: 82 | pivot_ax, length_ax = pivot_plot(df, outbase) 83 | -------------------------------------------------------------------------------- /doc/learning_examples/multi_target/additive_targets.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | 11 | from selection.learning.utils import full_model_inference, pivot_plot 12 | from selection.learning.core import normal_sampler, split_sampler, logit_fit 13 | 14 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): 15 | 16 | # description of statistical problem 17 | 18 | X, y, truth = gaussian_instance(n=n, 19 | p=p, 20 | s=s, 21 | equicorrelated=False, 22 | rho=0.5, 23 | sigma=sigma, 24 | signal=signal, 25 | random_signs=True, 26 | scale=False)[:3] 27 | 28 | dispersion = sigma**2 29 | 30 | S = X.T.dot(y) 31 | covS = dispersion * X.T.dot(X) 32 | smooth_sampler = normal_sampler(S, covS) 33 | splitting_sampler = split_sampler(X * y[:, None], covS) 34 | 35 | def meta_algorithm(XTX, XTXi, lam, sampler): 36 | 37 | p = XTX.shape[0] 38 | success = np.zeros(p) 39 | 40 | loss = rr.quadratic_loss((p,), Q=XTX) 41 | pen = rr.l1norm(p, lagrange=lam) 42 | 43 | scale = 0.5 44 | noisy_S = sampler(scale=scale) 45 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) 46 | problem = rr.simple_problem(loss, pen) 47 | soln = problem.solve(max_its=50, tol=1.e-6) 48 | success += soln != 0 49 | return set(np.nonzero(success)[0]) 50 | 51 | XTX = X.T.dot(X) 52 | XTXi = np.linalg.inv(XTX) 53 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 54 | dispersion = np.linalg.norm(resid)**2 / (n-p) 55 | 56 | lam = 4. 
* np.sqrt(n) 57 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) 58 | 59 | # run selection algorithm 60 | 61 | return full_model_inference(X, 62 | y, 63 | truth, 64 | selection_algorithm, 65 | splitting_sampler, 66 | success_params=(1, 1), 67 | B=B, 68 | fit_probability=logit_fit, 69 | fit_args={'df':20}) 70 | 71 | if __name__ == "__main__": 72 | import statsmodels.api as sm 73 | import matplotlib.pyplot as plt 74 | import pandas as pd 75 | 76 | U = np.linspace(0, 1, 101) 77 | plt.clf() 78 | 79 | for i in range(500): 80 | for B in [5000]: 81 | print(B) 82 | df = simulate(B=B) 83 | csvfile = 'additive_targets.csv' 84 | outbase = csvfile[:-4] 85 | 86 | if i % 2 == 1 and i > 0: 87 | 88 | try: 89 | df = pd.concat([df, pd.read_csv(csvfile)]) 90 | except FileNotFoundError: 91 | pass 92 | df.to_csv(csvfile, index=False) 93 | 94 | if len(df['pivot']) > 0: 95 | pivot_ax, length_ax = pivot_plot(df, outbase) 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /doc/learning_examples/multi_target/additive_targets_small.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | 11 | from selection.learning.utils import full_model_inference, pivot_plot 12 | from selection.learning.core import normal_sampler, split_sampler, logit_fit 13 | 14 | def simulate(n=100, p=30, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): 15 | 16 | # description of statistical problem 17 | 18 | X, y, truth = gaussian_instance(n=n, 19 | p=p, 20 | s=s, 21 | equicorrelated=False, 22 | rho=0.5, 23 | sigma=sigma, 24 | signal=signal, 25 | random_signs=True, 26 | scale=False)[:3] 27 | 28 | dispersion = sigma**2 29 | 30 | S = X.T.dot(y) 31 | covS = dispersion * X.T.dot(X) 32 | smooth_sampler = normal_sampler(S, covS) 33 | splitting_sampler = split_sampler(X * y[:, None], covS) 34 | 35 | def meta_algorithm(XTX, XTXi, lam, sampler): 36 | 37 | p = XTX.shape[0] 38 | success = np.zeros(p) 39 | 40 | loss = rr.quadratic_loss((p,), Q=XTX) 41 | pen = rr.l1norm(p, lagrange=lam) 42 | 43 | scale = 0.5 44 | noisy_S = sampler(scale=scale) 45 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) 46 | problem = rr.simple_problem(loss, pen) 47 | soln = problem.solve(max_its=50, tol=1.e-6) 48 | success += soln != 0 49 | return set(np.nonzero(success)[0]) 50 | 51 | XTX = X.T.dot(X) 52 | XTXi = np.linalg.inv(XTX) 53 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 54 | dispersion = np.linalg.norm(resid)**2 / (n-p) 55 | 56 | lam = 4. 
* np.sqrt(n) 57 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) 58 | 59 | # run selection algorithm 60 | 61 | return full_model_inference(X, 62 | y, 63 | truth, 64 | selection_algorithm, 65 | splitting_sampler, 66 | success_params=(1, 1), 67 | B=B, 68 | fit_probability=logit_fit, 69 | fit_args={'df':20}) 70 | 71 | if __name__ == "__main__": 72 | import statsmodels.api as sm 73 | import matplotlib.pyplot as plt 74 | import pandas as pd 75 | 76 | U = np.linspace(0, 1, 101) 77 | plt.clf() 78 | 79 | for i in range(500): 80 | for B in [5000]: 81 | print(B) 82 | df = simulate(B=B) 83 | csvfile = 'additive_targets_small.csv' 84 | outbase = csvfile[:-4] 85 | 86 | if i % 2 == 1 and i > 0: 87 | 88 | try: 89 | df = pd.concat([df, pd.read_csv(csvfile)]) 90 | except FileNotFoundError: 91 | pass 92 | df.to_csv(csvfile, index=False) 93 | 94 | if len(df['pivot']) > 0: 95 | pivot_ax, length_ax = pivot_plot(df, outbase) 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /doc/learning_examples/multi_target/gbm2.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | from selection.algorithms.lasso import ROSI 10 | 11 | from selection.learning.Rutils import lasso_glmnet 12 | from selection.learning.utils import full_model_inference, pivot_plot 13 | from selection.learning.core import normal_sampler, gbm_fit_sk 14 | 15 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): 16 | 17 | # description of statistical problem 18 | 19 | X, y, truth = gaussian_instance(n=n, 20 | p=p, 21 | s=s, 22 | equicorrelated=False, 23 | rho=0.5, 24 | sigma=sigma, 25 | signal=signal, 26 | random_signs=True, 27 | scale=False)[:3] 28 | 29 | dispersion = sigma**2 30 | 31 | S = X.T.dot(y) 32 | covS = dispersion * X.T.dot(X) 33 | smooth_sampler = normal_sampler(S, covS) 34 | 35 | def meta_algorithm(X, XTXi, resid, sampler): 36 | 37 | S = sampler(scale=0.5) # noisy draw of the sufficient statistic (scale=0 would be deterministic) 38 | ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X 39 | G = lasso_glmnet(X, ynew, *[None]*4) 40 | select = G.select() 41 | return set(list(select[0])) 42 | 43 | XTX = X.T.dot(X) 44 | XTXi = np.linalg.inv(XTX) 45 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 46 | dispersion = np.linalg.norm(resid)**2 / (n-p) 47 | 48 | selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) 49 | 50 | # run selection algorithm 51 | 52 | return full_model_inference(X, 53 | y, 54 | truth, 55 | selection_algorithm, 56 | smooth_sampler, 57 | success_params=(1, 1), 58 | B=B, 59 | fit_probability=gbm_fit_sk, 60 | fit_args={'n_estimators':2000}) 61 | 62 | if __name__ == "__main__": 63 | import statsmodels.api as sm 64 | import matplotlib.pyplot as plt 65 | import pandas as pd 66 | 67 | U = np.linspace(0, 1, 101) 68 | plt.clf() 69 | 70 | for i in range(500): 71 | df = simulate() 72 | csvfile = 'lasso_multi_CV_random_gbm.csv' 73 | outbase = csvfile[:-4] 74 | 75 | if df is not None and i > 0: 76 | 77 | try: 78 | df = pd.concat([df, pd.read_csv(csvfile)]) 79 | except FileNotFoundError: 80 | pass 81 | df.to_csv(csvfile, index=False) 82 | 83 | if len(df['pivot']) > 0: 84 | pivot_plot(df, outbase) 85 | 86 | -------------------------------------------------------------------------------- /doc/learning_examples/multi_target/gbm_targets.py: 
-------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | from selection.algorithms.lasso import ROSI 10 | 11 | from selection.learning.Rutils import lasso_glmnet 12 | from selection.learning.utils import full_model_inference, pivot_plot 13 | from selection.learning.core import normal_sampler, gbm_fit 14 | 15 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): 16 | 17 | # description of statistical problem 18 | 19 | X, y, truth = gaussian_instance(n=n, 20 | p=p, 21 | s=s, 22 | equicorrelated=False, 23 | rho=0.5, 24 | sigma=sigma, 25 | signal=signal, 26 | random_signs=True, 27 | scale=False)[:3] 28 | 29 | dispersion = sigma**2 30 | 31 | S = X.T.dot(y) 32 | covS = dispersion * X.T.dot(X) 33 | smooth_sampler = normal_sampler(S, covS) 34 | 35 | def meta_algorithm(XTX, XTXi, lam, sampler): 36 | 37 | p = XTX.shape[0] 38 | success = np.zeros(p) 39 | 40 | loss = rr.quadratic_loss((p,), Q=XTX) 41 | pen = rr.l1norm(p, lagrange=lam) 42 | 43 | scale = 0.5 44 | noisy_S = sampler(scale=scale) 45 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) 46 | problem = rr.simple_problem(loss, pen) 47 | soln = problem.solve(max_its=50, tol=1.e-6) 48 | success += soln != 0 49 | return set(np.nonzero(success)[0]) 50 | 51 | XTX = X.T.dot(X) 52 | XTXi = np.linalg.inv(XTX) 53 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 54 | dispersion = np.linalg.norm(resid)**2 / (n-p) 55 | 56 | lam = 4. * np.sqrt(n) 57 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) 58 | 59 | # run selection algorithm 60 | 61 | return full_model_inference(X, 62 | y, 63 | truth, 64 | selection_algorithm, 65 | smooth_sampler, 66 | success_params=(1, 1), 67 | B=B, 68 | fit_probability=gbm_fit, 69 | fit_args={}) 70 | 71 | if __name__ == "__main__": 72 | import statsmodels.api as sm 73 | import matplotlib.pyplot as plt 74 | import pandas as pd 75 | 76 | U = np.linspace(0, 1, 101) 77 | plt.clf() 78 | 79 | for i in range(500): 80 | for B in [5000]: 81 | print(B) 82 | df = simulate(B=B) 83 | csvfile = 'gbm_targets.csv' 84 | outbase = csvfile[:-4] 85 | 86 | if df is not None and i > 0: 87 | 88 | try: 89 | df = pd.concat([df, pd.read_csv(csvfile)]) 90 | except FileNotFoundError: 91 | pass 92 | df.to_csv(csvfile, index=False) 93 | 94 | if len(df['pivot']) > 0: 95 | pivot_plot(df, outbase) 96 | -------------------------------------------------------------------------------- /doc/learning_examples/multi_target/gbm_targets_small.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | from selection.algorithms.lasso import ROSI 10 | 11 | from selection.learning.Rutils import lasso_glmnet 12 | from selection.learning.utils import full_model_inference, pivot_plot 13 | from selection.learning.core import normal_sampler, gbm_fit 14 | 15 | def simulate(n=100, p=30, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): 16 | 17 | # description of statistical problem 18 | 19 | X, y, truth = gaussian_instance(n=n, 20 | p=p, 21 | s=s, 22 | equicorrelated=False, 23 | rho=0.5, 24 | sigma=sigma, 25 | signal=signal, 26 | random_signs=True, 27 | scale=False)[:3] 28 | 29 | dispersion = sigma**2 30 | 
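# Gaussian model for the sufficient statistic X^T y: mean S, covariance dispersion * X^T X (what normal_sampler below presumably draws from)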
31 | S = X.T.dot(y) 32 | covS = dispersion * X.T.dot(X) 33 | smooth_sampler = normal_sampler(S, covS) 34 | 35 | def meta_algorithm(XTX, XTXi, lam, sampler): 36 | 37 | p = XTX.shape[0] 38 | success = np.zeros(p) 39 | 40 | loss = rr.quadratic_loss((p,), Q=XTX) 41 | pen = rr.l1norm(p, lagrange=lam) 42 | 43 | scale = 0.5 44 | noisy_S = sampler(scale=scale) 45 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) 46 | problem = rr.simple_problem(loss, pen) 47 | soln = problem.solve(max_its=50, tol=1.e-6) 48 | success += soln != 0 49 | return set(np.nonzero(success)[0]) 50 | 51 | XTX = X.T.dot(X) 52 | XTXi = np.linalg.inv(XTX) 53 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 54 | dispersion = np.linalg.norm(resid)**2 / (n-p) 55 | 56 | lam = 4. * np.sqrt(n) 57 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) 58 | 59 | # run selection algorithm 60 | 61 | return full_model_inference(X, 62 | y, 63 | truth, 64 | selection_algorithm, 65 | smooth_sampler, 66 | success_params=(1, 1), 67 | B=B, 68 | fit_probability=gbm_fit, 69 | fit_args={}) 70 | 71 | if __name__ == "__main__": 72 | import statsmodels.api as sm 73 | import matplotlib.pyplot as plt 74 | import pandas as pd 75 | 76 | U = np.linspace(0, 1, 101) 77 | plt.clf() 78 | 79 | for i in range(500): 80 | for B in [5000]: 81 | print(B) 82 | df = simulate(B=B) 83 | csvfile = 'gbm_targets_small.csv' 84 | outbase = csvfile[:-4] 85 | 86 | if df is not None and i > 0: 87 | 88 | try: 89 | df = pd.concat([df, pd.read_csv(csvfile)]) 90 | except FileNotFoundError: 91 | pass 92 | df.to_csv(csvfile, index=False) 93 | 94 | if len(df['pivot']) > 0: 95 | pivot_plot(df, outbase) 96 | -------------------------------------------------------------------------------- /doc/learning_examples/multi_target/lasso_example_multi.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import split_sampler, keras_fit 12 | 13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000): 14 | 15 | # description of statistical problem 16 | 17 | X, y, truth = gaussian_instance(n=n, 18 | p=p, 19 | s=s, 20 | equicorrelated=False, 21 | rho=0.5, 22 | sigma=sigma, 23 | signal=signal, 24 | random_signs=True, 25 | scale=False)[:3] 26 | 27 | 28 | dispersion = sigma**2 29 | 30 | S = X.T.dot(y) 31 | covS = dispersion * X.T.dot(X) 32 | splitting_sampler = split_sampler(X * y[:, None], covS) 33 | 34 | def meta_algorithm(XTX, XTXi, lam, sampler): 35 | 36 | p = XTX.shape[0] 37 | success = np.zeros(p) 38 | 39 | loss = rr.quadratic_loss((p,), Q=XTX) 40 | pen = rr.l1norm(p, lagrange=lam) 41 | 42 | scale = 0. 43 | noisy_S = sampler(scale=scale) 44 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) 45 | problem = rr.simple_problem(loss, pen) 46 | soln = problem.solve(max_its=100, tol=1.e-10) 47 | success += soln != 0 48 | return set(np.nonzero(success)[0]) 49 | 50 | XTX = X.T.dot(X) 51 | XTXi = np.linalg.inv(XTX) 52 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 53 | dispersion = np.linalg.norm(resid)**2 / (n-p) 54 | 55 | lam = 4. 
* np.sqrt(n) 56 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) 57 | 58 | # run selection algorithm 59 | 60 | return full_model_inference(X, 61 | y, 62 | truth, 63 | selection_algorithm, 64 | splitting_sampler, 65 | success_params=(1, 1), 66 | B=B, 67 | fit_probability=keras_fit, 68 | fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) 69 | 70 | if __name__ == "__main__": 71 | import statsmodels.api as sm 72 | import matplotlib.pyplot as plt 73 | import pandas as pd 74 | 75 | for i in range(2000): 76 | df = simulate(B=2000) 77 | csvfile = 'lasso_multi.csv' 78 | outbase = csvfile[:-4] 79 | 80 | if df is not None and i > 0: 81 | 82 | try: # concatenate to disk 83 | df = pd.concat([df, pd.read_csv(csvfile)]) 84 | except FileNotFoundError: 85 | pass 86 | df.to_csv(csvfile, index=False) 87 | 88 | if len(df['pivot']) > 0: 89 | pivot_ax, length_ax = pivot_plot(df, outbase) 90 | 91 | 92 | -------------------------------------------------------------------------------- /doc/learning_examples/multi_target/lasso_example_multi_CV.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import split_sampler, keras_fit 12 | from selection.learning.Rutils import lasso_glmnet 13 | 14 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): 15 | 16 | # description of statistical problem 17 | 18 | X, y, truth = gaussian_instance(n=n, 19 | p=p, 20 | s=s, 21 | equicorrelated=False, 22 | rho=0.5, 23 | sigma=sigma, 24 | signal=signal, 25 | random_signs=True, 26 | scale=False)[:3] 27 | 28 | dispersion = sigma**2 29 | 30 | S = X.T.dot(y) 31 | covS = dispersion * X.T.dot(X) 32 | splitting_sampler = split_sampler(X * y[:, None], covS) 33 | 34 | def meta_algorithm(X, XTXi, resid, sampler): 35 | 36 | S = sampler(scale=0.) 
# deterministic with scale=0 37 | ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X 38 | G = lasso_glmnet(X, ynew, *[None]*4) 39 | select = G.select() 40 | return set(list(select[0])) 41 | 42 | XTX = X.T.dot(X) 43 | XTXi = np.linalg.inv(XTX) 44 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 45 | dispersion = np.linalg.norm(resid)**2 / (n-p) 46 | 47 | selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) 48 | 49 | # run selection algorithm 50 | 51 | return full_model_inference(X, 52 | y, 53 | truth, 54 | selection_algorithm, 55 | splitting_sampler, 56 | success_params=(1, 1), 57 | B=B, 58 | fit_probability=keras_fit, 59 | fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) 60 | 61 | if __name__ == "__main__": 62 | import statsmodels.api as sm 63 | import matplotlib.pyplot as plt 64 | import pandas as pd 65 | 66 | U = np.linspace(0, 1, 101) 67 | plt.clf() 68 | 69 | for i in range(500): 70 | df = simulate() 71 | csvfile = 'lasso_multi_CV.csv' 72 | outbase = csvfile[:-4] 73 | 74 | if df is not None: 75 | 76 | try: 77 | df = pd.concat([df, pd.read_csv(csvfile)]) 78 | except FileNotFoundError: 79 | pass 80 | df.to_csv(csvfile, index=False) 81 | 82 | if len(df['pivot']) > 0: 83 | pivot_plot(df, outbase) 84 | 85 | -------------------------------------------------------------------------------- /doc/learning_examples/multi_target/lasso_example_multi_CV_stronger.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import normal_sampler, split_sampler, keras_fit 12 | from selection.learning.Rutils import cv_glmnet_lam, lasso_glmnet 13 | 14 | def simulate(n=200, p=100, s=10, signal=(1.5, 2), sigma=2, alpha=0.1, B=3000): 15 | 16 | # description of statistical problem 17 | 18 | X, y, truth = gaussian_instance(n=n, 19 | p=p, 20 | s=s, 21 | equicorrelated=False, 22 | rho=0.5, 23 | sigma=sigma, 24 | signal=signal, 25 | random_signs=True, 26 | scale=False)[:3] 27 | 28 | dispersion = sigma**2 29 | 30 | S = X.T.dot(y) 31 | covS = dispersion * X.T.dot(X) 32 | smooth_sampler = normal_sampler(S, covS) 33 | splitting_sampler = split_sampler(X * y[:, None], covS) 34 | 35 | def meta_algorithm(X, XTXi, resid, sampler): 36 | 37 | S = sampler(scale=0.) # deterministic with scale=0
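# with scale=0. the sampler just returns the observed sufficient statistic S = X'y,
# so X.dot(XTXi).dot(S) below is the least-squares fit and adding the observed
# residuals reconstructs y itself; with scale > 0 the same line would build a
# randomized response for glmnet to run its cross-validated lasso on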
38 | ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X 39 | G = lasso_glmnet(X, ynew, *[None]*4) 40 | select = G.select() 41 | return set(list(select[0])) 42 | 43 | XTX = X.T.dot(X) 44 | XTXi = np.linalg.inv(XTX) 45 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 46 | dispersion = np.linalg.norm(resid)**2 / (n-p) 47 | 48 | selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) 49 | 50 | # run selection algorithm 51 | 52 | return full_model_inference(X, 53 | y, 54 | truth, 55 | selection_algorithm, 56 | splitting_sampler, 57 | success_params=(1, 1), 58 | B=B, 59 | fit_probability=keras_fit, 60 | fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) 61 | 62 | if __name__ == "__main__": 63 | import statsmodels.api as sm 64 | import matplotlib.pyplot as plt 65 | import pandas as pd 66 | 67 | U = np.linspace(0, 1, 101) 68 | plt.clf() 69 | 70 | for i in range(500): 71 | df = simulate() 72 | csvfile = 'lasso_multi_CV_stronger.csv' 73 | outbase = csvfile[:-4] 74 | 75 | if df is not None and i > 0: 76 | 77 | try: 78 | df = pd.concat([df, pd.read_csv(csvfile)]) 79 | except FileNotFoundError: 80 | pass 81 | df.to_csv(csvfile, index=False) 82 | 83 | if len(df['pivot']) > 0: 84 | pivot_plot(df, outbase) 85 | -------------------------------------------------------------------------------- /doc/learning_examples/multi_target/lasso_example_multi_bigger.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import normal_sampler, split_sampler, logit_fit 12 | 13 | def simulate(n=2000, p=1000, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=4000): 14 | 15 | # description of statistical problem 16 | 17 | X, y, truth = gaussian_instance(n=n, 18 | p=p, 19 | s=s, 20 | equicorrelated=False, 21 | rho=0.5, 22 | sigma=sigma, 23 | signal=signal, 24 | random_signs=True, 25 | scale=False)[:3] 26 | 27 | dispersion = sigma**2 28 | 29 | S = X.T.dot(y) 30 | covS = dispersion * X.T.dot(X) 31 | smooth_sampler = normal_sampler(S, covS) 32 | splitting_sampler = split_sampler(X * y[:, None], covS) 33 | 34 | def meta_algorithm(XTX, XTXi, lam, sampler): 35 | 36 | p = XTX.shape[0] 37 | success = np.zeros(p) 38 | 39 | loss = rr.quadratic_loss((p,), Q=XTX) 40 | pen = rr.l1norm(p, lagrange=lam) 41 | 42 | scale = 0. 43 | noisy_S = sampler(scale=scale) 44 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) 45 | problem = rr.simple_problem(loss, pen) 46 | soln = problem.solve(max_its=100, tol=1.e-10) 47 | success += soln != 0 48 | return set(np.nonzero(success)[0]) 49 | 50 | XTX = X.T.dot(X) 51 | XTXi = np.linalg.inv(XTX) 52 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 53 | dispersion = np.linalg.norm(resid)**2 / (n-p) 54 | 55 | lam = 5.
* np.sqrt(n) 56 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) 57 | 58 | # run selection algorithm 59 | 60 | return full_model_inference(X, 61 | y, 62 | truth, 63 | selection_algorithm, 64 | splitting_sampler, 65 | success_params=(1, 1), 66 | B=B, 67 | fit_probability=logit_fit, 68 | fit_args={'df':20}) 69 | 70 | 71 | if __name__ == "__main__": 72 | import statsmodels.api as sm 73 | import matplotlib.pyplot as plt 74 | import pandas as pd 75 | 76 | U = np.linspace(0, 1, 101) 77 | plt.clf() 78 | 79 | for i in range(500): 80 | df = simulate(B=4000) 81 | csvfile = 'lasso_multi_bigger.csv' 82 | outbase = csvfile[:-4] 83 | 84 | if df is not None and i > 0: 85 | 86 | try: # concatenate to disk 87 | df = pd.concat([df, pd.read_csv(csvfile)]) 88 | except FileNotFoundError: 89 | pass 90 | df.to_csv(csvfile, index=False) 91 | 92 | if len(df['pivot']) > 0: 93 | pivot_ax, length_ax = pivot_plot(df, outbase) 94 | -------------------------------------------------------------------------------- /doc/learning_examples/multi_target/lasso_example_multi_gbm.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import split_sampler, gbm_fit 12 | 13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): 14 | 15 | # description of statistical problem 16 | 17 | X, y, truth = gaussian_instance(n=n, 18 | p=p, 19 | s=s, 20 | equicorrelated=False, 21 | rho=0.5, 22 | sigma=sigma, 23 | signal=signal, 24 | random_signs=True, 25 | scale=False)[:3] 26 | 27 | dispersion = sigma**2 28 | 29 | S = X.T.dot(y) 30 | covS = dispersion * X.T.dot(X) 31 | splitting_sampler = split_sampler(X * y[:, None], covS) 32 | 33 | def meta_algorithm(XTX, XTXi, lam, sampler): 34 | 35 | p = XTX.shape[0] 36 | success = np.zeros(p) 37 | 38 | loss = rr.quadratic_loss((p,), Q=XTX) 39 | pen = rr.l1norm(p, lagrange=lam) 40 | 41 | scale = 0. 42 | noisy_S = sampler(scale=scale) 43 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) 44 | problem = rr.simple_problem(loss, pen) 45 | soln = problem.solve(max_its=100, tol=1.e-10) 46 | success += soln != 0 47 | return set(np.nonzero(success)[0]) 48 | 49 | XTX = X.T.dot(X) 50 | XTXi = np.linalg.inv(XTX) 51 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 52 | dispersion = np.linalg.norm(resid)**2 / (n-p) 53 | 54 | lam = 4. 
* np.sqrt(n) 55 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) 56 | 57 | # run selection algorithm 58 | 59 | return full_model_inference(X, 60 | y, 61 | truth, 62 | selection_algorithm, 63 | splitting_sampler, 64 | success_params=(1, 1), 65 | B=B, 66 | fit_probability=gbm_fit, 67 | fit_args={'ntrees':5000}) 68 | 69 | 70 | if __name__ == "__main__": 71 | import statsmodels.api as sm 72 | import matplotlib.pyplot as plt 73 | import pandas as pd 74 | 75 | U = np.linspace(0, 1, 101) 76 | plt.clf() 77 | 78 | for i in range(500): 79 | df = simulate() 80 | csvfile = 'lasso_multi_gbm.csv' 81 | outbase = csvfile[:-4] 82 | 83 | if df is not None and i > 0: 84 | 85 | try: # concatenate to disk 86 | df = pd.concat([df, pd.read_csv(csvfile)]) 87 | except FileNotFoundError: 88 | pass 89 | df.to_csv(csvfile, index=False) 90 | 91 | if len(df['pivot']) > 0: 92 | pivot_ax, length_ax = pivot_plot(df, outbase) 93 | -------------------------------------------------------------------------------- /doc/learning_examples/multi_target/lasso_example_multi_gbm_sk.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import split_sampler, gbm_fit_sk 12 | 13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000): 14 | 15 | # description of statistical problem 16 | 17 | X, y, truth = gaussian_instance(n=n, 18 | p=p, 19 | s=s, 20 | equicorrelated=False, 21 | rho=0.5, 22 | sigma=sigma, 23 | signal=signal, 24 | random_signs=True, 25 | scale=False)[:3] 26 | 27 | dispersion = sigma**2 28 | 29 | S = X.T.dot(y) 30 | covS = dispersion * X.T.dot(X) 31 | splitting_sampler = split_sampler(X * y[:, None], covS) 32 | 33 | def meta_algorithm(XTX, XTXi, lam, sampler): 34 | 35 | p = XTX.shape[0] 36 | success = np.zeros(p) 37 | 38 | loss = rr.quadratic_loss((p,), Q=XTX) 39 | pen = rr.l1norm(p, lagrange=lam) 40 | 41 | scale = 0. 42 | noisy_S = sampler(scale=scale) 43 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) 44 | problem = rr.simple_problem(loss, pen) 45 | soln = problem.solve(max_its=100, tol=1.e-10) 46 | success += soln != 0 47 | return set(np.nonzero(success)[0]) 48 | 49 | XTX = X.T.dot(X) 50 | XTXi = np.linalg.inv(XTX) 51 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 52 | dispersion = np.linalg.norm(resid)**2 / (n-p) 53 | 54 | lam = 4.
* np.sqrt(n) 55 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) 56 | 57 | # run selection algorithm 58 | 59 | return full_model_inference(X, 60 | y, 61 | truth, 62 | selection_algorithm, 63 | splitting_sampler, 64 | success_params=(1, 1), 65 | B=B, 66 | fit_probability=gbm_fit_sk, 67 | fit_args={'n_estimators':1000}) 68 | 69 | 70 | if __name__ == "__main__": 71 | import statsmodels.api as sm 72 | import matplotlib.pyplot as plt 73 | import pandas as pd 74 | 75 | U = np.linspace(0, 1, 101) 76 | plt.clf() 77 | 78 | for i in range(500): 79 | df = simulate() 80 | csvfile = 'lasso_multi_gbm_sk.csv' 81 | outbase = csvfile[:-4] 82 | 83 | if df is not None and i > 0: 84 | 85 | try: # concatenate to disk 86 | df = pd.concat([df, pd.read_csv(csvfile)]) 87 | except FileNotFoundError: 88 | pass 89 | df.to_csv(csvfile, index=False) 90 | 91 | if len(df['pivot']) > 0: 92 | pivot_ax, length_ax = pivot_plot(df, outbase) 93 | -------------------------------------------------------------------------------- /doc/learning_examples/multi_target/lasso_example_multi_random.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import normal_sampler, keras_fit 12 | 13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): 14 | 15 | # description of statistical problem 16 | 17 | X, y, truth = gaussian_instance(n=n, 18 | p=p, 19 | s=s, 20 | equicorrelated=False, 21 | rho=0.5, 22 | sigma=sigma, 23 | signal=signal, 24 | random_signs=True, 25 | scale=False)[:3] 26 | 27 | dispersion = sigma**2 28 | 29 | S = X.T.dot(y) 30 | covS = dispersion * X.T.dot(X) 31 | smooth_sampler = normal_sampler(S, covS) 32 | 33 | def meta_algorithm(XTX, XTXi, lam, sampler): 34 | 35 | p = XTX.shape[0] 36 | success = np.zeros(p) 37 | 38 | loss = rr.quadratic_loss((p,), Q=XTX) 39 | pen = rr.l1norm(p, lagrange=lam) 40 | 41 | scale = 0.5 42 | noisy_S = sampler(scale=scale) 43 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) 44 | problem = rr.simple_problem(loss, pen) 45 | soln = problem.solve(max_its=100, tol=1.e-10) 46 | success += soln != 0 47 | return set(np.nonzero(success)[0]) 48 | 49 | XTX = X.T.dot(X) 50 | XTXi = np.linalg.inv(XTX) 51 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 52 | dispersion = np.linalg.norm(resid)**2 / (n-p) 53 | 54 | lam = 4. 
* np.sqrt(n) 55 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) 56 | 57 | # run selection algorithm 58 | 59 | return full_model_inference(X, 60 | y, 61 | truth, 62 | selection_algorithm, 63 | smooth_sampler, 64 | success_params=(1, 1), 65 | B=B, 66 | fit_probability=keras_fit, 67 | fit_args={'epochs':20, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) 68 | 69 | 70 | if __name__ == "__main__": 71 | import statsmodels.api as sm 72 | import matplotlib.pyplot as plt 73 | import pandas as pd 74 | 75 | U = np.linspace(0, 1, 101) 76 | plt.clf() 77 | 78 | for i in range(500): 79 | df = simulate() 80 | csvfile = 'lasso_multi_random.csv' 81 | outbase = csvfile[:-4] 82 | 83 | if df is not None and i > 0: 84 | 85 | try: # concatenate to disk 86 | df = pd.concat([df, pd.read_csv(csvfile)]) 87 | except FileNotFoundError: 88 | pass 89 | df.to_csv(csvfile, index=False) 90 | 91 | if len(df['pivot']) > 0: 92 | pivot_ax, length_ax = pivot_plot(df, outbase) 93 | -------------------------------------------------------------------------------- /doc/learning_examples/multi_target/lasso_example_multi_random_gbm.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import normal_sampler, split_sampler, gbm_fit 12 | 13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): 14 | 15 | # description of statistical problem 16 | 17 | X, y, truth = gaussian_instance(n=n, 18 | p=p, 19 | s=s, 20 | equicorrelated=False, 21 | rho=0.5, 22 | sigma=sigma, 23 | signal=signal, 24 | random_signs=True, 25 | scale=False)[:3] 26 | 27 | dispersion = sigma**2 28 | 29 | S = X.T.dot(y) 30 | covS = dispersion * X.T.dot(X) 31 | smooth_sampler = normal_sampler(S, covS) 32 | splitting_sampler = split_sampler(X * y[:, None], covS) 33 | 34 | def meta_algorithm(XTX, XTXi, lam, sampler): 35 | 36 | p = XTX.shape[0] 37 | success = np.zeros(p) 38 | 39 | loss = rr.quadratic_loss((p,), Q=XTX) 40 | pen = rr.l1norm(p, lagrange=lam) 41 | 42 | scale = 0.5 43 | noisy_S = sampler(scale=scale) 44 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) 45 | problem = rr.simple_problem(loss, pen) 46 | soln = problem.solve(max_its=100, tol=1.e-10) 47 | success += soln != 0 48 | return set(np.nonzero(success)[0]) 49 | 50 | XTX = X.T.dot(X) 51 | XTXi = np.linalg.inv(XTX) 52 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 53 | dispersion = np.linalg.norm(resid)**2 / (n-p) 54 | 55 | lam = 4. * np.sqrt(n)
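# note the contrast with the scale=0. examples above: here meta_algorithm uses
# scale=0.5, so the lasso is solved on a perturbed sufficient statistic and the
# selection event itself is randomized, while lam stays a fixed multiple of sqrt(n)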
56 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) 57 | 58 | # run selection algorithm 59 | 60 | return full_model_inference(X, 61 | y, 62 | truth, 63 | selection_algorithm, 64 | smooth_sampler, 65 | success_params=(1, 1), 66 | B=B, 67 | fit_probability=gbm_fit, 68 | fit_args={'ntrees':5000}) 69 | 70 | if __name__ == "__main__": 71 | import statsmodels.api as sm 72 | import matplotlib.pyplot as plt 73 | import pandas as pd 74 | 75 | U = np.linspace(0, 1, 101) 76 | plt.clf() 77 | 78 | for i in range(500): 79 | df = simulate() 80 | csvfile = 'lasso_multi_random_gbm.csv' 81 | outbase = csvfile[:-4] 82 | 83 | if df is not None and i > 0: 84 | 85 | try: # concatenate to disk 86 | df = pd.concat([df, pd.read_csv(csvfile)]) 87 | except FileNotFoundError: 88 | pass 89 | df.to_csv(csvfile, index=False) 90 | 91 | if len(df['pivot']) > 0: 92 | pivot_ax, length_ax = pivot_plot(df, outbase) 93 | -------------------------------------------------------------------------------- /doc/learning_examples/multi_target/lasso_example_multi_random_rf.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import normal_sampler, split_sampler, random_forest_fit 12 | 13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): 14 | 15 | # description of statistical problem 16 | 17 | X, y, truth = gaussian_instance(n=n, 18 | p=p, 19 | s=s, 20 | equicorrelated=False, 21 | rho=0.5, 22 | sigma=sigma, 23 | signal=signal, 24 | random_signs=True, 25 | scale=False)[:3] 26 | 27 | dispersion = sigma**2 28 | 29 | S = X.T.dot(y) 30 | covS = dispersion * X.T.dot(X) 31 | smooth_sampler = normal_sampler(S, covS) 32 | splitting_sampler = split_sampler(X * y[:, None], covS) 33 | 34 | def meta_algorithm(XTX, XTXi, lam, sampler): 35 | 36 | p = XTX.shape[0] 37 | success = np.zeros(p) 38 | 39 | loss = rr.quadratic_loss((p,), Q=XTX) 40 | pen = rr.l1norm(p, lagrange=lam) 41 | 42 | scale = 0.5 43 | noisy_S = sampler(scale=scale) 44 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) 45 | problem = rr.simple_problem(loss, pen) 46 | soln = problem.solve(max_its=100, tol=1.e-10) 47 | success += soln != 0 48 | return set(np.nonzero(success)[0]) 49 | 50 | XTX = X.T.dot(X) 51 | XTXi = np.linalg.inv(XTX) 52 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 53 | dispersion = np.linalg.norm(resid)**2 / (n-p) 54 | 55 | lam = 4. * np.sqrt(n)
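# meta_algorithm returns the active set of the noisy lasso solution as a Python
# set; full_model_inference re-runs it on repeated draws from the sampler and
# fits a classifier (a random forest here) to learn the selection probability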
56 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) 57 | 58 | # run selection algorithm 59 | 60 | return full_model_inference(X, 61 | y, 62 | truth, 63 | selection_algorithm, 64 | smooth_sampler, 65 | success_params=(1, 1), 66 | B=B, 67 | fit_probability=random_forest_fit, 68 | fit_args={'ntrees':5000}) 69 | 70 | if __name__ == "__main__": 71 | import statsmodels.api as sm 72 | import matplotlib.pyplot as plt 73 | import pandas as pd 74 | 75 | U = np.linspace(0, 1, 101) 76 | plt.clf() 77 | 78 | for i in range(500): 79 | df = simulate() 80 | csvfile = 'lasso_multi_random_rf.csv' 81 | outbase = csvfile[:-4] 82 | 83 | if df is not None and i > 0: 84 | 85 | try: # concatenate to disk 86 | df = pd.concat([df, pd.read_csv(csvfile)]) 87 | except FileNotFoundError: 88 | pass 89 | df.to_csv(csvfile, index=False) 90 | 91 | if len(df['pivot']) > 0: 92 | pivot_ax, length_ax = pivot_plot(df, outbase) 93 | -------------------------------------------------------------------------------- /doc/learning_examples/multi_target/lasso_example_multi_rf_sk.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | 6 | import regreg.api as rr 7 | 8 | from selection.tests.instance import gaussian_instance 9 | 10 | from selection.learning.utils import full_model_inference, pivot_plot 11 | from selection.learning.core import normal_sampler, split_sampler, random_forest_fit_sk 12 | 13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000): 14 | 15 | # description of statistical problem 16 | 17 | X, y, truth = gaussian_instance(n=n, 18 | p=p, 19 | s=s, 20 | equicorrelated=False, 21 | rho=0.5, 22 | sigma=sigma, 23 | signal=signal, 24 | random_signs=True, 25 | scale=False)[:3] 26 | 27 | dispersion = sigma**2 28 | 29 | S = X.T.dot(y) 30 | covS = dispersion * X.T.dot(X) 31 | smooth_sampler = normal_sampler(S, covS) 32 | splitting_sampler = split_sampler(X * y[:, None], covS) 33 | 34 | def meta_algorithm(XTX, XTXi, lam, sampler): 35 | 36 | p = XTX.shape[0] 37 | success = np.zeros(p) 38 | 39 | loss = rr.quadratic_loss((p,), Q=XTX) 40 | pen = rr.l1norm(p, lagrange=lam) 41 | 42 | scale = 0. 43 | noisy_S = sampler(scale=scale) 44 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) 45 | problem = rr.simple_problem(loss, pen) 46 | soln = problem.solve(max_its=100, tol=1.e-10) 47 | success += soln != 0 48 | return set(np.nonzero(success)[0]) 49 | 50 | XTX = X.T.dot(X) 51 | XTXi = np.linalg.inv(XTX) 52 | resid = y - X.dot(XTXi.dot(X.T.dot(y))) 53 | dispersion = np.linalg.norm(resid)**2 / (n-p) 54 | 55 | lam = 4. * np.sqrt(n)
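# a rough scaling note: the instance is drawn with scale=False, so the columns
# of X have length of order sqrt(n) and lam = 4 * sqrt(n) is on the scale of the
# entries of X'(noise), a fixed Lagrange parameter rather than a tuned one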
56 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) 57 | 58 | # run selection algorithm 59 | 60 | 61 | 62 | return full_model_inference(X, 63 | y, 64 | truth, 65 | selection_algorithm, 66 | splitting_sampler, 67 | success_params=(1, 1), 68 | B=B, 69 | fit_probability=random_forest_fit_sk, 70 | fit_args={'n_estimators':5000}) 71 | 72 | 73 | if __name__ == "__main__": 74 | import statsmodels.api as sm 75 | import matplotlib.pyplot as plt 76 | import pandas as pd 77 | 78 | U = np.linspace(0, 1, 101) 79 | plt.clf() 80 | 81 | for i in range(500): 82 | df = simulate() 83 | csvfile = 'lasso_multi_rf_sk.csv' 84 | outbase = csvfile[:-4] 85 | 86 | if df is not None and i > 0: 87 | 88 | try: 89 | df = pd.concat([df, pd.read_csv(csvfile)]) 90 | except FileNotFoundError: 91 | pass 92 | df.to_csv(csvfile, index=False) 93 | 94 | if len(df['pivot']) > 0: 95 | pivot_plot(df, outbase) 96 | 97 | -------------------------------------------------------------------------------- /doc/learning_examples/standalone/cleaner_basic_example.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from selection.learning.core import (infer_general_target, 4 | normal_sampler, 5 | logit_fit, 6 | probit_fit) 7 | 8 | def simulate(n=100): 9 | 10 | # description of statistical problem 11 | 12 | truth = np.array([2. , -2.]) / np.sqrt(n) 13 | 14 | data = np.random.standard_normal((n, 2)) + np.multiply.outer(np.ones(n), truth) 15 | S = np.mean(data, 0) 16 | observed_sampler = normal_sampler(S, 1/n * np.identity(2)) 17 | 18 | def selection_algorithm(sampler): 19 | min_success = 1 20 | ntries = 3 21 | success = 0 22 | for _ in range(ntries): 23 | noisyS = sampler(scale=0.5) 24 | success += noisyS.sum() > 0.2 / np.sqrt(n) 25 | return success >= min_success 26 | 27 | # run selection algorithm 28 | 29 | observed_outcome = selection_algorithm(observed_sampler) 30 | 31 | # find the target, based on the observed outcome 32 | 33 | if observed_outcome: # target is truth[0] 34 | (true_target, 35 | observed_target, 36 | target_cov, 37 | cross_cov) = (truth[0], 38 | S[0], 39 | 1./n * np.identity(1), 40 | np.array([1., 0.]).reshape((2,1)) / n) 41 | else: 42 | (true_target, 43 | observed_target, 44 | target_cov, 45 | cross_cov) = (truth[1], 46 | S[1], 47 | 1./n * np.identity(1), 48 | np.array([0., 1.]).reshape((2,1)) / n) 49 | 50 | pivot, interval = infer_general_target(selection_algorithm, 51 | observed_outcome, 52 | observed_sampler, 53 | observed_target, 54 | cross_cov, 55 | target_cov, 56 | hypothesis=true_target, 57 | fit_probability=probit_fit)[:2] 58 | 59 | return pivot, (interval[0] < true_target) * (interval[1] > true_target), interval[1] - interval[0] 60 | 61 | if __name__ == "__main__": 62 | import statsmodels.api as sm 63 | import matplotlib.pyplot as plt 64 | 65 | n = 100 66 | U = np.linspace(0, 1, 101) 67 | P, L = [], [] 68 | plt.clf() 69 | coverage = 0 70 | for i in range(300): 71 | p, cover, l = simulate(n=n) 72 | coverage += cover 73 | P.append(p) 74 | L.append(l) 75 | print(np.mean(P), np.std(P), np.mean(L) / (2 * 1.65 / np.sqrt(n)), coverage / (i+1)) 76 | 77 | plt.clf() 78 | plt.plot(U, sm.distributions.ECDF(P)(U), 'r', linewidth=3) 79 | plt.plot([0,1], [0,1], 'k--', linewidth=2) 80 | plt.show() 81 | -------------------------------------------------------------------------------- /doc/learning_examples/standalone/full_model_example.py:
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from selection.learning.core import (infer_full_target, 3 | normal_sampler, 4 | logit_fit, 5 | probit_fit) 6 | 7 | def simulate(n=100): 8 | 9 | # description of statistical problem 10 | 11 | truth = np.array([2. , -2.]) / np.sqrt(n) 12 | 13 | dispersion = 2 14 | data = np.sqrt(dispersion) * np.random.standard_normal((n, 2)) + np.multiply.outer(np.ones(n), truth) 15 | S = np.sum(data, 0) 16 | observed_sampler = normal_sampler(S, dispersion * n * np.identity(2)) 17 | 18 | def selection_algorithm(sampler): 19 | min_success = 1 20 | ntries = 3 21 | success = 0 22 | for _ in range(ntries): 23 | noisyS = sampler(scale=0.5) 24 | success += noisyS.sum() > 0.2 * np.sqrt(n) * np.sqrt(dispersion) 25 | if success >= min_success: 26 | return set([1, 0]) 27 | return set([1]) 28 | 29 | # run selection algorithm 30 | 31 | observed_set = selection_algorithm(observed_sampler) 32 | 33 | # find the target, based on the observed outcome 34 | 35 | # we just take the first target 36 | 37 | pivots, covered, lengths = [], [], [] 38 | for idx in observed_set: 39 | true_target = truth[idx] 40 | 41 | pivot, interval = infer_full_target(selection_algorithm, 42 | observed_set, 43 | [idx], 44 | observed_sampler, 45 | dispersion, 46 | hypothesis=[true_target], 47 | fit_probability=probit_fit)[0][:2] 48 | 49 | pivots.append(pivot) 50 | covered.append((interval[0] < true_target) * (interval[1] > true_target)) 51 | lengths.append(interval[1] - interval[0]) 52 | 53 | return pivots, covered, lengths 54 | 55 | if __name__ == "__main__": 56 | import statsmodels.api as sm 57 | import matplotlib.pyplot as plt 58 | 59 | n = 100 60 | U = np.linspace(0, 1, 101) 61 | P, L, coverage = [], [], [] 62 | plt.clf() 63 | for i in range(300): 64 | p, cover, l = simulate(n=n) 65 | coverage.extend(cover) 66 | P.extend(p) 67 | L.extend(l) 68 | print(np.mean(P), np.std(P), np.mean(L) / (2 * 1.65 / np.sqrt(n)), np.mean(coverage)) 69 | 70 | plt.clf() 71 | plt.plot(U, sm.distributions.ECDF(P)(U), 'r', linewidth=3) 72 | plt.plot([0,1], [0,1], 'k--', linewidth=2) 73 | plt.show() 74 | -------------------------------------------------------------------------------- /doc/license.rst: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Selective Inference development team 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * The names of any contributors to this software 17 | may not be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /doc/notebooks/learning/simple_example_pivots.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/notebooks/learning/simple_example_pivots.pdf -------------------------------------------------------------------------------- /doc/notebooks/learning/simple_example_sel_prob.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/notebooks/learning/simple_example_sel_prob.pdf -------------------------------------------------------------------------------- /doc/source/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/_static/logo.png -------------------------------------------------------------------------------- /doc/source/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | {% set title = 'Selection' %} 3 | 4 | {% block rootrellink %} 5 |
  • Selection home
  • 6 | {% endblock %} 7 | 8 | 9 | {% block extrahead %} 10 | 11 | {% endblock %} 12 | 13 | {% block header %} 14 |
    15 | 16 | Selection logo

    Post-selection inference

    17 |
    18 | {% endblock %} 19 | 20 | {# This block gets put at the top of the sidebar #} 21 | {% block sidebarlogo %} 22 | {% endblock %} 23 | 24 |

    Site Navigation

    25 |
    29 | 30 | {# I had to copy the whole search block just to change the rendered text, 31 | so it doesn't mention modules or classes #} 32 | {%- block sidebarsearch %} 33 | {%- if pagename != "search" %} 34 | 35 | 46 | 47 | 48 | {%- endif %} 49 | 50 | {# The sidebarsearch block is the last one available in the default sidebar() 51 | macro, so the only way to add something to the bottom of the sidebar is to 52 | put it here, at the end of the sidebarsearch block (before it closes). 53 | #} 54 | 55 | {%- endblock %} 56 | -------------------------------------------------------------------------------- /doc/source/algorithms/index.rst: -------------------------------------------------------------------------------- 1 | ========================= 2 | Non-randomized algorithms 3 | ========================= 4 | 5 | This is a project that collects various tools for 6 | post-selection inference. 7 | 8 | .. toctree:: 9 | :maxdepth: 2 10 | 11 | covtest.ipynb 12 | spacings 13 | -------------------------------------------------------------------------------- /doc/source/algorithms/spacings_files/spacings_23_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_23_0.png -------------------------------------------------------------------------------- /doc/source/algorithms/spacings_files/spacings_25_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_25_0.png -------------------------------------------------------------------------------- /doc/source/algorithms/spacings_files/spacings_27_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_27_0.png -------------------------------------------------------------------------------- /doc/source/algorithms/spacings_files/spacings_29_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_29_0.png -------------------------------------------------------------------------------- /doc/source/algorithms/spacings_files/spacings_31_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_31_0.png -------------------------------------------------------------------------------- /doc/source/algorithms/spacings_files/spacings_3_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_3_0.png -------------------------------------------------------------------------------- /doc/source/algorithms/spacings_files/spacings_4_0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_4_0.png -------------------------------------------------------------------------------- /doc/source/algorithms/spacings_files/spacings_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_5_0.png -------------------------------------------------------------------------------- /doc/source/algorithms/spacings_files/spacings_6_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_6_0.png -------------------------------------------------------------------------------- /doc/source/algorithms/spacings_files/spacings_7_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_7_0.png -------------------------------------------------------------------------------- /doc/source/algorithms/spacings_files/spacings_9_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_9_0.png -------------------------------------------------------------------------------- /doc/source/docattribute.rst: -------------------------------------------------------------------------------- 1 | .. _doc-attribute: 2 | 3 | Selection documentation attribution 4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | This website is based on the `NIPY project website `_, which is licensed under a `Creative Commons Attribution 3.0 License `_. 7 | 8 | We have licensed our own documentation using the same license, see :ref:`selectinf-license`. -------------------------------------------------------------------------------- /doc/source/documentation.rst: -------------------------------------------------------------------------------- 1 | .. _documentation-main: 2 | 3 | ============= 4 | Documentation 5 | ============= 6 | 7 | .. only:: html 8 | 9 | :Release: |version| 10 | :Date: |today| 11 | 12 | Download `PDF `_ 13 | 14 | Contents: 15 | 16 | .. toctree:: 17 | :maxdepth: 1 18 | :glob: 19 | 20 | download.rst 21 | license.rst 22 | api/index.rst 23 | docattribute.rst 24 | 25 | 26 | -------------------------------------------------------------------------------- /doc/source/download.rst: -------------------------------------------------------------------------------- 1 | .. 
_download: 2 | 3 | Downloading and installing the code 4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | The post-selection inference source code is hosted at 7 | 8 | http://github.com/selective-inference/Python-software 9 | 10 | Selection depends on the following Python tools 11 | 12 | * `NumPy `_ 13 | 14 | * `SciPy `_ 15 | 16 | * `Cython `_ 17 | 18 | * `Pandas `_ 19 | 20 | You can clone the selection repo using:: 21 | 22 | git clone https://github.com/selective-inference/Python-software.git 23 | 24 | Then installation is a simple call to python:: 25 | 26 | cd Python-software 27 | git submodule update --init 28 | pip install -r requirements.txt 29 | python setup.py install --prefix=MYDIR 30 | 31 | where MYDIR is a site-packages directory you can write to. This 32 | directory will need to be on your PYTHONPATH for you to import 33 | `selectinf`. That's it! 34 | 35 | Testing your installation 36 | ------------------------- 37 | 38 | There is a small but growing suite of tests that can be easily checked using `nose `_:: 39 | 40 | mkdir tmp 41 | cd tmp 42 | nosetests -v selectinf 43 | 44 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. _about_selection: 2 | 3 | ===================== 4 | The Selection project 5 | ===================== 6 | 7 | .. include:: ./links_names.txt 8 | 9 | This is a project that collects various tools for 10 | post-selection inference. 11 | 12 | 13 | .. toctree:: 14 | :maxdepth: 2 15 | 16 | documentation 17 | algorithms/index 18 | randomized/index 19 | learning/index 20 | 21 | 22 | Jonathan Taylor was funded by NSF in writing his portion of the 23 | software. As such, this material is based upon work supported by the 24 | National Science Foundation under Grant DMS 1208857, and by the AFOSR 25 | grant 113039. 26 | 27 | Any opinions, findings, and conclusions or recommendations expressed 28 | in this material are those of the author(s) and do not necessarily 29 | reflect the views of the National Science Foundation. 30 | 31 | .. 
include:: ../links_names.txt -------------------------------------------------------------------------------- /doc/source/learning/Learning1.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: 3 | jupytext: 4 | cell_metadata_filter: all,-slideshow 5 | formats: ipynb,Rmd 6 | text_representation: 7 | extension: .Rmd 8 | format_name: rmarkdown 9 | format_version: '1.1' 10 | jupytext_version: 1.1.1 11 | kernelspec: 12 | display_name: Python 3 13 | language: python 14 | name: python3 15 | --- 16 | 17 | # Learning 1 18 | 19 | ```{python} 20 | import numpy as np 21 | print('notebook 1') 22 | ``` 23 | 24 | ```{python collapsed=TRUE} 25 | 26 | ``` 27 | -------------------------------------------------------------------------------- /doc/source/learning/Learning1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Learning 1" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "notebook 1\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "import numpy as np\n", 25 | "print('notebook 1')" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [] 36 | } 37 | ], 38 | "metadata": { 39 | "jupytext": { 40 | "cell_metadata_filter": "all,-slideshow", 41 | "formats": "ipynb,Rmd" 42 | }, 43 | "kernelspec": { 44 | "display_name": "Python 3", 45 | "language": "python", 46 | "name": "python3" 47 | }, 48 | "language_info": { 49 | "codemirror_mode": { 50 | "name": "ipython", 51 | "version": 3 52 | }, 53 | "file_extension": ".py", 54 | "mimetype": "text/x-python", 55 | "name": "python", 56 | "nbconvert_exporter": "python", 57 | "pygments_lexer": "ipython3", 58 | "version": "3.6.2" 59 | } 60 | }, 61 | "nbformat": 4, 62 | "nbformat_minor": 2 63 | } 64 | -------------------------------------------------------------------------------- /doc/source/learning/Learning2.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: 3 | jupytext: 4 | cell_metadata_filter: all,-slideshow 5 | formats: ipynb,Rmd 6 | text_representation: 7 | extension: .Rmd 8 | format_name: rmarkdown 9 | format_version: '1.1' 10 | jupytext_version: 1.1.1 11 | kernelspec: 12 | display_name: Python 3 13 | language: python 14 | name: python3 15 | --- 16 | 17 | # Learning 2 18 | 19 | ```{python} 20 | import numpy as np 21 | print('notebook 2') 22 | ``` 23 | 24 | ```{python collapsed=TRUE} 25 | 26 | ``` 27 | -------------------------------------------------------------------------------- /doc/source/learning/Learning2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Learning 2" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "notebook 2\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "import numpy as np\n", 25 | "print('notebook 2')" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [] 36 | } 
37 | ], 38 | "metadata": { 39 | "jupytext": { 40 | "cell_metadata_filter": "all,-slideshow", 41 | "formats": "ipynb,Rmd" 42 | }, 43 | "kernelspec": { 44 | "display_name": "Python 3", 45 | "language": "python", 46 | "name": "python3" 47 | }, 48 | "language_info": { 49 | "codemirror_mode": { 50 | "name": "ipython", 51 | "version": 3 52 | }, 53 | "file_extension": ".py", 54 | "mimetype": "text/x-python", 55 | "name": "python", 56 | "nbconvert_exporter": "python", 57 | "pygments_lexer": "ipython3", 58 | "version": "3.6.2" 59 | } 60 | }, 61 | "nbformat": 4, 62 | "nbformat_minor": 2 63 | } 64 | -------------------------------------------------------------------------------- /doc/source/learning/index.rst: -------------------------------------------------------------------------------- 1 | Learning selection 2 | ------------------ 3 | 4 | This package illustrates examples in `Inference after selection through a black box `_ 5 | as well as generalizations based on learning multiparameter functions rather than the simple univariate 6 | case considered above. 7 | 8 | .. toctree:: 9 | :maxdepth: 2 10 | 11 | Learning1.ipynb 12 | Learning2.ipynb -------------------------------------------------------------------------------- /doc/source/license.rst: -------------------------------------------------------------------------------- 1 | .. _selectinf-license: 2 | 3 | ======================================= 4 | Selective Inference License Information 5 | ======================================= 6 | 7 | .. _selectinf-software-license: 8 | 9 | Software License 10 | ----------------- 11 | 12 | Except where otherwise noted, all `selective-inference `_ software is licensed under a 13 | `revised BSD license `_. 14 | 15 | .. _selectinf-documentation-license: 16 | 17 | Documentation License 18 | --------------------- 19 | 20 | Except where otherwise noted, all `selective-inference `_ documentation is licensed under a 21 | `Creative Commons Attribution 3.0 License `_. 22 | 23 | All code fragments in the documentation are licensed under our 24 | software license. 25 | -------------------------------------------------------------------------------- /doc/source/links_names.txt: -------------------------------------------------------------------------------- 1 | .. This (-*- rst -*-) format file contains commonly used link targets 2 | and name substitutions. It may be included in many files, 3 | therefore it should only contain link targets and name 4 | substitutions. Try grepping for "^\.\. _" to find plausible 5 | candidates for this list. 6 | 7 | .. NOTE: reST targets are 8 | __not_case_sensitive__, so only one target definition is needed for 9 | nipy, NIPY, Nipy, etc... 10 | 11 | .. Post selection papers 12 | .. _covtest: http://arxiv.org/abs/1301.7161 13 | 14 | .. Documentation tools 15 | .. _graphviz: http://www.graphviz.org/ 16 | .. _Sphinx: http://sphinx.pocoo.org/ 17 | .. _`Sphinx reST`: http://sphinx.pocoo.org/rest.html 18 | .. _reST: http://docutils.sourceforge.net/rst.html 19 | .. _docutils: http://docutils.sourceforge.net 20 | 21 | .. Licenses 22 | .. _GPL: http://www.gnu.org/licenses/gpl.html 23 | .. _BSD: http://www.opensource.org/licenses/bsd-license.php 24 | .. _LGPL: http://www.gnu.org/copyleft/lesser.html 25 | .. _MIT License: http://www.opensource.org/licenses/mit-license.php 26 | 27 | .. Working process 28 | .. _sourceforge: http://nipy.sourceforge.net/ 29 | .. _github: http://github.com 30 | 31 | .. Code support stuff 32 | .. _pychecker: http://pychecker.sourceforge.net/ 33 | .. 
_pylint: http://www.logilab.org/project/pylint 34 | .. _pyflakes: http://divmod.org/trac/wiki/DivmodPyflakes 35 | .. _virtualenv: http://pypi.python.org/pypi/virtualenv 36 | .. _git: http://git.or.cz/ 37 | .. _flymake: http://flymake.sourceforge.net/ 38 | .. _rope: http://rope.sourceforge.net/ 39 | .. _pymacs: http://pymacs.progiciels-bpi.ca/pymacs.html 40 | .. _ropemacs: http://rope.sourceforge.net/ropemacs.html 41 | .. _ECB: http://ecb.sourceforge.net/ 42 | .. _emacs_python_mode: http://www.emacswiki.org/cgi-bin/wiki/PythonMode 43 | .. _doctest-mode: http://www.cis.upenn.edu/~edloper/projects/doctestmode/ 44 | .. _nose: http://somethingaboutorange.com/mrl/projects/nose 45 | .. _`python coverage tester`: http://nedbatchelder.com/code/modules/coverage.html 46 | 47 | .. Other python projects 48 | .. _numpy: http://www.scipy.org/NumPy 49 | .. _scipy: http://www.scipy.org 50 | .. _cython: http://www.cython.org/ 51 | .. _ipython: http://ipython.scipy.org 52 | .. _`ipython manual`: http://ipython.scipy.org/doc/manual/html 53 | .. _matplotlib: http://matplotlib.sourceforge.net 54 | .. _python: http://www.python.org 55 | .. _networkx: http://networkx.lanl.gov/ 56 | 57 | .. General software 58 | .. _gcc: http://gcc.gnu.org 59 | .. _xcode: http://developer.apple.com/TOOLS/xcode 60 | .. _mingw: http://www.mingw.org 61 | .. _macports: http://www.macports.org/ 62 | 63 | -------------------------------------------------------------------------------- /doc/source/randomized/index.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | Randomized algorithms 3 | ===================== 4 | 5 | This module implements several methods for inference after a randomized 6 | selection as described in this paper on `proximal change of variables `_ 7 | 8 | .. toctree:: 9 | :maxdepth: 2 10 | 11 | lasso.ipynb 12 | -------------------------------------------------------------------------------- /doc/source/sphinxext/math_dollar.py: -------------------------------------------------------------------------------- 1 | # emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 2 | # vi: set ft=python sts=4 ts=4 sw=4 et: 3 | ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 4 | # 5 | # See COPYING file distributed along with the NiBabel package for the 6 | # copyright and license terms. 7 | # 8 | ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 9 | import re 10 | 11 | def dollars_to_math(source): 12 | r""" 13 | Replace dollar signs with backticks. 14 | 15 | More precisely, do a regular expression search. Replace a plain 16 | dollar sign ($) by a backtick (`). Replace an escaped dollar sign 17 | (\$) by a dollar sign ($). Don't change a dollar sign preceded or 18 | followed by a backtick (`$ or $`), because of strings like 19 | "``$HOME``". Don't make any changes on lines starting with 20 | spaces, because those are indented and hence part of a block of 21 | code or examples. 22 | 23 | This also doesn't replace dollar signs enclosed in curly braces, 24 | to avoid nested math environments, such as :: 25 | 26 | $f(n) = 0 \text{ if $n$ is prime}$ 27 | 28 | Thus the above line would get changed to 29 | 30 | `f(n) = 0 \text{ if $n$ is prime}` 31 | """ 32 | s = "\n".join(source) 33 | if s.find("$") == -1: 34 | return 35 | # This searches for "$blah$" inside a pair of curly braces -- 36 | # don't change these, since they're probably coming from a nested 37 | # math environment. 
So for each match, we replace it with a temporary 38 | # string, and later on we substitute the original back. 39 | global _data 40 | _data = {} 41 | def repl(matchobj): 42 | global _data 43 | s = matchobj.group(0) 44 | t = "___XXX_REPL_%d___" % len(_data) 45 | _data[t] = s 46 | return t 47 | s = re.sub(r"({[^{}$]*\$[^{}$]*\$[^{}]*})", repl, s) 48 | # matches $...$ 49 | dollars = re.compile(r"(? b1*xy[,1]) & (xy[,2] < b2 * xy[,1])) 18 | z <- rep(NA,nrow(xy)) 19 | z[good] <- ci.len(xy[good,1],xy[good,2]) 20 | z 21 | } 22 | 23 | ci.len <- function(x,y) { 24 | cutoff.x <- ifelse(x>y,(y-x)/(b1-1),(y-x)/(b2-1)) 25 | cutoff.y <- ifelse(x>y,b1*(y-x)/(b1-1),b2*(y-x)/(b2-1)) 26 | cutoff <- (cutoff.x + cutoff.y)/sqrt(2) 27 | observed <- (x+y)/sqrt(2) 28 | apply(cbind(observed,cutoff),1,function(x) { 29 | ci <- try(ShortestCI(x[1],1,x[2],.05),silent=TRUE) 30 | if(is.list(ci)) { 31 | return(ci$upper - ci$lower) 32 | } else { 33 | return(NA) 34 | } 35 | }) 36 | } 37 | 38 | xy <- expand.grid(c(-1,seq(0,4,.02)),c(-1,seq(0,4,.02))) 39 | z <- ci.len.wrapper(xy) 40 | ## This is a hack because of a bug in the package 41 | z[xy[,1] > 0 & abs(xy[,1]-xy[,2])<.023] <- 2*1.96 42 | 43 | 44 | rast <- rasterFromXYZ(cbind(xy,z)) 45 | 46 | pdf("CILengthCorr.pdf") 47 | plot(rast,xlim=c(-2.5,4),ylim=c(-2.5,4),xlab=expression(y[1]),ylab=expression(y[2]),col=rev(heat.colors(20)), 48 | main="CI Length for Univariate Model") 49 | abline(h=0,lty=3,col="gray") 50 | abline(v=0,lty=3,col="gray") 51 | arrows(x0=c(0,0,0),y0=c(0,0,0),x1=c(x[1,]),y1=c(x[2,]),length=.15) 52 | abline(0,b1,lty=2) 53 | abline(0,b2,lty=2) 54 | dev.off() 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cython 2 | numpy 3 | scipy 4 | pandas 5 | mpmath 6 | pyinter 7 | sklearn 8 | regreg 9 | # keras 10 | # tensorflow 11 | traitlets 12 | -------------------------------------------------------------------------------- /sandbox/absurd.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | import kmeans 4 | import numpy as np 5 | 6 | kmeans = importlib.reload(kmeans) 7 | 8 | n = 20 9 | p = 5 10 | n_sample = 50 11 | p_array = [] 12 | 13 | t_distance = [0] 14 | #distance = 5 15 | 16 | import matplotlib.pyplot as plt 17 | x = np.arange(0, 1, 1./n_sample) 18 | plt.plot(x, x, 'g') 19 | 20 | for distance in t_distance: 21 | i = 0 22 | while i < n_sample: 23 | compteur_bug = 0 24 | if True: #i%1 == 0: 25 | print(i, " / ", n_sample, distance) 26 | try: 27 | #kmeans = importlib.reload(kmeans) 28 | p_value = kmeans.f(n, p, distance)[0] 29 | if p_value > 0 and p_value < 1: 30 | p_array.append(p_value) 31 | i += 1 32 | except: 33 | raise 34 | 35 | 36 | 37 | 38 | p_array = sorted(p_array) 39 | print(p_array) 40 | 41 | plt.plot(x, p_array, 'b') 42 | 43 | 44 | 45 | plt.show() 46 | -------------------------------------------------------------------------------- /sandbox/bayesian/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/sandbox/bayesian/__init__.py -------------------------------------------------------------------------------- /sandbox/bayesian/crime_data_attempt.py: -------------------------------------------------------------------------------- 1 | 2 | import os, numpy as np, pandas, statsmodels.api as sm 3 | import time 4 | import matplotlib.pyplot
as plt 5 | import regreg.api as rr 6 | from selection.reduced_optimization.initial_soln import selection 7 | from selection.randomized.api import randomization 8 | from selection.reduced_optimization.lasso_reduced import nonnegative_softmax_scaled, neg_log_cube_probability, selection_probability_lasso, \ 9 | sel_prob_gradient_map_lasso, selective_inf_lasso 10 | 11 | crime = pandas.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/communities/communities.data', header=None, na_values=['?']) 12 | crime = crime.iloc[:, 5:] 13 | crime.dropna(inplace=True) 14 | crime.head() 15 | 16 | # define X and y 17 | X = crime.iloc[:, :-1] 18 | n, p = X.shape 19 | X -= X.mean(0)[None, :] 20 | X /= (X.std(0)[None, :] * np.sqrt(n)) 21 | 22 | Y = crime.iloc[:, -1] 23 | print("shape", X.shape, Y.shape) 24 | 25 | ols_fit = sm.OLS(Y, X).fit() 26 | print("residual", np.linalg.norm(ols_fit.resid)) 27 | sigma_3TC = np.linalg.norm(ols_fit.resid) / np.sqrt(n-p-1) 28 | OLS_3TC = ols_fit.params 29 | print("sigma", sigma_3TC) 30 | -------------------------------------------------------------------------------- /sandbox/bayesian/mixed_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class instance_mixed(object): 4 | 5 | def __init__(self, n, p, s, sigma=1., rho=0, random_signs=False, scale =True, center=True): 6 | (self.n, self.p, self.s, 7 | self.sigma, 8 | self.rho) = (n, p, s, 9 | sigma, 10 | rho) 11 | 12 | self.X = (np.sqrt(1 - self.rho) * np.random.standard_normal((self.n, self.p)) + 13 | np.sqrt(self.rho) * np.random.standard_normal(self.n)[:, None]) 14 | if center: 15 | self.X -= self.X.mean(0)[None, :] 16 | if scale: 17 | self.X /= (self.X.std(0)[None, :] * np.sqrt(self.n)) 18 | 19 | self.beta = np.zeros(p) 20 | self.beta[:self.s] = np.linspace(0.5, 5.0, num=s) 21 | if random_signs: 22 | self.beta[:self.s] *= (2 * np.random.binomial(1, 0.5, size=(s,)) - 1.) 23 | self.active = np.zeros(p, np.bool) 24 | self.active[:self.s] = True 25 | 26 | def _noise(self): 27 | return np.random.standard_normal(self.n) 28 | 29 | def generate_response(self): 30 | 31 | Y = (self.X.dot(self.beta) + self._noise()) * self.sigma 32 | return self.X, Y, self.beta * self.sigma, np.nonzero(self.active)[0], self.sigma 33 | -------------------------------------------------------------------------------- /sandbox/bayesian/read_file.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os, numpy as np, pandas, statsmodels.api as sm 3 | 4 | #path =r'/Users/snigdhapanigrahi/Results_freq_EQTL/sparsity_5/dim_1/dim_1' 5 | #path =r'/Users/snigdhapanigrahi/Results_reduced_optimization/fixed_lasso/fixed_lasso' 6 | 7 | path =r'/Users/snigdhapanigrahi/Results_reduced_optimization/experiment_dual_0' 8 | #path =r'/Users/snigdhapanigrahi/Results_reduced_optimization/bayesian_dual' 9 | allFiles = glob.glob(path + "/*.txt") 10 | 11 | list_ = [] 12 | for file_ in allFiles: 13 | df = np.loadtxt(file_) 14 | list_.append(df) 15 | 16 | def summary_files(list_): 17 | 18 | coverage_ad = 0. 19 | coverage_unad = 0. 20 | length_ad = 0. 21 | length_unad = 0. 22 | loss_ad = 0. 23 | loss_unad = 0. 
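# Each result file stores six numbers in a fixed order -- adjusted/unadjusted
# coverage, adjusted/unadjusted interval length and adjusted/unadjusted loss --
# which the loop below accumulates and averages over the simulation files.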
24 | 25 | length = len(list_) 26 | print("number of simulations", length) 27 | 28 | for i in range(length): 29 | print("iteration", i) 30 | lasso = list_[i].reshape((6, 1)) 31 | coverage_ad += lasso[0,0] 32 | coverage_unad += lasso[1,0] 33 | length_ad += lasso[2,0] 34 | length_unad += lasso[3,0] 35 | loss_ad += lasso[4,0] 36 | loss_unad += lasso[5, 0] 37 | 38 | return coverage_ad / length, coverage_unad / length, length_ad / length, length_unad / length,\ 39 | loss_ad/length, loss_unad/length 40 | 41 | print(summary_files(list_)) 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /sandbox/randomized_tests/test_reconstruction.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | 4 | import regreg.api as rr 5 | 6 | from selection.tests.decorators import wait_for_return_value, register_report 7 | import selection.tests.reports as reports 8 | 9 | from selection.api import multiple_queries 10 | from selection.randomized.glm import split_glm_group_lasso, target as glm_target 11 | from selection.tests.instance import logistic_instance 12 | 13 | @wait_for_return_value() 14 | def test_reconstruction(s=3, 15 | n=200, 16 | p=50, 17 | signal=7, 18 | rho=0.1, 19 | split_frac=0.8, 20 | lam_frac=0.7, 21 | ndraw=100, 22 | burnin=200, 23 | bootstrap=True, 24 | solve_args={'min_its':50, 'tol':1.e-10}, 25 | reference_known=False): 26 | 27 | X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, signal=signal) 28 | 29 | m = int(split_frac * n) 30 | nonzero = np.where(beta)[0] 31 | 32 | loss = rr.glm.logistic(X, y) 33 | epsilon = 1. / np.sqrt(n) 34 | 35 | lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 2000)))).max(0)) 36 | W = np.ones(p)*lam 37 | W[0] = 0 # use at least some unpenalized 38 | penalty = rr.group_lasso(np.arange(p), 39 | weights=dict(zip(np.arange(p), W)), lagrange=1.) 
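# `lam` above is the usual theoretical tuning parameter for the logistic case:
# lam_frac times the average maximum of |X^T eps| over simulated Bernoulli(1/2)
# noise vectors eps. With singleton groups the group lasso below is an ordinary
# weighted lasso, with coordinate 0 left unpenalized (W[0] = 0).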
40 | 41 | M_est = split_glm_group_lasso(loss, epsilon, m, penalty) 42 | mv = multiple_queries([M_est]) 43 | mv.solve() 44 | 45 | # selection_variable['variables'] is a boolean mask over all p coefficients 46 | nactive = np.sum(M_est.selection_variable['variables']) 47 | 48 | if nactive==0: 49 | return None 50 | 51 | if set(nonzero).issubset(np.nonzero(M_est.selection_variable['variables'])[0]): 52 | 53 | active_set = np.nonzero(M_est.selection_variable['variables'])[0] 54 | 55 | target_sampler, target_observed = glm_target(loss, 56 | M_est.selection_variable['variables'], 57 | mv) 58 | 59 | target_sample = target_sampler.sample(ndraw=ndraw, 60 | burnin=burnin, 61 | keep_opt=True) 62 | 63 | reconstruction = target_sampler.reconstruct(target_sample) 64 | logdens = target_sampler.log_density(target_sample) 65 | return logdens.shape 66 | -------------------------------------------------------------------------------- /sandbox/tensorflow_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow_fit 2 | import tensorflow as tf 3 | import numpy as np 4 | ntries, sigma, q = 21, 1, 0.3 5 | Z = np.linspace(-8, 8, 1001) 6 | 7 | def algorithm(Z, ntries=ntries, q=q): 8 | proportion = 0 9 | for _ in range(ntries): 10 | proportion += ((Z + sigma * np.random.standard_normal() > 0) * 11 | (Z + 1 + sigma * np.random.standard_normal() > 0) * 12 | (Z - 0.5 + sigma * np.random.standard_normal() > 0)) 13 | proportion = proportion / ntries  # in-place /= would fail on the integer array 14 | return proportion > q 15 | 16 | import scipy.stats 17 | import matplotlib.pyplot as plt 18 | from selection.distributions.discrete_family import discrete_family  # all three used at the bottom of this script 19 | # a function that is parameterized by hyperparameters 20 | def create_network(num_hidden,num_outputs): 21 | def create(features): 22 | N = features.shape[0] 23 | X = features # np.reshape(features,(None,1)) 24 | hidA = tf.layers.Dense(activation=tf.nn.relu,units=num_hidden, name='hidA') 25 | outlayer = tf.layers.Dense(activation=tf.nn.relu,units=num_outputs, name='hid') 26 | #outlayer = tf.layers.Dense(activation=tf.nn.relu, name='hid') 27 | output = outlayer(hidA(X)) 28 | return output 29 | return create 30 | 31 | def fit_algorithm(algorithm, B=500, ntries=ntries, q=q, Zval=Z, link='probit'): 32 | 33 | Z = np.random.standard_normal(B) * 2 34 | Z = np.hstack([Z, 35 | np.random.standard_normal(B), 36 | np.random.standard_normal(B) * 3, 37 | np.random.standard_normal(B) * 0.5]) 38 | print('ZS=',Z.shape) 39 | 40 | # is there no "active part" that updates the Z proposals somewhere?
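# As written, no: the Z proposals are drawn once from the normal mixture above
# and never updated. `algorithm` is evaluated at each fixed Z, and the network
# below is fit to the resulting 0/1 labels, estimating the selection
# probability as a function of Z.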
41 | Y = np.array([algorithm(z, ntries=ntries, q=q) for z in Z]) 42 | optimize = tensorflow_fit.create_optimizer() # a default optimizer 43 | predictor_f = tensorflow_fit.fit(np.reshape(Z, (Z.shape[0], 1)), 44 | np.reshape(Y, (Y.shape[0], 1)), 45 | create_network(10, 1), 46 | tensorflow_fit.create_l2_loss, 47 | optimize) 48 | print('ZS2=',Zval.shape) 49 | return predictor_f(np.reshape(Zval,(Zval.shape[0],1))) 50 | 51 | def simulate(ntries=ntries, sigma=sigma, truth=0): 52 | 53 | while True: 54 | Z = np.random.standard_normal() + truth 55 | if algorithm(Z, ntries, q=q): 56 | return Z 57 | 58 | Z = np.linspace(-8, 8, 1001) 59 | W1 = fit_algorithm(algorithm, ntries=ntries, q=q, Zval=Z) 60 | print('done') 61 | plt.plot(Z, np.log(W1)) 62 | selective_law1 = discrete_family(Z, W1 * scipy.stats.norm.pdf(Z)) 63 | 64 | 65 | def pivot1(z, truth=0): 66 | return 1 - selective_law1.cdf(truth, z) 67 | 68 | -------------------------------------------------------------------------------- /sandbox/test_cover.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from selection.algorithms.tests.test_lasso import test_data_carving 4 | 5 | P = [] 6 | covered = [] 7 | 8 | num_except = 0 9 | for _ in range(500): 10 | try: 11 | results = test_data_carving(compute_intervals=True, 12 | burnin=5000, 13 | ndraw=10000)[0] 14 | covered.extend(results[-4]) 15 | P.extend(results[0]) 16 | print(np.mean(P), np.std(P), 'null') 17 | print(np.mean(covered), 'covered') 18 | 19 | except KeyboardInterrupt: 20 | raise KeyboardInterrupt 21 | except: 22 | num_except += 1; print('num except: %d' % num_except) 23 | pass 24 | 25 | 26 | -------------------------------------------------------------------------------- /sandbox/test_isotonic.py: -------------------------------------------------------------------------------- 1 | from isotonic import isotonic  # isotonic.py sits alongside this script in sandbox/ 2 | import numpy as np 3 | 4 | def test_isotonic(): 5 | y = np.random.standard_normal(50) 6 | I = isotonic(y) 7 | print(I.first_jump) 8 | print(I.largest_jump) 9 | print(I.combine_jumps(2)) 10 | -------------------------------------------------------------------------------- /selectinf/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /selectinf/algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/algorithms/__init__.py -------------------------------------------------------------------------------- /selectinf/algorithms/api.py: -------------------------------------------------------------------------------- 1 | from .lasso import (lasso, 2 | data_carving as data_carving_lasso, 3 | additive_noise as additive_noise_lasso) 4 | 5 | from .sqrt_lasso import (choose_lambda as choose_lambda_sqrt_lasso, 6 | solve_sqrt_lasso) 7 | 8 | from .forward_step import (forward_step, 9 | info_crit_stop) 10 | 11 | from .covtest import (covtest, 12 | selected_covtest) 13 | -------------------------------------------------------------------------------- /selectinf/algorithms/pca.py: -------------------------------------------------------------------------------- 1 | """ 2 | Step 1 test based on largest singular vector. 3 | 4 | This is the test described in `Kac Rice`_ for $X=I$ and the penalty being the nuclear norm 5 | 6 | ..
math:: 7 | 8 | {\cal P}(\beta) = \sum_{i=1}^{\min(n,p)} \sigma_i(\beta) 9 | 10 | for $\beta \in \mathbb{R}^{n \times p}$. 11 | 12 | .. _Kac Rice: http://arxiv.org/abs/1308.3020 13 | """ 14 | 15 | import numpy as np 16 | from ..distributions.pvalue import general_pvalue 17 | 18 | def pvalue(X, sigma=1, nsim=5000): 19 | n, p = X.shape 20 | D = np.linalg.svd(X)[1] / sigma 21 | m = n+p-2 22 | H = np.zeros(m) 23 | 24 | nonzero = np.hstack([D[1:],-D[1:]]) 25 | H[:nonzero.shape[0]] = nonzero 26 | 27 | return max(0, min(general_pvalue(D[0], D[1], np.inf, H, nsim=nsim), 1)) 28 | -------------------------------------------------------------------------------- /selectinf/algorithms/screening.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.sparse import eye as sparse_eye 3 | 4 | from ..constraints.affine import constraints 5 | 6 | def _basis_vector(j,n): 7 | """ 8 | j-th elementary basis vector in R^n 9 | """ 10 | e = np.zeros(n) 11 | e[j] = 1. 12 | return e 13 | 14 | class topK(object): 15 | 16 | alpha = 0.1 17 | 18 | def __init__(self, X, Y, K, sigma, covariance=None): 19 | n, p = X.shape 20 | self.Z = np.dot(X.T, Y) 21 | self.X, self.Y = X, Y 22 | self.sign = np.sign(self.Z) 23 | self.covariance = covariance 24 | self.K = K 25 | order = np.argsort(np.fabs(self.Z)) 26 | self.selected = order[-K:] 27 | self.selected_sign = self.sign[order[-K:]] 28 | 29 | partial = np.identity(p)[order[:-K]] 30 | partial = np.vstack([partial, -partial]) 31 | 32 | full_matrix = [] 33 | for k in range(1, K+1): 34 | partial_cp = partial.copy() 35 | partial_cp[:,order[-k]] = -self.sign[order[-k]] 36 | full_matrix.append(np.dot(partial_cp, X.T)) 37 | linear_part = np.vstack(full_matrix) 38 | self.constraints = constraints(linear_part, 39 | np.zeros(linear_part.shape[0]), 40 | covariance=covariance) 41 | self.constraints.covariance *= sigma**2 42 | self.sigma = sigma 43 | 44 | @property 45 | def intervals(self):  # OLS intervals for active variables, adjusted for selection 46 | if not hasattr(self, "_intervals"): 47 | p = self.Z.shape[0] 48 | self._intervals = [] 49 | C = self.constraints 50 | for j in self.selected: 51 | s = self.sign[j] 52 | eta = self.X[:,j] * s 53 | _interval = C.interval(eta, 54 | self.Y, 55 | self.alpha) 56 | self._intervals.append((j, (eta*self.Y).sum(), 57 | _interval)) 58 | return self._intervals 59 | 60 | def test(): 61 | n, p, sigma = 40, 100, 1.4 62 | X = np.random.standard_normal((n,p)) 63 | Y = np.random.standard_normal(n) * sigma 64 | 65 | top10 = topK(X, Y, 10, sigma) 66 | return top10, top10.intervals 67 | -------------------------------------------------------------------------------- /selectinf/algorithms/stopping_rules.py: -------------------------------------------------------------------------------- 1 | """ 2 | Stopping rules used in sequential FDR control. 3 | 4 | See `http://arxiv.org/abs/1309.5352`_ 5 | 6 | """ 7 | 8 | import numpy as np 9 | 10 | def simple_stop(pvalues, alpha): 11 | """ 12 | Compute the number of rejections using 13 | simple stop: reject until the first p-value that exceeds 14 | alpha.
15 | 16 | Parameters 17 | ---------- 18 | 19 | pvalues : np.float 20 | 21 | alpha : float 22 | 23 | Returns 24 | ------- 25 | 26 | num_rejections : int 27 | 28 | """ 29 | if not np.all(pvalues <= alpha): 30 | return np.min(np.nonzero(pvalues > alpha)[0]) 31 | else: 32 | return pvalues.shape[0] 33 | 34 | def strong_stop(pvalues, alpha): 35 | """ 36 | 37 | Compute the number of rejections using 38 | strong stop of `http://arxiv.org/abs/1309.5352`_ 39 | 40 | >>> strong_stop(np.array([0.5,0.6,0.7,0.8,0.9]), 0.05) 41 | 0 42 | >>> strong_stop(np.array([0.001, 0.002, 0.0015, 0.0013, 0.05, 0.6]), 0.05) 43 | 3 44 | 45 | In R: 46 | 47 | > strongstop(c(0.001, 0.002, 0.0015, 0.0013, 0.05, 0.6), 0.05) 48 | [1] 3 49 | > strongstop(c(0.5,0.6,0.7,0.8,0.9), 0.05) 50 | [1] 0 51 | 52 | Parameters 53 | ---------- 54 | 55 | pvalues : np.float 56 | 57 | alpha : float 58 | 59 | Returns 60 | ------- 61 | 62 | num_rejections : int 63 | 64 | Based on R code: 65 | ---------------- 66 | 67 | strongstop <- function(p.values,alpha) { 68 | d <- length(p.values) 69 | lhs <- exp(rev(cumsum(rev(log(p.values)/(1:d))))) # LHS from G'Sell et al. 70 | rhs <- alpha * (1:d) / d # RHS from G'Sell et al. 71 | return(max(c(0,which(lhs <= rhs)))) 72 | } 73 | 74 | """ 75 | n = pvalues.shape[0] 76 | LHS = np.exp(np.cumsum((np.log(pvalues) / np.linspace(1., n, n))[::-1])[::-1]) 77 | RHS = alpha * np.linspace(1., n, n) / n 78 | if np.any(LHS <= RHS): 79 | return max(np.nonzero(LHS <= RHS)[0])+1 80 | return 0 81 | 82 | 83 | def forward_stop(pvalues, alpha): 84 | """ 85 | 86 | Compute the number of rejections using 87 | forward stop of `http://arxiv.org/abs/1309.5352`_ 88 | 89 | >>> forward_stop(np.array([0.5,0.6,0.7,0.8,0.9]), 0.05) 90 | 0 91 | >>> forward_stop(np.array([0.001, 0.002, 0.0015, 0.0013, 0.05, 0.6]), 0.05) 92 | 5 93 | 94 | In R: 95 | 96 | > forwardstop(c(0.5,0.6,0.7,0.8,0.9), 0.05) 97 | [1] 0 98 | > forwardstop(c(0.001, 0.002, 0.0015, 0.0013, 0.05, 0.6), 0.05) 99 | [1] 5 100 | > 101 | 102 | Parameters 103 | ---------- 104 | 105 | pvalues : np.float 106 | 107 | alpha : float 108 | 109 | Returns 110 | ------- 111 | 112 | num_rejections : int 113 | 114 | Based on R code: 115 | ---------------- 116 | 117 | forwardstop <- function(p, alpha) { 118 | m <- length(p) 119 | sums <- -(1/(1:m))*cumsum(log(1-p)) 120 | return(max(c(0, which(sums < alpha)))) 121 | } 122 | 123 | """ 124 | 125 | n = pvalues.shape[0] 126 | sums = (-1. 
/ np.linspace(1, n, n)) * np.cumsum(np.log(1 - pvalues)) 127 | if np.any(sums < alpha): 128 | return max(np.nonzero(sums < alpha)[0])+1 129 | return 0 130 | 131 | 132 | -------------------------------------------------------------------------------- /selectinf/algorithms/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/algorithms/tests/__init__.py -------------------------------------------------------------------------------- /selectinf/algorithms/tests/test_IC.py: -------------------------------------------------------------------------------- 1 | from copy import copy 2 | 3 | import numpy as np 4 | from ...tests.instance import gaussian_instance 5 | from ...constraints.affine import sample_from_constraints 6 | from ...distributions.discrete_family import discrete_family 7 | 8 | from ..forward_step import info_crit_stop 9 | 10 | def test_data_carving_IC(n=600, 11 | p=100, 12 | s=10, 13 | sigma=5, 14 | rho=0.25, 15 | signal=(3.5,5.), 16 | split_frac=0.9, 17 | ndraw=25000, 18 | burnin=5000, 19 | df=np.inf, 20 | coverage=0.90, 21 | compute_intervals=False): 22 | 23 | X, y, beta, active, sigma, _ = gaussian_instance(n=n, 24 | p=p, 25 | s=s, 26 | sigma=sigma, 27 | rho=rho, 28 | signal=signal, 29 | df=df, 30 | equicorrelated=False) 31 | mu = np.dot(X, beta) 32 | splitn = int(n*split_frac) 33 | indices = np.arange(n) 34 | np.random.shuffle(indices) 35 | stage_one = indices[:splitn] 36 | 37 | FS = info_crit_stop(y, X, sigma, cost=np.log(n), subset=stage_one) 38 | 39 | con = FS.constraints() 40 | 41 | X_E = X[:,FS.active] 42 | X_Ei = np.linalg.pinv(X_E) 43 | beta_bar = X_Ei.dot(y) 44 | mu_E = X_E.dot(beta_bar) 45 | sigma_E = np.linalg.norm(y-mu_E) / np.sqrt(n - len(FS.active)) 46 | 47 | con.mean[:] = mu_E 48 | con.covariance = sigma_E**2 * np.identity(n) 49 | 50 | print(sigma_E, sigma) 51 | Z = sample_from_constraints(con, 52 | y, 53 | ndraw=ndraw, 54 | burnin=burnin) 55 | 56 | pvalues = [] 57 | for idx, var in enumerate(FS.active): 58 | active = copy(FS.active) 59 | active.remove(var) 60 | X_r = X[:,active] # restricted design 61 | mu_r = X_r.dot(np.linalg.pinv(X_r).dot(y)) 62 | delta_mu = (mu_r - mu_E) / sigma_E**2 63 | 64 | W = np.exp(Z.dot(delta_mu)) 65 | fam = discrete_family(Z.dot(X_Ei[idx].T), W) 66 | pval = fam.cdf(0, x=beta_bar[idx]) 67 | pval = 2 * min(pval, 1 - pval) 68 | pvalues.append((pval, beta[var])) 69 | 70 | return pvalues 71 | -------------------------------------------------------------------------------- /selectinf/algorithms/tests/test_change_point.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..change_point import one_jump_instance, change_point 3 | 4 | def test_change_point(delta=0.1, p=60, sigma=1, plot=False): 5 | 6 | y, signal = one_jump_instance(delta, p, sigma) 7 | CP = change_point(y) 8 | fit, relaxed_fit, summary, segments = CP.fit() 9 | if plot: 10 | import matplotlib.pyplot as plt 11 | plt.figure(figsize=(8,6)) 12 | plt.scatter(np.arange(y.shape[0]), y) 13 | plt.plot(fit, 'r', label='Penalized', linewidth=3) 14 | plt.plot(relaxed_fit, 'k', label='Relaxed', linewidth=3) 15 | plt.plot(signal, 'g', label='Truth', linewidth=3) 16 | plt.legend(loc='upper left') 17 | return segments 18 | -------------------------------------------------------------------------------- /selectinf/algorithms/tests/test_data_carving.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ...tests.instance import gaussian_instance 3 | from ..lasso import data_carving, data_splitting 4 | 5 | def sim(): 6 | X, Y, _, active, sigma = gaussian_instance() 7 | print(sigma) 8 | G = data_carving.gaussian(X, Y, 1., split_frac=0.9, sigma=sigma) 9 | G.fit() 10 | if set(active).issubset(G.active) and G.active.shape[0] > len(active): 11 | return [G.hypothesis_test(G.active[len(active)], burnin=5000, ndraw=10000)] 12 | return [] 13 | 14 | def sim2(): 15 | X, Y, _, active, sigma = gaussian_instance(n=150, s=3) 16 | G = data_splitting.gaussian(X, Y, 5., split_frac=0.5, sigma=sigma) 17 | G.fit(use_full=True) 18 | if set(active).issubset(G.active) and G.active.shape[0] > len(active): 19 | return [G.hypothesis_test(G.active[len(active)])] 20 | return [] 21 | 22 | -------------------------------------------------------------------------------- /selectinf/algorithms/tests/test_screening.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..screening import topK 3 | import nose.tools as nt 4 | 5 | def test_class(threshold=1): 6 | 7 | Z = np.random.standard_normal(10) 8 | C = np.eye(10) 9 | M = topK(C, Z, 1, 1) 10 | M.constraints 11 | 12 | M.intervals 13 | return M 14 | 15 | -------------------------------------------------------------------------------- /selectinf/algorithms/tests/test_softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.testing.decorators as dec 3 | 4 | from itertools import product 5 | from ..softmax import softmax_objective 6 | 7 | @dec.skipif(True, "need some tests for softmax objective") 8 | def test_softmax(): 9 | raise ValueError('need some tests for softmax objective') 10 | -------------------------------------------------------------------------------- /selectinf/algorithms/tests/test_sqrt_lasso.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import numpy as np 4 | import numpy.testing.decorators as dec 5 | import nose.tools as nt 6 | 7 | import regreg.api as rr 8 | 9 | from ...tests.instance import gaussian_instance as instance 10 | from ...tests.decorators import (set_sampling_params_iftrue, 11 | set_seed_iftrue, 12 | wait_for_return_value) 13 | 14 | from ...tests.flags import SET_SEED, SMALL_SAMPLES 15 | from ..sqrt_lasso import (solve_sqrt_lasso, 16 | choose_lambda, 17 | goodness_of_fit, 18 | sqlasso_objective, 19 | sqlasso_objective_skinny, 20 | solve_sqrt_lasso_fat, 21 | solve_sqrt_lasso_skinny) 22 | from ..lasso import lasso 23 | 24 | @wait_for_return_value() 25 | @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=10, burnin=10, ndraw=10) 26 | @dec.slow 27 | def test_goodness_of_fit(n=20, p=25, s=10, sigma=20., 28 | nsim=10, burnin=2000, ndraw=8000): 29 | P = [] 30 | while True: 31 | y = np.random.standard_normal(n) * sigma 32 | beta = np.zeros(p) 33 | X = np.random.standard_normal((n,p)) + 0.3 * np.random.standard_normal(n)[:,None] 34 | X /= (X.std(0)[None,:] * np.sqrt(n)) 35 | y += np.dot(X, beta) * sigma 36 | lam_theor = .7 * choose_lambda(X, quantile=0.9) 37 | L = lasso.sqrt_lasso(X, y, lam_theor) 38 | L.fit() 39 | pval = goodness_of_fit(L, 40 | lambda x: np.max(np.fabs(x)), 41 | burnin=burnin, 42 | ndraw=ndraw) 43 | P.append(pval) 44 | Pa = np.array(P) 45 | Pa = Pa[~np.isnan(Pa)] 46 | if (~np.isnan(np.array(Pa))).sum() >= nsim: 
47 | break 48 | 49 | return Pa, np.zeros_like(Pa, np.bool) 50 | 51 | @set_seed_iftrue(SET_SEED) 52 | def test_skinny_fat(): 53 | 54 | X, Y = instance()[:2] 55 | n, p = X.shape 56 | lam = choose_lambda(X) 57 | obj1 = sqlasso_objective(X, Y) 58 | obj2 = sqlasso_objective_skinny(X, Y) 59 | soln1 = solve_sqrt_lasso_fat(X, Y, weights=np.ones(p) * lam, solve_args={'min_its':500})[0] 60 | soln2 = solve_sqrt_lasso_skinny(X, Y, weights=np.ones(p) * lam, solve_args={'min_its':500})[0] 61 | 62 | np.testing.assert_allclose(soln1, soln2, rtol=1.e-3) 63 | 64 | X, Y = instance(p=50)[:2] 65 | n, p = X.shape 66 | lam = choose_lambda(X) 67 | obj1 = sqlasso_objective(X, Y) 68 | obj2 = sqlasso_objective_skinny(X, Y) 69 | soln1 = solve_sqrt_lasso_fat(X, Y, weights=np.ones(p) * lam, solve_args={'min_its':500})[0] 70 | soln2 = solve_sqrt_lasso_skinny(X, Y, weights=np.ones(p) * lam, solve_args={'min_its':500})[0] 71 | 72 | np.testing.assert_allclose(soln1, soln2, rtol=1.e-3) 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /selectinf/api.py: -------------------------------------------------------------------------------- 1 | from .constraints.api import * 2 | from .algorithms.api import * 3 | from .distributions.api import * 4 | from .randomized.api import * 5 | from .truncated.api import * 6 | from .sampling.api import * 7 | -------------------------------------------------------------------------------- /selectinf/base.py: -------------------------------------------------------------------------------- 1 | import regreg.api as rr 2 | import regreg.affine as ra 3 | 4 | def restricted_estimator(loss, active, solve_args={'min_its':50, 'tol':1.e-10}): 5 | """ 6 | Fit a restricted model using only columns `active`. 7 | 8 | Parameters 9 | ---------- 10 | 11 | loss : objective function 12 | A GLM loss. 13 | 14 | active : ndarray 15 | Which columns to use. 16 | 17 | solve_args : dict 18 | Passed to `solve`. 19 | 20 | Returns 21 | ------- 22 | 23 | soln : ndarray 24 | Solution to restricted problem.
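Notes
-----

A sketch of typical use (assuming a regreg GLM loss, as elsewhere in this
package): ``restricted_estimator(rr.glm.gaussian(X, y), active)`` refits the
Gaussian model using only ``X[:, active]``, i.e. the least squares fit of
``y`` on the active columns.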
25 | 26 | """ 27 | X, Y = loss.data 28 | 29 | if not loss._is_transform and hasattr(loss, 'saturated_loss'): # M_est is a glm 30 | X_restricted = X[:,active] 31 | loss_restricted = rr.affine_smooth(loss.saturated_loss, X_restricted) 32 | else: 33 | I_restricted = ra.selector(active, ra.astransform(X).input_shape[0], ra.identity((active.sum(),))) 34 | loss_restricted = rr.affine_smooth(loss, I_restricted.T) 35 | beta_E = loss_restricted.solve(**solve_args) 36 | 37 | return beta_E 38 | -------------------------------------------------------------------------------- /selectinf/constraints/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /selectinf/constraints/api.py: -------------------------------------------------------------------------------- 1 | from .affine import constraints as affine_constraints 2 | -------------------------------------------------------------------------------- /selectinf/constraints/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/constraints/tests/__init__.py -------------------------------------------------------------------------------- /selectinf/constraints/tests/test_quasi.py: -------------------------------------------------------------------------------- 1 | """ 2 | test_quasi.py 3 | Date: 2014-10-17 4 | Author: Xiaoying Tian 5 | """ 6 | 7 | from __future__ import division, print_function 8 | import nose.tools as nt 9 | import numpy as np 10 | 11 | from ..quasi_affine import (quadratic_inequality_solver, 12 | intersection, 13 | sqrt_inequality_solver) 14 | from ...tests.flags import SET_SEED 15 | from ...tests.decorators import set_seed_iftrue 16 | 17 | def test_quadratic_solver(): 18 | yield np.testing.assert_almost_equal, quadratic_inequality_solver(7,0.,-28),[[-2.0,2.0]] 19 | yield (np.testing.assert_almost_equal, quadratic_inequality_solver(1,-1,-5.), 20 | [[-1.7912878474779199, 2.7912878474779199]]) 21 | yield (np.testing.assert_almost_equal, quadratic_inequality_solver(1,-1,5.), [[]]) 22 | yield (np.testing.assert_almost_equal, quadratic_inequality_solver(-1,-1,-5.), 23 | [[float("-inf"), float("inf")]]) 24 | yield (np.testing.assert_almost_equal, 25 | quadratic_inequality_solver(-1,6,-5.), [[float("-inf"), 1.0], [5.0, float("inf")]]) 26 | yield (np.testing.assert_almost_equal, quadratic_inequality_solver(0.,6,-5.), 27 | [[float("-inf"), 0.8333333333333334]]) 28 | yield (np.testing.assert_almost_equal, 29 | quadratic_inequality_solver(0.,6,5.),[[float("-inf"), -0.8333333333333334]]) 30 | yield nt.assert_raises, ValueError, quadratic_inequality_solver, 0., 0., 5. 
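# Each case above checks the solution set of a*x^2 + b*x + c <= 0, returned as
# a list of closed intervals: e.g. 7x^2 - 28 <= 0 gives [-2, 2]. Passing
# "greater than" (next case) flips the inequality, so x^2 + 3x + 2 >= 0 gives
# (-inf, -2] together with [-1, inf).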
31 | yield (np.testing.assert_almost_equal, 32 | quadratic_inequality_solver(1,3,2,"greater than"), [[float("-inf"), -2.], [-1., float("inf")]]) 33 | 34 | def test_intersection(): 35 | yield np.testing.assert_almost_equal, intersection([], []), [] 36 | yield np.testing.assert_almost_equal, intersection([], [1,2]), [] 37 | yield np.testing.assert_almost_equal, intersection([2,3], []), [] 38 | yield np.testing.assert_almost_equal, intersection([2,3], [1,2]), [] 39 | yield np.testing.assert_almost_equal, intersection([3,4], [1,2]), [] 40 | yield np.testing.assert_almost_equal, intersection([-1,4], [1,2]), [1,2] 41 | yield np.testing.assert_almost_equal, intersection([1,4], [-1,2]), [1,2] 42 | yield np.testing.assert_almost_equal, intersection([1,4], [-1,12]), [1,4] 43 | 44 | @set_seed_iftrue(SET_SEED) 45 | def test_sqrt_solver(): 46 | a, b, c = np.random.randint(-50, 51, 3)  # random_integers is deprecated; randint's upper bound is exclusive 47 | n = 100 48 | intervals = sqrt_inequality_solver(a, b, c, n) 49 | print(a, b, c, intervals) 50 | for x in np.linspace(-20, 20): 51 | hold = (func(x, a, b, c, n) <= 0) 52 | in_interval = any([contains(x, I) for I in intervals]) 53 | yield (np.testing.assert_almost_equal, np.array(hold, np.float), 54 | np.array(in_interval, np.float)) 55 | 56 | 57 | def contains(x, I): 58 | if I: 59 | return (x >= I[0] and x <= I[1]) 60 | else: 61 | return False 62 | 63 | 64 | def func(x, a, b, c, n): 65 | return a*x + b * np.sqrt(n + x**2) - c 66 | 67 | -------------------------------------------------------------------------------- /selectinf/constraints/tests/test_unknown_sigma.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | from .. import affine 4 | from ..quasi_affine import constraints_unknown_sigma 5 | 6 | def simulate(A=None, theta=0, R=None, eta=None): 7 | 8 | n = 22 9 | p = 4 10 | k = 18 11 | if R is None: 12 | R = np.linalg.svd(np.random.standard_normal((n,n-k)), full_matrices=0)[0] 13 | R = np.dot(R, R.T) 14 | R = 0.1 * R + np.diag([0]*p + [1.]
* (n-p)) 15 | R = np.linalg.svd(R, full_matrices=0)[0] 16 | R = R[:,:(n-p)] 17 | R = np.dot(R, R.T) 18 | if A is None: 19 | A = np.diag([1.]*p) + 0.05 * np.random.standard_normal((p,p)) 20 | sel = np.identity(n)[:p] 21 | A = np.dot(A, sel) 22 | b = -np.ones(p) 23 | n = R.shape[0] 24 | df = np.diag(R).sum() 25 | 26 | if eta is None: 27 | eta = np.random.standard_normal(n) * 3 28 | eta = eta - np.dot(R, eta) 29 | 30 | counter = 0 31 | while True: 32 | counter += 1 33 | Z = np.random.standard_normal(n) * 1.5 + eta * theta / np.linalg.norm(eta)**2 34 | sigma_hat = np.linalg.norm(np.dot(R, Z)) / np.sqrt(df) 35 | if np.all(np.dot(A, Z) <= b * sigma_hat): 36 | return A, b, R, Z, eta, counter 37 | if counter >= 1000: 38 | break 39 | return None 40 | 41 | 42 | def instance(theta=0, A=None, R=None, eta=None): 43 | 44 | result = None 45 | while not result: 46 | result = simulate(theta=theta, A=A, R=R, eta=eta) 47 | 48 | A, b, R, Z, eta, counter = result 49 | from ..truncated_T import truncated_T 50 | 51 | intervals, obs = constraints_unknown_sigma(A, b, Z, eta, R, 52 | value_under_null=theta) 53 | df = np.diag(R).sum() 54 | truncT = truncated_T(np.array([(interval.lower_value, 55 | interval.upper_value) for interval in intervals]), df) 56 | sf = truncT.sf(obs) 57 | pval = 2 * min(sf, 1.-sf) 58 | if pval < 1.e-6: 59 | print(sf, obs, intervals) 60 | return float(pval) 61 | 62 | if __name__ == "__main__": 63 | 64 | P = [] 65 | 66 | n = 22 67 | p = 4 68 | k = 18 69 | 70 | A = np.diag([1.]*p) + 0.05 * np.random.standard_normal((p,p)) 71 | sel = np.identity(n)[:p] 72 | A = np.dot(A, sel) 73 | 74 | R = np.linalg.svd(np.random.standard_normal((n,n-k)), full_matrices=0)[0] 75 | R = np.dot(R, R.T) 76 | R = 0.1 * R + np.diag([0]*p + [1.] * (n-p)) 77 | R = np.linalg.svd(R, full_matrices=0)[0] 78 | R = R[:,:(n-p)] 79 | R = np.dot(R, R.T) 80 | 81 | eta = np.random.standard_normal(n) * 3 82 | eta = eta - np.dot(R, eta) 83 | 84 | for i in range(1000): 85 | P.append(instance(theta=3.,R=R, A=A, eta=eta)) 86 | print(i, np.mean(P), np.std(P)) 87 | U = np.linspace(0,1,51) 88 | 89 | # make any plots not use display 90 | 91 | from matplotlib import use 92 | use('Agg') 93 | import matplotlib.pyplot as plt 94 | 95 | # used for ECDF 96 | 97 | import statsmodels.api as sm 98 | plt.plot(U, sm.distributions.ECDF(P)(U)) 99 | plt.plot([0,1],[0,1]) 100 | plt.show() 101 | -------------------------------------------------------------------------------- /selectinf/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/distributions/__init__.py -------------------------------------------------------------------------------- /selectinf/distributions/api.py: -------------------------------------------------------------------------------- 1 | from .discrete_family import discrete_family 2 | from .intervals import intervals_from_sample 3 | -------------------------------------------------------------------------------- /selectinf/distributions/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/distributions/tests/__init__.py -------------------------------------------------------------------------------- /selectinf/distributions/tests/test_chains.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..chain import parallel_test, serial_test 4 | from ...constraints.affine import constraints, gaussian_hit_and_run 5 | 6 | def test_gaussian_chain(): 7 | 8 | n = 30 9 | 10 | A = np.eye(n)[:3] 11 | b = np.ones(A.shape[0]) 12 | 13 | con = constraints(A, b) 14 | state = np.random.standard_normal(n) 15 | state[:3] = 0 16 | 17 | gaussian_chain = gaussian_hit_and_run(con, state, nstep=100) 18 | 19 | counter = 0 20 | for step in gaussian_chain: 21 | counter += 1 22 | 23 | if counter >= 100: 24 | break 25 | 26 | test_statistic = lambda z: np.sum(z) 27 | 28 | parallel = parallel_test(gaussian_chain, 29 | gaussian_chain.state, 30 | test_statistic, 31 | ndraw=20) 32 | 33 | serial = serial_test(gaussian_chain, 34 | gaussian_chain.state, 35 | test_statistic, 36 | ndraw=20) 37 | 38 | return parallel, serial 39 | -------------------------------------------------------------------------------- /selectinf/distributions/tests/test_discreteExFam.py: -------------------------------------------------------------------------------- 1 | # Testing 2 | from __future__ import print_function 3 | import numpy as np 4 | import nose.tools as nt 5 | from scipy.stats import poisson 6 | from ..discrete_family import discrete_family 7 | 8 | def test_MLE(): 9 | 10 | X = np.arange(100) 11 | observed = 4 12 | pois = discrete_family(X, poisson.pmf(X, 4.5)) 13 | 14 | MLE, var = pois.MLE(observed, tol=1.e-7, max_iter=30)[:2] 15 | mean_param = pois.E(MLE, lambda x: x) 16 | nt.assert_true(np.fabs(mean_param - observed) / observed < 1.e-4) 17 | nt.assert_true(np.fabs(mean_param - var*mean_param**2) < 1.e-3) 18 | 19 | def test_discreteExFam(): 20 | 21 | X = np.arange(100) 22 | pois = discrete_family(X, poisson.pmf(X, 1)) 23 | tol = 1e-5 24 | 25 | print(pois._leftCutFromRight(theta=0.4618311,rightCut=(5,.5)), pois._test2RejectsLeft(theta=2.39,observed=5,auxVar=.5)) 26 | print (pois.interval(observed=5,alpha=.05,randomize=True,auxVar=.5)) 27 | 28 | print (abs(1-sum(pois.pdf(0)))) 29 | pois.ccdf(0, 3, .4) 30 | 31 | print(pois.MLE(1.3)) 32 | 33 | print (pois.Var(np.log(2), lambda x: x)) 34 | print (pois.Cov(np.log(2), lambda x: x, lambda x: x)) 35 | 36 | lc = pois._rightCutFromLeft(0, (0,.01)) 37 | print ((0,0.01), pois._leftCutFromRight(0, lc)) 38 | 39 | pois._rightCutFromLeft(-10, (0,.01)) 40 | #[pois.test2Cutoffs(t)[1] for t in range(-10,3)] 41 | pois._critCovFromLeft(-10, (0,.01)) 42 | 43 | pois._critCovFromLeft(0, (0,.01)) 44 | pois._critCovFromRight(0, lc) 45 | 46 | pois._critCovFromLeft(5, (5, 1)) 47 | 48 | pois._test2RejectsLeft(np.log(5),5) 49 | pois._test2RejectsRight(np.log(5),5) 50 | 51 | pois._test2RejectsLeft(np.log(20),5) 52 | pois._test2RejectsRight(np.log(.1),5) 53 | 54 | print (pois._inter2Upper(5,auxVar=.5)) 55 | print (pois.interval(5,auxVar=.5)) 56 | 57 | -------------------------------------------------------------------------------- /selectinf/distributions/tests/test_multiparameter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..discrete_multiparameter import multiparameter_family 3 | 4 | def test_multiparameter(): 5 | 6 | X = [[3,4],[4,5],[5,8.]] 7 | w = [0.3, 0.5, 0.4] 8 | theta = [0.1,0.3] 9 | 10 | family = multiparameter_family(X, w) 11 | mu1 = family.mean(theta) 12 | 13 | X_arr = np.array(X) 14 | exponent = np.dot(X_arr, theta) 15 | 16 | w_arr = np.array(w) * np.exp(exponent) 17 | w_arr /= w_arr.sum() 18 | 19 | mu2 = (X_arr 
* w_arr[:,None]).sum(0) 20 | 21 | np.testing.assert_allclose(mu1, mu2) 22 | 23 | info1 = family.information(theta)[1] 24 | 25 | T = np.zeros((3,2,2)) 26 | for i in range(2): 27 | for j in range(2): 28 | T[:,i,j] = X_arr[:,i] * X_arr[:,j] 29 | 30 | second_moment = (T * w_arr[:,None,None]).sum(0) 31 | info2 = second_moment - np.outer(mu1, mu1) 32 | 33 | np.testing.assert_allclose(info1, info2) 34 | 35 | mu3 = np.array([family.E(theta, lambda x: x[:,i]) for i in range(2)]) 36 | np.testing.assert_allclose(mu1, mu3) 37 | 38 | cov01 = np.array(family.Cov(theta, lambda x: x[:,0], lambda x: x[:,1])) 39 | np.testing.assert_allclose(cov01, info1[0,1]) 40 | 41 | var0 = np.array(family.Var(theta, lambda x: x[:,0])) 42 | np.testing.assert_allclose(var0, info1[0,0]) 43 | 44 | observed = np.array([4.2,6.3]) 45 | theta_hat = family.MLE(observed, tol=1.e-12, max_iters=50) 46 | 47 | np.testing.assert_allclose(observed, family.mean(theta_hat)) 48 | -------------------------------------------------------------------------------- /selectinf/info.py: -------------------------------------------------------------------------------- 1 | """ This file defines parameters for selectinf that we use to fill 2 | settings in setup.py, the selectinf top-level docstring, and for building the docs. 3 | In setup.py in particular, we exec this file, so it cannot import selectinf 4 | """ 5 | 6 | # selectinf version information. An empty _version_extra corresponds to a 7 | # full release. '.dev' as a _version_extra string means this is a development 8 | # version 9 | _version_major = 0 10 | _version_minor = 1 11 | _version_micro = 0 12 | _version_extra = '' 13 | 14 | # Format expected by setup.py and doc/source/conf.py: string of form "X.Y.Z" 15 | __version__ = "%s.%s.%s%s" % (_version_major, 16 | _version_minor, 17 | _version_micro, 18 | _version_extra) 19 | 20 | CLASSIFIERS = ["Development Status :: 3 - Alpha", 21 | "Environment :: Console", 22 | "Intended Audience :: Science/Research", 23 | "License :: OSI Approved :: BSD License", 24 | "Operating System :: OS Independent", 25 | "Programming Language :: Python", 26 | "Topic :: Scientific/Engineering"] 27 | 28 | description = 'Testing a fixed value of lambda' 29 | 30 | # Note: this long_description is actually a copy/paste from the top-level 31 | # README.txt, so that it shows up nicely on PyPI. So please remember to edit 32 | # it only in one place and sync it correctly. 33 | long_description = \ 34 | """ 35 | ============ 36 | Fixed lambda 37 | ============ 38 | 39 | This mini-package contains a module to perform 40 | a fixed lambda test for the LASSO.
41 | """ 42 | 43 | # versions 44 | NUMPY_MIN_VERSION='1.7.1' 45 | SCIPY_MIN_VERSION = '0.9' 46 | CYTHON_MIN_VERSION = '0.21' 47 | MPMATH_MIN_VERSION = "0.18" 48 | PYINTER_MIN_VERSION = "0.1.6" 49 | SKLEARN_MIN_VERSION = '0.19' 50 | 51 | NAME = 'selectinf' 52 | MAINTAINER = "Jonathan Taylor" 53 | MAINTAINER_EMAIL = "" 54 | DESCRIPTION = description 55 | LONG_DESCRIPTION = long_description 56 | URL = "http://github.org/jonathan.taylor/selective-inference" 57 | DOWNLOAD_URL = "" 58 | LICENSE = "BSD license" 59 | CLASSIFIERS = CLASSIFIERS 60 | AUTHOR = "fixed_lambda developers" 61 | AUTHOR_EMAIL = "" 62 | PLATFORMS = "OS Independent" 63 | MAJOR = _version_major 64 | MINOR = _version_minor 65 | MICRO = _version_micro 66 | ISRELEASE = _version_extra == '' 67 | VERSION = __version__ 68 | STATUS = 'alpha' 69 | PROVIDES = ["fixed_lambda"] 70 | REQUIRES = ["numpy (>=%s)" % NUMPY_MIN_VERSION, 71 | "scipy (>=%s)" % SCIPY_MIN_VERSION, 72 | "mpmath (>=%s)" % MPMATH_MIN_VERSION, 73 | "pyinter"] 74 | -------------------------------------------------------------------------------- /selectinf/learning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/learning/__init__.py -------------------------------------------------------------------------------- /selectinf/learning/fitters.py: -------------------------------------------------------------------------------- 1 | import uuid, functools 2 | 3 | import numpy as np 4 | from scipy.stats import norm as ndist 5 | from sklearn import ensemble 6 | 7 | def gbm_fit_sk(T, Y, **params): 8 | 9 | fitfns = [] 10 | for j in range(Y.shape[1]): 11 | print('variable %d' % (j+1,)) 12 | y = Y[:,j].astype(np.int) 13 | clf = ensemble.GradientBoostingClassifier(**params) 14 | clf.fit(T, y) 15 | 16 | def fit_fn(clf, t): 17 | return clf.predict_proba(t)[:,1] 18 | 19 | fitfns.append(functools.partial(fit_fn, clf)) 20 | 21 | return fitfns 22 | 23 | def random_forest_fit_sk(T, Y, **params): 24 | 25 | fitfns = [] 26 | for j in range(Y.shape[1]): 27 | print('variable %d' % (j+1,)) 28 | y = Y[:,j].astype(np.int) 29 | clf = ensemble.RandomForestClassifier(**params) 30 | clf.fit(T, y) 31 | 32 | def fit_fn(clf, t): 33 | return clf.predict_proba(t)[:,1] 34 | 35 | fitfns.append(functools.partial(fit_fn, clf)) 36 | 37 | return fitfns 38 | 39 | -------------------------------------------------------------------------------- /selectinf/learning/keras_fit.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Based on https://stackoverflow.com/questions/44164749/how-does-keras-handle-multilabel-classification 3 | ''' 4 | 5 | import warnings 6 | 7 | try: 8 | 9 | from keras.models import Sequential 10 | from keras.layers import Dense, Dropout 11 | from keras.optimizers import SGD 12 | 13 | def keras_fit(T, Y, **kwargs): 14 | 15 | if Y.ndim == 1: 16 | Y.shape = (-1, 1) 17 | 18 | fitfns = [] 19 | 20 | for j in range(Y.shape[1]): 21 | y = Y[:,j] 22 | 23 | fit_fn = keras_fit_multilabel(T, y, **kwargs)[0] 24 | fitfns.append(fit_fn) 25 | return fitfns 26 | 27 | def keras_fit_multilabel(T, Y, sizes=[500, 500], epochs=50, activation='relu', dropout=0, **ignored): 28 | 29 | if Y.ndim == 1: 30 | Y.shape = (-1, 1) 31 | 32 | model = Sequential() 33 | for s in sizes: 34 | model.add(Dense(s, activation=activation, input_dim=T.shape[1])) 35 | if dropout > 0: 36 | model.add(Dropout(dropout)) 37 | 38 | # the 
final layer 39 | model.add(Dense(Y.shape[1], activation='sigmoid')) 40 | 41 | sgd = SGD(lr=0.03, decay=1e-3, momentum=0.6, nesterov=True) 42 | model.compile(loss='binary_crossentropy', 43 | optimizer=sgd) 44 | 45 | model.fit(T, Y, epochs=epochs) 46 | fitfns = [lambda T_test, j=j: model.predict(T_test)[:,j] for j in range(Y.shape[1])]  # bind j now: a plain closure would capture only the last column 47 | return fitfns 48 | 49 | except ImportError: 50 | warnings.warn('module `keras` not importable, `keras_fit` and `keras_fit_multilabel` will not be importable') 51 | -------------------------------------------------------------------------------- /selectinf/randomized/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/randomized/__init__.py -------------------------------------------------------------------------------- /selectinf/randomized/api.py: -------------------------------------------------------------------------------- 1 | from .query import multiple_queries, query 2 | from .randomization import randomization 3 | from .lasso import lasso, split_lasso 4 | from .screening import marginal_screening, stepup, topK 5 | from .slope import slope 6 | from .group_lasso import group_lasso 7 | -------------------------------------------------------------------------------- /selectinf/randomized/tests/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ...tests.decorators import wait_for_return_value, set_sampling_params_iftrue 4 | from ...tests.instance import logistic_instance, gaussian_instance 5 | -------------------------------------------------------------------------------- /selectinf/randomized/tests/sandbox/test_cv_glmnet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import regreg.api as rr 3 | 4 | from ...algorithms.cv_glmnet import CV_glmnet 5 | from ...tests.instance import gaussian_instance 6 | 7 | def test_cv_glmnet(): 8 | np.random.seed(2) 9 | n, p = 3000, 1000 10 | X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=30, rho=0., sigma=1) 11 | loss = rr.glm.gaussian(X,y) 12 | CV_glmnet_gaussian = CV_glmnet(loss, 'gaussian') 13 | lam_CV, lam_1SD, lam_seq, CV_err, SD = CV_glmnet_gaussian.using_glmnet() 14 | print("CV error curve (nonrandomized):", CV_err) 15 | lam_grid_size = CV_glmnet_gaussian.lam_seq.shape[0] 16 | lam_CVR, SD, CVR, CV1, lam_seq = CV_glmnet_gaussian.choose_lambda_CVR(scale1=0.1, scale2=0.1) 17 | print("nonrandomized index:", list(lam_seq).index(lam_CV)) # index of the minimizer 18 | print("lam for nonrandomized CV plus sigma rule:",lam_CV,lam_1SD) 19 | print("lam_CVR:",lam_CVR) 20 | print("randomized index:", list(lam_seq).index(lam_CVR)) 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /selectinf/randomized/tests/sandbox/test_fixedX.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import norm as ndist 3 | 4 | import regreg.api as rr 5 | 6 | from ...tests.flags import SMALL_SAMPLES, SET_SEED 7 | from ...tests.instance import gaussian_instance 8 | from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue 9 | 10 | from ..api import randomization 11 | from ..glm import (resid_bootstrap, 12 | glm_nonparametric_bootstrap, 13 | fixedX_group_lasso) 14 | 15 | 16 |
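# Smoke test for the Laplace-randomized fixed-X group lasso: solve the
# randomized problem and, on the event that the true support was screened,
# sample the optimization variables and return selective p-values, coverage
# indicators and an active-variable mask. The test returns None when nothing
# is selected; judging by its use here, @wait_for_return_value reruns it until
# a value is returned.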
@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10) 17 | @set_seed_iftrue(SET_SEED) 18 | @wait_for_return_value() 19 | def test_fixedX(ndraw=10000, burnin=2000): # nsim needed for decorator 20 | s, n, p = 5, 200, 20 21 | 22 | randomizer = randomization.laplace((p,), scale=1.) 23 | X, Y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.1, signal=7) 24 | 25 | lam_frac = 1. 26 | lam = lam_frac * np.mean(np.fabs(X.T.dot(np.random.standard_normal((n, 50000)))).max(0)) * sigma 27 | W = np.ones(p) * lam 28 | epsilon = 1. / np.sqrt(n) 29 | 30 | penalty = rr.group_lasso(np.arange(p), 31 | weights=dict(zip(np.arange(p), W)), lagrange=1.) 32 | 33 | M_est = fixedX_group_lasso(X, Y, epsilon, penalty, randomizer) 34 | M_est.solve() 35 | 36 | active_set = M_est.selection_variable['variables'] 37 | nactive = active_set.sum() 38 | 39 | if set(nonzero).issubset(np.nonzero(active_set)[0]) and active_set.sum() > len(nonzero): 40 | 41 | selected_features = np.zeros(p, np.bool) 42 | selected_features[active_set] = True 43 | 44 | Xactive = X[:,active_set] 45 | unpenalized_mle = np.linalg.pinv(Xactive).dot(Y) 46 | 47 | form_covariances = glm_nonparametric_bootstrap(n, n) 48 | target_info, target_observed = resid_bootstrap(M_est.loss, active_set) 49 | 50 | cov_info = M_est.setup_sampler() 51 | target_cov, score_cov = form_covariances(target_info, 52 | cross_terms=[cov_info], 53 | nsample=M_est.nboot) 54 | 55 | opt_sample = M_est.sampler.sample(ndraw, 56 | burnin) 57 | 58 | pvalues = M_est.sampler.coefficient_pvalues(unpenalized_mle, 59 | target_cov, 60 | score_cov, 61 | parameter=np.zeros(selected_features.sum()), 62 | sample=opt_sample) 63 | intervals = M_est.sampler.confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_sample) 64 | 65 | true_vec = beta[M_est.selection_variable['variables']] 66 | 67 | L, U = intervals.T 68 | 69 | covered = np.zeros(nactive, np.bool) 70 | active_var = np.zeros(nactive, np.bool) 71 | active_set = np.nonzero(active_set)[0] 72 | 73 | for j in range(nactive): 74 | if (L[j] <= true_vec[j]) and (U[j] >= true_vec[j]): 75 | covered[j] = 1 76 | active_var[j] = active_set[j] in nonzero 77 | 78 | return pvalues, covered, active_var 79 | 80 | -------------------------------------------------------------------------------- /selectinf/randomized/tests/sandbox/test_full_lasso.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import nose.tools as nt 3 | 4 | from importlib import reload; import selection.randomized.lasso as L; reload(L)  # reload is not a builtin in Python 3 5 | from selection.randomized.lasso import lasso 6 | from selection.tests.instance import gaussian_instance 7 | import matplotlib.pyplot as plt 8 | 9 | def test_full_lasso(n=200, p=30, signal_fac=1.5, s=5, ndraw=5000, burnin=1000, sigma=3, full=False, rho=0.4, randomizer_scale=1): 10 | """ 11 | General LASSO -- conditioning on the full subgradient should reproduce the default fit's constraints. 12 | """ 13 | 14 | inst, const = gaussian_instance, lasso.gaussian 15 | signal = np.sqrt(signal_fac * np.log(p)) 16 | X, Y, beta = inst(n=n, 17 | p=p, 18 | signal=signal, 19 | s=s, 20 | equicorrelated=False, 21 | rho=rho, 22 | sigma=sigma, 23 | random_signs=True)[:3] 24 | 25 | n, p = X.shape 26 | 27 | W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma 28 | 29 | conv = const(X, 30 | Y, 31 | W, 32 | randomizer_scale=randomizer_scale * sigma) 33 | 34 | signs = conv.fit(solve_args={'min_its':500, 'tol':1.e-13}) 35 | nonzero = signs != 0 36 | 37 | conv2 = lasso.gaussian(X, 38 | Y, 39 | W, 40 | randomizer_scale=randomizer_scale * sigma) 41 | conv2.fit(perturb=conv._initial_omega,
solve_args={'min_its':500, 'tol':1.e-13}) 42 | conv2.decompose_subgradient(condition=np.ones(p, np.bool)) 43 | 44 | np.testing.assert_allclose(conv2._view.sampler.affine_con.covariance, 45 | conv.sampler.affine_con.covariance) 46 | 47 | np.testing.assert_allclose(conv2._view.sampler.affine_con.mean, 48 | conv.sampler.affine_con.mean) 49 | 50 | np.testing.assert_allclose(conv2._view.sampler.affine_con.linear_part, 51 | conv.sampler.affine_con.linear_part) 52 | 53 | np.testing.assert_allclose(conv2._view.sampler.affine_con.offset, 54 | conv.sampler.affine_con.offset) 55 | 56 | np.testing.assert_allclose(conv2._view.initial_soln, 57 | conv.initial_soln) 58 | 59 | np.testing.assert_allclose(conv2._view.initial_subgrad, 60 | conv.initial_subgrad) 61 | -------------------------------------------------------------------------------- /selectinf/randomized/tests/sandbox/test_general_lasso.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | import numpy as np 3 | import nose.tools as nt 4 | 5 | from ..lasso import lasso 6 | from ...tests.instance import (gaussian_instance, 7 | logistic_instance, 8 | poisson_instance) 9 | from ...tests.flags import SMALL_SAMPLES 10 | from ...tests.decorators import set_sampling_params_iftrue 11 | 12 | @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=50, burnin=20) 13 | def test_lasso_constructors(ndraw=1000, burnin=200): 14 | """ 15 | Smoke tests for lasso convenience constructors 16 | """ 17 | cls = lasso 18 | for const_info, rand, marginalize, condition in product(zip([gaussian_instance, 19 | logistic_instance, 20 | poisson_instance], 21 | [cls.gaussian, 22 | cls.logistic, 23 | cls.poisson]), 24 | ['gaussian', 'logistic', 'laplace'], 25 | [False, True], 26 | [False, True]): 27 | 28 | print(rand) 29 | inst, const = const_info 30 | X, Y = inst(n=100, p=20, signal=5, s=10)[:2] 31 | n, p = X.shape 32 | 33 | W = np.ones(X.shape[1]) * 0.2 34 | W[0] = 0 35 | W[3:] = 50. 
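# Deliberately heterogeneous weights: one unpenalized coordinate (W[0] = 0),
# two lightly penalized ones, the rest heavily penalized -- then shuffled so
# the constructors are exercised with an arbitrary weight pattern.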
36 | np.random.shuffle(W) 37 | conv = const(X, Y, W, randomizer=rand) 38 | nboot = 1000 39 | if SMALL_SAMPLES: 40 | nboot = 20 41 | signs = conv.fit(nboot=nboot) 42 | 43 | marginalize = None 44 | if marginalize: 45 | marginalize = np.zeros(p, np.bool) 46 | marginalize[:int(p/2)] = True 47 | 48 | condition = None 49 | if condition: 50 | if marginalize: 51 | condition = ~marginalize 52 | else: 53 | condition = np.ones(p, np.bool) 54 | condition[-int(p/4):] = False 55 | 56 | selected_features = np.zeros(p, np.bool) 57 | selected_features[:3] = True 58 | 59 | conv.summary(selected_features, 60 | ndraw=ndraw, 61 | burnin=burnin, 62 | compute_intervals=True) 63 | 64 | conv.decompose_subgradient(marginalize=marginalize, 65 | condition=condition) 66 | 67 | conv.summary(selected_features, 68 | ndraw=ndraw, 69 | burnin=burnin) 70 | 71 | conv.decompose_subgradient(condition=np.ones(p, np.bool)) 72 | 73 | conv.summary(selected_features, 74 | ndraw=ndraw, 75 | burnin=burnin) 76 | -------------------------------------------------------------------------------- /selectinf/randomized/tests/sandbox/test_opt_weighted_intervals.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | import numpy as np 3 | import nose.tools as nt 4 | 5 | from ..convenience import lasso, step, threshold 6 | from ..query import optimization_sampler 7 | from ...tests.instance import (gaussian_instance, 8 | logistic_instance, 9 | poisson_instance) 10 | from ...tests.flags import SMALL_SAMPLES 11 | from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue 12 | import matplotlib.pyplot as plt 13 | 14 | from scipy.stats import t as tdist 15 | from ..glm import glm_nonparametric_bootstrap, pairs_bootstrap_glm 16 | from ..M_estimator import restricted_Mest 17 | 18 | @set_seed_iftrue(False, 200) 19 | @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=1000, burnin=100) 20 | def test_opt_weighted_intervals(ndraw=20000, burnin=2000): 21 | 22 | results = [] 23 | cls = lasso 24 | for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace', 'gaussian']): 25 | 26 | inst, const = const_info 27 | 28 | X, Y, beta = inst(n=100, p=20, s=0, signal=5., sigma=5.)[:3] 29 | n, p = X.shape 30 | 31 | W = np.ones(X.shape[1]) * 8 32 | conv = const(X, Y, W, randomizer=rand, parametric_cov_estimator=True) 33 | signs = conv.fit() 34 | print("signs", signs) 35 | 36 | marginalizing_groups = np.ones(p, np.bool) 37 | #marginalizing_groups[:int(p/2)] = True 38 | conditioning_groups = ~marginalizing_groups 39 | #conditioning_groups[-int(p/4):] = False 40 | conv.decompose_subgradient(marginalizing_groups=marginalizing_groups, 41 | conditioning_groups=conditioning_groups) 42 | 43 | selected_features = conv._view.selection_variable['variables'] 44 | nactive=selected_features.sum() 45 | print("nactive", nactive) 46 | if nactive==0: 47 | results.append(None) 48 | else: 49 | sel_pivots, sel_pval, sel_ci = conv.summary(selected_features, 50 | parameter=beta[selected_features], 51 | ndraw=ndraw, 52 | burnin=burnin, 53 | compute_intervals=True) 54 | print(sel_pivots) 55 | results.append((rand, sel_pivots, sel_ci, beta[selected_features])) 56 | 57 | return results 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /selectinf/randomized/tests/test_modelQ.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | 3 | import numpy as np 4 | 
import nose.tools as nt 5 | 6 | import regreg.api as rr 7 | 8 | from ..modelQ import modelQ 9 | from ..lasso import lasso 10 | from ...tests.instance import gaussian_instance 11 | 12 | def test_modelQ(): 13 | 14 | n, p, s = 200, 50, 4 15 | X, y, beta = gaussian_instance(n=n, 16 | p=p, 17 | s=s, 18 | sigma=1)[:3] 19 | 20 | lagrange = 5. * np.ones(p) * np.sqrt(n) 21 | perturb = np.random.standard_normal(p) * n 22 | LH = lasso.gaussian(X, y, lagrange) 23 | LH.fit(perturb=perturb, solve_args={'min_its':1000}) 24 | 25 | LQ = modelQ(X.T.dot(X), X, y, lagrange) 26 | LQ.fit(perturb=perturb, solve_args={'min_its':1000}) 27 | LQ.summary() # smoke test 28 | 29 | conH = LH.sampler.affine_con 30 | conQ = LQ.sampler.affine_con 31 | 32 | np.testing.assert_allclose(LH.initial_soln, LQ.initial_soln) 33 | np.testing.assert_allclose(LH.initial_subgrad, LQ.initial_subgrad) 34 | 35 | np.testing.assert_allclose(conH.linear_part, conQ.linear_part) 36 | np.testing.assert_allclose(conH.offset, conQ.offset) 37 | 38 | np.testing.assert_allclose(LH._beta_full, LQ._beta_full) 39 | 40 | -------------------------------------------------------------------------------- /selectinf/randomized/tests/test_randomization.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import nose.tools as nt 5 | 6 | from ..randomization import randomization 7 | 8 | def test_noise_dbns(): 9 | 10 | X = np.random.standard_normal((10, 5)) 11 | Q = X.T.dot(X) 12 | noises = [randomization.isotropic_gaussian((5,), 1.), 13 | randomization.laplace((5,), 1.), 14 | randomization.logistic((5,), 1.), 15 | randomization.gaussian(Q)] 16 | 17 | v1, v2 = [], [] 18 | 19 | for i, noise in enumerate(noises): 20 | 21 | x = np.random.standard_normal(5) 22 | u = np.random.standard_normal(5) 23 | v1.append(np.exp(noise.log_density(x))) 24 | v2.append(noise._density(x)) 25 | 26 | noise.smooth_objective(x, 'func') 27 | noise.smooth_objective(x, 'grad') 28 | noise.smooth_objective(x, 'both') 29 | noise.gradient(x) 30 | 31 | nt.assert_equal(noise.sample().shape, (5,)) 32 | nt.assert_equal(noise.sample().shape, (5,)) 33 | 34 | if noise.CGF is not None: 35 | u = np.zeros(5) 36 | u[:2] = 0.1 37 | noise.CGF.smooth_objective(u, 'both') 38 | 39 | if noise.CGF_conjugate is not None: 40 | noise.CGF_conjugate.smooth_objective(x, 'both') 41 | 42 | 43 | -------------------------------------------------------------------------------- /selectinf/randomized/tests/test_slope_subgrad.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..slope import _projection_onto_selected_subgradients 4 | 5 | def test_projection(): 6 | 7 | prox_arg = np.random.normal(0,1,10) 8 | weights = np.linspace(3, 5, 10)[::-1] 9 | ordering = np.random.choice(10, 10, replace=False) 10 | cluster_sizes = [2,3,1,1,3] 11 | active_signs = np.ones(10) 12 | 13 | proj = _projection_onto_selected_subgradients(prox_arg, 14 | weights, 15 | ordering, 16 | cluster_sizes, 17 | active_signs) 18 | 19 | print("projection", proj) 20 | 21 | 22 | -------------------------------------------------------------------------------- /selectinf/reduced_optimization/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/reduced_optimization/tests/__init__.py 
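As a quick orientation to the randomization objects exercised in test_randomization.py above, here is a minimal sketch; it assumes nothing beyond the calls that test already makes (randomization.isotropic_gaussian, sample, log_density and the regreg-style smooth_objective modes):

import numpy as np
from selectinf.randomized.randomization import randomization

# an isotropic Gaussian randomizer on R^5 with scale 1, as in test_noise_dbns
noise = randomization.isotropic_gaussian((5,), 1.)

omega = noise.sample()                            # one draw of the randomization, shape (5,)
density_value = np.exp(noise.log_density(omega))  # density of that draw
noise.smooth_objective(omega, 'both')             # value/gradient pair (regreg smooth-objective convention)

Such an object supplies the randomization used by the queries in this package, e.g. the `randomizer` passed to fixedX_group_lasso in test_fixedX.py above.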
-------------------------------------------------------------------------------- /selectinf/sampling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/sampling/__init__.py -------------------------------------------------------------------------------- /selectinf/sampling/api.py: -------------------------------------------------------------------------------- 1 | from .langevin import projected_langevin 2 | from .truncnorm import (sample_truncnorm_white, 3 | sample_truncnorm_white_sphere, 4 | sample_truncnorm_white_ball) 5 | -------------------------------------------------------------------------------- /selectinf/sampling/langevin.py: -------------------------------------------------------------------------------- 1 | """ 2 | Projected Langevin sampler of `http://arxiv.org/abs/1507.02564`_ 3 | """ 4 | from __future__ import print_function 5 | 6 | import numpy as np 7 | from scipy.stats import norm as ndist 8 | 9 | class projected_langevin(object): 10 | 11 | def __init__(self, 12 | initial_condition, 13 | gradient_map, 14 | projection_map, 15 | stepsize): 16 | 17 | (self.state, 18 | self.gradient_map, 19 | self.projection_map, 20 | self.stepsize) = (np.copy(initial_condition), 21 | gradient_map, 22 | projection_map, 23 | stepsize) 24 | self._shape = self.state.shape[0] 25 | self._sqrt_step = np.sqrt(self.stepsize) 26 | self._noise = ndist(loc=0, scale=1) 27 | 28 | def __iter__(self): 29 | return self 30 | 31 | def next(self): 32 | nattempt = 0 33 | while True: 34 | 35 | proj_arg = (self.state 36 | + 0.5 * self.stepsize * self.gradient_map(self.state) 37 | + self._noise.rvs(self._shape) * self._sqrt_step) 38 | candidate = self.projection_map(proj_arg) 39 | if not np.all(np.isfinite(self.gradient_map(candidate))): 40 | nattempt += 1 41 | self._sqrt_step *= 0.8 42 | self.stepsize = self._sqrt_step**2 43 | if nattempt >= 30: 44 | raise ValueError('unable to find feasible step') 45 | else: 46 | self.state[:] = candidate 47 | break 48 | -------------------------------------------------------------------------------- /selectinf/sampling/sequential.py: -------------------------------------------------------------------------------- 1 | """ 2 | Sequential Monte Carlo for approximately constrained Gaussians. 3 | 4 | http://arxiv.org/abs/1410.8209 5 | 6 | """ 7 | 8 | import numpy as np 9 | 10 | def sample(white_constraint, 11 | nsample, 12 | proposal_sigma=0.2, 13 | temps=np.linspace(0, 50, 51)): 14 | """ 15 | Build up an approximately constrained Gaussian 16 | based on relaxations of the constraint. 17 | 18 | Parameters 19 | ---------- 20 | 21 | white_constraint : `selection.constraints.affine` 22 | Affine constraint with identity covariance 23 | 24 | nsample : int 25 | How many samples to draw?
26 | 27 | proposal_sigma : float 28 | Scale of the Gaussian random-walk proposal used in the Metropolis-Hastings steps. 29 | """ 30 | 31 | n = white_constraint.dim 32 | sample_z = np.random.standard_normal((n, nsample)) 33 | 34 | def constraint_function(z, con): 35 | value = (np.dot(con.linear_part, z) - con.offset[:,None]) 36 | return value.max(0) 37 | 38 | def constraint_logit(temp, z, con): 39 | tmp_z = constraint_function(z, con) 40 | tmp_v = np.exp(-temp * tmp_z) 41 | return tmp_v / (1 + tmp_v) 42 | 43 | def MH_sample(temp, z_cur, con): 44 | step = np.random.standard_normal(z_cur.shape) * proposal_sigma 45 | z_new = z_cur + step 46 | 47 | W_new = constraint_logit(temp, z_new, con) 48 | W_cur = constraint_logit(temp, z_cur, con) 49 | W_new *= np.exp(-(z_new**2).sum(0)/2) 50 | W_cur *= np.exp(-(z_cur**2).sum(0)/2) 51 | 52 | coin_flip = np.less_equal(np.random.sample(z_cur.shape[1]), W_new / W_cur) 53 | final_sample = coin_flip * z_new + (1 - coin_flip) * z_cur 54 | return final_sample 55 | 56 | weights = np.ones(nsample, float) / nsample 57 | 58 | num = np.ones(nsample) / 2 59 | for i in range(temps.shape[0]-1): 60 | 61 | num, den = constraint_logit(temps[i+1], sample_z, white_constraint), num 62 | 63 | weights *= np.exp(np.log(num) - np.log(den)) 64 | weights /= weights.sum() 65 | 66 | ESS = 1. / (weights**2).sum() 67 | if ESS < nsample / 2.: 68 | idx_z = np.random.choice(np.arange(nsample), size=(nsample,), replace=True, p=weights) 69 | sample_z = sample_z[:, idx_z] 70 | weights = np.ones(nsample, float) / nsample 71 | sample_z = MH_sample(temps[i+1], sample_z, white_constraint) 72 | 73 | return sample_z 74 | 75 | 76 | -------------------------------------------------------------------------------- /selectinf/sampling/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/sampling/tests/__init__.py -------------------------------------------------------------------------------- /selectinf/sampling/tests/plots_fs.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | from .test_fstep_langevin import test_fstep 4 | from .test_kfstep import test_kfstep 5 | import random 6 | 7 | def main(): 8 | 9 | import statsmodels.api as sm 10 | from scipy.stats import probplot, uniform 11 | from matplotlib import pyplot as plt 12 | random.seed(4) 13 | 14 | fig = plt.figure() 15 | plot_1step = fig.add_subplot(121) 16 | plot_kstep = fig.add_subplot(122) 17 | 18 | 19 | P0 = [] 20 | for i in range(300): 21 | 22 | print("iteration", i) 23 | p0 = test_fstep(Langevin_steps=10000, burning=2000) 24 | P0.append(p0) 25 | 26 | print("one step FS done! mean: ", np.mean(P0), "std: ", np.std(P0)) 27 | #probplot(P0, dist=uniform, sparams=(0,1), plot=plot_1step, fit=False) 28 | #plot_1step.plot([0, 1], color='k', linestyle='-', linewidth=2) 29 | 30 | ecdf = sm.distributions.ECDF(P0) 31 | x = np.linspace(min(P0), max(P0)) 32 | y = ecdf(x) 33 | plot_1step.plot(x, y, '-o', lw=2) 34 | plot_1step.plot([0, 1], [0, 1], 'k-', lw=2) 35 | 36 | plot_1step.set_title("One step FS") 37 | plot_1step.set_xlim([0,1]) 38 | plot_1step.set_ylim([0,1]) 39 | 40 | 41 | P0 = [] 42 | for i in range(300): 43 | print("iteration", i) 44 | p0 = test_kfstep(Langevin_steps=10000, burning=2000) 45 | P0.append(p0) 46 | 47 | print("k steps FS done!
mean: ", np.mean(P0), "std: ", np.std(P0)) 48 | #probplot(P0, dist=uniform, sparams=(0,1), plot=plot_kstep, fit=False) 49 | #plot_kstep.plot([0, 1], color='k', linestyle='-', linewidth=2) 50 | 51 | 52 | ecdf = sm.distributions.ECDF(P0) 53 | x = np.linspace(min(P0), max(P0)) 54 | y = ecdf(x) 55 | plot_kstep.plot(x, y, '-o', lw=2) 56 | plot_kstep.plot([0, 1], [0, 1], 'k-', lw=2) 57 | 58 | plot_kstep.set_title("Four steps FS") 59 | plot_kstep.set_xlim([0,1]) 60 | plot_kstep.set_ylim([0,1]) 61 | 62 | 63 | 64 | plt.savefig('FS_Langevin.pdf') 65 | plt.show() 66 | 67 | 68 | -------------------------------------------------------------------------------- /selectinf/sampling/tests/test_pca_langevin.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..langevin import projected_langevin 4 | 5 | ### Some examples: PCA from https://arxiv.org/abs/1410.8260 6 | 7 | def _log_vandermonde(eigenvals, power=1): 8 | """ 9 | Log of the Vandermonde determinant. 10 | """ 11 | eigenvals = np.asarray(eigenvals) 12 | p = eigenvals.shape[0] 13 | idx = np.arange(p) 14 | logdiff = np.log(np.fabs(np.subtract.outer(eigenvals, eigenvals))) 15 | mask = np.greater.outer(idx, idx) 16 | 17 | return power * (logdiff * mask).sum() 18 | 19 | def _grad_log_vandermonde(eigenvals, power=1): 20 | """ 21 | Gradient of the log of the Vandermonde determinant. 22 | """ 23 | eigenvals = np.asarray(eigenvals) 24 | p = eigenvals.shape[0] 25 | idx = np.arange(p) 26 | diff = np.subtract.outer(eigenvals, eigenvals) 27 | diff_sign = -np.sign(diff) 28 | mask = (diff > 0) 29 | return (1. / (np.fabs(diff) + np.identity(p)) * mask * diff_sign).sum(1) 30 | 31 | def _log_wishart_white(eigenvals, n): 32 | """ 33 | Log-eigenvalue density of Wishart($I_{p \times p}$, n) assuming n>p, 34 | up to normalizing constant. 35 | """ 36 | eigenvals = np.asarray(eigenvals) 37 | p = eigenvals.shape[0] 38 | 39 | return ((n - p - 1) * 0.5 * np.log(eigenvals).sum() 40 | + _log_vandermonde(eigenvals, power=1) 41 | - eigenvals.sum() * 0.5) 42 | 43 | def _grad_log_wishart_white(eigenvals, n): 44 | """ 45 | Gradient of log-eigenvalue density of Wishart($I_{p \times p}$, n) 46 | assuming n>p.
47 | """ 48 | eigenvals = np.asarray(eigenvals) 49 | p = eigenvals.shape[0] 50 | return ((n - p - 1) * 0.5 / (eigenvals + 1.e-7) 51 | + _grad_log_vandermonde(eigenvals, power=1) - 0.5) 52 | 53 | def main(n=50): 54 | 55 | from regreg.atoms._isotonic import _isotonic_regression 56 | import matplotlib.pyplot as plt 57 | initial = np.ones(n) + 0.01 * np.random.standard_normal(n) 58 | grad_map = lambda val: _grad_log_wishart_white(val, n) 59 | 60 | def projection_map(vals): 61 | iso = np.zeros_like(vals) 62 | _isotonic_regression(vals, np.ones_like(vals), iso) 63 | vals = np.asarray(iso) 64 | return np.maximum(vals, 1.e-6) 65 | 66 | sampler = projected_langevin(initial, 67 | grad_map, 68 | projection_map, 69 | 0.01) 70 | sampler = iter(sampler) 71 | 72 | path = [initial.copy()] 73 | for _ in range(200): 74 | print(sampler.state) 75 | sampler.next() 76 | path.append(sampler.state.copy()) 77 | path = np.array(path) 78 | 79 | [plt.plot(path[:,i]) for i in range(5)] 80 | plt.show() 81 | 82 | -------------------------------------------------------------------------------- /selectinf/sampling/tests/test_sequential.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.testing.decorators as dec 3 | from scipy.stats import norm as ndist 4 | 5 | from ...constraints.affine import constraints 6 | from ..sequential import sample 7 | from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue 8 | from ...tests.flags import SMALL_SAMPLES, SET_SEED 9 | 10 | @dec.slow 11 | @set_seed_iftrue(SET_SEED) 12 | @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, nsim=10) 13 | def test_sequentially_constrained(ndraw=100, nsim=50): 14 | S = -np.identity(10)[:3] 15 | b = -6 * np.ones(3) 16 | C = constraints(S, b) 17 | W = sample(C, nsim, temps=np.linspace(0, 200, 1001)) 18 | U = np.linspace(0, 1, 101) 19 | 20 | -------------------------------------------------------------------------------- /selectinf/sandbox/approx_ci/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/sandbox/approx_ci/__init__.py -------------------------------------------------------------------------------- /selectinf/sandbox/approx_ci/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/sandbox/approx_ci/tests/__init__.py -------------------------------------------------------------------------------- /selectinf/sandbox/bayesian/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/sandbox/bayesian/__init__.py -------------------------------------------------------------------------------- /selectinf/sandbox/bayesian/credible_intervals.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import norm as ndist 3 | 4 | class projected_langevin(object): 5 | 6 | def __init__(self, 7 | initial_condition, 8 | gradient_map, 9 | projection_map, 10 | stepsize): 11 | 12 | (self.state, 13 | self.gradient_map, 14 | self.projection_map, 15 | self.stepsize) = (np.copy(initial_condition), 16 | gradient_map, 17 | 
projection_map, 18 | stepsize) 19 | self._shape = self.state.shape[0] 20 | self._sqrt_step = np.sqrt(self.stepsize) 21 | self._noise = ndist(loc=0,scale=1) 22 | 23 | def __iter__(self): 24 | return self 25 | 26 | def next(self): 27 | while True: 28 | proj_arg = (self.state + 0.5 * self.stepsize * self.gradient_map(self.state) 29 | + self._noise.rvs(self._shape) * self._sqrt_step) 30 | candidate = self.projection_map(proj_arg) 31 | if not np.all(np.isfinite(self.gradient_map(candidate))): 32 | print(candidate, self._sqrt_step) 33 | self._sqrt_step *= 0.8 34 | else: 35 | self.state[:] = candidate 36 | break 37 | -------------------------------------------------------------------------------- /selectinf/sandbox/bayesian/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/sandbox/bayesian/tests/__init__.py -------------------------------------------------------------------------------- /selectinf/src_C/#sample_preparation.pyx#: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | #from cython.view cimport array as cvarray 6 | from libc.stdlib cimport malloc, free 7 | 8 | 9 | cdef extern from "preparation_Eig_Vect.h": 10 | void samples(int n, 11 | int dim, 12 | int seed, 13 | double* initial, 14 | int numlin, 15 | int numquad, 16 | double* lin, 17 | double* quad, 18 | double* quad_lin, 19 | double* offset_lin, 20 | double* offset_quad, 21 | double* samples_Carray) 22 | 23 | 24 | 25 | def quad_sampler(int n_sample, 26 | initial, 27 | quad,# = np.array([]).reshape((0, 0, 0)), 28 | quad_lin,# = np.array([]).reshape((0, 0)), 29 | lin,# = np.array([]).reshape((0,0)), 30 | offset_quad,# = np.array([]), 31 | offset_lin # = np.array([]) 32 | ): 33 | 34 | 35 | 36 | cdef int numquad = quad.shape[0] 37 | cdef int p = quad.shape[1] 38 | cdef int numlin = lin.shape[0] 39 | 40 | cdef np.ndarray[np.double_t, ndim=3] quad2 = np.ascontiguousarray(-quad) 41 | cdef np.ndarray[np.double_t, ndim=2] quad_lin2 = np.ascontiguousarray(-quad_lin) 42 | cdef np.ndarray[np.double_t, ndim=1] offset_quad2 = np.ascontiguousarray(offset_quad) 43 | 44 | cdef double *pt_quad 45 | cdef double *pt_quad_lin 46 | cdef double *pt_quad_offset 47 | if numquad > 0: 48 | pt_quad = &quad2[0, 0, 0] 49 | pt_quad_lin = &quad_lin2[0, 0] 50 | pt_quad_offset = &offset_quad2[0] 51 | 52 | 53 | cdef np.ndarray[np.double_t, ndim=2] lin2 = np.ascontiguousarray(-lin ) 54 | cdef np.ndarray[np.double_t, ndim=1] offset_lin2 = np.ascontiguousarray(offset_lin ) 55 | 56 | cdef double *pt_lin 57 | cdef double *pt_lin_offset 58 | if numlin > 0: 59 | pt_lin_offset = &offset_lin2[0] 60 | pt_lin = &lin2[0, 0] 61 | 62 | cdef np.ndarray[np.double_t, ndim=1] initial2 = np.ascontiguousarray(initial) 63 | 64 | cdef int seed = np.random.randint(1, 100000) 65 | 66 | cdef double *samples_Carray = <double *> malloc(n_sample*p * sizeof(double)) 67 | 68 | samples(n_sample, 69 | p, 70 | seed, 71 | &initial2[0], 72 | numlin, 73 | numquad, 74 | pt_lin, 75 | pt_quad, 76 | pt_quad_lin, 77 | pt_lin_offset, 78 | pt_quad_offset, 79 | samples_Carray) 80 | 81 | 82 | cdef np.ndarray[np.double_t, ndim=2] samples_array = np.zeros((n_sample, p)) 83 | for i in range(n_sample): 84 | for j in range(p): 85 | samples_array[i, j] = samples_Carray[i*p + j] 86 | 87 | free(samples_Carray) 88 | 89 | return samples_array 90 | 91 | 92 |
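Illustrative usage (not a file in this repository): a hypothetical call of the quad_sampler entry point defined above. The argument shapes are inferred from the Cython signature (quad stacks numquad p-by-p quadratic forms, quad_lin and lin carry their linear parts, and the offset_* arrays the constants); the module name sampler is taken from src_C/setup.py further below and assumes the extension has been built in place. This sketches shapes only -- a real call needs constraints with a strictly feasible initial point.

import numpy as np
from sampler import quad_sampler  # extension name from src_C/setup.py (assumes an in-place build)

p, numlin, numquad, n_sample = 5, 2, 1, 100
draws = quad_sampler(n_sample,
                     np.zeros(p),                # initial point, length p
                     np.zeros((numquad, p, p)),  # stacked quadratic forms
                     np.zeros((numquad, p)),     # linear parts of the quadratic constraints
                     np.zeros((numlin, p)),      # linear constraint matrix
                     np.zeros(numquad),          # quadratic-constraint offsets
                     np.zeros(numlin))           # linear-constraint offsets
# draws is an (n_sample, p) ndarray of HMC samples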
-------------------------------------------------------------------------------- /selectinf/src_C/HmcSampler.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: HmcSampler.h 3 | * Author: aripakman 4 | * 5 | * Created on July 4, 2012, 10:44 AM 6 | */ 7 | 8 | #ifndef HMCSAMPLER_H 9 | #define HMCSAMPLER_H 10 | 11 | #define _USE_MATH_DEFINES 12 | 13 | #include <cmath> /* NB: the four bracketed include targets were lost in extraction; reconstructed */ 14 | #include <Eigen/Dense> 15 | #include <vector> 16 | #include <tr1/random> 17 | 18 | using namespace Eigen; 19 | using namespace std; 20 | using namespace std::tr1; 21 | 22 | struct LinearConstraint{ 23 | VectorXd f; 24 | double g; 25 | }; 26 | 27 | struct QuadraticConstraint{ 28 | MatrixXd A; 29 | VectorXd B; 30 | double C; 31 | }; 32 | 33 | 34 | class HmcSampler { 35 | public: 36 | 37 | HmcSampler(const int & d, const int & seed); 38 | 39 | void setInitialValue(const VectorXd & initial); 40 | void addLinearConstraint(const VectorXd & f, const double & g); 41 | void addQuadraticConstraint(const MatrixXd & A, const VectorXd & B, const double & C); 42 | MatrixXd sampleNext(bool returnTrace = false); 43 | 44 | private: 45 | int dim; 46 | VectorXd lastSample; 47 | static const double min_t; 48 | vector<LinearConstraint> linearConstraints; 49 | vector<QuadraticConstraint> quadraticConstraints; 50 | 51 | ranlux64_base_01 eng1; 52 | // mt19937 eng1; //to sample time and momenta 53 | uniform_real<> ud; 54 | normal_distribution<> nd; 55 | 56 | void _getNextLinearHitTime(const VectorXd & a, const VectorXd & b, double & t, int & cn ); 57 | void _getNextQuadraticHitTime(const VectorXd & a, const VectorXd & b, double & t, int & cn, const bool ); 58 | double _verifyConstraints(const VectorXd &); 59 | void _updateTrace( VectorXd const & a, VectorXd const & b, double const & tt, MatrixXd & tracePoints); 60 | }; 61 | 62 | #endif /* HMCSAMPLER_H */ 63 | 64 | -------------------------------------------------------------------------------- /selectinf/src_C/logfile.txt: -------------------------------------------------------------------------------- 1 | -1-0 2 | -0-1 3 | 4 | 0.846196-0.9041 5 | 0.7401690.590085 6 | -0.18959-0.17084 7 | -0.4238650.0333025 8 | -0.592693-0.266382 9 | 0.0690678-0.00674659 10 | -0.174223-0.431466 11 | 0.6978830.440892 12 | 0.144409-0.675854 13 | -0.3425970.0214389 14 | 15 | 0.846196-0.9041 16 | 0.7401690.590085 17 | -0.18959-0.17084 18 | -0.4238650.0333025 19 | -0.592693-0.266382 20 | 0.0690678-0.00674659 21 | -0.174223-0.431466 22 | 0.6978830.440892 23 | 0.144409-0.675854 24 | -0.3425970.0214389 25 | -------------------------------------------------------------------------------- /selectinf/src_C/preparation_Eig_Vect.cpp: -------------------------------------------------------------------------------- 1 | #include <fstream> /* NB: bracketed include targets in this file were lost in extraction; reconstructed */ 2 | #include <iostream> 3 | #include "HmcSampler.h" 4 | 5 | #include "preparation_Eig_Vect.h" 6 | 7 | 8 | #include <Eigen/Dense> 9 | 10 | using namespace std; 11 | using namespace Eigen; 12 | 13 | void samples( 14 | int n, 15 | int dim, 16 | int seed, 17 | double *initial, 18 | int numlin, 19 | int numquad, 20 | double *lin, 21 | double *quad, 22 | double *quad_lin, 23 | double *offset_lin, 24 | double *offset_quad, 25 | double *samples_Carray 26 | ){ 27 | 28 | 29 | const Map<VectorXd> initial_value(initial, dim); 30 | 31 | 32 | 33 | ofstream logfile; 34 | logfile.open ("logfile.txt"); 35 | 36 | 37 | HmcSampler hmc1(dim, seed); 38 | if (numlin >0){ 39 | const Map<MatrixXd> F(lin, numlin, dim); 40 | const Map<VectorXd> g(offset_lin, numlin); 41 | 42 | for(int i=0; i<numlin; i++){ /* reconstructed: this loop body was lost in extraction */ 43 | hmc1.addLinearConstraint(F.row(i).transpose(), g(i)); 44 | } 45 | } 46 | 47 | if (numquad >0){ 48 | 49 | for(int i=0; i<numquad; i++){ /* reconstructed */ 50 | double *indice = &quad[i*dim*dim]; 51 | const Map<MatrixXd> A_Map(indice, dim, dim); 52 | 53 | MatrixXd A(dim, dim); /* reconstructed: original lines 53-62 were lost in extraction */ 54 | for(int k=0; k<dim; k++){ 55 | for(int l=0; l<dim; l++){ 56 | A(k,l) = A_Map(k,l); 57 | } 58 | } 59 | 60 | 61 | 62 | 63 | const Map<VectorXd> B_Map(&quad_lin[i*dim], dim); 64 | VectorXd
B(B_Map); 65 | double C = offset_quad[i]; 66 | hmc1.addQuadraticConstraint(A,B,C); 67 | } 68 | 69 | } 70 | 71 | hmc1.setInitialValue(initial_value); 72 | 73 | MatrixXd samples(n,dim); 74 | 75 | for (int i=0; i<n; i++){ /* reconstructed: the remainder of this file was lost in extraction */ 76 | samples.row(i) = hmc1.sampleNext(); 77 | for (int j=0; j<dim; j++){ 78 | samples_Carray[i*dim + j] = samples(i,j); 79 | } 80 | } 81 | 82 | logfile.close(); 83 | } 84 | -------------------------------------------------------------------------------- /selectinf/src_C/sample_preparation.pyx: -------------------------------------------------------------------------------- 1 | # NB: lines 1-47 were lost in extraction; reconstructed from the #sample_preparation.pyx# autosave copy above 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | #from cython.view cimport array as cvarray 6 | from libc.stdlib cimport malloc, free 7 | 8 | 9 | cdef extern from "preparation_Eig_Vect.h": 10 | void samples(int n, 11 | int dim, 12 | int seed, 13 | double* initial, 14 | int numlin, 15 | int numquad, 16 | double* lin, 17 | double* quad, 18 | double* quad_lin, 19 | double* offset_lin, 20 | double* offset_quad, 21 | double* samples_Carray) 22 | 23 | 24 | 25 | def quad_sampler(int n_sample, 26 | initial, 27 | quad,# = np.array([]).reshape((0, 0, 0)), 28 | quad_lin,# = np.array([]).reshape((0, 0)), 29 | lin,# = np.array([]).reshape((0,0)), 30 | offset_quad,# = np.array([]), 31 | offset_lin # = np.array([]) 32 | ): 33 | 34 | 35 | 36 | cdef int numquad = quad.shape[0] 37 | cdef int p = quad.shape[1] 38 | cdef int numlin = lin.shape[0] 39 | 40 | cdef np.ndarray[np.double_t, ndim=3] quad2 = np.ascontiguousarray(-quad) 41 | cdef np.ndarray[np.double_t, ndim=2] quad_lin2 = np.ascontiguousarray(-quad_lin) 42 | cdef np.ndarray[np.double_t, ndim=1] offset_quad2 = np.ascontiguousarray(offset_quad) 43 | 44 | cdef double *pt_quad 45 | cdef double *pt_quad_lin 46 | cdef double *pt_quad_offset 47 | if numquad > 0: 48 | pt_quad = &quad2[0, 0, 0] 49 | pt_quad_lin = &quad_lin2[0, 0] 50 | pt_quad_offset = &offset_quad2[0] 51 | 52 | 53 | 54 | print("quad inequalities generated") 55 | 56 | 57 | cdef np.ndarray[np.double_t, ndim=2] lin2 = np.ascontiguousarray(-lin ) 58 | cdef np.ndarray[np.double_t, ndim=1] offset_lin2 = np.ascontiguousarray(offset_lin ) 59 | 60 | cdef double *pt_lin 61 | cdef double *pt_lin_offset 62 | if numlin > 0: 63 | pt_lin_offset = &offset_lin2[0] 64 | pt_lin = &lin2[0, 0] 65 | 66 | cdef np.ndarray[np.double_t, ndim=1] initial2 = np.ascontiguousarray(initial) 67 | 68 | cdef int seed = np.random.randint(1, 100000) 69 | 70 | cdef double *samples_Carray = <double *> malloc(n_sample*p * sizeof(double)) 71 | 72 | samples(n_sample, 73 | p, 74 | seed, 75 | &initial2[0], 76 | numlin, 77 | numquad, 78 | pt_lin, 79 | pt_quad, 80 | pt_quad_lin, 81 | pt_lin_offset, 82 | pt_quad_offset, 83 | samples_Carray) 84 | 85 | 86 | cdef np.ndarray[np.double_t, ndim=2] samples_array = np.zeros((n_sample, p)) 87 | for i in range(n_sample): 88 | for j in range(p): 89 | samples_array[i, j] = samples_Carray[i*p + j] 90 | 91 | free(samples_Carray) 92 | 93 | return samples_array 94 | 95 | 96 | -------------------------------------------------------------------------------- /selectinf/src_C/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | from Cython.Build import cythonize 3 | 4 | from Cython.Distutils import build_ext 5 | import numpy as np 6 | 7 | 8 | 9 | setup( 10 | #name = 'kmean', 11 | cmdclass = {'build_ext': build_ext}, 12 | include_dirs = [np.get_include()], 13 | ## ext_modules = cythonize("sample_preparation.pyx", 14 | ## language="c++") 15 | ext_modules = [Extension('sampler', 16 | ["sample_preparation.pyx" , 17 | 'preparation_Eig_Vect.cpp' , 18 | 'HmcSampler.cpp'], 19 | language="c++", 20 | extra_compile_args = ["-W", 21 | "-Wall", 22 | "-ansi", 23 | "-pedantic", 24 | "-stdlib=libstdc++"#, 25 | #"-fPIC" 26 | ], 27 | extra_link_args = ["-stdlib=libstdc++"] 28 | )] 29 | 30 | ) 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /selectinf/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/tests/__init__.py -------------------------------------------------------------------------------- /selectinf/tests/flags.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | SMALL_SAMPLES = False 4 | SET_SEED = False 5 | 6 | if "USE_SMALL_SAMPLES" in os.environ: 7 | SMALL_SAMPLES = True 8 | 9 | if "USE_TEST_SEED" in os.environ: 10 | SET_SEED = True 11 | -------------------------------------------------------------------------------- /selectinf/tests/test_instance.py: -------------------------------------------------------------------------------- 1 | from numpy import inf 2 | from itertools import product 3 | from .instance import gaussian_instance, logistic_instance, HIV_NRTI 4 | 5 | def test_gaussian_instance(): 6 | 7 | for scale, center, random_signs, df in product( 8 | [True, False], 9 | [True, False], 10 | [True, False], 11 | [40, inf]): 12 |
gaussian_instance(n=10, 13 | p=20, 14 | s=4, 15 | random_signs=random_signs, 16 | scale=scale, 17 | center=center, 18 | df=df) 19 | 20 | def test_logistic_instance(): 21 | 22 | for scale, center, random_signs in product( 23 | [True, False], 24 | [True, False], 25 | [True, False]): 26 | logistic_instance(n=10, 27 | p=20, 28 | s=4, 29 | random_signs=random_signs, 30 | scale=scale, 31 | center=center) 32 | 33 | def test_HIV_instance(): 34 | 35 | HIV_NRTI() 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /selectinf/tests/tests.py: -------------------------------------------------------------------------------- 1 | from ..algorithms import tests as algorithms 2 | from ..distributions import tests as distributions 3 | from ..truncated import tests as truncated 4 | from ..constraints import tests as constraints 5 | from ..sampling import tests as sampling 6 | -------------------------------------------------------------------------------- /selectinf/truncated/F.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mpmath as mp 3 | 4 | from .base import truncated 5 | 6 | 7 | def sf_F(d1, d2, scale): 8 | 9 | def sf(a, b=np.inf, dps=15): 10 | dps_temp = mp.mp.dps 11 | mp.mp.dps = dps 12 | 13 | tmp_a = d1*a/d2 14 | tmp_b = d1*b/d2 15 | beta_a = tmp_a / (1. + tmp_a) 16 | beta_b = tmp_b / (1. + tmp_b) 17 | if b == np.inf: 18 | beta_b = 1. 19 | sf = mp.betainc(d1/2., d2/2., 20 | x1=beta_a, x2=beta_b, 21 | regularized=True) 22 | mp.mp.dps = dps_temp 23 | return sf 24 | 25 | return sf 26 | 27 | def null_f(x): 28 | raise ValueError("Shouldn't be called") 29 | return 0 30 | 31 | 32 | class truncated_F(truncated): 33 | def __init__(self, intervals, d1, d2, scale=1): 34 | self._d1 = d1 35 | self._d2 = d2 36 | self._scale = scale 37 | 38 | truncated.__init__(self, 39 | intervals, 40 | null_f, 41 | null_f, 42 | sf_F(d1, d2, scale), 43 | null_f) 44 | -------------------------------------------------------------------------------- /selectinf/truncated/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/truncated/__init__.py -------------------------------------------------------------------------------- /selectinf/truncated/api.py: -------------------------------------------------------------------------------- 1 | from .base import find_root 2 | 3 | from .gaussian import truncated_gaussian 4 | from .chi import truncated_chi, truncated_chi2 5 | from .T import truncated_T 6 | from .F import truncated_F 7 | -------------------------------------------------------------------------------- /selectinf/truncated/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/truncated/tests/__init__.py -------------------------------------------------------------------------------- /selectinf/truncated/tests/test_truncated.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import nose.tools as nt 3 | import numpy as np 4 | import numpy.testing.decorators as dec 5 | 6 | from ..gaussian import truncated_gaussian, truncated_gaussian_old 7 | from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
8 | from ...tests.flags import SMALL_SAMPLES, SET_SEED 9 | 10 | intervals = [(-np.inf,-4.),(3.,np.inf)] 11 | 12 | tg = truncated_gaussian(intervals) 13 | 14 | X = np.linspace(-5,5,101) 15 | F = [tg.cdf(x) for x in X] 16 | 17 | def test_sigma(): 18 | tg2 = truncated_gaussian_old(intervals, scale=2.) 19 | tg1 = truncated_gaussian_old(np.array(intervals)/2., scale=1.) 20 | 21 | Z = 3.5 22 | nt.assert_equal(np.around(float(tg1.cdf(Z/2.)), 3), 23 | np.around(float(tg2.cdf(Z)), 3)) 24 | np.testing.assert_equal(np.around(np.array(2 * tg1.equal_tailed_interval(Z/2,0.05)), 4), 25 | np.around(np.array(tg2.equal_tailed_interval(Z,0.05)), 4)) 26 | 27 | @set_seed_iftrue(SET_SEED) 28 | @dec.skipif(True, 'checking coverage: this is random with highish failure rate') 29 | @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=100) 30 | def test_equal_tailed_coverage(nsim=1000): 31 | 32 | alpha = 0.25 33 | tg = truncated_gaussian_old([(2.3,np.inf)], scale=2) 34 | coverage = 0 35 | for i in range(nsim): 36 | while True: 37 | Z = np.random.standard_normal() * 2 38 | if Z > 2.3: 39 | break 40 | L, U = tg.equal_tailed_interval(Z, alpha) 41 | coverage += (U > 0) * (L < 0) 42 | SE = np.sqrt(alpha*(1-alpha)*nsim) 43 | print(coverage) 44 | nt.assert_true(np.fabs(coverage - (1-alpha)*nsim) < 2*SE) 45 | 46 | @set_seed_iftrue(SET_SEED) 47 | @dec.skipif(True, 'really slow') 48 | @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=100) 49 | def test_UMAU_coverage(nsim=1000): 50 | 51 | alpha = 0.25 52 | tg = truncated_gaussian_old([(2.3,np.inf)], scale=2) 53 | coverage = 0 54 | for i in range(nsim): 55 | while True: 56 | Z = np.random.standard_normal()*2 57 | if Z > 2.3: 58 | break 59 | L, U = tg.UMAU_interval(Z, alpha) 60 | coverage += (U > 0) * (L < 0) 61 | SE = np.sqrt(alpha*(1-alpha)*nsim) 62 | print(coverage) 63 | nt.assert_true(np.fabs(coverage - (1-alpha)*nsim) < 2.1*SE) 64 | -------------------------------------------------------------------------------- /selectinf/truncated/tests/test_truncatedFT.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import f as fdist, t as tdist 3 | 4 | from ..F import sf_F 5 | from ..T import sf_T 6 | 7 | def test_F(): 8 | 9 | f1 = sf_F(3.,20.,1) 10 | f2 = fdist(3.,20.) 11 | 12 | V = np.linspace(1,7,201) 13 | V1 = [float(f1(v)) for v in V] 14 | V2 = f2.sf(V) 15 | np.testing.assert_allclose(V1, V2) 16 | 17 | V = np.linspace(1,7,11) 18 | V1 = [float(f1(u,v)) for u,v in zip(V[:-1],V[1:])] 19 | V2 = [f2.sf(u)-f2.sf(v) for u,v in zip(V[:-1],V[1:])] 20 | np.testing.assert_allclose(V1, V2) 21 | 22 | def test_T(): 23 | 24 | f1 = sf_T(20.) 25 | f2 = tdist(20.) 
26 | 27 | V = np.linspace(-2,3,201) 28 | V1 = [float(f1(v)) for v in V] 29 | V2 = f2.sf(V) 30 | np.testing.assert_allclose(V1, V2) 31 | 32 | V = np.linspace(-2,3,11) 33 | V1 = [float(f1(u,v)) for u,v in zip(V[:-1],V[1:])] 34 | V2 = [f2.sf(u)-f2.sf(v) for u,v in zip(V[:-1],V[1:])] 35 | np.testing.assert_allclose(V1, V2) 36 | -------------------------------------------------------------------------------- /selectinf/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/utils/__init__.py -------------------------------------------------------------------------------- /selectinf/utils/tools.py: -------------------------------------------------------------------------------- 1 | import time 2 | from functools import wraps 3 | 4 | 5 | dict_time = dict() 6 | 7 | 8 | def timethis(func): 9 | ''' 10 | Decorator that reports the execution time. 11 | ''' 12 | dict_time[func.__name__] = (0, 0) 13 | 14 | @wraps(func) 15 | def wrapper(*args, **kwargs): 16 | start = time.time() 17 | result = func(*args, **kwargs) 18 | end = time.time() 19 | #print(func.__name__, end-start) 20 | 21 | k, t = dict_time[func.__name__] 22 | dict_time[func.__name__] = k+1, t + end-start 23 | 24 | return result 25 | return wrapper 26 | 27 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [versioneer] 2 | VCS = git 3 | style = pep440 4 | versionfile_source = selectinf/_version.py 5 | tag_prefix = 6 | parentdir_prefix = selectinf- 7 | -------------------------------------------------------------------------------- /tools/build_modref_templates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 3 | # vi: set ft=python sts=4 ts=4 sw=4 et: 4 | """Script to auto-generate our API docs. 5 | 6 | This script should run in Python 2 and Python 3. 7 | """ 8 | # stdlib imports 9 | import os 10 | 11 | # local imports 12 | from apigen import ApiDocWriter 13 | 14 | #***************************************************************************** 15 | if __name__ == '__main__': 16 | package = 'selectinf' 17 | outdir = os.path.join('source', 'api', 'generated') 18 | docwriter = ApiDocWriter(package) 19 | docwriter.package_skip_patterns += [r'\.fixes$', 20 | r'\.externals$', 21 | #r'\.labs\.viz', 22 | ] 23 | docwriter.write_api_docs(outdir) 24 | docwriter.write_index(outdir, 'gen', relative_to=os.path.join('source', 'api')) 25 | print('%d files written' % len(docwriter.written_modules)) 26 | -------------------------------------------------------------------------------- /tools/noseall_with_coverage: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | 3 | import os 4 | 5 | os.system(""" 6 | env USE_TEST_SEED=1 USE_SMALL_SAMPLES=1 nosetests --with-coverage --cover-package=selectinf --verbose selectinf 7 | """) 8 | -------------------------------------------------------------------------------- /tools/strip_notebook.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple example script for running notebooks and stripping their outputs.
3 | 4 | Usage: `strip_notebook.py foo.ipynb [bar.ipynb [...]]` 5 | 6 | Each notebook is stripped of its outputs after checking that it executes. 7 | Used to clean notebooks before committing to git. 8 | """ 9 | 10 | from selection.utils.nbtools import strip_outputs, reads, writes 11 | from argparse import ArgumentParser 12 | 13 | def main(): 14 | parser = ArgumentParser( 15 | description='Run cells in notebook and strip outputs.') 16 | parser.add_argument('--clobber', action='store_true', 17 | help='if set, overwrite existing notebook files with stripped version') 18 | parser.add_argument('--norun', action='store_true', 19 | help='if set, do not run cells before stripping') 20 | parser.add_argument('notebooks', 21 | metavar='NB', 22 | help='Notebooks to strip outputs from.', 23 | nargs='+', 24 | type=str) 25 | 26 | args = parser.parse_args() 27 | 28 | for ipynb in args.notebooks: 29 | print("running and stripping %s" % ipynb) 30 | with open(ipynb) as f: 31 | stripped_nb = strip_outputs(reads(f.read(), 'json'), 32 | run_cells=not args.norun) 33 | if args.clobber: 34 | print('clobbering %s' % ipynb) 35 | with open(ipynb, 'w') as f: 36 | f.write(writes(stripped_nb, 'json')) 37 | else: 38 | print('not clobbering %s' % ipynb) 39 | 40 | if __name__ == '__main__': 41 | main() 42 | -------------------------------------------------------------------------------- /umpu/UMAU.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/umpu/UMAU.pdf --------------------------------------------------------------------------------