├── .coveragerc
├── .gitignore
├── .gitmodules
├── .readthedocs.yml
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.rst
├── THANKS
├── TODO
├── _config.yml
├── appveyor.yml
├── constraints.txt
├── core.py
├── cythexts.py
├── dev-requirements.txt
├── doc-requirements.txt
├── doc
├── Makefile
├── adjusted_MLE
│ ├── __init__.py
│ ├── sampler_based_quantiles.py
│ └── tests
│ │ ├── __init__.py
│ │ ├── comparison_metrics.py
│ │ ├── risk_comparisons.py
│ │ ├── test_compare_sampler_mle.py
│ │ ├── test_cv_MLE_inference.py
│ │ └── test_risk.py
├── examples
│ ├── compute_coverages.rst
│ ├── conditional_sampling.py
│ ├── hiv_approx_ci.py
│ └── power_comparison.py
├── learning_examples
│ ├── BH
│ │ ├── gbm_targets_BH.py
│ │ ├── gbm_targets_BH_larger.py
│ │ ├── gbm_targets_BH_smallB.py
│ │ ├── keras_targets_BH.py
│ │ ├── keras_targets_BH_marginal.py
│ │ ├── logit_targets_BH.py
│ │ ├── logit_targets_BH_marginal.py
│ │ ├── logit_targets_BH_single.py
│ │ └── random_forest_targets_BH.py
│ ├── HIV
│ │ ├── CV.py
│ │ ├── HIV_scale_CV.py
│ │ ├── NRTI_DATA.txt
│ │ ├── fixed.py
│ │ ├── lambda_1se.py
│ │ ├── stability_CV.py
│ │ ├── stability_CV_6000.py
│ │ ├── stability_CV_6000_null.py
│ │ └── stability_selection.py
│ ├── bootstrap
│ │ ├── test_boot.py
│ │ └── test_boot_scale1.py
│ ├── calibration
│ │ └── lasso_calibration.py
│ ├── cross_inference
│ │ └── cross_inference.py
│ ├── keras
│ │ ├── keras_example.py
│ │ ├── keras_targets.py
│ │ ├── keras_targets_BH_strong.py
│ │ ├── keras_targets_BH_weak.py
│ │ ├── keras_targets_medium.py
│ │ └── keras_targets_small.py
│ ├── knockoffs
│ │ ├── knockoff_followup.py
│ │ ├── knockoff_kernel.py
│ │ └── knockoff_kernel_multi.py
│ ├── lasso
│ │ └── lasso_example.py
│ ├── lasso_CV
│ │ ├── followup.py
│ │ ├── lasso_exact_CV_null.py
│ │ └── lasso_example_CV.py
│ ├── multi_target
│ │ ├── additive_targets.py
│ │ ├── additive_targets_small.py
│ │ ├── followup_multi.py
│ │ ├── gbm2.py
│ │ ├── gbm_targets.py
│ │ ├── gbm_targets_small.py
│ │ ├── lasso_example_multi.py
│ │ ├── lasso_example_multi_CV.py
│ │ ├── lasso_example_multi_CV_random.py
│ │ ├── lasso_example_multi_CV_stronger.py
│ │ ├── lasso_example_multi_bigger.py
│ │ ├── lasso_example_multi_gbm.py
│ │ ├── lasso_example_multi_gbm_sk.py
│ │ ├── lasso_example_multi_random.py
│ │ ├── lasso_example_multi_random_gbm.py
│ │ ├── lasso_example_multi_random_rf.py
│ │ ├── lasso_example_multi_rf.py
│ │ ├── lasso_example_multi_rf_sk.py
│ │ ├── lee_multi.py
│ │ ├── lee_multi_500.py
│ │ └── lee_multi_bigger.py
│ ├── parametric
│ │ ├── lasso_selected.py
│ │ ├── lasso_selected_resid.py
│ │ ├── probit_step.py
│ │ └── probit_step_both.py
│ ├── riboflavin
│ │ ├── CV.py
│ │ └── CV_smaller.py
│ ├── stability
│ │ ├── stability_selection.py
│ │ ├── stability_selection_harder.py
│ │ └── stability_selection_harder_big.py
│ └── standalone
│ │ ├── basic_example.py
│ │ ├── cleaner_basic_example.py
│ │ ├── full_model_example.py
│ │ ├── regression_example.py
│ │ └── replicate_basic_example.py
├── license.rst
├── notebooks
│ ├── Group LASSO Jacobian.Rmd
│ ├── Group LASSO Jacobian.ipynb
│ ├── UMPU.ipynb
│ ├── isotonic.ipynb
│ ├── lasso.ipynb
│ ├── learning
│ │ ├── Different pivots.ipynb
│ │ ├── Multiple events in algorithm.ipynb
│ │ ├── Multiple events not monotone.ipynb
│ │ ├── Multiple randomization with fitting.ipynb
│ │ ├── Multiple randomization with fitting_boot.ipynb
│ │ ├── Multiple randomization.ipynb
│ │ ├── Non convex region II.ipynb
│ │ ├── Non convex region.ipynb
│ │ ├── simple_example_pivots.pdf
│ │ └── simple_example_sel_prob.pdf
│ ├── pca_rank1.ipynb
│ ├── quadratic_decisions.ipynb
│ ├── reduced_covtest.ipynb
│ ├── screening.ipynb
│ ├── selection_objects.ipynb
│ └── spacings.ipynb
└── source
│ ├── _static
│ ├── logo.png
│ └── selection.css
│ ├── _templates
│ └── layout.html
│ ├── algorithms
│ ├── covtest.Rmd
│ ├── covtest.ipynb
│ ├── index.rst
│ ├── spacings.rst
│ └── spacings_files
│ │ ├── spacings_23_0.png
│ │ ├── spacings_25_0.png
│ │ ├── spacings_27_0.png
│ │ ├── spacings_29_0.png
│ │ ├── spacings_31_0.png
│ │ ├── spacings_3_0.png
│ │ ├── spacings_4_0.png
│ │ ├── spacings_5_0.png
│ │ ├── spacings_6_0.png
│ │ ├── spacings_7_0.png
│ │ └── spacings_9_0.png
│ ├── conf.py
│ ├── docattribute.rst
│ ├── documentation.rst
│ ├── download.rst
│ ├── index.rst
│ ├── learning
│ ├── Learning1.Rmd
│ ├── Learning1.ipynb
│ ├── Learning2.Rmd
│ ├── Learning2.ipynb
│ └── index.rst
│ ├── license.rst
│ ├── links_names.txt
│ ├── randomized
│ ├── index.rst
│ ├── lasso.Rmd
│ └── lasso.ipynb
│ └── sphinxext
│ └── math_dollar.py
├── figs
├── pictures.r
└── voronoi_figs.py
├── lasso_example_null_CV.py
├── requirements.txt
├── sandbox
├── SPRT.ipynb
├── absurd.py
├── bayesian
│ ├── __init__.py
│ ├── crime_data_attempt.py
│ ├── crime_data_set.py
│ ├── dual_bayesian.py
│ ├── dual_lasso_test.py
│ ├── hiv_inference.py
│ ├── lasso_selection.py
│ ├── logistic_bayesian.py
│ ├── mixed_model.py
│ ├── ms_lasso_2stage.py
│ ├── random_reduced_lasso_bayesian_model.py
│ ├── random_reduced_lasso_test.py
│ ├── random_reduced_logistic_test.py
│ ├── read_file.py
│ ├── reduced_forward_stepwise_test.py
│ ├── reduced_lasso_bayesian_model.py
│ └── reduced_marginal_screening.py
├── inference_hiv_data.py
├── isotonic.py
├── kmeans.py
├── multi_forward_step.py
├── multistep.ipynb
├── randomized2.py
├── randomized_tests
│ ├── test_estimation.py
│ ├── test_greedy_step.py
│ ├── test_marginalize_subgrad.py
│ ├── test_multiple_queries.py
│ ├── test_multiple_queries_CI.py
│ ├── test_nonrandomized.py
│ ├── test_randomization_to_zero.py
│ ├── test_reconstruction.py
│ ├── test_scaling.py
│ ├── test_threshold_score.py
│ └── test_without_screening.py
├── sample_splitting.ipynb
├── sample_splitting.py
├── sample_splitting_alex.py
├── sample_splitting_alex_null.py
├── tensorflow_test.py
├── test_cover.py
├── test_isotonic.py
├── test_variance.py
└── variance_estimation.py
├── selectinf
├── __init__.py
├── _version.py
├── algorithms
│ ├── __init__.py
│ ├── api.py
│ ├── change_point.py
│ ├── covtest.py
│ ├── cv.py
│ ├── cv_glmnet.py
│ ├── debiased_lasso.py
│ ├── debiased_lasso_utils.pyx
│ ├── forward_step.py
│ ├── lasso.py
│ ├── pca.py
│ ├── screening.py
│ ├── softmax.py
│ ├── sqrt_lasso.py
│ ├── stopping_rules.py
│ └── tests
│ │ ├── __init__.py
│ │ ├── test_IC.py
│ │ ├── test_ROSI.py
│ │ ├── test_change_point.py
│ │ ├── test_compareR.py
│ │ ├── test_covtest.py
│ │ ├── test_data_carving.py
│ │ ├── test_debiased_lasso.py
│ │ ├── test_forward_step.py
│ │ ├── test_lasso.py
│ │ ├── test_screening.py
│ │ ├── test_softmax.py
│ │ └── test_sqrt_lasso.py
├── api.py
├── base.py
├── constraints
│ ├── __init__.py
│ ├── affine.py
│ ├── api.py
│ ├── base.py
│ ├── estimation.py
│ ├── intervals.py
│ ├── quadratic.py
│ ├── quasi_affine.py
│ └── tests
│ │ ├── __init__.py
│ │ ├── test_affine.py
│ │ ├── test_estimation.py
│ │ ├── test_quadratic_tests.py
│ │ ├── test_quasi.py
│ │ └── test_unknown_sigma.py
├── distributions
│ ├── __init__.py
│ ├── api.py
│ ├── chain.py
│ ├── chisq.py
│ ├── discrete_family.py
│ ├── discrete_multiparameter.py
│ ├── intervals.py
│ ├── pvalue.py
│ └── tests
│ │ ├── __init__.py
│ │ ├── test_chains.py
│ │ ├── test_discreteExFam.py
│ │ └── test_multiparameter.py
├── glm.py
├── info.py
├── learning
│ ├── Rfitters.py
│ ├── Rutils.py
│ ├── __init__.py
│ ├── core.py
│ ├── fitters.py
│ ├── keras_fit.py
│ ├── learners.py
│ ├── samplers.py
│ └── utils.py
├── randomized
│ ├── __init__.py
│ ├── api.py
│ ├── cv_view.py
│ ├── group_lasso.py
│ ├── lasso.py
│ ├── modelQ.py
│ ├── query.py
│ ├── randomization.py
│ ├── sandbox
│ │ ├── M_estimator_group_lasso.py
│ │ ├── M_estimator_nonrandom.py
│ │ ├── convenience.py
│ │ ├── general_lasso.py
│ │ ├── greedy_step.py
│ │ ├── group_lasso.py
│ │ └── lasso_iv.py
│ ├── screening.py
│ ├── selective_MLE_utils.pyx
│ ├── slope.py
│ └── tests
│ │ ├── __init__.py
│ │ ├── sandbox
│ │ ├── test_Mest.py
│ │ ├── test_convenience.py
│ │ ├── test_cv.py
│ │ ├── test_cv_corrected_nonrandomized_lasso.py
│ │ ├── test_cv_glmnet.py
│ │ ├── test_cv_lee_et_al.py
│ │ ├── test_decompose_subgrad.py
│ │ ├── test_fixedX.py
│ │ ├── test_full_lasso.py
│ │ ├── test_general_lasso.py
│ │ ├── test_general_lasso_pval.py
│ │ ├── test_intervals.py
│ │ ├── test_lasso_iv.py
│ │ ├── test_multiple_splits.py
│ │ ├── test_opt_weighted_intervals.py
│ │ ├── test_optimization_sampler.py
│ │ ├── test_sampling.py
│ │ ├── test_split.py
│ │ ├── test_split_compare.py
│ │ └── test_sqrt_lasso.py
│ │ ├── test_BH.py
│ │ ├── test_group_lasso.py
│ │ ├── test_lasso.py
│ │ ├── test_marginal_screening.py
│ │ ├── test_modelQ.py
│ │ ├── test_multiple_queries.py
│ │ ├── test_naive.py
│ │ ├── test_randomization.py
│ │ ├── test_selective_MLE.py
│ │ ├── test_selective_MLE_high.py
│ │ ├── test_selective_MLE_onedim.py
│ │ ├── test_slope.py
│ │ ├── test_slope_subgrad.py
│ │ ├── test_split_lasso.py
│ │ └── test_topK.py
├── reduced_optimization
│ └── tests
│ │ └── __init__.py
├── sampling
│ ├── __init__.py
│ ├── api.py
│ ├── langevin.py
│ ├── sequential.py
│ ├── sqrt_lasso.pyx
│ ├── tests
│ │ ├── __init__.py
│ │ ├── plots_fs.py
│ │ ├── test_fstep_langevin.py
│ │ ├── test_kfstep.py
│ │ ├── test_pca_langevin.py
│ │ ├── test_sample_sphere.py
│ │ └── test_sequential.py
│ ├── truncnorm.pyx
│ └── truncnorm_quadratic.pyx
├── sandbox
│ ├── approx_ci
│ │ ├── __init__.py
│ │ ├── ci_approx_density.py
│ │ ├── ci_approx_greedy_step.py
│ │ ├── selection_map.py
│ │ └── tests
│ │ │ ├── __init__.py
│ │ │ ├── test_glm.py
│ │ │ ├── test_greedy_step.py
│ │ │ └── test_threshold_score.py
│ └── bayesian
│ │ ├── __init__.py
│ │ ├── barrier.py
│ │ ├── credible_intervals.py
│ │ ├── dual_lasso.py
│ │ ├── estimator.py
│ │ ├── forward_stepwise_reduced.py
│ │ ├── initial_soln.py
│ │ ├── lasso_reduced.py
│ │ ├── marginal_screening_reduced.py
│ │ ├── ms_lasso_2stage_reduced.py
│ │ ├── par_carved_reduced.py
│ │ ├── par_random_lasso_reduced.py
│ │ ├── random_lasso_reduced.py
│ │ └── tests
│ │ ├── __init__.py
│ │ ├── test_carved_lasso.py
│ │ ├── test_dual_lasso.py
│ │ ├── test_fs.py
│ │ ├── test_lasso.py
│ │ └── test_ms_lasso_2stage.py
├── src_C
│ ├── #sample_preparation.pyx#
│ ├── HmcSampler.cpp
│ ├── HmcSampler.h
│ ├── logfile.txt
│ ├── preparation_Eig_Vect.cpp
│ ├── preparation_Eig_Vect.h
│ ├── sample_preparation.cpp
│ ├── sample_preparation.pyx
│ └── setup.py
├── tests
│ ├── __init__.py
│ ├── decorators.py
│ ├── flags.py
│ ├── instance.py
│ ├── test_instance.py
│ └── tests.py
├── truncated
│ ├── F.py
│ ├── T.py
│ ├── __init__.py
│ ├── api.py
│ ├── base.py
│ ├── chi.py
│ ├── gaussian.py
│ └── tests
│ │ ├── __init__.py
│ │ ├── test_truncated.py
│ │ └── test_truncatedFT.py
└── utils
│ ├── __init__.py
│ └── tools.py
├── setup.cfg
├── setup.py
├── setup_helpers.py
├── tools
├── apigen.py
├── build_modref_templates.py
├── gitwash_dumper.py
├── nbtools.py
├── noseall_with_coverage
└── strip_notebook.py
├── umpu
├── UMAU.pdf
├── umpu.r
└── umpuWriteup.tex
└── versioneer.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 | source = selection
4 | include = */selection/*
5 | omit =
6 | */setup.py
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | */*pyc
2 | */*~
3 | */*/*~
4 | */*/*/*~
5 | */*.out
6 | */*.aux
7 | */*.bbl
8 | */*.blg
9 | */*.vrb
10 | */*.synctex*
11 | */*.toc
12 | */*.snm
13 | */*.odt
14 | */*.ps
15 | */*.eps
16 | */*.dvi
17 | */*.log
18 | */*.nav
19 | */*.bak
20 | */*.vrb
21 | */*.pyc
22 | */*/*.pyc
23 | *.pyc
24 | selectinf/*.so
25 | selectinf/*.c
26 | selectinf/*/*.so
27 | selectinf/*/*.c
28 | build
29 | *ipynb_checkpoints
30 | */*ipynb_checkpoints
31 | .idea/*
32 | */.idea/*
33 | */*/.idea/*
34 | *.log
35 | *~
36 | .*sw*
37 | */*~
38 | *pyc
39 | */*pyc
40 | *.pdf
41 | *.csv
42 | doc/source/api/generated/*
43 | docs/source/api/generated/*
44 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "travis-tools"]
2 | path = travis-tools
3 | url = https://github.com/matthew-brett/travis-tools.git
4 | [submodule "C-software"]
5 | path = C-software
6 | url = https://github.com/selective-inference/C-software.git
7 |
8 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Build documentation in the docs/ directory with Sphinx
9 | sphinx:
10 | builder: html
11 | configuration: doc/source/conf.py
12 |
13 | # Build documentation with MkDocs
14 | #mkdocs:
15 | # configuration: mkdocs.yml
16 |
17 | # Optionally build your docs in additional formats such as PDF and ePub
18 | #formats: all
19 |
20 | # Optionally set the version of Python and requirements required to build your docs
21 | python:
22 | version: 3.6
23 | install:
24 | - requirements: requirements.txt
25 | - requirements: doc-requirements.txt
26 | - method: setuptools
27 | path: .
28 |
29 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2015, Selective Inference development team
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are
6 | met:
7 |
8 | * Redistributions of source code must retain the above copyright
9 | notice, this list of conditions and the following disclaimer.
10 |
11 | * Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials provided
14 | with the distribution.
15 |
16 | * The names of any contributors to this software
17 | may not be used to endorse or promote products derived
18 | from this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include AUTHOR LICENSE Makefile* MANIFEST.in setup* README.*
2 | include Changelog TODO
3 | recursive-include doc *
4 | recursive-include tools *
5 | # setup utilities
6 | include setup_helpers.py
7 | include cythexts.py
8 | recursive-include fake_pyrex *
9 | include versioneer.py
10 | include selection/_version.py
11 | include C-software/src/*.h
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 |
2 | The selection project
3 | =====================
4 |
5 | This project contains software for selective inference, with emphasis on
6 | selective inference in regression.
7 |
8 | Some key references
9 | -------------------
10 |
11 | - ``A significance test for the lasso``: http://arxiv.org/abs/1301.7161
12 | - ``Tests in adaptive regression via the Kac-Rice formula``:
13 | http://arxiv.org/abs/1308.3020
14 | - ``Post-selection adaptive inference for Least Angle Regression and the Lasso``:
15 | http://arxiv.org/abs/1401.3889
16 | - ``Exact post-selection inference with the lasso``:
17 | http://arxiv.org/abs/1311.6238
18 | - ``Exact Post Model Selection Inference for Marginal Screening``:
19 | http://arxiv.org/abs/1402.5596
20 |
21 | Install
22 | -------
23 |
24 | .. code:: bash
25 |
26 |    git submodule init # travis-tools and C-software
27 | git submodule update
28 | pip install -r requirements.txt
29 | python setup.py install
30 |
31 | Potential speedups
32 | ------------------
33 |
34 | - We can condition on “parts” of each draw of the sampler: in
35 |   particular, if we condition on the projection of the rejection
36 |   ``sample - center`` onto a direction, then resampling along that ray
37 |   can be sped up for some problems, such as the LASSO, at a possible cost in power.
38 |
39 | - Learning a higher-dimensional function can perhaps save some time;
40 |   proper conditioning has to be checked.
41 |
42 |
--------------------------------------------------------------------------------
/THANKS:
--------------------------------------------------------------------------------
1 | Selective Inference Team
2 | ------------------------
3 |
4 | Contributors to this project include:
5 |
6 | Yuval Benjamini
7 | Leonard Blier
8 | Will Fithian
9 | Jason Lee
10 | Joshua Loftus
11 | Stephen Reid
12 | Dennis Sun
13 | Yuekai Sun
14 | Jonathan Taylor
15 | Xiaoying Tian
16 | Ryan Tibshirani
17 | Robert Tibshirani
18 |
19 |
20 |
--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
1 | - Marginalize group LASSO
2 | - SLOPE: randomized and non-randomized
3 | - selective debiased LASSO
4 | - randomized sqrt LASSO
5 | - alternate randomization
6 | - user's choice of model for non-randomized
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-slate
--------------------------------------------------------------------------------
/constraints.txt:
--------------------------------------------------------------------------------
1 | rpy2<2.9
2 |
--------------------------------------------------------------------------------
/dev-requirements.txt:
--------------------------------------------------------------------------------
1 | # Requirements for developing selectinf
2 | # Check these dependencies against selectinf/info.py
3 | -r requirements.txt
4 | nose
5 |
--------------------------------------------------------------------------------
/doc-requirements.txt:
--------------------------------------------------------------------------------
1 | # Requirements for building docs
2 | # Check these dependencies against doc/source/conf.py
3 | -r dev-requirements.txt
4 | sphinx>=1.4
5 | numpydoc
6 | matplotlib
7 | texext
8 | nb2plots
9 | rpy2
10 | seaborn
11 | statsmodels
12 | tensorflow
13 | keras
14 | nbsphinx
15 |
--------------------------------------------------------------------------------
/doc/adjusted_MLE/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/adjusted_MLE/__init__.py
--------------------------------------------------------------------------------
/doc/adjusted_MLE/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/adjusted_MLE/tests/__init__.py
--------------------------------------------------------------------------------
/doc/adjusted_MLE/tests/test_risk.py:
--------------------------------------------------------------------------------
1 | import numpy as np, os, itertools
2 | import pandas as pd
3 |
4 | from rpy2 import robjects
5 | import rpy2.robjects.numpy2ri
6 | rpy2.robjects.numpy2ri.activate()
7 | import rpy2.robjects.pandas2ri
8 | from rpy2.robjects.packages import importr
9 |
10 | from .comparison_metrics import (sim_xy,
11 |                                  glmnet_lasso)
12 | from .risk_comparisons import risk_comparison
13 |
14 |
15 | def output_file(n=200,
16 | p=500,
17 | rho=0.35,
18 | s=5,
19 | beta_type=1,
20 | snr_values=np.array([0.10, 0.15, 0.20, 0.25, 0.30,
21 | 0.35, 0.42, 0.71, 1.22, 2.07]),
22 | tuning_nonrand="lambda.1se",
23 | tuning_rand="lambda.1se",
24 | randomizing_scale=np.sqrt(0.50),
25 | ndraw=50,
26 | outpath = None):
27 |
28 | df_risk = pd.DataFrame()
29 | if n > p:
30 | full_dispersion = True
31 | else:
32 | full_dispersion = False
33 |
34 | snr_list = []
35 | for snr in snr_values:
36 | snr_list.append(snr)
37 | relative_risk = np.squeeze(risk_comparison(n=n,
38 | p=p,
39 | nval=n,
40 | rho=rho,
41 | s=s,
42 | beta_type=beta_type,
43 | snr=snr,
44 | randomizer_scale=randomizing_scale,
45 | full_dispersion=full_dispersion,
46 |                                                    tuning_nonrand=tuning_nonrand,
47 |                                                    tuning_rand=tuning_rand, ndraw=ndraw))
48 |
49 | df_risk = df_risk.append(pd.DataFrame(data=relative_risk.reshape((1, 6)), columns=['sel-MLE', 'ind-est', 'rand-LASSO',
50 | 'rel-rand-LASSO', 'rel-LASSO','LASSO']), ignore_index=True)
51 |
52 | df_risk['n'] = n
53 | df_risk['p'] = p
54 | df_risk['s'] = s
55 | df_risk['rho'] = rho
56 | df_risk['beta-type'] = beta_type
57 | df_risk['snr'] = pd.Series(np.asarray(snr_list))
58 | df_risk['target'] = "selected"
59 |
60 | if outpath is None:
61 | outpath = os.path.dirname(__file__)
62 |
63 | outfile_risk_csv = os.path.join(outpath, "dims_" + str(n) + "_" + str(p) + "_risk_betatype" + str(beta_type) + "_rho_" + str(rho) + ".csv")
64 | outfile_risk_html = os.path.join(outpath, "dims_" + str(n) + "_" + str(p) + "_risk_betatype" + str(beta_type) + "_rho_" + str(rho) + ".html")
65 | df_risk.to_csv(outfile_risk_csv, index=False)
66 | df_risk.to_html(outfile_risk_html)
67 |
68 |
--------------------------------------------------------------------------------
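A minimal smoke-test invocation of the `output_file` driver above. This is a sketch, not part of the module: it assumes the script is imported as a package from within the `doc` directory (the `__init__.py` files shown in the tree make `adjusted_MLE.tests` importable), and that the rpy2/R dependencies pulled in by `comparison_metrics` are installed. The shortened SNR sweep, small `ndraw` and `/tmp` output path are illustrative choices; with these arguments the run writes `dims_100_50_risk_betatype1_rho_0.35.csv` and the matching `.html`.

    import numpy as np
    from adjusted_MLE.tests.test_risk import output_file

    output_file(n=100, p=50,
                snr_values=np.array([0.15, 0.42, 1.22]),  # shortened sweep
                ndraw=5,           # a few replications for a quick check
                outpath="/tmp")
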
/doc/examples/conditional_sampling.py:
--------------------------------------------------------------------------------
1 | """
2 | We demonstrate that our optimization variables have
3 | the correct distribution given the data.
4 | """
5 |
6 | import numpy as np
7 | import matplotlib.pyplot as plt
8 | from statsmodels.distributions import ECDF
9 |
10 | from selection.randomized.tests.test_sampling import test_conditional_law
11 |
12 | def main(ndraw=50000, burnin=5000, remove_atom=False, unpenalized=True, stepsize=1.e-2):
13 |
14 | fig_idx = 0
15 | for (rand,
16 | mcmc_opt,
17 | mcmc_omega,
18 | truncated_opt,
19 | truncated_omega) in test_conditional_law(ndraw=ndraw, burnin=burnin, stepsize=stepsize, unpenalized=unpenalized):
20 |
21 | fig_idx += 1
22 | fig = plt.figure(num=fig_idx, figsize=(8,8))
23 |
24 | plt.clf()
25 | idx = 0
26 | for i in range(mcmc_opt.shape[1]):
27 | plt.subplot(3,3,idx+1)
28 |
29 | mcmc_ = mcmc_opt[:, i]
30 | truncated_ = truncated_opt[:, i]
31 |
32 | xval = np.linspace(min(mcmc_.min(), truncated_.min()),
33 | max(mcmc_.max(), truncated_.max()),
34 | 200)
35 |
36 | if remove_atom:
37 | mcmc_ = mcmc_[mcmc_ < np.max(mcmc_)]
38 | mcmc_ = mcmc_[mcmc_ > np.min(mcmc_)]
39 |
40 | plt.plot(xval, ECDF(mcmc_)(xval), label='MCMC')
41 | plt.plot(xval, ECDF(truncated_)(xval), label='truncated')
42 | idx += 1
43 | if idx == 1:
44 | plt.legend(loc='lower right')
45 |
46 | fig.suptitle(' '.join([rand, "opt"]))
47 |
48 | fig_idx += 1
49 | fig = plt.figure(num=fig_idx, figsize=(8,8))
50 | plt.clf()
51 | idx = 0
52 | for i in range(mcmc_opt.shape[1]):
53 | plt.subplot(3,3,idx+1)
54 |
55 | mcmc_ = mcmc_omega[:, i]
56 | truncated_ = truncated_omega[:, i]
57 |
58 | xval = np.linspace(min(mcmc_.min(), truncated_.min()),
59 | max(mcmc_.max(), truncated_.max()),
60 | 200)
61 |
62 | if remove_atom:
63 | mcmc_ = mcmc_[mcmc_ < np.max(mcmc_)]
64 | mcmc_ = mcmc_[mcmc_ > np.min(mcmc_)]
65 | plt.plot(xval, ECDF(mcmc_)(xval), label='MCMC')
66 | plt.plot(xval, ECDF(truncated_)(xval), label='truncated')
67 | idx += 1
68 | if idx == 1:
69 | plt.legend(loc='lower right')
70 |
71 | fig.suptitle(' '.join([rand, "omega"]))
72 | plt.show()
73 |
74 |
75 |
76 |
77 |
78 |
--------------------------------------------------------------------------------
/doc/learning_examples/calibration/lasso_calibration.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import normal_sampler, logit_fit
12 |
13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000):
14 |
15 | # description of statistical problem
16 |
17 | X, y, truth = gaussian_instance(n=n,
18 | p=p,
19 | s=s,
20 | equicorrelated=False,
21 | rho=0.5,
22 | sigma=sigma,
23 | signal=signal,
24 | random_signs=True,
25 | scale=False)[:3]
26 |
27 | dispersion = sigma**2
28 |
29 | S = X.T.dot(y)
30 | covS = dispersion * X.T.dot(X)
31 | smooth_sampler = normal_sampler(S, covS)
32 |
33 | def meta_algorithm(XTX, XTXi, lam, sampler):
34 |
35 | p = XTX.shape[0]
36 | success = np.zeros(p)
37 |
38 | loss = rr.quadratic_loss((p,), Q=XTX)
39 | pen = rr.l1norm(p, lagrange=lam)
40 |
41 | scale = 0.
42 | noisy_S = sampler(scale=scale)
43 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
44 | problem = rr.simple_problem(loss, pen)
45 | soln = problem.solve(max_its=50, tol=1.e-6)
46 | success += soln != 0
47 | return set(np.nonzero(success)[0])
48 |
49 | XTX = X.T.dot(X)
50 | XTXi = np.linalg.inv(XTX)
51 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
52 | dispersion = np.linalg.norm(resid)**2 / (n-p)
53 |
54 | lam = 4. * np.sqrt(n)
55 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam)
56 |
57 | # run selection algorithm
58 |
59 |
60 | return full_model_inference(X,
61 | y,
62 | truth,
63 | selection_algorithm,
64 | smooth_sampler,
65 | success_params=(1, 1),
66 | B=B,
67 | fit_probability=logit_fit,
68 | fit_args={'df':20})
69 |
70 | if __name__ == "__main__":
71 | import statsmodels.api as sm
72 | import matplotlib.pyplot as plt
73 | import pandas as pd
74 |
75 | csvfile = 'lasso_calibration.csv'
76 | outbase = csvfile[:-4]
77 |
78 | for i in range(2000):
79 | for B in np.random.choice([50, 100, 500, 1000, 1500, 2000], 1, replace=True):
80 | df = simulate(B=B)
81 |
82 | if df is not None and i > 0:
83 |
84 | try: # concatenate to disk
85 | df = pd.concat([df, pd.read_csv(csvfile)])
86 | except FileNotFoundError:
87 | pass
88 | df.to_csv(csvfile, index=False)
89 |
90 | if len(df['pivot']) > 0:
91 | pivot_ax, length_ax = pivot_plot(df, outbase)
92 |
93 |
--------------------------------------------------------------------------------
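The `try`/`except FileNotFoundError` block in the `__main__` section above is a pattern repeated throughout these example scripts: each simulated round is merged with whatever is already on disk before rewriting the CSV, so partial results survive interruption. The same pattern as a stand-alone helper (the column names here are illustrative, not the exact schema returned by `full_model_inference`):

    import pandas as pd

    def accumulate(df, csvfile):
        # merge this round's rows with any earlier rounds on disk,
        # then rewrite the file; the first round simply creates it
        try:
            df = pd.concat([df, pd.read_csv(csvfile)])
        except FileNotFoundError:
            pass
        df.to_csv(csvfile, index=False)
        return df

    # one simulated round with illustrative columns
    df = accumulate(pd.DataFrame({'pivot': [0.4], 'length': [1.2]}),
                    'example_rounds.csv')
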
/doc/learning_examples/cross_inference/cross_inference.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from selection.learning.core import cross_inference
4 | from selection.learning.keras_fit import keras_fit
5 |
6 | data = np.load('lasso_multi_learning.npz')
7 | learning_data = (data['T'][:2000], data['Y'][:2000])
8 |
9 | result = cross_inference(learning_data,
10 | data['nuisance'],
11 | data['direction'],
12 | keras_fit,
13 | fit_args={'epochs':3, 'sizes':[10]*5, 'dropout':0., 'activation':'relu'})
14 |
--------------------------------------------------------------------------------
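The script above assumes a `lasso_multi_learning.npz` archive produced by an earlier run, with arrays stored under the keys `T`, `Y`, `nuisance` and `direction`. A toy archive for smoke-testing could be fabricated as below; the shapes are assumptions for illustration only, not a documented schema:

    import numpy as np

    B, p = 2000, 100
    np.savez('lasso_multi_learning.npz',
             T=np.random.standard_normal((B, 1)),     # learned statistic, one per draw
             Y=np.random.uniform(size=(B, 1)) < 0.5,  # selection outcomes per draw
             nuisance=np.random.standard_normal(p),
             direction=np.random.standard_normal(p))
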
/doc/learning_examples/keras/keras_targets.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import split_sampler, keras_fit
12 | from selection.learning.learners import mixture_learner
13 | mixture_learner.scales = [1]*10 + [1.5,2,3,4,5,10]
14 |
15 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000):
16 |
17 | # description of statistical problem
18 |
19 | X, y, truth = gaussian_instance(n=n,
20 | p=p,
21 | s=s,
22 | equicorrelated=False,
23 | rho=0.5,
24 | sigma=sigma,
25 | signal=signal,
26 | random_signs=True,
27 | scale=False)[:3]
28 |
29 | XTX = X.T.dot(X)
30 | XTXi = np.linalg.inv(XTX)
31 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
32 | dispersion = np.linalg.norm(resid)**2 / (n-p)
33 |
34 | S = X.T.dot(y)
35 | covS = dispersion * X.T.dot(X)
36 | splitting_sampler = split_sampler(X * y[:, None], covS)
37 |
38 | def meta_algorithm(XTX, XTXi, dispersion, lam, sampler):
39 |
40 | p = XTX.shape[0]
41 | success = np.zeros(p)
42 |
43 | loss = rr.quadratic_loss((p,), Q=XTX)
44 | pen = rr.l1norm(p, lagrange=lam)
45 |
46 | scale = 0.5
47 | noisy_S = sampler(scale=scale)
48 | soln = XTXi.dot(noisy_S)
49 | solnZ = soln / (np.sqrt(np.diag(XTXi)) * np.sqrt(dispersion))
50 | return set(np.nonzero(np.fabs(solnZ) > 2.1)[0])
51 |
52 | lam = 4. * np.sqrt(n)
53 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, dispersion, lam)
54 |
55 | # run selection algorithm
56 |
57 | return full_model_inference(X,
58 | y,
59 | truth,
60 | selection_algorithm,
61 | splitting_sampler,
62 | success_params=(5, 7),
63 | B=B,
64 | fit_probability=keras_fit,
65 | fit_args={'epochs':30, 'sizes':[100, 100], 'activation':'relu'})
66 |
67 |
68 | if __name__ == "__main__":
69 | import statsmodels.api as sm
70 | import matplotlib.pyplot as plt
71 | import pandas as pd
72 |
73 | for i in range(500):
74 | df = simulate(B=10000)
75 | csvfile = 'keras_targets.csv'
76 | outbase = csvfile[:-4]
77 |
78 | if df is not None and i > 0:
79 |
80 | try: # concatenate to disk
81 | df = pd.concat([df, pd.read_csv(csvfile)])
82 | except FileNotFoundError:
83 | pass
84 | df.to_csv(csvfile, index=False)
85 |
86 | if len(df['pivot']) > 0:
87 | pivot_ax, length_ax = pivot_plot(df, outbase)
88 |
89 |
90 |
--------------------------------------------------------------------------------
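Note that `meta_algorithm` above builds `loss` and `pen` but never uses them: selection here is plain marginal screening, keeping coordinates whose noisy least-squares Z-scores exceed 2.1 in absolute value. That screening step in isolation, as a self-contained numpy sketch of the same arithmetic:

    import numpy as np

    def z_screen(X, noisy_S, dispersion, threshold=2.1):
        # least-squares solution recovered from the (noisy) score S = X'y
        XTXi = np.linalg.inv(X.T.dot(X))
        soln = XTXi.dot(noisy_S)
        # standardize: Var(beta_hat_j) = dispersion * (X'X)^{-1}_{jj}
        solnZ = soln / (np.sqrt(np.diag(XTXi)) * np.sqrt(dispersion))
        return set(np.nonzero(np.fabs(solnZ) > threshold)[0])

    # on pure noise roughly 2 * P(Z > 2.1), i.e. ~3.6%, of coordinates survive
    rng = np.random.default_rng(0)
    X = rng.standard_normal((200, 50))
    y = 2 * rng.standard_normal(200)
    print(z_screen(X, X.T.dot(y), dispersion=4.))
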
/doc/learning_examples/keras/keras_targets_medium.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import split_sampler, keras_fit
12 | from selection.learning.learners import mixture_learner
13 | mixture_learner.scales = [1]*10 + [1.5,2,3,4,5,10]
14 |
15 | def simulate(n=200, p=50, s=5, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000):
16 |
17 | # description of statistical problem
18 |
19 | X, y, truth = gaussian_instance(n=n,
20 | p=p,
21 | s=s,
22 | equicorrelated=False,
23 | rho=0.5,
24 | sigma=sigma,
25 | signal=signal,
26 | random_signs=True,
27 | scale=False)[:3]
28 |
29 | XTX = X.T.dot(X)
30 | XTXi = np.linalg.inv(XTX)
31 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
32 | dispersion = np.linalg.norm(resid)**2 / (n-p)
33 |
34 | S = X.T.dot(y)
35 | covS = dispersion * X.T.dot(X)
36 | splitting_sampler = split_sampler(X * y[:, None], covS)
37 |
38 | def meta_algorithm(XTX, XTXi, dispersion, lam, sampler):
39 |
40 | p = XTX.shape[0]
41 | success = np.zeros(p)
42 |
43 | loss = rr.quadratic_loss((p,), Q=XTX)
44 | pen = rr.l1norm(p, lagrange=lam)
45 |
46 | scale = 0.5
47 | noisy_S = sampler(scale=scale)
48 | soln = XTXi.dot(noisy_S)
49 | solnZ = soln / (np.sqrt(np.diag(XTXi)) * np.sqrt(dispersion))
50 | return set(np.nonzero(np.fabs(solnZ) > 2.1)[0])
51 |
52 | lam = 4. * np.sqrt(n)
53 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, dispersion, lam)
54 |
55 | # run selection algorithm
56 |
57 | return full_model_inference(X,
58 | y,
59 | truth,
60 | selection_algorithm,
61 | splitting_sampler,
62 | success_params=(5, 7),
63 | B=B,
64 | fit_probability=keras_fit,
65 | fit_args={'epochs':30, 'sizes':[100, 100], 'activation':'relu'})
66 |
67 |
68 | if __name__ == "__main__":
69 | import statsmodels.api as sm
70 | import matplotlib.pyplot as plt
71 | import pandas as pd
72 |
73 | for i in range(500):
74 | df = simulate(B=10000)
75 | csvfile = 'keras_targets_medium.csv'
76 | outbase = csvfile[:-4]
77 |
78 | if df is not None and i > 0:
79 |
80 | try: # concatenate to disk
81 | df = pd.concat([df, pd.read_csv(csvfile)])
82 | except FileNotFoundError:
83 | pass
84 | df.to_csv(csvfile, index=False)
85 |
86 | if len(df['pivot']) > 0:
87 | pivot_ax, length_ax = pivot_plot(df, outbase)
88 |
--------------------------------------------------------------------------------
/doc/learning_examples/keras/keras_targets_small.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import split_sampler, keras_fit
12 | from selection.learning.learners import mixture_learner
13 | mixture_learner.scales = [1]*10 + [1.5,2,3,4,5,10]
14 |
15 | def simulate(n=100, p=10, s=5, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000):
16 |
17 | # description of statistical problem
18 |
19 | X, y, truth = gaussian_instance(n=n,
20 | p=p,
21 | s=s,
22 | equicorrelated=False,
23 | rho=0.5,
24 | sigma=sigma,
25 | signal=signal,
26 | random_signs=True,
27 | scale=False)[:3]
28 |
29 | XTX = X.T.dot(X)
30 | XTXi = np.linalg.inv(XTX)
31 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
32 | dispersion = np.linalg.norm(resid)**2 / (n-p)
33 |
34 | S = X.T.dot(y)
35 | covS = dispersion * X.T.dot(X)
36 | splitting_sampler = split_sampler(X * y[:, None], covS)
37 |
38 | def meta_algorithm(XTX, XTXi, dispersion, lam, sampler):
39 |
40 | p = XTX.shape[0]
41 | success = np.zeros(p)
42 |
43 | loss = rr.quadratic_loss((p,), Q=XTX)
44 | pen = rr.l1norm(p, lagrange=lam)
45 |
46 | scale = 0.5
47 | noisy_S = sampler(scale=scale)
48 | soln = XTXi.dot(noisy_S)
49 | solnZ = soln / (np.sqrt(np.diag(XTXi)) * np.sqrt(dispersion))
50 | return set(np.nonzero(np.fabs(solnZ) > 2.1)[0])
51 |
52 | lam = 4. * np.sqrt(n)
53 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, dispersion, lam)
54 |
55 | # run selection algorithm
56 |
57 | return full_model_inference(X,
58 | y,
59 | truth,
60 | selection_algorithm,
61 | splitting_sampler,
62 | success_params=(5, 7),
63 | B=B,
64 | fit_probability=keras_fit,
65 | fit_args={'epochs':30, 'sizes':[100, 100], 'activation':'relu'})
66 |
67 |
68 | if __name__ == "__main__":
69 | import statsmodels.api as sm
70 | import matplotlib.pyplot as plt
71 | import pandas as pd
72 |
73 | for i in range(500):
74 | df = simulate(B=10000)
75 | csvfile = 'keras_targets_small.csv'
76 | outbase = csvfile[:-4]
77 |
78 | if df is not None and i > 0:
79 |
80 | try: # concatenate to disk
81 | df = pd.concat([df, pd.read_csv(csvfile)])
82 | except FileNotFoundError:
83 | pass
84 | df.to_csv(csvfile, index=False)
85 |
86 | if len(df['pivot']) > 0:
87 | pivot_ax, length_ax = pivot_plot(df, outbase)
88 |
--------------------------------------------------------------------------------
/doc/learning_examples/knockoffs/knockoff_kernel.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import normal_sampler, logit_fit
12 |
13 | def simulate(n=1000, p=50, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B=1000):
14 |
15 | # description of statistical problem
16 |
17 | np.random.seed(seed)
18 | X, y, truth = gaussian_instance(n=n,
19 | p=p,
20 | s=s,
21 | equicorrelated=False,
22 | rho=0.5,
23 | sigma=sigma,
24 | signal=signal,
25 | random_signs=True,
26 | scale=False,
27 | center=False)[:3]
28 |
29 | dispersion = sigma**2
30 |
31 | S = X.T.dot(y)
32 | covS = dispersion * X.T.dot(X)
33 | smooth_sampler = normal_sampler(S, covS)
34 |
35 | def meta_algorithm(X, XTXi, resid, sampler):
36 |
37 | n, p = X.shape
38 |
39 | rho = 0.8
40 | S = sampler(scale=0.) # deterministic with scale=0
41 | ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X
42 | Xnew = rho * X + np.sqrt(1 - rho**2) * np.random.standard_normal(X.shape)
43 |
44 | X_full = np.hstack([X, Xnew])
45 | beta_full = np.linalg.pinv(X_full).dot(ynew)
46 | winners = np.fabs(beta_full)[:p] > np.fabs(beta_full)[p:]
47 | return set(np.nonzero(winners)[0])
48 |
49 | XTX = X.T.dot(X)
50 | XTXi = np.linalg.inv(XTX)
51 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
52 | dispersion = np.linalg.norm(resid)**2 / (n-p)
53 |
54 | selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid)
55 |
56 | # run selection algorithm
57 |
58 | return full_model_inference(X,
59 | y,
60 | truth,
61 | selection_algorithm,
62 | smooth_sampler,
63 | success_params=(8, 10),
64 | B=B,
65 | fit_probability=logit_fit,
66 | fit_args={'df':20},
67 | how_many=1)
68 |
69 | if __name__ == "__main__":
70 | import statsmodels.api as sm
71 | import matplotlib.pyplot as plt
72 | import pandas as pd
73 |
74 | iseed = int(np.fabs(np.random.standard_normal() * 50000))
75 | for i in range(500):
76 | df = simulate(seed=i + iseed, B=2000)
77 | csvfile = 'knockoff_kernel.csv'
78 | outbase = csvfile[:-4]
79 |
80 | if df is not None and i > 0:
81 |
82 | try: # concatenate to disk
83 | df = pd.concat([df, pd.read_csv(csvfile)])
84 | except FileNotFoundError:
85 | pass
86 | df.to_csv(csvfile, index=False)
87 |
88 | if len(df['pivot']) > 0:
89 | pivot_ax, length_ax = pivot_plot(df, outbase)
90 |
--------------------------------------------------------------------------------
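The copy `Xnew = rho * X + np.sqrt(1 - rho**2) * Z` built inside `meta_algorithm` has, for standardized Gaussian columns, unit variance and correlation `rho` with the corresponding original column. A quick stand-alone numerical check of that construction (independent of the module above):

    import numpy as np

    rng = np.random.default_rng(0)
    n, p, rho = 100000, 3, 0.8
    X = rng.standard_normal((n, p))   # columns ~ N(0, 1)
    Xnew = rho * X + np.sqrt(1 - rho**2) * rng.standard_normal((n, p))

    for j in range(p):
        # expect sd ~ 1 and correlation ~ rho = 0.8
        print(np.std(Xnew[:, j]), np.corrcoef(X[:, j], Xnew[:, j])[0, 1])
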
/doc/learning_examples/lasso/lasso_example.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import normal_sampler, logit_fit
12 |
13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000):
14 |
15 | # description of statistical problem
16 |
17 | X, y, truth = gaussian_instance(n=n,
18 | p=p,
19 | s=s,
20 | equicorrelated=False,
21 | rho=0.5,
22 | sigma=sigma,
23 | signal=signal,
24 | random_signs=True,
25 | scale=False)[:3]
26 |
27 | dispersion = sigma**2
28 |
29 | S = X.T.dot(y)
30 | covS = dispersion * X.T.dot(X)
31 | sampler = normal_sampler(S, covS)
32 |
33 | def meta_algorithm(XTX, XTXi, lam, sampler):
34 |
35 | p = XTX.shape[0]
36 | success = np.zeros(p)
37 |
38 | loss = rr.quadratic_loss((p,), Q=XTX)
39 | pen = rr.l1norm(p, lagrange=lam)
40 |
41 | scale = 0.
42 | noisy_S = sampler(scale=scale)
43 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
44 | problem = rr.simple_problem(loss, pen)
45 | soln = problem.solve(max_its=100, tol=1.e-10)
46 | success += soln != 0
47 | return set(np.nonzero(success)[0])
48 |
49 | XTX = X.T.dot(X)
50 | XTXi = np.linalg.inv(XTX)
51 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
52 | dispersion = np.linalg.norm(resid)**2 / (n-p)
53 |
54 | lam = 4. * np.sqrt(n)
55 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam)
56 |
57 | # run selection algorithm
58 |
59 |
60 | return full_model_inference(X,
61 | y,
62 | truth,
63 | selection_algorithm,
64 | sampler,
65 | success_params=(1, 1),
66 | B=B,
67 | fit_probability=logit_fit,
68 | fit_args={'df':20},
69 | how_many=1)
70 |
71 |
72 | if __name__ == "__main__":
73 | import statsmodels.api as sm
74 | import matplotlib.pyplot as plt
75 | import pandas as pd
76 |
77 | for i in range(500):
78 | df = simulate()
79 | csvfile = 'lasso_exact.csv'
80 | outbase = csvfile[:-4]
81 |
82 | if df is not None and i > 0:
83 |
84 | try: # concatenate to disk
85 | df = pd.concat([df, pd.read_csv(csvfile)])
86 | except FileNotFoundError:
87 | pass
88 | df.to_csv(csvfile, index=False)
89 |
90 | if len(df['pivot']) > 0:
91 | pivot_ax, length_ax = pivot_plot(df, outbase)
92 |
93 |
--------------------------------------------------------------------------------
/doc/learning_examples/lasso_CV/lasso_exact_CV_null.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import split_sampler, probit_fit
12 | from selection.learning.Rutils import lasso_glmnet
13 |
14 | def simulate(n=200, p=100, s=10, signal=(0, 0), sigma=2, alpha=0.1, B=2000):
15 |
16 | # description of statistical problem
17 |
18 | X, y, truth = gaussian_instance(n=n,
19 | p=p,
20 | s=s,
21 | equicorrelated=False,
22 | rho=0.5,
23 | sigma=sigma,
24 | signal=signal,
25 | random_signs=True,
26 | scale=False)[:3]
27 |
28 | XTX = X.T.dot(X)
29 | XTXi = np.linalg.inv(XTX)
30 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
31 | dispersion = np.linalg.norm(resid)**2 / (n-p)
32 |
33 | S = X.T.dot(y)
34 | covS = dispersion * X.T.dot(X)
35 | splitting_sampler = split_sampler(X * y[:, None], covS)
36 |
37 | def meta_algorithm(X, XTXi, resid, sampler):
38 |
39 | S = sampler(scale=0.) # deterministic with scale=0
40 | ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X
41 | G = lasso_glmnet(X, ynew, *[None]*4)
42 | select = G.select()
43 | return set(list(select[0]))
44 |
45 | selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid)
46 |
47 | # run selection algorithm
48 |
49 | return full_model_inference(X,
50 | y,
51 | truth,
52 | selection_algorithm,
53 | splitting_sampler,
54 | success_params=(1, 1),
55 | B=B,
56 | fit_probability=probit_fit,
57 | fit_args={'df':20},
58 | how_many=1)
59 |
60 | if __name__ == "__main__":
61 | import statsmodels.api as sm
62 | import matplotlib.pyplot as plt
63 | import pandas as pd
64 |
65 | for i in range(500):
66 | df = simulate()
67 | csvfile = 'lasso_exact_CV_null.csv'
68 | outbase = csvfile[:-4]
69 |
70 | if df is not None and i > 0:
71 |
72 | try: # concatenate to disk
73 | df = pd.concat([df, pd.read_csv(csvfile)])
74 | except FileNotFoundError:
75 | pass
76 | df.to_csv(csvfile, index=False)
77 |
78 | if len(df['pivot']) > 0:
79 | pivot_ax, length_ax = pivot_plot(df, outbase)
80 |
--------------------------------------------------------------------------------
/doc/learning_examples/lasso_CV/lasso_example_CV.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import split_sampler, probit_fit
12 | from selection.learning.Rutils import lasso_glmnet
13 |
14 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000):
15 |
16 | # description of statistical problem
17 |
18 | X, y, truth = gaussian_instance(n=n,
19 | p=p,
20 | s=s,
21 | equicorrelated=False,
22 | rho=0.5,
23 | sigma=sigma,
24 | signal=signal,
25 | random_signs=True,
26 | scale=False)[:3]
27 |
28 | dispersion = sigma**2
29 |
30 | S = X.T.dot(y)
31 | covS = dispersion * X.T.dot(X)
32 | splitting_sampler = split_sampler(X * y[:, None], covS)
33 |
34 |
35 | def meta_algorithm(X, XTXi, resid, sampler):
36 |
37 | S = sampler(scale=0.) # deterministic with scale=0
38 | ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X
39 | G = lasso_glmnet(X, ynew, *[None]*4)
40 | select = G.select()
41 | return set(list(select[0]))
42 |
43 | XTX = X.T.dot(X)
44 | XTXi = np.linalg.inv(XTX)
45 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
46 | dispersion = np.linalg.norm(resid)**2 / (n-p)
47 |
48 | selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid)
49 |
50 | # run selection algorithm
51 |
52 | return full_model_inference(X,
53 | y,
54 | truth,
55 | selection_algorithm,
56 | splitting_sampler,
57 | success_params=(1, 1),
58 | B=B,
59 | fit_probability=probit_fit,
60 | fit_args={'df':20},
61 | how_many=1)
62 |
63 | if __name__ == "__main__":
64 | import statsmodels.api as sm
65 | import matplotlib.pyplot as plt
66 | import pandas as pd
67 |
68 | for i in range(500):
69 | df = simulate()
70 | csvfile = 'lasso_exact_CV.csv'
71 | outbase = csvfile[:-4]
72 |
73 | if df is not None and i > 0:
74 |
75 | try: # concatenate to disk
76 | df = pd.concat([df, pd.read_csv(csvfile)])
77 | except FileNotFoundError:
78 | pass
79 | df.to_csv(csvfile, index=False)
80 |
81 | if len(df['pivot']) > 0:
82 | pivot_ax, length_ax = pivot_plot(df, outbase)
83 |
--------------------------------------------------------------------------------
/doc/learning_examples/multi_target/additive_targets.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 |
11 | from selection.learning.utils import full_model_inference, pivot_plot
12 | from selection.learning.core import split_sampler, logit_fit
13 |
14 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000):
15 |
16 | # description of statistical problem
17 |
18 | X, y, truth = gaussian_instance(n=n,
19 | p=p,
20 | s=s,
21 | equicorrelated=False,
22 | rho=0.5,
23 | sigma=sigma,
24 | signal=signal,
25 | random_signs=True,
26 | scale=False)[:3]
27 |
28 | dispersion = sigma**2
29 |
30 | S = X.T.dot(y)
31 | covS = dispersion * X.T.dot(X)
32 | smooth_sampler = normal_sampler(S, covS)
33 | splitting_sampler = split_sampler(X * y[:, None], covS)
34 |
35 | def meta_algorithm(XTX, XTXi, lam, sampler):
36 |
37 | p = XTX.shape[0]
38 | success = np.zeros(p)
39 |
40 | loss = rr.quadratic_loss((p,), Q=XTX)
41 | pen = rr.l1norm(p, lagrange=lam)
42 |
43 | scale = 0.5
44 | noisy_S = sampler(scale=scale)
45 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
46 | problem = rr.simple_problem(loss, pen)
47 | soln = problem.solve(max_its=50, tol=1.e-6)
48 | success += soln != 0
49 | return set(np.nonzero(success)[0])
50 |
51 | XTX = X.T.dot(X)
52 | XTXi = np.linalg.inv(XTX)
53 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
54 | dispersion = np.linalg.norm(resid)**2 / (n-p)
55 |
56 | lam = 4. * np.sqrt(n)
57 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam)
58 |
59 | # run selection algorithm
60 |
61 | return full_model_inference(X,
62 | y,
63 | truth,
64 | selection_algorithm,
65 | splitting_sampler,
66 | success_params=(1, 1),
67 | B=B,
68 | fit_probability=logit_fit,
69 | fit_args={'df':20})
70 |
71 | if __name__ == "__main__":
72 | import statsmodels.api as sm
73 | import matplotlib.pyplot as plt
74 | import pandas as pd
75 |
76 | U = np.linspace(0, 1, 101)
77 | plt.clf()
78 |
79 | for i in range(500):
80 | for B in [5000]:
81 | print(B)
82 | df = simulate(B=B)
83 | csvfile = 'additive_targets.csv'
84 | outbase = csvfile[:-4]
85 |
86 | if i % 2 == 1 and i > 0:
87 |
88 | try:
89 | df = pd.concat([df, pd.read_csv(csvfile)])
90 | df.to_csv(csvfile, index=False)
91 | except FileNotFoundError:
92 | pass
93 |
94 | if len(df['pivot']) > 0:
95 | pivot_ax, length_ax = pivot_plot(df, outbase)
96 |
97 |
98 |
99 |
--------------------------------------------------------------------------------
/doc/learning_examples/multi_target/additive_targets_small.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 |
11 | from selection.learning.utils import full_model_inference, pivot_plot
12 | from selection.learning.core import normal_sampler, split_sampler, logit_fit
13 |
14 | def simulate(n=100, p=30, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000):
15 |
16 | # description of statistical problem
17 |
18 | X, y, truth = gaussian_instance(n=n,
19 | p=p,
20 | s=s,
21 | equicorrelated=False,
22 | rho=0.5,
23 | sigma=sigma,
24 | signal=signal,
25 | random_signs=True,
26 | scale=False)[:3]
27 |
28 | dispersion = sigma**2
29 |
30 | S = X.T.dot(y)
31 | covS = dispersion * X.T.dot(X)
32 | smooth_sampler = normal_sampler(S, covS)
33 | splitting_sampler = split_sampler(X * y[:, None], covS)
34 |
35 | def meta_algorithm(XTX, XTXi, lam, sampler):
36 |
37 | p = XTX.shape[0]
38 | success = np.zeros(p)
39 |
40 | loss = rr.quadratic_loss((p,), Q=XTX)
41 | pen = rr.l1norm(p, lagrange=lam)
42 |
43 | scale = 0.5
44 | noisy_S = sampler(scale=scale)
45 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
46 | problem = rr.simple_problem(loss, pen)
47 | soln = problem.solve(max_its=50, tol=1.e-6)
48 | success += soln != 0
49 | return set(np.nonzero(success)[0])
50 |
51 | XTX = X.T.dot(X)
52 | XTXi = np.linalg.inv(XTX)
53 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
54 | dispersion = np.linalg.norm(resid)**2 / (n-p)
55 |
56 | lam = 4. * np.sqrt(n)
57 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam)
58 |
59 | # run selection algorithm
60 |
61 | return full_model_inference(X,
62 | y,
63 | truth,
64 | selection_algorithm,
65 | splitting_sampler,
66 | success_params=(1, 1),
67 | B=B,
68 | fit_probability=logit_fit,
69 | fit_args={'df':20})
70 |
71 | if __name__ == "__main__":
72 | import statsmodels.api as sm
73 | import matplotlib.pyplot as plt
74 | import pandas as pd
75 |
76 | U = np.linspace(0, 1, 101)
77 | plt.clf()
78 |
79 | for i in range(500):
80 | for B in [5000]:
81 | print(B)
82 | df = simulate(B=B)
83 | csvfile = 'additive_targets_small.csv'
84 | outbase = csvfile[:-4]
85 |
86 | if i % 2 == 1 and i > 0:
87 |
88 | try:
89 | df = pd.concat([df, pd.read_csv(csvfile)])
90 | df.to_csv(csvfile, index=False)
91 | except FileNotFoundError:
92 | pass
93 |
94 | if len(df['pivot']) > 0:
95 | pivot_ax, length_ax = pivot_plot(df, outbase)
96 |
97 |
98 |
99 |
--------------------------------------------------------------------------------
/doc/learning_examples/multi_target/gbm2.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 | from selection.algorithms.lasso import ROSI
10 |
11 | from selection.learning.Rutils import lasso_glmnet
12 | from selection.learning.utils import full_model_inference, pivot_plot
13 | from selection.learning.core import normal_sampler, gbm_fit_sk
14 |
15 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000):
16 |
17 | # description of statistical problem
18 |
19 | X, y, truth = gaussian_instance(n=n,
20 | p=p,
21 | s=s,
22 | equicorrelated=False,
23 | rho=0.5,
24 | sigma=sigma,
25 | signal=signal,
26 | random_signs=True,
27 | scale=False)[:3]
28 |
29 | dispersion = sigma**2
30 |
31 | S = X.T.dot(y)
32 | covS = dispersion * X.T.dot(X)
33 | smooth_sampler = normal_sampler(S, covS)
34 |
35 | def meta_algorithm(X, XTXi, resid, sampler):
36 |
37 |         S = sampler(scale=0.5) # noisy draw; scale=0 would be deterministic
38 | ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X
39 | G = lasso_glmnet(X, ynew, *[None]*4)
40 | select = G.select()
41 | return set(list(select[0]))
42 |
43 | XTX = X.T.dot(X)
44 | XTXi = np.linalg.inv(XTX)
45 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
46 | dispersion = np.linalg.norm(resid)**2 / (n-p)
47 |
48 | selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid)
49 |
50 | # run selection algorithm
51 |
52 | return full_model_inference(X,
53 | y,
54 | truth,
55 | selection_algorithm,
56 | smooth_sampler,
57 | success_params=(1, 1),
58 | B=B,
59 | fit_probability=gbm_fit_sk,
60 | fit_args={'n_estimators':2000})
61 |
62 | if __name__ == "__main__":
63 | import statsmodels.api as sm
64 | import matplotlib.pyplot as plt
65 | import pandas as pd
66 |
67 | U = np.linspace(0, 1, 101)
68 | plt.clf()
69 |
70 | for i in range(500):
71 | df = simulate()
72 | csvfile = 'lasso_multi_CV_random_gbm.csv'
73 | outbase = csvfile[:-4]
74 |
75 | if df is not None and i > 0:
76 |
77 | try:
78 | df = pd.concat([df, pd.read_csv(csvfile)])
79 | except FileNotFoundError:
80 | pass
81 | df.to_csv(csvfile, index=False)
82 |
83 | if len(df['pivot']) > 0:
84 | pivot_plot(df, outbase)
85 |
86 |
--------------------------------------------------------------------------------
/doc/learning_examples/multi_target/gbm_targets.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 | from selection.algorithms.lasso import ROSI
10 |
11 | from selection.learning.Rutils import lasso_glmnet
12 | from selection.learning.utils import full_model_inference, pivot_plot
13 | from selection.learning.core import normal_sampler, gbm_fit
14 |
15 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000):
16 |
17 | # description of statistical problem
18 |
19 | X, y, truth = gaussian_instance(n=n,
20 | p=p,
21 | s=s,
22 | equicorrelated=False,
23 | rho=0.5,
24 | sigma=sigma,
25 | signal=signal,
26 | random_signs=True,
27 | scale=False)[:3]
28 |
29 | dispersion = sigma**2
30 |
31 | S = X.T.dot(y)
32 | covS = dispersion * X.T.dot(X)
33 | smooth_sampler = normal_sampler(S, covS)
34 |
35 | def meta_algorithm(XTX, XTXi, lam, sampler):
36 |
37 | p = XTX.shape[0]
38 | success = np.zeros(p)
39 |
40 | loss = rr.quadratic_loss((p,), Q=XTX)
41 | pen = rr.l1norm(p, lagrange=lam)
42 |
43 | scale = 0.5
44 | noisy_S = sampler(scale=scale)
45 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
46 | problem = rr.simple_problem(loss, pen)
47 | soln = problem.solve(max_its=50, tol=1.e-6)
48 | success += soln != 0
49 | return set(np.nonzero(success)[0])
50 |
51 | XTX = X.T.dot(X)
52 | XTXi = np.linalg.inv(XTX)
53 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
54 | dispersion = np.linalg.norm(resid)**2 / (n-p)
55 |
56 | lam = 4. * np.sqrt(n)
57 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam)
58 |
59 | # run selection algorithm
60 |
61 | return full_model_inference(X,
62 | y,
63 | truth,
64 | selection_algorithm,
65 | smooth_sampler,
66 | success_params=(1, 1),
67 | B=B,
68 | fit_probability=gbm_fit,
69 | fit_args={})
70 |
71 | if __name__ == "__main__":
72 | import statsmodels.api as sm
73 | import matplotlib.pyplot as plt
74 | import pandas as pd
75 |
76 | U = np.linspace(0, 1, 101)
77 | plt.clf()
78 |
79 | for i in range(500):
80 | for B in [5000]:
81 | print(B)
82 | df = simulate(B=B)
83 | csvfile = 'gbm_targets.csv'
84 | outbase = csvfile[:-4]
85 |
86 | if df is not None and i > 0:
87 |
88 | try:
89 | df = pd.concat([df, pd.read_csv(csvfile)])
90 | except FileNotFoundError:
91 | pass
92 | df.to_csv(csvfile, index=False)
93 |
94 | if len(df['pivot']) > 0:
95 | pivot_plot(df, outbase)
96 |
--------------------------------------------------------------------------------
/doc/learning_examples/multi_target/gbm_targets_small.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 | from selection.algorithms.lasso import ROSI
10 |
11 | from selection.learning.Rutils import lasso_glmnet
12 | from selection.learning.utils import full_model_inference, pivot_plot
13 | from selection.learning.core import normal_sampler, gbm_fit
14 |
15 | def simulate(n=100, p=30, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000):
16 |
17 | # description of statistical problem
18 |
19 | X, y, truth = gaussian_instance(n=n,
20 | p=p,
21 | s=s,
22 | equicorrelated=False,
23 | rho=0.5,
24 | sigma=sigma,
25 | signal=signal,
26 | random_signs=True,
27 | scale=False)[:3]
28 |
29 | dispersion = sigma**2
30 |
31 | S = X.T.dot(y)
32 | covS = dispersion * X.T.dot(X)
33 | smooth_sampler = normal_sampler(S, covS)
34 |
35 | def meta_algorithm(XTX, XTXi, lam, sampler):
36 |
37 | p = XTX.shape[0]
38 | success = np.zeros(p)
39 |
40 | loss = rr.quadratic_loss((p,), Q=XTX)
41 | pen = rr.l1norm(p, lagrange=lam)
42 |
43 | scale = 0.5
44 | noisy_S = sampler(scale=scale)
45 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
46 | problem = rr.simple_problem(loss, pen)
47 | soln = problem.solve(max_its=50, tol=1.e-6)
48 | success += soln != 0
49 | return set(np.nonzero(success)[0])
50 |
51 | XTX = X.T.dot(X)
52 | XTXi = np.linalg.inv(XTX)
53 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
54 | dispersion = np.linalg.norm(resid)**2 / (n-p)
55 |
56 | lam = 4. * np.sqrt(n)
57 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam)
58 |
59 | # run selection algorithm
60 |
61 | return full_model_inference(X,
62 | y,
63 | truth,
64 | selection_algorithm,
65 | smooth_sampler,
66 | success_params=(1, 1),
67 | B=B,
68 | fit_probability=gbm_fit,
69 | fit_args={})
70 |
71 | if __name__ == "__main__":
72 | import statsmodels.api as sm
73 | import matplotlib.pyplot as plt
74 | import pandas as pd
75 |
76 | U = np.linspace(0, 1, 101)
77 | plt.clf()
78 |
79 | for i in range(500):
80 | for B in [5000]:
81 | print(B)
82 | df = simulate(B=B)
83 | csvfile = 'gbm_targets_small.csv'
84 | outbase = csvfile[:-4]
85 |
86 | if df is not None and i > 0:
87 |
88 | try:
89 | df = pd.concat([df, pd.read_csv(csvfile)])
90 | except FileNotFoundError:
91 | pass
92 | df.to_csv(csvfile, index=False)
93 |
94 | if len(df['pivot']) > 0:
95 | pivot_plot(df, outbase)
96 |
--------------------------------------------------------------------------------
/doc/learning_examples/multi_target/lasso_example_multi.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import split_sampler, keras_fit
12 |
13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000):
14 |
15 | # description of statistical problem
16 |
17 | X, y, truth = gaussian_instance(n=n,
18 | p=p,
19 | s=s,
20 | equicorrelated=False,
21 | rho=0.5,
22 | sigma=sigma,
23 | signal=signal,
24 | random_signs=True,
25 | scale=False)[:3]
26 |
27 |
28 | dispersion = sigma**2
29 |
30 | S = X.T.dot(y)
31 | covS = dispersion * X.T.dot(X)
32 | splitting_sampler = split_sampler(X * y[:, None], covS)
33 |
34 | def meta_algorithm(XTX, XTXi, lam, sampler):
35 |
36 | p = XTX.shape[0]
37 | success = np.zeros(p)
38 |
39 | loss = rr.quadratic_loss((p,), Q=XTX)
40 | pen = rr.l1norm(p, lagrange=lam)
41 |
42 | scale = 0.
43 | noisy_S = sampler(scale=scale)
44 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
45 | problem = rr.simple_problem(loss, pen)
46 | soln = problem.solve(max_its=100, tol=1.e-10)
47 | success += soln != 0
48 | return set(np.nonzero(success)[0])
49 |
50 | XTX = X.T.dot(X)
51 | XTXi = np.linalg.inv(XTX)
52 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
53 | dispersion = np.linalg.norm(resid)**2 / (n-p)
54 |
55 | lam = 4. * np.sqrt(n)
56 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam)
57 |
58 | # run selection algorithm
59 |
60 | return full_model_inference(X,
61 | y,
62 | truth,
63 | selection_algorithm,
64 | splitting_sampler,
65 | success_params=(1, 1),
66 | B=B,
67 | fit_probability=keras_fit,
68 | fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'})
69 |
70 | if __name__ == "__main__":
71 | import statsmodels.api as sm
72 | import matplotlib.pyplot as plt
73 | import pandas as pd
74 |
75 | for i in range(2000):
76 | df = simulate(B=2000)
77 | csvfile = 'lasso_multi.csv'
78 | outbase = csvfile[:-4]
79 |
80 | if df is not None and i > 0:
81 |
82 | try: # concatenate to disk
83 | df = pd.concat([df, pd.read_csv(csvfile)])
84 | except FileNotFoundError:
85 | pass
86 | df.to_csv(csvfile, index=False)
87 |
88 | if len(df['pivot']) > 0:
89 | pivot_ax, length_ax = pivot_plot(df, outbase)
90 |
91 |
92 |
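The samplers passed around above are callables: `scale=0` returns the observed sufficient statistic deterministically, while `scale>0` returns a noise-inflated draw. A schematic stand-in for that contract, assuming a Gaussian perturbation (this is only the shape of the interface, not the library implementation):

    import numpy as np

    def make_normal_sampler(center, covariance):
        sqrt_cov = np.linalg.cholesky(covariance)
        def sampler(scale=1.):
            noise = sqrt_cov.dot(np.random.standard_normal(center.shape[0]))
            return center + scale * noise
        return sampler

    S = np.zeros(3)
    sampler = make_normal_sampler(S, np.identity(3))
    assert np.allclose(sampler(scale=0.), S)  # deterministic at scale 0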
--------------------------------------------------------------------------------
/doc/learning_examples/multi_target/lasso_example_multi_CV.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import split_sampler, keras_fit
12 | from selection.learning.Rutils import lasso_glmnet
13 |
14 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000):
15 |
16 | # description of statistical problem
17 |
18 | X, y, truth = gaussian_instance(n=n,
19 | p=p,
20 | s=s,
21 | equicorrelated=False,
22 | rho=0.5,
23 | sigma=sigma,
24 | signal=signal,
25 | random_signs=True,
26 | scale=False)[:3]
27 |
28 | dispersion = sigma**2
29 |
30 | S = X.T.dot(y)
31 | covS = dispersion * X.T.dot(X)
32 | splitting_sampler = split_sampler(X * y[:, None], covS)
33 |
34 | def meta_algorithm(X, XTXi, resid, sampler):
35 |
36 | S = sampler(scale=0.) # deterministic with scale=0
37 | ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X
38 | G = lasso_glmnet(X, ynew, *[None]*4)
39 | select = G.select()
40 | return set(list(select[0]))
41 |
42 | XTX = X.T.dot(X)
43 | XTXi = np.linalg.inv(XTX)
44 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
45 | dispersion = np.linalg.norm(resid)**2 / (n-p)
46 |
47 | selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid)
48 |
49 | # run selection algorithm
50 |
51 | return full_model_inference(X,
52 | y,
53 | truth,
54 | selection_algorithm,
55 | splitting_sampler,
56 | success_params=(1, 1),
57 | B=B,
58 | fit_probability=keras_fit,
59 | fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'})
60 |
61 | if __name__ == "__main__":
62 | import statsmodels.api as sm
63 | import matplotlib.pyplot as plt
64 | import pandas as pd
65 |
66 | U = np.linspace(0, 1, 101)
67 | plt.clf()
68 |
69 | for i in range(500):
70 | df = simulate()
71 | csvfile = 'lasso_multi_CV.csv'
72 | outbase = csvfile[:-4]
73 |
74 | if df is not None:
75 |
76 | try:
77 | df = pd.concat([df, pd.read_csv(csvfile)])
78 | except FileNotFoundError:
79 | pass
80 | df.to_csv(csvfile, index=False)
81 |
82 | if len(df['pivot']) > 0:
83 | pivot_plot(df, outbase)
84 |
85 |
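The `functools.partial` step freezes the data-dependent arguments so that the selection algorithm handed to `full_model_inference` is a function of the sampler alone. A minimal sketch of that pattern (the body and placeholder arguments here are hypothetical):

    import functools

    def meta_algorithm(X, XTXi, resid, sampler):
        ynew = sampler(scale=0.)  # placeholder use of the sampler
        return set()              # would return the selected variables

    frozen = functools.partial(meta_algorithm, 'X', 'XTXi', 'resid')
    print(frozen(lambda scale: None))  # now callable with just a sampler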
--------------------------------------------------------------------------------
/doc/learning_examples/multi_target/lasso_example_multi_CV_stronger.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import normal_sampler, split_sampler, keras_fit
12 | from selection.learning.Rutils import cv_glmnet_lam, lasso_glmnet
13 |
14 | def simulate(n=200, p=100, s=10, signal=(1.5, 2), sigma=2, alpha=0.1, B=3000):
15 |
16 | # description of statistical problem
17 |
18 | X, y, truth = gaussian_instance(n=n,
19 | p=p,
20 | s=s,
21 | equicorrelated=False,
22 | rho=0.5,
23 | sigma=sigma,
24 | signal=signal,
25 | random_signs=True,
26 | scale=False)[:3]
27 |
28 | dispersion = sigma**2
29 |
30 | S = X.T.dot(y)
31 | covS = dispersion * X.T.dot(X)
32 | smooth_sampler = normal_sampler(S, covS)
33 | splitting_sampler = split_sampler(X * y[:, None], covS)
34 |
35 | def meta_algorithm(X, XTXi, resid, sampler):
36 |
37 | S = sampler(scale=0.) # deterministic with scale=0
38 | ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X
39 | G = lasso_glmnet(X, ynew, *[None]*4)
40 | select = G.select()
41 | return set(list(select[0]))
42 |
43 | XTX = X.T.dot(X)
44 | XTXi = np.linalg.inv(XTX)
45 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
46 | dispersion = np.linalg.norm(resid)**2 / (n-p)
47 |
48 | selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid)
49 |
50 | # run selection algorithm
51 |
52 | return full_model_inference(X,
53 | y,
54 | truth,
55 | selection_algorithm,
56 | splitting_sampler,
57 | success_params=(1, 1),
58 | B=B,
59 | fit_probability=keras_fit,
60 | fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'})
61 |
62 | if __name__ == "__main__":
63 | import statsmodels.api as sm
64 | import matplotlib.pyplot as plt
65 | import pandas as pd
66 |
67 | U = np.linspace(0, 1, 101)
68 | plt.clf()
69 |
70 | for i in range(500):
71 | df = simulate()
72 | csvfile = 'lasso_multi_CV_stronger.csv'
73 | outbase = csvfile[:-4]
74 |
75 | if df is not None and i > 0:
76 |
77 | try:
78 | df = pd.concat([df, pd.read_csv(csvfile)])
79 | except FileNotFoundError:
80 | pass
81 | df.to_csv(csvfile, index=False)
82 |
83 | if len(df['pivot']) > 0:
84 | pivot_plot(df, outbase)
85 |
--------------------------------------------------------------------------------
/doc/learning_examples/multi_target/lasso_example_multi_bigger.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import normal_sampler, split_sampler, logit_fit
12 |
13 | def simulate(n=2000, p=1000, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=4000):
14 |
15 | # description of statistical problem
16 |
17 | X, y, truth = gaussian_instance(n=n,
18 | p=p,
19 | s=s,
20 | equicorrelated=False,
21 | rho=0.5,
22 | sigma=sigma,
23 | signal=signal,
24 | random_signs=True,
25 | scale=False)[:3]
26 |
27 | dispersion = sigma**2
28 |
29 | S = X.T.dot(y)
30 | covS = dispersion * X.T.dot(X)
31 | smooth_sampler = normal_sampler(S, covS)
32 | splitting_sampler = split_sampler(X * y[:, None], covS)
33 |
34 | def meta_algorithm(XTX, XTXi, lam, sampler):
35 |
36 | p = XTX.shape[0]
37 | success = np.zeros(p)
38 |
39 | loss = rr.quadratic_loss((p,), Q=XTX)
40 | pen = rr.l1norm(p, lagrange=lam)
41 |
42 | scale = 0.
43 | noisy_S = sampler(scale=scale)
44 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
45 | problem = rr.simple_problem(loss, pen)
46 | soln = problem.solve(max_its=100, tol=1.e-10)
47 | success += soln != 0
48 | return set(np.nonzero(success)[0])
49 |
50 | XTX = X.T.dot(X)
51 | XTXi = np.linalg.inv(XTX)
52 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
53 | dispersion = np.linalg.norm(resid)**2 / (n-p)
54 |
55 | lam = 5. * np.sqrt(n)
56 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam)
57 |
58 | # run selection algorithm
59 |
60 | return full_model_inference(X,
61 | y,
62 | truth,
63 | selection_algorithm,
64 | splitting_sampler,
65 | success_params=(1, 1),
66 | B=B,
67 | fit_probability=logit_fit,
68 | fit_args={'df':20})
69 |
70 |
71 | if __name__ == "__main__":
72 | import statsmodels.api as sm
73 | import matplotlib.pyplot as plt
74 | import pandas as pd
75 |
76 | U = np.linspace(0, 1, 101)
77 | plt.clf()
78 |
79 | for i in range(500):
80 | df = simulate(B=4000)
81 | csvfile = 'lasso_multi_bigger.csv'
82 | outbase = csvfile[:-4]
83 |
84 | if df is not None and i > 0:
85 |
86 | try: # concatenate to disk
87 | df = pd.concat([df, pd.read_csv(csvfile)])
88 | except FileNotFoundError:
89 | pass
90 | df.to_csv(csvfile, index=False)
91 |
92 | if len(df['pivot']) > 0:
93 | pivot_ax, length_ax = pivot_plot(df, outbase)
94 |
--------------------------------------------------------------------------------
/doc/learning_examples/multi_target/lasso_example_multi_gbm.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import split_sampler, gbm_fit
12 |
13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000):
14 |
15 | # description of statistical problem
16 |
17 | X, y, truth = gaussian_instance(n=n,
18 | p=p,
19 | s=s,
20 | equicorrelated=False,
21 | rho=0.5,
22 | sigma=sigma,
23 | signal=signal,
24 | random_signs=True,
25 | scale=False)[:3]
26 |
27 | dispersion = sigma**2
28 |
29 | S = X.T.dot(y)
30 | covS = dispersion * X.T.dot(X)
31 | splitting_sampler = split_sampler(X * y[:, None], covS)
32 |
33 | def meta_algorithm(XTX, XTXi, lam, sampler):
34 |
35 | p = XTX.shape[0]
36 | success = np.zeros(p)
37 |
38 | loss = rr.quadratic_loss((p,), Q=XTX)
39 | pen = rr.l1norm(p, lagrange=lam)
40 |
41 | scale = 0.
42 | noisy_S = sampler(scale=scale)
43 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
44 | problem = rr.simple_problem(loss, pen)
45 | soln = problem.solve(max_its=100, tol=1.e-10)
46 | success += soln != 0
47 | return set(np.nonzero(success)[0])
48 |
49 | XTX = X.T.dot(X)
50 | XTXi = np.linalg.inv(XTX)
51 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
52 | dispersion = np.linalg.norm(resid)**2 / (n-p)
53 |
54 | lam = 4. * np.sqrt(n)
55 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam)
56 |
57 | # run selection algorithm
58 |
59 | return full_model_inference(X,
60 | y,
61 | truth,
62 | selection_algorithm,
63 | splitting_sampler,
64 | success_params=(1, 1),
65 | B=B,
66 | fit_probability=gbm_fit,
67 | fit_args={'ntrees':5000})
68 |
69 |
70 | if __name__ == "__main__":
71 | import statsmodels.api as sm
72 | import matplotlib.pyplot as plt
73 | import pandas as pd
74 |
75 | U = np.linspace(0, 1, 101)
76 | plt.clf()
77 |
78 | for i in range(500):
79 | df = simulate()
80 | csvfile = 'lasso_multi_gbm.csv'
81 | outbase = csvfile[:-4]
82 |
83 | if df is not None and i > 0:
84 |
85 | try: # concatenate to disk
86 | df = pd.concat([df, pd.read_csv(csvfile)])
87 | except FileNotFoundError:
88 | pass
89 | df.to_csv(csvfile, index=False)
90 |
91 | if len(df['pivot']) > 0:
92 | pivot_ax, length_ax = pivot_plot(df, outbase)
93 |
--------------------------------------------------------------------------------
/doc/learning_examples/multi_target/lasso_example_multi_gbm_sk.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import split_sampler, gbm_fit_sk
12 |
13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000):  # B default assumed, matching the companion gbm script
14 |
15 | # description of statistical problem
16 |
17 | X, y, truth = gaussian_instance(n=n,
18 | p=p,
19 | s=s,
20 | equicorrelated=False,
21 | rho=0.5,
22 | sigma=sigma,
23 | signal=signal,
24 | random_signs=True,
25 | scale=False)[:3]
26 |
27 | dispersion = sigma**2
28 |
29 | S = X.T.dot(y)
30 | covS = dispersion * X.T.dot(X)
31 | splitting_sampler = split_sampler(X * y[:, None], covS)
32 |
33 | def meta_algorithm(XTX, XTXi, lam, sampler):
34 |
35 | p = XTX.shape[0]
36 | success = np.zeros(p)
37 |
38 | loss = rr.quadratic_loss((p,), Q=XTX)
39 | pen = rr.l1norm(p, lagrange=lam)
40 |
41 | scale = 0.
42 | noisy_S = sampler(scale=scale)
43 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
44 | problem = rr.simple_problem(loss, pen)
45 | soln = problem.solve(max_its=100, tol=1.e-10)
46 | success += soln != 0
47 | return set(np.nonzero(success)[0])
48 |
49 | XTX = X.T.dot(X)
50 | XTXi = np.linalg.inv(XTX)
51 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
52 | dispersion = np.linalg.norm(resid)**2 / (n-p)
53 |
54 | lam = 4. * np.sqrt(n)
55 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam)
56 |
57 | # run selection algorithm
58 |
59 | return full_model_inference(X,
60 | y,
61 | truth,
62 | selection_algorithm,
63 | splitting_sampler,
64 | success_params=(1, 1),
65 | B=B,
66 | fit_probability=gbm_fit_sk,
67 | fit_args={'n_estimators':1000})
68 |
69 |
70 | if __name__ == "__main__":
71 | import statsmodels.api as sm
72 | import matplotlib.pyplot as plt
73 | import pandas as pd
74 |
75 | U = np.linspace(0, 1, 101)
76 | plt.clf()
77 |
78 | for i in range(500):
79 | df = simulate()
80 | csvfile = 'lasso_multi_gbm_sk.csv'
81 | outbase = csvfile[:-4]
82 |
83 | if df is not None and i > 0:
84 |
85 | try: # concatenate to disk
86 | df = pd.concat([df, pd.read_csv(csvfile)])
87 | except FileNotFoundError:
88 | pass
89 | df.to_csv(csvfile, index=False)
90 |
91 | if len(df['pivot']) > 0:
92 | pivot_ax, length_ax = pivot_plot(df, outbase)
93 |
--------------------------------------------------------------------------------
/doc/learning_examples/multi_target/lasso_example_multi_random.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import normal_sampler, keras_fit
12 |
13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000):
14 |
15 | # description of statistical problem
16 |
17 | X, y, truth = gaussian_instance(n=n,
18 | p=p,
19 | s=s,
20 | equicorrelated=False,
21 | rho=0.5,
22 | sigma=sigma,
23 | signal=signal,
24 | random_signs=True,
25 | scale=False)[:3]
26 |
27 | dispersion = sigma**2
28 |
29 | S = X.T.dot(y)
30 | covS = dispersion * X.T.dot(X)
31 | smooth_sampler = normal_sampler(S, covS)
32 |
33 | def meta_algorithm(XTX, XTXi, lam, sampler):
34 |
35 | p = XTX.shape[0]
36 | success = np.zeros(p)
37 |
38 | loss = rr.quadratic_loss((p,), Q=XTX)
39 | pen = rr.l1norm(p, lagrange=lam)
40 |
41 | scale = 0.5
42 | noisy_S = sampler(scale=scale)
43 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
44 | problem = rr.simple_problem(loss, pen)
45 | soln = problem.solve(max_its=100, tol=1.e-10)
46 | success += soln != 0
47 | return set(np.nonzero(success)[0])
48 |
49 | XTX = X.T.dot(X)
50 | XTXi = np.linalg.inv(XTX)
51 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
52 | dispersion = np.linalg.norm(resid)**2 / (n-p)
53 |
54 | lam = 4. * np.sqrt(n)
55 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam)
56 |
57 | # run selection algorithm
58 |
59 | return full_model_inference(X,
60 | y,
61 | truth,
62 | selection_algorithm,
63 | smooth_sampler,
64 | success_params=(1, 1),
65 | B=B,
66 | fit_probability=keras_fit,
67 | fit_args={'epochs':20, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'})
68 |
69 |
70 | if __name__ == "__main__":
71 | import statsmodels.api as sm
72 | import matplotlib.pyplot as plt
73 | import pandas as pd
74 |
75 | U = np.linspace(0, 1, 101)
76 | plt.clf()
77 |
78 | for i in range(500):
79 | df = simulate()
80 | csvfile = 'lasso_multi_random.csv'
81 | outbase = csvfile[:-4]
82 |
83 | if df is not None and i > 0:
84 |
85 | try: # concatenate to disk
86 | df = pd.concat([df, pd.read_csv(csvfile)])
87 | except FileNotFoundError:
88 | pass
89 | df.to_csv(csvfile, index=False)
90 |
91 | if len(df['pivot']) > 0:
92 | pivot_ax, length_ax = pivot_plot(df, outbase)
93 |
--------------------------------------------------------------------------------
/doc/learning_examples/multi_target/lasso_example_multi_random_gbm.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import normal_sampler, split_sampler, gbm_fit
12 |
13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000):
14 |
15 | # description of statistical problem
16 |
17 | X, y, truth = gaussian_instance(n=n,
18 | p=p,
19 | s=s,
20 | equicorrelated=False,
21 | rho=0.5,
22 | sigma=sigma,
23 | signal=signal,
24 | random_signs=True,
25 | scale=False)[:3]
26 |
27 | dispersion = sigma**2
28 |
29 | S = X.T.dot(y)
30 | covS = dispersion * X.T.dot(X)
31 | smooth_sampler = normal_sampler(S, covS)
32 | splitting_sampler = split_sampler(X * y[:, None], covS)
33 |
34 | def meta_algorithm(XTX, XTXi, lam, sampler):
35 |
36 | p = XTX.shape[0]
37 | success = np.zeros(p)
38 |
39 | loss = rr.quadratic_loss((p,), Q=XTX)
40 | pen = rr.l1norm(p, lagrange=lam)
41 |
42 | scale = 0.5
43 | noisy_S = sampler(scale=scale)
44 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
45 | problem = rr.simple_problem(loss, pen)
46 | soln = problem.solve(max_its=100, tol=1.e-10)
47 | success += soln != 0
48 | return set(np.nonzero(success)[0])
49 |
50 | XTX = X.T.dot(X)
51 | XTXi = np.linalg.inv(XTX)
52 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
53 | dispersion = np.linalg.norm(resid)**2 / (n-p)
54 |
55 | lam = 4. * np.sqrt(n)
56 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam)
57 |
58 | # run selection algorithm
59 |
60 | return full_model_inference(X,
61 | y,
62 | truth,
63 | selection_algorithm,
64 | smooth_sampler,
65 | success_params=(1, 1),
66 | B=B,
67 | fit_probability=gbm_fit,
68 | fit_args={'ntrees':5000})
69 |
70 | if __name__ == "__main__":
71 | import statsmodels.api as sm
72 | import matplotlib.pyplot as plt
73 | import pandas as pd
74 |
75 | U = np.linspace(0, 1, 101)
76 | plt.clf()
77 |
78 | for i in range(500):
79 | df = simulate()
80 | csvfile = 'lasso_multi_random_gbm.csv'
81 | outbase = csvfile[:-4]
82 |
83 | if df is not None and i > 0:
84 |
85 | try: # concatenate to disk
86 | df = pd.concat([df, pd.read_csv(csvfile)])
87 | except FileNotFoundError:
88 | pass
89 | df.to_csv(csvfile, index=False)
90 |
91 | if len(df['pivot']) > 0:
92 | pivot_ax, length_ax = pivot_plot(df, outbase)
93 |
--------------------------------------------------------------------------------
/doc/learning_examples/multi_target/lasso_example_multi_random_rf.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import normal_sampler, split_sampler, random_forest_fit
12 |
13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000):
14 |
15 | # description of statistical problem
16 |
17 | X, y, truth = gaussian_instance(n=n,
18 | p=p,
19 | s=s,
20 | equicorrelated=False,
21 | rho=0.5,
22 | sigma=sigma,
23 | signal=signal,
24 | random_signs=True,
25 | scale=False)[:3]
26 |
27 | dispersion = sigma**2
28 |
29 | S = X.T.dot(y)
30 | covS = dispersion * X.T.dot(X)
31 | smooth_sampler = normal_sampler(S, covS)
32 | splitting_sampler = split_sampler(X * y[:, None], covS)
33 |
34 | def meta_algorithm(XTX, XTXi, lam, sampler):
35 |
36 | p = XTX.shape[0]
37 | success = np.zeros(p)
38 |
39 | loss = rr.quadratic_loss((p,), Q=XTX)
40 | pen = rr.l1norm(p, lagrange=lam)
41 |
42 | scale = 0.5
43 | noisy_S = sampler(scale=scale)
44 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
45 | problem = rr.simple_problem(loss, pen)
46 | soln = problem.solve(max_its=100, tol=1.e-10)
47 | success += soln != 0
48 | return set(np.nonzero(success)[0])
49 |
50 | XTX = X.T.dot(X)
51 | XTXi = np.linalg.inv(XTX)
52 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
53 | dispersion = np.linalg.norm(resid)**2 / (n-p)
54 |
55 | lam = 4. * np.sqrt(n)
56 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam)
57 |
58 | # run selection algorithm
59 |
60 | return full_model_inference(X,
61 | y,
62 | truth,
63 | selection_algorithm,
64 | smooth_sampler,
65 | success_params=(1, 1),
66 | B=B,
67 | fit_probability=random_forest_fit,
68 | fit_args={'ntrees':5000})
69 |
70 | if __name__ == "__main__":
71 | import statsmodels.api as sm
72 | import matplotlib.pyplot as plt
73 | import pandas as pd
74 |
75 | U = np.linspace(0, 1, 101)
76 | plt.clf()
77 |
78 | for i in range(500):
79 | df = simulate()
80 | csvfile = 'lasso_multi_random_rf.csv'
81 | outbase = csvfile[:-4]
82 |
83 | if df is not None and i > 0:
84 |
85 | try: # concatenate to disk
86 | df = pd.concat([df, pd.read_csv(csvfile)])
87 | except FileNotFoundError:
88 | pass
89 | df.to_csv(csvfile, index=False)
90 |
91 | if len(df['pivot']) > 0:
92 | pivot_ax, length_ax = pivot_plot(df, outbase)
93 |
--------------------------------------------------------------------------------
/doc/learning_examples/multi_target/lasso_example_multi_rf_sk.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 |
6 | import regreg.api as rr
7 |
8 | from selection.tests.instance import gaussian_instance
9 |
10 | from selection.learning.utils import full_model_inference, pivot_plot
11 | from selection.learning.core import normal_sampler, split_sampler, random_forest_fit_sk
12 |
13 | def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000):  # B default assumed, matching the companion scripts
14 |
15 | # description of statistical problem
16 |
17 | X, y, truth = gaussian_instance(n=n,
18 | p=p,
19 | s=s,
20 | equicorrelated=False,
21 | rho=0.5,
22 | sigma=sigma,
23 | signal=signal,
24 | random_signs=True,
25 | scale=False)[:3]
26 |
27 | dispersion = sigma**2
28 |
29 | S = X.T.dot(y)
30 | covS = dispersion * X.T.dot(X)
31 | smooth_sampler = normal_sampler(S, covS)
32 | splitting_sampler = split_sampler(X * y[:, None], covS)
33 |
34 | def meta_algorithm(XTX, XTXi, lam, sampler):
35 |
36 | p = XTX.shape[0]
37 | success = np.zeros(p)
38 |
39 | loss = rr.quadratic_loss((p,), Q=XTX)
40 | pen = rr.l1norm(p, lagrange=lam)
41 |
42 | scale = 0.
43 | noisy_S = sampler(scale=scale)
44 | loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
45 | problem = rr.simple_problem(loss, pen)
46 | soln = problem.solve(max_its=100, tol=1.e-10)
47 | success += soln != 0
48 | return set(np.nonzero(success)[0])
49 |
50 | XTX = X.T.dot(X)
51 | XTXi = np.linalg.inv(XTX)
52 | resid = y - X.dot(XTXi.dot(X.T.dot(y)))
53 | dispersion = np.linalg.norm(resid)**2 / (n-p)
54 |
55 | lam = 4. * np.sqrt(n)
56 | selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam)
57 |
58 | # run selection algorithm
59 |
60 |
61 |
62 | return full_model_inference(X,
63 | y,
64 | truth,
65 | selection_algorithm,
66 | splitting_sampler,
67 | success_params=(1, 1),
68 | B=B,
69 | fit_probability=random_forest_fit_sk,
70 | fit_args={'n_estimators':5000})
71 |
72 |
73 | if __name__ == "__main__":
74 | import statsmodels.api as sm
75 | import matplotlib.pyplot as plt
76 | import pandas as pd
77 |
78 | U = np.linspace(0, 1, 101)
79 | plt.clf()
80 |
81 | for i in range(500):
82 | df = simulate()
83 | csvfile = 'lasso_multi_rf_sk.csv'
84 | outbase = csvfile[:-4]
85 |
86 | if df is not None and i > 0:
87 |
88 | try:
89 | df = pd.concat([df, pd.read_csv(csvfile)])
90 | except FileNotFoundError:
91 | pass
92 | df.to_csv(csvfile, index=False)
93 |
94 | if len(df['pivot']) > 0:
95 | pivot_plot(df, outbase)
96 |
97 |
--------------------------------------------------------------------------------
/doc/learning_examples/standalone/cleaner_basic_example.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from selection.learning.core import (infer_general_target,
4 | normal_sampler,
5 | logit_fit,
6 | probit_fit)
7 |
8 | def simulate(n=100):
9 |
10 | # description of statistical problem
11 |
12 | truth = np.array([2. , -2.]) / np.sqrt(n)
13 |
14 | data = np.random.standard_normal((n, 2)) + np.multiply.outer(np.ones(n), truth)
15 | S = np.mean(data, 0)
16 | observed_sampler = normal_sampler(S, 1/n * np.identity(2))
17 |
18 | def selection_algorithm(sampler):
19 | min_success = 1
20 | ntries = 3
21 | success = 0
22 | for _ in range(ntries):
23 | noisyS = sampler(scale=0.5)
24 | success += noisyS.sum() > 0.2 / np.sqrt(n)
25 | return success >= min_success
26 |
27 | # run selection algorithm
28 |
29 | observed_outcome = selection_algorithm(observed_sampler)
30 |
31 | # find the target, based on the observed outcome
32 |
33 | if observed_outcome: # target is truth[0]
34 | (true_target,
35 | observed_target,
36 | target_cov,
37 | cross_cov) = (truth[0],
38 | S[0],
39 | 1./n * np.identity(1),
40 | np.array([1., 0.]).reshape((2,1)) / n)
41 | else:
42 | (true_target,
43 | observed_target,
44 | target_cov,
45 | cross_cov) = (truth[1],
46 | S[1],
47 | 1./n * np.identity(1),
48 | np.array([0., 1.]).reshape((2,1)) / n)
49 |
50 | pivot, interval = infer_general_target(selection_algorithm,
51 | observed_outcome,
52 | observed_sampler,
53 | observed_target,
54 | cross_cov,
55 | target_cov,
56 | hypothesis=true_target,
57 | fit_probability=probit_fit)[:2]
58 |
59 | return pivot, (interval[0] < true_target) * (interval[1] > true_target), interval[1] - interval[0]
60 |
61 | if __name__ == "__main__":
62 | import statsmodels.api as sm
63 | import matplotlib.pyplot as plt
64 |
65 | n = 100
66 | U = np.linspace(0, 1, 101)
67 | P, L = [], []
68 | plt.clf()
69 | coverage = 0
70 | for i in range(300):
71 | p, cover, l = simulate(n=n)
72 | coverage += cover
73 | P.append(p)
74 | L.append(l)
75 | print(np.mean(P), np.std(P), np.mean(L) / (2 * 1.65 / np.sqrt(n)), coverage / (i+1))
76 |
77 | plt.clf()
78 | plt.plot(U, sm.distributions.ECDF(P)(U), 'r', linewidth=3)
79 | plt.plot([0,1], [0,1], 'k--', linewidth=2)
80 | plt.show()
81 |
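If the selective pivots are valid they should be approximately Uniform(0, 1), which is what the ECDF-versus-diagonal plot above checks visually. A small self-contained version of that check on stand-in pivots:

    import numpy as np
    import statsmodels.api as sm

    rng = np.random.default_rng(0)
    P = rng.uniform(size=300)        # stand-in for the simulated pivots
    U = np.linspace(0, 1, 101)
    ecdf = sm.distributions.ECDF(P)(U)
    # a well-calibrated pivot tracks the diagonal closely
    print(np.max(np.abs(ecdf - U)))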
--------------------------------------------------------------------------------
/doc/learning_examples/standalone/full_model_example.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from selection.learning.core import (infer_full_target,
3 | normal_sampler,
4 | logit_fit,
5 | probit_fit)
6 |
7 | def simulate(n=100):
8 |
9 | # description of statistical problem
10 |
11 | truth = np.array([2. , -2.]) / np.sqrt(n)
12 |
13 | dispersion = 2
14 | data = np.sqrt(dispersion) * np.random.standard_normal((n, 2)) + np.multiply.outer(np.ones(n), truth)
15 | S = np.sum(data, 0)
16 | observed_sampler = normal_sampler(S, dispersion * n * np.identity(2))
17 |
18 | def selection_algorithm(sampler):
19 | min_success = 1
20 | ntries = 3
21 | success = 0
22 | for _ in range(ntries):
23 | noisyS = sampler(scale=0.5)
24 | success += noisyS.sum() > 0.2 * np.sqrt(n) * np.sqrt(dispersion)
25 | if success >= min_success:
26 | return set([1, 0])
27 | return set([1])
28 |
29 | # run selection algorithm
30 |
31 | observed_set = selection_algorithm(observed_sampler)
32 |
33 | # find the target, based on the observed outcome
34 |
35 | # we just take the first target
36 |
37 | pivots, covered, lengths = [], [], []
38 | for idx in observed_set:
39 | true_target = truth[idx]
40 |
41 | pivot, interval = infer_full_target(selection_algorithm,
42 | observed_set,
43 | [idx],
44 | observed_sampler,
45 | dispersion,
46 | hypothesis=[true_target],
47 | fit_probability=probit_fit)[0][:2]
48 |
49 | pivots.append(pivot)
50 | covered.append((interval[0] < true_target) * (interval[1] > true_target))
51 | lengths.append(interval[1] - interval[0])
52 |
53 | return pivots, covered, lengths
54 |
55 | if __name__ == "__main__":
56 | import statsmodels.api as sm
57 | import matplotlib.pyplot as plt
58 |
59 | n = 100
60 | U = np.linspace(0, 1, 101)
61 | P, L, coverage = [], [], []
62 | plt.clf()
63 | for i in range(300):
64 | p, cover, l = simulate(n=n)
65 | coverage.extend(cover)
66 | P.extend(p)
67 | L.extend(l)
68 | print(np.mean(P), np.std(P), np.mean(L) / (2 * 1.65 / np.sqrt(n)), np.mean(coverage))
69 |
70 | plt.clf()
71 | plt.plot(U, sm.distributions.ECDF(P)(U), 'r', linewidth=3)
72 | plt.plot([0,1], [0,1], 'k--', linewidth=2)
73 | plt.show()
74 |
--------------------------------------------------------------------------------
/doc/license.rst:
--------------------------------------------------------------------------------
1 | Copyright (c) 2015, Selective Inference development team
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are
6 | met:
7 |
8 | * Redistributions of source code must retain the above copyright
9 | notice, this list of conditions and the following disclaimer.
10 |
11 | * Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials provided
14 | with the distribution.
15 |
16 | * The names of any contributors to this software
17 | may not be used to endorse or promote products derived
18 | from this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/doc/notebooks/learning/simple_example_pivots.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/notebooks/learning/simple_example_pivots.pdf
--------------------------------------------------------------------------------
/doc/notebooks/learning/simple_example_sel_prob.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/notebooks/learning/simple_example_sel_prob.pdf
--------------------------------------------------------------------------------
/doc/source/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/_static/logo.png
--------------------------------------------------------------------------------
/doc/source/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "!layout.html" %}
2 | {% set title = 'Selection' %}
3 |
4 | {% block rootrellink %}
5 |
Selection home |
6 | {% endblock %}
7 |
8 |
9 | {% block extrahead %}
10 |
11 | {% endblock %}
12 |
13 | {% block header %}
14 |
18 | {% endblock %}
19 |
20 | {# This block gets put at the top of the sidebar #}
21 | {% block sidebarlogo %}
22 | {% endblock %}
23 |
24 | Site Navigation
25 |
29 |
30 | {# I had to copy the whole search block just to change the rendered text,
31 | so it doesn't mention modules or classes #}
32 | {%- block sidebarsearch %}
33 | {%- if pagename != "search" %}
34 |
35 |
36 |
{{ _('Search this site') }}
37 |
43 |
44 |
45 |
46 |
47 |
48 | {%- endif %}
49 |
50 | {# The sidebarsearch block is the last one available in the default sidebar()
51 | macro, so the only way to add something to the bottom of the sidebar is to
52 | put it here, at the end of the sidebarsearch block (before it closes).
53 | #}
54 |
55 | {%- endblock %}
56 |
--------------------------------------------------------------------------------
/doc/source/algorithms/index.rst:
--------------------------------------------------------------------------------
1 | =========================
2 | Non-randomized algorithms
3 | =========================
4 |
5 | This is a project that collects various tools for
6 | post-selection inference.
7 |
8 | .. toctree::
9 | :maxdepth: 2
10 |
11 | covtest.ipynb
12 | spacings
13 |
--------------------------------------------------------------------------------
/doc/source/algorithms/spacings_files/spacings_23_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_23_0.png
--------------------------------------------------------------------------------
/doc/source/algorithms/spacings_files/spacings_25_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_25_0.png
--------------------------------------------------------------------------------
/doc/source/algorithms/spacings_files/spacings_27_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_27_0.png
--------------------------------------------------------------------------------
/doc/source/algorithms/spacings_files/spacings_29_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_29_0.png
--------------------------------------------------------------------------------
/doc/source/algorithms/spacings_files/spacings_31_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_31_0.png
--------------------------------------------------------------------------------
/doc/source/algorithms/spacings_files/spacings_3_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_3_0.png
--------------------------------------------------------------------------------
/doc/source/algorithms/spacings_files/spacings_4_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_4_0.png
--------------------------------------------------------------------------------
/doc/source/algorithms/spacings_files/spacings_5_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_5_0.png
--------------------------------------------------------------------------------
/doc/source/algorithms/spacings_files/spacings_6_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_6_0.png
--------------------------------------------------------------------------------
/doc/source/algorithms/spacings_files/spacings_7_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_7_0.png
--------------------------------------------------------------------------------
/doc/source/algorithms/spacings_files/spacings_9_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/doc/source/algorithms/spacings_files/spacings_9_0.png
--------------------------------------------------------------------------------
/doc/source/docattribute.rst:
--------------------------------------------------------------------------------
1 | .. _doc-attribute:
2 |
3 | Selection documentation attribution
4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 |
6 | This website is based on the `NIPY project website `_, which is licensed under a `Creative Commons Attribution 3.0 License `_.
7 |
8 | We have licensed our own documentation using the same license, see :ref:`selectinf-license`.
--------------------------------------------------------------------------------
/doc/source/documentation.rst:
--------------------------------------------------------------------------------
1 | .. _documentation-main:
2 |
3 | =============
4 | Documentation
5 | =============
6 |
7 | .. only:: html
8 |
9 | :Release: |version|
10 | :Date: |today|
11 |
12 | Download `PDF `_
13 |
14 | Contents:
15 |
16 | .. toctree::
17 | :maxdepth: 1
18 | :glob:
19 |
20 | download.rst
21 | license.rst
22 | api/index.rst
23 | docattribute.rst
24 |
25 |
26 |
--------------------------------------------------------------------------------
/doc/source/download.rst:
--------------------------------------------------------------------------------
1 | .. _download:
2 |
3 | Downloading and installing the code
4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 |
6 | The post-selection inference source code is hosted at
7 |
8 | http://github.com/selective-inference/Python-software
9 |
10 | Selection depends on the following Python tools
11 |
12 | * `NumPy `_
13 |
14 | * `SciPy `_
15 |
16 | * `Cython `_
17 |
18 | * `Pandas `_
19 |
20 | You can clone the selection repo using::
21 |
22 | git clone https://github.com/selective-inference/Python-software.git
23 |
24 | Then installation is a simple call to python::
25 |
26 | cd Python-software
27 | git submodule update --init
28 | pip install -r requirements.txt
29 | python setup.py install --prefix=MYDIR
30 |
31 | where MYDIR is a site-packages directory you can write to. This
32 | directory will need to be on your PYTHONPATH for you to import
33 | `selectinf`. That's it!
34 |
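You can confirm the package is importable from Python with::

    import selectinf
    print(selectinf.__file__)
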
35 | Testing your installation
36 | -------------------------
37 |
38 | There is a small but growing suite of tests that can be easily checked using `nose `_::
39 |
40 | mkdir tmp
41 | cd tmp
42 | nosetests -v selectinf
43 |
44 |
--------------------------------------------------------------------------------
/doc/source/index.rst:
--------------------------------------------------------------------------------
1 | .. _about_selection:
2 |
3 | =====================
4 | The Selection project
5 | =====================
6 |
7 | .. include:: ./links_names.txt
8 |
9 | This is a project that collects various tools for
10 | post-selection inference.
11 |
12 |
13 | .. toctree::
14 | :maxdepth: 2
15 |
16 | documentation
17 | algorithms/index
18 | randomized/index
19 | learning/index
20 |
21 |
22 | Jonathan Taylor was funded by the NSF while writing his portion of the
23 | software. As such, this material is based upon work supported by the
24 | National Science Foundation under Grant DMS 1208857, and by the AFOSR
25 | grant 113039.
26 |
27 | Any opinions, findings, and conclusions or recommendations expressed
28 | in this material are those of the author(s) and do not necessarily
29 | reflect the views of the National Science Foundation.
30 |
31 | .. include:: ../links_names.txt
--------------------------------------------------------------------------------
/doc/source/learning/Learning1.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | jupyter:
3 | jupytext:
4 | cell_metadata_filter: all,-slideshow
5 | formats: ipynb,Rmd
6 | text_representation:
7 | extension: .Rmd
8 | format_name: rmarkdown
9 | format_version: '1.1'
10 | jupytext_version: 1.1.1
11 | kernelspec:
12 | display_name: Python 3
13 | language: python
14 | name: python3
15 | ---
16 |
17 | # Learning 1
18 |
19 | ```{python}
20 | import numpy as np
21 | print('notebook 1')
22 | ```
23 |
24 | ```{python collapsed=TRUE}
25 |
26 | ```
27 |
--------------------------------------------------------------------------------
/doc/source/learning/Learning1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Learning 1"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "name": "stdout",
17 | "output_type": "stream",
18 | "text": [
19 | "notebook 1\n"
20 | ]
21 | }
22 | ],
23 | "source": [
24 | "import numpy as np\n",
25 | "print('notebook 1')"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": null,
31 | "metadata": {
32 | "collapsed": true
33 | },
34 | "outputs": [],
35 | "source": []
36 | }
37 | ],
38 | "metadata": {
39 | "jupytext": {
40 | "cell_metadata_filter": "all,-slideshow",
41 | "formats": "ipynb,Rmd"
42 | },
43 | "kernelspec": {
44 | "display_name": "Python 3",
45 | "language": "python",
46 | "name": "python3"
47 | },
48 | "language_info": {
49 | "codemirror_mode": {
50 | "name": "ipython",
51 | "version": 3
52 | },
53 | "file_extension": ".py",
54 | "mimetype": "text/x-python",
55 | "name": "python",
56 | "nbconvert_exporter": "python",
57 | "pygments_lexer": "ipython3",
58 | "version": "3.6.2"
59 | }
60 | },
61 | "nbformat": 4,
62 | "nbformat_minor": 2
63 | }
64 |
--------------------------------------------------------------------------------
/doc/source/learning/Learning2.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | jupyter:
3 | jupytext:
4 | cell_metadata_filter: all,-slideshow
5 | formats: ipynb,Rmd
6 | text_representation:
7 | extension: .Rmd
8 | format_name: rmarkdown
9 | format_version: '1.1'
10 | jupytext_version: 1.1.1
11 | kernelspec:
12 | display_name: Python 3
13 | language: python
14 | name: python3
15 | ---
16 |
17 | # Learning 2
18 |
19 | ```{python}
20 | import numpy as np
21 | print('notebook 2')
22 | ```
23 |
24 | ```{python collapsed=TRUE}
25 |
26 | ```
27 |
--------------------------------------------------------------------------------
/doc/source/learning/Learning2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Learning 2"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "name": "stdout",
17 | "output_type": "stream",
18 | "text": [
19 | "notebook 2\n"
20 | ]
21 | }
22 | ],
23 | "source": [
24 | "import numpy as np\n",
25 | "print('notebook 2')"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": null,
31 | "metadata": {
32 | "collapsed": true
33 | },
34 | "outputs": [],
35 | "source": []
36 | }
37 | ],
38 | "metadata": {
39 | "jupytext": {
40 | "cell_metadata_filter": "all,-slideshow",
41 | "formats": "ipynb,Rmd"
42 | },
43 | "kernelspec": {
44 | "display_name": "Python 3",
45 | "language": "python",
46 | "name": "python3"
47 | },
48 | "language_info": {
49 | "codemirror_mode": {
50 | "name": "ipython",
51 | "version": 3
52 | },
53 | "file_extension": ".py",
54 | "mimetype": "text/x-python",
55 | "name": "python",
56 | "nbconvert_exporter": "python",
57 | "pygments_lexer": "ipython3",
58 | "version": "3.6.2"
59 | }
60 | },
61 | "nbformat": 4,
62 | "nbformat_minor": 2
63 | }
64 |
--------------------------------------------------------------------------------
/doc/source/learning/index.rst:
--------------------------------------------------------------------------------
1 | Learning selection
2 | ------------------
3 |
4 | This package illustrates examples in `Inference after selection through a black box `_
5 | as well as generalizations based on learning multiparameter functions rather than the simple univariate
6 | case considered above.
7 |
8 | .. toctree::
9 | :maxdepth: 2
10 |
11 | Learning1.ipynb
12 | Learning2.ipynb
--------------------------------------------------------------------------------
/doc/source/license.rst:
--------------------------------------------------------------------------------
1 | .. _selectinf-license:
2 |
3 | =======================================
4 | Selective Inference License Information
5 | =======================================
6 |
7 | .. _selectinf-software-license:
8 |
9 | Software License
10 | -----------------
11 |
12 | Except where otherwise noted, all `selective-inference `_ software is licensed under a
13 | `revised BSD license `_.
14 |
15 | .. _selectinf-documentation-license:
16 |
17 | Documentation License
18 | ---------------------
19 |
20 | Except where otherwise noted, all `selective-inference `_ documentation is licensed under a
21 | `Creative Commons Attribution 3.0 License `_.
22 |
23 | All code fragments in the documentation are licensed under our
24 | software license.
25 |
--------------------------------------------------------------------------------
/doc/source/links_names.txt:
--------------------------------------------------------------------------------
1 | .. This (-*- rst -*-) format file contains commonly used link targets
2 | and name substitutions. It may be included in many files,
3 | therefore it should only contain link targets and name
4 | substitutions. Try grepping for "^\.\. _" to find plausible
5 | candidates for this list.
6 |
7 | .. NOTE: reST targets are
8 | __not_case_sensitive__, so only one target definition is needed for
9 | nipy, NIPY, Nipy, etc...
10 |
11 | .. Post selection papers
12 | .. _covtest: http://arxiv.org/abs/1301.7161
13 |
14 | .. Documentation tools
15 | .. _graphviz: http://www.graphviz.org/
16 | .. _Sphinx: http://sphinx.pocoo.org/
17 | .. _`Sphinx reST`: http://sphinx.pocoo.org/rest.html
18 | .. _reST: http://docutils.sourceforge.net/rst.html
19 | .. _docutils: http://docutils.sourceforge.net
20 |
21 | .. Licenses
22 | .. _GPL: http://www.gnu.org/licenses/gpl.html
23 | .. _BSD: http://www.opensource.org/licenses/bsd-license.php
24 | .. _LGPL: http://www.gnu.org/copyleft/lesser.html
25 | .. _MIT License: http://www.opensource.org/licenses/mit-license.php
26 |
27 | .. Working process
28 | .. _sourceforge: http://nipy.sourceforge.net/
29 | .. _github: http://github.com
30 |
31 | .. Code support stuff
32 | .. _pychecker: http://pychecker.sourceforge.net/
33 | .. _pylint: http://www.logilab.org/project/pylint
34 | .. _pyflakes: http://divmod.org/trac/wiki/DivmodPyflakes
35 | .. _virtualenv: http://pypi.python.org/pypi/virtualenv
36 | .. _git: http://git.or.cz/
37 | .. _flymake: http://flymake.sourceforge.net/
38 | .. _rope: http://rope.sourceforge.net/
39 | .. _pymacs: http://pymacs.progiciels-bpi.ca/pymacs.html
40 | .. _ropemacs: http://rope.sourceforge.net/ropemacs.html
41 | .. _ECB: http://ecb.sourceforge.net/
42 | .. _emacs_python_mode: http://www.emacswiki.org/cgi-bin/wiki/PythonMode
43 | .. _doctest-mode: http://www.cis.upenn.edu/~edloper/projects/doctestmode/
44 | .. _nose: http://somethingaboutorange.com/mrl/projects/nose
45 | .. _`python coverage tester`: http://nedbatchelder.com/code/modules/coverage.html
46 |
47 | .. Other python projects
48 | .. _numpy: http://www.scipy.org/NumPy
49 | .. _scipy: http://www.scipy.org
50 | .. _cython: http://www.cython.org/
51 | .. _ipython: http://ipython.scipy.org
52 | .. _`ipython manual`: http://ipython.scipy.org/doc/manual/html
53 | .. _matplotlib: http://matplotlib.sourceforge.net
54 | .. _python: http://www.python.org
55 | .. _networkx: http://networkx.lanl.gov/
56 |
57 | .. General software
58 | .. _gcc: http://gcc.gnu.org
59 | .. _xcode: http://developer.apple.com/TOOLS/xcode
60 | .. _mingw: http://www.mingw.org
61 | .. _macports: http://www.macports.org/
62 |
63 |
--------------------------------------------------------------------------------
/doc/source/randomized/index.rst:
--------------------------------------------------------------------------------
1 | =====================
2 | Randomized algorithms
3 | =====================
4 |
5 | This module implements several methods for inference after a randomized
6 | selection as described in this paper on `proximal change of variables `_
7 |
8 | .. toctree::
9 | :maxdepth: 2
10 |
11 | lasso.ipynb
12 |
--------------------------------------------------------------------------------
/doc/source/sphinxext/math_dollar.py:
--------------------------------------------------------------------------------
1 | # emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
2 | # vi: set ft=python sts=4 ts=4 sw=4 et:
3 | ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
4 | #
5 | # See COPYING file distributed along with the NiBabel package for the
6 | # copyright and license terms.
7 | #
8 | ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
9 | import re
10 |
11 | def dollars_to_math(source):
12 | r"""
13 | Replace dollar signs with backticks.
14 |
15 | More precisely, do a regular expression search. Replace a plain
16 | dollar sign ($) by a backtick (`). Replace an escaped dollar sign
17 | (\$) by a dollar sign ($). Don't change a dollar sign preceded or
18 | followed by a backtick (`$ or $`), because of strings like
19 | "``$HOME``". Don't make any changes on lines starting with
20 | spaces, because those are indented and hence part of a block of
21 | code or examples.
22 |
23 | This also doesn't replace dollar signs enclosed in curly braces,
24 | to avoid nested math environments, such as ::
25 |
26 | $f(n) = 0 \text{ if $n$ is prime}$
27 |
28 | Thus the above line would get changed to
29 |
30 | `f(n) = 0 \text{ if $n$ is prime}`
31 | """
32 | s = "\n".join(source)
33 | if s.find("$") == -1:
34 | return
35 | # This searches for "$blah$" inside a pair of curly braces --
36 | # don't change these, since they're probably coming from a nested
37 | # math environment. So for each match, we replace it with a temporary
38 | # string, and later on we substitute the original back.
39 | global _data
40 | _data = {}
41 | def repl(matchobj):
42 | global _data
43 | s = matchobj.group(0)
44 | t = "___XXX_REPL_%d___" % len(_data)
45 | _data[t] = s
46 | return t
47 | s = re.sub(r"({[^{}$]*\$[^{}$]*\$[^{}]*})", repl, s)
48 | # matches $...$
49 | dollars = re.compile(r"(?<!\$)(?<!\\)\$([^\$]+?)\$")
--------------------------------------------------------------------------------
17 | good <- ((xy[,2] > b1*xy[,1]) & (xy[,2] < b2 * xy[,1]))
18 | z <- rep(NA,nrow(xy))
19 | z[good] <- ci.len(xy[good,1],xy[good,2])
20 | z
21 | }
22 |
23 | ci.len <- function(x,y) {
24 | cutoff.x <- ifelse(x>y,(y-x)/(b1-1),(y-x)/(b2-1))
25 | cutoff.y <- ifelse(x>y,b1*(y-x)/(b1-1),b2*(y-x)/(b2-1))
26 | cutoff <- (cutoff.x + cutoff.y)/sqrt(2)
27 | observed <- (x+y)/sqrt(2)
28 | apply(cbind(observed,cutoff),1,function(x) {
29 | ci <- try(ShortestCI(x[1],1,x[2],.05),silent=TRUE)
30 | if(is.list(ci)) {
31 | return(ci$upper - ci$lower)
32 | } else {
33 | return(NA)
34 | }
35 | })
36 | }
37 |
38 | xy <- expand.grid(c(-1,seq(0,4,.02)),c(-1,seq(0,4,.02)))
39 | z <- ci.len.wrapper(xy)
40 | ## This is a hack because of a bug in the package
41 | z[xy[,1] > 0 & abs(xy[,1]-xy[,2])<.023] <- 2*1.96
42 |
43 |
44 | rast <- rasterFromXYZ(cbind(xy,z))
45 |
46 | pdf("CILengthCorr.pdf")
47 | plot(rast,xlim=c(-2.5,4),ylim=c(-2.5,4),xlab=expression(y[1]),ylab=expression(y[2]),col=rev(heat.colors(20)),
48 | main="CI Length for Univariate Model")
49 | abline(h=0,lty=3,col="gray")
50 | abline(v=0,lty=3,col="gray")
51 | arrows(x0=c(0,0,0),y0=c(0,0,0),x1=c(x[1,]),y1=c(x[2,]),length=.15)
52 | abline(0,b1,lty=2)
53 | abline(0,b2,lty=2)
54 | dev.off()
55 |
56 |
57 |
58 |
59 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cython
2 | numpy
3 | scipy
4 | pandas
5 | mpmath
6 | pyinter
7 | sklearn
8 | regreg
9 | # keras
10 | # tensorflow
11 | traitlets
12 |
--------------------------------------------------------------------------------
/sandbox/absurd.py:
--------------------------------------------------------------------------------
1 | from importlib import reload
2 |
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 |
6 | import kmeans
7 | kmeans = reload(kmeans)
8 |
9 | n = 20
10 | p = 5
11 | n_sample = 50
12 | p_array = []
13 |
14 | t_distance = [0]
15 | #distance = 5
16 |
17 | # reference line: the uniform CDF
18 | x = np.arange(0, 1, 1./n_sample)
19 | plt.plot(x, x, 'g')
20 |
21 | for distance in t_distance:
22 | i = 0
23 | while i < n_sample:
24 | print(i, " / ", n_sample, distance)
25 | p_value = kmeans.f(n, p, distance)[0]
26 | # keep only p-values strictly inside (0, 1)
27 | if 0 < p_value < 1:
28 | p_array.append(p_value)
29 | i += 1
30 |
31 | p_array = sorted(p_array)
32 | print(p_array)
33 |
34 | # empirical quantiles against the uniform reference
35 | plt.plot(x, p_array, 'b')
36 | plt.show()
--------------------------------------------------------------------------------
/sandbox/bayesian/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/sandbox/bayesian/__init__.py
--------------------------------------------------------------------------------
/sandbox/bayesian/crime_data_attempt.py:
--------------------------------------------------------------------------------
1 |
2 | import os, numpy as np, pandas, statsmodels.api as sm
3 | import time
4 | import matplotlib.pyplot as plt
5 | import regreg.api as rr
6 | from selection.reduced_optimization.initial_soln import selection
7 | from selection.randomized.api import randomization
8 | from selection.reduced_optimization.lasso_reduced import nonnegative_softmax_scaled, neg_log_cube_probability, selection_probability_lasso, \
9 | sel_prob_gradient_map_lasso, selective_inf_lasso
10 |
11 | crime = pandas.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/communities/communities.data', header=None, na_values=['?'])
12 | crime = crime.iloc[:, 5:]
13 | crime.dropna(inplace=True)
14 | crime.head()
15 |
16 | # define X and y
17 | X = crime.iloc[:, :-1]
18 | n, p = X.shape
19 | X -= X.mean(0)[None, :]
20 | X /= (X.std(0)[None, :] * np.sqrt(n))
21 |
22 | Y = crime.iloc[:, -1]
23 | print("shape", X.shape, Y.shape)
24 |
25 | ols_fit = sm.OLS(Y, X).fit()
26 | print("residual", np.linalg.norm(ols_fit.resid))
27 | sigma_3TC = np.linalg.norm(ols_fit.resid) / np.sqrt(n-p-1)
28 | OLS_3TC = ols_fit.params
29 | print("sigma", sigma_3TC)
30 |
--------------------------------------------------------------------------------
/sandbox/bayesian/mixed_model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | class instance_mixed(object):
4 |
5 | def __init__(self, n, p, s, sigma=1., rho=0, random_signs=False, scale=True, center=True):
6 | (self.n, self.p, self.s,
7 | self.sigma,
8 | self.rho) = (n, p, s,
9 | sigma,
10 | rho)
11 |
12 | self.X = (np.sqrt(1 - self.rho) * np.random.standard_normal((self.n, self.p)) +
13 | np.sqrt(self.rho) * np.random.standard_normal(self.n)[:, None])
14 | if center:
15 | self.X -= self.X.mean(0)[None, :]
16 | if scale:
17 | self.X /= (self.X.std(0)[None, :] * np.sqrt(self.n))
18 |
19 | self.beta = np.zeros(p)
20 | self.beta[:self.s] = np.linspace(0.5, 5.0, num=s)
21 | if random_signs:
22 | self.beta[:self.s] *= (2 * np.random.binomial(1, 0.5, size=(s,)) - 1.)
23 | self.active = np.zeros(p, np.bool)
24 | self.active[:self.s] = True
25 |
26 | def _noise(self):
27 | return np.random.standard_normal(self.n)
28 |
29 | def generate_response(self):
30 |
31 | Y = (self.X.dot(self.beta) + self._noise()) * self.sigma
32 | return self.X, Y, self.beta * self.sigma, np.nonzero(self.active)[0], self.sigma
33 |
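For reference, a minimal sketch of drawing one simulated dataset from this class (assuming the module is importable from the sandbox; parameter values are illustrative):

    from mixed_model import instance_mixed

    inst = instance_mixed(n=100, p=20, s=3, sigma=1., rho=0.3)
    X, Y, beta, active, sigma = inst.generate_response()
    # X is centered and scaled; beta has s nonzero entries indexed by active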
--------------------------------------------------------------------------------
/sandbox/bayesian/read_file.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os, numpy as np, pandas, statsmodels.api as sm
3 |
4 | #path =r'/Users/snigdhapanigrahi/Results_freq_EQTL/sparsity_5/dim_1/dim_1'
5 | #path =r'/Users/snigdhapanigrahi/Results_reduced_optimization/fixed_lasso/fixed_lasso'
6 |
7 | path =r'/Users/snigdhapanigrahi/Results_reduced_optimization/experiment_dual_0'
8 | #path =r'/Users/snigdhapanigrahi/Results_reduced_optimization/bayesian_dual'
9 | allFiles = glob.glob(path + "/*.txt")
10 |
11 | list_ = []
12 | for file_ in allFiles:
13 | df = np.loadtxt(file_)
14 | list_.append(df)
15 |
16 | def summary_files(list_):
17 |
18 | coverage_ad = 0.
19 | coverage_unad = 0.
20 | length_ad = 0.
21 | length_unad = 0.
22 | loss_ad = 0.
23 | loss_unad = 0.
24 |
25 | length = len(list_)
26 | print("number of simulations", length)
27 |
28 | for i in range(length):
29 | print("iteration", i)
30 | lasso = list_[i].reshape((6, 1))
31 | coverage_ad += lasso[0,0]
32 | coverage_unad += lasso[1,0]
33 | length_ad += lasso[2,0]
34 | length_unad += lasso[3,0]
35 | loss_ad += lasso[4,0]
36 | loss_unad += lasso[5, 0]
37 |
38 | return coverage_ad / length, coverage_unad / length, length_ad / length, length_unad / length,\
39 | loss_ad/length, loss_unad/length
40 |
41 | print(summary_files(list_))
42 |
43 |
44 |
45 |
--------------------------------------------------------------------------------
/sandbox/randomized_tests/test_reconstruction.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import numpy as np
3 |
4 | import regreg.api as rr
5 |
6 | from selection.tests.decorators import wait_for_return_value, register_report
7 | import selection.tests.reports as reports
8 |
9 | from selection.api import multiple_queries
10 | from selection.randomized.glm import split_glm_group_lasso, target as glm_target
11 | from selection.tests.instance import logistic_instance
12 |
13 | @wait_for_return_value()
14 | def test_reconstruction(s=3,
15 | n=200,
16 | p=50,
17 | signal=7,
18 | rho=0.1,
19 | split_frac=0.8,
20 | lam_frac=0.7,
21 | ndraw=100,
22 | burnin=200,
23 | bootstrap=True,
24 | solve_args={'min_its':50, 'tol':1.e-10},
25 | reference_known=False):
26 |
27 | X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, signal=signal)
28 |
29 | m = int(split_frac * n)
30 | nonzero = np.where(beta)[0]
31 |
32 | loss = rr.glm.logistic(X, y)
33 | epsilon = 1. / np.sqrt(n)
34 |
35 | lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 2000)))).max(0))
36 | W = np.ones(p)*lam
37 | W[0] = 0 # use at least some unpenalized
38 | penalty = rr.group_lasso(np.arange(p),
39 | weights=dict(zip(np.arange(p), W)), lagrange=1.)
40 |
41 | M_est = split_glm_group_lasso(loss, epsilon, m, penalty)
42 | mv = multiple_queries([M_est])
43 | mv.solve()
44 |
45 | M_est.selection_variable['variables'] = M_est.selection_variable['variables']
46 | nactive = np.sum(M_est.selection_variable['variables'])
47 |
48 | if nactive==0:
49 | return None
50 |
51 | if set(nonzero).issubset(np.nonzero(M_est.selection_variable['variables'])[0]):
52 |
53 | active_set = np.nonzero(M_est.selection_variable['variables'])[0]
54 |
55 | target_sampler, target_observed = glm_target(loss,
56 | M_est.selection_variable['variables'],
57 | mv)
58 |
59 | target_sample = target_sampler.sample(ndraw=ndraw,
60 | burnin=burnin,
61 | keep_opt=True)
62 |
63 | reconstruction = target_sampler.reconstruct(target_sample)
64 | logdens = target_sampler.log_density(target_sample)
65 | return logdens.shape
66 |
--------------------------------------------------------------------------------
/sandbox/tensorflow_test.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.stats
3 | import matplotlib.pyplot as plt
4 | import tensorflow as tf
5 |
6 | import tensorflow_fit
7 | from selection.distributions.discrete_family import discrete_family
8 |
9 | ntries, sigma, q = 21, 1, 0.3
10 | Z = np.linspace(-8, 8, 1001)
11 |
12 | def algorithm(Z, ntries=ntries, q=q):
13 | # noisy selection: accept Z when enough randomized sign checks pass
14 | proportion = 0
15 | for _ in range(ntries):
16 | proportion += ((Z + sigma * np.random.standard_normal() > 0) *
17 | (Z + 1 + sigma * np.random.standard_normal() > 0) *
18 | (Z - 0.5 + sigma * np.random.standard_normal() > 0))
19 | proportion /= ntries
20 | return proportion > q
21 |
22 | # a function that is parameterized by hyperparameters
23 | def create_network(num_hidden, num_outputs):
24 | def create(features):
25 | X = features
26 | hidA = tf.layers.Dense(activation=tf.nn.relu, units=num_hidden, name='hidA')
27 | outlayer = tf.layers.Dense(activation=tf.nn.relu, units=num_outputs, name='hid')
28 | output = outlayer(hidA(X))
29 | return output
30 | return create
31 |
32 | def fit_algorithm(algorithm, B=500, ntries=ntries, q=q, Zval=Z, link='probit'):
33 |
34 | # training proposals for Z, drawn at several scales
35 | Z = np.hstack([np.random.standard_normal(B) * 2,
36 | np.random.standard_normal(B),
37 | np.random.standard_normal(B) * 3,
38 | np.random.standard_normal(B) * 0.5])
39 | print('ZS=', Z.shape)
40 |
41 | # is there no "active part" that updates the Z proposals somewhere?
42 | Y = np.array([algorithm(z, ntries=ntries, q=q) for z in Z])
43 | optimize = tensorflow_fit.create_optimizer() # a default optimizer
44 | predictor_f = tensorflow_fit.fit(np.reshape(Z, (Z.shape[0], 1)),
45 | np.reshape(Y, (Y.shape[0], 1)),
46 | create_network(10, 1),
47 | tensorflow_fit.create_l2_loss,
48 | optimize)
49 | print('ZS2=', Zval.shape)
50 | return predictor_f(np.reshape(Zval, (Zval.shape[0], 1)))
51 |
52 | def simulate(ntries=ntries, sigma=sigma, truth=0):
53 | # rejection-sample one Z that survives the selection
54 | while True:
55 | Z = np.random.standard_normal() + truth
56 | if algorithm(Z, ntries, q=q):
57 | return Z
58 |
59 | W1 = fit_algorithm(algorithm, ntries=ntries, q=q, Zval=Z)
60 | print('done')
61 | plt.plot(Z, np.log(W1))
62 | selective_law1 = discrete_family(Z, W1 * scipy.stats.norm.pdf(Z))
63 |
64 | def pivot1(z, truth=0):
65 | return 1 - selective_law1.cdf(truth, z)
--------------------------------------------------------------------------------
/sandbox/test_cover.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from selection.algorithms.tests.test_lasso import test_data_carving
4 |
5 | P = []
6 | covered = []
7 |
8 | num_except = 0
9 | for _ in range(500):
10 | try:
11 | results = test_data_carving(compute_intervals=True,
12 | burnin=5000,
13 | ndraw=10000)[0]
14 | covered.extend(results[-4])
15 | P.extend(results[0])
16 | print(np.mean(P), np.std(P), 'null')
17 | print(np.mean(covered), 'covered')
18 |
19 | except KeyboardInterrupt:
20 | raise KeyboardInterrupt
21 | except:
22 | num_except += 1; print('num except: %d' % num_except)
23 | pass
24 |
25 |
26 |
--------------------------------------------------------------------------------
/sandbox/test_isotonic.py:
--------------------------------------------------------------------------------
1 | from ..isotonic import isotonic
2 | import numpy as np
3 |
4 | def test_isotonic():
5 | y = np.random.standard_normal(50)
6 | I = isotonic(y)
7 | print(I.first_jump)
8 | print(I.largest_jump)
9 | print(I.combine_jumps(2))
10 |
--------------------------------------------------------------------------------
/selectinf/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/selectinf/algorithms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/algorithms/__init__.py
--------------------------------------------------------------------------------
/selectinf/algorithms/api.py:
--------------------------------------------------------------------------------
1 | from .lasso import (lasso,
2 | data_carving as data_carving_lasso,
3 | additive_noise as additive_noise_lasso)
4 |
5 | from .sqrt_lasso import (choose_lambda as choose_lambda_sqrt_lasso,
6 | solve_sqrt_lasso)
7 |
8 | from .forward_step import (forward_step,
9 | info_crit_stop)
10 |
11 | from .covtest import (covtest,
12 | selected_covtest)
13 |
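For orientation, a minimal sketch of the re-exported LASSO interface; the `gaussian` constructor and `fit` pattern follows its use elsewhere in this repository, and the data below are illustrative:

    import numpy as np
    from selectinf.algorithms.api import lasso

    X = np.random.standard_normal((100, 10))
    y = np.random.standard_normal(100)
    L = lasso.gaussian(X, y, 3., sigma=1.)  # common feature weight of 3
    L.fit()
    print(L.active)  # indices selected by the LASSO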
--------------------------------------------------------------------------------
/selectinf/algorithms/pca.py:
--------------------------------------------------------------------------------
1 | """
2 | Step 1 test based on largest singular vector.
3 |
4 | This is the test described in `Kac Rice`_ for $X=I$ and the penalty being the nuclear norm
5 |
6 | .. math::
7 |
8 | {\cal P}(\beta) = \sum_{i=1}^{\min(n,p)} \sigma_i(\beta)
9 |
10 | for $\beta \in \mathbb{R}^{n \times p}$.
11 |
12 | .. _Kac Rice: http://arxiv.org/abs/1308.3020
13 | """
14 |
15 | import numpy as np
16 | from ..distributions.pvalue import general_pvalue
17 |
18 | def pvalue(X, sigma=1, nsim=5000):
19 | n, p = X.shape
20 | D = np.linalg.svd(X)[1] / sigma
21 | m = n+p-2
22 | H = np.zeros(m)
23 |
24 | nonzero = np.hstack([D[1:],-D[1:]])
25 | H[:nonzero.shape[0]] = nonzero
26 |
27 | return max(0, min(general_pvalue(D[0], D[1], np.inf, H, nsim=nsim), 1))
28 |
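A minimal sketch of calling this test on pure-noise data; under the global null the p-value should be roughly uniform:

    import numpy as np
    from selectinf.algorithms.pca import pvalue

    X = np.random.standard_normal((10, 8))
    print(pvalue(X, sigma=1, nsim=2000))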
--------------------------------------------------------------------------------
/selectinf/algorithms/screening.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.sparse import eye as sparse_eye
3 |
4 | from ..constraints.affine import constraints
5 |
6 | def _basis_vector(j,n):
7 | """
8 | j-th elementary basis vector in R^n
9 | """
10 | e = np.zeros(n)
11 | e[j] = 1.
12 | return e
13 |
14 | class topK(object):
15 |
16 | alpha = 0.1
17 |
18 | def __init__(self, X, Y, K, sigma, covariance=None):
19 | n, p = X.shape
20 | self.Z = np.dot(X.T, Y)
21 | self.X, self.Y = X, Y
22 | self.sign = np.sign(self.Z)
23 | self.covariance = covariance
24 | self.K = K
25 | order = np.argsort(np.fabs(self.Z))
26 | self.selected = order[-K:]
27 | self.selected_sign = self.sign[order[-K:]]
28 |
29 | partial = np.identity(p)[order[:-K]]
30 | partial = np.vstack([partial, -partial])
31 |
32 | full_matrix = []
33 | for k in range(1, K+1):
34 | partial_cp = partial.copy()
35 | partial_cp[:,order[-k]] = -self.sign[order[-k]]
36 | full_matrix.append(np.dot(partial_cp, X.T))
37 | linear_part = np.vstack(full_matrix)
38 | self.constraints = constraints(linear_part,
39 | np.zeros(linear_part.shape[0]),
40 | covariance=covariance)
41 | self.constraints.covariance *= sigma**2
42 | self.sigma = sigma
43 |
44 | @property
45 | def intervals(self): # OLS intervals for active variables, adjusted for selection
46 | if not hasattr(self, "_intervals"):
47 | p = self.Z.shape[0]
48 | self._intervals = []
49 | C = self.constraints
50 | for j in self.selected:
51 | s = self.sign[j]
52 | eta = self.X[:,j] * s
53 | _interval = C.interval(eta,
54 | self.Y,
55 | self.alpha)
56 | self._intervals.append((j, (eta*self.Y).sum(),
57 | _interval))
58 | return self._intervals
59 |
60 | def test():
61 | n, p, sigma = 40, 100, 1.4
62 | X = np.random.standard_normal((n,p))
63 | Y = np.random.standard_normal(n) * sigma
64 |
65 | top10 = topK(X, Y, 10, sigma)
66 | return top10, top10.intervals
67 |
--------------------------------------------------------------------------------
/selectinf/algorithms/stopping_rules.py:
--------------------------------------------------------------------------------
1 | """
2 | Stopping rules used in sequential FDR control.
3 |
4 | See `http://arxiv.org/abs/1309.5352`_
5 |
6 | """
7 |
8 | import numpy as np
9 |
10 | def simple_stop(pvalues, alpha):
11 | """
12 | Compute the number of rejections using
13 | simple stop, the first time a p-value is above
14 | alpha.
15 |
16 | Parameters
17 | ----------
18 |
19 | pvalues : np.ndarray
20 |
21 | alpha : float
22 |
23 | Returns
24 | -------
25 |
26 | num_rejections : int
27 |
28 | """
29 | if not np.all(pvalues <= alpha):
30 | return np.min(np.nonzero(pvalues > alpha)[0])
31 | else:
32 | return pvalues.shape[0]
33 |
34 | def strong_stop(pvalues, alpha):
35 | """
36 |
37 | Compute the number of rejections using
38 | strong stop of `http://arxiv.org/abs/1309.5352`_
39 |
40 | >>> strong_stop(np.array([0.5,0.6,0.7,0.8,0.9]), 0.05)
41 | 0
42 | >>> strong_stop(np.array([0.001, 0.002, 0.0015, 0.0013, 0.05, 0.6]), 0.05)
43 | 3
44 |
45 | In R:
46 |
47 | > strongstop(c(0.001, 0.002, 0.0015, 0.0013, 0.05, 0.6), 0.05)
48 | [1] 3
49 | > strongstop(c(0.5,0.6,0.7,0.8,0.9), 0.05)
50 | [1] 0
51 |
52 | Parameters
53 | ----------
54 |
55 | pvalues : np.ndarray
56 |
57 | alpha : float
58 |
59 | Returns
60 | -------
61 |
62 | num_rejections : int
63 |
64 | Based on R code:
65 | ----------------
66 |
67 | strongstop <- function(p.values,alpha) {
68 | d <- length(p.values)
69 | lhs <- exp(rev(cumsum(rev(log(p.values)/(1:d))))) # LHS from G'Sell et al.
70 | rhs <- alpha * (1:d) / d # RHS from G'Sell et al.
71 | return(max(c(0,which(lhs <= rhs))))
72 | }
73 |
74 | """
75 | n = pvalues.shape[0]
76 | LHS = np.exp(np.cumsum((np.log(pvalues) / np.linspace(1., n, n))[::-1])[::-1])
77 | RHS = alpha * np.linspace(1., n, n) / n
78 | if np.any(LHS <= RHS):
79 | return max(np.nonzero(LHS <= RHS)[0])+1
80 | return 0
81 |
82 |
83 | def forward_stop(pvalues, alpha):
84 | """
85 |
86 | Compute the number of rejections using
87 | forward stop of `http://arxiv.org/abs/1309.5352`_
88 |
89 | >>> forward_stop(np.array([0.5,0.6,0.7,0.8,0.9]), 0.05)
90 | 0
91 | >>> forward_stop(np.array([0.001, 0.002, 0.0015, 0.0013, 0.05, 0.6]), 0.05)
92 | 5
93 |
94 | In R:
95 |
96 | > forwardstop(c(0.5,0.6,0.7,0.8,0.9), 0.05)
97 | [1] 0
98 | > forwardstop(c(0.001, 0.002, 0.0015, 0.0013, 0.05, 0.6), 0.05)
99 | [1] 5
100 | >
101 |
102 | Parameters
103 | ----------
104 |
105 | pvalues : np.ndarray
106 |
107 | alpha : float
108 |
109 | Returns
110 | -------
111 |
112 | num_rejections : int
113 |
114 | Based on R code:
115 | ----------------
116 |
117 | forwardstop <- function(p, alpha) {
118 | m <- length(p)
119 | sums <- -(1/(1:m))*cumsum(log(1-p))
120 | return(max(c(0, which(sums < alpha))))
121 | }
122 |
123 | """
124 |
125 | n = pvalues.shape[0]
126 | sums = (-1. / np.linspace(1, n, n)) * np.cumsum(np.log(1 - pvalues))
127 | if np.any(sums < alpha):
128 | return max(np.nonzero(sums < alpha)[0])+1
129 | return 0
130 |
131 |
132 |
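`simple_stop` has no doctest above; a minimal sketch of its behavior:

    import numpy as np
    from selectinf.algorithms.stopping_rules import simple_stop

    # the third p-value is the first one above alpha, so two rejections
    print(simple_stop(np.array([0.01, 0.02, 0.2, 0.6]), 0.05))  # 2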
--------------------------------------------------------------------------------
/selectinf/algorithms/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/algorithms/tests/__init__.py
--------------------------------------------------------------------------------
/selectinf/algorithms/tests/test_IC.py:
--------------------------------------------------------------------------------
1 | from copy import copy
2 |
3 | import numpy as np
4 | from ...tests.instance import gaussian_instance
5 | from ...constraints.affine import sample_from_constraints
6 | from ...distributions.discrete_family import discrete_family
7 |
8 | from ..forward_step import info_crit_stop
9 |
10 | def test_data_carving_IC(n=600,
11 | p=100,
12 | s=10,
13 | sigma=5,
14 | rho=0.25,
15 | signal=(3.5,5.),
16 | split_frac=0.9,
17 | ndraw=25000,
18 | burnin=5000,
19 | df=np.inf,
20 | coverage=0.90,
21 | compute_intervals=False):
22 |
23 | X, y, beta, active, sigma, _ = gaussian_instance(n=n,
24 | p=p,
25 | s=s,
26 | sigma=sigma,
27 | rho=rho,
28 | signal=signal,
29 | df=df,
30 | equicorrelated=False)
31 | mu = np.dot(X, beta)
32 | splitn = int(n*split_frac)
33 | indices = np.arange(n)
34 | np.random.shuffle(indices)
35 | stage_one = indices[:splitn]
36 |
37 | FS = info_crit_stop(y, X, sigma, cost=np.log(n), subset=stage_one)
38 |
39 | con = FS.constraints()
40 |
41 | X_E = X[:,FS.active]
42 | X_Ei = np.linalg.pinv(X_E)
43 | beta_bar = X_Ei.dot(y)
44 | mu_E = X_E.dot(beta_bar)
45 | sigma_E = np.linalg.norm(y-mu_E) / np.sqrt(n - len(FS.active))
46 |
47 | con.mean[:] = mu_E
48 | con.covariance = sigma_E**2 * np.identity(n)
49 |
50 | print(sigma_E, sigma)
51 | Z = sample_from_constraints(con,
52 | y,
53 | ndraw=ndraw,
54 | burnin=burnin)
55 |
56 | pvalues = []
57 | for idx, var in enumerate(FS.active):
58 | active = copy(FS.active)
59 | active.remove(var)
60 | X_r = X[:,active] # restricted design
61 | mu_r = X_r.dot(np.linalg.pinv(X_r).dot(y))
62 | delta_mu = (mu_r - mu_E) / sigma_E**2
63 |
64 | W = np.exp(Z.dot(delta_mu))
65 | fam = discrete_family(Z.dot(X_Ei[idx].T), W)
66 | pval = fam.cdf(0, x=beta_bar[idx])
67 | pval = 2 * min(pval, 1 - pval)
68 | pvalues.append((pval, beta[var]))
69 |
70 | return pvalues
71 |
--------------------------------------------------------------------------------
/selectinf/algorithms/tests/test_change_point.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ..change_point import one_jump_instance, change_point
3 |
4 | def test_change_point(delta=0.1, p=60, sigma=1, plot=False):
5 |
6 | y, signal = one_jump_instance(delta, p, sigma)
7 | CP = change_point(y)
8 | fit, relaxed_fit, summary, segments = CP.fit()
9 | if plot:
10 | import matplotlib.pyplot as plt
11 | plt.figure(figsize=(8,6))
12 | plt.scatter(np.arange(y.shape[0]), y)
13 | plt.plot(fit, 'r', label='Penalized', linewidth=3)
14 | plt.plot(relaxed_fit, 'k', label='Relaxed', linewidth=3)
15 | plt.plot(signal, 'g', label='Truth', linewidth=3)
16 | plt.legend(loc='upper left')
17 | return segments
18 |
--------------------------------------------------------------------------------
/selectinf/algorithms/tests/test_data_carving.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ...tests.instance import gaussian_instance
3 | from ..lasso import data_carving, data_splitting
4 |
5 | def sim():
6 | X, Y, _, active, sigma = gaussian_instance()[:5]
7 | print(sigma)
8 | G = data_carving.gaussian(X, Y, 1., split_frac=0.9, sigma=sigma)
9 | G.fit()
10 | if set(active).issubset(G.active) and G.active.shape[0] > len(active):
11 | return [G.hypothesis_test(G.active[len(active)], burnin=5000, ndraw=10000)]
12 | return []
13 |
14 | def sim2():
15 | X, Y, _, active, sigma = gaussian_instance(n=150, s=3)[:5]
16 | G = data_splitting.gaussian(X, Y, 5., split_frac=0.5, sigma=sigma)
17 | G.fit(use_full=True)
18 | if set(active).issubset(G.active) and G.active.shape[0] > len(active):
19 | return [G.hypothesis_test(G.active[len(active)])]
20 | return []
21 |
22 |
--------------------------------------------------------------------------------
/selectinf/algorithms/tests/test_screening.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ..screening import topK
3 | import nose.tools as nt
4 |
5 | def test_class(threshold=1):
6 |
7 | Z = np.random.standard_normal(10)
8 | C = np.eye(10)
9 | M = topK(C, Z, 1, 1)
10 | M.constraints
11 |
12 | M.intervals
13 | return M
14 |
15 |
--------------------------------------------------------------------------------
/selectinf/algorithms/tests/test_softmax.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import numpy.testing.decorators as dec
3 |
4 | from itertools import product
5 | from ..softmax import softmax_objective
6 |
7 | @dec.skipif(True, "need some tests for softmax objective")
8 | def test_softmax():
9 | raise ValueError('need some tests for softmax objective')
10 |
--------------------------------------------------------------------------------
/selectinf/algorithms/tests/test_sqrt_lasso.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | import numpy as np
4 | import numpy.testing.decorators as dec
5 | import nose.tools as nt
6 |
7 | import regreg.api as rr
8 |
9 | from ...tests.instance import gaussian_instance as instance
10 | from ...tests.decorators import (set_sampling_params_iftrue,
11 | set_seed_iftrue,
12 | wait_for_return_value)
13 |
14 | from ...tests.flags import SET_SEED, SMALL_SAMPLES
15 | from ..sqrt_lasso import (solve_sqrt_lasso,
16 | choose_lambda,
17 | goodness_of_fit,
18 | sqlasso_objective,
19 | sqlasso_objective_skinny,
20 | solve_sqrt_lasso_fat,
21 | solve_sqrt_lasso_skinny)
22 | from ..lasso import lasso
23 |
24 | @wait_for_return_value()
25 | @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=10, burnin=10, ndraw=10)
26 | @dec.slow
27 | def test_goodness_of_fit(n=20, p=25, s=10, sigma=20.,
28 | nsim=10, burnin=2000, ndraw=8000):
29 | P = []
30 | while True:
31 | y = np.random.standard_normal(n) * sigma
32 | beta = np.zeros(p)
33 | X = np.random.standard_normal((n,p)) + 0.3 * np.random.standard_normal(n)[:,None]
34 | X /= (X.std(0)[None,:] * np.sqrt(n))
35 | y += np.dot(X, beta) * sigma
36 | lam_theor = .7 * choose_lambda(X, quantile=0.9)
37 | L = lasso.sqrt_lasso(X, y, lam_theor)
38 | L.fit()
39 | pval = goodness_of_fit(L,
40 | lambda x: np.max(np.fabs(x)),
41 | burnin=burnin,
42 | ndraw=ndraw)
43 | P.append(pval)
44 | Pa = np.array(P)
45 | Pa = Pa[~np.isnan(Pa)]
46 | if (~np.isnan(np.array(Pa))).sum() >= nsim:
47 | break
48 |
49 | return Pa, np.zeros_like(Pa, np.bool)
50 |
51 | @set_seed_iftrue(SET_SEED)
52 | def test_skinny_fat():
53 |
54 | X, Y = instance()[:2]
55 | n, p = X.shape
56 | lam = choose_lambda(X)
57 | obj1 = sqlasso_objective(X, Y)
58 | obj2 = sqlasso_objective_skinny(X, Y)
59 | soln1 = solve_sqrt_lasso_fat(X, Y, weights=np.ones(p) * lam, solve_args={'min_its':500})[0]
60 | soln2 = solve_sqrt_lasso_skinny(X, Y, weights=np.ones(p) * lam, solve_args={'min_its':500})[0]
61 |
62 | np.testing.assert_allclose(soln1, soln2, rtol=1.e-3)
63 |
64 | X, Y = instance(p=50)[:2]
65 | n, p = X.shape
66 | lam = choose_lambda(X)
67 | obj1 = sqlasso_objective(X, Y)
68 | obj2 = sqlasso_objective_skinny(X, Y)
69 | soln1 = solve_sqrt_lasso_fat(X, Y, weights=np.ones(p) * lam, solve_args={'min_its':500})[0]
70 | soln2 = solve_sqrt_lasso_skinny(X, Y, weights=np.ones(p) * lam, solve_args={'min_its':500})[0]
71 |
72 | np.testing.assert_allclose(soln1, soln2, rtol=1.e-3)
73 |
74 |
75 |
76 |
--------------------------------------------------------------------------------
/selectinf/api.py:
--------------------------------------------------------------------------------
1 | from .constraints.api import *
2 | from .algorithms.api import *
3 | from .distributions.api import *
4 | from .randomized.api import *
5 | from .truncated.api import *
6 | from .sampling.api import *
7 |
--------------------------------------------------------------------------------
/selectinf/base.py:
--------------------------------------------------------------------------------
1 | import regreg.api as rr
2 | import regreg.affine as ra
3 |
4 | def restricted_estimator(loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
5 | """
6 | Fit a restricted model using only columns `active`.
7 |
8 | Parameters
9 | ----------
10 |
11 | Mest_loss : objective function
12 | A GLM loss.
13 |
14 | active : ndarray
15 | Which columns to use.
16 |
17 | solve_args : dict
18 | Passed to `solve`.
19 |
20 | Returns
21 | -------
22 |
23 | soln : ndarray
24 | Solution to restricted problem.
25 |
26 | """
27 | X, Y = loss.data
28 |
29 | if not loss._is_transform and hasattr(loss, 'saturated_loss'): # M_est is a glm
30 | X_restricted = X[:,active]
31 | loss_restricted = rr.affine_smooth(loss.saturated_loss, X_restricted)
32 | else:
33 | I_restricted = ra.selector(active, ra.astransform(X).input_shape[0], ra.identity((active.sum(),)))
34 | loss_restricted = rr.affine_smooth(loss, I_restricted.T)
35 | beta_E = loss_restricted.solve(**solve_args)
36 |
37 | return beta_E
38 |
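A minimal sketch of fitting a restricted Gaussian GLM with this helper; shapes and the active set are illustrative:

    import numpy as np
    import regreg.api as rr
    from selectinf.base import restricted_estimator

    n, p = 100, 10
    X = np.random.standard_normal((n, p))
    y = X[:, :3].dot(np.ones(3)) + np.random.standard_normal(n)

    active = np.zeros(p, bool)
    active[:3] = True

    loss = rr.glm.gaussian(X, y)
    beta_E = restricted_estimator(loss, active)  # one coefficient per active column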
--------------------------------------------------------------------------------
/selectinf/constraints/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/selectinf/constraints/api.py:
--------------------------------------------------------------------------------
1 | from .affine import constraints as affine_constraints
2 |
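A minimal sketch of the affine constraint object re-exported here; the three-argument `interval` call mirrors its use in `algorithms/screening.py`:

    import numpy as np
    from selectinf.constraints.api import affine_constraints

    # the selection event {z in R^3 : z_0 <= 1}, standard Gaussian covariance
    con = affine_constraints(np.eye(3)[:1], np.ones(1))

    Z = np.array([0.5, 1.2, -0.3])  # a feasible observation
    eta = np.array([1., 0., 0.])
    lower, upper = con.interval(eta, Z, 0.1)  # 90% selection-adjusted interval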
--------------------------------------------------------------------------------
/selectinf/constraints/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/constraints/tests/__init__.py
--------------------------------------------------------------------------------
/selectinf/constraints/tests/test_quasi.py:
--------------------------------------------------------------------------------
1 | """
2 | test_quasi.py
3 | Date: 2014-10-17
4 | Author: Xiaoying Tian
5 | """
6 |
7 | from __future__ import division, print_function
8 | import nose.tools as nt
9 | import numpy as np
10 |
11 | from ..quasi_affine import (quadratic_inequality_solver,
12 | intersection,
13 | sqrt_inequality_solver)
14 | from ...tests.flags import SET_SEED
15 | from ...tests.decorators import set_seed_iftrue
16 |
17 | def test_quadratic_solver():
18 | yield np.testing.assert_almost_equal, quadratic_inequality_solver(7,0.,-28),[[-2.0,2.0]]
19 | yield (np.testing.assert_almost_equal, quadratic_inequality_solver(1,-1,-5.),
20 | [[-1.7912878474779199, 2.7912878474779199]])
21 | yield (np.testing.assert_almost_equal, quadratic_inequality_solver(1,-1,5.), [[]])
22 | yield (np.testing.assert_almost_equal, quadratic_inequality_solver(-1,-1,-5.),
23 | [[float("-inf"), float("inf")]])
24 | yield (np.testing.assert_almost_equal,
25 | quadratic_inequality_solver(-1,6,-5.), [[float("-inf"), 1.0], [5.0, float("inf")]])
26 | yield (np.testing.assert_almost_equal, quadratic_inequality_solver(0.,6,-5.),
27 | [[float("-inf"), 0.8333333333333334]])
28 | yield (np.testing.assert_almost_equal,
29 | quadratic_inequality_solver(0.,6,5.),[[float("-inf"), -0.8333333333333334]])
30 | yield nt.assert_raises, ValueError, quadratic_inequality_solver, 0., 0., 5.
31 | yield (np.testing.assert_almost_equal,
32 | quadratic_inequality_solver(1,3,2,"greater than"), [[float("-inf"), -2.], [-1., float("inf")]])
33 |
34 | def test_intersection():
35 | yield np.testing.assert_almost_equal, intersection([], []), []
36 | yield np.testing.assert_almost_equal, intersection([], [1,2]), []
37 | yield np.testing.assert_almost_equal, intersection([2,3], []), []
38 | yield np.testing.assert_almost_equal, intersection([2,3], [1,2]), []
39 | yield np.testing.assert_almost_equal, intersection([3,4], [1,2]), []
40 | yield np.testing.assert_almost_equal, intersection([-1,4], [1,2]), [1,2]
41 | yield np.testing.assert_almost_equal, intersection([1,4], [-1,2]), [1,2]
42 | yield np.testing.assert_almost_equal, intersection([1,4], [-1,12]), [1,4]
43 |
44 | @set_seed_iftrue(SET_SEED)
45 | def test_sqrt_solver():
46 | a, b, c = np.random.random_integers(-50, 50, 3)
47 | n = 100
48 | intervals = sqrt_inequality_solver(a, b, c, n)
49 | print(a, b, c, intervals)
50 | for x in np.linspace(-20, 20):
51 | hold = (func(x, a, b, c, n) <= 0)
52 | in_interval = any([contains(x, I) for I in intervals])
53 | yield (np.testing.assert_almost_equal, np.array(hold, np.float),
54 | np.array(in_interval, np.float))
55 |
56 |
57 | def contains(x, I):
58 | if I:
59 | return (x >= I[0] and x <= I[1])
60 | else:
61 | return False
62 |
63 |
64 | def func(x, a, b, c, n):
65 | return a*x + b * np.sqrt(n + x**2) - c
66 |
67 |
--------------------------------------------------------------------------------
/selectinf/constraints/tests/test_unknown_sigma.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import numpy as np
3 | from .. import affine
4 | from ..quasi_affine import constraints_unknown_sigma
5 |
6 | def simulate(A=None, theta=0, R=None, eta=None):
7 |
8 | n = 22
9 | p = 4
10 | k = 18
11 | if R is None:
12 | R = np.linalg.svd(np.random.standard_normal((n,n-k)), full_matrices=0)[0]
13 | R = np.dot(R, R.T)
14 | R = 0.1 * R + np.diag([0]*p + [1.] * (n-p))
15 | R = np.linalg.svd(R, full_matrices=0)[0]
16 | R = R[:,:(n-p)]
17 | R = np.dot(R, R.T)
18 | if A is None:
19 | A = np.diag([1.]*p) + 0.05 * np.random.standard_normal((p,p))
20 | sel = np.identity(n)[:p]
21 | A = np.dot(A, sel)
22 | b = -np.ones(p)
23 | n = R.shape[0]
24 | df = np.diag(R).sum()
25 |
26 | if eta is None:
27 | eta = np.random.standard_normal(n) * 3
28 | eta = eta - np.dot(R, eta)
29 |
30 | counter = 0
31 | while True:
32 | counter += 1
33 | Z = np.random.standard_normal(n) * 1.5 + eta * theta / np.linalg.norm(eta)**2
34 | sigma_hat = np.linalg.norm(np.dot(R, Z)) / np.sqrt(df)
35 | if np.all(np.dot(A, Z) <= b * sigma_hat):
36 | return A, b, R, Z, eta, counter
37 | if counter >= 1000:
38 | break
39 | return None
40 |
41 |
42 | def instance(theta=0, A=None, R=None, eta=None):
43 |
44 | result = None
45 | while not result:
46 | result = simulate(theta=theta, A=A, R=R, eta=eta)
47 |
48 | A, b, R, Z, eta, counter = result
49 | from ..truncated_T import truncated_T
50 |
51 | intervals, obs = constraints_unknown_sigma(A, b, Z, eta, R,
52 | value_under_null=theta)
53 | df = np.diag(R).sum()
54 | truncT = truncated_T(np.array([(interval.lower_value,
55 | interval.upper_value) for interval in intervals]), df)
56 | sf = truncT.sf(obs)
57 | pval = 2 * min(sf, 1.-sf)
58 | if pval < 1.e-6:
59 | print(sf, obs, intervals)
60 | return float(pval)
61 |
62 | if __name__ == "__main__":
63 |
64 | P = []
65 |
66 | n = 22
67 | p = 4
68 | k = 18
69 |
70 | A = np.diag([1.]*p) + 0.05 * np.random.standard_normal((p,p))
71 | sel = np.identity(n)[:p]
72 | A = np.dot(A, sel)
73 |
74 | R = np.linalg.svd(np.random.standard_normal((n,n-k)), full_matrices=0)[0]
75 | R = np.dot(R, R.T)
76 | R = 0.1 * R + np.diag([0]*p + [1.] * (n-p))
77 | R = np.linalg.svd(R, full_matrices=0)[0]
78 | R = R[:,:(n-p)]
79 | R = np.dot(R, R.T)
80 |
81 | eta = np.random.standard_normal(n) * 3
82 | eta = eta - np.dot(R, eta)
83 |
84 | for i in range(1000):
85 | P.append(instance(theta=3.,R=R, A=A, eta=eta))
86 | print(i, np.mean(P), np.std(P))
87 | U = np.linspace(0,1,51)
88 |
89 | # make any plots not use display
90 |
91 | from matplotlib import use
92 | use('Agg')
93 | import matplotlib.pyplot as plt
94 |
95 | # used for ECDF
96 |
97 | import statsmodels.api as sm
98 | plt.plot(U, sm.distributions.ECDF(P)(U))
99 | plt.plot([0,1],[0,1])
100 | plt.show()
101 |
--------------------------------------------------------------------------------
/selectinf/distributions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/distributions/__init__.py
--------------------------------------------------------------------------------
/selectinf/distributions/api.py:
--------------------------------------------------------------------------------
1 | from .discrete_family import discrete_family
2 | from .intervals import intervals_from_sample
3 |
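A minimal sketch of the `discrete_family` exponential family, mirroring its use in the tests below (a Poisson reference measure on {0, ..., 99}):

    import numpy as np
    from scipy.stats import poisson
    from selectinf.distributions.api import discrete_family

    X = np.arange(100)
    fam = discrete_family(X, poisson.pmf(X, 4.5))

    observed = 4
    theta_hat = fam.MLE(observed, tol=1.e-7, max_iter=30)[0]
    print(fam.E(theta_hat, lambda x: x))  # approximately the observed value 4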
--------------------------------------------------------------------------------
/selectinf/distributions/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/distributions/tests/__init__.py
--------------------------------------------------------------------------------
/selectinf/distributions/tests/test_chains.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from ..chain import parallel_test, serial_test
4 | from ...constraints.affine import constraints, gaussian_hit_and_run
5 |
6 | def test_gaussian_chain():
7 |
8 | n = 30
9 |
10 | A = np.eye(n)[:3]
11 | b = np.ones(A.shape[0])
12 |
13 | con = constraints(A, b)
14 | state = np.random.standard_normal(n)
15 | state[:3] = 0
16 |
17 | gaussian_chain = gaussian_hit_and_run(con, state, nstep=100)
18 |
19 | counter = 0
20 | for step in gaussian_chain:
21 | counter += 1
22 |
23 | if counter >= 100:
24 | break
25 |
26 | test_statistic = lambda z: np.sum(z)
27 |
28 | parallel = parallel_test(gaussian_chain,
29 | gaussian_chain.state,
30 | test_statistic,
31 | ndraw=20)
32 |
33 | serial = serial_test(gaussian_chain,
34 | gaussian_chain.state,
35 | test_statistic,
36 | ndraw=20)
37 |
38 | return parallel, serial
39 |
--------------------------------------------------------------------------------
/selectinf/distributions/tests/test_discreteExFam.py:
--------------------------------------------------------------------------------
1 | # Testing
2 | from __future__ import print_function
3 | import numpy as np
4 | import nose.tools as nt
5 | from scipy.stats import poisson
6 | from ..discrete_family import discrete_family
7 |
8 | def test_MLE():
9 |
10 | X = np.arange(100)
11 | observed = 4
12 | pois = discrete_family(X, poisson.pmf(X, 4.5))
13 |
14 | MLE, var = pois.MLE(observed, tol=1.e-7, max_iter=30)[:2]
15 | mean_param = pois.E(MLE, lambda x: x)
16 | nt.assert_true(np.fabs(mean_param - observed) / observed < 1.e-4)
17 | nt.assert_true(np.fabs(mean_param - var*mean_param**2) < 1.e-3)
18 |
19 | def test_discreteExFam():
20 |
21 | X = np.arange(100)
22 | pois = discrete_family(X, poisson.pmf(X, 1))
23 | tol = 1e-5
24 |
25 | print(pois._leftCutFromRight(theta=0.4618311,rightCut=(5,.5)), pois._test2RejectsLeft(theta=2.39,observed=5,auxVar=.5))
26 | print(pois.interval(observed=5, alpha=.05, randomize=True, auxVar=.5))
27 |
28 | print(abs(1 - sum(pois.pdf(0))))
29 | pois.ccdf(0, 3, .4)
30 |
31 | print(pois.MLE(1.3))
32 |
33 | print(pois.Var(np.log(2), lambda x: x))
34 | print(pois.Cov(np.log(2), lambda x: x, lambda x: x))
35 |
36 | lc = pois._rightCutFromLeft(0, (0,.01))
37 | print((0, 0.01), pois._leftCutFromRight(0, lc))
38 |
39 | pois._rightCutFromLeft(-10, (0,.01))
40 | #[pois.test2Cutoffs(t)[1] for t in range(-10,3)]
41 | pois._critCovFromLeft(-10, (0,.01))
42 |
43 | pois._critCovFromLeft(0, (0,.01))
44 | pois._critCovFromRight(0, lc)
45 |
46 | pois._critCovFromLeft(5, (5, 1))
47 |
48 | pois._test2RejectsLeft(np.log(5),5)
49 | pois._test2RejectsRight(np.log(5),5)
50 |
51 | pois._test2RejectsLeft(np.log(20),5)
52 | pois._test2RejectsRight(np.log(.1),5)
53 |
54 | print(pois._inter2Upper(5, auxVar=.5))
55 | print(pois.interval(5, auxVar=.5))
56 |
57 |
--------------------------------------------------------------------------------
/selectinf/distributions/tests/test_multiparameter.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ..discrete_multiparameter import multiparameter_family
3 |
4 | def test_multiparameter():
5 |
6 | X = [[3,4],[4,5],[5,8.]]
7 | w = [0.3, 0.5, 0.4]
8 | theta = [0.1,0.3]
9 |
10 | family = multiparameter_family(X, w)
11 | mu1 = family.mean(theta)
12 |
13 | X_arr = np.array(X)
14 | exponent = np.dot(X_arr, theta)
15 |
16 | w_arr = np.array(w) * np.exp(exponent)
17 | w_arr /= w_arr.sum()
18 |
19 | mu2 = (X_arr * w_arr[:,None]).sum(0)
20 |
21 | np.testing.assert_allclose(mu1, mu2)
22 |
23 | info1 = family.information(theta)[1]
24 |
25 | T = np.zeros((3,2,2))
26 | for i in range(2):
27 | for j in range(2):
28 | T[:,i,j] = X_arr[:,i] * X_arr[:,j]
29 |
30 | second_moment = (T * w_arr[:,None,None]).sum(0)
31 | info2 = second_moment - np.outer(mu1, mu1)
32 |
33 | np.testing.assert_allclose(info1, info2)
34 |
35 | mu3 = np.array([family.E(theta, lambda x: x[:,i]) for i in range(2)])
36 | np.testing.assert_allclose(mu1, mu3)
37 |
38 | cov01 = np.array(family.Cov(theta, lambda x: x[:,0], lambda x: x[:,1]))
39 | np.testing.assert_allclose(cov01, info1[0,1])
40 |
41 | var0 = np.array(family.Var(theta, lambda x: x[:,0]))
42 | np.testing.assert_allclose(var0, info1[0,0])
43 |
44 | observed = np.array([4.2,6.3])
45 | theta_hat = family.MLE(observed, tol=1.e-12, max_iters=50)
46 |
47 | np.testing.assert_allclose(observed, family.mean(theta_hat))
48 |
--------------------------------------------------------------------------------
/selectinf/info.py:
--------------------------------------------------------------------------------
1 | """ This file contains defines parameters for regreg that we use to fill
2 | settings in setup.py, the regreg top-level docstring, and for building the docs.
3 | In setup.py in particular, we exec this file, so it cannot import regreg
4 | """
5 |
6 | # selectinf version information. An empty _version_extra corresponds to a
7 | # full release. '.dev' as a _version_extra string means this is a development
8 | # version
9 | _version_major = 0
10 | _version_minor = 1
11 | _version_micro = 0
12 | _version_extra = ''
13 |
14 | # Format expected by setup.py and doc/source/conf.py: string of form "X.Y.Z"
15 | __version__ = "%s.%s.%s%s" % (_version_major,
16 | _version_minor,
17 | _version_micro,
18 | _version_extra)
19 |
20 | CLASSIFIERS = ["Development Status :: 3 - Alpha",
21 | "Environment :: Console",
22 | "Intended Audience :: Science/Research",
23 | "License :: OSI Approved :: BSD License",
24 | "Operating System :: OS Independent",
25 | "Programming Language :: Python",
26 | "Topic :: Scientific/Engineering"]
27 |
28 | description = 'Testing a fixed value of lambda'
29 |
30 | # Note: this long_description is actually a copy/paste from the top-level
31 | # README.txt, so that it shows up nicely on PyPI. So please remember to edit
32 | # it only in one place and sync it correctly.
33 | long_description = \
34 | """
35 | ============
36 | Fixed lambda
37 | ============
38 |
39 | This mini-package contains a module to perform
40 | a fixed lambda test for the LASSO.
41 | """
42 |
43 | # versions
44 | NUMPY_MIN_VERSION='1.7.1'
45 | SCIPY_MIN_VERSION = '0.9'
46 | CYTHON_MIN_VERSION = '0.21'
47 | MPMATH_MIN_VERSION = "0.18"
48 | PYINTER_MIN_VERSION = "0.1.6"
49 | SKLEARN_MIN_VERSION = '0.19'
50 |
51 | NAME = 'selectinf'
52 | MAINTAINER = "Jonathan Taylor"
53 | MAINTAINER_EMAIL = ""
54 | DESCRIPTION = description
55 | LONG_DESCRIPTION = long_description
56 | URL = "http://github.org/jonathan.taylor/selective-inference"
57 | DOWNLOAD_URL = ""
58 | LICENSE = "BSD license"
59 | CLASSIFIERS = CLASSIFIERS
60 | AUTHOR = "fixed_lambda developers"
61 | AUTHOR_EMAIL = ""
62 | PLATFORMS = "OS Independent"
63 | MAJOR = _version_major
64 | MINOR = _version_minor
65 | MICRO = _version_micro
66 | ISRELEASE = _version_extra == ''
67 | VERSION = __version__
68 | STATUS = 'alpha'
69 | PROVIDES = ["fixed_lambda"]
70 | REQUIRES = ["numpy (>=%s)" % NUMPY_MIN_VERSION,
71 | "scipy (>=%s)" % SCIPY_MIN_VERSION,
72 | "mpmath (>=%s)" % MPMATH_MIN_VERSION,
73 | "pyinter"]
74 |
--------------------------------------------------------------------------------
/selectinf/learning/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/learning/__init__.py
--------------------------------------------------------------------------------
/selectinf/learning/fitters.py:
--------------------------------------------------------------------------------
1 | import uuid, functools
2 |
3 | import numpy as np
4 | from scipy.stats import norm as ndist
5 | from sklearn import ensemble
6 |
7 | def gbm_fit_sk(T, Y, **params):
8 |
9 | fitfns = []
10 | for j in range(Y.shape[1]):
11 | print('variable %d' % (j+1,))
12 | y = Y[:,j].astype(np.int)
13 | clf = ensemble.GradientBoostingClassifier(**params)
14 | clf.fit(T, y)
15 |
16 | def fit_fn(clf, t):
17 | return clf.predict_proba(t)[:,1]
18 |
19 | fitfns.append(functools.partial(fit_fn, clf))
20 |
21 | return fitfns
22 |
23 | def random_forest_fit_sk(T, Y, **params):
24 |
25 | fitfns = []
26 | for j in range(Y.shape[1]):
27 | print('variable %d' % (j+1,))
28 | y = Y[:,j].astype(np.int)
29 | clf = ensemble.RandomForestClassifier(**params)
30 | clf.fit(T, y)
31 |
32 | def fit_fn(clf, t):
33 | return clf.predict_proba(t)[:,1]
34 |
35 | fitfns.append(functools.partial(fit_fn, clf))
36 |
37 | return fitfns
38 |
39 |
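A minimal sketch of how these fitters are used: one binary label per column of Y, one fitted probability function per column (data and parameters below are illustrative):

    import numpy as np
    from selectinf.learning.fitters import gbm_fit_sk

    T = np.random.standard_normal((200, 2))
    Y = (T.sum(1)[:, None] + np.random.standard_normal((200, 1)) > 0)

    fitfns = gbm_fit_sk(T, Y, n_estimators=100)
    probs = fitfns[0](np.zeros((5, 2)))  # estimated P(Y[:, 0] == 1) at five points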
--------------------------------------------------------------------------------
/selectinf/learning/keras_fit.py:
--------------------------------------------------------------------------------
1 | '''
2 | Based on https://stackoverflow.com/questions/44164749/how-does-keras-handle-multilabel-classification
3 | '''
4 |
5 | import warnings
6 |
7 | try:
8 |
9 | from keras.models import Sequential
10 | from keras.layers import Dense, Dropout
11 | from keras.optimizers import SGD
12 |
13 | def keras_fit(T, Y, **kwargs):
14 |
15 | if Y.ndim == 1:
16 | Y.shape = (-1, 1)
17 |
18 | fitfns = []
19 |
20 | for j in range(Y.shape[1]):
21 | y = Y[:,j]
22 |
23 | fit_fn = keras_fit_multilabel(T, y, **kwargs)[0]
24 | fitfns.append(fit_fn)
25 | return fitfns
26 |
27 | def keras_fit_multilabel(T, Y, sizes=[500, 500], epochs=50, activation='relu', dropout=0, **ignored):
28 |
29 | if Y.ndim == 1:
30 | Y.shape = (-1, 1)
31 |
32 | model = Sequential()
33 | for s in sizes:
34 | model.add(Dense(s, activation=activation, input_dim=T.shape[1]))
35 | if dropout > 0:
36 | model.add(Dropout(dropout))
37 |
38 | # the final layer
39 | model.add(Dense(Y.shape[1], activation='sigmoid'))
40 |
41 | sgd = SGD(lr=0.03, decay=1e-3, momentum=0.6, nesterov=True)
42 | model.compile(loss='binary_crossentropy',
43 | optimizer=sgd)
44 |
45 | model.fit(T, Y, epochs=epochs)
46 | fitfns = [lambda T_test, j=j: model.predict(T_test)[:,j] for j in range(Y.shape[1])] # bind j at definition time; a plain closure would always read the last column
47 | return fitfns
48 |
49 | except ImportError:
50 | warnings.warn('module `keras` not importable, `keras_fit` and `keras_fit_multilabel` will not be importable')
51 |
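If `keras` is installed, a minimal sketch of the single-label path (network size and epoch count are illustrative):

    import numpy as np
    from selectinf.learning.keras_fit import keras_fit

    T = np.random.standard_normal((200, 3))
    Y = (T[:, 0] + np.random.standard_normal(200) > 0).astype(float)

    fit_fn = keras_fit(T, Y, sizes=[32], epochs=5)[0]
    probs = fit_fn(np.zeros((4, 3)))  # estimated P(Y == 1) at four new points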
--------------------------------------------------------------------------------
/selectinf/randomized/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/randomized/__init__.py
--------------------------------------------------------------------------------
/selectinf/randomized/api.py:
--------------------------------------------------------------------------------
1 | from .query import multiple_queries, query
2 | from .randomization import randomization
3 | from .lasso import lasso, split_lasso
4 | from .screening import marginal_screening, stepup, topK
5 | from .slope import slope
6 | from .group_lasso import group_lasso
7 |
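A minimal sketch of the randomized LASSO exported here; the constructor, `fit` and `summary` calls follow `tests/test_modelQ.py` below:

    import numpy as np
    from selectinf.randomized.api import lasso

    X = np.random.standard_normal((200, 50))
    y = np.random.standard_normal(200)

    L = lasso.gaussian(X, y, 5. * np.ones(50) * np.sqrt(200))
    signs = L.fit()  # signed active set of the randomized solution
    L.summary()      # selective p-values for the selected variables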
--------------------------------------------------------------------------------
/selectinf/randomized/tests/__init__.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from ...tests.decorators import wait_for_return_value, set_sampling_params_iftrue
4 | from ...tests.instance import logistic_instance, gaussian_instance
5 |
--------------------------------------------------------------------------------
/selectinf/randomized/tests/sandbox/test_cv_glmnet.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import regreg.api as rr
3 |
4 | from ...algorithms.cv_glmnet import CV_glmnet
5 | from ...tests.instance import gaussian_instance
6 |
7 | def test_cv_glmnet():
8 | np.random.seed(2)
9 | n, p = 3000, 1000
10 | X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=30, rho=0., sigma=1)
11 | loss = rr.glm.gaussian(X,y)
12 | CV_glmnet_gaussian = CV_glmnet(loss, 'gaussian')
13 | lam_CV, lam_1SD, lam_seq, CV_err, SD = CV_glmnet_gaussian.using_glmnet()
14 | print("CV error curve (nonrandomized):", CV_err)
15 | lam_grid_size = CV_glmnet_gaussian.lam_seq.shape[0]
16 | lam_CVR, SD, CVR, CV1, lam_seq = CV_glmnet_gaussian.choose_lambda_CVR(scale1=0.1, scale2=0.1)
17 | print("nonrandomized index:", list(lam_seq).index(lam_CV)) # index of the minimizer
18 | print("lam for nonrandomized CV plus sigma rule:",lam_CV,lam_1SD)
19 | print("lam_CVR:",lam_CVR)
20 | print("randomized index:", list(lam_seq).index(lam_CVR))
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/selectinf/randomized/tests/sandbox/test_fixedX.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.stats import norm as ndist
3 |
4 | import regreg.api as rr
5 |
6 | from ...tests.flags import SMALL_SAMPLES, SET_SEED
7 | from ...tests.instance import gaussian_instance
8 | from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue
9 |
10 | from ..api import randomization
11 | from ..glm import (resid_bootstrap,
12 | glm_nonparametric_bootstrap,
13 | fixedX_group_lasso)
14 |
15 |
16 | @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
17 | @set_seed_iftrue(SET_SEED)
18 | @wait_for_return_value()
19 | def test_fixedX(ndraw=10000, burnin=2000): # nsim needed for decorator
20 | s, n, p = 5, 200, 20
21 |
22 | randomizer = randomization.laplace((p,), scale=1.)
23 | X, Y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.1, signal=7)
24 |
25 | lam_frac = 1.
26 | lam = lam_frac * np.mean(np.fabs(X.T.dot(np.random.standard_normal((n, 50000)))).max(0)) * sigma
27 | W = np.ones(p) * lam
28 | epsilon = 1. / np.sqrt(n)
29 |
30 | penalty = rr.group_lasso(np.arange(p),
31 | weights=dict(zip(np.arange(p), W)), lagrange=1.)
32 |
33 | M_est = fixedX_group_lasso(X, Y, epsilon, penalty, randomizer)
34 | M_est.solve()
35 |
36 | active_set = M_est.selection_variable['variables']
37 | nactive = active_set.sum()
38 |
39 | if set(nonzero).issubset(np.nonzero(active_set)[0]) and active_set.sum() > len(nonzero):
40 |
41 | selected_features = np.zeros(p, np.bool)
42 | selected_features[active_set] = True
43 |
44 | Xactive = X[:,active_set]
45 | unpenalized_mle = np.linalg.pinv(Xactive).dot(Y)
46 |
47 | form_covariances = glm_nonparametric_bootstrap(n, n)
48 | target_info, target_observed = resid_bootstrap(M_est.loss, active_set)
49 |
50 | cov_info = M_est.setup_sampler()
51 | target_cov, score_cov = form_covariances(target_info,
52 | cross_terms=[cov_info],
53 | nsample=M_est.nboot)
54 |
55 | opt_sample = M_est.sampler.sample(ndraw,
56 | burnin)
57 |
58 | pvalues = M_est.sampler.coefficient_pvalues(unpenalized_mle,
59 | target_cov,
60 | score_cov,
61 | parameter=np.zeros(selected_features.sum()),
62 | sample=opt_sample)
63 | intervals = M_est.sampler.confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_sample)
64 |
65 | true_vec = beta[M_est.selection_variable['variables']]
66 |
67 | L, U = intervals.T
68 |
69 | covered = np.zeros(nactive, np.bool)
70 | active_var = np.zeros(nactive, np.bool)
71 | active_set = np.nonzero(active_set)[0]
72 |
73 | for j in range(nactive):
74 | if (L[j] <= true_vec[j]) and (U[j] >= true_vec[j]):
75 | covered[j] = 1
76 | active_var[j] = active_set[j] in nonzero
77 |
78 | return pvalues, covered, active_var
79 |
80 |
--------------------------------------------------------------------------------
/selectinf/randomized/tests/sandbox/test_full_lasso.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import nose.tools as nt
3 |
4 | import selection.randomized.lasso as L
5 | from selection.randomized.lasso import lasso
6 | from selection.tests.instance import gaussian_instance
7 | import matplotlib.pyplot as plt
8 |
9 | def test_full_lasso(n=200, p=30, signal_fac=1.5, s=5, ndraw=5000, burnin=1000, sigma=3, full=False, rho=0.4, randomizer_scale=1):
10 | """
11 | General LASSO -- decomposing the subgradient should reproduce the original sampler.
12 | """
13 |
14 | inst, const = gaussian_instance, lasso.gaussian
15 | signal = np.sqrt(signal_fac * np.log(p))
16 | X, Y, beta = inst(n=n,
17 | p=p,
18 | signal=signal,
19 | s=s,
20 | equicorrelated=False,
21 | rho=rho,
22 | sigma=sigma,
23 | random_signs=True)[:3]
24 |
25 | n, p = X.shape
26 |
27 | W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma
28 |
29 | conv = const(X,
30 | Y,
31 | W,
32 | randomizer_scale=randomizer_scale * sigma)
33 |
34 | signs = conv.fit(solve_args={'min_its':500, 'tol':1.e-13})
35 | nonzero = signs != 0
36 |
37 | conv2 = lasso.gaussian(X,
38 | Y,
39 | W,
40 | randomizer_scale=randomizer_scale * sigma)
41 | conv2.fit(perturb=conv._initial_omega, solve_args={'min_its':500, 'tol':1.e-13})
42 | conv2.decompose_subgradient(condition=np.ones(p, np.bool))
43 |
44 | np.testing.assert_allclose(conv2._view.sampler.affine_con.covariance,
45 | conv.sampler.affine_con.covariance)
46 |
47 | np.testing.assert_allclose(conv2._view.sampler.affine_con.mean,
48 | conv.sampler.affine_con.mean)
49 |
50 | np.testing.assert_allclose(conv2._view.sampler.affine_con.linear_part,
51 | conv.sampler.affine_con.linear_part)
52 |
53 | np.testing.assert_allclose(conv2._view.sampler.affine_con.offset,
54 | conv.sampler.affine_con.offset)
55 |
56 | np.testing.assert_allclose(conv2._view.initial_soln,
57 | conv.initial_soln)
58 |
59 | np.testing.assert_allclose(conv2._view.initial_subgrad,
60 | conv.initial_subgrad)
61 |
--------------------------------------------------------------------------------
/selectinf/randomized/tests/sandbox/test_general_lasso.py:
--------------------------------------------------------------------------------
1 | from itertools import product
2 | import numpy as np
3 | import nose.tools as nt
4 |
5 | from ..lasso import lasso
6 | from ...tests.instance import (gaussian_instance,
7 | logistic_instance,
8 | poisson_instance)
9 | from ...tests.flags import SMALL_SAMPLES
10 | from ...tests.decorators import set_sampling_params_iftrue
11 |
12 | @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=50, burnin=20)
13 | def test_lasso_constructors(ndraw=1000, burnin=200):
14 | """
15 | Smoke tests for lasso convenience constructors
16 | """
17 | cls = lasso
18 | for const_info, rand, marginalize, condition in product(zip([gaussian_instance,
19 | logistic_instance,
20 | poisson_instance],
21 | [cls.gaussian,
22 | cls.logistic,
23 | cls.poisson]),
24 | ['gaussian', 'logistic', 'laplace'],
25 | [False, True],
26 | [False, True]):
27 |
28 | print(rand)
29 | inst, const = const_info
30 | X, Y = inst(n=100, p=20, signal=5, s=10)[:2]
31 | n, p = X.shape
32 |
33 | W = np.ones(X.shape[1]) * 0.2
34 | W[0] = 0
35 | W[3:] = 50.
36 | np.random.shuffle(W)
37 | conv = const(X, Y, W, randomizer=rand)
38 | nboot = 1000
39 | if SMALL_SAMPLES:
40 | nboot = 20
41 | signs = conv.fit(nboot=nboot)
42 |
43 | marginalize = None # NB: overrides the loop variable, so the branch below never runs
44 | if marginalize:
45 | marginalize = np.zeros(p, np.bool)
46 | marginalize[:int(p/2)] = True
47 |
48 | condition = None # NB: likewise disables the conditioning branch below
49 | if condition:
50 | if marginalize:
51 | condition = ~marginalize
52 | else:
53 | condition = np.ones(p, np.bool)
54 | condition[-int(p/4):] = False
55 |
56 | selected_features = np.zeros(p, np.bool)
57 | selected_features[:3] = True
58 |
59 | conv.summary(selected_features,
60 | ndraw=ndraw,
61 | burnin=burnin,
62 | compute_intervals=True)
63 |
64 | conv.decompose_subgradient(marginalize=marginalize,
65 | condition=condition)
66 |
67 | conv.summary(selected_features,
68 | ndraw=ndraw,
69 | burnin=burnin)
70 |
71 | conv.decompose_subgradient(condition=np.ones(p, np.bool))
72 |
73 | conv.summary(selected_features,
74 | ndraw=ndraw,
75 | burnin=burnin)
76 |
--------------------------------------------------------------------------------
/selectinf/randomized/tests/sandbox/test_opt_weighted_intervals.py:
--------------------------------------------------------------------------------
1 | from itertools import product
2 | import numpy as np
3 | import nose.tools as nt
4 |
5 | from ..convenience import lasso, step, threshold
6 | from ..query import optimization_sampler
7 | from ...tests.instance import (gaussian_instance,
8 | logistic_instance,
9 | poisson_instance)
10 | from ...tests.flags import SMALL_SAMPLES
11 | from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
12 | import matplotlib.pyplot as plt
13 |
14 | from scipy.stats import t as tdist
15 | from ..glm import glm_nonparametric_bootstrap, pairs_bootstrap_glm
16 | from ..M_estimator import restricted_Mest
17 |
18 | @set_seed_iftrue(False, 200)
19 | @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=1000, burnin=100)
20 | def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
21 |
22 | results = []
23 | cls = lasso
24 | for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace', 'gaussian']):
25 |
26 | inst, const = const_info
27 |
28 | X, Y, beta = inst(n=100, p=20, s=0, signal=5., sigma=5.)[:3]
29 | n, p = X.shape
30 |
31 | W = np.ones(X.shape[1]) * 8
32 | conv = const(X, Y, W, randomizer=rand, parametric_cov_estimator=True)
33 | signs = conv.fit()
34 | print("signs", signs)
35 |
36 | marginalizing_groups = np.ones(p, np.bool)
37 | #marginalizing_groups[:int(p/2)] = True
38 | conditioning_groups = ~marginalizing_groups
39 | #conditioning_groups[-int(p/4):] = False
40 | conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
41 | conditioning_groups=conditioning_groups)
42 |
43 | selected_features = conv._view.selection_variable['variables']
44 |         nactive = selected_features.sum()
45 |         print("nactive", nactive)
46 |         if nactive == 0:
47 | results.append(None)
48 | else:
49 | sel_pivots, sel_pval, sel_ci = conv.summary(selected_features,
50 | parameter=beta[selected_features],
51 | ndraw=ndraw,
52 | burnin=burnin,
53 | compute_intervals=True)
54 | print(sel_pivots)
55 | results.append((rand, sel_pivots, sel_ci, beta[selected_features]))
56 |
57 | return results
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/selectinf/randomized/tests/test_modelQ.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, print_function
2 |
3 | import numpy as np
4 | import nose.tools as nt
5 |
6 | import regreg.api as rr
7 |
8 | from ..modelQ import modelQ
9 | from ..lasso import lasso
10 | from ...tests.instance import gaussian_instance
11 |
12 | def test_modelQ():
13 |
14 | n, p, s = 200, 50, 4
15 | X, y, beta = gaussian_instance(n=n,
16 | p=p,
17 | s=s,
18 | sigma=1)[:3]
19 |
20 | lagrange = 5. * np.ones(p) * np.sqrt(n)
21 | perturb = np.random.standard_normal(p) * n
22 | LH = lasso.gaussian(X, y, lagrange)
23 | LH.fit(perturb=perturb, solve_args={'min_its':1000})
24 |
25 | LQ = modelQ(X.T.dot(X), X, y, lagrange)
26 | LQ.fit(perturb=perturb, solve_args={'min_its':1000})
27 | LQ.summary() # smoke test
28 |
29 | conH = LH.sampler.affine_con
30 | conQ = LQ.sampler.affine_con
31 |
32 | np.testing.assert_allclose(LH.initial_soln, LQ.initial_soln)
33 | np.testing.assert_allclose(LH.initial_subgrad, LQ.initial_subgrad)
34 |
35 | np.testing.assert_allclose(conH.linear_part, conQ.linear_part)
36 | np.testing.assert_allclose(conH.offset, conQ.offset)
37 |
38 | np.testing.assert_allclose(LH._beta_full, LQ._beta_full)
39 |
40 |
--------------------------------------------------------------------------------
/selectinf/randomized/tests/test_randomization.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import numpy as np
4 | import nose.tools as nt
5 |
6 | from ..randomization import randomization
7 |
8 | def test_noise_dbns():
9 |
10 | X = np.random.standard_normal((10, 5))
11 | Q = X.T.dot(X)
12 | noises = [randomization.isotropic_gaussian((5,), 1.),
13 | randomization.laplace((5,), 1.),
14 | randomization.logistic((5,), 1.),
15 | randomization.gaussian(Q)]
16 |
17 | v1, v2 = [], []
18 |
19 | for i, noise in enumerate(noises):
20 |
21 | x = np.random.standard_normal(5)
22 | u = np.random.standard_normal(5)
23 | v1.append(np.exp(noise.log_density(x)))
24 | v2.append(noise._density(x))
25 |
26 | noise.smooth_objective(x, 'func')
27 | noise.smooth_objective(x, 'grad')
28 | noise.smooth_objective(x, 'both')
29 | noise.gradient(x)
30 |
31 | nt.assert_equal(noise.sample().shape, (5,))
32 | nt.assert_equal(noise.sample().shape, (5,))
33 |
34 | if noise.CGF is not None:
35 | u = np.zeros(5)
36 | u[:2] = 0.1
37 | noise.CGF.smooth_objective(u, 'both')
38 |
39 | if noise.CGF_conjugate is not None:
40 | noise.CGF_conjugate.smooth_objective(x, 'both')
41 |
42 |
43 |
--------------------------------------------------------------------------------
/selectinf/randomized/tests/test_slope_subgrad.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from ..slope import _projection_onto_selected_subgradients
4 |
5 | def test_projection():
6 |
7 | prox_arg = np.random.normal(0,1,10)
8 | weights = np.linspace(3, 5, 10)[::-1]
9 | ordering = np.random.choice(10, 10, replace=False)
10 | cluster_sizes = [2,3,1,1,3]
11 | active_signs = np.ones(10)
12 |
13 | proj = _projection_onto_selected_subgradients(prox_arg,
14 | weights,
15 | ordering,
16 | cluster_sizes,
17 | active_signs)
18 |
19 | print("projection", proj)
20 |
21 |
22 |
--------------------------------------------------------------------------------
/selectinf/reduced_optimization/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/reduced_optimization/tests/__init__.py
--------------------------------------------------------------------------------
/selectinf/sampling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/sampling/__init__.py
--------------------------------------------------------------------------------
/selectinf/sampling/api.py:
--------------------------------------------------------------------------------
1 | from .langevin import projected_langevin
2 | from .truncnorm import (sample_truncnorm_white,
3 | sample_truncnorm_white_sphere,
4 | sample_truncnorm_white_ball)
5 |
--------------------------------------------------------------------------------
/selectinf/sampling/langevin.py:
--------------------------------------------------------------------------------
1 | """
2 | Projected Langevin sampler from `http://arxiv.org/abs/1507.02564`_
3 | """
4 | from __future__ import print_function
5 |
6 | import numpy as np
7 | from scipy.stats import norm as ndist
8 |
9 | class projected_langevin(object):
10 |
11 | def __init__(self,
12 | initial_condition,
13 | gradient_map,
14 | projection_map,
15 | stepsize):
16 |
17 | (self.state,
18 | self.gradient_map,
19 | self.projection_map,
20 | self.stepsize) = (np.copy(initial_condition),
21 | gradient_map,
22 | projection_map,
23 | stepsize)
24 | self._shape = self.state.shape[0]
25 | self._sqrt_step = np.sqrt(self.stepsize)
26 | self._noise = ndist(loc=0,scale=1)
27 |
28 | def __iter__(self):
29 | return self
30 |
31 | def next(self):
32 | nattempt = 0
33 | while True:
34 |
35 | proj_arg = (self.state
36 | + 0.5 * self.stepsize * self.gradient_map(self.state)
37 | + self._noise.rvs(self._shape) * self._sqrt_step)
38 | candidate = self.projection_map(proj_arg)
39 | if not np.all(np.isfinite(self.gradient_map(candidate))):
40 | nattempt += 1
41 | self._sqrt_step *= 0.8
42 | self.stepsize = self._sqrt_step**2
43 | if nattempt >= 30:
44 | raise ValueError('unable to find feasible step')
45 | else:
46 | self.state[:] = candidate
47 | break
48 |
--------------------------------------------------------------------------------
/selectinf/sampling/sequential.py:
--------------------------------------------------------------------------------
1 | """
2 | Sequential Monte Carlo for approximately constrained Gaussians.
3 |
4 | http://arxiv.org/abs/1410.8209
5 |
6 | """
7 |
8 | import numpy as np
9 |
10 | def sample(white_constraint,
11 | nsample,
12 | proposal_sigma=0.2,
13 |            temps=np.linspace(0, 50, 51)):
14 | """
15 |     Build up a sample from an approximately constrained Gaussian
16 |     through a sequence of relaxations of the constraint.
17 |
18 | Parameters
19 | ----------
20 |
21 | white_constraint : `selection.constraints.affine`
22 | Affine constraint with identity covariance
23 |
24 | nsample : int
25 | How many samples to draw?
26 |
27 | proposal_sigma : float
28 |         Scale of the Gaussian random-walk proposal used in the MH step.
29 | """
30 |
31 | n = white_constraint.dim
32 | sample_z = np.random.standard_normal((n, nsample))
33 |
34 | def constraint_function(z, con):
35 | value = (np.dot(con.linear_part, z) - con.offset[:,None])
36 | return value.max(0)
37 |
38 | def constraint_logit(temp, z, con):
39 | tmp_z = constraint_function(z, con)
40 | tmp_v = np.exp(-temp * tmp_z)
41 | return tmp_v / (1 + tmp_v)
42 |
43 | def MH_sample(temp, z_cur, con):
44 | step = np.random.standard_normal(z_cur.shape) * proposal_sigma
45 | z_new = z_cur + step
46 |
47 | W_new = constraint_logit(temp, z_new, con)
48 | W_cur = constraint_logit(temp, z_cur, con)
49 | W_new *= np.exp(-(z_new**2).sum(0)/2)
50 | W_cur *= np.exp(-(z_cur**2).sum(0)/2)
51 |
52 | coin_flip = np.less_equal(np.random.sample(z_cur.shape[1]), W_new / W_cur)
53 | final_sample = coin_flip * z_new + (1 - coin_flip) * z_cur
54 | return final_sample
55 |
56 | weights = np.ones(nsample, np.float) / nsample
57 |
58 | num = np.ones(nsample) / 2
59 | for i in range(temps.shape[0]-1):
60 |
61 | num, den = constraint_logit(temps[i+1], sample_z, white_constraint), num
62 |
63 | weights *= np.exp(np.log(num) - np.log(den))
64 | weights /= weights.sum()
65 |
66 | ESS = 1. / (weights**2).sum()
67 | if ESS < nsample / 2.:
68 | idx_z = np.random.choice(np.arange(nsample), size=(nsample,), replace=True, p=weights)
69 | sample_z = sample_z[:, idx_z]
70 | weights = np.ones(nsample, np.float) / nsample
71 | sample_z = MH_sample(temps[i+1], sample_z, white_constraint)
72 |
73 | return sample_z
74 |
75 |
76 |
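A minimal usage sketch of sample, mirroring sampling/tests/test_sequential.py; the particular constraint and temperature ladder are illustrative:

    import numpy as np
    from selectinf.constraints.affine import constraints
    from selectinf.sampling.sequential import sample

    # N(0, I_3) conditioned on z_i >= 1 in each coordinate:
    # rows of A encode -z_i <= -1.
    A = -np.identity(3)
    b = -np.ones(3)
    con = constraints(A, b)                       # white (identity covariance) constraint
    Z = sample(con, 500, temps=np.linspace(0, 100, 501))
    print(Z.shape)                                # (3, 500); columns are approximate draws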
--------------------------------------------------------------------------------
/selectinf/sampling/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/sampling/tests/__init__.py
--------------------------------------------------------------------------------
/selectinf/sampling/tests/plots_fs.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import numpy as np
3 | from .test_fstep_langevin import test_fstep
4 | from .test_kfstep import test_kfstep
5 | import random
6 |
7 | def main():
8 |
9 | import statsmodels.api as sm
10 | from scipy.stats import probplot, uniform
11 | from matplotlib import pyplot as plt
12 | random.seed(4)
13 |
14 | fig = plt.figure()
15 | plot_1step = fig.add_subplot(121)
16 | plot_kstep = fig.add_subplot(122)
17 |
18 |
19 | P0 = []
20 | for i in range(300):
21 |
22 | print("iteration", i)
23 | p0 = test_fstep(Langevin_steps=10000, burning=2000)
24 | P0.append(p0)
25 |
26 | print("one step FS done! mean: ", np.mean(P0), "std: ", np.std(P0))
27 | #probplot(P0, dist=uniform, sparams=(0,1), plot=plot_1step, fit=False)
28 | #plot_1step.plot([0, 1], color='k', linestyle='-', linewidth=2)
29 |
30 | ecdf = sm.distributions.ECDF(P0)
31 | x = np.linspace(min(P0), max(P0))
32 | y = ecdf(x)
33 | plot_1step.plot(x, y, '-o',lw=2)
34 | plot_1step.plot([0, 1], [0, 1], 'k-', lw=2)
35 |
36 | plot_1step.set_title("One step FS")
37 | plot_1step.set_xlim([0,1])
38 | plot_1step.set_ylim([0,1])
39 |
40 |
41 | P0 = []
42 | for i in range(300):
43 | print("iteration", i)
44 | p0 = test_kfstep(Langevin_steps=10000, burning=2000)
45 | P0.append(p0)
46 |
47 |     print("k steps FS done! mean: ", np.mean(P0), "std: ", np.std(P0))
48 | #probplot(P0, dist=uniform, sparams=(0,1), plot=plot_kstep, fit=False)
49 | #plot_kstep.plot([0, 1], color='k', linestyle='-', linewidth=2)
50 |
51 |
52 | ecdf = sm.distributions.ECDF(P0)
53 | x = np.linspace(min(P0), max(P0))
54 | y = ecdf(x)
55 | plot_kstep.plot(x, y,'-o', lw=2)
56 | plot_kstep.plot([0, 1], [0, 1], 'k-', lw=2)
57 |
58 | plot_kstep.set_title("Four steps FS")
59 | plot_kstep.set_xlim([0,1])
60 | plot_kstep.set_ylim([0,1])
61 |
62 |
63 |
64 |     plt.savefig('FS_Langevin.pdf')
65 |     plt.show()
66 |
67 |
68 |
--------------------------------------------------------------------------------
/selectinf/sampling/tests/test_pca_langevin.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from ..langevin import projected_langevin
4 |
5 | ### Some examples: PCA from https://arxiv.org/abs/1410.8260
6 |
7 | def _log_vandermonde(eigenvals, power=1):
8 | """
9 |     Log of the Vandermonde determinant.
10 | """
11 | eigenvals = np.asarray(eigenvals)
12 | p = eigenvals.shape[0]
13 | idx = np.arange(p)
14 | logdiff = np.log(np.fabs(np.subtract.outer(eigenvals, eigenvals)))
15 | mask = np.greater.outer(idx, idx)
16 |
17 | return power * (logdiff * mask).sum()
18 |
19 | def _grad_log_vandermonde(eigenvals, power=1):
20 | """
21 |     Gradient of the log of the Vandermonde determinant.
22 | """
23 | eigenvals = np.asarray(eigenvals)
24 | p = eigenvals.shape[0]
25 | idx = np.arange(p)
26 | diff = np.subtract.outer(eigenvals, eigenvals)
27 | diff_sign = -np.sign(diff)
28 | mask = (diff > 0)
29 | return (1. / (np.fabs(diff) + np.identity(p)) * mask * diff_sign).sum(1)
30 |
31 | def _log_wishart_white(eigenvals, n):
32 | """
33 | Log-eigenvalue density of Wishart($I_{p \times p}$, n) assuming n>p,
34 | up to normalizing constant.
35 | """
36 | eigenvals = np.asarray(eigenvals)
37 | p = eigenvals.shape[0]
38 |
39 | return ((n - p - 1) * 0.5 * np.log(eigenvals).sum()
40 | + _log_vandermonde(eigenvals, power=1)
41 | - eigenvals.sum() * 0.5)
42 |
43 | def _grad_log_wishart_white(eigenvals, n):
44 | """
45 | Gradient of log-eigenvalue density of Wishart($I_{p \times p}$, n)
46 | assuming n>p.
47 | """
48 | eigenvals = np.asarray(eigenvals)
49 | p = eigenvals.shape[0]
50 | return ((n - p - 1) * 0.5 / (eigenvals + 1.e-7)
51 | + _grad_log_vandermonde(eigenvals, power=1) - 0.5)
52 |
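For reference, the density these helpers implement: for a white Wishart$(I_{p\times p}, n)$ with $n > p$, the eigenvalues have joint density

$$p(\lambda_1,\ldots,\lambda_p) \propto \prod_{i<j} |\lambda_i - \lambda_j| \, \prod_{i=1}^p \lambda_i^{(n-p-1)/2} e^{-\lambda_i/2},$$

so that $\log p = \tfrac{n-p-1}{2}\sum_i \log\lambda_i + \sum_{i<j}\log|\lambda_i - \lambda_j| - \tfrac12\sum_i \lambda_i + \text{const}$, which is exactly _log_wishart_white, with _log_vandermonde supplying the middle term.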
53 | def main(n=50):
54 |
55 | from regreg.atoms._isotonic import _isotonic_regression
56 | import matplotlib.pyplot as plt
57 | initial = np.ones(n) + 0.01 * np.random.standard_normal(n)
58 | grad_map = lambda val: _grad_log_wishart_white(val, n)
59 |
60 | def projection_map(vals):
61 | iso = np.zeros_like(vals)
62 | _isotonic_regression(vals, np.ones_like(vals), iso)
63 | vals = np.asarray(iso)
64 | return np.maximum(vals, 1.e-6)
65 |
66 | sampler = projected_langevin(initial,
67 | grad_map,
68 | projection_map,
69 | 0.01)
70 | sampler = iter(sampler)
71 |
72 | path = [initial.copy()]
73 | for _ in range(200):
74 | print(sampler.state)
75 | sampler.next()
76 | path.append(sampler.state.copy())
77 | path = np.array(path)
78 |
79 | [plt.plot(path[:,i]) for i in range(5)]
80 | plt.show()
81 |
82 |
--------------------------------------------------------------------------------
/selectinf/sampling/tests/test_sequential.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import numpy.testing.decorators as dec
3 | from scipy.stats import norm as ndist
4 |
5 | from ...constraints.affine import constraints
6 | from ..sequential import sample
7 | from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
8 | from ...tests.flags import SMALL_SAMPLES, SET_SEED
9 |
10 | @dec.slow
11 | @set_seed_iftrue(SET_SEED)
12 | @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, nsim=10)
13 | def test_sequentially_constrained(ndraw=100, nsim=50):
14 | S = -np.identity(10)[:3]
15 | b = -6 * np.ones(3)
16 | C = constraints(S, b)
17 | W = sample(C, nsim, temps=np.linspace(0, 200, 1001))
18 | U = np.linspace(0, 1, 101)
19 |
20 |
--------------------------------------------------------------------------------
/selectinf/sandbox/approx_ci/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/sandbox/approx_ci/__init__.py
--------------------------------------------------------------------------------
/selectinf/sandbox/approx_ci/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/sandbox/approx_ci/tests/__init__.py
--------------------------------------------------------------------------------
/selectinf/sandbox/bayesian/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/sandbox/bayesian/__init__.py
--------------------------------------------------------------------------------
/selectinf/sandbox/bayesian/credible_intervals.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.stats import norm as ndist
3 |
4 | class projected_langevin(object):
5 |
6 | def __init__(self,
7 | initial_condition,
8 | gradient_map,
9 | projection_map,
10 | stepsize):
11 |
12 | (self.state,
13 | self.gradient_map,
14 | self.projection_map,
15 | self.stepsize) = (np.copy(initial_condition),
16 | gradient_map,
17 | projection_map,
18 | stepsize)
19 | self._shape = self.state.shape[0]
20 | self._sqrt_step = np.sqrt(self.stepsize)
21 | self._noise = ndist(loc=0,scale=1)
22 |
23 | def __iter__(self):
24 | return self
25 |
26 | def next(self):
27 | while True:
28 | proj_arg = (self.state + 0.5 * self.stepsize * self.gradient_map(self.state)
29 | + self._noise.rvs(self._shape) * self._sqrt_step)
30 | candidate = self.projection_map(proj_arg)
31 | if not np.all(np.isfinite(self.gradient_map(candidate))):
32 | print(candidate, self._sqrt_step)
33 |                 self._sqrt_step *= 0.8
34 |                 self.stepsize = self._sqrt_step**2
35 |             else:
36 |                 self.state[:] = candidate
37 |                 break
38 |
39 |     __next__ = next  # Python 3 iterator protocol
37 |
--------------------------------------------------------------------------------
/selectinf/sandbox/bayesian/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/sandbox/bayesian/tests/__init__.py
--------------------------------------------------------------------------------
/selectinf/src_C/#sample_preparation.pyx#:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | cimport numpy as np
4 |
5 | #from cython.view cimport array as cvarray
6 | from libc.stdlib cimport malloc, free
7 |
8 |
9 | cdef extern from "preparation_Eig_Vect.h":
10 | void samples(int n,
11 | int dim,
12 | int seed,
13 | double* initial,
14 | int numlin,
15 | int numquad,
16 | double* lin,
17 | double* quad,
18 | double* quad_lin,
19 | double* offset_lin,
20 | double* offset_quad,
21 | double* samples_Carray)
22 |
23 |
24 |
25 | def quad_sampler(int n_sample,
26 | initial,
27 | quad,# = np.array([]).reshape((0, 0, 0)),
28 | quad_lin,# = np.array([]).reshape((0, 0)),
29 | lin,# = np.array([]).reshape((0,0)),
30 | offset_quad,# = np.array([]),
31 | offset_lin # = np.array([])
32 | ):
33 |
34 |
35 |
36 | cdef int numquad = quad.shape[0]
37 | cdef int p = quad.shape[1]
38 | cdef int numlin = lin.shape[0]
39 |
40 | cdef np.ndarray[np.double_t, ndim=3] quad2 = np.ascontiguousarray(-quad)
41 | cdef np.ndarray[np.double_t, ndim=2] quad_lin2 = np.ascontiguousarray(-quad_lin)
42 | cdef np.ndarray[np.double_t, ndim=1] offset_quad2 = np.ascontiguousarray(offset_quad)
43 |
44 | cdef double *pt_quad
45 | cdef double *pt_quad_lin
46 | cdef double *pt_quad_offset
47 | if numquad > 0:
48 | pt_quad = &quad2[0, 0, 0]
49 | pt_quad_lin = &quad_lin2[0, 0]
50 | pt_quad_offset = &offset_quad2[0]
51 |
52 |
53 | cdef np.ndarray[np.double_t, ndim=2] lin2 = np.ascontiguousarray(-lin )
54 | cdef np.ndarray[np.double_t, ndim=1] offset_lin2 = np.ascontiguousarray(offset_lin )
55 |
56 | cdef double *pt_lin
57 | cdef double *pt_lin_offset
58 | if numlin > 0:
59 | pt_lin_offset = &offset_lin2[0]
60 | pt_lin = &lin2[0, 0]
61 |
62 | cdef np.ndarray[np.double_t, ndim=1] initial2 = np.ascontiguousarray(initial)
63 |
64 | cdef int seed = np.random.randint(1, 100000)
65 |
66 | cdef double *samples_Carray = malloc(n_sample*p * sizeof(double))
67 |
68 | samples(n_sample,
69 | p,
70 | seed,
71 | &initial2[0],
72 | numlin,
73 | numquad,
74 | pt_lin,
75 | pt_quad,
76 | pt_quad_lin,
77 | pt_lin_offset,
78 | pt_quad_offset,
79 | samples_Carray)
80 |
81 |
82 | cdef np.ndarray[np.double_t, ndim=2] samples_array = np.zeros((n_sample, p))
83 | for i in range(n_sample):
84 | for j in range(p):
85 | samples_array[i, j] = samples_Carray[i*p + j]
86 |
87 | free(samples_Carray)
88 |
89 | return samples_array
90 |
91 |
92 |
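A hedged sketch of calling the compiled wrapper (the module name 'sampler' comes from src_C/setup.py; the constraint sign conventions are inferred from the negations inside quad_sampler and should be checked against the C++ source before relying on them):

    import numpy as np
    import sampler   # extension built from sample_preparation.pyx

    p = 2
    initial = np.zeros(p)              # feasible starting point
    quad = np.zeros((0, p, p))         # no quadratic constraints
    quad_lin = np.zeros((0, p))
    offset_quad = np.zeros(0)
    lin = np.array([[1., 0.]])         # a single linear constraint on x[0]
    offset_lin = np.array([1.])

    draws = sampler.quad_sampler(1000, initial, quad, quad_lin,
                                 lin, offset_quad, offset_lin)
    print(draws.shape)                 # (1000, 2)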
--------------------------------------------------------------------------------
/selectinf/src_C/HmcSampler.h:
--------------------------------------------------------------------------------
1 | /*
2 | * File: HmcSampler.h
3 | * Author: aripakman
4 | *
5 | * Created on July 4, 2012, 10:44 AM
6 | */
7 |
8 | #ifndef HMCSAMPLER_H
9 | #define HMCSAMPLER_H
10 |
11 | #define _USE_MATH_DEFINES
12 |
13 | #include <Eigen/Dense>
14 | #include <vector>
15 | #include <tr1/random>
16 | #include <cmath>
17 |
18 | using namespace Eigen;
19 | using namespace std;
20 | using namespace std::tr1;
21 |
22 | struct LinearConstraint{
23 | VectorXd f;
24 | double g;
25 | };
26 |
27 | struct QuadraticConstraint{
28 | MatrixXd A;
29 | VectorXd B;
30 | double C;
31 | };
32 |
33 |
34 | class HmcSampler {
35 | public:
36 |
37 | HmcSampler(const int & d, const int & seed);
38 |
39 | void setInitialValue(const VectorXd & initial);
40 | void addLinearConstraint(const VectorXd & f, const double & g);
41 | void addQuadraticConstraint(const MatrixXd & A, const VectorXd & B, const double & C);
42 | MatrixXd sampleNext(bool returnTrace = false);
43 |
44 | private:
45 | int dim;
46 | VectorXd lastSample;
47 | static const double min_t;
48 |     vector<LinearConstraint> linearConstraints;
49 |     vector<QuadraticConstraint> quadraticConstraints;
50 |
51 | ranlux64_base_01 eng1;
52 | // mt19937 eng1; //to sample time and momenta
53 | uniform_real<> ud;
54 | normal_distribution<> nd;
55 |
56 | void _getNextLinearHitTime(const VectorXd & a, const VectorXd & b, double & t, int & cn );
57 | void _getNextQuadraticHitTime(const VectorXd & a, const VectorXd & b, double & t, int & cn, const bool );
58 | double _verifyConstraints(const VectorXd &);
59 | void _updateTrace( VectorXd const & a, VectorXd const & b, double const & tt, MatrixXd & tracePoints);
60 | };
61 |
62 | #endif /* HMCSAMPLER_H */
63 |
64 |
--------------------------------------------------------------------------------
/selectinf/src_C/logfile.txt:
--------------------------------------------------------------------------------
1 | -1-0
2 | -0-1
3 |
4 | 0.846196-0.9041
5 | 0.7401690.590085
6 | -0.18959-0.17084
7 | -0.4238650.0333025
8 | -0.592693-0.266382
9 | 0.0690678-0.00674659
10 | -0.174223-0.431466
11 | 0.6978830.440892
12 | 0.144409-0.675854
13 | -0.3425970.0214389
14 |
15 | 0.846196-0.9041
16 | 0.7401690.590085
17 | -0.18959-0.17084
18 | -0.4238650.0333025
19 | -0.592693-0.266382
20 | 0.0690678-0.00674659
21 | -0.174223-0.431466
22 | 0.6978830.440892
23 | 0.144409-0.675854
24 | -0.3425970.0214389
25 |
--------------------------------------------------------------------------------
/selectinf/src_C/preparation_Eig_Vect.cpp:
--------------------------------------------------------------------------------
1 | #include <fstream>
2 | #include <iostream>
3 | #include "HmcSampler.h"
4 |
5 | #include "preparation_Eig_Vect.h"
6 |
7 |
8 | #include <Eigen/Dense>
9 |
10 | using namespace std;
11 | using namespace Eigen;
12 |
13 | void samples(
14 | int n,
15 | int dim,
16 | int seed,
17 | double *initial,
18 | int numlin,
19 | int numquad,
20 | double *lin,
21 | double *quad,
22 | double *quad_lin,
23 | double *offset_lin,
24 | double *offset_quad,
25 | double *samples_Carray
26 | ){
27 |
28 |
29 | const Map<VectorXd> initial_value(initial, dim);
30 |
31 |
32 |
33 | ofstream logfile;
34 | logfile.open ("logfile.txt");
35 |
36 |
37 | HmcSampler hmc1(dim, seed);
38 | if (numlin >0){
39 |     const Map<MatrixXd> F(lin, numlin, dim);
40 |     const Map<VectorXd> g(offset_lin, numlin);
41 |
42 |     for(int i=0; i<numlin; i++){
43 |         hmc1.addLinearConstraint(F.row(i), g(i));
44 |     }
45 | }
46 |
47 | if (numquad >0){
48 |
49 |     for(int i=0; i<numquad; i++){
50 |         double *indice = &quad[i*dim*dim];
51 |         const Map<MatrixXd> A_Map(indice, dim, dim);
52 |
53 |
54 |         for(int k=0; k<dim; k++){
[lines 55-62 lost in extraction; they construct the MatrixXd A used below]
63 |         const Map<VectorXd> B_Map(&quad_lin[i*dim], dim);
64 | VectorXd B(B_Map);
65 | double C = offset_quad[i];
66 | hmc1.addQuadraticConstraint(A,B,C);
67 | }
68 |
69 | }
70 |
71 | hmc1.setInitialValue(initial_value);
72 |
73 | MatrixXd samples(n,dim);
74 |
75 |     for (int i=0; i<n; i++){
[remainder of the file lost in extraction; the loop fills the output via hmc1.sampleNext()]
--------------------------------------------------------------------------------
/selectinf/src_C/sample_preparation.pyx:
--------------------------------------------------------------------------------
[lines 1-46 lost in extraction; cf. the near-identical #sample_preparation.pyx# above]
47 |     if numquad > 0:
48 | pt_quad = &quad2[0, 0, 0]
49 | pt_quad_lin = &quad_lin2[0, 0]
50 | pt_quad_offset = &offset_quad2[0]
51 |
52 |
53 |
54 | print "quad inequalities generated"
55 |
56 |
57 | cdef np.ndarray[np.double_t, ndim=2] lin2 = np.ascontiguousarray(-lin )
58 | cdef np.ndarray[np.double_t, ndim=1] offset_lin2 = np.ascontiguousarray(offset_lin )
59 |
60 | cdef double *pt_lin
61 | cdef double *pt_lin_offset
62 | if numlin > 0:
63 | pt_lin_offset = &offset_lin2[0]
64 | pt_lin = &lin2[0, 0]
65 |
66 | cdef np.ndarray[np.double_t, ndim=1] initial2 = np.ascontiguousarray(initial)
67 |
68 | cdef int seed = np.random.randint(1, 100000)
69 |
70 | cdef double *samples_Carray = malloc(n_sample*p * sizeof(double))
71 |
72 | samples(n_sample,
73 | p,
74 | seed,
75 | &initial2[0],
76 | numlin,
77 | numquad,
78 | pt_lin,
79 | pt_quad,
80 | pt_quad_lin,
81 | pt_lin_offset,
82 | pt_quad_offset,
83 | samples_Carray)
84 |
85 |
86 | cdef np.ndarray[np.double_t, ndim=2] samples_array = np.zeros((n_sample, p))
87 | for i in range(n_sample):
88 | for j in range(p):
89 | samples_array[i, j] = samples_Carray[i*p + j]
90 |
91 | free(samples_Carray)
92 |
93 | return samples_array
94 |
95 |
96 |
--------------------------------------------------------------------------------
/selectinf/src_C/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup, Extension
2 | from Cython.Build import cythonize
3 |
4 | from Cython.Distutils import build_ext
5 | import numpy as np
6 |
7 |
8 |
9 | setup(
10 | #name = 'kmean',
11 | cmdclass = {'build_ext': build_ext},
12 | include_dirs = [np.get_include()],
13 | ## ext_modules = cythonize("sample_preparation.pyx",
14 | ## language="c++")
15 | ext_modules = [Extension('sampler',
16 | ["sample_preparation.pyx" ,
17 | 'preparation_Eig_Vect.cpp' ,
18 | 'HmcSampler.cpp'],
19 | language="c++",
20 | extra_compile_args = ["-W",
21 | "-Wall",
22 | "-ansi",
23 | "-pedantic",
24 | "-stdlib=libstdc++"#,
25 | #"-fPIC"
26 | ],
27 | extra_link_args = ["-stdlib=libstdc++"]
28 | )]
29 |
30 | )
31 |
32 |
33 |
34 |
35 |
36 |
37 |
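With Eigen's headers on the compiler include path, the extension above is typically built in place with "python setup.py build_ext --inplace", producing the sampler module used by callers of the .pyx wrapper (the exact include flags are platform-dependent and not pinned here).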
--------------------------------------------------------------------------------
/selectinf/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/tests/__init__.py
--------------------------------------------------------------------------------
/selectinf/tests/flags.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | SMALL_SAMPLES = False
4 | SET_SEED = False
5 |
6 | if "USE_SMALL_SAMPLES" in os.environ:
7 | SMALL_SAMPLES = True
8 |
9 | if "USE_TEST_SEED" in os.environ:
10 | SET_SEED = True
11 |
--------------------------------------------------------------------------------
/selectinf/tests/test_instance.py:
--------------------------------------------------------------------------------
1 | from numpy import inf
2 | from itertools import product
3 | from .instance import gaussian_instance, logistic_instance, HIV_NRTI
4 |
5 | def test_gaussian_instance():
6 |
7 | for scale, center, random_signs, df in product(
8 | [True, False],
9 | [True, False],
10 | [True, False],
11 | [40, inf]):
12 | gaussian_instance(n=10,
13 | p=20,
14 | s=4,
15 | random_signs=random_signs,
16 | scale=scale,
17 | center=center,
18 | df=df)
19 |
20 | def test_logistic_instance():
21 |
22 | for scale, center, random_signs in product(
23 | [True, False],
24 | [True, False],
25 | [True, False]):
26 | logistic_instance(n=10,
27 | p=20,
28 | s=4,
29 | random_signs=random_signs,
30 | scale=scale,
31 | center=center)
32 |
33 | def test_HIV_instance():
34 |
35 | HIV_NRTI()
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/selectinf/tests/tests.py:
--------------------------------------------------------------------------------
1 | from ..algorithms import tests as algorithms
2 | from ..distributions import tests as distributions
3 | from ..truncated import tests as truncated
4 | from ..constraints import tests as constraints
5 | from ..sampling import tests as sampling
6 |
--------------------------------------------------------------------------------
/selectinf/truncated/F.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import mpmath as mp
3 |
4 | from .base import truncated
5 |
6 |
7 | def sf_F(d1, d2, scale):
8 |
9 | def sf(a, b=np.inf, dps=15):
10 | dps_temp = mp.mp.dps
11 | mp.mp.dps = dps
12 |
13 | tmp_a = d1*a/d2
14 | tmp_b = d1*b/d2
15 | beta_a = tmp_a / (1. + tmp_a)
16 | beta_b = tmp_b / (1. + tmp_b)
17 | if b == np.inf:
18 | beta_b = 1.
19 | sf = mp.betainc(d1/2., d2/2.,
20 | x1=beta_a, x2=beta_b,
21 | regularized=True)
22 | mp.mp.dps = dps_temp
23 | return sf
24 |
25 | return sf
26 |
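The survival function above relies on the standard F-to-Beta reduction, stated here for clarity: if $X \sim F_{d_1, d_2}$ then $\frac{d_1 X / d_2}{1 + d_1 X / d_2} \sim \mathrm{Beta}(d_1/2, d_2/2)$, so

$$P(a < X \le b) = I_{x_b}(d_1/2, d_2/2) - I_{x_a}(d_1/2, d_2/2), \qquad x_t = \frac{d_1 t / d_2}{1 + d_1 t / d_2},$$

which is the regularized incomplete beta integral that mp.betainc evaluates between beta_a and beta_b.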
27 | def null_f(x):
28 | raise ValueError("Shouldn't be called")
29 | return 0
30 |
31 |
32 | class truncated_F(truncated):
33 | def __init__(self, intervals, d1, d2, scale=1):
34 | self._d1 = d1
35 | self._d2 = d2
36 | self._scale = scale
37 |
38 | truncated.__init__(self,
39 | intervals,
40 | null_f,
41 | null_f,
42 | sf_F(d1, d2, scale),
43 | null_f)
44 |
--------------------------------------------------------------------------------
/selectinf/truncated/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/truncated/__init__.py
--------------------------------------------------------------------------------
/selectinf/truncated/api.py:
--------------------------------------------------------------------------------
1 | from .base import find_root
2 |
3 | from .gaussian import truncated_gaussian
4 | from .chi import truncated_chi, truncated_chi2
5 | from .T import truncated_T
6 | from .F import truncated_F
7 |
--------------------------------------------------------------------------------
/selectinf/truncated/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/truncated/tests/__init__.py
--------------------------------------------------------------------------------
/selectinf/truncated/tests/test_truncated.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import nose.tools as nt
3 | import numpy as np
4 | import numpy.testing.decorators as dec
5 |
6 | from ..gaussian import truncated_gaussian, truncated_gaussian_old
7 | from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
8 | from ...tests.flags import SMALL_SAMPLES, SET_SEED
9 |
10 | intervals = [(-np.inf,-4.),(3.,np.inf)]
11 |
12 | tg = truncated_gaussian(intervals)
13 |
14 | X = np.linspace(-5,5,101)
15 | F = [tg.cdf(x) for x in X]
16 |
17 | def test_sigma():
18 | tg2 = truncated_gaussian_old(intervals, scale=2.)
19 | tg1 = truncated_gaussian_old(np.array(intervals)/2., scale=1.)
20 |
21 | Z = 3.5
22 | nt.assert_equal(np.around(float(tg1.cdf(Z/2.)), 3),
23 | np.around(float(tg2.cdf(Z)), 3))
24 | np.testing.assert_equal(np.around(np.array(2 * tg1.equal_tailed_interval(Z/2,0.05)), 4),
25 | np.around(np.array(tg2.equal_tailed_interval(Z,0.05)), 4))
26 |
27 | @set_seed_iftrue(SET_SEED)
28 | @dec.skipif(True, 'checking coverage: this is random with highish failure rate')
29 | @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=100)
30 | def test_equal_tailed_coverage(nsim=1000):
31 |
32 | alpha = 0.25
33 | tg = truncated_gaussian_old([(2.3,np.inf)], scale=2)
34 | coverage = 0
35 | for i in range(nsim):
36 | while True:
37 | Z = np.random.standard_normal() * 2
38 | if Z > 2.3:
39 | break
40 | L, U = tg.equal_tailed_interval(Z, alpha)
41 | coverage += (U > 0) * (L < 0)
42 | SE = np.sqrt(alpha*(1-alpha)*nsim)
43 | print(coverage)
44 | nt.assert_true(np.fabs(coverage - (1-alpha)*nsim) < 2*SE)
45 |
46 | @set_seed_iftrue(SET_SEED)
47 | @dec.skipif(True, 'really slow')
48 | @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=100)
49 | def test_UMAU_coverage(nsim=1000):
50 |
51 | alpha = 0.25
52 | tg = truncated_gaussian_old([(2.3,np.inf)], scale=2)
53 | coverage = 0
54 | for i in range(nsim):
55 | while True:
56 | Z = np.random.standard_normal()*2
57 | if Z > 2.3:
58 | break
59 | L, U = tg.UMAU_interval(Z, alpha)
60 | coverage += (U > 0) * (L < 0)
61 | SE = np.sqrt(alpha*(1-alpha)*nsim)
62 | print(coverage)
63 | nt.assert_true(np.fabs(coverage - (1-alpha)*nsim) < 2.1*SE)
64 |
--------------------------------------------------------------------------------
/selectinf/truncated/tests/test_truncatedFT.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.stats import f as fdist, t as tdist
3 |
4 | from ..F import sf_F
5 | from ..T import sf_T
6 |
7 | def test_F():
8 |
9 | f1 = sf_F(3.,20.,1)
10 | f2 = fdist(3.,20.)
11 |
12 | V = np.linspace(1,7,201)
13 | V1 = [float(f1(v)) for v in V]
14 | V2 = f2.sf(V)
15 | np.testing.assert_allclose(V1, V2)
16 |
17 | V = np.linspace(1,7,11)
18 | V1 = [float(f1(u,v)) for u,v in zip(V[:-1],V[1:])]
19 | V2 = [f2.sf(u)-f2.sf(v) for u,v in zip(V[:-1],V[1:])]
20 | np.testing.assert_allclose(V1, V2)
21 |
22 | def test_T():
23 |
24 | f1 = sf_T(20.)
25 | f2 = tdist(20.)
26 |
27 | V = np.linspace(-2,3,201)
28 | V1 = [float(f1(v)) for v in V]
29 | V2 = f2.sf(V)
30 | np.testing.assert_allclose(V1, V2)
31 |
32 | V = np.linspace(-2,3,11)
33 | V1 = [float(f1(u,v)) for u,v in zip(V[:-1],V[1:])]
34 | V2 = [f2.sf(u)-f2.sf(v) for u,v in zip(V[:-1],V[1:])]
35 | np.testing.assert_allclose(V1, V2)
36 |
--------------------------------------------------------------------------------
/selectinf/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/selectinf/utils/__init__.py
--------------------------------------------------------------------------------
/selectinf/utils/tools.py:
--------------------------------------------------------------------------------
1 | import time
2 | from functools import wraps
3 |
4 |
5 | dict_time = dict()
6 |
7 |
8 | def timethis(func):
9 | '''
10 | Decorator that reports the execution time.
11 | '''
12 | dict_time[func.__name__] = (0, 0)
13 |
14 | @wraps(func)
15 | def wrapper(*args, **kwargs):
16 | start = time.time()
17 | result = func(*args, **kwargs)
18 | end = time.time()
19 | #print(func.__name__, end-start)
20 |
21 | k, t = dict_time[func.__name__]
22 | dict_time[func.__name__] = k+1, t + end-start
23 |
24 | return result
25 | return wrapper
26 |
27 |
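A small usage sketch (the function name 'work' is illustrative): per-function call counts and cumulative wall time accumulate in dict_time, keyed by function name.

    from selectinf.utils.tools import timethis, dict_time

    @timethis
    def work(n):
        return sum(range(n))

    for _ in range(3):
        work(10**5)

    calls, total = dict_time['work']
    print(calls, total)    # 3 calls, total elapsed seconds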
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [versioneer]
2 | VCS = git
3 | style = pep440
4 | versionfile_source = selectinf/_version.py
5 | tag_prefix =
6 | parentdir_prefix = selectinf-
7 |
--------------------------------------------------------------------------------
/tools/build_modref_templates.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
3 | # vi: set ft=python sts=4 ts=4 sw=4 et:
4 | """Script to auto-generate our API docs.
5 |
6 | This script should run in Python 2 and Python 3
7 | """
8 | # stdlib imports
9 | import os
10 |
11 | # local imports
12 | from apigen import ApiDocWriter
13 |
14 | #*****************************************************************************
15 | if __name__ == '__main__':
16 | package = 'selectinf'
17 | outdir = os.path.join('source', 'api', 'generated')
18 | docwriter = ApiDocWriter(package)
19 | docwriter.package_skip_patterns += [r'\.fixes$',
20 | r'\.externals$',
21 | #r'\.labs\.viz',
22 | ]
23 | docwriter.write_api_docs(outdir)
24 | docwriter.write_index(outdir, 'gen', relative_to=os.path.join('source', 'api'))
25 | print('%d files written' % len(docwriter.written_modules))
26 |
--------------------------------------------------------------------------------
/tools/noseall_with_coverage:
--------------------------------------------------------------------------------
1 | #! /usr/bin/python
2 |
3 | import os
4 |
5 | os.system("""
6 | env USE_TEST_SEED=1 USE_SMALL_SAMPLES=1 nosetests --with-coverage --cover-package=selectinf --verbose selectinf
7 | """)
8 |
--------------------------------------------------------------------------------
/tools/strip_notebook.py:
--------------------------------------------------------------------------------
1 | """
2 | simple example script for running notebooks and saving the resulting notebook.
3 |
4 | Usage: `strip_notebook.py foo.ipynb [bar.ipynb [...]]`
5 |
6 | Each notebook is stripped of its outputs after checking that it executes.
7 | Used to clean notebooks before committing to git.
8 | """
9 |
10 | from selectinf.utils.nbtools import strip_outputs, reads, writes
11 | from argparse import ArgumentParser
12 |
13 | def main():
14 | parser = ArgumentParser(
15 | description='Run cells in notebook and strip outputs.')
16 | parser.add_argument('--clobber', action='store_true',
17 | help='if set, overwrite existing notebook files with stripped version')
18 | parser.add_argument('--norun', action='store_true',
19 | help='if set, do not run cells before stripping')
20 | parser.add_argument('notebooks',
21 | metavar='NB',
22 | help='Notebooks to strip outputs from.',
23 | nargs='+',
24 | type=str)
25 |
26 | args = parser.parse_args()
27 |
28 | for ipynb in args.notebooks:
29 | print("running and stripping %s" % ipynb)
30 | with open(ipynb) as f:
31 | stripped_nb = strip_outputs(reads(f.read(), 'json'),
32 | run_cells=not args.norun)
33 | if args.clobber:
34 | print('clobbering %s' % ipynb)
35 | with open(ipynb, 'w') as f:
36 | f.write(writes(stripped_nb, 'json'))
37 | else:
38 | print('not clobbering %s' % ipynb)
39 |
40 | if __name__ == '__main__':
41 | main()
42 |
--------------------------------------------------------------------------------
/umpu/UMAU.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/selective-inference/Python-software/e906fbb98946b129eb6713e8956bde7a080181f4/umpu/UMAU.pdf
--------------------------------------------------------------------------------