├── .gitignore ├── .nojekyll ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── appveyor.yml ├── ci_scripts ├── appveyor │ └── run_with_env.cmd ├── circleci │ ├── build_doc.sh │ ├── checkout_merge_commit.sh │ └── push_doc.sh └── travis │ ├── install.sh │ ├── success.sh │ └── test.sh ├── circle.yml ├── doc ├── Makefile ├── api.rst ├── conf.py ├── index.rst ├── make.bat ├── randomized_lasso.rst └── stability_selection.rst ├── examples ├── README.txt ├── plot_randomized_lasso_path.py └── plot_stability_scores.py ├── requirements.txt ├── setup.cfg ├── setup.py └── stability_selection ├── __init__.py ├── bootstrap.py ├── randomized_lasso.py ├── stability_selection.py └── tests ├── __init__.py ├── test_common.py ├── test_randomized_lasso.py ├── test_stability_selection.py └── test_stratified_bootstrap.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # scikit-learn specific 10 | doc/_build/ 11 | doc/auto_examples/ 12 | doc/modules/generated/ 13 | doc/datasets/generated/ 14 | 15 | # Distribution / packaging 16 | 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *,cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | 62 | # Sphinx documentation 63 | docs/_build/ 64 | 65 | # PyBuilder 66 | target/ 67 | -------------------------------------------------------------------------------- /.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/stability-selection/e6e34da3601cc8215cd0b08d5c5f3a9dd3ccfe01/.nojekyll -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | cache: 4 | apt: true 5 | # We use three different cache directory 6 | # to work around a Travis bug with multi-platform cache 7 | directories: 8 | - $HOME/.cache/pip 9 | - $HOME/download 10 | env: 11 | global: 12 | # Directory where tests are run from 13 | - TEST_DIR=/tmp/test_dir/ 14 | - MODULE=stability_selection 15 | matrix: 16 | #- DISTRIB="conda" PYTHON_VERSION="2.7" 17 | # NUMPY_VERSION="1.7.1" SCIPY_VERSION="0.11.0" CYTHON_VERSION="0.21" 18 | - DISTRIB="conda" PYTHON_VERSION="3.5" COVERAGE="true" 19 | NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4" 20 | 21 | install: source ci_scripts/travis/install.sh 22 | script: bash ci_scripts/travis/test.sh 23 | after_success: source ci_scripts/travis/success.sh 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, Thomas Huijskens. 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of project-template nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # stability-selection - A scikit-learn compatible implementation of stability selection 2 | 3 | [![Build Status](https://travis-ci.org/scikit-learn-contrib/stability-selection.svg?branch=master)](https://travis-ci.org/scikit-learn-contrib/stability-selection) 4 | [![Coverage Status](https://coveralls.io/repos/github/scikit-learn-contrib/stability-selection/badge.svg?branch=master)](https://coveralls.io/github/scikit-learn-contrib/stability-selection?branch=master) 5 | [![CircleCI](https://circleci.com/gh/scikit-learn-contrib/stability-selection.svg?style=svg)](https://circleci.com/gh/scikit-learn-contrib/stability-selection) 6 | 7 | **stability-selection** is a Python implementation of the stability selection feature selection algorithm, first proposed by [Meinshausen and Buhlmann](https://stat.ethz.ch/~nicolai/stability.pdf). 8 | 9 | The idea behind stability selection is to inject more noise into the original problem by generating bootstrap samples of the data, and to use a base feature selection algorithm (like the LASSO) to find out which features are important in every sampled version of the data. The results on each bootstrap sample are then aggregated to compute a *stability score* for each feature in the data. Features can then be selected by choosing an appropriate threshold for the stability scores. 10 | 11 | ## Installation 12 | 13 | To install the module, clone the repository 14 | ```bash 15 | git clone https://github.com/scikit-learn-contrib/stability-selection.git 16 | ``` 17 | Before installing the module you will need `numpy`, `matplotlib`, and `sklearn`. 
Install these modules separately, or install using the `requirements.txt` file: 18 | ```bash 19 | pip install -r requirements.txt 20 | ``` 21 | and execute the following in the project directory to install `stability-selection`: 22 | ```bash 23 | python setup.py install 24 | ``` 25 | 26 | ## Documentation and algorithmic details 27 | 28 | See the [documentation](https://thuijskens.github.io/stability-selection/docs/index.html) for details on the module, and the accompanying [blog post](https://thuijskens.github.io/2018/07/25/stability-selection/) for details on the algorithmic details. 29 | 30 | ## Example usage 31 | 32 | `stability-selection` implements a class `StabilitySelection`, that takes any scikit-learn compatible estimator that has either a ``feature_importances_`` or ``coef_`` attribute after fitting. Important other parameters are 33 | 34 | - `lambda_name`: the name of the penalization parameter of the base estimator (for example, `C` in the case of `LogisticRegression`). 35 | - `lambda_grid`: an array of values of the penalization parameter to iterate over. 36 | 37 | After instantiation, the algorithm can be run with the familiar `fit` and `transform` calls. 
38 | 39 | ### Basic example 40 | See below for an example: 41 | ```python 42 | import numpy as np 43 | 44 | from sklearn.linear_model import LogisticRegression 45 | from sklearn.pipeline import Pipeline 46 | from sklearn.preprocessing import StandardScaler 47 | from sklearn.utils import check_random_state 48 | from stability_selection import StabilitySelection 49 | 50 | 51 | def _generate_dummy_classification_data(p=1000, n=1000, k=5, random_state=123321): 52 | 53 | rng = check_random_state(random_state) 54 | 55 | X = rng.normal(loc=0.0, scale=1.0, size=(n, p)) 56 | betas = np.zeros(p) 57 | important_betas = np.sort(rng.choice(a=np.arange(p), size=k)) 58 | betas[important_betas] = rng.uniform(size=k) 59 | 60 | probs = 1 / (1 + np.exp(-1 * np.matmul(X, betas))) 61 | y = (probs > 0.5).astype(int) 62 | 63 | return X, y, important_betas 64 | 65 | ## This is all preparation of the dummy data set 66 | n, p, k = 500, 1000, 5 67 | 68 | X, y, important_betas = _generate_dummy_classification_data(n=n, k=k) 69 | base_estimator = Pipeline([ 70 | ('scaler', StandardScaler()), 71 | ('model', LogisticRegression(penalty='l1')) 72 | ]) 73 | 74 | ## Here stability selection is instantiated and run 75 | selector = StabilitySelection(base_estimator=base_estimator, lambda_name='model__C', 76 | lambda_grid=np.logspace(-5, -1, 50)).fit(X, y) 77 | 78 | print(selector.get_support(indices=True)) 79 | ``` 80 | 81 | ### Bootstrapping strategies 82 | 83 | `stability-selection` uses bootstrapping without replacement by default (as proposed in the original paper), but does support different bootstrapping strategies. [Shah and Samworth] proposed *complementary pairs* bootstrapping, where the data set is bootstrapped in pairs, such that the intersection is empty but the union equals the original data set. `StabilitySelection` supports this through the `bootstrap_func` parameter. 
84 | 85 | This parameter can be: 86 | - A string, which must be one of 87 | - 'subsample': For subsampling without replacement (default). 88 | - 'complementary_pairs': For complementary pairs subsampling [2]. 89 | - 'stratified': For stratified bootstrapping in imbalanced 90 | classification. 91 | - A function that takes `y`, and a random state 92 | as inputs and returns a list of sample indices in the range 93 | `(0, len(y)-1)`. 94 | 95 | For example, the `StabilitySelection` call in the above example can be replaced with 96 | ```python 97 | selector = StabilitySelection(base_estimator=base_estimator, 98 | lambda_name='model__C', 99 | lambda_grid=np.logspace(-5, -1, 50), 100 | bootstrap_func='complementary_pairs') 101 | selector.fit(X, y) 102 | ``` 103 | to run stability selection with complementary pairs bootstrapping. 104 | 105 | ## Feedback and contributing 106 | 107 | Feedback and contributions are much appreciated. If you have any feedback, please post it on the [issue tracker](https://github.com/scikit-learn-contrib/stability-selection/issues). 108 | 109 | ## References 110 | 111 | [1]: Meinshausen, N. and Buhlmann, P., 2010. Stability selection. Journal of the Royal Statistical Society: 112 | Series B (Statistical Methodology), 72(4), pp.417-473. 113 | 114 | [2] Shah, R.D. and Samworth, R.J., 2013. Variable selection with 115 | error control: another look at stability selection. Journal 116 | of the Royal Statistical Society: Series B (Statistical Methodology), 117 | 75(1), pp.55-80. 
118 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # AppVeyor.com is a Continuous Integration service to build and run tests under 2 | # Windows 3 | 4 | 5 | environment: 6 | global: 7 | # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the 8 | # /E:ON and /V:ON options are not enabled in the batch script interpreter 9 | # See: http://stackoverflow.com/a/13751649/163740 10 | CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci_scripts\\appveyor\\run_with_env.cmd" 11 | WHEELHOUSE_UPLOADER_USERNAME: sklearn 12 | WHEELHOUSE_UPLOADER_SECRET: 13 | secure: XzK+Mi6Ba5frV2B/jHq7h4aD8/nox9SsI3T8Kub1L2XNevRSIurUEry3PdWESzRY 14 | MODULE: skltemplate 15 | PROJECT_NAME: sklearn-template 16 | CLOUD_STORAGE: CLOUDFILES 17 | CLOUD_CONTATINER: sklearn-template-trial 18 | 19 | matrix: 20 | - PYTHON: "C:\\Python27" 21 | PYTHON_VERSION: "2.7.8" 22 | PYTHON_ARCH: "32" 23 | MINICONDA: "C:\\Miniconda" 24 | 25 | - PYTHON: "C:\\Python27-x64" 26 | PYTHON_VERSION: "2.7.8" 27 | PYTHON_ARCH: "64" 28 | MINICONDA: "C:\\Miniconda-x64" 29 | 30 | - PYTHON: "C:\\Python35" 31 | PYTHON_VERSION: "3.5.0" 32 | PYTHON_ARCH: "32" 33 | MINICONDA: "C:\\Miniconda35" 34 | 35 | - PYTHON: "C:\\Python35-x64" 36 | PYTHON_VERSION: "3.5.0" 37 | PYTHON_ARCH: "64" 38 | MINICONDA: "C:\\Miniconda35-x64" 39 | 40 | install: 41 | # Miniconda is pre-installed in the worker build 42 | - "SET PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%" 43 | - "python -m pip install -U pip" 44 | 45 | # Check that we have the expected version and architecture for Python 46 | - "python --version" 47 | - "python -c \"import struct; print(struct.calcsize('P') * 8)\"" 48 | - "pip --version" 49 | 50 | # Remove cygwin because it clashes with conda 51 | # see http://help.appveyor.com/discussions/problems/3712-git-remote-https-seems-to-be-broken 52 | - rmdir C:\\cygwin /s /q 53 | 54 | # Install the build and runtime 
dependencies of the project. 55 | - conda install --quiet --yes numpy scipy cython nose scikit-learn wheel 56 | - pip install wheelhouse_uploader nose-timer 57 | - "%CMD_IN_ENV% python setup.py bdist_wheel bdist_wininst" 58 | - ps: "ls dist" 59 | 60 | # Install the generated wheel package to test it 61 | - "pip install --pre --no-index --find-links dist %PROJECT_NAME%" 62 | 63 | 64 | # Not a .NET project, we build scikit-learn in the install step instead 65 | build: false 66 | 67 | 68 | artifacts: 69 | # Archive the generated wheel package in the ci.appveyor.com build report. 70 | - path: dist\* 71 | 72 | 73 | on_success: 74 | # Upload the generated wheel package to Rackspace 75 | # On Windows, Apache Libcloud cannot find a standard CA cert bundle so we 76 | # disable the ssl checks. 77 | - "python -m wheelhouse_uploader upload provider=%CLOUD_STORAGE% --no-ssl-check --local-folder=dist %CLOUD_CONTAINER%" 78 | 79 | 80 | test_script: 81 | # Change to a non-source folder to make sure we run the tests on the 82 | # installed library. 83 | - "mkdir empty_folder" 84 | - "cd empty_folder" 85 | 86 | - "python -c \"import nose; nose.main()\" --with-timer --timer-top-n 20 -s -v %MODULE%" 87 | 88 | # Move back to the project folder 89 | - "cd .." 90 | 91 | 92 | cache: 93 | # Use the appveyor cache to avoid re-downloading large archives such 94 | # the MKL numpy and scipy wheels mirrored on a rackspace cloud 95 | # container, speed up the appveyor jobs and reduce bandwidth 96 | # usage on our rackspace account. 
97 | - '%APPDATA%\pip\Cache' -------------------------------------------------------------------------------- /ci_scripts/appveyor/run_with_env.cmd: -------------------------------------------------------------------------------- 1 | :: To build extensions for 64 bit Python 3, we need to configure environment 2 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: 3 | :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) 4 | :: 5 | :: To build extensions for 64 bit Python 2, we need to configure environment 6 | :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: 7 | :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) 8 | :: 9 | :: 32 bit builds, and 64-bit builds for 3.5 and beyond, do not require specific 10 | :: environment configurations. 11 | :: 12 | :: Note: this script needs to be run with the /E:ON and /V:ON flags for the 13 | :: cmd interpreter, at least for (SDK v7.0) 14 | :: 15 | :: More details at: 16 | :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows 17 | :: http://stackoverflow.com/a/13751649/163740 18 | :: 19 | :: Author: Olivier Grisel 20 | :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ 21 | :: 22 | :: Notes about batch files for Python people: 23 | :: 24 | :: Quotes in values are literally part of the values: 25 | :: SET FOO="bar" 26 | :: FOO is now five characters long: " b a r " 27 | :: If you don't want quotes, don't include them on the right-hand side. 28 | :: 29 | :: The CALL lines at the end of this file look redundant, but if you move them 30 | :: outside of the IF clauses, they do not run properly in the SET_SDK_64==Y 31 | :: case, I don't know why. 
32 | @ECHO OFF 33 | 34 | SET COMMAND_TO_RUN=%* 35 | SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows 36 | SET WIN_WDK=c:\Program Files (x86)\Windows Kits\10\Include\wdf 37 | 38 | :: Extract the major and minor versions, and allow for the minor version to be 39 | :: more than 9. This requires the version number to have two dots in it. 40 | SET MAJOR_PYTHON_VERSION=%PYTHON_VERSION:~0,1% 41 | IF "%PYTHON_VERSION:~3,1%" == "." ( 42 | SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,1% 43 | ) ELSE ( 44 | SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,2% 45 | ) 46 | 47 | :: Based on the Python version, determine what SDK version to use, and whether 48 | :: to set the SDK for 64-bit. 49 | IF %MAJOR_PYTHON_VERSION% == 2 ( 50 | SET WINDOWS_SDK_VERSION="v7.0" 51 | SET SET_SDK_64=Y 52 | ) ELSE ( 53 | IF %MAJOR_PYTHON_VERSION% == 3 ( 54 | SET WINDOWS_SDK_VERSION="v7.1" 55 | IF %MINOR_PYTHON_VERSION% LEQ 4 ( 56 | SET SET_SDK_64=Y 57 | ) ELSE ( 58 | SET SET_SDK_64=N 59 | IF EXIST "%WIN_WDK%" ( 60 | :: See: https://connect.microsoft.com/VisualStudio/feedback/details/1610302/ 61 | REN "%WIN_WDK%" 0wdf 62 | ) 63 | ) 64 | ) ELSE ( 65 | ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" 66 | EXIT 1 67 | ) 68 | ) 69 | 70 | IF %PYTHON_ARCH% == 64 ( 71 | IF %SET_SDK_64% == Y ( 72 | ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture 73 | SET DISTUTILS_USE_SDK=1 74 | SET MSSdk=1 75 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% 76 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release 77 | ECHO Executing: %COMMAND_TO_RUN% 78 | call %COMMAND_TO_RUN% || EXIT 1 79 | ) ELSE ( 80 | ECHO Using default MSVC build environment for 64 bit architecture 81 | ECHO Executing: %COMMAND_TO_RUN% 82 | call %COMMAND_TO_RUN% || EXIT 1 83 | ) 84 | ) ELSE ( 85 | ECHO Using default MSVC build environment for 32 bit architecture 86 | ECHO Executing: %COMMAND_TO_RUN% 87 | call 
%COMMAND_TO_RUN% || EXIT 1 88 | ) 89 | -------------------------------------------------------------------------------- /ci_scripts/circleci/build_doc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | set -e 4 | 5 | # Decide what kind of documentation build to run, and run it. 6 | # 7 | # If the last commit message has a "[doc skip]" marker, do not build 8 | # the doc. On the contrary if a "[doc build]" marker is found, build the doc 9 | # instead of relying on the subsequent rules. 10 | # 11 | # We always build the documentation for jobs that are not related to a specific 12 | # PR (e.g. a merge to master or a maintenance branch). 13 | # 14 | # If this is a PR, do a full build if there are some files in this PR that are 15 | # under the "doc/" or "examples/" folders, otherwise perform a quick build. 16 | # 17 | # If the inspection of the current commit fails for any reason, the default 18 | # behavior is to quick build the documentation. 
19 | 20 | get_build_type() { 21 | if [ -z "$CIRCLE_SHA1" ] 22 | then 23 | echo SKIP: undefined CIRCLE_SHA1 24 | return 25 | fi 26 | commit_msg=$(git log --format=%B -n 1 $CIRCLE_SHA1) 27 | if [ -z "$commit_msg" ] 28 | then 29 | echo QUICK BUILD: failed to inspect commit $CIRCLE_SHA1 30 | return 31 | fi 32 | if [[ "$commit_msg" =~ \[doc\ skip\] ]] 33 | then 34 | echo SKIP: [doc skip] marker found 35 | return 36 | fi 37 | if [[ "$commit_msg" =~ \[doc\ quick\] ]] 38 | then 39 | echo QUICK: [doc quick] marker found 40 | return 41 | fi 42 | if [[ "$commit_msg" =~ \[doc\ build\] ]] 43 | then 44 | echo BUILD: [doc build] marker found 45 | return 46 | fi 47 | if [ -z "$CI_PULL_REQUEST" ] 48 | then 49 | echo BUILD: not a pull request 50 | return 51 | fi 52 | git_range="origin/master...$CIRCLE_SHA1" 53 | git fetch origin master >&2 || (echo QUICK BUILD: failed to get changed filenames for $git_range; return) 54 | filenames=$(git diff --name-only $git_range) 55 | if [ -z "$filenames" ] 56 | then 57 | echo QUICK BUILD: no changed filenames for $git_range 58 | return 59 | fi 60 | if echo "$filenames" | grep -q -e ^examples/ 61 | then 62 | echo BUILD: detected examples/ filename modified in $git_range: $(echo "$filenames" | grep -e ^examples/ | head -n1) 63 | return 64 | fi 65 | echo QUICK BUILD: no examples/ filename modified in $git_range: 66 | echo "$filenames" 67 | } 68 | 69 | build_type=$(get_build_type) 70 | if [[ "$build_type" =~ ^SKIP ]] 71 | then 72 | exit 0 73 | fi 74 | 75 | MAKE_TARGET=html 76 | 77 | # deactivate circleci virtualenv and setup a miniconda env instead 78 | if [[ `type -t deactivate` ]]; then 79 | deactivate 80 | fi 81 | 82 | # Install dependencies with miniconda 83 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 84 | -O miniconda.sh 85 | chmod +x miniconda.sh && ./miniconda.sh -b -p $MINICONDA_PATH 86 | export PATH="$MINICONDA_PATH/bin:$PATH" 87 | conda update --yes --quiet conda 88 | 89 | # Configure the conda environment 
and put it in the path using the 90 | # provided versions 91 | conda create -n $CONDA_ENV_NAME --yes --quiet python=3 92 | source activate $CONDA_ENV_NAME 93 | 94 | conda install --yes pip numpy scipy scikit-learn pillow matplotlib sphinx \ 95 | sphinx_rtd_theme numpydoc 96 | pip install -U git+https://github.com/sphinx-gallery/sphinx-gallery.git 97 | 98 | # Build and install imbalanced-learn in dev mode 99 | ls -l 100 | pip install -e . 101 | 102 | # The pipefail is requested to propagate exit code 103 | set -o pipefail && cd doc && make $MAKE_TARGET 2>&1 | tee ~/log.txt 104 | 105 | cd - 106 | set +o pipefail 107 | -------------------------------------------------------------------------------- /ci_scripts/circleci/checkout_merge_commit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Add `master` branch to the update list. 4 | # Otherwise CircleCI will give us a cached one. 5 | FETCH_REFS="+master:master" 6 | 7 | # Update PR refs for testing. 8 | if [[ -n "${CIRCLE_PR_NUMBER}" ]] 9 | then 10 | FETCH_REFS="${FETCH_REFS} +refs/pull/${CIRCLE_PR_NUMBER}/head:pr/${CIRCLE_PR_NUMBER}/head" 11 | FETCH_REFS="${FETCH_REFS} +refs/pull/${CIRCLE_PR_NUMBER}/merge:pr/${CIRCLE_PR_NUMBER}/merge" 12 | fi 13 | 14 | # Retrieve the refs. 15 | git fetch -u origin ${FETCH_REFS} 16 | 17 | # Checkout the PR merge ref. 18 | if [[ -n "${CIRCLE_PR_NUMBER}" ]] 19 | then 20 | git checkout -qf "pr/${CIRCLE_PR_NUMBER}/merge" || ( 21 | echo Could not fetch merge commit. >&2 22 | echo There may be conflicts in merging PR \#${CIRCLE_PR_NUMBER} with master. >&2; 23 | exit 1) 24 | fi 25 | 26 | # Check for merge conflicts. 
27 | if [[ -n "${CIRCLE_PR_NUMBER}" ]] 28 | then 29 | git branch --merged | grep master > /dev/null 30 | git branch --merged | grep "pr/${CIRCLE_PR_NUMBER}/head" > /dev/null 31 | fi 32 | -------------------------------------------------------------------------------- /ci_scripts/circleci/push_doc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is meant to be called in the "deploy" step defined in 3 | # circle.yml. See https://circleci.com/docs/ for more details. 4 | # The behavior of the script is controlled by environment variable defined 5 | # in the circle.yml in the top level folder of the project. 6 | 7 | GENERATED_DOC_DIR=$1 8 | 9 | if [[ -z "$GENERATED_DOC_DIR" ]]; then 10 | echo "Need to pass directory of the generated doc as argument" 11 | echo "Usage: $0 " 12 | exit 1 13 | fi 14 | 15 | # Absolute path needed because we use cd further down in this script 16 | GENERATED_DOC_DIR=$(readlink -f $GENERATED_DOC_DIR) 17 | 18 | if [ "$CIRCLE_BRANCH" = "master" ] 19 | then 20 | dir=docs # NOTE: I needed to change this from dev to docs for gh-pages to work 21 | else 22 | # Strip off .X 23 | dir="${CIRCLE_BRANCH::-2}" 24 | fi 25 | 26 | MSG="Pushing the docs to $dir/ for branch: $CIRCLE_BRANCH, commit $CIRCLE_SHA1" 27 | 28 | cd $HOME 29 | if [ ! 
-d $DOC_REPO ]; 30 | then git clone --depth 1 --no-checkout -b gh-pages "git@github.com:"$USERNAME"/"$DOC_REPO".git"; 31 | fi 32 | cd $DOC_REPO 33 | git config core.sparseCheckout true 34 | echo $dir > .git/info/sparse-checkout 35 | git checkout gh-pages 36 | git reset --hard origin/gh-pages 37 | git rm -rf $dir/ && rm -rf $dir/ 38 | cp -R $GENERATED_DOC_DIR $dir 39 | touch $dir/.nojekyll 40 | git config --global user.email $EMAIL 41 | git config --global user.name $USERNAME 42 | git config --global push.default matching 43 | git add -f $dir/ 44 | git commit -m "$MSG" $dir 45 | git push origin gh-pages 46 | 47 | echo $MSG 48 | -------------------------------------------------------------------------------- /ci_scripts/travis/install.sh: -------------------------------------------------------------------------------- 1 | # Deactivate the travis-provided virtual environment and setup a 2 | # conda-based environment instead 3 | deactivate 4 | 5 | # Use the miniconda installer for faster download / install of conda 6 | # itself 7 | pushd . 8 | cd 9 | mkdir -p download 10 | cd download 11 | echo "Cached in $HOME/download :" 12 | ls -l 13 | echo 14 | if [[ ! -f miniconda.sh ]] 15 | then 16 | wget http://repo.continuum.io/miniconda/Miniconda-3.6.0-Linux-x86_64.sh \ 17 | -O miniconda.sh 18 | fi 19 | chmod +x miniconda.sh && ./miniconda.sh -b 20 | cd .. 
21 | export PATH=/home/travis/miniconda/bin:$PATH 22 | conda update --yes conda 23 | popd 24 | 25 | # Configure the conda environment and put it in the path using the 26 | # provided versions 27 | conda create -n testenv --yes python=$PYTHON_VERSION pip nose \ 28 | numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION cython=$CYTHON_VERSION 29 | 30 | source activate testenv 31 | 32 | 33 | if [[ "$COVERAGE" == "true" ]]; then 34 | pip install coverage coveralls 35 | fi 36 | 37 | python --version 38 | python -c "import numpy; print('numpy %s' % numpy.__version__)" 39 | python -c "import scipy; print('scipy %s' % scipy.__version__)" 40 | python setup.py develop 41 | -------------------------------------------------------------------------------- /ci_scripts/travis/success.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | if [[ "$COVERAGE" == "true" ]]; then 4 | # Need to run coveralls from a git checkout, so we copy .coverage 5 | # from TEST_DIR where nosetests has been run 6 | cp $TEST_DIR/.coverage $TRAVIS_BUILD_DIR 7 | cd $TRAVIS_BUILD_DIR 8 | # Ignore coveralls failures as the coveralls server is not 9 | # very reliable but we don't want travis to report a failure 10 | # in the github UI just because the coverage report failed to 11 | # be published. 
12 | coveralls || echo "Coveralls upload failed" 13 | fi -------------------------------------------------------------------------------- /ci_scripts/travis/test.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | # Get into a temp directory to run test from the installed scikit learn and 4 | # check if we do not leave artifacts 5 | mkdir -p $TEST_DIR 6 | 7 | cd $TEST_DIR 8 | 9 | if [[ "$COVERAGE" == "true" ]]; then 10 | nosetests -s --with-coverage --cover-package=$MODULE $MODULE 11 | else 12 | nosetests -s $MODULE 13 | fi 14 | -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | build: 4 | docker: 5 | - image: circleci/python:3.6.1 6 | environment: 7 | - USERNAME: "thuijskens" 8 | - DOC_REPO: "stability-selection" 9 | - DOC_URL: "docs" 10 | - EMAIL: "thomas_huijskens@hotmail.com" 11 | - MINICONDA_PATH: ~/miniconda 12 | - CONDA_ENV_NAME: testenv 13 | - PYTHON_VERSION: 3 14 | 15 | steps: 16 | - checkout 17 | - run: ./ci_scripts/circleci/checkout_merge_commit.sh 18 | - run: ./ci_scripts/circleci/build_doc.sh 19 | - store_artifacts: 20 | path: doc/_build/html 21 | destination: doc 22 | - store_artifacts: 23 | path: ~/log.txt 24 | - persist_to_workspace: 25 | root: doc/_build/html 26 | paths: . 
27 | - attach_workspace: 28 | at: doc/_build/html 29 | - run: ls -ltrh doc/_build/html 30 | - deploy: 31 | command: | 32 | if [[ "${CIRCLE_BRANCH}" =~ ^master$|^[0-9]+\.[0-9]+\.X$ ]]; then 33 | bash ./ci_scripts/circleci/push_doc.sh doc/_build/html 34 | fi 35 | filters: 36 | branches: 37 | ignore: 38 | - gh-pages 39 | 40 | 41 | workflows: 42 | version: 2 43 | build-doc-and-deploy: 44 | jobs: 45 | - build 46 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | -rm -rf $(BUILDDIR)/* 51 | -rm -rf auto_examples/ 52 | -rm -rf generated/* 53 | -rm -rf modules/generated/* 54 | 55 | html: 56 | # These two lines make the build a bit more lengthy, and the 57 | # the embedding of images more robust 58 | rm -rf $(BUILDDIR)/html/_images 59 | #rm -rf _build/doctrees/ 60 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 61 | @echo 62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
63 | 64 | dirhtml: 65 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 66 | @echo 67 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 68 | 69 | singlehtml: 70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 71 | @echo 72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 73 | 74 | pickle: 75 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 76 | @echo 77 | @echo "Build finished; now you can process the pickle files." 78 | 79 | json: 80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 81 | @echo 82 | @echo "Build finished; now you can process the JSON files." 83 | 84 | htmlhelp: 85 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 86 | @echo 87 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 88 | ".hhp project file in $(BUILDDIR)/htmlhelp." 89 | 90 | qthelp: 91 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 92 | @echo 93 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 94 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 95 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/project-template.qhcp" 96 | @echo "To view the help file:" 97 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/project-template.qhc" 98 | 99 | devhelp: 100 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 101 | @echo 102 | @echo "Build finished." 103 | @echo "To view the help file:" 104 | @echo "# mkdir -p $$HOME/.local/share/devhelp/project-template" 105 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/project-template" 106 | @echo "# devhelp" 107 | 108 | epub: 109 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 110 | @echo 111 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 112 | 113 | latex: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo 116 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 
117 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 118 | "(use \`make latexpdf' here to do that automatically)." 119 | 120 | latexpdf: 121 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 122 | @echo "Running LaTeX files through pdflatex..." 123 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 124 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 125 | 126 | latexpdfja: 127 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 128 | @echo "Running LaTeX files through platex and dvipdfmx..." 129 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 130 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 131 | 132 | text: 133 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 134 | @echo 135 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 136 | 137 | man: 138 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 139 | @echo 140 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 141 | 142 | texinfo: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo 145 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 146 | @echo "Run \`make' in that directory to run these through makeinfo" \ 147 | "(use \`make info' here to do that automatically)." 148 | 149 | info: 150 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 151 | @echo "Running Texinfo files through makeinfo..." 152 | make -C $(BUILDDIR)/texinfo info 153 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 154 | 155 | gettext: 156 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 157 | @echo 158 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 159 | 160 | changes: 161 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 162 | @echo 163 | @echo "The overview file is in $(BUILDDIR)/changes." 
164 | 165 | linkcheck: 166 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 167 | @echo 168 | @echo "Link check complete; look for any errors in the above output " \ 169 | "or in $(BUILDDIR)/linkcheck/output.txt." 170 | 171 | doctest: 172 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 173 | @echo "Testing of doctests in the sources finished, look at the " \ 174 | "results in $(BUILDDIR)/doctest/output.txt." 175 | 176 | xml: 177 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 178 | @echo 179 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 180 | 181 | pseudoxml: 182 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 183 | @echo 184 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 185 | -------------------------------------------------------------------------------- /doc/api.rst: -------------------------------------------------------------------------------- 1 | API Documentation 2 | ================= 3 | 4 | * :doc:`stability_selection` 5 | * :doc:`randomized_lasso` -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # project-template documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Jan 18 14:44:12 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | import sphinx_rtd_theme 19 | 20 | # If extensions (or modules to document with autodoc) are in another directory, 21 | # add these directories to sys.path here. 
If the directory is relative to the 22 | # documentation root, use os.path.abspath to make it absolute, like shown here. 23 | #sys.path.insert(0, os.path.abspath('.')) 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # Try to override the matplotlib configuration as early as possible 28 | try: 29 | import gen_rst 30 | except: 31 | pass 32 | # -- General configuration ------------------------------------------------ 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | #needs_sphinx = '1.0' 36 | 37 | # Add any Sphinx extension module names here, as strings. They can be 38 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 39 | # ones. 40 | extensions = [ 41 | 'sphinx.ext.autodoc', 42 | 'sphinx.ext.autosummary', 43 | 'sphinx.ext.doctest', 44 | 'sphinx.ext.intersphinx', 45 | 'sphinx.ext.todo', 46 | 'numpydoc', 47 | 'sphinx.ext.ifconfig', 48 | 'sphinx.ext.viewcode', 49 | 'sphinx_gallery.gen_gallery', 50 | 'sphinx.ext.mathjax' 51 | ] 52 | 53 | numpydoc_show_class_members = False 54 | 55 | sphinx_gallery_conf = { 56 | # path to your examples scripts 57 | 'examples_dirs' : '../examples', 58 | # path where to save gallery generated examples 59 | 'gallery_dirs' : 'auto_examples', 60 | 'backreferences_dir': os.path.join('generated'), 61 | } 62 | 63 | # Add any paths that contain templates here, relative to this directory. 64 | templates_path = ['_templates'] 65 | 66 | # The suffix of source filenames. 67 | source_suffix = '.rst' 68 | 69 | # The encoding of source files. 70 | #source_encoding = 'utf-8-sig' 71 | 72 | # Generate the plots for the gallery 73 | plot_gallery = True 74 | 75 | # The master toctree document. 76 | master_doc = 'index' 77 | 78 | # General information about the project. 
79 | project = u'stability-selection' 80 | copyright = u'2018, Thomas Huijskens' 81 | 82 | # The version info for the project you're documenting, acts as replacement for 83 | # |version| and |release|, also used in various other places throughout the 84 | # built documents. 85 | # 86 | # The short X.Y version. 87 | version = '0.1' 88 | # The full version, including alpha/beta/rc tags. 89 | release = '0.1.0' 90 | 91 | # The language for content autogenerated by Sphinx. Refer to documentation 92 | # for a list of supported languages. 93 | #language = None 94 | 95 | # There are two options for replacing |today|: either, you set today to some 96 | # non-false value, then it is used: 97 | #today = '' 98 | # Else, today_fmt is used as the format for a strftime call. 99 | #today_fmt = '%B %d, %Y' 100 | 101 | # List of patterns, relative to source directory, that match files and 102 | # directories to ignore when looking for source files. 103 | exclude_patterns = ['_build'] 104 | 105 | # The reST default role (used for this markup: `text`) to use for all 106 | # documents. 107 | #default_role = None 108 | 109 | # If true, '()' will be appended to :func: etc. cross-reference text. 110 | #add_function_parentheses = True 111 | 112 | # If true, the current module name will be prepended to all description 113 | # unit titles (such as .. function::). 114 | #add_module_names = True 115 | 116 | # If true, sectionauthor and moduleauthor directives will be shown in the 117 | # output. They are ignored by default. 118 | #show_authors = False 119 | 120 | # The name of the Pygments (syntax highlighting) style to use. 121 | pygments_style = 'sphinx' 122 | 123 | # A list of ignored prefixes for module index sorting. 124 | #modindex_common_prefix = [] 125 | 126 | # If true, keep warnings as "system message" paragraphs in the built documents. 
127 | #keep_warnings = False 128 | 129 | 130 | # -- Options for HTML output ---------------------------------------------- 131 | 132 | # The theme to use for HTML and HTML Help pages. See the documentation for 133 | # a list of builtin themes. 134 | html_theme = 'sphinx_rtd_theme' 135 | 136 | # Theme options are theme-specific and customize the look and feel of a theme 137 | # further. For a list of options available for each theme, see the 138 | # documentation. 139 | #html_theme_options = {} 140 | 141 | # Add any paths that contain custom themes here, relative to this directory. 142 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 143 | 144 | # The name for this set of Sphinx documents. If None, it defaults to 145 | # " v documentation". 146 | #html_title = None 147 | 148 | # A shorter title for the navigation bar. Default is the same as html_title. 149 | #html_short_title = None 150 | 151 | # The name of an image file (relative to this directory) to place at the top 152 | # of the sidebar. 153 | #html_logo = None 154 | 155 | # The name of an image file (within the static path) to use as favicon of the 156 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 157 | # pixels large. 158 | #html_favicon = None 159 | 160 | # Add any paths that contain custom static files (such as style sheets) here, 161 | # relative to this directory. They are copied after the builtin static files, 162 | # so a file named "default.css" will overwrite the builtin "default.css". 163 | html_static_path = ['_static'] 164 | 165 | # Add any extra paths that contain custom files (such as robots.txt or 166 | # .htaccess) here, relative to this directory. These files are copied 167 | # directly to the root of the documentation. 168 | #html_extra_path = [] 169 | 170 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 171 | # using the given strftime format. 
172 | #html_last_updated_fmt = '%b %d, %Y' 173 | 174 | # If true, SmartyPants will be used to convert quotes and dashes to 175 | # typographically correct entities. 176 | #html_use_smartypants = True 177 | 178 | # Custom sidebar templates, maps document names to template names. 179 | #html_sidebars = {} 180 | 181 | # Additional templates that should be rendered to pages, maps page names to 182 | # template names. 183 | #html_additional_pages = {} 184 | 185 | # If false, no module index is generated. 186 | #html_domain_indices = True 187 | 188 | # If false, no index is generated. 189 | #html_use_index = True 190 | 191 | # If true, the index is split into individual pages for each letter. 192 | #html_split_index = False 193 | 194 | # If true, links to the reST sources are added to the pages. 195 | #html_show_sourcelink = True 196 | 197 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 198 | #html_show_sphinx = True 199 | 200 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 201 | #html_show_copyright = True 202 | 203 | # If true, an OpenSearch description file will be output, and all pages will 204 | # contain a tag referring to it. The value of this option must be the 205 | # base URL from which the finished HTML is served. 206 | #html_use_opensearch = '' 207 | 208 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 209 | #html_file_suffix = None 210 | 211 | # Output file base name for HTML help builder. 212 | htmlhelp_basename = 'project-templatedoc' 213 | 214 | 215 | # -- Options for LaTeX output --------------------------------------------- 216 | 217 | latex_elements = { 218 | # The paper size ('letterpaper' or 'a4paper'). 219 | #'papersize': 'letterpaper', 220 | 221 | # The font size ('10pt', '11pt' or '12pt'). 222 | #'pointsize': '10pt', 223 | 224 | # Additional stuff for the LaTeX preamble. 225 | #'preamble': '', 226 | } 227 | 228 | # Grouping the document tree into LaTeX files. 
List of tuples 229 | # (source start file, target name, title, 230 | # author, documentclass [howto, manual, or own class]). 231 | latex_documents = [ 232 | ('index', 'stability-selection.tex', u'stability-selection Documentation', 233 | u'Thomas Huijskens', 'manual'), 234 | ] 235 | 236 | # The name of an image file (relative to this directory) to place at the top of 237 | # the title page. 238 | #latex_logo = None 239 | 240 | # For "manual" documents, if this is true, then toplevel headings are parts, 241 | # not chapters. 242 | #latex_use_parts = False 243 | 244 | # If true, show page references after internal links. 245 | #latex_show_pagerefs = False 246 | 247 | # If true, show URL addresses after external links. 248 | #latex_show_urls = False 249 | 250 | # Documents to append as an appendix to all manuals. 251 | #latex_appendices = [] 252 | 253 | # If false, no module index is generated. 254 | #latex_domain_indices = True 255 | 256 | 257 | # -- Options for manual page output --------------------------------------- 258 | 259 | # One entry per manual page. List of tuples 260 | # (source start file, name, description, authors, manual section). 261 | man_pages = [ 262 | ('index', 'stability-selection', u'stability-selection Documentation', 263 | [u'Thomas Huijskens'], 1) 264 | ] 265 | 266 | # If true, show URL addresses after external links. 267 | #man_show_urls = False 268 | 269 | 270 | # -- Options for Texinfo output ------------------------------------------- 271 | 272 | # Grouping the document tree into Texinfo files. 
List of tuples 273 | # (source start file, target name, title, author, 274 | # dir menu entry, description, category) 275 | texinfo_documents = [ 276 | ('index', 'stability-selection', u'stability-selection Documentation', 277 | u'Thomas Huijskens', 'stability-selection', 'scikit-learn compatible implementation of stability selection.', 278 | 'Miscellaneous'), 279 | ] 280 | 281 | # def generate_example_rst(app, what, name, obj, options, lines): 282 | # # generate empty examples files, so that we don't get 283 | # # inclusion errors if there are no examples for a class / module 284 | # examples_path = os.path.join(app.srcdir, "modules", "generated", 285 | # "%s.examples" % name) 286 | # if not os.path.exists(examples_path): 287 | # # touch file 288 | # open(examples_path, 'w').close() 289 | # 290 | # 291 | # def setup(app): 292 | # app.connect('autodoc-process-docstring', generate_example_rst) 293 | 294 | # Documents to append as an appendix to all manuals. 295 | #texinfo_appendices = [] 296 | 297 | # If false, no module index is generated. 298 | #texinfo_domain_indices = True 299 | 300 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 301 | #texinfo_show_urls = 'footnote' 302 | 303 | # If true, do not generate a @detailmenu in the "Top" node's menu. 304 | #texinfo_no_detailmenu = False 305 | 306 | 307 | # Example configuration for intersphinx: refer to the Python standard library. 308 | intersphinx_mapping = {'http://docs.python.org/': None} 309 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. project-template documentation master file, created by 2 | sphinx-quickstart on Mon Jan 18 14:44:12 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to stability-selection's documentation! 
7 | =============================================== 8 | 9 | This project contains an implementation of the stability selection algorithm. 10 | 11 | Stability selection is a technique that aims to enhance and improve existing feature 12 | selection algorithms. For a generic feature selection algorithm, we have a tuning 13 | parameter :math:`\lambda \in \Lambda` that controls the amount of regularisation. Examples 14 | of such algorithms are: 15 | 16 | 1. :math:`\ell_1`-penalized regression (penalization parameter :math:`\lambda`). 17 | 2. Orthogonal matching pursuit (number of steps in forward selection). 18 | 3. Boosting (:math:`\ell_1` penalty) 19 | 20 | What these structure learning algorithms have in common is a parameter :math:`\lambda \in \Lambda` 21 | that controls the amount of regularisation. For every value of :math:`\lambda`, we obtain a structure 22 | estimate :math:`S^\lambda \subseteq \{1, \ldots, p\}`, which indicates which variables to select. We are 23 | interested in determining whether there exists a :math:`\lambda` such that :math:`S^\lambda` is identical to 24 | :math:`S` with high probability, and how to achieve the right amount of regularisation. 25 | 26 | 27 | Stability selection works as follows: 28 | 29 | 1. Define a candidate set of regularization parameters :math:`\Lambda` and a subsample number :math:`N`. 30 | 2. For each value :math:`\lambda \in \Lambda` do: 31 | 32 | a. For each :math:`i` in :math:`\{1, \ldots, N\}`, do: 33 | 34 | i. Generate a bootstrap sample of the original data :math:`X^{n \times p}` of size :math:`\frac{n}{2}`. 35 | ii. Run the selection algorithm (LASSO) on the bootstrap sample with regularization parameter :math:`\lambda`. 36 | 37 | b. Given the selection sets from each subsample, calculate the empirical selection probability for each model component: 38 | 39 | :math:`\hat{\Pi}^\lambda_k = \mathbb{P}[k \in \hat{S}^\lambda] = \frac{1}{N} \sum_{i = 1}^N \mathbb{I}_{\{k \in \hat{S}_i^\lambda\}}.` 40 | 41 | c. 
The selection probability for component :math:`k` is its probability of being selected by the algorithm. 42 | 43 | 3. Given the selection probabilities for each component and for each value of :math:`\lambda`, construct the 44 | stable set according to the following definition: 45 | 46 | :math:`\hat{S}^{\text{stable}} = \{k : \max_{\lambda \in \Lambda} \hat{\Pi}_k^\lambda \geq \pi_\text{thr}\}.` 47 | 48 | where :math:`\pi_\text{thr}` is a predefined threshold. 49 | 50 | This algorithm identifies a set of “stable” variables that are selected with high probability. 51 | 52 | 53 | .. toctree:: 54 | :maxdepth: 2 55 | 56 | api 57 | stability_selection 58 | randomized_lasso 59 | auto_examples/index 60 | ... 61 | 62 | See the `README `_ 63 | for more information. 64 | 65 | 66 | Indices and tables 67 | ================== 68 | 69 | * :ref:`genindex` 70 | * :ref:`modindex` 71 | * :ref:`search` 72 | 73 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. 
qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. 
The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\project-template.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\project-template.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 
145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 
214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /doc/randomized_lasso.rst: -------------------------------------------------------------------------------- 1 | Randomized LASSO 2 | ================ 3 | 4 | The documentation of the randomized_lasso module. 5 | 6 | .. automodule:: stability_selection.randomized_lasso 7 | :members: -------------------------------------------------------------------------------- /doc/stability_selection.rst: -------------------------------------------------------------------------------- 1 | Stability selection 2 | =================== 3 | 4 | The documentation of the stability_selection module. 5 | 6 | .. automodule:: stability_selection.stability_selection 7 | :members: -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _general_examples: 2 | 3 | General examples 4 | ================ 5 | 6 | Introductory examples. 
7 | -------------------------------------------------------------------------------- /examples/plot_randomized_lasso_path.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================== 3 | Randomized LASSO example 4 | =========================== 5 | 6 | An example plot of the stability scores for each variable after fitting :class:`stability_selection.StabilitySelection` 7 | with :class:`stability_selection.RandomizedLasso` 8 | """ 9 | 10 | import numpy as np 11 | 12 | from sklearn.utils import check_random_state 13 | from stability_selection import StabilitySelection, RandomizedLasso, plot_stability_path 14 | 15 | 16 | def generate_experiment_data(n=200, p=200, rho=0.6, random_state=3245): 17 | rng = check_random_state(random_state) 18 | 19 | sigma = np.eye(p) 20 | sigma[0, 2] = rho 21 | sigma[2, 0] = rho 22 | sigma[1, 2] = rho 23 | sigma[2, 1] = rho 24 | 25 | X = rng.multivariate_normal(mean=np.zeros(p), cov=sigma, size=(n,)) 26 | beta = np.zeros(p) 27 | beta[:2] = 1.0 28 | epsilon = rng.normal(0.0, 0.25, size=(n,)) 29 | 30 | y = np.matmul(X, beta) + epsilon 31 | 32 | return X, y 33 | 34 | 35 | if __name__ == '__main__': 36 | n, p = 200, 200 37 | rho = 0.6 38 | 39 | X, y = generate_experiment_data() 40 | lambda_grid = np.linspace(0.001, 0.5, num=100) 41 | 42 | for weakness in [0.2, 0.5, 1.0]: 43 | estimator = RandomizedLasso(weakness=weakness) 44 | selector = StabilitySelection(base_estimator=estimator, lambda_name='alpha', 45 | lambda_grid=lambda_grid, threshold=0.9, verbose=1) 46 | selector.fit(X, y) 47 | 48 | fig, ax = plot_stability_path(selector) 49 | fig.show() 50 | 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /examples/plot_stability_scores.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================== 3 | Plotting stability scores 4 | =========================== 5 | 6 
| An example plot of the stability scores for each variable after fitting :class:`stability_selection.stability_selection.StabilitySelection` 7 | """ 8 | import numpy as np 9 | 10 | from sklearn.linear_model import LogisticRegression 11 | from sklearn.pipeline import Pipeline 12 | from sklearn.preprocessing import StandardScaler 13 | from sklearn.utils import check_random_state 14 | from stability_selection import StabilitySelection, plot_stability_path 15 | 16 | 17 | def _generate_dummy_classification_data(p=1000, n=1000, k=5, random_state=123321): 18 | 19 | rng = check_random_state(random_state) 20 | 21 | X = rng.normal(loc=0.0, scale=1.0, size=(n, p)) 22 | betas = np.zeros(p) 23 | important_betas = np.sort(rng.choice(a=np.arange(p), size=k)) 24 | betas[important_betas] = rng.uniform(size=k) 25 | 26 | probs = 1 / (1 + np.exp(-1 * np.matmul(X, betas))) 27 | y = (probs > 0.5).astype(int) 28 | 29 | return X, y, important_betas 30 | 31 | 32 | if __name__ == '__main__': 33 | n, p, k = 500, 1000, 5 34 | 35 | X, y, important_betas = _generate_dummy_classification_data(n=n, k=k) 36 | 37 | base_estimator = Pipeline([ 38 | ('scaler', StandardScaler()), 39 | ('model', LogisticRegression(penalty='l1')) 40 | ]) 41 | selector = StabilitySelection(base_estimator=base_estimator, lambda_name='model__C', 42 | lambda_grid=np.logspace(-5, -1, 50)) 43 | selector.fit(X, y) 44 | 45 | fig, ax = plot_stability_path(selector) 46 | fig.show() 47 | 48 | selected_variables = selector.get_support(indices=True) 49 | selected_scores = selector.stability_scores_.max(axis=1) 50 | 51 | print('Selected variables are:') 52 | print('-----------------------') 53 | 54 | for idx, (variable, score) in enumerate(zip(selected_variables, selected_scores[selected_variables])): 55 | print('Variable %d: [%d], score %.3f' % (idx + 1, variable, score)) 56 | -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | nose>=1.1.2 2 | scikit-learn>=0.19 3 | matplotlib>=2.0.0 4 | numpy>=1.8.0 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | [aliases] 5 | # python2.7 has upgraded unittest and it is no longer compatible with some 6 | # of our tests, so we run all through nose 7 | test = nosetests 8 | 9 | [nosetests] 10 | # nosetests skips test files with the executable bit by default 11 | # which can silently hide failing tests. 12 | # There are no executable scripts within this project 13 | # so let's turn the --exe flag on to avoid skipping tests by 14 | # mistake. 15 | exe = 1 16 | cover-html = 1 17 | cover-html-dir = coverage 18 | cover-package = stability_selection 19 | 20 | detailed-errors = 1 21 | with-doctest = 1 22 | doctest-tests = 1 23 | doctest-extension = rst 24 | doctest-fixtures = _fixture 25 | ignore-files=^setup\.py$ -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | from setuptools import setup, find_packages 4 | 5 | with open('requirements.txt') as f: 6 | INSTALL_REQUIRES = [l.strip() for l in f.readlines() if l] 7 | 8 | 9 | try: 10 | import numpy 11 | except ImportError: 12 | print('numpy is required during installation') 13 | sys.exit(1) 14 | 15 | try: 16 | import scipy 17 | except ImportError: 18 | print('scipy is required during installation') 19 | sys.exit(1) 20 | 21 | setup(name='stability-selection', 22 | version='0.0.1', 23 | description='A scikit-learn compatible implementation of stability selection for feature selection', 24 | author='Thomas Huijskens', 25 | packages=find_packages(), 26 |
install_requires=INSTALL_REQUIRES, 27 | author_email='thomas_huijskens@hotmail.com', 28 | ) 29 | -------------------------------------------------------------------------------- /stability_selection/__init__.py: -------------------------------------------------------------------------------- 1 | from .stability_selection import StabilitySelection, plot_stability_path 2 | from .randomized_lasso import RandomizedLasso, RandomizedLogisticRegression 3 | 4 | __all__ = [ 5 | 'StabilitySelection', 'plot_stability_path', 'RandomizedLasso', 6 | 'RandomizedLogisticRegression' 7 | ] 8 | -------------------------------------------------------------------------------- /stability_selection/bootstrap.py: -------------------------------------------------------------------------------- 1 | """ 2 | =============================== 3 | Bootstrap helper functions 4 | =============================== 5 | 6 | This module contains helper functions for stability_selection.py 7 | that do bootstrap sampling 8 | """ 9 | 10 | import numpy as np 11 | 12 | from sklearn.utils.random import sample_without_replacement 13 | from sklearn.utils.multiclass import type_of_target 14 | 15 | 16 | __all__ = [ 17 | 'bootstrap_without_replacement', 18 | 'complementary_pairs_bootstrap', 19 | 'stratified_bootstrap' 20 | ] 21 | 22 | 23 | def bootstrap_without_replacement(y, n_subsamples, random_state=None): 24 | """ 25 | Bootstrap without replacement, irrespective of label. It is a wrapper around 26 | sklearn.utils.random.sample_without_replacement. 27 | 28 | Parameters 29 | ---------- 30 | y : array of size [n_subsamples,] 31 | True labels 32 | n_subsamples : int 33 | Number of subsamples in the bootstrap sample 34 | random_state : int, RandomState instance or None, optional, default=None 35 | Pseudo random number generator state used for random uniform sampling 36 | from lists of possible values instead of scipy.stats distributions. 
37 | If int, random_state is the seed used by the random number generator; 38 | If RandomState instance, random_state is the random number generator; 39 | If None, the random number generator is the RandomState instance used 40 | by `np.random`. 41 | 42 | Returns 43 | ------- 44 | out : array of size [n_subsamples,] 45 | The sampled subsets of integer. The subset of selected integer might 46 | not be randomized, see the method argument. 47 | """ 48 | n_samples = y.shape[0] 49 | return sample_without_replacement(n_samples, n_subsamples, 50 | random_state=random_state) 51 | 52 | 53 | def complementary_pairs_bootstrap(y, n_subsamples, random_state=None): 54 | """ 55 | Complementary pairs bootstrap. Two subsamples A and B are generated, such 56 | that |A| = n_subsamples, the union of A and B equals {0, ..., n_samples - 1}, 57 | and the intersection of A and B is the empty set. Samples irrespective of 58 | label. 59 | 60 | Parameters 61 | ---------- 62 | y : array of size [n_subsamples,] 63 | True labels 64 | n_subsamples : int 65 | Number of subsamples in the bootstrap sample 66 | random_state : int, RandomState instance or None, optional, default=None 67 | Pseudo random number generator state used for random uniform sampling 68 | from lists of possible values instead of scipy.stats distributions. 69 | If int, random_state is the seed used by the random number generator; 70 | If RandomState instance, random_state is the random number generator; 71 | If None, the random number generator is the RandomState instance used 72 | by `np.random`. 73 | 74 | Returns 75 | ------- 76 | A : array of size [n_subsamples,] 77 | The sampled subsets of integer. The subset of selected integer 78 | might not be randomized, see the method argument. 79 | B : array of size [n_samples - n_subsamples,] 80 | The complement of A. 
81 | """ 82 | n_samples = y.shape[0] 83 | subsample = bootstrap_without_replacement(y, n_subsamples, random_state) 84 | complementary_subsample = np.setdiff1d(np.arange(n_samples), subsample) 85 | 86 | return subsample, complementary_subsample 87 | 88 | 89 | def stratified_bootstrap(y, n_subsamples, random_state=None): 90 | """ 91 | Bootstrap without replacement, performed separately for each group in y. 92 | 93 | Parameters 94 | ---------- 95 | y : array of size [n_subsamples,] 96 | True labels 97 | n_subsamples : int 98 | Number of subsamples in the bootstrap sample 99 | random_state : int, RandomState instance or None, optional, default=None 100 | Pseudo random number generator state used for random uniform sampling 101 | from lists of possible values instead of scipy.stats distributions. 102 | If int, random_state is the seed used by the random number generator; 103 | If RandomState instance, random_state is the random number generator; 104 | If None, the random number generator is the RandomState instance used 105 | by `np.random`. 106 | 107 | Returns 108 | ------- 109 | out : array of size [n_subsamples,] 110 | The sampled subsets of integer. The subset of selected integer might 111 | not be randomized, see the method argument. 112 | """ 113 | type_of_target_y = type_of_target(y) 114 | allowed_target_types = ('binary', 'multiclass') 115 | if type_of_target_y not in allowed_target_types: 116 | raise ValueError( 117 | 'Supported target types are: {}. Got {!r} instead.'.format( 118 | allowed_target_types, type_of_target_y)) 119 | 120 | unique_y, y_counts = np.unique(y, return_counts=True) 121 | y_counts_relative = y_counts / y_counts.sum() 122 | y_n_samples = np.int32(np.round(y_counts_relative * n_subsamples)) 123 | 124 | # the above should return grouped subsamples which approximately sum up 125 | # to n_subsamples but may not work out exactly due to rounding errors. 
126 | # If this is the case, adjust the count of the largest class 127 | if y_n_samples.sum() != n_subsamples: 128 | delta = n_subsamples - y_n_samples.sum() 129 | majority_class = np.argmax(y_counts) 130 | y_n_samples[majority_class] += delta 131 | 132 | all_selected = np.array([], dtype=np.int32) 133 | for i, u in enumerate(unique_y): 134 | indices = np.where(y == u)[0] 135 | selected_indices = indices[bootstrap_without_replacement(indices, 136 | y_n_samples[i], 137 | random_state)] 138 | all_selected = np.concatenate((all_selected, selected_indices)) 139 | 140 | return all_selected 141 | -------------------------------------------------------------------------------- /stability_selection/randomized_lasso.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================== 3 | Randomized LASSO estimators 4 | =========================== 5 | 6 | This module contains implementations of randomized logistic regression 7 | and randomized LASSO regression [1]_ . 8 | 9 | References 10 | ---------- 11 | .. [1] Meinshausen, N. and Buhlmann, P., 2010. Stability selection. 12 | Journal of the Royal Statistical Society: Series B 13 | (Statistical Methodology), 72(4), pp.417-473. 
14 | """ 15 | import numpy as np 16 | 17 | from scipy import sparse 18 | from scipy.sparse import issparse 19 | 20 | from sklearn.linear_model import LogisticRegression, Lasso 21 | from sklearn.linear_model.base import _preprocess_data 22 | from sklearn.utils import check_X_y, check_random_state 23 | 24 | __all__ = ['RandomizedLogisticRegression', 'RandomizedLasso'] 25 | 26 | 27 | def _rescale_data(X, weights): 28 | if issparse(X): 29 | size = weights.shape[0] 30 | weight_dia = sparse.dia_matrix((1 - weights, 0), (size, size)) 31 | X_rescaled = X * weight_dia 32 | else: 33 | X_rescaled = X * (1 - weights) 34 | 35 | return X_rescaled 36 | 37 | 38 | class RandomizedLogisticRegression(LogisticRegression): 39 | """ 40 | Randomized version of scikit-learns LogisticRegression class. 41 | 42 | Randomized LASSO is a generalization of the LASSO. The LASSO 43 | penalises the absolute value of the coefficients with a penalty 44 | term proportional to `C`, but the randomized LASSO changes the 45 | penalty to a randomly chosen value in the range `[C, C/weakness]`. 46 | 47 | Parameters 48 | ---------- 49 | weakness : float 50 | Weakness value for randomized LASSO. Must be in (0, 1]. 51 | 52 | See also 53 | -------- 54 | sklearn.linear_model.LogisticRegression : learns logistic regression 55 | models using the same algorithm. 
56 | """ 57 | def __init__(self, weakness=0.5, tol=1e-4, C=1.0, 58 | fit_intercept=True, intercept_scaling=1, class_weight=None, 59 | random_state=None, solver='liblinear', max_iter=100, 60 | multi_class='ovr', verbose=0, warm_start=False, n_jobs=1): 61 | self.weakness = weakness 62 | super(RandomizedLogisticRegression, self).__init__( 63 | penalty='l1', dual=False, tol=tol, C=C, fit_intercept=fit_intercept, 64 | intercept_scaling=intercept_scaling, class_weight=class_weight, 65 | random_state=random_state, solver=solver, max_iter=max_iter, 66 | multi_class=multi_class, verbose=verbose, warm_start=warm_start, 67 | n_jobs=n_jobs) 68 | 69 | def fit(self, X, y, sample_weight=None): 70 | """Fit the model according to the given training data. 71 | 72 | Parameters 73 | ---------- 74 | X : {array-like, sparse matrix}, shape = [n_samples, n_features] 75 | The training input samples. 76 | 77 | y : array-like, shape = [n_samples] 78 | The target values. 79 | 80 | sample_weight : array-like, shape (n_samples,) optional 81 | Array of weights that are assigned to individual samples. 82 | If not provided, then each sample is given unit weight. 83 | """ 84 | if not isinstance(self.weakness, float) or not (0.0 < self.weakness <= 1.0): 85 | raise ValueError('weakness should be a float in (0, 1], got %s' % self.weakness) 86 | 87 | X, y = check_X_y(X, y, accept_sparse='csr', dtype=[np.float64, np.float32], 88 | order="C") 89 | 90 | n_features = X.shape[1] 91 | weakness = 1. - self.weakness 92 | random_state = check_random_state(self.random_state) 93 | 94 | weights = weakness * random_state.randint(0, 1 + 1, size=(n_features,)) 95 | X_rescaled = _rescale_data(X, weights) 96 | return super(RandomizedLogisticRegression, self).fit(X_rescaled, y, sample_weight) 97 | 98 | 99 | class RandomizedLasso(Lasso): 100 | """ 101 | Randomized version of scikit-learns Lasso class. 102 | 103 | Randomized LASSO is a generalization of the LASSO. 
The LASSO penalises 104 | the absolute value of the coefficients with a penalty term proportional 105 | to `alpha`, but the randomized LASSO changes the penalty to a randomly 106 | chosen value in the range `[alpha, alpha/weakness]`. 107 | 108 | Parameters 109 | ---------- 110 | weakness : float 111 | Weakness value for randomized LASSO. Must be in (0, 1]. 112 | 113 | See also 114 | -------- 115 | sklearn.linear_model.LogisticRegression : learns logistic regression models 116 | using the same algorithm. 117 | """ 118 | def __init__(self, weakness=0.5, alpha=1.0, fit_intercept=True, normalize=False, 119 | precompute=False, copy_X=True, max_iter=1000, 120 | tol=1e-4, warm_start=False, positive=False, 121 | random_state=None, selection='cyclic'): 122 | self.weakness = weakness 123 | super(RandomizedLasso, self).__init__( 124 | alpha=alpha, fit_intercept=fit_intercept, 125 | normalize=normalize, precompute=precompute, copy_X=copy_X, 126 | max_iter=max_iter, tol=tol, warm_start=warm_start, 127 | positive=positive, random_state=random_state, 128 | selection=selection) 129 | 130 | def fit(self, X, y): 131 | """Fit the model according to the given training data. 132 | 133 | Parameters 134 | ---------- 135 | X : {array-like, sparse matrix}, shape = [n_samples, n_features] 136 | The training input samples. 137 | 138 | y : array-like, shape = [n_samples] 139 | The target values. 140 | """ 141 | if not isinstance(self.weakness, float) or not (0.0 < self.weakness <= 1.0): 142 | raise ValueError('weakness should be a float in (0, 1], got %s' % self.weakness) 143 | 144 | X, y = check_X_y(X, y, accept_sparse=True) 145 | 146 | n_features = X.shape[1] 147 | weakness = 1. 
- self.weakness 148 | random_state = check_random_state(self.random_state) 149 | 150 | weights = weakness * random_state.randint(0, 1 + 1, size=(n_features,)) 151 | 152 | # TODO: I am afraid this will do double normalization if set to true 153 | #X, y, _, _ = _preprocess_data(X, y, self.fit_intercept, normalize=self.normalize, copy=False, 154 | # sample_weight=None, return_mean=False) 155 | 156 | # TODO: Check if this is a problem if it happens before standardization 157 | X_rescaled = _rescale_data(X, weights) 158 | return super(RandomizedLasso, self).fit(X_rescaled, y) 159 | -------------------------------------------------------------------------------- /stability_selection/stability_selection.py: -------------------------------------------------------------------------------- 1 | """ 2 | =============================== 3 | Stability selection transformer 4 | =============================== 5 | 6 | This module contains a scikit-learn compatible implementation of 7 | stability selection [1]_ . 8 | 9 | References 10 | ---------- 11 | .. [1] Meinshausen, N. and Buhlmann, P., 2010. Stability selection. 12 | Journal of the Royal Statistical Society: Series B 13 | (Statistical Methodology), 72(4), pp.417-473. 14 | 15 | .. [2] Shah, R.D. and Samworth, R.J., 2013. Variable selection with 16 | error control: another look at stability selection. Journal 17 | of the Royal Statistical Society: Series B (Statistical Methodology), 18 | 75(1), pp.55-80. 
19 | """ 20 | 21 | from warnings import warn 22 | 23 | import matplotlib.pyplot as plt 24 | import numpy as np 25 | from sklearn.base import BaseEstimator, TransformerMixin, clone 26 | from sklearn.externals.joblib import Parallel, delayed 27 | from sklearn.feature_selection import SelectFromModel 28 | from sklearn.linear_model import LogisticRegression 29 | from sklearn.pipeline import Pipeline 30 | from sklearn.utils import check_array, check_random_state, check_X_y, safe_mask 31 | from sklearn.utils.validation import check_is_fitted 32 | 33 | from .bootstrap import (bootstrap_without_replacement, 34 | complementary_pairs_bootstrap, stratified_bootstrap) 35 | 36 | __all__ = ['StabilitySelection', 'plot_stability_path'] 37 | 38 | BOOTSTRAP_FUNC_MAPPING = { 39 | 'subsample': bootstrap_without_replacement, 40 | 'complementary_pairs': complementary_pairs_bootstrap, 41 | 'stratified': stratified_bootstrap 42 | } 43 | 44 | 45 | def _return_estimator_from_pipeline(pipeline): 46 | """Returns the final estimator in a Pipeline, or the estimator 47 | if it is not""" 48 | if isinstance(pipeline, Pipeline): 49 | return pipeline._final_estimator 50 | else: 51 | return pipeline 52 | 53 | 54 | def _bootstrap_generator(n_bootstrap_iterations, bootstrap_func, y, 55 | n_subsamples, random_state=None): 56 | for _ in range(n_bootstrap_iterations): 57 | subsample = bootstrap_func(y, n_subsamples, random_state) 58 | if isinstance(subsample, tuple): 59 | for item in subsample: 60 | yield item 61 | else: 62 | yield subsample 63 | 64 | 65 | def _fit_bootstrap_sample(base_estimator, X, y, lambda_name, lambda_value, 66 | threshold=None): 67 | """ 68 | Fits base_estimator on a bootstrap sample of the original data, 69 | and returns a mas of the variables that are selected by the fitted model. 
70 | 71 | Parameters 72 | ---------- 73 | base_estimator : Estimator 74 | Estimator to be fitted on the data 75 | 76 | X : {array-like, sparse matrix}, shape = [n_samples, n_features] 77 | The training input samples. 78 | 79 | y : array-like, shape = [n_samples] 80 | The target values. 81 | 82 | lambda_name : str 83 | Name of the penalization parameter of base_estimator 84 | 85 | lambda_value : float 86 | Value of the penalization parameter 87 | 88 | threshold : string, float, optional default None 89 | The threshold value to use for feature selection. Features whose 90 | importance is greater or equal are kept while the others are 91 | discarded. If "median" (resp. "mean"), then the ``threshold`` value is 92 | the median (resp. the mean) of the feature importances. A scaling 93 | factor (e.g., "1.25*mean") may also be used. If None and if the 94 | estimator has a parameter penalty set to l1, either explicitly 95 | or implicitly (e.g, Lasso), the threshold used is 1e-5. 96 | Otherwise, "mean" is used by default. 97 | 98 | Returns 99 | ------- 100 | selected_variables : array-like, shape = [n_features] 101 | Boolean mask of selected variables. 102 | """ 103 | 104 | base_estimator.set_params(**{lambda_name: lambda_value}) 105 | base_estimator.fit(X, y) 106 | 107 | # TODO: Reconsider if we really want to use SelectFromModel here or not 108 | selector_model = _return_estimator_from_pipeline(base_estimator) 109 | variable_selector = SelectFromModel(estimator=selector_model, 110 | threshold=threshold, 111 | prefit=True) 112 | return variable_selector.get_support() 113 | 114 | 115 | def plot_stability_path(stability_selection, threshold_highlight=None, 116 | **kwargs): 117 | """Plots stability path. 118 | 119 | Parameters 120 | ---------- 121 | stability_selection : StabilitySelection 122 | Fitted instance of StabilitySelection. 
123 | 124 | threshold_highlight : float 125 | Threshold defining the cutoff for the stability scores for the 126 | variables that need to be highlighted. 127 | 128 | kwargs : dict 129 | Arguments passed to matplotlib plot function. 130 | """ 131 | check_is_fitted(stability_selection, 'stability_scores_') 132 | 133 | threshold = stability_selection.threshold if threshold_highlight is None else threshold_highlight 134 | paths_to_highlight = stability_selection.get_support(threshold=threshold) 135 | 136 | x_grid = stability_selection.lambda_grid / np.max(stability_selection.lambda_grid) 137 | 138 | fig, ax = plt.subplots(1, 1, **kwargs) 139 | if not paths_to_highlight.all(): 140 | ax.plot(x_grid, stability_selection.stability_scores_[~paths_to_highlight].T, 141 | 'k:', linewidth=0.5) 142 | 143 | if paths_to_highlight.any(): 144 | ax.plot(x_grid, stability_selection.stability_scores_[paths_to_highlight].T, 145 | 'r-', linewidth=0.5) 146 | 147 | if threshold is not None: 148 | ax.plot(x_grid, threshold * np.ones_like(stability_selection.lambda_grid), 149 | 'b--', linewidth=0.5) 150 | 151 | ax.set_ylabel('Stability score') 152 | ax.set_xlabel('Lambda / max(Lambda)') 153 | 154 | fig.tight_layout() 155 | 156 | return fig, ax 157 | 158 | 159 | class StabilitySelection(BaseEstimator, TransformerMixin): 160 | """Stability selection [1]_ fits the estimator `base_estimator` on 161 | bootstrap samples of the original data set, for different values of 162 | the regularization parameter for `base_estimator`. Variables that 163 | reliably get selected by the model in these bootstrap samples are 164 | considered to be stable variables. 165 | 166 | Parameters 167 | ---------- 168 | base_estimator : object. 169 | The base estimator used for stability selection. The estimator 170 | must have either a ``feature_importances_`` or ``coef_`` 171 | attribute after fitting. 172 | 173 | lambda_name : str. 174 | The name of the penalization parameter for the estimator 175 | `base_estimator`. 
176 | 177 | lambda_grid : array-like. 178 | Grid of values of the penalization parameter to iterate over. 179 | 180 | n_bootstrap_iterations : integer. 181 | Number of bootstrap samples to create. 182 | 183 | sample_fraction : float, optional 184 | The fraction of samples to be used in each bootstrap sample. 185 | Should be between 0 and 1. If 1, all samples are used. 186 | 187 | threshold : float. 188 | Threshold defining the minimum cutoff value for the stability scores. 189 | 190 | bootstrap_func : str or callable fun (default=bootstrap_without_replacement) 191 | The function used to subsample the data. This parameter can be: 192 | - A string, which must be one of 193 | - 'subsample': For subsampling without replacement. 194 | - 'complementary_pairs': For complementary pairs subsampling [2]_ . 195 | - 'stratified': For stratified bootstrapping in imbalanced 196 | classification. 197 | - A function that takes y, and a random state 198 | as inputs and returns a list of sample indices in the range 199 | (0, len(y)-1). By default, indices are uniformly subsampled. 200 | 201 | bootstrap_threshold : string, float, optional default None 202 | The threshold value to use for feature selection. Features whose 203 | importance is greater or equal are kept while the others are 204 | discarded. If "median" (resp. "mean"), then the ``threshold`` value is 205 | the median (resp. the mean) of the feature importances. A scaling 206 | factor (e.g., "1.25*mean") may also be used. If None and if the 207 | estimator has a parameter penalty set to l1, either explicitly 208 | or implicitly (e.g, Lasso), the threshold used is 1e-5. 209 | Otherwise, "mean" is used by default. 210 | 211 | verbose : integer. 212 | Controls the verbosity: the higher, the more messages. 213 | 214 | n_jobs : int, default=1 215 | Number of jobs to run in parallel. 216 | 217 | pre_dispatch : int, or string, optional 218 | Controls the number of jobs that get dispatched during parallel 219 | execution. 
Reducing this number can be useful to avoid an 220 | explosion of memory consumption when more jobs get dispatched 221 | than CPUs can process. This parameter can be: 222 | - None, in which case all the jobs are immediately 223 | created and spawned. Use this for lightweight and 224 | fast-running jobs, to avoid delays due to on-demand 225 | spawning of the jobs 226 | - An int, giving the exact number of total jobs that are 227 | spawned 228 | - A string, giving an expression as a function of n_jobs, 229 | as in '2*n_jobs' 230 | 231 | random_state : int, RandomState instance or None, optional, default=None 232 | Pseudo random number generator state used for random uniform sampling 233 | from lists of possible values instead of scipy.stats distributions. 234 | If int, random_state is the seed used by the random number generator; 235 | If RandomState instance, random_state is the random number generator; 236 | If None, the random number generator is the RandomState instance used 237 | by `np.random`. 238 | 239 | Attributes 240 | ---------- 241 | stability_scores_ : array, shape = [n_features, n_alphas] 242 | Array of stability scores for each feature for each value of the 243 | penalization parameter. 244 | 245 | References 246 | ---------- 247 | 248 | .. [1] Meinshausen, N. and Buhlmann, P., 2010. Stability selection. 249 | Journal of the Royal Statistical Society: Series B 250 | (Statistical Methodology), 72(4), pp.417-473. 251 | .. [2] Shah, R.D. and Samworth, R.J., 2013. Variable selection with 252 | error control: another look at stability selection. Journal 253 | of the Royal Statistical Society: Series B (Statistical Methodology), 254 | 75(1), pp.55-80. 
255 | """ 256 | def __init__(self, base_estimator=LogisticRegression(penalty='l1'), lambda_name='C', 257 | lambda_grid=np.logspace(-5, -2, 25), n_bootstrap_iterations=100, 258 | sample_fraction=0.5, threshold=0.6, bootstrap_func=bootstrap_without_replacement, 259 | bootstrap_threshold=None, verbose=0, n_jobs=1, pre_dispatch='2*n_jobs', 260 | random_state=None): 261 | self.base_estimator = base_estimator 262 | self.lambda_name = lambda_name 263 | self.lambda_grid = lambda_grid 264 | self.n_bootstrap_iterations = n_bootstrap_iterations 265 | self.sample_fraction = sample_fraction 266 | self.threshold = threshold 267 | self.bootstrap_func = bootstrap_func 268 | self.bootstrap_threshold = bootstrap_threshold 269 | self.verbose = verbose 270 | self.n_jobs = n_jobs 271 | self.pre_dispatch = pre_dispatch 272 | self.random_state = random_state 273 | 274 | def _validate_input(self): 275 | if not isinstance(self.n_bootstrap_iterations, int) or self.n_bootstrap_iterations <= 0: 276 | raise ValueError('n_bootstrap_iterations should be a positive integer, got %s' % 277 | self.n_bootstrap_iterations) 278 | 279 | if not isinstance(self.sample_fraction, float) or not (0.0 < self.sample_fraction <= 1.0): 280 | raise ValueError('sample_fraction should be a float in (0, 1], got %s' % self.sample_fraction) 281 | 282 | if not isinstance(self.threshold, float) or not (0.0 < self.threshold <= 1.0): 283 | raise ValueError('threshold should be a float in (0, 1], got %s' % self.threshold) 284 | 285 | if self.lambda_name not in self.base_estimator.get_params().keys(): 286 | raise ValueError('lambda_name is set to %s, but base_estimator %s ' 287 | 'does not have a parameter ' 288 | 'with that name' % (self.lambda_name, 289 | self.base_estimator.__class__.__name__)) 290 | 291 | if isinstance(self.bootstrap_func, str): 292 | if self.bootstrap_func not in BOOTSTRAP_FUNC_MAPPING.keys(): 293 | raise ValueError('bootstrap_func is set to %s, but must be one of ' 294 | '%s or a callable' % 295 | 
(self.bootstrap_func, BOOTSTRAP_FUNC_MAPPING.keys())) 296 | 297 | self.bootstrap_func = BOOTSTRAP_FUNC_MAPPING[self.bootstrap_func] 298 | elif not callable(self.bootstrap_func): 299 | raise ValueError('bootstrap_func must be one of %s or a callable' % 300 | BOOTSTRAP_FUNC_MAPPING.keys()) 301 | 302 | def fit(self, X, y): 303 | """Fit the stability selection model on the given data. 304 | 305 | Parameters 306 | ---------- 307 | X : {array-like, sparse matrix}, shape = [n_samples, n_features] 308 | The training input samples. 309 | 310 | y : array-like, shape = [n_samples] 311 | The target values. 312 | """ 313 | 314 | self._validate_input() 315 | 316 | X, y = check_X_y(X, y, accept_sparse='csr') 317 | 318 | n_samples, n_variables = X.shape 319 | n_subsamples = np.floor(self.sample_fraction * n_samples).astype(int) 320 | n_lambdas = self.lambda_grid.shape[0] 321 | 322 | base_estimator = clone(self.base_estimator) 323 | random_state = check_random_state(self.random_state) 324 | stability_scores = np.zeros((n_variables, n_lambdas)) 325 | 326 | for idx, lambda_value in enumerate(self.lambda_grid): 327 | if self.verbose > 0: 328 | print("Fitting estimator for lambda = %.5f (%d / %d) on %d bootstrap samples" % 329 | (lambda_value, idx + 1, n_lambdas, self.n_bootstrap_iterations)) 330 | 331 | bootstrap_samples = _bootstrap_generator(self.n_bootstrap_iterations, 332 | self.bootstrap_func, y, 333 | n_subsamples, random_state=random_state) 334 | 335 | selected_variables = Parallel( 336 | n_jobs=self.n_jobs, verbose=self.verbose, 337 | pre_dispatch=self.pre_dispatch 338 | )(delayed(_fit_bootstrap_sample)(clone(base_estimator), 339 | X=X[safe_mask(X, subsample), :], 340 | y=y[subsample], 341 | lambda_name=self.lambda_name, 342 | lambda_value=lambda_value, 343 | threshold=self.bootstrap_threshold) 344 | for subsample in bootstrap_samples) 345 | 346 | stability_scores[:, idx] = np.vstack(selected_variables).mean(axis=0) 347 | 348 | self.stability_scores_ = stability_scores 349 | 
return self 350 | 351 | def get_support(self, indices=False, threshold=None): 352 | """Get a mask, or integer index, of the features selected 353 | 354 | Parameters 355 | ---------- 356 | indices : boolean (default False) 357 | If True, the return value will be an array of integers, 358 | rather than a boolean mask. 359 | 360 | threshold: float. 361 | Threshold defining the minimum cutoff value for the 362 | stability scores. 363 | 364 | Returns 365 | ------- 366 | support : array 367 | An index that selects the retained features from a feature vector. 368 | If `indices` is False, this is a boolean array of shape 369 | [# input features], in which an element is True iff its 370 | corresponding feature is selected for retention. If `indices` is 371 | True, this is an integer array of shape [# output features] whose 372 | values are indices into the input feature vector. 373 | """ 374 | 375 | if threshold is not None and (not isinstance(threshold, float) 376 | or not (0.0 < threshold <= 1.0)): 377 | raise ValueError('threshold should be a float in (0, 1], ' 378 | 'got %s' % self.threshold) 379 | 380 | cutoff = self.threshold if threshold is None else threshold 381 | mask = (self.stability_scores_.max(axis=1) > cutoff) 382 | 383 | return mask if not indices else np.where(mask)[0] 384 | 385 | def transform(self, X, threshold=None): 386 | """Reduce X to the selected features. 387 | 388 | Parameters 389 | ---------- 390 | X : array of shape [n_samples, n_features] 391 | The input samples. 392 | 393 | threshold: float. 394 | Threshold defining the minimum cutoff value for the 395 | stability scores. 396 | 397 | Returns 398 | ------- 399 | X_r : array of shape [n_samples, n_selected_features] 400 | The input samples with only the selected features. 
401 | """ 402 | X = check_array(X, accept_sparse='csr') 403 | mask = self.get_support(threshold=threshold) 404 | 405 | check_is_fitted(self, 'stability_scores_') 406 | 407 | if len(mask) != X.shape[1]: 408 | raise ValueError("X has a different shape than during fitting.") 409 | 410 | if not mask.any(): 411 | warn("No features were selected: either the data is" 412 | " too noisy or the selection test too strict.", 413 | UserWarning) 414 | return np.empty(0).reshape((X.shape[0], 0)) 415 | 416 | return X[:, safe_mask(X, mask)] 417 | -------------------------------------------------------------------------------- /stability_selection/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/stability-selection/e6e34da3601cc8215cd0b08d5c5f3a9dd3ccfe01/stability_selection/tests/__init__.py -------------------------------------------------------------------------------- /stability_selection/tests/test_common.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from nose.tools import raises 4 | from numpy.testing import assert_array_equal 5 | from sklearn.utils.estimator_checks import check_estimator 6 | from stability_selection import StabilitySelection 7 | 8 | 9 | def test_transformer(): 10 | # With defaults this can fail because in the low sample size case 11 | # some of the bootstrap samples can have zero cases of the positive class 12 | return check_estimator(StabilitySelection(n_bootstrap_iterations=10, sample_fraction=1.0)) 13 | 14 | 15 | @raises(ValueError) 16 | def test_check_string_threshold(): 17 | StabilitySelection(threshold='wrong_value')._validate_input() 18 | 19 | 20 | @raises(ValueError) 21 | def test_check_threshold_too_large(): 22 | StabilitySelection(threshold=1.5)._validate_input() 23 | 24 | 25 | @raises(ValueError) 26 | def test_check_threshold_too_small(): 27 | 
StabilitySelection(threshold=0.0)._validate_input() 28 | 29 | 30 | @raises(ValueError) 31 | def test_check_threshold_too_small(): 32 | StabilitySelection().get_support(threshold='wrong_value') 33 | 34 | 35 | @raises(ValueError) 36 | def test_check_arguments(): 37 | StabilitySelection(threshold='wrong_value')._validate_input() 38 | 39 | 40 | @raises(ValueError) 41 | def test_check_wrong_lambda_name(): 42 | StabilitySelection(lambda_name='alpha')._validate_input() 43 | 44 | 45 | @raises(ValueError) 46 | def test_check_wrong_lambda_name(): 47 | StabilitySelection(n_bootstrap_iterations=-1)._validate_input() 48 | 49 | 50 | def test_automatic_lambda_grid(): 51 | selector = StabilitySelection() 52 | selector._validate_input() 53 | assert_array_equal(np.logspace(-5, -2, 25), selector.lambda_grid) 54 | 55 | 56 | @raises(ValueError) 57 | def test_bootstrap_func(): 58 | StabilitySelection(bootstrap_func='nonexistent')._validate_input() 59 | 60 | 61 | @raises(ValueError) 62 | def test_callable_bootstrap_func(): 63 | StabilitySelection(bootstrap_func=0.5)._validate_input() 64 | 65 | 66 | @raises(ValueError) 67 | def test_sample_fraction(): 68 | StabilitySelection(sample_fraction=0.0)._validate_input() 69 | 70 | 71 | @raises(ValueError) 72 | def test_lambda_name(): 73 | StabilitySelection(lambda_name='n_estimators')._validate_input() 74 | -------------------------------------------------------------------------------- /stability_selection/tests/test_randomized_lasso.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.testing import assert_almost_equal 3 | 4 | from nose.tools import raises 5 | from sklearn.utils.estimator_checks import check_estimator 6 | from sklearn.utils import check_random_state 7 | from scipy.sparse import csr_matrix 8 | 9 | from stability_selection import StabilitySelection, RandomizedLasso, \ 10 | RandomizedLogisticRegression 11 | 12 | 13 | def generate_experiment_data(n=200, p=200, 
rho=0.6, random_state=3245): 14 | rng = check_random_state(random_state) 15 | 16 | sigma = np.eye(p) 17 | sigma[0, 2] = rho 18 | sigma[2, 0] = rho 19 | sigma[1, 2] = rho 20 | sigma[2, 1] = rho 21 | 22 | X = rng.multivariate_normal(mean=np.zeros(p), cov=sigma, size=(n,)) 23 | beta = np.zeros(p) 24 | beta[:2] = 1.0 25 | epsilon = rng.normal(0.0, 0.25, size=(n,)) 26 | 27 | y = np.matmul(X, beta) + epsilon 28 | 29 | return X, y 30 | 31 | 32 | def test_estimator(): 33 | check_estimator(RandomizedLasso) 34 | check_estimator(RandomizedLogisticRegression) 35 | 36 | 37 | @raises(ValueError) 38 | def test_logistic_weakness(): 39 | n, p = 200, 200 40 | rho = 0.6 41 | 42 | X, y = generate_experiment_data(n, p, rho) 43 | RandomizedLogisticRegression(weakness=0.0).fit(X, y) 44 | 45 | 46 | @raises(ValueError) 47 | def test_logistic_weakness(): 48 | n, p = 200, 200 49 | rho = 0.6 50 | 51 | X, y = generate_experiment_data(n, p, rho) 52 | RandomizedLasso(weakness=0.0).fit(X, y) 53 | 54 | 55 | def test_randomized_lasso(): 56 | n, p = 200, 200 57 | rho = 0.6 58 | weakness = 0.2 59 | 60 | X, y = generate_experiment_data(n, p, rho) 61 | lambda_grid = np.linspace(0.01, 0.5, num=100) 62 | 63 | estimator = RandomizedLasso(weakness=weakness) 64 | selector = StabilitySelection(base_estimator=estimator, lambda_name='alpha', 65 | lambda_grid=lambda_grid, threshold=0.9, verbose=1) 66 | selector.fit(X, y) 67 | 68 | chosen_betas = selector.get_support(indices=True) 69 | 70 | assert_almost_equal(np.array([0, 1]), chosen_betas) 71 | 72 | 73 | def test_issparse(): 74 | n, p = 200, 200 75 | rho = 0.6 76 | weakness = 0.2 77 | 78 | X, y = generate_experiment_data(n, p, rho) 79 | lambda_grid = np.linspace(0.01, 0.5, num=100) 80 | 81 | estimator = RandomizedLasso(weakness=weakness) 82 | selector = StabilitySelection(base_estimator=estimator, lambda_name='alpha', 83 | lambda_grid=lambda_grid, threshold=0.9, verbose=1) 84 | selector.fit(csr_matrix(X), y) 85 | 
-------------------------------------------------------------------------------- /stability_selection/tests/test_stability_selection.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import numpy as np 4 | from numpy.testing import assert_almost_equal 5 | from nose.tools import raises 6 | 7 | from sklearn.linear_model import Lasso 8 | from sklearn.preprocessing import StandardScaler 9 | from sklearn.pipeline import Pipeline 10 | from sklearn.utils import check_random_state 11 | from stability_selection import StabilitySelection, plot_stability_path 12 | 13 | 14 | def _generate_dummy_regression_data(p=1000, n=1000, k=5, 15 | random_state=123321): 16 | rng = check_random_state(random_state) 17 | 18 | X = rng.normal(loc=0.0, scale=1.0, size=(n, p)) 19 | betas = np.zeros(p) 20 | important_betas = np.sort(rng.choice(a=np.arange(p), size=k)) 21 | betas[important_betas] = rng.uniform(size=k) 22 | 23 | y = np.matmul(X, betas) 24 | 25 | return X, y, important_betas 26 | 27 | 28 | def _generate_dummy_classification_data(p=1000, n=1000, k=5, 29 | random_state=123321): 30 | 31 | rng = check_random_state(random_state) 32 | 33 | X = rng.normal(loc=0.0, scale=1.0, size=(n, p)) 34 | betas = np.zeros(p) 35 | important_betas = np.sort(rng.choice(a=np.arange(p), size=k)) 36 | betas[important_betas] = rng.uniform(size=k) 37 | 38 | probs = 1 / (1 + np.exp(-1 * np.matmul(X, betas))) 39 | y = (probs > 0.5).astype(int)  # deterministic thresholding of the sigmoid -- labels carry no sampling noise 40 | 41 | return X, y, important_betas 42 | 43 | 44 | def test_stability_selection_classification(): 45 | n, p, k = 1000, 1000, 5 46 | 47 | X, y, important_betas = _generate_dummy_classification_data(n=n, k=k) 48 | selector = StabilitySelection(lambda_grid=np.logspace(-5, -1, 25), verbose=1) 49 | selector.fit(X, y) 50 | 51 | chosen_betas = selector.get_support(indices=True) 52 | X_r = selector.transform(X) 53 | 54 | assert_almost_equal(important_betas, chosen_betas) 55 | assert(X_r.shape == (n, k)) 56 |
assert(selector.stability_scores_.shape == (p, selector.lambda_grid.shape[0])) 57 | 58 | 59 | def test_stability_selection_regression(): 60 | n, p, k = 500, 1000, 5 61 | 62 | X, y, important_betas = _generate_dummy_regression_data(n=n, k=k) 63 | 64 | base_estimator = Pipeline([ 65 | ('scaler', StandardScaler()), 66 | ('model', Lasso()) 67 | ]) 68 | 69 | lambdas_grid = np.logspace(-1, 1, num=10) 70 | 71 | selector = StabilitySelection(base_estimator=base_estimator, 72 | lambda_name='model__alpha', 73 | lambda_grid=lambdas_grid) 74 | selector.fit(X, y) 75 | 76 | chosen_betas = selector.get_support(indices=True) 77 | 78 | assert_almost_equal(important_betas, chosen_betas) 79 | 80 | 81 | def test_with_complementary_pairs_bootstrap(): 82 | n, p, k = 500, 1000, 5 83 | 84 | X, y, important_betas = _generate_dummy_regression_data(n=n, k=k) 85 | 86 | base_estimator = Pipeline([ 87 | ('scaler', StandardScaler()), 88 | ('model', Lasso()) 89 | ]) 90 | 91 | lambdas_grid = np.logspace(-1, 1, num=10) 92 | 93 | selector = StabilitySelection(base_estimator=base_estimator, 94 | lambda_name='model__alpha', 95 | lambda_grid=lambdas_grid, 96 | bootstrap_func='complementary_pairs') 97 | selector.fit(X, y) 98 | 99 | chosen_betas = selector.get_support(indices=True) 100 | 101 | assert_almost_equal(important_betas, chosen_betas) 102 | 103 | 104 | def test_with_stratified_bootstrap(): 105 | n, p, k = 1000, 1000, 5 106 | 107 | X, y, important_betas = _generate_dummy_classification_data(n=n, k=k) 108 | selector = StabilitySelection(lambda_grid=np.logspace(-5, -1, 25), verbose=1, 109 | bootstrap_func='stratified') 110 | selector.fit(X, y) 111 | 112 | chosen_betas = selector.get_support(indices=True) 113 | assert_almost_equal(important_betas, chosen_betas) 114 | 115 | 116 | @raises(ValueError) 117 | def test_different_shape(): 118 | n, p, k = 100, 200, 5  # NOTE(review): p is unused here -- the generator is called without p, so the data width is the default p=1000 119 | 120 | X, y, important_betas = _generate_dummy_regression_data(n=n, k=k) 121 | 122 | base_estimator = Pipeline([ 123 | ('scaler',
StandardScaler()), 124 | ('model', Lasso()) 125 | ]) 126 | 127 | lambdas_grid = np.logspace(-1, 1, num=10) 128 | 129 | selector = StabilitySelection(base_estimator=base_estimator, 130 | lambda_name='model__alpha', 131 | lambda_grid=lambdas_grid) 132 | selector.fit(X, y) 133 | selector.transform(X[:, :-2]) 134 | 135 | 136 | def test_no_features(): 137 | n, p, k = 100, 200, 0  # NOTE(review): p is unused -- data width comes from the generator default p=1000 138 | 139 | X, y, important_betas = _generate_dummy_regression_data(n=n, k=k) 140 | 141 | base_estimator = Pipeline([ 142 | ('scaler', StandardScaler()), 143 | ('model', Lasso()) 144 | ]) 145 | 146 | lambdas_grid = np.logspace(-1, 1, num=10) 147 | 148 | selector = StabilitySelection(base_estimator=base_estimator, 149 | lambda_name='model__alpha', 150 | lambda_grid=lambdas_grid) 151 | selector.fit(X, y) 152 | 153 | assert_almost_equal(selector.transform(X), 154 | np.empty(0).reshape((X.shape[0], 0))) 155 | 156 | 157 | def test_stability_plot(): 158 | n, p, k = 500, 200, 5 159 | 160 | X, y, important_betas = _generate_dummy_regression_data(n=n, k=k) 161 | 162 | base_estimator = Pipeline([ 163 | ('scaler', StandardScaler()), 164 | ('model', Lasso()) 165 | ]) 166 | 167 | lambdas_grid = np.logspace(-1, 1, num=10) 168 | 169 | selector = StabilitySelection(base_estimator=base_estimator, 170 | lambda_name='model__alpha', 171 | lambda_grid=lambdas_grid) 172 | selector.fit(X, y) 173 | 174 | plot_stability_path(selector, threshold_highlight=0.5) 175 | -------------------------------------------------------------------------------- /stability_selection/tests/test_stratified_bootstrap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from nose.tools import raises 3 | 4 | from stability_selection.bootstrap import stratified_bootstrap 5 | 6 | 7 | @raises(ValueError) 8 | def test_check_not_classification(): 9 | y = np.linspace(0, 1, 21) 10 | stratified_bootstrap(y, 10, random_state=0) 11 | 12 | 13 | def test_stratified_bootstrap(): 14 |
zero_to_one_ratio = 3 15 | n_ones = 10 16 | 17 | y = np.array(n_ones * ([0] * zero_to_one_ratio + [1])) 18 | for n_subsamples in [4, 8, 12, 16, 20]: 19 | sample_idx = stratified_bootstrap(y, n_subsamples, random_state=0) 20 | samples = y[sample_idx] 21 | 22 | assert(len(samples) == n_subsamples) 23 | 24 | n_ones = (samples == 1).sum() 25 | n_zeros = (samples == 0).sum() 26 | assert(n_zeros == n_ones * zero_to_one_ratio) 27 | 28 | 29 | def test_random_state(): 30 | zero_to_one_ratio = 3 31 | n_ones = 10 32 | 33 | y = np.array(n_ones * ([0] * zero_to_one_ratio + [1])) 34 | 35 | samples0 = np.sort(stratified_bootstrap(y, 12, random_state=0)) 36 | samples0b = np.sort(stratified_bootstrap(y, 12, random_state=0)) 37 | samples1 = np.sort(stratified_bootstrap(y, 12, random_state=1)) 38 | 39 | assert(np.array_equal(samples0, samples0b)) 40 | assert(not np.array_equal(samples0, samples1)) 41 | --------------------------------------------------------------------------------