├── .coveragerc ├── .gitignore ├── .landscape.yaml ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── ci ├── .travis_install.sh └── .travis_test.sh ├── data ├── 3Class_Datasets_Loc_2_01.txt ├── GAMETES_Epistasis_2-Way_20atts_0.4H_EDM-1_1.tsv.gz ├── GAMETES_Epistasis_2-Way_continuous_endpoint_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz ├── GAMETES_Epistasis_2-Way_missing_values_0.1_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz └── GAMETES_Epistasis_2-Way_mixed_attribute_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz ├── docs ├── 404.html ├── citing │ └── index.html ├── contributing │ └── index.html ├── css │ ├── highlight.css │ ├── theme.css │ └── theme_extra.css ├── fonts │ ├── fontawesome-webfont.eot │ ├── fontawesome-webfont.svg │ ├── fontawesome-webfont.ttf │ └── fontawesome-webfont.woff ├── img │ └── favicon.ico ├── index.html ├── installing │ └── index.html ├── js │ ├── highlight.pack.js │ ├── jquery-2.1.1.min.js │ ├── modernizr-2.8.3.min.js │ └── theme.js ├── releases │ └── index.html ├── search.html ├── search │ ├── lunr.min.js │ ├── mustache.min.js │ ├── require.js │ ├── search-results-template.mustache │ ├── search.js │ ├── search_index.json │ └── text.js ├── sitemap.xml ├── support │ └── index.html └── using │ └── index.html ├── docs_sources ├── citing.md ├── contributing.md ├── index.md ├── installing.md ├── releases.md ├── support.md └── using.md ├── mkdocs.yml ├── setup.py ├── skrebate ├── __init__.py ├── _version.py ├── iter.py ├── multisurf.py ├── multisurfstar.py ├── relieff.py ├── scoring_utils.py ├── surf.py ├── surfstar.py ├── turf.py └── vls.py └── tests.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = skrebate 4 | include = */skrebate/* 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #Custom test files 2 | run_test.py 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | 58 | # Sphinx documentation 59 | docs/_build/ 60 | 61 | # PyBuilder 62 | target/ 63 | 64 | #Ipython Notebook 65 | .ipynb_checkpoints 66 | 67 | testing.ipynb 68 | 69 | *.lprof 70 | 71 | *.prof 72 | /demo_scikitrebate.ipynb 73 | 74 | *.DS_Store 75 | .idea/ 76 | 77 | analysis_pipeline/skrebatewip 78 | -------------------------------------------------------------------------------- /.landscape.yaml: -------------------------------------------------------------------------------- 1 | doc-warnings: yes 2 | 3 | ignore-patterns: 4 | - __init__.py 5 | 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | virtualenv: 3 | system_site_packages: true 4 | env: 5 | matrix: 6 | # let's start simple: 7 | - PYTHON_VERSION="2.7" LATEST="true" 8 | - PYTHON_VERSION="3.6" COVERAGE="true" LATEST="true" 9 | - PYTHON_VERSION="3.6" LATEST="true" 10 | install: source ./ci/.travis_install.sh 11 | script: bash ./ci/.travis_test.sh 12 | after_success: 13 | # Ignore coveralls failures as the coveralls server is not very reliable 14 | # but we don't want travis to report a failure in the github UI just 15 | # because the coverage report failed to be published. 16 | - if [[ "$COVERAGE" == "true" ]]; then coveralls || echo "failed"; fi 17 | cache: apt 18 | sudo: false 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Randal S. Olson and Ryan J. Urbanowicz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Master status: [![Master Build Status](https://travis-ci.org/EpistasisLab/scikit-rebate.svg?branch=master)](https://travis-ci.org/EpistasisLab/scikit-rebate) 2 | [![Master Code Health](https://landscape.io/github/EpistasisLab/scikit-rebate/master/landscape.svg?style=flat)](https://landscape.io/github/EpistasisLab/scikit-rebate/master) 3 | [![Master Coverage Status](https://coveralls.io/repos/github/EpistasisLab/scikit-rebate/badge.svg?branch=master&service=github)](https://coveralls.io/github/EpistasisLab/scikit-rebate?branch=master) 4 | 5 | Development status: [![Development Build Status](https://travis-ci.org/EpistasisLab/scikit-rebate.svg?branch=development)](https://travis-ci.org/EpistasisLab/scikit-rebate) 6 | [![Development Code Health](https://landscape.io/github/EpistasisLab/scikit-rebate/development/landscape.svg?style=flat)](https://landscape.io/github/EpistasisLab/scikit-rebate/development) 7 | [![Development Coverage Status](https://coveralls.io/repos/github/EpistasisLab/scikit-rebate/badge.svg?branch=development&service=github)](https://coveralls.io/github/EpistasisLab/scikit-rebate?branch=development) 8 | 9 | Package information: ![Python 2.7](https://img.shields.io/badge/python-2.7-blue.svg) 10 | ![Python 3.5](https://img.shields.io/badge/python-3.6-blue.svg) 11 | ![License](https://img.shields.io/badge/license-MIT%20License-blue.svg) 12 | [![PyPI version](https://badge.fury.io/py/skrebate.svg)](https://badge.fury.io/py/skrebate) 13 | 14 | # scikit-rebate 15 | This package includes a scikit-learn-compatible Python implementation of ReBATE, a suite of [Relief-based feature selection algorithms](https://en.wikipedia.org/wiki/Relief_(feature_selection)) for Machine Learning. These Relief-Based algorithms (RBAs) are designed for feature weighting/selection as part of a machine learning pipeline (supervised learning). Presently this includes the following core RBAs: ReliefF, SURF, SURF\*, MultiSURF\*, and MultiSURF. Additionally, an implementation of the iterative TuRF mechanism and VLSRelief is included. **It is still under active development** and we encourage you to check back on this repository regularly for updates. 16 | 17 | These algorithms offer a computationally efficient way to perform feature selection that is sensitive to feature interactions as well as simple univariate associations, unlike most currently available filter-based feature selection methods. The main benefit of Relief algorithms is that they identify feature interactions without having to exhaustively check every pairwise interaction, thus taking significantly less time than exhaustive pairwise search. 18 | 19 | Certain algorithms require user specified run parameters (e.g. ReliefF requires the user to specify some 'k' number of nearest neighbors). 20 | 21 | Relief algorithms are commonly applied to genetic analyses, where epistasis (i.e., feature interactions) is common. 
However, the algorithms implemented in this package can be applied to almost any supervised classification data set and supports: 22 | 23 | * Feature sets that are discrete/categorical, continuous-valued or a mix of both 24 | 25 | * Data with missing values 26 | 27 | * Binary endpoints (i.e., classification) 28 | 29 | * Multi-class endpoints (i.e., classification) 30 | 31 | * Continuous endpoints (i.e., regression) 32 | 33 | Built into this code, is a strategy to 'automatically' detect from the loaded data, these relevant characteristics. 34 | 35 | Of our two initial ReBATE software releases, this scikit-learn compatible version primarily focuses on ease of incorporation into a scikit learn analysis pipeline. 36 | This code is most appropriate for scikit-learn users, Windows operating system users, beginners, or those looking for the most recent ReBATE developments. 37 | 38 | An alternative 'stand-alone' version of [ReBATE](https://github.com/EpistasisLab/ReBATE) is also available that focuses on improving run-time with the use of Cython for optimization. This implementation also outputs feature names and associated feature scores as a text file by default. 39 | 40 | ## License 41 | 42 | Please see the [repository license](https://github.com/EpistasisLab/scikit-rebate/blob/master/LICENSE) for the licensing and usage information for scikit-rebate. 43 | 44 | Generally, we have licensed scikit-rebate to make it as widely usable as possible. 45 | 46 | ## Installation 47 | 48 | scikit-rebate is built on top of the following existing Python packages: 49 | 50 | * NumPy 51 | 52 | * SciPy 53 | 54 | * scikit-learn 55 | 56 | All of the necessary Python packages can be installed via the [Anaconda Python distribution](https://www.continuum.io/downloads), which we strongly recommend that you use. We also strongly recommend that you use Python 3 over Python 2 if you're given the choice. 57 | 58 | NumPy, SciPy, and scikit-learn can be installed in Anaconda via the command: 59 | 60 | ``` 61 | conda install numpy scipy scikit-learn 62 | ``` 63 | 64 | Once the prerequisites are installed, you should be able to install scikit-rebate with a pip command: 65 | 66 | ``` 67 | pip install skrebate 68 | ``` 69 | 70 | Please [file a new issue](https://github.com/EpistasisLab/scikit-rebate/issues/new) if you run into installation problems. 71 | 72 | ## Usage 73 | 74 | We have designed the Relief algorithms to be integrated directly into scikit-learn machine learning workflows. For example, the ReliefF algorithm can be used as a feature selection step in a scikit-learn pipeline as follows. 
75 | 76 | ```python 77 | import pandas as pd 78 | import numpy as np 79 | from sklearn.pipeline import make_pipeline 80 | from skrebate import ReliefF 81 | from sklearn.ensemble import RandomForestClassifier 82 | from sklearn.model_selection import cross_val_score 83 | 84 | genetic_data = pd.read_csv('https://github.com/EpistasisLab/scikit-rebate/raw/master/data/' 85 | 'GAMETES_Epistasis_2-Way_20atts_0.4H_EDM-1_1.tsv.gz', 86 | sep='\t', compression='gzip') 87 | 88 | features, labels = genetic_data.drop('class', axis=1).values, genetic_data['class'].values 89 | 90 | clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=100), 91 | RandomForestClassifier(n_estimators=100)) 92 | 93 | print(np.mean(cross_val_score(clf, features, labels))) 94 | >>> 0.795 95 | ``` 96 | 97 | For more information on the Relief algorithms available in this package and how to use them, please refer to our [usage documentation](https://EpistasisLab.github.io/scikit-rebate/using/). 98 | 99 | ## Contributing to scikit-rebate 100 | 101 | We welcome you to [check the existing issues](https://github.com/EpistasisLab/scikit-rebate/issues/) for bugs or enhancements to work on. If you have an idea for an extension to scikit-rebate, please [file a new issue](https://github.com/EpistasisLab/scikit-rebate/issues/new) so we can discuss it. 102 | 103 | Please refer to our [contribution guidelines](https://EpistasisLab.github.io/scikit-rebate/contributing/) prior to working on a new feature or bug fix. 104 | 105 | ## Citing scikit-rebate 106 | 107 | If you use scikit-rebate in a scientific publication, please consider citing the following paper: 108 | 109 | Ryan J. Urbanowicz, Randal S. Olson, Peter Schmitt, Melissa Meeker, Jason H. Moore (2017). [Benchmarking Relief-Based Feature Selection Methods](https://arxiv.org/abs/1711.08477). *arXiv preprint*, under review. 110 | 111 | BibTeX entry: 112 | 113 | ```bibtex 114 | @misc{Urbanowicz2017Benchmarking, 115 | author = {Urbanowicz, Ryan J. and Olson, Randal S. and Schmitt, Peter and Meeker, Melissa and Moore, Jason H.}, 116 | title = {Benchmarking Relief-Based Feature Selection Methods}, 117 | year = {2017}, 118 | howpublished = {arXiv e-print. https://arxiv.org/abs/1711.08477}, 119 | } 120 | ``` 121 | -------------------------------------------------------------------------------- /ci/.travis_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # modified from https://github.com/trevorstephens/gplearn 4 | 5 | # This script is meant to be called by the "install" step defined in 6 | # .travis.yml. See http://docs.travis-ci.com/ for more details. 7 | # The behavior of the script is controlled by environment variabled defined 8 | # in the .travis.yml in the top level folder of the project. 9 | 10 | 11 | # License: GNU/GPLv3 12 | 13 | set -e 14 | 15 | # Fix the compilers to workaround avoid having the Python 3.4 build 16 | # lookup for g++44 unexpectedly. 
17 | export CC=gcc 18 | export CXX=g++ 19 | 20 | # Deactivate the travis-provided virtual environment and setup a 21 | # conda-based environment instead 22 | deactivate 23 | 24 | # Use the miniconda installer for faster download / install of conda 25 | # itself 26 | wget http://repo.continuum.io/miniconda/Miniconda-3.9.1-Linux-x86_64.sh \ 27 | -O miniconda.sh 28 | chmod +x miniconda.sh && ./miniconda.sh -b 29 | export PATH=/home/travis/miniconda/bin:$PATH 30 | conda update --yes conda 31 | 32 | # Configure the conda environment and put it in the path using the 33 | # provided versions 34 | conda create -n testenv --yes python=$PYTHON_VERSION pip nose \ 35 | numpy scipy scikit-learn cython pandas 36 | 37 | source activate testenv 38 | 39 | if [[ "$COVERAGE" == "true" ]]; then 40 | pip install coverage coveralls 41 | fi 42 | 43 | # build output in the travis output when it succeeds. 44 | python --version 45 | python -c "import numpy; print('numpy %s' % numpy.__version__)" 46 | python -c "import scipy; print('scipy %s' % scipy.__version__)" 47 | python -c "import sklearn; print('sklearn %s' % sklearn.__version__)" 48 | python -c "import pandas; print('pandas %s' % pandas.__version__)" 49 | python setup.py build_ext --inplace 50 | -------------------------------------------------------------------------------- /ci/.travis_test.sh: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/trevorstephens/gplearn 2 | 3 | # This script is meant to be called by the "install" step defined in 4 | # .travis.yml. See http://docs.travis-ci.com/ for more details. 5 | # The behavior of the script is controlled by environment variabled defined 6 | # in the .travis.yml in the top level folder of the project. 
7 | 8 | # License: GNU/GPLv3 9 | 10 | set -e 11 | 12 | python --version 13 | python -c "import numpy; print('numpy %s' % numpy.__version__)" 14 | python -c "import scipy; print('scipy %s' % scipy.__version__)" 15 | python -c "import sklearn; print('sklearn %s' % sklearn.__version__)" 16 | python -c "import pandas; print('pandas %s' % pandas.__version__)" 17 | 18 | if [[ "$COVERAGE" == "true" ]]; then 19 | nosetests -s -v --with-coverage 20 | else 21 | nosetests -s -v 22 | fi 23 | -------------------------------------------------------------------------------- /data/GAMETES_Epistasis_2-Way_20atts_0.4H_EDM-1_1.tsv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/scikit-rebate/16798854e7fbca553416409be8f9ff6f71204dac/data/GAMETES_Epistasis_2-Way_20atts_0.4H_EDM-1_1.tsv.gz -------------------------------------------------------------------------------- /data/GAMETES_Epistasis_2-Way_continuous_endpoint_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/scikit-rebate/16798854e7fbca553416409be8f9ff6f71204dac/data/GAMETES_Epistasis_2-Way_continuous_endpoint_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz -------------------------------------------------------------------------------- /data/GAMETES_Epistasis_2-Way_missing_values_0.1_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/scikit-rebate/16798854e7fbca553416409be8f9ff6f71204dac/data/GAMETES_Epistasis_2-Way_missing_values_0.1_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz -------------------------------------------------------------------------------- /data/GAMETES_Epistasis_2-Way_mixed_attribute_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/scikit-rebate/16798854e7fbca553416409be8f9ff6f71204dac/data/GAMETES_Epistasis_2-Way_mixed_attribute_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | scikit-rebate 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 |
27 | 28 | 29 | 82 | 83 |
84 | 85 | 86 | 90 | 91 | 92 |
93 |
94 |
95 |
    96 |
  • Docs »
  • 97 | 98 | 99 |
  • 100 | 101 |
  • 102 |
103 |
104 |
105 |
106 |
107 | 108 | 109 |

404

110 | 111 |

Page not found

112 | 113 | 114 |
115 |
116 | 130 | 131 |
132 |
133 | 134 |
135 | 136 |
137 | 138 |
139 | 140 | 141 | GitHub 142 | 143 | 144 | 145 | 146 |
147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /docs/citing/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Citing - scikit-rebate 12 | 13 | 14 | 15 | 16 | 17 | 18 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 | 36 | 92 | 93 |
94 | 95 | 96 | 100 | 101 | 102 |
103 |
104 |
105 |
    106 |
  • Docs »
  • 107 | 108 | 109 | 110 |
  • Citing
  • 111 |
  • 112 | 113 | Edit on GitHub 115 | 116 |
  • 117 |
118 |
119 |
120 |
121 |
122 | 123 |

If you use scikit-rebate or the MultiSURF algorithm in a scientific publication, please consider citing the following paper (currently available as a preprint on arXiv):

124 |

Urbanowicz, Ryan J., Randal S. Olson, Peter Schmitt, Melissa Meeker, and Jason H. Moore. "Benchmarking relief-based feature selection methods." arXiv preprint arXiv:1711.08477 (2017).

125 |

Alternatively, a complete review of Relief-based algorithms is available at:

126 |

Urbanowicz, Ryan J., Melissa Meeker, William LaCava, Randal S. Olson, and Jason H. Moore. "Relief-based feature selection: introduction and review." arXiv preprint arXiv:1711.08421 (2017).

127 |

To cite the original Relief paper:

128 |

Kira, Kenji, and Larry A. Rendell. "A practical approach to feature selection." In Machine Learning Proceedings 1992, pp. 249-256. 1992.

129 |

To cite the original ReliefF paper:

130 |

Kononenko, Igor. "Estimating attributes: analysis and extensions of RELIEF." In European conference on machine learning, pp. 171-182. Springer, Berlin, Heidelberg, 1994.

131 |

To cite the original SURF paper:

132 |

Greene, Casey S., Nadia M. Penrod, Jeff Kiralis, and Jason H. Moore. "Spatially uniform relieff (SURF) for computationally-efficient filtering of gene-gene interactions." BioData mining 2, no. 1 (2009): 5.

133 |

To cite the original SURF* paper:

134 |

Greene, Casey S., Daniel S. Himmelstein, Jeff Kiralis, and Jason H. Moore. "The informative extremes: using both nearest and farthest individuals can improve relief algorithms in the domain of human genetics." In European Conference on Evolutionary Computation, Machine Learning and Data Mining in Bioinformatics, pp. 182-193. Springer, Berlin, Heidelberg, 2010.

135 |

To cite the original MultiSURF* paper:

136 |

Granizo-Mackenzie, Delaney, and Jason H. Moore. "Multiple threshold spatially uniform relieff for the genetic analysis of complex human diseases." In European Conference on Evolutionary Computation, Machine Learning and Data Mining in Bioinformatics, pp. 1-10. Springer, Berlin, Heidelberg, 2013.

137 |

To cite the original TuRF paper:

138 |

Moore, Jason H., and Bill C. White. "Tuning ReliefF for genome-wide genetic analysis." In European Conference on Evolutionary Computation, Machine Learning and Data Mining in Bioinformatics, pp. 166-175. Springer, Berlin, Heidelberg, 2007.

139 | 140 |
141 |
142 | 165 | 166 |
167 |
168 | 169 |
170 | 171 |
172 | 173 |
174 | 175 | 176 | GitHub 177 | 178 | 179 | « Previous 180 | 181 | 182 | Next » 183 | 184 | 185 |
186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | -------------------------------------------------------------------------------- /docs/contributing/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Contributing - scikit-rebate 12 | 13 | 14 | 15 | 16 | 17 | 18 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 | 36 | 107 | 108 |
109 | 110 | 111 | 115 | 116 | 117 |
118 |
119 |
120 |
    121 |
  • Docs »
  • 122 | 123 | 124 | 125 |
  • Contributing
  • 126 |
  • 127 | 128 | Edit on GitHub 130 | 131 |
  • 132 |
133 |
134 |
135 |
136 |
137 | 138 |

We welcome you to check the existing issues for bugs or enhancements to work on. If you have an idea for an extension to scikit-rebate, please file a new issue so we can discuss it.

139 |

Project layout

140 |

The latest stable release of scikit-rebate is on the master branch, whereas the latest version of scikit-rebate in development is on the development branch. Make sure you are looking at and working on the correct branch if you're looking to contribute code.

141 |

In terms of directory structure:

142 |
    143 |
  • All of scikit-rebate's code sources are in the skrebate directory
  • 144 |
  • The documentation sources are in the docs_sources directory
  • 145 |
  • The latest documentation build is in the docs directory
  • 146 |
  • Unit tests for scikit-rebate are in the tests.py file
  • 147 |
148 |

Make sure to familiarize yourself with the project layout before making any major contributions, and especially make sure to send all code changes to the development branch.

149 |

How to contribute

150 |

The preferred way to contribute to scikit-rebate is to fork the main repository on GitHub:

153 |
    154 |
  1. 155 |

    Fork the project repository: click on the 'Fork' button near the top of the page. This creates a copy of the code under your account on the GitHub server.

    158 |
  2. 159 |
  3. 160 |

    Clone this copy to your local disk:

    161 |
      $ git clone git@github.com:YourLogin/scikit-rebate.git
    162 |   $ cd scikit-rebate
    163 | 
    164 |
  4. 165 |
  5. 166 |

    Create a branch to hold your changes:

    167 |
      $ git checkout -b my-contribution
    168 | 
    169 |
  6. 170 |
  7. 171 |

    Make sure your local environment is set up correctly for development. Installation instructions are almost identical to the user instructions except that scikit-rebate should not be installed. If you have scikit-rebate installed on your computer, then make sure you are using a virtual environment that does not have scikit-rebate installed. Furthermore, you should make sure you have installed the nose package into your development environment so that you can test changes locally.

    172 |
      $ conda install nose
    173 | 
    174 |
  8. 175 |
  9. 176 |

    Start making changes on your newly created branch, remembering to never work on the master branch! Work on this copy on your computer using Git to do the version control.

    177 |
  10. 178 |
  11. 179 |

    Once some changes are saved locally, you can use your tweaked version of scikit-rebate by navigating to the project's base directory and running scikit-rebate in a script.

    180 |
  12. 181 |
  13. 182 |

    To check that your changes haven't broken any existing tests, and that any new tests you've added pass, run the following (note: you must have the nose package installed within your dev environment for this to work):

    183 |
      $ nosetests -s -v
    184 | 
    185 |
  14. 186 |
  15. 187 |

    When you're done editing and local testing, run:

    188 |
      $ git add modified_files
    189 |   $ git commit
    190 | 
    191 |
  16. 192 |
193 |

to record your changes in Git, then push them to GitHub with:

194 |
      $ git push -u origin my-contribution
195 | 
196 |

Finally, go to the web page of your fork of the scikit-rebate repo, and click 'Pull Request' (PR) to send your changes to the maintainers for review. Make sure that you send your PR to the development branch, as the master branch is reserved for the latest stable release. This will start the CI server to check all the project's unit tests run and send an email to the maintainers.

197 |

(For details on the above look up the Git documentation on the web.)

198 |

Before submitting your pull request

199 |

Before you submit a pull request for your contribution, please work through this checklist to make sure that you have done everything necessary so we can efficiently review and accept your changes.

200 |

If your contribution changes scikit-rebate in any way:

201 |
    202 |
  • 203 |

    Update the documentation so all of your changes are reflected there.

    204 |
  • 205 |
  • 206 |

    Update the README if anything there has changed.

    207 |
  • 208 |
209 |

If your contribution involves any code changes:

210 |
    211 |
  • 212 |

    Update the project unit tests to test your code changes.

    213 |
  • 214 |
  • 215 |

    Make sure that your code is properly commented with docstrings and comments explaining your rationale behind non-obvious coding practices.

    216 |
  • 217 |
218 |

If your contribution requires a new library dependency:

219 |
    220 |
  • 221 |

    Double-check that the new dependency is easy to install via pip or Anaconda and supports both Python 2 and 3. If the dependency requires a complicated installation, then we most likely won't merge your changes because we want to keep scikit-rebate easy to install.

    222 |
  • 223 |
  • 224 |

    Add a line to pip install the library to .travis_install.sh

    225 |
  • 226 |
  • 227 |

    Add a line to print the version of the library to .travis_install.sh

    228 |
  • 229 |
  • 230 |

    Similarly add a line to print the version of the library to .travis_test.sh

    231 |
  • 232 |
233 |

Updating the documentation

234 |

We use mkdocs to manage our documentation. This allows us to write the docs in Markdown and compile them to HTML as needed. Below are a few useful commands to know when updating the documentation. Make sure that you are running them in the base repository directory.

235 |
    236 |
  • 237 |

    mkdocs serve: Hosts a local version of the documentation that you can access at the provided URL. The local version will update automatically as you save changes to the documentation.

    238 |
  • 239 |
  • 240 |

    mkdocs build --clean: Creates a fresh build of the documentation in HTML. Always run this before deploying the documentation to GitHub.

    241 |
  • 242 |
  • 243 |

    mkdocs gh-deploy: Deploys the documentation to GitHub. If you're deploying on your fork of scikit-rebate, the online documentation should be accessible at http://<YOUR GITHUB USERNAME>.github.io/scikit-rebate/. Generally, you shouldn't need to run this command because you can view your changes with mkdocs serve.

    244 |
  • 245 |
246 |

After submitting your pull request

247 |

After submitting your pull request, Travis-CI will automatically run unit tests on your changes and make sure that your updated code builds and runs on Python 2 and 3. We also use services that automatically check code quality and test coverage.

248 |

Check back shortly after submitting your pull request to make sure that your code passes these checks. If any of the checks come back with a red X, then do your best to address the errors.

249 | 250 |
251 |
252 | 275 | 276 |
277 |
278 | 279 |
280 | 281 |
282 | 283 |
284 | 285 | 286 | GitHub 287 | 288 | 289 | « Previous 290 | 291 | 292 | Next » 293 | 294 | 295 |
296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | -------------------------------------------------------------------------------- /docs/css/highlight.css: -------------------------------------------------------------------------------- 1 | /* 2 | This is the GitHub theme for highlight.js 3 | 4 | github.com style (c) Vasily Polovnyov 5 | 6 | */ 7 | 8 | .hljs { 9 | display: block; 10 | overflow-x: auto; 11 | color: #333; 12 | -webkit-text-size-adjust: none; 13 | } 14 | 15 | .hljs-comment, 16 | .diff .hljs-header, 17 | .hljs-javadoc { 18 | color: #998; 19 | font-style: italic; 20 | } 21 | 22 | .hljs-keyword, 23 | .css .rule .hljs-keyword, 24 | .hljs-winutils, 25 | .nginx .hljs-title, 26 | .hljs-subst, 27 | .hljs-request, 28 | .hljs-status { 29 | color: #333; 30 | font-weight: bold; 31 | } 32 | 33 | .hljs-number, 34 | .hljs-hexcolor, 35 | .ruby .hljs-constant { 36 | color: #008080; 37 | } 38 | 39 | .hljs-string, 40 | .hljs-tag .hljs-value, 41 | .hljs-phpdoc, 42 | .hljs-dartdoc, 43 | .tex .hljs-formula { 44 | color: #d14; 45 | } 46 | 47 | .hljs-title, 48 | .hljs-id, 49 | .scss .hljs-preprocessor { 50 | color: #900; 51 | font-weight: bold; 52 | } 53 | 54 | .hljs-list .hljs-keyword, 55 | .hljs-subst { 56 | font-weight: normal; 57 | } 58 | 59 | .hljs-class .hljs-title, 60 | .hljs-type, 61 | .vhdl .hljs-literal, 62 | .tex .hljs-command { 63 | color: #458; 64 | font-weight: bold; 65 | } 66 | 67 | .hljs-tag, 68 | .hljs-tag .hljs-title, 69 | .hljs-rule .hljs-property, 70 | .django .hljs-tag .hljs-keyword { 71 | color: #000080; 72 | font-weight: normal; 73 | } 74 | 75 | .hljs-attribute, 76 | .hljs-variable, 77 | .lisp .hljs-body, 78 | .hljs-name { 79 | color: #008080; 80 | } 81 | 82 | .hljs-regexp { 83 | color: #009926; 84 | } 85 | 86 | .hljs-symbol, 87 | .ruby .hljs-symbol .hljs-string, 88 | .lisp .hljs-keyword, 89 | .clojure .hljs-keyword, 90 | .scheme .hljs-keyword, 91 | .tex .hljs-special, 92 | .hljs-prompt { 93 | color: #990073; 94 | } 95 | 96 | .hljs-built_in { 97 | color: #0086b3; 98 | } 99 | 100 | .hljs-preprocessor, 101 | .hljs-pragma, 102 | .hljs-pi, 103 | .hljs-doctype, 104 | .hljs-shebang, 105 | .hljs-cdata { 106 | color: #999; 107 | font-weight: bold; 108 | } 109 | 110 | .hljs-deletion { 111 | background: #fdd; 112 | } 113 | 114 | .hljs-addition { 115 | background: #dfd; 116 | } 117 | 118 | .diff .hljs-change { 119 | background: #0086b3; 120 | } 121 | 122 | .hljs-chunk { 123 | color: #aaa; 124 | } 125 | -------------------------------------------------------------------------------- /docs/css/theme_extra.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx doesn't have support for section dividers like we do in 3 | * MkDocs, this styles the section titles in the nav 4 | * 5 | * https://github.com/mkdocs/mkdocs/issues/175 6 | */ 7 | .wy-menu-vertical span { 8 | line-height: 18px; 9 | padding: 0.4045em 1.618em; 10 | display: block; 11 | position: relative; 12 | font-size: 90%; 13 | color: #838383; 14 | } 15 | 16 | .wy-menu-vertical .subnav a { 17 | padding: 0.4045em 2.427em; 18 | } 19 | 20 | /* 21 | * Long navigations run off the bottom of the screen as the nav 22 | * area doesn't scroll. 23 | * 24 | * https://github.com/mkdocs/mkdocs/pull/202 25 | * 26 | * Builds upon pull 202 https://github.com/mkdocs/mkdocs/pull/202 27 | * to make toc scrollbar end before navigations buttons to not be overlapping. 
28 | */ 29 | .wy-nav-side { 30 | height: calc(100% - 45px); 31 | overflow-y: auto; 32 | min-height: 0; 33 | } 34 | 35 | .rst-versions{ 36 | border-top: 0; 37 | height: 45px; 38 | } 39 | 40 | @media screen and (max-width: 768px) { 41 | .wy-nav-side { 42 | height: 100%; 43 | } 44 | } 45 | 46 | /* 47 | * readthedocs theme hides nav items when the window height is 48 | * too small to contain them. 49 | * 50 | * https://github.com/mkdocs/mkdocs/issues/#348 51 | */ 52 | .wy-menu-vertical ul { 53 | margin-bottom: 2em; 54 | } 55 | 56 | /* 57 | * Wrap inline code samples otherwise they shoot of the side and 58 | * can't be read at all. 59 | * 60 | * https://github.com/mkdocs/mkdocs/issues/313 61 | * https://github.com/mkdocs/mkdocs/issues/233 62 | * https://github.com/mkdocs/mkdocs/issues/834 63 | */ 64 | code { 65 | white-space: pre-wrap; 66 | word-wrap: break-word; 67 | padding: 2px 5px; 68 | } 69 | 70 | /** 71 | * Make code blocks display as blocks and give them the appropriate 72 | * font size and padding. 73 | * 74 | * https://github.com/mkdocs/mkdocs/issues/855 75 | * https://github.com/mkdocs/mkdocs/issues/834 76 | * https://github.com/mkdocs/mkdocs/issues/233 77 | */ 78 | pre code { 79 | white-space: pre; 80 | word-wrap: normal; 81 | display: block; 82 | padding: 12px; 83 | font-size: 12px; 84 | } 85 | 86 | /* 87 | * Fix link colors when the link text is inline code. 88 | * 89 | * https://github.com/mkdocs/mkdocs/issues/718 90 | */ 91 | a code { 92 | color: #2980B9; 93 | } 94 | a:hover code { 95 | color: #3091d1; 96 | } 97 | a:visited code { 98 | color: #9B59B6; 99 | } 100 | 101 | /* 102 | * The CSS classes from highlight.js seem to clash with the 103 | * ReadTheDocs theme causing some code to be incorrectly made 104 | * bold and italic. 105 | * 106 | * https://github.com/mkdocs/mkdocs/issues/411 107 | */ 108 | pre .cs, pre .c { 109 | font-weight: inherit; 110 | font-style: inherit; 111 | } 112 | 113 | /* 114 | * Fix some issues with the theme and non-highlighted code 115 | * samples. Without and highlighting styles attached the 116 | * formatting is broken. 117 | * 118 | * https://github.com/mkdocs/mkdocs/issues/319 119 | */ 120 | .no-highlight { 121 | display: block; 122 | padding: 0.5em; 123 | color: #333; 124 | } 125 | 126 | 127 | /* 128 | * Additions specific to the search functionality provided by MkDocs 129 | */ 130 | 131 | .search-results article { 132 | margin-top: 23px; 133 | border-top: 1px solid #E1E4E5; 134 | padding-top: 24px; 135 | } 136 | 137 | .search-results article:first-child { 138 | border-top: none; 139 | } 140 | 141 | form .search-query { 142 | width: 100%; 143 | border-radius: 50px; 144 | padding: 6px 12px; /* csslint allow: box-model */ 145 | border-color: #D1D4D5; 146 | } 147 | 148 | .wy-menu-vertical li ul { 149 | display: inherit; 150 | } 151 | 152 | .wy-menu-vertical li ul.subnav ul.subnav{ 153 | padding-left: 1em; 154 | } 155 | 156 | .wy-menu-vertical .subnav li.current > a { 157 | padding-left: 2.42em; 158 | } 159 | .wy-menu-vertical .subnav li.current > ul li a { 160 | padding-left: 3.23em; 161 | } 162 | 163 | /* 164 | * Improve inline code blocks within admonitions. 165 | * 166 | * https://github.com/mkdocs/mkdocs/issues/656 167 | */ 168 | .admonition code { 169 | color: #404040; 170 | border: 1px solid #c7c9cb; 171 | border: 1px solid rgba(0, 0, 0, 0.2); 172 | background: #f8fbfd; 173 | background: rgba(255, 255, 255, 0.7); 174 | } 175 | 176 | /* 177 | * Account for wide tables which go off the side. 
178 | * Override borders to avoid wierdness on narrow tables. 179 | * 180 | * https://github.com/mkdocs/mkdocs/issues/834 181 | * https://github.com/mkdocs/mkdocs/pull/1034 182 | */ 183 | .rst-content .section .docutils { 184 | width: 100%; 185 | overflow: auto; 186 | display: block; 187 | border: none; 188 | } 189 | 190 | td, th { 191 | border: 1px solid #e1e4e5 !important; /* csslint allow: important */ 192 | border-collapse: collapse; 193 | } 194 | 195 | -------------------------------------------------------------------------------- /docs/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/scikit-rebate/16798854e7fbca553416409be8f9ff6f71204dac/docs/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/scikit-rebate/16798854e7fbca553416409be8f9ff6f71204dac/docs/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/scikit-rebate/16798854e7fbca553416409be8f9ff6f71204dac/docs/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/scikit-rebate/16798854e7fbca553416409be8f9ff6f71204dac/docs/img/favicon.ico -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Home - scikit-rebate 12 | 13 | 14 | 15 | 16 | 17 | 18 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 | 36 | 92 | 93 |
94 | 95 | 96 | 100 | 101 | 102 |
103 |
104 |
105 |
    106 |
  • Docs »
  • 107 | 108 | 109 | 110 |
  • Home
  • 111 |
  • 112 | 113 | Edit on GitHub 115 | 116 |
  • 117 |
118 |
119 |
120 |
121 |
122 | 123 |

scikit-rebate is a scikit-learn-compatible Python implementation of ReBATE, a suite of Relief-based feature selection algorithms for Machine Learning. As of 5/7/18, this project is still under active development and we encourage you to check back on this repository regularly for updates.

124 |

These algorithms excel at identifying features that are predictive of the outcome in supervised learning problems, and are especially good at identifying feature interactions that are normally overlooked by standard feature selection methods.

125 |

The main benefit of Relief-based algorithms is that they identify feature interactions without having to exhaustively check every pairwise interaction, thus taking significantly less time than exhaustive pairwise search.

126 |

Relief-based algorithms are commonly applied to genetic analyses, where epistasis (i.e., feature interactions) is common. However, the algorithms implemented in this package can be applied to almost any supervised classification data set and support the following (a short usage sketch follows this list):

127 |
    128 |
  • 129 |

    A mix of categorical and/or continuous features

    130 |
  • 131 |
  • 132 |

    Data with missing values

    133 |
  • 134 |
  • 135 |

    Binary endpoints (i.e., classification)

    136 |
  • 137 |
  • 138 |

    Multi-class endpoints (i.e., classification)

    139 |
  • 140 |
  • 141 |

    Continuous endpoints (i.e., regression)

    142 |
  • 143 |
144 | 145 |
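To make the list above concrete, here is a minimal sketch of scoring and selecting features with ReliefF outside of a pipeline. It reuses the GAMETES example data set from the README; the feature_importances_ attribute and the fit/transform interface are assumed from the package's scikit-learn-compatible design rather than quoted verbatim from the usage docs.

```python
import pandas as pd
from skrebate import ReliefF

# GAMETES example data set, as in the README usage example
genetic_data = pd.read_csv('https://github.com/EpistasisLab/scikit-rebate/raw/master/data/'
                           'GAMETES_Epistasis_2-Way_20atts_0.4H_EDM-1_1.tsv.gz',
                           sep='\t', compression='gzip')
features = genetic_data.drop('class', axis=1).values
labels = genetic_data['class'].values

# Feature types and the endpoint type are detected from the data itself
fs = ReliefF(n_features_to_select=2, n_neighbors=100)
fs.fit(features, labels)

# One Relief score per feature (assumed attribute name, scikit-learn style)
for name, score in zip(genetic_data.drop('class', axis=1).columns, fs.feature_importances_):
    print(name, score)

# Keep only the top-scoring features
features_selected = fs.transform(features)
```

Because the estimator follows the scikit-learn transformer interface, the same object can also be dropped into make_pipeline exactly as shown in the project README.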
146 |
147 | 168 | 169 |
170 |
171 | 172 |
173 | 174 |
175 | 176 |
177 | 178 | 179 | GitHub 180 | 181 | 182 | 183 | Next » 184 | 185 | 186 |
187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 199 | -------------------------------------------------------------------------------- /docs/installing/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Installation - scikit-rebate 12 | 13 | 14 | 15 | 16 | 17 | 18 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 | 36 | 92 | 93 |
94 | 95 | 96 | 100 | 101 | 102 |
103 |
104 |
105 |
    106 |
  • Docs »
  • 107 | 108 | 109 | 110 |
  • Installation
  • 111 |
  • 112 | 113 | Edit on GitHub 115 | 116 |
  • 117 |
118 |
119 |
120 |
121 |
122 | 123 |

scikit-rebate is built on top of the following existing Python packages:

124 |
    125 |
  • 126 |

    NumPy

    127 |
  • 128 |
  • 129 |

    SciPy

    130 |
  • 131 |
  • 132 |

    scikit-learn

    133 |
  • 134 |
135 |

All of the necessary Python packages can be installed via the Anaconda Python distribution, which we strongly recommend that you use. We also strongly recommend that you use Python 3 over Python 2 if you're given the choice.

136 |

NumPy, SciPy, and scikit-learn can be installed in Anaconda via the command:

137 |
conda install numpy scipy scikit-learn
138 | 
139 | 140 |

Once the prerequisites are installed, you should be able to install scikit-rebate with a pip command:

141 |
pip install skrebate
142 | 
143 | 144 |

You can retrieve basic information about your installed version of skrebate with the following pip command:

145 |
pip show skrebate
146 | 
147 | 148 |

You can check that you have the most up to date pypi release of skrebate with the following pip command:

149 |
pip install skrebate -U
150 | 
151 | 152 |

Please file a new issue if you run into installation problems.

153 | 154 |
155 |
156 | 179 | 180 |
181 |
182 | 183 |
184 | 185 |
186 | 187 |
188 | 189 | 190 | GitHub 191 | 192 | 193 | « Previous 194 | 195 | 196 | Next » 197 | 198 | 199 |
200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | -------------------------------------------------------------------------------- /docs/js/modernizr-2.8.3.min.js: -------------------------------------------------------------------------------- 1 | window.Modernizr=function(e,t,n){function r(e){b.cssText=e}function o(e,t){return r(S.join(e+";")+(t||""))}function a(e,t){return typeof e===t}function i(e,t){return!!~(""+e).indexOf(t)}function c(e,t){for(var r in e){var o=e[r];if(!i(o,"-")&&b[o]!==n)return"pfx"==t?o:!0}return!1}function s(e,t,r){for(var o in e){var i=t[e[o]];if(i!==n)return r===!1?e[o]:a(i,"function")?i.bind(r||t):i}return!1}function u(e,t,n){var r=e.charAt(0).toUpperCase()+e.slice(1),o=(e+" "+k.join(r+" ")+r).split(" ");return a(t,"string")||a(t,"undefined")?c(o,t):(o=(e+" "+T.join(r+" ")+r).split(" "),s(o,t,n))}function l(){p.input=function(n){for(var r=0,o=n.length;o>r;r++)j[n[r]]=!!(n[r]in E);return j.list&&(j.list=!(!t.createElement("datalist")||!e.HTMLDataListElement)),j}("autocomplete autofocus list placeholder max min multiple pattern required step".split(" ")),p.inputtypes=function(e){for(var r,o,a,i=0,c=e.length;c>i;i++)E.setAttribute("type",o=e[i]),r="text"!==E.type,r&&(E.value=x,E.style.cssText="position:absolute;visibility:hidden;",/^range$/.test(o)&&E.style.WebkitAppearance!==n?(g.appendChild(E),a=t.defaultView,r=a.getComputedStyle&&"textfield"!==a.getComputedStyle(E,null).WebkitAppearance&&0!==E.offsetHeight,g.removeChild(E)):/^(search|tel)$/.test(o)||(r=/^(url|email)$/.test(o)?E.checkValidity&&E.checkValidity()===!1:E.value!=x)),P[e[i]]=!!r;return P}("search tel url email datetime date month week time datetime-local number range color".split(" "))}var d,f,m="2.8.3",p={},h=!0,g=t.documentElement,v="modernizr",y=t.createElement(v),b=y.style,E=t.createElement("input"),x=":)",w={}.toString,S=" -webkit- -moz- -o- -ms- ".split(" "),C="Webkit Moz O ms",k=C.split(" "),T=C.toLowerCase().split(" "),N={svg:"http://www.w3.org/2000/svg"},M={},P={},j={},$=[],D=$.slice,F=function(e,n,r,o){var a,i,c,s,u=t.createElement("div"),l=t.body,d=l||t.createElement("body");if(parseInt(r,10))for(;r--;)c=t.createElement("div"),c.id=o?o[r]:v+(r+1),u.appendChild(c);return a=["­",'"].join(""),u.id=v,(l?u:d).innerHTML+=a,d.appendChild(u),l||(d.style.background="",d.style.overflow="hidden",s=g.style.overflow,g.style.overflow="hidden",g.appendChild(d)),i=n(u,e),l?u.parentNode.removeChild(u):(d.parentNode.removeChild(d),g.style.overflow=s),!!i},z=function(t){var n=e.matchMedia||e.msMatchMedia;if(n)return n(t)&&n(t).matches||!1;var r;return F("@media "+t+" { #"+v+" { position: absolute; } }",function(t){r="absolute"==(e.getComputedStyle?getComputedStyle(t,null):t.currentStyle).position}),r},A=function(){function e(e,o){o=o||t.createElement(r[e]||"div"),e="on"+e;var i=e in o;return i||(o.setAttribute||(o=t.createElement("div")),o.setAttribute&&o.removeAttribute&&(o.setAttribute(e,""),i=a(o[e],"function"),a(o[e],"undefined")||(o[e]=n),o.removeAttribute(e))),o=null,i}var r={select:"input",change:"input",submit:"form",reset:"form",error:"img",load:"img",abort:"img"};return e}(),L={}.hasOwnProperty;f=a(L,"undefined")||a(L.call,"undefined")?function(e,t){return t in e&&a(e.constructor.prototype[t],"undefined")}:function(e,t){return L.call(e,t)},Function.prototype.bind||(Function.prototype.bind=function(e){var t=this;if("function"!=typeof t)throw new TypeError;var n=D.call(arguments,1),r=function(){if(this instanceof r){var o=function(){};o.prototype=t.prototype;var a=new 
o,i=t.apply(a,n.concat(D.call(arguments)));return Object(i)===i?i:a}return t.apply(e,n.concat(D.call(arguments)))};return r}),M.flexbox=function(){return u("flexWrap")},M.flexboxlegacy=function(){return u("boxDirection")},M.canvas=function(){var e=t.createElement("canvas");return!(!e.getContext||!e.getContext("2d"))},M.canvastext=function(){return!(!p.canvas||!a(t.createElement("canvas").getContext("2d").fillText,"function"))},M.webgl=function(){return!!e.WebGLRenderingContext},M.touch=function(){var n;return"ontouchstart"in e||e.DocumentTouch&&t instanceof DocumentTouch?n=!0:F(["@media (",S.join("touch-enabled),("),v,")","{#modernizr{top:9px;position:absolute}}"].join(""),function(e){n=9===e.offsetTop}),n},M.geolocation=function(){return"geolocation"in navigator},M.postmessage=function(){return!!e.postMessage},M.websqldatabase=function(){return!!e.openDatabase},M.indexedDB=function(){return!!u("indexedDB",e)},M.hashchange=function(){return A("hashchange",e)&&(t.documentMode===n||t.documentMode>7)},M.history=function(){return!(!e.history||!history.pushState)},M.draganddrop=function(){var e=t.createElement("div");return"draggable"in e||"ondragstart"in e&&"ondrop"in e},M.websockets=function(){return"WebSocket"in e||"MozWebSocket"in e},M.rgba=function(){return r("background-color:rgba(150,255,150,.5)"),i(b.backgroundColor,"rgba")},M.hsla=function(){return r("background-color:hsla(120,40%,100%,.5)"),i(b.backgroundColor,"rgba")||i(b.backgroundColor,"hsla")},M.multiplebgs=function(){return r("background:url(https://),url(https://),red url(https://)"),/(url\s*\(.*?){3}/.test(b.background)},M.backgroundsize=function(){return u("backgroundSize")},M.borderimage=function(){return u("borderImage")},M.borderradius=function(){return u("borderRadius")},M.boxshadow=function(){return u("boxShadow")},M.textshadow=function(){return""===t.createElement("div").style.textShadow},M.opacity=function(){return o("opacity:.55"),/^0.55$/.test(b.opacity)},M.cssanimations=function(){return u("animationName")},M.csscolumns=function(){return u("columnCount")},M.cssgradients=function(){var e="background-image:",t="gradient(linear,left top,right bottom,from(#9f9),to(white));",n="linear-gradient(left top,#9f9, white);";return r((e+"-webkit- ".split(" ").join(t+e)+S.join(n+e)).slice(0,-e.length)),i(b.backgroundImage,"gradient")},M.cssreflections=function(){return u("boxReflect")},M.csstransforms=function(){return!!u("transform")},M.csstransforms3d=function(){var e=!!u("perspective");return e&&"webkitPerspective"in g.style&&F("@media (transform-3d),(-webkit-transform-3d){#modernizr{left:9px;position:absolute;height:3px;}}",function(t){e=9===t.offsetLeft&&3===t.offsetHeight}),e},M.csstransitions=function(){return u("transition")},M.fontface=function(){var e;return F('@font-face {font-family:"font";src:url("https://")}',function(n,r){var o=t.getElementById("smodernizr"),a=o.sheet||o.styleSheet,i=a?a.cssRules&&a.cssRules[0]?a.cssRules[0].cssText:a.cssText||"":"";e=/src/i.test(i)&&0===i.indexOf(r.split(" ")[0])}),e},M.generatedcontent=function(){var e;return F(["#",v,"{font:0/0 a}#",v,':after{content:"',x,'";visibility:hidden;font:3px/1 a}'].join(""),function(t){e=t.offsetHeight>=3}),e},M.video=function(){var e=t.createElement("video"),n=!1;try{(n=!!e.canPlayType)&&(n=new Boolean(n),n.ogg=e.canPlayType('video/ogg; codecs="theora"').replace(/^no$/,""),n.h264=e.canPlayType('video/mp4; codecs="avc1.42E01E"').replace(/^no$/,""),n.webm=e.canPlayType('video/webm; codecs="vp8, vorbis"').replace(/^no$/,""))}catch(r){}return 
n},M.audio=function(){var e=t.createElement("audio"),n=!1;try{(n=!!e.canPlayType)&&(n=new Boolean(n),n.ogg=e.canPlayType('audio/ogg; codecs="vorbis"').replace(/^no$/,""),n.mp3=e.canPlayType("audio/mpeg;").replace(/^no$/,""),n.wav=e.canPlayType('audio/wav; codecs="1"').replace(/^no$/,""),n.m4a=(e.canPlayType("audio/x-m4a;")||e.canPlayType("audio/aac;")).replace(/^no$/,""))}catch(r){}return n},M.localstorage=function(){try{return localStorage.setItem(v,v),localStorage.removeItem(v),!0}catch(e){return!1}},M.sessionstorage=function(){try{return sessionStorage.setItem(v,v),sessionStorage.removeItem(v),!0}catch(e){return!1}},M.webworkers=function(){return!!e.Worker},M.applicationcache=function(){return!!e.applicationCache},M.svg=function(){return!!t.createElementNS&&!!t.createElementNS(N.svg,"svg").createSVGRect},M.inlinesvg=function(){var e=t.createElement("div");return e.innerHTML="",(e.firstChild&&e.firstChild.namespaceURI)==N.svg},M.smil=function(){return!!t.createElementNS&&/SVGAnimate/.test(w.call(t.createElementNS(N.svg,"animate")))},M.svgclippaths=function(){return!!t.createElementNS&&/SVGClipPath/.test(w.call(t.createElementNS(N.svg,"clipPath")))};for(var H in M)f(M,H)&&(d=H.toLowerCase(),p[d]=M[H](),$.push((p[d]?"":"no-")+d));return p.input||l(),p.addTest=function(e,t){if("object"==typeof e)for(var r in e)f(e,r)&&p.addTest(r,e[r]);else{if(e=e.toLowerCase(),p[e]!==n)return p;t="function"==typeof t?t():t,"undefined"!=typeof h&&h&&(g.className+=" "+(t?"":"no-")+e),p[e]=t}return p},r(""),y=E=null,function(e,t){function n(e,t){var n=e.createElement("p"),r=e.getElementsByTagName("head")[0]||e.documentElement;return n.innerHTML="x",r.insertBefore(n.lastChild,r.firstChild)}function r(){var e=y.elements;return"string"==typeof e?e.split(" "):e}function o(e){var t=v[e[h]];return t||(t={},g++,e[h]=g,v[g]=t),t}function a(e,n,r){if(n||(n=t),l)return n.createElement(e);r||(r=o(n));var a;return a=r.cache[e]?r.cache[e].cloneNode():p.test(e)?(r.cache[e]=r.createElem(e)).cloneNode():r.createElem(e),!a.canHaveChildren||m.test(e)||a.tagUrn?a:r.frag.appendChild(a)}function i(e,n){if(e||(e=t),l)return e.createDocumentFragment();n=n||o(e);for(var a=n.frag.cloneNode(),i=0,c=r(),s=c.length;s>i;i++)a.createElement(c[i]);return a}function c(e,t){t.cache||(t.cache={},t.createElem=e.createElement,t.createFrag=e.createDocumentFragment,t.frag=t.createFrag()),e.createElement=function(n){return y.shivMethods?a(n,e,t):t.createElem(n)},e.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+r().join().replace(/[\w\-]+/g,function(e){return t.createElem(e),t.frag.createElement(e),'c("'+e+'")'})+");return n}")(y,t.frag)}function s(e){e||(e=t);var r=o(e);return!y.shivCSS||u||r.hasCSS||(r.hasCSS=!!n(e,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||c(e,r),e}var u,l,d="3.7.0",f=e.html5||{},m=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,h="_html5shiv",g=0,v={};!function(){try{var e=t.createElement("a");e.innerHTML="",u="hidden"in e,l=1==e.childNodes.length||function(){t.createElement("a");var e=t.createDocumentFragment();return"undefined"==typeof e.cloneNode||"undefined"==typeof e.createDocumentFragment||"undefined"==typeof e.createElement}()}catch(n){u=!0,l=!0}}();var y={elements:f.elements||"abbr article aside audio bdi canvas data 
datalist details dialog figcaption figure footer header hgroup main mark meter nav output progress section summary template time video",version:d,shivCSS:f.shivCSS!==!1,supportsUnknownElements:l,shivMethods:f.shivMethods!==!1,type:"default",shivDocument:s,createElement:a,createDocumentFragment:i};e.html5=y,s(t)}(this,t),p._version=m,p._prefixes=S,p._domPrefixes=T,p._cssomPrefixes=k,p.mq=z,p.hasEvent=A,p.testProp=function(e){return c([e])},p.testAllProps=u,p.testStyles=F,p.prefixed=function(e,t,n){return t?u(e,t,n):u(e,"pfx")},g.className=g.className.replace(/(^|\s)no-js(\s|$)/,"$1$2")+(h?" js "+$.join(" "):""),p}(this,this.document); -------------------------------------------------------------------------------- /docs/js/theme.js: -------------------------------------------------------------------------------- 1 | $( document ).ready(function() { 2 | // Shift nav in mobile when clicking the menu. 3 | $(document).on('click', "[data-toggle='wy-nav-top']", function() { 4 | $("[data-toggle='wy-nav-shift']").toggleClass("shift"); 5 | $("[data-toggle='rst-versions']").toggleClass("shift"); 6 | }); 7 | 8 | // Close menu when you click a link. 9 | $(document).on('click', ".wy-menu-vertical .current ul li a", function() { 10 | $("[data-toggle='wy-nav-shift']").removeClass("shift"); 11 | $("[data-toggle='rst-versions']").toggleClass("shift"); 12 | }); 13 | 14 | // Keyboard navigation 15 | document.addEventListener("keydown", function(e) { 16 | if ($(e.target).is(':input')) return true; 17 | var key = e.which || e.keyCode || window.event && window.event.keyCode; 18 | var page; 19 | switch (key) { 20 | case 39: // right arrow 21 | page = $('[role="navigation"] a:contains(Next):first').prop('href'); 22 | break; 23 | case 37: // left arrow 24 | page = $('[role="navigation"] a:contains(Previous):first').prop('href'); 25 | break; 26 | default: break; 27 | } 28 | if (page) window.location.href = page; 29 | }); 30 | 31 | $(document).on('click', "[data-toggle='rst-current-version']", function() { 32 | $("[data-toggle='rst-versions']").toggleClass("shift-up"); 33 | }); 34 | 35 | // Make tables responsive 36 | $("table.docutils:not(.field-list)").wrap("
"); 37 | 38 | hljs.initHighlightingOnLoad(); 39 | 40 | $('table').addClass('docutils'); 41 | }); 42 | 43 | window.SphinxRtdTheme = (function (jquery) { 44 | var stickyNav = (function () { 45 | var navBar, 46 | win, 47 | stickyNavCssClass = 'stickynav', 48 | applyStickNav = function () { 49 | if (navBar.height() <= win.height()) { 50 | navBar.addClass(stickyNavCssClass); 51 | } else { 52 | navBar.removeClass(stickyNavCssClass); 53 | } 54 | }, 55 | enable = function () { 56 | applyStickNav(); 57 | win.on('resize', applyStickNav); 58 | }, 59 | init = function () { 60 | navBar = jquery('nav.wy-nav-side:first'); 61 | win = jquery(window); 62 | }; 63 | jquery(init); 64 | return { 65 | enable : enable 66 | }; 67 | }()); 68 | return { 69 | StickyNav : stickyNav 70 | }; 71 | }($)); 72 | 73 | // The code below is a copy of @seanmadsen code posted Jan 10, 2017 on issue 803. 74 | // https://github.com/mkdocs/mkdocs/issues/803 75 | // This just incorporates the auto scroll into the theme itself without 76 | // the need for additional custom.js file. 77 | // 78 | $(function() { 79 | $.fn.isFullyWithinViewport = function(){ 80 | var viewport = {}; 81 | viewport.top = $(window).scrollTop(); 82 | viewport.bottom = viewport.top + $(window).height(); 83 | var bounds = {}; 84 | bounds.top = this.offset().top; 85 | bounds.bottom = bounds.top + this.outerHeight(); 86 | return ( ! ( 87 | (bounds.top <= viewport.top) || 88 | (bounds.bottom >= viewport.bottom) 89 | ) ); 90 | }; 91 | if( $('li.toctree-l1.current').length && !$('li.toctree-l1.current').isFullyWithinViewport() ) { 92 | $('.wy-nav-side') 93 | .scrollTop( 94 | $('li.toctree-l1.current').offset().top - 95 | $('.wy-nav-side').offset().top - 96 | 60 97 | ); 98 | } 99 | }); 100 | -------------------------------------------------------------------------------- /docs/releases/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Release Notes - scikit-rebate 12 | 13 | 14 | 15 | 16 | 17 | 18 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 | 36 | 110 | 111 |
112 | 113 | 114 | 118 | 119 | 120 |
121 |
122 |
123 |
    124 |
  • Docs »
  • 125 | 126 | 127 | 128 |
  • Release Notes
  • 129 |
  • 130 | 131 | Edit on GitHub 133 | 134 |
  • 135 |
136 |
137 |
138 |
139 |
140 | 141 |

scikit-rebate 0.6

142 |
    143 |
  • 144 |

    Fixed internal TuRF implementation so that it outputs scores for all features. Those that make it to the last iteration get true core algorithm scoring, while those that were removed along the way are assigned token scores (lower than the lowest true-scoring feature) that indicate when the respective feature(s) were removed. This also allows greater flexibility when the user specifies the number of features to return. (A brief usage sketch for the internal TuRF and for RFE follows at the end of this list.)

    145 |
  • 146 |
  • 147 |

    Updated the usage documentation to demonstrate how to use RFE as well as the newly updated internal TuRF implementation.

    148 |
  • 149 |
  • 150 |

    Fixed the pct parameter of TuRF so that it properly determines the percentage of features removed each iteration, as well as the total number of iterations, as described in the original TuRF paper. Also handled the edge case to ensure that at least one feature is removed in each TuRF iteration.

    151 |
  • 152 |
  • 153 |

    Fixed the ability to parallelize runs of the core algorithm while using TuRF.

    154 |
  • 155 |
  • 156 |

    Updated the unit testing file to remove some excess unit tests, add other relevant ones, speed up testing overall, and better organize the tests.

    157 |
  • 158 |
  • 159 |

    Added a preliminary implementation of VLSRelief to scikit-rebate, along with associated unit tests. Documentation and code examples are not yet available.

    160 |
  • 161 |
  • 162 |

    Removed some unused code from the TuRF implementation.

    163 |
  • 164 |
  • 165 |

    Added a check to the scikit-learn-required transform method in both relieff.py and turf.py to ensure that the number of selected features requested by the user is not larger than the number of features in the dataset.

    166 |
  • 167 |
  • 168 |

    Reduced the default value for the number of features selected.

    169 |
  • 170 |
171 |
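A brief usage sketch of the internal TuRF and RFE workflows described in the notes above. This is an illustrative example only, not taken from the project documentation: it assumes that ReliefF and TuRF are exposed at the top level of the skrebate package, that TuRF's fit accepts the list of feature names as a third argument, and that fitted estimators expose a feature_importances_ attribute; the toy data is hypothetical and exact signatures may differ between releases.

```
import numpy as np
from sklearn.feature_selection import RFE
from skrebate import ReliefF, TuRF

# Hypothetical toy data: 200 samples, 10 discrete features, binary endpoint.
rng = np.random.RandomState(0)
X = rng.randint(0, 3, size=(200, 10)).astype(float)
y = rng.randint(0, 2, size=200)
headers = ['f{}'.format(i) for i in range(X.shape[1])]

# Internal TuRF wrapper: iteratively removes the lowest-scoring pct of features,
# yet still reports a (token) score for every feature it removed along the way.
fs = TuRF(core_algorithm='ReliefF', n_features_to_select=2, pct=0.5)
fs.fit(X, y, headers)            # TuRF's fit also takes the feature names
print(fs.feature_importances_)   # one score per original feature
X_top = fs.transform(X)          # raises if more features are requested than exist

# Alternative: scikit-learn's RFE driving ReliefF through feature_importances_.
rfe = RFE(ReliefF(n_neighbors=20), n_features_to_select=2, step=0.5)
X_top_rfe = rfe.fit_transform(X, y)
```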

scikit-rebate 0.5

172 |
    173 |
  • 174 |

    Added fixes to score normalizations that should ensure that feature scores for all algorithms fall between -1 and 1.

    175 |
  • 176 |
  • 177 |

    Added multi-class endpoint functionality (the algorithms now discriminate between binary and multi-class endpoints), including new methods for multi-class score update normalization. (A short illustrative check of the new score range on a multi-class endpoint follows at the end of this list.)

    178 |
  • 179 |
  • 180 |

    Fixed normalization for missing data.

    181 |
  • 182 |
  • 183 |

    Fixed inconsistent pre-normalization for continuous feature data.

    184 |
  • 185 |
  • 186 |

    Added a custom ramp function to improve performance of all algorithms on data with a mix of discrete and continuous features. The ramp is based on the standard deviation of a given continuous feature.

    187 |
  • 188 |
  • 189 |

    Updated the implementation of TuRF as an internal custom component of ReBATE.

    190 |
  • 191 |
192 |
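As a quick illustration of the score-normalization and multi-class changes above, the following hypothetical check (assumed usage, not part of the project's test suite) fits MultiSURF on a three-class endpoint and confirms that all feature scores fall in the [-1, 1] range.

```
import numpy as np
from skrebate import MultiSURF

# Hypothetical multi-class toy data: 150 samples, 8 discrete features, 3 classes.
rng = np.random.RandomState(42)
X = rng.randint(0, 3, size=(150, 8)).astype(float)
y = rng.randint(0, 3, size=150)

fs = MultiSURF(n_features_to_select=3)
fs.fit(X, y)                          # multi-class endpoints are detected automatically
scores = fs.feature_importances_
assert np.all(scores >= -1.0) and np.all(scores <= 1.0)
```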

scikit-rebate 0.4

193 |
    194 |
  • 195 |

    Added support for multicore processing to all Relief algorithms. Multiprocessing is now also supported in Python 2.

    196 |
  • 197 |
  • 198 |

    The ReliefF algorithm now accepts float values in the range (0, 1.0] for the n_neighbors parameter. Float values will be interpreted as a fraction of the training set sample size. (A short sketch follows at the end of this list.)

    199 |
  • 200 |
  • 201 |

    Refined the MultiSURF and MultiSURF* algorithms. From our internal research, MultiSURF is now one of our best-performing feature selection algorithms.

    202 |
  • 203 |
204 |
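To make the n_neighbors change above concrete, here is a minimal sketch (assumed usage based on this note, not copied from the documentation): an integer is treated as an absolute neighbor count, while a float in (0, 1.0] is treated as a fraction of the training-set size.

```
from skrebate import ReliefF

# An absolute number of neighbors...
fs_count = ReliefF(n_features_to_select=5, n_neighbors=100)

# ...or, as of 0.4, a fraction of the training set: with 1,000 training
# samples, n_neighbors=0.1 is equivalent to n_neighbors=100.
fs_fraction = ReliefF(n_features_to_select=5, n_neighbors=0.1)
```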

scikit-rebate 0.3

205 |
    206 |
  • 207 |

    Added a parallelization parameter, n_jobs, to ReliefF, SURF, SURF*, and MultiSURF via joblib. (A brief sketch follows at the end of this list.)

    208 |
  • 209 |
  • 210 |

    Renamed the dlimit parameter to discrete_limit to better reflect the purpose of the parameter.

    211 |
  • 212 |
  • 213 |

    Minor code optimizations.

    214 |
  • 215 |
216 |
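A brief sketch of the 0.3 parallelization parameter mentioned above (assumed usage; the n_jobs convention follows joblib, and parameter names may differ in later releases).

```
from skrebate import SURF

# n_jobs parallelizes the scoring loop via joblib; -1 uses all available cores.
# (The former dlimit parameter is now called discrete_limit, per the note above.)
fs = SURF(n_features_to_select=10, n_jobs=-1)
```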

scikit-rebate 0.2

217 |
    218 |
  • 219 |

    Added documentation.

    220 |
  • 221 |
  • 222 |

    Minor code optimizations.

    223 |
  • 224 |
225 |

scikit-rebate 0.1

226 |
    227 |
  • Initial release of Relief algorithms, including ReliefF, SURF, SURF*, and MultiSURF.
  • 228 |
229 | 230 |
231 |
232 | 255 | 256 |
257 |
258 | 259 |
260 | 261 |
262 | 263 |
264 | 265 | 266 | GitHub 267 | 268 | 269 | « Previous 270 | 271 | 272 | Next » 273 | 274 | 275 |
276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | -------------------------------------------------------------------------------- /docs/search.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | scikit-rebate 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 |
27 | 28 | 29 | 82 | 83 |
84 | 85 | 86 | 90 | 91 | 92 |
93 |
94 |
95 |
    96 |
  • Docs »
  • 97 | 98 | 99 |
  • 100 | 101 |
  • 102 |
103 |
104 |
105 |
106 |
107 | 108 | 109 |

Search Results

110 | 111 | 115 | 116 |
117 | Searching... 118 |
119 | 120 | 121 |
122 |
123 | 137 | 138 |
139 |
140 | 141 |
142 | 143 |
144 | 145 |
146 | 147 | 148 | GitHub 149 | 150 | 151 | 152 | 153 |
154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | -------------------------------------------------------------------------------- /docs/search/lunr.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 0.7.0 3 | * Copyright (C) 2016 Oliver Nightingale 4 | * MIT Licensed 5 | * @license 6 | */ 7 | !function(){var t=function(e){var n=new t.Index;return n.pipeline.add(t.trimmer,t.stopWordFilter,t.stemmer),e&&e.call(n,n),n};t.version="0.7.0",t.utils={},t.utils.warn=function(t){return function(e){t.console&&console.warn&&console.warn(e)}}(this),t.utils.asString=function(t){return void 0===t||null===t?"":t.toString()},t.EventEmitter=function(){this.events={}},t.EventEmitter.prototype.addListener=function(){var t=Array.prototype.slice.call(arguments),e=t.pop(),n=t;if("function"!=typeof e)throw new TypeError("last argument must be a function");n.forEach(function(t){this.hasHandler(t)||(this.events[t]=[]),this.events[t].push(e)},this)},t.EventEmitter.prototype.removeListener=function(t,e){if(this.hasHandler(t)){var n=this.events[t].indexOf(e);this.events[t].splice(n,1),this.events[t].length||delete this.events[t]}},t.EventEmitter.prototype.emit=function(t){if(this.hasHandler(t)){var e=Array.prototype.slice.call(arguments,1);this.events[t].forEach(function(t){t.apply(void 0,e)})}},t.EventEmitter.prototype.hasHandler=function(t){return t in this.events},t.tokenizer=function(e){return arguments.length&&null!=e&&void 0!=e?Array.isArray(e)?e.map(function(e){return t.utils.asString(e).toLowerCase()}):e.toString().trim().toLowerCase().split(t.tokenizer.seperator):[]},t.tokenizer.seperator=/[\s\-]+/,t.tokenizer.load=function(t){var e=this.registeredFunctions[t];if(!e)throw new Error("Cannot load un-registered function: "+t);return e},t.tokenizer.label="default",t.tokenizer.registeredFunctions={"default":t.tokenizer},t.tokenizer.registerFunction=function(e,n){n in this.registeredFunctions&&t.utils.warn("Overwriting existing tokenizer: "+n),e.label=n,this.registeredFunctions[n]=e},t.Pipeline=function(){this._stack=[]},t.Pipeline.registeredFunctions={},t.Pipeline.registerFunction=function(e,n){n in this.registeredFunctions&&t.utils.warn("Overwriting existing registered function: "+n),e.label=n,t.Pipeline.registeredFunctions[e.label]=e},t.Pipeline.warnIfFunctionNotRegistered=function(e){var n=e.label&&e.label in this.registeredFunctions;n||t.utils.warn("Function is not registered with pipeline. 
This may cause problems when serialising the index.\n",e)},t.Pipeline.load=function(e){var n=new t.Pipeline;return e.forEach(function(e){var i=t.Pipeline.registeredFunctions[e];if(!i)throw new Error("Cannot load un-registered function: "+e);n.add(i)}),n},t.Pipeline.prototype.add=function(){var e=Array.prototype.slice.call(arguments);e.forEach(function(e){t.Pipeline.warnIfFunctionNotRegistered(e),this._stack.push(e)},this)},t.Pipeline.prototype.after=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._stack.indexOf(e);if(-1==i)throw new Error("Cannot find existingFn");i+=1,this._stack.splice(i,0,n)},t.Pipeline.prototype.before=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._stack.indexOf(e);if(-1==i)throw new Error("Cannot find existingFn");this._stack.splice(i,0,n)},t.Pipeline.prototype.remove=function(t){var e=this._stack.indexOf(t);-1!=e&&this._stack.splice(e,1)},t.Pipeline.prototype.run=function(t){for(var e=[],n=t.length,i=this._stack.length,r=0;n>r;r++){for(var o=t[r],s=0;i>s&&(o=this._stack[s](o,r,t),void 0!==o&&""!==o);s++);void 0!==o&&""!==o&&e.push(o)}return e},t.Pipeline.prototype.reset=function(){this._stack=[]},t.Pipeline.prototype.toJSON=function(){return this._stack.map(function(e){return t.Pipeline.warnIfFunctionNotRegistered(e),e.label})},t.Vector=function(){this._magnitude=null,this.list=void 0,this.length=0},t.Vector.Node=function(t,e,n){this.idx=t,this.val=e,this.next=n},t.Vector.prototype.insert=function(e,n){this._magnitude=void 0;var i=this.list;if(!i)return this.list=new t.Vector.Node(e,n,i),this.length++;if(en.idx?n=n.next:(i+=e.val*n.val,e=e.next,n=n.next);return i},t.Vector.prototype.similarity=function(t){return this.dot(t)/(this.magnitude()*t.magnitude())},t.SortedSet=function(){this.length=0,this.elements=[]},t.SortedSet.load=function(t){var e=new this;return e.elements=t,e.length=t.length,e},t.SortedSet.prototype.add=function(){var t,e;for(t=0;t1;){if(o===t)return r;t>o&&(e=r),o>t&&(n=r),i=n-e,r=e+Math.floor(i/2),o=this.elements[r]}return o===t?r:-1},t.SortedSet.prototype.locationFor=function(t){for(var e=0,n=this.elements.length,i=n-e,r=e+Math.floor(i/2),o=this.elements[r];i>1;)t>o&&(e=r),o>t&&(n=r),i=n-e,r=e+Math.floor(i/2),o=this.elements[r];return o>t?r:t>o?r+1:void 0},t.SortedSet.prototype.intersect=function(e){for(var n=new t.SortedSet,i=0,r=0,o=this.length,s=e.length,a=this.elements,h=e.elements;;){if(i>o-1||r>s-1)break;a[i]!==h[r]?a[i]h[r]&&r++:(n.add(a[i]),i++,r++)}return n},t.SortedSet.prototype.clone=function(){var e=new t.SortedSet;return e.elements=this.toArray(),e.length=e.elements.length,e},t.SortedSet.prototype.union=function(t){var e,n,i;this.length>=t.length?(e=this,n=t):(e=t,n=this),i=e.clone();for(var r=0,o=n.toArray();rp;p++)c[p]===a&&d++;h+=d/f*l.boost}}this.tokenStore.add(a,{ref:o,tf:h})}n&&this.eventEmitter.emit("add",e,this)},t.Index.prototype.remove=function(t,e){var n=t[this._ref],e=void 0===e?!0:e;if(this.documentStore.has(n)){var i=this.documentStore.get(n);this.documentStore.remove(n),i.forEach(function(t){this.tokenStore.remove(t,n)},this),e&&this.eventEmitter.emit("remove",t,this)}},t.Index.prototype.update=function(t,e){var e=void 0===e?!0:e;this.remove(t,!1),this.add(t,!1),e&&this.eventEmitter.emit("update",t,this)},t.Index.prototype.idf=function(t){var e="@"+t;if(Object.prototype.hasOwnProperty.call(this._idfCache,e))return this._idfCache[e];var n=this.tokenStore.count(t),i=1;return 
n>0&&(i=1+Math.log(this.documentStore.length/n)),this._idfCache[e]=i},t.Index.prototype.search=function(e){var n=this.pipeline.run(this.tokenizerFn(e)),i=new t.Vector,r=[],o=this._fields.reduce(function(t,e){return t+e.boost},0),s=n.some(function(t){return this.tokenStore.has(t)},this);if(!s)return[];n.forEach(function(e,n,s){var a=1/s.length*this._fields.length*o,h=this,u=this.tokenStore.expand(e).reduce(function(n,r){var o=h.corpusTokens.indexOf(r),s=h.idf(r),u=1,l=new t.SortedSet;if(r!==e){var c=Math.max(3,r.length-e.length);u=1/Math.log(c)}o>-1&&i.insert(o,a*s*u);for(var f=h.tokenStore.get(r),d=Object.keys(f),p=d.length,v=0;p>v;v++)l.add(f[d[v]].ref);return n.union(l)},new t.SortedSet);r.push(u)},this);var a=r.reduce(function(t,e){return t.intersect(e)});return a.map(function(t){return{ref:t,score:i.similarity(this.documentVector(t))}},this).sort(function(t,e){return e.score-t.score})},t.Index.prototype.documentVector=function(e){for(var n=this.documentStore.get(e),i=n.length,r=new t.Vector,o=0;i>o;o++){var s=n.elements[o],a=this.tokenStore.get(s)[e].tf,h=this.idf(s);r.insert(this.corpusTokens.indexOf(s),a*h)}return r},t.Index.prototype.toJSON=function(){return{version:t.version,fields:this._fields,ref:this._ref,tokenizer:this.tokenizerFn.label,documentStore:this.documentStore.toJSON(),tokenStore:this.tokenStore.toJSON(),corpusTokens:this.corpusTokens.toJSON(),pipeline:this.pipeline.toJSON()}},t.Index.prototype.use=function(t){var e=Array.prototype.slice.call(arguments,1);e.unshift(this),t.apply(this,e)},t.Store=function(){this.store={},this.length=0},t.Store.load=function(e){var n=new this;return n.length=e.length,n.store=Object.keys(e.store).reduce(function(n,i){return n[i]=t.SortedSet.load(e.store[i]),n},{}),n},t.Store.prototype.set=function(t,e){this.has(t)||this.length++,this.store[t]=e},t.Store.prototype.get=function(t){return this.store[t]},t.Store.prototype.has=function(t){return t in this.store},t.Store.prototype.remove=function(t){this.has(t)&&(delete this.store[t],this.length--)},t.Store.prototype.toJSON=function(){return{store:this.store,length:this.length}},t.stemmer=function(){var t={ational:"ate",tional:"tion",enci:"ence",anci:"ance",izer:"ize",bli:"ble",alli:"al",entli:"ent",eli:"e",ousli:"ous",ization:"ize",ation:"ate",ator:"ate",alism:"al",iveness:"ive",fulness:"ful",ousness:"ous",aliti:"al",iviti:"ive",biliti:"ble",logi:"log"},e={icate:"ic",ative:"",alize:"al",iciti:"ic",ical:"ic",ful:"",ness:""},n="[^aeiou]",i="[aeiouy]",r=n+"[^aeiouy]*",o=i+"[aeiou]*",s="^("+r+")?"+o+r,a="^("+r+")?"+o+r+"("+o+")?$",h="^("+r+")?"+o+r+o+r,u="^("+r+")?"+i,l=new RegExp(s),c=new RegExp(h),f=new RegExp(a),d=new RegExp(u),p=/^(.+?)(ss|i)es$/,v=/^(.+?)([^s])s$/,g=/^(.+?)eed$/,m=/^(.+?)(ed|ing)$/,y=/.$/,S=/(at|bl|iz)$/,w=new RegExp("([^aeiouylsz])\\1$"),k=new RegExp("^"+r+i+"[^aeiouwxy]$"),x=/^(.+?[^aeiou])y$/,b=/^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/,E=/^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/,F=/^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/,_=/^(.+?)(s|t)(ion)$/,z=/^(.+?)e$/,O=/ll$/,P=new RegExp("^"+r+i+"[^aeiouwxy]$"),T=function(n){var i,r,o,s,a,h,u;if(n.length<3)return n;if(o=n.substr(0,1),"y"==o&&(n=o.toUpperCase()+n.substr(1)),s=p,a=v,s.test(n)?n=n.replace(s,"$1$2"):a.test(n)&&(n=n.replace(a,"$1$2")),s=g,a=m,s.test(n)){var T=s.exec(n);s=l,s.test(T[1])&&(s=y,n=n.replace(s,""))}else if(a.test(n)){var 
T=a.exec(n);i=T[1],a=d,a.test(i)&&(n=i,a=S,h=w,u=k,a.test(n)?n+="e":h.test(n)?(s=y,n=n.replace(s,"")):u.test(n)&&(n+="e"))}if(s=x,s.test(n)){var T=s.exec(n);i=T[1],n=i+"i"}if(s=b,s.test(n)){var T=s.exec(n);i=T[1],r=T[2],s=l,s.test(i)&&(n=i+t[r])}if(s=E,s.test(n)){var T=s.exec(n);i=T[1],r=T[2],s=l,s.test(i)&&(n=i+e[r])}if(s=F,a=_,s.test(n)){var T=s.exec(n);i=T[1],s=c,s.test(i)&&(n=i)}else if(a.test(n)){var T=a.exec(n);i=T[1]+T[2],a=c,a.test(i)&&(n=i)}if(s=z,s.test(n)){var T=s.exec(n);i=T[1],s=c,a=f,h=P,(s.test(i)||a.test(i)&&!h.test(i))&&(n=i)}return s=O,a=c,s.test(n)&&a.test(n)&&(s=y,n=n.replace(s,"")),"y"==o&&(n=o.toLowerCase()+n.substr(1)),n};return T}(),t.Pipeline.registerFunction(t.stemmer,"stemmer"),t.generateStopWordFilter=function(t){var e=t.reduce(function(t,e){return t[e]=e,t},{});return function(t){return t&&e[t]!==t?t:void 0}},t.stopWordFilter=t.generateStopWordFilter(["a","able","about","across","after","all","almost","also","am","among","an","and","any","are","as","at","be","because","been","but","by","can","cannot","could","dear","did","do","does","either","else","ever","every","for","from","get","got","had","has","have","he","her","hers","him","his","how","however","i","if","in","into","is","it","its","just","least","let","like","likely","may","me","might","most","must","my","neither","no","nor","not","of","off","often","on","only","or","other","our","own","rather","said","say","says","she","should","since","so","some","than","that","the","their","them","then","there","these","they","this","tis","to","too","twas","us","wants","was","we","were","what","when","where","which","while","who","whom","why","will","with","would","yet","you","your"]),t.Pipeline.registerFunction(t.stopWordFilter,"stopWordFilter"),t.trimmer=function(t){return t.replace(/^\W+/,"").replace(/\W+$/,"")},t.Pipeline.registerFunction(t.trimmer,"trimmer"),t.TokenStore=function(){this.root={docs:{}},this.length=0},t.TokenStore.load=function(t){var e=new this;return e.root=t.root,e.length=t.length,e},t.TokenStore.prototype.add=function(t,e,n){var n=n||this.root,i=t.charAt(0),r=t.slice(1);return i in n||(n[i]={docs:{}}),0===r.length?(n[i].docs[e.ref]=e,void(this.length+=1)):this.add(r,e,n[i])},t.TokenStore.prototype.has=function(t){if(!t)return!1;for(var e=this.root,n=0;n":">",'"':""","'":"'","/":"/"};function escapeHtml(string){return String(string).replace(/[&<>"'\/]/g,function(s){return entityMap[s]})}var whiteRe=/\s*/;var spaceRe=/\s+/;var equalsRe=/\s*=/;var curlyRe=/\s*\}/;var tagRe=/#|\^|\/|>|\{|&|=|!/;function parseTemplate(template,tags){if(!template)return[];var sections=[];var tokens=[];var spaces=[];var hasTag=false;var nonSpace=false;function stripSpace(){if(hasTag&&!nonSpace){while(spaces.length)delete tokens[spaces.pop()]}else{spaces=[]}hasTag=false;nonSpace=false}var openingTagRe,closingTagRe,closingCurlyRe;function compileTags(tags){if(typeof tags==="string")tags=tags.split(spaceRe,2);if(!isArray(tags)||tags.length!==2)throw new Error("Invalid tags: "+tags);openingTagRe=new RegExp(escapeRegExp(tags[0])+"\\s*");closingTagRe=new RegExp("\\s*"+escapeRegExp(tags[1]));closingCurlyRe=new RegExp("\\s*"+escapeRegExp("}"+tags[1]))}compileTags(tags||mustache.tags);var scanner=new Scanner(template);var start,type,value,chr,token,openSection;while(!scanner.eos()){start=scanner.pos;value=scanner.scanUntil(openingTagRe);if(value){for(var i=0,valueLength=value.length;i0?sections[sections.length-1][4]:nestedTokens;break;default:collector.push(token)}}return nestedTokens}function 
Scanner(string){this.string=string;this.tail=string;this.pos=0}Scanner.prototype.eos=function(){return this.tail===""};Scanner.prototype.scan=function(re){var match=this.tail.match(re);if(!match||match.index!==0)return"";var string=match[0];this.tail=this.tail.substring(string.length);this.pos+=string.length;return string};Scanner.prototype.scanUntil=function(re){var index=this.tail.search(re),match;switch(index){case-1:match=this.tail;this.tail="";break;case 0:match="";break;default:match=this.tail.substring(0,index);this.tail=this.tail.substring(index)}this.pos+=match.length;return match};function Context(view,parentContext){this.view=view;this.cache={".":this.view};this.parent=parentContext}Context.prototype.push=function(view){return new Context(view,this)};Context.prototype.lookup=function(name){var cache=this.cache;var value;if(name in cache){value=cache[name]}else{var context=this,names,index,lookupHit=false;while(context){if(name.indexOf(".")>0){value=context.view;names=name.split(".");index=0;while(value!=null&&index")value=this._renderPartial(token,context,partials,originalTemplate);else if(symbol==="&")value=this._unescapedValue(token,context);else if(symbol==="name")value=this._escapedValue(token,context);else if(symbol==="text")value=this._rawValue(token);if(value!==undefined)buffer+=value}return buffer};Writer.prototype._renderSection=function(token,context,partials,originalTemplate){var self=this;var buffer="";var value=context.lookup(token[1]);function subRender(template){return self.render(template,context,partials)}if(!value)return;if(isArray(value)){for(var j=0,valueLength=value.length;jthis.depCount&&!this.defined){if(G(l)){if(this.events.error&&this.map.isDefine||g.onError!==ca)try{f=i.execCb(c,l,b,f)}catch(d){a=d}else f=i.execCb(c,l,b,f);this.map.isDefine&&void 0===f&&((b=this.module)?f=b.exports:this.usingExports&& 19 | (f=this.exports));if(a)return a.requireMap=this.map,a.requireModules=this.map.isDefine?[this.map.id]:null,a.requireType=this.map.isDefine?"define":"require",w(this.error=a)}else f=l;this.exports=f;if(this.map.isDefine&&!this.ignore&&(r[c]=f,g.onResourceLoad))g.onResourceLoad(i,this.map,this.depMaps);y(c);this.defined=!0}this.defining=!1;this.defined&&!this.defineEmitted&&(this.defineEmitted=!0,this.emit("defined",this.exports),this.defineEmitComplete=!0)}}else this.fetch()}},callPlugin:function(){var a= 20 | this.map,b=a.id,d=p(a.prefix);this.depMaps.push(d);q(d,"defined",u(this,function(f){var l,d;d=m(aa,this.map.id);var e=this.map.name,P=this.map.parentMap?this.map.parentMap.name:null,n=i.makeRequire(a.parentMap,{enableBuildCallback:!0});if(this.map.unnormalized){if(f.normalize&&(e=f.normalize(e,function(a){return c(a,P,!0)})||""),f=p(a.prefix+"!"+e,this.map.parentMap),q(f,"defined",u(this,function(a){this.init([],function(){return a},null,{enabled:!0,ignore:!0})})),d=m(h,f.id)){this.depMaps.push(f); 21 | if(this.events.error)d.on("error",u(this,function(a){this.emit("error",a)}));d.enable()}}else d?(this.map.url=i.nameToUrl(d),this.load()):(l=u(this,function(a){this.init([],function(){return a},null,{enabled:!0})}),l.error=u(this,function(a){this.inited=!0;this.error=a;a.requireModules=[b];B(h,function(a){0===a.map.id.indexOf(b+"_unnormalized")&&y(a.map.id)});w(a)}),l.fromText=u(this,function(f,c){var d=a.name,e=p(d),P=M;c&&(f=c);P&&(M=!1);s(e);t(j.config,b)&&(j.config[d]=j.config[b]);try{g.exec(f)}catch(h){return w(C("fromtexteval", 22 | "fromText eval for "+b+" failed: 
"+h,h,[b]))}P&&(M=!0);this.depMaps.push(e);i.completeLoad(d);n([d],l)}),f.load(a.name,n,l,j))}));i.enable(d,this);this.pluginMaps[d.id]=d},enable:function(){V[this.map.id]=this;this.enabling=this.enabled=!0;v(this.depMaps,u(this,function(a,b){var c,f;if("string"===typeof a){a=p(a,this.map.isDefine?this.map:this.map.parentMap,!1,!this.skipMap);this.depMaps[b]=a;if(c=m(L,a.id)){this.depExports[b]=c(this);return}this.depCount+=1;q(a,"defined",u(this,function(a){this.defineDep(b, 23 | a);this.check()}));this.errback?q(a,"error",u(this,this.errback)):this.events.error&&q(a,"error",u(this,function(a){this.emit("error",a)}))}c=a.id;f=h[c];!t(L,c)&&(f&&!f.enabled)&&i.enable(a,this)}));B(this.pluginMaps,u(this,function(a){var b=m(h,a.id);b&&!b.enabled&&i.enable(a,this)}));this.enabling=!1;this.check()},on:function(a,b){var c=this.events[a];c||(c=this.events[a]=[]);c.push(b)},emit:function(a,b){v(this.events[a],function(a){a(b)});"error"===a&&delete this.events[a]}};i={config:j,contextName:b, 24 | registry:h,defined:r,urlFetched:S,defQueue:A,Module:Z,makeModuleMap:p,nextTick:g.nextTick,onError:w,configure:function(a){a.baseUrl&&"/"!==a.baseUrl.charAt(a.baseUrl.length-1)&&(a.baseUrl+="/");var b=j.shim,c={paths:!0,bundles:!0,config:!0,map:!0};B(a,function(a,b){c[b]?(j[b]||(j[b]={}),U(j[b],a,!0,!0)):j[b]=a});a.bundles&&B(a.bundles,function(a,b){v(a,function(a){a!==b&&(aa[a]=b)})});a.shim&&(B(a.shim,function(a,c){H(a)&&(a={deps:a});if((a.exports||a.init)&&!a.exportsFn)a.exportsFn=i.makeShimExports(a); 25 | b[c]=a}),j.shim=b);a.packages&&v(a.packages,function(a){var b,a="string"===typeof a?{name:a}:a;b=a.name;a.location&&(j.paths[b]=a.location);j.pkgs[b]=a.name+"/"+(a.main||"main").replace(ia,"").replace(Q,"")});B(h,function(a,b){!a.inited&&!a.map.unnormalized&&(a.map=p(b))});if(a.deps||a.callback)i.require(a.deps||[],a.callback)},makeShimExports:function(a){return function(){var b;a.init&&(b=a.init.apply(ba,arguments));return b||a.exports&&da(a.exports)}},makeRequire:function(a,e){function j(c,d,m){var n, 26 | q;e.enableBuildCallback&&(d&&G(d))&&(d.__requireJsBuild=!0);if("string"===typeof c){if(G(d))return w(C("requireargs","Invalid require call"),m);if(a&&t(L,c))return L[c](h[a.id]);if(g.get)return g.get(i,c,a,j);n=p(c,a,!1,!0);n=n.id;return!t(r,n)?w(C("notloaded",'Module name "'+n+'" has not been loaded yet for context: '+b+(a?"":". 
Use require([])"))):r[n]}J();i.nextTick(function(){J();q=s(p(null,a));q.skipMap=e.skipMap;q.init(c,d,m,{enabled:!0});D()});return j}e=e||{};U(j,{isBrowser:z,toUrl:function(b){var d, 27 | e=b.lastIndexOf("."),k=b.split("/")[0];if(-1!==e&&(!("."===k||".."===k)||1e.attachEvent.toString().indexOf("[native code"))&& 34 | !Y?(M=!0,e.attachEvent("onreadystatechange",b.onScriptLoad)):(e.addEventListener("load",b.onScriptLoad,!1),e.addEventListener("error",b.onScriptError,!1)),e.src=d,J=e,D?y.insertBefore(e,D):y.appendChild(e),J=null,e;if(ea)try{importScripts(d),b.completeLoad(c)}catch(m){b.onError(C("importscripts","importScripts failed for "+c+" at "+d,m,[c]))}};z&&!q.skipDataMain&&T(document.getElementsByTagName("script"),function(b){y||(y=b.parentNode);if(I=b.getAttribute("data-main"))return s=I,q.baseUrl||(E=s.split("/"), 35 | s=E.pop(),O=E.length?E.join("/")+"/":"./",q.baseUrl=O),s=s.replace(Q,""),g.jsExtRegExp.test(s)&&(s=I),q.deps=q.deps?q.deps.concat(s):[s],!0});define=function(b,c,d){var e,g;"string"!==typeof b&&(d=c,c=b,b=null);H(c)||(d=c,c=null);!c&&G(d)&&(c=[],d.length&&(d.toString().replace(ka,"").replace(la,function(b,d){c.push(d)}),c=(1===d.length?["require"]:["require","exports","module"]).concat(c)));if(M){if(!(e=J))N&&"interactive"===N.readyState||T(document.getElementsByTagName("script"),function(b){if("interactive"=== 36 | b.readyState)return N=b}),e=N;e&&(b||(b=e.getAttribute("data-requiremodule")),g=F[e.getAttribute("data-requirecontext")])}(g?g.defQueue:R).push([b,c,d])};define.amd={jQuery:!0};g.exec=function(b){return eval(b)};g(q)}})(this); 37 | -------------------------------------------------------------------------------- /docs/search/search-results-template.mustache: -------------------------------------------------------------------------------- 1 | 5 | -------------------------------------------------------------------------------- /docs/search/search.js: -------------------------------------------------------------------------------- 1 | require.config({ 2 | baseUrl: base_url + "/search/" 3 | }); 4 | 5 | require([ 6 | 'mustache.min', 7 | 'lunr.min', 8 | 'text!search-results-template.mustache', 9 | 'text!search_index.json', 10 | ], function (Mustache, lunr, results_template, data) { 11 | "use strict"; 12 | 13 | function getSearchTerm() 14 | { 15 | var sPageURL = window.location.search.substring(1); 16 | var sURLVariables = sPageURL.split('&'); 17 | for (var i = 0; i < sURLVariables.length; i++) 18 | { 19 | var sParameterName = sURLVariables[i].split('='); 20 | if (sParameterName[0] == 'q') 21 | { 22 | return decodeURIComponent(sParameterName[1].replace(/\+/g, '%20')); 23 | } 24 | } 25 | } 26 | 27 | var index = lunr(function () { 28 | this.field('title', {boost: 10}); 29 | this.field('text'); 30 | this.ref('location'); 31 | }); 32 | 33 | data = JSON.parse(data); 34 | var documents = {}; 35 | 36 | for (var i=0; i < data.docs.length; i++){ 37 | var doc = data.docs[i]; 38 | doc.location = base_url + doc.location; 39 | index.add(doc); 40 | documents[doc.location] = doc; 41 | } 42 | 43 | var search = function(){ 44 | 45 | var query = document.getElementById('mkdocs-search-query').value; 46 | var search_results = document.getElementById("mkdocs-search-results"); 47 | while (search_results.firstChild) { 48 | search_results.removeChild(search_results.firstChild); 49 | } 50 | 51 | if(query === ''){ 52 | return; 53 | } 54 | 55 | var results = index.search(query); 56 | 57 | if (results.length > 0){ 58 | for (var i=0; i < results.length; i++){ 59 | var result = results[i]; 60 | 
doc = documents[result.ref]; 61 | doc.base_url = base_url; 62 | doc.summary = doc.text.substring(0, 200); 63 | var html = Mustache.to_html(results_template, doc); 64 | search_results.insertAdjacentHTML('beforeend', html); 65 | } 66 | } else { 67 | search_results.insertAdjacentHTML('beforeend', "

No results found

"); 68 | } 69 | 70 | if(jQuery){ 71 | /* 72 | * We currently only automatically hide bootstrap models. This 73 | * requires jQuery to work. 74 | */ 75 | jQuery('#mkdocs_search_modal a').click(function(){ 76 | jQuery('#mkdocs_search_modal').modal('hide'); 77 | }); 78 | } 79 | 80 | }; 81 | 82 | var search_input = document.getElementById('mkdocs-search-query'); 83 | 84 | var term = getSearchTerm(); 85 | if (term){ 86 | search_input.value = term; 87 | search(); 88 | } 89 | 90 | if (search_input){search_input.addEventListener("keyup", search);} 91 | 92 | }); 93 | -------------------------------------------------------------------------------- /docs/search/text.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license RequireJS text 2.0.12 Copyright (c) 2010-2014, The Dojo Foundation All Rights Reserved. 3 | * Available via the MIT or new BSD license. 4 | * see: http://github.com/requirejs/text for details 5 | */ 6 | /*jslint regexp: true */ 7 | /*global require, XMLHttpRequest, ActiveXObject, 8 | define, window, process, Packages, 9 | java, location, Components, FileUtils */ 10 | 11 | define(['module'], function (module) { 12 | 'use strict'; 13 | 14 | var text, fs, Cc, Ci, xpcIsWindows, 15 | progIds = ['Msxml2.XMLHTTP', 'Microsoft.XMLHTTP', 'Msxml2.XMLHTTP.4.0'], 16 | xmlRegExp = /^\s*<\?xml(\s)+version=[\'\"](\d)*.(\d)*[\'\"](\s)*\?>/im, 17 | bodyRegExp = /]*>\s*([\s\S]+)\s*<\/body>/im, 18 | hasLocation = typeof location !== 'undefined' && location.href, 19 | defaultProtocol = hasLocation && location.protocol && location.protocol.replace(/\:/, ''), 20 | defaultHostName = hasLocation && location.hostname, 21 | defaultPort = hasLocation && (location.port || undefined), 22 | buildMap = {}, 23 | masterConfig = (module.config && module.config()) || {}; 24 | 25 | text = { 26 | version: '2.0.12', 27 | 28 | strip: function (content) { 29 | //Strips declarations so that external SVG and XML 30 | //documents can be added to a document without worry. Also, if the string 31 | //is an HTML document, only the part inside the body tag is returned. 32 | if (content) { 33 | content = content.replace(xmlRegExp, ""); 34 | var matches = content.match(bodyRegExp); 35 | if (matches) { 36 | content = matches[1]; 37 | } 38 | } else { 39 | content = ""; 40 | } 41 | return content; 42 | }, 43 | 44 | jsEscape: function (content) { 45 | return content.replace(/(['\\])/g, '\\$1') 46 | .replace(/[\f]/g, "\\f") 47 | .replace(/[\b]/g, "\\b") 48 | .replace(/[\n]/g, "\\n") 49 | .replace(/[\t]/g, "\\t") 50 | .replace(/[\r]/g, "\\r") 51 | .replace(/[\u2028]/g, "\\u2028") 52 | .replace(/[\u2029]/g, "\\u2029"); 53 | }, 54 | 55 | createXhr: masterConfig.createXhr || function () { 56 | //Would love to dump the ActiveX crap in here. Need IE 6 to die first. 57 | var xhr, i, progId; 58 | if (typeof XMLHttpRequest !== "undefined") { 59 | return new XMLHttpRequest(); 60 | } else if (typeof ActiveXObject !== "undefined") { 61 | for (i = 0; i < 3; i += 1) { 62 | progId = progIds[i]; 63 | try { 64 | xhr = new ActiveXObject(progId); 65 | } catch (e) {} 66 | 67 | if (xhr) { 68 | progIds = [progId]; // so faster next time 69 | break; 70 | } 71 | } 72 | } 73 | 74 | return xhr; 75 | }, 76 | 77 | /** 78 | * Parses a resource name into its component parts. Resource names 79 | * look like: module/name.ext!strip, where the !strip part is 80 | * optional. 
81 | * @param {String} name the resource name 82 | * @returns {Object} with properties "moduleName", "ext" and "strip" 83 | * where strip is a boolean. 84 | */ 85 | parseName: function (name) { 86 | var modName, ext, temp, 87 | strip = false, 88 | index = name.indexOf("."), 89 | isRelative = name.indexOf('./') === 0 || 90 | name.indexOf('../') === 0; 91 | 92 | if (index !== -1 && (!isRelative || index > 1)) { 93 | modName = name.substring(0, index); 94 | ext = name.substring(index + 1, name.length); 95 | } else { 96 | modName = name; 97 | } 98 | 99 | temp = ext || modName; 100 | index = temp.indexOf("!"); 101 | if (index !== -1) { 102 | //Pull off the strip arg. 103 | strip = temp.substring(index + 1) === "strip"; 104 | temp = temp.substring(0, index); 105 | if (ext) { 106 | ext = temp; 107 | } else { 108 | modName = temp; 109 | } 110 | } 111 | 112 | return { 113 | moduleName: modName, 114 | ext: ext, 115 | strip: strip 116 | }; 117 | }, 118 | 119 | xdRegExp: /^((\w+)\:)?\/\/([^\/\\]+)/, 120 | 121 | /** 122 | * Is an URL on another domain. Only works for browser use, returns 123 | * false in non-browser environments. Only used to know if an 124 | * optimized .js version of a text resource should be loaded 125 | * instead. 126 | * @param {String} url 127 | * @returns Boolean 128 | */ 129 | useXhr: function (url, protocol, hostname, port) { 130 | var uProtocol, uHostName, uPort, 131 | match = text.xdRegExp.exec(url); 132 | if (!match) { 133 | return true; 134 | } 135 | uProtocol = match[2]; 136 | uHostName = match[3]; 137 | 138 | uHostName = uHostName.split(':'); 139 | uPort = uHostName[1]; 140 | uHostName = uHostName[0]; 141 | 142 | return (!uProtocol || uProtocol === protocol) && 143 | (!uHostName || uHostName.toLowerCase() === hostname.toLowerCase()) && 144 | ((!uPort && !uHostName) || uPort === port); 145 | }, 146 | 147 | finishLoad: function (name, strip, content, onLoad) { 148 | content = strip ? text.strip(content) : content; 149 | if (masterConfig.isBuild) { 150 | buildMap[name] = content; 151 | } 152 | onLoad(content); 153 | }, 154 | 155 | load: function (name, req, onLoad, config) { 156 | //Name has format: some.module.filext!strip 157 | //The strip part is optional. 158 | //if strip is present, then that means only get the string contents 159 | //inside a body tag in an HTML string. For XML/SVG content it means 160 | //removing the declarations so the content can be inserted 161 | //into the current doc without problems. 162 | 163 | // Do not bother with the work if a build and text will 164 | // not be inlined. 165 | if (config && config.isBuild && !config.inlineText) { 166 | onLoad(); 167 | return; 168 | } 169 | 170 | masterConfig.isBuild = config && config.isBuild; 171 | 172 | var parsed = text.parseName(name), 173 | nonStripName = parsed.moduleName + 174 | (parsed.ext ? '.' + parsed.ext : ''), 175 | url = req.toUrl(nonStripName), 176 | useXhr = (masterConfig.useXhr) || 177 | text.useXhr; 178 | 179 | // Do not load if it is an empty: url 180 | if (url.indexOf('empty:') === 0) { 181 | onLoad(); 182 | return; 183 | } 184 | 185 | //Load the text. Use XHR if possible and in a browser. 186 | if (!hasLocation || useXhr(url, defaultProtocol, defaultHostName, defaultPort)) { 187 | text.get(url, function (content) { 188 | text.finishLoad(name, parsed.strip, content, onLoad); 189 | }, function (err) { 190 | if (onLoad.error) { 191 | onLoad.error(err); 192 | } 193 | }); 194 | } else { 195 | //Need to fetch the resource across domains. 
Assume 196 | //the resource has been optimized into a JS module. Fetch 197 | //by the module name + extension, but do not include the 198 | //!strip part to avoid file system issues. 199 | req([nonStripName], function (content) { 200 | text.finishLoad(parsed.moduleName + '.' + parsed.ext, 201 | parsed.strip, content, onLoad); 202 | }); 203 | } 204 | }, 205 | 206 | write: function (pluginName, moduleName, write, config) { 207 | if (buildMap.hasOwnProperty(moduleName)) { 208 | var content = text.jsEscape(buildMap[moduleName]); 209 | write.asModule(pluginName + "!" + moduleName, 210 | "define(function () { return '" + 211 | content + 212 | "';});\n"); 213 | } 214 | }, 215 | 216 | writeFile: function (pluginName, moduleName, req, write, config) { 217 | var parsed = text.parseName(moduleName), 218 | extPart = parsed.ext ? '.' + parsed.ext : '', 219 | nonStripName = parsed.moduleName + extPart, 220 | //Use a '.js' file name so that it indicates it is a 221 | //script that can be loaded across domains. 222 | fileName = req.toUrl(parsed.moduleName + extPart) + '.js'; 223 | 224 | //Leverage own load() method to load plugin value, but only 225 | //write out values that do not have the strip argument, 226 | //to avoid any potential issues with ! in file names. 227 | text.load(nonStripName, req, function (value) { 228 | //Use own write() method to construct full module value. 229 | //But need to create shell that translates writeFile's 230 | //write() to the right interface. 231 | var textWrite = function (contents) { 232 | return write(fileName, contents); 233 | }; 234 | textWrite.asModule = function (moduleName, contents) { 235 | return write.asModule(moduleName, fileName, contents); 236 | }; 237 | 238 | text.write(pluginName, nonStripName, textWrite, config); 239 | }, config); 240 | } 241 | }; 242 | 243 | if (masterConfig.env === 'node' || (!masterConfig.env && 244 | typeof process !== "undefined" && 245 | process.versions && 246 | !!process.versions.node && 247 | !process.versions['node-webkit'])) { 248 | //Using special require.nodeRequire, something added by r.js. 249 | fs = require.nodeRequire('fs'); 250 | 251 | text.get = function (url, callback, errback) { 252 | try { 253 | var file = fs.readFileSync(url, 'utf8'); 254 | //Remove BOM (Byte Mark Order) from utf8 files if it is there. 255 | if (file.indexOf('\uFEFF') === 0) { 256 | file = file.substring(1); 257 | } 258 | callback(file); 259 | } catch (e) { 260 | if (errback) { 261 | errback(e); 262 | } 263 | } 264 | }; 265 | } else if (masterConfig.env === 'xhr' || (!masterConfig.env && 266 | text.createXhr())) { 267 | text.get = function (url, callback, errback, headers) { 268 | var xhr = text.createXhr(), header; 269 | xhr.open('GET', url, true); 270 | 271 | //Allow plugins direct access to xhr headers 272 | if (headers) { 273 | for (header in headers) { 274 | if (headers.hasOwnProperty(header)) { 275 | xhr.setRequestHeader(header.toLowerCase(), headers[header]); 276 | } 277 | } 278 | } 279 | 280 | //Allow overrides specified in config 281 | if (masterConfig.onXhr) { 282 | masterConfig.onXhr(xhr, url); 283 | } 284 | 285 | xhr.onreadystatechange = function (evt) { 286 | var status, err; 287 | //Do not explicitly handle errors, those should be 288 | //visible via console output in the browser. 289 | if (xhr.readyState === 4) { 290 | status = xhr.status || 0; 291 | if (status > 399 && status < 600) { 292 | //An http 4xx or 5xx error. Signal an error. 
293 | err = new Error(url + ' HTTP status: ' + status); 294 | err.xhr = xhr; 295 | if (errback) { 296 | errback(err); 297 | } 298 | } else { 299 | callback(xhr.responseText); 300 | } 301 | 302 | if (masterConfig.onXhrComplete) { 303 | masterConfig.onXhrComplete(xhr, url); 304 | } 305 | } 306 | }; 307 | xhr.send(null); 308 | }; 309 | } else if (masterConfig.env === 'rhino' || (!masterConfig.env && 310 | typeof Packages !== 'undefined' && typeof java !== 'undefined')) { 311 | //Why Java, why is this so awkward? 312 | text.get = function (url, callback) { 313 | var stringBuffer, line, 314 | encoding = "utf-8", 315 | file = new java.io.File(url), 316 | lineSeparator = java.lang.System.getProperty("line.separator"), 317 | input = new java.io.BufferedReader(new java.io.InputStreamReader(new java.io.FileInputStream(file), encoding)), 318 | content = ''; 319 | try { 320 | stringBuffer = new java.lang.StringBuffer(); 321 | line = input.readLine(); 322 | 323 | // Byte Order Mark (BOM) - The Unicode Standard, version 3.0, page 324 324 | // http://www.unicode.org/faq/utf_bom.html 325 | 326 | // Note that when we use utf-8, the BOM should appear as "EF BB BF", but it doesn't due to this bug in the JDK: 327 | // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4508058 328 | if (line && line.length() && line.charAt(0) === 0xfeff) { 329 | // Eat the BOM, since we've already found the encoding on this file, 330 | // and we plan to concatenating this buffer with others; the BOM should 331 | // only appear at the top of a file. 332 | line = line.substring(1); 333 | } 334 | 335 | if (line !== null) { 336 | stringBuffer.append(line); 337 | } 338 | 339 | while ((line = input.readLine()) !== null) { 340 | stringBuffer.append(lineSeparator); 341 | stringBuffer.append(line); 342 | } 343 | //Make sure we return a JavaScript string and not a Java string. 344 | content = String(stringBuffer.toString()); //String 345 | } finally { 346 | input.close(); 347 | } 348 | callback(content); 349 | }; 350 | } else if (masterConfig.env === 'xpconnect' || (!masterConfig.env && 351 | typeof Components !== 'undefined' && Components.classes && 352 | Components.interfaces)) { 353 | //Avert your gaze! 
354 | Cc = Components.classes; 355 | Ci = Components.interfaces; 356 | Components.utils['import']('resource://gre/modules/FileUtils.jsm'); 357 | xpcIsWindows = ('@mozilla.org/windows-registry-key;1' in Cc); 358 | 359 | text.get = function (url, callback) { 360 | var inStream, convertStream, fileObj, 361 | readData = {}; 362 | 363 | if (xpcIsWindows) { 364 | url = url.replace(/\//g, '\\'); 365 | } 366 | 367 | fileObj = new FileUtils.File(url); 368 | 369 | //XPCOM, you so crazy 370 | try { 371 | inStream = Cc['@mozilla.org/network/file-input-stream;1'] 372 | .createInstance(Ci.nsIFileInputStream); 373 | inStream.init(fileObj, 1, 0, false); 374 | 375 | convertStream = Cc['@mozilla.org/intl/converter-input-stream;1'] 376 | .createInstance(Ci.nsIConverterInputStream); 377 | convertStream.init(inStream, "utf-8", inStream.available(), 378 | Ci.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER); 379 | 380 | convertStream.readString(inStream.available(), readData); 381 | convertStream.close(); 382 | inStream.close(); 383 | callback(readData.value); 384 | } catch (e) { 385 | throw new Error((fileObj && fileObj.path || '') + ': ' + e); 386 | } 387 | }; 388 | } 389 | return text; 390 | }); 391 | -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | http://EpistasisLab.github.io/scikit-rebate/ 7 | 2018-05-22 8 | daily 9 | 10 | 11 | 12 | 13 | 14 | http://EpistasisLab.github.io/scikit-rebate/installing/ 15 | 2018-05-22 16 | daily 17 | 18 | 19 | 20 | 21 | 22 | http://EpistasisLab.github.io/scikit-rebate/using/ 23 | 2018-05-22 24 | daily 25 | 26 | 27 | 28 | 29 | 30 | http://EpistasisLab.github.io/scikit-rebate/contributing/ 31 | 2018-05-22 32 | daily 33 | 34 | 35 | 36 | 37 | 38 | http://EpistasisLab.github.io/scikit-rebate/releases/ 39 | 2018-05-22 40 | daily 41 | 42 | 43 | 44 | 45 | 46 | http://EpistasisLab.github.io/scikit-rebate/citing/ 47 | 2018-05-22 48 | daily 49 | 50 | 51 | 52 | 53 | 54 | http://EpistasisLab.github.io/scikit-rebate/support/ 55 | 2018-05-22 56 | daily 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /docs/support/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Support - scikit-rebate 12 | 13 | 14 | 15 | 16 | 17 | 18 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 |
34 | 35 | 36 | 92 | 93 |
94 | 95 | 96 | 100 | 101 | 102 |
103 |
104 |
105 |
    106 |
  • Docs »
  • 107 | 108 | 109 | 110 |
  • Support
  • 111 |
  • 112 | 113 | Edit on GitHub 115 | 116 |
  • 117 |
118 |
119 |
120 |
121 |
122 | 123 |

scikit-rebate was developed in the Computational Genetics Lab with funding from the NIH. We are incredibly grateful for their support during the development of this project.

124 | 125 |
126 |
127 | 148 | 149 |
150 |
151 | 152 |
153 | 154 |
155 | 156 |
157 | 158 | 159 | GitHub 160 | 161 | 162 | « Previous 163 | 164 | 165 | 166 |
167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | -------------------------------------------------------------------------------- /docs_sources/citing.md: -------------------------------------------------------------------------------- 1 | If you use **scikit-rebate** or the **MultiSURF** algorithm in a scientific publication, please consider citing the following paper (currently available as a pre-print in arXiv): 2 | 3 | *Urbanowicz, Ryan J., Randal S. Olson, Peter Schmitt, Melissa Meeker, and Jason H. Moore. "Benchmarking relief-based feature selection methods." arXiv preprint arXiv:1711.08477 (2017).* 4 | 5 | Alternatively a complete **review of Relief-based algorithms** is available at: 6 | 7 | *Urbanowicz, Ryan J., Melissa Meeker, William LaCava, Randal S. Olson, and Jason H. Moore. "Relief-based feature selection: introduction and review." arXiv preprint arXiv:1711.08421 (2017).* 8 | 9 | To cite the **original Relief** paper: 10 | 11 | *Kira, Kenji, and Larry A. Rendell. "A practical approach to feature selection." In Machine Learning Proceedings 1992, pp. 249-256. 1992.* 12 | 13 | To cite the **original ReliefF** paper: 14 | 15 | *Kononenko, Igor. "Estimating attributes: analysis and extensions of RELIEF." In European conference on machine learning, pp. 171-182. Springer, Berlin, Heidelberg, 1994.* 16 | 17 | To cite the **original SURF** paper: 18 | 19 | *Greene, Casey S., Nadia M. Penrod, Jeff Kiralis, and Jason H. Moore. "Spatially uniform relieff (SURF) for computationally-efficient filtering of gene-gene interactions." BioData mining 2, no. 1 (2009): 5.* 20 | 21 | To cite the **original SURF\*** paper: 22 | 23 | *Greene, Casey S., Daniel S. Himmelstein, Jeff Kiralis, and Jason H. Moore. "The informative extremes: using both nearest and farthest individuals can improve relief algorithms in the domain of human genetics." In European Conference on Evolutionary Computation, Machine Learning and Data Mining in Bioinformatics, pp. 182-193. Springer, Berlin, Heidelberg, 2010.* 24 | 25 | To cite the **original MultiSURF\*** paper: 26 | 27 | *Granizo-Mackenzie, Delaney, and Jason H. Moore. "Multiple threshold spatially uniform relieff for the genetic analysis of complex human diseases." In European Conference on Evolutionary Computation, Machine Learning and Data Mining in Bioinformatics, pp. 1-10. Springer, Berlin, Heidelberg, 2013.* 28 | 29 | To cite the **original TuRF** paper: 30 | 31 | *Moore, Jason H., and Bill C. White. "Tuning ReliefF for genome-wide genetic analysis." In European Conference on Evolutionary Computation, Machine Learning and Data Mining in Bioinformatics, pp. 166-175. Springer, Berlin, Heidelberg, 2007.* -------------------------------------------------------------------------------- /docs_sources/contributing.md: -------------------------------------------------------------------------------- 1 | We welcome you to [check the existing issues](https://github.com/EpistasisLab/scikit-rebate/issues/) for bugs or enhancements to work on. If you have an idea for an extension to scikit-rebate, please [file a new issue](https://github.com/EpistasisLab/scikit-rebate/issues//new) so we can discuss it. 2 | 3 | ## Project layout 4 | 5 | The latest stable release of scikit-rebate is on the [master branch](https://github.com/EpistasisLab/scikit-rebate/tree/master), whereas the latest version of scikit-rebate in development is on the [development branch](https://github.com/EpistasisLab/scikit-rebate/tree/development). 
Make sure you are looking at and working on the correct branch if you're looking to contribute code. 6 | 7 | In terms of directory structure: 8 | 9 | * All of scikit-rebate's code sources are in the `skrebate` directory 10 | * The documentation sources are in the `docs_sources` directory 11 | * The latest documentation build is in the `docs` directory 12 | * Unit tests for scikit-rebate are in the `tests.py` file 13 | 14 | Make sure to familiarize yourself with the project layout before making any major contributions, and especially make sure to send all code changes to the `development` branch. 15 | 16 | ## How to contribute 17 | 18 | The preferred way to contribute to scikit-rebate is to fork the 19 | [main repository](https://github.com/EpistasisLab/scikit-rebate/) on 20 | GitHub: 21 | 22 | 1. Fork the [project repository](https://github.com/EpistasisLab/scikit-rebate/): 23 | click on the 'Fork' button near the top of the page. This creates 24 | a copy of the code under your account on the GitHub server. 25 | 26 | 2. Clone this copy to your local disk: 27 | 28 | $ git clone git@github.com:YourLogin/scikit-rebate.git 29 | $ cd scikit-rebate 30 | 31 | 3. Create a branch to hold your changes: 32 | 33 | $ git checkout -b my-contribution 34 | 35 | 4. Make sure your local environment is setup correctly for development. Installation instructions are almost identical to [the user instructions](installing.md) except that scikit-rebate should *not* be installed. If you have scikit-rebate installed on your computer, then make sure you are using a virtual environment that does not have scikit-rebate installed. Furthermore, you should make sure you have installed the `nose` package into your development environment so that you can test changes locally. 36 | 37 | $ conda install nose 38 | 39 | 5. Start making changes on your newly created branch, remembering to never work on the ``master`` branch! Work on this copy on your computer using Git to do the version control. 40 | 41 | 6. Once some changes are saved locally, you can use your tweaked version of scikit-rebate by navigating to the project's base directory and running scikit-rebate in a script. 42 | 43 | 7. To check your changes haven't broken any existing tests and to check new tests you've added pass run the following (note, you must have the `nose` package installed within your dev environment for this to work): 44 | 45 | $ nosetests -s -v 46 | 47 | 8. When you're done editing and local testing, run: 48 | 49 | $ git add modified_files 50 | $ git commit 51 | 52 | to record your changes in Git, then push them to GitHub with: 53 | 54 | $ git push -u origin my-contribution 55 | 56 | Finally, go to the web page of your fork of the scikit-rebate repo, and click 'Pull Request' (PR) to send your changes to the maintainers for review. Make sure that you send your PR to the `development` branch, as the `master` branch is reserved for the latest stable release. This will start the CI server to check all the project's unit tests run and send an email to the maintainers. 57 | 58 | (For details on the above look up the [Git documentation](http://git-scm.com/documentation) on the web.) 59 | 60 | ## Before submitting your pull request 61 | 62 | Before you submit a pull request for your contribution, please work through this checklist to make sure that you have done everything necessary so we can efficiently review and accept your changes. 
63 | 64 | If your contribution changes scikit-rebate in any way: 65 | 66 | * Update the [documentation](https://github.com/EpistasisLab/scikit-rebate/tree/master/docs_sources) so all of your changes are reflected there. 67 | 68 | * Update the [README](https://github.com/EpistasisLab/scikit-rebate/blob/master/README.md) if anything there has changed. 69 | 70 | If your contribution involves any code changes: 71 | 72 | * Update the [project unit tests](https://github.com/EpistasisLab/scikit-rebate/blob/master/tests.py) to test your code changes. 73 | 74 | * Make sure that your code is properly commented with [docstrings](https://www.python.org/dev/peps/pep-0257/) and comments explaining your rationale behind non-obvious coding practices. 75 | 76 | If your contribution requires a new library dependency: 77 | 78 | * Double-check that the new dependency is easy to install via `pip` or Anaconda and supports both Python 2 and 3. If the dependency requires a complicated installation, then we most likely won't merge your changes because we want to keep scikit-rebate easy to install. 79 | 80 | * Add a line to pip install the library to [.travis_install.sh](https://github.com/EpistasisLab/scikit-rebate/blob/master/ci/.travis_install.sh#L46) 81 | 82 | * Add a line to print the version of the library to [.travis_install.sh](https://github.com/EpistasisLab/scikit-rebate/blob/master/ci/.travis_install.sh#L56) 83 | 84 | * Similarly add a line to print the version of the library to [.travis_test.sh](https://github.com/EpistasisLab/scikit-rebate/blob/master/ci/.travis_test.sh#L16) 85 | 86 | ## Updating the documentation 87 | 88 | We use [mkdocs](http://www.mkdocs.org/) to manage our [documentation](http://EpistasisLab.github.io/scikit-rebate/). This allows us to write the docs in Markdown and compile them to HTML as needed. Below are a few useful commands to know when updating the documentation. Make sure that you are running them in the base repository directory. 89 | 90 | * `mkdocs serve`: Hosts of a local version of the documentation that you can access at the provided URL. The local version will update automatically as you save changes to the documentation. 91 | 92 | * `mkdocs build --clean`: Creates a fresh build of the documentation in HTML. Always run this before deploying the documentation to GitHub. 93 | 94 | * `mkdocs gh-deploy`: Deploys the documentation to GitHub. If you're deploying on your fork of scikit-rebate, the online documentation should be accessible at `http://.github.io/scikit-rebate/`. Generally, you shouldn't need to run this command because you can view your changes with `mkdocs serve`. 95 | 96 | ## After submitting your pull request 97 | 98 | After submitting your pull request, [Travis-CI](https://travis-ci.com/) will automatically run unit tests on your changes and make sure that your updated code builds and runs on Python 2 and 3. We also use services that automatically check code quality and test coverage. 99 | 100 | Check back shortly after submitting your pull request to make sure that your code passes these checks. If any of the checks come back with a red X, then do your best to address the errors. 
101 | -------------------------------------------------------------------------------- /docs_sources/index.md: -------------------------------------------------------------------------------- 1 | [scikit-rebate](https://github.com/EpistasisLab/scikit-rebate) is a scikit-learn-compatible Python implementation of ReBATE, a suite of [Relief](https://en.wikipedia.org/wiki/Relief_(feature_selection))-based feature selection algorithms for Machine Learning. As of 5/7/18, **this project is still under active development** and we encourage you to check back on this repository regularly for updates. 2 | 3 | These algorithms excel at identifying features that are predictive of the outcome in supervised learning problems, and are especially good at identifying feature interactions that are normally overlooked by standard feature selection methods. 4 | 5 | The main benefit of Relief-based algorithms is that they identify feature interactions without having to exhaustively check every pairwise interaction, thus taking significantly less time than exhaustive pairwise search. 6 | 7 | Relief-based algorithms are commonly applied to genetic analyses, where epistasis (i.e., feature interactions) is common. However, the algorithms implemented in this package can be applied to almost any supervised classification data set and supports: 8 | 9 | * A mix of categorical and/or continuous features 10 | 11 | * Data with missing values 12 | 13 | * Binary endpoints (i.e., classification) 14 | 15 | * Multi-class endpoints (i.e., classification) 16 | 17 | * Continuous endpoints (i.e., regression) 18 | -------------------------------------------------------------------------------- /docs_sources/installing.md: -------------------------------------------------------------------------------- 1 | scikit-rebate is built on top of the following existing Python packages: 2 | 3 | * NumPy 4 | 5 | * SciPy 6 | 7 | * scikit-learn 8 | 9 | All of the necessary Python packages can be installed via the [Anaconda Python distribution](https://www.continuum.io/downloads), which we strongly recommend that you use. We also strongly recommend that you use Python 3 over Python 2 if you're given the choice. 10 | 11 | NumPy, SciPy, and scikit-learn can be installed in Anaconda via the command: 12 | 13 | ``` 14 | conda install numpy scipy scikit-learn 15 | ``` 16 | 17 | Once the prerequisites are installed, you should be able to install scikit-rebate with a pip command: 18 | 19 | ``` 20 | pip install skrebate 21 | ``` 22 | You can retrieve basic information about your installed version of skrebate with the following pip command: 23 | 24 | ``` 25 | pip show skrebate 26 | ``` 27 | 28 | You can check that you have the most up to date pypi release of skrebate with the following pip command: 29 | 30 | ``` 31 | pip install skrebate -U 32 | ``` 33 | 34 | 35 | Please [file a new issue](https://github.com/EpistasisLab/scikit-rebate/issues/new) if you run into installation problems. 36 | -------------------------------------------------------------------------------- /docs_sources/releases.md: -------------------------------------------------------------------------------- 1 | # scikit-rebate 0.6 2 | 3 | * Fixed internal TuRF implementation so that it outputs scores for all features. Those that make it to the last iteration get true core algorithm scoring, while those that were removed along the way are assigned token scores (lower than the lowest true scoring feature) that indicate when the respective feature(s) were removed. 
This also allows for greater flexibility in the user specifying the number of features to return. 4 | 5 | * Updated the usage documentation to demonstrate how to use RFE as well as the newly updated internal TuRF implementation. 6 | 7 | * Fixed the pct parameter of TuRF to properly determine the percent of features removed each iteration as well as the total number of iterations, as described in the original TuRF paper. Also managed the edge case to ensure that at least one feature would be removed each TuRF iteration. 8 | 9 | * Fixed the ability to parallelize runs of the core algorithm while using TuRF. 10 | 11 | * Updated the unit testing file to remove some excess unit tests, add other relevant ones, speed up testing overall, and make the testing better organized. 12 | 13 | * Added a preliminary implementation of VLSRelief to scikit-rebate, along with associated unit tests. Documentation and code examples are not yet available. 14 | 15 | * Removed some unused code from the TuRF implementation. 16 | 17 | * Added a check in the transform method required by scikit-learn in both relieff.py and turf.py to ensure that the number of selected features requested by the user is not larger than the number of features in the dataset. 18 | 19 | * Reduced the default value for the number of features selected. 20 | 21 | # scikit-rebate 0.5 22 | 23 | * Added fixes to score normalizations that should ensure that feature scores for all algorithms fall between -1 and 1. 24 | 25 | * Added multi-class endpoint functionality (the algorithms now discriminate between binary and multi-class endpoints), including new methods for multi-class score update normalization. 26 | 27 | * Fixed normalization for missing data. 28 | 29 | * Fixed inconsistent pre-normalization for continuous feature data. 30 | 31 | * Added a custom ramp function, based on the standard deviation of a given continuous feature, to improve performance of all algorithms on data with a mix of discrete and continuous features. 32 | 33 | * Updated the implementation of TuRF as an internal custom component of ReBATE. 34 | 35 | # scikit-rebate 0.4 36 | 37 | * Added support for multicore processing to all Relief algorithms. Multiprocessing is now also supported in Python 2. 38 | 39 | * The `ReliefF` algorithm now accepts float values in the range (0, 1.0] for the `n_neighbors` parameter. Float values will be interpreted as a fraction of the training set sample size. 40 | 41 | * Refined the MultiSURF and MultiSURF* algorithms. From our internal research, MultiSURF is now one of our best-performing feature selection algorithms. 42 | 43 | # scikit-rebate 0.3 44 | 45 | * Added a parallelization parameter, `n_jobs`, to ReliefF, SURF, SURF*, and MultiSURF via joblib. 46 | 47 | * Renamed the `dlimit` parameter to `discrete_limit` to better reflect the purpose of the parameter. 48 | 49 | * Minor code optimizations. 50 | 51 | # scikit-rebate 0.2 52 | 53 | * Added documentation. 54 | 55 | * Minor code optimizations. 56 | 57 | # scikit-rebate 0.1 58 | 59 | * Initial release of Relief algorithms, including ReliefF, SURF, SURF*, and MultiSURF. 60 | -------------------------------------------------------------------------------- /docs_sources/support.md: -------------------------------------------------------------------------------- 1 | scikit-rebate was developed in the [Computational Genetics Lab](http://epistasis.org) with funding from the [NIH](http://www.nih.gov). We are incredibly grateful for their support during the development of this project.
2 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: scikit-rebate 2 | site_url: http://EpistasisLab.github.io/scikit-rebate 3 | site_author: Randal S. Olson and Ryan J. Urbanowicz 4 | site_description: A scikit-learn-compatible Python implementation of ReBATE, a suite of Relief-based feature selection algorithms for Machine Learning. 5 | 6 | repo_url: https://github.com/EpistasisLab/scikit-rebate 7 | edit_uri: edit/master/docs_sources/ 8 | docs_dir: docs_sources/ 9 | site_dir: docs/ 10 | theme: readthedocs 11 | 12 | markdown_extensions: 13 | - tables 14 | - fenced_code 15 | 16 | copyright: Developed by Randal S. Olson, Pete Schmitt, and Ryan J. Urbanowicz at the University of Pennsylvania 17 | 18 | pages: 19 | - Home: index.md 20 | - Installation: installing.md 21 | - Using skrebate: using.md 22 | - Contributing: contributing.md 23 | - Release Notes: releases.md 24 | - Citing: citing.md 25 | - Support: support.md 26 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from setuptools import setup, find_packages 4 | 5 | def calculate_version(): 6 | initpy = open('skrebate/_version.py').read().split('\n') 7 | version = list(filter(lambda x: '__version__' in x, initpy))[0].split('\'')[1] 8 | return version 9 | 10 | package_version = calculate_version() 11 | 12 | setup( 13 | name='skrebate', 14 | version=package_version, 15 | author='Randal S. Olson, Pete Schmitt, and Ryan J. Urbanowicz', 16 | author_email='rso@randalolson.com, ryanurb@upenn.edu', 17 | packages=find_packages(), 18 | url='https://github.com/EpistasisLab/scikit-rebate', 19 | license='License :: OSI Approved :: MIT License', 20 | description=('Relief-based feature selection algorithms'), 21 | long_description=''' 22 | A sklearn-compatible Python implementation of ReBATE, a suite of Relief-based feature selection algorithms. 23 | 24 | Contact 25 | ============= 26 | If you have any questions or comments about skrebate, please feel free to contact us via e-mail: rso@randalolson.com and ryanurb@upenn.edu 27 | 28 | This project is hosted at https://github.com/EpistasisLab/scikit-rebate 29 | ''', 30 | zip_safe=True, 31 | install_requires=['numpy', 'scipy', 'scikit-learn'], 32 | classifiers=[ 33 | 'Intended Audience :: Developers', 34 | 'Intended Audience :: Information Technology', 35 | 'Intended Audience :: Science/Research', 36 | 'License :: OSI Approved :: MIT License', 37 | 'Programming Language :: Python :: 2', 38 | 'Programming Language :: Python :: 2.7', 39 | 'Programming Language :: Python :: 3', 40 | 'Programming Language :: Python :: 3.4', 41 | 'Programming Language :: Python :: 3.5', 42 | 'Programming Language :: Python :: 3.6', 43 | 'Topic :: Utilities' 44 | ], 45 | keywords=['data mining', 'feature selection', 'feature importance', 'machine learning', 'data analysis', 'data engineering', 'data science'], 46 | include_package_data=True, 47 | ) 48 | -------------------------------------------------------------------------------- /skrebate/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | scikit-rebate was primarily developed at the University of Pennsylvania by: 5 | - Randal S. 
Olson (rso@randalolson.com) 6 | - Pete Schmitt (pschmitt@upenn.edu) 7 | - Ryan J. Urbanowicz (ryanurb@upenn.edu) 8 | - Weixuan Fu (weixuanf@upenn.edu) 9 | - and many more generous open source contributors 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software 12 | and associated documentation files (the "Software"), to deal in the Software without restriction, 13 | including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 14 | and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 15 | subject to the following conditions: 16 | 17 | The above copyright notice and this permission notice shall be included in all copies or substantial 18 | portions of the Software. 19 | 20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 21 | LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 | """ 26 | 27 | from ._version import __version__ 28 | from .relieff import ReliefF 29 | from .surf import SURF 30 | from .surfstar import SURFstar 31 | from .multisurf import MultiSURF 32 | from .multisurfstar import MultiSURFstar 33 | from .turf import TURF 34 | from .vls import VLS 35 | from .iter import Iter -------------------------------------------------------------------------------- /skrebate/_version.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | scikit-rebate was primarily developed at the University of Pennsylvania by: 5 | - Randal S. Olson (rso@randalolson.com) 6 | - Pete Schmitt (pschmitt@upenn.edu) 7 | - Ryan J. Urbanowicz (ryanurb@upenn.edu) 8 | - Weixuan Fu (weixuanf@upenn.edu) 9 | - and many more generous open source contributors 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software 12 | and associated documentation files (the "Software"), to deal in the Software without restriction, 13 | including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 14 | and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 15 | subject to the following conditions: 16 | 17 | The above copyright notice and this permission notice shall be included in all copies or substantial 18 | portions of the Software. 19 | 20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 21 | LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 | """ 26 | 27 | __version__ = '0.7' 28 | -------------------------------------------------------------------------------- /skrebate/iter.py: -------------------------------------------------------------------------------- 1 | from sklearn.base import BaseEstimator 2 | import copy 3 | import numpy as np 4 | 5 | class Iter(BaseEstimator): 6 | 7 | def __init__(self,relief_object,max_iter=10,convergence_threshold=0.0001,beta=0.1): 8 | ''' 9 | :param relief_object: Must be an object that implements the standard sklearn fit function, and after fit, has attribute feature_importances_ 10 | that can be accessed. Scores must be a 1D np.ndarray of length # of features. The fit function must also be able to 11 | take in an optional 1D np.ndarray 'weights' parameter of length num_features. 12 | :param max_iter: Maximum number of iterations to run 13 | :param convergence_threshold Difference between iteration feature weights to determine convergence 14 | :param beta Learning Rate for Widrow Hoff Weight Update 15 | ''' 16 | 17 | if not self.check_is_int(max_iter) or max_iter < 0: 18 | raise Exception('max_iter must be a nonnegative integer') 19 | 20 | if not self.check_is_float(convergence_threshold) or convergence_threshold < 0: 21 | raise Exception('convergence_threshold must be a nonnegative float') 22 | 23 | if not self.check_is_float(beta): 24 | raise Exception('beta must be a float') 25 | 26 | self.relief_object = relief_object 27 | self.max_iter = max_iter 28 | self.converage_threshold = convergence_threshold 29 | self.rank_absolute = self.relief_object.rank_absolute 30 | self.beta = beta 31 | 32 | def fit(self, X, y): 33 | """Scikit-learn required: Computes the feature importance scores from the training data. 34 | Parameters 35 | ---------- 36 | X: array-like {n_samples, n_features} Training instances to compute the feature importance scores from 37 | y: array-like {n_samples} Training labels 38 | Returns 39 | ------- 40 | self 41 | """ 42 | 43 | #Iterate, feeding the resulting weights of the first run into the fit of the next run (how are they translated?) 
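        # How the weights are translated (per the code below): scores from the previous run
        # are mapped to non-negative weights in [0, 1] -- either |score| / max(|score|) when
        # rank_absolute is set, or negatives clipped to 0 and the rest divided by the maximum
        # score -- and passed to the next run's fit() via its optional `weights` argument.
        # After the first weighted refit, a Widrow-Hoff update (learning rate beta) blends the
        # previously used weights with the newly transformed ones before refitting.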
44 | last_iteration_scores = None 45 | last_last_iteration_scores = None 46 | for i in range(self.max_iter): 47 | copy_relief_object = copy.deepcopy(self.relief_object) 48 | if i == 0: 49 | copy_relief_object.fit(X,y) 50 | last_iteration_scores = copy_relief_object.feature_importances_ 51 | elif i == 1: 52 | if self.rank_absolute: 53 | absolute_weights = np.absolute(last_iteration_scores) 54 | transformed_weights = absolute_weights/np.max(absolute_weights) 55 | else: 56 | transformed_weights = self.transform_weights(last_iteration_scores) 57 | copy_relief_object.fit(X, y, weights=transformed_weights) 58 | if self.has_converged(last_iteration_scores,copy_relief_object.feature_importances_): 59 | last_iteration_scores = copy_relief_object.feature_importances_ 60 | break 61 | last_last_iteration_scores = copy.deepcopy(transformed_weights) 62 | last_iteration_scores = copy_relief_object.feature_importances_ 63 | else: 64 | if self.rank_absolute: 65 | absolute_weights = np.absolute(last_iteration_scores) 66 | new_weights = absolute_weights/np.max(absolute_weights) 67 | else: 68 | new_weights = self.transform_weights(last_iteration_scores) 69 | 70 | transformed_weights = self.widrow_hoff(last_last_iteration_scores,new_weights,self.beta) 71 | copy_relief_object.fit(X,y,weights=transformed_weights) 72 | if self.has_converged(last_iteration_scores,copy_relief_object.feature_importances_): 73 | last_iteration_scores = copy_relief_object.feature_importances_ 74 | break 75 | last_last_iteration_scores = copy.deepcopy(transformed_weights) 76 | last_iteration_scores = copy_relief_object.feature_importances_ 77 | 78 | #DEBUGGING 79 | #print(last_iteration_scores) 80 | 81 | #Save final FI as feature_importances_ 82 | self.feature_importances_ = last_iteration_scores 83 | 84 | if self.rank_absolute: 85 | self.top_features_ = np.argsort(np.absolute(self.feature_importances_))[::-1] 86 | else: 87 | self.top_features_ = np.argsort(self.feature_importances_)[::-1] 88 | 89 | return self 90 | 91 | def widrow_hoff(self,originalw, neww,beta): 92 | diff = neww-originalw 93 | return originalw + (beta*diff) 94 | 95 | def has_converged(self,weight1,weight2): 96 | for i in range(len(weight1)): 97 | if abs(weight1[i] - weight2[i]) >= self.converage_threshold: 98 | return False 99 | return True 100 | 101 | def transform_weights(self,weights): 102 | max_val = np.max(weights) 103 | for i in range(len(weights)): 104 | if weights[i] < 0: 105 | weights[i] = 0 106 | else: 107 | if max_val == 0: 108 | weights[i] = 0 109 | else: 110 | weights[i] = weights[i]/max_val 111 | return weights 112 | 113 | def check_is_int(self, num): 114 | try: 115 | n = float(num) 116 | if num - int(num) == 0: 117 | return True 118 | else: 119 | return False 120 | except: 121 | return False 122 | 123 | def check_is_float(self, num): 124 | try: 125 | n = float(num) 126 | return True 127 | except: 128 | return False 129 | 130 | def transform(self, X): 131 | if X.shape[1] < self.relief_object.n_features_to_select: 132 | raise ValueError('Number of features to select is larger than the number of features in the dataset.') 133 | 134 | return X[:, self.top_features_[:self.relief_object.n_features_to_select]] 135 | 136 | def fit_transform(self, X, y): 137 | self.fit(X, y) 138 | return self.transform(X) -------------------------------------------------------------------------------- /skrebate/multisurf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | scikit-rebate was primarily 
developed at the University of Pennsylvania by: 5 | - Randal S. Olson (rso@randalolson.com) 6 | - Pete Schmitt (pschmitt@upenn.edu) 7 | - Ryan J. Urbanowicz (ryanurb@upenn.edu) 8 | - Weixuan Fu (weixuanf@upenn.edu) 9 | - and many more generous open source contributors 10 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software 11 | and associated documentation files (the "Software"), to deal in the Software without restriction, 12 | including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 14 | subject to the following conditions: 15 | The above copyright notice and this permission notice shall be included in all copies or substantial 16 | portions of the Software. 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 18 | LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 21 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | """ 23 | 24 | from __future__ import print_function 25 | import numpy as np 26 | from .surfstar import SURFstar 27 | from joblib import Parallel, delayed 28 | from .scoring_utils import MultiSURF_compute_scores 29 | 30 | 31 | class MultiSURF(SURFstar): 32 | 33 | """Feature selection using data-mined expert knowledge. 34 | Based on the MultiSURF algorithm as introduced in: 35 | Moore, Jason et al. Multiple Threshold Spatially Uniform ReliefF 36 | for the Genetic Analysis of Complex Human Diseases. 37 | """ 38 | 39 | ############################# MultiSURF ######################################## 40 | def _find_neighbors(self, inst): 41 | """ Identify nearest hits and misses within radius defined by average distance and standard deviation around each target training instance. 42 | This works the same regardless of endpoint type. """ 43 | dist_vect = [] 44 | for j in range(self._datalen): 45 | if inst != j: 46 | locator = [inst, j] 47 | if inst < j: 48 | locator.reverse() 49 | dist_vect.append(self._distance_array[locator[0]][locator[1]]) 50 | 51 | dist_vect = np.array(dist_vect) 52 | inst_avg_dist = np.average(dist_vect) 53 | inst_std = np.std(dist_vect) / 2. 54 | # Defining a narrower radius based on the average instance distance minus the standard deviation of instance distances. 55 | near_threshold = inst_avg_dist - inst_std 56 | 57 | NN_near = [] 58 | for j in range(self._datalen): 59 | if inst != j: 60 | locator = [inst, j] 61 | if inst < j: 62 | locator.reverse() 63 | if self._distance_array[locator[0]][locator[1]] < near_threshold: 64 | NN_near.append(j) 65 | 66 | return np.array(NN_near) 67 | 68 | def _run_algorithm(self): 69 | """ Runs nearest neighbor (NN) identification and feature scoring to yield MultiSURF scores. 
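        Each target instance uses its own neighborhood radius (its mean distance to the other instances minus half the standard deviation of those distances, as computed in _find_neighbors), and per-instance score contributions are summed in parallel via joblib.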
""" 70 | nan_entries = np.isnan(self._X) 71 | 72 | NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)] 73 | 74 | if isinstance(self._weights, np.ndarray) and self.weight_final_scores: 75 | scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed( 76 | MultiSURF_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap, 77 | NN_near, self._headers, self._class_type, self._X, self._y, self._labels_std, self.data_type, self._weights) 78 | for instance_num, NN_near in zip(range(self._datalen), NNlist)), axis=0) 79 | else: 80 | scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed( 81 | MultiSURF_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap, 82 | NN_near, self._headers, self._class_type, self._X, self._y, self._labels_std, self.data_type) 83 | for instance_num, NN_near in zip(range(self._datalen), NNlist)), axis=0) 84 | 85 | return np.array(scores) 86 | -------------------------------------------------------------------------------- /skrebate/multisurfstar.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | scikit-rebate was primarily developed at the University of Pennsylvania by: 5 | - Randal S. Olson (rso@randalolson.com) 6 | - Pete Schmitt (pschmitt@upenn.edu) 7 | - Ryan J. Urbanowicz (ryanurb@upenn.edu) 8 | - Weixuan Fu (weixuanf@upenn.edu) 9 | - and many more generous open source contributors 10 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software 11 | and associated documentation files (the "Software"), to deal in the Software without restriction, 12 | including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 14 | subject to the following conditions: 15 | The above copyright notice and this permission notice shall be included in all copies or substantial 16 | portions of the Software. 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 18 | LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 21 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | """ 23 | 24 | from __future__ import print_function 25 | import numpy as np 26 | from .surfstar import SURFstar 27 | from .scoring_utils import MultiSURFstar_compute_scores 28 | from joblib import Parallel, delayed 29 | 30 | 31 | class MultiSURFstar(SURFstar): 32 | 33 | """Feature selection using data-mined expert knowledge. 34 | Based on the MultiSURF algorithm as introduced in: 35 | Moore, Jason et al. Multiple Threshold Spatially Uniform ReliefF 36 | for the Genetic Analysis of Complex Human Diseases. 37 | """ 38 | 39 | ############################# MultiSURF* ######################################## 40 | def _find_neighbors(self, inst): 41 | """ Identify nearest as well as farthest hits and misses within radius defined by average distance and standard deviation of distances from target instanace. 42 | This works the same regardless of endpoint type. 
""" 43 | dist_vect = [] 44 | for j in range(self._datalen): 45 | if inst != j: 46 | locator = [inst, j] 47 | if inst < j: 48 | locator.reverse() 49 | dist_vect.append(self._distance_array[locator[0]][locator[1]]) 50 | 51 | dist_vect = np.array(dist_vect) 52 | inst_avg_dist = np.average(dist_vect) 53 | inst_std = np.std(dist_vect) / 2. 54 | near_threshold = inst_avg_dist - inst_std 55 | far_threshold = inst_avg_dist + inst_std 56 | 57 | NN_near = [] 58 | NN_far = [] 59 | for j in range(self._datalen): 60 | if inst != j: 61 | locator = [inst, j] 62 | if inst < j: 63 | locator.reverse() 64 | if self._distance_array[locator[0]][locator[1]] < near_threshold: 65 | NN_near.append(j) 66 | elif self._distance_array[locator[0]][locator[1]] > far_threshold: 67 | NN_far.append(j) 68 | 69 | return np.array(NN_near), np.array(NN_far) 70 | 71 | def _run_algorithm(self): 72 | """ Runs nearest neighbor (NN) identification and feature scoring to yield MultiSURF* scores. """ 73 | nan_entries = np.isnan(self._X) 74 | 75 | NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)] 76 | NN_near_list = [i[0] for i in NNlist] 77 | NN_far_list = [i[1] for i in NNlist] 78 | 79 | if isinstance(self._weights, np.ndarray) and self.weight_final_scores: 80 | scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed( 81 | MultiSURFstar_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap, 82 | NN_near, NN_far, self._headers, self._class_type, self._X, self._y, 83 | self._labels_std, self.data_type, self._weights) 84 | for instance_num, NN_near, NN_far in 85 | zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0) 86 | else: 87 | scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed( 88 | MultiSURFstar_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap, 89 | NN_near, NN_far, self._headers, self._class_type, self._X, self._y, 90 | self._labels_std, self.data_type) 91 | for instance_num, NN_near, NN_far in 92 | zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0) 93 | 94 | return np.array(scores) 95 | -------------------------------------------------------------------------------- /skrebate/surf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | scikit-rebate was primarily developed at the University of Pennsylvania by: 5 | - Randal S. Olson (rso@randalolson.com) 6 | - Pete Schmitt (pschmitt@upenn.edu) 7 | - Ryan J. Urbanowicz (ryanurb@upenn.edu) 8 | - Weixuan Fu (weixuanf@upenn.edu) 9 | - and many more generous open source contributors 10 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software 11 | and associated documentation files (the "Software"), to deal in the Software without restriction, 12 | including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 14 | subject to the following conditions: 15 | The above copyright notice and this permission notice shall be included in all copies or substantial 16 | portions of the Software. 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 18 | LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 21 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | """ 23 | 24 | from __future__ import print_function 25 | import numpy as np 26 | from joblib import Parallel, delayed 27 | from .relieff import ReliefF 28 | from .scoring_utils import SURF_compute_scores 29 | 30 | 31 | class SURF(ReliefF): 32 | 33 | """Feature selection using data-mined expert knowledge. 34 | Based on the SURF algorithm as introduced in: 35 | Moore, Jason et al. Multiple Threshold Spatially Uniform ReliefF 36 | for the Genetic Analysis of Complex Human Diseases. 37 | """ 38 | 39 | def __init__(self, n_features_to_select=10, discrete_threshold=10, verbose=False, n_jobs=1,weight_final_scores=False,rank_absolute=False): 40 | """Sets up ReliefF to perform feature selection. 41 | Parameters 42 | ---------- 43 | n_features_to_select: int (default: 10) 44 | the number of top features (according to the relieff score) to 45 | retain after feature selection is applied. 46 | discrete_threshold: int (default: 10) 47 | Value used to determine if a feature is discrete or continuous. 48 | If the number of unique levels in a feature is > discrete_threshold, then it is 49 | considered continuous, or discrete otherwise. 50 | verbose: bool (default: False) 51 | If True, output timing of distance array and scoring 52 | n_jobs: int (default: 1) 53 | The number of cores to dedicate to computing the scores with joblib. 54 | Assigning this parameter to -1 will dedicate as many cores as are available on your system. 55 | We recommend setting this parameter to -1 to speed up the algorithm as much as possible. 56 | weight_final_scores: bool (default: False) 57 | Whether to multiply given weights (in fit) to final scores. Only applicable if weights are given. 58 | rank_absolute: bool (default: False) 59 | Whether to give top features as by ranking features by absolute value. 60 | """ 61 | self.n_features_to_select = n_features_to_select 62 | self.discrete_threshold = discrete_threshold 63 | self.verbose = verbose 64 | self.n_jobs = n_jobs 65 | self.weight_final_scores = weight_final_scores 66 | self.rank_absolute = rank_absolute 67 | 68 | ############################# SURF ############################################ 69 | def _find_neighbors(self, inst, avg_dist): 70 | """ Identify nearest hits and misses within radius defined by average distance over whole distance array. 71 | This works the same regardless of endpoint type. """ 72 | NN = [] 73 | min_indicies = [] 74 | 75 | for i in range(self._datalen): 76 | if inst != i: 77 | locator = [inst, i] 78 | if i > inst: 79 | locator.reverse() 80 | d = self._distance_array[locator[0]][locator[1]] 81 | if d < avg_dist: # Defining the neighborhood with an average distance radius. 82 | min_indicies.append(i) 83 | for i in range(len(min_indicies)): 84 | NN.append(min_indicies[i]) 85 | return np.array(NN, dtype=np.int32) 86 | 87 | def _run_algorithm(self): 88 | """ Runs nearest neighbor (NN) identification and feature scoring to yield SURF scores. 
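        Unlike MultiSURF, the neighborhood radius here is global: the mean of all entries in the precomputed pairwise distance array.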
""" 89 | sm = cnt = 0 90 | for i in range(self._datalen): 91 | sm += sum(self._distance_array[i]) 92 | cnt += len(self._distance_array[i]) 93 | avg_dist = sm / float(cnt) 94 | 95 | nan_entries = np.isnan(self._X) 96 | 97 | NNlist = [self._find_neighbors(datalen, avg_dist) for datalen in range(self._datalen)] 98 | if isinstance(self._weights, np.ndarray) and self.weight_final_scores: 99 | scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed( 100 | SURF_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap, 101 | NN, self._headers, self._class_type, self._X, self._y, self._labels_std,self.data_type, self._weights) 102 | for instance_num, NN in zip(range(self._datalen), NNlist)),axis=0) 103 | else: 104 | scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed( 105 | SURF_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap, 106 | NN, self._headers, self._class_type, self._X, self._y, self._labels_std,self.data_type) 107 | for instance_num, NN in zip(range(self._datalen), NNlist)),axis=0) 108 | 109 | return np.array(scores) 110 | -------------------------------------------------------------------------------- /skrebate/surfstar.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | scikit-rebate was primarily developed at the University of Pennsylvania by: 5 | - Randal S. Olson (rso@randalolson.com) 6 | - Pete Schmitt (pschmitt@upenn.edu) 7 | - Ryan J. Urbanowicz (ryanurb@upenn.edu) 8 | - Weixuan Fu (weixuanf@upenn.edu) 9 | - and many more generous open source contributors 10 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software 11 | and associated documentation files (the "Software"), to deal in the Software without restriction, 12 | including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 | and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 14 | subject to the following conditions: 15 | The above copyright notice and this permission notice shall be included in all copies or substantial 16 | portions of the Software. 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 18 | LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 21 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | """ 23 | 24 | from __future__ import print_function 25 | import numpy as np 26 | from .surf import SURF 27 | from .scoring_utils import SURFstar_compute_scores 28 | from joblib import Parallel, delayed 29 | 30 | 31 | class SURFstar(SURF): 32 | 33 | """Feature selection using data-mined expert knowledge. 34 | Based on the SURF* algorithm as introduced in: 35 | Moore, Jason et al. Multiple Threshold Spatially Uniform ReliefF 36 | for the Genetic Analysis of Complex Human Diseases. 37 | """ 38 | 39 | ############################# SURF* ######################################## 40 | def _find_neighbors(self, inst, avg_dist): 41 | """ Identify nearest as well as farthest hits and misses within radius defined by average distance over whole distance array. 42 | This works the same regardless of endpoint type. 
""" 43 | NN_near = [] 44 | NN_far = [] 45 | min_indices = [] 46 | max_indices = [] 47 | 48 | for i in range(self._datalen): 49 | if inst != i: 50 | locator = [inst, i] 51 | if i > inst: 52 | locator.reverse() 53 | d = self._distance_array[locator[0]][locator[1]] 54 | if d < avg_dist: 55 | min_indices.append(i) 56 | if d > avg_dist: 57 | max_indices.append(i) 58 | 59 | for i in range(len(min_indices)): 60 | NN_near.append(min_indices[i]) 61 | for i in range(len(max_indices)): 62 | NN_far.append(max_indices[i]) 63 | 64 | return np.array(NN_near, dtype=np.int32), np.array(NN_far, dtype=np.int32) 65 | 66 | def _run_algorithm(self): 67 | """ Runs nearest neighbor (NN) identification and feature scoring to yield SURF* scores. """ 68 | sm = cnt = 0 69 | for i in range(self._datalen): 70 | sm += sum(self._distance_array[i]) 71 | cnt += len(self._distance_array[i]) 72 | avg_dist = sm / float(cnt) 73 | 74 | nan_entries = np.isnan(self._X) 75 | 76 | NNlist = [self._find_neighbors(datalen, avg_dist) for datalen in range(self._datalen)] 77 | NN_near_list = [i[0] for i in NNlist] 78 | NN_far_list = [i[1] for i in NNlist] 79 | 80 | if isinstance(self._weights, np.ndarray) and self.weight_final_scores: 81 | scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed( 82 | SURFstar_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap, 83 | NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std, self.data_type, self._weights) 84 | for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0) 85 | 86 | else: 87 | scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed( 88 | SURFstar_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap, 89 | NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std, self.data_type) 90 | for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0) 91 | 92 | return np.array(scores) 93 | -------------------------------------------------------------------------------- /skrebate/turf.py: -------------------------------------------------------------------------------- 1 | from sklearn.base import BaseEstimator 2 | import copy 3 | import numpy as np 4 | 5 | class TURF(BaseEstimator): 6 | 7 | def __init__(self,relief_object,pct=0.5,num_scores_to_return=100): 8 | ''' 9 | :param relief_object: Must be an object that implements the standard sklearn fit function, and after fit, has attributes feature_importances_ 10 | and top_features_ that can be accessed. Scores must be a 1D np.ndarray of length # of features. 11 | :param pct: % of features to remove from removing features each iteration (if float). Or # of features to remove each iteration (if int) 12 | :param num_scores_to_return: Number of nonzero scores to return after training. 
Default = min(num_features, 100) 13 | ''' 14 | if not self.check_is_int(num_scores_to_return) or num_scores_to_return < 0: 15 | raise Exception('num_scores_to_return must be a nonnegative integer') 16 | 17 | if (not self.check_is_int(pct) and not self.check_is_float(pct)) or pct < 0: 18 | raise Exception('pct must be a nonnegative integer/float') 19 | 20 | if (not self.check_is_int(pct) and self.check_is_float(pct)) and (pct < 0 or pct > 1): 21 | raise Exception('if pct is a float, it must be from [0,1]') 22 | 23 | self.relief_object = relief_object 24 | self.pct = pct 25 | self.num_scores_to_return = num_scores_to_return 26 | self.rank_absolute = self.relief_object.rank_absolute 27 | 28 | def fit(self, X, y): 29 | """Scikit-learn required: Computes the feature importance scores from the training data. 30 | Parameters 31 | ---------- 32 | X: array-like {n_samples, n_features} Training instances to compute the feature importance scores from 33 | y: array-like {n_samples} Training labels 34 | Returns 35 | ------- 36 | self 37 | """ 38 | #Adjust num_scores_to_return 39 | num_features = X.shape[1] 40 | self.num_scores_to_return = min(self.num_scores_to_return,num_features) 41 | 42 | if self.num_scores_to_return != num_features and self.pct == 1: 43 | raise Exception('num_scores_to_return != num_features and pct == 1. TURF will never reach your intended destination.') 44 | 45 | #Find out out how many features to use in each iteration 46 | features_per_iteration = self.get_features_per_iteration(num_features,self.pct,self.num_scores_to_return) 47 | 48 | #Iterate runs 49 | binary_scores_existence_tracker = np.ones(num_features) #1 means score still left 50 | 51 | copy_relief_object = copy.deepcopy(self.relief_object) 52 | copy_relief_object.fit(X, y) 53 | features_per_iteration.pop(0) 54 | for num_features_to_use_in_iteration in features_per_iteration: 55 | #Find top raw features indices 56 | best_raw_indices = copy_relief_object.top_features_[:num_features_to_use_in_iteration] 57 | 58 | #Map raw features indices to original feature indices array 59 | onesCounter = 0 60 | copy_tracker = copy.deepcopy(binary_scores_existence_tracker) 61 | for i in range(len(binary_scores_existence_tracker)): 62 | if not (onesCounter in best_raw_indices): 63 | binary_scores_existence_tracker[i] = 0 64 | if copy_tracker[i] == 1: 65 | onesCounter+=1 66 | 67 | #Get new X 68 | new_indices = [] 69 | for i in range(len(binary_scores_existence_tracker)): 70 | if binary_scores_existence_tracker[i] == 1: 71 | new_indices.append(i) 72 | 73 | ###DEBUGGING 74 | # print(num_features_to_use_in_iteration) 75 | # print(best_raw_indices) 76 | # print(binary_scores_existence_tracker) 77 | # print(new_indices) 78 | # print() 79 | 80 | new_X = X[:,new_indices] 81 | 82 | #fit 83 | copy_relief_object = copy.deepcopy(self.relief_object) 84 | copy_relief_object.fit(new_X, y) 85 | 86 | #Return remaining scores in their original indices, having zeros for the rest 87 | raw_scores = copy_relief_object.feature_importances_ 88 | counter = 0 89 | for i in range(len(binary_scores_existence_tracker)): 90 | if binary_scores_existence_tracker[i] == 1: 91 | binary_scores_existence_tracker[i] = raw_scores[counter] 92 | counter += 1 93 | 94 | # Save FI as feature_importances_ 95 | self.feature_importances_ = binary_scores_existence_tracker 96 | 97 | if self.rank_absolute: 98 | self.top_features_ = np.argsort(np.absolute(self.feature_importances_))[::-1] 99 | else: 100 | self.top_features_ = np.argsort(self.feature_importances_)[::-1] 101 | 102 | return 
self 103 | 104 | def get_features_per_iteration(self,num_features,pct,num_scores_to_return): 105 | features_per_iteration = [num_features] 106 | features_left = num_features 107 | if num_features != num_scores_to_return: 108 | if self.check_is_int(pct): # Is int 109 | while True: 110 | if features_left - pct > num_scores_to_return: 111 | features_left -= pct 112 | features_per_iteration.append(features_left) 113 | else: 114 | features_per_iteration.append(num_scores_to_return) 115 | break 116 | else: # Is float 117 | while True: 118 | if int(features_left * pct) > num_scores_to_return: 119 | features_left = int(features_left * pct) 120 | features_per_iteration.append(features_left) 121 | else: 122 | features_per_iteration.append(num_scores_to_return) 123 | break 124 | return features_per_iteration 125 | 126 | def check_is_int(self, num): 127 | try: 128 | n = float(num) 129 | if num - int(num) == 0: 130 | return True 131 | else: 132 | return False 133 | except: 134 | return False 135 | 136 | def check_is_float(self, num): 137 | try: 138 | n = float(num) 139 | return True 140 | except: 141 | return False 142 | 143 | def transform(self, X): 144 | if X.shape[1] < self.relief_object.n_features_to_select: 145 | raise ValueError('Number of features to select is larger than the number of features in the dataset.') 146 | 147 | return X[:, self.top_features_[:self.relief_object.n_features_to_select]] 148 | 149 | def fit_transform(self, X, y): 150 | self.fit(X, y) 151 | return self.transform(X) -------------------------------------------------------------------------------- /skrebate/vls.py: -------------------------------------------------------------------------------- 1 | from sklearn.base import BaseEstimator 2 | import copy 3 | import random 4 | import numpy as np 5 | 6 | class VLS(BaseEstimator): 7 | 8 | def __init__(self,relief_object,num_feature_subset=40,size_feature_subset=5,random_state = None): 9 | ''' 10 | :param relief_object: Must be an object that implements the standard sklearn fit function, and after fit, has attribute feature_importances_ 11 | that can be accessed. Scores must be a 1D np.ndarray of length # of features. The fit function must also be able to 12 | take in an optional 1D np.ndarray 'weights' parameter of length num_features. 13 | :param num_feature_subset: Number of feature subsets generated at random 14 | :param size_feature_subset: Number of features in each subset. Cannot exceed number of features. 15 | :param random_state: random seed 16 | ''' 17 | 18 | if not self.check_is_int(num_feature_subset) or num_feature_subset <= 0: 19 | raise Exception('num_feature_subset must be a positive integer') 20 | 21 | if not self.check_is_int(size_feature_subset) or size_feature_subset <= 0: 22 | raise Exception('size_feature_subset must be a positive integer') 23 | 24 | if random_state != None and not self.check_is_int(random_state): 25 | raise Exception('random_state must be None or integer') 26 | 27 | self.relief_object = relief_object 28 | self.num_feature_subset = num_feature_subset 29 | self.size_feature_subset = size_feature_subset 30 | self.random_state = random_state 31 | self.rank_absolute = self.relief_object.rank_absolute 32 | 33 | def fit(self, X, y,weights=None): 34 | """Scikit-learn required: Computes the feature importance scores from the training data. 
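        Each random feature subset is scored with a fresh copy of relief_object, and a feature's final importance is the largest score (largest in magnitude when rank_absolute is set) that it received across the subsets containing it.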
35 | Parameters 36 | ---------- 37 | X: array-like {n_samples, n_features} Training instances to compute the feature importance scores from 38 | y: array-like {n_samples} Training labels 39 | Returns 40 | ------- 41 | self 42 | """ 43 | #random_state 44 | if self.random_state != None: 45 | np.random.seed(self.random_state) 46 | random.seed(self.random_state) 47 | 48 | #Make subsets with all the features 49 | num_features = X.shape[1] 50 | self.size_feature_subset = min(self.size_feature_subset,num_features) 51 | subsets = self.make_subsets(list(range(num_features)),self.num_feature_subset,self.size_feature_subset) 52 | 53 | #Fit each subset 54 | scores = [] 55 | for subset in subsets: 56 | new_X = self.custom_transform(X,subset) 57 | copy_relief_object = copy.deepcopy(self.relief_object) 58 | if not isinstance(weights,np.ndarray): 59 | copy_relief_object.fit(new_X,y) 60 | else: 61 | copy_relief_object.fit(new_X,y,weights=weights[subset]) 62 | raw_score = copy_relief_object.feature_importances_ 63 | score = np.empty(num_features) 64 | if self.rank_absolute: 65 | score.fill(0) 66 | else: 67 | score.fill(np.NINF) 68 | counter = 0 69 | for index in subset: 70 | score[index] = raw_score[counter] 71 | counter+=1 72 | scores.append(score) 73 | 74 | #DEBUGGING 75 | #print(score) 76 | 77 | scores = np.array(scores) 78 | 79 | #Merge results by selecting largest found weight for each feature 80 | max_scores = [] 81 | for score in scores.T: 82 | if self.rank_absolute: 83 | max = np.max(np.absolute(score)) 84 | if max in score: 85 | max_scores.append(max) 86 | else: 87 | max_scores.append(-max) 88 | else: 89 | max_scores.append(np.max(score)) 90 | max_scores = np.array(max_scores) 91 | 92 | #Save FI as feature_importances_ 93 | self.feature_importances_ = max_scores 94 | 95 | if self.rank_absolute: 96 | self.top_features_ = np.argsort(np.absolute(self.feature_importances_))[::-1] 97 | else: 98 | self.top_features_ = np.argsort(self.feature_importances_)[::-1] 99 | 100 | return self 101 | 102 | def custom_transform(self,X,indices_to_preserve): 103 | return X[:,indices_to_preserve] 104 | 105 | def make_subsets(self,possible_indices,num_feature_subset,size_feature_subset): 106 | if num_feature_subset * size_feature_subset < len(possible_indices): 107 | raise Exception('num_feature_subset * size_feature_subset must be >= number of total features') 108 | 109 | if size_feature_subset > len(possible_indices): 110 | raise Exception('size_feature_subset cannot be > number of total features') 111 | 112 | random.shuffle(possible_indices) 113 | remaining_indices = copy.deepcopy(possible_indices) 114 | 115 | subsets = [] 116 | while True: 117 | subset = [] 118 | while len(remaining_indices) > 0 and len(subset) < size_feature_subset: 119 | subset.append(remaining_indices.pop(0)) 120 | subsets.append(subset) 121 | if len(remaining_indices) < size_feature_subset: 122 | break 123 | 124 | if len(remaining_indices) != 0: 125 | while len(remaining_indices) < size_feature_subset: 126 | index_bad = True 127 | while index_bad: 128 | potential_index = random.choice(possible_indices) 129 | if not (potential_index in remaining_indices): 130 | remaining_indices.append(potential_index) 131 | break 132 | subsets.append(remaining_indices) 133 | 134 | subsets_left = num_feature_subset - len(subsets) 135 | for i in range(subsets_left): 136 | subsets.append(random.sample(possible_indices,size_feature_subset)) 137 | 138 | return subsets 139 | 140 | def check_is_int(self, num): 141 | try: 142 | n = float(num) 143 | if num - int(num) == 
0: 144 | return True 145 | else: 146 | return False 147 | except: 148 | return False 149 | 150 | def check_is_float(self, num): 151 | try: 152 | n = float(num) 153 | return True 154 | except: 155 | return False 156 | 157 | def transform(self, X): 158 | if X.shape[1] < self.relief_object.n_features_to_select: 159 | raise ValueError('Number of features to select is larger than the number of features in the dataset.') 160 | 161 | return X[:, self.top_features_[:self.relief_object.n_features_to_select]] 162 | 163 | def fit_transform(self, X, y, weights=None): 164 | self.fit(X, y, weights) 165 | return self.transform(X) --------------------------------------------------------------------------------
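The wrapper estimators above (TURF, VLS, and Iter) are not yet covered in the usage documentation. A minimal usage sketch follows; the dataset and parameter values are illustrative only, and it assumes (as the Iter docstring requires) that the wrapped core estimator's fit method accepts an optional weights argument:

```
import numpy as np
from skrebate import MultiSURF, TURF, VLS, Iter

rng = np.random.RandomState(42)
X = rng.randint(0, 3, size=(100, 20)).astype(float)  # 20 discrete features
y = rng.randint(0, 2, size=100)                       # binary endpoint

core = MultiSURF(n_features_to_select=5, rank_absolute=True)

# VLS: score random feature subsets and keep the best score seen for each feature.
vls = VLS(core, num_feature_subset=10, size_feature_subset=5, random_state=42).fit(X, y)

# TURF: iteratively drop the lowest-scoring fraction of features (here, half per iteration).
turf = TURF(core, pct=0.5, num_scores_to_return=10).fit(X, y)

# Iter: refit the core algorithm, feeding each run's transformed weights into the next run.
itr = Iter(core, max_iter=5).fit(X, y)

print(vls.feature_importances_)
print(turf.top_features_[:5])
print(itr.feature_importances_)
```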