{{title}}
3 |{{summary}}
4 |├── .coveragerc ├── .gitignore ├── .landscape.yaml ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── ci ├── .travis_install.sh └── .travis_test.sh ├── data ├── 3Class_Datasets_Loc_2_01.txt ├── GAMETES_Epistasis_2-Way_20atts_0.4H_EDM-1_1.tsv.gz ├── GAMETES_Epistasis_2-Way_continuous_endpoint_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz ├── GAMETES_Epistasis_2-Way_missing_values_0.1_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz └── GAMETES_Epistasis_2-Way_mixed_attribute_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz ├── docs ├── 404.html ├── citing │ └── index.html ├── contributing │ └── index.html ├── css │ ├── highlight.css │ ├── theme.css │ └── theme_extra.css ├── fonts │ ├── fontawesome-webfont.eot │ ├── fontawesome-webfont.svg │ ├── fontawesome-webfont.ttf │ └── fontawesome-webfont.woff ├── img │ └── favicon.ico ├── index.html ├── installing │ └── index.html ├── js │ ├── highlight.pack.js │ ├── jquery-2.1.1.min.js │ ├── modernizr-2.8.3.min.js │ └── theme.js ├── releases │ └── index.html ├── search.html ├── search │ ├── lunr.min.js │ ├── mustache.min.js │ ├── require.js │ ├── search-results-template.mustache │ ├── search.js │ ├── search_index.json │ └── text.js ├── sitemap.xml ├── support │ └── index.html └── using │ └── index.html ├── docs_sources ├── citing.md ├── contributing.md ├── index.md ├── installing.md ├── releases.md ├── support.md └── using.md ├── mkdocs.yml ├── setup.py ├── skrebate ├── __init__.py ├── _version.py ├── iter.py ├── multisurf.py ├── multisurfstar.py ├── relieff.py ├── scoring_utils.py ├── surf.py ├── surfstar.py ├── turf.py └── vls.py └── tests.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = skrebate 4 | include = */skrebate/* 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #Custom test files 2 | run_test.py 
3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | 58 | # Sphinx documentation 59 | docs/_build/ 60 | 61 | # PyBuilder 62 | target/ 63 | 64 | #Ipython Notebook 65 | .ipynb_checkpoints 66 | 67 | testing.ipynb 68 | 69 | *.lprof 70 | 71 | *.prof 72 | /demo_scikitrebate.ipynb 73 | 74 | *.DS_Store 75 | .idea/ 76 | 77 | analysis_pipeline/skrebatewip 78 | -------------------------------------------------------------------------------- /.landscape.yaml: -------------------------------------------------------------------------------- 1 | doc-warnings: yes 2 | 3 | ignore-patterns: 4 | - __init__.py 5 | 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | virtualenv: 3 | system_site_packages: true 4 | env: 5 | matrix: 6 | # let's start simple: 7 | - PYTHON_VERSION="2.7" LATEST="true" 8 | - PYTHON_VERSION="3.6" COVERAGE="true" LATEST="true" 9 | - PYTHON_VERSION="3.6" LATEST="true" 10 | install: source ./ci/.travis_install.sh 11 | script: bash ./ci/.travis_test.sh 12 | 
after_success: 13 | # Ignore coveralls failures as the coveralls server is not very reliable 14 | # but we don't want travis to report a failure in the github UI just 15 | # because the coverage report failed to be published. 16 | - if [[ "$COVERAGE" == "true" ]]; then coveralls || echo "failed"; fi 17 | cache: apt 18 | sudo: false 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Randal S. Olson and Ryan J. Urbanowicz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Master status: [](https://travis-ci.org/EpistasisLab/scikit-rebate) 2 | [](https://landscape.io/github/EpistasisLab/scikit-rebate/master) 3 | [](https://coveralls.io/github/EpistasisLab/scikit-rebate?branch=master) 4 | 5 | Development status: [](https://travis-ci.org/EpistasisLab/scikit-rebate) 6 | [](https://landscape.io/github/EpistasisLab/scikit-rebate/development) 7 | [](https://coveralls.io/github/EpistasisLab/scikit-rebate?branch=development) 8 | 9 | Package information:  10 |  11 |  12 | [](https://badge.fury.io/py/skrebate) 13 | 14 | # scikit-rebate 15 | This package includes a scikit-learn-compatible Python implementation of ReBATE, a suite of [Relief-based feature selection algorithms](https://en.wikipedia.org/wiki/Relief_(feature_selection)) for Machine Learning. These Relief-Based algorithms (RBAs) are designed for feature weighting/selection as part of a machine learning pipeline (supervised learning). Presently this includes the following core RBAs: ReliefF, SURF, SURF\*, MultiSURF\*, and MultiSURF. Additionally, an implementation of the iterative TuRF mechanism and VLSRelief is included. **It is still under active development** and we encourage you to check back on this repository regularly for updates. 16 | 17 | These algorithms offer a computationally efficient way to perform feature selection that is sensitive to feature interactions as well as simple univariate associations, unlike most currently available filter-based feature selection methods. 
The main benefit of Relief algorithms is that they identify feature interactions without having to exhaustively check every pairwise interaction, thus taking significantly less time than exhaustive pairwise search. 18 | 19 | Certain algorithms require user specified run parameters (e.g. ReliefF requires the user to specify some 'k' number of nearest neighbors). 20 | 21 | Relief algorithms are commonly applied to genetic analyses, where epistasis (i.e., feature interactions) is common. However, the algorithms implemented in this package can be applied to almost any supervised classification data set and supports: 22 | 23 | * Feature sets that are discrete/categorical, continuous-valued or a mix of both 24 | 25 | * Data with missing values 26 | 27 | * Binary endpoints (i.e., classification) 28 | 29 | * Multi-class endpoints (i.e., classification) 30 | 31 | * Continuous endpoints (i.e., regression) 32 | 33 | Built into this code, is a strategy to 'automatically' detect from the loaded data, these relevant characteristics. 34 | 35 | Of our two initial ReBATE software releases, this scikit-learn compatible version primarily focuses on ease of incorporation into a scikit learn analysis pipeline. 36 | This code is most appropriate for scikit-learn users, Windows operating system users, beginners, or those looking for the most recent ReBATE developments. 37 | 38 | An alternative 'stand-alone' version of [ReBATE](https://github.com/EpistasisLab/ReBATE) is also available that focuses on improving run-time with the use of Cython for optimization. This implementation also outputs feature names and associated feature scores as a text file by default. 39 | 40 | ## License 41 | 42 | Please see the [repository license](https://github.com/EpistasisLab/scikit-rebate/blob/master/LICENSE) for the licensing and usage information for scikit-rebate. 43 | 44 | Generally, we have licensed scikit-rebate to make it as widely usable as possible. 
45 | 46 | ## Installation 47 | 48 | scikit-rebate is built on top of the following existing Python packages: 49 | 50 | * NumPy 51 | 52 | * SciPy 53 | 54 | * scikit-learn 55 | 56 | All of the necessary Python packages can be installed via the [Anaconda Python distribution](https://www.continuum.io/downloads), which we strongly recommend that you use. We also strongly recommend that you use Python 3 over Python 2 if you're given the choice. 57 | 58 | NumPy, SciPy, and scikit-learn can be installed in Anaconda via the command: 59 | 60 | ``` 61 | conda install numpy scipy scikit-learn 62 | ``` 63 | 64 | Once the prerequisites are installed, you should be able to install scikit-rebate with a pip command: 65 | 66 | ``` 67 | pip install skrebate 68 | ``` 69 | 70 | Please [file a new issue](https://github.com/EpistasisLab/scikit-rebate/issues/new) if you run into installation problems. 71 | 72 | ## Usage 73 | 74 | We have designed the Relief algorithms to be integrated directly into scikit-learn machine learning workflows. For example, the ReliefF algorithm can be used as a feature selection step in a scikit-learn pipeline as follows. 
75 | 76 | ```python 77 | import pandas as pd 78 | import numpy as np 79 | from sklearn.pipeline import make_pipeline 80 | from skrebate import ReliefF 81 | from sklearn.ensemble import RandomForestClassifier 82 | from sklearn.model_selection import cross_val_score 83 | 84 | genetic_data = pd.read_csv('https://github.com/EpistasisLab/scikit-rebate/raw/master/data/' 85 | 'GAMETES_Epistasis_2-Way_20atts_0.4H_EDM-1_1.tsv.gz', 86 | sep='\t', compression='gzip') 87 | 88 | features, labels = genetic_data.drop('class', axis=1).values, genetic_data['class'].values 89 | 90 | clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=100), 91 | RandomForestClassifier(n_estimators=100)) 92 | 93 | print(np.mean(cross_val_score(clf, features, labels))) 94 | >>> 0.795 95 | ``` 96 | 97 | For more information on the Relief algorithms available in this package and how to use them, please refer to our [usage documentation](https://EpistasisLab.github.io/scikit-rebate/using/). 98 | 99 | ## Contributing to scikit-rebate 100 | 101 | We welcome you to [check the existing issues](https://github.com/EpistasisLab/scikit-rebate/issues/) for bugs or enhancements to work on. If you have an idea for an extension to scikit-rebate, please [file a new issue](https://github.com/EpistasisLab/scikit-rebate/issues/new) so we can discuss it. 102 | 103 | Please refer to our [contribution guidelines](https://EpistasisLab.github.io/scikit-rebate/contributing/) prior to working on a new feature or bug fix. 104 | 105 | ## Citing scikit-rebate 106 | 107 | If you use scikit-rebate in a scientific publication, please consider citing the following paper: 108 | 109 | Ryan J. Urbanowicz, Randal S. Olson, Peter Schmitt, Melissa Meeker, Jason H. Moore (2017). [Benchmarking Relief-Based Feature Selection Methods](https://arxiv.org/abs/1711.08477). *arXiv preprint*, under review. 110 | 111 | BibTeX entry: 112 | 113 | ```bibtex 114 | @misc{Urbanowicz2017Benchmarking, 115 | author = {Urbanowicz, Ryan J. 
and Olson, Randal S. and Schmitt, Peter and Meeker, Melissa and Moore, Jason H.}, 116 | title = {Benchmarking Relief-Based Feature Selection Methods}, 117 | year = {2017}, 118 | howpublished = {arXiv e-print. https://arxiv.org/abs/1711.08477}, 119 | } 120 | ``` 121 | -------------------------------------------------------------------------------- /ci/.travis_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # modified from https://github.com/trevorstephens/gplearn 4 | 5 | # This script is meant to be called by the "install" step defined in 6 | # .travis.yml. See http://docs.travis-ci.com/ for more details. 7 | # The behavior of the script is controlled by environment variabled defined 8 | # in the .travis.yml in the top level folder of the project. 9 | 10 | 11 | # License: GNU/GPLv3 12 | 13 | set -e 14 | 15 | # Fix the compilers to workaround avoid having the Python 3.4 build 16 | # lookup for g++44 unexpectedly. 17 | export CC=gcc 18 | export CXX=g++ 19 | 20 | # Deactivate the travis-provided virtual environment and setup a 21 | # conda-based environment instead 22 | deactivate 23 | 24 | # Use the miniconda installer for faster download / install of conda 25 | # itself 26 | wget http://repo.continuum.io/miniconda/Miniconda-3.9.1-Linux-x86_64.sh \ 27 | -O miniconda.sh 28 | chmod +x miniconda.sh && ./miniconda.sh -b 29 | export PATH=/home/travis/miniconda/bin:$PATH 30 | conda update --yes conda 31 | 32 | # Configure the conda environment and put it in the path using the 33 | # provided versions 34 | conda create -n testenv --yes python=$PYTHON_VERSION pip nose \ 35 | numpy scipy scikit-learn cython pandas 36 | 37 | source activate testenv 38 | 39 | if [[ "$COVERAGE" == "true" ]]; then 40 | pip install coverage coveralls 41 | fi 42 | 43 | # build output in the travis output when it succeeds. 
44 | python --version 45 | python -c "import numpy; print('numpy %s' % numpy.__version__)" 46 | python -c "import scipy; print('scipy %s' % scipy.__version__)" 47 | python -c "import sklearn; print('sklearn %s' % sklearn.__version__)" 48 | python -c "import pandas; print('pandas %s' % pandas.__version__)" 49 | python setup.py build_ext --inplace 50 | -------------------------------------------------------------------------------- /ci/.travis_test.sh: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/trevorstephens/gplearn 2 | 3 | # This script is meant to be called by the "install" step defined in 4 | # .travis.yml. See http://docs.travis-ci.com/ for more details. 5 | # The behavior of the script is controlled by environment variabled defined 6 | # in the .travis.yml in the top level folder of the project. 7 | 8 | # License: GNU/GPLv3 9 | 10 | set -e 11 | 12 | python --version 13 | python -c "import numpy; print('numpy %s' % numpy.__version__)" 14 | python -c "import scipy; print('scipy %s' % scipy.__version__)" 15 | python -c "import sklearn; print('sklearn %s' % sklearn.__version__)" 16 | python -c "import pandas; print('pandas %s' % pandas.__version__)" 17 | 18 | if [[ "$COVERAGE" == "true" ]]; then 19 | nosetests -s -v --with-coverage 20 | else 21 | nosetests -s -v 22 | fi 23 | -------------------------------------------------------------------------------- /data/GAMETES_Epistasis_2-Way_20atts_0.4H_EDM-1_1.tsv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/scikit-rebate/16798854e7fbca553416409be8f9ff6f71204dac/data/GAMETES_Epistasis_2-Way_20atts_0.4H_EDM-1_1.tsv.gz -------------------------------------------------------------------------------- /data/GAMETES_Epistasis_2-Way_continuous_endpoint_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/scikit-rebate/16798854e7fbca553416409be8f9ff6f71204dac/data/GAMETES_Epistasis_2-Way_continuous_endpoint_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz -------------------------------------------------------------------------------- /data/GAMETES_Epistasis_2-Way_missing_values_0.1_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/scikit-rebate/16798854e7fbca553416409be8f9ff6f71204dac/data/GAMETES_Epistasis_2-Way_missing_values_0.1_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz -------------------------------------------------------------------------------- /data/GAMETES_Epistasis_2-Way_mixed_attribute_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EpistasisLab/scikit-rebate/16798854e7fbca553416409be8f9ff6f71204dac/data/GAMETES_Epistasis_2-Way_mixed_attribute_a_20s_1600her_0.4__maf_0.2_EDM-2_01.tsv.gz -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
5 | 6 | 7 | 8 | 9 | 10 | 11 |Page not found
112 | 113 | 114 |If you use scikit-rebate or the MultiSURF algorithm in a scientific publication, please consider citing the following paper (currently available as a pre-print in arXiv):
124 |Urbanowicz, Ryan J., Randal S. Olson, Peter Schmitt, Melissa Meeker, and Jason H. Moore. "Benchmarking relief-based feature selection methods." arXiv preprint arXiv:1711.08477 (2017).
125 |Alternatively a complete review of Relief-based algorithms is available at:
126 |Urbanowicz, Ryan J., Melissa Meeker, William LaCava, Randal S. Olson, and Jason H. Moore. "Relief-based feature selection: introduction and review." arXiv preprint arXiv:1711.08421 (2017).
127 |To cite the original Relief paper:
128 |Kira, Kenji, and Larry A. Rendell. "A practical approach to feature selection." In Machine Learning Proceedings 1992, pp. 249-256. 1992.
129 |To cite the original ReliefF paper:
130 |Kononenko, Igor. "Estimating attributes: analysis and extensions of RELIEF." In European conference on machine learning, pp. 171-182. Springer, Berlin, Heidelberg, 1994.
131 |To cite the original SURF paper:
132 |Greene, Casey S., Nadia M. Penrod, Jeff Kiralis, and Jason H. Moore. "Spatially uniform relieff (SURF) for computationally-efficient filtering of gene-gene interactions." BioData mining 2, no. 1 (2009): 5.
133 |To cite the original SURF* paper:
134 |Greene, Casey S., Daniel S. Himmelstein, Jeff Kiralis, and Jason H. Moore. "The informative extremes: using both nearest and farthest individuals can improve relief algorithms in the domain of human genetics." In European Conference on Evolutionary Computation, Machine Learning and Data Mining in Bioinformatics, pp. 182-193. Springer, Berlin, Heidelberg, 2010.
135 |To cite the original MultiSURF* paper:
136 |Granizo-Mackenzie, Delaney, and Jason H. Moore. "Multiple threshold spatially uniform relieff for the genetic analysis of complex human diseases." In European Conference on Evolutionary Computation, Machine Learning and Data Mining in Bioinformatics, pp. 1-10. Springer, Berlin, Heidelberg, 2013.
137 |To cite the original TuRF paper:
138 |Moore, Jason H., and Bill C. White. "Tuning ReliefF for genome-wide genetic analysis." In European Conference on Evolutionary Computation, Machine Learning and Data Mining in Bioinformatics, pp. 166-175. Springer, Berlin, Heidelberg, 2007.
139 | 140 |We welcome you to check the existing issues for bugs or enhancements to work on. If you have an idea for an extension to scikit-rebate, please file a new issue so we can discuss it.
139 |The latest stable release of scikit-rebate is on the master branch, whereas the latest version of scikit-rebate in development is on the development branch. Make sure you are looking at and working on the correct branch if you're looking to contribute code.
141 |In terms of directory structure:
142 |skrebate
directorydocs_sources
directorydocs
directorytests.py
fileMake sure to familiarize yourself with the project layout before making any major contributions, and especially make sure to send all code changes to the development
branch.
The preferred way to contribute to scikit-rebate is to fork the 151 | main repository on 152 | GitHub:
153 |Fork the project repository: 156 | click on the 'Fork' button near the top of the page. This creates 157 | a copy of the code under your account on the GitHub server.
158 |Clone this copy to your local disk:
161 | $ git clone git@github.com:YourLogin/scikit-rebate.git
162 | $ cd scikit-rebate
163 |
164 | Create a branch to hold your changes:
167 | $ git checkout -b my-contribution
168 |
169 | Make sure your local environment is set up correctly for development. Installation instructions are almost identical to the user instructions except that scikit-rebate should not be installed. If you have scikit-rebate installed on your computer, then make sure you are using a virtual environment that does not have scikit-rebate installed. Furthermore, you should make sure you have installed the nose
package into your development environment so that you can test changes locally.
$ conda install nose
173 |
174 | Start making changes on your newly created branch, remembering to never work on the master
branch! Work on this copy on your computer using Git to do the version control.
Once some changes are saved locally, you can use your tweaked version of scikit-rebate by navigating to the project's base directory and running scikit-rebate in a script.
180 |To check that your changes haven't broken any existing tests, and that any new tests you've added pass, run the following (note: you must have the nose
package installed within your dev environment for this to work):
$ nosetests -s -v
184 |
185 | When you're done editing and local testing, run:
188 | $ git add modified_files
189 | $ git commit
190 |
191 | to record your changes in Git, then push them to GitHub with:
194 | $ git push -u origin my-contribution
195 |
196 | Finally, go to the web page of your fork of the scikit-rebate repo, and click 'Pull Request' (PR) to send your changes to the maintainers for review. Make sure that you send your PR to the development
branch, as the master
branch is reserved for the latest stable release. This will start the CI server to check all the project's unit tests run and send an email to the maintainers.
(For details on the above look up the Git documentation on the web.)
198 |Before you submit a pull request for your contribution, please work through this checklist to make sure that you have done everything necessary so we can efficiently review and accept your changes.
200 |If your contribution changes scikit-rebate in any way:
201 |Update the documentation so all of your changes are reflected there.
204 |Update the README if anything there has changed.
207 |If your contribution involves any code changes:
210 |Update the project unit tests to test your code changes.
213 |Make sure that your code is properly commented with docstrings and comments explaining your rationale behind non-obvious coding practices.
216 |If your contribution requires a new library dependency:
219 |Double-check that the new dependency is easy to install via pip
or Anaconda and supports both Python 2 and 3. If the dependency requires a complicated installation, then we most likely won't merge your changes because we want to keep scikit-rebate easy to install.
Add a line to pip install the library to .travis_install.sh
225 |Add a line to print the version of the library to .travis_install.sh
228 |Similarly add a line to print the version of the library to .travis_test.sh
231 |We use mkdocs to manage our documentation. This allows us to write the docs in Markdown and compile them to HTML as needed. Below are a few useful commands to know when updating the documentation. Make sure that you are running them in the base repository directory.
235 |mkdocs serve
: Hosts a local version of the documentation that you can access at the provided URL. The local version will update automatically as you save changes to the documentation.
mkdocs build --clean
: Creates a fresh build of the documentation in HTML. Always run this before deploying the documentation to GitHub.
mkdocs gh-deploy
: Deploys the documentation to GitHub. If you're deploying on your fork of scikit-rebate, the online documentation should be accessible at http://<YOUR GITHUB USERNAME>.github.io/scikit-rebate/
. Generally, you shouldn't need to run this command because you can view your changes with mkdocs serve
.
After submitting your pull request, Travis-CI will automatically run unit tests on your changes and make sure that your updated code builds and runs on Python 2 and 3. We also use services that automatically check code quality and test coverage.
248 |Check back shortly after submitting your pull request to make sure that your code passes these checks. If any of the checks come back with a red X, then do your best to address the errors.
249 | 250 |scikit-rebate is a scikit-learn-compatible Python implementation of ReBATE, a suite of Relief-based feature selection algorithms for Machine Learning. As of 5/7/18, this project is still under active development and we encourage you to check back on this repository regularly for updates.
124 |These algorithms excel at identifying features that are predictive of the outcome in supervised learning problems, and are especially good at identifying feature interactions that are normally overlooked by standard feature selection methods.
125 |The main benefit of Relief-based algorithms is that they identify feature interactions without having to exhaustively check every pairwise interaction, thus taking significantly less time than exhaustive pairwise search.
126 |Relief-based algorithms are commonly applied to genetic analyses, where epistasis (i.e., feature interactions) is common. However, the algorithms implemented in this package can be applied to almost any supervised classification data set and support:
127 |A mix of categorical and/or continuous features
130 |Data with missing values
133 |Binary endpoints (i.e., classification)
136 |Multi-class endpoints (i.e., classification)
139 |Continuous endpoints (i.e., regression)
142 |scikit-rebate is built on top of the following existing Python packages:
124 |NumPy
127 |SciPy
130 |scikit-learn
133 |All of the necessary Python packages can be installed via the Anaconda Python distribution, which we strongly recommend that you use. We also strongly recommend that you use Python 3 over Python 2 if you're given the choice.
136 |NumPy, SciPy, and scikit-learn can be installed in Anaconda via the command:
137 |conda install numpy scipy scikit-learn
138 |
139 |
140 | Once the prerequisites are installed, you should be able to install scikit-rebate with a pip command:
141 |pip install skrebate
142 |
143 |
144 | You can retrieve basic information about your installed version of skrebate with the following pip command:
145 |pip show skrebate
146 |
147 |
148 | You can check that you have the most up to date pypi release of skrebate with the following pip command:
149 |pip install skrebate -U
150 |
151 |
152 | Please file a new issue if you run into installation problems.
153 | 154 |Fixed internal TuRF implementation so that it outputs scores for all features. Those that make it to the last iteration get true core algorithm scoring, while those that were removed along the way are assigned token scores (lower than the lowest true scoring feature) that indicate when the respective feature(s) were removed. This also allows for greater flexibility in the user specifying the number of features to return.
145 |Updated the usage documentation to demonstrate how to use RFE as well as the newly updated internal TuRF implementation.
148 |Fixed the pct parameter of TuRF to properly determine the percent of features removed each iteration as well as the total number of iterations as described in the original TuRF paper. Also managed the edge case to ensure that at least one feature would be removed each TuRF iteration.
151 |Fixed ability to parallelize run of core algorithm while using TuRF.
154 |Updated the unit testing file to remove some excess unit tests, add other relevant ones, speed up testing overall, and make the testing better organized.
157 |Added a preliminary implementation of VLSRelief to scikit-rebate, along with associated unit tests. Documentation and code examples not yet supported.
160 |Removed some unused code from TuRF implementation.
163 |Added check in the transform method required by scikit-learn in both relieff.py and turf.py to ensure that the number of selected features requested by the user was not larger than the number of features in the dataset.
166 |Reduced the default value for number of features selected
169 |Added fixes to score normalizations that should ensure that feature scores for all algorithms fall between -1 and 1.
175 |Added multi-class endpoint functionality (now discriminates between binary and multi-class endpoints). Includes new methods for multi-class score update normalization.
178 |Fixed normalization for missing data.
181 |Fixed inconsistent pre-normalization for continuous feature data.
184 |Added a custom ramp function to improve performance of all algorithms on data with a mix of discrete and continuous features. The ramp is based on the standard deviation of a given continuous feature.
187 |Updated the implementation of TuRF as an internal custom component of ReBATE.
190 |Added support for multicore processing to all Relief algorithms. Multiprocessing is now also supported in Python 2.
196 |The ReliefF
algorithm now accepts float values in the range (0, 1.0] for the n_neighbors
parameter. Float values will be interpreted as a fraction of the training set sample size.
Refined the MultiSURF and MultiSURF* algorithms. From our internal research, MultiSURF is now one of our best-performing feature selection algorithms.
202 |Added a parallelization parameter, n_jobs
, to ReliefF, SURF, SURF*, and MultiSURF via joblib.
Renamed the dlimit
parameter to discrete_limit
to better reflect the purpose of the parameter.
Minor code optimizations.
214 |Added documentation.
220 |Minor code optimizations.
223 |{{summary}}
4 |No results found
"); 68 | } 69 | 70 | if(jQuery){ 71 | /* 72 | * We currently only automatically hide bootstrap models. This 73 | * requires jQuery to work. 74 | */ 75 | jQuery('#mkdocs_search_modal a').click(function(){ 76 | jQuery('#mkdocs_search_modal').modal('hide'); 77 | }); 78 | } 79 | 80 | }; 81 | 82 | var search_input = document.getElementById('mkdocs-search-query'); 83 | 84 | var term = getSearchTerm(); 85 | if (term){ 86 | search_input.value = term; 87 | search(); 88 | } 89 | 90 | if (search_input){search_input.addEventListener("keyup", search);} 91 | 92 | }); 93 | -------------------------------------------------------------------------------- /docs/search/text.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license RequireJS text 2.0.12 Copyright (c) 2010-2014, The Dojo Foundation All Rights Reserved. 3 | * Available via the MIT or new BSD license. 4 | * see: http://github.com/requirejs/text for details 5 | */ 6 | /*jslint regexp: true */ 7 | /*global require, XMLHttpRequest, ActiveXObject, 8 | define, window, process, Packages, 9 | java, location, Components, FileUtils */ 10 | 11 | define(['module'], function (module) { 12 | 'use strict'; 13 | 14 | var text, fs, Cc, Ci, xpcIsWindows, 15 | progIds = ['Msxml2.XMLHTTP', 'Microsoft.XMLHTTP', 'Msxml2.XMLHTTP.4.0'], 16 | xmlRegExp = /^\s*<\?xml(\s)+version=[\'\"](\d)*.(\d)*[\'\"](\s)*\?>/im, 17 | bodyRegExp = /]*>\s*([\s\S]+)\s*<\/body>/im, 18 | hasLocation = typeof location !== 'undefined' && location.href, 19 | defaultProtocol = hasLocation && location.protocol && location.protocol.replace(/\:/, ''), 20 | defaultHostName = hasLocation && location.hostname, 21 | defaultPort = hasLocation && (location.port || undefined), 22 | buildMap = {}, 23 | masterConfig = (module.config && module.config()) || {}; 24 | 25 | text = { 26 | version: '2.0.12', 27 | 28 | strip: function (content) { 29 | //Strips declarations so that external SVG and XML 30 | //documents can be added to a 
document without worry. Also, if the string 31 | //is an HTML document, only the part inside the body tag is returned. 32 | if (content) { 33 | content = content.replace(xmlRegExp, ""); 34 | var matches = content.match(bodyRegExp); 35 | if (matches) { 36 | content = matches[1]; 37 | } 38 | } else { 39 | content = ""; 40 | } 41 | return content; 42 | }, 43 | 44 | jsEscape: function (content) { 45 | return content.replace(/(['\\])/g, '\\$1') 46 | .replace(/[\f]/g, "\\f") 47 | .replace(/[\b]/g, "\\b") 48 | .replace(/[\n]/g, "\\n") 49 | .replace(/[\t]/g, "\\t") 50 | .replace(/[\r]/g, "\\r") 51 | .replace(/[\u2028]/g, "\\u2028") 52 | .replace(/[\u2029]/g, "\\u2029"); 53 | }, 54 | 55 | createXhr: masterConfig.createXhr || function () { 56 | //Would love to dump the ActiveX crap in here. Need IE 6 to die first. 57 | var xhr, i, progId; 58 | if (typeof XMLHttpRequest !== "undefined") { 59 | return new XMLHttpRequest(); 60 | } else if (typeof ActiveXObject !== "undefined") { 61 | for (i = 0; i < 3; i += 1) { 62 | progId = progIds[i]; 63 | try { 64 | xhr = new ActiveXObject(progId); 65 | } catch (e) {} 66 | 67 | if (xhr) { 68 | progIds = [progId]; // so faster next time 69 | break; 70 | } 71 | } 72 | } 73 | 74 | return xhr; 75 | }, 76 | 77 | /** 78 | * Parses a resource name into its component parts. Resource names 79 | * look like: module/name.ext!strip, where the !strip part is 80 | * optional. 81 | * @param {String} name the resource name 82 | * @returns {Object} with properties "moduleName", "ext" and "strip" 83 | * where strip is a boolean. 
84 | */ 85 | parseName: function (name) { 86 | var modName, ext, temp, 87 | strip = false, 88 | index = name.indexOf("."), 89 | isRelative = name.indexOf('./') === 0 || 90 | name.indexOf('../') === 0; 91 | 92 | if (index !== -1 && (!isRelative || index > 1)) { 93 | modName = name.substring(0, index); 94 | ext = name.substring(index + 1, name.length); 95 | } else { 96 | modName = name; 97 | } 98 | 99 | temp = ext || modName; 100 | index = temp.indexOf("!"); 101 | if (index !== -1) { 102 | //Pull off the strip arg. 103 | strip = temp.substring(index + 1) === "strip"; 104 | temp = temp.substring(0, index); 105 | if (ext) { 106 | ext = temp; 107 | } else { 108 | modName = temp; 109 | } 110 | } 111 | 112 | return { 113 | moduleName: modName, 114 | ext: ext, 115 | strip: strip 116 | }; 117 | }, 118 | 119 | xdRegExp: /^((\w+)\:)?\/\/([^\/\\]+)/, 120 | 121 | /** 122 | * Is an URL on another domain. Only works for browser use, returns 123 | * false in non-browser environments. Only used to know if an 124 | * optimized .js version of a text resource should be loaded 125 | * instead. 126 | * @param {String} url 127 | * @returns Boolean 128 | */ 129 | useXhr: function (url, protocol, hostname, port) { 130 | var uProtocol, uHostName, uPort, 131 | match = text.xdRegExp.exec(url); 132 | if (!match) { 133 | return true; 134 | } 135 | uProtocol = match[2]; 136 | uHostName = match[3]; 137 | 138 | uHostName = uHostName.split(':'); 139 | uPort = uHostName[1]; 140 | uHostName = uHostName[0]; 141 | 142 | return (!uProtocol || uProtocol === protocol) && 143 | (!uHostName || uHostName.toLowerCase() === hostname.toLowerCase()) && 144 | ((!uPort && !uHostName) || uPort === port); 145 | }, 146 | 147 | finishLoad: function (name, strip, content, onLoad) { 148 | content = strip ? 
text.strip(content) : content; 149 | if (masterConfig.isBuild) { 150 | buildMap[name] = content; 151 | } 152 | onLoad(content); 153 | }, 154 | 155 | load: function (name, req, onLoad, config) { 156 | //Name has format: some.module.filext!strip 157 | //The strip part is optional. 158 | //if strip is present, then that means only get the string contents 159 | //inside a body tag in an HTML string. For XML/SVG content it means 160 | //removing the declarations so the content can be inserted 161 | //into the current doc without problems. 162 | 163 | // Do not bother with the work if a build and text will 164 | // not be inlined. 165 | if (config && config.isBuild && !config.inlineText) { 166 | onLoad(); 167 | return; 168 | } 169 | 170 | masterConfig.isBuild = config && config.isBuild; 171 | 172 | var parsed = text.parseName(name), 173 | nonStripName = parsed.moduleName + 174 | (parsed.ext ? '.' + parsed.ext : ''), 175 | url = req.toUrl(nonStripName), 176 | useXhr = (masterConfig.useXhr) || 177 | text.useXhr; 178 | 179 | // Do not load if it is an empty: url 180 | if (url.indexOf('empty:') === 0) { 181 | onLoad(); 182 | return; 183 | } 184 | 185 | //Load the text. Use XHR if possible and in a browser. 186 | if (!hasLocation || useXhr(url, defaultProtocol, defaultHostName, defaultPort)) { 187 | text.get(url, function (content) { 188 | text.finishLoad(name, parsed.strip, content, onLoad); 189 | }, function (err) { 190 | if (onLoad.error) { 191 | onLoad.error(err); 192 | } 193 | }); 194 | } else { 195 | //Need to fetch the resource across domains. Assume 196 | //the resource has been optimized into a JS module. Fetch 197 | //by the module name + extension, but do not include the 198 | //!strip part to avoid file system issues. 199 | req([nonStripName], function (content) { 200 | text.finishLoad(parsed.moduleName + '.' 
+ parsed.ext, 201 | parsed.strip, content, onLoad); 202 | }); 203 | } 204 | }, 205 | 206 | write: function (pluginName, moduleName, write, config) { 207 | if (buildMap.hasOwnProperty(moduleName)) { 208 | var content = text.jsEscape(buildMap[moduleName]); 209 | write.asModule(pluginName + "!" + moduleName, 210 | "define(function () { return '" + 211 | content + 212 | "';});\n"); 213 | } 214 | }, 215 | 216 | writeFile: function (pluginName, moduleName, req, write, config) { 217 | var parsed = text.parseName(moduleName), 218 | extPart = parsed.ext ? '.' + parsed.ext : '', 219 | nonStripName = parsed.moduleName + extPart, 220 | //Use a '.js' file name so that it indicates it is a 221 | //script that can be loaded across domains. 222 | fileName = req.toUrl(parsed.moduleName + extPart) + '.js'; 223 | 224 | //Leverage own load() method to load plugin value, but only 225 | //write out values that do not have the strip argument, 226 | //to avoid any potential issues with ! in file names. 227 | text.load(nonStripName, req, function (value) { 228 | //Use own write() method to construct full module value. 229 | //But need to create shell that translates writeFile's 230 | //write() to the right interface. 231 | var textWrite = function (contents) { 232 | return write(fileName, contents); 233 | }; 234 | textWrite.asModule = function (moduleName, contents) { 235 | return write.asModule(moduleName, fileName, contents); 236 | }; 237 | 238 | text.write(pluginName, nonStripName, textWrite, config); 239 | }, config); 240 | } 241 | }; 242 | 243 | if (masterConfig.env === 'node' || (!masterConfig.env && 244 | typeof process !== "undefined" && 245 | process.versions && 246 | !!process.versions.node && 247 | !process.versions['node-webkit'])) { 248 | //Using special require.nodeRequire, something added by r.js. 
249 | fs = require.nodeRequire('fs'); 250 | 251 | text.get = function (url, callback, errback) { 252 | try { 253 | var file = fs.readFileSync(url, 'utf8'); 254 | //Remove BOM (Byte Order Mark) from utf8 files if it is there. 255 | if (file.indexOf('\uFEFF') === 0) { 256 | file = file.substring(1); 257 | } 258 | callback(file); 259 | } catch (e) { 260 | if (errback) { 261 | errback(e); 262 | } 263 | } 264 | }; 265 | } else if (masterConfig.env === 'xhr' || (!masterConfig.env && 266 | text.createXhr())) { 267 | text.get = function (url, callback, errback, headers) { 268 | var xhr = text.createXhr(), header; 269 | xhr.open('GET', url, true); 270 | 271 | //Allow plugins direct access to xhr headers 272 | if (headers) { 273 | for (header in headers) { 274 | if (headers.hasOwnProperty(header)) { 275 | xhr.setRequestHeader(header.toLowerCase(), headers[header]); 276 | } 277 | } 278 | } 279 | 280 | //Allow overrides specified in config 281 | if (masterConfig.onXhr) { 282 | masterConfig.onXhr(xhr, url); 283 | } 284 | 285 | xhr.onreadystatechange = function (evt) { 286 | var status, err; 287 | //Do not explicitly handle errors, those should be 288 | //visible via console output in the browser. 289 | if (xhr.readyState === 4) { 290 | status = xhr.status || 0; 291 | if (status > 399 && status < 600) { 292 | //An http 4xx or 5xx error. Signal an error. 293 | err = new Error(url + ' HTTP status: ' + status); 294 | err.xhr = xhr; 295 | if (errback) { 296 | errback(err); 297 | } 298 | } else { 299 | callback(xhr.responseText); 300 | } 301 | 302 | if (masterConfig.onXhrComplete) { 303 | masterConfig.onXhrComplete(xhr, url); 304 | } 305 | } 306 | }; 307 | xhr.send(null); 308 | }; 309 | } else if (masterConfig.env === 'rhino' || (!masterConfig.env && 310 | typeof Packages !== 'undefined' && typeof java !== 'undefined')) { 311 | //Why Java, why is this so awkward? 
312 | text.get = function (url, callback) { 313 | var stringBuffer, line, 314 | encoding = "utf-8", 315 | file = new java.io.File(url), 316 | lineSeparator = java.lang.System.getProperty("line.separator"), 317 | input = new java.io.BufferedReader(new java.io.InputStreamReader(new java.io.FileInputStream(file), encoding)), 318 | content = ''; 319 | try { 320 | stringBuffer = new java.lang.StringBuffer(); 321 | line = input.readLine(); 322 | 323 | // Byte Order Mark (BOM) - The Unicode Standard, version 3.0, page 324 324 | // http://www.unicode.org/faq/utf_bom.html 325 | 326 | // Note that when we use utf-8, the BOM should appear as "EF BB BF", but it doesn't due to this bug in the JDK: 327 | // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4508058 328 | if (line && line.length() && line.charAt(0) === 0xfeff) { 329 | // Eat the BOM, since we've already found the encoding on this file, 330 | // and we plan to concatenate this buffer with others; the BOM should 331 | // only appear at the top of a file. 332 | line = line.substring(1); 333 | } 334 | 335 | if (line !== null) { 336 | stringBuffer.append(line); 337 | } 338 | 339 | while ((line = input.readLine()) !== null) { 340 | stringBuffer.append(lineSeparator); 341 | stringBuffer.append(line); 342 | } 343 | //Make sure we return a JavaScript string and not a Java string. 344 | content = String(stringBuffer.toString()); //String 345 | } finally { 346 | input.close(); 347 | } 348 | callback(content); 349 | }; 350 | } else if (masterConfig.env === 'xpconnect' || (!masterConfig.env && 351 | typeof Components !== 'undefined' && Components.classes && 352 | Components.interfaces)) { 353 | //Avert your gaze! 
354 | Cc = Components.classes; 355 | Ci = Components.interfaces; 356 | Components.utils['import']('resource://gre/modules/FileUtils.jsm'); 357 | xpcIsWindows = ('@mozilla.org/windows-registry-key;1' in Cc); 358 | 359 | text.get = function (url, callback) { 360 | var inStream, convertStream, fileObj, 361 | readData = {}; 362 | 363 | if (xpcIsWindows) { 364 | url = url.replace(/\//g, '\\'); 365 | } 366 | 367 | fileObj = new FileUtils.File(url); 368 | 369 | //XPCOM, you so crazy 370 | try { 371 | inStream = Cc['@mozilla.org/network/file-input-stream;1'] 372 | .createInstance(Ci.nsIFileInputStream); 373 | inStream.init(fileObj, 1, 0, false); 374 | 375 | convertStream = Cc['@mozilla.org/intl/converter-input-stream;1'] 376 | .createInstance(Ci.nsIConverterInputStream); 377 | convertStream.init(inStream, "utf-8", inStream.available(), 378 | Ci.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER); 379 | 380 | convertStream.readString(inStream.available(), readData); 381 | convertStream.close(); 382 | inStream.close(); 383 | callback(readData.value); 384 | } catch (e) { 385 | throw new Error((fileObj && fileObj.path || '') + ': ' + e); 386 | } 387 | }; 388 | } 389 | return text; 390 | }); 391 | -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 |scikit-rebate was developed in the Computational Genetics Lab with funding from the NIH. We are incredibly grateful for their support during the development of this project.
124 | 125 |