├── .coveragerc ├── .github └── workflows │ ├── docbuild.yml │ └── pythonpackage.yml ├── .gitignore ├── .nojekyll ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── _static │ ├── css │ │ └── project-template.css │ ├── eScience_Logo_HR.png │ └── js │ │ └── copybutton.js ├── _templates │ ├── class.rst │ ├── function.rst │ └── numpydoc_docstring.py ├── api.rst ├── conf.py ├── contributing.rst ├── index.rst └── installation_guide.rst ├── examples ├── README.txt ├── plot_mpg.py ├── plot_mpg_svr.py └── plot_spam.py ├── forestci ├── __init__.py ├── calibration.py ├── due.py ├── forestci.py ├── tests │ ├── __init__.py │ └── test_forestci.py └── version.py ├── paper ├── paper.bib ├── paper.html ├── paper.md ├── plot_mpg.png ├── plot_mpg_no_variance.png ├── plot_spam.png └── plot_spam_no_variance.png ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg └── setup.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | show_missing = True 3 | -------------------------------------------------------------------------------- /.github/workflows/docbuild.yml: -------------------------------------------------------------------------------- 1 | name: Documentation build 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | strategy: 10 | max-parallel: 4 11 | matrix: 12 | python-version: [3.9] 13 | 14 | steps: 15 | - name: Checkout repo 16 | uses: actions/checkout@v4 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install -r requirements.txt 25 | pip install -r requirements-dev.txt 26 | pip install . 
27 | - name: Build docs 28 | run: | 29 | cd docs 30 | make html 31 | - name: Upload docs 32 | uses: actions/upload-artifact@v4 33 | with: 34 | name: docs 35 | path: docs/_build/html 36 | - name: Publish docs to Github Pages 37 | if: startsWith(github.event.ref, 'refs/tags') 38 | uses: JamesIves/github-pages-deploy-action@releases/v4 39 | with: 40 | token: ${{ secrets.GITHUB_TOKEN }} 41 | branch: gh-pages # The branch the action should deploy to. 42 | folder: 'docs/_build/html' # The folder the action should deploy. 43 | -------------------------------------------------------------------------------- /.github/workflows/pythonpackage.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | strategy: 10 | max-parallel: 4 11 | matrix: 12 | python-version: [3.9] 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | - name: Set up Python ${{ matrix.python-version }} 17 | uses: actions/setup-python@v5 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | pip install -r requirements.txt 24 | pip install -r requirements-dev.txt 25 | pip install . 
26 | - name: Test with pytest 27 | run: | 28 | pytest forestci --doctest-modules 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # scikit-learn specific 10 | doc/_build/ 11 | doc/auto_examples/ 12 | doc/modules/generated/ 13 | doc/datasets/generated/ 14 | 15 | # Distribution / packaging 16 | 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *,cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | 62 | # Sphinx documentation 63 | docs/_build/ 64 | 65 | # PyBuilder 66 | target/ 67 | 68 | # Mac files 69 | .DS_Store 70 | 71 | # Paper files 72 | paper/references/* 73 | paper/*.txt 74 | 75 | # Jupyter notebooks 76 | .ipynb_checkpoints 77 | 78 | # pyenv environment 79 | .python-version -------------------------------------------------------------------------------- /.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/forest-confidence-interval/5241344a342810b3ea21413bf2ebcf4e90ec6976/.nojekyll -------------------------------------------------------------------------------- 
/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2016, Ariel Rokem, Bryna Hazelton, Kivan Polimis (The University of Washington eScience Institute) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `forestci`: confidence intervals for Forest algorithms 2 | 3 | [![Travis Status](https://travis-ci.org/scikit-learn-contrib/forest-confidence-interval.svg?branch=master)](https://travis-ci.org/scikit-learn-contrib/forest-confidence-interval) 4 | [![Coveralls Status](https://coveralls.io/repos/scikit-learn-contrib/forest-confidence-interval/badge.svg?branch=master&service=github)](https://coveralls.io/r/scikit-learn-contrib/forest-confidence-interval) 5 | [![CircleCI Status](https://circleci.com/gh/scikit-learn-contrib/forest-confidence-interval.svg?style=shield&circle-token=:circle-token)](https://circleci.com/gh/scikit-learn-contrib/forest-confidence-interval/tree/master) 6 | [![status](http://joss.theoj.org/papers/b40f03cc069b43b341a92bd26b660f35/status.svg)](http://joss.theoj.org/papers/b40f03cc069b43b341a92bd26b660f35) 7 | 8 | Forest algorithms are powerful [ensemble methods](http://scikit-learn.org/stable/modules/classes.html#module-sklearn.ensemble) for classification and regression. 9 | However, predictions from these algorithms do contain some amount of error. 10 | Prediction variability can illustrate how influential 11 | the training set is for producing the observed random forest predictions. 12 | 13 | `forest-confidence-interval` is a Python module that adds a calculation of 14 | variance and computes confidence intervals to the basic functionality 15 | implemented in scikit-learn random forest regression or classification objects. 16 | The core functions calculate an in-bag and error bars for random forest 17 | objects. 
18 | 19 | This module is based on R code from Stefan Wager 20 | ([`randomForestCI`](https://github.com/swager/randomForestCI) deprecated in favor of [`grf`](https://github.com/swager/grf)) 21 | and is licensed under the MIT open source license (see [LICENSE](LICENSE)). 22 | The present project makes the algorithm compatible with [`scikit-learn`](https://scikit-learn.org/stable/). 23 | 24 | To get the proper confidence interval, you need to use a large number of trees (estimators). 25 | The [calibration routine](https://github.com/scikit-learn-contrib/forest-confidence-interval/pull/114) 26 | (which can be included or excluded on top of the algorithm) tries to extrapolate 27 | the results for an infinite number of trees, but it is unstable and it can cause numerical errors: 28 | if this is the case, the suggestion is to exclude it with `calibrate=False` 29 | and test increasing the number of trees in the model to reach convergence. 30 | 31 | ## Installation and Usage 32 | 33 | Before installing the module you will need `numpy`, `scipy` and `scikit-learn`. 34 | 35 | To install `forest-confidence-interval` execute: 36 | ``` 37 | pip install forestci 38 | ``` 39 | If you would like to install the development version of the software use: 40 | 41 | ```shell 42 | pip install git+https://github.com/scikit-learn-contrib/forest-confidence-interval.git 43 | ``` 44 | 45 | Usage: 46 | 47 | ```python 48 | import forestci as fci 49 | ci = fci.random_forest_error( 50 | forest=model, # scikit-learn Forest model fitted on X_train 51 | X_train_shape=X_train.shape, 52 | X_test=X, # the samples you want to compute the CI 53 | inbag=None, 54 | calibrate=True, 55 | memory_constrained=False, 56 | memory_limit=None, 57 | y_output=0 # in case of multioutput model, consider target 0 58 | ) 59 | ``` 60 | 61 | ## Examples 62 | 63 | The examples (gallery below) demonstrate the package functionality with random forest classifiers and regression models. 
64 | The regression example uses a popular UCI Machine Learning data set on cars while the classifier example simulates how to add measurements of uncertainty to tasks like predicting spam emails. 65 | 66 | [Examples gallery](http://contrib.scikit-learn.org/forest-confidence-interval/auto_examples/index.html) 67 | 68 | ## Contributing 69 | 70 | Contributions are very welcome, but we ask that contributors abide by the 71 | [contributor covenant](http://contributor-covenant.org/version/1/4/). 72 | 73 | To report issues with the software, please post to the 74 | [issue log](https://github.com/scikit-learn-contrib/forest-confidence-interval/issues). 75 | Bug reports are also appreciated; please add them to the issue log after 76 | verifying that the issue does not already exist. 77 | Comments on existing issues are also welcome. 78 | 79 | Please submit improvements as pull requests against the repo after verifying 80 | that the existing tests pass and any new code is well covered by unit tests. 81 | Please write code that complies with the Python style guide, 82 | [PEP8](https://www.python.org/dev/peps/pep-0008/). 83 | 84 | E-mail [Ariel Rokem](mailto:arokem@gmail.com), [Kivan Polimis](mailto:kivan.polimis@gmail.com), or [Bryna Hazelton](mailto:brynah@phys.washington.edu ) if you have any questions, suggestions or feedback. 85 | 86 | ## Testing 87 | 88 | Requires installation of the `pytest` package. 89 | 90 | Tests are located in the `forestci/tests` folder and can be run with this command in the root directory: 91 | 92 | ```shell 93 | pytest forestci --doctest-modules 94 | ``` 95 | 96 | ## Citation 97 | 98 | Click on the JOSS status badge for the Journal of Open Source Software article on this project. 
99 | The BibTeX citation for the JOSS article is below: 100 | 101 | ``` 102 | @article{polimisconfidence, 103 | title={Confidence Intervals for Random Forests in Python}, 104 | author={Polimis, Kivan and Rokem, Ariel and Hazelton, Bryna}, 105 | journal={Journal of Open Source Software}, 106 | volume={2}, 107 | number={1}, 108 | year={2017} 109 | } 110 | ``` 111 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | -rm -rf $(BUILDDIR)/* 51 | -rm -rf auto_examples/ 52 | -rm -rf generated/* 53 | -rm -rf modules/generated/* 54 | 55 | html: 56 | # These two lines make the build a bit more lengthy, and the 57 | # the embedding of images more robust 58 | rm -rf $(BUILDDIR)/html/_images 59 | #rm -rf _build/doctrees/ 60 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 61 | @echo 62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
63 | 64 | dirhtml: 65 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 66 | @echo 67 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 68 | 69 | singlehtml: 70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 71 | @echo 72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 73 | 74 | pickle: 75 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 76 | @echo 77 | @echo "Build finished; now you can process the pickle files." 78 | 79 | json: 80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 81 | @echo 82 | @echo "Build finished; now you can process the JSON files." 83 | 84 | htmlhelp: 85 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 86 | @echo 87 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 88 | ".hhp project file in $(BUILDDIR)/htmlhelp." 89 | 90 | qthelp: 91 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 92 | @echo 93 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 94 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 95 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/fracridge.qhcp" 96 | @echo "To view the help file:" 97 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/fracridge.qhc" 98 | 99 | devhelp: 100 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 101 | @echo 102 | @echo "Build finished." 103 | @echo "To view the help file:" 104 | @echo "# mkdir -p $$HOME/.local/share/devhelp/fracridge" 105 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/fracridge" 106 | @echo "# devhelp" 107 | 108 | epub: 109 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 110 | @echo 111 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 112 | 113 | latex: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo 116 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 
117 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 118 | "(use \`make latexpdf' here to do that automatically)." 119 | 120 | latexpdf: 121 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 122 | @echo "Running LaTeX files through pdflatex..." 123 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 124 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 125 | 126 | latexpdfja: 127 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 128 | @echo "Running LaTeX files through platex and dvipdfmx..." 129 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 130 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 131 | 132 | text: 133 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 134 | @echo 135 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 136 | 137 | man: 138 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 139 | @echo 140 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 141 | 142 | texinfo: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo 145 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 146 | @echo "Run \`make' in that directory to run these through makeinfo" \ 147 | "(use \`make info' here to do that automatically)." 148 | 149 | info: 150 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 151 | @echo "Running Texinfo files through makeinfo..." 152 | make -C $(BUILDDIR)/texinfo info 153 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 154 | 155 | gettext: 156 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 157 | @echo 158 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 159 | 160 | changes: 161 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 162 | @echo 163 | @echo "The overview file is in $(BUILDDIR)/changes." 
164 | 165 | linkcheck: 166 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 167 | @echo 168 | @echo "Link check complete; look for any errors in the above output " \ 169 | "or in $(BUILDDIR)/linkcheck/output.txt." 170 | 171 | doctest: 172 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 173 | @echo "Testing of doctests in the sources finished, look at the " \ 174 | "results in $(BUILDDIR)/doctest/output.txt." 175 | 176 | xml: 177 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 178 | @echo 179 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 180 | 181 | pseudoxml: 182 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 183 | @echo 184 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 185 | -------------------------------------------------------------------------------- /docs/_static/css/project-template.css: -------------------------------------------------------------------------------- 1 | @import url("theme.css"); 2 | 3 | .highlight a { 4 | text-decoration: underline; 5 | } 6 | 7 | .deprecated p { 8 | padding: 10px 7px 10px 10px; 9 | color: #b94a48; 10 | background-color: #F3E5E5; 11 | border: 1px solid #eed3d7; 12 | } 13 | 14 | .deprecated p span.versionmodified { 15 | font-weight: bold; 16 | } 17 | -------------------------------------------------------------------------------- /docs/_static/eScience_Logo_HR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/forest-confidence-interval/5241344a342810b3ea21413bf2ebcf4e90ec6976/docs/_static/eScience_Logo_HR.png -------------------------------------------------------------------------------- /docs/_static/js/copybutton.js: -------------------------------------------------------------------------------- 1 | $(document).ready(function() { 2 | /* Add a [>>>] button on the top-right corner of code samples to hide 3 | * the >>> 
and ... prompts and the output and thus make the code 4 | * copyable. */ 5 | var div = $('.highlight-python .highlight,' + 6 | '.highlight-python3 .highlight,' + 7 | '.highlight-pycon .highlight,' + 8 | '.highlight-default .highlight') 9 | var pre = div.find('pre'); 10 | 11 | // get the styles from the current theme 12 | pre.parent().parent().css('position', 'relative'); 13 | var hide_text = 'Hide the prompts and output'; 14 | var show_text = 'Show the prompts and output'; 15 | var border_width = pre.css('border-top-width'); 16 | var border_style = pre.css('border-top-style'); 17 | var border_color = pre.css('border-top-color'); 18 | var button_styles = { 19 | 'cursor':'pointer', 'position': 'absolute', 'top': '0', 'right': '0', 20 | 'border-color': border_color, 'border-style': border_style, 21 | 'border-width': border_width, 'color': border_color, 'text-size': '75%', 22 | 'font-family': 'monospace', 'padding-left': '0.2em', 'padding-right': '0.2em', 23 | 'border-radius': '0 3px 0 0' 24 | } 25 | 26 | // create and add the button to all the code blocks that contain >>> 27 | div.each(function(index) { 28 | var jthis = $(this); 29 | if (jthis.find('.gp').length > 0) { 30 | var button = $('>>>'); 31 | button.css(button_styles) 32 | button.attr('title', hide_text); 33 | button.data('hidden', 'false'); 34 | jthis.prepend(button); 35 | } 36 | // tracebacks (.gt) contain bare text elements that need to be 37 | // wrapped in a span to work with .nextUntil() (see later) 38 | jthis.find('pre:has(.gt)').contents().filter(function() { 39 | return ((this.nodeType == 3) && (this.data.trim().length > 0)); 40 | }).wrap(''); 41 | }); 42 | 43 | // define the behavior of the button when it's clicked 44 | $('.copybutton').click(function(e){ 45 | e.preventDefault(); 46 | var button = $(this); 47 | if (button.data('hidden') === 'false') { 48 | // hide the code output 49 | button.parent().find('.go, .gp, .gt').hide(); 50 | button.next('pre').find('.gt').nextUntil('.gp, 
.go').css('visibility', 'hidden'); 51 | button.css('text-decoration', 'line-through'); 52 | button.attr('title', show_text); 53 | button.data('hidden', 'true'); 54 | } else { 55 | // show the code output 56 | button.parent().find('.go, .gp, .gt').show(); 57 | button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'visible'); 58 | button.css('text-decoration', 'none'); 59 | button.attr('title', hide_text); 60 | button.data('hidden', 'false'); 61 | } 62 | }); 63 | }); 64 | -------------------------------------------------------------------------------- /docs/_templates/class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | {% endblock %} 11 | 12 | .. include:: {{module}}.{{objname}}.examples 13 | 14 | .. raw:: html 15 | 16 |
17 | -------------------------------------------------------------------------------- /docs/_templates/function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /docs/_templates/numpydoc_docstring.py: -------------------------------------------------------------------------------- 1 | {{index}} 2 | {{summary}} 3 | {{extended_summary}} 4 | {{parameters}} 5 | {{returns}} 6 | {{yields}} 7 | {{other_parameters}} 8 | {{attributes}} 9 | {{raises}} 10 | {{warns}} 11 | {{warnings}} 12 | {{see_also}} 13 | {{notes}} 14 | {{references}} 15 | {{examples}} 16 | {{methods}} 17 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | #################### 2 | ``forestci`` API 3 | #################### 4 | 5 | 6 | .. autosummary:: 7 | :toctree: generated/ 8 | :template: function.rst 9 | 10 | forestci.random_forest_error 11 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # forestci sphinx configuration file, based on the sklearn 3 | # project-template documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Jan 18 14:44:12 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | import sphinx_gallery 19 | import sphinx_rtd_theme 20 | 21 | # If extensions (or modules to document with autodoc) are in another directory, 22 | # add these directories to sys.path here. If the directory is relative to the 23 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
24 | #sys.path.insert(0, os.path.abspath('.')) 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | #needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 35 | 'sphinx.ext.autodoc', 36 | 'sphinx.ext.autosummary', 37 | 'sphinx.ext.doctest', 38 | 'sphinx.ext.intersphinx', 39 | 'sphinx.ext.viewcode', 40 | 'numpydoc', 41 | 'sphinx_gallery.gen_gallery', 42 | 'sphinx.ext.githubpages', 43 | ] 44 | 45 | 46 | # this is needed for some reason... 47 | # see https://github.com/numpy/numpydoc/issues/69 48 | numpydoc_show_class_members = False 49 | 50 | # pngmath / imgmath compatibility layer for different sphinx versions 51 | import sphinx 52 | from distutils.version import LooseVersion 53 | if LooseVersion(sphinx.__version__) < LooseVersion('1.4'): 54 | extensions.append('sphinx.ext.pngmath') 55 | else: 56 | extensions.append('sphinx.ext.imgmath') 57 | 58 | # autodoc_default_flags = ['members', 'inherited-members'] 59 | 60 | # autodoc_default_options = True 61 | 62 | autodoc_default_options = { 63 | 'members': True, 64 | 'inherited-members': True} 65 | 66 | # Add any paths that contain templates here, relative to this directory. 67 | templates_path = ['_templates'] 68 | 69 | # generate autosummary even if no references 70 | autosummary_generate = True 71 | 72 | # The suffix of source filenames. 73 | source_suffix = '.rst' 74 | 75 | # The encoding of source files. 76 | #source_encoding = 'utf-8-sig' 77 | 78 | # Generate the plots for the gallery 79 | # plot_gallery = True 80 | 81 | # The master toctree document. 
82 | master_doc = 'index' 83 | 84 | # -- Project information ----------------------------------------------------- 85 | 86 | project = 'forestci' 87 | copyright = '2016--, Kivan Polimis, Ariel Rokem, Bryna Hazelton, The University of Washington' 88 | author = "Kivan Polimis, Ariel Rokem, and Bryna Hazelton" 89 | 90 | # The version info for the project you're documenting, acts as replacement for 91 | # |version| and |release|, also used in various other places throughout the 92 | # built documents. 93 | # 94 | # The short X.Y version. 95 | from forestci import __version__ 96 | version = __version__ 97 | # The full version, including alpha/beta/rc tags. 98 | release = __version__ 99 | 100 | # The language for content autogenerated by Sphinx. Refer to documentation 101 | # for a list of supported languages. 102 | #language = None 103 | 104 | # There are two options for replacing |today|: either, you set today to some 105 | # non-false value, then it is used: 106 | #today = '' 107 | # Else, today_fmt is used as the format for a strftime call. 108 | #today_fmt = '%B %d, %Y' 109 | 110 | # List of patterns, relative to source directory, that match files and 111 | # directories to ignore when looking for source files. 112 | exclude_patterns = ['_build', '_templates'] 113 | 114 | # The reST default role (used for this markup: `text`) to use for all 115 | # documents. 116 | #default_role = None 117 | 118 | # If true, '()' will be appended to :func: etc. cross-reference text. 119 | #add_function_parentheses = True 120 | 121 | # If true, the current module name will be prepended to all description 122 | # unit titles (such as .. function::). 123 | add_module_names = False 124 | 125 | # If true, sectionauthor and moduleauthor directives will be shown in the 126 | # output. They are ignored by default. 127 | #show_authors = False 128 | 129 | # The name of the Pygments (syntax highlighting) style to use. 
130 | pygments_style = 'sphinx' 131 | 132 | # Custom style 133 | html_style = 'css/project-template.css' 134 | 135 | # A list of ignored prefixes for module index sorting. 136 | #modindex_common_prefix = [] 137 | 138 | # If true, keep warnings as "system message" paragraphs in the built documents. 139 | #keep_warnings = False 140 | 141 | 142 | # -- Options for HTML output ---------------------------------------------- 143 | 144 | # The theme to use for HTML and HTML Help pages. See the documentation for 145 | # a list of builtin themes. 146 | html_theme = 'sphinx_rtd_theme' 147 | 148 | # Theme options are theme-specific and customize the look and feel of a theme 149 | # further. For a list of options available for each theme, see the 150 | # documentation. 151 | #html_theme_options = {} 152 | 153 | # Add any paths that contain custom themes here, relative to this directory. 154 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 155 | 156 | # The name for this set of Sphinx documents. If None, it defaults to 157 | # " v documentation". 158 | #html_title = None 159 | 160 | # A shorter title for the navigation bar. Default is the same as html_title. 161 | #html_short_title = None 162 | 163 | # The name of an image file (relative to this directory) to place at the top 164 | # of the sidebar. 165 | #html_logo = None 166 | 167 | # The name of an image file (within the static path) to use as favicon of the 168 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 169 | # pixels large. 170 | #html_favicon = None 171 | 172 | # Add any paths that contain custom static files (such as style sheets) here, 173 | # relative to this directory. They are copied after the builtin static files, 174 | # so a file named "default.css" will overwrite the builtin "default.css". 175 | html_static_path = ['_static'] 176 | 177 | # Add any extra paths that contain custom files (such as robots.txt or 178 | # .htaccess) here, relative to this directory. 
These files are copied 179 | # directly to the root of the documentation. 180 | #html_extra_path = [] 181 | 182 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 183 | # using the given strftime format. 184 | #html_last_updated_fmt = '%b %d, %Y' 185 | 186 | # If true, SmartyPants will be used to convert quotes and dashes to 187 | # typographically correct entities. 188 | #html_use_smartypants = True 189 | 190 | # Custom sidebar templates, maps document names to template names. 191 | #html_sidebars = {} 192 | 193 | # Additional templates that should be rendered to pages, maps page names to 194 | # template names. 195 | #html_additional_pages = {} 196 | 197 | # If false, no module index is generated. 198 | #html_domain_indices = True 199 | 200 | # If false, no index is generated. 201 | #html_use_index = True 202 | 203 | # If true, the index is split into individual pages for each letter. 204 | #html_split_index = False 205 | 206 | # If true, links to the reST sources are added to the pages. 207 | #html_show_sourcelink = True 208 | 209 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 210 | #html_show_sphinx = True 211 | 212 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 213 | #html_show_copyright = True 214 | 215 | # If true, an OpenSearch description file will be output, and all pages will 216 | # contain a tag referring to it. The value of this option must be the 217 | # base URL from which the finished HTML is served. 218 | #html_use_opensearch = '' 219 | 220 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 221 | #html_file_suffix = None 222 | 223 | # Output file base name for HTML help builder. 224 | htmlhelp_basename = 'forestcidoc' 225 | 226 | 227 | # Example configuration for intersphinx: refer to the Python standard library. 
228 | # intersphinx configuration 229 | intersphinx_mapping = { 230 | 'python': ('https://docs.python.org/{.major}'.format( 231 | sys.version_info), None), 232 | 'numpy': ('https://docs.scipy.org/doc/numpy/', None), 233 | 'scipy': ('https://docs.scipy.org/doc/scipy/reference', None), 234 | 'matplotlib': ('https://matplotlib.org/', None), 235 | 'sklearn': ('http://scikit-learn.org/stable', None) 236 | } 237 | 238 | # sphinx-gallery configuration 239 | sphinx_gallery_conf = { 240 | 'doc_module': 'forestci', 241 | 'backreferences_dir': os.path.join('generated'), 242 | 'reference_url': { 243 | 'forestci': None} 244 | } 245 | 246 | def setup(app): 247 | # a copy button to copy snippet of code from the documentation 248 | app.add_js_file('js/copybutton.js') 249 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | ########################################## 2 | Contributing to ``forestci`` development 3 | ########################################## 4 | 5 | We welcome suggestions and contributions. To contribute to the software 6 | please submit a pull request with the proposed changes. If you are planning 7 | a large change to the software, it is a good idea to first submit an issue 8 | to discuss the changes that you are proposing to implement. 9 | 10 | 11 | Code of Conduct 12 | ---------------- 13 | 14 | We aim to make the use of this project and contribution to it a harassment-free 15 | experience for everyone, regardless of age, body size, visible or invisible 16 | disability, ethnicity, sex characteristics, gender identity and expression, 17 | level of experience, education, socio-economic status, nationality, personal 18 | appearance, race, religion, or sexual identity and orientation. 19 | 20 | We pledge to act and interact in ways that contribute to an open, welcoming, 21 | diverse, inclusive, and healthy community. 
22 | 23 | Our Standards 24 | ~~~~~~~~~~~~~~ 25 | 26 | Examples of behavior that contributes to a positive environment for our 27 | community include: 28 | 29 | * Demonstrating empathy and kindness toward other people 30 | * Being respectful of differing opinions, viewpoints, and experiences 31 | * Giving and gracefully accepting constructive feedback 32 | * Accepting responsibility and apologizing to those affected by our mistakes, 33 | and learning from the experience 34 | * Focusing on what is best not just for us as individuals, but for the 35 | overall community 36 | 37 | Examples of unacceptable behavior include: 38 | 39 | * The use of sexualized language or imagery, and sexual attention or 40 | advances of any kind 41 | * Trolling, insulting or derogatory comments, and personal or political attacks 42 | * Public or private harassment 43 | * Publishing others' private information, such as a physical or email 44 | address, without their explicit permission 45 | * Other conduct which could reasonably be considered inappropriate in a 46 | professional setting 47 | 48 | Enforcement Responsibilities 49 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 50 | 51 | Community leaders are responsible for clarifying and enforcing our standards of 52 | acceptable behavior and will take appropriate and fair corrective action in 53 | response to any behavior that they deem inappropriate, threatening, offensive, 54 | or harmful. 55 | 56 | Community leaders have the right and responsibility to remove, edit, or reject 57 | comments, commits, code, wiki edits, issues, and other contributions that are 58 | not aligned to this Code of Conduct, and will communicate reasons for moderation 59 | decisions when appropriate. 60 | 61 | Scope 62 | ~~~~~ 63 | 64 | This Code of Conduct applies within all community spaces, and also applies when 65 | an individual is officially representing the community in public spaces. 
66 | Examples of representing our community include using an official e-mail address, 67 | posting via an official social media account, or acting as an appointed 68 | representative at an online or offline event. 69 | 70 | Enforcement 71 | ~~~~~~~~~~~~~~ 72 | 73 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 74 | reported to the community leaders responsible for enforcement at 75 | `arokem@gmail.com `_ 76 | All complaints will be reviewed and investigated promptly and fairly. 77 | 78 | All community leaders are obligated to respect the privacy and security of the 79 | reporter of any incident. 80 | 81 | Enforcement Guidelines 82 | ~~~~~~~~~~~~~~~~~~~~~~~ 83 | Community leaders will follow these Community Impact Guidelines in determining 84 | the consequences for any action they deem in violation of this Code of Conduct: 85 | 86 | 1. Correction 87 | 88 | **Community Impact**: Use of inappropriate language or other behavior deemed 89 | unprofessional or unwelcome in the community. 90 | 91 | **Consequence**: A private, written warning from community leaders, providing 92 | clarity around the nature of the violation and an explanation of why the 93 | behavior was inappropriate. A public apology may be requested. 94 | 95 | 2. Warning 96 | 97 | **Community Impact**: A violation through a single incident or series 98 | of actions. 99 | 100 | **Consequence**: A warning with consequences for continued behavior. No 101 | interaction with the people involved, including unsolicited interaction with 102 | those enforcing the Code of Conduct, for a specified period of time. This 103 | includes avoiding interactions in community spaces as well as external channels 104 | like social media. Violating these terms may lead to a temporary or 105 | permanent ban. 106 | 107 | 3. Temporary Ban 108 | 109 | **Community Impact**: A serious violation of community standards, including 110 | sustained inappropriate behavior. 
111 | 112 | **Consequence**: A temporary ban from any sort of interaction or public 113 | communication with the community for a specified period of time. No public or 114 | private interaction with the people involved, including unsolicited interaction 115 | with those enforcing the Code of Conduct, is allowed during this period. 116 | Violating these terms may lead to a permanent ban. 117 | 118 | 4. Permanent Ban 119 | 120 | **Community Impact**: Demonstrating a pattern of violation of community 121 | standards, including sustained inappropriate behavior, harassment of an 122 | individual, or aggression toward or disparagement of classes of individuals. 123 | 124 | **Consequence**: A permanent ban from any sort of public interaction within 125 | the community. 126 | 127 | Attribution 128 | ~~~~~~~~~~~~ 129 | This Code of Conduct is adapted from the Contributor Covenant homepage 130 | version 2.0, available at: 131 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 132 | 133 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 134 | enforcement ladder](https://github.com/mozilla/diversity). 135 | 136 | For answers to common questions about this code of conduct, see the FAQ at 137 | https://www.contributor-covenant.org/faq. Translations are available at 138 | https://www.contributor-covenant.org/translations. 139 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Confidence Intervals for Scikit Learn Random Forests 3 | ===================================================== 4 | 5 | Random forest algorithms are useful for both classification and regression 6 | problems. 
This package adds to scikit-learn the ability to calculate confidence 7 | intervals of the predictions generated from scikit-learn 8 | :class:`sklearn.ensemble.RandomForestRegressor` and :class:`sklearn.ensemble.RandomForestClassifier` objects. 9 | 10 | This is an implementation of an algorithm developed by Wager et al. [Wager2014]_ 11 | and previously implemented in R (`here `_). 12 | 13 | To examine and download the source code, visit our `github repo `_. 14 | 15 | .. [Wager2014] S. Wager, T. Hastie, B. Efron. "Confidence Intervals for 16 | Random Forests: The Jackknife and the Infinitesimal Jackknife", Journal 17 | of Machine Learning Research vol. 15, pp. 1625-1651, 2014. 18 | 19 | .. toctree:: 20 | :maxdepth: 2 21 | 22 | installation_guide 23 | api 24 | auto_examples/index 25 | contributing 26 | 27 | .. figure:: _static/eScience_Logo_HR.png 28 | :align: center 29 | :figclass: align-center 30 | :target: http://escience.washington.edu 31 | 32 | Acknowledgements: this work was supported by a grant from the 33 | `Gordon & Betty Moore Foundation `_, and from the 34 | `Alfred P. Sloan Foundation `_ to the 35 | `University of Washington eScience Institute `_ , and through a grant from the `Bill & Melinda Gates Foundation `_. 36 | -------------------------------------------------------------------------------- /docs/installation_guide.rst: -------------------------------------------------------------------------------- 1 | .. _installation_guide: 2 | 3 | Installation Guide 4 | ================== 5 | 6 | Before installing the `forestci` module, you will need `numpy`, `scipy` 7 | and `scikit-learn` 8 | 9 | .. code-block:: bash 10 | 11 | pip install numpy scipy scikit-learn 12 | 13 | Then, to install `forestci`: 14 | 15 | .. code-block:: bash 16 | 17 | pip install forestci 18 | 19 | If you wish to install from the source code (available `here `_ ), change your working directory to the top-level directory of the source code, and issue: 20 | 21 | .. 
code-block:: bash 22 | 23 | python setup.py install 24 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _general_examples: 2 | 3 | Examples 4 | ========= 5 | 6 | The examples use data from standard machine learning libraries to demonstrate 7 | how `forestci` can be used to calculate error bars on 8 | :class:`RandomForestRegressor` and :class:`RandomForestClassifier` objects. The 9 | regression example uses a data-set from the `UC Irvine Machine Learning Repository `_ with features of 10 | different cars and their MPG. The classification example generates synthetic 11 | data to simulate a task like that of a spam filter: classifying items into one 12 | of two categories (e.g., spam/non-spam) based on a number of features. 13 | -------------------------------------------------------------------------------- /examples/plot_mpg.py: -------------------------------------------------------------------------------- 1 | """ 2 | ====================================== 3 | Plotting Regression Forest Error Bars 4 | ====================================== 5 | 6 | This example demonstrates using `forestci` to calculate the error bars of 7 | the predictions of a :class:`sklearn.ensemble.RandomForestRegressor` object. 8 | 9 | The data used here are a classical machine learning data-set, describing 10 | various features of different cars, and their MPG. 
11 | """ 12 | 13 | # Regression Forest Example 14 | import numpy as np 15 | from matplotlib import pyplot as plt 16 | from sklearn.ensemble import RandomForestRegressor 17 | import sklearn.model_selection as xval 18 | from sklearn.datasets import fetch_openml 19 | import forestci as fci 20 | 21 | # retreive mpg data from machine learning library 22 | mpg_data = fetch_openml(data_id=196) 23 | 24 | # separate mpg data into predictors and outcome variable 25 | mpg_X = mpg_data["data"] 26 | mpg_y = mpg_data["target"] 27 | 28 | # remove rows where the data is nan 29 | not_null_sel = np.where(mpg_X.isna().sum(axis=1).values == 0) 30 | mpg_X = mpg_X.values[not_null_sel] 31 | mpg_y = mpg_y.values[not_null_sel] 32 | 33 | # split mpg data into training and test set 34 | mpg_X_train, mpg_X_test, mpg_y_train, mpg_y_test = xval.train_test_split( 35 | mpg_X, 36 | mpg_y, 37 | test_size=0.25, 38 | random_state=42) 39 | 40 | # Create RandomForestRegressor 41 | n_trees = 2000 42 | mpg_forest = RandomForestRegressor(n_estimators=n_trees, random_state=42) 43 | mpg_forest.fit(mpg_X_train, mpg_y_train) 44 | mpg_y_hat = mpg_forest.predict(mpg_X_test) 45 | 46 | # Plot predicted MPG without error bars 47 | plt.scatter(mpg_y_test, mpg_y_hat) 48 | plt.plot([5, 45], [5, 45], 'k--') 49 | plt.xlabel('Reported MPG') 50 | plt.ylabel('Predicted MPG') 51 | plt.show() 52 | 53 | # Calculate the variance 54 | mpg_V_IJ_unbiased = fci.random_forest_error(mpg_forest, mpg_X_train.shape, 55 | mpg_X_test) 56 | 57 | # Plot error bars for predicted MPG using unbiased variance 58 | plt.errorbar(mpg_y_test, mpg_y_hat, yerr=np.sqrt(mpg_V_IJ_unbiased), fmt='o') 59 | plt.plot([5, 45], [5, 45], 'k--') 60 | plt.xlabel('Reported MPG') 61 | plt.ylabel('Predicted MPG') 62 | plt.show() 63 | -------------------------------------------------------------------------------- /examples/plot_mpg_svr.py: -------------------------------------------------------------------------------- 1 | """ 2 | 
====================================== 3 | Plotting Bagging Regression Error Bars 4 | ====================================== 5 | 6 | This example demonstrates using `forestci` to calculate the error bars of 7 | the predictions of a :class:`sklearn.ensemble.BaggingRegressor` object. 8 | 9 | The data used here are a classical machine learning data-set, describing 10 | various features of different cars, and their MPG. 11 | """ 12 | 13 | # Regression Forest Example 14 | import numpy as np 15 | from matplotlib import pyplot as plt 16 | from sklearn.ensemble import BaggingRegressor 17 | from sklearn.svm import SVR 18 | import sklearn.model_selection as xval 19 | from sklearn.datasets import fetch_openml 20 | import forestci as fci 21 | 22 | # retreive mpg data from machine learning library 23 | mpg_data = fetch_openml(data_id=196) 24 | 25 | # separate mpg data into predictors and outcome variable 26 | mpg_X = mpg_data["data"] 27 | mpg_y = mpg_data["target"] 28 | 29 | # remove rows where the data is nan 30 | not_null_sel = np.where(mpg_X.isna().sum(axis=1).values == 0) 31 | mpg_X = mpg_X.values[not_null_sel] 32 | mpg_y = mpg_y.values[not_null_sel] 33 | 34 | # split mpg data into training and test set 35 | mpg_X_train, mpg_X_test, mpg_y_train, mpg_y_test = xval.train_test_split( 36 | mpg_X, mpg_y, test_size=0.25, random_state=42 37 | ) 38 | 39 | # Create RandomForestRegressor 40 | n_estimators = 1000 41 | mpg_bagger = BaggingRegressor( 42 | estimator=SVR(), n_estimators=n_estimators, random_state=42 43 | ) 44 | mpg_bagger.fit(mpg_X_train, mpg_y_train) 45 | mpg_y_hat = mpg_bagger.predict(mpg_X_test) 46 | 47 | # Plot predicted MPG without error bars 48 | plt.scatter(mpg_y_test, mpg_y_hat) 49 | plt.plot([5, 45], [5, 45], "k--") 50 | plt.xlabel("Reported MPG") 51 | plt.ylabel("Predicted MPG") 52 | plt.show() 53 | 54 | # Calculate the variance 55 | mpg_V_IJ_unbiased = fci.random_forest_error(mpg_bagger, mpg_X_train.shape, mpg_X_test) 56 | 57 | # Plot error bars for predicted 
MPG using unbiased variance 58 | plt.errorbar(mpg_y_test, mpg_y_hat, yerr=np.sqrt(mpg_V_IJ_unbiased), fmt="o") 59 | plt.plot([5, 45], [5, 45], "k--") 60 | plt.xlabel("Reported MPG") 61 | plt.ylabel("Predicted MPG") 62 | plt.show() 63 | -------------------------------------------------------------------------------- /examples/plot_spam.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================= 3 | Plotting Classification Forest Error Bars 4 | ========================================= 5 | 6 | This example demonstrates the calculation of confidence intervals for 7 | :class:`sklearn.ensemble.RandomForestClassifier` objects. 8 | 9 | The data used here are synthetically generated to simulate a data-set in which 10 | email messages are labeled as spam based on 20 different features (the default 11 | of :func:`sklearn.datasets.make_classification`). 12 | """ 13 | 14 | import numpy as np 15 | from matplotlib import pyplot as plt 16 | from sklearn.model_selection import train_test_split 17 | from sklearn.ensemble import RandomForestClassifier 18 | import forestci as fci 19 | from sklearn.datasets import make_classification 20 | 21 | spam_X, spam_y = make_classification(5000) 22 | 23 | # split the datainto training and test set 24 | spam_X_train, spam_X_test, spam_y_train, spam_y_test = train_test_split( 25 | spam_X, spam_y, 26 | test_size=0.2) 27 | 28 | # create RandomForestClassifier 29 | n_trees = 500 30 | spam_RFC = RandomForestClassifier(max_features=5, n_estimators=n_trees, 31 | random_state=42) 32 | spam_RFC.fit(spam_X_train, spam_y_train) 33 | spam_y_hat = spam_RFC.predict_proba(spam_X_test) 34 | 35 | idx_spam = np.where(spam_y_test == 1)[0] 36 | idx_ham = np.where(spam_y_test == 0)[0] 37 | 38 | # Histogram predictions without error bars: 39 | fig, ax = plt.subplots(1) 40 | ax.hist(spam_y_hat[idx_spam, 1], histtype='step', label='spam') 41 | ax.hist(spam_y_hat[idx_ham, 1], histtype='step', 
label='not spam') 42 | ax.set_xlabel('Prediction (spam probability)') 43 | ax.set_ylabel('Number of observations') 44 | plt.legend() 45 | 46 | # Calculate the variance 47 | spam_V_IJ_unbiased = fci.random_forest_error(spam_RFC, spam_X_train.shape, 48 | spam_X_test) 49 | 50 | # Plot forest prediction for emails and standard deviation for estimates 51 | # Blue points are spam emails; Green points are non-spam emails 52 | fig, ax = plt.subplots(1) 53 | ax.scatter(spam_y_hat[idx_spam, 1], 54 | np.sqrt(spam_V_IJ_unbiased[idx_spam]), 55 | label='spam') 56 | 57 | ax.scatter(spam_y_hat[idx_ham, 1], 58 | np.sqrt(spam_V_IJ_unbiased[idx_ham]), 59 | label='not spam') 60 | 61 | ax.set_xlabel('Prediction (spam probability)') 62 | ax.set_ylabel('Standard deviation') 63 | plt.legend() 64 | plt.show() 65 | -------------------------------------------------------------------------------- /forestci/__init__.py: -------------------------------------------------------------------------------- 1 | from .forestci import (calc_inbag, random_forest_error, 2 | _core_computation, _bias_correction) # noqa 3 | 4 | from .version import __version__ # noqa 5 | 6 | __all__ = ("calc_inbag", "random_forest_error") 7 | -------------------------------------------------------------------------------- /forestci/calibration.py: -------------------------------------------------------------------------------- 1 | """ 2 | Calibration based on empirical Bayes estimation [Efron2014]_. 3 | 4 | This calibration procedure can be useful when the number of trees in the 5 | random forest is small. 
6 | 7 | """ 8 | import warnings 9 | import numpy as np 10 | from scipy.optimize import minimize 11 | from scipy.signal import fftconvolve 12 | from scipy.stats import norm 13 | from .due import _due, _BibTeX 14 | 15 | __all__ = ("gfit", "gbayes", "calibrateEB") 16 | 17 | 18 | _due.cite(_BibTeX(""" 19 | @ARTICLE{Wager2014-wn, 20 | title = "Two modeling strategies for empirical Bayes estimation.", 21 | author = Efron, Bradley 22 | journal = "Stat. Sci.", 23 | volume = 29, 24 | number = 2, 25 | pages = "285--301", 26 | month = feb, 27 | year = 2014,}"""), 28 | description=("Confidence Intervals for Random", 29 | " Forests: The Jackknife and the Infinitesimal", 30 | "Jackknife"), 31 | path='forestci') 32 | 33 | 34 | def gfit(X, sigma, p=2, nbin=1000, unif_fraction=0.1): 35 | """ 36 | Fit empirical Bayes prior in the hierarchical model [Efron2014]_. 37 | 38 | .. math:: 39 | 40 | mu ~ G, X ~ N(mu, sigma^2) 41 | 42 | Parameters 43 | ---------- 44 | X: ndarray 45 | A 1D array of observations. 46 | sigma: float 47 | Noise estimate on X. 48 | p: int 49 | Number of parameters used to fit G. 50 | nbin: int 51 | Number of bins used for discrete approximation. 52 | unif_fraction: float 53 | Fraction of G modeled as "slab". 54 | 55 | Returns 56 | ------- 57 | An array of the posterior density estimate g. 
58 | """ 59 | min_x = max(min(X) - 2 * np.std(X, ddof=1), 0) 60 | max_x = max(max(X) + 2 * np.std(X, ddof=1), 61 | np.std(X, ddof=1)) 62 | xvals = np.linspace(min_x, max_x, nbin) 63 | 64 | noise_kernel = norm(scale=sigma,loc=xvals.mean()).pdf(xvals) 65 | noise_kernel /= noise_kernel.sum() 66 | 67 | mask = xvals > 0 68 | assert sum(mask) > 0 69 | g_eta_slab = mask / sum(mask) 70 | 71 | XX = np.column_stack([ pow(xvals, exp) for exp in range(1, p+1)]) 72 | XX /= np.sum(XX,axis = 0, keepdims=True) # normalize each feature column for better numerical stability 73 | 74 | def neg_loglik(eta): 75 | with np.errstate(over='ignore'): 76 | # if eta > 0 the exponential will likely get overflow. that is fine. 77 | g_eta_raw = np.exp(np.dot(XX, eta)) * mask 78 | 79 | if ((np.sum(g_eta_raw) == np.inf) | 80 | (np.sum(g_eta_raw) <= 81 | 100 * np.finfo(np.double).tiny)): 82 | return (1000 * (len(X) + sum(eta ** 2))) 83 | 84 | assert sum(g_eta_raw) > 0, "Unexpected error" 85 | assert np.isfinite(sum(g_eta_raw)), "Unexpected error" 86 | g_eta_main = g_eta_raw / sum(g_eta_raw) 87 | g_eta = ( 88 | (1 - unif_fraction) * g_eta_main + 89 | unif_fraction * g_eta_slab) 90 | f_eta = fftconvolve(g_eta, noise_kernel, mode='same') 91 | return np.sum(np.interp(X, xvals, 92 | -np.log(np.maximum(f_eta, 0.0000001)))) 93 | 94 | res = minimize( 95 | neg_loglik, 96 | np.full(p, -1, dtype='float'), 97 | tol=5e-5 # adjusted so that the MPG example in the docs passes 98 | ) 99 | if not res.success: 100 | warnings.warn("Fitting the empirical bayes prior failed with message %s." % res.message) 101 | eta_hat = res.x 102 | g_eta_raw = np.exp(np.dot(XX, eta_hat)) * mask 103 | g_eta_main = g_eta_raw / sum(g_eta_raw) 104 | g_eta = ( 105 | (1 - unif_fraction) * g_eta_main + 106 | unif_fraction * g_eta_slab) 107 | 108 | assert np.all(np.isfinite(g_eta)), "Fitting the empirical bayes prior failed." 
109 | return xvals, g_eta 110 | 111 | 112 | def gbayes(x0, g_est, sigma): 113 | """ 114 | Estimate Bayes posterior with Gaussian noise [Efron2014]_. 115 | 116 | Parameters 117 | ---------- 118 | x0: ndarray 119 | an observation 120 | g_est: (ndarray,ndarray) 121 | a prior density, as returned by gfit 122 | g_est[0] is the x-positions 123 | g_est[1] is the densities 124 | sigma: int 125 | noise estimate 126 | 127 | Returns 128 | ------- 129 | An array of the posterior estimate E[mu | x0] 130 | """ 131 | 132 | Kx = norm().pdf((g_est[0] - x0) / sigma) 133 | post = Kx * g_est[1] 134 | post /= sum(post) 135 | return sum(post * g_est[0]) 136 | 137 | 138 | def calibrateEB(variances, sigma2): 139 | """ 140 | Calibrate noisy variance estimates with empirical Bayes. 141 | 142 | Parameters 143 | ---------- 144 | vars: ndarray 145 | List of variance estimates. 146 | sigma2: int 147 | Estimate of the Monte Carlo noise in vars. 148 | 149 | Returns 150 | ------- 151 | An array of the calibrated variance estimates 152 | """ 153 | if (sigma2 <= 0 or min(variances) == max(variances)): 154 | return(np.maximum(variances, 0)) 155 | 156 | sigma = np.sqrt(sigma2) 157 | eb_prior = gfit(variances, sigma) 158 | 159 | if len(variances) >= 200: 160 | # Interpolate to speed up computations: 161 | calib_x = np.percentile(variances, 162 | np.arange(0, 102, 2)) 163 | calib_y = [gbayes(x,g_est=eb_prior,sigma=sigma) for x in calib_x] 164 | calib_all = np.interp(variances, calib_x, calib_y) 165 | else: 166 | calib_all = [gbayes(x,g_est=eb_prior,sigma=sigma) for x in variances] 167 | 168 | return np.asarray(calib_all) 169 | -------------------------------------------------------------------------------- /forestci/due.py: -------------------------------------------------------------------------------- 1 | # emacs: at the end of the file 2 | # ex: set sts=4 ts=4 sw=4 et: 3 | # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### # 4 | """ 5 | 6 | Due-credit 7 | ========== 8 | 9 | 
`duecredit `_ is a framework conceived to address the 10 | problem of inadequate citation of scientific software and methods. It automates 11 | the insertion of citations into code. We use it here to refer to the original 12 | publication introducing the method we have implemented. 13 | 14 | See https://github.com/duecredit/duecredit/blob/master/README.md for examples. 15 | 16 | Origin: Originally a part of the duecredit software package 17 | 18 | Copyright: 2015-2016 DueCredit developers 19 | 20 | License: BSD-2 21 | """ 22 | 23 | __version__ = '0.0.5' 24 | 25 | 26 | class _InactiveDueCreditCollector(object): 27 | """Just a stub at the Collector which would not do anything""" 28 | def _donothing(self, *args, **kwargs): 29 | """Perform no good and no bad""" 30 | pass 31 | 32 | def dcite(self, *args, **kwargs): 33 | """If I could cite I would""" 34 | def nondecorating_decorator(func): 35 | return func 36 | return nondecorating_decorator 37 | 38 | cite = load = add = _donothing 39 | 40 | def __repr__(self): 41 | return self.__class__.__name__ + '()' 42 | 43 | 44 | def _donothing_func(*args, **kwargs): 45 | """Perform no good and no bad""" 46 | pass 47 | 48 | try: 49 | from duecredit import due as _due 50 | from duecredit import BibTeX as _BibTeX 51 | from duecredit import Doi as _Doi 52 | from duecredit import Url as _Url 53 | if '_due' in locals() and not hasattr(_due, 'cite'): 54 | raise RuntimeError( 55 | "Imported due lacks .cite. 
DueCredit is now disabled") 56 | except Exception as e: 57 | if type(e).__name__ != 'ImportError': 58 | import logging 59 | logging.getLogger("duecredit").error( 60 | "Failed to import duecredit due to %s" % str(e)) 61 | # Initiate due stub 62 | _due = _InactiveDueCreditCollector() 63 | _BibTeX = _Doi = _Url = _donothing_func 64 | 65 | # Emacs mode definitions 66 | # Local Variables: 67 | # mode: python 68 | # py-indent-offset: 4 69 | # tab-width: 4 70 | # indent-tabs-mode: nil 71 | # End: 72 | -------------------------------------------------------------------------------- /forestci/forestci.py: -------------------------------------------------------------------------------- 1 | """ 2 | Forest confidence intervals. 3 | 4 | Calculate confidence intervals for scikit-learn RandomForestRegressor and 5 | RandomForestClassifier predictions. 6 | """ 7 | 8 | import numpy as np 9 | import copy 10 | 11 | from sklearn.ensemble._forest import BaseForest 12 | from sklearn.ensemble._forest import (_generate_sample_indices, 13 | _get_n_samples_bootstrap) 14 | from sklearn.ensemble._bagging import BaseBagging 15 | 16 | from .calibration import calibrateEB 17 | from .due import _due, _BibTeX 18 | 19 | __all__ = ("calc_inbag", "random_forest_error", "_bias_correction", 20 | "_core_computation") 21 | 22 | _due.cite( 23 | _BibTeX( 24 | """ 25 | @ARTICLE{Wager2014-wn, 26 | title = "Confidence Intervals for Random Forests: The Jackknife and the Infinitesimal Jackknife", 27 | author = "Wager, Stefan and Hastie, Trevor and Efron, Bradley", 28 | journal = "J. Mach. Learn. Res.", 29 | volume = 15, 30 | number = 1, 31 | pages = "1625--1651", 32 | month = jan, 33 | year = 2014,}""" 34 | ), 35 | description=( 36 | "Confidence Intervals for Random Forests:", 37 | "The Jackknife and the Infinitesimal Jackknife", 38 | ), 39 | path="forestci", 40 | ) 41 | 42 | 43 | def calc_inbag(n_samples, forest): 44 | """ 45 | Derive samples used to create trees in scikit-learn RandomForest objects. 
46 | 47 | Recovers the samples in each tree from the random state of that tree using 48 | :func:`forest._generate_sample_indices`. 49 | 50 | Parameters 51 | ---------- 52 | n_samples : int 53 | The number of samples used to fit the scikit-learn RandomForest object. 54 | 55 | forest : RandomForest 56 | Regressor or Classifier object that is already fit by scikit-learn. 57 | 58 | Returns 59 | ------- 60 | Array that records how many times a data point was placed in a tree. 61 | Columns are individual trees. Rows are the number of times a sample was 62 | used in a tree. 63 | """ 64 | 65 | if not forest.bootstrap: 66 | e_s = "Cannot calculate the inbag from a forest that has bootstrap=False" 67 | raise ValueError(e_s) 68 | 69 | n_trees = forest.n_estimators 70 | inbag = np.zeros((n_samples, n_trees)) 71 | sample_idx = [] 72 | if isinstance(forest, BaseForest): 73 | n_samples_bootstrap = _get_n_samples_bootstrap(n_samples, forest.max_samples) 74 | 75 | for t_idx in range(n_trees): 76 | sample_idx.append( 77 | _generate_sample_indices( 78 | forest.estimators_[t_idx].random_state, 79 | n_samples, 80 | n_samples_bootstrap, 81 | ) 82 | ) 83 | inbag[:, t_idx] = np.bincount(sample_idx[-1], minlength=n_samples) 84 | elif isinstance(forest, BaseBagging): 85 | for t_idx, estimator_sample in enumerate(forest.estimators_samples_): 86 | sample_idx.append(estimator_sample) 87 | inbag[:, t_idx] = np.bincount(sample_idx[-1], minlength=n_samples) 88 | 89 | return inbag 90 | 91 | 92 | def _core_computation( 93 | X_train_shape, 94 | X_test, 95 | inbag, 96 | pred_centered, 97 | n_trees, 98 | memory_constrained=False, 99 | memory_limit=None, 100 | test_mode=False, 101 | ): 102 | """ 103 | Helper function, that performs the core computation 104 | 105 | Parameters 106 | ---------- 107 | X_train_shape : tuple (int, int) 108 | Shape (n_train_sample, n_features). 109 | 110 | X_test : ndarray 111 | An array with shape (n_test_sample, n_features). 
112 | 113 | inbag : ndarray 114 | The inbag matrix that fit the data. If set to `None` (default) it 115 | will be inferred from the forest. However, this only works for trees 116 | for which bootstrapping was set to `True`. That is, if sampling was 117 | done with replacement. Otherwise, users need to provide their own 118 | inbag matrix. 119 | 120 | pred_centered : ndarray 121 | Centered predictions that are an intermediate result in the 122 | computation. 123 | 124 | memory_constrained: boolean (optional) 125 | Whether or not there is a restriction on memory. If False, it is 126 | assumed that a ndarray of shape (n_train_sample,n_test_sample) fits 127 | in main memory. Setting to True can actually provide a speed up if 128 | memory_limit is tuned to the optimal range. 129 | 130 | memory_limit: int (optional) 131 | An upper bound for how much memory the intermediate matrices will take 132 | up in Megabytes. This must be provided if memory_constrained=True. 133 | 134 | 135 | """ 136 | if not memory_constrained: 137 | return np.sum((np.dot(inbag - 1, pred_centered.T) / n_trees) ** 2, 0) 138 | 139 | if not memory_limit: 140 | raise ValueError("If memory_constrained=True, must provide", "memory_limit.") 141 | 142 | # Assumes double precision float 143 | chunk_size = int((memory_limit * 1e6) / (8.0 * X_train_shape[0])) 144 | 145 | if chunk_size == 0: 146 | min_limit = 8.0 * X_train_shape[0] / 1e6 147 | raise ValueError( 148 | "memory_limit provided is too small."
149 | + "For these dimensions, memory_limit must " 150 | + "be greater than or equal to %.3e" % min_limit 151 | ) 152 | 153 | chunk_edges = np.arange(0, X_test.shape[0] + chunk_size, chunk_size) 154 | inds = range(X_test.shape[0]) 155 | chunks = [ 156 | inds[chunk_edges[i] : chunk_edges[i + 1]] for i in range(len(chunk_edges) - 1) 157 | ] 158 | if test_mode: 159 | print("Number of chunks: %d" % (len(chunks),)) 160 | V_IJ = np.concatenate( 161 | [ 162 | np.sum((np.dot(inbag - 1, pred_centered[chunk].T) / n_trees) ** 2, 0) 163 | for chunk in chunks 164 | ] 165 | ) 166 | return V_IJ 167 | 168 | 169 | def _bias_correction(V_IJ, inbag, pred_centered, n_trees): 170 | """ 171 | Helper functions that implements bias correction 172 | 173 | Parameters 174 | ---------- 175 | V_IJ : ndarray 176 | Intermediate result in the computation. 177 | 178 | inbag : ndarray 179 | The inbag matrix that fit the data. If set to `None` (default) it 180 | will be inferred from the forest. However, this only works for trees 181 | for which bootstrapping was set to `True`. That is, if sampling was 182 | done with replacement. Otherwise, users need to provide their own 183 | inbag matrix. 184 | 185 | pred_centered : ndarray 186 | Centered predictions that are an intermediate result in the 187 | computation. 188 | 189 | n_trees : int 190 | The number of trees in the forest object. 191 | """ 192 | n_train_samples = inbag.shape[0] 193 | n_var = np.mean( 194 | np.square(inbag[0:n_trees]).mean(axis=1).T.view() 195 | - np.square(inbag[0:n_trees].mean(axis=1)).T.view() 196 | ) 197 | boot_var = np.square(pred_centered).sum(axis=1) / n_trees 198 | bias_correction = n_train_samples * n_var * boot_var / n_trees 199 | V_IJ_unbiased = V_IJ - bias_correction 200 | return V_IJ_unbiased 201 | 202 | 203 | def _centered_prediction_forest(forest, X_test, y_output=None): 204 | """ 205 | Center the tree predictions by the mean prediction (forest) 206 | 207 | The centering is done for all provided test samples. 
def random_forest_error(
    forest,
    X_train_shape,
    X_test,
    inbag=None,
    calibrate=True,
    memory_constrained=False,
    memory_limit=None,
    y_output=None,
):
    """
    Calculate error bars from scikit-learn RandomForest estimators.

    RandomForest is a regressor or classifier object;
    this variance can be used to plot error bars for RandomForest objects.

    Parameters
    ----------
    forest : RandomForest
        Regressor or Classifier object.

    X_train_shape : tuple (int, int)
        Shape (n_train_sample, n_features) of the design matrix for training data.

    X_test : ndarray
        An array with shape (n_test_sample, n_features). The design matrix
        for testing data

    inbag : ndarray, optional
        The inbag matrix that fit the data. If set to `None` (default) it
        will be inferred from the forest. However, this only works for trees
        for which bootstrapping was set to `True`. That is, if sampling was
        done with replacement. Otherwise, users need to provide their own
        inbag matrix.

    calibrate: boolean, optional
        Whether to apply calibration to mitigate Monte Carlo noise.
        Some variance estimates may be negative due to Monte Carlo effects
        if the number of trees in the forest is too small; calibration
        corrects for this. Default: True.

    memory_constrained: boolean, optional
        Whether or not there is a restriction on memory. If False, it is
        assumed that a ndarray of shape (n_train_sample, n_test_sample) fits
        in main memory. Setting to True can actually provide a speed up if
        memory_limit is tuned to the optimal range.

    memory_limit: int, optional.
        An upper bound for how much memory the intermediate matrices will take
        up in Megabytes. This must be provided if memory_constrained=True.

    y_output: int, mandatory only for MultiOutput regressor.
        In case of MultiOutput regressor, indicate the index of the target to
        analyse. The program will return the IJ variance related to that target
        only.

    Returns
    -------
    An array with the unbiased sampling variance (V_IJ_unbiased)
    for a RandomForest object.

    Raises
    ------
    ValueError
        If the forest is multi-output and `y_output` was not specified.

    See Also
    ----------
    :func:`calc_inbag`

    Notes
    -----
    The calculation of error is based on the infinitesimal jackknife variance,
    as described in [Wager2014]_ and is a Python implementation of the R code
    provided at: https://github.com/swager/randomForestCI

    .. [Wager2014] S. Wager, T. Hastie, B. Efron. "Confidence Intervals for
        Random Forests: The Jackknife and the Infinitesimal Jackknife", Journal
        of Machine Learning Research vol. 15, pp. 1625-1651, 2014.
    """
    # IDIOM FIX: use hasattr instead of `in dir()` and `is None` instead
    # of `== None` (flake8 E711).
    if hasattr(forest, "n_outputs_") and forest.n_outputs_ > 1 and y_output is None:
        e_s = "MultiOutput regressor: specify the index of the target to analyse (y_output)"
        raise ValueError(e_s)

    if inbag is None:
        inbag = calc_inbag(X_train_shape[0], forest)

    pred_centered = _centered_prediction_forest(forest, X_test, y_output)
    n_trees = forest.n_estimators
    V_IJ = _core_computation(
        X_train_shape, X_test, inbag, pred_centered, n_trees, memory_constrained, memory_limit
    )
    V_IJ_unbiased = _bias_correction(V_IJ, inbag, pred_centered, n_trees)

    # Correct for cases where resampling is done without replacement:
    if np.max(inbag) == 1:
        variance_inflation = 1 / (1 - np.mean(inbag)) ** 2
        V_IJ_unbiased *= variance_inflation

    if not calibrate:
        return V_IJ_unbiased

    if V_IJ_unbiased.shape[0] <= 20:
        print("No calibration with n_samples <= 20: ",
              "consider using more n_estimators in your model, ",
              "for more accurate ci and to avoid negative values.")
        return V_IJ_unbiased

    # Calibration is a correction for converging quicker to the case of
    # infinite n_estimators, as presented in Wager (2014)
    # http://jmlr.org/papers/v15/wager14a.html
    # (The redundant `if calibrate:` guard was removed: calibrate is
    # necessarily True past the early return above.)
    calibration_ratio = 2
    n_sample = np.ceil(n_trees / calibration_ratio)
    # Build a half-sized sub-forest by sampling trees without replacement;
    # its variance estimates are used to gauge the Monte Carlo noise scale.
    new_forest = copy.deepcopy(forest)
    random_idx = np.random.permutation(len(new_forest.estimators_))[: int(n_sample)]
    new_forest.estimators_ = list(np.array(new_forest.estimators_)[random_idx])
    if hasattr(new_forest, "_seeds"):
        new_forest._seeds = new_forest._seeds[random_idx]

    new_forest.n_estimators = int(n_sample)

    results_ss = random_forest_error(
        new_forest,
        X_train_shape,
        X_test,
        calibrate=False,
        memory_constrained=memory_constrained,
        memory_limit=memory_limit,
        y_output=y_output,
    )
    # Use this second set of variance estimates
    # to estimate scale of Monte Carlo noise
    sigma2_ss = np.mean((results_ss - V_IJ_unbiased) ** 2)
    delta = n_sample / n_trees
    sigma2 = (delta ** 2 + (1 - delta) ** 2) / (2 * (1 - delta) ** 2) * sigma2_ss

    # Use Monte Carlo noise scale estimate for empirical Bayes calibration
    V_IJ_calibrated = calibrateEB(V_IJ_unbiased, sigma2)

    return V_IJ_calibrated
def test_random_forest_error_multioutput():
    # Two-target regression: forestci must be told which output to analyse.
    X = np.array([[5, 2], [5, 5], [3, 3], [6, 4], [6, 6]])

    y = np.array([[70, 37], [100, 55], [60, 33], [100, 54], [120, 66]])

    train_idx = [2, 3, 4]
    test_idx = [0, 1]

    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    forest = RandomForestRegressor(n_estimators=4)
    forest.fit(X_train, y_train)

    # Asking for target 0 yields one variance estimate per test sample.
    v_target0 = fci.random_forest_error(
        forest, X_train.shape, X_test, calibrate=True, y_output=0
    )
    npt.assert_equal(v_target0.shape[0], y_test.shape[0])

    # With a MultiOutput RandomForestRegressor the user MUST specify a y_output
    npt.assert_raises(
        ValueError,
        fci.random_forest_error,
        forest,
        X_train.shape,
        X_test,
        inbag=None,
        calibrate=True,
        memory_constrained=False,
        memory_limit=None,
        y_output=None,  # This should trigger the ValueError
    )
def test_core_computation():
    """Check _core_computation against reference values from the R code."""
    inbag_ex = np.array(
        [[1.0, 2.0, 0.0, 1.0], [1.0, 0.0, 2.0, 0.0], [1.0, 1.0, 1.0, 2.0]]
    )

    X_train_ex = np.array([[3, 3], [6, 4], [6, 6]])
    X_test_ex = np.vstack([np.array([[5, 2], [5, 5]]) for _ in range(1000)])
    pred_centered_ex = np.vstack(
        [np.array([[-20, -20, 10, 30], [-20, 30, -20, 10]]) for _ in range(1000)]
    )
    n_trees = 4

    our_vij = fci._core_computation(
        X_train_ex.shape, X_test_ex, inbag_ex, pred_centered_ex, n_trees
    )

    # Reference values computed with the original R implementation.
    r_vij = np.concatenate([np.array([112.5, 387.5]) for _ in range(1000)])

    npt.assert_almost_equal(our_vij, r_vij)

    # The memory-constrained and unconstrained paths must agree.
    # BUG FIX: the loop previously ignored `mc` and `ml` and hard-coded
    # memory_constrained=True, memory_limit=0.01, so the unconstrained
    # branch was never exercised by this loop.
    for mc, ml in zip([True, False], [0.01, None]):
        our_vij = fci._core_computation(
            X_train_ex.shape,
            X_test_ex,
            inbag_ex,
            pred_centered_ex,
            n_trees,
            memory_constrained=mc,
            memory_limit=ml,
            test_mode=True,
        )

        npt.assert_almost_equal(our_vij, r_vij)
def test_centered_prediction_forest():
    X = np.array([[5, 2],
                  [5, 5],
                  [3, 3],
                  [6, 4],
                  [6, 6]])

    y = np.array([70, 100, 60, 100, 120])

    train_idx = [2, 3, 4]
    test_idx = [0, 1]

    X_train, y_train = X[train_idx], y[train_idx]
    X_test = X[test_idx]

    n_trees = 8
    forest = RandomForestRegressor(n_estimators=n_trees).fit(X_train, y_train)

    # test different amount of test samples
    for n_test in range(1, len(X_test) + 1):
        batch = X_test[:n_test]
        pred_centered = fci.forestci._centered_prediction_forest(forest, batch)

        # the vectorized solution has to match the single sample predictions
        for row, sample in zip(pred_centered, batch):
            # Equality here assures correctness of single test sample
            # calculations; no additional tests for correct averaging are
            # required since for single test samples dimension 0 (i.e. the
            # number of test sets) disappears.
            single = fci.forestci._centered_prediction_forest(forest, sample)
            assert len(single[0]) == n_trees
            npt.assert_almost_equal(single[0], row)
_README: https://github.com/scikit-learn-contrib/forest-confidence-interval/blob/master/README.md 42 | 43 | .. _documentation: http://contrib.scikit-learn.org/forest-confidence-interval/ 44 | 45 | """ 46 | 47 | NAME = "forestci" 48 | MAINTAINER = "Ariel Rokem" 49 | MAINTAINER_EMAIL = "arokem@uw.edu" 50 | DESCRIPTION = description 51 | LONG_DESCRIPTION = long_description 52 | URL = "http://github.com/scikit-learn-contrib/forest-confidence-interval" 53 | DOWNLOAD_URL = "" 54 | LICENSE = "MIT" 55 | AUTHOR = "Ariel Rokem, Bryna Hazelton, Kivan Polimis" 56 | AUTHOR_EMAIL = "arokem@uw.edu" 57 | PLATFORMS = "OS Independent" 58 | MAJOR = _version_major 59 | MINOR = _version_minor 60 | MICRO = _version_micro 61 | VERSION = __version__ 62 | -------------------------------------------------------------------------------- /paper/paper.bib: -------------------------------------------------------------------------------- 1 | 2 | @article{wager_confidence_2014, 3 | title = {Confidence {Intervals} for {Random} {Forests}: {The} {Jackknife} and the {Infinitesimal} {Jackknife}}, 4 | volume = {15}, 5 | issn = {1532-4435}, 6 | url = {http://dl.acm.org/citation.cfm?id=2627435.2638587}, 7 | number = {1}, 8 | journal = {Journal of Machine Learning Research}, 9 | author = {Wager, Stefan and Hastie, Trevor and Efron, Bradley}, 10 | month = jan, 11 | year = {2014}, 12 | keywords = {bagging, jackknife methods, Monte Carlo noise, variance estimation}, 13 | pages = {1625--1651} 14 | } 15 | 16 | @misc{wager_randomforestci_2016, 17 | title = {{randomForestCI}}, 18 | url = {https://github.com/swager/randomForestCI}, 19 | abstract = {randomForestCI}, 20 | urldate = {2016-09-23}, 21 | author = {Wager, Stefan}, 22 | month = sep, 23 | year = {2016} 24 | } 25 | 26 | @inproceedings{quinlan_combining_1993, 27 | address = {San Francisco, CA, USA}, 28 | series = {{ICML}'93}, 29 | title = {Combining {Instance}-based and {Model}-based {Learning}}, 30 | isbn = {1-55860-307-7}, 31 | url = 
{http://dl.acm.org/citation.cfm?id=3091529.3091560}, 32 | booktitle = {Proceedings of the {Tenth} {International} {Conference} on {International} {Conference} on {Machine} {Learning}}, 33 | publisher = {Morgan Kaufmann Publishers Inc.}, 34 | author = {Quinlan, J. Ross}, 35 | year = {1993}, 36 | pages = {236--243} 37 | } -------------------------------------------------------------------------------- /paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Confidence Intervals for Random Forests in Python' 3 | tags: 4 | - Python 5 | - scikit-learn 6 | - random forest 7 | - confidence intervals 8 | authors: 9 | - name: Kivan Polimis 10 | orcid: 0000-0002-3498-0479 11 | affiliation: 1 12 | - name: Ariel Rokem 13 | orcid: 0000-0003-0679-1985 14 | affiliation: 1 15 | - name: Bryna Hazelton 16 | orcid: 0000-0001-7532-645X 17 | affiliation: 1 18 | affiliations: 19 | - name: eScience Institute, University of Washington 20 | index: 1 21 | date: 7 July 2017 22 | bibliography: paper.bib 23 | --- 24 | 25 | # Summary 26 | Random forests are a method for predicting numerous ensemble learning tasks. Prediction variability can illustrate how influential the training set is for producing the observed random forest predictions and provides additional information about prediction accuracy. `forest-confidence-interval` is a Python module for calculating variance and adding confidence intervals to `scikit-learn` random forest regression or classification objects. The core functions calculate an in-bag and error bars for random forest objects. Our software is designed for individuals using `scikit-learn` random forest objects that want to add estimates of uncertainty to random forest predictors. This module is an implementation of an algorithm developed by @wager_confidence_2014 and previously implemented in R [@wager_randomforestci_2016]. 
27 | 28 | # Usage 29 | Our package's `random_forest_error` and `calc_inbag` functions use the random forest object (including training and test data) to create variance estimates that can be plotted (e.g. as confidence intervals or standard deviations). The in-bag matrix that fit the data is set to `None` by default, and the in-bag will be inferred from the forest. However, this only works for trees for which bootstrapping was set to `True`. That is, if sampling was done with replacement. Otherwise, users need to provide their own in-bag matrix. 30 | 31 | # Examples gallery 32 | The regression example uses a slightly modified data-set from the Carnegie Mellon University's StatLib library (available from the [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)) with features of different cars and their MPG [@quinlan_combining_1993]. The classification example generates synthetic data to simulate a task like that of a spam filter: classifying items into one of two categories (e.g., spam/non-spam) based on a number of features. This module will work for matrices or `pandas` data frames. Then, `scikit-learn` functions split the example data into training and test data and generate a random forest object (regression or classifier). The examples calculate variance from random forest objects that use the highest mean probability estimate across the trees. The focus on means for estimates and unit comparability between sample mean and dispersion measures is the basis for plotting with the square root of the variance (standard deviation). As the plots with variance estimated show, some predictions have more error than others. For instance, in the regression (MPG) example, predictions of higher mileage MPG are associated with greater variance than lower mileage predictions. 
33 | 34 | ## Regression example 35 | 36 | ### No variance estimated 37 | -![plot-mpg-no-variance](plot_mpg_no_variance.png) 38 | 39 | ### Plot with variance 40 | -![plot-mpg-variance](plot_mpg.png) 41 | 42 | ## Classification example 43 | ### No variance estimated 44 | -![plot-spam-no-variance](plot_spam_no_variance.png) 45 | 46 | ### Plot with variance 47 | -![plot-spam](plot_spam.png) 48 | 49 | 50 | ## Community guidelines 51 | 52 | Contributions are very welcome, but we ask that contributors abide by the [contributor covenant](http://contributor-covenant.org/version/1/4/). 53 | 54 | To report issues with the software, please post to the 55 | [issue log](https://github.com/scikit-learn-contrib/forest-confidence-interval/issues) 56 | Bug reports are also appreciated, please add them to the issue log after 57 | verifying that the issue does not already exist. 58 | Comments on existing issues are also welcome. 59 | 60 | Please submit improvements as pull requests against the repo after verifying 61 | that the existing tests pass and any new code is well covered by unit tests. 62 | Please write code that complies with the Python style guide, 63 | [PEP8](https://www.python.org/dev/peps/pep-0008/) 64 | 65 | Please e-mail [Ariel Rokem](mailto:arokem@gmail.com), [Kivan Polimis](mailto:kivan.polimis@gmail.com), or [Bryna Hazelton](mailto:brynah@phys.washington.edu ) if you have any questions, suggestions or feedback. 
66 | 67 | # References 68 | -------------------------------------------------------------------------------- /paper/plot_mpg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/forest-confidence-interval/5241344a342810b3ea21413bf2ebcf4e90ec6976/paper/plot_mpg.png -------------------------------------------------------------------------------- /paper/plot_mpg_no_variance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/forest-confidence-interval/5241344a342810b3ea21413bf2ebcf4e90ec6976/paper/plot_mpg_no_variance.png -------------------------------------------------------------------------------- /paper/plot_spam.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/forest-confidence-interval/5241344a342810b3ea21413bf2ebcf4e90ec6976/paper/plot_spam.png -------------------------------------------------------------------------------- /paper/plot_spam_no_variance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/forest-confidence-interval/5241344a342810b3ea21413bf2ebcf4e90ec6976/paper/plot_spam_no_variance.png -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | sphinx 3 | pytest==5.2.2 4 | pytest-cov==2.8.1 5 | flake8 6 | sphinx_gallery 7 | sphinx_rtd_theme 8 | numpydoc 9 | sphinx-autoapi 10 | matplotlib 11 | pillow 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.20 2 | scikit-learn>=0.23.1 3 | 
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys, os 3 | import warnings 4 | from setuptools import setup, find_packages 5 | 6 | with open('requirements.txt') as f: 7 | INSTALL_REQUIRES = [l.strip() for l in f.readlines() if l] 8 | 9 | try: 10 | import numpy 11 | except ImportError: 12 | warnings.warn('numpy is required during installation', ImportWarning) 13 | 14 | try: 15 | import scipy 16 | except ImportError: 17 | warnings.warn('scipy is required during installation', ImportWarning) 18 | 19 | # Get version and release info, which is all stored in forestci/version.py 20 | ver_file = os.path.join('forestci', 'version.py') 21 | with open(ver_file) as f: 22 | exec(f.read()) 23 | 24 | opts = dict(name=NAME, 25 | maintainer=MAINTAINER, 26 | maintainer_email=MAINTAINER_EMAIL, 27 | description=DESCRIPTION, 28 | long_description=LONG_DESCRIPTION, 29 | url=URL, 30 | download_url=DOWNLOAD_URL, 31 | license=LICENSE, 32 | classifiers=CLASSIFIERS, 33 | author=AUTHOR, 34 | author_email=AUTHOR_EMAIL, 35 | platforms=PLATFORMS, 36 | version=VERSION, 37 | packages=find_packages(), 38 | install_requires=INSTALL_REQUIRES) 39 | 40 | if __name__ == '__main__': 41 | setup(**opts) 42 | --------------------------------------------------------------------------------