├── .gitattribute ├── .gitignore ├── .landscape.yaml ├── .travis.yml ├── Makefile ├── README.rst ├── continuous_integration ├── install.sh └── test_script.sh ├── doc ├── Makefile ├── _templates │ ├── class.rst │ ├── class_with_call.rst │ ├── function.rst │ └── layout.html ├── conf.py ├── images │ └── no_image.png ├── index.rst ├── make.bat ├── references.rst └── sphinxext │ ├── LICENSE.txt │ ├── MANIFEST.in │ ├── README.txt │ ├── gen_rst.py │ ├── install_sphinx_bootstrap_theme.sh │ └── numpy_ext │ ├── __init__.py │ ├── docscrape.py │ ├── docscrape_sphinx.py │ └── numpydoc.py ├── examples ├── README.txt ├── plot_randomized_output_decision_tree.py └── plot_variance_preservation.py ├── random_output_trees ├── __init__.py ├── _sklearn_tree.c ├── _sklearn_tree.pxd ├── _sklearn_tree.pyx ├── _sklearn_tree_utils.c ├── _sklearn_tree_utils.pxd ├── _sklearn_tree_utils.pyx ├── _tree.c ├── _tree.pyx ├── _utils.py ├── datasets.py ├── ensemble │ ├── __init__.py │ ├── _sklearn_forest.py │ ├── forest.py │ ├── lazy_bagging.py │ └── tests │ │ ├── test_forest.py │ │ └── test_lazy_bagging.py ├── random_projection.py ├── setup.py ├── tests │ ├── test_datasets.py │ ├── test_random_projection.py │ ├── test_sklearn_ensemble.py │ ├── test_sklearn_tree.py │ ├── test_transformer.py │ ├── test_tree.py │ └── test_validations.py ├── transformer.py └── tree.py ├── setup.cfg └── setup.py /.gitattribute: -------------------------------------------------------------------------------- 1 | /random_output_trees/_tree.c -diff 2 | /random_output_trees/_sklearn_tree.c -diff 3 | /random_output_trees/_sklearn_tree_utils.c -diff 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | eggs/ 15 | lib/ 16 | lib64/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | 24 | # PyInstaller 25 | # Usually these files are written by a python script from a template 26 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
27 | *.manifest 28 | *.spec 29 | 30 | # Installer logs 31 | pip-log.txt 32 | pip-delete-this-directory.txt 33 | 34 | # Unit test / coverage reports 35 | htmlcov/ 36 | .tox/ 37 | .coverage 38 | .cache 39 | nosetests.xml 40 | coverage.xml 41 | 42 | # Translations 43 | *.mo 44 | *.pot 45 | 46 | # Django stuff: 47 | *.log 48 | 49 | # Sphinx documentation 50 | docs/_build/ 51 | 52 | # PyBuilder 53 | target/ 54 | 55 | # Cython 56 | cython_debug/ 57 | .DS_Store 58 | 59 | 60 | doc/_build/ 61 | doc/auto_examples/ 62 | doc/generated/ 63 | .buildinfo 64 | doc/modules/generated 65 | _sources 66 | -------------------------------------------------------------------------------- /.landscape.yaml: -------------------------------------------------------------------------------- 1 | doc-warnings: yes 2 | test-warnings: yes 3 | strictness: veryhigh 4 | max-line-length: 80 5 | autodetect: yes 6 | ignore-paths: 7 | - doc 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | virtualenv: 3 | system_site_packages: true 4 | env: 5 | matrix: 6 | - PYTHON_VERSION="2.7" 7 | COVERAGE="true" NUMPY_VERSION="1.6.2" SCIPY_VERSION="0.11.0" 8 | # This environment tests the oldest supported anaconda env 9 | - PYTHON_VERSION="2.6" 10 | NUMPY_VERSION="1.6.2" SCIPY_VERSION="0.11.0" 11 | # This environment tests the newest supported anaconda env 12 | - PYTHON_VERSION="3.4" 13 | NUMPY_VERSION="1.8.2" SCIPY_VERSION="0.14.0" 14 | install: source continuous_integration/install.sh 15 | script: bash continuous_integration/test_script.sh 16 | after_success: 17 | # Ignore coveralls failures as the coveralls server is not very reliable 18 | # but we don't want travis to report a failure in the github UI just 19 | # because the coverage report failed to be published. 20 | - if [[ "$COVERAGE" == "true" ]]; then coveralls || echo "failed"; fi 21 | cache: apt 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Author: Arnaud Joly 2 | 3 | all: clean inplace test 4 | 5 | clean: 6 | python setup.py clean 7 | 8 | in: inplace 9 | 10 | inplace: 11 | python setup.py build_ext --inplace 12 | 13 | test: 14 | nosetests random_output_trees 15 | 16 | doc: inplace 17 | $(MAKE) -C doc html 18 | 19 | doc-noplot: inplace 20 | $(MAKE) -C doc html-noplot 21 | 22 | view-doc: doc 23 | open doc/_build/html/index.html 24 | 25 | gh-pages: 26 | git checkout master 27 | make doc 28 | rm -rf ../random-output-trees-doc 29 | cp -a doc/_build/html ../random-output-trees-doc 30 | git checkout gh-pages 31 | cp -a ../random-output-trees-doc/* . 32 | echo 'Add new file to git' 33 | git add `ls ../random-output-trees-doc` 34 | git commit -m "Generated gh-pages for `git log master -1 --pretty=short --abbrev-commit`" 35 | git push origin gh-pages 36 | git checkout master 37 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Random output trees 2 | =================== 3 | 4 | .. image:: https://travis-ci.org/arjoly/random-output-trees.svg?branch=master 5 | :target: https://travis-ci.org/arjoly/random-output-trees 6 | :alt: Build status 7 | 8 | .. 
image:: https://coveralls.io/repos/arjoly/random-output-trees/badge.png?branch=master 9 | :target: https://coveralls.io/r/arjoly/random-output-trees?branch=master 10 | 11 | .. image:: https://landscape.io/github/arjoly/random-output-trees/master/landscape.svg 12 | :target: https://landscape.io/github/arjoly/random-output-trees/master 13 | :alt: Code Health 14 | 15 | 16 | Random output trees is a Python package to grow decision tree ensembles on 17 | a randomized output space. The core tree implementation is based on scikit-learn 18 | 0.15.2. All provided estimators and transformers are scikit-learn compatible. 19 | 20 | If you use this package, please cite 21 | 22 | Joly, A., Geurts, P., & Wehenkel, L. (2014). Random forests with random 23 | projections of the output space for high dimensional multi-label 24 | classification. 25 | 26 | ECML-PKDD 2014, Nancy, France 27 | 28 | 29 | The paper is available at http://orbi.ulg.ac.be/handle/2268/172146. 30 | 31 | Documentation 32 | ------------- 33 | 34 | The documentation is available at http://arjoly.github.io/random-output-trees/ 35 | 36 | 37 | Dependencies 38 | ------------ 39 | 40 | The required dependencies to build the software are Python >= 2.7, 41 | NumPy >= 1.6.2, SciPy >= 0.9, scikit-learn>=0.15.2 and a working C/C++ 42 | compiler. 43 | 44 | Matplotlib >= 1.1.1 is required to run the examples and nose >= 1.1.2 to run 45 | the tests. 46 | 47 | For building the documentation, Sphinx==1.2.2 and sphinx-bootstrap-theme==0.4.0 48 | are needed. 49 | 50 | 51 | Install 52 | ------- 53 | 54 | This package uses distutils, which is the default way of installing 55 | python modules. To install in your home directory, use:: 56 | 57 | python setup.py install --user 58 | 59 | To install for all users on Unix/Linux:: 60 | 61 | python setup.py build 62 | sudo python setup.py install 63 | 64 | 65 | Development 66 | ----------- 67 | 68 | You can check out the latest sources with the command:: 69 | 70 | git clone https://github.com/arjoly/random-output-trees 71 | 72 | or if you have write privileges:: 73 | 74 | git clone git@github.com:arjoly/random-output-trees.git 75 | 76 | After installation, you can launch the test suite from outside the 77 | source directory (you will need to have the ``nose`` package installed):: 78 | 79 | $ nosetests -v random_output_trees 80 | 81 |
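Since the estimators follow the scikit-learn API, a forest can be fitted on a
multi-label problem like any other scikit-learn estimator. The snippet below is
an illustrative sketch only: the class name comes from the reference
documentation, the data is synthetic, and the ``n_estimators`` and
``random_state`` arguments are assumed to follow the usual scikit-learn
conventions::

    import numpy as np
    from random_output_trees.ensemble import RandomForestClassifier

    # Toy multi-label problem: 100 samples, 10 input features, 50 binary labels
    rng = np.random.RandomState(0)
    X = rng.rand(100, 10)
    Y = (rng.rand(100, 50) > 0.9).astype(int)

    # Scikit-learn compatible forest provided by this package
    forest = RandomForestClassifier(n_estimators=10, random_state=0)
    forest.fit(X, Y)
    print(forest.predict(X[:5]).shape)  # one row per sample, one column per label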
82 | Licenses 83 | -------- 84 | 85 | Copyright (c) 2014, Arnaud Joly. All rights reserved. 86 | 87 | Redistribution and use in source and binary forms, with or without 88 | modification, are permitted provided that the following conditions are met: 89 | 90 | 1. Redistributions of source code must retain the above copyright notice, 91 | this list of conditions and the following disclaimer. 92 | 93 | 2. Redistributions in binary form must reproduce the above copyright 94 | notice, this list of conditions and the following disclaimer in the 95 | documentation and/or other materials provided with the distribution. 96 | 97 | 3. Neither the name of the copyright holder nor the names of its 98 | contributors may be used to endorse or promote products derived from 99 | this software without specific prior written permission. 100 | 101 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 102 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 103 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 104 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 105 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 106 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 107 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 108 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 109 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 110 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 111 | POSSIBILITY OF SUCH DAMAGE. 112 | -------------------------------------------------------------------------------- /continuous_integration/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is meant to be called by the "install" step defined in 3 | # .travis.yml. See http://docs.travis-ci.com/ for more details. 4 | # The behavior of the script is controlled by environment variables defined 5 | # in the .travis.yml in the top level folder of the project. 6 | 7 | # License: 3-clause BSD 8 | 9 | # This file is originally from the scikit-learn project 10 | 11 | set -e 12 | 13 | # Fix the compilers to work around the Python 3.4 build unexpectedly 14 | # looking up g++44. 15 | export CC=gcc 16 | export CXX=g++ 17 | 18 | sudo apt-get update -qq 19 | 20 | # Deactivate the travis-provided virtual environment and set up a 21 | # conda-based environment instead 22 | deactivate 23 | 24 | # Use the miniconda installer for faster download / install of conda 25 | # itself 26 | wget http://repo.continuum.io/miniconda/Miniconda-3.6.0-Linux-x86_64.sh \ 27 | -O miniconda.sh 28 | chmod +x miniconda.sh && ./miniconda.sh -b 29 | export PATH=/home/travis/miniconda/bin:$PATH 30 | conda update --yes conda 31 | 32 | # Configure the conda environment and put it in the path using the 33 | # provided versions 34 | conda create -n testenv --yes python=$PYTHON_VERSION pip nose \ 35 | numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION 36 | source activate testenv 37 | 38 | python --version 39 | python -c "import numpy; print('numpy %s' % numpy.__version__)" 40 | python -c "import scipy; print('scipy %s' % scipy.__version__)" 41 | 42 | pip install scikit-learn 43 | 44 | python -c "import sklearn; print('sklearn %s' % sklearn.__version__)" 45 | python setup.py build_ext --inplace 46 | -------------------------------------------------------------------------------- /continuous_integration/test_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is meant to be called by the "script" step defined in 3 | # .travis.yml. See http://docs.travis-ci.com/ for more details. 4 | # The behavior of the script is controlled by environment variables defined 5 | # in the .travis.yml in the top level folder of the project. 6 | 7 | # License: 3-clause BSD 8 | 9 | # This file is originally from the scikit-learn project 10 | 11 | set -e 12 | 13 | python --version 14 | python -c "import numpy; print('numpy %s' % numpy.__version__)" 15 | python -c "import scipy; print('scipy %s' % scipy.__version__)" 16 | python -c "import sklearn; print('sklearn %s' % sklearn.__version__)" 17 | 18 | # Do not use "make test" or "make test-coverage" as they enable verbose mode 19 | # which renders travis output too slow to display in a browser.
20 | if [[ "$COVERAGE" == "true" ]]; then 21 | nosetests -s --with-coverage random_output_trees 22 | else 23 | nosetests -s random_output_trees 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/random_output_trees.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/random_output_trees.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/random_output_trees" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/random_output_trees" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /doc/_templates/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | {% endblock %} 11 | 12 | 13 | -------------------------------------------------------------------------------- /doc/_templates/class_with_call.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | .. automethod:: __call__ 11 | {% endblock %} 12 | 13 | 14 | -------------------------------------------------------------------------------- /doc/_templates/function.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | 9 | -------------------------------------------------------------------------------- /doc/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {# Import the theme's layout. #} 2 | {% extends "!layout.html" %} 3 | 4 | {# remove site and page menus #} 5 | {%- block sidebartoc %} 6 | {% endblock %} 7 | {%- block sidebarrel %} 8 | {% endblock %} 9 | 10 | {%- block navbartoc %} 11 | {% endblock %} 12 | 13 | {# Include our new CSS file into existing ones. #} 14 | {% set css_files = css_files + ['_static/bootstrap.min.css']%} 15 | 16 | {%- block content %} 17 | {{ navBar() }} 18 |
19 | {% block body %}{% endblock %} 20 |
21 | 22 | {%- endblock %} 23 | 24 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # random_output_trees documentation build configuration file, created by 4 | # sphinx-quickstart on Wed Aug 20 10:22:49 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | # sys.path.insert(0, os.path.abspath('.')) 22 | sys.path.append(os.path.abspath('sphinxext')) 23 | 24 | import sphinx_bootstrap_theme 25 | 26 | # Try to override the matplotlib configuration as early as possible 27 | try: 28 | import gen_rst 29 | except: 30 | pass 31 | 32 | # -- General configuration ------------------------------------------------ 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | #needs_sphinx = '1.0' 36 | 37 | # Add any Sphinx extension module names here, as strings. They can be 38 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 39 | # ones. 40 | extensions = [ 41 | 'gen_rst', 42 | 'sphinx.ext.autodoc', 43 | 'sphinx.ext.autosummary', 44 | 'sphinx.ext.doctest', 45 | 'sphinx.ext.mathjax', 46 | # 'sphinx.ext.viewcode', 47 | 'numpy_ext.numpydoc', 48 | ] 49 | 50 | # Generate autosummary even if no references 51 | autosummary_generate = True 52 | autodoc_default_flags = ['members', 'inherited-members'] 53 | 54 | # Add any paths that contain templates here, relative to this directory. 55 | templates_path = ['_templates'] 56 | 57 | # The suffix of source filenames. 58 | source_suffix = '.rst' 59 | 60 | # The encoding of source files. 61 | #source_encoding = 'utf-8-sig' 62 | 63 | 64 | # Generate the plots for the gallery 65 | plot_gallery = True 66 | 67 | # The master toctree document. 68 | master_doc = 'index' 69 | 70 | # General information about the project. 71 | project = u'random_output_trees' 72 | copyright = u'2014, Arnaud Joly' 73 | 74 | # The version info for the project you're documenting, acts as replacement for 75 | # |version| and |release|, also used in various other places throughout the 76 | # built documents. 77 | # 78 | # The short X.Y version. 79 | version = 'dev' 80 | # The full version, including alpha/beta/rc tags. 81 | release = 'dev' 82 | 83 | # The language for content autogenerated by Sphinx. Refer to documentation 84 | # for a list of supported languages. 85 | #language = None 86 | 87 | # There are two options for replacing |today|: either, you set today to some 88 | # non-false value, then it is used: 89 | #today = '' 90 | # Else, today_fmt is used as the format for a strftime call. 91 | #today_fmt = '%B %d, %Y' 92 | 93 | # List of patterns, relative to source directory, that match files and 94 | # directories to ignore when looking for source files. 95 | exclude_patterns = ['_build'] 96 | 97 | # The reST default role (used for this markup: `text`) to use for all 98 | # documents. 
99 | #default_role = None 100 | 101 | # If true, '()' will be appended to :func: etc. cross-reference text. 102 | #add_function_parentheses = True 103 | 104 | # If true, the current module name will be prepended to all description 105 | # unit titles (such as .. function::). 106 | #add_module_names = True 107 | 108 | # If true, sectionauthor and moduleauthor directives will be shown in the 109 | # output. They are ignored by default. 110 | #show_authors = False 111 | 112 | # The name of the Pygments (syntax highlighting) style to use. 113 | pygments_style = 'sphinx' 114 | 115 | # A list of ignored prefixes for module index sorting. 116 | #modindex_common_prefix = [] 117 | 118 | # If true, keep warnings as "system message" paragraphs in the built documents. 119 | #keep_warnings = False 120 | 121 | 122 | # -- Options for HTML output ---------------------------------------------- 123 | 124 | # The theme to use for HTML and HTML Help pages. See the documentation for 125 | # a list of builtin themes. 126 | html_theme = 'bootstrap' 127 | 128 | # Theme options are theme-specific and customize the look and feel of a theme 129 | # further. For a list of options available for each theme, see the 130 | # documentation. 131 | html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() 132 | 133 | # Add any paths that contain custom themes here, relative to this directory. 134 | #html_theme_path = [] 135 | 136 | # The name for this set of Sphinx documents. If None, it defaults to 137 | # " v documentation". 138 | #html_title = None 139 | 140 | # A shorter title for the navigation bar. Default is the same as html_title. 141 | #html_short_title = None 142 | 143 | # The name of an image file (relative to this directory) to place at the top 144 | # of the sidebar. 145 | #html_logo = None 146 | 147 | # The name of an image file (within the static path) to use as favicon of the 148 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 149 | # pixels large. 150 | #html_favicon = None 151 | 152 | # Add any paths that contain custom static files (such as style sheets) here, 153 | # relative to this directory. They are copied after the builtin static files, 154 | # so a file named "default.css" will overwrite the builtin "default.css". 155 | html_static_path = ['_static'] 156 | 157 | # Add any extra paths that contain custom files (such as robots.txt or 158 | # .htaccess) here, relative to this directory. These files are copied 159 | # directly to the root of the documentation. 160 | #html_extra_path = [] 161 | 162 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 163 | # using the given strftime format. 164 | #html_last_updated_fmt = '%b %d, %Y' 165 | 166 | # If true, SmartyPants will be used to convert quotes and dashes to 167 | # typographically correct entities. 168 | #html_use_smartypants = True 169 | 170 | # Custom sidebar templates, maps document names to template names. 171 | #html_sidebars = {} 172 | 173 | # Additional templates that should be rendered to pages, maps page names to 174 | # template names. 175 | #html_additional_pages = {} 176 | 177 | # If false, no module index is generated. 178 | #html_domain_indices = True 179 | 180 | # If false, no index is generated. 181 | #html_use_index = True 182 | 183 | # If true, the index is split into individual pages for each letter. 184 | #html_split_index = False 185 | 186 | # If true, links to the reST sources are added to the pages. 
187 | #html_show_sourcelink = True 188 | 189 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 190 | html_show_sphinx = False 191 | 192 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 193 | #html_show_copyright = True 194 | 195 | # If true, an OpenSearch description file will be output, and all pages will 196 | # contain a <link> tag referring to it. The value of this option must be the 197 | # base URL from which the finished HTML is served. 198 | #html_use_opensearch = '' 199 | 200 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 201 | #html_file_suffix = None 202 | 203 | # Output file base name for HTML help builder. 204 | htmlhelp_basename = 'random_output_treesdoc' 205 | 206 | # Theme options are theme-specific and customize the look and feel of a 207 | # theme further. 208 | html_theme_options = { 209 | # Navigation bar title. (Default: ``project`` value) 210 | 'navbar_title': "Randomized output forest", 211 | 212 | # Tab name for entire site. (Default: "Site") 213 | # 'navbar_site_name': "Site", 214 | 215 | # A list of tuples containing pages or urls to link to. 216 | # Valid tuples should be in the following forms: 217 | # (name, page) # a link to a page 218 | # (name, "/aa/bb", 1) # a link to an arbitrary relative url 219 | # (name, "http://example.com", True) # arbitrary absolute url 220 | # Note the "1" or "True" value above as the third argument to indicate 221 | # an arbitrary url. 222 | 'navbar_links': [ 223 | ("References", "references"), 224 | ("Examples", "auto_examples/index"), 225 | ], 226 | 227 | # Render the next and previous page links in navbar. (Default: true) 228 | 'navbar_sidebarrel': False, 229 | 230 | # Render the current pages TOC in the navbar. (Default: true) 231 | 'navbar_pagenav': False, 232 | 233 | # Global TOC depth for "site" navbar tab. (Default: 1) 234 | # Switching to -1 shows all levels. 235 | 'globaltoc_depth': 0, 236 | 237 | # Include hidden TOCs in Site navbar? 238 | # 239 | # Note: If this is "false", you cannot have mixed ``:hidden:`` and 240 | # non-hidden ``toctree`` directives in the same page, or else the build 241 | # will break. 242 | # 243 | # Values: "true" (default) or "false" 244 | 'globaltoc_includehidden': "false", 245 | 246 | # HTML navbar class (Default: "navbar") to attach to <div>
element. 247 | # For black navbar, do "navbar navbar-inverse" 248 | 'navbar_class': "navbar", 249 | 250 | # Fix navigation bar to top of page? 251 | # Values: "true" (default) or "false" 252 | 'navbar_fixed_top': "true", 253 | 254 | # Location of link to source. 255 | # Options are "nav" (default), "footer" or anything else to exclude. 256 | 'source_link_position': "None", 257 | 258 | # Bootswatch (http://bootswatch.com/) theme. 259 | # 260 | # Options are nothing with "" (default) or the name of a valid theme 261 | # such as "amelia" or "cosmo". 262 | 'bootswatch_theme': "lumen", 263 | 264 | # Choose Bootstrap version. 265 | # Values: "3" (default) or "2" (in quotes) 266 | 'bootstrap_version': "3", 267 | } 268 | 269 | 270 | # -- Options for LaTeX output --------------------------------------------- 271 | 272 | latex_elements = { 273 | # The paper size ('letterpaper' or 'a4paper'). 274 | #'papersize': 'letterpaper', 275 | 276 | # The font size ('10pt', '11pt' or '12pt'). 277 | #'pointsize': '10pt', 278 | 279 | # Additional stuff for the LaTeX preamble. 280 | #'preamble': '', 281 | } 282 | 283 | # Grouping the document tree into LaTeX files. List of tuples 284 | # (source start file, target name, title, 285 | # author, documentclass [howto, manual, or own class]). 286 | latex_documents = [ 287 | ('index', 'random_output_trees.tex', u'randomized\\_output\\_forest Documentation', 288 | u'Arnaud Joly', 'manual'), 289 | ] 290 | 291 | # The name of an image file (relative to this directory) to place at the top of 292 | # the title page. 293 | #latex_logo = None 294 | 295 | # For "manual" documents, if this is true, then toplevel headings are parts, 296 | # not chapters. 297 | #latex_use_parts = False 298 | 299 | # If true, show page references after internal links. 300 | #latex_show_pagerefs = False 301 | 302 | # If true, show URL addresses after external links. 303 | #latex_show_urls = False 304 | 305 | # Documents to append as an appendix to all manuals. 306 | #latex_appendices = [] 307 | 308 | # If false, no module index is generated. 309 | #latex_domain_indices = True 310 | 311 | 312 | # -- Options for manual page output --------------------------------------- 313 | 314 | # One entry per manual page. List of tuples 315 | # (source start file, name, description, authors, manual section). 316 | man_pages = [ 317 | ('index', 'random_output_trees', u'random_output_trees Documentation', 318 | [u'Arnaud Joly'], 1) 319 | ] 320 | 321 | # If true, show URL addresses after external links. 322 | #man_show_urls = False 323 | 324 | 325 | # -- Options for Texinfo output ------------------------------------------- 326 | 327 | # Grouping the document tree into Texinfo files. List of tuples 328 | # (source start file, target name, title, author, 329 | # dir menu entry, description, category) 330 | texinfo_documents = [ 331 | ('index', 'random_output_trees', u'random_output_trees Documentation', 332 | u'Arnaud Joly', 'random_output_trees', 'One line description of project.', 333 | 'Miscellaneous'), 334 | ] 335 | 336 | # Documents to append as an appendix to all manuals. 337 | #texinfo_appendices = [] 338 | 339 | # If false, no module index is generated. 340 | #texinfo_domain_indices = True 341 | 342 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 343 | #texinfo_show_urls = 'footnote' 344 | 345 | # If true, do not generate a @detailmenu in the "Top" node's menu. 
346 | #texinfo_no_detailmenu = False 347 | -------------------------------------------------------------------------------- /doc/images/no_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arjoly/random-output-trees/4251a3ab99cf7b893b7dcb47b62be94ed74c1ab9/doc/images/no_image.png -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | 3 | .. toctree:: 4 | :hidden: 5 | 6 | auto_examples/index.rst 7 | references.rst 8 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 
76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\random_output_trees.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\random_output_trees.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. 
The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /doc/references.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | References 3 | ========== 4 | 5 | This is the class and function reference of the package. 6 | 7 | 8 | :mod:`random_output_trees.ensemble`: Ensemble 9 | --------------------------------------------- 10 | 11 | .. automodule:: random_output_trees.ensemble 12 | :no-members: 13 | :no-inherited-members: 14 | 15 | .. currentmodule:: random_output_trees 16 | 17 | .. autosummary:: 18 | :toctree: generated/ 19 | :template: class.rst 20 | 21 | ensemble.ExtraTreesClassifier 22 | ensemble.ExtraTreesRegressor 23 | ensemble.LazyBaggingClassifier 24 | ensemble.LazyBaggingRegressor 25 | ensemble.RandomForestClassifier 26 | ensemble.RandomForestRegressor 27 | 28 | :mod:`random_output_trees.datasets`: Datasets 29 | --------------------------------------------- 30 | 31 | .. automodule:: random_output_trees.datasets 32 | :no-members: 33 | :no-inherited-members: 34 | 35 | .. currentmodule:: random_output_trees 36 | 37 | .. autosummary:: 38 | :toctree: generated/ 39 | :template: function.rst 40 | 41 | datasets.fetch_drug_interaction 42 | datasets.fetch_protein_interaction 43 | 44 | 45 | :mod:`random_output_trees.random_projection`: Random projection 46 | --------------------------------------------------------------- 47 | 48 | .. automodule:: random_output_trees.random_projection 49 | :no-members: 50 | :no-inherited-members: 51 | 52 | .. currentmodule:: random_output_trees 53 | 54 | .. autosummary:: 55 | :toctree: generated/ 56 | :template: class.rst 57 | 58 | random_projection.RademacherRandomProjection 59 | random_projection.AchlioptasRandomProjection 60 | random_projection.SampledHadamardProjection 61 | random_projection.SampledIdentityProjection 62 | 63 | 64 | :mod:`random_output_trees.transformer`: Transformer 65 | --------------------------------------------------- 66 | 67 | .. automodule:: random_output_trees.transformer 68 | :no-members: 69 | :no-inherited-members: 70 | 71 | .. currentmodule:: random_output_trees 72 | 73 | .. 
autosummary:: 74 | :toctree: generated/ 75 | :template: class.rst 76 | 77 | transformer.FixedStateTransformer 78 | 79 | 80 | :mod:`random_output_trees.tree`: Tree 81 | ------------------------------------- 82 | 83 | 84 | .. automodule:: random_output_trees.tree 85 | :no-members: 86 | :no-inherited-members: 87 | 88 | .. currentmodule:: random_output_trees 89 | 90 | .. autosummary:: 91 | :toctree: generated/ 92 | :template: class.rst 93 | 94 | tree.DecisionTreeClassifier 95 | tree.DecisionTreeRegressor 96 | -------------------------------------------------------------------------------- /doc/sphinxext/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------- 2 | The files 3 | - numpydoc.py 4 | - autosummary.py 5 | - autosummary_generate.py 6 | - docscrape.py 7 | - docscrape_sphinx.py 8 | - phantom_import.py 9 | have the following license: 10 | 11 | Copyright (C) 2008 Stefan van der Walt , Pauli Virtanen 12 | 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are 15 | met: 16 | 17 | 1. Redistributions of source code must retain the above copyright 18 | notice, this list of conditions and the following disclaimer. 19 | 2. Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in 21 | the documentation and/or other materials provided with the 22 | distribution. 23 | 24 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 26 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 27 | DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 28 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 | POSSIBILITY OF SUCH DAMAGE. 35 | 36 | ------------------------------------------------------------------------------- 37 | The files 38 | - compiler_unparse.py 39 | - comment_eater.py 40 | - traitsdoc.py 41 | have the following license: 42 | 43 | This software is OSI Certified Open Source Software. 44 | OSI Certified is a certification mark of the Open Source Initiative. 45 | 46 | Copyright (c) 2006, Enthought, Inc. 47 | All rights reserved. 48 | 49 | Redistribution and use in source and binary forms, with or without 50 | modification, are permitted provided that the following conditions are met: 51 | 52 | * Redistributions of source code must retain the above copyright notice, this 53 | list of conditions and the following disclaimer. 54 | * Redistributions in binary form must reproduce the above copyright notice, 55 | this list of conditions and the following disclaimer in the documentation 56 | and/or other materials provided with the distribution. 57 | * Neither the name of Enthought, Inc. nor the names of its contributors may 58 | be used to endorse or promote products derived from this software without 59 | specific prior written permission. 
60 | 61 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 62 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 63 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 64 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 65 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 66 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 67 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 68 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 69 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 70 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 71 | 72 | 73 | ------------------------------------------------------------------------------- 74 | The files 75 | - only_directives.py 76 | - plot_directive.py 77 | originate from Matplotlib (http://matplotlib.sf.net/) which has 78 | the following license: 79 | 80 | Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved. 81 | 82 | 1. This LICENSE AGREEMENT is between John D. Hunter (“JDH”), and the Individual or Organization (“Licensee”) accessing and otherwise using matplotlib software in source or binary form and its associated documentation. 83 | 84 | 2. Subject to the terms and conditions of this License Agreement, JDH hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use matplotlib 0.98.3 alone or in any derivative version, provided, however, that JDH’s License Agreement and JDH’s notice of copyright, i.e., “Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved” are retained in matplotlib 0.98.3 alone or in any derivative version prepared by Licensee. 85 | 86 | 3. In the event Licensee prepares a derivative work that is based on or incorporates matplotlib 0.98.3 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to matplotlib 0.98.3. 87 | 88 | 4. JDH is making matplotlib 0.98.3 available to Licensee on an “AS IS” basis. JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB 0.98.3 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 89 | 90 | 5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB 0.98.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING MATPLOTLIB 0.98.3, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 91 | 92 | 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 93 | 94 | 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between JDH and Licensee. This License Agreement does not grant permission to use JDH trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 95 | 96 | 8. By copying, installing or otherwise using matplotlib 0.98.3, Licensee agrees to be bound by the terms and conditions of this License Agreement. 
97 | 98 | -------------------------------------------------------------------------------- /doc/sphinxext/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include tests *.py 2 | include *.txt 3 | -------------------------------------------------------------------------------- /doc/sphinxext/README.txt: -------------------------------------------------------------------------------- 1 | ===================================== 2 | numpydoc -- Numpy's Sphinx extensions 3 | ===================================== 4 | 5 | Numpy's documentation uses several custom extensions to Sphinx. These 6 | are shipped in this ``numpydoc`` package, in case you want to make use 7 | of them in third-party projects. 8 | 9 | The following extensions are available: 10 | 11 | - ``numpydoc``: support for the Numpy docstring format in Sphinx, and add 12 | the code description directives ``np-function``, ``np-cfunction``, etc. 13 | that support the Numpy docstring syntax. 14 | 15 | - ``numpydoc.traitsdoc``: For gathering documentation about Traits attributes. 16 | 17 | - ``numpydoc.plot_directives``: Adaptation of Matplotlib's ``plot::`` 18 | directive. Note that this implementation may still undergo severe 19 | changes or eventually be deprecated. 20 | 21 | - ``numpydoc.only_directives``: (DEPRECATED) 22 | 23 | - ``numpydoc.autosummary``: (DEPRECATED) An ``autosummary::`` directive. 24 | Available in Sphinx 0.6.2 and (to-be) 1.0 as ``sphinx.ext.autosummary``, 25 | and it the Sphinx 1.0 version is recommended over that included in 26 | Numpydoc. 27 | 28 | 29 | numpydoc 30 | ======== 31 | 32 | Numpydoc inserts a hook into Sphinx's autodoc that converts docstrings 33 | following the Numpy/Scipy format to a form palatable to Sphinx. 34 | 35 | Options 36 | ------- 37 | 38 | The following options can be set in conf.py: 39 | 40 | - numpydoc_use_plots: bool 41 | 42 | Whether to produce ``plot::`` directives for Examples sections that 43 | contain ``import matplotlib``. 44 | 45 | - numpydoc_show_class_members: bool 46 | 47 | Whether to show all members of a class in the Methods and Attributes 48 | sections automatically. 49 | 50 | - numpydoc_edit_link: bool (DEPRECATED -- edit your HTML template instead) 51 | 52 | Whether to insert an edit link after docstrings. 53 | -------------------------------------------------------------------------------- /doc/sphinxext/install_sphinx_bootstrap_theme.sh: -------------------------------------------------------------------------------- 1 | # remove prior version if any 2 | rm -rf sphinx_bootstrap_theme 3 | 4 | # Download and untar 5 | wget https://pypi.python.org/packages/source/s/sphinx-bootstrap-theme/sphinx-bootstrap-theme-0.4.0.tar.gz 6 | tar -zxf sphinx-bootstrap-theme-0.4.0.tar.gz 7 | rm sphinx-bootstrap-theme-0.4.0.tar.gz 8 | 9 | # Move everything to sphinx_bootstrap_theme 10 | mv sphinx-bootstrap-theme-0.4.0/sphinx_bootstrap_theme . 11 | mv sphinx-bootstrap-theme-0.4.0/*.txt sphinx_bootstrap_theme 12 | mv sphinx-bootstrap-theme-0.4.0/*.in sphinx_bootstrap_theme 13 | 14 | # Clean theme that we don't want 15 | # rm -rf -ignore myfile.txt * 16 | rm -rf sphinx_bootstrap_theme/bootstrap/static/bootstrap-2.* 17 | rm -rf sphinx_bootstrap_theme/bootstrap/static/bootswatch-2.* 18 | 19 | # remove all bootstwatch theme except one 20 | mv sphinx_bootstrap_theme/bootstrap/static/bootswatch-3.1.0/lumen . 
21 | rm -rf sphinx_bootstrap_theme/bootstrap/static/bootswatch-3.1.0/ 22 | mkdir sphinx_bootstrap_theme/bootstrap/static/bootswatch-3.1.0 23 | mv lumen sphinx_bootstrap_theme/bootstrap/static/bootswatch-3.1.0/ 24 | 25 | 26 | 27 | # Clean remaining files 28 | rm -rf sphinx-bootstrap-theme-0.4.0 29 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arjoly/random-output-trees/4251a3ab99cf7b893b7dcb47b62be94ed74c1ab9/doc/sphinxext/numpy_ext/__init__.py -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/docscrape.py: -------------------------------------------------------------------------------- 1 | """Extract reference documentation from the NumPy source tree. 2 | 3 | """ 4 | 5 | import inspect 6 | import textwrap 7 | import re 8 | import pydoc 9 | from warnings import warn 10 | # Try Python 2 first, otherwise load from Python 3 11 | try: 12 | from StringIO import StringIO 13 | except: 14 | from io import StringIO 15 | 16 | 17 | class Reader(object): 18 | """A line-based string reader. 19 | 20 | """ 21 | def __init__(self, data): 22 | """ 23 | Parameters 24 | ---------- 25 | data : str 26 | String with lines separated by '\n'. 27 | 28 | """ 29 | if isinstance(data, list): 30 | self._str = data 31 | else: 32 | self._str = data.split('\n') # store string as list of lines 33 | 34 | self.reset() 35 | 36 | def __getitem__(self, n): 37 | return self._str[n] 38 | 39 | def reset(self): 40 | self._l = 0 # current line nr 41 | 42 | def read(self): 43 | if not self.eof(): 44 | out = self[self._l] 45 | self._l += 1 46 | return out 47 | else: 48 | return '' 49 | 50 | def seek_next_non_empty_line(self): 51 | for l in self[self._l:]: 52 | if l.strip(): 53 | break 54 | else: 55 | self._l += 1 56 | 57 | def eof(self): 58 | return self._l >= len(self._str) 59 | 60 | def read_to_condition(self, condition_func): 61 | start = self._l 62 | for line in self[start:]: 63 | if condition_func(line): 64 | return self[start:self._l] 65 | self._l += 1 66 | if self.eof(): 67 | return self[start:self._l + 1] 68 | return [] 69 | 70 | def read_to_next_empty_line(self): 71 | self.seek_next_non_empty_line() 72 | 73 | def is_empty(line): 74 | return not line.strip() 75 | return self.read_to_condition(is_empty) 76 | 77 | def read_to_next_unindented_line(self): 78 | def is_unindented(line): 79 | return (line.strip() and (len(line.lstrip()) == len(line))) 80 | return self.read_to_condition(is_unindented) 81 | 82 | def peek(self, n=0): 83 | if self._l + n < len(self._str): 84 | return self[self._l + n] 85 | else: 86 | return '' 87 | 88 | def is_empty(self): 89 | return not ''.join(self._str).strip() 90 | 91 | 92 | class NumpyDocString(object): 93 | def __init__(self, docstring, config={}): 94 | docstring = textwrap.dedent(docstring).split('\n') 95 | 96 | self._doc = Reader(docstring) 97 | self._parsed_data = { 98 | 'Signature': '', 99 | 'Summary': [''], 100 | 'Extended Summary': [], 101 | 'Parameters': [], 102 | 'Returns': [], 103 | 'Raises': [], 104 | 'Warns': [], 105 | 'Other Parameters': [], 106 | 'Attributes': [], 107 | 'Methods': [], 108 | 'See Also': [], 109 | 'Notes': [], 110 | 'Warnings': [], 111 | 'References': '', 112 | 'Examples': '', 113 | 'index': {} 114 | } 115 | 116 | self._parse() 117 | 118 | def __getitem__(self, key): 119 | return self._parsed_data[key] 120 | 121 | def 
__setitem__(self, key, val): 122 | if key not in self._parsed_data: 123 | warn("Unknown section %s" % key) 124 | else: 125 | self._parsed_data[key] = val 126 | 127 | def _is_at_section(self): 128 | self._doc.seek_next_non_empty_line() 129 | 130 | if self._doc.eof(): 131 | return False 132 | 133 | l1 = self._doc.peek().strip() # e.g. Parameters 134 | 135 | if l1.startswith('.. index::'): 136 | return True 137 | 138 | l2 = self._doc.peek(1).strip() # ---------- or ========== 139 | return l2.startswith('-' * len(l1)) or l2.startswith('=' * len(l1)) 140 | 141 | def _strip(self, doc): 142 | i = 0 143 | j = 0 144 | for i, line in enumerate(doc): 145 | if line.strip(): 146 | break 147 | 148 | for j, line in enumerate(doc[::-1]): 149 | if line.strip(): 150 | break 151 | 152 | return doc[i:len(doc) - j] 153 | 154 | def _read_to_next_section(self): 155 | section = self._doc.read_to_next_empty_line() 156 | 157 | while not self._is_at_section() and not self._doc.eof(): 158 | if not self._doc.peek(-1).strip(): # previous line was empty 159 | section += [''] 160 | 161 | section += self._doc.read_to_next_empty_line() 162 | 163 | return section 164 | 165 | def _read_sections(self): 166 | while not self._doc.eof(): 167 | data = self._read_to_next_section() 168 | name = data[0].strip() 169 | 170 | if name.startswith('..'): # index section 171 | yield name, data[1:] 172 | elif len(data) < 2: 173 | yield StopIteration 174 | else: 175 | yield name, self._strip(data[2:]) 176 | 177 | def _parse_param_list(self, content): 178 | r = Reader(content) 179 | params = [] 180 | while not r.eof(): 181 | header = r.read().strip() 182 | if ' : ' in header: 183 | arg_name, arg_type = header.split(' : ')[:2] 184 | else: 185 | arg_name, arg_type = header, '' 186 | 187 | desc = r.read_to_next_unindented_line() 188 | desc = dedent_lines(desc) 189 | 190 | params.append((arg_name, arg_type, desc)) 191 | 192 | return params 193 | 194 | _name_rgx = re.compile(r"^\s*(:(?P\w+):`(?P[a-zA-Z0-9_.-]+)`|" 195 | r" (?P[a-zA-Z0-9_.-]+))\s*", re.X) 196 | 197 | def _parse_see_also(self, content): 198 | """ 199 | func_name : Descriptive text 200 | continued text 201 | another_func_name : Descriptive text 202 | func_name1, func_name2, :meth:`func_name`, func_name3 203 | 204 | """ 205 | items = [] 206 | 207 | def parse_item_name(text): 208 | """Match ':role:`name`' or 'name'""" 209 | m = self._name_rgx.match(text) 210 | if m: 211 | g = m.groups() 212 | if g[1] is None: 213 | return g[3], None 214 | else: 215 | return g[2], g[1] 216 | raise ValueError("%s is not a item name" % text) 217 | 218 | def push_item(name, rest): 219 | if not name: 220 | return 221 | name, role = parse_item_name(name) 222 | items.append((name, list(rest), role)) 223 | del rest[:] 224 | 225 | current_func = None 226 | rest = [] 227 | 228 | for line in content: 229 | if not line.strip(): 230 | continue 231 | 232 | m = self._name_rgx.match(line) 233 | if m and line[m.end():].strip().startswith(':'): 234 | push_item(current_func, rest) 235 | current_func, line = line[:m.end()], line[m.end():] 236 | rest = [line.split(':', 1)[1].strip()] 237 | if not rest[0]: 238 | rest = [] 239 | elif not line.startswith(' '): 240 | push_item(current_func, rest) 241 | current_func = None 242 | if ',' in line: 243 | for func in line.split(','): 244 | push_item(func, []) 245 | elif line.strip(): 246 | current_func = line 247 | elif current_func is not None: 248 | rest.append(line.strip()) 249 | push_item(current_func, rest) 250 | return items 251 | 252 | def _parse_index(self, section, 
content): 253 | """ 254 | .. index: default 255 | :refguide: something, else, and more 256 | 257 | """ 258 | def strip_each_in(lst): 259 | return [s.strip() for s in lst] 260 | 261 | out = {} 262 | section = section.split('::') 263 | if len(section) > 1: 264 | out['default'] = strip_each_in(section[1].split(','))[0] 265 | for line in content: 266 | line = line.split(':') 267 | if len(line) > 2: 268 | out[line[1]] = strip_each_in(line[2].split(',')) 269 | return out 270 | 271 | def _parse_summary(self): 272 | """Grab signature (if given) and summary""" 273 | if self._is_at_section(): 274 | return 275 | 276 | summary = self._doc.read_to_next_empty_line() 277 | summary_str = " ".join([s.strip() for s in summary]).strip() 278 | if re.compile('^([\w., ]+=)?\s*[\w\.]+\(.*\)$').match(summary_str): 279 | self['Signature'] = summary_str 280 | if not self._is_at_section(): 281 | self['Summary'] = self._doc.read_to_next_empty_line() 282 | else: 283 | self['Summary'] = summary 284 | 285 | if not self._is_at_section(): 286 | self['Extended Summary'] = self._read_to_next_section() 287 | 288 | def _parse(self): 289 | self._doc.reset() 290 | self._parse_summary() 291 | 292 | for (section, content) in self._read_sections(): 293 | if not section.startswith('..'): 294 | section = ' '.join([s.capitalize() 295 | for s in section.split(' ')]) 296 | if section in ('Parameters', 'Attributes', 'Methods', 297 | 'Returns', 'Raises', 'Warns'): 298 | self[section] = self._parse_param_list(content) 299 | elif section.startswith('.. index::'): 300 | self['index'] = self._parse_index(section, content) 301 | elif section == 'See Also': 302 | self['See Also'] = self._parse_see_also(content) 303 | else: 304 | self[section] = content 305 | 306 | # string conversion routines 307 | 308 | def _str_header(self, name, symbol='-'): 309 | return [name, len(name) * symbol] 310 | 311 | def _str_indent(self, doc, indent=4): 312 | out = [] 313 | for line in doc: 314 | out += [' ' * indent + line] 315 | return out 316 | 317 | def _str_signature(self): 318 | if self['Signature']: 319 | return [self['Signature'].replace('*', '\*')] + [''] 320 | else: 321 | return [''] 322 | 323 | def _str_summary(self): 324 | if self['Summary']: 325 | return self['Summary'] + [''] 326 | else: 327 | return [] 328 | 329 | def _str_extended_summary(self): 330 | if self['Extended Summary']: 331 | return self['Extended Summary'] + [''] 332 | else: 333 | return [] 334 | 335 | def _str_param_list(self, name): 336 | out = [] 337 | if self[name]: 338 | out += self._str_header(name) 339 | for param, param_type, desc in self[name]: 340 | out += ['%s : %s' % (param, param_type)] 341 | out += self._str_indent(desc) 342 | out += [''] 343 | return out 344 | 345 | def _str_section(self, name): 346 | out = [] 347 | if self[name]: 348 | out += self._str_header(name) 349 | out += self[name] 350 | out += [''] 351 | return out 352 | 353 | def _str_see_also(self, func_role): 354 | if not self['See Also']: 355 | return [] 356 | out = [] 357 | out += self._str_header("See Also") 358 | last_had_desc = True 359 | for func, desc, role in self['See Also']: 360 | if role: 361 | link = ':%s:`%s`' % (role, func) 362 | elif func_role: 363 | link = ':%s:`%s`' % (func_role, func) 364 | else: 365 | link = "`%s`_" % func 366 | if desc or last_had_desc: 367 | out += [''] 368 | out += [link] 369 | else: 370 | out[-1] += ", %s" % link 371 | if desc: 372 | out += self._str_indent([' '.join(desc)]) 373 | last_had_desc = True 374 | else: 375 | last_had_desc = False 376 | out += [''] 377 | return 
out 378 | 379 | def _str_index(self): 380 | idx = self['index'] 381 | out = [] 382 | out += ['.. index:: %s' % idx.get('default', '')] 383 | for section, references in idx.iteritems(): 384 | if section == 'default': 385 | continue 386 | out += [' :%s: %s' % (section, ', '.join(references))] 387 | return out 388 | 389 | def __str__(self, func_role=''): 390 | out = [] 391 | out += self._str_signature() 392 | out += self._str_summary() 393 | out += self._str_extended_summary() 394 | for param_list in ('Parameters', 'Returns', 'Raises'): 395 | out += self._str_param_list(param_list) 396 | out += self._str_section('Warnings') 397 | out += self._str_see_also(func_role) 398 | for s in ('Notes', 'References', 'Examples'): 399 | out += self._str_section(s) 400 | for param_list in ('Attributes', 'Methods'): 401 | out += self._str_param_list(param_list) 402 | out += self._str_index() 403 | return '\n'.join(out) 404 | 405 | 406 | def indent(str, indent=4): 407 | indent_str = ' ' * indent 408 | if str is None: 409 | return indent_str 410 | lines = str.split('\n') 411 | return '\n'.join(indent_str + l for l in lines) 412 | 413 | 414 | def dedent_lines(lines): 415 | """Deindent a list of lines maximally""" 416 | return textwrap.dedent("\n".join(lines)).split("\n") 417 | 418 | 419 | def header(text, style='-'): 420 | return text + '\n' + style * len(text) + '\n' 421 | 422 | 423 | class FunctionDoc(NumpyDocString): 424 | def __init__(self, func, role='func', doc=None, config={}): 425 | self._f = func 426 | self._role = role # e.g. "func" or "meth" 427 | 428 | if doc is None: 429 | if func is None: 430 | raise ValueError("No function or docstring given") 431 | doc = inspect.getdoc(func) or '' 432 | NumpyDocString.__init__(self, doc) 433 | 434 | if not self['Signature'] and func is not None: 435 | func, func_name = self.get_func() 436 | try: 437 | # try to read signature 438 | argspec = inspect.getargspec(func) 439 | argspec = inspect.formatargspec(*argspec) 440 | argspec = argspec.replace('*', '\*') 441 | signature = '%s%s' % (func_name, argspec) 442 | except TypeError as e: 443 | signature = '%s()' % func_name 444 | self['Signature'] = signature 445 | 446 | def get_func(self): 447 | func_name = getattr(self._f, '__name__', self.__class__.__name__) 448 | if inspect.isclass(self._f): 449 | func = getattr(self._f, '__call__', self._f.__init__) 450 | else: 451 | func = self._f 452 | return func, func_name 453 | 454 | def __str__(self): 455 | out = '' 456 | 457 | func, func_name = self.get_func() 458 | signature = self['Signature'].replace('*', '\*') 459 | 460 | roles = {'func': 'function', 461 | 'meth': 'method'} 462 | 463 | if self._role: 464 | if not roles.has_key(self._role): 465 | print("Warning: invalid role %s" % self._role) 466 | out += '.. %s:: %s\n \n\n' % (roles.get(self._role, ''), 467 | func_name) 468 | 469 | out += super(FunctionDoc, self).__str__(func_role=self._role) 470 | return out 471 | 472 | 473 | class ClassDoc(NumpyDocString): 474 | def __init__(self, cls, doc=None, modulename='', func_doc=FunctionDoc, 475 | config=None): 476 | if not inspect.isclass(cls) and cls is not None: 477 | raise ValueError("Expected a class or None, but got %r" % cls) 478 | self._cls = cls 479 | 480 | if modulename and not modulename.endswith('.'): 481 | modulename += '.' 
482 | self._mod = modulename 483 | 484 | if doc is None: 485 | if cls is None: 486 | raise ValueError("No class or documentation string given") 487 | doc = pydoc.getdoc(cls) 488 | 489 | NumpyDocString.__init__(self, doc) 490 | 491 | if config is not None and config.get('show_class_members', True): 492 | if not self['Methods']: 493 | self['Methods'] = [(name, '', '') 494 | for name in sorted(self.methods)] 495 | if not self['Attributes']: 496 | self['Attributes'] = [(name, '', '') 497 | for name in sorted(self.properties)] 498 | 499 | @property 500 | def methods(self): 501 | if self._cls is None: 502 | return [] 503 | return [name for name, func in inspect.getmembers(self._cls) 504 | if not name.startswith('_') and callable(func)] 505 | 506 | @property 507 | def properties(self): 508 | if self._cls is None: 509 | return [] 510 | return [name for name, func in inspect.getmembers(self._cls) 511 | if not name.startswith('_') and func is None] 512 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/docscrape_sphinx.py: -------------------------------------------------------------------------------- 1 | import re 2 | import inspect 3 | import textwrap 4 | import pydoc 5 | from .docscrape import NumpyDocString 6 | from .docscrape import FunctionDoc 7 | from .docscrape import ClassDoc 8 | 9 | 10 | class SphinxDocString(NumpyDocString): 11 | def __init__(self, docstring, config=None): 12 | config = {} if config is None else config 13 | self.use_plots = config.get('use_plots', False) 14 | NumpyDocString.__init__(self, docstring, config=config) 15 | 16 | # string conversion routines 17 | def _str_header(self, name, symbol='`'): 18 | return ['.. rubric:: ' + name, ''] 19 | 20 | def _str_field_list(self, name): 21 | return [':' + name + ':'] 22 | 23 | def _str_indent(self, doc, indent=4): 24 | out = [] 25 | for line in doc: 26 | out += [' ' * indent + line] 27 | return out 28 | 29 | def _str_signature(self): 30 | return [''] 31 | if self['Signature']: 32 | return ['``%s``' % self['Signature']] + [''] 33 | else: 34 | return [''] 35 | 36 | def _str_summary(self): 37 | return self['Summary'] + [''] 38 | 39 | def _str_extended_summary(self): 40 | return self['Extended Summary'] + [''] 41 | 42 | def _str_param_list(self, name): 43 | out = [] 44 | if self[name]: 45 | out += self._str_field_list(name) 46 | out += [''] 47 | for param, param_type, desc in self[name]: 48 | out += self._str_indent(['**%s** : %s' % (param.strip(), 49 | param_type)]) 50 | out += [''] 51 | out += self._str_indent(desc, 8) 52 | out += [''] 53 | return out 54 | 55 | @property 56 | def _obj(self): 57 | if hasattr(self, '_cls'): 58 | return self._cls 59 | elif hasattr(self, '_f'): 60 | return self._f 61 | return None 62 | 63 | def _str_member_list(self, name): 64 | """ 65 | Generate a member listing, autosummary:: table where possible, 66 | and a table where not. 67 | 68 | """ 69 | out = [] 70 | if self[name]: 71 | out += ['.. rubric:: %s' % name, ''] 72 | prefix = getattr(self, '_name', '') 73 | 74 | if prefix: 75 | prefix = '~%s.' % prefix 76 | 77 | autosum = [] 78 | others = [] 79 | for param, param_type, desc in self[name]: 80 | param = param.strip() 81 | if not self._obj or hasattr(self._obj, param): 82 | autosum += [" %s%s" % (prefix, param)] 83 | else: 84 | others.append((param, param_type, desc)) 85 | 86 | if autosum: 87 | # GAEL: Toctree commented out below because it creates 88 | # hundreds of sphinx warnings 89 | # out += ['.. 
autosummary::', ' :toctree:', ''] 90 | out += ['.. autosummary::', ''] 91 | out += autosum 92 | 93 | if others: 94 | maxlen_0 = max([len(x[0]) for x in others]) 95 | maxlen_1 = max([len(x[1]) for x in others]) 96 | hdr = "=" * maxlen_0 + " " + "=" * maxlen_1 + " " + "=" * 10 97 | fmt = '%%%ds %%%ds ' % (maxlen_0, maxlen_1) 98 | n_indent = maxlen_0 + maxlen_1 + 4 99 | out += [hdr] 100 | for param, param_type, desc in others: 101 | out += [fmt % (param.strip(), param_type)] 102 | out += self._str_indent(desc, n_indent) 103 | out += [hdr] 104 | out += [''] 105 | return out 106 | 107 | def _str_section(self, name): 108 | out = [] 109 | if self[name]: 110 | out += self._str_header(name) 111 | out += [''] 112 | content = textwrap.dedent("\n".join(self[name])).split("\n") 113 | out += content 114 | out += [''] 115 | return out 116 | 117 | def _str_see_also(self, func_role): 118 | out = [] 119 | if self['See Also']: 120 | see_also = super(SphinxDocString, self)._str_see_also(func_role) 121 | out = ['.. seealso::', ''] 122 | out += self._str_indent(see_also[2:]) 123 | return out 124 | 125 | def _str_warnings(self): 126 | out = [] 127 | if self['Warnings']: 128 | out = ['.. warning::', ''] 129 | out += self._str_indent(self['Warnings']) 130 | return out 131 | 132 | def _str_index(self): 133 | idx = self['index'] 134 | out = [] 135 | if len(idx) == 0: 136 | return out 137 | 138 | out += ['.. index:: %s' % idx.get('default', '')] 139 | for section, references in idx.iteritems(): 140 | if section == 'default': 141 | continue 142 | elif section == 'refguide': 143 | out += [' single: %s' % (', '.join(references))] 144 | else: 145 | out += [' %s: %s' % (section, ','.join(references))] 146 | return out 147 | 148 | def _str_references(self): 149 | out = [] 150 | if self['References']: 151 | out += self._str_header('References') 152 | if isinstance(self['References'], str): 153 | self['References'] = [self['References']] 154 | out.extend(self['References']) 155 | out += [''] 156 | # Latex collects all references to a separate bibliography, 157 | # so we need to insert links to it 158 | import sphinx # local import to avoid test dependency 159 | if sphinx.__version__ >= "0.6": 160 | out += ['.. only:: latex', ''] 161 | else: 162 | out += ['.. latexonly::', ''] 163 | items = [] 164 | for line in self['References']: 165 | m = re.match(r'.. \[([a-z0-9._-]+)\]', line, re.I) 166 | if m: 167 | items.append(m.group(1)) 168 | out += [' ' + ", ".join(["[%s]_" % item for item in items]), ''] 169 | return out 170 | 171 | def _str_examples(self): 172 | examples_str = "\n".join(self['Examples']) 173 | 174 | if (self.use_plots and 'import matplotlib' in examples_str 175 | and 'plot::' not in examples_str): 176 | out = [] 177 | out += self._str_header('Examples') 178 | out += ['.. 
plot::', ''] 179 | out += self._str_indent(self['Examples']) 180 | out += [''] 181 | return out 182 | else: 183 | return self._str_section('Examples') 184 | 185 | def __str__(self, indent=0, func_role="obj"): 186 | out = [] 187 | out += self._str_signature() 188 | out += self._str_index() + [''] 189 | out += self._str_summary() 190 | out += self._str_extended_summary() 191 | for param_list in ('Parameters', 'Returns', 'Raises'): 192 | out += self._str_param_list(param_list) 193 | out += self._str_warnings() 194 | out += self._str_see_also(func_role) 195 | out += self._str_section('Notes') 196 | out += self._str_references() 197 | out += self._str_examples() 198 | for param_list in ('Attributes', 'Methods'): 199 | out += self._str_member_list(param_list) 200 | out = self._str_indent(out, indent) 201 | return '\n'.join(out) 202 | 203 | 204 | class SphinxFunctionDoc(SphinxDocString, FunctionDoc): 205 | def __init__(self, obj, doc=None, config={}): 206 | self.use_plots = config.get('use_plots', False) 207 | FunctionDoc.__init__(self, obj, doc=doc, config=config) 208 | 209 | 210 | class SphinxClassDoc(SphinxDocString, ClassDoc): 211 | def __init__(self, obj, doc=None, func_doc=None, config={}): 212 | self.use_plots = config.get('use_plots', False) 213 | ClassDoc.__init__(self, obj, doc=doc, func_doc=None, config=config) 214 | 215 | 216 | class SphinxObjDoc(SphinxDocString): 217 | def __init__(self, obj, doc=None, config=None): 218 | self._f = obj 219 | SphinxDocString.__init__(self, doc, config=config) 220 | 221 | 222 | def get_doc_object(obj, what=None, doc=None, config={}): 223 | if what is None: 224 | if inspect.isclass(obj): 225 | what = 'class' 226 | elif inspect.ismodule(obj): 227 | what = 'module' 228 | elif callable(obj): 229 | what = 'function' 230 | else: 231 | what = 'object' 232 | if what == 'class': 233 | return SphinxClassDoc(obj, func_doc=SphinxFunctionDoc, doc=doc, 234 | config=config) 235 | elif what in ('function', 'method'): 236 | return SphinxFunctionDoc(obj, doc=doc, config=config) 237 | else: 238 | if doc is None: 239 | doc = pydoc.getdoc(obj) 240 | return SphinxObjDoc(obj, doc, config=config) 241 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/numpydoc.py: -------------------------------------------------------------------------------- 1 | """ 2 | ======== 3 | numpydoc 4 | ======== 5 | 6 | Sphinx extension that handles docstrings in the Numpy standard format. [1] 7 | 8 | It will: 9 | 10 | - Convert Parameters etc. sections to field lists. 11 | - Convert See Also section to a See also entry. 12 | - Renumber references. 13 | - Extract the signature from the docstring, if it can't be determined 14 | otherwise. 15 | 16 | .. 
[1] http://projects.scipy.org/numpy/wiki/CodingStyleGuidelines#docstring-standard 17 | 18 | """ 19 | 20 | from __future__ import unicode_literals 21 | 22 | import sys # Only needed to check Python version 23 | import os 24 | import re 25 | import pydoc 26 | from .docscrape_sphinx import get_doc_object 27 | from .docscrape_sphinx import SphinxDocString 28 | import inspect 29 | 30 | 31 | def mangle_docstrings(app, what, name, obj, options, lines, 32 | reference_offset=[0]): 33 | 34 | cfg = dict(use_plots=app.config.numpydoc_use_plots, 35 | show_class_members=app.config.numpydoc_show_class_members) 36 | 37 | if what == 'module': 38 | # Strip top title 39 | title_re = re.compile(r'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*', 40 | re.I | re.S) 41 | lines[:] = title_re.sub('', "\n".join(lines)).split("\n") 42 | else: 43 | doc = get_doc_object(obj, what, "\n".join(lines), config=cfg) 44 | if sys.version_info[0] < 3: 45 | lines[:] = unicode(doc).splitlines() 46 | else: 47 | lines[:] = str(doc).splitlines() 48 | 49 | if app.config.numpydoc_edit_link and hasattr(obj, '__name__') and \ 50 | obj.__name__: 51 | if hasattr(obj, '__module__'): 52 | v = dict(full_name="%s.%s" % (obj.__module__, obj.__name__)) 53 | else: 54 | v = dict(full_name=obj.__name__) 55 | lines += [u'', u'.. htmlonly::', ''] 56 | lines += [u' %s' % x for x in 57 | (app.config.numpydoc_edit_link % v).split("\n")] 58 | 59 | # replace reference numbers so that there are no duplicates 60 | references = [] 61 | for line in lines: 62 | line = line.strip() 63 | m = re.match(r'^.. \[([a-z0-9_.-])\]', line, re.I) 64 | if m: 65 | references.append(m.group(1)) 66 | 67 | # start renaming from the longest string, to avoid overwriting parts 68 | references.sort(key=lambda x: -len(x)) 69 | if references: 70 | for i, line in enumerate(lines): 71 | for r in references: 72 | if re.match(r'^\d+$', r): 73 | new_r = "R%d" % (reference_offset[0] + int(r)) 74 | else: 75 | new_r = u"%s%d" % (r, reference_offset[0]) 76 | lines[i] = lines[i].replace(u'[%s]_' % r, 77 | u'[%s]_' % new_r) 78 | lines[i] = lines[i].replace(u'.. [%s]' % r, 79 | u'.. 
[%s]' % new_r) 80 | 81 | reference_offset[0] += len(references) 82 | 83 | 84 | def mangle_signature(app, what, name, obj, 85 | options, sig, retann): 86 | # Do not try to inspect classes that don't define `__init__` 87 | if (inspect.isclass(obj) and 88 | (not hasattr(obj, '__init__') or 89 | 'initializes x; see ' in pydoc.getdoc(obj.__init__))): 90 | return '', '' 91 | 92 | if not (callable(obj) or hasattr(obj, '__argspec_is_invalid_')): 93 | return 94 | if not hasattr(obj, '__doc__'): 95 | return 96 | 97 | doc = SphinxDocString(pydoc.getdoc(obj)) 98 | if doc['Signature']: 99 | sig = re.sub("^[^(]*", "", doc['Signature']) 100 | return sig, '' 101 | 102 | 103 | def setup(app, get_doc_object_=get_doc_object): 104 | global get_doc_object 105 | get_doc_object = get_doc_object_ 106 | 107 | if sys.version_info[0] < 3: 108 | app.connect(b'autodoc-process-docstring', mangle_docstrings) 109 | app.connect(b'autodoc-process-signature', mangle_signature) 110 | else: 111 | app.connect('autodoc-process-docstring', mangle_docstrings) 112 | app.connect('autodoc-process-signature', mangle_signature) 113 | app.add_config_value('numpydoc_edit_link', None, False) 114 | app.add_config_value('numpydoc_use_plots', None, False) 115 | app.add_config_value('numpydoc_show_class_members', True, True) 116 | 117 | # Extra mangling domains 118 | app.add_domain(NumpyPythonDomain) 119 | app.add_domain(NumpyCDomain) 120 | 121 | #----------------------------------------------------------------------------- 122 | # Docstring-mangling domains 123 | #----------------------------------------------------------------------------- 124 | 125 | try: 126 | import sphinx # lazy to avoid test dependency 127 | except ImportError: 128 | CDomain = PythonDomain = object 129 | else: 130 | from sphinx.domains.c import CDomain 131 | from sphinx.domains.python import PythonDomain 132 | 133 | 134 | class ManglingDomainBase(object): 135 | directive_mangling_map = {} 136 | 137 | def __init__(self, *a, **kw): 138 | super(ManglingDomainBase, self).__init__(*a, **kw) 139 | self.wrap_mangling_directives() 140 | 141 | def wrap_mangling_directives(self): 142 | for name, objtype in self.directive_mangling_map.items(): 143 | self.directives[name] = wrap_mangling_directive( 144 | self.directives[name], objtype) 145 | 146 | 147 | class NumpyPythonDomain(ManglingDomainBase, PythonDomain): 148 | name = 'np' 149 | directive_mangling_map = { 150 | 'function': 'function', 151 | 'class': 'class', 152 | 'exception': 'class', 153 | 'method': 'function', 154 | 'classmethod': 'function', 155 | 'staticmethod': 'function', 156 | 'attribute': 'attribute', 157 | } 158 | 159 | 160 | class NumpyCDomain(ManglingDomainBase, CDomain): 161 | name = 'np-c' 162 | directive_mangling_map = { 163 | 'function': 'function', 164 | 'member': 'attribute', 165 | 'macro': 'function', 166 | 'type': 'class', 167 | 'var': 'object', 168 | } 169 | 170 | 171 | def wrap_mangling_directive(base_directive, objtype): 172 | class directive(base_directive): 173 | def run(self): 174 | env = self.state.document.settings.env 175 | 176 | name = None 177 | if self.arguments: 178 | m = re.match(r'^(.*\s+)?(.*?)(\(.*)?', self.arguments[0]) 179 | name = m.group(2).strip() 180 | 181 | if not name: 182 | name = self.arguments[0] 183 | 184 | lines = list(self.content) 185 | mangle_docstrings(env.app, objtype, name, None, None, lines) 186 | # local import to avoid testing dependency 187 | from docutils.statemachine import ViewList 188 | self.content = ViewList(lines, self.content.parent) 189 | 190 | return 
base_directive.run(self) 191 | 192 | return directive 193 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _general_examples: 2 | 3 | 4 | General-purpose and introductory examples. 5 | -------------------------------------------------------------------------------- /examples/plot_randomized_output_decision_tree.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================= 3 | Growing tree on a randomized output space 4 | ========================================= 5 | 6 | The bottleneck of random forest on multi-label and multi-output regression 7 | tasks with many outputs is the computation of the impurity measure at 8 | each tree node for each possible split. 9 | 10 | Growing a tree on lower dimensional random output subspace allow to decrease 11 | computing time while having the same or improved performance with a sufficient 12 | number of projections. 13 | 14 | """ 15 | from __future__ import division 16 | from time import time 17 | 18 | import numpy as np 19 | import matplotlib.pyplot as plt 20 | 21 | from sklearn.base import clone 22 | from sklearn.cross_validation import train_test_split 23 | from sklearn.random_projection import SparseRandomProjection 24 | from sklearn.metrics import label_ranking_average_precision_score as lrap_score 25 | 26 | from random_output_trees.datasets import fetch_drug_interaction 27 | from random_output_trees.ensemble import RandomForestClassifier 28 | 29 | random_state = np.random.RandomState(0) 30 | 31 | # Let's load a multilabel dataset 32 | dataset = fetch_drug_interaction() 33 | X = dataset.data 34 | y = dataset.target # y.shape = (1862, 1554) 35 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, 36 | random_state=0) 37 | n_outputs = y.shape[1] 38 | 39 | 40 | def benchmark(base_estimator, random_state=None, n_iter=3): 41 | scores = [] 42 | times = [] 43 | for iter_ in range(n_iter): 44 | estimator = clone(base_estimator) 45 | estimator.set_params(random_state=random_state) 46 | 47 | time_start = time() 48 | estimator.fit(X_train, y_train) 49 | times.append(time() - time_start) 50 | 51 | y_proba_pred = estimator.predict_proba(X_test) 52 | y_scores = 1 - np.vstack([p[:, 0] for p in y_proba_pred]).T 53 | scores.append(lrap_score(y_test, y_scores)) 54 | 55 | return scores, times 56 | 57 | 58 | # NB: Increase the number of estimators to improve performance 59 | n_estimators = 20 60 | 61 | # Let's learn a random forest model 62 | rf = RandomForestClassifier(n_estimators=n_estimators, 63 | random_state=0) 64 | rf_score, rf_times = benchmark(rf, random_state) 65 | 66 | rf_score_mean = np.mean(rf_score) 67 | rf_score_std = np.std(rf_score) 68 | 69 | rf_times_mean = np.mean(rf_times) 70 | rf_times_std = np.std(rf_times) 71 | 72 | # Let's learn random forest on a Gaussian subspace 73 | all_n_components = np.ceil(np.array([1, 5, 10, 50, 100])) 74 | all_n_components = all_n_components.astype(int) 75 | scores_mean = [] 76 | scores_std = [] 77 | times_mean = [] 78 | times_std = [] 79 | 80 | for i, n_components in enumerate(all_n_components): 81 | # First instatiate a transformer to modify the output space 82 | output_transformer = SparseRandomProjection(n_components=n_components, 83 | random_state=0) 84 | 85 | # To fix the random output space for each estimator 86 | # Uncomment the following lines 87 | # from 
random_output_trees.transformer import FixedStateTransformer 88 | # output_transformer = FixedStateTransformer(output_transformer, 89 | # random_seed=0) 90 | 91 | # Let's learn random forest on randomized subspace 92 | gaussian_rf = RandomForestClassifier(n_estimators=n_estimators, 93 | output_transformer=output_transformer, 94 | random_state=0) 95 | 96 | scores, times = benchmark(gaussian_rf, random_state) 97 | scores_mean.append(np.mean(scores)) 98 | scores_std.append(np.std(scores)) 99 | times_mean.append(np.mean(times)) 100 | times_std.append(np.std(times)) 101 | 102 | scores_mean = np.array(scores_mean) 103 | scores_std = np.array(scores_std) 104 | times_mean = np.array(times_mean) 105 | times_std = np.array(times_std) 106 | 107 | # Let's plot the outcome of the experiments 108 | fraction_outputs = all_n_components / n_outputs 109 | 110 | plt.figure() 111 | plt.plot(fraction_outputs, rf_score_mean * np.ones_like(fraction_outputs), 112 | "-o", color='r', label="Original output space") 113 | plt.fill_between(fraction_outputs, 114 | rf_score_mean - rf_score_std, 115 | rf_score_mean + rf_score_std, alpha=0.25, color="r") 116 | plt.plot(fraction_outputs, scores_mean, "-o", color='g', 117 | label="Sparse rademacher output subspace") 118 | plt.fill_between(fraction_outputs, 119 | scores_mean - scores_std, 120 | scores_mean + scores_std, alpha=0.25, color="g") 121 | plt.legend(loc="best") 122 | plt.xlabel("n_components / n_outputs") 123 | plt.ylabel("Label ranking average precision") 124 | plt.show() 125 | 126 | 127 | plt.figure() 128 | plt.plot(fraction_outputs, rf_times_mean * np.ones_like(fraction_outputs), 129 | "-o", color='r', label="Original output space") 130 | plt.fill_between(fraction_outputs, 131 | rf_times_mean - rf_times_std, 132 | rf_times_mean + rf_times_std, alpha=0.25, color="r") 133 | plt.plot(fraction_outputs, times_mean, "-o", color='g', 134 | label="Sparse rademacher output subspace") 135 | plt.fill_between(fraction_outputs, 136 | times_mean - times_std, 137 | times_mean + times_std, alpha=0.25, color="g") 138 | plt.legend(loc="best") 139 | plt.ylim((0., max(np.max(times_mean + times_std), 140 | rf_times_mean + rf_times_std) * 1.1)) 141 | plt.xlabel("n_components / n_outputs") 142 | plt.ylabel("Time [s]") 143 | plt.show() 144 | -------------------------------------------------------------------------------- /examples/plot_variance_preservation.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================================== 3 | Variance is preserved under random projections 4 | ============================================== 5 | 6 | If a random projection matrix satisfies the Johson-Lindenstrauss, then 7 | the variance computed on the randomly projected output space is equal to 8 | the variance on the original output space up to an epsilon factor. 9 | 10 | This is an illustration of Theorem 1 from the paper 11 | 12 | Joly, A., Geurts, P., & Wehenkel, L. (2014). Random forests with 13 | random projections of the output space for high dimensional multi-label 14 | classification. 
ECML-PKDD 2014, Nancy, France 15 | 16 | """ 17 | from __future__ import division 18 | import numpy as np 19 | 20 | import matplotlib.pyplot as plt 21 | from sklearn.random_projection import GaussianRandomProjection 22 | 23 | random_state = np.random.RandomState(0) 24 | 25 | # Let's first generate a set of samples 26 | n_samples = 2000 27 | n_outputs = 500 28 | X = 3 + 5 * random_state.normal(size=(n_samples, n_outputs)) 29 | 30 | # Let's compute the sum of the variance in the orignal output space 31 | var_origin = np.var(X, axis=0).sum() 32 | 33 | # Let's compute the variance on a random subspace 34 | all_n_components = np.array([1, 50, 100, 200, 400, 500]) 35 | n_repetitions = 10 36 | distortion = np.empty((len(all_n_components), n_repetitions)) 37 | 38 | for i, n_components in enumerate(all_n_components): 39 | for j in range(n_repetitions): 40 | transformer = GaussianRandomProjection(n_components=n_components, 41 | random_state=random_state) 42 | X_subspace = transformer.fit_transform(X) 43 | distortion[i, j] = np.var(X_subspace, axis=0).sum() / var_origin 44 | 45 | # Let's plot the distortion as a function of the compression ratio 46 | distortion_mean = distortion.mean(axis=1) 47 | distortion_std = distortion.std(axis=1) 48 | 49 | plt.figure() 50 | plt.plot(all_n_components / n_outputs, distortion_mean, "o-", color="g") 51 | plt.plot(all_n_components / n_outputs, np.ones_like(distortion_mean), 52 | "--", color="r") 53 | plt.fill_between(all_n_components / n_outputs, 54 | distortion_mean - distortion_std, 55 | distortion_mean + distortion_std, alpha=0.25, color="g") 56 | plt.xlabel("n_components / n_outputs") 57 | plt.ylabel('Distortion of the variance on a Gaussian subspace') 58 | plt.show() 59 | -------------------------------------------------------------------------------- /random_output_trees/__init__.py: -------------------------------------------------------------------------------- 1 | # Author : Arnaud Joly 2 | # 3 | # License: BSD 3 clause 4 | 5 | __version__ = "dev" 6 | 7 | -------------------------------------------------------------------------------- /random_output_trees/_sklearn_tree.pxd: -------------------------------------------------------------------------------- 1 | # Authors: Gilles Louppe 2 | # Peter Prettenhofer 3 | # Brian Holt 4 | # Joel Nothman 5 | # Arnaud Joly 6 | # 7 | # Licence: BSD 3 clause 8 | 9 | # See _tree.pyx for details. 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | ctypedef np.npy_float32 DTYPE_t # Type of X 15 | ctypedef np.npy_float64 DOUBLE_t # Type of y, sample_weight 16 | ctypedef np.npy_intp SIZE_t # Type for indices and counters 17 | ctypedef np.npy_int32 INT32_t # Signed 32 bit integer 18 | ctypedef np.npy_uint32 UINT32_t # Unsigned 32 bit integer 19 | 20 | 21 | # ============================================================================= 22 | # Criterion 23 | # ============================================================================= 24 | 25 | cdef class Criterion: 26 | # The criterion computes the impurity of a node and the reduction of 27 | # impurity of a split on that node. It also computes the output statistics 28 | # such as the mean in regression and class probabilities in classification. 
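    # Rough sketch (illustrative, not part of the original declarations): a
    # candidate split of the node samples S into S_left and S_right is scored
    # by the weighted impurity improvement
    #
    #     improvement = (n_S / n_total) * (impurity(S)
    #                                      - (n_left / n_S) * impurity(S_left)
    #                                      - (n_right / n_S) * impurity(S_right))
    #
    # where the n_* are weighted sample counts. node_impurity(),
    # children_impurity() and impurity_improvement() below supply these
    # pieces, updated incrementally as `pos` sweeps over samples[start:end].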
29 | 30 | # Internal structures 31 | cdef DOUBLE_t* y # Values of y 32 | cdef SIZE_t y_stride # Stride in y (since n_outputs >= 1) 33 | cdef DOUBLE_t* sample_weight # Sample weights 34 | 35 | cdef SIZE_t* samples # Sample indices in X, y 36 | cdef SIZE_t start # samples[start:pos] are the samples in the left node 37 | cdef SIZE_t pos # samples[pos:end] are the samples in the right node 38 | cdef SIZE_t end 39 | 40 | cdef SIZE_t n_outputs # Number of outputs 41 | cdef SIZE_t n_node_samples # Number of samples in the node (end-start) 42 | cdef double weighted_n_samples # Weighted number of samples (in total) 43 | cdef double weighted_n_node_samples # Weighted number of samples in the node 44 | cdef double weighted_n_left # Weighted number of samples in the left node 45 | cdef double weighted_n_right # Weighted number of samples in the right node 46 | 47 | # The criterion object is maintained such that left and right collected 48 | # statistics correspond to samples[start:pos] and samples[pos:end]. 49 | 50 | # Methods 51 | cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, 52 | double weighted_n_samples, SIZE_t* samples, SIZE_t start, 53 | SIZE_t end) nogil 54 | cdef void reset(self) nogil 55 | cdef void update(self, SIZE_t new_pos) nogil 56 | cdef double node_impurity(self) nogil 57 | cdef void children_impurity(self, double* impurity_left, 58 | double* impurity_right) nogil 59 | cdef void node_value(self, double* dest) nogil 60 | cdef double impurity_improvement(self, double impurity) nogil 61 | 62 | 63 | # ============================================================================= 64 | # Splitter 65 | # ============================================================================= 66 | 67 | cdef struct SplitRecord: 68 | # Data to track sample split 69 | SIZE_t feature # Which feature to split on. 70 | SIZE_t pos # Split samples array at the given position, 71 | # i.e. count of samples below threshold for feature. 72 | # pos is >= end if the node is a leaf. 73 | double threshold # Threshold to split at. 74 | double improvement # Impurity improvement given parent node. 75 | double impurity_left # Impurity of the left split. 76 | double impurity_right # Impurity of the right split. 77 | 78 | 79 | cdef class Splitter: 80 | # The splitter searches in the input space for a feature and a threshold 81 | # to split the samples samples[start:end]. 82 | # 83 | # The impurity computations are delegated to a criterion object. 84 | 85 | # Internal structures 86 | cdef public Criterion criterion # Impurity criterion 87 | cdef public SIZE_t max_features # Number of features to test 88 | cdef public SIZE_t min_samples_leaf # Min samples in a leaf 89 | cdef public double min_weight_leaf # Minimum weight in a leaf 90 | 91 | cdef object random_state # Random state 92 | cdef UINT32_t rand_r_state # sklearn_rand_r random number state 93 | 94 | cdef SIZE_t* samples # Sample indices in X, y 95 | cdef SIZE_t n_samples # X.shape[0] 96 | cdef double weighted_n_samples # Weighted number of samples 97 | cdef SIZE_t* features # Feature indices in X 98 | cdef SIZE_t* constant_features # Constant features indices 99 | cdef SIZE_t n_features # X.shape[1] 100 | cdef DTYPE_t* feature_values # temp. 
array holding feature values 101 | cdef SIZE_t start # Start position for the current node 102 | cdef SIZE_t end # End position for the current node 103 | 104 | cdef DTYPE_t* X 105 | cdef SIZE_t X_sample_stride 106 | cdef SIZE_t X_fx_stride 107 | cdef DOUBLE_t* y 108 | cdef SIZE_t y_stride 109 | cdef DOUBLE_t* sample_weight 110 | 111 | # The samples vector `samples` is maintained by the Splitter object such 112 | # that the samples contained in a node are contiguous. With this setting, 113 | # `node_split` reorganizes the node samples `samples[start:end]` in two 114 | # subsets `samples[start:pos]` and `samples[pos:end]`. 115 | 116 | # The 1-d `features` array of size n_features contains the features 117 | # indices and allows fast sampling without replacement of features. 118 | 119 | # The 1-d `constant_features` array of size n_features holds in 120 | # `constant_features[:n_constant_features]` the feature ids with 121 | # constant values for all the samples that reached a specific node. 122 | # The value `n_constant_features` is given by the the parent node to its 123 | # child nodes. The content of the range `[n_constant_features:]` is left 124 | # undefined, but preallocated for performance reasons 125 | # This allows optimization with depth-based tree building. 126 | 127 | # Methods 128 | cdef void init(self, np.ndarray X, np.ndarray y, DOUBLE_t* sample_weight) 129 | 130 | cdef void node_reset(self, SIZE_t start, SIZE_t end, 131 | double* weighted_n_node_samples) nogil 132 | 133 | cdef void node_split(self, 134 | double impurity, # Impurity of the node 135 | SplitRecord* split, 136 | SIZE_t* n_constant_features) nogil 137 | 138 | cdef void node_value(self, double* dest) nogil 139 | 140 | cdef double node_impurity(self) nogil 141 | 142 | 143 | # ============================================================================= 144 | # Tree 145 | # ============================================================================= 146 | 147 | cdef struct Node: 148 | # Base storage structure for the nodes in a Tree object 149 | 150 | SIZE_t left_child # id of the left child of the node 151 | SIZE_t right_child # id of the right child of the node 152 | SIZE_t feature # Feature used for splitting the node 153 | DOUBLE_t threshold # Threshold value at the node 154 | DOUBLE_t impurity # Impurity of the node (i.e., the value of the criterion) 155 | SIZE_t n_node_samples # Number of samples at the node 156 | DOUBLE_t weighted_n_node_samples # Weighted number of samples at the node 157 | 158 | cdef class Tree: 159 | # The Tree object is a binary tree structure constructed by the 160 | # TreeBuilder. The tree structure is used for predictions and 161 | # feature importances. 162 | 163 | # Input/Output layout 164 | cdef public SIZE_t n_features # Number of features in X 165 | cdef SIZE_t* n_classes # Number of classes in y[:, k] 166 | cdef public SIZE_t n_outputs # Number of outputs in y 167 | cdef public SIZE_t max_n_classes # max(n_classes) 168 | 169 | # Inner structures: values are stored separately from node structure, 170 | # since size is determined at runtime. 
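    # With this layout, `value` is a flat C array of logical shape
    # (capacity, n_outputs, max_n_classes): the statistics for output k and
    # class c of node i live at value[i * value_stride + k * max_n_classes + c],
    # with value_stride = n_outputs * max_n_classes (see the fields below).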
171 | cdef public SIZE_t max_depth # Max depth of the tree 172 | cdef public SIZE_t node_count # Counter for node IDs 173 | cdef public SIZE_t capacity # Capacity of tree, in terms of nodes 174 | cdef Node* nodes # Array of nodes 175 | cdef double* value # (capacity, n_outputs, max_n_classes) array of values 176 | cdef SIZE_t value_stride # = n_outputs * max_n_classes 177 | 178 | # Methods 179 | cdef SIZE_t _add_node(self, SIZE_t parent, bint is_left, bint is_leaf, 180 | SIZE_t feature, double threshold, double impurity, 181 | SIZE_t n_node_samples, 182 | double weighted_n_samples) nogil 183 | cdef void _resize(self, SIZE_t capacity) 184 | cdef int _resize_c(self, SIZE_t capacity=*) nogil 185 | 186 | cdef np.ndarray _get_value_ndarray(self) 187 | cdef np.ndarray _get_node_ndarray(self) 188 | 189 | cpdef np.ndarray predict(self, np.ndarray[DTYPE_t, ndim=2] X) 190 | cpdef np.ndarray apply(self, np.ndarray[DTYPE_t, ndim=2] X) 191 | cpdef compute_feature_importances(self, normalize=*) 192 | 193 | 194 | # ============================================================================= 195 | # Tree builder 196 | # ============================================================================= 197 | 198 | cdef class TreeBuilder: 199 | # The TreeBuilder recursively builds a Tree object from training samples, 200 | # using a Splitter object for splitting internal nodes and assigning 201 | # values to leaves. 202 | # 203 | # This class controls the various stopping criteria and the node splitting 204 | # evaluation order, e.g. depth-first or best-first. 205 | 206 | cdef Splitter splitter # Splitting algorithm 207 | 208 | cdef SIZE_t min_samples_split # Minimum number of samples in an internal node 209 | cdef SIZE_t min_samples_leaf # Minimum number of samples in a leaf 210 | cdef double min_weight_leaf # Minimum weight in a leaf 211 | cdef SIZE_t max_depth # Maximal tree depth 212 | 213 | cpdef build(self, Tree tree, np.ndarray X, np.ndarray y, 214 | np.ndarray sample_weight=*) 215 | -------------------------------------------------------------------------------- /random_output_trees/_sklearn_tree_utils.pxd: -------------------------------------------------------------------------------- 1 | # Authors: Gilles Louppe 2 | # Peter Prettenhofer 3 | # Arnaud Joly 4 | # 5 | # Licence: BSD 3 clause 6 | 7 | # See _utils.pyx for details. 
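# This header declares the Stack (depth-first growing) and PriorityHeap
# (best-first growing) helpers used by the tree builders.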
8 | 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | ctypedef np.npy_intp SIZE_t # Type for indices and counters 13 | 14 | 15 | # ============================================================================= 16 | # Stack data structure 17 | # ============================================================================= 18 | 19 | # A record on the stack for depth-first tree growing 20 | cdef struct StackRecord: 21 | SIZE_t start 22 | SIZE_t end 23 | SIZE_t depth 24 | SIZE_t parent 25 | bint is_left 26 | double impurity 27 | SIZE_t n_constant_features 28 | 29 | cdef class Stack: 30 | cdef SIZE_t capacity 31 | cdef SIZE_t top 32 | cdef StackRecord* stack_ 33 | 34 | cdef bint is_empty(self) nogil 35 | cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth, SIZE_t parent, 36 | bint is_left, double impurity, 37 | SIZE_t n_constant_features) nogil 38 | cdef int pop(self, StackRecord* res) nogil 39 | 40 | 41 | # ============================================================================= 42 | # PriorityHeap data structure 43 | # ============================================================================= 44 | 45 | # A record on the frontier for best-first tree growing 46 | cdef struct PriorityHeapRecord: 47 | SIZE_t node_id 48 | SIZE_t start 49 | SIZE_t end 50 | SIZE_t pos 51 | SIZE_t depth 52 | bint is_leaf 53 | double impurity 54 | double impurity_left 55 | double impurity_right 56 | double improvement 57 | 58 | cdef class PriorityHeap: 59 | cdef SIZE_t capacity 60 | cdef SIZE_t heap_ptr 61 | cdef PriorityHeapRecord* heap_ 62 | 63 | cdef bint is_empty(self) nogil 64 | cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end, SIZE_t pos, 65 | SIZE_t depth, bint is_leaf, double improvement, 66 | double impurity, double impurity_left, 67 | double impurity_right) nogil 68 | cdef int pop(self, PriorityHeapRecord* res) nogil 69 | -------------------------------------------------------------------------------- /random_output_trees/_sklearn_tree_utils.pyx: -------------------------------------------------------------------------------- 1 | # cython: cdivision=True 2 | # cython: boundscheck=False 3 | # cython: wraparound=False 4 | 5 | # Authors: Gilles Louppe 6 | # Peter Prettenhofer 7 | # Arnaud Joly 8 | # 9 | # Licence: BSD 3 clause 10 | 11 | from libc.stdlib cimport free, malloc, realloc 12 | 13 | 14 | # This file is taken from scikit-learn to allow easy installation 15 | # and not to rely on a specific version of scikit-learn decision tree 16 | # implementation 17 | 18 | # ============================================================================= 19 | # Stack data structure 20 | # ============================================================================= 21 | 22 | cdef class Stack: 23 | """A LIFO data structure. 24 | 25 | Attributes 26 | ---------- 27 | capacity : SIZE_t 28 | The elements the stack can hold; if more added then ``self.stack_`` 29 | needs to be resized. 30 | 31 | top : SIZE_t 32 | The number of elements currently on the stack. 33 | 34 | stack : StackRecord pointer 35 | The stack of records (upward in the stack corresponds to the right). 
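        Storage is doubled (via realloc) whenever a push would exceed the
        current capacity.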
36 | """ 37 | 38 | def __cinit__(self, SIZE_t capacity): 39 | self.capacity = capacity 40 | self.top = 0 41 | self.stack_ = malloc(capacity * sizeof(StackRecord)) 42 | if self.stack_ == NULL: 43 | raise MemoryError() 44 | 45 | def __dealloc__(self): 46 | free(self.stack_) 47 | 48 | cdef bint is_empty(self) nogil: 49 | return self.top <= 0 50 | 51 | cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth, SIZE_t parent, 52 | bint is_left, double impurity, 53 | SIZE_t n_constant_features) nogil: 54 | """Push a new element onto the stack. 55 | 56 | Returns 0 if successful; -1 on out of memory error. 57 | """ 58 | cdef SIZE_t top = self.top 59 | cdef StackRecord* stack = NULL 60 | 61 | # Resize if capacity not sufficient 62 | if top >= self.capacity: 63 | self.capacity *= 2 64 | stack = realloc(self.stack_, 65 | self.capacity * sizeof(StackRecord)) 66 | if stack == NULL: 67 | # no free; __dealloc__ handles that 68 | return -1 69 | self.stack_ = stack 70 | 71 | stack = self.stack_ 72 | stack[top].start = start 73 | stack[top].end = end 74 | stack[top].depth = depth 75 | stack[top].parent = parent 76 | stack[top].is_left = is_left 77 | stack[top].impurity = impurity 78 | stack[top].n_constant_features = n_constant_features 79 | 80 | # Increment stack pointer 81 | self.top = top + 1 82 | return 0 83 | 84 | cdef int pop(self, StackRecord* res) nogil: 85 | """Remove the top element from the stack and copy to ``res``. 86 | 87 | Returns 0 if pop was successful (and ``res`` is set); -1 88 | otherwise. 89 | """ 90 | cdef SIZE_t top = self.top 91 | cdef StackRecord* stack = self.stack_ 92 | 93 | if top <= 0: 94 | return -1 95 | 96 | res[0] = stack[top - 1] 97 | self.top = top - 1 98 | 99 | return 0 100 | 101 | 102 | # ============================================================================= 103 | # PriorityHeap data structure 104 | # ============================================================================= 105 | 106 | cdef void heapify_up(PriorityHeapRecord* heap, SIZE_t pos) nogil: 107 | """Restore heap invariant parent.improvement > child.improvement from 108 | ``pos`` upwards. """ 109 | if pos == 0: 110 | return 111 | 112 | cdef SIZE_t parent_pos = (pos - 1) / 2 113 | 114 | if heap[parent_pos].improvement < heap[pos].improvement: 115 | heap[parent_pos], heap[pos] = heap[pos], heap[parent_pos] 116 | heapify_up(heap, parent_pos) 117 | 118 | 119 | cdef void heapify_down(PriorityHeapRecord* heap, SIZE_t pos, 120 | SIZE_t heap_length) nogil: 121 | """Restore heap invariant parent.improvement > children.improvement from 122 | ``pos`` downwards. """ 123 | cdef SIZE_t left_pos = 2 * (pos + 1) - 1 124 | cdef SIZE_t right_pos = 2 * (pos + 1) 125 | cdef SIZE_t largest = pos 126 | 127 | if (left_pos < heap_length and 128 | heap[left_pos].improvement > heap[largest].improvement): 129 | largest = left_pos 130 | 131 | if (right_pos < heap_length and 132 | heap[right_pos].improvement > heap[largest].improvement): 133 | largest = right_pos 134 | 135 | if largest != pos: 136 | heap[pos], heap[largest] = heap[largest], heap[pos] 137 | heapify_down(heap, largest, heap_length) 138 | 139 | 140 | cdef class PriorityHeap: 141 | """A priority queue implemented as a binary heap. 142 | 143 | The heap invariant is that the impurity improvement of the parent record 144 | is larger then the impurity improvement of the children. 
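    In other words, this is a max-heap keyed on ``improvement``: ``pop``
    always returns the pending split with the largest impurity improvement.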
145 | 146 | Attributes 147 | ---------- 148 | capacity : SIZE_t 149 | The capacity of the heap 150 | 151 | heap_ptr : SIZE_t 152 | The water mark of the heap; the heap grows from left to right in the 153 | array ``heap_``. The following invariant holds ``heap_ptr < capacity``. 154 | 155 | heap_ : PriorityHeapRecord* 156 | The array of heap records. The maximum element is on the left; 157 | the heap grows from left to right 158 | """ 159 | 160 | def __cinit__(self, SIZE_t capacity): 161 | self.capacity = capacity 162 | self.heap_ptr = 0 163 | self.heap_ = malloc(capacity * sizeof(PriorityHeapRecord)) 164 | if self.heap_ == NULL: 165 | raise MemoryError() 166 | 167 | def __dealloc__(self): 168 | free(self.heap_) 169 | 170 | cdef bint is_empty(self) nogil: 171 | return self.heap_ptr <= 0 172 | 173 | cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end, SIZE_t pos, 174 | SIZE_t depth, bint is_leaf, double improvement, 175 | double impurity, double impurity_left, 176 | double impurity_right) nogil: 177 | """Push record on the priority heap. 178 | 179 | Returns 0 if successful; -1 on out of memory error. 180 | """ 181 | cdef SIZE_t heap_ptr = self.heap_ptr 182 | cdef PriorityHeapRecord* heap = NULL 183 | 184 | # Resize if capacity not sufficient 185 | if heap_ptr >= self.capacity: 186 | self.capacity *= 2 187 | heap = realloc(self.heap_, 188 | self.capacity * 189 | sizeof(PriorityHeapRecord)) 190 | if heap == NULL: 191 | # no free; __dealloc__ handles that 192 | return -1 193 | self.heap_ = heap 194 | 195 | # Put element as last element of heap 196 | heap = self.heap_ 197 | heap[heap_ptr].node_id = node_id 198 | heap[heap_ptr].start = start 199 | heap[heap_ptr].end = end 200 | heap[heap_ptr].pos = pos 201 | heap[heap_ptr].depth = depth 202 | heap[heap_ptr].is_leaf = is_leaf 203 | heap[heap_ptr].impurity = impurity 204 | heap[heap_ptr].impurity_left = impurity_left 205 | heap[heap_ptr].impurity_right = impurity_right 206 | heap[heap_ptr].improvement = improvement 207 | 208 | # Heapify up 209 | heapify_up(heap, heap_ptr) 210 | 211 | # Increase element count 212 | self.heap_ptr = heap_ptr + 1 213 | return 0 214 | 215 | cdef int pop(self, PriorityHeapRecord* res) nogil: 216 | """Remove max element from the heap. 
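
        Returns 0 if the maximum element was popped (and copied to ``res``);
        -1 if the heap is empty.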
""" 217 | cdef SIZE_t heap_ptr = self.heap_ptr 218 | cdef PriorityHeapRecord* heap = self.heap_ 219 | 220 | if heap_ptr <= 0: 221 | return -1 222 | 223 | # Take first element 224 | res[0] = heap[0] 225 | 226 | # Put last element to the front 227 | heap[0], heap[heap_ptr - 1] = heap[heap_ptr - 1], heap[0] 228 | 229 | # Restore heap invariant 230 | if heap_ptr > 1: 231 | heapify_down(heap, 0, heap_ptr - 1) 232 | 233 | self.heap_ptr = heap_ptr - 1 234 | 235 | return 0 236 | -------------------------------------------------------------------------------- /random_output_trees/_tree.pyx: -------------------------------------------------------------------------------- 1 | # cython: cdivision=True 2 | # cython: boundscheck=False 3 | # cython: wraparound=False 4 | 5 | # Authors: Arnaud Joly 6 | # 7 | # Licence: BSD 3 clause 8 | 9 | from libc.stdlib cimport calloc, free, malloc, realloc 10 | from libc.string cimport memcpy, memset 11 | from libc.math cimport log as ln 12 | from libc.math cimport floor 13 | 14 | 15 | 16 | import numpy as np 17 | cimport numpy as np 18 | np.import_array() 19 | 20 | 21 | # ============================================================================= 22 | # Types and constants 23 | # ============================================================================= 24 | 25 | from numpy import float32 as DTYPE 26 | from numpy import float64 as DOUBLE 27 | 28 | 29 | # ============================================================================= 30 | # Scikit-learn import 31 | # ============================================================================= 32 | 33 | # Criterion 34 | from ._sklearn_tree import Criterion 35 | from ._sklearn_tree cimport Criterion 36 | from ._sklearn_tree import MSE 37 | 38 | from ._sklearn_tree import Splitter 39 | from ._sklearn_tree cimport Splitter 40 | from ._sklearn_tree cimport SplitRecord 41 | 42 | from ._sklearn_tree cimport SIZE_t 43 | from ._sklearn_tree cimport DOUBLE_t 44 | from ._sklearn_tree cimport DTYPE_t 45 | from ._sklearn_tree cimport UINT32_t 46 | 47 | 48 | cdef inline np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size): 49 | """Encapsulate data into a 1D numpy array of intp's.""" 50 | cdef np.npy_intp shape[1] 51 | shape[0] = size 52 | return np.PyArray_SimpleNewFromData(1, shape, np.NPY_INTP, data) 53 | 54 | 55 | # ============================================================================= 56 | # Custom Criterion 57 | # ============================================================================= 58 | 59 | cdef class VarianceCriterion(Criterion): 60 | """Abstract criterion for regression. 61 | 62 | Computes variance of the target values left and right of the split point. 
63 | Computation is linear in `n_samples` by using :: 64 | 65 | var = \sum_i^n (y_i - y_bar) ** 2 66 | = (\sum_i^n y_i ** 2) - n_samples y_bar ** 2 67 | """ 68 | cdef double* mean_left 69 | cdef double* mean_right 70 | cdef double* mean_total 71 | cdef double* sq_sum_left 72 | cdef double* sq_sum_right 73 | cdef double* sq_sum_total 74 | cdef double* var_left 75 | cdef double* var_right 76 | 77 | cdef double* sum_left 78 | cdef double* sum_right 79 | cdef double* sum_total 80 | 81 | cdef SIZE_t* n_classes 82 | cdef SIZE_t label_count_stride 83 | 84 | def __cinit__(self, SIZE_t n_outputs, np.ndarray[SIZE_t, ndim=1] n_classes): 85 | # Default values 86 | self.y = NULL 87 | self.y_stride = 0 88 | self.sample_weight = NULL 89 | 90 | self.samples = NULL 91 | self.start = 0 92 | self.pos = 0 93 | self.end = 0 94 | 95 | self.n_outputs = n_outputs 96 | self.n_node_samples = 0 97 | self.weighted_n_node_samples = 0.0 98 | self.weighted_n_left = 0.0 99 | self.weighted_n_right = 0.0 100 | 101 | # Allocate accumulators 102 | self.mean_left = calloc(n_outputs, sizeof(double)) 103 | self.mean_right = calloc(n_outputs, sizeof(double)) 104 | self.mean_total = calloc(n_outputs, sizeof(double)) 105 | self.sq_sum_left = calloc(n_outputs, sizeof(double)) 106 | self.sq_sum_right = calloc(n_outputs, sizeof(double)) 107 | self.sq_sum_total = calloc(n_outputs, sizeof(double)) 108 | self.var_left = calloc(n_outputs, sizeof(double)) 109 | self.var_right = calloc(n_outputs, sizeof(double)) 110 | 111 | self.sum_left = calloc(n_outputs, sizeof(double)) 112 | self.sum_right = calloc(n_outputs, sizeof(double)) 113 | self.sum_total = calloc(n_outputs, sizeof(double)) 114 | 115 | # Check for allocation errors 116 | if (self.mean_left == NULL or 117 | self.mean_right == NULL or 118 | self.mean_total == NULL or 119 | self.sq_sum_left == NULL or 120 | self.sq_sum_right == NULL or 121 | self.sq_sum_total == NULL or 122 | self.var_left == NULL or 123 | self.var_right == NULL or 124 | self.sum_left == NULL or 125 | self.sum_right == NULL or 126 | self.sum_total == NULL): 127 | raise MemoryError() 128 | 129 | 130 | # Count labels for each output 131 | self.n_classes = malloc(n_outputs * sizeof(SIZE_t)) 132 | if self.n_classes == NULL: 133 | raise MemoryError() 134 | 135 | cdef SIZE_t k = 0 136 | cdef SIZE_t label_count_stride = 0 137 | 138 | for k in range(n_outputs): 139 | self.n_classes[k] = n_classes[k] 140 | 141 | if n_classes[k] > label_count_stride: 142 | label_count_stride = n_classes[k] 143 | 144 | if n_classes[k] > 2: 145 | raise ValueError("Implementation limited to binary " 146 | "classification") 147 | 148 | self.label_count_stride = label_count_stride 149 | 150 | 151 | def __dealloc__(self): 152 | """Destructor.""" 153 | free(self.mean_left) 154 | free(self.mean_right) 155 | free(self.mean_total) 156 | free(self.sq_sum_left) 157 | free(self.sq_sum_right) 158 | free(self.sq_sum_total) 159 | free(self.var_left) 160 | free(self.var_right) 161 | 162 | free(self.sum_left) 163 | free(self.sum_right) 164 | free(self.sum_total) 165 | 166 | free(self.n_classes) 167 | 168 | def __reduce__(self): 169 | return (VarianceCriterion, 170 | (self.n_outputs, 171 | sizet_ptr_to_ndarray(self.n_classes, self.n_outputs)), 172 | self.__getstate__()) 173 | 174 | 175 | def __getstate__(self): 176 | return {} 177 | 178 | def __setstate__(self, d): 179 | pass 180 | 181 | cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, 182 | double weighted_n_samples, SIZE_t* samples, SIZE_t start, 183 | SIZE_t end) nogil: 184 | 
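        # Illustrative aside (not part of the original code): the identity in
        # the class docstring can be checked with plain NumPy, e.g.
        #
        #     import numpy as np
        #     y = np.random.RandomState(0).rand(100)
        #     ssd = ((y - y.mean()) ** 2).sum()
        #     np.allclose(ssd, (y ** 2).sum() - y.size * y.mean() ** 2)  # True
        #
        # init() accumulates sum_total and sq_sum_total per output so that
        # the per-output variance can be formed in a single pass over y.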
"""Initialize the criterion at node samples[start:end] and 185 | children samples[start:start] and samples[start:end].""" 186 | self.y = y 187 | self.y_stride = y_stride 188 | self.sample_weight = sample_weight 189 | self.samples = samples 190 | self.start = start 191 | self.end = end 192 | self.n_node_samples = end - start 193 | self.weighted_n_samples = weighted_n_samples 194 | cdef double weighted_n_node_samples = 0. 195 | 196 | # Initialize accumulators 197 | cdef SIZE_t n_outputs = self.n_outputs 198 | cdef double* mean_left = self.mean_left 199 | cdef double* mean_right = self.mean_right 200 | cdef double* mean_total = self.mean_total 201 | cdef double* sq_sum_left = self.sq_sum_left 202 | cdef double* sq_sum_right = self.sq_sum_right 203 | cdef double* sq_sum_total = self.sq_sum_total 204 | cdef double* var_left = self.var_left 205 | cdef double* var_right = self.var_right 206 | cdef double* sum_left = self.sum_left 207 | cdef double* sum_right = self.sum_right 208 | cdef double* sum_total = self.sum_total 209 | 210 | cdef SIZE_t i = 0 211 | cdef SIZE_t p = 0 212 | cdef SIZE_t k = 0 213 | cdef DOUBLE_t y_ik = 0.0 214 | cdef DOUBLE_t w_y_ik = 0.0 215 | cdef DOUBLE_t w = 1.0 216 | 217 | cdef SIZE_t n_bytes = n_outputs * sizeof(double) 218 | memset(mean_left, 0, n_bytes) 219 | memset(mean_right, 0, n_bytes) 220 | memset(mean_total, 0, n_bytes) 221 | memset(sq_sum_left, 0, n_bytes) 222 | memset(sq_sum_right, 0, n_bytes) 223 | memset(sq_sum_total, 0, n_bytes) 224 | memset(var_left, 0, n_bytes) 225 | memset(var_right, 0, n_bytes) 226 | memset(sum_left, 0, n_bytes) 227 | memset(sum_right, 0, n_bytes) 228 | memset(sum_total, 0, n_bytes) 229 | 230 | for p in range(start, end): 231 | i = samples[p] 232 | 233 | if sample_weight != NULL: 234 | w = sample_weight[i] 235 | 236 | for k in range(n_outputs): 237 | y_ik = y[i * y_stride + k] 238 | w_y_ik = w * y_ik 239 | sum_total[k] += w_y_ik 240 | sq_sum_total[k] += w_y_ik * y_ik 241 | 242 | weighted_n_node_samples += w 243 | 244 | self.weighted_n_node_samples = weighted_n_node_samples 245 | 246 | for k in range(n_outputs): 247 | mean_total[k] = sum_total[k] / weighted_n_node_samples 248 | 249 | # Reset to pos=start 250 | self.reset() 251 | 252 | cdef void reset(self) nogil: 253 | """Reset the criterion at pos=start.""" 254 | self.pos = self.start 255 | 256 | self.weighted_n_left = 0.0 257 | self.weighted_n_right = self.weighted_n_node_samples 258 | cdef double weighted_n_right = self.weighted_n_right 259 | 260 | 261 | cdef SIZE_t n_outputs = self.n_outputs 262 | cdef double* mean_left = self.mean_left 263 | cdef double* mean_right = self.mean_right 264 | cdef double* mean_total = self.mean_total 265 | cdef double* sq_sum_left = self.sq_sum_left 266 | cdef double* sq_sum_right = self.sq_sum_right 267 | cdef double* sq_sum_total = self.sq_sum_total 268 | cdef double* var_left = self.var_left 269 | cdef double* var_right = self.var_right 270 | cdef double* sum_left = self.sum_left 271 | cdef double* sum_right = self.sum_right 272 | cdef double* sum_total = self.sum_total 273 | 274 | cdef SIZE_t k = 0 275 | 276 | for k in range(n_outputs): 277 | mean_right[k] = mean_total[k] 278 | mean_left[k] = 0.0 279 | sq_sum_right[k] = sq_sum_total[k] 280 | sq_sum_left[k] = 0.0 281 | var_right[k] = (sq_sum_right[k] / weighted_n_right - 282 | mean_right[k] * mean_right[k]) 283 | var_left[k] = 0.0 284 | sum_right[k] = sum_total[k] 285 | sum_left[k] = 0.0 286 | 287 | 288 | cdef void update(self, SIZE_t new_pos) nogil: 289 | """Update the collected statistics by moving 
samples[pos:new_pos] from 290 | the right child to the left child.""" 291 | cdef DOUBLE_t* y = self.y 292 | cdef SIZE_t y_stride = self.y_stride 293 | cdef DOUBLE_t* sample_weight = self.sample_weight 294 | 295 | cdef SIZE_t* samples = self.samples 296 | cdef SIZE_t pos = self.pos 297 | 298 | cdef SIZE_t n_outputs = self.n_outputs 299 | cdef double* mean_left = self.mean_left 300 | cdef double* mean_right = self.mean_right 301 | cdef double* sq_sum_left = self.sq_sum_left 302 | cdef double* sq_sum_right = self.sq_sum_right 303 | cdef double* var_left = self.var_left 304 | cdef double* var_right = self.var_right 305 | cdef double* sum_left = self.sum_left 306 | cdef double* sum_right = self.sum_right 307 | 308 | cdef double weighted_n_left = self.weighted_n_left 309 | cdef double weighted_n_right = self.weighted_n_right 310 | 311 | cdef SIZE_t i 312 | cdef SIZE_t p 313 | cdef SIZE_t k 314 | cdef DOUBLE_t w = 1.0 315 | cdef DOUBLE_t y_ik, w_y_ik 316 | 317 | # Note: We assume start <= pos < new_pos <= end 318 | for p in range(pos, new_pos): 319 | i = samples[p] 320 | 321 | if sample_weight != NULL: 322 | w = sample_weight[i] 323 | 324 | for k in range(n_outputs): 325 | y_ik = y[i * y_stride + k] 326 | w_y_ik = w * y_ik 327 | 328 | sum_left[k] += w_y_ik 329 | sum_right[k] -= w_y_ik 330 | 331 | sq_sum_left[k] += w_y_ik * y_ik 332 | sq_sum_right[k] -= w_y_ik * y_ik 333 | 334 | weighted_n_left += w 335 | weighted_n_right -= w 336 | 337 | for k in range(n_outputs): 338 | mean_left[k] = sum_left[k] / weighted_n_left 339 | mean_right[k] = sum_right[k] / weighted_n_right 340 | var_left[k] = (sq_sum_left[k] / weighted_n_left - 341 | mean_left[k] * mean_left[k]) 342 | var_right[k] = (sq_sum_right[k] / weighted_n_right - 343 | mean_right[k] * mean_right[k]) 344 | 345 | self.weighted_n_left = weighted_n_left 346 | self.weighted_n_right = weighted_n_right 347 | 348 | self.pos = new_pos 349 | 350 | cdef double node_impurity(self) nogil: 351 | """Evaluate the impurity of the current node, i.e. the impurity of 352 | samples[start:end].""" 353 | cdef SIZE_t n_outputs = self.n_outputs 354 | cdef double* sq_sum_total = self.sq_sum_total 355 | cdef double* mean_total = self.mean_total 356 | cdef double weighted_n_node_samples = self.weighted_n_node_samples 357 | cdef double total = 0.0 358 | cdef SIZE_t k 359 | 360 | for k in range(n_outputs): 361 | total += (sq_sum_total[k] / weighted_n_node_samples - 362 | mean_total[k] * mean_total[k]) 363 | 364 | return total / n_outputs 365 | 366 | cdef void children_impurity(self, double* impurity_left, double* impurity_right) nogil: 367 | """Evaluate the impurity in children nodes, i.e. 
the impurity of the 368 | left child (samples[start:pos]) and the impurity the right child 369 | (samples[pos:end]).""" 370 | cdef SIZE_t n_outputs = self.n_outputs 371 | cdef double* var_left = self.var_left 372 | cdef double* var_right = self.var_right 373 | cdef double total_left = 0.0 374 | cdef double total_right = 0.0 375 | cdef SIZE_t k 376 | 377 | for k in range(n_outputs): 378 | total_left += var_left[k] 379 | total_right += var_right[k] 380 | 381 | impurity_left[0] = total_left / n_outputs 382 | impurity_right[0] = total_right / n_outputs 383 | 384 | cdef void node_value(self, double* dest) nogil: 385 | """Compute the node value of samples[start:end] into dest.""" 386 | cdef SIZE_t n_outputs = self.n_outputs 387 | cdef SIZE_t* n_classes = self.n_classes 388 | cdef SIZE_t label_count_stride = self.label_count_stride 389 | 390 | cdef DOUBLE_t* y = self.y 391 | cdef SIZE_t y_stride = self.y_stride 392 | cdef DOUBLE_t* sample_weight = self.sample_weight 393 | cdef SIZE_t* samples = self.samples 394 | cdef SIZE_t start = self.start 395 | cdef SIZE_t end = self.end 396 | 397 | cdef SIZE_t c 398 | cdef DOUBLE_t w = 1. 399 | 400 | cdef SIZE_t i = 0 401 | cdef SIZE_t p = 0 402 | cdef SIZE_t k = 0 403 | cdef SIZE_t offset = 0 404 | 405 | for k in range(n_outputs): 406 | memset(dest + offset, 0, n_classes[k] * sizeof(double)) 407 | offset += label_count_stride 408 | 409 | for p in range(start, end): 410 | i = samples[p] 411 | 412 | if sample_weight != NULL: 413 | w = sample_weight[i] 414 | 415 | for k in range(n_outputs): 416 | c = y[i * y_stride + k] 417 | dest[k * label_count_stride + c] += w 418 | 419 | 420 | 421 | # ============================================================================= 422 | # Custom splitter 423 | # ============================================================================= 424 | 425 | cdef class SplitterTransformer(Splitter): 426 | """Base splitter for working on a transformed space""" 427 | 428 | cdef Splitter splitter # Splitter used for the data 429 | 430 | cdef np.ndarray y_transformed 431 | cdef DOUBLE_t* y_transformed_data # Storage of transformed output 432 | cdef SIZE_t y_transformed_stride # Stride of transformed output 433 | 434 | 435 | def __getstate__(self): 436 | return {"splitter": self.splitter, 437 | "y_transformed": self.y_transformed} 438 | 439 | def __setstate__(self, d): 440 | self.set_output_space(d["splitter"], d["y_transformed"]) 441 | 442 | def __reduce__(self): 443 | return (SplitterTransformer, (self.criterion, 444 | self.max_features, 445 | self.min_samples_leaf, 446 | self.min_weight_leaf, 447 | self.random_state), self.__getstate__()) 448 | 449 | 450 | def set_output_space(self, 451 | Splitter splitter, 452 | np.ndarray[DOUBLE_t, ndim=2, mode="c"] y): 453 | 454 | # Set transformed output space 455 | self.y_transformed = y 456 | self.y_transformed_data = y.data 457 | self.y_transformed_stride = ( y.strides[0] / 458 | y.itemsize) 459 | 460 | # Set sub-splitter and its criterion 461 | self.splitter = splitter 462 | 463 | 464 | cdef void init(self, np.ndarray[DTYPE_t, ndim=2] X, 465 | np.ndarray[DOUBLE_t, ndim=2, mode="c"] y, 466 | DOUBLE_t* sample_weight) except *: 467 | """Initialize the splitter.""" 468 | 469 | if not self.splitter: 470 | raise ValueError('Unspecify base splitter') 471 | 472 | if self.y_transformed_data == NULL: 473 | raise ValueError("Unspectify subspace use set_output_space") 474 | 475 | # Initialize the splitter 476 | self.splitter.criterion = MSE(self.y_transformed.shape[1]) 477 | self.splitter.init(X, 
self.y_transformed, sample_weight) 478 | self.n_samples = self.splitter.n_samples 479 | 480 | 481 | # State of the splitter 482 | self.y = y.data 483 | self.y_stride = ( y.strides[0] / y.itemsize) 484 | 485 | 486 | cdef void node_reset(self, SIZE_t start, SIZE_t end, 487 | double* weighted_n_node_samples) nogil: 488 | """Reset splitter on node samples[start:end].""" 489 | # Reset the base splitter 490 | self.start = start 491 | self.end = end 492 | self.splitter.node_reset(start, end, weighted_n_node_samples) 493 | 494 | cdef void node_split(self, double impurity, 495 | SplitRecord* split, 496 | SIZE_t* n_constant_features) nogil: 497 | """Find a split on node samples[start:end].""" 498 | self.splitter.node_split(impurity, split, n_constant_features) 499 | 500 | cdef void node_value(self, double* dest) nogil: 501 | """Copy the value of node samples[start:end] into dest.""" 502 | self.criterion.init(self.y, 503 | self.y_stride, 504 | self.splitter.sample_weight, 505 | self.splitter.weighted_n_samples, 506 | self.splitter.samples, 507 | self.splitter.start, 508 | self.splitter.end) 509 | self.criterion.node_value(dest) 510 | 511 | cdef double node_impurity(self) nogil: 512 | """Impurity at the node""" 513 | return self.splitter.node_impurity() 514 | -------------------------------------------------------------------------------- /random_output_trees/_utils.py: -------------------------------------------------------------------------------- 1 | """Utilities""" 2 | 3 | # Originally from sklearn.utils.validation 4 | # Authors: Olivier Grisel 5 | # Gael Varoquaux 6 | # Andreas Mueller 7 | # Lars Buitinck 8 | # Alexandre Gramfort 9 | # Nicolas Tresegnie 10 | # License: BSD 3 clause 11 | 12 | import warnings 13 | from inspect import getargspec 14 | 15 | import numpy as np 16 | import scipy.sparse as sp 17 | 18 | 19 | class DataConversionWarning(UserWarning): 20 | "A warning on implicit data conversions happening in the code" 21 | pass 22 | 23 | warnings.simplefilter("always", DataConversionWarning) 24 | 25 | 26 | def _assert_all_finite(X): 27 | """Like assert_all_finite, but only for ndarray.""" 28 | X = np.asanyarray(X) 29 | # First try an O(n) time, O(1) space solution for the common case that 30 | # everything is finite; fall back to O(n) space np.isfinite to prevent 31 | # false positives from overflow in sum method. 32 | if (X.dtype.char in np.typecodes['AllFloat'] and not np.isfinite(X.sum()) 33 | and not np.isfinite(X).all()): 34 | raise ValueError("Input contains NaN, infinity" 35 | " or a value too large for %r." % X.dtype) 36 | 37 | def _num_samples(x): 38 | """Return number of samples in array-like x.""" 39 | if not hasattr(x, '__len__') and not hasattr(x, 'shape'): 40 | if hasattr(x, '__array__'): 41 | x = np.asarray(x) 42 | else: 43 | raise TypeError("Expected sequence or array-like, got %r" % x) 44 | return x.shape[0] if hasattr(x, 'shape') else len(x) 45 | 46 | 47 | def check_consistent_length(*arrays): 48 | """Check that all arrays have consistent first dimensions. 49 | 50 | Checks whether all objects in arrays have the same shape or length. 51 | 52 | Parameters 53 | ---------- 54 | arrays : list or tuple of input objects. 55 | Objects that will be checked for consistent length. 
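    Examples
    --------
    A minimal sketch of the intended behaviour (``None`` entries are ignored;
    the exact error text is illustrative):

    >>> import numpy as np
    >>> check_consistent_length(np.zeros(3), [1, 2, 3], None)
    >>> check_consistent_length(np.zeros(3), [1, 2])
    Traceback (most recent call last):
        ...
    ValueError: Found arrays with inconsistent numbers of samples: [2 3]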
56 | """ 57 | 58 | uniques = np.unique([_num_samples(X) for X in arrays if X is not None]) 59 | if len(uniques) > 1: 60 | raise ValueError("Found arrays with inconsistent numbers of samples: %s" 61 | % str(uniques)) 62 | 63 | 64 | def _ensure_sparse_format(spmatrix, accept_sparse, dtype, order, copy, 65 | force_all_finite): 66 | """Convert a sparse matrix to a given format. 67 | 68 | Checks the sparse format of spmatrix and converts if necessary. 69 | 70 | Parameters 71 | ---------- 72 | spmatrix : scipy sparse matrix 73 | Input to validate and convert. 74 | 75 | accept_sparse : string, list of string or None (default=None) 76 | String[s] representing allowed sparse matrix formats ('csc', 77 | 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). None means that sparse 78 | matrix input will raise an error. If the input is sparse but not in 79 | the allowed format, it will be converted to the first listed format. 80 | 81 | dtype : string, type or None (default=none) 82 | Data type of result. If None, the dtype of the input is preserved. 83 | 84 | order : 'F', 'C' or None (default=None) 85 | Whether an array will be forced to be fortran or c-style. 86 | 87 | copy : boolean (default=False) 88 | Whether a forced copy will be triggered. If copy=False, a copy might 89 | be triggered by a conversion. 90 | 91 | force_all_finite : boolean (default=True) 92 | Whether to raise an error on np.inf and np.nan in X. 93 | 94 | Returns 95 | ------- 96 | spmatrix_converted : scipy sparse matrix. 97 | Matrix that is ensured to have an allowed type. 98 | """ 99 | if accept_sparse is None: 100 | raise TypeError('A sparse matrix was passed, but dense ' 101 | 'data is required. Use X.toarray() to ' 102 | 'convert to a dense numpy array.') 103 | sparse_type = spmatrix.format 104 | if dtype is None: 105 | dtype = spmatrix.dtype 106 | if sparse_type in accept_sparse: 107 | # correct type 108 | if dtype == spmatrix.dtype: 109 | # correct dtype 110 | if copy: 111 | spmatrix = spmatrix.copy() 112 | else: 113 | # convert dtype 114 | spmatrix = spmatrix.astype(dtype) 115 | else: 116 | # create new 117 | spmatrix = spmatrix.asformat(accept_sparse[0]).astype(dtype) 118 | if force_all_finite: 119 | if not hasattr(spmatrix, "data"): 120 | warnings.warn("Can't check %s sparse matrix for nan or inf." 121 | % spmatrix.format) 122 | else: 123 | _assert_all_finite(spmatrix.data) 124 | if hasattr(spmatrix, "data"): 125 | spmatrix.data = np.array(spmatrix.data, copy=False, order=order) 126 | return spmatrix 127 | 128 | 129 | def check_array(array, accept_sparse=None, dtype=None, order=None, copy=False, 130 | force_all_finite=True, ensure_2d=True, allow_nd=False): 131 | """Input validation on an array, list, sparse matrix or similar. 132 | 133 | By default, the input is converted to an at least 2nd numpy array. 134 | 135 | Parameters 136 | ---------- 137 | array : object 138 | Input object to check / convert. 139 | 140 | accept_sparse : string, list of string or None (default=None) 141 | String[s] representing allowed sparse matrix formats, such as 'csc', 142 | 'csr', etc. None means that sparse matrix input will raise an error. 143 | If the input is sparse but not in the allowed format, it will be 144 | converted to the first listed format. 145 | 146 | dtype : string, type or None (default=none) 147 | Data type of result. If None, the dtype of the input is preserved. 148 | 149 | order : 'F', 'C' or None (default=None) 150 | Whether an array will be forced to be fortran or c-style. 
151 | 152 | copy : boolean (default=False) 153 | Whether a forced copy will be triggered. If copy=False, a copy might 154 | be triggered by a conversion. 155 | 156 | force_all_finite : boolean (default=True) 157 | Whether to raise an error on np.inf and np.nan in X. 158 | 159 | ensure_2d : boolean (default=True) 160 | Whether to make X at least 2d. 161 | 162 | allow_nd : boolean (default=False) 163 | Whether to allow X.ndim > 2. 164 | 165 | Returns 166 | ------- 167 | X_converted : object 168 | The converted and validated X. 169 | """ 170 | if isinstance(accept_sparse, str): 171 | accept_sparse = [accept_sparse] 172 | 173 | if sp.issparse(array): 174 | array = _ensure_sparse_format(array, accept_sparse, dtype, order, 175 | copy, force_all_finite) 176 | else: 177 | if ensure_2d: 178 | array = np.atleast_2d(array) 179 | array = np.array(array, dtype=dtype, order=order, copy=copy) 180 | if not allow_nd and array.ndim >= 3: 181 | raise ValueError("Found array with dim %d. Expected <= 2" % 182 | array.ndim) 183 | if force_all_finite: 184 | _assert_all_finite(array) 185 | 186 | return array 187 | 188 | 189 | def check_X_y(X, y, accept_sparse=None, dtype=None, order=None, copy=False, 190 | force_all_finite=True, ensure_2d=True, allow_nd=False, 191 | multi_output=False): 192 | """Input validation for standard estimators. 193 | 194 | Checks X and y for consistent length, enforces X 2d and y 1d. 195 | Standard input checks are only applied to y. For multi-label y, 196 | set multi_ouput=True to allow 2d and sparse y. 197 | 198 | Parameters 199 | ---------- 200 | X : nd-array, list or sparse matrix 201 | Input data. 202 | 203 | y : nd-array, list or sparse matrix 204 | Labels. 205 | 206 | accept_sparse : string, list of string or None (default=None) 207 | String[s] representing allowed sparse matrix formats, such as 'csc', 208 | 'csr', etc. None means that sparse matrix input will raise an error. 209 | If the input is sparse but not in the allowed format, it will be 210 | converted to the first listed format. 211 | 212 | dtype : string, type or None (default=none) 213 | Data type of result. If None, the dtype of the input is preserved. 214 | 215 | order : 'F', 'C' or None (default=None) 216 | Whether an array will be forced to be fortran or c-style. 217 | 218 | copy : boolean (default=False) 219 | Whether a forced copy will be triggered. If copy=False, a copy might 220 | be triggered by a conversion. 221 | 222 | force_all_finite : boolean (default=True) 223 | Whether to raise an error on np.inf and np.nan in X. 224 | 225 | ensure_2d : boolean (default=True) 226 | Whether to make X at least 2d. 227 | 228 | allow_nd : boolean (default=False) 229 | Whether to allow X.ndim > 2. 230 | 231 | multi_output : boolean (default=False) 232 | Whether to allow 2-d y (array or sparse matrix). If false, y will be 233 | validated as a vector. 234 | 235 | Returns 236 | ------- 237 | X_converted : object 238 | The converted and validated X. 
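    y_converted : object
        The converted and validated y.

    Examples
    --------
    A minimal sketch of the intended behaviour (shapes are illustrative):

    >>> import numpy as np
    >>> X = [[0, 1], [2, 3], [4, 5]]
    >>> y = [0, 1, 0]
    >>> X_checked, y_checked = check_X_y(X, y)
    >>> X_checked.shape, y_checked.shape
    ((3, 2), (3,))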
239 | """ 240 | X = check_array(X, accept_sparse, dtype, order, copy, force_all_finite, 241 | ensure_2d, allow_nd) 242 | if multi_output: 243 | y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False) 244 | else: 245 | y = column_or_1d(y, warn=True) 246 | _assert_all_finite(y) 247 | 248 | check_consistent_length(X, y) 249 | 250 | return X, y 251 | 252 | 253 | def column_or_1d(y, warn=False): 254 | """ Ravel column or 1d numpy array, else raises an error 255 | 256 | Parameters 257 | ---------- 258 | y : array-like 259 | 260 | Returns 261 | ------- 262 | y : array 263 | 264 | """ 265 | shape = np.shape(y) 266 | if len(shape) == 1: 267 | return np.ravel(y) 268 | if len(shape) == 2 and shape[1] == 1: 269 | if warn: 270 | warnings.warn("A column-vector y was passed when a 1d array was" 271 | " expected. Please change the shape of y to " 272 | "(n_samples, ), for example using ravel().", 273 | DataConversionWarning, stacklevel=2) 274 | return np.ravel(y) 275 | 276 | raise ValueError("bad input shape {0}".format(shape)) 277 | 278 | 279 | def has_fit_parameter(estimator, parameter): 280 | """ Checks whether the estimator's fit method supports the given parameter. 281 | Example 282 | ------- 283 | >>> from sklearn.svm import SVC 284 | >>> has_fit_parameter(SVC(), "sample_weight") 285 | True 286 | """ 287 | return parameter in getargspec(estimator.fit)[0] 288 | 289 | 290 | def skipped(func): 291 | from nose.plugins.skip import SkipTest 292 | 293 | def _func(): 294 | raise SkipTest("Test %s is skipped" % func.__name__) 295 | _func.__name__ = func.__name__ 296 | return _func 297 | -------------------------------------------------------------------------------- /random_output_trees/datasets.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Module for datasets loading and fetchers. 4 | 5 | """ 6 | 7 | from __future__ import division, print_function, absolute_import 8 | 9 | import os 10 | from functools import partial 11 | 12 | import shutil 13 | import tarfile 14 | 15 | try: 16 | # Python 2 17 | from urllib2 import HTTPError 18 | from urllib2 import quote 19 | from urllib2 import urlopen 20 | except ImportError: 21 | # Python 3+ 22 | from urllib.error import HTTPError 23 | from urllib.parse import quote 24 | from urllib.request import urlopen 25 | 26 | 27 | import numpy as np 28 | 29 | from sklearn.datasets import get_data_home 30 | from sklearn.datasets.base import Bunch 31 | 32 | 33 | __all__ = [ 34 | "fetch_drug_interaction", 35 | "fetch_protein_interaction", 36 | ] 37 | 38 | 39 | def _fetch_drug_protein(data_home=None): 40 | """Fetch drug-protein dataset from the server""" 41 | 42 | base_url = "http://cbio.ensmp.fr/~yyamanishi/substr-domain/" 43 | 44 | # check if this data set has been already downloaded 45 | data_home = get_data_home(data_home) 46 | data_home = os.path.join(data_home, 'drug-protein') 47 | if not os.path.exists(data_home): 48 | os.makedirs(data_home) 49 | 50 | for base_name in ["drug_repmat.txt", "target_repmat.txt", 51 | "inter_admat.txt"]: 52 | filename = os.path.join(data_home, base_name) 53 | 54 | if not os.path.exists(filename): 55 | urlname = base_url + base_name 56 | 57 | print("Download data at {}".format(urlname)) 58 | 59 | try: 60 | url = urlopen(urlname) 61 | except HTTPError as e: 62 | if e.code == 404: 63 | e.msg = "Dataset drug-protein '%s' not found." 
% base_name 64 | raise 65 | 66 | try: 67 | with open(filename, 'w+b') as fhandle: 68 | shutil.copyfileobj(url, fhandle) 69 | except: 70 | os.remove(filename) 71 | raise 72 | 73 | url.close() 74 | 75 | return data_home 76 | 77 | 78 | def fetch_drug_interaction(data_home=None): 79 | """Fetch the drug-interaction dataset 80 | 81 | Constant features were removed. 82 | 83 | =========================== =================================== 84 | Domain drug-protein interaction network 85 | Features Biological (see [1]) 86 | output interaction network 87 | Drug matrix (sample, features) = (1862, 660) 88 | Newtork interaction matrix (samples, labels) = (1862, 1554) 89 | =========================== =================================== 90 | 91 | 92 | Parameters 93 | ---------- 94 | data_home: optional, default: None 95 | Specify another download and cache folder for the data sets. By default 96 | all scikit learn data is stored in '~/scikit_learn_data' subfolders. 97 | 98 | Returns 99 | ------- 100 | data : Bunch 101 | Dictionary-like object, the interesting attributes are: 102 | 'data', the data to learn, 'target', the classification labels, 103 | 'target_names', the original names of the target columns and 104 | 'feature_names', the original names of the dataset columns. 105 | 106 | References 107 | ---------- 108 | .. [1] Yamanishi, Y., Pauwels, E., Saigo, H., & Stoven, V. (2011). 109 | Extracting sets of chemical substructures and protein domains 110 | governing drug-target interactions. Journal of chemical information 111 | and modeling, 51(5), 1183-1194. 112 | 113 | """ 114 | data_home = _fetch_drug_protein(data_home=data_home) 115 | 116 | drug_fname = os.path.join(data_home, "drug_repmat.txt") 117 | data = np.loadtxt(drug_fname, dtype=float, skiprows=1) 118 | data = data[:, 1:] # skip id column 119 | mask_constant = np.var(data, axis=0) != 0. 120 | data = data[:, mask_constant] # remove constant columns 121 | 122 | with open(drug_fname, 'r') as fhandle: 123 | feature_names = fhandle.readline().split("\t") 124 | feature_names = np.array(feature_names)[mask_constant].tolist() 125 | 126 | interaction_fname = os.path.join(data_home, "inter_admat.txt") 127 | target = np.loadtxt(interaction_fname, dtype=float, skiprows=1) 128 | target = target[:, 1:] # skip id column 129 | with open(interaction_fname, 'r') as fhandle: 130 | target_names = fhandle.readline().split("\t") 131 | 132 | return Bunch(data=data, target=target, feature_names=feature_names, 133 | target_names=target_names) 134 | 135 | 136 | def fetch_protein_interaction(data_home=None): 137 | """Fetch the protein-interaction dataset 138 | 139 | Constant features were removed 140 | 141 | =========================== =================================== 142 | Domain drug-protein interaction network 143 | Features Biological (see [1]) 144 | output interaction network 145 | Drug matrix (sample, features) = (1554, 876) 146 | Newtork interaction matrix (samples, labels) = (1554, 1862) 147 | =========================== =================================== 148 | 149 | Parameters 150 | ---------- 151 | data_home: optional, default: None 152 | Specify another download and cache folder for the data sets. By default 153 | all scikit learn data is stored in '~/scikit_learn_data' subfolders. 154 | 155 | Returns 156 | ------- 157 | data : Bunch 158 | Dictionary-like object, the interesting attributes are: 159 | 'data', the data to learn, 'target', the classification labels and 160 | 'feature_names', the original names of the dataset columns. 
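    Notes
    -----
    The underlying text files are downloaded from the original server on first
    use and cached under ``data_home`` (by default the scikit-learn data home),
    so subsequent calls can work offline.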
161 | 162 | References 163 | ---------- 164 | .. [1] Yamanishi, Y., Pauwels, E., Saigo, H., & Stoven, V. (2011). 165 | Extracting sets of chemical substructures and protein domains 166 | governing drug-target interactions. Journal of chemical information 167 | and modeling, 51(5), 1183-1194. 168 | 169 | """ 170 | data_home = _fetch_drug_protein(data_home=data_home) 171 | 172 | protein_fname = os.path.join(data_home, "target_repmat.txt") 173 | data = np.loadtxt(protein_fname, dtype=float, skiprows=1, 174 | usecols=range(1, 877)) # skip id column 175 | 176 | mask_constant = np.var(data, axis=0) != 0. 177 | data = data[:, mask_constant] # remove constant columns 178 | 179 | with open(protein_fname, 'r') as fhandle: 180 | feature_names = fhandle.readline().split("\t") 181 | feature_names = np.array(feature_names)[mask_constant].tolist() 182 | 183 | interaction_fname = os.path.join(data_home, "inter_admat.txt") 184 | target = np.loadtxt(interaction_fname, dtype=float, skiprows=1) 185 | target = target[:, 1:] # skip id column 186 | target = target.T 187 | 188 | return Bunch(data=data, target=target, feature_names=feature_names) 189 | -------------------------------------------------------------------------------- /random_output_trees/ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This module provides ensemble estimators which work transformed output-space. 3 | 4 | ''' 5 | 6 | from .forest import RandomForestClassifier 7 | from .forest import RandomForestRegressor 8 | from .forest import ExtraTreesClassifier 9 | from .forest import ExtraTreesRegressor 10 | from .lazy_bagging import LazyBaggingClassifier 11 | from .lazy_bagging import LazyBaggingRegressor 12 | 13 | __all__ = [ 14 | "RandomForestClassifier", 15 | "RandomForestRegressor", 16 | "ExtraTreesClassifier", 17 | "ExtraTreesRegressor", 18 | ] 19 | -------------------------------------------------------------------------------- /random_output_trees/ensemble/tests/test_forest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Testing for the forest module (sklearn.ensemble.forest). 
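The package-specific tests below exercise the ``output_transformer`` parameter
of the forest estimators.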
3 | """ 4 | 5 | # Most tests comes from scikit-learn and ensure that everything is working 6 | # as expected 7 | 8 | # Authors: Gilles Louppe, 9 | # Brian Holt, 10 | # Andreas Mueller, 11 | # Arnaud Joly 12 | # License: BSD 3 clause 13 | 14 | 15 | 16 | from sklearn.utils.testing import assert_almost_equal 17 | from sklearn.utils.testing import assert_equal 18 | 19 | from sklearn import datasets 20 | from sklearn.utils.validation import check_random_state 21 | 22 | from sklearn.cross_validation import train_test_split 23 | from sklearn.random_projection import GaussianRandomProjection 24 | from sklearn.base import BaseEstimator, TransformerMixin 25 | 26 | from random_output_trees.transformer import FixedStateTransformer 27 | from random_output_trees.ensemble import ExtraTreesClassifier 28 | from random_output_trees.ensemble import ExtraTreesRegressor 29 | from random_output_trees.ensemble import RandomForestClassifier 30 | from random_output_trees.ensemble import RandomForestRegressor 31 | 32 | 33 | # toy sample 34 | X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] 35 | y = [-1, -1, -1, 1, 1, 1] 36 | T = [[-1, -1], [2, 2], [3, 2]] 37 | true_result = [-1, 1, 1] 38 | 39 | # also load the iris dataset 40 | # and randomly permute it 41 | iris = datasets.load_iris() 42 | rng = check_random_state(0) 43 | perm = rng.permutation(iris.target.size) 44 | iris.data = iris.data[perm] 45 | iris.target = iris.target[perm] 46 | 47 | # also load the boston dataset 48 | # and randomly permute it 49 | boston = datasets.load_boston() 50 | perm = rng.permutation(boston.target.size) 51 | boston.data = boston.data[perm] 52 | boston.target = boston.target[perm] 53 | 54 | FOREST_CLASSIFIERS = { 55 | "ExtraTreesClassifier": ExtraTreesClassifier, 56 | "RandomForestClassifier": RandomForestClassifier, 57 | } 58 | 59 | FOREST_REGRESSORS = { 60 | "ExtraTreesRegressor": ExtraTreesRegressor, 61 | "RandomForestRegressor": RandomForestRegressor, 62 | } 63 | 64 | FOREST_TRANSFORMERS = {} 65 | 66 | FOREST_ESTIMATORS = dict() 67 | FOREST_ESTIMATORS.update(FOREST_CLASSIFIERS) 68 | FOREST_ESTIMATORS.update(FOREST_REGRESSORS) 69 | FOREST_ESTIMATORS.update(FOREST_TRANSFORMERS) 70 | 71 | 72 | class IdentityProjections(BaseEstimator, TransformerMixin): 73 | """ Project the input data on the identity matrix (noop operation)""" 74 | def __init__(self): 75 | pass 76 | 77 | def fit(self, X, y=None): 78 | return self 79 | 80 | def transform(selft, X): 81 | return X 82 | 83 | 84 | def test_output_transformer(): 85 | X, y = datasets.make_multilabel_classification(return_indicator=True) 86 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) 87 | 88 | # Check that random_state are different 89 | transformer = GaussianRandomProjection(n_components=5, random_state=None) 90 | for name, ForestEstimator in FOREST_ESTIMATORS.items(): 91 | est = ForestEstimator(random_state=5, output_transformer=transformer) 92 | est.fit(X_train, y_train) 93 | y_pred = est.predict(X_test) 94 | assert_equal(y_pred.shape, y_test.shape) 95 | 96 | random_state = [sub.output_transformer_.random_state 97 | for sub in est.estimators_] 98 | 99 | assert_equal(len(set(random_state)), est.n_estimators) 100 | 101 | 102 | # Check that random_state are equals 103 | transformer = FixedStateTransformer(GaussianRandomProjection( 104 | n_components=5), random_seed=0) 105 | for name, ForestEstimator in FOREST_ESTIMATORS.items(): 106 | est = ForestEstimator(random_state=5, output_transformer=transformer) 107 | est.fit(X_train, y_train) 108 | y_pred = 
est.predict(X_test) 109 | assert_equal(y_pred.shape, y_test.shape) 110 | 111 | 112 | random_state = [sub.output_transformer_.random_state 113 | for sub in est.estimators_] 114 | 115 | assert_equal(len(set(random_state)), 1) 116 | assert_equal(random_state[0], 0) 117 | 118 | 119 | def test_identity_output_transformer(): 120 | X, y = datasets.make_multilabel_classification(return_indicator=True, 121 | random_state=0) 122 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) 123 | 124 | for name, ForestEstimator in FOREST_ESTIMATORS.items(): 125 | est = ForestEstimator(random_state=0, max_features=None, max_depth=4) 126 | est.fit(X_train, y_train) 127 | y_pred_origin = est.predict(X_test) 128 | 129 | 130 | est_transf = est.set_params(output_transformer=IdentityProjections()) 131 | est_transf.fit(X_train, y_train) 132 | y_pred_transformed = est_transf.predict(X_test) 133 | assert_almost_equal(y_pred_origin, y_pred_transformed) 134 | 135 | 136 | if __name__ == "__main__": 137 | import nose 138 | nose.runmodule() 139 | -------------------------------------------------------------------------------- /random_output_trees/ensemble/tests/test_lazy_bagging.py: -------------------------------------------------------------------------------- 1 | """ 2 | Testing for the lazy bagging ensemble module 3 | 4 | Many tests are adapted from sklearn/ensemble/tests/test_bagging.py 5 | """ 6 | 7 | # Author: Gilles Louppe 8 | # License: BSD 3 clause 9 | 10 | # Author: Arnaud Joly 11 | # License: BSD 3 clause 12 | import numpy as np 13 | 14 | from sklearn.utils.testing import assert_array_equal 15 | from sklearn.utils.testing import assert_array_almost_equal 16 | from sklearn.utils.testing import assert_equal 17 | from sklearn.utils.testing import assert_raises 18 | from sklearn.utils.testing import assert_greater 19 | from sklearn.utils.testing import assert_less 20 | from sklearn.utils.testing import assert_true 21 | from sklearn.utils.testing import assert_warns 22 | from sklearn.utils.testing import assert_almost_equal 23 | 24 | from sklearn.base import clone 25 | from sklearn.dummy import DummyClassifier, DummyRegressor 26 | from sklearn.grid_search import GridSearchCV, ParameterGrid 27 | from sklearn.linear_model import Perceptron 28 | from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor 29 | from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor 30 | from sklearn.svm import SVC, SVR 31 | from sklearn.cross_validation import train_test_split 32 | from sklearn.datasets import load_boston 33 | from sklearn.datasets import load_iris 34 | from sklearn.datasets import make_multilabel_classification 35 | from sklearn.utils import check_random_state 36 | 37 | from scipy.sparse import csc_matrix, csr_matrix 38 | 39 | from random_output_trees.ensemble import LazyBaggingClassifier 40 | from random_output_trees.ensemble import LazyBaggingRegressor 41 | 42 | 43 | rng = check_random_state(0) 44 | 45 | # also load the iris dataset 46 | # and randomly permute it 47 | iris = load_iris() 48 | perm = rng.permutation(iris.target.size) 49 | iris.data = iris.data[perm] 50 | iris.target = iris.target[perm] 51 | 52 | # also load the boston dataset 53 | # and randomly permute it 54 | boston = load_boston() 55 | perm = rng.permutation(boston.target.size) 56 | boston.data = boston.data[perm] 57 | boston.target = boston.target[perm] 58 | 59 | 60 | def test_classification(): 61 | """Check classification for various parameter settings.""" 62 | rng = check_random_state(0) 63 | 
X_train, X_test, y_train, y_test = train_test_split(iris.data, 64 | iris.target, 65 | random_state=rng) 66 | grid = ParameterGrid({"max_samples": [0.5, 1.0], 67 | "max_features": [1, 2, 4], 68 | "bootstrap": [True, False], 69 | "bootstrap_features": [True, False]}) 70 | 71 | for base_estimator in [None, 72 | DummyClassifier(), 73 | Perceptron(), 74 | DecisionTreeClassifier(), 75 | KNeighborsClassifier(), 76 | SVC()]: 77 | for params in grid: 78 | LazyBaggingClassifier(base_estimator=base_estimator, 79 | random_state=rng, 80 | **params).fit(X_train, y_train).predict(X_test) 81 | 82 | 83 | def test_sparse_classification(): 84 | """Check classification for various parameter settings on sparse input.""" 85 | 86 | class CustomSVC(SVC): 87 | """SVC variant that records the nature of the training set""" 88 | 89 | def fit(self, X, y): 90 | super(CustomSVC, self).fit(X, y) 91 | self.data_type_ = type(X) 92 | return self 93 | 94 | rng = check_random_state(0) 95 | X_train, X_test, y_train, y_test = train_test_split(iris.data, 96 | iris.target, 97 | random_state=rng) 98 | parameter_sets = [ 99 | {"max_samples": 0.5, 100 | "max_features": 2, 101 | "bootstrap": True, 102 | "bootstrap_features": True}, 103 | {"max_samples": 1.0, 104 | "max_features": 4, 105 | "bootstrap": True, 106 | "bootstrap_features": True}, 107 | {"max_features": 2, 108 | "bootstrap": False, 109 | "bootstrap_features": True}, 110 | {"max_samples": 0.5, 111 | "bootstrap": True, 112 | "bootstrap_features": False}, 113 | ] 114 | 115 | for sparse_format in [csc_matrix, csr_matrix]: 116 | X_train_sparse = sparse_format(X_train) 117 | X_test_sparse = sparse_format(X_test) 118 | for params in parameter_sets: 119 | 120 | # Trained on sparse format 121 | sparse_classifier = LazyBaggingClassifier( 122 | base_estimator=CustomSVC(), 123 | random_state=1, 124 | **params 125 | ).fit(X_train_sparse, y_train) 126 | sparse_results = sparse_classifier.predict(X_test_sparse) 127 | 128 | # Trained on dense format 129 | dense_results = LazyBaggingClassifier( 130 | base_estimator=CustomSVC(), 131 | random_state=1, 132 | **params 133 | ).fit(X_train, y_train).predict(X_test) 134 | 135 | sparse_type = type(X_train_sparse) 136 | types = [i.data_type_ for i in sparse_classifier.estimators_] 137 | 138 | assert_array_equal(sparse_results, dense_results) 139 | assert all([t == sparse_type for t in types]) 140 | 141 | 142 | def test_regression(): 143 | """Check regression for various parameter settings.""" 144 | rng = check_random_state(0) 145 | X_train, X_test, y_train, y_test = train_test_split(boston.data[:50], 146 | boston.target[:50], 147 | random_state=rng) 148 | grid = ParameterGrid({"max_samples": [0.5, 1.0], 149 | "max_features": [0.5, 1.0], 150 | "bootstrap": [True, False], 151 | "bootstrap_features": [True, False]}) 152 | 153 | for base_estimator in [None, 154 | DummyRegressor(), 155 | DecisionTreeRegressor(), 156 | KNeighborsRegressor(), 157 | SVR()]: 158 | for params in grid: 159 | LazyBaggingRegressor(base_estimator=base_estimator, 160 | random_state=rng, 161 | **params).fit(X_train, 162 | y_train).predict(X_test) 163 | 164 | 165 | def test_sparse_regression(): 166 | """Check regression for various parameter settings on sparse input.""" 167 | rng = check_random_state(0) 168 | X_train, X_test, y_train, y_test = train_test_split(boston.data[:50], 169 | boston.target[:50], 170 | random_state=rng) 171 | 172 | class CustomSVR(SVR): 173 | """SVC variant that records the nature of the training set""" 174 | 175 | def fit(self, X, y): 176 | super(CustomSVR, 
self).fit(X, y) 177 | self.data_type_ = type(X) 178 | return self 179 | 180 | parameter_sets = [ 181 | {"max_samples": 0.5, 182 | "max_features": 2, 183 | "bootstrap": True, 184 | "bootstrap_features": True}, 185 | {"max_samples": 1.0, 186 | "max_features": 4, 187 | "bootstrap": True, 188 | "bootstrap_features": True}, 189 | {"max_features": 2, 190 | "bootstrap": False, 191 | "bootstrap_features": True}, 192 | {"max_samples": 0.5, 193 | "bootstrap": True, 194 | "bootstrap_features": False}, 195 | ] 196 | 197 | for sparse_format in [csc_matrix, csr_matrix]: 198 | X_train_sparse = sparse_format(X_train) 199 | X_test_sparse = sparse_format(X_test) 200 | for params in parameter_sets: 201 | 202 | # Trained on sparse format 203 | sparse_classifier = LazyBaggingRegressor( 204 | base_estimator=CustomSVR(), 205 | random_state=1, 206 | **params 207 | ).fit(X_train_sparse, y_train) 208 | sparse_results = sparse_classifier.predict(X_test_sparse) 209 | 210 | # Trained on dense format 211 | dense_results = LazyBaggingRegressor( 212 | base_estimator=CustomSVR(), 213 | random_state=1, 214 | **params 215 | ).fit(X_train, y_train).predict(X_test) 216 | 217 | sparse_type = type(X_train_sparse) 218 | types = [i.data_type_ for i in sparse_classifier.estimators_] 219 | 220 | assert_array_equal(sparse_results, dense_results) 221 | assert all([t == sparse_type for t in types]) 222 | assert_array_equal(sparse_results, dense_results) 223 | 224 | 225 | def test_bootstrap_samples(): 226 | """Test that bootstraping samples generate non-perfect base estimators.""" 227 | rng = check_random_state(0) 228 | X_train, X_test, y_train, y_test = train_test_split(boston.data, 229 | boston.target, 230 | random_state=rng) 231 | 232 | base_estimator = DecisionTreeRegressor().fit(X_train, y_train) 233 | 234 | # without bootstrap, all trees are perfect on the training set 235 | ensemble = LazyBaggingRegressor(base_estimator=DecisionTreeRegressor(), 236 | max_samples=1.0, 237 | bootstrap=False, 238 | random_state=rng).fit(X_train, y_train) 239 | 240 | assert_equal(base_estimator.score(X_train, y_train), 241 | ensemble.score(X_train, y_train)) 242 | 243 | # with bootstrap, trees are no longer perfect on the training set 244 | ensemble = LazyBaggingRegressor(base_estimator=DecisionTreeRegressor(), 245 | max_samples=1.0, 246 | bootstrap=True, 247 | random_state=rng).fit(X_train, y_train) 248 | 249 | assert_greater(base_estimator.score(X_train, y_train), 250 | ensemble.score(X_train, y_train)) 251 | 252 | # NB: we don't save features for memory reasons 253 | # def test_bootstrap_features(): 254 | # """Test that bootstraping features may generate dupplicate features.""" 255 | # rng = check_random_state(0) 256 | # X_train, X_test, y_train, y_test = train_test_split(boston.data, 257 | # boston.target, 258 | # random_state=rng) 259 | 260 | # ensemble = LazyBaggingRegressor(base_estimator=DecisionTreeRegressor(), 261 | # max_features=1.0, 262 | # bootstrap_features=False, 263 | # random_state=rng).fit(X_train, y_train) 264 | 265 | # for features in ensemble.estimators_features_: 266 | # assert_equal(boston.data.shape[1], np.unique(features).shape[0]) 267 | 268 | # ensemble = LazyBaggingRegressor(base_estimator=DecisionTreeRegressor(), 269 | # max_features=1.0, 270 | # bootstrap_features=True, 271 | # random_state=rng).fit(X_train, y_train) 272 | 273 | # for features in ensemble.estimators_features_: 274 | # assert_greater(boston.data.shape[1], np.unique(features).shape[0]) 275 | 276 | 277 | def test_probability(): 278 | """Predict 
probabilities.""" 279 | rng = check_random_state(0) 280 | X_train, X_test, y_train, y_test = train_test_split(iris.data, 281 | iris.target, 282 | random_state=rng) 283 | 284 | with np.errstate(divide="ignore", invalid="ignore"): 285 | # Normal case 286 | print("start") 287 | ensemble = LazyBaggingClassifier(random_state=rng).fit(X_train, y_train) 288 | 289 | assert_array_almost_equal(np.sum(ensemble.predict_proba(X_test), 290 | axis=1), 291 | np.ones(len(X_test))) 292 | 293 | assert_array_almost_equal(ensemble.predict_proba(X_test), 294 | np.exp(ensemble.predict_log_proba(X_test))) 295 | print("stop") 296 | 297 | # Degenerate case, where some classes are missing 298 | ensemble = LazyBaggingClassifier(base_estimator=DecisionTreeClassifier(), 299 | random_state=rng, 300 | max_samples=5).fit(X_train, y_train) 301 | 302 | assert_array_almost_equal(np.sum(ensemble.predict_proba(X_test), 303 | axis=1), 304 | np.ones(len(X_test))) 305 | 306 | assert_array_almost_equal(ensemble.predict_proba(X_test), 307 | np.exp(ensemble.predict_log_proba(X_test))) 308 | 309 | 310 | def test_single_estimator(): 311 | """Check singleton ensembles.""" 312 | rng = check_random_state(0) 313 | X_train, X_test, y_train, y_test = train_test_split(boston.data, 314 | boston.target, 315 | random_state=rng) 316 | 317 | clf1 = LazyBaggingRegressor(base_estimator=KNeighborsRegressor(), 318 | n_estimators=1, 319 | bootstrap=False, 320 | bootstrap_features=False, 321 | random_state=rng).fit(X_train, y_train) 322 | 323 | clf2 = KNeighborsRegressor().fit(X_train, y_train) 324 | 325 | assert_array_equal(clf1.predict(X_test), clf2.predict(X_test)) 326 | 327 | 328 | def test_error(): 329 | """Test that it gives proper exception on deficient input.""" 330 | X, y = iris.data, iris.target 331 | base = DecisionTreeClassifier() 332 | 333 | # Test max_samples 334 | assert_raises(ValueError, 335 | LazyBaggingClassifier(base, max_samples=-1).fit, X, y) 336 | assert_raises(ValueError, 337 | LazyBaggingClassifier(base, max_samples=0.0).fit, X, y) 338 | assert_raises(ValueError, 339 | LazyBaggingClassifier(base, max_samples=2.0).fit, X, y) 340 | assert_raises(ValueError, 341 | LazyBaggingClassifier(base, max_samples=1000).fit, X, y) 342 | assert_raises(ValueError, 343 | LazyBaggingClassifier(base, max_samples="foobar").fit, X, y) 344 | 345 | # Test max_features 346 | assert_raises(ValueError, 347 | LazyBaggingClassifier(base, max_features=-1).fit, X, y) 348 | assert_raises(ValueError, 349 | LazyBaggingClassifier(base, max_features=0.0).fit, X, y) 350 | assert_raises(ValueError, 351 | LazyBaggingClassifier(base, max_features=2.0).fit, X, y) 352 | assert_raises(ValueError, 353 | LazyBaggingClassifier(base, max_features=5).fit, X, y) 354 | assert_raises(ValueError, 355 | LazyBaggingClassifier(base, max_features="foobar").fit, X, y) 356 | 357 | # Test support of decision_function 358 | assert_raises(NotImplementedError, 359 | LazyBaggingClassifier(base).fit(X, y).decision_function, X) 360 | 361 | 362 | def test_gridsearch(): 363 | """Check that bagging ensembles can be grid-searched.""" 364 | # Transform iris into a binary classification task 365 | X, y = iris.data, iris.target 366 | y[y == 2] = 1 367 | 368 | # Grid search with scoring based on decision_function 369 | parameters = {'n_estimators': (1, 2), 370 | 'base_estimator__C': (1, 2)} 371 | 372 | GridSearchCV(LazyBaggingClassifier(SVC()), 373 | parameters, 374 | scoring="roc_auc").fit(X, y) 375 | 376 | 377 | def test_base_estimator(): 378 | """Check base_estimator and its default values.""" 
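    # When base_estimator is None, LazyBaggingClassifier is expected to fall
    # back to a DecisionTreeClassifier and LazyBaggingRegressor to a
    # DecisionTreeRegressor; any explicitly passed estimator should be exposed
    # unchanged through the fitted base_estimator_ attribute.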
379 | rng = check_random_state(0) 380 | 381 | # Classification 382 | X_train, X_test, y_train, y_test = train_test_split(iris.data, 383 | iris.target, 384 | random_state=rng) 385 | 386 | ensemble = LazyBaggingClassifier(None, 387 | random_state=0).fit(X_train, y_train) 388 | 389 | assert_true(isinstance(ensemble.base_estimator_, DecisionTreeClassifier)) 390 | 391 | ensemble = LazyBaggingClassifier(DecisionTreeClassifier(), 392 | random_state=0).fit(X_train, y_train) 393 | 394 | assert_true(isinstance(ensemble.base_estimator_, DecisionTreeClassifier)) 395 | 396 | ensemble = LazyBaggingClassifier(Perceptron(), 397 | random_state=0).fit(X_train, y_train) 398 | 399 | assert_true(isinstance(ensemble.base_estimator_, Perceptron)) 400 | 401 | # Regression 402 | X_train, X_test, y_train, y_test = train_test_split(boston.data, 403 | boston.target, 404 | random_state=rng) 405 | 406 | ensemble = LazyBaggingRegressor(random_state=0).fit(X_train, y_train) 407 | 408 | assert_true(isinstance(ensemble.base_estimator_, DecisionTreeRegressor)) 409 | 410 | ensemble = LazyBaggingRegressor(DecisionTreeRegressor(), 411 | random_state=0).fit(X_train, y_train) 412 | 413 | assert_true(isinstance(ensemble.base_estimator_, DecisionTreeRegressor)) 414 | 415 | ensemble = LazyBaggingRegressor(SVR(), 416 | random_state=0).fit(X_train, y_train) 417 | assert_true(isinstance(ensemble.base_estimator_, SVR)) 418 | 419 | 420 | def test_reproducibility(): 421 | rng = check_random_state(0) 422 | 423 | # Classification 424 | X_train, X_test, y_train, y_test = train_test_split(iris.data, 425 | iris.target, 426 | random_state=rng) 427 | ensemble = LazyBaggingClassifier(random_state=rng) 428 | ensemble.fit(X_train, y_train) 429 | 430 | assert_array_equal(ensemble.predict(X_test), ensemble.predict(X_test)) 431 | 432 | # Regression 433 | X_train, X_test, y_train, y_test = train_test_split(boston.data, 434 | boston.target, 435 | random_state=rng) 436 | ensemble = LazyBaggingRegressor(random_state=rng) 437 | ensemble.fit(X_train, y_train) 438 | assert_array_equal(ensemble.predict(X_test), ensemble.predict(X_test)) 439 | 440 | 441 | def test_multioutput(): 442 | X, y = make_multilabel_classification(n_samples=100, n_labels=1, 443 | n_classes=5, random_state=0, 444 | return_indicator=True) 445 | 446 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) 447 | 448 | est = LazyBaggingClassifier(random_state=0, n_estimators=10, 449 | bootstrap=False) 450 | est.fit(X_train, y_train) 451 | 452 | assert_almost_equal(est.score(X_train, y_train), 1.) 453 | 454 | y_proba = est.predict_proba(X_test) 455 | y_log_proba = est.predict_log_proba(X_test) 456 | for p, log_p in zip(y_proba, y_log_proba): 457 | assert_array_almost_equal(p, np.exp(log_p)) 458 | 459 | est = LazyBaggingRegressor(random_state=0, n_estimators=10, 460 | bootstrap=False) 461 | est.fit(X_train, y_train) 462 | assert_almost_equal(est.score(X_train, y_train), 1.) 463 | 464 | 465 | if __name__ == "__main__": 466 | import nose 467 | nose.runmodule() 468 | -------------------------------------------------------------------------------- /random_output_trees/random_projection.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This module provides dimensionality reduction methods based on random 3 | projection. 
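It exposes Rademacher and Achlioptas random projections as well as sub-sampled
Hadamard and sub-sampled identity projections.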
4 | 5 | ''' 6 | # Authors: Arnaud Joly 7 | # 8 | # License: BSD 3 clause 9 | 10 | import numpy as np 11 | 12 | from scipy.linalg import hadamard as sp_hadamard 13 | from scipy import sparse 14 | 15 | from sklearn.random_projection import BaseRandomProjection 16 | from sklearn.random_projection import SparseRandomProjection 17 | 18 | from sklearn.utils.random import sample_without_replacement 19 | from sklearn.utils.validation import check_random_state 20 | 21 | __all__ = [ 22 | "RademacherRandomProjection", 23 | "AchlioptasRandomProjection", 24 | "SampledHadamardProjection", 25 | "SampledIdentityProjection", 26 | ] 27 | 28 | 29 | class RademacherRandomProjection(SparseRandomProjection): 30 | """Rademacher random projection 31 | 32 | The components of the random matrix 33 | are drawn from: 34 | 35 | - -sqrt(s) / sqrt(n_components) with probability 1 / 2 36 | - +sqrt(s) / sqrt(n_components) with probability 1 / 2 37 | 38 | Parameters 39 | ---------- 40 | n_components : int or 'auto', optional (default = 'auto') 41 | Dimensionality of the target projection space. 42 | 43 | n_components can be automatically adjusted according to the 44 | number of samples in the dataset and the bound given by the 45 | Johnson-Lindenstrauss lemma. In that case the quality of the 46 | embedding is controlled by the ``eps`` parameter. 47 | 48 | It should be noted that Johnson-Lindenstrauss lemma can yield 49 | very conservative estimated of the required number of components 50 | as it makes no assumption on the structure of the dataset. 51 | 52 | eps : strictly positive float, optional (default=0.1) 53 | Parameter to control the quality of the embedding according to 54 | the Johnson-Lindenstrauss lemma when n_components is set to 55 | 'auto'. 56 | 57 | Smaller values lead to better embedding and higher number of 58 | dimensions (n_components) in the target projection space. 59 | 60 | random_state : integer, RandomState instance or None (default=None) 61 | Control the pseudo random number generator used to generate the 62 | matrix at fit time. 63 | 64 | Attributes 65 | ---------- 66 | ``n_component_`` : int 67 | Concrete number of components computed when n_components="auto". 68 | 69 | ``components_`` : numpy array of shape [n_components, n_features] 70 | Random matrix used for the projection. 71 | 72 | """ 73 | def __init__(self, n_components="auto", eps=0.1, random_state=None): 74 | super(RademacherRandomProjection, self).__init__( 75 | n_components=n_components, 76 | eps=eps, 77 | density=1, 78 | dense_output=True, 79 | random_state=random_state) 80 | 81 | 82 | class AchlioptasRandomProjection(SparseRandomProjection): 83 | """Sparse random projection using Achlioptas random matrix 84 | 85 | If we note `s = 1 / density = 1 / 3 ` the components of the random matrix 86 | are drawn from: 87 | 88 | - -sqrt(s) / sqrt(n_components) with probability 1 / 2s 89 | - 0 with probability 1 - 1 / s 90 | - +sqrt(s) / sqrt(n_components) with probability 1 / 2s 91 | 92 | Parameters 93 | ---------- 94 | n_components : int or 'auto', optional (default = 'auto') 95 | Dimensionality of the target projection space. 96 | 97 | n_components can be automatically adjusted according to the 98 | number of samples in the dataset and the bound given by the 99 | Johnson-Lindenstrauss lemma. In that case the quality of the 100 | embedding is controlled by the ``eps`` parameter. 
101 | 102 | It should be noted that Johnson-Lindenstrauss lemma can yield 103 | very conservative estimated of the required number of components 104 | as it makes no assumption on the structure of the dataset. 105 | 106 | eps : strictly positive float, optional (default=0.1) 107 | Parameter to control the quality of the embedding according to 108 | the Johnson-Lindenstrauss lemma when n_components is set to 109 | 'auto'. 110 | 111 | Smaller values lead to better embedding and higher number of 112 | dimensions (n_components) in the target projection space. 113 | 114 | dense_output : boolean, optional (default=False) 115 | If True, ensure that the output of the random projection is a 116 | dense numpy array even if the input and random projection matrix 117 | are both sparse. In practice, if the number of components is 118 | small the number of zero components in the projected data will 119 | be very small and it will be more CPU and memory efficient to 120 | use a dense representation. 121 | 122 | If False, the projected data uses a sparse representation if 123 | the input is sparse. 124 | 125 | random_state : integer, RandomState instance or None (default=None) 126 | Control the pseudo random number generator used to generate the 127 | matrix at fit time. 128 | 129 | Attributes 130 | ---------- 131 | ``n_component_`` : int 132 | Concrete number of components computed when n_components="auto". 133 | 134 | ``components_`` : numpy array of shape [n_components, n_features] 135 | Random matrix used for the projection. 136 | 137 | """ 138 | def __init__(self, n_components="auto", eps=0.1, random_state=None, 139 | dense_output=False): 140 | super(AchlioptasRandomProjection, self).__init__( 141 | n_components=n_components, 142 | eps=eps, 143 | density=1. / 3, 144 | dense_output=dense_output, 145 | random_state=random_state) 146 | 147 | 148 | def subsampled_hadamard_matrix(n_components, n_features, random_state=None): 149 | """Sub-sampled hadamard matrix to have shape n_components and n_features 150 | 151 | A hadamard matrix of shape at (least n_components, n_features) is 152 | subsampled without replacement. 153 | 154 | Parameters 155 | ---------- 156 | n_components : int, 157 | Dimensionality of the target projection space. 158 | 159 | n_features : int, 160 | Dimensionality of the original source space. 161 | 162 | random_state : int, RandomState instance or None (default=None) 163 | Control the pseudo random number generator used to generate the 164 | matrix at fit time. 165 | 166 | Returns 167 | ------- 168 | components : numpy array of shape [n_components, n_features] 169 | The generated random matrix. 
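    Examples
    --------
    A minimal sketch (the entries depend on which rows and columns happen to be
    sub-sampled, but each one is +1/sqrt(n_components) or -1/sqrt(n_components)):

    >>> components = subsampled_hadamard_matrix(3, 8, random_state=0)
    >>> components.shape
    (3, 8)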
170 | 171 | """ 172 | if n_components <= 0: 173 | raise ValueError("n_components must be strictly positive, got %d" % 174 | n_components) 175 | if n_features <= 0: 176 | raise ValueError("n_features must be strictly positive, got %d" % 177 | n_components) 178 | 179 | random_state = check_random_state(random_state) 180 | n_hadmard_size = max(2 ** np.ceil(np.log2(x)) 181 | for x in (n_components, n_features)) 182 | 183 | row = sample_without_replacement(n_hadmard_size, n_components, 184 | random_state=random_state) 185 | col = sample_without_replacement(n_hadmard_size, n_features, 186 | random_state=random_state) 187 | hadamard_matrix = sp_hadamard(n_hadmard_size, dtype=np.float)[row][:, col] 188 | hadamard_matrix *= 1 / np.sqrt(n_components) 189 | return hadamard_matrix 190 | 191 | 192 | class SampledHadamardProjection(BaseRandomProjection): 193 | """Subsample Hadamard random projection 194 | 195 | The components of the random matrix are obtnained by subsampling the 196 | row and column of a sufficiently big Hadamard matrix. 197 | 198 | Parameters 199 | ---------- 200 | n_components : int or 'auto', optional (default = 'auto') 201 | Dimensionality of the target projection space. 202 | 203 | n_components can be automatically adjusted according to the 204 | number of samples in the dataset and the bound given by the 205 | Johnson-Lindenstrauss lemma. In that case the quality of the 206 | embedding is controlled by the ``eps`` parameter. 207 | 208 | It should be noted that Johnson-Lindenstrauss lemma can yield 209 | very conservative estimated of the required number of components 210 | as it makes no assumption on the structure of the dataset. 211 | 212 | eps : strictly positive float, optional (default=0.1) 213 | Parameter to control the quality of the embedding according to 214 | the Johnson-Lindenstrauss lemma when n_components is set to 215 | 'auto'. 216 | 217 | Smaller values lead to better embedding and higher number of 218 | dimensions (n_components) in the target projection space. 219 | 220 | random_state : integer, RandomState instance or None (default=None) 221 | Control the pseudo random number generator used to generate the 222 | matrix at fit time. 223 | 224 | Attributes 225 | ---------- 226 | ``n_component_`` : int 227 | Concrete number of components computed when n_components="auto". 228 | 229 | ``components_`` : numpy array of shape [n_components, n_features] 230 | Random matrix used for the projection. 231 | 232 | """ 233 | def __init__(self, n_components="auto", eps=0.1, random_state=None): 234 | super(SampledHadamardProjection, self).__init__( 235 | n_components=n_components, 236 | eps=eps, 237 | random_state=random_state) 238 | 239 | def _make_random_matrix(self, n_components, n_features): 240 | return subsampled_hadamard_matrix(n_components, n_features, 241 | random_state=self.random_state) 242 | 243 | 244 | def subsampled_identity_matrix(n_components, n_features, random_state=None, 245 | with_replacement=True): 246 | """Sub-sampled identity matrix to have shape n_components and n_features 247 | 248 | Parameters 249 | ---------- 250 | n_components : int, 251 | Dimensionality of the target projection space. 252 | 253 | n_features : int, 254 | Dimensionality of the original source space. 255 | 256 | random_state : int, RandomState instance or None (default=None) 257 | Control the pseudo random number generator used to generate the 258 | matrix at fit time. 259 | 260 | with_replacement : bool, 261 | Whether or not drawing components with replacements. 
262 | 263 | Returns 264 | ------- 265 | components : numpy array of shape [n_components, n_features] 266 | The generated random matrix. 267 | 268 | """ 269 | 270 | if n_components <= 0: 271 | raise ValueError("n_components must be strictly positive, got %d" % 272 | n_components) 273 | if n_features <= 0: 274 | raise ValueError("n_features must be strictly positive, got %d" % 275 | n_features) 276 | 277 | rng = check_random_state(random_state) 278 | 279 | components = sparse.dia_matrix((np.ones(n_features), [0]), 280 | shape=(n_features, n_features)).tocsr() 281 | if with_replacement: 282 | mask = rng.randint(n_features, size=(n_components,)) 283 | 284 | else: 285 | mask = sample_without_replacement(n_features, n_components, 286 | random_state=rng) 287 | 288 | components = components[mask] 289 | return components * np.sqrt(1.0 * n_features / n_components) 290 | 291 | 292 | class SampledIdentityProjection(BaseRandomProjection): 293 | """Subsampled identity matrix projection 294 | 295 | The components of the random matrix are obtained by subsampling the 296 | rows of the identity matrix. 297 | 298 | Parameters 299 | ---------- 300 | n_components : int or 'auto', optional (default = 'auto') 301 | Dimensionality of the target projection space. 302 | 303 | n_components can be automatically adjusted according to the 304 | number of samples in the dataset and the bound given by the 305 | Johnson-Lindenstrauss lemma. In that case the quality of the 306 | embedding is controlled by the ``eps`` parameter. 307 | 308 | It should be noted that the Johnson-Lindenstrauss lemma can yield 309 | very conservative estimates of the required number of components 310 | as it makes no assumption on the structure of the dataset. 311 | 312 | eps : strictly positive float, optional (default=0.1) 313 | Parameter to control the quality of the embedding according to 314 | the Johnson-Lindenstrauss lemma when n_components is set to 315 | 'auto'. 316 | 317 | Smaller values lead to better embedding and higher number of 318 | dimensions (n_components) in the target projection space. 319 | 320 | Note that the Johnson-Lindenstrauss lemma is not appropriate for a 321 | subsampled identity projection. 322 | 323 | random_state : integer, RandomState instance or None (default=None) 324 | Control the pseudo random number generator used to generate the 325 | matrix at fit time. 326 | 327 | Attributes 328 | ---------- 329 | ``n_components_`` : int 330 | Concrete number of components computed when n_components="auto". 331 | 332 | ``components_`` : numpy array of shape [n_components, n_features] 333 | Random matrix used for the projection.
334 | 335 | """ 336 | def __init__(self, n_components="auto", eps=0.1, random_state=None, 337 | dense_output=False, with_replacement=True): 338 | super(SampledIdentityProjection, self).__init__( 339 | n_components=n_components, 340 | eps=eps, 341 | dense_output=dense_output, 342 | random_state=random_state) 343 | 344 | self.with_replacement = with_replacement 345 | 346 | def _make_random_matrix(self, n_components, n_features): 347 | return subsampled_identity_matrix(n_components, n_features, 348 | self.random_state, 349 | with_replacement=self.with_replacement) 350 | -------------------------------------------------------------------------------- /random_output_trees/setup.py: -------------------------------------------------------------------------------- 1 | # Authors: Arnaud Joly 2 | # 3 | # License: BSD 3 clause 4 | 5 | import os 6 | 7 | 8 | def configuration(parent_package='', top_path=None): 9 | from numpy.distutils.misc_util import Configuration 10 | import numpy 11 | 12 | libraries = [] 13 | if os.name == 'posix': 14 | libraries.append('m') 15 | 16 | config = Configuration('random_output_trees', parent_package, 17 | top_path) 18 | 19 | 20 | 21 | for module in ["_tree", "_sklearn_tree", "_sklearn_tree_utils"]: 22 | config.add_extension(module, 23 | sources=["%s.c" % module], 24 | include_dirs=[numpy.get_include()], 25 | libraries=libraries, 26 | extra_compile_args=["-O3"]) 27 | 28 | 29 | # add the test directory 30 | config.add_subpackage('tests') 31 | 32 | return config 33 | 34 | if __name__ == '__main__': 35 | from numpy.distutils.core import setup 36 | setup(**configuration(top_path='').todict()) 37 | -------------------------------------------------------------------------------- /random_output_trees/tests/test_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | 5 | from sklearn.utils.testing import with_setup 6 | from sklearn.utils.testing import assert_equal 7 | 8 | from random_output_trees.datasets import fetch_drug_interaction 9 | from random_output_trees.datasets import fetch_protein_interaction 10 | from random_output_trees._utils import skipped 11 | 12 | tmpdir = None 13 | 14 | 15 | def setup_tmpdata(): 16 | # create temporary dir 17 | global tmpdir 18 | tmpdir = tempfile.mkdtemp() 19 | os.makedirs(os.path.join(tmpdir, 'drug-protein')) 20 | 21 | 22 | def teardown_tmpdata(): 23 | # remove temporary dir 24 | if tmpdir is not None: 25 | shutil.rmtree(tmpdir) 26 | 27 | @skipped 28 | @with_setup(setup_tmpdata, teardown_tmpdata) 29 | def test_fetch_drug_protein(): 30 | dataset = fetch_drug_interaction(tmpdir) 31 | 32 | assert_equal(dataset.data.shape, (1862, 660)) 33 | assert_equal(dataset.target.shape, (1862, 1554)) 34 | assert_equal(len(dataset.feature_names), 660) 35 | assert_equal(len(dataset.target_names), 1554) 36 | 37 | dataset = fetch_protein_interaction(tmpdir) 38 | assert_equal(dataset.data.shape, (1554, 876)) 39 | assert_equal(dataset.target.shape, (1554, 1862)) 40 | assert_equal(len(dataset.feature_names), 876) 41 | -------------------------------------------------------------------------------- /random_output_trees/tests/test_random_projection.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import numpy as np 4 | from scipy.sparse import issparse, coo_matrix, csr_matrix 5 | from sklearn.utils.testing import assert_raises 6 | from sklearn.utils.testing import assert_equal 7 | from 
sklearn.utils.testing import assert_array_almost_equal 8 | from sklearn.utils.testing import assert_almost_equal 9 | from sklearn.utils.testing import assert_raise_message 10 | from sklearn.utils.testing import assert_array_equal 11 | from sklearn.utils.testing import assert_warns 12 | 13 | from random_output_trees.random_projection import RademacherRandomProjection 14 | from random_output_trees.random_projection import AchlioptasRandomProjection 15 | from random_output_trees.random_projection import SampledHadamardProjection 16 | from random_output_trees.random_projection import SampledIdentityProjection 17 | 18 | from random_output_trees.random_projection import subsampled_hadamard_matrix 19 | from random_output_trees.random_projection import subsampled_identity_matrix 20 | 21 | 22 | RANDOM_PROJECTION = { 23 | "RademacherRandomProjection": RademacherRandomProjection, 24 | "AchlioptasRandomProjection": AchlioptasRandomProjection, 25 | "SampledHadamardProjection": SampledHadamardProjection, 26 | "SampledIdentityProjection": SampledIdentityProjection, 27 | "SampledIdentityProjection_without_replacement": 28 | partial(SampledIdentityProjection, with_replacement=False) 29 | } 30 | 31 | all_random_matrix = { 32 | "subsample_hadamard_matrix": subsampled_hadamard_matrix, 33 | "random_subsample_normalized": subsampled_identity_matrix, 34 | } 35 | 36 | def make_sparse_random_data(n_samples, n_features, n_nonzeros): 37 | rng = np.random.RandomState(0) 38 | data_coo = coo_matrix( 39 | (rng.randn(n_nonzeros), 40 | (rng.randint(n_samples, size=n_nonzeros), 41 | rng.randint(n_features, size=n_nonzeros))), 42 | shape=(n_samples, n_features)) 43 | return data_coo.toarray(), data_coo.tocsr() 44 | 45 | n_samples, n_features = (10, 1000) 46 | n_nonzeros = int(n_samples * n_features / 100.) 
47 | data, data_csr = make_sparse_random_data(n_samples, n_features, n_nonzeros) 48 | 49 | def densify(matrix): 50 | if not issparse(matrix): 51 | return matrix 52 | else: 53 | return matrix.toarray() 54 | 55 | def check_random_projection(name): 56 | RandomProjection = RANDOM_PROJECTION[name] 57 | 58 | # Invalid input 59 | assert_raises(ValueError, RandomProjection(n_components='auto').fit, 60 | [0, 1, 2]) 61 | assert_raises(ValueError, RandomProjection(n_components=-10).fit, data) 62 | 63 | # Try to transform before fit 64 | assert_raises(ValueError, RandomProjection(n_components='auto').transform, 65 | data) 66 | 67 | 68 | def test_too_many_samples_to_find_a_safe_embedding(): 69 | data, _ = make_sparse_random_data(1000, 100, 1000) 70 | 71 | for name, RandomProjection in RANDOM_PROJECTION.items(): 72 | rp = RandomProjection(n_components='auto', eps=0.1) 73 | expected_msg = ( 74 | 'eps=0.100000 and n_samples=1000 lead to a target dimension' 75 | ' of 5920 which is larger than the original space with' 76 | ' n_features=100') 77 | assert_raise_message(ValueError, expected_msg, rp.fit, data) 78 | 79 | 80 | 81 | def test_correct_RandomProjection_dimensions_embedding(): 82 | for name, RandomProjection in RANDOM_PROJECTION.items(): 83 | rp = RandomProjection(n_components='auto', 84 | random_state=0, 85 | eps=0.5).fit(data) 86 | 87 | # the number of components is adjusted from the shape of the training 88 | # set 89 | assert_equal(rp.n_components, 'auto') 90 | assert_equal(rp.n_components_, 110) 91 | 92 | assert_equal(rp.components_.shape, (110, n_features)) 93 | 94 | projected_1 = rp.transform(data) 95 | assert_equal(projected_1.shape, (n_samples, 110)) 96 | 97 | # once the RP is 'fitted' the projection is always the same 98 | projected_2 = rp.transform(data) 99 | assert_array_equal(projected_1, projected_2) 100 | 101 | # fit transform with same random seed will lead to the same results 102 | rp2 = RandomProjection(random_state=0, eps=0.5) 103 | projected_3 = rp2.fit_transform(data) 104 | assert_array_equal(projected_1, projected_3) 105 | 106 | # Try to transform with an input X of size different from fitted. 
107 | assert_raises(ValueError, rp.transform, data[:, 1:5]) 108 | 109 | 110 | def test_works_with_sparse_data(): 111 | n_features = 20 112 | data, _ = make_sparse_random_data(5, n_features, int(n_features / 4)) 113 | 114 | for name, RandomProjection in RANDOM_PROJECTION.items(): 115 | rp_dense = RandomProjection(n_components=3, 116 | random_state=1).fit(data) 117 | rp_sparse = RandomProjection(n_components=3, 118 | random_state=1).fit(csr_matrix(data)) 119 | assert_array_almost_equal(densify(rp_dense.components_), 120 | densify(rp_sparse.components_)) 121 | 122 | 123 | ############################################################################### 124 | # tests random matrix generation 125 | ############################################################################### 126 | def check_input_size_random_matrix(random_matrix): 127 | assert_raises(ValueError, random_matrix, 0, 0) 128 | assert_raises(ValueError, random_matrix, -1, 1) 129 | assert_raises(ValueError, random_matrix, 1, -1) 130 | assert_raises(ValueError, random_matrix, 1, 0) 131 | assert_raises(ValueError, random_matrix, -1, 0) 132 | 133 | 134 | def check_size_generated(random_matrix): 135 | assert_equal(random_matrix(1, 5).shape, (1, 5)) 136 | assert_equal(random_matrix(5, 1).shape, (5, 1)) 137 | assert_equal(random_matrix(5, 5).shape, (5, 5)) 138 | assert_equal(random_matrix(1, 1).shape, (1, 1)) 139 | 140 | 141 | def check_zero_mean_and_unit_norm(random_matrix): 142 | # All random matrix should produce a transformation matrix 143 | # with zero mean and unit norm for each columns 144 | 145 | A = densify(random_matrix(1000, 1, random_state=0)).ravel() 146 | assert_array_almost_equal(0, np.mean(A), 3) 147 | assert_array_almost_equal(1.0, np.linalg.norm(A), 1) 148 | 149 | 150 | def check_approximate_isometry(random_matrix): 151 | A = densify(random_matrix(50, 10, 0)) 152 | assert_almost_equal(np.mean(np.diag(np.dot(A.T, A))), 1.) 
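# Editorial note (not part of the original test module): the two helpers above
# verify that each generated matrix behaves as an approximate isometry, as
# expected from Johnson-Lindenstrauss-style arguments. For the subsampled
# Hadamard construction every entry is +/- 1 / sqrt(n_components), so each
# column of a (50, 10) matrix has squared norm exactly 1, e.g.
#
#     A = densify(subsampled_hadamard_matrix(50, 10, random_state=0))
#     np.mean(np.diag(np.dot(A.T, A)))   # close to 1.0, as the check asserts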
153 | 154 | def test_basic_property_of_random_matrix(): 155 | """Check basic properties of random matrix generation""" 156 | for name, random_matrix in all_random_matrix.items(): 157 | print(name) 158 | 159 | check_input_size_random_matrix(random_matrix) 160 | check_size_generated(random_matrix) 161 | if name != "random_subsample_normalized": 162 | check_zero_mean_and_unit_norm(random_matrix) 163 | check_approximate_isometry(random_matrix) 164 | 165 | 166 | def test_subsampled_identity_matrix_without_repl(): 167 | random_array = subsampled_identity_matrix(100, 1000, random_state=0, 168 | with_replacement=False) 169 | assert_array_almost_equal(random_array.toarray().sum(axis=1), 170 | 3.162278 * np.ones(100)) 171 | -------------------------------------------------------------------------------- /random_output_trees/tests/test_transformer.py: -------------------------------------------------------------------------------- 1 | from sklearn.base import BaseEstimator 2 | from sklearn.utils.testing import assert_array_almost_equal 3 | from sklearn.utils.testing import assert_equal 4 | from sklearn.utils import check_random_state 5 | 6 | from sklearn.random_projection import GaussianRandomProjection 7 | from random_output_trees.transformer import FixedStateTransformer 8 | 9 | class IdentityProjection(BaseEstimator): 10 | 11 | def fit(self, X): 12 | return self 13 | 14 | def transform(self, X): 15 | return X 16 | 17 | 18 | def test_fixed_state_transformer(): 19 | 20 | random_state = check_random_state(0) 21 | X = random_state.rand(500, 100) 22 | 23 | # Check that setting the random_seed is equivalent to setting the 24 | # random_state 25 | transf = GaussianRandomProjection(n_components=5, random_state=0) 26 | fixed_transf = FixedStateTransformer( 27 | GaussianRandomProjection(n_components=5), random_seed=0) 28 | assert_array_almost_equal(fixed_transf.fit_transform(X), 29 | transf.fit_transform(X)) 30 | 31 | # Check that the wrapped transformer's own random_state doesn't modify the results 32 | fixed_transf = FixedStateTransformer( 33 | GaussianRandomProjection(n_components=5, random_state=None)) 34 | 35 | fixed_transf2 = FixedStateTransformer( 36 | GaussianRandomProjection(random_state=1, n_components=5)) 37 | 38 | assert_array_almost_equal(fixed_transf.fit_transform(X), 39 | fixed_transf2.fit_transform(X)) 40 | 41 | # Check that it works when there is no random_state 42 | fixed_transf = FixedStateTransformer(IdentityProjection()) 43 | assert_array_almost_equal(fixed_transf.fit_transform(X), X) 44 | -------------------------------------------------------------------------------- /random_output_trees/tests/test_tree.py: -------------------------------------------------------------------------------- 1 | """ 2 | Testing for the tree module.
3 | """ 4 | from functools import partial 5 | 6 | from sklearn import datasets 7 | from sklearn.cross_validation import train_test_split 8 | 9 | from sklearn.decomposition import PCA 10 | 11 | 12 | from sklearn.utils.testing import assert_array_equal 13 | from sklearn.utils.testing import assert_almost_equal 14 | from sklearn.utils.testing import assert_equal 15 | 16 | from random_output_trees.tree import DecisionTreeClassifier 17 | from random_output_trees.tree import DecisionTreeRegressor 18 | 19 | from sklearn.random_projection import GaussianRandomProjection 20 | from sklearn.base import BaseEstimator, TransformerMixin 21 | 22 | class IdentityProjection(BaseEstimator, TransformerMixin): 23 | 24 | def fit(self, X): 25 | return self 26 | 27 | def transform(self, X): 28 | return X 29 | 30 | 31 | CLF_TREES = { 32 | "DecisionTreeClassifier": DecisionTreeClassifier, 33 | "Presort-DecisionTreeClassifier": partial(DecisionTreeClassifier, 34 | splitter="presort-best"), 35 | "ExtraTreeClassifier": partial(DecisionTreeClassifier, 36 | splitter="random"), 37 | } 38 | 39 | REG_TREES = { 40 | "DecisionTreeRegressor": DecisionTreeRegressor, 41 | "Presort-DecisionTreeRegressor": partial(DecisionTreeRegressor, 42 | splitter="presort-best"), 43 | "ExtraTreeRegressor": partial(DecisionTreeRegressor, 44 | splitter="random"), 45 | } 46 | 47 | ALL_TREES = dict() 48 | ALL_TREES.update(CLF_TREES) 49 | ALL_TREES.update(REG_TREES) 50 | 51 | 52 | def test_output_transformer(): 53 | X, y = datasets.make_multilabel_classification(return_indicator=True) 54 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) 55 | 56 | transformer = GaussianRandomProjection(n_components=10) 57 | for name, TreeEstimator in ALL_TREES.items(): 58 | est = TreeEstimator(random_state=0, output_transformer=transformer) 59 | est.fit(X_train, y_train) 60 | y_pred = est.predict(X_test) 61 | assert_equal(y_pred.shape, y_test.shape) 62 | 63 | 64 | def test_identity_output_transformer(): 65 | 66 | X, y = datasets.make_multilabel_classification(return_indicator=True, 67 | random_state=0) 68 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) 69 | transformer = IdentityProjection() 70 | 71 | for name, TreeEstimator in ALL_TREES.items(): 72 | est = TreeEstimator(random_state=0, max_features=None) 73 | est.fit(X_train, y_train) 74 | y_pred_origin = est.predict(X_test) 75 | 76 | 77 | est_transf = TreeEstimator(random_state=0, max_features=None, 78 | output_transformer=transformer) 79 | est_transf.fit(X_train, y_train) 80 | y_pred_transformed = est_transf.predict(X_test) 81 | assert_almost_equal(y_pred_origin, y_pred_transformed, decimal=5, 82 | err_msg="failed with {0}".format(name)) 83 | 84 | 85 | def test_pca_output_transformer(): 86 | X, y = datasets.make_multilabel_classification(return_indicator=True) 87 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) 88 | transformer = PCA(n_components=1) 89 | 90 | for name, TreeEstimator in ALL_TREES.items(): 91 | est_transf = TreeEstimator(random_state=0, 92 | max_features=None, 93 | output_transformer=transformer) 94 | est_transf.fit(X_train, y_train) 95 | y_pred_transformed = est_transf.predict(X_test) 96 | assert_equal(y_pred_transformed.shape, y_test.shape, 97 | msg="failed with {0}".format(name)) 98 | 99 | 100 | def test_importances_variance_equal_mse(): 101 | """Check that gini is equivalent to mse for binary output variable""" 102 | 103 | from sklearn.tree._tree import TREE_LEAF 104 | 105 | X, y = 
datasets.make_classification(n_samples=2000, 106 | n_features=10, 107 | n_informative=3, 108 | n_redundant=0, 109 | n_repeated=0, 110 | shuffle=False, 111 | random_state=0) 112 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) 113 | 114 | 115 | var = DecisionTreeClassifier(criterion="variance", 116 | random_state=0).fit(X_train, y_train) 117 | gini = DecisionTreeClassifier(criterion="gini", 118 | random_state=0).fit(X_train, y_train) 119 | reg = DecisionTreeRegressor(criterion="mse", 120 | random_state=0).fit(X_train, y_train) 121 | 122 | gini_leaves = gini.tree_.children_left == TREE_LEAF 123 | var_leaves = var.tree_.children_left == TREE_LEAF 124 | 125 | assert_array_equal(var.tree_.feature, reg.tree_.feature) 126 | assert_almost_equal(var.feature_importances_, reg.feature_importances_) 127 | assert_array_equal(var.tree_.children_left, reg.tree_.children_left) 128 | assert_array_equal(var.tree_.children_right, reg.tree_.children_right) 129 | assert_array_equal(var.tree_.n_node_samples, reg.tree_.n_node_samples) 130 | 131 | assert_array_equal(var.tree_.feature, gini.tree_.feature) 132 | assert_almost_equal(var.feature_importances_, gini.feature_importances_) 133 | assert_array_equal(var.tree_.children_left, gini.tree_.children_left) 134 | assert_array_equal(var.tree_.children_right, gini.tree_.children_right) 135 | assert_array_equal(var.tree_.n_node_samples, gini.tree_.n_node_samples) 136 | assert_almost_equal(var.tree_.value[var_leaves], gini.tree_.value[gini_leaves]) 137 | 138 | 139 | clf = DecisionTreeClassifier(criterion="gini", random_state=0, 140 | output_transformer=IdentityProjection(), 141 | ).fit(X_train, y_train) 142 | 143 | clf_leaves = clf.tree_.children_left == TREE_LEAF 144 | assert_array_equal(clf.tree_.feature, reg.tree_.feature) 145 | assert_almost_equal(clf.feature_importances_, reg.feature_importances_) 146 | assert_array_equal(clf.tree_.children_left, reg.tree_.children_left) 147 | assert_array_equal(clf.tree_.children_right, reg.tree_.children_right) 148 | assert_array_equal(clf.tree_.n_node_samples, reg.tree_.n_node_samples) 149 | assert_array_equal(clf.tree_.n_node_samples, reg.tree_.n_node_samples) 150 | 151 | assert_array_equal(clf.tree_.feature, gini.tree_.feature) 152 | assert_almost_equal(clf.feature_importances_, gini.feature_importances_) 153 | assert_array_equal(clf.tree_.children_left, gini.tree_.children_left) 154 | assert_array_equal(clf.tree_.children_right, gini.tree_.children_right) 155 | assert_array_equal(clf.tree_.n_node_samples, gini.tree_.n_node_samples) 156 | assert_almost_equal(clf.tree_.value[clf_leaves], gini.tree_.value[gini_leaves]) 157 | -------------------------------------------------------------------------------- /random_output_trees/tests/test_validations.py: -------------------------------------------------------------------------------- 1 | """Tests for input validation functions""" 2 | 3 | import numpy as np 4 | import scipy.sparse as sp 5 | from nose.tools import assert_raises, assert_true, assert_false, assert_equal 6 | from itertools import product 7 | 8 | 9 | # from sklearn.utils.estimator_checks import NotAnArray 10 | 11 | 12 | from sklearn.neighbors import KNeighborsClassifier 13 | from sklearn.ensemble import RandomForestRegressor 14 | from sklearn.svm import SVR 15 | 16 | from random_output_trees._utils import has_fit_parameter 17 | from random_output_trees._utils import check_array 18 | 19 | 20 | def test_ordering(): 21 | """Check that ordering is enforced correctly by validation utilities. 
22 | 23 | We need to check each validation utility, because a 'copy' without 24 | 'order=K' will kill the ordering. 25 | """ 26 | X = np.ones((10, 5)) 27 | for A in X, X.T: 28 | for copy in (True, False): 29 | B = check_array(A, order='C', copy=copy) 30 | assert_true(B.flags['C_CONTIGUOUS']) 31 | B = check_array(A, order='F', copy=copy) 32 | assert_true(B.flags['F_CONTIGUOUS']) 33 | if copy: 34 | assert_false(A is B) 35 | 36 | X = sp.csr_matrix(X) 37 | X.data = X.data[::-1] 38 | assert_false(X.data.flags['C_CONTIGUOUS']) 39 | 40 | for copy in (True, False): 41 | Y = check_array(X, accept_sparse='csr', copy=copy, order='C') 42 | assert_true(Y.data.flags['C_CONTIGUOUS']) 43 | 44 | 45 | def test_check_array(): 46 | # accept_sparse == None 47 | # raise error on sparse inputs 48 | X = [[1, 2], [3, 4]] 49 | X_csr = sp.csr_matrix(X) 50 | assert_raises(TypeError, check_array, X_csr) 51 | # ensure_2d 52 | X_array = check_array([0, 1, 2]) 53 | assert_equal(X_array.ndim, 2) 54 | X_array = check_array([0, 1, 2], ensure_2d=False) 55 | assert_equal(X_array.ndim, 1) 56 | # don't allow ndim > 3 57 | X_ndim = np.arange(8).reshape(2, 2, 2) 58 | assert_raises(ValueError, check_array, X_ndim) 59 | check_array(X_ndim, allow_nd=True) # doesn't raise 60 | # force_all_finite 61 | X_inf = np.arange(4).reshape(2, 2).astype(np.float) 62 | X_inf[0, 0] = np.inf 63 | assert_raises(ValueError, check_array, X_inf) 64 | check_array(X_inf, force_all_finite=False) # no raise 65 | # nan check 66 | X_nan = np.arange(4).reshape(2, 2).astype(np.float) 67 | X_nan[0, 0] = np.nan 68 | assert_raises(ValueError, check_array, X_nan) 69 | check_array(X_inf, force_all_finite=False) # no raise 70 | 71 | # dtype and order enforcement. 72 | X_C = np.arange(4).reshape(2, 2).copy("C") 73 | X_F = X_C.copy("F") 74 | X_int = X_C.astype(np.int) 75 | X_float = X_C.astype(np.float) 76 | Xs = [X_C, X_F, X_int, X_float] 77 | dtypes = [np.int32, np.int, np.float, np.float32, None, np.bool, object] 78 | orders = ['C', 'F', None] 79 | copys = [True, False] 80 | 81 | for X, dtype, order, copy in product(Xs, dtypes, orders, copys): 82 | X_checked = check_array(X, dtype=dtype, order=order, copy=copy) 83 | if dtype is not None: 84 | assert_equal(X_checked.dtype, dtype) 85 | else: 86 | assert_equal(X_checked.dtype, X.dtype) 87 | if order == 'C': 88 | assert_true(X_checked.flags['C_CONTIGUOUS']) 89 | assert_false(X_checked.flags['F_CONTIGUOUS']) 90 | elif order == 'F': 91 | assert_true(X_checked.flags['F_CONTIGUOUS']) 92 | assert_false(X_checked.flags['C_CONTIGUOUS']) 93 | if copy: 94 | assert_false(X is X_checked) 95 | else: 96 | # doesn't copy if it was already good 97 | if (X.dtype == X_checked.dtype and 98 | X_checked.flags['C_CONTIGUOUS'] == X.flags['C_CONTIGUOUS'] 99 | and X_checked.flags['F_CONTIGUOUS'] == X.flags['F_CONTIGUOUS']): 100 | assert_true(X is X_checked) 101 | 102 | # allowed sparse != None 103 | X_csc = sp.csc_matrix(X_C) 104 | X_coo = X_csc.tocoo() 105 | X_dok = X_csc.todok() 106 | X_int = X_csc.astype(np.int) 107 | X_float = X_csc.astype(np.float) 108 | 109 | Xs = [X_csc, X_coo, X_dok, X_int, X_float] 110 | accept_sparses = [['csr', 'coo'], ['coo', 'dok']] 111 | for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses, 112 | copys): 113 | X_checked = check_array(X, dtype=dtype, accept_sparse=accept_sparse, 114 | copy=copy) 115 | if dtype is not None: 116 | assert_equal(X_checked.dtype, dtype) 117 | else: 118 | assert_equal(X_checked.dtype, X.dtype) 119 | if X.format in accept_sparse: 120 | # no change if allowed 121 | 
assert_equal(X.format, X_checked.format) 122 | else: 123 | # got converted 124 | assert_equal(X_checked.format, accept_sparse[0]) 125 | if copy: 126 | assert_false(X is X_checked) 127 | else: 128 | # doesn't copy if it was already good 129 | if (X.dtype == X_checked.dtype and X.format == X_checked.format): 130 | assert_true(X is X_checked) 131 | 132 | # other input formats 133 | # convert lists to arrays 134 | X_dense = check_array([[1, 2], [3, 4]]) 135 | assert_true(isinstance(X_dense, np.ndarray)) 136 | # raise on too deep lists 137 | assert_raises(ValueError, check_array, X_ndim.tolist()) 138 | check_array(X_ndim.tolist(), allow_nd=True) # doesn't raise 139 | # convert weird stuff to arrays 140 | # X_no_array = NotAnArray(X_dense) 141 | # result = check_array(X_no_array) 142 | # assert_true(isinstance(result, np.ndarray)) 143 | 144 | def test_has_fit_parameter(): 145 | assert_false(has_fit_parameter(KNeighborsClassifier, "sample_weight")) 146 | assert_true(has_fit_parameter(RandomForestRegressor, "sample_weight")) 147 | assert_true(has_fit_parameter(SVR, "sample_weight")) 148 | assert_true(has_fit_parameter(SVR(), "sample_weight")) 149 | -------------------------------------------------------------------------------- /random_output_trees/transformer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This module provides a general purpose meta-transformer. 3 | 4 | ''' 5 | 6 | # Authors: Arnaud Joly 7 | # 8 | # License: BSD 3 clause 9 | 10 | from sklearn.base import BaseEstimator 11 | from sklearn.base import clone 12 | from sklearn.base import TransformerMixin 13 | from sklearn.utils import check_random_state 14 | 15 | 16 | class FixedStateTransformer(BaseEstimator, TransformerMixin): 17 | """Fix the random_state of a transformer 18 | 19 | This meta-transformer is useful when you want to fix the random_state 20 | of a transformer that would otherwise be modified by some meta-estimator. 21 | 22 | Parameters 23 | ---------- 24 | transformer : scikit-learn transformer 25 | 26 | random_seed : int, RandomState instance, optional (default=0) 27 | If int, random_seed is the seed used by the random number generator; 28 | If RandomState instance, random_seed is the random number generator; 29 | 30 | Attributes 31 | ---------- 32 | transformer_ : transformer 33 | A clone of the fitted transformer 34 | 35 | """ 36 | def __init__(self, transformer, random_seed=0): 37 | self.transformer = transformer 38 | self.random_seed = random_seed 39 | 40 | self.transformer_ = None 41 | 42 | @property 43 | def random_state(self): 44 | return self.random_seed 45 | 46 | def fit(self, X, y=None): 47 | """Fit estimator. 48 | 49 | Parameters 50 | ---------- 51 | X : array-like, shape=(n_samples, n_features) 52 | Input data used to fit the transformer. 53 | 54 | Returns 55 | ------- 56 | self : object 57 | Returns self. 58 | """ 59 | random_state = check_random_state(self.random_seed) 60 | self.transformer_ = clone(self.transformer) 61 | 62 | try: 63 | self.transformer_.set_params(random_state=random_state) 64 | except ValueError: 65 | pass 66 | 67 | try: 68 | self.transformer_.fit(X, y) 69 | except TypeError: 70 | self.transformer_.fit(X) 71 | 72 | return self 73 | 74 | def transform(self, X): 75 | """Transform dataset. 76 | 77 | Parameters 78 | ---------- 79 | X : array-like, shape=(n_samples, n_features) 80 | Input data to be transformed. 81 | 82 | Returns 83 | ------- 84 | X_transformed : array or sparse matrix, shape=(n_samples, n_out) 85 | Transformed dataset.
86 | """ 87 | return self.transformer_.transform(X) 88 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [nosetests] 2 | # nosetests skips test files with the executable bit by default 3 | # which can silently hide failing tests. 4 | # There are no executable scripts within the scikit-learn project 5 | # so let's turn the --exe flag on to avoid skipping tests by 6 | # mistake. 7 | exe = 1 8 | cover-html = 1 9 | cover-html-dir = coverage 10 | cover-package = random_output_trees 11 | 12 | detailed-errors = 1 13 | with-doctest = 1 14 | doctest-tests = 1 15 | doctest-extension = rst 16 | doctest-fixtures = _fixture 17 | #doctest-options = +ELLIPSIS,+NORMALIZE_WHITESPACE 18 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # 3 | # Author : Arnaud Joly 4 | # 5 | # License: BSD 3 clause 6 | 7 | import sys 8 | import os 9 | import shutil 10 | from distutils.command.clean import clean as Clean 11 | 12 | DISTNAME = 'random-output-trees' 13 | DESCRIPTION = "High dimension output tree classifier and regressor" 14 | LONG_DESCRIPTION = open('README.rst').read() 15 | MAINTAINER = 'Arnaud Joly' 16 | MAINTAINER_EMAIL = 'arnaud.v.joly@gmail.com' 17 | URL = 'http://arjoly.github.io/random-output-trees/' 18 | LICENSE = 'BSD' 19 | DOWNLOAD_URL = 'https://github.com/arjoly/random-output-trees/archive/master.zip' 20 | CLASSIFIERS = [ 21 | 'Intended Audience :: Science/Research', 22 | 'Intended Audience :: Developers', 23 | 'License :: OSI Approved', 24 | 'Programming Language :: C', 25 | 'Programming Language :: Python', 26 | 'Topic :: Software Development', 27 | 'Topic :: Scientific/Engineering', 28 | 'Operating System :: Microsoft :: Windows', 29 | 'Operating System :: POSIX', 30 | 'Operating System :: Unix', 31 | 'Operating System :: MacOS' 32 | ] 33 | 34 | import random_output_trees 35 | VERSION = random_output_trees.__version__ 36 | 37 | import setuptools # we are using a setuptools namespace 38 | from numpy.distutils.core import setup 39 | 40 | class CleanCommand(Clean): 41 | description = "Remove build directories, and compiled file in the source tree" 42 | 43 | def run(self): 44 | Clean.run(self) 45 | if os.path.exists('build'): 46 | shutil.rmtree('build') 47 | for dirpath, dirnames, filenames in os.walk('random_output_trees'): 48 | for filename in filenames: 49 | if (filename.endswith('.so') or filename.endswith('.pyd') 50 | or filename.endswith('.dll') 51 | or filename.endswith('.pyc')): 52 | os.unlink(os.path.join(dirpath, filename)) 53 | for dirname in dirnames: 54 | if dirname == '__pycache__': 55 | shutil.rmtree(os.path.join(dirpath, dirname)) 56 | 57 | 58 | def configuration(parent_package='', top_path=None): 59 | if os.path.exists('MANIFEST'): 60 | os.remove('MANIFEST') 61 | 62 | from numpy.distutils.misc_util import Configuration 63 | config = Configuration(None, parent_package, top_path) 64 | 65 | config.add_subpackage('random_output_trees') 66 | 67 | return config 68 | 69 | if __name__ == "__main__": 70 | 71 | old_path = os.getcwd() 72 | local_path = os.path.dirname(os.path.abspath(sys.argv[0])) 73 | 74 | os.chdir(local_path) 75 | sys.path.insert(0, local_path) 76 | 77 | setup(configuration=configuration, 78 | name=DISTNAME, 79 | maintainer=MAINTAINER, 80 | include_package_data=True, 81 | 
maintainer_email=MAINTAINER_EMAIL, 82 | description=DESCRIPTION, 83 | license=LICENSE, 84 | url=URL, 85 | version=VERSION, 86 | download_url=DOWNLOAD_URL, 87 | long_description=LONG_DESCRIPTION, 88 | zip_safe=False, # the package can run out of an .egg file 89 | classifiers=CLASSIFIERS, 90 | cmdclass={'clean': CleanCommand}, 91 | ) 92 | --------------------------------------------------------------------------------
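Taken together, the modules above suggest the following minimal usage sketch. It is assembled from the public names exercised in test_tree.py, test_transformer.py and test_random_projection.py; the dataset and parameter values are illustrative only and are not part of the repository::

    import numpy as np
    from sklearn.datasets import make_multilabel_classification

    from random_output_trees.tree import DecisionTreeClassifier
    from random_output_trees.random_projection import SampledHadamardProjection
    from random_output_trees.transformer import FixedStateTransformer

    X, y = make_multilabel_classification(return_indicator=True, random_state=0)

    # Compress the label space before fitting; FixedStateTransformer pins the
    # projection's random_state so that meta-estimators cannot silently re-seed it.
    projector = FixedStateTransformer(SampledHadamardProjection(n_components=5),
                                      random_seed=0)
    tree = DecisionTreeClassifier(random_state=0, output_transformer=projector)
    tree.fit(X, y)

    y_pred = tree.predict(X)          # predictions are returned in the original label space
    print(y_pred.shape == y.shape)    # expected: True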