├── .gitignore
├── .nojekyll
├── .travis.yml
├── LICENSE
├── Makefile
├── README.rst
├── appveyor.yml
├── benchmarks
│   ├── bench_20newsgroups.py
│   └── bench_other_libs.py
├── ci_scripts
│   ├── appveyor
│   │   ├── install.ps1
│   │   └── run_with_env.cmd
│   ├── install.sh
│   ├── push_doc.sh
│   ├── success.sh
│   └── test.sh
├── circle.yml
├── doc
│   ├── Makefile
│   ├── _templates
│   │   ├── class.rst
│   │   ├── function.rst
│   │   └── layout.html
│   ├── conf.py
│   ├── index.rst
│   ├── make.bat
│   ├── references.rst
│   └── sphinxext
│       ├── LICENSE.txt
│       ├── MANIFEST.in
│       ├── README.txt
│       ├── gen_rst.py
│       └── numpy_ext
│           ├── __init__.py
│           ├── docscrape.py
│           ├── docscrape_sphinx.py
│           └── numpydoc.py
├── examples
│   ├── README.txt
│   ├── plot_regularization_path.py
│   └── plot_xor.py
├── polylearn
│   ├── __init__.py
│   ├── base.py
│   ├── cd_direct_fast.cpp
│   ├── cd_direct_fast.pyx
│   ├── cd_lifted_fast.cpp
│   ├── cd_lifted_fast.pyx
│   ├── cd_linear_fast.cpp
│   ├── cd_linear_fast.pxd
│   ├── cd_linear_fast.pyx
│   ├── factorization_machine.py
│   ├── kernels.py
│   ├── loss.py
│   ├── loss_fast.cpp
│   ├── loss_fast.pxd
│   ├── loss_fast.pyx
│   ├── polynomial_network.py
│   ├── setup.py
│   └── tests
│       ├── __init__.py
│       ├── test_cd_linear.py
│       ├── test_common.py
│       ├── test_factorization_machine.py
│       ├── test_kernels.py
│       └── test_polynomial_network.py
├── setup.cfg
└── setup.py
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask instance folder 57 | instance/ 58 | 59 | # Scrapy stuff: 60 | .scrapy 61 | 62 | # Sphinx documentation 63 | docs/_build/ 64 | 65 | # PyBuilder 66 | target/ 67 | 68 | # IPython Notebook 69 | .ipynb_checkpoints 70 | 71 | # pyenv 72 | .python-version 73 | 74 | # dotenv 75 | .env 76 | 77 | # ide 78 | .idea 79 | 80 | 81 | doc/_build/ 82 | doc/generated/ 83 | doc/auto_examples/ 84 | doc/modules/generated/ 85 | doc/datasets/generated/ 86 | .coverage 87 | coverage 88 | tags 89 | coverages.zip 90 | samples.zip 91 | doc/coverages.zip 92 | doc/samples.zip 93 | coverages 94 | samples 95 | doc/coverages 96 | doc/samples -------------------------------------------------------------------------------- /.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/polylearn/4dd9d4b8aca029628a4c934829526b8552db2e1b/.nojekyll -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | cache: 4 | apt: true 5 | # We use three different cache directory 6 | # to work around a Travis bug with multi-platform cache 7 | directories: 8 | - $HOME/.cache/pip 9 | - $HOME/download 10 | env: 11 | global: 12 | # Directory where tests are run from 13 | - TEST_DIR=/tmp/test_dir/ 14 | - MODULE=polylearn 15 | matrix: 16 | - DISTRIB="conda" PYTHON_VERSION="2.7" 17 | NUMPY_VERSION="1.7.1" SCIPY_VERSION="0.12.0" CYTHON_VERSION="0.21" 18 | SKLEARN_VERSION="0.16.1" 19 | - DISTRIB="conda" PYTHON_VERSION="3.5" COVERAGE="true" 20 | NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4" 21 | SKLEARN_VERSION="0.17.1" 22 | 23 | install: source ci_scripts/install.sh 24 | script: bash ci_scripts/test.sh 25 | after_success: source ci_scripts/success.sh 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Vlad Niculae 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation and/or 12 | other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
17 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 18 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 20 | OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 21 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 22 | OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 23 | THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTHON ?= python 2 | CYTHON ?= cython 3 | NOSETESTS ?= nosetests 4 | 5 | # Compilation... 6 | 7 | CYTHONSRC= $(wildcard polylearn/*.pyx) 8 | CSRC= $(CYTHONSRC:.pyx=.cpp) 9 | 10 | inplace: 11 | $(PYTHON) setup.py build_ext -i 12 | 13 | all: cython inplace 14 | 15 | cython: $(CSRC) 16 | 17 | clean: 18 | rm -f polylearn/*.c polylearn/*.cpp polylearn/*.html 19 | rm -f `find polylearn -name "*.pyc"` 20 | rm -f `find polylearn -name "*.so"` 21 | 22 | %.cpp: %.pyx 23 | $(CYTHON) --cplus $< 24 | 25 | # Tests... 26 | # 27 | test-code: inplace 28 | $(NOSETESTS) -s polylearn 29 | 30 | test-coverage: 31 | $(NOSETESTS) -s --with-coverage --cover-html --cover-html-dir=coverage \ 32 | --cover-package=polylearn polylearn 33 | 34 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | polylearn 4 | ========= 5 | 6 | A library for **factorization machines** and **polynomial networks** 7 | for classification and regression in Python. 8 | 9 | `Github repository <https://github.com/scikit-learn-contrib/polylearn>`_. 10 | 11 | .. image:: https://travis-ci.org/scikit-learn-contrib/polylearn.svg?branch=master 12 | :target: https://travis-ci.org/scikit-learn-contrib/polylearn 13 | 14 | .. image:: https://ci.appveyor.com/api/projects/status/g9xnar9081l3vsw7/branch/master?svg=true 15 | :target: https://ci.appveyor.com/project/vene/polylearn 16 | 17 | .. image:: https://coveralls.io/repos/scikit-learn-contrib/polylearn/badge.svg?branch=master&service=github 18 | :target: https://coveralls.io/r/scikit-learn-contrib/polylearn 19 | 20 | .. image:: https://circleci.com/gh/scikit-learn-contrib/polylearn/tree/master.svg?style=shield&circle-token=:circle-token 21 | :target: https://circleci.com/gh/scikit-learn-contrib/polylearn/ 22 | 23 | Factorization machines and polynomial networks are machine learning models 24 | that can capture **feature interactions** (co-occurrence) through polynomial terms. 25 | Because feature interactions can be very sparse, it's common to use **low-rank, 26 | factorized representations**; this way, we can learn weights even for feature 27 | co-occurrences that haven't been observed at training time. 28 | 29 | Factorization machines are popular for recommender systems, as they are a 30 | generalization of matrix completion models. 31 | 32 | This package provides: 33 | 34 | - a coordinate descent algorithm for fitting factorization machines of degree 2 or 3, 35 | - a coordinate descent algorithm for fitting polynomial networks of arbitrary degree, 36 | - a `scikit-learn <http://scikit-learn.org>`_-compatible API, 37 | - `Cython <http://cython.org>`_ implementations for computationally intensive parts. 38 | 39 | Installation 40 | ------------ 41 | 42 | Binary packages are not yet available.
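Once installed (see below), the estimators follow the usual scikit-learn fit/predict API. Here is a minimal sketch on synthetic toy data; the estimator name and parameters mirror the ones exercised in ``benchmarks/bench_20newsgroups.py``, while the dataset itself is purely illustrative::

    from sklearn.datasets import make_classification
    from sklearn.metrics import accuracy_score

    from polylearn import FactorizationMachineClassifier

    # toy binary classification problem (placeholder for real data)
    X, y = make_classification(n_samples=200, n_features=20, random_state=0)

    # degree-2 factorization machine with rank-30 factorized interaction weights
    fm = FactorizationMachineClassifier(n_components=30, degree=2,
                                        max_iter=10, random_state=0)
    fm.fit(X, y)
    print(accuracy_score(y, fm.predict(X)))

The same pattern applies to ``PolynomialNetworkClassifier`` and to the regressors.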
43 | 44 | The development version of polylearn can be installed from its git repository. In 45 | this case it is assumed that you have a working 46 | C++ compiler. 47 | 48 | 1. Obtain the sources by:: 49 | 50 | git clone https://github.com/scikit-learn-contrib/polylearn.git 51 | 52 | or, if `git` is unavailable, `download as a ZIP from GitHub <https://github.com/scikit-learn-contrib/polylearn/archive/master.zip>`_. 53 | 54 | 55 | 2. Install the dependencies:: 56 | 57 | # via pip 58 | 59 | pip install numpy scipy scikit-learn nose 60 | pip install sklearn-contrib-lightning 61 | 62 | 63 | # via conda 64 | 65 | conda install numpy scipy scikit-learn nose 66 | conda install -c conda-forge sklearn-contrib-lightning 67 | 68 | 69 | 3. Build and install polylearn:: 70 | 71 | cd polylearn 72 | python setup.py build 73 | sudo python setup.py install 74 | 75 | 76 | References 77 | ---------- 78 | 79 | The solvers implemented are introduced in [1]_. Factorization machines are introduced 80 | in [2]_ and polynomial networks in [3]_. 81 | 82 | .. [1] Mathieu Blondel, Masakazu Ishihata, Akinori Fujino, Naonori Ueda. 83 | *Polynomial Networks and Factorization Machines: New Insights and 84 | Efficient Training Algorithms.* In: Proc. of ICML 2016. 85 | [`PDF `_] 86 | 87 | .. [2] Steffen Rendle. *Factorization machines.* In: Proc. of IEEE ICDM 2010. 88 | [`PDF `_] 89 | 90 | .. [3] Roi Livni, Shai Shalev-Shwartz, Ohad Shamir. 91 | *On the computational efficiency of training neural networks.* 92 | In: Proc. of NIPS 2014. 93 | [`arXiv `_] 94 | 95 | Authors 96 | ------- 97 | 98 | - Vlad Niculae, 2016-present 99 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # AppVeyor.com is a Continuous Integration service to build and run tests under 2 | # Windows 3 | environment: 4 | global: 5 | # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the 6 | # /E:ON and /V:ON options are not enabled in the batch script interpreter 7 | # See: http://stackoverflow.com/a/13751649/163740 8 | CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci_scripts\\appveyor\\run_with_env.cmd" 9 | 10 | matrix: 11 | - PYTHON: "C:\\Python27" 12 | PYTHON_VERSION: "2.7.15" 13 | PYTHON_ARCH: "32" 14 | MINICONDA: "C:\\Miniconda" 15 | 16 | - PYTHON: "C:\\Python27-x64" 17 | PYTHON_VERSION: "2.7.15" 18 | PYTHON_ARCH: "64" 19 | MINICONDA: "C:\\Miniconda-x64" 20 | 21 | - PYTHON: "C:\\Python37" 22 | PYTHON_VERSION: "3.7.2" 23 | PYTHON_ARCH: "32" 24 | MINICONDA: "C:\\Miniconda37" 25 | 26 | - PYTHON: "C:\\Python37-x64" 27 | PYTHON_VERSION: "3.7.2" 28 | PYTHON_ARCH: "64" 29 | MINICONDA: "C:\\Miniconda37-x64" 30 | 31 | install: 32 | # Miniconda is pre-installed in the worker build 33 | - "SET PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%" 34 | - "python -m pip install -U pip" 35 | 36 | # Check that we have the expected version and architecture for Python 37 | - "python --version" 38 | - "python -c \"import struct; print(struct.calcsize('P') * 8)\"" 39 | - "pip --version" 40 | 41 | # Remove cygwin because it clashes with conda 42 | # see http://help.appveyor.com/discussions/problems/3712-git-remote-https-seems-to-be-broken 43 | - rmdir C:\\cygwin /s /q 44 | 45 | # Update previous packages and install the build and runtime dependencies of the project.
46 | - conda update --all --yes 47 | - conda install --quiet --yes numpy scipy cython nose scikit-learn wheel 48 | - conda install --quiet --yes -c conda-forge sklearn-contrib-lightning 49 | - conda install --quiet --yes conda-build 50 | - "%CMD_IN_ENV% python setup.py bdist_wheel bdist_wininst" 51 | 52 | - ps: "ls dist" 53 | # # build the conda package 54 | # - "%CMD_IN_ENV% conda build build_tools/conda-recipe --quiet" 55 | # 56 | # # Move the conda package into the dist directory, to register it 57 | # # as an "artifact" for Appveyor. cmd.exe doesn't have good globbing, so 58 | # # we'll use a simple python script. 59 | # - python build_tools/move-conda-package.py build_tools/conda-recipe 60 | # 61 | # # Install the generated wheel package to test it 62 | - "pip install --pre --no-index --find-links dist/ polylearn" 63 | 64 | # Not a .NET project, we build polylearn in the install step instead 65 | build: false 66 | 67 | test_script: 68 | # Change to a non-source folder to make sure we run the tests on the 69 | # installed library. 70 | - "mkdir empty_folder" 71 | - "cd empty_folder" 72 | 73 | - "python -c \"import nose; nose.main()\" -s -v polylearn" 74 | 75 | # Move back to the project folder 76 | - "cd .." 77 | 78 | artifacts: 79 | # Archive the generated wheel package in the ci.appveyor.com build report. 80 | - path: dist\* 81 | 82 | 83 | cache: 84 | - '%APPDATA%\pip\Cache' 85 | -------------------------------------------------------------------------------- /benchmarks/bench_20newsgroups.py: -------------------------------------------------------------------------------- 1 | # Benchmark polynomial classifiers on bag-of-words text classification 2 | # Inspired by: https://github.com/scikit-learn/scikit-learn/blob/master 3 | # /benchmarks/bench_20newsgroups.py 4 | 5 | from time import time 6 | 7 | import numpy as np 8 | import scipy.sparse as sp 9 | 10 | from sklearn.base import clone 11 | from sklearn.metrics import accuracy_score, f1_score 12 | from sklearn.datasets import fetch_20newsgroups_vectorized 13 | 14 | from polylearn import (FactorizationMachineClassifier, 15 | PolynomialNetworkClassifier) 16 | 17 | 18 | estimators = { 19 | 'fm-2': FactorizationMachineClassifier(n_components=30, 20 | fit_linear=False, 21 | fit_lower=None, 22 | degree=2, 23 | random_state=0, 24 | max_iter=10), 25 | 26 | 'polynet-2': PolynomialNetworkClassifier(n_components=15, degree=2, 27 | fit_lower=None, 28 | max_iter=10, 29 | random_state=0) 30 | } 31 | 32 | estimators['fm-3'] = clone(estimators['fm-2']).set_params(degree=3) 33 | estimators['polynet-3'] = (clone(estimators['polynet-2']) 34 | .set_params(degree=3, n_components=10)) 35 | 36 | if __name__ == '__main__': 37 | data_train = fetch_20newsgroups_vectorized(subset="train") 38 | data_test = fetch_20newsgroups_vectorized(subset="test") 39 | X_train = sp.csc_matrix(data_train.data) 40 | X_test = sp.csc_matrix(data_test.data) 41 | 42 | y_train = data_train.target == 0 # atheism vs rest 43 | y_test = data_test.target == 0 44 | 45 | print("20 newsgroups") 46 | print("=============") 47 | print("X_train.shape = {0}".format(X_train.shape)) 48 | print("X_train.format = {0}".format(X_train.format)) 49 | print("X_train.dtype = {0}".format(X_train.dtype)) 50 | print("X_train density = {0}" 51 | "".format(X_train.nnz / np.product(X_train.shape))) 52 | print("y_train {0}".format(y_train.shape)) 53 | print("X_test {0}".format(X_test.shape)) 54 | print("X_test.format = {0}".format(X_test.format)) 55 | print("X_test.dtype = {0}".format(X_test.dtype)) 56 |
print("y_test {0}".format(y_test.shape)) 57 | print() 58 | 59 | print("Classifier Training") 60 | print("===================") 61 | f1, accuracy, train_time, test_time = {}, {}, {}, {} 62 | 63 | for name, clf in sorted(estimators.items()): 64 | print("Training %s ... " % name, end="") 65 | t0 = time() 66 | clf.fit(X_train, y_train) 67 | train_time[name] = time() - t0 68 | t0 = time() 69 | y_pred = clf.predict(X_test) 70 | test_time[name] = time() - t0 71 | accuracy[name] = accuracy_score(y_test, y_pred) 72 | f1[name] = f1_score(y_test, y_pred) 73 | print("done") 74 | 75 | print("Classification performance:") 76 | print("===========================") 77 | print() 78 | print("%s %s %s %s %s" % ("Classifier".ljust(16), 79 | "train".rjust(10), 80 | "test".rjust(10), 81 | "f1".rjust(10), 82 | "accuracy".rjust(10))) 83 | print("-" * (16 + 4 * 11)) 84 | for name in sorted(f1, key=f1.get): 85 | print("%s %s %s %s %s" % ( 86 | name.ljust(16), 87 | ("%.4fs" % train_time[name]).rjust(10), 88 | ("%.4fs" % test_time[name]).rjust(10), 89 | ("%.4f" % f1[name]).rjust(10), 90 | ("%.4f" % accuracy[name]).rjust(10))) 91 | 92 | print() 93 | -------------------------------------------------------------------------------- /benchmarks/bench_other_libs.py: -------------------------------------------------------------------------------- 1 | """ Benchmarking CD solvers for factorization machines. 2 | 3 | Compares polylearn with with fastFM [1]. 4 | 5 | [1] http://ibayer.github.io/fastFM/ 6 | 7 | Note: this benchmark uses the squared loss and a regression formulation, for 8 | the fairest comparison. The CD solvers in polylearn support logistic loss and 9 | squared hinge loss as well. 10 | 11 | """ 12 | 13 | from time import time 14 | 15 | import numpy as np 16 | import scipy.sparse as sp 17 | 18 | from sklearn.metrics import accuracy_score, f1_score 19 | from sklearn.datasets import fetch_20newsgroups_vectorized 20 | 21 | from polylearn import FactorizationMachineRegressor 22 | if __name__ == '__main__': 23 | data_train = fetch_20newsgroups_vectorized(subset="train") 24 | data_test = fetch_20newsgroups_vectorized(subset="test") 25 | X_train = sp.csc_matrix(data_train.data) 26 | X_test = sp.csc_matrix(data_test.data) 27 | 28 | y_train = data_train.target == 0 # atheism vs rest 29 | y_test = data_test.target == 0 30 | 31 | y_train = (2 * y_train - 1).astype(np.float) 32 | 33 | print(__doc__) 34 | print("20 newsgroups") 35 | print("=============") 36 | print("X_train.shape = {0}".format(X_train.shape)) 37 | print("X_train.format = {0}".format(X_train.format)) 38 | print("X_train.dtype = {0}".format(X_train.dtype)) 39 | print("X_train density = {0}" 40 | "".format(X_train.nnz / np.product(X_train.shape))) 41 | print("y_train {0}".format(y_train.shape)) 42 | print("X_test {0}".format(X_test.shape)) 43 | print("X_test.format = {0}".format(X_test.format)) 44 | print("X_test.dtype = {0}".format(X_test.dtype)) 45 | print("y_test {0}".format(y_test.shape)) 46 | print() 47 | 48 | print("Training regressors") 49 | print("===================") 50 | f1, accuracy, train_time, test_time = {}, {}, {}, {} 51 | 52 | print("Training our solver... 
", end="") 53 | fm = FactorizationMachineRegressor(n_components=20, 54 | fit_linear=True, 55 | fit_lower=False, 56 | alpha=5, 57 | beta=5, 58 | degree=2, 59 | random_state=0, 60 | max_iter=100) 61 | t0 = time() 62 | fm.fit(X_train, y_train) 63 | train_time['polylearn'] = time() - t0 64 | t0 = time() 65 | y_pred = fm.predict(X_test) > 0 66 | test_time['polylearn'] = time() - t0 67 | accuracy['polylearn'] = accuracy_score(y_test, y_pred) 68 | f1['polylearn'] = f1_score(y_test, y_pred) 69 | print("done") 70 | 71 | try: 72 | from fastFM import als 73 | 74 | print("Training fastfm... ", end="") 75 | clf = als.FMRegression(n_iter=100, init_stdev=0.01, rank=20, 76 | random_state=0, l2_reg=10.) 77 | clf.ignore_w_0 = True # since polylearn has no fit_intercept yet 78 | t0 = time() 79 | 80 | clf.fit(X_train, y_train) 81 | train_time['fastfm'] = time() - t0 82 | 83 | t0 = time() 84 | y_pred = clf.predict(X_test) 85 | test_time['fastfm'] = time() - t0 86 | y_pred = y_pred > 0 87 | accuracy['fastfm'] = accuracy_score(y_test, y_pred) 88 | f1['fastfm'] = f1_score(y_test, y_pred) 89 | 90 | print("done") 91 | except ImportError: 92 | print("fastfm not found") 93 | 94 | print("Regression performance:") 95 | print("=======================") 96 | print() 97 | print("%s %s %s %s %s" % ("Model".ljust(16), 98 | "train".rjust(10), 99 | "test".rjust(10), 100 | "f1".rjust(10), 101 | "accuracy".rjust(10))) 102 | print("-" * (16 + 4 * 11)) 103 | for name in sorted(f1, key=f1.get): 104 | print("%s %s %s %s %s" % ( 105 | name.ljust(16), 106 | ("%.4fs" % train_time[name]).rjust(10), 107 | ("%.4fs" % test_time[name]).rjust(10), 108 | ("%.4f" % f1[name]).rjust(10), 109 | ("%.4f" % accuracy[name]).rjust(10))) 110 | 111 | print() 112 | -------------------------------------------------------------------------------- /ci_scripts/appveyor/install.ps1: -------------------------------------------------------------------------------- 1 | # Sample script to install Miniconda under Windows 2 | # Authors: Olivier Grisel, Jonathan Helmus and Kyle Kastner, Robert McGibbon 3 | # License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ 4 | # taken from https://github.com/rmcgibbo/python-appveyor-conda-example 5 | 6 | $MINICONDA_URL = "http://repo.continuum.io/miniconda/" 7 | 8 | function DownloadMiniconda ($python_version, $platform_suffix) { 9 | $webclient = New-Object System.Net.WebClient 10 | if ($python_version -match "3.4") { 11 | $filename = "Miniconda3-latest-Windows-" + $platform_suffix + ".exe" 12 | } else { 13 | $filename = "Miniconda-latest-Windows-" + $platform_suffix + ".exe" 14 | } 15 | $url = $MINICONDA_URL + $filename 16 | 17 | $basedir = $pwd.Path + "\" 18 | $filepath = $basedir + $filename 19 | if (Test-Path $filename) { 20 | Write-Host "Reusing" $filepath 21 | return $filepath 22 | } 23 | 24 | # Download and retry up to 3 times in case of network transient errors. 
25 | Write-Host "Downloading" $filename "from" $url 26 | $retry_attempts = 2 27 | for($i=0; $i -lt $retry_attempts; $i++){ 28 | try { 29 | $webclient.DownloadFile($url, $filepath) 30 | break 31 | } 32 | Catch [Exception]{ 33 | Start-Sleep 1 34 | } 35 | } 36 | if (Test-Path $filepath) { 37 | Write-Host "File saved at" $filepath 38 | } else { 39 | # Retry once to get the error message if any at the last try 40 | $webclient.DownloadFile($url, $filepath) 41 | } 42 | return $filepath 43 | } 44 | 45 | 46 | function InstallMiniconda ($python_version, $architecture, $python_home) { 47 | Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home 48 | if (Test-Path $python_home) { 49 | Write-Host $python_home "already exists, skipping." 50 | return $false 51 | } 52 | if ($architecture -match "32") { 53 | $platform_suffix = "x86" 54 | } else { 55 | $platform_suffix = "x86_64" 56 | } 57 | 58 | $filepath = DownloadMiniconda $python_version $platform_suffix 59 | Write-Host "Installing" $filepath "to" $python_home 60 | $install_log = $python_home + ".log" 61 | $args = "/S /D=$python_home" 62 | Write-Host $filepath $args 63 | Start-Process -FilePath $filepath -ArgumentList $args -Wait -Passthru 64 | if (Test-Path $python_home) { 65 | Write-Host "Python $python_version ($architecture) installation complete" 66 | } else { 67 | Write-Host "Failed to install Python in $python_home" 68 | Get-Content -Path $install_log 69 | Exit 1 70 | } 71 | } 72 | 73 | 74 | function InstallCondaPackages ($python_home, $spec) { 75 | $conda_path = $python_home + "\Scripts\conda.exe" 76 | $args = "install --yes " + $spec 77 | Write-Host ("conda " + $args) 78 | Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru 79 | } 80 | 81 | function UpdateConda ($python_home) { 82 | $conda_path = $python_home + "\Scripts\conda.exe" 83 | Write-Host "Updating conda..." 84 | $args = "update --yes conda" 85 | Write-Host $conda_path $args 86 | Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru 87 | } 88 | 89 | function main () { 90 | InstallMiniconda $env:PYTHON_VERSION $env:PYTHON_ARCH $env:PYTHON 91 | UpdateConda $env:PYTHON 92 | InstallCondaPackages $env:PYTHON "conda-build anaconda-client" 93 | } 94 | 95 | main 96 | -------------------------------------------------------------------------------- /ci_scripts/appveyor/run_with_env.cmd: -------------------------------------------------------------------------------- 1 | :: To build extensions for 64 bit Python 3, we need to configure environment 2 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: 3 | :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) 4 | :: 5 | :: To build extensions for 64 bit Python 2, we need to configure environment 6 | :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: 7 | :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) 8 | :: 9 | :: 32 bit builds, and 64-bit builds for 3.5 and beyond, do not require specific 10 | :: environment configurations. 
11 | :: 12 | :: Note: this script needs to be run with the /E:ON and /V:ON flags for the 13 | :: cmd interpreter, at least for (SDK v7.0) 14 | :: 15 | :: More details at: 16 | :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows 17 | :: http://stackoverflow.com/a/13751649/163740 18 | :: 19 | :: Author: Olivier Grisel 20 | :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ 21 | :: 22 | :: Notes about batch files for Python people: 23 | :: 24 | :: Quotes in values are literally part of the values: 25 | :: SET FOO="bar" 26 | :: FOO is now five characters long: " b a r " 27 | :: If you don't want quotes, don't include them on the right-hand side. 28 | :: 29 | :: The CALL lines at the end of this file look redundant, but if you move them 30 | :: outside of the IF clauses, they do not run properly in the SET_SDK_64==Y 31 | :: case, I don't know why. 32 | @ECHO OFF 33 | 34 | SET COMMAND_TO_RUN=%* 35 | SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows 36 | SET WIN_WDK=c:\Program Files (x86)\Windows Kits\10\Include\wdf 37 | 38 | :: Extract the major and minor versions, and allow for the minor version to be 39 | :: more than 9. This requires the version number to have two dots in it. 40 | SET MAJOR_PYTHON_VERSION=%PYTHON_VERSION:~0,1% 41 | IF "%PYTHON_VERSION:~3,1%" == "." ( 42 | SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,1% 43 | ) ELSE ( 44 | SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,2% 45 | ) 46 | 47 | :: Based on the Python version, determine what SDK version to use, and whether 48 | :: to set the SDK for 64-bit. 49 | IF %MAJOR_PYTHON_VERSION% == 2 ( 50 | SET WINDOWS_SDK_VERSION="v7.0" 51 | SET SET_SDK_64=Y 52 | ) ELSE ( 53 | IF %MAJOR_PYTHON_VERSION% == 3 ( 54 | SET WINDOWS_SDK_VERSION="v7.1" 55 | IF %MINOR_PYTHON_VERSION% LEQ 4 ( 56 | SET SET_SDK_64=Y 57 | ) ELSE ( 58 | SET SET_SDK_64=N 59 | IF EXIST "%WIN_WDK%" ( 60 | :: See: https://connect.microsoft.com/VisualStudio/feedback/details/1610302/ 61 | REN "%WIN_WDK%" 0wdf 62 | ) 63 | ) 64 | ) ELSE ( 65 | ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" 66 | EXIT 1 67 | ) 68 | ) 69 | 70 | IF %PYTHON_ARCH% == 64 ( 71 | IF %SET_SDK_64% == Y ( 72 | ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture 73 | SET DISTUTILS_USE_SDK=1 74 | SET MSSdk=1 75 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% 76 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release 77 | ECHO Executing: %COMMAND_TO_RUN% 78 | call %COMMAND_TO_RUN% || EXIT 1 79 | ) ELSE ( 80 | ECHO Using default MSVC build environment for 64 bit architecture 81 | ECHO Executing: %COMMAND_TO_RUN% 82 | call %COMMAND_TO_RUN% || EXIT 1 83 | ) 84 | ) ELSE ( 85 | ECHO Using default MSVC build environment for 32 bit architecture 86 | ECHO Executing: %COMMAND_TO_RUN% 87 | call %COMMAND_TO_RUN% || EXIT 1 88 | ) 89 | -------------------------------------------------------------------------------- /ci_scripts/install.sh: -------------------------------------------------------------------------------- 1 | # Deactivate the travis-provided virtual environment and setup a 2 | # conda-based environment instead 3 | deactivate 4 | 5 | # Use the miniconda installer for faster download / install of conda 6 | # itself 7 | pushd . 8 | cd 9 | mkdir -p download 10 | cd download 11 | echo "Cached in $HOME/download :" 12 | ls -l 13 | echo 14 | if [[ ! 
-f miniconda.sh ]] 15 | then 16 | wget http://repo.continuum.io/miniconda/Miniconda-3.6.0-Linux-x86_64.sh \ 17 | -O miniconda.sh 18 | fi 19 | chmod +x miniconda.sh && ./miniconda.sh -b 20 | cd .. 21 | export PATH=/home/travis/miniconda/bin:$PATH 22 | conda update --yes conda 23 | popd 24 | 25 | # Configure the conda environment and put it in the path using the 26 | # provided versions 27 | conda create -n testenv --yes python=$PYTHON_VERSION pip nose \ 28 | numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION cython=$CYTHON_VERSION 29 | 30 | source activate testenv 31 | 32 | pip install scikit-learn==$SKLEARN_VERSION sklearn-contrib-lightning 33 | 34 | if [[ "$COVERAGE" == "true" ]]; then 35 | pip install coverage coveralls 36 | fi 37 | 38 | python --version 39 | python -c "import numpy; print('numpy %s' % numpy.__version__)" 40 | python -c "import scipy; print('scipy %s' % scipy.__version__)" 41 | 42 | python setup.py develop 43 | -------------------------------------------------------------------------------- /ci_scripts/push_doc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is meant to be called in the "deploy" step defined in 3 | # circle.yml. See https://circleci.com/docs/ for more details. 4 | # The behavior of the script is controlled by environment variables defined 5 | # in the circle.yml in the top level folder of the project. 6 | 7 | MSG="Pushing the docs for branch: $CIRCLE_BRANCH, commit $CIRCLE_SHA1" 8 | 9 | cd $HOME 10 | # Copy the built docs to a temporary folder 11 | rm -rf tmp 12 | mkdir tmp 13 | cp -R $HOME/$DOC_REPO/doc/_build/html/* ./tmp/ 14 | 15 | # Clone the docs repo if it isn't already there 16 | if [ ! -d $DOC_REPO ]; 17 | then git clone "git@github.com:$USERNAME/"$DOC_REPO".git"; 18 | fi 19 | 20 | cd $DOC_REPO 21 | git branch gh-pages 22 | git checkout -f gh-pages 23 | git reset --hard origin/gh-pages 24 | git clean -dfx 25 | 26 | for name in $(ls -A $HOME/$DOC_REPO); do 27 | case $name in 28 | .nojekyll) # So that github does not build this as a Jekyll website. 29 | ;; 30 | circle.yml) # Config so that CircleCI does not build the gh-pages branch. 31 | ;; 32 | *) 33 | git rm -rf $name 34 | ;; 35 | esac 36 | done 37 | 38 | # Copy the newly built docs # VN: what's with the DOC_URL? 39 | # mkdir $DOC_URL 40 | # cp -R $HOME/tmp/* ./$DOC_URL/ 41 | cp -R $HOME/tmp/* ./ 42 | 43 | git config --global user.email $EMAIL 44 | git config --global user.name $USERNAME 45 | # git add -f ./$DOC_URL/ 46 | git add -f ./ 47 | git commit -m "$MSG" 48 | git push -f origin gh-pages 49 | if [ $? -ne 0 ]; then 50 | echo "Pushing docs failed" 51 | echo 52 | exit 1 53 | fi 54 | 55 | echo $MSG 56 | -------------------------------------------------------------------------------- /ci_scripts/success.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | if [[ "$COVERAGE" == "true" ]]; then 4 | # Need to run coveralls from a git checkout, so we copy .coverage 5 | # from TEST_DIR where nosetests has been run 6 | cp $TEST_DIR/.coverage $TRAVIS_BUILD_DIR 7 | cd $TRAVIS_BUILD_DIR 8 | # Ignore coveralls failures as the coveralls server is not 9 | # very reliable but we don't want travis to report a failure 10 | # in the github UI just because the coverage report failed to 11 | # be published.
12 | coveralls || echo "Coveralls upload failed" 13 | fi -------------------------------------------------------------------------------- /ci_scripts/test.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | # Get into a temp directory to run the tests from the installed package and 4 | # check that we do not leave artifacts 5 | mkdir -p $TEST_DIR 6 | 7 | cd $TEST_DIR 8 | 9 | if [[ "$COVERAGE" == "true" ]]; then 10 | nosetests -s --with-coverage --cover-package=$MODULE $MODULE 11 | else 12 | nosetests -s $MODULE 13 | fi 14 | -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | machine: 2 | environment: 3 | # The github organization or username of the repository which hosts the 4 | # project and documentation. 5 | USERNAME: "vene" 6 | 7 | # The repository where the documentation will be hosted 8 | DOC_REPO: "polylearn" 9 | 10 | # The base URL for the Github page where the documentation will be hosted 11 | DOC_URL: "vene.ro" 12 | 13 | # The email is to be used for commits in the Github Page 14 | EMAIL: "vlad@vene.ro" 15 | 16 | dependencies: 17 | 18 | # Various dependencies 19 | pre: 20 | - sudo -E apt-get -yq remove texlive-binaries --purge 21 | - sudo apt-get update 22 | - sudo apt-get install libatlas-dev libatlas3gf-base 23 | - sudo apt-get install build-essential python-dev python-setuptools 24 | # install numpy first as it is a compile time dependency for other packages 25 | - pip install --upgrade numpy 26 | - pip install --upgrade scipy matplotlib setuptools nose coverage sphinx pillow sphinx-gallery sphinx_bootstrap_theme 27 | # Install the packages required for the `make -C doc check` command to work. 28 | - sudo -E apt-get -yq update 29 | - sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra 30 | - pip install --upgrade cython numpydoc 31 | - pip install --upgrade scikit-learn 32 | - pip install --upgrade sklearn-contrib-lightning 33 | 34 | # The --user is needed to let sphinx see the source and the binaries 35 | # The pipefail is requested to propagate exit code 36 | override: 37 | - python setup.py clean 38 | - python setup.py develop 39 | - set -o pipefail && cd doc && make html 2>&1 | tee ~/log.txt 40 | test: 41 | # Grep for errors in the documentation build log 42 | override: 43 | - cat ~/log.txt && if grep -q "Traceback (most recent call last):" ~/log.txt; then false; else true; fi 44 | deployment: 45 | push: 46 | branch: master 47 | commands: 48 | - bash ci_scripts/push_doc.sh 49 | general: 50 | # Open the doc to the API 51 | artifacts: 52 | - "doc/_build/html" 53 | - "~/log.txt" 54 | # Restrict the build to the master branch only 55 | branches: 56 | ignore: 57 | - gh-pages -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found.
Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make <target>' where <target> is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | -rm -rf $(BUILDDIR)/* 51 | -rm -rf auto_examples/ 52 | -rm -rf generated/* 53 | -rm -rf modules/generated/* 54 | 55 | html: 56 | # These two lines make the build a bit more lengthy, and 57 | # the embedding of images more robust 58 | rm -rf $(BUILDDIR)/html/_images 59 | #rm -rf _build/doctrees/ 60 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 61 | @echo 62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 63 | 64 | dirhtml: 65 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 66 | @echo 67 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 68 | 69 | singlehtml: 70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 71 | @echo 72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 73 | 74 | pickle: 75 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 76 | @echo 77 | @echo "Build finished; now you can process the pickle files." 78 | 79 | json: 80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 81 | @echo 82 | @echo "Build finished; now you can process the JSON files." 83 | 84 | htmlhelp: 85 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 86 | @echo 87 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 88 | ".hhp project file in $(BUILDDIR)/htmlhelp."
89 | 90 | qthelp: 91 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 92 | @echo 93 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 94 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 95 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/polylearn.qhcp" 96 | @echo "To view the help file:" 97 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/polylearn.qhc" 98 | 99 | devhelp: 100 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 101 | @echo 102 | @echo "Build finished." 103 | @echo "To view the help file:" 104 | @echo "# mkdir -p $$HOME/.local/share/devhelp/polylearn" 105 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/polylearn" 106 | @echo "# devhelp" 107 | 108 | epub: 109 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 110 | @echo 111 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 112 | 113 | latex: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo 116 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 117 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 118 | "(use \`make latexpdf' here to do that automatically)." 119 | 120 | latexpdf: 121 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 122 | @echo "Running LaTeX files through pdflatex..." 123 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 124 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 125 | 126 | latexpdfja: 127 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 128 | @echo "Running LaTeX files through platex and dvipdfmx..." 129 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 130 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 131 | 132 | text: 133 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 134 | @echo 135 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 136 | 137 | man: 138 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 139 | @echo 140 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 141 | 142 | texinfo: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo 145 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 146 | @echo "Run \`make' in that directory to run these through makeinfo" \ 147 | "(use \`make info' here to do that automatically)." 148 | 149 | info: 150 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 151 | @echo "Running Texinfo files through makeinfo..." 152 | make -C $(BUILDDIR)/texinfo info 153 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 154 | 155 | gettext: 156 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 157 | @echo 158 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 159 | 160 | changes: 161 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 162 | @echo 163 | @echo "The overview file is in $(BUILDDIR)/changes." 164 | 165 | linkcheck: 166 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 167 | @echo 168 | @echo "Link check complete; look for any errors in the above output " \ 169 | "or in $(BUILDDIR)/linkcheck/output.txt." 170 | 171 | doctest: 172 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 173 | @echo "Testing of doctests in the sources finished, look at the " \ 174 | "results in $(BUILDDIR)/doctest/output.txt." 175 | 176 | xml: 177 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 178 | @echo 179 | @echo "Build finished. 
The XML files are in $(BUILDDIR)/xml." 180 | 181 | pseudoxml: 182 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 183 | @echo 184 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 185 | -------------------------------------------------------------------------------- /doc/_templates/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | {% endblock %} 11 | 12 | 13 | -------------------------------------------------------------------------------- /doc/_templates/function.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | 9 | -------------------------------------------------------------------------------- /doc/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {# Import the theme's layout. #} 2 | {% extends "!layout.html" %} 3 | 4 | {# remove site and page menus #} 5 | {%- block sidebartoc %} 6 | {% endblock %} 7 | {%- block sidebarrel %} 8 | {% endblock %} 9 | 10 | {%- block navbartoc %} 11 | {% endblock %} 12 | 13 | {# Include our new CSS file into existing ones. #} 14 | {% set css_files = css_files + ['_static/lightning.css']%} 15 | {% set css_files = css_files + ['_static/bootstrap.min.css']%} 16 | 17 | {%- block content %} 18 | {{ navBar() }} 19 | <div class="container">
20 | {% block body %}{% endblock %} 21 | </div>
22 | 23 | {%- endblock %} 24 | 25 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # polylearn documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Jan 18 14:44:12 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | import sphinx_bootstrap_theme 19 | 20 | # If extensions (or modules to document with autodoc) are in another directory, 21 | # add these directories to sys.path here. If the directory is relative to the 22 | # documentation root, use os.path.abspath to make it absolute, like shown here. 23 | sys.path.insert(0, os.path.abspath('sphinxext')) 24 | 25 | 26 | # -- General configuration --------------------------------------------------- 27 | 28 | # Try to override the matplotlib configuration as early as possible 29 | try: 30 | import gen_rst 31 | except: 32 | pass 33 | # -- General configuration ------------------------------------------------ 34 | 35 | # If your documentation needs a minimal Sphinx version, state it here. 36 | #needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'gen_rst', 43 | 'sphinx.ext.autodoc', 44 | 'sphinx.ext.autosummary', 45 | 'sphinx.ext.doctest', 46 | 'sphinx.ext.intersphinx', 47 | 'sphinx.ext.todo', 48 | 'numpy_ext.numpydoc', 49 | 'sphinx.ext.pngmath', 50 | 'sphinx.ext.ifconfig', 51 | 'sphinx.ext.viewcode', 52 | # 'sphinx_gallery.gen_gallery' 53 | 54 | ] 55 | 56 | sphinx_gallery_conf = { 57 | # path to your examples scripts 58 | 'examples_dirs': '../examples', 59 | # path where to save gallery generated examples 60 | 'gallery_dirs': 'auto_examples'} 61 | 62 | autosummary_generate = True 63 | 64 | autodoc_default_flags = ['members', 'inherited-members'] 65 | 66 | # Add any paths that contain templates here, relative to this directory. 67 | templates_path = ['_templates'] 68 | 69 | # The suffix of source filenames. 70 | source_suffix = '.rst' 71 | 72 | # The encoding of source files. 73 | #source_encoding = 'utf-8-sig' 74 | 75 | # Generate the plots for the gallery 76 | plot_gallery = True 77 | 78 | # The master toctree document. 79 | master_doc = 'index' 80 | 81 | # General information about the project. 82 | project = u'polylearn' 83 | copyright = u'2016, Vlad Niculae' 84 | 85 | # The version info for the project you're documenting, acts as replacement for 86 | # |version| and |release|, also used in various other places throughout the 87 | # built documents. 88 | # 89 | # The short X.Y version. 90 | version = '0.1' 91 | # The full version, including alpha/beta/rc tags. 92 | release = '0.1.0' 93 | 94 | # The language for content autogenerated by Sphinx. Refer to documentation 95 | # for a list of supported languages. 96 | #language = None 97 | 98 | # There are two options for replacing |today|: either, you set today to some 99 | # non-false value, then it is used: 100 | #today = '' 101 | # Else, today_fmt is used as the format for a strftime call. 
102 | #today_fmt = '%B %d, %Y' 103 | 104 | # List of patterns, relative to source directory, that match files and 105 | # directories to ignore when looking for source files. 106 | exclude_patterns = ['_build'] 107 | 108 | # The reST default role (used for this markup: `text`) to use for all 109 | # documents. 110 | #default_role = None 111 | 112 | # If true, '()' will be appended to :func: etc. cross-reference text. 113 | #add_function_parentheses = True 114 | 115 | # If true, the current module name will be prepended to all description 116 | # unit titles (such as .. function::). 117 | #add_module_names = True 118 | 119 | # If true, sectionauthor and moduleauthor directives will be shown in the 120 | # output. They are ignored by default. 121 | #show_authors = False 122 | 123 | # The name of the Pygments (syntax highlighting) style to use. 124 | pygments_style = 'sphinx' 125 | 126 | # A list of ignored prefixes for module index sorting. 127 | #modindex_common_prefix = [] 128 | 129 | # If true, keep warnings as "system message" paragraphs in the built documents. 130 | #keep_warnings = False 131 | 132 | 133 | # -- Options for HTML output ---------------------------------------------- 134 | 135 | # The theme to use for HTML and HTML Help pages. See the documentation for 136 | # a list of builtin themes. 137 | html_theme = 'bootstrap' 138 | 139 | # Theme options are theme-specific and customize the look and feel of a theme 140 | # further. For a list of options available for each theme, see the 141 | # documentation. 142 | html_theme_options = { 143 | 'navbar_links': [ 144 | # ('Introduction', 'intro'), 145 | ('References', 'references'), 146 | ('Examples', 'auto_examples/index'), 147 | ], 148 | 'globaltoc_includehidden': "true", 149 | 150 | # Render the next and previous page links in navbar. (Default: true) 151 | 'navbar_sidebarrel': False, 152 | 153 | # Render the current page's TOC in the navbar. (Default: true) 154 | 'navbar_pagenav': False, 155 | 156 | } 157 | 158 | # Add any paths that contain custom themes here, relative to this directory. 159 | 160 | html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() 161 | 162 | # The name for this set of Sphinx documents. If None, it defaults to 163 | # "<project> v<release> documentation". 164 | #html_title = None 165 | 166 | # A shorter title for the navigation bar. Default is the same as html_title. 167 | #html_short_title = None 168 | 169 | # The name of an image file (relative to this directory) to place at the top 170 | # of the sidebar. 171 | #html_logo = None 172 | 173 | # The name of an image file (within the static path) to use as favicon of the 174 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 175 | # pixels large. 176 | #html_favicon = None 177 | 178 | # Add any paths that contain custom static files (such as style sheets) here, 179 | # relative to this directory. They are copied after the builtin static files, 180 | # so a file named "default.css" will overwrite the builtin "default.css". 181 | html_static_path = ['_static'] 182 | 183 | # Add any extra paths that contain custom files (such as robots.txt or 184 | # .htaccess) here, relative to this directory. These files are copied 185 | # directly to the root of the documentation. 186 | #html_extra_path = [] 187 | 188 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 189 | # using the given strftime format.
190 | #html_last_updated_fmt = '%b %d, %Y' 191 | 192 | # If true, SmartyPants will be used to convert quotes and dashes to 193 | # typographically correct entities. 194 | #html_use_smartypants = True 195 | 196 | # Custom sidebar templates, maps document names to template names. 197 | #html_sidebars = {} 198 | 199 | # Additional templates that should be rendered to pages, maps page names to 200 | # template names. 201 | #html_additional_pages = {} 202 | 203 | # If false, no module index is generated. 204 | #html_domain_indices = True 205 | 206 | # If false, no index is generated. 207 | #html_use_index = True 208 | 209 | # If true, the index is split into individual pages for each letter. 210 | #html_split_index = False 211 | 212 | # If true, links to the reST sources are added to the pages. 213 | #html_show_sourcelink = True 214 | 215 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 216 | #html_show_sphinx = True 217 | 218 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 219 | #html_show_copyright = True 220 | 221 | # If true, an OpenSearch description file will be output, and all pages will 222 | # contain a <link> tag referring to it. The value of this option must be the 223 | # base URL from which the finished HTML is served. 224 | #html_use_opensearch = '' 225 | 226 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 227 | #html_file_suffix = None 228 | 229 | # Output file base name for HTML help builder. 230 | htmlhelp_basename = 'polylearndoc' 231 | 232 | 233 | # -- Options for LaTeX output --------------------------------------------- 234 | 235 | latex_elements = { 236 | # The paper size ('letterpaper' or 'a4paper'). 237 | #'papersize': 'letterpaper', 238 | 239 | # The font size ('10pt', '11pt' or '12pt'). 240 | #'pointsize': '10pt', 241 | 242 | # Additional stuff for the LaTeX preamble. 243 | #'preamble': '', 244 | } 245 | 246 | # Grouping the document tree into LaTeX files. List of tuples 247 | # (source start file, target name, title, 248 | # author, documentclass [howto, manual, or own class]). 249 | latex_documents = [ 250 | ('index', 'polylearn.tex', u'polylearn documentation', 251 | u'Vlad Niculae', 'manual'), 252 | ] 253 | 254 | # The name of an image file (relative to this directory) to place at the top of 255 | # the title page. 256 | #latex_logo = None 257 | 258 | # For "manual" documents, if this is true, then toplevel headings are parts, 259 | # not chapters. 260 | #latex_use_parts = False 261 | 262 | # If true, show page references after internal links. 263 | #latex_show_pagerefs = False 264 | 265 | # If true, show URL addresses after external links. 266 | #latex_show_urls = False 267 | 268 | # Documents to append as an appendix to all manuals. 269 | #latex_appendices = [] 270 | 271 | # If false, no module index is generated. 272 | #latex_domain_indices = True 273 | 274 | 275 | # -- Options for manual page output --------------------------------------- 276 | 277 | # One entry per manual page. List of tuples 278 | # (source start file, name, description, authors, manual section). 279 | man_pages = [ 280 | ('index', 'polylearn', u'polylearn documentation', 281 | [u'Vlad Niculae'], 1) 282 | ] 283 | 284 | # If true, show URL addresses after external links. 285 | #man_show_urls = False 286 | 287 | 288 | # -- Options for Texinfo output ------------------------------------------- 289 | 290 | # Grouping the document tree into Texinfo files.
List of tuples 291 | # (source start file, target name, title, author, 292 | # dir menu entry, description, category) 293 | texinfo_documents = [ 294 | ('index', 'polylearn', u'polylearn documentation', 295 | u'Vlad Niculae', 'polylearn', 296 | 'Factorization machines and polynomial models for machine learning.', 297 | 'Miscellaneous'), 298 | ] 299 | 300 | def generate_example_rst(app, what, name, obj, options, lines): 301 | # generate empty examples files, so that we don't get 302 | # inclusion errors if there are no examples for a class / module 303 | examples_path = os.path.join(app.srcdir, "modules", "generated", 304 | "%s.examples" % name) 305 | if not os.path.exists(examples_path): 306 | # touch file 307 | open(examples_path, 'w').close() 308 | 309 | 310 | def setup(app): 311 | app.connect('autodoc-process-docstring', generate_example_rst) 312 | 313 | # Documents to append as an appendix to all manuals. 314 | #texinfo_appendices = [] 315 | 316 | # If false, no module index is generated. 317 | #texinfo_domain_indices = True 318 | 319 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 320 | #texinfo_show_urls = 'footnote' 321 | 322 | # If true, do not generate a @detailmenu in the "Top" node's menu. 323 | #texinfo_no_detailmenu = False 324 | 325 | 326 | # Example configuration for intersphinx: refer to the Python standard library. 327 | intersphinx_mapping = {'http://docs.python.org/': None} 328 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | 3 | .. toctree:: 4 | :hidden: 5 | 6 | auto_examples/index 7 | references.rst 8 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^<target^>` where ^<target^> is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview of all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo.
doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\polylearn.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\polylearn.qhc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 
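REM Note: the "latexpdf" target above does not produce the PDF through a
REM Sphinx builder; it runs "make all-pdf" inside %BUILDDIR%/latex, so GNU make
REM and a LaTeX toolchain must be available on PATH for this target to succeed.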
155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /doc/references.rst: -------------------------------------------------------------------------------- 1 | .. toctree:: 2 | :maxdepth: 2 3 | 4 | polylearn reference 5 | =================== 6 | 7 | .. _factorization_machine: 8 | 9 | Factorization Machines 10 | ---------------------- 11 | 12 | .. automodule:: polylearn.factorization_machine 13 | :no-members: 14 | :no-inherited-members: 15 | 16 | .. currentmodule:: polylearn 17 | 18 | .. autosummary:: 19 | :toctree: generated/ 20 | :template: class.rst 21 | 22 | FactorizationMachineClassifier 23 | FactorizationMachineRegressor 24 | 25 | 26 | .. _polynomial_network: 27 | 28 | Polynomial Networks 29 | ------------------- 30 | 31 | .. automodule:: polylearn.polynomial_network 32 | :no-members: 33 | :no-inherited-members: 34 | 35 | .. currentmodule:: polylearn 36 | 37 | .. 
autosummary:: 38 | :toctree: generated/ 39 | :template: class.rst 40 | 41 | PolynomialNetworkClassifier 42 | PolynomialNetworkRegressor 43 | 44 | 45 | .. _kernels: 46 | 47 | Utilities for computing kernels 48 | ------------------------------- 49 | 50 | .. currentmodule:: polylearn 51 | 52 | .. autosummary:: 53 | :toctree: generated/ 54 | :template: function.rst 55 | 56 | kernels.anova_kernel 57 | kernels.homogeneous_kernel 58 | kernels.safe_power -------------------------------------------------------------------------------- /doc/sphinxext/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------- 2 | The files 3 | - numpydoc.py 4 | - autosummary.py 5 | - autosummary_generate.py 6 | - docscrape.py 7 | - docscrape_sphinx.py 8 | - phantom_import.py 9 | have the following license: 10 | 11 | Copyright (C) 2008 Stefan van der Walt , Pauli Virtanen 12 | 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are 15 | met: 16 | 17 | 1. Redistributions of source code must retain the above copyright 18 | notice, this list of conditions and the following disclaimer. 19 | 2. Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in 21 | the documentation and/or other materials provided with the 22 | distribution. 23 | 24 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 26 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 27 | DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 28 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 | POSSIBILITY OF SUCH DAMAGE. 35 | 36 | ------------------------------------------------------------------------------- 37 | The files 38 | - compiler_unparse.py 39 | - comment_eater.py 40 | - traitsdoc.py 41 | have the following license: 42 | 43 | This software is OSI Certified Open Source Software. 44 | OSI Certified is a certification mark of the Open Source Initiative. 45 | 46 | Copyright (c) 2006, Enthought, Inc. 47 | All rights reserved. 48 | 49 | Redistribution and use in source and binary forms, with or without 50 | modification, are permitted provided that the following conditions are met: 51 | 52 | * Redistributions of source code must retain the above copyright notice, this 53 | list of conditions and the following disclaimer. 54 | * Redistributions in binary form must reproduce the above copyright notice, 55 | this list of conditions and the following disclaimer in the documentation 56 | and/or other materials provided with the distribution. 57 | * Neither the name of Enthought, Inc. nor the names of its contributors may 58 | be used to endorse or promote products derived from this software without 59 | specific prior written permission.
60 | 61 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 62 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 63 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 64 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 65 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 66 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 67 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 68 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 69 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 70 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 71 | 72 | 73 | ------------------------------------------------------------------------------- 74 | The files 75 | - only_directives.py 76 | - plot_directive.py 77 | originate from Matplotlib (http://matplotlib.sf.net/) which has 78 | the following license: 79 | 80 | Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved. 81 | 82 | 1. This LICENSE AGREEMENT is between John D. Hunter (“JDH”), and the Individual or Organization (“Licensee”) accessing and otherwise using matplotlib software in source or binary form and its associated documentation. 83 | 84 | 2. Subject to the terms and conditions of this License Agreement, JDH hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use matplotlib 0.98.3 alone or in any derivative version, provided, however, that JDH’s License Agreement and JDH’s notice of copyright, i.e., “Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved” are retained in matplotlib 0.98.3 alone or in any derivative version prepared by Licensee. 85 | 86 | 3. In the event Licensee prepares a derivative work that is based on or incorporates matplotlib 0.98.3 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to matplotlib 0.98.3. 87 | 88 | 4. JDH is making matplotlib 0.98.3 available to Licensee on an “AS IS” basis. JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB 0.98.3 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 89 | 90 | 5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB 0.98.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING MATPLOTLIB 0.98.3, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 91 | 92 | 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 93 | 94 | 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between JDH and Licensee. This License Agreement does not grant permission to use JDH trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 95 | 96 | 8. By copying, installing or otherwise using matplotlib 0.98.3, Licensee agrees to be bound by the terms and conditions of this License Agreement. 
97 | 98 | -------------------------------------------------------------------------------- /doc/sphinxext/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include tests *.py 2 | include *.txt 3 | -------------------------------------------------------------------------------- /doc/sphinxext/README.txt: -------------------------------------------------------------------------------- 1 | ===================================== 2 | numpydoc -- Numpy's Sphinx extensions 3 | ===================================== 4 | 5 | Numpy's documentation uses several custom extensions to Sphinx. These 6 | are shipped in this ``numpydoc`` package, in case you want to make use 7 | of them in third-party projects. 8 | 9 | The following extensions are available: 10 | 11 | - ``numpydoc``: support for the Numpy docstring format in Sphinx, and add 12 | the code description directives ``np-function``, ``np-cfunction``, etc. 13 | that support the Numpy docstring syntax. 14 | 15 | - ``numpydoc.traitsdoc``: For gathering documentation about Traits attributes. 16 | 17 | - ``numpydoc.plot_directives``: Adaptation of Matplotlib's ``plot::`` 18 | directive. Note that this implementation may still undergo severe 19 | changes or eventually be deprecated. 20 | 21 | - ``numpydoc.only_directives``: (DEPRECATED) 22 | 23 | - ``numpydoc.autosummary``: (DEPRECATED) An ``autosummary::`` directive. 24 | Available in Sphinx 0.6.2 and (to-be) 1.0 as ``sphinx.ext.autosummary``, 25 | and the Sphinx 1.0 version is recommended over that included in 26 | Numpydoc. 27 | 28 | 29 | numpydoc 30 | ======== 31 | 32 | Numpydoc inserts a hook into Sphinx's autodoc that converts docstrings 33 | following the Numpy/Scipy format to a form palatable to Sphinx. 34 | 35 | Options 36 | ------- 37 | 38 | The following options can be set in conf.py: 39 | 40 | - numpydoc_use_plots: bool 41 | 42 | Whether to produce ``plot::`` directives for Examples sections that 43 | contain ``import matplotlib``. 44 | 45 | - numpydoc_show_class_members: bool 46 | 47 | Whether to show all members of a class in the Methods and Attributes 48 | sections automatically. 49 | 50 | - numpydoc_edit_link: bool (DEPRECATED -- edit your HTML template instead) 51 | 52 | Whether to insert an edit link after docstrings. 53 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/polylearn/4dd9d4b8aca029628a4c934829526b8552db2e1b/doc/sphinxext/numpy_ext/__init__.py -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/docscrape.py: -------------------------------------------------------------------------------- 1 | """Extract reference documentation from the NumPy source tree. 2 | 3 | """ 4 | 5 | import inspect 6 | import textwrap 7 | import re 8 | import pydoc 9 | from warnings import warn 10 | # Try Python 2 first, otherwise load from Python 3 11 | try: 12 | from StringIO import StringIO 13 | except ImportError: 14 | from io import StringIO 15 | 16 | 17 | class Reader(object): 18 | """A line-based string reader. 19 | 20 | """ 21 | def __init__(self, data): 22 | """ 23 | Parameters 24 | ---------- 25 | data : str 26 | String with lines separated by '\n'.
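Examples
--------
Illustrative sketch only (this example is not in the vendored numpydoc
source); ``Reader`` also accepts a list of lines directly:

>>> r = Reader(['first', 'second'])
>>> r.read()
'first'
>>> r.peek()
'second'
>>> r.eof()
False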
27 | 28 | """ 29 | if isinstance(data, list): 30 | self._str = data 31 | else: 32 | self._str = data.split('\n') # store string as list of lines 33 | 34 | self.reset() 35 | 36 | def __getitem__(self, n): 37 | return self._str[n] 38 | 39 | def reset(self): 40 | self._l = 0 # current line nr 41 | 42 | def read(self): 43 | if not self.eof(): 44 | out = self[self._l] 45 | self._l += 1 46 | return out 47 | else: 48 | return '' 49 | 50 | def seek_next_non_empty_line(self): 51 | for l in self[self._l:]: 52 | if l.strip(): 53 | break 54 | else: 55 | self._l += 1 56 | 57 | def eof(self): 58 | return self._l >= len(self._str) 59 | 60 | def read_to_condition(self, condition_func): 61 | start = self._l 62 | for line in self[start:]: 63 | if condition_func(line): 64 | return self[start:self._l] 65 | self._l += 1 66 | if self.eof(): 67 | return self[start:self._l + 1] 68 | return [] 69 | 70 | def read_to_next_empty_line(self): 71 | self.seek_next_non_empty_line() 72 | 73 | def is_empty(line): 74 | return not line.strip() 75 | return self.read_to_condition(is_empty) 76 | 77 | def read_to_next_unindented_line(self): 78 | def is_unindented(line): 79 | return (line.strip() and (len(line.lstrip()) == len(line))) 80 | return self.read_to_condition(is_unindented) 81 | 82 | def peek(self, n=0): 83 | if self._l + n < len(self._str): 84 | return self[self._l + n] 85 | else: 86 | return '' 87 | 88 | def is_empty(self): 89 | return not ''.join(self._str).strip() 90 | 91 | 92 | class NumpyDocString(object): 93 | def __init__(self, docstring, config={}): 94 | docstring = textwrap.dedent(docstring).split('\n') 95 | 96 | self._doc = Reader(docstring) 97 | self._parsed_data = { 98 | 'Signature': '', 99 | 'Summary': [''], 100 | 'Extended Summary': [], 101 | 'Parameters': [], 102 | 'Returns': [], 103 | 'Raises': [], 104 | 'Warns': [], 105 | 'Other Parameters': [], 106 | 'Attributes': [], 107 | 'Methods': [], 108 | 'See Also': [], 109 | 'Notes': [], 110 | 'Warnings': [], 111 | 'References': '', 112 | 'Examples': '', 113 | 'index': {} 114 | } 115 | 116 | self._parse() 117 | 118 | def __getitem__(self, key): 119 | return self._parsed_data[key] 120 | 121 | def __setitem__(self, key, val): 122 | if key not in self._parsed_data: 123 | warn("Unknown section %s" % key) 124 | else: 125 | self._parsed_data[key] = val 126 | 127 | def _is_at_section(self): 128 | self._doc.seek_next_non_empty_line() 129 | 130 | if self._doc.eof(): 131 | return False 132 | 133 | l1 = self._doc.peek().strip() # e.g. Parameters 134 | 135 | if l1.startswith('.. 
index::'): 136 | return True 137 | 138 | l2 = self._doc.peek(1).strip() # ---------- or ========== 139 | return l2.startswith('-' * len(l1)) or l2.startswith('=' * len(l1)) 140 | 141 | def _strip(self, doc): 142 | i = 0 143 | j = 0 144 | for i, line in enumerate(doc): 145 | if line.strip(): 146 | break 147 | 148 | for j, line in enumerate(doc[::-1]): 149 | if line.strip(): 150 | break 151 | 152 | return doc[i:len(doc) - j] 153 | 154 | def _read_to_next_section(self): 155 | section = self._doc.read_to_next_empty_line() 156 | 157 | while not self._is_at_section() and not self._doc.eof(): 158 | if not self._doc.peek(-1).strip(): # previous line was empty 159 | section += [''] 160 | 161 | section += self._doc.read_to_next_empty_line() 162 | 163 | return section 164 | 165 | def _read_sections(self): 166 | while not self._doc.eof(): 167 | data = self._read_to_next_section() 168 | name = data[0].strip() 169 | 170 | if name.startswith('..'): # index section 171 | yield name, data[1:] 172 | elif len(data) < 2: 173 | return 174 | else: 175 | yield name, self._strip(data[2:]) 176 | 177 | def _parse_param_list(self, content): 178 | r = Reader(content) 179 | params = [] 180 | while not r.eof(): 181 | header = r.read().strip() 182 | if ' : ' in header: 183 | arg_name, arg_type = header.split(' : ')[:2] 184 | else: 185 | arg_name, arg_type = header, '' 186 | 187 | desc = r.read_to_next_unindented_line() 188 | desc = dedent_lines(desc) 189 | 190 | params.append((arg_name, arg_type, desc)) 191 | 192 | return params 193 | 194 | _name_rgx = re.compile(r"^\s*(:(?P<role>\w+):`(?P<name>[a-zA-Z0-9_.-]+)`|" 195 | r" (?P<name2>[a-zA-Z0-9_.-]+))\s*", re.X) 196 | 197 | def _parse_see_also(self, content): 198 | """ 199 | func_name : Descriptive text 200 | continued text 201 | another_func_name : Descriptive text 202 | func_name1, func_name2, :meth:`func_name`, func_name3 203 | 204 | """ 205 | items = [] 206 | 207 | def parse_item_name(text): 208 | """Match ':role:`name`' or 'name'""" 209 | m = self._name_rgx.match(text) 210 | if m: 211 | g = m.groups() 212 | if g[1] is None: 213 | return g[3], None 214 | else: 215 | return g[2], g[1] 216 | raise ValueError("%s is not an item name" % text) 217 | 218 | def push_item(name, rest): 219 | if not name: 220 | return 221 | name, role = parse_item_name(name) 222 | items.append((name, list(rest), role)) 223 | del rest[:] 224 | 225 | current_func = None 226 | rest = [] 227 | 228 | for line in content: 229 | if not line.strip(): 230 | continue 231 | 232 | m = self._name_rgx.match(line) 233 | if m and line[m.end():].strip().startswith(':'): 234 | push_item(current_func, rest) 235 | current_func, line = line[:m.end()], line[m.end():] 236 | rest = [line.split(':', 1)[1].strip()] 237 | if not rest[0]: 238 | rest = [] 239 | elif not line.startswith(' '): 240 | push_item(current_func, rest) 241 | current_func = None 242 | if ',' in line: 243 | for func in line.split(','): 244 | push_item(func, []) 245 | elif line.strip(): 246 | current_func = line 247 | elif current_func is not None: 248 | rest.append(line.strip()) 249 | push_item(current_func, rest) 250 | return items 251 | 252 | def _parse_index(self, section, content): 253 | """ 254 | .. 
index: default 255 | :refguide: something, else, and more 256 | 257 | """ 258 | def strip_each_in(lst): 259 | return [s.strip() for s in lst] 260 | 261 | out = {} 262 | section = section.split('::') 263 | if len(section) > 1: 264 | out['default'] = strip_each_in(section[1].split(','))[0] 265 | for line in content: 266 | line = line.split(':') 267 | if len(line) > 2: 268 | out[line[1]] = strip_each_in(line[2].split(',')) 269 | return out 270 | 271 | def _parse_summary(self): 272 | """Grab signature (if given) and summary""" 273 | if self._is_at_section(): 274 | return 275 | 276 | summary = self._doc.read_to_next_empty_line() 277 | summary_str = " ".join([s.strip() for s in summary]).strip() 278 | if re.compile('^([\w., ]+=)?\s*[\w\.]+\(.*\)$').match(summary_str): 279 | self['Signature'] = summary_str 280 | if not self._is_at_section(): 281 | self['Summary'] = self._doc.read_to_next_empty_line() 282 | else: 283 | self['Summary'] = summary 284 | 285 | if not self._is_at_section(): 286 | self['Extended Summary'] = self._read_to_next_section() 287 | 288 | def _parse(self): 289 | self._doc.reset() 290 | self._parse_summary() 291 | 292 | for (section, content) in self._read_sections(): 293 | if not section.startswith('..'): 294 | section = ' '.join([s.capitalize() 295 | for s in section.split(' ')]) 296 | if section in ('Parameters', 'Attributes', 'Methods', 297 | 'Returns', 'Raises', 'Warns'): 298 | self[section] = self._parse_param_list(content) 299 | elif section.startswith('.. index::'): 300 | self['index'] = self._parse_index(section, content) 301 | elif section == 'See Also': 302 | self['See Also'] = self._parse_see_also(content) 303 | else: 304 | self[section] = content 305 | 306 | # string conversion routines 307 | 308 | def _str_header(self, name, symbol='-'): 309 | return [name, len(name) * symbol] 310 | 311 | def _str_indent(self, doc, indent=4): 312 | out = [] 313 | for line in doc: 314 | out += [' ' * indent + line] 315 | return out 316 | 317 | def _str_signature(self): 318 | if self['Signature']: 319 | return [self['Signature'].replace('*', '\*')] + [''] 320 | else: 321 | return [''] 322 | 323 | def _str_summary(self): 324 | if self['Summary']: 325 | return self['Summary'] + [''] 326 | else: 327 | return [] 328 | 329 | def _str_extended_summary(self): 330 | if self['Extended Summary']: 331 | return self['Extended Summary'] + [''] 332 | else: 333 | return [] 334 | 335 | def _str_param_list(self, name): 336 | out = [] 337 | if self[name]: 338 | out += self._str_header(name) 339 | for param, param_type, desc in self[name]: 340 | out += ['%s : %s' % (param, param_type)] 341 | out += self._str_indent(desc) 342 | out += [''] 343 | return out 344 | 345 | def _str_section(self, name): 346 | out = [] 347 | if self[name]: 348 | out += self._str_header(name) 349 | out += self[name] 350 | out += [''] 351 | return out 352 | 353 | def _str_see_also(self, func_role): 354 | if not self['See Also']: 355 | return [] 356 | out = [] 357 | out += self._str_header("See Also") 358 | last_had_desc = True 359 | for func, desc, role in self['See Also']: 360 | if role: 361 | link = ':%s:`%s`' % (role, func) 362 | elif func_role: 363 | link = ':%s:`%s`' % (func_role, func) 364 | else: 365 | link = "`%s`_" % func 366 | if desc or last_had_desc: 367 | out += [''] 368 | out += [link] 369 | else: 370 | out[-1] += ", %s" % link 371 | if desc: 372 | out += self._str_indent([' '.join(desc)]) 373 | last_had_desc = True 374 | else: 375 | last_had_desc = False 376 | out += [''] 377 | return out 378 | 379 | def 
_str_index(self): 380 | idx = self['index'] 381 | out = [] 382 | out += ['.. index:: %s' % idx.get('default', '')] 383 | for section, references in idx.iteritems(): 384 | if section == 'default': 385 | continue 386 | out += [' :%s: %s' % (section, ', '.join(references))] 387 | return out 388 | 389 | def __str__(self, func_role=''): 390 | out = [] 391 | out += self._str_signature() 392 | out += self._str_summary() 393 | out += self._str_extended_summary() 394 | for param_list in ('Parameters', 'Returns', 'Raises'): 395 | out += self._str_param_list(param_list) 396 | out += self._str_section('Warnings') 397 | out += self._str_see_also(func_role) 398 | for s in ('Notes', 'References', 'Examples'): 399 | out += self._str_section(s) 400 | for param_list in ('Attributes', 'Methods'): 401 | out += self._str_param_list(param_list) 402 | out += self._str_index() 403 | return '\n'.join(out) 404 | 405 | 406 | def indent(str, indent=4): 407 | indent_str = ' ' * indent 408 | if str is None: 409 | return indent_str 410 | lines = str.split('\n') 411 | return '\n'.join(indent_str + l for l in lines) 412 | 413 | 414 | def dedent_lines(lines): 415 | """Deindent a list of lines maximally""" 416 | return textwrap.dedent("\n".join(lines)).split("\n") 417 | 418 | 419 | def header(text, style='-'): 420 | return text + '\n' + style * len(text) + '\n' 421 | 422 | 423 | class FunctionDoc(NumpyDocString): 424 | def __init__(self, func, role='func', doc=None, config={}): 425 | self._f = func 426 | self._role = role # e.g. "func" or "meth" 427 | 428 | if doc is None: 429 | if func is None: 430 | raise ValueError("No function or docstring given") 431 | doc = inspect.getdoc(func) or '' 432 | NumpyDocString.__init__(self, doc) 433 | 434 | if not self['Signature'] and func is not None: 435 | func, func_name = self.get_func() 436 | try: 437 | # try to read signature 438 | argspec = inspect.getargspec(func) 439 | argspec = inspect.formatargspec(*argspec) 440 | argspec = argspec.replace('*', '\*') 441 | signature = '%s%s' % (func_name, argspec) 442 | except TypeError as e: 443 | signature = '%s()' % func_name 444 | self['Signature'] = signature 445 | 446 | def get_func(self): 447 | func_name = getattr(self._f, '__name__', self.__class__.__name__) 448 | if inspect.isclass(self._f): 449 | func = getattr(self._f, '__call__', self._f.__init__) 450 | else: 451 | func = self._f 452 | return func, func_name 453 | 454 | def __str__(self): 455 | out = '' 456 | 457 | func, func_name = self.get_func() 458 | signature = self['Signature'].replace('*', '\*') 459 | 460 | roles = {'func': 'function', 461 | 'meth': 'method'} 462 | 463 | if self._role: 464 | if self._role not in roles: 465 | print("Warning: invalid role %s" % self._role) 466 | out += '.. %s:: %s\n \n\n' % (roles.get(self._role, ''), 467 | func_name) 468 | 469 | out += super(FunctionDoc, self).__str__(func_role=self._role) 470 | return out 471 | 472 | 473 | class ClassDoc(NumpyDocString): 474 | def __init__(self, cls, doc=None, modulename='', func_doc=FunctionDoc, 475 | config=None): 476 | if not inspect.isclass(cls) and cls is not None: 477 | raise ValueError("Expected a class or None, but got %r" % cls) 478 | self._cls = cls 479 | 480 | if modulename and not modulename.endswith('.'): 481 | modulename += '.' 
482 | self._mod = modulename 483 | 484 | if doc is None: 485 | if cls is None: 486 | raise ValueError("No class or documentation string given") 487 | doc = pydoc.getdoc(cls) 488 | 489 | NumpyDocString.__init__(self, doc) 490 | 491 | if config is not None and config.get('show_class_members', True): 492 | if not self['Methods']: 493 | self['Methods'] = [(name, '', '') 494 | for name in sorted(self.methods)] 495 | if not self['Attributes']: 496 | self['Attributes'] = [(name, '', '') 497 | for name in sorted(self.properties)] 498 | 499 | @property 500 | def methods(self): 501 | if self._cls is None: 502 | return [] 503 | return [name for name, func in inspect.getmembers(self._cls) 504 | if not name.startswith('_') and callable(func)] 505 | 506 | @property 507 | def properties(self): 508 | if self._cls is None: 509 | return [] 510 | return [name for name, func in inspect.getmembers(self._cls) 511 | if not name.startswith('_') and func is None] 512 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/docscrape_sphinx.py: -------------------------------------------------------------------------------- 1 | import re 2 | import inspect 3 | import textwrap 4 | import pydoc 5 | from .docscrape import NumpyDocString 6 | from .docscrape import FunctionDoc 7 | from .docscrape import ClassDoc 8 | 9 | 10 | class SphinxDocString(NumpyDocString): 11 | def __init__(self, docstring, config=None): 12 | config = {} if config is None else config 13 | self.use_plots = config.get('use_plots', False) 14 | NumpyDocString.__init__(self, docstring, config=config) 15 | 16 | # string conversion routines 17 | def _str_header(self, name, symbol='`'): 18 | return ['.. rubric:: ' + name, ''] 19 | 20 | def _str_field_list(self, name): 21 | return [':' + name + ':'] 22 | 23 | def _str_indent(self, doc, indent=4): 24 | out = [] 25 | for line in doc: 26 | out += [' ' * indent + line] 27 | return out 28 | 29 | def _str_signature(self): 30 | return [''] 31 | if self['Signature']: 32 | return ['``%s``' % self['Signature']] + [''] 33 | else: 34 | return [''] 35 | 36 | def _str_summary(self): 37 | return self['Summary'] + [''] 38 | 39 | def _str_extended_summary(self): 40 | return self['Extended Summary'] + [''] 41 | 42 | def _str_param_list(self, name): 43 | out = [] 44 | if self[name]: 45 | out += self._str_field_list(name) 46 | out += [''] 47 | for param, param_type, desc in self[name]: 48 | out += self._str_indent(['**%s** : %s' % (param.strip(), 49 | param_type)]) 50 | out += [''] 51 | out += self._str_indent(desc, 8) 52 | out += [''] 53 | return out 54 | 55 | @property 56 | def _obj(self): 57 | if hasattr(self, '_cls'): 58 | return self._cls 59 | elif hasattr(self, '_f'): 60 | return self._f 61 | return None 62 | 63 | def _str_member_list(self, name): 64 | """ 65 | Generate a member listing, autosummary:: table where possible, 66 | and a table where not. 67 | 68 | """ 69 | out = [] 70 | if self[name]: 71 | out += ['.. rubric:: %s' % name, ''] 72 | prefix = getattr(self, '_name', '') 73 | 74 | if prefix: 75 | prefix = '~%s.' % prefix 76 | 77 | autosum = [] 78 | others = [] 79 | for param, param_type, desc in self[name]: 80 | param = param.strip() 81 | if not self._obj or hasattr(self._obj, param): 82 | autosum += [" %s%s" % (prefix, param)] 83 | else: 84 | others.append((param, param_type, desc)) 85 | 86 | if autosum: 87 | # GAEL: Toctree commented out below because it creates 88 | # hundreds of sphinx warnings 89 | # out += ['.. 
autosummary::', ' :toctree:', ''] 90 | out += ['.. autosummary::', ''] 91 | out += autosum 92 | 93 | if others: 94 | maxlen_0 = max([len(x[0]) for x in others]) 95 | maxlen_1 = max([len(x[1]) for x in others]) 96 | hdr = "=" * maxlen_0 + " " + "=" * maxlen_1 + " " + "=" * 10 97 | fmt = '%%%ds %%%ds ' % (maxlen_0, maxlen_1) 98 | n_indent = maxlen_0 + maxlen_1 + 4 99 | out += [hdr] 100 | for param, param_type, desc in others: 101 | out += [fmt % (param.strip(), param_type)] 102 | out += self._str_indent(desc, n_indent) 103 | out += [hdr] 104 | out += [''] 105 | return out 106 | 107 | def _str_section(self, name): 108 | out = [] 109 | if self[name]: 110 | out += self._str_header(name) 111 | out += [''] 112 | content = textwrap.dedent("\n".join(self[name])).split("\n") 113 | out += content 114 | out += [''] 115 | return out 116 | 117 | def _str_see_also(self, func_role): 118 | out = [] 119 | if self['See Also']: 120 | see_also = super(SphinxDocString, self)._str_see_also(func_role) 121 | out = ['.. seealso::', ''] 122 | out += self._str_indent(see_also[2:]) 123 | return out 124 | 125 | def _str_warnings(self): 126 | out = [] 127 | if self['Warnings']: 128 | out = ['.. warning::', ''] 129 | out += self._str_indent(self['Warnings']) 130 | return out 131 | 132 | def _str_index(self): 133 | idx = self['index'] 134 | out = [] 135 | if len(idx) == 0: 136 | return out 137 | 138 | out += ['.. index:: %s' % idx.get('default', '')] 139 | for section, references in idx.iteritems(): 140 | if section == 'default': 141 | continue 142 | elif section == 'refguide': 143 | out += [' single: %s' % (', '.join(references))] 144 | else: 145 | out += [' %s: %s' % (section, ','.join(references))] 146 | return out 147 | 148 | def _str_references(self): 149 | out = [] 150 | if self['References']: 151 | out += self._str_header('References') 152 | if isinstance(self['References'], str): 153 | self['References'] = [self['References']] 154 | out.extend(self['References']) 155 | out += [''] 156 | # Latex collects all references to a separate bibliography, 157 | # so we need to insert links to it 158 | import sphinx # local import to avoid test dependency 159 | if sphinx.__version__ >= "0.6": 160 | out += ['.. only:: latex', ''] 161 | else: 162 | out += ['.. latexonly::', ''] 163 | items = [] 164 | for line in self['References']: 165 | m = re.match(r'.. \[([a-z0-9._-]+)\]', line, re.I) 166 | if m: 167 | items.append(m.group(1)) 168 | out += [' ' + ", ".join(["[%s]_" % item for item in items]), ''] 169 | return out 170 | 171 | def _str_examples(self): 172 | examples_str = "\n".join(self['Examples']) 173 | 174 | if (self.use_plots and 'import matplotlib' in examples_str 175 | and 'plot::' not in examples_str): 176 | out = [] 177 | out += self._str_header('Examples') 178 | out += ['.. 
plot::', ''] 179 | out += self._str_indent(self['Examples']) 180 | out += [''] 181 | return out 182 | else: 183 | return self._str_section('Examples') 184 | 185 | def __str__(self, indent=0, func_role="obj"): 186 | out = [] 187 | out += self._str_signature() 188 | out += self._str_index() + [''] 189 | out += self._str_summary() 190 | out += self._str_extended_summary() 191 | for param_list in ('Parameters', 'Returns', 'Raises', 'Attributes'): 192 | out += self._str_param_list(param_list) 193 | out += self._str_warnings() 194 | out += self._str_see_also(func_role) 195 | out += self._str_section('Notes') 196 | out += self._str_references() 197 | out += self._str_examples() 198 | for param_list in ('Methods',): 199 | out += self._str_member_list(param_list) 200 | out = self._str_indent(out, indent) 201 | return '\n'.join(out) 202 | 203 | 204 | class SphinxFunctionDoc(SphinxDocString, FunctionDoc): 205 | def __init__(self, obj, doc=None, config={}): 206 | self.use_plots = config.get('use_plots', False) 207 | FunctionDoc.__init__(self, obj, doc=doc, config=config) 208 | 209 | 210 | class SphinxClassDoc(SphinxDocString, ClassDoc): 211 | def __init__(self, obj, doc=None, func_doc=None, config={}): 212 | self.use_plots = config.get('use_plots', False) 213 | ClassDoc.__init__(self, obj, doc=doc, func_doc=None, config=config) 214 | 215 | 216 | class SphinxObjDoc(SphinxDocString): 217 | def __init__(self, obj, doc=None, config=None): 218 | self._f = obj 219 | SphinxDocString.__init__(self, doc, config=config) 220 | 221 | 222 | def get_doc_object(obj, what=None, doc=None, config={}): 223 | if what is None: 224 | if inspect.isclass(obj): 225 | what = 'class' 226 | elif inspect.ismodule(obj): 227 | what = 'module' 228 | elif callable(obj): 229 | what = 'function' 230 | else: 231 | what = 'object' 232 | if what == 'class': 233 | return SphinxClassDoc(obj, func_doc=SphinxFunctionDoc, doc=doc, 234 | config=config) 235 | elif what in ('function', 'method'): 236 | return SphinxFunctionDoc(obj, doc=doc, config=config) 237 | else: 238 | if doc is None: 239 | doc = pydoc.getdoc(obj) 240 | return SphinxObjDoc(obj, doc, config=config) 241 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/numpydoc.py: -------------------------------------------------------------------------------- 1 | """ 2 | ======== 3 | numpydoc 4 | ======== 5 | 6 | Sphinx extension that handles docstrings in the Numpy standard format. [1] 7 | 8 | It will: 9 | 10 | - Convert Parameters etc. sections to field lists. 11 | - Convert See Also section to a See also entry. 12 | - Renumber references. 13 | - Extract the signature from the docstring, if it can't be determined 14 | otherwise. 15 | 16 | .. 
[1] http://projects.scipy.org/numpy/wiki/CodingStyleGuidelines#docstring-standard 17 | 18 | """ 19 | 20 | from __future__ import unicode_literals 21 | 22 | import sys # Only needed to check Python version 23 | import os 24 | import re 25 | import pydoc 26 | from .docscrape_sphinx import get_doc_object 27 | from .docscrape_sphinx import SphinxDocString 28 | import inspect 29 | 30 | 31 | def mangle_docstrings(app, what, name, obj, options, lines, 32 | reference_offset=[0]): 33 | 34 | cfg = dict(use_plots=app.config.numpydoc_use_plots, 35 | show_class_members=app.config.numpydoc_show_class_members) 36 | 37 | if what == 'module': 38 | # Strip top title 39 | title_re = re.compile(r'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*', 40 | re.I | re.S) 41 | lines[:] = title_re.sub('', "\n".join(lines)).split("\n") 42 | else: 43 | doc = get_doc_object(obj, what, "\n".join(lines), config=cfg) 44 | if sys.version_info[0] < 3: 45 | lines[:] = unicode(doc).splitlines() 46 | else: 47 | lines[:] = str(doc).splitlines() 48 | 49 | if app.config.numpydoc_edit_link and hasattr(obj, '__name__') and \ 50 | obj.__name__: 51 | if hasattr(obj, '__module__'): 52 | v = dict(full_name="%s.%s" % (obj.__module__, obj.__name__)) 53 | else: 54 | v = dict(full_name=obj.__name__) 55 | lines += [u'', u'.. htmlonly::', ''] 56 | lines += [u' %s' % x for x in 57 | (app.config.numpydoc_edit_link % v).split("\n")] 58 | 59 | # replace reference numbers so that there are no duplicates 60 | references = [] 61 | for line in lines: 62 | line = line.strip() 63 | m = re.match(r'^.. \[([a-z0-9_.-])\]', line, re.I) 64 | if m: 65 | references.append(m.group(1)) 66 | 67 | # start renaming from the longest string, to avoid overwriting parts 68 | references.sort(key=lambda x: -len(x)) 69 | if references: 70 | for i, line in enumerate(lines): 71 | for r in references: 72 | if re.match(r'^\d+$', r): 73 | new_r = "R%d" % (reference_offset[0] + int(r)) 74 | else: 75 | new_r = u"%s%d" % (r, reference_offset[0]) 76 | lines[i] = lines[i].replace(u'[%s]_' % r, 77 | u'[%s]_' % new_r) 78 | lines[i] = lines[i].replace(u'.. [%s]' % r, 79 | u'.. 
[%s]' % new_r) 80 | 81 | reference_offset[0] += len(references) 82 | 83 | 84 | def mangle_signature(app, what, name, obj, 85 | options, sig, retann): 86 | # Do not try to inspect classes that don't define `__init__` 87 | if (inspect.isclass(obj) and 88 | (not hasattr(obj, '__init__') or 89 | 'initializes x; see ' in pydoc.getdoc(obj.__init__))): 90 | return '', '' 91 | 92 | if not (callable(obj) or hasattr(obj, '__argspec_is_invalid_')): 93 | return 94 | if not hasattr(obj, '__doc__'): 95 | return 96 | 97 | doc = SphinxDocString(pydoc.getdoc(obj)) 98 | if doc['Signature']: 99 | sig = re.sub("^[^(]*", "", doc['Signature']) 100 | return sig, '' 101 | 102 | 103 | def setup(app, get_doc_object_=get_doc_object): 104 | global get_doc_object 105 | get_doc_object = get_doc_object_ 106 | 107 | if sys.version_info[0] < 3: 108 | app.connect(b'autodoc-process-docstring', mangle_docstrings) 109 | app.connect(b'autodoc-process-signature', mangle_signature) 110 | else: 111 | app.connect('autodoc-process-docstring', mangle_docstrings) 112 | app.connect('autodoc-process-signature', mangle_signature) 113 | app.add_config_value('numpydoc_edit_link', None, False) 114 | app.add_config_value('numpydoc_use_plots', None, False) 115 | app.add_config_value('numpydoc_show_class_members', True, True) 116 | 117 | # Extra mangling domains 118 | app.add_domain(NumpyPythonDomain) 119 | app.add_domain(NumpyCDomain) 120 | 121 | #----------------------------------------------------------------------------- 122 | # Docstring-mangling domains 123 | #----------------------------------------------------------------------------- 124 | 125 | try: 126 | import sphinx # lazy to avoid test dependency 127 | except ImportError: 128 | CDomain = PythonDomain = object 129 | else: 130 | from sphinx.domains.c import CDomain 131 | from sphinx.domains.python import PythonDomain 132 | 133 | 134 | class ManglingDomainBase(object): 135 | directive_mangling_map = {} 136 | 137 | def __init__(self, *a, **kw): 138 | super(ManglingDomainBase, self).__init__(*a, **kw) 139 | self.wrap_mangling_directives() 140 | 141 | def wrap_mangling_directives(self): 142 | for name, objtype in self.directive_mangling_map.items(): 143 | self.directives[name] = wrap_mangling_directive( 144 | self.directives[name], objtype) 145 | 146 | 147 | class NumpyPythonDomain(ManglingDomainBase, PythonDomain): 148 | name = 'np' 149 | directive_mangling_map = { 150 | 'function': 'function', 151 | 'class': 'class', 152 | 'exception': 'class', 153 | 'method': 'function', 154 | 'classmethod': 'function', 155 | 'staticmethod': 'function', 156 | 'attribute': 'attribute', 157 | } 158 | 159 | 160 | class NumpyCDomain(ManglingDomainBase, CDomain): 161 | name = 'np-c' 162 | directive_mangling_map = { 163 | 'function': 'function', 164 | 'member': 'attribute', 165 | 'macro': 'function', 166 | 'type': 'class', 167 | 'var': 'object', 168 | } 169 | 170 | 171 | def wrap_mangling_directive(base_directive, objtype): 172 | class directive(base_directive): 173 | def run(self): 174 | env = self.state.document.settings.env 175 | 176 | name = None 177 | if self.arguments: 178 | m = re.match(r'^(.*\s+)?(.*?)(\(.*)?', self.arguments[0]) 179 | name = m.group(2).strip() 180 | 181 | if not name: 182 | name = self.arguments[0] 183 | 184 | lines = list(self.content) 185 | mangle_docstrings(env.app, objtype, name, None, None, lines) 186 | # local import to avoid testing dependency 187 | from docutils.statemachine import ViewList 188 | self.content = ViewList(lines, self.content.parent) 189 | 190 | return 
base_directive.run(self) 191 | 192 | return directive 193 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | Examples of using factorization machines and polynomial networks with the polylearn library. -------------------------------------------------------------------------------- /examples/plot_regularization_path.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================================== 3 | Plotting regularization paths using warm restarts. 4 | ================================================== 5 | 6 | In this example we show how to use the `warm_start` attribute to efficiently 7 | compute the regularization path for a polynomial network when optimizing 8 | for the `beta` regularization hyperparameter. 9 | """ 10 | print(__doc__) 11 | 12 | # Author: Vlad Niculae 13 | # License: Simplified BSD 14 | 15 | import numpy as np 16 | 17 | import matplotlib.pyplot as plt 18 | 19 | from sklearn.linear_model import Ridge 20 | from sklearn.kernel_ridge import KernelRidge 21 | from sklearn.datasets import load_boston 22 | from sklearn.cross_validation import train_test_split 23 | from sklearn.metrics.scorer import mean_squared_error_scorer 24 | from sklearn.preprocessing import StandardScaler 25 | 26 | from polylearn import PolynomialNetworkRegressor 27 | 28 | boston = load_boston() 29 | X, y = boston.data, boston.target 30 | data_split = train_test_split(X, y, test_size=100, random_state=0) 31 | X_train, X_test, y_train, y_test = data_split 32 | 33 | # Scale both the features (X) and the target (y) to zero mean, unit variance 34 | # (This is not necessary but makes the plots clearer) 35 | 36 | scaler_X = StandardScaler(with_mean=True, with_std=True) 37 | X_train_sc = scaler_X.fit_transform(X_train) 38 | X_test_sc = scaler_X.transform(X_test) 39 | 40 | scaler_y = StandardScaler(with_mean=True, with_std=True) 41 | y_train_sc = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel() 42 | y_test_sc = scaler_y.transform(y_test.reshape(-1, 1)).ravel() 43 | 44 | n_alphas = 50 45 | alphas = np.logspace(-1, 8, n_alphas) 46 | ridge = Ridge(fit_intercept=True) 47 | kernel_ridge = KernelRidge(kernel='poly', gamma=1, degree=3, coef0=1) 48 | 49 | test_scores_ridge = [] 50 | test_scores_kernel = [] 51 | 52 | for alpha in alphas: 53 | ridge.set_params(alpha=alpha) 54 | ridge.fit(X_train_sc, y_train_sc) 55 | test_mse = mean_squared_error_scorer(ridge, X_test_sc, y_test_sc) 56 | test_scores_ridge.append(test_mse) 57 | 58 | kernel_ridge.set_params(alpha=alpha) 59 | kernel_ridge.fit(X_train_sc, y_train_sc) 60 | test_mse = mean_squared_error_scorer(kernel_ridge, X_test_sc, y_test_sc) 61 | test_scores_kernel.append(test_mse) 62 | 63 | 64 | poly = PolynomialNetworkRegressor(degree=3, n_components=2, tol=1e-3, 65 | warm_start=True, random_state=0) 66 | 67 | test_scores_poly = [] 68 | 69 | for alpha in alphas: 70 | poly.set_params(beta=alpha) 71 | poly.fit(X_train_sc, y_train_sc) 72 | test_mse = mean_squared_error_scorer(poly, X_test_sc, y_test_sc) 73 | test_scores_poly.append(test_mse) 74 | 75 | plt.plot(alphas, test_scores_ridge, label="Linear ridge") 76 | plt.plot(alphas, test_scores_kernel, label="Kernel ridge") 77 | plt.plot(alphas, test_scores_poly, label="Poly. 
network (rank=2)") 78 | plt.ylabel("Negated mean squared error (higher is better)") 79 | plt.xlabel("Regularization amount") 80 | plt.ylim(-1, -0.15) 81 | plt.semilogx() 82 | plt.legend() 83 | plt.show() 84 | -------------------------------------------------------------------------------- /examples/plot_xor.py: -------------------------------------------------------------------------------- 1 | """ 2 | =============================================== 3 | Factorization machine decision boundary for XOR 4 | =============================================== 5 | 6 | Plots the decision function learned by a factorization machine for a noisy 7 | non-linearly separable XOR problem 8 | 9 | This problem is a perfect example of feature interactions. As such, 10 | factorization machines can model it very robustly with a very small number of 11 | parameters. (In this case, n_features * n_components = 2 * 1 = 2 params.) 12 | 13 | Example based on: 14 | http://scikit-learn.org/stable/auto_examples/svm/plot_svm_nonlinear.html 15 | """ 16 | print(__doc__) 17 | 18 | # Author: Vlad Niculae 19 | # License: Simplified BSD 20 | 21 | import numpy as np 22 | import matplotlib.pyplot as plt 23 | from sklearn.svm import NuSVC 24 | 25 | from polylearn import FactorizationMachineClassifier 26 | 27 | xx, yy = np.meshgrid(np.linspace(-3, 3, 500), 28 | np.linspace(-3, 3, 500)) 29 | 30 | rng = np.random.RandomState(42) 31 | X = rng.randn(300, 2) 32 | y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0) 33 | 34 | # XOR is too easy for factorization machines, so add noise :) 35 | flip = rng.randint(300, size=15) 36 | y[flip] = ~y[flip] 37 | 38 | # fit the model 39 | fm = FactorizationMachineClassifier(n_components=1, fit_linear=False, 40 | random_state=0) 41 | fm.fit(X, y) 42 | 43 | # fit a NuSVC for comparison 44 | svc = NuSVC(kernel='poly', degree=2) 45 | svc.fit(X, y) 46 | 47 | # plot the decision function for each datapoint on the grid 48 | Z = fm.decision_function(np.c_[xx.ravel(), yy.ravel()]) 49 | Z = Z.reshape(xx.shape) 50 | 51 | Z_svc = svc.decision_function(np.c_[xx.ravel(), yy.ravel()]) 52 | Z_svc = Z_svc.reshape(xx.shape) 53 | 54 | plt.imshow(Z, interpolation='nearest', 55 | extent=(xx.min(), xx.max(), yy.min(), yy.max()), aspect='auto', 56 | origin='lower', cmap=plt.cm.PuOr_r) 57 | 58 | contour_fm = plt.contour(xx, yy, Z, levels=[0], linewidths=2) 59 | 60 | contour_svc = plt.contour(xx, yy, Z_svc, levels=[0], linestyles='dashed') 61 | 62 | plt.scatter(X[:, 0], X[:, 1], s=30, c=y, cmap=plt.cm.Paired) 63 | plt.xticks(()) 64 | plt.yticks(()) 65 | plt.axis([-3, 3, -3, 3]) 66 | plt.legend((contour_fm.collections[0], contour_svc.collections[0]), 67 | ('rank-1 factorization machine', 'SVC with polynomial kernel')) 68 | plt.show() 69 | -------------------------------------------------------------------------------- /polylearn/__init__.py: -------------------------------------------------------------------------------- 1 | from .factorization_machine import FactorizationMachineRegressor 2 | from .factorization_machine import FactorizationMachineClassifier 3 | from .polynomial_network import PolynomialNetworkRegressor 4 | from .polynomial_network import PolynomialNetworkClassifier 5 | -------------------------------------------------------------------------------- /polylearn/base.py: -------------------------------------------------------------------------------- 1 | # Author: Vlad Niculae 2 | # License: Simplified BSD 3 | 4 | from abc import ABCMeta 5 | import numpy as np 6 | from sklearn.base import BaseEstimator, 
ClassifierMixin, RegressorMixin 7 | from sklearn.preprocessing import LabelBinarizer 8 | from sklearn.utils.validation import check_X_y 9 | from sklearn.utils.multiclass import type_of_target 10 | import six 11 | 12 | from .loss import CLASSIFICATION_LOSSES, REGRESSION_LOSSES 13 | 14 | 15 | class _BasePoly(six.with_metaclass(ABCMeta, BaseEstimator)): 16 | 17 | def _get_loss(self, loss): 18 | # classification losses 19 | if loss not in self._LOSSES: 20 | raise ValueError( 21 | 'Loss function "{}" not supported. The available options ' 22 | 'are: "{}".'.format(loss, 23 | '", "'.join(self._LOSSES))) 24 | return self._LOSSES[loss] 25 | 26 | 27 | class _PolyRegressorMixin(RegressorMixin): 28 | 29 | _LOSSES = REGRESSION_LOSSES 30 | 31 | def _check_X_y(self, X, y): 32 | X, y = check_X_y(X, y, accept_sparse='csc', multi_output=False, 33 | dtype=np.double, y_numeric=True) 34 | y = y.astype(np.double).ravel() 35 | return X, y 36 | 37 | def predict(self, X): 38 | """Predict regression output for the samples in X. 39 | 40 | Parameters 41 | ---------- 42 | X : {array-like, sparse matrix}, shape = [n_samples, n_features] 43 | Samples. 44 | 45 | Returns 46 | ------- 47 | y_pred : array, shape = [n_samples] 48 | Returns predicted values. 49 | """ 50 | return self._predict(X) 51 | 52 | 53 | class _PolyClassifierMixin(ClassifierMixin): 54 | 55 | _LOSSES = CLASSIFICATION_LOSSES 56 | 57 | def decision_function(self, X): 58 | """Compute the output of the factorization machine before thresholding. 59 | 60 | Parameters 61 | ---------- 62 | X : {array-like, sparse matrix}, shape = [n_samples, n_features] 63 | Samples. 64 | 65 | Returns 66 | ------- 67 | y_scores : array, shape = [n_samples] 68 | Returns predicted values. 69 | """ 70 | return self._predict(X) 71 | 72 | def predict(self, X): 73 | """Predict using the factorization machine 74 | 75 | Parameters 76 | ---------- 77 | X : {array-like, sparse matrix}, shape = [n_samples, n_features] 78 | Samples. 79 | 80 | Returns 81 | ------- 82 | y_pred : array, shape = [n_samples] 83 | Returns predicted values. 84 | """ 85 | y_pred = self.decision_function(X) > 0 86 | return self.label_binarizer_.inverse_transform(y_pred) 87 | 88 | def predict_proba(self, X): 89 | """Compute probability estimates for the test samples. 90 | 91 | Only available if `loss='logistic'`. 92 | 93 | Parameters 94 | ---------- 95 | X : {array-like, sparse matrix}, shape = [n_samples, n_features] 96 | Samples. 97 | 98 | Returns 99 | ------- 100 | y_scores : array, shape = [n_samples] 101 | Probability estimates that the samples are from the positive class. 102 | """ 103 | if self.loss == 'logistic': 104 | return 1 / (1 + np.exp(-self.decision_function(X))) 105 | else: 106 | raise ValueError("Probability estimates only available for " 107 | "loss='logistic'. You may use probability " 108 | "calibration methods from scikit-learn instead.") 109 | 110 | def _check_X_y(self, X, y): 111 | 112 | # helpful error message for sklearn < 0.17 113 | is_2d = hasattr(y, 'shape') and len(y.shape) > 1 and y.shape[1] >= 2 114 | 115 | if is_2d or type_of_target(y) != 'binary': 116 | raise TypeError("Only binary targets supported. 
For training " 117 | "multiclass or multilabel models, you may use the " 118 | "OneVsRest or OneVsAll metaestimators in " 119 | "scikit-learn.") 120 | 121 | X, Y = check_X_y(X, y, dtype=np.double, accept_sparse='csc', 122 | multi_output=False) 123 | 124 | self.label_binarizer_ = LabelBinarizer(pos_label=1, neg_label=-1) 125 | y = self.label_binarizer_.fit_transform(Y).ravel().astype(np.double) 126 | return X, y 127 | -------------------------------------------------------------------------------- /polylearn/cd_direct_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: language_level=3 3 | # cython: cdivision=True 4 | # cython: boundscheck=False 5 | # cython: wraparound=False 6 | # 7 | # Author: Vlad Niculae 8 | # License: BSD 9 | 10 | from libc.math cimport fabs 11 | from cython.view cimport array 12 | 13 | from lightning.impl.dataset_fast cimport ColumnDataset 14 | 15 | from .loss_fast cimport LossFunction 16 | from .cd_linear_fast cimport _cd_linear_epoch 17 | 18 | 19 | cdef void _precompute(ColumnDataset X, 20 | double[:, :, ::1] P, 21 | Py_ssize_t order, 22 | double[:, ::1] out, 23 | Py_ssize_t s, 24 | unsigned int degree): 25 | 26 | cdef Py_ssize_t n_samples = X.get_n_samples() 27 | cdef Py_ssize_t n_features = P.shape[2] 28 | 29 | # Data pointers 30 | cdef double* data 31 | cdef int* indices 32 | cdef int n_nz 33 | 34 | cdef Py_ssize_t i, j, ii 35 | cdef unsigned int d 36 | cdef double tmp 37 | 38 | for i in range(n_samples): 39 | out[degree - 1, i] = 0 40 | 41 | for j in range(n_features): 42 | X.get_column_ptr(j, &indices, &data, &n_nz) 43 | for ii in range(n_nz): 44 | i = indices[ii] 45 | out[degree - 1, i] += (data[ii] * P[order, s, j]) ** degree 46 | 47 | 48 | cdef inline double _update(int* indices, 49 | double* data, 50 | int n_nz, 51 | double p_js, 52 | double[:] y, 53 | double[:] y_pred, 54 | LossFunction loss, 55 | unsigned int degree, 56 | double lam, 57 | double beta, 58 | double[:, ::1] D, 59 | double[:] cache_kp): 60 | 61 | cdef double l1_reg = 2 * beta * fabs(lam) 62 | 63 | cdef Py_ssize_t i, ii 64 | 65 | cdef double inv_step_size = 0 66 | 67 | cdef double kp # derivative of the ANOVA kernel 68 | cdef double update = 0 69 | 70 | for ii in range(n_nz): 71 | i = indices[ii] 72 | 73 | if degree == 2: 74 | kp = D[0, i] - p_js * data[ii] 75 | else: # degree == 3: 76 | kp = 0.5 * (D[0, i] ** 2 - D[1, i]) 77 | kp -= p_js * data[ii] * D[0, i] 78 | kp += p_js ** 2 * data[ii] ** 2 79 | 80 | kp *= lam * data[ii] 81 | cache_kp[ii] = kp 82 | 83 | update += loss.dloss(y_pred[i], y[i]) * kp 84 | inv_step_size += kp ** 2 85 | 86 | inv_step_size *= loss.mu 87 | inv_step_size += l1_reg 88 | 89 | update += l1_reg * p_js 90 | update /= inv_step_size 91 | 92 | return update 93 | 94 | 95 | cdef inline double _cd_direct_epoch(double[:, :, ::1] P, 96 | Py_ssize_t order, 97 | ColumnDataset X, 98 | double[:] y, 99 | double[:] y_pred, 100 | double[:] lams, 101 | unsigned int degree, 102 | double beta, 103 | LossFunction loss, 104 | double[:, ::1] D, 105 | double[:] cache_kp): 106 | 107 | cdef Py_ssize_t s, j 108 | cdef double p_old, update, offset 109 | cdef double sum_viol = 0 110 | cdef Py_ssize_t n_components = P.shape[1] 111 | cdef Py_ssize_t n_features = P.shape[2] 112 | 113 | # Data pointers 114 | cdef double* data 115 | cdef int* indices 116 | cdef int n_nz 117 | 118 | for s in range(n_components): 119 | 120 | # initialize the cached ds for this s 121 | _precompute(X, P, order, D, s, 1) 122 | if degree == 3: 123 | 
_precompute(X, P, order, D, s, 2) 124 | 125 | for j in range(n_features): 126 | 127 | X.get_column_ptr(j, &indices, &data, &n_nz) 128 | 129 | # compute coordinate update 130 | p_old = P[order, s, j] 131 | update = _update(indices, data, n_nz, p_old, y, y_pred, 132 | loss, degree, lams[s], beta, D, cache_kp) 133 | P[order, s, j] -= update 134 | sum_viol += fabs(update) 135 | 136 | # Synchronize predictions and ds 137 | for ii in range(n_nz): 138 | i = indices[ii] 139 | 140 | if degree == 3: 141 | D[1, i] -= ((p_old ** 2 - P[order, s, j] ** 2) * 142 | data[ii] ** 2) 143 | 144 | D[0, i] -= update * data[ii] 145 | y_pred[i] -= update * cache_kp[ii] 146 | return sum_viol 147 | 148 | 149 | def _cd_direct_ho(double[:, :, ::1] P not None, 150 | double[:] w not None, 151 | ColumnDataset X, 152 | double[:] col_norm_sq not None, 153 | double[:] y not None, 154 | double[:] y_pred not None, 155 | double[:] lams not None, 156 | unsigned int degree, 157 | double alpha, 158 | double beta, 159 | bint fit_linear, 160 | bint fit_lower, 161 | LossFunction loss, 162 | unsigned int max_iter, 163 | double tol, 164 | int verbose): 165 | 166 | cdef Py_ssize_t n_samples = X.get_n_samples() 167 | cdef unsigned int it 168 | 169 | cdef double viol 170 | cdef bint converged = False 171 | 172 | # precomputed values 173 | cdef double[:, ::1] D = array((degree - 1, n_samples), sizeof(double), 'd') 174 | cdef double[:] cache_kp = array((n_samples,), sizeof(double), 'd') 175 | 176 | for it in range(max_iter): 177 | viol = 0 178 | 179 | if fit_linear: 180 | viol += _cd_linear_epoch(w, X, y, y_pred, col_norm_sq, alpha, loss) 181 | 182 | if fit_lower and degree == 3: # fit degree 2. Will be looped later. 183 | viol += _cd_direct_epoch(P, 1, X, y, y_pred, lams, 2, beta, loss, 184 | D, cache_kp) 185 | 186 | viol += _cd_direct_epoch(P, 0, X, y, y_pred, lams, degree, beta, loss, 187 | D, cache_kp) 188 | 189 | if verbose: 190 | print("Iteration", it + 1, "violation sum", viol) 191 | 192 | if viol < tol: 193 | if verbose: 194 | print("Converged at iteration", it + 1) 195 | converged = True 196 | break 197 | 198 | return converged, it 199 | -------------------------------------------------------------------------------- /polylearn/cd_lifted_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: language_level=3 3 | # cython: cdivision=True 4 | # cython: boundscheck=False 5 | # cython: wraparound=False 6 | # 7 | # Author: Vlad Niculae 8 | # License: BSD 9 | 10 | from libc.math cimport fabs 11 | from cython.view cimport array 12 | 13 | from lightning.impl.dataset_fast cimport ColumnDataset 14 | 15 | from .loss_fast cimport LossFunction 16 | 17 | 18 | def _fast_lifted_predict(double[:, :, ::1] U, 19 | ColumnDataset X, 20 | double[:] out): 21 | 22 | # np.product(safe_sparse_dot(U, X.T), axis=0).sum(axis=0) 23 | # 24 | # a bit of a misnomer, since at least for dense data it's a bit slower, 25 | # but it's more memory efficient. 
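# (editor's note) per sample i this accumulates the lifted prediction
#     out[i] += sum_s prod_t <U[t, s, :], x_i>,
# one factor at a time, so only the two length-n_samples buffers
# `middle` and `inner` below are ever materialized.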
26 | 27 | cdef Py_ssize_t degree = U.shape[0] 28 | cdef Py_ssize_t n_components = U.shape[1] 29 | 30 | cdef Py_ssize_t n_samples = X.get_n_samples() 31 | cdef Py_ssize_t n_features = X.get_n_features() 32 | 33 | cdef double* data 34 | cdef int* indices 35 | cdef int n_nz 36 | 37 | cdef Py_ssize_t i, j, ii 38 | 39 | cdef double[:] middle = array((n_samples,), sizeof(double), 'd') 40 | cdef double[:] inner = array((n_samples,), sizeof(double), 'd') 41 | 42 | for s in range(n_components): 43 | 44 | for i in range(n_samples): 45 | middle[i] = 1 46 | 47 | for t in range(degree): 48 | # inner = np.dot(U[t, s, :], X.T) 49 | 50 | for i in range(n_samples): 51 | inner[i] = 0 52 | 53 | for j in range(n_features): 54 | X.get_column_ptr(j, &indices, &data, &n_nz) 55 | for ii in range(n_nz): 56 | i = indices[ii] 57 | inner[i] += data[ii] * U[t, s, j] 58 | 59 | # middle *= inner 60 | for i in range(n_samples): 61 | middle[i] *= inner[i] 62 | 63 | for i in range(n_samples): 64 | out[i] += middle[i] 65 | 66 | 67 | cdef void _precompute(double[:, :, ::1] U, 68 | ColumnDataset X, 69 | Py_ssize_t s, 70 | Py_ssize_t t, 71 | double[:] out, 72 | double[:] tmp): 73 | 74 | cdef Py_ssize_t degree = U.shape[0] 75 | cdef Py_ssize_t n_components = U.shape[1] 76 | 77 | cdef Py_ssize_t n_samples = X.get_n_samples() 78 | cdef Py_ssize_t n_features = X.get_n_features() 79 | 80 | cdef double* data 81 | cdef int* indices 82 | cdef int n_nz 83 | 84 | cdef Py_ssize_t i, j, ii 85 | 86 | for i in range(n_samples): 87 | out[i] = 1 88 | 89 | for t_prime in range(degree): 90 | 91 | if t == t_prime: 92 | continue 93 | 94 | for i in range(n_samples): 95 | tmp[i] = 0 96 | 97 | for j in range(n_features): 98 | X.get_column_ptr(j, &indices, &data, &n_nz) 99 | for ii in range(n_nz): 100 | i = indices[ii] 101 | tmp[i] += data[ii] * U[t_prime, s, j] 102 | for i in range(n_samples): 103 | out[i] *= tmp[i] 104 | 105 | 106 | def _cd_lifted(double[:, :, ::1] U, 107 | ColumnDataset X, 108 | double[:] y, 109 | double[:] y_pred, 110 | double beta, 111 | LossFunction loss, 112 | int max_iter, 113 | double tol, 114 | int verbose): 115 | 116 | cdef Py_ssize_t n_samples = X.get_n_samples() 117 | cdef Py_ssize_t n_features = X.get_n_features() 118 | cdef Py_ssize_t degree = U.shape[0] 119 | cdef Py_ssize_t n_components = U.shape[1] 120 | cdef Py_ssize_t t, s, j 121 | cdef int it 122 | 123 | cdef double sum_viol 124 | cdef bint converged = False 125 | 126 | cdef double inv_step_size 127 | cdef double update 128 | cdef double u_old 129 | 130 | cdef double[:] xi = array((n_samples,), sizeof(double), 'd') 131 | cdef double[:] tmp = array((n_samples,), sizeof(double), 'd') 132 | 133 | # Data pointers 134 | cdef double* data 135 | cdef int* indices 136 | cdef int n_nz 137 | 138 | for it in range(max_iter): 139 | sum_viol = 0 140 | for t in range(degree): 141 | for s in range(n_components): 142 | _precompute(U, X, s, t, xi, tmp) 143 | for j in range(n_features): 144 | 145 | u_old = U[t, s, j] 146 | X.get_column_ptr(j, &indices, &data, &n_nz) 147 | 148 | inv_step_size = 0 149 | update = 0 150 | 151 | for ii in range(n_nz): 152 | i = indices[ii] 153 | inv_step_size += xi[i] ** 2 * data[ii] ** 2 154 | update += xi[i] * data[ii] * loss.dloss(y_pred[i], 155 | y[i]) 156 | 157 | inv_step_size *= loss.mu 158 | inv_step_size += beta 159 | 160 | update += beta * u_old 161 | update /= inv_step_size 162 | 163 | U[t, s, j] -= update 164 | sum_viol += fabs(update) 165 | 166 | # synchronize predictions 167 | for ii in range(n_nz): 168 | i = indices[ii] 169 | y_pred[i] -= 
data[ii] * xi[i] * update 170 | 171 | if verbose: 172 | print("Iteration", it + 1, "violation sum", sum_viol) 173 | 174 | if sum_viol < tol: 175 | if verbose: 176 | print("Converged at iteration", it + 1) 177 | converged = True 178 | break 179 | 180 | return converged, it 181 | -------------------------------------------------------------------------------- /polylearn/cd_linear_fast.pxd: -------------------------------------------------------------------------------- 1 | # cython: language_level=3 2 | 3 | from lightning.impl.dataset_fast cimport ColumnDataset 4 | from .loss_fast cimport LossFunction 5 | 6 | cpdef double _cd_linear_epoch(double[:] w, ColumnDataset X, 7 | double[:] y, 8 | double[:] y_pred, 9 | double[:] col_norm_sq, 10 | double alpha, 11 | LossFunction loss) 12 | -------------------------------------------------------------------------------- /polylearn/cd_linear_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: language_level=3 3 | # cython: cdivision=True 4 | # cython: boundscheck=False 5 | # cython: wraparound=False 6 | # 7 | # Author: Vlad Niculae 8 | # License: BSD 9 | 10 | from libc.math cimport fabs 11 | 12 | from lightning.impl.dataset_fast cimport ColumnDataset 13 | 14 | from .loss_fast cimport LossFunction 15 | 16 | 17 | cpdef double _cd_linear_epoch(double[:] w, 18 | ColumnDataset X, 19 | double[:] y, 20 | double[:] y_pred, 21 | double[:] col_norm_sq, 22 | double alpha, 23 | LossFunction loss): 24 | 25 | cdef Py_ssize_t i, j, ii 26 | cdef double sum_viol = 0 27 | cdef Py_ssize_t n_features = w.shape[0] 28 | cdef double update 29 | cdef double inv_step_size 30 | 31 | # Data pointers 32 | cdef double* data 33 | cdef int* indices 34 | cdef int n_nz 35 | 36 | for j in range(n_features): 37 | X.get_column_ptr(j, &indices, &data, &n_nz) 38 | 39 | # compute gradient with respect to w_j 40 | update = alpha * w[j] 41 | for ii in range(n_nz): 42 | i = indices[ii] 43 | update += loss.dloss(y_pred[i], y[i]) * data[ii] 44 | 45 | # compute second derivative upper bound 46 | inv_step_size = loss.mu * col_norm_sq[j] + alpha 47 | update /= inv_step_size 48 | 49 | w[j] -= update 50 | sum_viol += fabs(update) 51 | 52 | # update predictions 53 | for ii in range(n_nz): 54 | i = indices[ii] 55 | y_pred[i] -= update * data[ii] 56 | 57 | return sum_viol 58 | -------------------------------------------------------------------------------- /polylearn/factorization_machine.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # Author: Vlad Niculae 4 | # License: Simplified BSD 5 | 6 | import warnings 7 | from abc import ABCMeta, abstractmethod 8 | 9 | import numpy as np 10 | from sklearn.preprocessing import add_dummy_feature 11 | from sklearn.utils import check_random_state 12 | from sklearn.utils.validation import check_array 13 | from sklearn.utils.extmath import safe_sparse_dot, row_norms 14 | import six 15 | 16 | try: 17 | from sklearn.exceptions import NotFittedError 18 | except ImportError: 19 | class NotFittedError(ValueError, AttributeError): 20 | pass 21 | 22 | from lightning.impl.dataset_fast import get_dataset 23 | 24 | from .base import _BasePoly, _PolyClassifierMixin, _PolyRegressorMixin 25 | from .kernels import _poly_predict 26 | from .cd_direct_fast import _cd_direct_ho 27 | 28 | 29 | class _BaseFactorizationMachine(six.with_metaclass(ABCMeta, _BasePoly)): 30 | 31 | @abstractmethod 32 | def __init__(self, degree=2, loss='squared', 
n_components=2, alpha=1, 33 | beta=1, tol=1e-6, fit_lower='explicit', fit_linear=True, 34 | warm_start=False, init_lambdas='ones', max_iter=10000, 35 | verbose=False, random_state=None): 36 | self.degree = degree 37 | self.loss = loss 38 | self.n_components = n_components 39 | self.alpha = alpha 40 | self.beta = beta 41 | self.tol = tol 42 | self.fit_lower = fit_lower 43 | self.fit_linear = fit_linear 44 | self.warm_start = warm_start 45 | self.init_lambdas = init_lambdas 46 | self.max_iter = max_iter 47 | self.verbose = verbose 48 | self.random_state = random_state 49 | 50 | def _augment(self, X): 51 | # for factorization machines, we add a dummy column for each order. 52 | 53 | if self.fit_lower == 'augment': 54 | k = 2 if self.fit_linear else 1 55 | for _ in range(self.degree - k): 56 | X = add_dummy_feature(X, value=1) 57 | return X 58 | 59 | def fit(self, X, y): 60 | """Fit factorization machine to training data. 61 | 62 | Parameters 63 | ---------- 64 | X : array-like or sparse, shape = [n_samples, n_features] 65 | Training vectors, where n_samples is the number of samples 66 | and n_features is the number of features. 67 | 68 | y : array-like, shape = [n_samples] 69 | Target values. 70 | 71 | Returns 72 | ------- 73 | self : Estimator 74 | Returns self. 75 | """ 76 | if self.degree > 3: 77 | raise ValueError("FMs with degree >3 not yet supported.") 78 | 79 | X, y = self._check_X_y(X, y) 80 | X = self._augment(X) 81 | n_features = X.shape[1] # augmented 82 | X_col_norms = row_norms(X.T, squared=True) 83 | dataset = get_dataset(X, order="fortran") 84 | rng = check_random_state(self.random_state) 85 | loss_obj = self._get_loss(self.loss) 86 | 87 | if not (self.warm_start and hasattr(self, 'w_')): 88 | self.w_ = np.zeros(n_features, dtype=np.double) 89 | 90 | if self.fit_lower == 'explicit': 91 | n_orders = self.degree - 1 92 | else: 93 | n_orders = 1 94 | 95 | if not (self.warm_start and hasattr(self, 'P_')): 96 | self.P_ = 0.01 * rng.randn(n_orders, self.n_components, n_features) 97 | 98 | if not (self.warm_start and hasattr(self, 'lams_')): 99 | if self.init_lambdas == 'ones': 100 | self.lams_ = np.ones(self.n_components) 101 | elif self.init_lambdas == 'random_signs': 102 | self.lams_ = np.sign(rng.randn(self.n_components)) 103 | else: 104 | raise ValueError("Lambdas must be initialized as ones " 105 | "(init_lambdas='ones') or as random " 106 | "+/- 1 (init_lambdas='random_signs').") 107 | 108 | y_pred = self._get_output(X) 109 | 110 | converged, self.n_iter_ = _cd_direct_ho( 111 | self.P_, self.w_, dataset, X_col_norms, y, y_pred, 112 | self.lams_, self.degree, self.alpha, self.beta, self.fit_linear, 113 | self.fit_lower == 'explicit', loss_obj, self.max_iter, 114 | self.tol, self.verbose) 115 | if not converged: 116 | warnings.warn("Objective did not converge. 
Increase max_iter.") 117 | 118 | return self 119 | 120 | def _get_output(self, X): 121 | y_pred = _poly_predict(X, self.P_[0, :, :], self.lams_, kernel='anova', 122 | degree=self.degree) 123 | 124 | if self.fit_linear: 125 | y_pred += safe_sparse_dot(X, self.w_) 126 | 127 | if self.fit_lower == 'explicit' and self.degree == 3: 128 | # degree cannot currently be > 3 129 | y_pred += _poly_predict(X, self.P_[1, :, :], self.lams_, 130 | kernel='anova', degree=2) 131 | 132 | return y_pred 133 | 134 | def _predict(self, X): 135 | if not hasattr(self, "P_"): 136 | raise NotFittedError("Estimator not fitted.") 137 | X = check_array(X, accept_sparse='csc', dtype=np.double) 138 | X = self._augment(X) 139 | return self._get_output(X) 140 | 141 | 142 | class FactorizationMachineRegressor(_BaseFactorizationMachine, 143 | _PolyRegressorMixin): 144 | """Factorization machine for regression (with squared loss). 145 | 146 | Parameters 147 | ---------- 148 | 149 | degree : int >= 2, default: 2 150 | Degree of the polynomial. Corresponds to the order of feature 151 | interactions captured by the model. Currently only supports 152 | degrees up to 3. 153 | 154 | n_components : int, default: 2 155 | Number of basis vectors to learn, a.k.a. the dimension of the 156 | low-rank parametrization. 157 | 158 | alpha : float, default: 1 159 | Regularization amount for linear term (if ``fit_linear=True``). 160 | 161 | beta : float, default: 1 162 | Regularization amount for higher-order weights. 163 | 164 | tol : float, default: 1e-6 165 | Tolerance for the stopping condition. 166 | 167 | fit_lower : {'explicit'|'augment'|None}, default: 'explicit' 168 | Whether and how to fit lower-order, non-homogeneous terms. 169 | 170 | - 'explicit': fits a separate P directly for each lower order. 171 | 172 | - 'augment': adds the required number of dummy columns (columns 173 | that are 1 everywhere) in order to capture lower-order terms. 174 | Adds ``degree - 2`` columns if ``fit_linear`` is true, or 175 | ``degree - 1`` columns otherwise, to account for the linear term. 176 | 177 | - None: only learns weights for the degree given. If ``degree == 3``, 178 | for example, the model will only have weights for third-order 179 | feature interactions. 180 | 181 | fit_linear : {True|False}, default: True 182 | Whether to fit an explicit linear term to the model, using 183 | coordinate descent. If False, the model can still capture linear 184 | effects if ``fit_lower == 'augment'``. 185 | 186 | warm_start : boolean, optional, default: False 187 | Whether to use the existing solution, if available. Useful for 188 | computing regularization paths or pre-initializing the model. 189 | 190 | init_lambdas : {'ones'|'random_signs'}, default: 'ones' 191 | How to initialize the predictive weights of each learned basis. The 192 | lambdas are not trained; using alternate signs can theoretically 193 | improve performance if the kernel degree is even. The default value 194 | of 'ones' matches the original formulation of factorization machines 195 | (Rendle, 2010). 196 | 197 | To use custom values for the lambdas, ``warm_start`` may be used. 198 | 199 | max_iter : int, optional, default: 10000 200 | Maximum number of passes over the dataset to perform. 201 | 202 | verbose : boolean, optional, default: False 203 | Whether to print debugging information. 204 | 205 | random_state : int seed, RandomState instance, or None (default) 206 | The seed of the pseudo random number generator to use for 207 | initializing the parameters. 
208 | 209 | Attributes 210 | ---------- 211 | 212 | self.P_ : array, shape [n_orders, n_components, n_features] 213 | The learned basis functions. 214 | 215 | ``self.P_[0, :, :]`` is always available, and corresponds to 216 | interactions of order ``self.degree``. 217 | 218 | ``self.P_[i, :, :]`` for i > 0 corresponds to interactions of order 219 | ``self.degree - i``, available only if ``self.fit_lower='explicit'``. 220 | 221 | self.w_ : array, shape [n_features] 222 | The learned linear model, completing the FM. 223 | 224 | Only present if ``self.fit_linear`` is true. 225 | 226 | self.lams_ : array, shape [n_components] 227 | The predictive weights. 228 | 229 | References 230 | ---------- 231 | Polynomial Networks and Factorization Machines: 232 | New Insights and Efficient Training Algorithms. 233 | Mathieu Blondel, Masakazu Ishihata, Akinori Fujino, Naonori Ueda. 234 | In: Proceedings of ICML 2016. 235 | http://mblondel.org/publications/mblondel-icml2016.pdf 236 | 237 | Factorization machines. 238 | Steffen Rendle. 239 | In: Proceedings of IEEE ICDM 2010. 240 | """ 241 | def __init__(self, degree=2, n_components=2, alpha=1, beta=1, tol=1e-6, 242 | fit_lower='explicit', fit_linear=True, warm_start=False, 243 | init_lambdas='ones', max_iter=10000, verbose=False, 244 | random_state=None): 245 | 246 | super(FactorizationMachineRegressor, self).__init__( 247 | degree, 'squared', n_components, alpha, beta, tol, fit_lower, 248 | fit_linear, warm_start, init_lambdas, max_iter, verbose, 249 | random_state) 250 | 251 | 252 | class FactorizationMachineClassifier(_BaseFactorizationMachine, 253 | _PolyClassifierMixin): 254 | """Factorization machine for classification. 255 | 256 | Parameters 257 | ---------- 258 | 259 | degree : int >= 2, default: 2 260 | Degree of the polynomial. Corresponds to the order of feature 261 | interactions captured by the model. Currently only supports 262 | degrees up to 3. 263 | 264 | loss : {'logistic'|'squared_hinge'|'squared'}, default: 'squared_hinge' 265 | Which loss function to use. 266 | 267 | - logistic: L(y, p) = log(1 + exp(-yp)) 268 | 269 | - squared hinge: L(y, p) = max(1 - yp, 0)² 270 | 271 | - squared: L(y, p) = 0.5 * (y - p)² 272 | 273 | n_components : int, default: 2 274 | Number of basis vectors to learn, a.k.a. the dimension of the 275 | low-rank parametrization. 276 | 277 | alpha : float, default: 1 278 | Regularization amount for linear term (if ``fit_linear=True``). 279 | 280 | beta : float, default: 1 281 | Regularization amount for higher-order weights. 282 | 283 | tol : float, default: 1e-6 284 | Tolerance for the stopping condition. 285 | 286 | fit_lower : {'explicit'|'augment'|None}, default: 'explicit' 287 | Whether and how to fit lower-order, non-homogeneous terms. 288 | 289 | - 'explicit': fits a separate P directly for each lower order. 290 | 291 | - 'augment': adds the required number of dummy columns (columns 292 | that are 1 everywhere) in order to capture lower-order terms. 293 | Adds ``degree - 2`` columns if ``fit_linear`` is true, or 294 | ``degree - 1`` columns otherwise, to account for the linear term. 295 | 296 | - None: only learns weights for the degree given. If ``degree == 3``, 297 | for example, the model will only have weights for third-order 298 | feature interactions. 299 | 300 | fit_linear : {True|False}, default: True 301 | Whether to fit an explicit linear term to the model, using 302 | coordinate descent. If False, the model can still capture linear 303 | effects if ``fit_lower == 'augment'``. 
304 | 305 | warm_start : boolean, optional, default: False 306 | Whether to use the existing solution, if available. Useful for 307 | computing regularization paths or pre-initializing the model. 308 | 309 | init_lambdas : {'ones'|'random_signs'}, default: 'ones' 310 | How to initialize the predictive weights of each learned basis. The 311 | lambdas are not trained; using alternate signs can theoretically 312 | improve performance if the kernel degree is even. The default value 313 | of 'ones' matches the original formulation of factorization machines 314 | (Rendle, 2010). 315 | 316 | To use custom values for the lambdas, ``warm_start`` may be used. 317 | 318 | max_iter : int, optional, default: 10000 319 | Maximum number of passes over the dataset to perform. 320 | 321 | verbose : boolean, optional, default: False 322 | Whether to print debugging information. 323 | 324 | random_state : int seed, RandomState instance, or None (default) 325 | The seed of the pseudo random number generator to use for 326 | initializing the parameters. 327 | 328 | Attributes 329 | ---------- 330 | 331 | self.P_ : array, shape [n_orders, n_components, n_features] 332 | The learned basis functions. 333 | 334 | ``self.P_[0, :, :]`` is always available, and corresponds to 335 | interactions of order ``self.degree``. 336 | 337 | ``self.P_[i, :, :]`` for i > 0 corresponds to interactions of order 338 | ``self.degree - i``, available only if ``self.fit_lower='explicit'``. 339 | 340 | self.w_ : array, shape [n_features] 341 | The learned linear model, completing the FM. 342 | 343 | Only present if ``self.fit_linear`` is true. 344 | 345 | self.lams_ : array, shape [n_components] 346 | The predictive weights. 347 | 348 | References 349 | ---------- 350 | Polynomial Networks and Factorization Machines: 351 | New Insights and Efficient Training Algorithms. 352 | Mathieu Blondel, Masakazu Ishihata, Akinori Fujino, Naonori Ueda. 353 | In: Proceedings of ICML 2016. 354 | http://mblondel.org/publications/mblondel-icml2016.pdf 355 | 356 | Factorization machines. 357 | Steffen Rendle. 358 | In: Proceedings of IEEE ICDM 2010. 359 | """ 360 | 361 | def __init__(self, degree=2, loss='squared_hinge', n_components=2, alpha=1, 362 | beta=1, tol=1e-6, fit_lower='explicit', fit_linear=True, 363 | warm_start=False, init_lambdas='ones', max_iter=10000, 364 | verbose=False, random_state=None): 365 | 366 | super(FactorizationMachineClassifier, self).__init__( 367 | degree, loss, n_components, alpha, beta, tol, fit_lower, 368 | fit_linear, warm_start, init_lambdas, max_iter, verbose, 369 | random_state) 370 | -------------------------------------------------------------------------------- /polylearn/kernels.py: -------------------------------------------------------------------------------- 1 | # Author: Vlad Niculae 2 | # License: Simplified BSD 3 | 4 | from sklearn.metrics.pairwise import polynomial_kernel 5 | from sklearn.utils.extmath import safe_sparse_dot 6 | from scipy.sparse import issparse 7 | 8 | import numpy as np 9 | 10 | 11 | def safe_power(X, degree=2): 12 | """Element-wise power supporting both sparse and dense data. 13 | 14 | Parameters 15 | ---------- 16 | X : ndarray or sparse 17 | The array whose entries to raise to the power. 18 | 19 | degree : int, default: 2 20 | The power to which to raise the elements. 
21 | 22 | Returns 23 | ------- 24 | 25 | X_ret : ndarray or sparse 26 | Same shape as X, but (x_ret)_ij = (x)_ij ^ degree 27 | """ 28 | if issparse(X): 29 | if hasattr(X, 'power'): 30 | return X.power(degree) 31 | else: 32 | # old scipy 33 | X = X.copy() 34 | X.data **= degree 35 | return X 36 | else: 37 | return X ** degree 38 | 39 | 40 | def _D(X, P, degree=2): 41 | """The "replacement" part of the homogeneous polynomial kernel. 42 | 43 | D[i, j] = sum_k [(X_ik * P_jk) ** degree] 44 | """ 45 | return safe_sparse_dot(safe_power(X, degree), P.T ** degree) 46 | 47 | 48 | def homogeneous_kernel(X, P, degree=2): 49 | """Convenience alias for homogeneous polynomial kernel between X and P:: 50 | 51 | K_P(x, p) = <x, p> ^ degree 52 | 53 | Parameters 54 | ---------- 55 | X : ndarray of shape (n_samples_1, n_features) 56 | 57 | P : ndarray of shape (n_samples_2, n_features) 58 | 59 | degree : int, default 2 60 | 61 | Returns 62 | ------- 63 | Gram matrix : array of shape (n_samples_1, n_samples_2) 64 | """ 65 | return polynomial_kernel(X, P, degree=degree, gamma=1, coef0=0) 66 | 67 | 68 | def anova_kernel(X, P, degree=2): 69 | """ANOVA kernel between X and P:: 70 | 71 | K_A(x, p) = sum_{i1 < i2 < ... < id} x_i1 p_i1 x_i2 p_i2 ... x_id p_id 72 | 73 | See John Shawe-Taylor and Nello Cristianini, 74 | Kernel Methods for Pattern Analysis section 9.2. 75 | 76 | Parameters 77 | ---------- 78 | X : ndarray of shape (n_samples_1, n_features) 79 | 80 | P : ndarray of shape (n_samples_2, n_features) 81 | 82 | degree : int, default 2 83 | 84 | Returns 85 | ------- 86 | Gram matrix : array of shape (n_samples_1, n_samples_2) 87 | """ 88 | if degree == 2: 89 | K = homogeneous_kernel(X, P, degree=2) 90 | K -= _D(X, P, degree=2) 91 | K /= 2 92 | elif degree == 3: 93 | K = homogeneous_kernel(X, P, degree=3) 94 | K -= 3 * _D(X, P, degree=2) * _D(X, P, degree=1) 95 | K += 2 * _D(X, P, degree=3) 96 | K /= 6 97 | else: 98 | raise NotImplementedError("ANOVA kernel for degree >= 4 not yet " 99 | "implemented efficiently.") 100 | return K 101 | 102 | 103 | def _poly_predict(X, P, lams, kernel, degree=2): 104 | if kernel == "anova": 105 | K = anova_kernel(X, P, degree) 106 | elif kernel == "poly": 107 | K = homogeneous_kernel(X, P, degree) 108 | else: 109 | raise ValueError(("Unsupported kernel: {}. 
Use one " 110 | "of {{'anova'|'poly'}}").format(kernel)) 111 | 112 | return np.dot(K, lams) 113 | -------------------------------------------------------------------------------- /polylearn/loss.py: -------------------------------------------------------------------------------- 1 | # Author: Vlad Niculae 2 | # License: Simplified BSD 3 | 4 | from .loss_fast import Squared, SquaredHinge, Logistic 5 | 6 | 7 | REGRESSION_LOSSES = { 8 | 'squared': Squared() 9 | } 10 | 11 | CLASSIFICATION_LOSSES = { 12 | 'squared': Squared(), 13 | 'squared_hinge': SquaredHinge(), 14 | 'logistic': Logistic() 15 | } 16 | -------------------------------------------------------------------------------- /polylearn/loss_fast.pxd: -------------------------------------------------------------------------------- 1 | # cython: language_level=3 2 | 3 | cdef class LossFunction: 4 | 5 | cdef double mu 6 | cdef double loss(self, double p, double y) 7 | cdef double dloss(self, double p, double y) 8 | -------------------------------------------------------------------------------- /polylearn/loss_fast.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3 2 | # cython: cdivision=True 3 | 4 | from libc.math cimport log, exp 5 | 6 | cdef class LossFunction: 7 | 8 | cdef double loss(self, double p, double y): 9 | raise NotImplementedError() 10 | 11 | cdef double dloss(self, double p, double y): 12 | raise NotImplementedError() 13 | 14 | 15 | cdef class Squared(LossFunction): 16 | """Squared loss: L(p, y) = 0.5 * (y - p)²""" 17 | 18 | def __init__(self): 19 | self.mu = 1 20 | 21 | cdef double loss(self, double p, double y): 22 | return 0.5 * (p - y) ** 2 23 | 24 | cdef double dloss(self, double p, double y): 25 | return p - y 26 | 27 | 28 | cdef class Logistic(LossFunction): 29 | """Logistic loss: L(p, y) = log(1 + exp(-yp))""" 30 | 31 | def __init__(self): 32 | self.mu = 0.25 33 | 34 | cdef double loss(self, double p, double y): 35 | cdef double z = p * y 36 | # log(1 + exp(-z)) 37 | if z > 18: 38 | return exp(-z) 39 | if z < -18: 40 | return -z 41 | return log(1.0 + exp(-z)) 42 | 43 | cdef double dloss(self, double p, double y): 44 | cdef double z = p * y 45 | # cdef double tau = 1 / (1 + exp(-z)) 46 | # return y * (tau - 1) 47 | if z > 18.0: 48 | return -y * exp(-z) 49 | if z < -18.0: 50 | return -y 51 | return -y / (exp(z) + 1.0) 52 | 53 | 54 | cdef class SquaredHinge(LossFunction): 55 | """Squared hinge loss: L(p, y) = max(1 - yp, 0)²""" 56 | 57 | def __init__(self): 58 | self.mu = 2 59 | 60 | cdef double loss(self, double p, double y): 61 | cdef double z = 1 - p * y 62 | if z > 0: 63 | return z * z 64 | return 0.0 65 | 66 | cdef double dloss(self, double p, double y): 67 | cdef double z = 1 - p * y 68 | if z > 0: 69 | return -2 * y * z 70 | return 0.0 71 | -------------------------------------------------------------------------------- /polylearn/polynomial_network.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | """Polynomial networks for regression and classification.""" 4 | 5 | # Author: Vlad Niculae 6 | # License: Simplified BSD 7 | 8 | import warnings 9 | from abc import ABCMeta, abstractmethod 10 | 11 | import numpy as np 12 | from sklearn.preprocessing import add_dummy_feature 13 | from sklearn.utils import check_random_state 14 | from sklearn.utils.validation import check_array 15 | import six 16 | 17 | try: 18 | from sklearn.exceptions import NotFittedError 19 | except ImportError: 20 | 
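# (editor's note) fallback shim for scikit-learn < 0.18, mirroring the
# base classes of sklearn.exceptions.NotFittedError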
class NotFittedError(ValueError, AttributeError): 21 | pass 22 | 23 | from lightning.impl.dataset_fast import get_dataset 24 | 25 | from .base import _BasePoly, _PolyClassifierMixin, _PolyRegressorMixin 26 | from .cd_lifted_fast import _cd_lifted, _fast_lifted_predict 27 | 28 | 29 | def _lifted_predict(U, dataset): 30 | out = np.zeros(dataset.get_n_samples(), dtype=np.double) 31 | _fast_lifted_predict(U, dataset, out) 32 | return out 33 | 34 | 35 | class _BasePolynomialNetwork(six.with_metaclass(ABCMeta, _BasePoly)): 36 | @abstractmethod 37 | def __init__(self, degree=2, loss='squared', n_components=5, beta=1, 38 | tol=1e-6, fit_lower='augment', warm_start=False, 39 | max_iter=10000, verbose=False, random_state=None): 40 | self.degree = degree 41 | self.loss = loss 42 | self.n_components = n_components 43 | self.beta = beta 44 | self.tol = tol 45 | self.fit_lower = fit_lower 46 | self.warm_start = warm_start 47 | self.max_iter = max_iter 48 | self.verbose = verbose 49 | self.random_state = random_state 50 | 51 | def _augment(self, X): 52 | # for polynomial nets, we add a single dummy column 53 | if self.fit_lower == 'augment': 54 | X = add_dummy_feature(X, value=1) 55 | return X 56 | 57 | def fit(self, X, y): 58 | """Fit polynomial network to training data. 59 | 60 | Parameters 61 | ---------- 62 | X : array-like or sparse, shape = [n_samples, n_features] 63 | Training vectors, where n_samples is the number of samples 64 | and n_features is the number of features. 65 | 66 | y : array-like, shape = [n_samples] 67 | Target values. 68 | 69 | Returns 70 | ------- 71 | self : Estimator 72 | Returns self. 73 | """ 74 | if self.fit_lower == 'explicit': 75 | raise NotImplementedError('Explicit fitting of lower orders ' 76 | 'not yet implemented for polynomial ' 77 | 'network models.') 78 | 79 | X, y = self._check_X_y(X, y) 80 | X = self._augment(X) 81 | n_features = X.shape[1] # augmented 82 | dataset = get_dataset(X, order="fortran") 83 | rng = check_random_state(self.random_state) 84 | loss_obj = self._get_loss(self.loss) 85 | 86 | if not (self.warm_start and hasattr(self, 'U_')): 87 | self.U_ = 0.01 * rng.randn(self.degree, self.n_components, 88 | n_features) 89 | 90 | y_pred = _lifted_predict(self.U_, dataset) 91 | 92 | converged, self.n_iter_ = _cd_lifted( 93 | self.U_, dataset, y, y_pred, self.beta, loss_obj, self.max_iter, 94 | self.tol, self.verbose) 95 | 96 | if not converged: 97 | warnings.warn("Objective did not converge. Increase max_iter.") 98 | 99 | return self 100 | 101 | def _predict(self, X): 102 | if not hasattr(self, "U_"): 103 | raise NotFittedError("Estimator not fitted.") 104 | 105 | X = check_array(X, accept_sparse='csc', dtype=np.double) 106 | X = self._augment(X) 107 | X = get_dataset(X, order='fortran') 108 | return _lifted_predict(self.U_, X) 109 | 110 | 111 | class PolynomialNetworkRegressor(_BasePolynomialNetwork, _PolyRegressorMixin): 112 | """Polynomial network for regression (with squared loss). 113 | 114 | Parameters 115 | ---------- 116 | 117 | degree : int >= 2, default: 2 118 | Degree of the polynomial. Corresponds to the order of feature 119 | interactions captured by the model. Currently only supports 120 | degrees up to 3. 121 | 122 | n_components : int, default: 2 123 | Dimension of the lifted tensor. 124 | 125 | beta : float, default: 1 126 | Regularization amount for higher-order weights. 127 | 128 | tol : float, default: 1e-6 129 | Tolerance for the stopping condition. 
130 | 131 | fit_lower : {'augment'|None}, default: 'augment' 132 | Whether and how to fit lower-order, non-homogeneous terms. 133 | 134 | - 'augment': adds a dummy column (1 everywhere) in order to capture 135 | lower-order terms (including linear terms). 136 | 137 | - None: only learns weights for the degree given. 138 | 139 | warm_start : boolean, optional, default: False 140 | Whether to use the existing solution, if available. Useful for 141 | computing regularization paths or pre-initializing the model. 142 | 143 | max_iter : int, optional, default: 10000 144 | Maximum number of passes over the dataset to perform. 145 | 146 | verbose : boolean, optional, default: False 147 | Whether to print debugging information. 148 | 149 | random_state : int seed, RandomState instance, or None (default) 150 | The seed of the pseudo random number generator to use for 151 | initializing the parameters. 152 | 153 | Attributes 154 | ---------- 155 | 156 | self.U_ : array, shape [degree, n_components, n_features] 157 | The learned weights in the lifted tensor parametrization. 158 | 159 | References 160 | ---------- 161 | Polynomial Networks and Factorization Machines: 162 | New Insights and Efficient Training Algorithms. 163 | Mathieu Blondel, Masakazu Ishihata, Akinori Fujino, Naonori Ueda. 164 | In: Proceedings of ICML 2016. 165 | http://mblondel.org/publications/mblondel-icml2016.pdf 166 | 167 | On the computational efficiency of training neural networks. 168 | Roi Livni, Shai Shalev-Shwartz, Ohad Shamir. 169 | In: Proceedings of NIPS 2014. 170 | """ 171 | 172 | def __init__(self, degree=2, n_components=2, beta=1, tol=1e-6, 173 | fit_lower='augment', warm_start=False, 174 | max_iter=10000, verbose=False, random_state=None): 175 | 176 | super(PolynomialNetworkRegressor, self).__init__( 177 | degree, 'squared', n_components, beta, tol, fit_lower, 178 | warm_start, max_iter, verbose, random_state) 179 | 180 | 181 | class PolynomialNetworkClassifier(_BasePolynomialNetwork, 182 | _PolyClassifierMixin): 183 | """Polynomial network for classification. 184 | 185 | Parameters 186 | ---------- 187 | 188 | degree : int >= 2, default: 2 189 | Degree of the polynomial. Corresponds to the order of feature 190 | interactions captured by the model. Currently only supports 191 | degrees up to 3. 192 | 193 | loss : {'logistic'|'squared_hinge'|'squared'}, default: 'squared_hinge' 194 | Which loss function to use. 195 | 196 | - logistic: L(y, p) = log(1 + exp(-yp)) 197 | 198 | - squared hinge: L(y, p) = max(1 - yp, 0)² 199 | 200 | - squared: L(y, p) = 0.5 * (y - p)² 201 | 202 | n_components : int, default: 2 203 | Dimension of the lifted tensor. 204 | 205 | beta : float, default: 1 206 | Regularization amount for higher-order weights. 207 | 208 | tol : float, default: 1e-6 209 | Tolerance for the stopping condition. 210 | 211 | fit_lower : {'augment'|None}, default: 'augment' 212 | Whether and how to fit lower-order, non-homogeneous terms. 213 | 214 | - 'augment': adds a dummy column (1 everywhere) in order to capture 215 | lower-order terms (including linear terms). 216 | 217 | - None: only learns weights for the degree given. 218 | 219 | warm_start : boolean, optional, default: False 220 | Whether to use the existing solution, if available. Useful for 221 | computing regularization paths or pre-initializing the model. 222 | 223 | max_iter : int, optional, default: 10000 224 | Maximum number of passes over the dataset to perform. 
225 | 226 | verbose : boolean, optional, default: False 227 | Whether to print debugging information. 228 | 229 | random_state : int seed, RandomState instance, or None (default) 230 | The seed of the pseudo random number generator to use for 231 | initializing the parameters. 232 | 233 | Attributes 234 | ---------- 235 | 236 | self.U_ : array, shape [degree, n_components, n_features] 237 | The learned weights in the lifted tensor parametrization. 238 | 239 | References 240 | ---------- 241 | Polynomial Networks and Factorization Machines: 242 | New Insights and Efficient Training Algorithms. 243 | Mathieu Blondel, Masakazu Ishihata, Akinori Fujino, Naonori Ueda. 244 | In: Proceedings of ICML 2016. 245 | http://mblondel.org/publications/mblondel-icml2016.pdf 246 | 247 | On the computational efficiency of training neural networks. 248 | Roi Livni, Shai Shalev-Shwartz, Ohad Shamir. 249 | In: Proceedings of NIPS 2014. 250 | """ 251 | 252 | def __init__(self, degree=2, loss='squared_hinge', n_components=2, beta=1, 253 | tol=1e-6, fit_lower='augment', warm_start=False, 254 | max_iter=10000, verbose=False, random_state=None): 255 | 256 | super(PolynomialNetworkClassifier, self).__init__( 257 | degree, loss, n_components, beta, tol, fit_lower, 258 | warm_start, max_iter, verbose, random_state) 259 | -------------------------------------------------------------------------------- /polylearn/setup.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | import numpy 4 | 5 | 6 | def configuration(parent_package='', top_path=None): 7 | from numpy.distutils.misc_util import Configuration 8 | 9 | config = Configuration('polylearn', parent_package, top_path) 10 | 11 | config.add_extension('loss_fast', sources=['loss_fast.cpp'], 12 | include_dirs=[numpy.get_include()]) 13 | 14 | config.add_extension('cd_direct_fast', sources=['cd_direct_fast.cpp'], 15 | include_dirs=[numpy.get_include()]) 16 | 17 | config.add_extension('cd_linear_fast', sources=['cd_linear_fast.cpp'], 18 | include_dirs=[numpy.get_include()]) 19 | 20 | config.add_extension('cd_lifted_fast', sources=['cd_lifted_fast.cpp'], 21 | include_dirs=[numpy.get_include()]) 22 | 23 | config.add_subpackage('tests') 24 | 25 | return config 26 | 27 | 28 | if __name__ == '__main__': 29 | from numpy.distutils.core import setup 30 | setup(**configuration(top_path='').todict()) 31 | -------------------------------------------------------------------------------- /polylearn/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/polylearn/4dd9d4b8aca029628a4c934829526b8552db2e1b/polylearn/tests/__init__.py -------------------------------------------------------------------------------- /polylearn/tests/test_cd_linear.py: -------------------------------------------------------------------------------- 1 | from nose.tools import assert_less_equal, assert_greater_equal 2 | from numpy.testing import assert_array_almost_equal 3 | 4 | import numpy as np 5 | from sklearn.utils.validation import assert_all_finite 6 | from polylearn.cd_linear_fast import _cd_linear_epoch 7 | from polylearn.loss_fast import Squared, SquaredHinge, Logistic 8 | from lightning.impl.dataset_fast import get_dataset 9 | 10 | rng = np.random.RandomState(0) 11 | X = rng.randn(50, 10) 12 | w_true = rng.randn(10) 13 | 14 | y = np.dot(X, w_true) 15 | X_ds = get_dataset(X, order='fortran') 16 | X_col_norm_sq = (X ** 2).sum(axis=0) 17 | 18 | 
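# ---------------------------------------------------------------------------
# (editor's sketch, not part of the original test suite) One epoch of the
# linear coordinate-descent update exercised by these tests, in plain NumPy
# and specialized to the squared loss (mu = 1); `_cd_linear_epoch` performs
# the same update column-by-column over the Fortran-ordered dataset.
def _numpy_linear_epoch(w, X, y, y_pred, col_norm_sq, alpha):
    sum_viol = 0.0
    for j in range(X.shape[1]):
        x_j = X[:, j]
        # gradient of sum_i 0.5 * (y_pred_i - y_i) ** 2 + 0.5 * alpha * ||w||^2
        # with respect to w_j
        grad_j = alpha * w[j] + np.dot(y_pred - y, x_j)
        # step size from the curvature upper bound mu * ||x_j||^2 + alpha
        update = grad_j / (col_norm_sq[j] + alpha)
        w[j] -= update
        y_pred -= update * x_j  # keep predictions synchronized
        sum_viol += abs(update)
    return sum_viol
# ---------------------------------------------------------------------------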
n_iter = 100 19 | 20 | 21 | def _fit_linear(X, y, alpha, n_iter, loss, callback=None): 22 | n_samples, n_features = X.shape 23 | X_col_norm_sq = (X ** 2).sum(axis=0) 24 | X_ds = get_dataset(X, order='fortran') 25 | w_init = np.zeros(n_features) 26 | y_pred = np.zeros(n_samples) 27 | 28 | for _ in range(n_iter): 29 | viol = _cd_linear_epoch(w_init, X_ds, y, y_pred, X_col_norm_sq, 30 | alpha, loss) 31 | if callback is not None: 32 | callback(w_init, viol) 33 | return w_init 34 | 35 | 36 | class Callback(object): 37 | def __init__(self, X, y, alpha): 38 | self.X = X 39 | self.y = y 40 | self.alpha = alpha 41 | 42 | self.losses_ = [] 43 | 44 | def __call__(self, w, viol): 45 | y_pred = np.dot(self.X, w) 46 | lv = np.mean((y_pred - self.y) ** 2) 47 | lv += 2 * self.alpha * np.sum(w ** 2) 48 | self.losses_.append(lv) 49 | 50 | 51 | def test_cd_linear_fit(): 52 | loss = Squared() 53 | alpha = 1e-6 54 | cb = Callback(X, y, alpha) 55 | w = _fit_linear(X, y, alpha, n_iter, loss, cb) 56 | 57 | assert_array_almost_equal(w_true, w) 58 | assert_less_equal(cb.losses_[1], cb.losses_[0]) 59 | assert_less_equal(cb.losses_[-1], cb.losses_[0]) 60 | 61 | 62 | def check_cd_linear_clf(loss): 63 | alpha = 1e-3 64 | y_bin = np.sign(y) 65 | 66 | w = _fit_linear(X, y_bin, alpha, n_iter, loss) 67 | y_pred = np.dot(X, w) 68 | accuracy = np.mean(np.sign(y_pred) == y_bin) 69 | 70 | assert_greater_equal(accuracy, 0.97, 71 | msg="classification loss {}".format(loss)) 72 | 73 | 74 | def test_cd_linear_clf(): 75 | for loss in (Squared(), SquaredHinge(), Logistic()): 76 | yield check_cd_linear_clf, loss 77 | 78 | 79 | def test_cd_linear_offset(): 80 | loss = Squared() 81 | alpha = 1e-3 82 | w_a = np.zeros_like(w_true) 83 | w_b = np.zeros_like(w_true) 84 | 85 | n_samples = X.shape[0] 86 | y_pred_a = np.zeros(n_samples) 87 | y_pred_b = np.zeros(n_samples) 88 | y_offset = np.arange(n_samples).astype(np.double) 89 | 90 | # one epoch with offset 91 | _cd_linear_epoch(w_a, X_ds, y, y_pred_a + y_offset, X_col_norm_sq, alpha, 92 | loss) 93 | 94 | # one epoch with shifted target 95 | _cd_linear_epoch(w_b, X_ds, y - y_offset, y_pred_b, X_col_norm_sq, alpha, 96 | loss) 97 | 98 | assert_array_almost_equal(w_a, w_b) 99 | 100 | 101 | def test_cd_linear_trivial(): 102 | # trivial example that failed due to gh#4 103 | loss = Squared() 104 | alpha = 1e-5 105 | n_features = 100 106 | x = np.zeros((1, n_features)) 107 | x[0, 1] = 1 108 | y = np.ones(1) 109 | cb = Callback(x, y, alpha) 110 | w = _fit_linear(x, y, alpha, n_iter=20, loss=loss, callback=cb) 111 | 112 | assert_all_finite(w) 113 | assert_all_finite(cb.losses_) -------------------------------------------------------------------------------- /polylearn/tests/test_common.py: -------------------------------------------------------------------------------- 1 | from nose import SkipTest 2 | from nose.tools import assert_raises, assert_greater 3 | from nose.tools import assert_equal 4 | import numpy as np 5 | from numpy.testing import assert_array_almost_equal 6 | from scipy.sparse import csc_matrix 7 | 8 | from polylearn import (PolynomialNetworkClassifier, PolynomialNetworkRegressor, 9 | FactorizationMachineClassifier, 10 | FactorizationMachineRegressor) 11 | 12 | 13 | def test_check_estimator(): 14 | # TODO: classifiers that provide predict_proba but are not multiclass fail 15 | # No trivial way to use OneVsRestClassifier even if it actually works. 
16 | 17 | try: 18 | from sklearn.utils.estimator_checks import check_estimator 19 | except ImportError: 20 | raise SkipTest('Common scikit-learn tests not available. ' 21 | 'You must be running an older version of scikit-learn.') 22 | yield check_estimator, PolynomialNetworkRegressor 23 | # FM Regressor fails because 5 iter is not enough :( 24 | # yield check_estimator, FactorizationMachineRegressor 25 | 26 | 27 | X = np.array([[-10, -10], [-10, 10], [10, -10], [10, 10]]) 28 | y = np.array(['true', 'false', 'false', 'true']) 29 | 30 | 31 | def check_classify_xor(Clf): 32 | """Tests that the classifier can solve XOR""" 33 | clf = Clf(tol=1e-2, fit_lower=None, random_state=0) 34 | 35 | # temporary workaround until fit_linear is implemented 36 | try: 37 | clf.set_params(fit_linear=False) 38 | except ValueError: 39 | pass 40 | 41 | assert_equal(clf.fit(X, y).score(X, y), 1.0) 42 | 43 | 44 | def test_classify_xor(): 45 | yield check_classify_xor, PolynomialNetworkClassifier 46 | yield check_classify_xor, FactorizationMachineClassifier 47 | 48 | 49 | def check_predict_proba(Clf): 50 | clf = Clf(loss='logistic', tol=1e-2, random_state=0).fit(X, y) 51 | y_proba = clf.predict_proba(X) 52 | assert_greater(y_proba[0], y_proba[1]) 53 | assert_greater(y_proba[3], y_proba[2]) 54 | 55 | 56 | def test_predict_proba(): 57 | yield check_predict_proba, FactorizationMachineClassifier 58 | yield check_predict_proba, PolynomialNetworkClassifier 59 | 60 | 61 | def check_predict_proba_raises(Clf): 62 | """Test that predict_proba doesn't work with hinge loss""" 63 | pp = Clf(loss='squared_hinge', random_state=0).predict_proba 64 | assert_raises(ValueError, pp, X) 65 | 66 | 67 | def test_predict_proba_raises(): 68 | yield check_predict_proba_raises, FactorizationMachineClassifier 69 | yield check_predict_proba_raises, PolynomialNetworkClassifier 70 | 71 | 72 | def check_loss_raises(Clf): 73 | """Test error on unsupported loss""" 74 | clf = Clf(loss='hinge', random_state=0) 75 | assert_raises(ValueError, clf.fit, X, y) 76 | 77 | 78 | def test_loss_raises(): 79 | yield check_loss_raises, FactorizationMachineClassifier 80 | yield check_loss_raises, PolynomialNetworkClassifier 81 | 82 | 83 | def check_clf_multiclass_error(Clf): 84 | """Test that classifier raises TypeError on multiclass/multilabel y""" 85 | y_ = np.column_stack([y, y]) 86 | clf = Clf(random_state=0) 87 | assert_raises(TypeError, clf.fit, X, y_) 88 | 89 | 90 | def test_clf_multiclass_error(): 91 | yield check_clf_multiclass_error, FactorizationMachineClassifier 92 | yield check_clf_multiclass_error, PolynomialNetworkClassifier 93 | 94 | 95 | def check_clf_float_error(Clf): 96 | """Test that classifier raises TypeError on continuous/float y""" 97 | y_ = [0.1, 0.2, 0.3, 0.4] 98 | clf = Clf(random_state=0) 99 | assert_raises(TypeError, clf.fit, X, y_) 100 | 101 | 102 | def test_clf_float_error(): 103 | yield check_clf_float_error, FactorizationMachineClassifier 104 | yield check_clf_float_error, PolynomialNetworkClassifier 105 | 106 | 107 | def check_not_fitted(Est): 108 | est = Est() 109 | assert_raises(ValueError, est.predict, X) 110 | 111 | 112 | def test_not_fitted(): 113 | yield check_not_fitted, FactorizationMachineClassifier 114 | yield check_not_fitted, PolynomialNetworkClassifier 115 | yield check_not_fitted, FactorizationMachineRegressor 116 | yield check_not_fitted, PolynomialNetworkRegressor 117 | 118 | 119 | def test_augment(): 120 | # The following linearly separable dataset cannot be modeled with just an FM 121 | X_evil = 
np.array([[-1, -1], [1, 1]]) 122 | y_evil = np.array([-1, 1]) 123 | clf = FactorizationMachineClassifier(fit_linear=False, fit_lower=None, 124 | random_state=0) 125 | clf.fit(X_evil, y_evil) 126 | assert_equal(0.5, clf.score(X_evil, y_evil)) 127 | 128 | # However, by adding a dummy feature (a column of all ones), the linear 129 | # effect can be captured. 130 | clf = FactorizationMachineClassifier(fit_linear=False, fit_lower='augment', 131 | random_state=0) 132 | clf.fit(X_evil, y_evil) 133 | assert_equal(1.0, clf.score(X_evil, y_evil)) 134 | 135 | 136 | def check_sparse(Clf): 137 | X_sp = csc_matrix(X) 138 | # simple y that works for both clf and regression 139 | y_simple = [0, 1, 0, 1] 140 | clf = Clf(tol=1e-2, random_state=0) 141 | assert_array_almost_equal(clf.fit(X, y_simple).predict(X), 142 | clf.fit(X_sp, y_simple).predict(X_sp)) 143 | 144 | 145 | def test_sparse(): 146 | yield check_sparse, FactorizationMachineClassifier 147 | yield check_sparse, PolynomialNetworkClassifier 148 | yield check_sparse, FactorizationMachineRegressor 149 | yield check_sparse, PolynomialNetworkRegressor 150 | -------------------------------------------------------------------------------- /polylearn/tests/test_factorization_machine.py: -------------------------------------------------------------------------------- 1 | # Author: Vlad Niculae 2 | # License: Simplified BSD 3 | 4 | import warnings 5 | 6 | from nose.tools import assert_less_equal, assert_equal 7 | 8 | import numpy as np 9 | from numpy.testing import assert_array_almost_equal 10 | 11 | from sklearn.metrics import mean_squared_error 12 | from sklearn.utils.testing import assert_warns_message 13 | 14 | from polylearn.kernels import _poly_predict 15 | from polylearn import FactorizationMachineRegressor 16 | from polylearn import FactorizationMachineClassifier 17 | 18 | 19 | def cd_direct_slow(X, y, lams=None, degree=2, n_components=5, beta=1., 20 | n_iter=10, tol=1e-5, verbose=False, random_state=None): 21 | from sklearn.utils import check_random_state 22 | from polylearn.kernels import anova_kernel 23 | 24 | n_samples, n_features = X.shape 25 | 26 | rng = check_random_state(random_state) 27 | P = 0.01 * rng.randn(n_components, n_features) 28 | if lams is None: 29 | lams = np.ones(n_components) 30 | 31 | K = anova_kernel(X, P, degree=degree) 32 | pred = np.dot(lams, K.T) 33 | 34 | mu = 1 # squared loss 35 | converged = False 36 | 37 | for i in range(n_iter): 38 | sum_viol = 0 39 | for s in range(n_components): 40 | ps = P[s] 41 | for j in range(n_features): 42 | 43 | # trivial approach: 44 | # multilinearity allows us to isolate the term with ps_j * x_j 45 | x = X[:, j] 46 | notj_mask = np.arange(n_features) != j 47 | X_notj = X[:, notj_mask] 48 | ps_notj = ps[notj_mask] 49 | 50 | if degree == 2: 51 | grad_y = lams[s] * x * np.dot(X_notj, ps_notj) 52 | elif degree == 3: 53 | grad_y = lams[s] * x * anova_kernel(np.atleast_2d(ps_notj), 54 | X_notj, degree=2) 55 | else: 56 | raise NotImplementedError("Degree > 3 not supported.") 57 | 58 | l1_reg = 2 * beta * np.abs(lams[s]) 59 | inv_step_size = mu * (grad_y ** 2).sum() + l1_reg 60 | 61 | dloss = pred - y # squared loss 62 | step = (dloss * grad_y).sum() + l1_reg * ps[j] 63 | step /= inv_step_size 64 | 65 | P[s, j] -= step 66 | sum_viol += np.abs(step) 67 | 68 | # stupidly recompute all predictions. No rush yet. 
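# (editor's note) the Cython path in cd_direct_fast.pyx avoids this full
# kernel recomputation: it caches the per-sample kernel derivatives
# (cache_kp) and the power sums (D), and patches y_pred in place after
# every coordinate step.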
69 | K = anova_kernel(X, P, degree=degree) 70 | pred = np.dot(lams, K.T) 71 | 72 | reg_obj = beta * np.sum((P ** 2).sum(axis=1) * np.abs(lams)) 73 | 74 | if verbose: 75 | print("Epoch", i, "violations", sum_viol, "obj", 76 | 0.5 * ((pred - y) ** 2).sum() + reg_obj) 77 | 78 | if sum_viol < tol: 79 | converged = True 80 | break 81 | 82 | if not converged: 83 | warnings.warn("Objective did not converge. Increase max_iter.") 84 | 85 | return P 86 | 87 | 88 | n_components = 5 89 | n_features = 4 90 | n_samples = 20 91 | 92 | rng = np.random.RandomState(1) 93 | 94 | X = rng.randn(n_samples, n_features) 95 | P = rng.randn(n_components, n_features) 96 | 97 | lams = rng.randn(n_components) 98 | 99 | 100 | def test_augment(): 101 | """Test that augmenting the data increases the dimension as expected""" 102 | y = _poly_predict(X, P, lams, kernel="anova", degree=3) 103 | fm = FactorizationMachineRegressor(degree=3, fit_lower='augment', 104 | fit_linear=True, tol=0.1) 105 | fm.fit(X, y) 106 | assert_equal(n_features + 1, fm.P_.shape[2], 107 | msg="Augmenting is wrong with explicit linear term.") 108 | 109 | fm.set_params(fit_linear=False) 110 | fm.fit(X, y) 111 | assert_equal(n_features + 2, fm.P_.shape[2], 112 | msg="Augmenting is wrong with augmented linear term.") 113 | 114 | 115 | def check_fit(degree): 116 | y = _poly_predict(X, P, lams, kernel="anova", degree=degree) 117 | 118 | est = FactorizationMachineRegressor(degree=degree, n_components=5, 119 | fit_linear=False, fit_lower=None, 120 | max_iter=15000, beta=1e-6, tol=1e-3, 121 | random_state=0) 122 | est.fit(X, y) 123 | y_pred = est.predict(X) 124 | err = mean_squared_error(y, y_pred) 125 | 126 | assert_less_equal( 127 | err, 128 | 1e-6, 129 | msg="Error {} too big for degree {}.".format(err, degree)) 130 | 131 | 132 | def test_fit(): 133 | yield check_fit, 2 134 | yield check_fit, 3 135 | 136 | 137 | def check_improve(degree): 138 | y = _poly_predict(X, P, lams, kernel="anova", degree=degree) 139 | 140 | est = FactorizationMachineRegressor(degree=degree, n_components=5, 141 | fit_lower=None, fit_linear=False, 142 | beta=0.0001, max_iter=5, tol=0, 143 | random_state=0) 144 | with warnings.catch_warnings(): 145 | warnings.simplefilter("ignore") 146 | y_pred_5 = est.fit(X, y).predict(X) 147 | est.set_params(max_iter=10) 148 | y_pred_10 = est.fit(X, y).predict(X) 149 | 150 | assert_less_equal(mean_squared_error(y, y_pred_10), 151 | mean_squared_error(y, y_pred_5), 152 | msg="More iterations do not improve fit.") 153 | 154 | 155 | def test_improve(): 156 | yield check_improve, 2 157 | yield check_improve, 3 158 | 159 | 160 | def check_overfit(degree): 161 | noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=degree) 162 | noisy_y += 5. 
* rng.randn(noisy_y.shape[0]) 163 | X_train, X_test = X[:10], X[10:] 164 | y_train, y_test = noisy_y[:10], noisy_y[10:] 165 | 166 | # weak regularization, should overfit 167 | est = FactorizationMachineRegressor(degree=degree, n_components=5, 168 | fit_linear=False, fit_lower=None, 169 | beta=1e-4, tol=0.01, random_state=0) 170 | y_train_pred_weak = est.fit(X_train, y_train).predict(X_train) 171 | y_test_pred_weak = est.predict(X_test) 172 | 173 | est.set_params(beta=10) # high value of beta -> strong regularization 174 | y_train_pred_strong = est.fit(X_train, y_train).predict(X_train) 175 | y_test_pred_strong = est.predict(X_test) 176 | 177 | assert_less_equal(mean_squared_error(y_train, y_train_pred_weak), 178 | mean_squared_error(y_train, y_train_pred_strong), 179 | msg="Training error does not get worse with regul.") 180 | 181 | assert_less_equal(mean_squared_error(y_test, y_test_pred_strong), 182 | mean_squared_error(y_test, y_test_pred_weak), 183 | msg="Test error does not get better with regul.") 184 | 185 | 186 | def test_overfit(): 187 | yield check_overfit, 2 188 | yield check_overfit, 3 189 | 190 | 191 | def test_convergence_warning(): 192 | y = _poly_predict(X, P, lams, kernel="anova", degree=3) 193 | 194 | est = FactorizationMachineRegressor(degree=3, beta=1e-8, max_iter=1, 195 | random_state=0) 196 | assert_warns_message(UserWarning, "converge", est.fit, X, y) 197 | 198 | 199 | def test_random_starts(): 200 | noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=2) 201 | noisy_y += 5. * rng.randn(noisy_y.shape[0]) 202 | X_train, X_test = X[:10], X[10:] 203 | y_train, y_test = noisy_y[:10], noisy_y[10:] 204 | 205 | scores = [] 206 | # init_lambdas='ones' is important to reduce variance here 207 | reg = FactorizationMachineRegressor(degree=2, n_components=n_components, 208 | beta=5, fit_lower=None, 209 | fit_linear=False, max_iter=2000, 210 | init_lambdas='ones', tol=0.001) 211 | for k in range(10): 212 | reg.set_params(random_state=k) 213 | y_pred = reg.fit(X_train, y_train).predict(X_test) 214 | scores.append(mean_squared_error(y_test, y_pred)) 215 | 216 | assert_less_equal(np.std(scores), 0.001) 217 | 218 | 219 | def check_same_as_slow(degree): 220 | y = _poly_predict(X, P, lams, kernel="anova", degree=degree) 221 | 222 | reg = FactorizationMachineRegressor(degree=degree, n_components=5, 223 | fit_lower=None, fit_linear=False, 224 | beta=1, warm_start=False, tol=1e-3, 225 | max_iter=5, random_state=0) 226 | 227 | with warnings.catch_warnings(): 228 | warnings.simplefilter('ignore') 229 | reg.fit(X, y) 230 | 231 | P_fit_slow = cd_direct_slow(X, y, lams=reg.lams_, degree=degree, 232 | n_components=5, beta=1, n_iter=5, 233 | tol=1e-3, random_state=0) 234 | 235 | assert_array_almost_equal(reg.P_[0, :, :], P_fit_slow, decimal=4) 236 | 237 | 238 | def test_same_as_slow(): 239 | yield check_same_as_slow, 2 240 | yield check_same_as_slow, 3 241 | 242 | 243 | def check_classification_losses(loss, degree): 244 | y = np.sign(_poly_predict(X, P, lams, kernel="anova", degree=degree)) 245 | clf = FactorizationMachineClassifier(degree=degree, loss=loss, beta=1e-3, 246 | fit_lower=None, fit_linear=False, 247 | tol=1e-3, random_state=0) 248 | clf.fit(X, y) 249 | assert_equal(1.0, clf.score(X, y)) 250 | 251 | 252 | def test_classification_losses(): 253 | for loss in ('squared_hinge', 'logistic'): 254 | for degree in (2, 3): 255 | yield check_classification_losses, loss, degree 256 | 257 | 258 | def check_warm_start(degree): 259 | y = _poly_predict(X, P, lams, kernel="anova", 
degree=degree) 260 | # Result should be the same if: 261 | # (a) running 10 iterations 262 | clf_10 = FactorizationMachineRegressor(degree=degree, n_components=5, 263 | fit_lower=None, fit_linear=False, 264 | max_iter=10, warm_start=False, 265 | random_state=0) 266 | with warnings.catch_warnings(): 267 | warnings.simplefilter("ignore") 268 | clf_10.fit(X, y) 269 | 270 | # (b) running 5 iterations and 5 more 271 | clf_5_5 = FactorizationMachineRegressor(degree=degree, n_components=5, 272 | fit_lower=None, fit_linear=False, 273 | max_iter=5, warm_start=True, 274 | random_state=0) 275 | with warnings.catch_warnings(): 276 | warnings.simplefilter("ignore") 277 | clf_5_5.fit(X, y) 278 | P_fit = clf_5_5.P_.copy() 279 | lams_fit = clf_5_5.lams_.copy() 280 | clf_5_5.fit(X, y) 281 | 282 | # (c) running 5 iterations when starting from previous point. 283 | clf_5 = FactorizationMachineRegressor(degree=degree, n_components=5, 284 | fit_lower=None, fit_linear=False, 285 | max_iter=5, warm_start=True, 286 | random_state=0) 287 | clf_5.P_ = P_fit 288 | clf_5.lams_ = lams_fit 289 | with warnings.catch_warnings(): 290 | warnings.simplefilter("ignore") 291 | clf_5.fit(X, y) 292 | 293 | assert_array_almost_equal(clf_10.P_, clf_5_5.P_) 294 | assert_array_almost_equal(clf_10.P_, clf_5.P_) 295 | 296 | # Prediction results should also be the same if: 297 | # (note: could not get this test to work for the exact P_.) 298 | 299 | noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=2) 300 | noisy_y += rng.randn(noisy_y.shape[0]) 301 | X_train, X_test = X[:10], X[10:] 302 | y_train, y_test = noisy_y[:10], noisy_y[10:] 303 | 304 | beta_low = 0.5 305 | beta = 0.1 306 | beta_hi = 1 307 | ref = FactorizationMachineRegressor(degree=degree, n_components=5, 308 | fit_linear=False, fit_lower=None, 309 | beta=beta, max_iter=20000, 310 | random_state=0) 311 | ref.fit(X_train, y_train) 312 | y_pred_ref = ref.predict(X_test) 313 | 314 | # (a) starting from a lower beta than (b), decreasing and refitting 315 | from_low = FactorizationMachineRegressor(degree=degree, n_components=5, 316 | fit_lower=None, fit_linear=False, 317 | beta=beta_low, warm_start=True, 318 | random_state=0) 319 | from_low.fit(X_train, y_train) 320 | from_low.set_params(beta=beta) 321 | from_low.fit(X_train, y_train) 322 | y_pred_low = from_low.predict(X_test) 323 | 324 | # (b) starting from higher beta, decreasing and refitting 325 | from_hi = FactorizationMachineRegressor(degree=degree, n_components=5, 326 | fit_lower=None, fit_linear=False, 327 | beta=beta_hi, warm_start=True, 328 | random_state=0) 329 | from_hi.fit(X_train, y_train) 330 | from_hi.set_params(beta=beta) 331 | from_hi.fit(X_train, y_train) 332 | y_pred_hi = from_hi.predict(X_test) 333 | 334 | assert_array_almost_equal(y_pred_low, y_pred_ref, decimal=4) 335 | assert_array_almost_equal(y_pred_hi, y_pred_ref, decimal=4) 336 | 337 | 338 | def test_warm_start(): 339 | yield check_warm_start, 2 340 | yield check_warm_start, 3 341 | -------------------------------------------------------------------------------- /polylearn/tests/test_kernels.py: -------------------------------------------------------------------------------- 1 | # Author: Vlad Niculae 2 | # License: Simplified BSD 3 | 4 | from itertools import product, combinations 5 | from functools import reduce 6 | from nose.tools import assert_true, assert_raises 7 | 8 | import numpy as np 9 | from numpy.testing import assert_array_almost_equal 10 | from scipy import sparse as sp 11 | 12 | from polylearn.kernels import homogeneous_kernel, 
13 | from polylearn.kernels import _poly_predict
14 | 
15 | 
16 | def _product(x):
17 |     return reduce(lambda a, b: a * b, x, 1)
18 | 
19 | 
20 | def _power_iter(x, degree):
21 |     return product(*([x] * degree))
22 | 
23 | 
24 | def dumb_homogeneous(x, p, degree=2):
25 |     return sum(_product(x[k] * p[k] for k in ix)
26 |                for ix in _power_iter(range(len(x)), degree))
27 | 
28 | 
29 | def dumb_anova(x, p, degree=2):
30 |     return sum(_product(x[k] * p[k] for k in ix)
31 |                for ix in combinations(range(len(x)), degree))
32 | 
33 | 
34 | n_samples = 5
35 | n_bases = 4
36 | n_features = 7
37 | rng = np.random.RandomState(0)
38 | X = rng.randn(n_samples, n_features)
39 | P = rng.randn(n_bases, n_features)
40 | lams = np.array([2, 1, -1, 3])
41 | 
42 | 
43 | def test_homogeneous():
44 |     for m in range(1, 5):
45 |         expected = np.zeros((n_samples, n_bases))
46 |         for i in range(n_samples):
47 |             for j in range(n_bases):
48 |                 expected[i, j] = dumb_homogeneous(X[i], P[j], degree=m)
49 |         got = homogeneous_kernel(X, P, degree=m)
50 |         assert_array_almost_equal(got, expected, err_msg=(
51 |             "Homogeneous kernel incorrect for degree {}".format(m)))
52 | 
53 | 
54 | def test_anova():
55 |     for m in (2, 3):
56 |         expected = np.zeros((n_samples, n_bases))
57 |         for i in range(n_samples):
58 |             for j in range(n_bases):
59 |                 expected[i, j] = dumb_anova(X[i], P[j], degree=m)
60 |         got = anova_kernel(X, P, degree=m)
61 |         assert_array_almost_equal(got, expected, err_msg=(
62 |             "ANOVA kernel incorrect for degree {}".format(m)))
63 | 
64 | 
65 | def test_anova_ignore_diag_equivalence():
66 |     # predicting using the ANOVA kernel
67 |     K = 2 * anova_kernel(X, P, degree=2)
68 |     y_pred = np.dot(K, lams)
69 | 
70 |     # explicit computation via the lifted matrix Z = P.T diag(lams) P
71 |     Z = np.dot(P.T, (lams[:, np.newaxis] * P))
72 |     y_manual = np.zeros_like(y_pred)
73 |     for i in range(n_samples):
74 |         x = X[i].ravel()
75 |         xx = np.outer(x, x) - np.diag(x ** 2)
76 |         y_manual[i] = np.trace(np.dot(Z.T, xx))
77 | 
78 |     assert_array_almost_equal(y_pred, y_manual)
79 | 
80 | 
81 | def test_safe_power_sparse():
82 |     # TODO maybe move to a util module or something
83 |     # scikit-learn has safe_sqr but not a general power
84 | 
85 |     X_quad = X ** 4
86 |     # assert X stays sparse
87 |     X_sp = sp.csr_matrix(X)
88 |     for sp_format in ('csr', 'csc', 'coo'):  # not working with lil for now
89 |         X_sp = X_sp.asformat(sp_format)
90 |         X_sp_quad = safe_power(X_sp, degree=4)
91 |         assert_true(sp.issparse(X_sp_quad),
92 |                     msg="safe_power breaks {} sparsity".format(sp_format))
93 |         assert_array_almost_equal(X_quad,
94 |                                   X_sp_quad.A,
95 |                                   err_msg="safe_power differs for {} and "
96 |                                           "dense".format(sp_format))
97 | 
98 | 
99 | def test_anova_sparse():
100 |     X_sp = sp.csr_matrix(X)
101 |     for m in (2, 3):
102 |         dense = anova_kernel(X, P, degree=m)
103 |         sparse = anova_kernel(X_sp, P, degree=m)
104 |         assert_array_almost_equal(dense, sparse, err_msg=(
105 |             "ANOVA kernel sparse != dense for degree {}".format(m)))
106 | 
107 | 
108 | def test_predict():
109 |     # predict with the homogeneous kernel
110 |     y_pred_poly = _poly_predict(X, P, lams, kernel='poly', degree=3)
111 |     K = homogeneous_kernel(X, P, degree=3)
112 |     y_pred = np.dot(K, lams)
113 |     assert_array_almost_equal(y_pred_poly, y_pred,
114 |                               err_msg="Homogeneous prediction incorrect.")
115 | 
116 |     # predict with the ANOVA kernel
117 |     y_pred_poly = _poly_predict(X, P, lams, kernel='anova', degree=3)
118 |     K = anova_kernel(X, P, degree=3)
119 |     y_pred = np.dot(K, lams)
120 |     assert_array_almost_equal(y_pred_poly, y_pred,
121 |                               err_msg="ANOVA prediction incorrect.")
122 | 
123 | 
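# A compact identity behind the degree-2 checks above: the ANOVA kernel
# satisfies A2(x, p) = ((x . p)**2 - (x**2 . p**2)) / 2, i.e. the squared
# linear kernel with its "diagonal" terms removed and halved. A minimal
# illustrative sketch of that identity; this helper is an added example,
# not part of the original suite:
def example_anova_degree2_closed_form():
    linear = np.dot(X, P.T)                   # <x_i, p_j> for every pair
    squared = np.dot(X ** 2, (P ** 2).T)      # sum_k x_ik**2 * p_jk**2
    expected = 0.5 * (linear ** 2 - squared)  # drop diagonal terms, halve
    assert_array_almost_equal(anova_kernel(X, P, degree=2), expected)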
124 | def test_unsupported_degree(): 125 | assert_raises(NotImplementedError, anova_kernel, X, P, degree=4) 126 | 127 | 128 | def test_unsupported_kernel(): 129 | assert_raises(ValueError, _poly_predict, X, P, lams, kernel='rbf') 130 | -------------------------------------------------------------------------------- /polylearn/tests/test_polynomial_network.py: -------------------------------------------------------------------------------- 1 | # Author: Vlad Niculae 2 | # License: Simplified BSD 3 | 4 | import warnings 5 | 6 | from nose.tools import assert_less_equal, assert_equal 7 | 8 | import numpy as np 9 | from numpy.testing import assert_array_almost_equal 10 | from sklearn.metrics import mean_squared_error 11 | from sklearn.utils.testing import assert_warns_message 12 | from sklearn.utils.extmath import fast_dot 13 | 14 | from lightning.impl.dataset_fast import get_dataset 15 | 16 | from polylearn import PolynomialNetworkClassifier, PolynomialNetworkRegressor 17 | from polylearn.polynomial_network import _lifted_predict as _ds_lifted_predict 18 | 19 | 20 | # to shave off some test seconds, since the data is tiny, we can use this. 21 | def _lifted_predict(U, X): 22 | return np.product(fast_dot(U, X.T), axis=0).sum(axis=0) 23 | 24 | max_degree = 5 25 | n_components = 3 26 | n_features = 7 27 | n_samples = 10 28 | 29 | rng = np.random.RandomState(1) 30 | U = rng.randn(max_degree, n_components, n_features) 31 | X = rng.randn(n_samples, n_features) 32 | 33 | 34 | def cd_lifted_slow(X, y, degree=2, n_components=5, beta=1., n_iter=10000, 35 | tol=1e-5, verbose=False, random_state=None): 36 | from sklearn.utils import check_random_state 37 | 38 | n_samples, n_features = X.shape 39 | rng = check_random_state(random_state) 40 | U = 0.01 * rng.randn(degree, n_components, n_features) 41 | 42 | # homogeneous kernel 43 | pred = np.product(np.dot(U, X.T), axis=0).sum(axis=0) 44 | 45 | mu = 1 # squared loss 46 | converged = False 47 | 48 | for i in range(n_iter): 49 | sum_viol = 0 50 | for t in range(degree): 51 | deg_idx = np.zeros(degree, dtype=np.bool) 52 | deg_idx[t] = True 53 | for s in range(n_components): 54 | xi = np.product(np.dot(U[~deg_idx, s, :], X.T), axis=0) 55 | for j in range(n_features): 56 | x = X[:, j] 57 | 58 | inv_step_size = mu * (xi ** 2 * x ** 2).sum() 59 | inv_step_size += beta 60 | 61 | dloss = pred - y # squared loss 62 | step = (xi * x * dloss).sum() 63 | step += beta * U[t, s, j] 64 | step /= inv_step_size 65 | 66 | U[t, s, j] -= step 67 | sum_viol += np.abs(step) 68 | 69 | # dumb synchronize 70 | pred = np.product(np.dot(U, X.T), axis=0).sum(axis=0) 71 | xi = np.product(np.dot(U[~deg_idx, s, :], X.T), axis=0) 72 | nrm = np.sum(U.ravel() ** 2) 73 | if verbose: 74 | print("Epoch", i, "violations", sum_viol, "loss", 75 | 0.5 * (np.sum((y - pred) ** 2) + beta * nrm)) 76 | 77 | if sum_viol < tol: 78 | converged = True 79 | break 80 | 81 | if not converged: 82 | warnings.warn("Objective did not converge. 
Increase max_iter.") 83 | 84 | return U 85 | 86 | 87 | def test_lifted_predict(): 88 | y_ref = _lifted_predict(U, X) 89 | ds = get_dataset(X, order='fortran') 90 | y = _ds_lifted_predict(U, ds) 91 | assert_array_almost_equal(y_ref, y) 92 | 93 | 94 | def check_fit(degree): 95 | y = _lifted_predict(U[:degree], X) 96 | 97 | est = PolynomialNetworkRegressor(degree=degree, n_components=n_components, 98 | max_iter=50000, beta=0.001, tol=1e-2, 99 | random_state=0) 100 | y_pred = est.fit(X, y).predict(X) 101 | assert_less_equal(mean_squared_error(y, y_pred), 1e-4, 102 | msg="Cannot learn degree {} function.".format(degree)) 103 | 104 | 105 | def test_fit(): 106 | for degree in range(2, max_degree + 1): 107 | yield check_fit, degree 108 | 109 | 110 | def check_improve(degree): 111 | y = _lifted_predict(U[:degree], X) 112 | 113 | common_settings = dict(degree=degree, n_components=n_components, 114 | beta=1e-10, tol=0, random_state=0) 115 | 116 | est_5 = PolynomialNetworkRegressor(max_iter=5, **common_settings) 117 | est_10 = PolynomialNetworkRegressor(max_iter=10, **common_settings) 118 | 119 | with warnings.catch_warnings(): 120 | warnings.simplefilter("ignore") 121 | est_5.fit(X, y) 122 | est_10.fit(X, y) 123 | 124 | y_pred_5 = est_5.predict(X) 125 | y_pred_10 = est_10.predict(X) 126 | 127 | assert_less_equal(mean_squared_error(y, y_pred_10), 128 | mean_squared_error(y, y_pred_5), 129 | msg="More iterations do not improve fit.") 130 | 131 | 132 | def test_improve(): 133 | for degree in range(2, max_degree + 1): 134 | yield check_improve, degree 135 | 136 | 137 | def test_convergence_warning(): 138 | degree = 4 139 | y = _lifted_predict(U[:degree], X) 140 | 141 | est = PolynomialNetworkRegressor(degree=degree, n_components=n_components, 142 | beta=1e-10, max_iter=1, tol=1e-5, 143 | random_state=0) 144 | assert_warns_message(UserWarning, "converge", est.fit, X, y) 145 | 146 | 147 | def test_random_starts(): 148 | # not as strong a test as the direct case! 149 | # using training error here, and a higher threshold. 150 | # We observe the lifted solver reaches rather diff. solutions. 151 | degree = 3 152 | noisy_y = _lifted_predict(U[:degree], X) 153 | noisy_y += 5. 
* rng.randn(noisy_y.shape[0]) 154 | 155 | common_settings = dict(degree=degree, n_components=n_components, 156 | beta=0.01, tol=0.01) 157 | scores = [] 158 | for k in range(5): 159 | est = PolynomialNetworkRegressor(random_state=k, **common_settings) 160 | y_pred = est.fit(X, noisy_y).predict(X) 161 | scores.append(mean_squared_error(noisy_y, y_pred)) 162 | 163 | assert_less_equal(np.std(scores), 1e-4) 164 | 165 | 166 | def check_same_as_slow(degree): 167 | y = _lifted_predict(U[:degree], X) 168 | reg = PolynomialNetworkRegressor(degree=degree, n_components=n_components, 169 | fit_lower=None, beta=1, max_iter=5, 170 | random_state=0) 171 | 172 | with warnings.catch_warnings(): 173 | warnings.simplefilter("ignore") 174 | reg.fit(X, y) 175 | 176 | U_fit_slow = cd_lifted_slow(X, y, degree=degree, 177 | n_components=n_components, beta=1, 178 | random_state=0, n_iter=5) 179 | 180 | assert_array_almost_equal(reg.U_, U_fit_slow) 181 | 182 | 183 | def test_same_as_slow(): 184 | for degree in range(2, max_degree + 1): 185 | yield check_same_as_slow, degree 186 | 187 | 188 | def check_classification_losses(loss, degree): 189 | y = np.sign(_lifted_predict(U[:degree], X)) 190 | 191 | clf = PolynomialNetworkClassifier(degree=degree, n_components=n_components, 192 | loss=loss, beta=1e-4, tol=1e-2, 193 | random_state=0) 194 | clf.fit(X, y) 195 | assert_equal(1.0, clf.score(X, y)) 196 | 197 | 198 | def test_classification_losses(): 199 | for loss in ('squared_hinge', 'logistic'): 200 | for degree in range(2, max_degree + 1): 201 | yield check_classification_losses, loss, degree 202 | 203 | 204 | def check_warm_start(degree): 205 | y = np.sign(_lifted_predict(U[:degree], X)) 206 | # Result should be the same if: 207 | # (a) running 10 iterations 208 | 209 | common_settings = dict(fit_lower=None, degree=degree, n_components=2, 210 | random_state=0) 211 | clf_10 = PolynomialNetworkRegressor(max_iter=10, warm_start=False, 212 | **common_settings) 213 | with warnings.catch_warnings(): 214 | warnings.simplefilter("ignore") 215 | clf_10.fit(X, y) 216 | 217 | # (b) running 5 iterations and 5 more 218 | clf_5_5 = PolynomialNetworkRegressor(max_iter=5, warm_start=True, 219 | **common_settings) 220 | with warnings.catch_warnings(): 221 | warnings.simplefilter("ignore") 222 | clf_5_5.fit(X, y) 223 | U_fit = clf_5_5.U_.copy() 224 | clf_5_5.fit(X, y) 225 | 226 | # (c) running 5 iterations when starting from previous point. 227 | clf_5 = PolynomialNetworkRegressor(max_iter=5, warm_start=True, 228 | **common_settings) 229 | clf_5.U_ = U_fit 230 | with warnings.catch_warnings(): 231 | warnings.simplefilter("ignore") 232 | clf_5.fit(X, y) 233 | 234 | assert_array_almost_equal(clf_10.U_, clf_5_5.U_) 235 | assert_array_almost_equal(clf_10.U_, clf_5.U_) 236 | 237 | # Prediction results should also be the same if: 238 | # (note: could not get this test to work for the exact P_.) 239 | # This test is very flimsy! 
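# Concretely, the check below warm-starts from a slightly perturbed beta
# (0.51 or 0.49), refits at the target beta=0.5, and asserts the predictions
# (almost) match those of a direct fit at beta=0.5.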
240 | 
241 |     y = np.sign(_lifted_predict(U[:degree], X))
242 | 
243 |     beta_low = 0.51  # note: despite the names, beta_low > beta_hi here
244 |     beta = 0.5
245 |     beta_hi = 0.49
246 | 
247 |     common_settings = dict(degree=degree, n_components=n_components,
248 |                            tol=1e-3, random_state=0)
249 |     ref = PolynomialNetworkRegressor(beta=beta, **common_settings)
250 |     ref.fit(X, y)
251 |     y_pred_ref = ref.predict(X)
252 | 
253 |     # (a) warm-starting from a slightly larger beta (0.51), then refitting
254 |     from_low = PolynomialNetworkRegressor(beta=beta_low, warm_start=True,
255 |                                           **common_settings)
256 |     from_low.fit(X, y)
257 |     from_low.set_params(beta=beta)
258 |     from_low.fit(X, y)
259 |     y_pred_low = from_low.predict(X)
260 | 
261 |     # (b) warm-starting from a slightly smaller beta (0.49), then refitting
262 |     from_hi = PolynomialNetworkRegressor(beta=beta_hi, warm_start=True,
263 |                                          **common_settings)
264 |     from_hi.fit(X, y)
265 |     from_hi.set_params(beta=beta)
266 |     from_hi.fit(X, y)
267 |     y_pred_hi = from_hi.predict(X)
268 | 
269 |     decimal = 3
270 |     assert_array_almost_equal(y_pred_low, y_pred_ref, decimal=decimal)
271 |     assert_array_almost_equal(y_pred_hi, y_pred_ref, decimal=decimal)
272 | 
273 | 
274 | def test_warm_start():
275 |     for degree in range(2, max_degree + 1):
276 |         yield check_warm_start, degree
277 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.rst
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os.path
3 | import sys
4 | import setuptools
5 | from numpy.distutils.core import setup
6 | 
7 | 
8 | try:
9 |     import numpy
10 | except ImportError:
11 |     print('numpy is required during installation')
12 |     sys.exit(1)
13 | 
14 | 
15 | DISTNAME = 'polylearn'
16 | DESCRIPTION = ("Factorization machines and polynomial networks "
17 |                "for classification and regression in Python.")
18 | LONG_DESCRIPTION = open('README.rst').read()
19 | MAINTAINER = 'Vlad Niculae'
20 | MAINTAINER_EMAIL = 'vlad@vene.ro'
21 | URL = 'https://contrib.scikit-learn.org/polylearn'
22 | LICENSE = 'Simplified BSD'
23 | DOWNLOAD_URL = 'https://github.com/scikit-learn-contrib/polylearn'
24 | VERSION = '0.1.dev0'
25 | 
26 | 
27 | def configuration(parent_package='', top_path=None):
28 |     from numpy.distutils.misc_util import Configuration
29 | 
30 |     config = Configuration(None, parent_package, top_path)
31 | 
32 |     config.add_subpackage('polylearn')
33 | 
34 |     return config
35 | 
36 | 
37 | if __name__ == '__main__':
38 |     old_path = os.getcwd()
39 |     local_path = os.path.dirname(os.path.abspath(sys.argv[0]))
40 | 
41 |     os.chdir(local_path)
42 |     sys.path.insert(0, local_path)
43 | 
44 |     setup(configuration=configuration,
45 |           name=DISTNAME,
46 |           maintainer=MAINTAINER,
47 |           include_package_data=True,
48 |           install_requires=[
49 |               'six',
50 |               'scikit-learn'
51 |           ],
52 |           maintainer_email=MAINTAINER_EMAIL,
53 |           description=DESCRIPTION,
54 |           license=LICENSE,
55 |           url=URL,
56 |           version=VERSION,
57 |           download_url=DOWNLOAD_URL,
58 |           long_description=LONG_DESCRIPTION,
59 |           zip_safe=False,  # the package can run out of an .egg file
60 |           classifiers=[
61 |               'Intended Audience :: Science/Research',
62 |               'Intended Audience :: Developers', 'License :: OSI Approved',
63 |               'Programming Language :: C', 'Programming Language :: Python',
64 |               'Topic :: Software Development',
65 |               'Topic ::
Scientific/Engineering', 66 | 'Operating System :: Microsoft :: Windows', 67 | 'Operating System :: POSIX', 'Operating System :: Unix', 68 | 'Operating System :: MacOS' 69 | ] 70 | ) --------------------------------------------------------------------------------
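As a closing illustration of what the package assembled by this setup.py exposes, here is a minimal usage sketch. The estimator name and its degree, n_components, and random_state parameters all appear in the test suites above; the data and parameter values here are made up for illustration:

    import numpy as np
    from polylearn import FactorizationMachineRegressor

    rng = np.random.RandomState(0)
    X = rng.randn(20, 5)
    y = X[:, 0] * X[:, 1] + X[:, 2]  # target with a pairwise interaction

    # degree-2 factorization machine, as exercised throughout the tests
    fm = FactorizationMachineRegressor(degree=2, n_components=3,
                                       random_state=0)
    fm.fit(X, y)
    print(fm.predict(X[:3]))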