├── .gitignore
├── .nojekyll
├── .travis.yml
├── LICENSE
├── Makefile
├── README.rst
├── appveyor.yml
├── benchmarks
│   ├── bench_20newsgroups.py
│   └── bench_other_libs.py
├── ci_scripts
│   ├── appveyor
│   │   ├── install.ps1
│   │   └── run_with_env.cmd
│   ├── install.sh
│   ├── push_doc.sh
│   ├── success.sh
│   └── test.sh
├── circle.yml
├── doc
│   ├── Makefile
│   ├── _templates
│   │   ├── class.rst
│   │   ├── function.rst
│   │   └── layout.html
│   ├── conf.py
│   ├── index.rst
│   ├── make.bat
│   ├── references.rst
│   └── sphinxext
│       ├── LICENSE.txt
│       ├── MANIFEST.in
│       ├── README.txt
│       ├── gen_rst.py
│       └── numpy_ext
│           ├── __init__.py
│           ├── docscrape.py
│           ├── docscrape_sphinx.py
│           └── numpydoc.py
├── examples
│   ├── README.txt
│   ├── plot_regularization_path.py
│   └── plot_xor.py
├── polylearn
│   ├── __init__.py
│   ├── base.py
│   ├── cd_direct_fast.cpp
│   ├── cd_direct_fast.pyx
│   ├── cd_lifted_fast.cpp
│   ├── cd_lifted_fast.pyx
│   ├── cd_linear_fast.cpp
│   ├── cd_linear_fast.pxd
│   ├── cd_linear_fast.pyx
│   ├── factorization_machine.py
│   ├── kernels.py
│   ├── loss.py
│   ├── loss_fast.cpp
│   ├── loss_fast.pxd
│   ├── loss_fast.pyx
│   ├── polynomial_network.py
│   ├── setup.py
│   └── tests
│       ├── __init__.py
│       ├── test_cd_linear.py
│       ├── test_common.py
│       ├── test_factorization_machine.py
│       ├── test_kernels.py
│       └── test_polynomial_network.py
├── setup.cfg
└── setup.py
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask instance folder 57 | instance/ 58 | 59 | # Scrapy stuff: 60 | .scrapy 61 | 62 | # Sphinx documentation 63 | docs/_build/ 64 | 65 | # PyBuilder 66 | target/ 67 | 68 | # IPython Notebook 69 | .ipynb_checkpoints 70 | 71 | # pyenv 72 | .python-version 73 | 74 | # dotenv 75 | .env 76 | 77 | # ide 78 | .idea 79 | 80 | 81 | doc/_build/ 82 | doc/generated/ 83 | doc/auto_examples/ 84 | doc/modules/generated/ 85 | doc/datasets/generated/ 86 | .coverage 87 | coverage 88 | tags 89 | coverages.zip 90 | samples.zip 91 | doc/coverages.zip 92 | doc/samples.zip 93 | coverages 94 | samples 95 | doc/coverages 96 | doc/samples -------------------------------------------------------------------------------- /.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/polylearn/4dd9d4b8aca029628a4c934829526b8552db2e1b/.nojekyll -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | cache: 4 | apt: true 5 | # We use three different cache directory 6 | # to work around a Travis bug with multi-platform cache 7 | directories: 8 | - $HOME/.cache/pip 9 | - $HOME/download 10 | env: 11 | global: 12 | # Directory where tests are run from 13 | - TEST_DIR=/tmp/test_dir/ 14 | - MODULE=polylearn 15 | matrix: 16 | - DISTRIB="conda" PYTHON_VERSION="2.7" 17 | NUMPY_VERSION="1.7.1" SCIPY_VERSION="0.12.0" CYTHON_VERSION="0.21" 18 | SKLEARN_VERSION="0.16.1" 19 | - DISTRIB="conda" PYTHON_VERSION="3.5" COVERAGE="true" 20 | NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4" 21 | SKLEARN_VERSION="0.17.1" 22 | 23 | install: source ci_scripts/install.sh 24 | script: bash ci_scripts/test.sh 25 | after_success: source ci_scripts/success.sh 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Vlad Niculae 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation and/or 12 | other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
17 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 18 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 20 | OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 21 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 22 | OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 23 | THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTHON ?= python 2 | CYTHON ?= cython 3 | NOSETESTS ?= nosetests 4 | 5 | # Compilation... 6 | 7 | CYTHONSRC= $(wildcard polylearn/*.pyx) 8 | CSRC= $(CYTHONSRC:.pyx=.cpp) 9 | 10 | inplace: 11 | $(PYTHON) setup.py build_ext -i 12 | 13 | all: cython inplace 14 | 15 | cython: $(CSRC) 16 | 17 | clean: 18 | rm -f polylearn/*.c polylearn/*.cpp polylearn/*.html 19 | rm -f `find polylearn -name "*.pyc"` 20 | rm -f `find polylearn -name "*.so"` 21 | 22 | %.cpp: %.pyx 23 | $(CYTHON) --cplus $< 24 | 25 | # Tests... 26 | # 27 | test-code: inplace 28 | $(NOSETESTS) -s polylearn 29 | 30 | test-coverage: 31 | $(NOSETESTS) -s --with-coverage --cover-html --cover-html-dir=coverage \ 32 | --cover-package=polylearn polylearn 33 | 34 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | polylearn 4 | ========= 5 | 6 | A library for **factorization machines** and **polynomial networks** 7 | for classification and regression in Python. 8 | 9 | `Github repository <https://github.com/scikit-learn-contrib/polylearn>`_. 10 | 11 | .. image:: https://travis-ci.org/scikit-learn-contrib/polylearn.svg?branch=master 12 | :target: https://travis-ci.org/scikit-learn-contrib/polylearn 13 | 14 | .. image:: https://ci.appveyor.com/api/projects/status/g9xnar9081l3vsw7/branch/master?svg=true 15 | :target: https://ci.appveyor.com/project/vene/polylearn 16 | 17 | .. image:: https://coveralls.io/repos/scikit-learn-contrib/polylearn/badge.svg?branch=master&service=github 18 | :target: https://coveralls.io/r/scikit-learn-contrib/polylearn 19 | 20 | .. image:: https://circleci.com/gh/scikit-learn-contrib/polylearn/tree/master.svg?style=shield&circle-token=:circle-token 21 | :target: https://circleci.com/gh/scikit-learn-contrib/polylearn/ 22 | 23 | Factorization machines and polynomial networks are machine learning models 24 | that can capture **feature interactions** (co-occurrence) through polynomial terms. 25 | Because feature interactions can be very sparse, it's common to use **low-rank, 26 | factorized representations**; this way, we can learn weights even for feature 27 | co-occurrences that haven't been observed at training time. 28 | 29 | Factorization machines are popular for recommender systems, as they are a 30 | generalization of matrix completion models. 31 | 32 | This package provides: 33 | 34 | - a coordinate descent algorithm for fitting factorization machines of degree 2 or 3, 35 | - a coordinate descent algorithm for fitting polynomial networks of arbitrary degree, 36 | - a `scikit-learn <http://scikit-learn.org>`_-compatible API, 37 | - `Cython <http://cython.org>`_ implementations for computationally intensive parts. 38 | 39 | Installation 40 | ------------ 41 | 42 | Binary packages are not yet available.
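Once installed (see below), the estimators follow the usual scikit-learn fit/predict API. Here is a minimal sketch on synthetic toy data; the estimator name and parameters mirror the ones exercised in ``benchmarks/bench_20newsgroups.py``, while the dataset itself is purely illustrative::

    from sklearn.datasets import make_classification
    from sklearn.metrics import accuracy_score

    from polylearn import FactorizationMachineClassifier

    # toy binary classification problem (placeholder for real data)
    X, y = make_classification(n_samples=200, n_features=20, random_state=0)

    # degree-2 factorization machine with rank-30 factorized interaction weights
    fm = FactorizationMachineClassifier(n_components=30, degree=2,
                                        max_iter=10, random_state=0)
    fm.fit(X, y)
    print(accuracy_score(y, fm.predict(X)))

The same pattern applies to ``PolynomialNetworkClassifier`` and to the regressors.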
43 | 44 | The development version of polylearn can be installed from its git repository. In 45 | this case it is assumed that you have a working 46 | C++ compiler. 47 | 48 | 1. Obtain the sources by:: 49 | 50 | git clone https://github.com/scikit-learn-contrib/polylearn.git 51 | 52 | or, if `git` is unavailable, `download as a ZIP from GitHub <https://github.com/scikit-learn-contrib/polylearn/archive/master.zip>`_. 53 | 54 | 55 | 2. Install the dependencies:: 56 | 57 | # via pip 58 | 59 | pip install numpy scipy scikit-learn nose 60 | pip install sklearn-contrib-lightning 61 | 62 | 63 | # via conda 64 | 65 | conda install numpy scipy scikit-learn nose 66 | conda install -c conda-forge sklearn-contrib-lightning 67 | 68 | 69 | 3. Build and install polylearn:: 70 | 71 | cd polylearn 72 | python setup.py build 73 | sudo python setup.py install 74 | 75 | 76 | References 77 | ---------- 78 | 79 | The solvers implemented are introduced in [1]_. Factorization machines are introduced 80 | in [2]_ and polynomial networks in [3]_. 81 | 82 | .. [1] Mathieu Blondel, Masakazu Ishihata, Akinori Fujino, Naonori Ueda. 83 | *Polynomial Networks and Factorization Machines: New Insights and 84 | Efficient Training Algorithms.* In: Proc. of ICML 2016. 85 | [`PDF `_] 86 | 87 | .. [2] Steffen Rendle. *Factorization machines.* In: Proc. of IEEE ICDM 2010. 88 | [`PDF `_] 89 | 90 | .. [3] Roi Livni, Shai Shalev-Shwartz, Ohad Shamir. 91 | *On the computational efficiency of training neural networks.* 92 | In: Proc. of NIPS 2014. 93 | [`arXiv `_] 94 | 95 | Authors 96 | ------- 97 | 98 | - Vlad Niculae, 2016-present 99 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # AppVeyor.com is a Continuous Integration service to build and run tests under 2 | # Windows 3 | environment: 4 | global: 5 | # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the 6 | # /E:ON and /V:ON options are not enabled in the batch script interpreter 7 | # See: http://stackoverflow.com/a/13751649/163740 8 | CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci_scripts\\appveyor\\run_with_env.cmd" 9 | 10 | matrix: 11 | - PYTHON: "C:\\Python27" 12 | PYTHON_VERSION: "2.7.15" 13 | PYTHON_ARCH: "32" 14 | MINICONDA: "C:\\Miniconda" 15 | 16 | - PYTHON: "C:\\Python27-x64" 17 | PYTHON_VERSION: "2.7.15" 18 | PYTHON_ARCH: "64" 19 | MINICONDA: "C:\\Miniconda-x64" 20 | 21 | - PYTHON: "C:\\Python37" 22 | PYTHON_VERSION: "3.7.2" 23 | PYTHON_ARCH: "32" 24 | MINICONDA: "C:\\Miniconda37" 25 | 26 | - PYTHON: "C:\\Python37-x64" 27 | PYTHON_VERSION: "3.7.2" 28 | PYTHON_ARCH: "64" 29 | MINICONDA: "C:\\Miniconda37-x64" 30 | 31 | install: 32 | # Miniconda is pre-installed in the worker build 33 | - "SET PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%" 34 | - "python -m pip install -U pip" 35 | 36 | # Check that we have the expected version and architecture for Python 37 | - "python --version" 38 | - "python -c \"import struct; print(struct.calcsize('P') * 8)\"" 39 | - "pip --version" 40 | 41 | # Remove cygwin because it clashes with conda 42 | # see http://help.appveyor.com/discussions/problems/3712-git-remote-https-seems-to-be-broken 43 | - rmdir C:\\cygwin /s /q 44 | 45 | # Update previous packages and install the build and runtime dependencies of the project.
46 | - conda update --all --yes 47 | - conda install --quiet --yes numpy scipy cython nose scikit-learn wheel 48 | - conda install --quiet --yes -c conda-forge sklearn-contrib-lightning 49 | - conda install --quiet --yes conda-build 50 | - "%CMD_IN_ENV% python setup.py bdist_wheel bdist_wininst" 51 | 52 | - ps: "ls dist" 53 | # # build the conda package 54 | # - "%CMD_IN_ENV% conda build build_tools/conda-recipe --quiet" 55 | # 56 | # # Move the conda package into the dist directory, to register it 57 | # # as an "artifact" for Appveyor. cmd.exe doesn't have good globbing, so 58 | # # we'll use a simple python script. 59 | # - python build_tools/move-conda-package.py build_tools/conda-recipe 60 | # 61 | # # Install the generated wheel package to test it 62 | - "pip install --pre --no-index --find-links dist/ polylearn" 63 | 64 | # Not a .NET project, we build polylearn in the install step instead 65 | build: false 66 | 67 | test_script: 68 | # Change to a non-source folder to make sure we run the tests on the 69 | # installed library. 70 | - "mkdir empty_folder" 71 | - "cd empty_folder" 72 | 73 | - "python -c \"import nose; nose.main()\" -s -v polylearn" 74 | 75 | # Move back to the project folder 76 | - "cd .." 77 | 78 | artifacts: 79 | # Archive the generated wheel package in the ci.appveyor.com build report. 80 | - path: dist\* 81 | 82 | 83 | cache: 84 | - '%APPDATA%\pip\Cache' 85 | -------------------------------------------------------------------------------- /benchmarks/bench_20newsgroups.py: -------------------------------------------------------------------------------- 1 | # Benchmark polynomial classifiers on bag-of-words text classification 2 | # Inspired by: https://github.com/scikit-learn/scikit-learn/blob/master 3 | # /benchmarks/bench_20newsgroups.py 4 | 5 | from time import time 6 | 7 | import numpy as np 8 | import scipy.sparse as sp 9 | 10 | from sklearn.base import clone 11 | from sklearn.metrics import accuracy_score, f1_score 12 | from sklearn.datasets import fetch_20newsgroups_vectorized 13 | 14 | from polylearn import (FactorizationMachineClassifier, 15 | PolynomialNetworkClassifier) 16 | 17 | 18 | estimators = { 19 | 'fm-2': FactorizationMachineClassifier(n_components=30, 20 | fit_linear=False, 21 | fit_lower=None, 22 | degree=2, 23 | random_state=0, 24 | max_iter=10), 25 | 26 | 'polynet-2': PolynomialNetworkClassifier(n_components=15, degree=2, 27 | fit_lower=None, 28 | max_iter=10, 29 | random_state=0) 30 | } 31 | 32 | estimators['fm-3'] = clone(estimators['fm-2']).set_params(degree=3) 33 | estimators['polynet-3'] = (clone(estimators['polynet-2']) 34 | .set_params(degree=3, n_components=10)) 35 | 36 | if __name__ == '__main__': 37 | data_train = fetch_20newsgroups_vectorized(subset="train") 38 | data_test = fetch_20newsgroups_vectorized(subset="test") 39 | X_train = sp.csc_matrix(data_train.data) 40 | X_test = sp.csc_matrix(data_test.data) 41 | 42 | y_train = data_train.target == 0 # atheism vs rest 43 | y_test = data_test.target == 0 44 | 45 | print("20 newsgroups") 46 | print("=============") 47 | print("X_train.shape = {0}".format(X_train.shape)) 48 | print("X_train.format = {0}".format(X_train.format)) 49 | print("X_train.dtype = {0}".format(X_train.dtype)) 50 | print("X_train density = {0}" 51 | "".format(X_train.nnz / np.product(X_train.shape))) 52 | print("y_train {0}".format(y_train.shape)) 53 | print("X_test {0}".format(X_test.shape)) 54 | print("X_test.format = {0}".format(X_test.format)) 55 | print("X_test.dtype = {0}".format(X_test.dtype)) 56 |
print("y_test {0}".format(y_test.shape)) 57 | print() 58 | 59 | print("Classifier Training") 60 | print("===================") 61 | f1, accuracy, train_time, test_time = {}, {}, {}, {} 62 | 63 | for name, clf in sorted(estimators.items()): 64 | print("Training %s ... " % name, end="") 65 | t0 = time() 66 | clf.fit(X_train, y_train) 67 | train_time[name] = time() - t0 68 | t0 = time() 69 | y_pred = clf.predict(X_test) 70 | test_time[name] = time() - t0 71 | accuracy[name] = accuracy_score(y_test, y_pred) 72 | f1[name] = f1_score(y_test, y_pred) 73 | print("done") 74 | 75 | print("Classification performance:") 76 | print("===========================") 77 | print() 78 | print("%s %s %s %s %s" % ("Classifier".ljust(16), 79 | "train".rjust(10), 80 | "test".rjust(10), 81 | "f1".rjust(10), 82 | "accuracy".rjust(10))) 83 | print("-" * (16 + 4 * 11)) 84 | for name in sorted(f1, key=f1.get): 85 | print("%s %s %s %s %s" % ( 86 | name.ljust(16), 87 | ("%.4fs" % train_time[name]).rjust(10), 88 | ("%.4fs" % test_time[name]).rjust(10), 89 | ("%.4f" % f1[name]).rjust(10), 90 | ("%.4f" % accuracy[name]).rjust(10))) 91 | 92 | print() 93 | -------------------------------------------------------------------------------- /benchmarks/bench_other_libs.py: -------------------------------------------------------------------------------- 1 | """ Benchmarking CD solvers for factorization machines. 2 | 3 | Compares polylearn with with fastFM [1]. 4 | 5 | [1] http://ibayer.github.io/fastFM/ 6 | 7 | Note: this benchmark uses the squared loss and a regression formulation, for 8 | the fairest comparison. The CD solvers in polylearn support logistic loss and 9 | squared hinge loss as well. 10 | 11 | """ 12 | 13 | from time import time 14 | 15 | import numpy as np 16 | import scipy.sparse as sp 17 | 18 | from sklearn.metrics import accuracy_score, f1_score 19 | from sklearn.datasets import fetch_20newsgroups_vectorized 20 | 21 | from polylearn import FactorizationMachineRegressor 22 | if __name__ == '__main__': 23 | data_train = fetch_20newsgroups_vectorized(subset="train") 24 | data_test = fetch_20newsgroups_vectorized(subset="test") 25 | X_train = sp.csc_matrix(data_train.data) 26 | X_test = sp.csc_matrix(data_test.data) 27 | 28 | y_train = data_train.target == 0 # atheism vs rest 29 | y_test = data_test.target == 0 30 | 31 | y_train = (2 * y_train - 1).astype(np.float) 32 | 33 | print(__doc__) 34 | print("20 newsgroups") 35 | print("=============") 36 | print("X_train.shape = {0}".format(X_train.shape)) 37 | print("X_train.format = {0}".format(X_train.format)) 38 | print("X_train.dtype = {0}".format(X_train.dtype)) 39 | print("X_train density = {0}" 40 | "".format(X_train.nnz / np.product(X_train.shape))) 41 | print("y_train {0}".format(y_train.shape)) 42 | print("X_test {0}".format(X_test.shape)) 43 | print("X_test.format = {0}".format(X_test.format)) 44 | print("X_test.dtype = {0}".format(X_test.dtype)) 45 | print("y_test {0}".format(y_test.shape)) 46 | print() 47 | 48 | print("Training regressors") 49 | print("===================") 50 | f1, accuracy, train_time, test_time = {}, {}, {}, {} 51 | 52 | print("Training our solver... 
", end="") 53 | fm = FactorizationMachineRegressor(n_components=20, 54 | fit_linear=True, 55 | fit_lower=False, 56 | alpha=5, 57 | beta=5, 58 | degree=2, 59 | random_state=0, 60 | max_iter=100) 61 | t0 = time() 62 | fm.fit(X_train, y_train) 63 | train_time['polylearn'] = time() - t0 64 | t0 = time() 65 | y_pred = fm.predict(X_test) > 0 66 | test_time['polylearn'] = time() - t0 67 | accuracy['polylearn'] = accuracy_score(y_test, y_pred) 68 | f1['polylearn'] = f1_score(y_test, y_pred) 69 | print("done") 70 | 71 | try: 72 | from fastFM import als 73 | 74 | print("Training fastfm... ", end="") 75 | clf = als.FMRegression(n_iter=100, init_stdev=0.01, rank=20, 76 | random_state=0, l2_reg=10.) 77 | clf.ignore_w_0 = True # since polylearn has no fit_intercept yet 78 | t0 = time() 79 | 80 | clf.fit(X_train, y_train) 81 | train_time['fastfm'] = time() - t0 82 | 83 | t0 = time() 84 | y_pred = clf.predict(X_test) 85 | test_time['fastfm'] = time() - t0 86 | y_pred = y_pred > 0 87 | accuracy['fastfm'] = accuracy_score(y_test, y_pred) 88 | f1['fastfm'] = f1_score(y_test, y_pred) 89 | 90 | print("done") 91 | except ImportError: 92 | print("fastfm not found") 93 | 94 | print("Regression performance:") 95 | print("=======================") 96 | print() 97 | print("%s %s %s %s %s" % ("Model".ljust(16), 98 | "train".rjust(10), 99 | "test".rjust(10), 100 | "f1".rjust(10), 101 | "accuracy".rjust(10))) 102 | print("-" * (16 + 4 * 11)) 103 | for name in sorted(f1, key=f1.get): 104 | print("%s %s %s %s %s" % ( 105 | name.ljust(16), 106 | ("%.4fs" % train_time[name]).rjust(10), 107 | ("%.4fs" % test_time[name]).rjust(10), 108 | ("%.4f" % f1[name]).rjust(10), 109 | ("%.4f" % accuracy[name]).rjust(10))) 110 | 111 | print() 112 | -------------------------------------------------------------------------------- /ci_scripts/appveyor/install.ps1: -------------------------------------------------------------------------------- 1 | # Sample script to install Miniconda under Windows 2 | # Authors: Olivier Grisel, Jonathan Helmus and Kyle Kastner, Robert McGibbon 3 | # License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ 4 | # taken from https://github.com/rmcgibbo/python-appveyor-conda-example 5 | 6 | $MINICONDA_URL = "http://repo.continuum.io/miniconda/" 7 | 8 | function DownloadMiniconda ($python_version, $platform_suffix) { 9 | $webclient = New-Object System.Net.WebClient 10 | if ($python_version -match "3.4") { 11 | $filename = "Miniconda3-latest-Windows-" + $platform_suffix + ".exe" 12 | } else { 13 | $filename = "Miniconda-latest-Windows-" + $platform_suffix + ".exe" 14 | } 15 | $url = $MINICONDA_URL + $filename 16 | 17 | $basedir = $pwd.Path + "\" 18 | $filepath = $basedir + $filename 19 | if (Test-Path $filename) { 20 | Write-Host "Reusing" $filepath 21 | return $filepath 22 | } 23 | 24 | # Download and retry up to 3 times in case of network transient errors. 
25 | Write-Host "Downloading" $filename "from" $url 26 | $retry_attempts = 2 27 | for($i=0; $i -lt $retry_attempts; $i++){ 28 | try { 29 | $webclient.DownloadFile($url, $filepath) 30 | break 31 | } 32 | Catch [Exception]{ 33 | Start-Sleep 1 34 | } 35 | } 36 | if (Test-Path $filepath) { 37 | Write-Host "File saved at" $filepath 38 | } else { 39 | # Retry once to get the error message if any at the last try 40 | $webclient.DownloadFile($url, $filepath) 41 | } 42 | return $filepath 43 | } 44 | 45 | 46 | function InstallMiniconda ($python_version, $architecture, $python_home) { 47 | Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home 48 | if (Test-Path $python_home) { 49 | Write-Host $python_home "already exists, skipping." 50 | return $false 51 | } 52 | if ($architecture -match "32") { 53 | $platform_suffix = "x86" 54 | } else { 55 | $platform_suffix = "x86_64" 56 | } 57 | 58 | $filepath = DownloadMiniconda $python_version $platform_suffix 59 | Write-Host "Installing" $filepath "to" $python_home 60 | $install_log = $python_home + ".log" 61 | $args = "/S /D=$python_home" 62 | Write-Host $filepath $args 63 | Start-Process -FilePath $filepath -ArgumentList $args -Wait -Passthru 64 | if (Test-Path $python_home) { 65 | Write-Host "Python $python_version ($architecture) installation complete" 66 | } else { 67 | Write-Host "Failed to install Python in $python_home" 68 | Get-Content -Path $install_log 69 | Exit 1 70 | } 71 | } 72 | 73 | 74 | function InstallCondaPackages ($python_home, $spec) { 75 | $conda_path = $python_home + "\Scripts\conda.exe" 76 | $args = "install --yes " + $spec 77 | Write-Host ("conda " + $args) 78 | Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru 79 | } 80 | 81 | function UpdateConda ($python_home) { 82 | $conda_path = $python_home + "\Scripts\conda.exe" 83 | Write-Host "Updating conda..." 84 | $args = "update --yes conda" 85 | Write-Host $conda_path $args 86 | Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru 87 | } 88 | 89 | function main () { 90 | InstallMiniconda $env:PYTHON_VERSION $env:PYTHON_ARCH $env:PYTHON 91 | UpdateConda $env:PYTHON 92 | InstallCondaPackages $env:PYTHON "conda-build anaconda-client" 93 | } 94 | 95 | main 96 | -------------------------------------------------------------------------------- /ci_scripts/appveyor/run_with_env.cmd: -------------------------------------------------------------------------------- 1 | :: To build extensions for 64 bit Python 3, we need to configure environment 2 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: 3 | :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) 4 | :: 5 | :: To build extensions for 64 bit Python 2, we need to configure environment 6 | :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: 7 | :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) 8 | :: 9 | :: 32 bit builds, and 64-bit builds for 3.5 and beyond, do not require specific 10 | :: environment configurations. 
11 | :: 12 | :: Note: this script needs to be run with the /E:ON and /V:ON flags for the 13 | :: cmd interpreter, at least for (SDK v7.0) 14 | :: 15 | :: More details at: 16 | :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows 17 | :: http://stackoverflow.com/a/13751649/163740 18 | :: 19 | :: Author: Olivier Grisel 20 | :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ 21 | :: 22 | :: Notes about batch files for Python people: 23 | :: 24 | :: Quotes in values are literally part of the values: 25 | :: SET FOO="bar" 26 | :: FOO is now five characters long: " b a r " 27 | :: If you don't want quotes, don't include them on the right-hand side. 28 | :: 29 | :: The CALL lines at the end of this file look redundant, but if you move them 30 | :: outside of the IF clauses, they do not run properly in the SET_SDK_64==Y 31 | :: case, I don't know why. 32 | @ECHO OFF 33 | 34 | SET COMMAND_TO_RUN=%* 35 | SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows 36 | SET WIN_WDK=c:\Program Files (x86)\Windows Kits\10\Include\wdf 37 | 38 | :: Extract the major and minor versions, and allow for the minor version to be 39 | :: more than 9. This requires the version number to have two dots in it. 40 | SET MAJOR_PYTHON_VERSION=%PYTHON_VERSION:~0,1% 41 | IF "%PYTHON_VERSION:~3,1%" == "." ( 42 | SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,1% 43 | ) ELSE ( 44 | SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,2% 45 | ) 46 | 47 | :: Based on the Python version, determine what SDK version to use, and whether 48 | :: to set the SDK for 64-bit. 49 | IF %MAJOR_PYTHON_VERSION% == 2 ( 50 | SET WINDOWS_SDK_VERSION="v7.0" 51 | SET SET_SDK_64=Y 52 | ) ELSE ( 53 | IF %MAJOR_PYTHON_VERSION% == 3 ( 54 | SET WINDOWS_SDK_VERSION="v7.1" 55 | IF %MINOR_PYTHON_VERSION% LEQ 4 ( 56 | SET SET_SDK_64=Y 57 | ) ELSE ( 58 | SET SET_SDK_64=N 59 | IF EXIST "%WIN_WDK%" ( 60 | :: See: https://connect.microsoft.com/VisualStudio/feedback/details/1610302/ 61 | REN "%WIN_WDK%" 0wdf 62 | ) 63 | ) 64 | ) ELSE ( 65 | ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" 66 | EXIT 1 67 | ) 68 | ) 69 | 70 | IF %PYTHON_ARCH% == 64 ( 71 | IF %SET_SDK_64% == Y ( 72 | ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture 73 | SET DISTUTILS_USE_SDK=1 74 | SET MSSdk=1 75 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% 76 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release 77 | ECHO Executing: %COMMAND_TO_RUN% 78 | call %COMMAND_TO_RUN% || EXIT 1 79 | ) ELSE ( 80 | ECHO Using default MSVC build environment for 64 bit architecture 81 | ECHO Executing: %COMMAND_TO_RUN% 82 | call %COMMAND_TO_RUN% || EXIT 1 83 | ) 84 | ) ELSE ( 85 | ECHO Using default MSVC build environment for 32 bit architecture 86 | ECHO Executing: %COMMAND_TO_RUN% 87 | call %COMMAND_TO_RUN% || EXIT 1 88 | ) 89 | -------------------------------------------------------------------------------- /ci_scripts/install.sh: -------------------------------------------------------------------------------- 1 | # Deactivate the travis-provided virtual environment and setup a 2 | # conda-based environment instead 3 | deactivate 4 | 5 | # Use the miniconda installer for faster download / install of conda 6 | # itself 7 | pushd . 8 | cd 9 | mkdir -p download 10 | cd download 11 | echo "Cached in $HOME/download :" 12 | ls -l 13 | echo 14 | if [[ ! 
-f miniconda.sh ]] 15 | then 16 | wget http://repo.continuum.io/miniconda/Miniconda-3.6.0-Linux-x86_64.sh \ 17 | -O miniconda.sh 18 | fi 19 | chmod +x miniconda.sh && ./miniconda.sh -b 20 | cd .. 21 | export PATH=/home/travis/miniconda/bin:$PATH 22 | conda update --yes conda 23 | popd 24 | 25 | # Configure the conda environment and put it in the path using the 26 | # provided versions 27 | conda create -n testenv --yes python=$PYTHON_VERSION pip nose \ 28 | numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION cython=$CYTHON_VERSION 29 | 30 | source activate testenv 31 | 32 | pip install scikit-learn==$SKLEARN_VERSION sklearn-contrib-lightning 33 | 34 | if [[ "$COVERAGE" == "true" ]]; then 35 | pip install coverage coveralls 36 | fi 37 | 38 | python --version 39 | python -c "import numpy; print('numpy %s' % numpy.__version__)" 40 | python -c "import scipy; print('scipy %s' % scipy.__version__)" 41 | 42 | python setup.py develop 43 | -------------------------------------------------------------------------------- /ci_scripts/push_doc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is meant to be called in the "deploy" step defined in 3 | # circle.yml. See https://circleci.com/docs/ for more details. 4 | # The behavior of the script is controlled by environment variables defined 5 | # in the circle.yml in the top level folder of the project. 6 | 7 | MSG="Pushing the docs for branch: $CIRCLE_BRANCH, commit $CIRCLE_SHA1" 8 | 9 | cd $HOME 10 | # Copy the built docs to a temporary folder 11 | rm -rf tmp 12 | mkdir tmp 13 | cp -R $HOME/$DOC_REPO/doc/_build/html/* ./tmp/ 14 | 15 | # Clone the docs repo if it isn't already there 16 | if [ ! -d $DOC_REPO ]; 17 | then git clone "git@github.com:$USERNAME/"$DOC_REPO".git"; 18 | fi 19 | 20 | cd $DOC_REPO 21 | git branch gh-pages 22 | git checkout -f gh-pages 23 | git reset --hard origin/gh-pages 24 | git clean -dfx 25 | 26 | for name in $(ls -A $HOME/$DOC_REPO); do 27 | case $name in 28 | .nojekyll) # So that github does not build this as a Jekyll website. 29 | ;; 30 | circle.yml) # Config so that CircleCI does not build the gh-pages branch. 31 | ;; 32 | *) 33 | git rm -rf $name 34 | ;; 35 | esac 36 | done 37 | 38 | # Copy the newly built docs # VN: what's with the DOC_URL? 39 | # mkdir $DOC_URL 40 | # cp -R $HOME/tmp/* ./$DOC_URL/ 41 | cp -R $HOME/tmp/* ./ 42 | 43 | git config --global user.email $EMAIL 44 | git config --global user.name $USERNAME 45 | # git add -f ./$DOC_URL/ 46 | git add -f ./ 47 | git commit -m "$MSG" 48 | git push -f origin gh-pages 49 | if [ $? -ne 0 ]; then 50 | echo "Pushing docs failed" 51 | echo 52 | exit 1 53 | fi 54 | 55 | echo $MSG 56 | -------------------------------------------------------------------------------- /ci_scripts/success.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | if [[ "$COVERAGE" == "true" ]]; then 4 | # Need to run coveralls from a git checkout, so we copy .coverage 5 | # from TEST_DIR where nosetests has been run 6 | cp $TEST_DIR/.coverage $TRAVIS_BUILD_DIR 7 | cd $TRAVIS_BUILD_DIR 8 | # Ignore coveralls failures as the coveralls server is not 9 | # very reliable but we don't want travis to report a failure 10 | # in the github UI just because the coverage report failed to 11 | # be published.
12 | coveralls || echo "Coveralls upload failed" 13 | fi -------------------------------------------------------------------------------- /ci_scripts/test.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | # Get into a temp directory to run the tests from the installed package and 4 | # check that we do not leave artifacts 5 | mkdir -p $TEST_DIR 6 | 7 | cd $TEST_DIR 8 | 9 | if [[ "$COVERAGE" == "true" ]]; then 10 | nosetests -s --with-coverage --cover-package=$MODULE $MODULE 11 | else 12 | nosetests -s $MODULE 13 | fi 14 | -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | machine: 2 | environment: 3 | # The github organization or username of the repository which hosts the 4 | # project and documentation. 5 | USERNAME: "vene" 6 | 7 | # The repository where the documentation will be hosted 8 | DOC_REPO: "polylearn" 9 | 10 | # The base URL for the Github page where the documentation will be hosted 11 | DOC_URL: "vene.ro" 12 | 13 | # The email is to be used for commits in the Github Page 14 | EMAIL: "vlad@vene.ro" 15 | 16 | dependencies: 17 | 18 | # Various dependencies 19 | pre: 20 | - sudo -E apt-get -yq remove texlive-binaries --purge 21 | - sudo apt-get update 22 | - sudo apt-get install libatlas-dev libatlas3gf-base 23 | - sudo apt-get install build-essential python-dev python-setuptools 24 | # install numpy first as it is a compile time dependency for other packages 25 | - pip install --upgrade numpy 26 | - pip install --upgrade scipy matplotlib setuptools nose coverage sphinx pillow sphinx-gallery sphinx_bootstrap_theme 27 | # Install the packages required for the `make -C doc check` command to work. 28 | - sudo -E apt-get -yq update 29 | - sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra 30 | - pip install --upgrade cython numpydoc 31 | - pip install --upgrade scikit-learn 32 | - pip install --upgrade sklearn-contrib-lightning 33 | 34 | # The --user is needed to let sphinx see the source and the binaries 35 | # The pipefail is requested to propagate exit code 36 | override: 37 | - python setup.py clean 38 | - python setup.py develop 39 | - set -o pipefail && cd doc && make html 2>&1 | tee ~/log.txt 40 | test: 41 | # Grep for errors in the documentation build log 42 | override: 43 | - cat ~/log.txt && if grep -q "Traceback (most recent call last):" ~/log.txt; then false; else true; fi 44 | deployment: 45 | push: 46 | branch: master 47 | commands: 48 | - bash ci_scripts/push_doc.sh 49 | general: 50 | # Open the doc to the API 51 | artifacts: 52 | - "doc/_build/html" 53 | - "~/log.txt" 54 | # Restrict the build to the master branch only 55 | branches: 56 | ignore: 57 | - gh-pages -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found.
Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make <target>' where <target> is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | -rm -rf $(BUILDDIR)/* 51 | -rm -rf auto_examples/ 52 | -rm -rf generated/* 53 | -rm -rf modules/generated/* 54 | 55 | html: 56 | # These two lines make the build a bit more lengthy, and 57 | # the embedding of images more robust 58 | rm -rf $(BUILDDIR)/html/_images 59 | #rm -rf _build/doctrees/ 60 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 61 | @echo 62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 63 | 64 | dirhtml: 65 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 66 | @echo 67 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 68 | 69 | singlehtml: 70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 71 | @echo 72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 73 | 74 | pickle: 75 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 76 | @echo 77 | @echo "Build finished; now you can process the pickle files." 78 | 79 | json: 80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 81 | @echo 82 | @echo "Build finished; now you can process the JSON files." 83 | 84 | htmlhelp: 85 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 86 | @echo 87 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 88 | ".hhp project file in $(BUILDDIR)/htmlhelp."
89 | 90 | qthelp: 91 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 92 | @echo 93 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 94 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 95 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/polylearn.qhcp" 96 | @echo "To view the help file:" 97 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/polylearn.qhc" 98 | 99 | devhelp: 100 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 101 | @echo 102 | @echo "Build finished." 103 | @echo "To view the help file:" 104 | @echo "# mkdir -p $$HOME/.local/share/devhelp/polylearn" 105 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/polylearn" 106 | @echo "# devhelp" 107 | 108 | epub: 109 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 110 | @echo 111 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 112 | 113 | latex: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo 116 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 117 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 118 | "(use \`make latexpdf' here to do that automatically)." 119 | 120 | latexpdf: 121 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 122 | @echo "Running LaTeX files through pdflatex..." 123 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 124 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 125 | 126 | latexpdfja: 127 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 128 | @echo "Running LaTeX files through platex and dvipdfmx..." 129 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 130 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 131 | 132 | text: 133 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 134 | @echo 135 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 136 | 137 | man: 138 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 139 | @echo 140 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 141 | 142 | texinfo: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo 145 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 146 | @echo "Run \`make' in that directory to run these through makeinfo" \ 147 | "(use \`make info' here to do that automatically)." 148 | 149 | info: 150 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 151 | @echo "Running Texinfo files through makeinfo..." 152 | make -C $(BUILDDIR)/texinfo info 153 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 154 | 155 | gettext: 156 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 157 | @echo 158 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 159 | 160 | changes: 161 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 162 | @echo 163 | @echo "The overview file is in $(BUILDDIR)/changes." 164 | 165 | linkcheck: 166 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 167 | @echo 168 | @echo "Link check complete; look for any errors in the above output " \ 169 | "or in $(BUILDDIR)/linkcheck/output.txt." 170 | 171 | doctest: 172 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 173 | @echo "Testing of doctests in the sources finished, look at the " \ 174 | "results in $(BUILDDIR)/doctest/output.txt." 175 | 176 | xml: 177 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 178 | @echo 179 | @echo "Build finished. 
The XML files are in $(BUILDDIR)/xml." 180 | 181 | pseudoxml: 182 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 183 | @echo 184 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 185 | -------------------------------------------------------------------------------- /doc/_templates/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | {% endblock %} 11 | 12 | 13 | -------------------------------------------------------------------------------- /doc/_templates/function.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | 9 | -------------------------------------------------------------------------------- /doc/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {# Import the theme's layout. #} 2 | {% extends "!layout.html" %} 3 | 4 | {# remove site and page menus #} 5 | {%- block sidebartoc %} 6 | {% endblock %} 7 | {%- block sidebarrel %} 8 | {% endblock %} 9 | 10 | {%- block navbartoc %} 11 | {% endblock %} 12 | 13 | {# Include our new CSS file into existing ones. #} 14 | {% set css_files = css_files + ['_static/lightning.css']%} 15 | {% set css_files = css_files + ['_static/bootstrap.min.css']%} 16 | 17 | {%- block content %} 18 | {{ navBar() }} 19 | <div class="container">
20 | {% block body %}{% endblock %} 21 | </div>
22 | 23 | {%- endblock %} 24 | 25 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # polylearn documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Jan 18 14:44:12 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | import sphinx_bootstrap_theme 19 | 20 | # If extensions (or modules to document with autodoc) are in another directory, 21 | # add these directories to sys.path here. If the directory is relative to the 22 | # documentation root, use os.path.abspath to make it absolute, like shown here. 23 | sys.path.insert(0, os.path.abspath('sphinxext')) 24 | 25 | 26 | # -- General configuration --------------------------------------------------- 27 | 28 | # Try to override the matplotlib configuration as early as possible 29 | try: 30 | import gen_rst 31 | except: 32 | pass 33 | # -- General configuration ------------------------------------------------ 34 | 35 | # If your documentation needs a minimal Sphinx version, state it here. 36 | #needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'gen_rst', 43 | 'sphinx.ext.autodoc', 44 | 'sphinx.ext.autosummary', 45 | 'sphinx.ext.doctest', 46 | 'sphinx.ext.intersphinx', 47 | 'sphinx.ext.todo', 48 | 'numpy_ext.numpydoc', 49 | 'sphinx.ext.pngmath', 50 | 'sphinx.ext.ifconfig', 51 | 'sphinx.ext.viewcode', 52 | # 'sphinx_gallery.gen_gallery' 53 | 54 | ] 55 | 56 | sphinx_gallery_conf = { 57 | # path to your examples scripts 58 | 'examples_dirs': '../examples', 59 | # path where to save gallery generated examples 60 | 'gallery_dirs': 'auto_examples'} 61 | 62 | autosummary_generate = True 63 | 64 | autodoc_default_flags = ['members', 'inherited-members'] 65 | 66 | # Add any paths that contain templates here, relative to this directory. 67 | templates_path = ['_templates'] 68 | 69 | # The suffix of source filenames. 70 | source_suffix = '.rst' 71 | 72 | # The encoding of source files. 73 | #source_encoding = 'utf-8-sig' 74 | 75 | # Generate the plots for the gallery 76 | plot_gallery = True 77 | 78 | # The master toctree document. 79 | master_doc = 'index' 80 | 81 | # General information about the project. 82 | project = u'polylearn' 83 | copyright = u'2016, Vlad Niculae' 84 | 85 | # The version info for the project you're documenting, acts as replacement for 86 | # |version| and |release|, also used in various other places throughout the 87 | # built documents. 88 | # 89 | # The short X.Y version. 90 | version = '0.1' 91 | # The full version, including alpha/beta/rc tags. 92 | release = '0.1.0' 93 | 94 | # The language for content autogenerated by Sphinx. Refer to documentation 95 | # for a list of supported languages. 96 | #language = None 97 | 98 | # There are two options for replacing |today|: either, you set today to some 99 | # non-false value, then it is used: 100 | #today = '' 101 | # Else, today_fmt is used as the format for a strftime call. 
102 | #today_fmt = '%B %d, %Y' 103 | 104 | # List of patterns, relative to source directory, that match files and 105 | # directories to ignore when looking for source files. 106 | exclude_patterns = ['_build'] 107 | 108 | # The reST default role (used for this markup: `text`) to use for all 109 | # documents. 110 | #default_role = None 111 | 112 | # If true, '()' will be appended to :func: etc. cross-reference text. 113 | #add_function_parentheses = True 114 | 115 | # If true, the current module name will be prepended to all description 116 | # unit titles (such as .. function::). 117 | #add_module_names = True 118 | 119 | # If true, sectionauthor and moduleauthor directives will be shown in the 120 | # output. They are ignored by default. 121 | #show_authors = False 122 | 123 | # The name of the Pygments (syntax highlighting) style to use. 124 | pygments_style = 'sphinx' 125 | 126 | # A list of ignored prefixes for module index sorting. 127 | #modindex_common_prefix = [] 128 | 129 | # If true, keep warnings as "system message" paragraphs in the built documents. 130 | #keep_warnings = False 131 | 132 | 133 | # -- Options for HTML output ---------------------------------------------- 134 | 135 | # The theme to use for HTML and HTML Help pages. See the documentation for 136 | # a list of builtin themes. 137 | html_theme = 'bootstrap' 138 | 139 | # Theme options are theme-specific and customize the look and feel of a theme 140 | # further. For a list of options available for each theme, see the 141 | # documentation. 142 | html_theme_options = { 143 | 'navbar_links': [ 144 | # ('Introduction', 'intro'), 145 | ('References', 'references'), 146 | ('Examples', 'auto_examples/index'), 147 | ], 148 | 'globaltoc_includehidden': "true", 149 | 150 | # Render the next and previous page links in navbar. (Default: true) 151 | 'navbar_sidebarrel': False, 152 | 153 | # Render the current page's TOC in the navbar. (Default: true) 154 | 'navbar_pagenav': False, 155 | 156 | } 157 | 158 | # Add any paths that contain custom themes here, relative to this directory. 159 | 160 | html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() 161 | 162 | # The name for this set of Sphinx documents. If None, it defaults to 163 | # "<project> v<release> documentation". 164 | #html_title = None 165 | 166 | # A shorter title for the navigation bar. Default is the same as html_title. 167 | #html_short_title = None 168 | 169 | # The name of an image file (relative to this directory) to place at the top 170 | # of the sidebar. 171 | #html_logo = None 172 | 173 | # The name of an image file (within the static path) to use as favicon of the 174 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 175 | # pixels large. 176 | #html_favicon = None 177 | 178 | # Add any paths that contain custom static files (such as style sheets) here, 179 | # relative to this directory. They are copied after the builtin static files, 180 | # so a file named "default.css" will overwrite the builtin "default.css". 181 | html_static_path = ['_static'] 182 | 183 | # Add any extra paths that contain custom files (such as robots.txt or 184 | # .htaccess) here, relative to this directory. These files are copied 185 | # directly to the root of the documentation. 186 | #html_extra_path = [] 187 | 188 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 189 | # using the given strftime format.
190 | #html_last_updated_fmt = '%b %d, %Y' 191 | 192 | # If true, SmartyPants will be used to convert quotes and dashes to 193 | # typographically correct entities. 194 | #html_use_smartypants = True 195 | 196 | # Custom sidebar templates, maps document names to template names. 197 | #html_sidebars = {} 198 | 199 | # Additional templates that should be rendered to pages, maps page names to 200 | # template names. 201 | #html_additional_pages = {} 202 | 203 | # If false, no module index is generated. 204 | #html_domain_indices = True 205 | 206 | # If false, no index is generated. 207 | #html_use_index = True 208 | 209 | # If true, the index is split into individual pages for each letter. 210 | #html_split_index = False 211 | 212 | # If true, links to the reST sources are added to the pages. 213 | #html_show_sourcelink = True 214 | 215 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 216 | #html_show_sphinx = True 217 | 218 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 219 | #html_show_copyright = True 220 | 221 | # If true, an OpenSearch description file will be output, and all pages will 222 | # contain a <link> tag referring to it. The value of this option must be the 223 | # base URL from which the finished HTML is served. 224 | #html_use_opensearch = '' 225 | 226 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 227 | #html_file_suffix = None 228 | 229 | # Output file base name for HTML help builder. 230 | htmlhelp_basename = 'polylearndoc' 231 | 232 | 233 | # -- Options for LaTeX output --------------------------------------------- 234 | 235 | latex_elements = { 236 | # The paper size ('letterpaper' or 'a4paper'). 237 | #'papersize': 'letterpaper', 238 | 239 | # The font size ('10pt', '11pt' or '12pt'). 240 | #'pointsize': '10pt', 241 | 242 | # Additional stuff for the LaTeX preamble. 243 | #'preamble': '', 244 | } 245 | 246 | # Grouping the document tree into LaTeX files. List of tuples 247 | # (source start file, target name, title, 248 | # author, documentclass [howto, manual, or own class]). 249 | latex_documents = [ 250 | ('index', 'polylearn.tex', u'polylearn documentation', 251 | u'Vlad Niculae', 'manual'), 252 | ] 253 | 254 | # The name of an image file (relative to this directory) to place at the top of 255 | # the title page. 256 | #latex_logo = None 257 | 258 | # For "manual" documents, if this is true, then toplevel headings are parts, 259 | # not chapters. 260 | #latex_use_parts = False 261 | 262 | # If true, show page references after internal links. 263 | #latex_show_pagerefs = False 264 | 265 | # If true, show URL addresses after external links. 266 | #latex_show_urls = False 267 | 268 | # Documents to append as an appendix to all manuals. 269 | #latex_appendices = [] 270 | 271 | # If false, no module index is generated. 272 | #latex_domain_indices = True 273 | 274 | 275 | # -- Options for manual page output --------------------------------------- 276 | 277 | # One entry per manual page. List of tuples 278 | # (source start file, name, description, authors, manual section). 279 | man_pages = [ 280 | ('index', 'polylearn', u'polylearn documentation', 281 | [u'Vlad Niculae'], 1) 282 | ] 283 | 284 | # If true, show URL addresses after external links. 285 | #man_show_urls = False 286 | 287 | 288 | # -- Options for Texinfo output ------------------------------------------- 289 | 290 | # Grouping the document tree into Texinfo files.
List of tuples 291 | # (source start file, target name, title, author, 292 | # dir menu entry, description, category) 293 | texinfo_documents = [ 294 | ('index', 'polylearn', u'polylearn documentation', 295 | u'Vlad Niculae', 'polylearn', 296 | 'Factorization machines and polynomial models for machine learning.', 297 | 'Miscellaneous'), 298 | ] 299 | 300 | def generate_example_rst(app, what, name, obj, options, lines): 301 | # generate empty examples files, so that we don't get 302 | # inclusion errors if there are no examples for a class / module 303 | examples_path = os.path.join(app.srcdir, "modules", "generated", 304 | "%s.examples" % name) 305 | if not os.path.exists(examples_path): 306 | # touch file 307 | open(examples_path, 'w').close() 308 | 309 | 310 | def setup(app): 311 | app.connect('autodoc-process-docstring', generate_example_rst) 312 | 313 | # Documents to append as an appendix to all manuals. 314 | #texinfo_appendices = [] 315 | 316 | # If false, no module index is generated. 317 | #texinfo_domain_indices = True 318 | 319 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 320 | #texinfo_show_urls = 'footnote' 321 | 322 | # If true, do not generate a @detailmenu in the "Top" node's menu. 323 | #texinfo_no_detailmenu = False 324 | 325 | 326 | # Example configuration for intersphinx: refer to the Python standard library. 327 | intersphinx_mapping = {'http://docs.python.org/': None} 328 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | 3 | .. toctree:: 4 | :hidden: 5 | 6 | auto_examples/index 7 | references.rst 8 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^<target^>` where ^<target^> is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview of all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo.
doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\polylearn.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\polylearn.qhc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 
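REM Note: the "latexpdf" target above does not produce the PDF through a
REM Sphinx builder; it runs "make all-pdf" inside %BUILDDIR%/latex, so GNU make
REM and a LaTeX toolchain must be available on PATH for this target to succeed.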
155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /doc/references.rst: -------------------------------------------------------------------------------- 1 | .. toctree:: 2 | :maxdepth: 2 3 | 4 | polylearn reference 5 | =================== 6 | 7 | .. _factorization_machine: 8 | 9 | Factorization Machines 10 | ---------------------- 11 | 12 | .. automodule:: polylearn.factorization_machine 13 | :no-members: 14 | :no-inherited-members: 15 | 16 | .. currentmodule:: polylearn 17 | 18 | .. autosummary:: 19 | :toctree: generated/ 20 | :template: class.rst 21 | 22 | FactorizationMachineClassifier 23 | FactorizationMachineRegressor 24 | 25 | 26 | .. _polynomial_network: 27 | 28 | Polynomial Networks 29 | ------------------- 30 | 31 | .. automodule:: polylearn.polynomial_network 32 | :no-members: 33 | :no-inherited-members: 34 | 35 | .. currentmodule:: polylearn 36 | 37 | .. 
autosummary:: 38 | :toctree: generated/ 39 | :template: class.rst 40 | 41 | PolynomialNetworkClassifier 42 | PolynomialNetworkRegressor 43 | 44 | 45 | .. _kernels: 46 | 47 | Utilities for computing kernels 48 | ------------------------------- 49 | 50 | .. currentmodule:: polylearn 51 | 52 | .. autosummary:: 53 | :toctree: generated/ 54 | :template: function.rst 55 | 56 | kernels.anova_kernel 57 | kernels.homogeneous_kernel 58 | kernels.safe_power -------------------------------------------------------------------------------- /doc/sphinxext/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------- 2 | The files 3 | - numpydoc.py 4 | - autosummary.py 5 | - autosummary_generate.py 6 | - docscrape.py 7 | - docscrape_sphinx.py 8 | - phantom_import.py 9 | have the following license: 10 | 11 | Copyright (C) 2008 Stefan van der Walt , Pauli Virtanen 12 | 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are 15 | met: 16 | 17 | 1. Redistributions of source code must retain the above copyright 18 | notice, this list of conditions and the following disclaimer. 19 | 2. Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in 21 | the documentation and/or other materials provided with the 22 | distribution. 23 | 24 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 26 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 27 | DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 28 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 | POSSIBILITY OF SUCH DAMAGE. 35 | 36 | ------------------------------------------------------------------------------- 37 | The files 38 | - compiler_unparse.py 39 | - comment_eater.py 40 | - traitsdoc.py 41 | have the following license: 42 | 43 | This software is OSI Certified Open Source Software. 44 | OSI Certified is a certification mark of the Open Source Initiative. 45 | 46 | Copyright (c) 2006, Enthought, Inc. 47 | All rights reserved. 48 | 49 | Redistribution and use in source and binary forms, with or without 50 | modification, are permitted provided that the following conditions are met: 51 | 52 | * Redistributions of source code must retain the above copyright notice, this 53 | list of conditions and the following disclaimer. 54 | * Redistributions in binary form must reproduce the above copyright notice, 55 | this list of conditions and the following disclaimer in the documentation 56 | and/or other materials provided with the distribution. 57 | * Neither the name of Enthought, Inc. nor the names of its contributors may 58 | be used to endorse or promote products derived from this software without 59 | specific prior written permission.
60 | 61 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 62 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 63 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 64 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 65 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 66 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 67 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 68 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 69 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 70 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 71 | 72 | 73 | ------------------------------------------------------------------------------- 74 | The files 75 | - only_directives.py 76 | - plot_directive.py 77 | originate from Matplotlib (http://matplotlib.sf.net/) which has 78 | the following license: 79 | 80 | Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved. 81 | 82 | 1. This LICENSE AGREEMENT is between John D. Hunter (“JDH”), and the Individual or Organization (“Licensee”) accessing and otherwise using matplotlib software in source or binary form and its associated documentation. 83 | 84 | 2. Subject to the terms and conditions of this License Agreement, JDH hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use matplotlib 0.98.3 alone or in any derivative version, provided, however, that JDH’s License Agreement and JDH’s notice of copyright, i.e., “Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved” are retained in matplotlib 0.98.3 alone or in any derivative version prepared by Licensee. 85 | 86 | 3. In the event Licensee prepares a derivative work that is based on or incorporates matplotlib 0.98.3 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to matplotlib 0.98.3. 87 | 88 | 4. JDH is making matplotlib 0.98.3 available to Licensee on an “AS IS” basis. JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB 0.98.3 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 89 | 90 | 5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB 0.98.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING MATPLOTLIB 0.98.3, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 91 | 92 | 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 93 | 94 | 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between JDH and Licensee. This License Agreement does not grant permission to use JDH trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 95 | 96 | 8. By copying, installing or otherwise using matplotlib 0.98.3, Licensee agrees to be bound by the terms and conditions of this License Agreement. 
97 | 98 | -------------------------------------------------------------------------------- /doc/sphinxext/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include tests *.py 2 | include *.txt 3 | -------------------------------------------------------------------------------- /doc/sphinxext/README.txt: -------------------------------------------------------------------------------- 1 | ===================================== 2 | numpydoc -- Numpy's Sphinx extensions 3 | ===================================== 4 | 5 | Numpy's documentation uses several custom extensions to Sphinx. These 6 | are shipped in this ``numpydoc`` package, in case you want to make use 7 | of them in third-party projects. 8 | 9 | The following extensions are available: 10 | 11 | - ``numpydoc``: support for the Numpy docstring format in Sphinx, and add 12 | the code description directives ``np-function``, ``np-cfunction``, etc. 13 | that support the Numpy docstring syntax. 14 | 15 | - ``numpydoc.traitsdoc``: For gathering documentation about Traits attributes. 16 | 17 | - ``numpydoc.plot_directives``: Adaptation of Matplotlib's ``plot::`` 18 | directive. Note that this implementation may still undergo severe 19 | changes or eventually be deprecated. 20 | 21 | - ``numpydoc.only_directives``: (DEPRECATED) 22 | 23 | - ``numpydoc.autosummary``: (DEPRECATED) An ``autosummary::`` directive. 24 | Available in Sphinx 0.6.2 and (to-be) 1.0 as ``sphinx.ext.autosummary``, 25 | and the Sphinx 1.0 version is recommended over that included in 26 | Numpydoc. 27 | 28 | 29 | numpydoc 30 | ======== 31 | 32 | Numpydoc inserts a hook into Sphinx's autodoc that converts docstrings 33 | following the Numpy/Scipy format to a form palatable to Sphinx. 34 | 35 | Options 36 | ------- 37 | 38 | The following options can be set in conf.py: 39 | 40 | - numpydoc_use_plots: bool 41 | 42 | Whether to produce ``plot::`` directives for Examples sections that 43 | contain ``import matplotlib``. 44 | 45 | - numpydoc_show_class_members: bool 46 | 47 | Whether to show all members of a class in the Methods and Attributes 48 | sections automatically. 49 | 50 | - numpydoc_edit_link: bool (DEPRECATED -- edit your HTML template instead) 51 | 52 | Whether to insert an edit link after docstrings. 53 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/polylearn/4dd9d4b8aca029628a4c934829526b8552db2e1b/doc/sphinxext/numpy_ext/__init__.py -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/docscrape.py: -------------------------------------------------------------------------------- 1 | """Extract reference documentation from the NumPy source tree. 2 | 3 | """ 4 | 5 | import inspect 6 | import textwrap 7 | import re 8 | import pydoc 9 | from warnings import warn 10 | # Try Python 2 first, otherwise load from Python 3 11 | try: 12 | from StringIO import StringIO 13 | except ImportError: 14 | from io import StringIO 15 | 16 | 17 | class Reader(object): 18 | """A line-based string reader. 19 | 20 | """ 21 | def __init__(self, data): 22 | """ 23 | Parameters 24 | ---------- 25 | data : str 26 | String with lines separated by '\n'.
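Examples
--------
Illustrative sketch only (this example is not in the vendored numpydoc
source); ``Reader`` also accepts a list of lines directly:

>>> r = Reader(['first', 'second'])
>>> r.read()
'first'
>>> r.peek()
'second'
>>> r.eof()
False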
27 | 28 | """ 29 | if isinstance(data, list): 30 | self._str = data 31 | else: 32 | self._str = data.split('\n') # store string as list of lines 33 | 34 | self.reset() 35 | 36 | def __getitem__(self, n): 37 | return self._str[n] 38 | 39 | def reset(self): 40 | self._l = 0 # current line nr 41 | 42 | def read(self): 43 | if not self.eof(): 44 | out = self[self._l] 45 | self._l += 1 46 | return out 47 | else: 48 | return '' 49 | 50 | def seek_next_non_empty_line(self): 51 | for l in self[self._l:]: 52 | if l.strip(): 53 | break 54 | else: 55 | self._l += 1 56 | 57 | def eof(self): 58 | return self._l >= len(self._str) 59 | 60 | def read_to_condition(self, condition_func): 61 | start = self._l 62 | for line in self[start:]: 63 | if condition_func(line): 64 | return self[start:self._l] 65 | self._l += 1 66 | if self.eof(): 67 | return self[start:self._l + 1] 68 | return [] 69 | 70 | def read_to_next_empty_line(self): 71 | self.seek_next_non_empty_line() 72 | 73 | def is_empty(line): 74 | return not line.strip() 75 | return self.read_to_condition(is_empty) 76 | 77 | def read_to_next_unindented_line(self): 78 | def is_unindented(line): 79 | return (line.strip() and (len(line.lstrip()) == len(line))) 80 | return self.read_to_condition(is_unindented) 81 | 82 | def peek(self, n=0): 83 | if self._l + n < len(self._str): 84 | return self[self._l + n] 85 | else: 86 | return '' 87 | 88 | def is_empty(self): 89 | return not ''.join(self._str).strip() 90 | 91 | 92 | class NumpyDocString(object): 93 | def __init__(self, docstring, config={}): 94 | docstring = textwrap.dedent(docstring).split('\n') 95 | 96 | self._doc = Reader(docstring) 97 | self._parsed_data = { 98 | 'Signature': '', 99 | 'Summary': [''], 100 | 'Extended Summary': [], 101 | 'Parameters': [], 102 | 'Returns': [], 103 | 'Raises': [], 104 | 'Warns': [], 105 | 'Other Parameters': [], 106 | 'Attributes': [], 107 | 'Methods': [], 108 | 'See Also': [], 109 | 'Notes': [], 110 | 'Warnings': [], 111 | 'References': '', 112 | 'Examples': '', 113 | 'index': {} 114 | } 115 | 116 | self._parse() 117 | 118 | def __getitem__(self, key): 119 | return self._parsed_data[key] 120 | 121 | def __setitem__(self, key, val): 122 | if key not in self._parsed_data: 123 | warn("Unknown section %s" % key) 124 | else: 125 | self._parsed_data[key] = val 126 | 127 | def _is_at_section(self): 128 | self._doc.seek_next_non_empty_line() 129 | 130 | if self._doc.eof(): 131 | return False 132 | 133 | l1 = self._doc.peek().strip() # e.g. Parameters 134 | 135 | if l1.startswith('.. 
index::'): 136 | return True 137 | 138 | l2 = self._doc.peek(1).strip() # ---------- or ========== 139 | return l2.startswith('-' * len(l1)) or l2.startswith('=' * len(l1)) 140 | 141 | def _strip(self, doc): 142 | i = 0 143 | j = 0 144 | for i, line in enumerate(doc): 145 | if line.strip(): 146 | break 147 | 148 | for j, line in enumerate(doc[::-1]): 149 | if line.strip(): 150 | break 151 | 152 | return doc[i:len(doc) - j] 153 | 154 | def _read_to_next_section(self): 155 | section = self._doc.read_to_next_empty_line() 156 | 157 | while not self._is_at_section() and not self._doc.eof(): 158 | if not self._doc.peek(-1).strip(): # previous line was empty 159 | section += [''] 160 | 161 | section += self._doc.read_to_next_empty_line() 162 | 163 | return section 164 | 165 | def _read_sections(self): 166 | while not self._doc.eof(): 167 | data = self._read_to_next_section() 168 | name = data[0].strip() 169 | 170 | if name.startswith('..'): # index section 171 | yield name, data[1:] 172 | elif len(data) < 2: 173 | return 174 | else: 175 | yield name, self._strip(data[2:]) 176 | 177 | def _parse_param_list(self, content): 178 | r = Reader(content) 179 | params = [] 180 | while not r.eof(): 181 | header = r.read().strip() 182 | if ' : ' in header: 183 | arg_name, arg_type = header.split(' : ')[:2] 184 | else: 185 | arg_name, arg_type = header, '' 186 | 187 | desc = r.read_to_next_unindented_line() 188 | desc = dedent_lines(desc) 189 | 190 | params.append((arg_name, arg_type, desc)) 191 | 192 | return params 193 | 194 | _name_rgx = re.compile(r"^\s*(:(?P<role>\w+):`(?P<name>[a-zA-Z0-9_.-]+)`|" 195 | r" (?P<name2>[a-zA-Z0-9_.-]+))\s*", re.X) 196 | 197 | def _parse_see_also(self, content): 198 | """ 199 | func_name : Descriptive text 200 | continued text 201 | another_func_name : Descriptive text 202 | func_name1, func_name2, :meth:`func_name`, func_name3 203 | 204 | """ 205 | items = [] 206 | 207 | def parse_item_name(text): 208 | """Match ':role:`name`' or 'name'""" 209 | m = self._name_rgx.match(text) 210 | if m: 211 | g = m.groups() 212 | if g[1] is None: 213 | return g[3], None 214 | else: 215 | return g[2], g[1] 216 | raise ValueError("%s is not an item name" % text) 217 | 218 | def push_item(name, rest): 219 | if not name: 220 | return 221 | name, role = parse_item_name(name) 222 | items.append((name, list(rest), role)) 223 | del rest[:] 224 | 225 | current_func = None 226 | rest = [] 227 | 228 | for line in content: 229 | if not line.strip(): 230 | continue 231 | 232 | m = self._name_rgx.match(line) 233 | if m and line[m.end():].strip().startswith(':'): 234 | push_item(current_func, rest) 235 | current_func, line = line[:m.end()], line[m.end():] 236 | rest = [line.split(':', 1)[1].strip()] 237 | if not rest[0]: 238 | rest = [] 239 | elif not line.startswith(' '): 240 | push_item(current_func, rest) 241 | current_func = None 242 | if ',' in line: 243 | for func in line.split(','): 244 | push_item(func, []) 245 | elif line.strip(): 246 | current_func = line 247 | elif current_func is not None: 248 | rest.append(line.strip()) 249 | push_item(current_func, rest) 250 | return items 251 | 252 | def _parse_index(self, section, content): 253 | """ 254 | .. 
index: default 255 | :refguide: something, else, and more 256 | 257 | """ 258 | def strip_each_in(lst): 259 | return [s.strip() for s in lst] 260 | 261 | out = {} 262 | section = section.split('::') 263 | if len(section) > 1: 264 | out['default'] = strip_each_in(section[1].split(','))[0] 265 | for line in content: 266 | line = line.split(':') 267 | if len(line) > 2: 268 | out[line[1]] = strip_each_in(line[2].split(',')) 269 | return out 270 | 271 | def _parse_summary(self): 272 | """Grab signature (if given) and summary""" 273 | if self._is_at_section(): 274 | return 275 | 276 | summary = self._doc.read_to_next_empty_line() 277 | summary_str = " ".join([s.strip() for s in summary]).strip() 278 | if re.compile('^([\w., ]+=)?\s*[\w\.]+\(.*\)$').match(summary_str): 279 | self['Signature'] = summary_str 280 | if not self._is_at_section(): 281 | self['Summary'] = self._doc.read_to_next_empty_line() 282 | else: 283 | self['Summary'] = summary 284 | 285 | if not self._is_at_section(): 286 | self['Extended Summary'] = self._read_to_next_section() 287 | 288 | def _parse(self): 289 | self._doc.reset() 290 | self._parse_summary() 291 | 292 | for (section, content) in self._read_sections(): 293 | if not section.startswith('..'): 294 | section = ' '.join([s.capitalize() 295 | for s in section.split(' ')]) 296 | if section in ('Parameters', 'Attributes', 'Methods', 297 | 'Returns', 'Raises', 'Warns'): 298 | self[section] = self._parse_param_list(content) 299 | elif section.startswith('.. index::'): 300 | self['index'] = self._parse_index(section, content) 301 | elif section == 'See Also': 302 | self['See Also'] = self._parse_see_also(content) 303 | else: 304 | self[section] = content 305 | 306 | # string conversion routines 307 | 308 | def _str_header(self, name, symbol='-'): 309 | return [name, len(name) * symbol] 310 | 311 | def _str_indent(self, doc, indent=4): 312 | out = [] 313 | for line in doc: 314 | out += [' ' * indent + line] 315 | return out 316 | 317 | def _str_signature(self): 318 | if self['Signature']: 319 | return [self['Signature'].replace('*', '\*')] + [''] 320 | else: 321 | return [''] 322 | 323 | def _str_summary(self): 324 | if self['Summary']: 325 | return self['Summary'] + [''] 326 | else: 327 | return [] 328 | 329 | def _str_extended_summary(self): 330 | if self['Extended Summary']: 331 | return self['Extended Summary'] + [''] 332 | else: 333 | return [] 334 | 335 | def _str_param_list(self, name): 336 | out = [] 337 | if self[name]: 338 | out += self._str_header(name) 339 | for param, param_type, desc in self[name]: 340 | out += ['%s : %s' % (param, param_type)] 341 | out += self._str_indent(desc) 342 | out += [''] 343 | return out 344 | 345 | def _str_section(self, name): 346 | out = [] 347 | if self[name]: 348 | out += self._str_header(name) 349 | out += self[name] 350 | out += [''] 351 | return out 352 | 353 | def _str_see_also(self, func_role): 354 | if not self['See Also']: 355 | return [] 356 | out = [] 357 | out += self._str_header("See Also") 358 | last_had_desc = True 359 | for func, desc, role in self['See Also']: 360 | if role: 361 | link = ':%s:`%s`' % (role, func) 362 | elif func_role: 363 | link = ':%s:`%s`' % (func_role, func) 364 | else: 365 | link = "`%s`_" % func 366 | if desc or last_had_desc: 367 | out += [''] 368 | out += [link] 369 | else: 370 | out[-1] += ", %s" % link 371 | if desc: 372 | out += self._str_indent([' '.join(desc)]) 373 | last_had_desc = True 374 | else: 375 | last_had_desc = False 376 | out += [''] 377 | return out 378 | 379 | def 
_str_index(self): 380 | idx = self['index'] 381 | out = [] 382 | out += ['.. index:: %s' % idx.get('default', '')] 383 | for section, references in idx.iteritems(): 384 | if section == 'default': 385 | continue 386 | out += [' :%s: %s' % (section, ', '.join(references))] 387 | return out 388 | 389 | def __str__(self, func_role=''): 390 | out = [] 391 | out += self._str_signature() 392 | out += self._str_summary() 393 | out += self._str_extended_summary() 394 | for param_list in ('Parameters', 'Returns', 'Raises'): 395 | out += self._str_param_list(param_list) 396 | out += self._str_section('Warnings') 397 | out += self._str_see_also(func_role) 398 | for s in ('Notes', 'References', 'Examples'): 399 | out += self._str_section(s) 400 | for param_list in ('Attributes', 'Methods'): 401 | out += self._str_param_list(param_list) 402 | out += self._str_index() 403 | return '\n'.join(out) 404 | 405 | 406 | def indent(str, indent=4): 407 | indent_str = ' ' * indent 408 | if str is None: 409 | return indent_str 410 | lines = str.split('\n') 411 | return '\n'.join(indent_str + l for l in lines) 412 | 413 | 414 | def dedent_lines(lines): 415 | """Deindent a list of lines maximally""" 416 | return textwrap.dedent("\n".join(lines)).split("\n") 417 | 418 | 419 | def header(text, style='-'): 420 | return text + '\n' + style * len(text) + '\n' 421 | 422 | 423 | class FunctionDoc(NumpyDocString): 424 | def __init__(self, func, role='func', doc=None, config={}): 425 | self._f = func 426 | self._role = role # e.g. "func" or "meth" 427 | 428 | if doc is None: 429 | if func is None: 430 | raise ValueError("No function or docstring given") 431 | doc = inspect.getdoc(func) or '' 432 | NumpyDocString.__init__(self, doc) 433 | 434 | if not self['Signature'] and func is not None: 435 | func, func_name = self.get_func() 436 | try: 437 | # try to read signature 438 | argspec = inspect.getargspec(func) 439 | argspec = inspect.formatargspec(*argspec) 440 | argspec = argspec.replace('*', '\*') 441 | signature = '%s%s' % (func_name, argspec) 442 | except TypeError as e: 443 | signature = '%s()' % func_name 444 | self['Signature'] = signature 445 | 446 | def get_func(self): 447 | func_name = getattr(self._f, '__name__', self.__class__.__name__) 448 | if inspect.isclass(self._f): 449 | func = getattr(self._f, '__call__', self._f.__init__) 450 | else: 451 | func = self._f 452 | return func, func_name 453 | 454 | def __str__(self): 455 | out = '' 456 | 457 | func, func_name = self.get_func() 458 | signature = self['Signature'].replace('*', '\*') 459 | 460 | roles = {'func': 'function', 461 | 'meth': 'method'} 462 | 463 | if self._role: 464 | if self._role not in roles: 465 | print("Warning: invalid role %s" % self._role) 466 | out += '.. %s:: %s\n \n\n' % (roles.get(self._role, ''), 467 | func_name) 468 | 469 | out += super(FunctionDoc, self).__str__(func_role=self._role) 470 | return out 471 | 472 | 473 | class ClassDoc(NumpyDocString): 474 | def __init__(self, cls, doc=None, modulename='', func_doc=FunctionDoc, 475 | config=None): 476 | if not inspect.isclass(cls) and cls is not None: 477 | raise ValueError("Expected a class or None, but got %r" % cls) 478 | self._cls = cls 479 | 480 | if modulename and not modulename.endswith('.'): 481 | modulename += '.' 
482 | self._mod = modulename 483 | 484 | if doc is None: 485 | if cls is None: 486 | raise ValueError("No class or documentation string given") 487 | doc = pydoc.getdoc(cls) 488 | 489 | NumpyDocString.__init__(self, doc) 490 | 491 | if config is not None and config.get('show_class_members', True): 492 | if not self['Methods']: 493 | self['Methods'] = [(name, '', '') 494 | for name in sorted(self.methods)] 495 | if not self['Attributes']: 496 | self['Attributes'] = [(name, '', '') 497 | for name in sorted(self.properties)] 498 | 499 | @property 500 | def methods(self): 501 | if self._cls is None: 502 | return [] 503 | return [name for name, func in inspect.getmembers(self._cls) 504 | if not name.startswith('_') and callable(func)] 505 | 506 | @property 507 | def properties(self): 508 | if self._cls is None: 509 | return [] 510 | return [name for name, func in inspect.getmembers(self._cls) 511 | if not name.startswith('_') and func is None] 512 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/docscrape_sphinx.py: -------------------------------------------------------------------------------- 1 | import re 2 | import inspect 3 | import textwrap 4 | import pydoc 5 | from .docscrape import NumpyDocString 6 | from .docscrape import FunctionDoc 7 | from .docscrape import ClassDoc 8 | 9 | 10 | class SphinxDocString(NumpyDocString): 11 | def __init__(self, docstring, config=None): 12 | config = {} if config is None else config 13 | self.use_plots = config.get('use_plots', False) 14 | NumpyDocString.__init__(self, docstring, config=config) 15 | 16 | # string conversion routines 17 | def _str_header(self, name, symbol='`'): 18 | return ['.. rubric:: ' + name, ''] 19 | 20 | def _str_field_list(self, name): 21 | return [':' + name + ':'] 22 | 23 | def _str_indent(self, doc, indent=4): 24 | out = [] 25 | for line in doc: 26 | out += [' ' * indent + line] 27 | return out 28 | 29 | def _str_signature(self): 30 | return [''] 31 | if self['Signature']: 32 | return ['``%s``' % self['Signature']] + [''] 33 | else: 34 | return [''] 35 | 36 | def _str_summary(self): 37 | return self['Summary'] + [''] 38 | 39 | def _str_extended_summary(self): 40 | return self['Extended Summary'] + [''] 41 | 42 | def _str_param_list(self, name): 43 | out = [] 44 | if self[name]: 45 | out += self._str_field_list(name) 46 | out += [''] 47 | for param, param_type, desc in self[name]: 48 | out += self._str_indent(['**%s** : %s' % (param.strip(), 49 | param_type)]) 50 | out += [''] 51 | out += self._str_indent(desc, 8) 52 | out += [''] 53 | return out 54 | 55 | @property 56 | def _obj(self): 57 | if hasattr(self, '_cls'): 58 | return self._cls 59 | elif hasattr(self, '_f'): 60 | return self._f 61 | return None 62 | 63 | def _str_member_list(self, name): 64 | """ 65 | Generate a member listing, autosummary:: table where possible, 66 | and a table where not. 67 | 68 | """ 69 | out = [] 70 | if self[name]: 71 | out += ['.. rubric:: %s' % name, ''] 72 | prefix = getattr(self, '_name', '') 73 | 74 | if prefix: 75 | prefix = '~%s.' % prefix 76 | 77 | autosum = [] 78 | others = [] 79 | for param, param_type, desc in self[name]: 80 | param = param.strip() 81 | if not self._obj or hasattr(self._obj, param): 82 | autosum += [" %s%s" % (prefix, param)] 83 | else: 84 | others.append((param, param_type, desc)) 85 | 86 | if autosum: 87 | # GAEL: Toctree commented out below because it creates 88 | # hundreds of sphinx warnings 89 | # out += ['.. 
autosummary::', ' :toctree:', ''] 90 | out += ['.. autosummary::', ''] 91 | out += autosum 92 | 93 | if others: 94 | maxlen_0 = max([len(x[0]) for x in others]) 95 | maxlen_1 = max([len(x[1]) for x in others]) 96 | hdr = "=" * maxlen_0 + " " + "=" * maxlen_1 + " " + "=" * 10 97 | fmt = '%%%ds %%%ds ' % (maxlen_0, maxlen_1) 98 | n_indent = maxlen_0 + maxlen_1 + 4 99 | out += [hdr] 100 | for param, param_type, desc in others: 101 | out += [fmt % (param.strip(), param_type)] 102 | out += self._str_indent(desc, n_indent) 103 | out += [hdr] 104 | out += [''] 105 | return out 106 | 107 | def _str_section(self, name): 108 | out = [] 109 | if self[name]: 110 | out += self._str_header(name) 111 | out += [''] 112 | content = textwrap.dedent("\n".join(self[name])).split("\n") 113 | out += content 114 | out += [''] 115 | return out 116 | 117 | def _str_see_also(self, func_role): 118 | out = [] 119 | if self['See Also']: 120 | see_also = super(SphinxDocString, self)._str_see_also(func_role) 121 | out = ['.. seealso::', ''] 122 | out += self._str_indent(see_also[2:]) 123 | return out 124 | 125 | def _str_warnings(self): 126 | out = [] 127 | if self['Warnings']: 128 | out = ['.. warning::', ''] 129 | out += self._str_indent(self['Warnings']) 130 | return out 131 | 132 | def _str_index(self): 133 | idx = self['index'] 134 | out = [] 135 | if len(idx) == 0: 136 | return out 137 | 138 | out += ['.. index:: %s' % idx.get('default', '')] 139 | for section, references in idx.iteritems(): 140 | if section == 'default': 141 | continue 142 | elif section == 'refguide': 143 | out += [' single: %s' % (', '.join(references))] 144 | else: 145 | out += [' %s: %s' % (section, ','.join(references))] 146 | return out 147 | 148 | def _str_references(self): 149 | out = [] 150 | if self['References']: 151 | out += self._str_header('References') 152 | if isinstance(self['References'], str): 153 | self['References'] = [self['References']] 154 | out.extend(self['References']) 155 | out += [''] 156 | # Latex collects all references to a separate bibliography, 157 | # so we need to insert links to it 158 | import sphinx # local import to avoid test dependency 159 | if sphinx.__version__ >= "0.6": 160 | out += ['.. only:: latex', ''] 161 | else: 162 | out += ['.. latexonly::', ''] 163 | items = [] 164 | for line in self['References']: 165 | m = re.match(r'.. \[([a-z0-9._-]+)\]', line, re.I) 166 | if m: 167 | items.append(m.group(1)) 168 | out += [' ' + ", ".join(["[%s]_" % item for item in items]), ''] 169 | return out 170 | 171 | def _str_examples(self): 172 | examples_str = "\n".join(self['Examples']) 173 | 174 | if (self.use_plots and 'import matplotlib' in examples_str 175 | and 'plot::' not in examples_str): 176 | out = [] 177 | out += self._str_header('Examples') 178 | out += ['.. 
plot::', ''] 179 | out += self._str_indent(self['Examples']) 180 | out += [''] 181 | return out 182 | else: 183 | return self._str_section('Examples') 184 | 185 | def __str__(self, indent=0, func_role="obj"): 186 | out = [] 187 | out += self._str_signature() 188 | out += self._str_index() + [''] 189 | out += self._str_summary() 190 | out += self._str_extended_summary() 191 | for param_list in ('Parameters', 'Returns', 'Raises', 'Attributes'): 192 | out += self._str_param_list(param_list) 193 | out += self._str_warnings() 194 | out += self._str_see_also(func_role) 195 | out += self._str_section('Notes') 196 | out += self._str_references() 197 | out += self._str_examples() 198 | for param_list in ('Methods',): 199 | out += self._str_member_list(param_list) 200 | out = self._str_indent(out, indent) 201 | return '\n'.join(out) 202 | 203 | 204 | class SphinxFunctionDoc(SphinxDocString, FunctionDoc): 205 | def __init__(self, obj, doc=None, config={}): 206 | self.use_plots = config.get('use_plots', False) 207 | FunctionDoc.__init__(self, obj, doc=doc, config=config) 208 | 209 | 210 | class SphinxClassDoc(SphinxDocString, ClassDoc): 211 | def __init__(self, obj, doc=None, func_doc=None, config={}): 212 | self.use_plots = config.get('use_plots', False) 213 | ClassDoc.__init__(self, obj, doc=doc, func_doc=None, config=config) 214 | 215 | 216 | class SphinxObjDoc(SphinxDocString): 217 | def __init__(self, obj, doc=None, config=None): 218 | self._f = obj 219 | SphinxDocString.__init__(self, doc, config=config) 220 | 221 | 222 | def get_doc_object(obj, what=None, doc=None, config={}): 223 | if what is None: 224 | if inspect.isclass(obj): 225 | what = 'class' 226 | elif inspect.ismodule(obj): 227 | what = 'module' 228 | elif callable(obj): 229 | what = 'function' 230 | else: 231 | what = 'object' 232 | if what == 'class': 233 | return SphinxClassDoc(obj, func_doc=SphinxFunctionDoc, doc=doc, 234 | config=config) 235 | elif what in ('function', 'method'): 236 | return SphinxFunctionDoc(obj, doc=doc, config=config) 237 | else: 238 | if doc is None: 239 | doc = pydoc.getdoc(obj) 240 | return SphinxObjDoc(obj, doc, config=config) 241 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/numpydoc.py: -------------------------------------------------------------------------------- 1 | """ 2 | ======== 3 | numpydoc 4 | ======== 5 | 6 | Sphinx extension that handles docstrings in the Numpy standard format. [1] 7 | 8 | It will: 9 | 10 | - Convert Parameters etc. sections to field lists. 11 | - Convert See Also section to a See also entry. 12 | - Renumber references. 13 | - Extract the signature from the docstring, if it can't be determined 14 | otherwise. 15 | 16 | .. 
[1] http://projects.scipy.org/numpy/wiki/CodingStyleGuidelines#docstring-standard 17 | 18 | """ 19 | 20 | from __future__ import unicode_literals 21 | 22 | import sys # Only needed to check Python version 23 | import os 24 | import re 25 | import pydoc 26 | from .docscrape_sphinx import get_doc_object 27 | from .docscrape_sphinx import SphinxDocString 28 | import inspect 29 | 30 | 31 | def mangle_docstrings(app, what, name, obj, options, lines, 32 | reference_offset=[0]): 33 | 34 | cfg = dict(use_plots=app.config.numpydoc_use_plots, 35 | show_class_members=app.config.numpydoc_show_class_members) 36 | 37 | if what == 'module': 38 | # Strip top title 39 | title_re = re.compile(r'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*', 40 | re.I | re.S) 41 | lines[:] = title_re.sub('', "\n".join(lines)).split("\n") 42 | else: 43 | doc = get_doc_object(obj, what, "\n".join(lines), config=cfg) 44 | if sys.version_info[0] < 3: 45 | lines[:] = unicode(doc).splitlines() 46 | else: 47 | lines[:] = str(doc).splitlines() 48 | 49 | if app.config.numpydoc_edit_link and hasattr(obj, '__name__') and \ 50 | obj.__name__: 51 | if hasattr(obj, '__module__'): 52 | v = dict(full_name="%s.%s" % (obj.__module__, obj.__name__)) 53 | else: 54 | v = dict(full_name=obj.__name__) 55 | lines += [u'', u'.. htmlonly::', ''] 56 | lines += [u' %s' % x for x in 57 | (app.config.numpydoc_edit_link % v).split("\n")] 58 | 59 | # replace reference numbers so that there are no duplicates 60 | references = [] 61 | for line in lines: 62 | line = line.strip() 63 | m = re.match(r'^.. \[([a-z0-9_.-])\]', line, re.I) 64 | if m: 65 | references.append(m.group(1)) 66 | 67 | # start renaming from the longest string, to avoid overwriting parts 68 | references.sort(key=lambda x: -len(x)) 69 | if references: 70 | for i, line in enumerate(lines): 71 | for r in references: 72 | if re.match(r'^\d+$', r): 73 | new_r = "R%d" % (reference_offset[0] + int(r)) 74 | else: 75 | new_r = u"%s%d" % (r, reference_offset[0]) 76 | lines[i] = lines[i].replace(u'[%s]_' % r, 77 | u'[%s]_' % new_r) 78 | lines[i] = lines[i].replace(u'.. [%s]' % r, 79 | u'.. 
[%s]' % new_r) 80 | 81 | reference_offset[0] += len(references) 82 | 83 | 84 | def mangle_signature(app, what, name, obj, 85 | options, sig, retann): 86 | # Do not try to inspect classes that don't define `__init__` 87 | if (inspect.isclass(obj) and 88 | (not hasattr(obj, '__init__') or 89 | 'initializes x; see ' in pydoc.getdoc(obj.__init__))): 90 | return '', '' 91 | 92 | if not (callable(obj) or hasattr(obj, '__argspec_is_invalid_')): 93 | return 94 | if not hasattr(obj, '__doc__'): 95 | return 96 | 97 | doc = SphinxDocString(pydoc.getdoc(obj)) 98 | if doc['Signature']: 99 | sig = re.sub("^[^(]*", "", doc['Signature']) 100 | return sig, '' 101 | 102 | 103 | def setup(app, get_doc_object_=get_doc_object): 104 | global get_doc_object 105 | get_doc_object = get_doc_object_ 106 | 107 | if sys.version_info[0] < 3: 108 | app.connect(b'autodoc-process-docstring', mangle_docstrings) 109 | app.connect(b'autodoc-process-signature', mangle_signature) 110 | else: 111 | app.connect('autodoc-process-docstring', mangle_docstrings) 112 | app.connect('autodoc-process-signature', mangle_signature) 113 | app.add_config_value('numpydoc_edit_link', None, False) 114 | app.add_config_value('numpydoc_use_plots', None, False) 115 | app.add_config_value('numpydoc_show_class_members', True, True) 116 | 117 | # Extra mangling domains 118 | app.add_domain(NumpyPythonDomain) 119 | app.add_domain(NumpyCDomain) 120 | 121 | #----------------------------------------------------------------------------- 122 | # Docstring-mangling domains 123 | #----------------------------------------------------------------------------- 124 | 125 | try: 126 | import sphinx # lazy to avoid test dependency 127 | except ImportError: 128 | CDomain = PythonDomain = object 129 | else: 130 | from sphinx.domains.c import CDomain 131 | from sphinx.domains.python import PythonDomain 132 | 133 | 134 | class ManglingDomainBase(object): 135 | directive_mangling_map = {} 136 | 137 | def __init__(self, *a, **kw): 138 | super(ManglingDomainBase, self).__init__(*a, **kw) 139 | self.wrap_mangling_directives() 140 | 141 | def wrap_mangling_directives(self): 142 | for name, objtype in self.directive_mangling_map.items(): 143 | self.directives[name] = wrap_mangling_directive( 144 | self.directives[name], objtype) 145 | 146 | 147 | class NumpyPythonDomain(ManglingDomainBase, PythonDomain): 148 | name = 'np' 149 | directive_mangling_map = { 150 | 'function': 'function', 151 | 'class': 'class', 152 | 'exception': 'class', 153 | 'method': 'function', 154 | 'classmethod': 'function', 155 | 'staticmethod': 'function', 156 | 'attribute': 'attribute', 157 | } 158 | 159 | 160 | class NumpyCDomain(ManglingDomainBase, CDomain): 161 | name = 'np-c' 162 | directive_mangling_map = { 163 | 'function': 'function', 164 | 'member': 'attribute', 165 | 'macro': 'function', 166 | 'type': 'class', 167 | 'var': 'object', 168 | } 169 | 170 | 171 | def wrap_mangling_directive(base_directive, objtype): 172 | class directive(base_directive): 173 | def run(self): 174 | env = self.state.document.settings.env 175 | 176 | name = None 177 | if self.arguments: 178 | m = re.match(r'^(.*\s+)?(.*?)(\(.*)?', self.arguments[0]) 179 | name = m.group(2).strip() 180 | 181 | if not name: 182 | name = self.arguments[0] 183 | 184 | lines = list(self.content) 185 | mangle_docstrings(env.app, objtype, name, None, None, lines) 186 | # local import to avoid testing dependency 187 | from docutils.statemachine import ViewList 188 | self.content = ViewList(lines, self.content.parent) 189 | 190 | return 
base_directive.run(self) 191 | 192 | return directive 193 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | Examples of using factorization machines and polynomial networks with the polylearn library. -------------------------------------------------------------------------------- /examples/plot_regularization_path.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================================== 3 | Plotting regularization paths using warm restarts. 4 | ================================================== 5 | 6 | In this example we show how to use the `warm_start` attribute to efficiently 7 | compute the regularization path for a polynomial network when optimizing 8 | for the `beta` regularization hyperparameter. 9 | """ 10 | print(__doc__) 11 | 12 | # Author: Vlad Niculae 13 | # License: Simplified BSD 14 | 15 | import numpy as np 16 | 17 | import matplotlib.pyplot as plt 18 | 19 | from sklearn.linear_model import Ridge 20 | from sklearn.kernel_ridge import KernelRidge 21 | from sklearn.datasets import load_boston 22 | from sklearn.cross_validation import train_test_split 23 | from sklearn.metrics.scorer import mean_squared_error_scorer 24 | from sklearn.preprocessing import StandardScaler 25 | 26 | from polylearn import PolynomialNetworkRegressor 27 | 28 | boston = load_boston() 29 | X, y = boston.data, boston.target 30 | data_split = train_test_split(X, y, test_size=100, random_state=0) 31 | X_train, X_test, y_train, y_test = data_split 32 | 33 | # Scale both the features (X) and the target (y) to zero mean, unit variance 34 | # (This is not necessary but makes the plots clearer) 35 | 36 | scaler_X = StandardScaler(with_mean=True, with_std=True) 37 | X_train_sc = scaler_X.fit_transform(X_train) 38 | X_test_sc = scaler_X.transform(X_test) 39 | 40 | scaler_y = StandardScaler(with_mean=True, with_std=True) 41 | y_train_sc = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel() 42 | y_test_sc = scaler_y.transform(y_test.reshape(-1, 1)).ravel() 43 | 44 | n_alphas = 50 45 | alphas = np.logspace(-1, 8, n_alphas) 46 | ridge = Ridge(fit_intercept=True) 47 | kernel_ridge = KernelRidge(kernel='poly', gamma=1, degree=3, coef0=1) 48 | 49 | test_scores_ridge = [] 50 | test_scores_kernel = [] 51 | 52 | for alpha in alphas: 53 | ridge.set_params(alpha=alpha) 54 | ridge.fit(X_train_sc, y_train_sc) 55 | test_mse = mean_squared_error_scorer(ridge, X_test_sc, y_test_sc) 56 | test_scores_ridge.append(test_mse) 57 | 58 | kernel_ridge.set_params(alpha=alpha) 59 | kernel_ridge.fit(X_train_sc, y_train_sc) 60 | test_mse = mean_squared_error_scorer(kernel_ridge, X_test_sc, y_test_sc) 61 | test_scores_kernel.append(test_mse) 62 | 63 | 64 | poly = PolynomialNetworkRegressor(degree=3, n_components=2, tol=1e-3, 65 | warm_start=True, random_state=0) 66 | 67 | test_scores_poly = [] 68 | 69 | for alpha in alphas: 70 | poly.set_params(beta=alpha) 71 | poly.fit(X_train_sc, y_train_sc) 72 | test_mse = mean_squared_error_scorer(poly, X_test_sc, y_test_sc) 73 | test_scores_poly.append(test_mse) 74 | 75 | plt.plot(alphas, test_scores_ridge, label="Linear ridge") 76 | plt.plot(alphas, test_scores_kernel, label="Kernel ridge") 77 | plt.plot(alphas, test_scores_poly, label="Poly. 
network (rank=2)") 78 | plt.ylabel("Negated mean squared error (higher is better)") 79 | plt.xlabel("Regularization amount") 80 | plt.ylim(-1, -0.15) 81 | plt.semilogx() 82 | plt.legend() 83 | plt.show() 84 | -------------------------------------------------------------------------------- /examples/plot_xor.py: -------------------------------------------------------------------------------- 1 | """ 2 | =============================================== 3 | Factorization machine decision boundary for XOR 4 | =============================================== 5 | 6 | Plots the decision function learned by a factorization machine for a noisy 7 | non-linearly separable XOR problem 8 | 9 | This problem is a perfect example of feature interactions. As such, 10 | factorization machines can model it very robustly with a very small number of 11 | parameters. (In this case, n_features * n_components = 2 * 1 = 2 params.) 12 | 13 | Example based on: 14 | http://scikit-learn.org/stable/auto_examples/svm/plot_svm_nonlinear.html 15 | """ 16 | print(__doc__) 17 | 18 | # Author: Vlad Niculae 19 | # License: Simplified BSD 20 | 21 | import numpy as np 22 | import matplotlib.pyplot as plt 23 | from sklearn.svm import NuSVC 24 | 25 | from polylearn import FactorizationMachineClassifier 26 | 27 | xx, yy = np.meshgrid(np.linspace(-3, 3, 500), 28 | np.linspace(-3, 3, 500)) 29 | 30 | rng = np.random.RandomState(42) 31 | X = rng.randn(300, 2) 32 | y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0) 33 | 34 | # XOR is too easy for factorization machines, so add noise :) 35 | flip = rng.randint(300, size=15) 36 | y[flip] = ~y[flip] 37 | 38 | # fit the model 39 | fm = FactorizationMachineClassifier(n_components=1, fit_linear=False, 40 | random_state=0) 41 | fm.fit(X, y) 42 | 43 | # fit a NuSVC for comparison 44 | svc = NuSVC(kernel='poly', degree=2) 45 | svc.fit(X, y) 46 | 47 | # plot the decision function for each datapoint on the grid 48 | Z = fm.decision_function(np.c_[xx.ravel(), yy.ravel()]) 49 | Z = Z.reshape(xx.shape) 50 | 51 | Z_svc = svc.decision_function(np.c_[xx.ravel(), yy.ravel()]) 52 | Z_svc = Z_svc.reshape(xx.shape) 53 | 54 | plt.imshow(Z, interpolation='nearest', 55 | extent=(xx.min(), xx.max(), yy.min(), yy.max()), aspect='auto', 56 | origin='lower', cmap=plt.cm.PuOr_r) 57 | 58 | contour_fm = plt.contour(xx, yy, Z, levels=[0], linewidths=2) 59 | 60 | contour_svc = plt.contour(xx, yy, Z_svc, levels=[0], linestyles='dashed') 61 | 62 | plt.scatter(X[:, 0], X[:, 1], s=30, c=y, cmap=plt.cm.Paired) 63 | plt.xticks(()) 64 | plt.yticks(()) 65 | plt.axis([-3, 3, -3, 3]) 66 | plt.legend((contour_fm.collections[0], contour_svc.collections[0]), 67 | ('rank-1 factorization machine', 'SVC with polynomial kernel')) 68 | plt.show() 69 | -------------------------------------------------------------------------------- /polylearn/__init__.py: -------------------------------------------------------------------------------- 1 | from .factorization_machine import FactorizationMachineRegressor 2 | from .factorization_machine import FactorizationMachineClassifier 3 | from .polynomial_network import PolynomialNetworkRegressor 4 | from .polynomial_network import PolynomialNetworkClassifier 5 | -------------------------------------------------------------------------------- /polylearn/base.py: -------------------------------------------------------------------------------- 1 | # Author: Vlad Niculae 2 | # License: Simplified BSD 3 | 4 | from abc import ABCMeta 5 | import numpy as np 6 | from sklearn.base import BaseEstimator, 
ClassifierMixin, RegressorMixin 7 | from sklearn.preprocessing import LabelBinarizer 8 | from sklearn.utils.validation import check_X_y 9 | from sklearn.utils.multiclass import type_of_target 10 | import six 11 | 12 | from .loss import CLASSIFICATION_LOSSES, REGRESSION_LOSSES 13 | 14 | 15 | class _BasePoly(six.with_metaclass(ABCMeta, BaseEstimator)): 16 | 17 | def _get_loss(self, loss): 18 | # classification losses 19 | if loss not in self._LOSSES: 20 | raise ValueError( 21 | 'Loss function "{}" not supported. The available options ' 22 | 'are: "{}".'.format(loss, 23 | '", "'.join(self._LOSSES))) 24 | return self._LOSSES[loss] 25 | 26 | 27 | class _PolyRegressorMixin(RegressorMixin): 28 | 29 | _LOSSES = REGRESSION_LOSSES 30 | 31 | def _check_X_y(self, X, y): 32 | X, y = check_X_y(X, y, accept_sparse='csc', multi_output=False, 33 | dtype=np.double, y_numeric=True) 34 | y = y.astype(np.double).ravel() 35 | return X, y 36 | 37 | def predict(self, X): 38 | """Predict regression output for the samples in X. 39 | 40 | Parameters 41 | ---------- 42 | X : {array-like, sparse matrix}, shape = [n_samples, n_features] 43 | Samples. 44 | 45 | Returns 46 | ------- 47 | y_pred : array, shape = [n_samples] 48 | Returns predicted values. 49 | """ 50 | return self._predict(X) 51 | 52 | 53 | class _PolyClassifierMixin(ClassifierMixin): 54 | 55 | _LOSSES = CLASSIFICATION_LOSSES 56 | 57 | def decision_function(self, X): 58 | """Compute the output of the factorization machine before thresholding. 59 | 60 | Parameters 61 | ---------- 62 | X : {array-like, sparse matrix}, shape = [n_samples, n_features] 63 | Samples. 64 | 65 | Returns 66 | ------- 67 | y_scores : array, shape = [n_samples] 68 | Returns predicted values. 69 | """ 70 | return self._predict(X) 71 | 72 | def predict(self, X): 73 | """Predict using the factorization machine 74 | 75 | Parameters 76 | ---------- 77 | X : {array-like, sparse matrix}, shape = [n_samples, n_features] 78 | Samples. 79 | 80 | Returns 81 | ------- 82 | y_pred : array, shape = [n_samples] 83 | Returns predicted values. 84 | """ 85 | y_pred = self.decision_function(X) > 0 86 | return self.label_binarizer_.inverse_transform(y_pred) 87 | 88 | def predict_proba(self, X): 89 | """Compute probability estimates for the test samples. 90 | 91 | Only available if `loss='logistic'`. 92 | 93 | Parameters 94 | ---------- 95 | X : {array-like, sparse matrix}, shape = [n_samples, n_features] 96 | Samples. 97 | 98 | Returns 99 | ------- 100 | y_scores : array, shape = [n_samples] 101 | Probability estimates that the samples are from the positive class. 102 | """ 103 | if self.loss == 'logistic': 104 | return 1 / (1 + np.exp(-self.decision_function(X))) 105 | else: 106 | raise ValueError("Probability estimates only available for " 107 | "loss='logistic'. You may use probability " 108 | "calibration methods from scikit-learn instead.") 109 | 110 | def _check_X_y(self, X, y): 111 | 112 | # helpful error message for sklearn < 0.17 113 | is_2d = hasattr(y, 'shape') and len(y.shape) > 1 and y.shape[1] >= 2 114 | 115 | if is_2d or type_of_target(y) != 'binary': 116 | raise TypeError("Only binary targets supported. 
For training " 117 | "multiclass or multilabel models, you may use the " 118 | "OneVsRest or OneVsAll metaestimators in " 119 | "scikit-learn.") 120 | 121 | X, Y = check_X_y(X, y, dtype=np.double, accept_sparse='csc', 122 | multi_output=False) 123 | 124 | self.label_binarizer_ = LabelBinarizer(pos_label=1, neg_label=-1) 125 | y = self.label_binarizer_.fit_transform(Y).ravel().astype(np.double) 126 | return X, y 127 | -------------------------------------------------------------------------------- /polylearn/cd_direct_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: language_level=3 3 | # cython: cdivision=True 4 | # cython: boundscheck=False 5 | # cython: wraparound=False 6 | # 7 | # Author: Vlad Niculae 8 | # License: BSD 9 | 10 | from libc.math cimport fabs 11 | from cython.view cimport array 12 | 13 | from lightning.impl.dataset_fast cimport ColumnDataset 14 | 15 | from .loss_fast cimport LossFunction 16 | from .cd_linear_fast cimport _cd_linear_epoch 17 | 18 | 19 | cdef void _precompute(ColumnDataset X, 20 | double[:, :, ::1] P, 21 | Py_ssize_t order, 22 | double[:, ::1] out, 23 | Py_ssize_t s, 24 | unsigned int degree): 25 | 26 | cdef Py_ssize_t n_samples = X.get_n_samples() 27 | cdef Py_ssize_t n_features = P.shape[2] 28 | 29 | # Data pointers 30 | cdef double* data 31 | cdef int* indices 32 | cdef int n_nz 33 | 34 | cdef Py_ssize_t i, j, ii 35 | cdef unsigned int d 36 | cdef double tmp 37 | 38 | for i in range(n_samples): 39 | out[degree - 1, i] = 0 40 | 41 | for j in range(n_features): 42 | X.get_column_ptr(j, &indices, &data, &n_nz) 43 | for ii in range(n_nz): 44 | i = indices[ii] 45 | out[degree - 1, i] += (data[ii] * P[order, s, j]) ** degree 46 | 47 | 48 | cdef inline double _update(int* indices, 49 | double* data, 50 | int n_nz, 51 | double p_js, 52 | double[:] y, 53 | double[:] y_pred, 54 | LossFunction loss, 55 | unsigned int degree, 56 | double lam, 57 | double beta, 58 | double[:, ::1] D, 59 | double[:] cache_kp): 60 | 61 | cdef double l1_reg = 2 * beta * fabs(lam) 62 | 63 | cdef Py_ssize_t i, ii 64 | 65 | cdef double inv_step_size = 0 66 | 67 | cdef double kp # derivative of the ANOVA kernel 68 | cdef double update = 0 69 | 70 | for ii in range(n_nz): 71 | i = indices[ii] 72 | 73 | if degree == 2: 74 | kp = D[0, i] - p_js * data[ii] 75 | else: # degree == 3: 76 | kp = 0.5 * (D[0, i] ** 2 - D[1, i]) 77 | kp -= p_js * data[ii] * D[0, i] 78 | kp += p_js ** 2 * data[ii] ** 2 79 | 80 | kp *= lam * data[ii] 81 | cache_kp[ii] = kp 82 | 83 | update += loss.dloss(y_pred[i], y[i]) * kp 84 | inv_step_size += kp ** 2 85 | 86 | inv_step_size *= loss.mu 87 | inv_step_size += l1_reg 88 | 89 | update += l1_reg * p_js 90 | update /= inv_step_size 91 | 92 | return update 93 | 94 | 95 | cdef inline double _cd_direct_epoch(double[:, :, ::1] P, 96 | Py_ssize_t order, 97 | ColumnDataset X, 98 | double[:] y, 99 | double[:] y_pred, 100 | double[:] lams, 101 | unsigned int degree, 102 | double beta, 103 | LossFunction loss, 104 | double[:, ::1] D, 105 | double[:] cache_kp): 106 | 107 | cdef Py_ssize_t s, j 108 | cdef double p_old, update, offset 109 | cdef double sum_viol = 0 110 | cdef Py_ssize_t n_components = P.shape[1] 111 | cdef Py_ssize_t n_features = P.shape[2] 112 | 113 | # Data pointers 114 | cdef double* data 115 | cdef int* indices 116 | cdef int n_nz 117 | 118 | for s in range(n_components): 119 | 120 | # initialize the cached ds for this s 121 | _precompute(X, P, order, D, s, 1) 122 | if degree == 3: 123 | 
_precompute(X, P, order, D, s, 2) 124 | 125 | for j in range(n_features): 126 | 127 | X.get_column_ptr(j, &indices, &data, &n_nz) 128 | 129 | # compute coordinate update 130 | p_old = P[order, s, j] 131 | update = _update(indices, data, n_nz, p_old, y, y_pred, 132 | loss, degree, lams[s], beta, D, cache_kp) 133 | P[order, s, j] -= update 134 | sum_viol += fabs(update) 135 | 136 | # Synchronize predictions and ds 137 | for ii in range(n_nz): 138 | i = indices[ii] 139 | 140 | if degree == 3: 141 | D[1, i] -= ((p_old ** 2 - P[order, s, j] ** 2) * 142 | data[ii] ** 2) 143 | 144 | D[0, i] -= update * data[ii] 145 | y_pred[i] -= update * cache_kp[ii] 146 | return sum_viol 147 | 148 | 149 | def _cd_direct_ho(double[:, :, ::1] P not None, 150 | double[:] w not None, 151 | ColumnDataset X, 152 | double[:] col_norm_sq not None, 153 | double[:] y not None, 154 | double[:] y_pred not None, 155 | double[:] lams not None, 156 | unsigned int degree, 157 | double alpha, 158 | double beta, 159 | bint fit_linear, 160 | bint fit_lower, 161 | LossFunction loss, 162 | unsigned int max_iter, 163 | double tol, 164 | int verbose): 165 | 166 | cdef Py_ssize_t n_samples = X.get_n_samples() 167 | cdef unsigned int it 168 | 169 | cdef double viol 170 | cdef bint converged = False 171 | 172 | # precomputed values 173 | cdef double[:, ::1] D = array((degree - 1, n_samples), sizeof(double), 'd') 174 | cdef double[:] cache_kp = array((n_samples,), sizeof(double), 'd') 175 | 176 | for it in range(max_iter): 177 | viol = 0 178 | 179 | if fit_linear: 180 | viol += _cd_linear_epoch(w, X, y, y_pred, col_norm_sq, alpha, loss) 181 | 182 | if fit_lower and degree == 3: # fit degree 2. Will be looped later. 183 | viol += _cd_direct_epoch(P, 1, X, y, y_pred, lams, 2, beta, loss, 184 | D, cache_kp) 185 | 186 | viol += _cd_direct_epoch(P, 0, X, y, y_pred, lams, degree, beta, loss, 187 | D, cache_kp) 188 | 189 | if verbose: 190 | print("Iteration", it + 1, "violation sum", viol) 191 | 192 | if viol < tol: 193 | if verbose: 194 | print("Converged at iteration", it + 1) 195 | converged = True 196 | break 197 | 198 | return converged, it 199 | -------------------------------------------------------------------------------- /polylearn/cd_lifted_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: language_level=3 3 | # cython: cdivision=True 4 | # cython: boundscheck=False 5 | # cython: wraparound=False 6 | # 7 | # Author: Vlad Niculae 8 | # License: BSD 9 | 10 | from libc.math cimport fabs 11 | from cython.view cimport array 12 | 13 | from lightning.impl.dataset_fast cimport ColumnDataset 14 | 15 | from .loss_fast cimport LossFunction 16 | 17 | 18 | def _fast_lifted_predict(double[:, :, ::1] U, 19 | ColumnDataset X, 20 | double[:] out): 21 | 22 | # np.product(safe_sparse_dot(U, X.T), axis=0).sum(axis=0) 23 | # 24 | # a bit of a misnomer, since at least for dense data it's a bit slower, 25 | # but it's more memory efficient. 
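# (editor's note) per sample i this accumulates the lifted prediction
#     out[i] += sum_s prod_t <U[t, s, :], x_i>,
# one factor at a time, so only the two length-n_samples buffers
# `middle` and `inner` below are ever materialized.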
26 | 27 | cdef Py_ssize_t degree = U.shape[0] 28 | cdef Py_ssize_t n_components = U.shape[1] 29 | 30 | cdef Py_ssize_t n_samples = X.get_n_samples() 31 | cdef Py_ssize_t n_features = X.get_n_features() 32 | 33 | cdef double* data 34 | cdef int* indices 35 | cdef int n_nz 36 | 37 | cdef Py_ssize_t i, j, ii 38 | 39 | cdef double[:] middle = array((n_samples,), sizeof(double), 'd') 40 | cdef double[:] inner = array((n_samples,), sizeof(double), 'd') 41 | 42 | for s in range(n_components): 43 | 44 | for i in range(n_samples): 45 | middle[i] = 1 46 | 47 | for t in range(degree): 48 | # inner = np.dot(U[t, s, :], X.T) 49 | 50 | for i in range(n_samples): 51 | inner[i] = 0 52 | 53 | for j in range(n_features): 54 | X.get_column_ptr(j, &indices, &data, &n_nz) 55 | for ii in range(n_nz): 56 | i = indices[ii] 57 | inner[i] += data[ii] * U[t, s, j] 58 | 59 | # middle *= inner 60 | for i in range(n_samples): 61 | middle[i] *= inner[i] 62 | 63 | for i in range(n_samples): 64 | out[i] += middle[i] 65 | 66 | 67 | cdef void _precompute(double[:, :, ::1] U, 68 | ColumnDataset X, 69 | Py_ssize_t s, 70 | Py_ssize_t t, 71 | double[:] out, 72 | double[:] tmp): 73 | 74 | cdef Py_ssize_t degree = U.shape[0] 75 | cdef Py_ssize_t n_components = U.shape[1] 76 | 77 | cdef Py_ssize_t n_samples = X.get_n_samples() 78 | cdef Py_ssize_t n_features = X.get_n_features() 79 | 80 | cdef double* data 81 | cdef int* indices 82 | cdef int n_nz 83 | 84 | cdef Py_ssize_t i, j, ii 85 | 86 | for i in range(n_samples): 87 | out[i] = 1 88 | 89 | for t_prime in range(degree): 90 | 91 | if t == t_prime: 92 | continue 93 | 94 | for i in range(n_samples): 95 | tmp[i] = 0 96 | 97 | for j in range(n_features): 98 | X.get_column_ptr(j, &indices, &data, &n_nz) 99 | for ii in range(n_nz): 100 | i = indices[ii] 101 | tmp[i] += data[ii] * U[t_prime, s, j] 102 | for i in range(n_samples): 103 | out[i] *= tmp[i] 104 | 105 | 106 | def _cd_lifted(double[:, :, ::1] U, 107 | ColumnDataset X, 108 | double[:] y, 109 | double[:] y_pred, 110 | double beta, 111 | LossFunction loss, 112 | int max_iter, 113 | double tol, 114 | int verbose): 115 | 116 | cdef Py_ssize_t n_samples = X.get_n_samples() 117 | cdef Py_ssize_t n_features = X.get_n_features() 118 | cdef Py_ssize_t degree = U.shape[0] 119 | cdef Py_ssize_t n_components = U.shape[1] 120 | cdef Py_ssize_t t, s, j 121 | cdef int it 122 | 123 | cdef double sum_viol 124 | cdef bint converged = False 125 | 126 | cdef double inv_step_size 127 | cdef double update 128 | cdef double u_old 129 | 130 | cdef double[:] xi = array((n_samples,), sizeof(double), 'd') 131 | cdef double[:] tmp = array((n_samples,), sizeof(double), 'd') 132 | 133 | # Data pointers 134 | cdef double* data 135 | cdef int* indices 136 | cdef int n_nz 137 | 138 | for it in range(max_iter): 139 | sum_viol = 0 140 | for t in range(degree): 141 | for s in range(n_components): 142 | _precompute(U, X, s, t, xi, tmp) 143 | for j in range(n_features): 144 | 145 | u_old = U[t, s, j] 146 | X.get_column_ptr(j, &indices, &data, &n_nz) 147 | 148 | inv_step_size = 0 149 | update = 0 150 | 151 | for ii in range(n_nz): 152 | i = indices[ii] 153 | inv_step_size += xi[i] ** 2 * data[ii] ** 2 154 | update += xi[i] * data[ii] * loss.dloss(y_pred[i], 155 | y[i]) 156 | 157 | inv_step_size *= loss.mu 158 | inv_step_size += beta 159 | 160 | update += beta * u_old 161 | update /= inv_step_size 162 | 163 | U[t, s, j] -= update 164 | sum_viol += fabs(update) 165 | 166 | # synchronize predictions 167 | for ii in range(n_nz): 168 | i = indices[ii] 169 | y_pred[i] -= 
data[ii] * xi[i] * update 170 | 171 | if verbose: 172 | print("Iteration", it + 1, "violation sum", sum_viol) 173 | 174 | if sum_viol < tol: 175 | if verbose: 176 | print("Converged at iteration", it + 1) 177 | converged = True 178 | break 179 | 180 | return converged, it 181 | -------------------------------------------------------------------------------- /polylearn/cd_linear_fast.pxd: -------------------------------------------------------------------------------- 1 | # cython: language_level=3 2 | 3 | from lightning.impl.dataset_fast cimport ColumnDataset 4 | from .loss_fast cimport LossFunction 5 | 6 | cpdef double _cd_linear_epoch(double[:] w, ColumnDataset X, 7 | double[:] y, 8 | double[:] y_pred, 9 | double[:] col_norm_sq, 10 | double alpha, 11 | LossFunction loss) 12 | -------------------------------------------------------------------------------- /polylearn/cd_linear_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: language_level=3 3 | # cython: cdivision=True 4 | # cython: boundscheck=False 5 | # cython: wraparound=False 6 | # 7 | # Author: Vlad Niculae 8 | # License: BSD 9 | 10 | from libc.math cimport fabs 11 | 12 | from lightning.impl.dataset_fast cimport ColumnDataset 13 | 14 | from .loss_fast cimport LossFunction 15 | 16 | 17 | cpdef double _cd_linear_epoch(double[:] w, 18 | ColumnDataset X, 19 | double[:] y, 20 | double[:] y_pred, 21 | double[:] col_norm_sq, 22 | double alpha, 23 | LossFunction loss): 24 | 25 | cdef Py_ssize_t i, j, ii 26 | cdef double sum_viol = 0 27 | cdef Py_ssize_t n_features = w.shape[0] 28 | cdef double update 29 | cdef double inv_step_size 30 | 31 | # Data pointers 32 | cdef double* data 33 | cdef int* indices 34 | cdef int n_nz 35 | 36 | for j in range(n_features): 37 | X.get_column_ptr(j, &indices, &data, &n_nz) 38 | 39 | # compute gradient with respect to w_j 40 | update = alpha * w[j] 41 | for ii in range(n_nz): 42 | i = indices[ii] 43 | update += loss.dloss(y_pred[i], y[i]) * data[ii] 44 | 45 | # compute second derivative upper bound 46 | inv_step_size = loss.mu * col_norm_sq[j] + alpha 47 | update /= inv_step_size 48 | 49 | w[j] -= update 50 | sum_viol += fabs(update) 51 | 52 | # update predictions 53 | for ii in range(n_nz): 54 | i = indices[ii] 55 | y_pred[i] -= update * data[ii] 56 | 57 | return sum_viol 58 | -------------------------------------------------------------------------------- /polylearn/factorization_machine.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # Author: Vlad Niculae 4 | # License: Simplified BSD 5 | 6 | import warnings 7 | from abc import ABCMeta, abstractmethod 8 | 9 | import numpy as np 10 | from sklearn.preprocessing import add_dummy_feature 11 | from sklearn.utils import check_random_state 12 | from sklearn.utils.validation import check_array 13 | from sklearn.utils.extmath import safe_sparse_dot, row_norms 14 | import six 15 | 16 | try: 17 | from sklearn.exceptions import NotFittedError 18 | except ImportError: 19 | class NotFittedError(ValueError, AttributeError): 20 | pass 21 | 22 | from lightning.impl.dataset_fast import get_dataset 23 | 24 | from .base import _BasePoly, _PolyClassifierMixin, _PolyRegressorMixin 25 | from .kernels import _poly_predict 26 | from .cd_direct_fast import _cd_direct_ho 27 | 28 | 29 | class _BaseFactorizationMachine(six.with_metaclass(ABCMeta, _BasePoly)): 30 | 31 | @abstractmethod 32 | def __init__(self, degree=2, loss='squared', 
n_components=2, alpha=1, 33 | beta=1, tol=1e-6, fit_lower='explicit', fit_linear=True, 34 | warm_start=False, init_lambdas='ones', max_iter=10000, 35 | verbose=False, random_state=None): 36 | self.degree = degree 37 | self.loss = loss 38 | self.n_components = n_components 39 | self.alpha = alpha 40 | self.beta = beta 41 | self.tol = tol 42 | self.fit_lower = fit_lower 43 | self.fit_linear = fit_linear 44 | self.warm_start = warm_start 45 | self.init_lambdas = init_lambdas 46 | self.max_iter = max_iter 47 | self.verbose = verbose 48 | self.random_state = random_state 49 | 50 | def _augment(self, X): 51 | # for factorization machines, we add a dummy column for each order. 52 | 53 | if self.fit_lower == 'augment': 54 | k = 2 if self.fit_linear else 1 55 | for _ in range(self.degree - k): 56 | X = add_dummy_feature(X, value=1) 57 | return X 58 | 59 | def fit(self, X, y): 60 | """Fit factorization machine to training data. 61 | 62 | Parameters 63 | ---------- 64 | X : array-like or sparse, shape = [n_samples, n_features] 65 | Training vectors, where n_samples is the number of samples 66 | and n_features is the number of features. 67 | 68 | y : array-like, shape = [n_samples] 69 | Target values. 70 | 71 | Returns 72 | ------- 73 | self : Estimator 74 | Returns self. 75 | """ 76 | if self.degree > 3: 77 | raise ValueError("FMs with degree >3 not yet supported.") 78 | 79 | X, y = self._check_X_y(X, y) 80 | X = self._augment(X) 81 | n_features = X.shape[1] # augmented 82 | X_col_norms = row_norms(X.T, squared=True) 83 | dataset = get_dataset(X, order="fortran") 84 | rng = check_random_state(self.random_state) 85 | loss_obj = self._get_loss(self.loss) 86 | 87 | if not (self.warm_start and hasattr(self, 'w_')): 88 | self.w_ = np.zeros(n_features, dtype=np.double) 89 | 90 | if self.fit_lower == 'explicit': 91 | n_orders = self.degree - 1 92 | else: 93 | n_orders = 1 94 | 95 | if not (self.warm_start and hasattr(self, 'P_')): 96 | self.P_ = 0.01 * rng.randn(n_orders, self.n_components, n_features) 97 | 98 | if not (self.warm_start and hasattr(self, 'lams_')): 99 | if self.init_lambdas == 'ones': 100 | self.lams_ = np.ones(self.n_components) 101 | elif self.init_lambdas == 'random_signs': 102 | self.lams_ = np.sign(rng.randn(self.n_components)) 103 | else: 104 | raise ValueError("Lambdas must be initialized as ones " 105 | "(init_lambdas='ones') or as random " 106 | "+/- 1 (init_lambdas='random_signs').") 107 | 108 | y_pred = self._get_output(X) 109 | 110 | converged, self.n_iter_ = _cd_direct_ho( 111 | self.P_, self.w_, dataset, X_col_norms, y, y_pred, 112 | self.lams_, self.degree, self.alpha, self.beta, self.fit_linear, 113 | self.fit_lower == 'explicit', loss_obj, self.max_iter, 114 | self.tol, self.verbose) 115 | if not converged: 116 | warnings.warn("Objective did not converge. 
Increase max_iter.") 117 | 118 | return self 119 | 120 | def _get_output(self, X): 121 | y_pred = _poly_predict(X, self.P_[0, :, :], self.lams_, kernel='anova', 122 | degree=self.degree) 123 | 124 | if self.fit_linear: 125 | y_pred += safe_sparse_dot(X, self.w_) 126 | 127 | if self.fit_lower == 'explicit' and self.degree == 3: 128 | # degree cannot currently be > 3 129 | y_pred += _poly_predict(X, self.P_[1, :, :], self.lams_, 130 | kernel='anova', degree=2) 131 | 132 | return y_pred 133 | 134 | def _predict(self, X): 135 | if not hasattr(self, "P_"): 136 | raise NotFittedError("Estimator not fitted.") 137 | X = check_array(X, accept_sparse='csc', dtype=np.double) 138 | X = self._augment(X) 139 | return self._get_output(X) 140 | 141 | 142 | class FactorizationMachineRegressor(_BaseFactorizationMachine, 143 | _PolyRegressorMixin): 144 | """Factorization machine for regression (with squared loss). 145 | 146 | Parameters 147 | ---------- 148 | 149 | degree : int >= 2, default: 2 150 | Degree of the polynomial. Corresponds to the order of feature 151 | interactions captured by the model. Currently only supports 152 | degrees up to 3. 153 | 154 | n_components : int, default: 2 155 | Number of basis vectors to learn, a.k.a. the dimension of the 156 | low-rank parametrization. 157 | 158 | alpha : float, default: 1 159 | Regularization amount for linear term (if ``fit_linear=True``). 160 | 161 | beta : float, default: 1 162 | Regularization amount for higher-order weights. 163 | 164 | tol : float, default: 1e-6 165 | Tolerance for the stopping condition. 166 | 167 | fit_lower : {'explicit'|'augment'|None}, default: 'explicit' 168 | Whether and how to fit lower-order, non-homogeneous terms. 169 | 170 | - 'explicit': fits a separate P directly for each lower order. 171 | 172 | - 'augment': adds the required number of dummy columns (columns 173 | that are 1 everywhere) in order to capture lower-order terms. 174 | Adds ``degree - 2`` columns if ``fit_linear`` is true, or 175 | ``degree - 1`` columns otherwise, to account for the linear term. 176 | 177 | - None: only learns weights for the degree given. If ``degree == 3``, 178 | for example, the model will only have weights for third-order 179 | feature interactions. 180 | 181 | fit_linear : {True|False}, default: True 182 | Whether to fit an explicit linear term to the model, using 183 | coordinate descent. If False, the model can still capture linear 184 | effects if ``fit_lower == 'augment'``. 185 | 186 | warm_start : boolean, optional, default: False 187 | Whether to use the existing solution, if available. Useful for 188 | computing regularization paths or pre-initializing the model. 189 | 190 | init_lambdas : {'ones'|'random_signs'}, default: 'ones' 191 | How to initialize the predictive weights of each learned basis. The 192 | lambdas are not trained; using alternate signs can theoretically 193 | improve performance if the kernel degree is even. The default value 194 | of 'ones' matches the original formulation of factorization machines 195 | (Rendle, 2010). 196 | 197 | To use custom values for the lambdas, ``warm_start`` may be used. 198 | 199 | max_iter : int, optional, default: 10000 200 | Maximum number of passes over the dataset to perform. 201 | 202 | verbose : boolean, optional, default: False 203 | Whether to print debugging information. 204 | 205 | random_state : int seed, RandomState instance, or None (default) 206 | The seed of the pseudo random number generator to use for 207 | initializing the parameters. 
208 | 209 | Attributes 210 | ---------- 211 | 212 | self.P_ : array, shape [n_orders, n_components, n_features] 213 | The learned basis functions. 214 | 215 | ``self.P_[0, :, :]`` is always available, and corresponds to 216 | interactions of order ``self.degree``. 217 | 218 | ``self.P_[i, :, :]`` for i > 0 corresponds to interactions of order 219 | ``self.degree - i``, available only if ``self.fit_lower='explicit'``. 220 | 221 | self.w_ : array, shape [n_features] 222 | The learned linear model, completing the FM. 223 | 224 | Only present if ``self.fit_linear`` is true. 225 | 226 | self.lams_ : array, shape [n_components] 227 | The predictive weights. 228 | 229 | References 230 | ---------- 231 | Polynomial Networks and Factorization Machines: 232 | New Insights and Efficient Training Algorithms. 233 | Mathieu Blondel, Masakazu Ishihata, Akinori Fujino, Naonori Ueda. 234 | In: Proceedings of ICML 2016. 235 | http://mblondel.org/publications/mblondel-icml2016.pdf 236 | 237 | Factorization machines. 238 | Steffen Rendle. 239 | In: Proceedings of IEEE ICDM 2010. 240 | """ 241 | def __init__(self, degree=2, n_components=2, alpha=1, beta=1, tol=1e-6, 242 | fit_lower='explicit', fit_linear=True, warm_start=False, 243 | init_lambdas='ones', max_iter=10000, verbose=False, 244 | random_state=None): 245 | 246 | super(FactorizationMachineRegressor, self).__init__( 247 | degree, 'squared', n_components, alpha, beta, tol, fit_lower, 248 | fit_linear, warm_start, init_lambdas, max_iter, verbose, 249 | random_state) 250 | 251 | 252 | class FactorizationMachineClassifier(_BaseFactorizationMachine, 253 | _PolyClassifierMixin): 254 | """Factorization machine for classification. 255 | 256 | Parameters 257 | ---------- 258 | 259 | degree : int >= 2, default: 2 260 | Degree of the polynomial. Corresponds to the order of feature 261 | interactions captured by the model. Currently only supports 262 | degrees up to 3. 263 | 264 | loss : {'logistic'|'squared_hinge'|'squared'}, default: 'squared_hinge' 265 | Which loss function to use. 266 | 267 | - logistic: L(y, p) = log(1 + exp(-yp)) 268 | 269 | - squared hinge: L(y, p) = max(1 - yp, 0)² 270 | 271 | - squared: L(y, p) = 0.5 * (y - p)² 272 | 273 | n_components : int, default: 2 274 | Number of basis vectors to learn, a.k.a. the dimension of the 275 | low-rank parametrization. 276 | 277 | alpha : float, default: 1 278 | Regularization amount for linear term (if ``fit_linear=True``). 279 | 280 | beta : float, default: 1 281 | Regularization amount for higher-order weights. 282 | 283 | tol : float, default: 1e-6 284 | Tolerance for the stopping condition. 285 | 286 | fit_lower : {'explicit'|'augment'|None}, default: 'explicit' 287 | Whether and how to fit lower-order, non-homogeneous terms. 288 | 289 | - 'explicit': fits a separate P directly for each lower order. 290 | 291 | - 'augment': adds the required number of dummy columns (columns 292 | that are 1 everywhere) in order to capture lower-order terms. 293 | Adds ``degree - 2`` columns if ``fit_linear`` is true, or 294 | ``degree - 1`` columns otherwise, to account for the linear term. 295 | 296 | - None: only learns weights for the degree given. If ``degree == 3``, 297 | for example, the model will only have weights for third-order 298 | feature interactions. 299 | 300 | fit_linear : {True|False}, default: True 301 | Whether to fit an explicit linear term to the model, using 302 | coordinate descent. If False, the model can still capture linear 303 | effects if ``fit_lower == 'augment'``. 
304 | 305 | warm_start : boolean, optional, default: False 306 | Whether to use the existing solution, if available. Useful for 307 | computing regularization paths or pre-initializing the model. 308 | 309 | init_lambdas : {'ones'|'random_signs'}, default: 'ones' 310 | How to initialize the predictive weights of each learned basis. The 311 | lambdas are not trained; using alternate signs can theoretically 312 | improve performance if the kernel degree is even. The default value 313 | of 'ones' matches the original formulation of factorization machines 314 | (Rendle, 2010). 315 | 316 | To use custom values for the lambdas, ``warm_start`` may be used. 317 | 318 | max_iter : int, optional, default: 10000 319 | Maximum number of passes over the dataset to perform. 320 | 321 | verbose : boolean, optional, default: False 322 | Whether to print debugging information. 323 | 324 | random_state : int seed, RandomState instance, or None (default) 325 | The seed of the pseudo random number generator to use for 326 | initializing the parameters. 327 | 328 | Attributes 329 | ---------- 330 | 331 | self.P_ : array, shape [n_orders, n_components, n_features] 332 | The learned basis functions. 333 | 334 | ``self.P_[0, :, :]`` is always available, and corresponds to 335 | interactions of order ``self.degree``. 336 | 337 | ``self.P_[i, :, :]`` for i > 0 corresponds to interactions of order 338 | ``self.degree - i``, available only if ``self.fit_lower='explicit'``. 339 | 340 | self.w_ : array, shape [n_features] 341 | The learned linear model, completing the FM. 342 | 343 | Only present if ``self.fit_linear`` is true. 344 | 345 | self.lams_ : array, shape [n_components] 346 | The predictive weights. 347 | 348 | References 349 | ---------- 350 | Polynomial Networks and Factorization Machines: 351 | New Insights and Efficient Training Algorithms. 352 | Mathieu Blondel, Masakazu Ishihata, Akinori Fujino, Naonori Ueda. 353 | In: Proceedings of ICML 2016. 354 | http://mblondel.org/publications/mblondel-icml2016.pdf 355 | 356 | Factorization machines. 357 | Steffen Rendle. 358 | In: Proceedings of IEEE ICDM 2010. 359 | """ 360 | 361 | def __init__(self, degree=2, loss='squared_hinge', n_components=2, alpha=1, 362 | beta=1, tol=1e-6, fit_lower='explicit', fit_linear=True, 363 | warm_start=False, init_lambdas='ones', max_iter=10000, 364 | verbose=False, random_state=None): 365 | 366 | super(FactorizationMachineClassifier, self).__init__( 367 | degree, loss, n_components, alpha, beta, tol, fit_lower, 368 | fit_linear, warm_start, init_lambdas, max_iter, verbose, 369 | random_state) 370 | -------------------------------------------------------------------------------- /polylearn/kernels.py: -------------------------------------------------------------------------------- 1 | # Author: Vlad Niculae 2 | # License: Simplified BSD 3 | 4 | from sklearn.metrics.pairwise import polynomial_kernel 5 | from sklearn.utils.extmath import safe_sparse_dot 6 | from scipy.sparse import issparse 7 | 8 | import numpy as np 9 | 10 | 11 | def safe_power(X, degree=2): 12 | """Element-wise power supporting both sparse and dense data. 13 | 14 | Parameters 15 | ---------- 16 | X : ndarray or sparse 17 | The array whose entries to raise to the power. 18 | 19 | degree : int, default: 2 20 | The power to which to raise the elements. 
21 | 22 | Returns 23 | ------- 24 | 25 | X_ret : ndarray or sparse 26 | Same shape as X, but (x_ret)_ij = (x)_ij ^ degree 27 | """ 28 | if issparse(X): 29 | if hasattr(X, 'power'): 30 | return X.power(degree) 31 | else: 32 | # old scipy 33 | X = X.copy() 34 | X.data **= degree 35 | return X 36 | else: 37 | return X ** degree 38 | 39 | 40 | def _D(X, P, degree=2): 41 | """The "replacement" part of the homogeneous polynomial kernel. 42 | 43 | D[i, j] = sum_k [(X_ik * P_jk) ** degree] 44 | """ 45 | return safe_sparse_dot(safe_power(X, degree), P.T ** degree) 46 | 47 | 48 | def homogeneous_kernel(X, P, degree=2): 49 | """Convenience alias for homogeneous polynomial kernel between X and P:: 50 | 51 | K_P(x, p) = <x, p> ^ degree 52 | 53 | Parameters 54 | ---------- 55 | X : ndarray of shape (n_samples_1, n_features) 56 | 57 | P : ndarray of shape (n_samples_2, n_features) 58 | 59 | degree : int, default 2 60 | 61 | Returns 62 | ------- 63 | Gram matrix : array of shape (n_samples_1, n_samples_2) 64 | """ 65 | return polynomial_kernel(X, P, degree=degree, gamma=1, coef0=0) 66 | 67 | 68 | def anova_kernel(X, P, degree=2): 69 | """ANOVA kernel between X and P:: 70 | 71 | K_A(x, p) = sum_{i1 < i2 < ... < id} x_i1 p_i1 x_i2 p_i2 ... x_id p_id 72 | 73 | See John Shawe-Taylor and Nello Cristianini, 74 | Kernel Methods for Pattern Analysis section 9.2. 75 | 76 | Parameters 77 | ---------- 78 | X : ndarray of shape (n_samples_1, n_features) 79 | 80 | P : ndarray of shape (n_samples_2, n_features) 81 | 82 | degree : int, default 2 83 | 84 | Returns 85 | ------- 86 | Gram matrix : array of shape (n_samples_1, n_samples_2) 87 | """ 88 | if degree == 2: 89 | K = homogeneous_kernel(X, P, degree=2) 90 | K -= _D(X, P, degree=2) 91 | K /= 2 92 | elif degree == 3: 93 | K = homogeneous_kernel(X, P, degree=3) 94 | K -= 3 * _D(X, P, degree=2) * _D(X, P, degree=1) 95 | K += 2 * _D(X, P, degree=3) 96 | K /= 6 97 | else: 98 | raise NotImplementedError("ANOVA kernel for degree >= 4 not yet " 99 | "implemented efficiently.") 100 | return K 101 | 102 | 103 | def _poly_predict(X, P, lams, kernel, degree=2): 104 | if kernel == "anova": 105 | K = anova_kernel(X, P, degree) 106 | elif kernel == "poly": 107 | K = homogeneous_kernel(X, P, degree) 108 | else: 109 | raise ValueError(("Unsupported kernel: {}. 
Use one " 110 | "of {{'anova'|'poly'}}").format(kernel)) 111 | 112 | return np.dot(K, lams) 113 | -------------------------------------------------------------------------------- /polylearn/loss.py: -------------------------------------------------------------------------------- 1 | # Author: Vlad Niculae 2 | # License: Simplified BSD 3 | 4 | from .loss_fast import Squared, SquaredHinge, Logistic 5 | 6 | 7 | REGRESSION_LOSSES = { 8 | 'squared': Squared() 9 | } 10 | 11 | CLASSIFICATION_LOSSES = { 12 | 'squared': Squared(), 13 | 'squared_hinge': SquaredHinge(), 14 | 'logistic': Logistic() 15 | } 16 | -------------------------------------------------------------------------------- /polylearn/loss_fast.pxd: -------------------------------------------------------------------------------- 1 | # cython: language_level=3 2 | 3 | cdef class LossFunction: 4 | 5 | cdef double mu 6 | cdef double loss(self, double p, double y) 7 | cdef double dloss(self, double p, double y) 8 | -------------------------------------------------------------------------------- /polylearn/loss_fast.pyx: -------------------------------------------------------------------------------- 1 | # cython: language_level=3 2 | # cython: cdivision=True 3 | 4 | from libc.math cimport log, exp 5 | 6 | cdef class LossFunction: 7 | 8 | cdef double loss(self, double p, double y): 9 | raise NotImplementedError() 10 | 11 | cdef double dloss(self, double p, double y): 12 | raise NotImplementedError() 13 | 14 | 15 | cdef class Squared(LossFunction): 16 | """Squared loss: L(p, y) = 0.5 * (y - p)²""" 17 | 18 | def __init__(self): 19 | self.mu = 1 20 | 21 | cdef double loss(self, double p, double y): 22 | return 0.5 * (p - y) ** 2 23 | 24 | cdef double dloss(self, double p, double y): 25 | return p - y 26 | 27 | 28 | cdef class Logistic(LossFunction): 29 | """Logistic loss: L(p, y) = log(1 + exp(-yp))""" 30 | 31 | def __init__(self): 32 | self.mu = 0.25 33 | 34 | cdef double loss(self, double p, double y): 35 | cdef double z = p * y 36 | # log(1 + exp(-z)) 37 | if z > 18: 38 | return exp(-z) 39 | if z < -18: 40 | return -z 41 | return log(1.0 + exp(-z)) 42 | 43 | cdef double dloss(self, double p, double y): 44 | cdef double z = p * y 45 | # cdef double tau = 1 / (1 + exp(-z)) 46 | # return y * (tau - 1) 47 | if z > 18.0: 48 | return -y * exp(-z) 49 | if z < -18.0: 50 | return -y 51 | return -y / (exp(z) + 1.0) 52 | 53 | 54 | cdef class SquaredHinge(LossFunction): 55 | """Squared hinge loss: L(p, y) = max(1 - yp, 0)²""" 56 | 57 | def __init__(self): 58 | self.mu = 2 59 | 60 | cdef double loss(self, double p, double y): 61 | cdef double z = 1 - p * y 62 | if z > 0: 63 | return z * z 64 | return 0.0 65 | 66 | cdef double dloss(self, double p, double y): 67 | cdef double z = 1 - p * y 68 | if z > 0: 69 | return -2 * y * z 70 | return 0.0 71 | -------------------------------------------------------------------------------- /polylearn/polynomial_network.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | """Polynomial networks for regression and classification.""" 4 | 5 | # Author: Vlad Niculae 6 | # License: Simplified BSD 7 | 8 | import warnings 9 | from abc import ABCMeta, abstractmethod 10 | 11 | import numpy as np 12 | from sklearn.preprocessing import add_dummy_feature 13 | from sklearn.utils import check_random_state 14 | from sklearn.utils.validation import check_array 15 | import six 16 | 17 | try: 18 | from sklearn.exceptions import NotFittedError 19 | except ImportError: 20 | 
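# (editor's note) fallback shim for scikit-learn < 0.18, mirroring the
# base classes of sklearn.exceptions.NotFittedError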
class NotFittedError(ValueError, AttributeError): 21 | pass 22 | 23 | from lightning.impl.dataset_fast import get_dataset 24 | 25 | from .base import _BasePoly, _PolyClassifierMixin, _PolyRegressorMixin 26 | from .cd_lifted_fast import _cd_lifted, _fast_lifted_predict 27 | 28 | 29 | def _lifted_predict(U, dataset): 30 | out = np.zeros(dataset.get_n_samples(), dtype=np.double) 31 | _fast_lifted_predict(U, dataset, out) 32 | return out 33 | 34 | 35 | class _BasePolynomialNetwork(six.with_metaclass(ABCMeta, _BasePoly)): 36 | @abstractmethod 37 | def __init__(self, degree=2, loss='squared', n_components=5, beta=1, 38 | tol=1e-6, fit_lower='augment', warm_start=False, 39 | max_iter=10000, verbose=False, random_state=None): 40 | self.degree = degree 41 | self.loss = loss 42 | self.n_components = n_components 43 | self.beta = beta 44 | self.tol = tol 45 | self.fit_lower = fit_lower 46 | self.warm_start = warm_start 47 | self.max_iter = max_iter 48 | self.verbose = verbose 49 | self.random_state = random_state 50 | 51 | def _augment(self, X): 52 | # for polynomial nets, we add a single dummy column 53 | if self.fit_lower == 'augment': 54 | X = add_dummy_feature(X, value=1) 55 | return X 56 | 57 | def fit(self, X, y): 58 | """Fit polynomial network to training data. 59 | 60 | Parameters 61 | ---------- 62 | X : array-like or sparse, shape = [n_samples, n_features] 63 | Training vectors, where n_samples is the number of samples 64 | and n_features is the number of features. 65 | 66 | y : array-like, shape = [n_samples] 67 | Target values. 68 | 69 | Returns 70 | ------- 71 | self : Estimator 72 | Returns self. 73 | """ 74 | if self.fit_lower == 'explicit': 75 | raise NotImplementedError('Explicit fitting of lower orders ' 76 | 'not yet implemented for polynomial ' 77 | 'network models.') 78 | 79 | X, y = self._check_X_y(X, y) 80 | X = self._augment(X) 81 | n_features = X.shape[1] # augmented 82 | dataset = get_dataset(X, order="fortran") 83 | rng = check_random_state(self.random_state) 84 | loss_obj = self._get_loss(self.loss) 85 | 86 | if not (self.warm_start and hasattr(self, 'U_')): 87 | self.U_ = 0.01 * rng.randn(self.degree, self.n_components, 88 | n_features) 89 | 90 | y_pred = _lifted_predict(self.U_, dataset) 91 | 92 | converged, self.n_iter_ = _cd_lifted( 93 | self.U_, dataset, y, y_pred, self.beta, loss_obj, self.max_iter, 94 | self.tol, self.verbose) 95 | 96 | if not converged: 97 | warnings.warn("Objective did not converge. Increase max_iter.") 98 | 99 | return self 100 | 101 | def _predict(self, X): 102 | if not hasattr(self, "U_"): 103 | raise NotFittedError("Estimator not fitted.") 104 | 105 | X = check_array(X, accept_sparse='csc', dtype=np.double) 106 | X = self._augment(X) 107 | X = get_dataset(X, order='fortran') 108 | return _lifted_predict(self.U_, X) 109 | 110 | 111 | class PolynomialNetworkRegressor(_BasePolynomialNetwork, _PolyRegressorMixin): 112 | """Polynomial network for regression (with squared loss). 113 | 114 | Parameters 115 | ---------- 116 | 117 | degree : int >= 2, default: 2 118 | Degree of the polynomial. Corresponds to the order of feature 119 | interactions captured by the model. Currently only supports 120 | degrees up to 3. 121 | 122 | n_components : int, default: 2 123 | Dimension of the lifted tensor. 124 | 125 | beta : float, default: 1 126 | Regularization amount for higher-order weights. 127 | 128 | tol : float, default: 1e-6 129 | Tolerance for the stopping condition. 
130 | 131 | fit_lower : {'augment'|None}, default: 'augment' 132 | Whether and how to fit lower-order, non-homogeneous terms. 133 | 134 | - 'augment': adds a dummy column (1 everywhere) in order to capture 135 | lower-order terms (including linear terms). 136 | 137 | - None: only learns weights for the degree given. 138 | 139 | warm_start : boolean, optional, default: False 140 | Whether to use the existing solution, if available. Useful for 141 | computing regularization paths or pre-initializing the model. 142 | 143 | max_iter : int, optional, default: 10000 144 | Maximum number of passes over the dataset to perform. 145 | 146 | verbose : boolean, optional, default: False 147 | Whether to print debugging information. 148 | 149 | random_state : int seed, RandomState instance, or None (default) 150 | The seed of the pseudo random number generator to use for 151 | initializing the parameters. 152 | 153 | Attributes 154 | ---------- 155 | 156 | self.U_ : array, shape [degree, n_components, n_features] 157 | The learned weights in the lifted tensor parametrization. 158 | 159 | References 160 | ---------- 161 | Polynomial Networks and Factorization Machines: 162 | New Insights and Efficient Training Algorithms. 163 | Mathieu Blondel, Masakazu Ishihata, Akinori Fujino, Naonori Ueda. 164 | In: Proceedings of ICML 2016. 165 | http://mblondel.org/publications/mblondel-icml2016.pdf 166 | 167 | On the computational efficiency of training neural networks. 168 | Roi Livni, Shai Shalev-Shwartz, Ohad Shamir. 169 | In: Proceedings of NIPS 2014. 170 | """ 171 | 172 | def __init__(self, degree=2, n_components=2, beta=1, tol=1e-6, 173 | fit_lower='augment', warm_start=False, 174 | max_iter=10000, verbose=False, random_state=None): 175 | 176 | super(PolynomialNetworkRegressor, self).__init__( 177 | degree, 'squared', n_components, beta, tol, fit_lower, 178 | warm_start, max_iter, verbose, random_state) 179 | 180 | 181 | class PolynomialNetworkClassifier(_BasePolynomialNetwork, 182 | _PolyClassifierMixin): 183 | """Polynomial network for classification. 184 | 185 | Parameters 186 | ---------- 187 | 188 | degree : int >= 2, default: 2 189 | Degree of the polynomial. Corresponds to the order of feature 190 | interactions captured by the model. Currently only supports 191 | degrees up to 3. 192 | 193 | loss : {'logistic'|'squared_hinge'|'squared'}, default: 'squared_hinge' 194 | Which loss function to use. 195 | 196 | - logistic: L(y, p) = log(1 + exp(-yp)) 197 | 198 | - squared hinge: L(y, p) = max(1 - yp, 0)² 199 | 200 | - squared: L(y, p) = 0.5 * (y - p)² 201 | 202 | n_components : int, default: 2 203 | Dimension of the lifted tensor. 204 | 205 | beta : float, default: 1 206 | Regularization amount for higher-order weights. 207 | 208 | tol : float, default: 1e-6 209 | Tolerance for the stopping condition. 210 | 211 | fit_lower : {'augment'|None}, default: 'augment' 212 | Whether and how to fit lower-order, non-homogeneous terms. 213 | 214 | - 'augment': adds a dummy column (1 everywhere) in order to capture 215 | lower-order terms (including linear terms). 216 | 217 | - None: only learns weights for the degree given. 218 | 219 | warm_start : boolean, optional, default: False 220 | Whether to use the existing solution, if available. Useful for 221 | computing regularization paths or pre-initializing the model. 222 | 223 | max_iter : int, optional, default: 10000 224 | Maximum number of passes over the dataset to perform. 
225 | 226 | verbose : boolean, optional, default: False 227 | Whether to print debugging information. 228 | 229 | random_state : int seed, RandomState instance, or None (default) 230 | The seed of the pseudo random number generator to use for 231 | initializing the parameters. 232 | 233 | Attributes 234 | ---------- 235 | 236 | self.U_ : array, shape [degree, n_components, n_features] 237 | The learned weights in the lifted tensor parametrization. 238 | 239 | References 240 | ---------- 241 | Polynomial Networks and Factorization Machines: 242 | New Insights and Efficient Training Algorithms. 243 | Mathieu Blondel, Masakazu Ishihata, Akinori Fujino, Naonori Ueda. 244 | In: Proceedings of ICML 2016. 245 | http://mblondel.org/publications/mblondel-icml2016.pdf 246 | 247 | On the computational efficiency of training neural networks. 248 | Roi Livni, Shai Shalev-Shwartz, Ohad Shamir. 249 | In: Proceedings of NIPS 2014. 250 | """ 251 | 252 | def __init__(self, degree=2, loss='squared_hinge', n_components=2, beta=1, 253 | tol=1e-6, fit_lower='augment', warm_start=False, 254 | max_iter=10000, verbose=False, random_state=None): 255 | 256 | super(PolynomialNetworkClassifier, self).__init__( 257 | degree, loss, n_components, beta, tol, fit_lower, 258 | warm_start, max_iter, verbose, random_state) 259 | -------------------------------------------------------------------------------- /polylearn/setup.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | import numpy 4 | 5 | 6 | def configuration(parent_package='', top_path=None): 7 | from numpy.distutils.misc_util import Configuration 8 | 9 | config = Configuration('polylearn', parent_package, top_path) 10 | 11 | config.add_extension('loss_fast', sources=['loss_fast.cpp'], 12 | include_dirs=[numpy.get_include()]) 13 | 14 | config.add_extension('cd_direct_fast', sources=['cd_direct_fast.cpp'], 15 | include_dirs=[numpy.get_include()]) 16 | 17 | config.add_extension('cd_linear_fast', sources=['cd_linear_fast.cpp'], 18 | include_dirs=[numpy.get_include()]) 19 | 20 | config.add_extension('cd_lifted_fast', sources=['cd_lifted_fast.cpp'], 21 | include_dirs=[numpy.get_include()]) 22 | 23 | config.add_subpackage('tests') 24 | 25 | return config 26 | 27 | 28 | if __name__ == '__main__': 29 | from numpy.distutils.core import setup 30 | setup(**configuration(top_path='').todict()) 31 | -------------------------------------------------------------------------------- /polylearn/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/polylearn/4dd9d4b8aca029628a4c934829526b8552db2e1b/polylearn/tests/__init__.py -------------------------------------------------------------------------------- /polylearn/tests/test_cd_linear.py: -------------------------------------------------------------------------------- 1 | from nose.tools import assert_less_equal, assert_greater_equal 2 | from numpy.testing import assert_array_almost_equal 3 | 4 | import numpy as np 5 | from sklearn.utils.validation import assert_all_finite 6 | from polylearn.cd_linear_fast import _cd_linear_epoch 7 | from polylearn.loss_fast import Squared, SquaredHinge, Logistic 8 | from lightning.impl.dataset_fast import get_dataset 9 | 10 | rng = np.random.RandomState(0) 11 | X = rng.randn(50, 10) 12 | w_true = rng.randn(10) 13 | 14 | y = np.dot(X, w_true) 15 | X_ds = get_dataset(X, order='fortran') 16 | X_col_norm_sq = (X ** 2).sum(axis=0) 17 | 18 | 
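# ---------------------------------------------------------------------------
# (editor's sketch, not part of the original test suite) One epoch of the
# linear coordinate-descent update exercised by these tests, in plain NumPy
# and specialized to the squared loss (mu = 1); `_cd_linear_epoch` performs
# the same update column-by-column over the Fortran-ordered dataset.
def _numpy_linear_epoch(w, X, y, y_pred, col_norm_sq, alpha):
    sum_viol = 0.0
    for j in range(X.shape[1]):
        x_j = X[:, j]
        # gradient of sum_i 0.5 * (y_pred_i - y_i) ** 2 + 0.5 * alpha * ||w||^2
        # with respect to w_j
        grad_j = alpha * w[j] + np.dot(y_pred - y, x_j)
        # step size from the curvature upper bound mu * ||x_j||^2 + alpha
        update = grad_j / (col_norm_sq[j] + alpha)
        w[j] -= update
        y_pred -= update * x_j  # keep predictions synchronized
        sum_viol += abs(update)
    return sum_viol
# ---------------------------------------------------------------------------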
n_iter = 100 19 | 20 | 21 | def _fit_linear(X, y, alpha, n_iter, loss, callback=None): 22 | n_samples, n_features = X.shape 23 | X_col_norm_sq = (X ** 2).sum(axis=0) 24 | X_ds = get_dataset(X, order='fortran') 25 | w_init = np.zeros(n_features) 26 | y_pred = np.zeros(n_samples) 27 | 28 | for _ in range(n_iter): 29 | viol = _cd_linear_epoch(w_init, X_ds, y, y_pred, X_col_norm_sq, 30 | alpha, loss) 31 | if callback is not None: 32 | callback(w_init, viol) 33 | return w_init 34 | 35 | 36 | class Callback(object): 37 | def __init__(self, X, y, alpha): 38 | self.X = X 39 | self.y = y 40 | self.alpha = alpha 41 | 42 | self.losses_ = [] 43 | 44 | def __call__(self, w, viol): 45 | y_pred = np.dot(self.X, w) 46 | lv = np.mean((y_pred - self.y) ** 2) 47 | lv += 2 * self.alpha * np.sum(w ** 2) 48 | self.losses_.append(lv) 49 | 50 | 51 | def test_cd_linear_fit(): 52 | loss = Squared() 53 | alpha = 1e-6 54 | cb = Callback(X, y, alpha) 55 | w = _fit_linear(X, y, alpha, n_iter, loss, cb) 56 | 57 | assert_array_almost_equal(w_true, w) 58 | assert_less_equal(cb.losses_[1], cb.losses_[0]) 59 | assert_less_equal(cb.losses_[-1], cb.losses_[0]) 60 | 61 | 62 | def check_cd_linear_clf(loss): 63 | alpha = 1e-3 64 | y_bin = np.sign(y) 65 | 66 | w = _fit_linear(X, y_bin, alpha, n_iter, loss) 67 | y_pred = np.dot(X, w) 68 | accuracy = np.mean(np.sign(y_pred) == y_bin) 69 | 70 | assert_greater_equal(accuracy, 0.97, 71 | msg="classification loss {}".format(loss)) 72 | 73 | 74 | def test_cd_linear_clf(): 75 | for loss in (Squared(), SquaredHinge(), Logistic()): 76 | yield check_cd_linear_clf, loss 77 | 78 | 79 | def test_cd_linear_offset(): 80 | loss = Squared() 81 | alpha = 1e-3 82 | w_a = np.zeros_like(w_true) 83 | w_b = np.zeros_like(w_true) 84 | 85 | n_samples = X.shape[0] 86 | y_pred_a = np.zeros(n_samples) 87 | y_pred_b = np.zeros(n_samples) 88 | y_offset = np.arange(n_samples).astype(np.double) 89 | 90 | # one epoch with offset 91 | _cd_linear_epoch(w_a, X_ds, y, y_pred_a + y_offset, X_col_norm_sq, alpha, 92 | loss) 93 | 94 | # one epoch with shifted target 95 | _cd_linear_epoch(w_b, X_ds, y - y_offset, y_pred_b, X_col_norm_sq, alpha, 96 | loss) 97 | 98 | assert_array_almost_equal(w_a, w_b) 99 | 100 | 101 | def test_cd_linear_trivial(): 102 | # trivial example that failed due to gh#4 103 | loss = Squared() 104 | alpha = 1e-5 105 | n_features = 100 106 | x = np.zeros((1, n_features)) 107 | x[0, 1] = 1 108 | y = np.ones(1) 109 | cb = Callback(x, y, alpha) 110 | w = _fit_linear(x, y, alpha, n_iter=20, loss=loss, callback=cb) 111 | 112 | assert_all_finite(w) 113 | assert_all_finite(cb.losses_) -------------------------------------------------------------------------------- /polylearn/tests/test_common.py: -------------------------------------------------------------------------------- 1 | from nose import SkipTest 2 | from nose.tools import assert_raises, assert_greater 3 | from nose.tools import assert_equal 4 | import numpy as np 5 | from numpy.testing import assert_array_almost_equal 6 | from scipy.sparse import csc_matrix 7 | 8 | from polylearn import (PolynomialNetworkClassifier, PolynomialNetworkRegressor, 9 | FactorizationMachineClassifier, 10 | FactorizationMachineRegressor) 11 | 12 | 13 | def test_check_estimator(): 14 | # TODO: classifiers that provide predict_proba but are not multiclass fail 15 | # No trivial way to use OneVsRestClassifier even if it actually works. 
16 | 17 | try: 18 | from sklearn.utils.estimator_checks import check_estimator 19 | except ImportError: 20 | raise SkipTest('Common scikit-learn tests not available. ' 21 | 'You must be running an older version of scikit-learn.') 22 | yield check_estimator, PolynomialNetworkRegressor 23 | # FM Regressor fails because 5 iter is not enough :( 24 | # yield check_estimator, FactorizationMachineRegressor 25 | 26 | 27 | X = np.array([[-10, -10], [-10, 10], [10, -10], [10, 10]]) 28 | y = np.array(['true', 'false', 'false', 'true']) 29 | 30 | 31 | def check_classify_xor(Clf): 32 | """Tests that the classifier can solve XOR""" 33 | clf = Clf(tol=1e-2, fit_lower=None, random_state=0) 34 | 35 | # temporary workaround until fit_linear is implemented 36 | try: 37 | clf.set_params(fit_linear=False) 38 | except ValueError: 39 | pass 40 | 41 | assert_equal(clf.fit(X, y).score(X, y), 1.0) 42 | 43 | 44 | def test_classify_xor(): 45 | yield check_classify_xor, PolynomialNetworkClassifier 46 | yield check_classify_xor, FactorizationMachineClassifier 47 | 48 | 49 | def check_predict_proba(Clf): 50 | clf = Clf(loss='logistic', tol=1e-2, random_state=0).fit(X, y) 51 | y_proba = clf.predict_proba(X) 52 | assert_greater(y_proba[0], y_proba[1]) 53 | assert_greater(y_proba[3], y_proba[2]) 54 | 55 | 56 | def test_predict_proba(): 57 | yield check_predict_proba, FactorizationMachineClassifier 58 | yield check_predict_proba, PolynomialNetworkClassifier 59 | 60 | 61 | def check_predict_proba_raises(Clf): 62 | """Test that predict_proba doesn't work with hinge loss""" 63 | pp = Clf(loss='squared_hinge', random_state=0).predict_proba 64 | assert_raises(ValueError, pp, X) 65 | 66 | 67 | def test_predict_proba_raises(): 68 | yield check_predict_proba_raises, FactorizationMachineClassifier 69 | yield check_predict_proba_raises, PolynomialNetworkClassifier 70 | 71 | 72 | def check_loss_raises(Clf): 73 | """Test error on unsupported loss""" 74 | clf = Clf(loss='hinge', random_state=0) 75 | assert_raises(ValueError, clf.fit, X, y) 76 | 77 | 78 | def test_loss_raises(): 79 | yield check_loss_raises, FactorizationMachineClassifier 80 | yield check_loss_raises, PolynomialNetworkClassifier 81 | 82 | 83 | def check_clf_multiclass_error(Clf): 84 | """Test that classifier raises TypeError on multiclass/multilabel y""" 85 | y_ = np.column_stack([y, y]) 86 | clf = Clf(random_state=0) 87 | assert_raises(TypeError, clf.fit, X, y_) 88 | 89 | 90 | def test_clf_multiclass_error(): 91 | yield check_clf_multiclass_error, FactorizationMachineClassifier 92 | yield check_clf_multiclass_error, PolynomialNetworkClassifier 93 | 94 | 95 | def check_clf_float_error(Clf): 96 | """Test that classifier raises TypeError on continuous/float y""" 97 | y_ = [0.1, 0.2, 0.3, 0.4] 98 | clf = Clf(random_state=0) 99 | assert_raises(TypeError, clf.fit, X, y_) 100 | 101 | 102 | def test_clf_float_error(): 103 | yield check_clf_float_error, FactorizationMachineClassifier 104 | yield check_clf_float_error, PolynomialNetworkClassifier 105 | 106 | 107 | def check_not_fitted(Est): 108 | est = Est() 109 | assert_raises(ValueError, est.predict, X) 110 | 111 | 112 | def test_not_fitted(): 113 | yield check_not_fitted, FactorizationMachineClassifier 114 | yield check_not_fitted, PolynomialNetworkClassifier 115 | yield check_not_fitted, FactorizationMachineRegressor 116 | yield check_not_fitted, PolynomialNetworkRegressor 117 | 118 | 119 | def test_augment(): 120 | # The following linearly separable dataset cannot be modeled with just an FM 121 | X_evil = 
np.array([[-1, -1], [1, 1]]) 122 | y_evil = np.array([-1, 1]) 123 | clf = FactorizationMachineClassifier(fit_linear=False, fit_lower=None, 124 | random_state=0) 125 | clf.fit(X_evil, y_evil) 126 | assert_equal(0.5, clf.score(X_evil, y_evil)) 127 | 128 | # However, by adding a dummy feature (a column of all ones), the linear 129 | # effect can be captured. 130 | clf = FactorizationMachineClassifier(fit_linear=False, fit_lower='augment', 131 | random_state=0) 132 | clf.fit(X_evil, y_evil) 133 | assert_equal(1.0, clf.score(X_evil, y_evil)) 134 | 135 | 136 | def check_sparse(Clf): 137 | X_sp = csc_matrix(X) 138 | # simple y that works for both clf and regression 139 | y_simple = [0, 1, 0, 1] 140 | clf = Clf(tol=1e-2, random_state=0) 141 | assert_array_almost_equal(clf.fit(X, y_simple).predict(X), 142 | clf.fit(X_sp, y_simple).predict(X_sp)) 143 | 144 | 145 | def test_sparse(): 146 | yield check_sparse, FactorizationMachineClassifier 147 | yield check_sparse, PolynomialNetworkClassifier 148 | yield check_sparse, FactorizationMachineRegressor 149 | yield check_sparse, PolynomialNetworkRegressor 150 | -------------------------------------------------------------------------------- /polylearn/tests/test_factorization_machine.py: -------------------------------------------------------------------------------- 1 | # Author: Vlad Niculae 2 | # License: Simplified BSD 3 | 4 | import warnings 5 | 6 | from nose.tools import assert_less_equal, assert_equal 7 | 8 | import numpy as np 9 | from numpy.testing import assert_array_almost_equal 10 | 11 | from sklearn.metrics import mean_squared_error 12 | from sklearn.utils.testing import assert_warns_message 13 | 14 | from polylearn.kernels import _poly_predict 15 | from polylearn import FactorizationMachineRegressor 16 | from polylearn import FactorizationMachineClassifier 17 | 18 | 19 | def cd_direct_slow(X, y, lams=None, degree=2, n_components=5, beta=1., 20 | n_iter=10, tol=1e-5, verbose=False, random_state=None): 21 | from sklearn.utils import check_random_state 22 | from polylearn.kernels import anova_kernel 23 | 24 | n_samples, n_features = X.shape 25 | 26 | rng = check_random_state(random_state) 27 | P = 0.01 * rng.randn(n_components, n_features) 28 | if lams is None: 29 | lams = np.ones(n_components) 30 | 31 | K = anova_kernel(X, P, degree=degree) 32 | pred = np.dot(lams, K.T) 33 | 34 | mu = 1 # squared loss 35 | converged = False 36 | 37 | for i in range(n_iter): 38 | sum_viol = 0 39 | for s in range(n_components): 40 | ps = P[s] 41 | for j in range(n_features): 42 | 43 | # trivial approach: 44 | # multilinearity allows us to isolate the term with ps_j * x_j 45 | x = X[:, j] 46 | notj_mask = np.arange(n_features) != j 47 | X_notj = X[:, notj_mask] 48 | ps_notj = ps[notj_mask] 49 | 50 | if degree == 2: 51 | grad_y = lams[s] * x * np.dot(X_notj, ps_notj) 52 | elif degree == 3: 53 | grad_y = lams[s] * x * anova_kernel(np.atleast_2d(ps_notj), 54 | X_notj, degree=2) 55 | else: 56 | raise NotImplementedError("Degree > 3 not supported.") 57 | 58 | l1_reg = 2 * beta * np.abs(lams[s]) 59 | inv_step_size = mu * (grad_y ** 2).sum() + l1_reg 60 | 61 | dloss = pred - y # squared loss 62 | step = (dloss * grad_y).sum() + l1_reg * ps[j] 63 | step /= inv_step_size 64 | 65 | P[s, j] -= step 66 | sum_viol += np.abs(step) 67 | 68 | # stupidly recompute all predictions. No rush yet. 
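# (editor's note) the Cython path in cd_direct_fast.pyx avoids this full
# kernel recomputation: it caches the per-sample kernel derivatives
# (cache_kp) and the power sums (D), and patches y_pred in place after
# every coordinate step.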
69 | K = anova_kernel(X, P, degree=degree) 70 | pred = np.dot(lams, K.T) 71 | 72 | reg_obj = beta * np.sum((P ** 2).sum(axis=1) * np.abs(lams)) 73 | 74 | if verbose: 75 | print("Epoch", i, "violations", sum_viol, "obj", 76 | 0.5 * ((pred - y) ** 2).sum() + reg_obj) 77 | 78 | if sum_viol < tol: 79 | converged = True 80 | break 81 | 82 | if not converged: 83 | warnings.warn("Objective did not converge. Increase max_iter.") 84 | 85 | return P 86 | 87 | 88 | n_components = 5 89 | n_features = 4 90 | n_samples = 20 91 | 92 | rng = np.random.RandomState(1) 93 | 94 | X = rng.randn(n_samples, n_features) 95 | P = rng.randn(n_components, n_features) 96 | 97 | lams = rng.randn(n_components) 98 | 99 | 100 | def test_augment(): 101 | """Test that augmenting the data increases the dimension as expected""" 102 | y = _poly_predict(X, P, lams, kernel="anova", degree=3) 103 | fm = FactorizationMachineRegressor(degree=3, fit_lower='augment', 104 | fit_linear=True, tol=0.1) 105 | fm.fit(X, y) 106 | assert_equal(n_features + 1, fm.P_.shape[2], 107 | msg="Augmenting is wrong with explicit linear term.") 108 | 109 | fm.set_params(fit_linear=False) 110 | fm.fit(X, y) 111 | assert_equal(n_features + 2, fm.P_.shape[2], 112 | msg="Augmenting is wrong with augmented linear term.") 113 | 114 | 115 | def check_fit(degree): 116 | y = _poly_predict(X, P, lams, kernel="anova", degree=degree) 117 | 118 | est = FactorizationMachineRegressor(degree=degree, n_components=5, 119 | fit_linear=False, fit_lower=None, 120 | max_iter=15000, beta=1e-6, tol=1e-3, 121 | random_state=0) 122 | est.fit(X, y) 123 | y_pred = est.predict(X) 124 | err = mean_squared_error(y, y_pred) 125 | 126 | assert_less_equal( 127 | err, 128 | 1e-6, 129 | msg="Error {} too big for degree {}.".format(err, degree)) 130 | 131 | 132 | def test_fit(): 133 | yield check_fit, 2 134 | yield check_fit, 3 135 | 136 | 137 | def check_improve(degree): 138 | y = _poly_predict(X, P, lams, kernel="anova", degree=degree) 139 | 140 | est = FactorizationMachineRegressor(degree=degree, n_components=5, 141 | fit_lower=None, fit_linear=False, 142 | beta=0.0001, max_iter=5, tol=0, 143 | random_state=0) 144 | with warnings.catch_warnings(): 145 | warnings.simplefilter("ignore") 146 | y_pred_5 = est.fit(X, y).predict(X) 147 | est.set_params(max_iter=10) 148 | y_pred_10 = est.fit(X, y).predict(X) 149 | 150 | assert_less_equal(mean_squared_error(y, y_pred_10), 151 | mean_squared_error(y, y_pred_5), 152 | msg="More iterations do not improve fit.") 153 | 154 | 155 | def test_improve(): 156 | yield check_improve, 2 157 | yield check_improve, 3 158 | 159 | 160 | def check_overfit(degree): 161 | noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=degree) 162 | noisy_y += 5. 
* rng.randn(noisy_y.shape[0]) 163 | X_train, X_test = X[:10], X[10:] 164 | y_train, y_test = noisy_y[:10], noisy_y[10:] 165 | 166 | # weak regularization, should overfit 167 | est = FactorizationMachineRegressor(degree=degree, n_components=5, 168 | fit_linear=False, fit_lower=None, 169 | beta=1e-4, tol=0.01, random_state=0) 170 | y_train_pred_weak = est.fit(X_train, y_train).predict(X_train) 171 | y_test_pred_weak = est.predict(X_test) 172 | 173 | est.set_params(beta=10) # high value of beta -> strong regularization 174 | y_train_pred_strong = est.fit(X_train, y_train).predict(X_train) 175 | y_test_pred_strong = est.predict(X_test) 176 | 177 | assert_less_equal(mean_squared_error(y_train, y_train_pred_weak), 178 | mean_squared_error(y_train, y_train_pred_strong), 179 | msg="Training error does not get worse with regul.") 180 | 181 | assert_less_equal(mean_squared_error(y_test, y_test_pred_strong), 182 | mean_squared_error(y_test, y_test_pred_weak), 183 | msg="Test error does not get better with regul.") 184 | 185 | 186 | def test_overfit(): 187 | yield check_overfit, 2 188 | yield check_overfit, 3 189 | 190 | 191 | def test_convergence_warning(): 192 | y = _poly_predict(X, P, lams, kernel="anova", degree=3) 193 | 194 | est = FactorizationMachineRegressor(degree=3, beta=1e-8, max_iter=1, 195 | random_state=0) 196 | assert_warns_message(UserWarning, "converge", est.fit, X, y) 197 | 198 | 199 | def test_random_starts(): 200 | noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=2) 201 | noisy_y += 5. * rng.randn(noisy_y.shape[0]) 202 | X_train, X_test = X[:10], X[10:] 203 | y_train, y_test = noisy_y[:10], noisy_y[10:] 204 | 205 | scores = [] 206 | # init_lambdas='ones' is important to reduce variance here 207 | reg = FactorizationMachineRegressor(degree=2, n_components=n_components, 208 | beta=5, fit_lower=None, 209 | fit_linear=False, max_iter=2000, 210 | init_lambdas='ones', tol=0.001) 211 | for k in range(10): 212 | reg.set_params(random_state=k) 213 | y_pred = reg.fit(X_train, y_train).predict(X_test) 214 | scores.append(mean_squared_error(y_test, y_pred)) 215 | 216 | assert_less_equal(np.std(scores), 0.001) 217 | 218 | 219 | def check_same_as_slow(degree): 220 | y = _poly_predict(X, P, lams, kernel="anova", degree=degree) 221 | 222 | reg = FactorizationMachineRegressor(degree=degree, n_components=5, 223 | fit_lower=None, fit_linear=False, 224 | beta=1, warm_start=False, tol=1e-3, 225 | max_iter=5, random_state=0) 226 | 227 | with warnings.catch_warnings(): 228 | warnings.simplefilter('ignore') 229 | reg.fit(X, y) 230 | 231 | P_fit_slow = cd_direct_slow(X, y, lams=reg.lams_, degree=degree, 232 | n_components=5, beta=1, n_iter=5, 233 | tol=1e-3, random_state=0) 234 | 235 | assert_array_almost_equal(reg.P_[0, :, :], P_fit_slow, decimal=4) 236 | 237 | 238 | def test_same_as_slow(): 239 | yield check_same_as_slow, 2 240 | yield check_same_as_slow, 3 241 | 242 | 243 | def check_classification_losses(loss, degree): 244 | y = np.sign(_poly_predict(X, P, lams, kernel="anova", degree=degree)) 245 | clf = FactorizationMachineClassifier(degree=degree, loss=loss, beta=1e-3, 246 | fit_lower=None, fit_linear=False, 247 | tol=1e-3, random_state=0) 248 | clf.fit(X, y) 249 | assert_equal(1.0, clf.score(X, y)) 250 | 251 | 252 | def test_classification_losses(): 253 | for loss in ('squared_hinge', 'logistic'): 254 | for degree in (2, 3): 255 | yield check_classification_losses, loss, degree 256 | 257 | 258 | def check_warm_start(degree): 259 | y = _poly_predict(X, P, lams, kernel="anova", 
degree=degree) 260 | # Result should be the same if: 261 | # (a) running 10 iterations 262 | clf_10 = FactorizationMachineRegressor(degree=degree, n_components=5, 263 | fit_lower=None, fit_linear=False, 264 | max_iter=10, warm_start=False, 265 | random_state=0) 266 | with warnings.catch_warnings(): 267 | warnings.simplefilter("ignore") 268 | clf_10.fit(X, y) 269 | 270 | # (b) running 5 iterations and 5 more 271 | clf_5_5 = FactorizationMachineRegressor(degree=degree, n_components=5, 272 | fit_lower=None, fit_linear=False, 273 | max_iter=5, warm_start=True, 274 | random_state=0) 275 | with warnings.catch_warnings(): 276 | warnings.simplefilter("ignore") 277 | clf_5_5.fit(X, y) 278 | P_fit = clf_5_5.P_.copy() 279 | lams_fit = clf_5_5.lams_.copy() 280 | clf_5_5.fit(X, y) 281 | 282 | # (c) running 5 iterations when starting from previous point. 283 | clf_5 = FactorizationMachineRegressor(degree=degree, n_components=5, 284 | fit_lower=None, fit_linear=False, 285 | max_iter=5, warm_start=True, 286 | random_state=0) 287 | clf_5.P_ = P_fit 288 | clf_5.lams_ = lams_fit 289 | with warnings.catch_warnings(): 290 | warnings.simplefilter("ignore") 291 | clf_5.fit(X, y) 292 | 293 | assert_array_almost_equal(clf_10.P_, clf_5_5.P_) 294 | assert_array_almost_equal(clf_10.P_, clf_5.P_) 295 | 296 | # Prediction results should also be the same if: 297 | # (note: could not get this test to work for the exact P_.) 298 | 299 | noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=2) 300 | noisy_y += rng.randn(noisy_y.shape[0]) 301 | X_train, X_test = X[:10], X[10:] 302 | y_train, y_test = noisy_y[:10], noisy_y[10:] 303 | 304 | beta_low = 0.5 305 | beta = 0.1 306 | beta_hi = 1 307 | ref = FactorizationMachineRegressor(degree=degree, n_components=5, 308 | fit_linear=False, fit_lower=None, 309 | beta=beta, max_iter=20000, 310 | random_state=0) 311 | ref.fit(X_train, y_train) 312 | y_pred_ref = ref.predict(X_test) 313 | 314 | # (a) starting from a lower beta than (b), decreasing and refitting 315 | from_low = FactorizationMachineRegressor(degree=degree, n_components=5, 316 | fit_lower=None, fit_linear=False, 317 | beta=beta_low, warm_start=True, 318 | random_state=0) 319 | from_low.fit(X_train, y_train) 320 | from_low.set_params(beta=beta) 321 | from_low.fit(X_train, y_train) 322 | y_pred_low = from_low.predict(X_test) 323 | 324 | # (b) starting from higher beta, decreasing and refitting 325 | from_hi = FactorizationMachineRegressor(degree=degree, n_components=5, 326 | fit_lower=None, fit_linear=False, 327 | beta=beta_hi, warm_start=True, 328 | random_state=0) 329 | from_hi.fit(X_train, y_train) 330 | from_hi.set_params(beta=beta) 331 | from_hi.fit(X_train, y_train) 332 | y_pred_hi = from_hi.predict(X_test) 333 | 334 | assert_array_almost_equal(y_pred_low, y_pred_ref, decimal=4) 335 | assert_array_almost_equal(y_pred_hi, y_pred_ref, decimal=4) 336 | 337 | 338 | def test_warm_start(): 339 | yield check_warm_start, 2 340 | yield check_warm_start, 3 341 | -------------------------------------------------------------------------------- /polylearn/tests/test_kernels.py: -------------------------------------------------------------------------------- 1 | # Author: Vlad Niculae 2 | # License: Simplified BSD 3 | 4 | from itertools import product, combinations 5 | from functools import reduce 6 | from nose.tools import assert_true, assert_raises 7 | 8 | import numpy as np 9 | from numpy.testing import assert_array_almost_equal 10 | from scipy import sparse as sp 11 | 12 | from polylearn.kernels import homogeneous_kernel, 
13 | from polylearn.kernels import _poly_predict
14 | 
15 | 
16 | def _product(x):
17 |     return reduce(lambda a, b: a * b, x, 1)
18 | 
19 | 
20 | def _power_iter(x, degree):
21 |     return product(*([x] * degree))
22 | 
23 | 
24 | def dumb_homogeneous(x, p, degree=2):
25 |     return sum(_product(x[k] * p[k] for k in ix)
26 |                for ix in _power_iter(range(len(x)), degree))
27 | 
28 | 
29 | def dumb_anova(x, p, degree=2):
30 |     return sum(_product(x[k] * p[k] for k in ix)
31 |                for ix in combinations(range(len(x)), degree))
32 | 
33 | 
34 | n_samples = 5
35 | n_bases = 4
36 | n_features = 7
37 | rng = np.random.RandomState(0)
38 | X = rng.randn(n_samples, n_features)
39 | P = rng.randn(n_bases, n_features)
40 | lams = np.array([2, 1, -1, 3])
41 | 
42 | 
43 | def test_homogeneous():
44 |     for m in range(1, 5):
45 |         expected = np.zeros((n_samples, n_bases))
46 |         for i in range(n_samples):
47 |             for j in range(n_bases):
48 |                 expected[i, j] = dumb_homogeneous(X[i], P[j], degree=m)
49 |         got = homogeneous_kernel(X, P, degree=m)
50 |         assert_array_almost_equal(got, expected, err_msg=(
51 |             "Homogeneous kernel incorrect for degree {}".format(m)))
52 | 
53 | 
54 | def test_anova():
55 |     for m in (2, 3):
56 |         expected = np.zeros((n_samples, n_bases))
57 |         for i in range(n_samples):
58 |             for j in range(n_bases):
59 |                 expected[i, j] = dumb_anova(X[i], P[j], degree=m)
60 |         got = anova_kernel(X, P, degree=m)
61 |         assert_array_almost_equal(got, expected, err_msg=(
62 |             "ANOVA kernel incorrect for degree {}".format(m)))
63 | 
64 | 
65 | def test_anova_ignore_diag_equivalence():
66 |     # predicting using the ANOVA kernel
67 |     K = 2 * anova_kernel(X, P, degree=2)
68 |     y_pred = np.dot(K, lams)
69 | 
70 |     # explicit computation via the lifted matrix Z = P.T diag(lams) P
71 |     Z = np.dot(P.T, (lams[:, np.newaxis] * P))
72 |     y_manual = np.zeros_like(y_pred)
73 |     for i in range(n_samples):
74 |         x = X[i].ravel()
75 |         xx = np.outer(x, x) - np.diag(x ** 2)
76 |         y_manual[i] = np.trace(np.dot(Z.T, xx))
77 | 
78 |     assert_array_almost_equal(y_pred, y_manual)
79 | 
80 | 
81 | def test_safe_power_sparse():
82 |     # TODO maybe move to a util module or something
83 |     # scikit-learn has safe_sqr but not a general power
84 | 
85 |     X_quad = X ** 4
86 |     # assert X stays sparse
87 |     X_sp = sp.csr_matrix(X)
88 |     for sp_format in ('csr', 'csc', 'coo'):  # not working with lil for now
89 |         X_sp = X_sp.asformat(sp_format)
90 |         X_sp_quad = safe_power(X_sp, degree=4)
91 |         assert_true(sp.issparse(X_sp_quad),
92 |                     msg="safe_power breaks {} sparsity".format(sp_format))
93 |         assert_array_almost_equal(X_quad,
94 |                                   X_sp_quad.A,
95 |                                   err_msg="safe_power differs for {} and "
96 |                                           "dense".format(sp_format))
97 | 
98 | 
99 | def test_anova_sparse():
100 |     X_sp = sp.csr_matrix(X)
101 |     for m in (2, 3):
102 |         dense = anova_kernel(X, P, degree=m)
103 |         sparse = anova_kernel(X_sp, P, degree=m)
104 |         assert_array_almost_equal(dense, sparse, err_msg=(
105 |             "ANOVA kernel sparse != dense for degree {}".format(m)))
106 | 
107 | 
108 | def test_predict():
109 |     # predict with the homogeneous kernel
110 |     y_pred_poly = _poly_predict(X, P, lams, kernel='poly', degree=3)
111 |     K = homogeneous_kernel(X, P, degree=3)
112 |     y_pred = np.dot(K, lams)
113 |     assert_array_almost_equal(y_pred_poly, y_pred,
114 |                               err_msg="Homogeneous prediction incorrect.")
115 | 
116 |     # predict with the ANOVA kernel
117 |     y_pred_poly = _poly_predict(X, P, lams, kernel='anova', degree=3)
118 |     K = anova_kernel(X, P, degree=3)
119 |     y_pred = np.dot(K, lams)
120 |     assert_array_almost_equal(y_pred_poly, y_pred,
121 |                               err_msg="ANOVA prediction incorrect.")
122 | 
123 | 
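# A compact identity behind the degree-2 checks above: the ANOVA kernel
# satisfies A2(x, p) = ((x . p)**2 - (x**2 . p**2)) / 2, i.e. the squared
# linear kernel with its "diagonal" terms removed and halved. A minimal
# illustrative sketch of that identity; this helper is an added example,
# not part of the original suite:
def example_anova_degree2_closed_form():
    linear = np.dot(X, P.T)                   # <x_i, p_j> for every pair
    squared = np.dot(X ** 2, (P ** 2).T)      # sum_k x_ik**2 * p_jk**2
    expected = 0.5 * (linear ** 2 - squared)  # drop diagonal terms, halve
    assert_array_almost_equal(anova_kernel(X, P, degree=2), expected)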
124 | def test_unsupported_degree(): 125 | assert_raises(NotImplementedError, anova_kernel, X, P, degree=4) 126 | 127 | 128 | def test_unsupported_kernel(): 129 | assert_raises(ValueError, _poly_predict, X, P, lams, kernel='rbf') 130 | -------------------------------------------------------------------------------- /polylearn/tests/test_polynomial_network.py: -------------------------------------------------------------------------------- 1 | # Author: Vlad Niculae 2 | # License: Simplified BSD 3 | 4 | import warnings 5 | 6 | from nose.tools import assert_less_equal, assert_equal 7 | 8 | import numpy as np 9 | from numpy.testing import assert_array_almost_equal 10 | from sklearn.metrics import mean_squared_error 11 | from sklearn.utils.testing import assert_warns_message 12 | from sklearn.utils.extmath import fast_dot 13 | 14 | from lightning.impl.dataset_fast import get_dataset 15 | 16 | from polylearn import PolynomialNetworkClassifier, PolynomialNetworkRegressor 17 | from polylearn.polynomial_network import _lifted_predict as _ds_lifted_predict 18 | 19 | 20 | # to shave off some test seconds, since the data is tiny, we can use this. 21 | def _lifted_predict(U, X): 22 | return np.product(fast_dot(U, X.T), axis=0).sum(axis=0) 23 | 24 | max_degree = 5 25 | n_components = 3 26 | n_features = 7 27 | n_samples = 10 28 | 29 | rng = np.random.RandomState(1) 30 | U = rng.randn(max_degree, n_components, n_features) 31 | X = rng.randn(n_samples, n_features) 32 | 33 | 34 | def cd_lifted_slow(X, y, degree=2, n_components=5, beta=1., n_iter=10000, 35 | tol=1e-5, verbose=False, random_state=None): 36 | from sklearn.utils import check_random_state 37 | 38 | n_samples, n_features = X.shape 39 | rng = check_random_state(random_state) 40 | U = 0.01 * rng.randn(degree, n_components, n_features) 41 | 42 | # homogeneous kernel 43 | pred = np.product(np.dot(U, X.T), axis=0).sum(axis=0) 44 | 45 | mu = 1 # squared loss 46 | converged = False 47 | 48 | for i in range(n_iter): 49 | sum_viol = 0 50 | for t in range(degree): 51 | deg_idx = np.zeros(degree, dtype=np.bool) 52 | deg_idx[t] = True 53 | for s in range(n_components): 54 | xi = np.product(np.dot(U[~deg_idx, s, :], X.T), axis=0) 55 | for j in range(n_features): 56 | x = X[:, j] 57 | 58 | inv_step_size = mu * (xi ** 2 * x ** 2).sum() 59 | inv_step_size += beta 60 | 61 | dloss = pred - y # squared loss 62 | step = (xi * x * dloss).sum() 63 | step += beta * U[t, s, j] 64 | step /= inv_step_size 65 | 66 | U[t, s, j] -= step 67 | sum_viol += np.abs(step) 68 | 69 | # dumb synchronize 70 | pred = np.product(np.dot(U, X.T), axis=0).sum(axis=0) 71 | xi = np.product(np.dot(U[~deg_idx, s, :], X.T), axis=0) 72 | nrm = np.sum(U.ravel() ** 2) 73 | if verbose: 74 | print("Epoch", i, "violations", sum_viol, "loss", 75 | 0.5 * (np.sum((y - pred) ** 2) + beta * nrm)) 76 | 77 | if sum_viol < tol: 78 | converged = True 79 | break 80 | 81 | if not converged: 82 | warnings.warn("Objective did not converge. 
Increase max_iter.") 83 | 84 | return U 85 | 86 | 87 | def test_lifted_predict(): 88 | y_ref = _lifted_predict(U, X) 89 | ds = get_dataset(X, order='fortran') 90 | y = _ds_lifted_predict(U, ds) 91 | assert_array_almost_equal(y_ref, y) 92 | 93 | 94 | def check_fit(degree): 95 | y = _lifted_predict(U[:degree], X) 96 | 97 | est = PolynomialNetworkRegressor(degree=degree, n_components=n_components, 98 | max_iter=50000, beta=0.001, tol=1e-2, 99 | random_state=0) 100 | y_pred = est.fit(X, y).predict(X) 101 | assert_less_equal(mean_squared_error(y, y_pred), 1e-4, 102 | msg="Cannot learn degree {} function.".format(degree)) 103 | 104 | 105 | def test_fit(): 106 | for degree in range(2, max_degree + 1): 107 | yield check_fit, degree 108 | 109 | 110 | def check_improve(degree): 111 | y = _lifted_predict(U[:degree], X) 112 | 113 | common_settings = dict(degree=degree, n_components=n_components, 114 | beta=1e-10, tol=0, random_state=0) 115 | 116 | est_5 = PolynomialNetworkRegressor(max_iter=5, **common_settings) 117 | est_10 = PolynomialNetworkRegressor(max_iter=10, **common_settings) 118 | 119 | with warnings.catch_warnings(): 120 | warnings.simplefilter("ignore") 121 | est_5.fit(X, y) 122 | est_10.fit(X, y) 123 | 124 | y_pred_5 = est_5.predict(X) 125 | y_pred_10 = est_10.predict(X) 126 | 127 | assert_less_equal(mean_squared_error(y, y_pred_10), 128 | mean_squared_error(y, y_pred_5), 129 | msg="More iterations do not improve fit.") 130 | 131 | 132 | def test_improve(): 133 | for degree in range(2, max_degree + 1): 134 | yield check_improve, degree 135 | 136 | 137 | def test_convergence_warning(): 138 | degree = 4 139 | y = _lifted_predict(U[:degree], X) 140 | 141 | est = PolynomialNetworkRegressor(degree=degree, n_components=n_components, 142 | beta=1e-10, max_iter=1, tol=1e-5, 143 | random_state=0) 144 | assert_warns_message(UserWarning, "converge", est.fit, X, y) 145 | 146 | 147 | def test_random_starts(): 148 | # not as strong a test as the direct case! 149 | # using training error here, and a higher threshold. 150 | # We observe the lifted solver reaches rather diff. solutions. 151 | degree = 3 152 | noisy_y = _lifted_predict(U[:degree], X) 153 | noisy_y += 5. 
* rng.randn(noisy_y.shape[0]) 154 | 155 | common_settings = dict(degree=degree, n_components=n_components, 156 | beta=0.01, tol=0.01) 157 | scores = [] 158 | for k in range(5): 159 | est = PolynomialNetworkRegressor(random_state=k, **common_settings) 160 | y_pred = est.fit(X, noisy_y).predict(X) 161 | scores.append(mean_squared_error(noisy_y, y_pred)) 162 | 163 | assert_less_equal(np.std(scores), 1e-4) 164 | 165 | 166 | def check_same_as_slow(degree): 167 | y = _lifted_predict(U[:degree], X) 168 | reg = PolynomialNetworkRegressor(degree=degree, n_components=n_components, 169 | fit_lower=None, beta=1, max_iter=5, 170 | random_state=0) 171 | 172 | with warnings.catch_warnings(): 173 | warnings.simplefilter("ignore") 174 | reg.fit(X, y) 175 | 176 | U_fit_slow = cd_lifted_slow(X, y, degree=degree, 177 | n_components=n_components, beta=1, 178 | random_state=0, n_iter=5) 179 | 180 | assert_array_almost_equal(reg.U_, U_fit_slow) 181 | 182 | 183 | def test_same_as_slow(): 184 | for degree in range(2, max_degree + 1): 185 | yield check_same_as_slow, degree 186 | 187 | 188 | def check_classification_losses(loss, degree): 189 | y = np.sign(_lifted_predict(U[:degree], X)) 190 | 191 | clf = PolynomialNetworkClassifier(degree=degree, n_components=n_components, 192 | loss=loss, beta=1e-4, tol=1e-2, 193 | random_state=0) 194 | clf.fit(X, y) 195 | assert_equal(1.0, clf.score(X, y)) 196 | 197 | 198 | def test_classification_losses(): 199 | for loss in ('squared_hinge', 'logistic'): 200 | for degree in range(2, max_degree + 1): 201 | yield check_classification_losses, loss, degree 202 | 203 | 204 | def check_warm_start(degree): 205 | y = np.sign(_lifted_predict(U[:degree], X)) 206 | # Result should be the same if: 207 | # (a) running 10 iterations 208 | 209 | common_settings = dict(fit_lower=None, degree=degree, n_components=2, 210 | random_state=0) 211 | clf_10 = PolynomialNetworkRegressor(max_iter=10, warm_start=False, 212 | **common_settings) 213 | with warnings.catch_warnings(): 214 | warnings.simplefilter("ignore") 215 | clf_10.fit(X, y) 216 | 217 | # (b) running 5 iterations and 5 more 218 | clf_5_5 = PolynomialNetworkRegressor(max_iter=5, warm_start=True, 219 | **common_settings) 220 | with warnings.catch_warnings(): 221 | warnings.simplefilter("ignore") 222 | clf_5_5.fit(X, y) 223 | U_fit = clf_5_5.U_.copy() 224 | clf_5_5.fit(X, y) 225 | 226 | # (c) running 5 iterations when starting from previous point. 227 | clf_5 = PolynomialNetworkRegressor(max_iter=5, warm_start=True, 228 | **common_settings) 229 | clf_5.U_ = U_fit 230 | with warnings.catch_warnings(): 231 | warnings.simplefilter("ignore") 232 | clf_5.fit(X, y) 233 | 234 | assert_array_almost_equal(clf_10.U_, clf_5_5.U_) 235 | assert_array_almost_equal(clf_10.U_, clf_5.U_) 236 | 237 | # Prediction results should also be the same if: 238 | # (note: could not get this test to work for the exact P_.) 239 | # This test is very flimsy! 
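# Concretely, the check below warm-starts from a slightly perturbed beta
# (0.51 or 0.49), refits at the target beta=0.5, and asserts the predictions
# (almost) match those of a direct fit at beta=0.5.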
240 | 
241 |     y = np.sign(_lifted_predict(U[:degree], X))
242 | 
243 |     beta_low = 0.51  # note: despite the names, beta_low > beta_hi here
244 |     beta = 0.5
245 |     beta_hi = 0.49
246 | 
247 |     common_settings = dict(degree=degree, n_components=n_components,
248 |                            tol=1e-3, random_state=0)
249 |     ref = PolynomialNetworkRegressor(beta=beta, **common_settings)
250 |     ref.fit(X, y)
251 |     y_pred_ref = ref.predict(X)
252 | 
253 |     # (a) warm-starting from a slightly larger beta (0.51), then refitting
254 |     from_low = PolynomialNetworkRegressor(beta=beta_low, warm_start=True,
255 |                                           **common_settings)
256 |     from_low.fit(X, y)
257 |     from_low.set_params(beta=beta)
258 |     from_low.fit(X, y)
259 |     y_pred_low = from_low.predict(X)
260 | 
261 |     # (b) warm-starting from a slightly smaller beta (0.49), then refitting
262 |     from_hi = PolynomialNetworkRegressor(beta=beta_hi, warm_start=True,
263 |                                          **common_settings)
264 |     from_hi.fit(X, y)
265 |     from_hi.set_params(beta=beta)
266 |     from_hi.fit(X, y)
267 |     y_pred_hi = from_hi.predict(X)
268 | 
269 |     decimal = 3
270 |     assert_array_almost_equal(y_pred_low, y_pred_ref, decimal=decimal)
271 |     assert_array_almost_equal(y_pred_hi, y_pred_ref, decimal=decimal)
272 | 
273 | 
274 | def test_warm_start():
275 |     for degree in range(2, max_degree + 1):
276 |         yield check_warm_start, degree
277 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.rst
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os.path
3 | import sys
4 | import setuptools
5 | from numpy.distutils.core import setup
6 | 
7 | 
8 | try:
9 |     import numpy
10 | except ImportError:
11 |     print('numpy is required during installation')
12 |     sys.exit(1)
13 | 
14 | 
15 | DISTNAME = 'polylearn'
16 | DESCRIPTION = ("Factorization machines and polynomial networks "
17 |                "for classification and regression in Python.")
18 | LONG_DESCRIPTION = open('README.rst').read()
19 | MAINTAINER = 'Vlad Niculae'
20 | MAINTAINER_EMAIL = 'vlad@vene.ro'
21 | URL = 'https://contrib.scikit-learn.org/polylearn'
22 | LICENSE = 'Simplified BSD'
23 | DOWNLOAD_URL = 'https://github.com/scikit-learn-contrib/polylearn'
24 | VERSION = '0.1.dev0'
25 | 
26 | 
27 | def configuration(parent_package='', top_path=None):
28 |     from numpy.distutils.misc_util import Configuration
29 | 
30 |     config = Configuration(None, parent_package, top_path)
31 | 
32 |     config.add_subpackage('polylearn')
33 | 
34 |     return config
35 | 
36 | 
37 | if __name__ == '__main__':
38 |     old_path = os.getcwd()
39 |     local_path = os.path.dirname(os.path.abspath(sys.argv[0]))
40 | 
41 |     os.chdir(local_path)
42 |     sys.path.insert(0, local_path)
43 | 
44 |     setup(configuration=configuration,
45 |           name=DISTNAME,
46 |           maintainer=MAINTAINER,
47 |           include_package_data=True,
48 |           install_requires=[
49 |               'six',
50 |               'scikit-learn'
51 |           ],
52 |           maintainer_email=MAINTAINER_EMAIL,
53 |           description=DESCRIPTION,
54 |           license=LICENSE,
55 |           url=URL,
56 |           version=VERSION,
57 |           download_url=DOWNLOAD_URL,
58 |           long_description=LONG_DESCRIPTION,
59 |           zip_safe=False,  # the package can run out of an .egg file
60 |           classifiers=[
61 |               'Intended Audience :: Science/Research',
62 |               'Intended Audience :: Developers', 'License :: OSI Approved',
63 |               'Programming Language :: C', 'Programming Language :: Python',
64 |               'Topic :: Software Development',
65 |               'Topic ::
Scientific/Engineering', 66 | 'Operating System :: Microsoft :: Windows', 67 | 'Operating System :: POSIX', 'Operating System :: Unix', 68 | 'Operating System :: MacOS' 69 | ] 70 | ) --------------------------------------------------------------------------------
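As a closing illustration of what the package assembled by this setup.py exposes, here is a minimal usage sketch. The estimator name and its degree, n_components, and random_state parameters all appear in the test suites above; the data and parameter values here are made up for illustration:

    import numpy as np
    from polylearn import FactorizationMachineRegressor

    rng = np.random.RandomState(0)
    X = rng.randn(20, 5)
    y = X[:, 0] * X[:, 1] + X[:, 2]  # target with a pairwise interaction

    # degree-2 factorization machine, as exercised throughout the tests
    fm = FactorizationMachineRegressor(degree=2, n_components=3,
                                       random_state=0)
    fm.fit(X, y)
    print(fm.predict(X[:3]))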