├── .gitattributes ├── .github └── workflows │ ├── gh_pages.yml │ └── main.yml ├── .gitignore ├── MANIFEST.in ├── Makefile ├── README.rst ├── benchmarks ├── bench_amazon7.py ├── bench_dual_cd.py ├── bench_fista.py ├── bench_sag.py ├── bench_shrinking_dual_cd.py ├── bench_shrinking_primal_cd.py ├── bench_svrg.py └── bench_warm_start.py ├── doc ├── Makefile ├── _static │ └── lightning.css ├── conf.py ├── index.rst ├── intro.rst ├── make.bat ├── references.rst └── requirements.txt ├── download.sh ├── examples ├── README.rst ├── document_classification_news20.py ├── plot_1d_total_variation.py ├── plot_l2_solvers.py ├── plot_robust_regression.py ├── plot_sample_weight.py ├── plot_sgd_loss_functions.py ├── plot_sparse_non_linear.py ├── plot_svrg.py └── trace.py ├── lightning ├── __init__.py ├── classification.py ├── datasets.py ├── impl │ ├── __init__.py │ ├── adagrad.py │ ├── adagrad_fast.pyx │ ├── base.py │ ├── dataset_fast.pxd │ ├── dataset_fast.pyx │ ├── datasets │ │ ├── __init__.py │ │ ├── loaders.py │ │ ├── samples_generator.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_samples_generator.py │ │ └── utils.py │ ├── dual_cd.py │ ├── dual_cd_fast.pyx │ ├── fista.py │ ├── loss_fast.pyx │ ├── penalty.py │ ├── prank.py │ ├── prank_fast.pyx │ ├── primal_cd.py │ ├── primal_cd_fast.pyx │ ├── primal_newton.py │ ├── prox_fast.pxd │ ├── prox_fast.pyx │ ├── randomkit │ │ ├── __init__.py │ │ ├── random_fast.pxd │ │ ├── random_fast.pyx │ │ ├── randomkit.c │ │ ├── randomkit.h │ │ ├── setup.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ └── test_random.py │ ├── sag.py │ ├── sag_fast.pxd │ ├── sag_fast.pyx │ ├── sdca.py │ ├── sdca_fast.pyx │ ├── setup.py │ ├── sgd.py │ ├── sgd_fast.pxd │ ├── sgd_fast.pyx │ ├── svrg.py │ ├── svrg_fast.pyx │ └── tests │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_adagrad.py │ │ ├── test_dataset.py │ │ ├── test_dual_cd.py │ │ ├── test_fista.py │ │ ├── test_penalty.py │ │ ├── test_prank.py │ │ ├── test_primal_cd.py │ │ ├── test_primal_newton.py │ │ ├── test_prox.py │ │ ├── test_sag.py │ │ ├── test_sdca.py │ │ ├── test_sgd.py │ │ ├── test_svrg.py │ │ └── utils.py ├── ranking.py ├── regression.py └── setup.py ├── requirements.txt ├── requirements_build.txt ├── requirements_test.txt └── setup.py /.gitattributes: -------------------------------------------------------------------------------- 1 | /lightning/impl/adagrad_fast.cpp -diff 2 | /lightning/impl/dataset_fast.cpp -diff 3 | /lightning/impl/dual_cd_fast.cpp -diff 4 | /lightning/impl/loss_fast.cpp -diff 5 | /lightning/impl/prank_fast.cpp -diff 6 | /lightning/impl/primal_cd_fast.cpp -diff 7 | /lightning/impl/randomkit/random_fast.cpp -diff 8 | /lightning/impl/sag_fast.cpp -diff 9 | /lightning/impl/sdca_fast.cpp -diff 10 | /lightning/impl/sgd_fast.cpp -diff 11 | /lightning/impl/svrg_fast.cpp -diff 12 | -------------------------------------------------------------------------------- /.github/workflows/gh_pages.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Docs to GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | build-and-deploy-docs: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout repository 13 | uses: actions/checkout@v2 14 | with: 15 | fetch-depth: 3 16 | - name: Install Python 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: '3.9' 20 | - name: Install package 21 | run: | 22 | pip install -r requirements_build.txt 23 | python setup.py install 24 | - name: Build docs 25 | run: | 26 | cd doc 27 | pip 
install -r requirements.txt 28 | make html 29 | - name: Deploy docs 30 | uses: JamesIves/github-pages-deploy-action@4.1.8 31 | with: 32 | branch: gh-pages 33 | folder: ${{ github.workspace }}/doc/_build/html 34 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Package tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | tags: 8 | - '[0-9]+.[0-9]+.[0-9]+*' 9 | pull_request: 10 | branches: 11 | - master 12 | 13 | jobs: 14 | test: 15 | name: ${{ matrix.os }}, Python ${{ matrix.python_version }} (${{ matrix.python_arch }}) 16 | runs-on: ${{ matrix.os }} 17 | timeout-minutes: 30 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | os: 22 | - ubuntu-latest 23 | - macos-latest 24 | - windows-latest 25 | python_version: 26 | - '3.7' 27 | - '3.8' 28 | - '3.9' 29 | - '3.10' 30 | python_arch: 31 | - x64 32 | include: 33 | - os: windows-latest 34 | python_version: '3.7' 35 | python_arch: x86 36 | - os: windows-latest 37 | python_version: '3.8' 38 | python_arch: x86 39 | - os: windows-latest 40 | python_version: '3.9' 41 | python_arch: x86 42 | container: ${{ matrix.os == 'ubuntu-latest' && 'quay.io/pypa/manylinux2014_x86_64' || '' }} 43 | steps: 44 | - name: Checkout repository 45 | uses: actions/checkout@v2 46 | with: 47 | fetch-depth: 3 48 | - name: Install Python 49 | if: matrix.os != 'ubuntu-latest' 50 | uses: actions/setup-python@v2 51 | with: 52 | python-version: ${{ matrix.python_version }} 53 | architecture: ${{ matrix.python_arch }} 54 | - name: Set environment variables 55 | shell: bash 56 | run: | 57 | PY_TAG=${{ matrix.python_version }} 58 | PY_TAG="${PY_TAG//.}" 59 | if [[ $PY_TAG -lt 38 ]]; then 60 | PY_TAG_FULL="cp${PY_TAG}-cp${PY_TAG}m" 61 | else 62 | PY_TAG_FULL="cp${PY_TAG}-cp${PY_TAG}" 63 | fi 64 | if [[ ${{ matrix.os }} == "ubuntu-latest" ]]; then 65 | PLAT_NAME=manylinux2014_x86_64 66 | elif [[ ${{ matrix.os }} == "windows-latest" ]]; then 67 | if [[ ${{ matrix.python_arch }} == "x64" ]]; then 68 | PLAT_NAME=win_amd64 69 | else 70 | PLAT_NAME=win32 71 | fi 72 | else 73 | PLAT_NAME=macosx_10_9_x86_64 74 | fi 75 | PACKAGE_VERSION=$(python -c "import lightning;print(lightning.__version__)") 76 | echo "PY_TAG=$PY_TAG" >> $GITHUB_ENV 77 | echo "PY_TAG_FULL=$PY_TAG_FULL" >> $GITHUB_ENV 78 | echo "PLAT_NAME=$PLAT_NAME" >> $GITHUB_ENV 79 | echo "PACKAGE_NAME=sklearn_contrib_lightning" >> $GITHUB_ENV 80 | echo "PACKAGE_VERSION=$PACKAGE_VERSION" >> $GITHUB_ENV 81 | - name: Modify PATH variable 82 | if: matrix.os == 'ubuntu-latest' 83 | run: echo "/opt/python/${{ env.PY_TAG_FULL }}/bin" >> $GITHUB_PATH 84 | - name: Check Python location 85 | if: matrix.os == 'ubuntu-latest' 86 | shell: bash 87 | run: | 88 | if [[ $(which python) != "/opt/python/${{ env.PY_TAG_FULL }}/bin/python" ]]; then 89 | exit -1 90 | fi 91 | - name: Check Python version 92 | shell: python 93 | run: | 94 | import struct 95 | import sys 96 | 97 | assert sys.version_info[:2] == tuple(map(int, "${{ matrix.python_version }}".split("."))) 98 | assert f"x{struct.calcsize('P') * 8}".replace("32", "86") == "${{ matrix.python_arch }}" 99 | - name: Install package 100 | run: | 101 | python -m pip install --upgrade pip 102 | pip install -r requirements_build.txt -r requirements_test.txt 103 | python setup.py install 104 | - name: Run tests 105 | run: pytest -v --pyargs lightning 106 | - name: Create archive with sources 107 | if: matrix.os == 'ubuntu-latest' && 
matrix.python_version == '3.10' && startsWith(github.ref, 'refs/tags/') 108 | run: python setup.py sdist 109 | - name: Create wheels 110 | if: startsWith(github.ref, 'refs/tags/') 111 | run: | 112 | pip install wheel 113 | python setup.py bdist_wheel --python-tag="cp${{ env.PY_TAG }}" --plat-name=${{ env.PLAT_NAME }} 114 | - name: Run auditwheel 115 | if: matrix.os == 'ubuntu-latest' && startsWith(github.ref, 'refs/tags/') 116 | run: | 117 | pip install auditwheel 118 | auditwheel repair --plat ${{ env.PLAT_NAME }} dist/${{ env.PACKAGE_NAME }}*.whl 119 | mv -f wheelhouse/${{ env.PACKAGE_NAME }}*.whl dist/${{ env.PACKAGE_NAME }}-${{ env.PACKAGE_VERSION }}-${{ env.PY_TAG_FULL }}-${{ env.PLAT_NAME }}.whl 120 | - name: Create GitHub Release 121 | if: startsWith(github.ref, 'refs/tags/') 122 | uses: softprops/action-gh-release@v1 123 | env: 124 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 125 | with: 126 | tag_name: ${{ env.PACKAGE_VERSION }} 127 | name: ${{ env.PACKAGE_VERSION }} 128 | draft: false 129 | prerelease: false 130 | files: | 131 | dist/${{ env.PACKAGE_NAME }}*.whl 132 | dist/*.tar.gz 133 | - name: Create PyPI Release 134 | if: startsWith(github.ref, 'refs/tags/') 135 | shell: bash 136 | env: 137 | TWINE_USERNAME: __token__ 138 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 139 | run: | 140 | pip install twine 141 | rm -f dist/*.egg 142 | twine upload --skip-existing dist/* 143 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.cpp 3 | *.so 4 | *~ 5 | .#* 6 | *.swp 7 | .DS_Store 8 | build 9 | *.egg-info/ 10 | lightning/**/*.html 11 | 12 | dist/ 13 | doc/_build/ 14 | doc/generated/ 15 | doc/auto_examples/ 16 | doc/modules/generated/ 17 | doc/datasets/generated/ 18 | pip-log.txt 19 | lightning.egg-info/ 20 | .coverage 21 | coverage 22 | tags 23 | coverages.zip 24 | samples.zip 25 | doc/coverages.zip 26 | doc/samples.zip 27 | coverages 28 | samples 29 | doc/coverages 30 | doc/samples 31 | 32 | 33 | *.nt.bz2 34 | *.tar.gz 35 | *.tgz 36 | joblib 37 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include README.rst 3 | include Makefile 4 | include requirements*.txt 5 | recursive-include lightning *.c *.h *.pyx *.pxd 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTHON ?= python 2 | PYTEST ?= pytest 3 | DATADIR=$(HOME)/lightning_data 4 | 5 | # Compilation... 6 | 7 | inplace: 8 | $(PYTHON) setup.py build_ext -i 9 | 10 | all: inplace 11 | 12 | clean: 13 | rm -f lightning/impl/*.cpp lightning/impl/*.html 14 | rm -f `find lightning -name "*.pyc"` 15 | rm -f `find lightning -name "*.so"` 16 | 17 | # Tests... 18 | # 19 | test-code: inplace 20 | $(PYTEST) -s -v lightning 21 | 22 | test: test-code 23 | 24 | # Datasets... 
25 | # 26 | datadir: 27 | mkdir -p $(DATADIR) 28 | 29 | # regression 30 | download-abalone: datadir 31 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/abalone_scale 32 | mv abalone_scale $(DATADIR) 33 | 34 | download-cadata: datadir 35 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/cadata 36 | mv cadata $(DATADIR) 37 | 38 | download-cpusmall: datadir 39 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/cpusmall_scale 40 | mv cpusmall_scale $(DATADIR) 41 | 42 | download-space_ga: datadir 43 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/space_ga_scale 44 | mv space_ga_scale $(DATADIR) 45 | 46 | download-YearPredictionMSD: datadir 47 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/YearPredictionMSD.bz2 48 | bunzip2 YearPredictionMSD.bz2 49 | mv YearPredictionMSD $(DATADIR) 50 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/YearPredictionMSD.t.bz2 51 | bunzip2 YearPredictionMSD.t.bz2 52 | mv YearPredictionMSD.t $(DATADIR) 53 | 54 | # binary classification 55 | download-adult: datadir 56 | ./download.sh http://leon.bottou.org/_media/papers/lasvm-adult.tar.bz2 57 | tar xvfj lasvm-adult.tar.bz2 58 | mv adult $(DATADIR) 59 | rm lasvm-adult.tar.bz2 60 | 61 | download-banana: datadir 62 | ./download.sh http://leon.bottou.org/_media/papers/lasvm-banana.tar.bz2 63 | tar xvfj lasvm-banana.tar.bz2 64 | mv banana $(DATADIR) 65 | rm lasvm-banana.tar.bz2 66 | 67 | download-covtype: datadir 68 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/covtype.libsvm.binary.scale.bz2 69 | bunzip2 covtype.libsvm.binary.scale.bz2 70 | mv covtype.libsvm.binary.scale $(DATADIR) 71 | 72 | download-ijcnn: datadir 73 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/ijcnn1.tr.bz2 74 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/ijcnn1.t.bz2 75 | bunzip2 ijcnn1.tr.bz2 76 | bunzip2 ijcnn1.t.bz2 77 | mv ijcnn1* $(DATADIR) 78 | 79 | download-real-sim: datadir 80 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/real-sim.bz2 81 | bunzip2 real-sim.bz2 82 | mv real-sim $(DATADIR)/realsim 83 | 84 | download-reuters: datadir 85 | ./download.sh http://leon.bottou.org/_media/papers/lasvm-reuters.tar.bz2 86 | tar xvfj lasvm-reuters.tar.bz2 87 | mv reuters $(DATADIR) 88 | rm lasvm-reuters.tar.bz2 89 | 90 | download-url: datadir 91 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/url_combined.bz2 92 | bunzip2 url_combined.bz2 93 | mv url_combined $(DATADIR) 94 | 95 | download-waveform: datadir 96 | ./download.sh http://leon.bottou.org/_media/papers/lasvm-waveform.tar.bz2 97 | tar xvfj lasvm-waveform.tar.bz2 98 | mv waveform $(DATADIR) 99 | rm lasvm-waveform.tar.bz2 100 | 101 | download-webspam: datadir 102 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/webspam_wc_normalized_unigram.svm.bz2 103 | bunzip2 webspam_wc_normalized_unigram.svm.bz2 104 | mv webspam_wc_normalized_unigram.svm $(DATADIR)/webspam 105 | 106 | # multi-class 107 | 108 | download-dna: datadir 109 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/dna.scale.tr 110 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/dna.scale.t 111 | mv dna* $(DATADIR) 112 | 113 | download-letter: datadir 114 | ./download.sh 
http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/letter.scale.tr 115 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/letter.scale.t 116 | mv letter* $(DATADIR) 117 | 118 | download-mnist: datadir 119 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/mnist.scale.bz2 120 | bunzip2 mnist.scale.bz2 121 | mv mnist.scale $(DATADIR) 122 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/mnist.scale.t.bz2 123 | bunzip2 mnist.scale.t.bz2 124 | mv mnist.scale.t $(DATADIR) 125 | 126 | download-news20: datadir 127 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/news20.scale.bz2 128 | bunzip2 news20.scale.bz2 129 | mv news20.scale $(DATADIR) 130 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/news20.t.scale.bz2 131 | bunzip2 news20.t.scale.bz2 132 | mv news20.t.scale $(DATADIR) 133 | 134 | download-pendigits: datadir 135 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/pendigits 136 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/pendigits.t 137 | mv pendigits* $(DATADIR) 138 | 139 | download-protein: datadir 140 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/protein.tr.bz2 141 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/protein.t.bz2 142 | bunzip2 protein.tr.bz2 143 | bunzip2 protein.t.bz2 144 | mv protein* $(DATADIR) 145 | 146 | download-rcv1: datadir 147 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/rcv1_train.multiclass.bz2 148 | bunzip2 rcv1_train.multiclass.bz2 149 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/rcv1_test.multiclass.bz2 150 | bunzip2 rcv1_test.multiclass.bz2 151 | mv rcv1* $(DATADIR) 152 | 153 | download-satimage: datadir 154 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/satimage.scale.tr 155 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/satimage.scale.t 156 | mv satimage* $(DATADIR) 157 | 158 | download-sector: datadir 159 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/sector/sector.scale.bz2 160 | bunzip2 sector.scale.bz2 161 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/sector/sector.t.scale.bz2 162 | bunzip2 sector.t.scale.bz2 163 | mv sector* $(DATADIR) 164 | 165 | download-usps: datadir 166 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/usps.bz2 167 | bunzip2 usps.bz2 168 | mv usps $(DATADIR) 169 | ./download.sh http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/usps.t.bz2 170 | bunzip2 usps.t.bz2 171 | mv usps.t $(DATADIR) 172 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | .. image:: https://github.com/scikit-learn-contrib/lightning/actions/workflows/main.yml/badge.svg?branch=master 4 | :target: https://github.com/scikit-learn-contrib/lightning/actions/workflows/main.yml 5 | 6 | .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.200504.svg 7 | :target: https://doi.org/10.5281/zenodo.200504 8 | 9 | lightning 10 | ========== 11 | 12 | lightning is a library for large-scale linear classification, regression and 13 | ranking in Python. 
14 | 15 | Highlights: 16 | 17 | - follows the `scikit-learn <https://scikit-learn.org>`_ API conventions 18 | - natively supports both dense and sparse data representations 19 | - computationally demanding parts implemented in `Cython <https://cython.org>`_ 20 | 21 | Solvers supported: 22 | 23 | - primal coordinate descent 24 | - dual coordinate descent (SDCA, Prox-SDCA) 25 | - SGD, AdaGrad, SAG, SAGA, SVRG 26 | - FISTA 27 | 28 | Example 29 | ------- 30 | 31 | This example shows how to learn a multiclass classifier with group lasso 32 | penalty on the News20 dataset (cf. `Blondel et al. 2013 33 | <http://www.mblondel.org/publications/mblondel-mlj2013.pdf>`_): 34 | 35 | .. code-block:: python 36 | 37 | from sklearn.datasets import fetch_20newsgroups_vectorized 38 | from lightning.classification import CDClassifier 39 | 40 | # Load News20 dataset from scikit-learn. 41 | bunch = fetch_20newsgroups_vectorized(subset="all") 42 | X = bunch.data 43 | y = bunch.target 44 | 45 | # Set classifier options. 46 | clf = CDClassifier(penalty="l1/l2", 47 | loss="squared_hinge", 48 | multiclass=True, 49 | max_iter=20, 50 | alpha=1e-4, 51 | C=1.0 / X.shape[0], 52 | tol=1e-3) 53 | 54 | # Train the model. 55 | clf.fit(X, y) 56 | 57 | # Accuracy 58 | print(clf.score(X, y)) 59 | 60 | # Percentage of selected features 61 | print(clf.n_nonzero(percentage=True)) 62 | 63 | Dependencies 64 | ------------ 65 | 66 | lightning requires Python >= 3.7, setuptools, Joblib, NumPy >= 1.12, SciPy >= 0.19 and 67 | scikit-learn >= 0.19. Building from source also requires Cython and a working C/C++ compiler. To run the tests you will also need pytest. 68 | 69 | Installation 70 | ------------ 71 | 72 | Precompiled binaries for the stable version of lightning are available for the main platforms and can be installed using pip: 73 | 74 | .. code-block:: sh 75 | 76 | pip install sklearn-contrib-lightning 77 | 78 | or conda: 79 | 80 | .. code-block:: sh 81 | 82 | conda install -c conda-forge sklearn-contrib-lightning 83 | 84 | The development version of lightning can be installed from its git repository. In this case it is assumed that you have the git version control system, a working C++ compiler, Cython and the NumPy development libraries. In order to install the development version, type: 85 | 86 | .. code-block:: sh 87 | 88 | git clone https://github.com/scikit-learn-contrib/lightning.git 89 | cd lightning 90 | python setup.py install 91 | 92 | Documentation 93 | ------------- 94 | 95 | http://contrib.scikit-learn.org/lightning/ 96 | 97 | On GitHub 98 | --------- 99 | 100 | https://github.com/scikit-learn-contrib/lightning 101 | 102 | Citing 103 | ------ 104 | 105 | If you use this software, please cite it. Here is a BibTeX snippet that you can use: 106 | 107 | .. code-block:: 108 | 109 | @misc{lightning_2016, 110 | author = {Blondel, Mathieu and 111 | Pedregosa, Fabian}, 112 | title = {{Lightning: large-scale linear classification, 113 | regression and ranking in Python}}, 114 | year = 2016, 115 | doi = {10.5281/zenodo.200504}, 116 | url = {https://doi.org/10.5281/zenodo.200504} 117 | } 118 | 119 | Other citing formats are available in `its Zenodo entry <https://doi.org/10.5281/zenodo.200504>`_.
120 | 121 | Authors 122 | ------- 123 | 124 | - Mathieu Blondel 125 | - Manoj Kumar 126 | - Arnaud Rachez 127 | - Fabian Pedregosa 128 | - Nikita Titov 129 | -------------------------------------------------------------------------------- /benchmarks/bench_amazon7.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import joblib 4 | 5 | from lightning.classification import SDCAClassifier 6 | 7 | if len(sys.argv) == 1: 8 | print(""" 9 | Please enter the path to amazon7_uncompressed_pkl/amazon7.pkl 10 | 11 | Download data from 12 | http://www.mblondel.org/data/amazon7_uncompressed_pkl.tar.bz2 13 | """) 14 | exit() 15 | 16 | data = joblib.load(sys.argv[1], mmap_mode="r") 17 | X = data["X"] 18 | y = data["y"].copy() # copy is needed to modify y. 19 | 20 | y[y >= 1] = 1 # Create a binary classification problem. 21 | 22 | clf = SDCAClassifier(tol=1e-5, max_iter=10, verbose=1) 23 | clf.fit(X, y) 24 | print(clf.score(X, y)) 25 | -------------------------------------------------------------------------------- /benchmarks/bench_dual_cd.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | 5 | from sklearn.datasets import fetch_20newsgroups_vectorized 6 | from lightning.classification import LinearSVC 7 | from lightning.classification import SDCAClassifier 8 | 9 | bunch = fetch_20newsgroups_vectorized(subset="all") 10 | X = bunch.data 11 | y = bunch.target 12 | y[y >= 1] = 1 13 | 14 | alpha = 1e-4 15 | 16 | clf1 = LinearSVC(loss="squared_hinge", C=1.0 / (alpha * X.shape[0]), tol=1e-3, 17 | max_iter=20, random_state=0) 18 | clf2 = SDCAClassifier(loss="squared_hinge", alpha=alpha, tol=1e-6, 19 | max_iter=20, random_state=0) 20 | 21 | 22 | for clf in (clf1, clf2): 23 | print(clf.__class__.__name__) 24 | start = time.time() 25 | clf.fit(X, y) 26 | 27 | print("Training time", time.time() - start) 28 | print("Accuracy", np.mean(clf.predict(X) == y)) 29 | print() 30 | -------------------------------------------------------------------------------- /benchmarks/bench_fista.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | 5 | from sklearn.datasets import fetch_20newsgroups_vectorized 6 | from lightning.classification import FistaClassifier 7 | 8 | bunch = fetch_20newsgroups_vectorized(subset="all") 9 | X = bunch.data 10 | y = bunch.target 11 | y[y >= 1] = 1 12 | 13 | clf = FistaClassifier(C=1./X.shape[0], alpha=1e-5, max_iter=200) 14 | start = time.time() 15 | clf.fit(X, y) 16 | 17 | print("Training time", time.time() - start) 18 | print("Accuracy", np.mean(clf.predict(X) == y)) 19 | print("% non-zero", clf.n_nonzero(percentage=True)) 20 | -------------------------------------------------------------------------------- /benchmarks/bench_sag.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | 5 | from sklearn.datasets import fetch_20newsgroups_vectorized 6 | from lightning.classification import SAGClassifier 7 | 8 | bunch = fetch_20newsgroups_vectorized(subset="all") 9 | X = bunch.data 10 | y = bunch.target 11 | y[y >= 1] = 1 12 | 13 | clf = SAGClassifier(eta=1e-4, alpha=1e-5, tol=1e-3, max_iter=20, verbose=1, 14 | random_state=0) 15 | start = time.time() 16 | clf.fit(X, y) 17 | 18 | print("Training time", time.time() - start) 19 | print("Accuracy", np.mean(clf.predict(X) == y)) 20 | print("% non-zero",
clf.n_nonzero(percentage=True)) 21 | -------------------------------------------------------------------------------- /benchmarks/bench_shrinking_dual_cd.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | 5 | from sklearn.datasets import fetch_20newsgroups_vectorized 6 | from lightning.classification import LinearSVC 7 | 8 | # Load News20 dataset from scikit-learn. 9 | bunch = fetch_20newsgroups_vectorized(subset="all") 10 | X = bunch.data 11 | y = bunch.target 12 | y[y >= 1] = 1 13 | 14 | for shrinking in (True, False): 15 | clf = LinearSVC(C=1.0, loss="hinge", tol=1e-3, 16 | max_iter=1000, shrinking=shrinking, random_state=0) 17 | start = time.time() 18 | clf.fit(X, y) 19 | print("Training time", time.time() - start) 20 | print("Accuracy", clf.score(X, y)) 21 | -------------------------------------------------------------------------------- /benchmarks/bench_shrinking_primal_cd.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | 5 | from sklearn.datasets import fetch_20newsgroups_vectorized 6 | from lightning.classification import CDClassifier 7 | 8 | # Load News20 dataset from scikit-learn. 9 | bunch = fetch_20newsgroups_vectorized(subset="all") 10 | X = bunch.data 11 | y = bunch.target 12 | y[y >= 1] = 1 13 | 14 | for shrinking in (True, False): 15 | clf = CDClassifier(C=1.0, loss="squared_hinge", penalty="l1", tol=1e-3, 16 | max_iter=1000, shrinking=shrinking, random_state=0) 17 | start = time.time() 18 | clf.fit(X, y) 19 | print("Training time", time.time() - start) 20 | print("Accuracy", clf.score(X, y)) 21 | -------------------------------------------------------------------------------- /benchmarks/bench_svrg.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | 5 | from sklearn.datasets import fetch_20newsgroups_vectorized 6 | from lightning.classification import SVRGClassifier 7 | 8 | bunch = fetch_20newsgroups_vectorized(subset="all") 9 | X = bunch.data 10 | y = bunch.target 11 | y[y >= 1] = 1 12 | 13 | clf = SVRGClassifier(eta=0.1, alpha=1e-5, tol=1e-3, max_iter=20, verbose=1, 14 | random_state=0) 15 | start = time.time() 16 | clf.fit(X, y) 17 | 18 | print("Training time", time.time() - start) 19 | print("Accuracy", np.mean(clf.predict(X) == y)) 20 | print("% non-zero", clf.n_nonzero(percentage=True)) 21 | -------------------------------------------------------------------------------- /benchmarks/bench_warm_start.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | 5 | from sklearn.datasets import fetch_20newsgroups_vectorized 6 | from lightning.classification import CDClassifier 7 | 8 | bunch = fetch_20newsgroups_vectorized(subset="all") 9 | X = bunch.data 10 | y = bunch.target 11 | y[y >= 1] = 1 12 | 13 | Cs = np.logspace(-3, 3, 20) 14 | 15 | for warm_start in (True, False): 16 | clf = CDClassifier(loss="squared_hinge", tol=1e-3, max_iter=100, 17 | warm_start=warm_start) 18 | 19 | scores = [] 20 | start = time.time() 21 | for C in Cs: 22 | clf.C = C 23 | clf.fit(X, y) 24 | scores.append(clf.score(X, y)) 25 | 26 | print("Total time", time.time() - start) 27 | print("Average accuracy", np.mean(scores)) 28 | -------------------------------------------------------------------------------- /doc/Makefile:
-------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /doc/_static/lightning.css: -------------------------------------------------------------------------------- 1 | .navbar-text { 2 | display: none !important; 3 | } 4 | 5 | div.body.content { 6 | margin-top: 15px !important; 7 | } 8 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | # 7 | # -*- coding: utf-8 -*- 8 | 9 | from datetime import datetime 10 | 11 | import sphinx_bootstrap_theme 12 | 13 | 14 | # -- Project information ----------------------------------------------------- 15 | 16 | # General information about the project. 17 | project = 'lightning' 18 | author = 'Mathieu Blondel' 19 | copyright = '{}, {}'.format(datetime.now().year, author) 20 | 21 | # The full version, including alpha/beta/rc tags. 22 | try: 23 | from lightning import __version__ 24 | except (ImportError, ModuleNotFoundError) as e: 25 | raise ImportError( 26 | f"You must install '{project}' package itself to build docs for it" 27 | ) from e 28 | 29 | release = __version__ 30 | 31 | # -- General configuration --------------------------------------------------- 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 35 | extensions = [ 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.autosummary', 38 | 'sphinx.ext.viewcode', 39 | 'sphinx.ext.napoleon', 40 | 'sphinx_gallery.gen_gallery', 41 | ] 42 | 43 | # Generate autosummary pages. 44 | autosummary_generate = ["references.rst"] 45 | 46 | autodoc_default_flags = [ 47 | 'members', 48 | 'inherited-members', 49 | 'undoc-members', 50 | ] 51 | autodoc_default_options = { 52 | "members": True, 53 | "inherited-members": True, 54 | "undoc-members": True, 55 | } 56 | 57 | # The master toctree document. 58 | master_doc = 'index' 59 | 60 | # The name of the Pygments (syntax highlighting) style to use. 61 | pygments_style = 'sphinx' 62 | 63 | # List of patterns, relative to source directory, that match files and 64 | # directories to ignore when looking for source files. 65 | # This pattern also affects html_static_path and html_extra_path. 
66 | exclude_patterns = ['_build', '_templates', '_themes', 'Thumbs.db', '.DS_Store'] 67 | 68 | sphinx_gallery_conf = { 69 | 'examples_dirs': '../examples', 70 | 'gallery_dirs': 'auto_examples', 71 | 'filename_pattern' : '.py', 72 | 'plot_gallery': 'True', 73 | } 74 | 75 | # -- Options for HTML output ------------------------------------------------- 76 | 77 | # The theme to use for HTML and HTML Help pages. See the documentation for 78 | # a list of builtin themes. 79 | # 80 | html_theme = 'bootstrap' 81 | html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() 82 | 83 | # Theme options are theme-specific and customize the look and feel of a 84 | # theme further. 85 | html_theme_options = { 86 | # Navigation bar title. (Default: ``project`` value) 87 | 'navbar_title': project, 88 | 89 | # Tab name for entire site. (Default: "Site") 90 | 'navbar_site_name': "Site", 91 | 92 | # A list of tuples containing pages to link to. The value should 93 | # be in the form [(name, page), ..] 94 | 'navbar_links': [ 95 | ('Introduction', 'intro'), 96 | ('References', 'references'), 97 | ('Examples', 'auto_examples/index'), 98 | ], 99 | 100 | # Render the next and previous page links in navbar. (Default: true) 101 | 'navbar_sidebarrel': False, 102 | 103 | # Render the current page's TOC in the navbar. (Default: true) 104 | 'navbar_pagenav': False, 105 | 106 | # Global TOC depth for "site" navbar tab. (Default: 1) 107 | # Switching to -1 shows all levels. 108 | 'globaltoc_depth': 0, 109 | 110 | # Tab name for the current page's TOC. (Default: "Page") 111 | 'navbar_pagenav_name': "Page", 112 | 113 | # Include hidden TOCs in Site navbar? 114 | # 115 | # Note: If this is "false", you cannot have mixed ``:hidden:`` and 116 | # non-hidden ``toctree`` directives in the same page, or else the build 117 | # will break. 118 | # 119 | # Values: "true" (default) or "false" 120 | 'globaltoc_includehidden': "true", 121 | 122 | # HTML navbar class (Default: "navbar") to attach to <div>
element. 123 | # For black navbar, do "navbar navbar-inverse" 124 | 'navbar_class': "navbar", 125 | 126 | # Fix navigation bar to top of page? 127 | # Values: "true" (default) or "false" 128 | 'navbar_fixed_top': "true", 129 | 130 | # Location of link to source. 131 | # Options are "nav" (default), "footer" or anything else to exclude. 132 | 'source_link_position': "None", 133 | 134 | # Bootswatch (http://bootswatch.com/) theme. 135 | # 136 | # Options are nothing with "" (default) or the name of a valid theme 137 | # such as "amelia" or "cosmo". 138 | # 139 | # Note that this is served off CDN, so won't be available offline. 140 | #'bootswatch_theme': "cerulean", 141 | 142 | # Choose Bootstrap version. 143 | # Values: "3" (default) or "2" (in quotes) 144 | 'bootstrap_version': "3", 145 | } 146 | 147 | # Add any paths that contain custom static files (such as style sheets) here, 148 | # relative to this directory. They are copied after the builtin static files, 149 | # so a file named "default.css" will overwrite the builtin "default.css". 150 | html_static_path = ['_static'] 151 | html_css_files = ["lightning.css"] 152 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | 3 | .. toctree:: 4 | :hidden: 5 | 6 | auto_examples/index 7 | references.rst 8 | intro.rst 9 | -------------------------------------------------------------------------------- /doc/intro.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | .. currentmodule:: lightning 5 | 6 | lightning is composed of three modules: classification, regression and ranking. 7 | Several solvers are available from each. 8 | 9 | If you're not sure what solver to use, just go for :class:`classification.CDClassifier` / 10 | :class:`regression.CDRegressor` or :class:`classification.SDCAClassifier` / :class:`regression.SDCARegressor`. They 11 | are very fast and do not require any tedious tuning of a learning rate. 
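For instance, a minimal quick start with :class:`classification.SDCAClassifier` might look as follows (a sketch on synthetic data; the hyper-parameter values are illustrative rather than tuned):

.. code-block:: python

    from sklearn.datasets import make_classification

    from lightning.classification import SDCAClassifier

    # Illustrative synthetic binary classification problem.
    X, y = make_classification(n_samples=1000, n_features=20, random_state=0)

    # alpha controls the regularization strength; no learning rate is needed.
    clf = SDCAClassifier(loss="squared_hinge", alpha=1e-3, max_iter=20,
                         random_state=0)
    clf.fit(X, y)
    print(clf.score(X, y))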
12 | 13 | Primal coordinate descent 14 | ------------------------- 15 | 16 | :class:`classification.CDClassifier`, :class:`regression.CDRegressor` 17 | 18 | - Main idea: update a single coordinate at a time (closed-form update when possible, coordinate-wise gradient descent otherwise) 19 | - Non-smooth losses: No 20 | - Penalties: L2, L1, L1/L2 21 | - Learning rate: No 22 | - Multiclass: one-vs-rest, multiclass logistic, multiclass squared hinge 23 | 24 | Dual coordinate ascent 25 | ---------------------- 26 | 27 | :class:`classification.LinearSVC`, :class:`regression.LinearSVR` (L2-regularization, supports shrinking) 28 | 29 | :class:`classification.SDCAClassifier`, :class:`regression.SDCARegressor` (Elastic-net, supports many losses) 30 | 31 | - Main idea: update a single dual coordinate at a time (closed-form solution available for many loss functions) 32 | - Non-smooth losses: Yes 33 | - Penalties: L2, Elastic-net 34 | - Learning rate: No 35 | - Multiclass: one-vs-rest 36 | 37 | FISTA 38 | ----- 39 | 40 | :class:`classification.FistaClassifier`, :class:`regression.FistaRegressor` 41 | 42 | - Main idea: accelerated proximal gradient method (uses full gradients) 43 | - Non-smooth losses: No 44 | - Penalties: L1, L1/L2, Trace/Nuclear 45 | - Learning rate: No 46 | - Multiclass: one-vs-rest, multiclass logistic, multiclass squared hinge 47 | 48 | Stochastic gradient method (SGD) 49 | -------------------------------- 50 | 51 | :class:`classification.SGDClassifier`, :class:`regression.SGDRegressor` 52 | 53 | - Main idea: replace full gradient with stochastic estimate obtained from a single sample 54 | - Non-smooth losses: Yes 55 | - Penalties: L2, L1, L1/L2 56 | - Learning rate: Yes (very sensitive) 57 | - Multiclass: one-vs-rest, multiclass logistic, multiclass squared hinge 58 | 59 | AdaGrad 60 | ------- 61 | 62 | :class:`classification.AdaGradClassifier`, :class:`regression.AdaGradRegressor` 63 | 64 | - Main idea: use per-feature learning rates (frequently occurring features in the gradients get small learning rates and infrequent features get higher ones) 65 | - Non-smooth losses: Yes 66 | - Penalties: L2, Elastic-net 67 | - Learning rate: Yes (not very sensitive) 68 | - Multiclass: one-vs-rest 69 | 70 | Stochastic averaged gradient (SAG and SAGA) 71 | ------------------------------------------- 72 | 73 | :class:`classification.SAGClassifier`, :class:`classification.SAGAClassifier`, :class:`regression.SAGRegressor`, :class:`regression.SAGARegressor` 74 | 75 | - Main idea: instead of using the full gradient (average of sample-wise gradients), compute gradient for a randomly selected sample and use out-dated gradients for other samples 76 | - Non-smooth losses: Yes (:class:`classification.SAGAClassifier` and :class:`regression.SAGARegressor`) 77 | - Penalties: L1, L2, Elastic-net 78 | - Learning rate: Yes (not very sensitive) 79 | - Multiclass: one-vs-rest 80 | 81 | Stochastic variance-reduced gradient (SVRG) 82 | ------------------------------------------- 83 | 84 | :class:`classification.SVRGClassifier`, :class:`regression.SVRGRegressor` 85 | 86 | - Main idea: compute full gradient periodically and use it to center the gradient estimate (this can be shown to reduce the variance) 87 | - Non-smooth losses: No 88 | - Penalties: L2 89 | - Learning rate: Yes (not very sensitive) 90 | - Multiclass: one-vs-rest 91 | 92 | PRank 93 | ----- 94 | 95 | :class:`ranking.PRank`, :class:`ranking.KernelPRank` 96 | 97 | - Main idea: Perceptron-like algorithm for ordinal regression 98 | - Penalties: L2 99 | 
- Learning rate: No 100 | 101 | 102 | .. toctree:: 103 | :hidden: 104 | 105 | auto_examples/index 106 | references.rst 107 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /doc/references.rst: -------------------------------------------------------------------------------- 1 | .. toctree:: 2 | :maxdepth: 2 3 | 4 | .. _classification: 5 | 6 | Classification 7 | ============== 8 | 9 | This module contains algorithms for solving classification problems. 10 | 11 | .. automodule:: lightning.classification 12 | :no-members: 13 | :no-inherited-members: 14 | 15 | .. currentmodule:: lightning 16 | 17 | .. autosummary:: 18 | :toctree: generated/ 19 | :template: class.rst 20 | 21 | classification.AdaGradClassifier 22 | classification.CDClassifier 23 | classification.FistaClassifier 24 | classification.KernelSVC 25 | classification.LinearSVC 26 | classification.SDCAClassifier 27 | classification.SAGClassifier 28 | classification.SAGAClassifier 29 | classification.SGDClassifier 30 | classification.SVRGClassifier 31 | 32 | .. _regression: 33 | 34 | Regression 35 | ========== 36 | 37 | This module contains algorithms for solving regression problems. 38 | 39 | .. automodule:: lightning.regression 40 | :no-members: 41 | :no-inherited-members: 42 | 43 | .. currentmodule:: lightning 44 | 45 | .. autosummary:: 46 | :toctree: generated/ 47 | :template: class.rst 48 | 49 | regression.AdaGradRegressor 50 | regression.CDRegressor 51 | regression.FistaRegressor 52 | regression.LinearSVR 53 | regression.SAGRegressor 54 | regression.SAGARegressor 55 | regression.SDCARegressor 56 | regression.SGDRegressor 57 | regression.SVRGRegressor 58 | 59 | Ranking 60 | ======= 61 | 62 | This module contains algorithms for solving ranking and ordinal regression problems. 63 | 64 | .. automodule:: lightning.ranking 65 | :no-members: 66 | :no-inherited-members: 67 | 68 | .. currentmodule:: lightning 69 | 70 | .. 
autosummary:: 71 | :toctree: generated/ 72 | :template: class.rst 73 | 74 | ranking.PRank 75 | ranking.KernelPRank 76 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | sphinx 3 | sphinx_bootstrap_theme 4 | sphinx_gallery -------------------------------------------------------------------------------- /download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # download a file with either wget or curl 4 | 5 | if [ "$1foo" = "foo" ]; then 6 | echo "usage: `basename $0` url" 7 | exit 1 8 | fi 9 | 10 | wget_path=`which wget` 11 | 12 | if [ $? = 0 ]; then 13 | $wget_path $1 14 | exit $? 15 | fi 16 | 17 | curl_path=`which curl` 18 | 19 | if [ $? = 0 ]; then 20 | $curl_path -O $1 21 | exit $? 22 | fi 23 | 24 | -------------------------------------------------------------------------------- /examples/README.rst: -------------------------------------------------------------------------------- 1 | Examples 2 | ======== 3 | 4 | These are some examples using the lightning machine learning library. 5 | -------------------------------------------------------------------------------- /examples/document_classification_news20.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================ 3 | Classification of text documents 4 | ================================ 5 | 6 | """ 7 | import numpy as np 8 | 9 | from sklearn.datasets import fetch_20newsgroups_vectorized 10 | from sklearn.model_selection import train_test_split 11 | 12 | from lightning.classification import CDClassifier 13 | from lightning.classification import LinearSVC 14 | from lightning.classification import SGDClassifier 15 | 16 | # Load News20 dataset from scikit-learn. 17 | bunch = fetch_20newsgroups_vectorized(subset="all") 18 | X = bunch.data 19 | y = bunch.target 20 | 21 | # Select a subset of the classes for faster training. 22 | ind = np.arange(X.shape[0]) 23 | subset = y < 5 24 | X = X[ind[subset]] 25 | y = y[subset] 26 | 27 | # Train / test split. 28 | X_tr, X_te, y_tr, y_te = train_test_split(X, y, 29 | train_size=0.75, 30 | test_size=0.25, 31 | random_state=0) 32 | 33 | clfs = (CDClassifier(loss="squared_hinge", 34 | penalty="l2", 35 | max_iter=20, 36 | random_state=0), 37 | 38 | LinearSVC(max_iter=20, 39 | random_state=0), 40 | 41 | SGDClassifier(learning_rate="constant", 42 | alpha=1e-3, 43 | max_iter=20, 44 | random_state=0)) 45 | 46 | for clf in clfs: 47 | print(clf.__class__.__name__) 48 | clf.fit(X_tr, y_tr) 49 | print("score =", clf.score(X_te, y_te)) 50 | -------------------------------------------------------------------------------- /examples/plot_1d_total_variation.py: -------------------------------------------------------------------------------- 1 | """ 2 | ===================================== 3 | Signal recovery by 1D total variation 4 | ===================================== 5 | 6 | In this example, we generate a signal that is piecewise constant. We then 7 | observe random, corrupted measurements of that signal and 8 | try to recover it using L1 and 1D total variation (TV1D) penalties. 9 | 10 | Given a ground-truth vector, the signal that we observe is given by 11 | 12 | y = sign(X ground_truth + noise) 13 | 14 | where X is a random matrix.
We then recover an estimate of the vector ground_truth by solving 15 | an optimization problem using lightning's :class:`lightning.classification.FistaClassifier`. 16 | 17 | The 1D total variation is also known as fused lasso. 18 | """ 19 | # Author: Fabian Pedregosa 20 | 21 | import numpy as np 22 | import matplotlib.pyplot as plt 23 | from lightning.classification import FistaClassifier 24 | from sklearn.model_selection import GridSearchCV 25 | 26 | # generate some synthetic data 27 | n_samples = 200 28 | ground_truth = np.concatenate(( 29 | np.ones(20), - np.ones(20), np.zeros(40))) 30 | n_features = ground_truth.size 31 | np.random.seed(0) # for reproducibility 32 | X = np.random.rand(n_samples, n_features) 33 | # generate y as a linear model, y = sign(X w + noise) 34 | y = np.sign(X.dot(ground_truth) + 0.5 * np.random.randn(n_samples)).astype(int) 35 | 36 | 37 | for penalty in ('l1', 'tv1d'): 38 | clf = FistaClassifier(penalty=penalty) 39 | gs = GridSearchCV(clf, {'alpha': np.logspace(-3, 3, 10)}) 40 | gs.fit(X, y) 41 | coefs = gs.best_estimator_.coef_ 42 | plt.plot(coefs.ravel(), label='%s penalty' % penalty, lw=3) 43 | 44 | plt.plot(ground_truth, lw=3, marker='^', markevery=5, markersize=10, label="ground truth") 45 | plt.grid() 46 | plt.legend() 47 | plt.ylim((-1.5, 1.5)) 48 | plt.show() 49 | -------------------------------------------------------------------------------- /examples/plot_l2_solvers.py: -------------------------------------------------------------------------------- 1 | """ 2 | ===================== 3 | L2 solver comparison 4 | ===================== 5 | 6 | This example compares different solvers with L2 regularization. 7 | """ 8 | print(__doc__) 9 | 10 | import sys 11 | import time 12 | 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | 16 | from sklearn.datasets import make_classification 17 | from sklearn.datasets import fetch_20newsgroups_vectorized 18 | 19 | from lightning.classification import SVRGClassifier 20 | from lightning.classification import SDCAClassifier 21 | from lightning.classification import CDClassifier 22 | from lightning.classification import AdaGradClassifier 23 | from lightning.classification import SAGAClassifier, SAGClassifier 24 | 25 | from lightning.impl.adagrad_fast import _proj_elastic_all 26 | 27 | class Callback(object): 28 | 29 | def __init__(self, X, y): 30 | self.X = X 31 | self.y = y 32 | self.obj = [] 33 | self.times = [] 34 | self.start_time = time.time() 35 | self.test_time = 0 36 | 37 | def __call__(self, clf, t=None): 38 | test_time = time.time() 39 | 40 | if hasattr(clf, "_finalize_coef"): 41 | clf._finalize_coef() 42 | 43 | if t is not None: 44 | _proj_elastic_all(clf.eta, t, clf.g_sum_[0], clf.g_norms_[0], 45 | alpha1=0, alpha2=clf.alpha, delta=0, 46 | w=clf.coef_[0]) 47 | 48 | 49 | y_pred = clf.decision_function(self.X).ravel() 50 | loss = (np.maximum(1 - self.y * y_pred, 0) ** 2).mean() 51 | coef = clf.coef_.ravel() 52 | regul = 0.5 * clf.alpha * np.dot(coef, coef) 53 | self.obj.append(loss + regul) 54 | self.test_time += time.time() - test_time 55 | self.times.append(time.time() - self.start_time - self.test_time) 56 | 57 | try: 58 | dataset = sys.argv[1] 59 | except IndexError: 60 | dataset = "synthetic" 61 | 62 | if dataset == "news20": 63 | bunch = fetch_20newsgroups_vectorized(subset="all") 64 | X = bunch.data 65 | y = bunch.target 66 | y[y >= 1] = 1 67 | alpha = 1e-4 68 | eta_svrg = 1e-1 69 | eta_adagrad = 1 70 | xlim = (0, 20) 71 | 72 | else: 73 | X, y = make_classification(n_samples=10000, 74 | n_features=100, 75 | n_classes=2,
76 | random_state=0) 77 | alpha = 1e-2 78 | eta_svrg = 1e-3 79 | eta_adagrad = 1e-2 80 | xlim = [0, 2] 81 | 82 | y = y * 2 - 1 83 | 84 | # Make sure the methods do not stop prematurely; we want to see 85 | # the full convergence path. 86 | tol = 1e-24 87 | 88 | clf1 = SVRGClassifier(loss="squared_hinge", alpha=alpha, eta=eta_svrg, 89 | n_inner=1.0, max_iter=100, random_state=0, tol=tol) 90 | clf2 = SDCAClassifier(loss="squared_hinge", alpha=alpha, 91 | max_iter=100, n_calls=X.shape[0]//2, random_state=0, tol=tol) 92 | clf3 = CDClassifier(loss="squared_hinge", alpha=alpha, C=1.0/X.shape[0], 93 | max_iter=50, n_calls=X.shape[1]//3, random_state=0, tol=tol) 94 | clf4 = AdaGradClassifier(loss="squared_hinge", alpha=alpha, eta=eta_adagrad, 95 | n_iter=100, n_calls=X.shape[0]//2, random_state=0) 96 | clf5 = SAGAClassifier(loss="squared_hinge", alpha=alpha, 97 | max_iter=100, random_state=0, tol=tol) 98 | clf6 = SAGClassifier(loss="squared_hinge", alpha=alpha, 99 | max_iter=100, random_state=0, tol=tol) 100 | 101 | plt.figure() 102 | 103 | data = {} 104 | for clf, name in ((clf1, "SVRG"), 105 | (clf2, "SDCA"), 106 | (clf3, "PCD"), 107 | (clf4, "AdaGrad"), 108 | (clf5, "SAGA"), 109 | (clf6, "SAG") 110 | ): 111 | print(name) 112 | cb = Callback(X, y) 113 | clf.callback = cb 114 | 115 | if name == "PCD" and hasattr(X, "tocsc"): 116 | clf.fit(X.tocsc(), y) 117 | else: 118 | clf.fit(X, y) 119 | data[name] = (cb.times, np.array(cb.obj)) 120 | 121 | # get best value 122 | fmin = min([np.min(a[1]) for a in data.values()]) 123 | for name in data: 124 | plt.plot(data[name][0], data[name][1] - fmin, label=name, lw=3) 125 | 126 | plt.xlim(xlim) 127 | plt.yscale('log') 128 | plt.xlabel("CPU time") 129 | plt.ylabel("Objective value minus optimum") 130 | plt.legend() 131 | plt.grid() 132 | 133 | plt.show() 134 | -------------------------------------------------------------------------------- /examples/plot_robust_regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================== 3 | Robust regression 4 | ================== 5 | 6 | """ 7 | print(__doc__) 8 | 9 | import numpy as np 10 | import pylab as pl 11 | 12 | from sklearn.datasets import make_regression 13 | from sklearn.utils import check_random_state 14 | from sklearn.linear_model import Ridge 15 | 16 | from lightning.regression import LinearSVR 17 | 18 | # Generate regression data. 19 | X_train, y_train = make_regression(n_samples=15, n_features=1, 20 | n_informative=1, random_state=0) 21 | 22 | # Add noise. 23 | rs = check_random_state(0) 24 | y_train += rs.normal(np.std(y_train), size=X_train.shape[0]) 25 | # Add an outlier. 26 | y_train[5] *= 5 27 | 28 | X_test = np.linspace(-5, 5, 100).reshape(-1, 1) 29 | 30 | pl.figure() 31 | pl.scatter(X_train.ravel(), y_train) 32 | 33 | reg = Ridge(alpha=1e-1) 34 | reg.fit(X_train, y_train) 35 | pl.plot(X_test.ravel(), reg.predict(X_test), label="Ridge") 36 | 37 | # LinearSVR is equivalent to absolute-loss regression (robust regression) 38 | # when epsilon=0.
39 | reg = LinearSVR(C=10, epsilon=0, fit_intercept=True, random_state=0) 40 | reg.fit(X_train, y_train) 41 | pl.plot(X_test.ravel(), reg.predict(X_test), label="Robust") 42 | 43 | pl.legend(loc="upper left") 44 | 45 | pl.show() 46 | -------------------------------------------------------------------------------- /examples/plot_sample_weight.py: -------------------------------------------------------------------------------- 1 | """ 2 | ====================== 3 | SAGA: Weighted samples 4 | ====================== 5 | 6 | Plot the decision function of a weighted dataset, where the size of each 7 | point is proportional to its weight. 8 | 9 | Adapted from scikit-learn's plot_sgd_weighted_samples.py 10 | """ 11 | print(__doc__) 12 | 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | from lightning.impl.sag import SAGAClassifier 16 | 17 | # we create 20 points 18 | np.random.seed(0) 19 | X = np.r_[np.random.randn(10, 2) + [1, 1], np.random.randn(10, 2)] 20 | y = np.array([1] * 10 + [-1] * 10) 21 | sample_weight = 100 * np.abs(np.random.randn(20)) 22 | # and assign a bigger weight to the first 10 samples 23 | sample_weight[:10] *= 10 24 | 25 | # plot the weighted data points 26 | xx, yy = np.meshgrid(np.linspace(-4, 5, 500), np.linspace(-4, 5, 500)) 27 | plt.figure() 28 | plt.scatter(X[:, 0], X[:, 1], c=y, s=sample_weight, alpha=0.9, 29 | cmap=plt.cm.bone) 30 | 31 | # fit the unweighted model 32 | clf = SAGAClassifier(alpha=0.01, loss='log') 33 | clf.fit(X, y) 34 | Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) 35 | Z = Z.reshape(xx.shape) 36 | no_weights = plt.contour(xx, yy, Z, levels=[0], linestyles=['solid']) 37 | 38 | # fit the weighted model 39 | clf = SAGAClassifier(alpha=0.01, loss='log') 40 | clf.fit(X, y, sample_weight=sample_weight) 41 | Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) 42 | Z = Z.reshape(xx.shape) 43 | samples_weights = plt.contour(xx, yy, Z, levels=[0], linestyles=['dashed']) 44 | 45 | plt.legend([no_weights.collections[0], samples_weights.collections[0]], 46 | ["no weights", "with weights"], loc="lower left") 47 | 48 | plt.xticks(()) 49 | plt.yticks(()) 50 | plt.show() -------------------------------------------------------------------------------- /examples/plot_sgd_loss_functions.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================== 3 | SGD: Convex Loss Functions 4 | ========================== 5 | 6 | """ 7 | print(__doc__) 8 | 9 | import numpy as np 10 | import pylab as pl 11 | from lightning.impl.sgd import Hinge 12 | from lightning.impl.sgd import SquaredHinge 13 | from lightning.impl.sgd import Log 14 | from lightning.impl.sgd import SquaredLoss 15 | 16 | ############################################################################### 17 | # Define loss functions 18 | xmin, xmax = -3, 3 19 | hinge = Hinge(1) 20 | squared_hinge = SquaredHinge() 21 | log = Log() 22 | squared_loss = SquaredLoss() 23 | 24 | ############################################################################### 25 | # Plot loss functions 26 | xx = np.linspace(xmin, xmax, 100) 27 | pl.plot([xmin, 0, 0, xmax], [1, 1, 0, 0], 'k-', 28 | label="Zero-one loss") 29 | pl.plot(xx, [hinge.loss(x, 1) for x in xx], 'g-', 30 | label="Hinge loss") 31 | pl.plot(xx, [squared_hinge.loss(x, 1) for x in xx], 'b--', 32 | label="Squared hinge loss", zorder=3) 33 | pl.plot(xx, [log.loss(x, 1) for x in xx], 'r-', 34 | label="Log loss") 35 | pl.plot(xx, [2.0*squared_loss.loss(x, 1) for x in xx], 'c-', 36 | label="Squared
loss") 37 | pl.ylim((0, 5)) 38 | pl.legend(loc="upper right") 39 | pl.xlabel(r"$y \cdot f(x)$") 40 | pl.ylabel("$L(y, f(x))$") 41 | pl.show() 42 | -------------------------------------------------------------------------------- /examples/plot_sparse_non_linear.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================ 3 | Sparse non-linear classification 4 | ================================ 5 | 6 | This examples demonstrates how to use :class:`lightning.classification.CDClassifier` with L1 penalty to do 7 | sparse non-linear classification. The trick simply consists in fitting the 8 | classifier with a kernel matrix (e.g., using an RBF kernel). 9 | 10 | There are a few interesting differences with standard kernel SVMs: 11 | 12 | 1. the kernel matrix does not need to be positive semi-definite (hence the 13 | expression "kernel matrix" above is an abuse of terminology) 14 | 15 | 2. the number of "support vectors" will be typically smaller thanks to L1 16 | regularization and can be adjusted by the regularization parameter C (the 17 | smaller C, the fewer the support vectors) 18 | 19 | 3. the "support vectors" need not be located at the margin 20 | """ 21 | 22 | import numpy as np 23 | import pylab as pl 24 | 25 | from sklearn.metrics.pairwise import rbf_kernel 26 | 27 | from lightning.classification import CDClassifier 28 | from lightning.classification import KernelSVC 29 | 30 | np.random.seed(0) 31 | 32 | class SparseNonlinearClassifier(CDClassifier): 33 | 34 | def __init__(self, gamma=1e-2, C=1, alpha=1): 35 | self.gamma = gamma 36 | super().__init__(C=C, 37 | alpha=alpha, 38 | loss="squared_hinge", 39 | penalty="l1") 40 | 41 | def fit(self, X, y): 42 | K = rbf_kernel(X, gamma=self.gamma) 43 | self.X_train_ = X 44 | super().fit(K, y) 45 | return self 46 | 47 | def decision_function(self, X): 48 | K = rbf_kernel(X, self.X_train_, gamma=self.gamma) 49 | return super().decision_function(K) 50 | 51 | 52 | def gen_non_lin_separable_data(): 53 | mean1 = [-1, 2] 54 | mean2 = [1, -1] 55 | mean3 = [4, -4] 56 | mean4 = [-4, 4] 57 | cov = [[1.0,0.8], [0.8, 1.0]] 58 | X1 = np.random.multivariate_normal(mean1, cov, 50) 59 | X1 = np.vstack((X1, np.random.multivariate_normal(mean3, cov, 50))) 60 | y1 = np.ones(len(X1)) 61 | X2 = np.random.multivariate_normal(mean2, cov, 50) 62 | X2 = np.vstack((X2, np.random.multivariate_normal(mean4, cov, 50))) 63 | y2 = np.ones(len(X2)) * -1 64 | return X1, y1, X2, y2 65 | 66 | def plot_contour(X, X1, X2, clf, title): 67 | pl.figure() 68 | pl.title(title) 69 | 70 | # Plot instances of class 1. 71 | pl.plot(X1[:,0], X1[:,1], "ro") 72 | # Plot instances of class 2. 73 | pl.plot(X2[:,0], X2[:,1], "bo") 74 | 75 | # Select "support vectors". 76 | if hasattr(clf, "support_vectors_"): 77 | sv = clf.support_vectors_ 78 | else: 79 | sv = X[clf.coef_.ravel() != 0] 80 | 81 | # Plot support vectors. 82 | pl.scatter(sv[:, 0], sv[:, 1], s=100, c="g") 83 | 84 | # Plot decision surface. 85 | A, B = np.meshgrid(np.linspace(-6,6,50), np.linspace(-6,6,50)) 86 | C = np.array([[x1, x2] for x1, x2 in zip(np.ravel(A), np.ravel(B))]) 87 | Z = clf.decision_function(C).reshape(A.shape) 88 | pl.contour(A, B, Z, [0.0], colors='k', linewidths=1, origin='lower') 89 | 90 | pl.axis("tight") 91 | 92 | # Generate synthetic data from 2 classes. 93 | X1, y1, X2, y2 = gen_non_lin_separable_data() 94 | 95 | # Combine them to form a training set. 
96 | X = np.vstack((X1, X2)) 97 | y = np.hstack((y1, y2)) 98 | 99 | # Train the classifiers. 100 | clf = SparseNonlinearClassifier(gamma=0.1, alpha=1./0.05) 101 | clf.fit(X, y) 102 | 103 | clf2 = KernelSVC(gamma=0.1, kernel="rbf", alpha=1e-2) 104 | clf2.fit(X, y) 105 | 106 | # Plot contours. 107 | plot_contour(X, X1, X2, clf, "Sparse") 108 | plot_contour(X, X1, X2, clf2, "Kernel SVM") 109 | 110 | pl.show() 111 | -------------------------------------------------------------------------------- /examples/plot_svrg.py: -------------------------------------------------------------------------------- 1 | """ 2 | ======================================= 3 | Sensitivity to hyper-parameters in SVRG 4 | ======================================= 5 | 6 | This example shows the sensitivity of SVRG with respect 7 | to different hyperparameters. 8 | """ 9 | print(__doc__) 10 | 11 | import sys 12 | import time 13 | 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | 17 | from sklearn.datasets import make_classification 18 | from sklearn.datasets import fetch_20newsgroups_vectorized 19 | 20 | from lightning.classification import SVRGClassifier 21 | 22 | 23 | class Callback(object): 24 | 25 | def __init__(self, X, y): 26 | self.X = X 27 | self.y = y 28 | self.obj = [] 29 | self.times = [] 30 | self.start_time = time.time() 31 | self.test_time = 0 32 | 33 | def __call__(self, clf): 34 | test_time = time.time() 35 | clf._finalize_coef() 36 | y_pred = clf.decision_function(self.X).ravel() 37 | loss = (np.maximum(1 - self.y * y_pred, 0) ** 2).mean() 38 | coef = clf.coef_.ravel() 39 | regul = 0.5 * clf.alpha * np.dot(coef, coef) 40 | self.obj.append(loss + regul) 41 | self.test_time += time.time() - test_time 42 | self.times.append(time.time() - self.start_time - self.test_time) 43 | 44 | try: 45 | dataset = sys.argv[1] 46 | except IndexError: 47 | dataset = "synthetic" 48 | 49 | if dataset == "news20": 50 | bunch = fetch_20newsgroups_vectorized(subset="all") 51 | X = bunch.data 52 | y = bunch.target 53 | y[y >= 1] = 1 54 | 55 | etas = (0.5, 1e-1, 1e-2) 56 | n_inners = (1.0, 2.0, 3.0) 57 | else: 58 | X, y = make_classification(n_samples=10000, 59 | n_features=100, 60 | n_classes=2, 61 | random_state=0) 62 | etas = (1e-3, 1e-4, 1e-5) 63 | n_inners = (0.25, 0.5, 1.0, 1.5) 64 | 65 | y = y * 2 - 1 66 | 67 | 68 | plt.figure() 69 | 70 | for eta in etas: 71 | print("eta =", eta) 72 | cb = Callback(X, y) 73 | clf = SVRGClassifier(loss="squared_hinge", alpha=1e-5, eta=eta, 74 | n_inner=1.0, max_iter=20, random_state=0, callback=cb) 75 | clf.fit(X, y) 76 | plt.plot(cb.times, cb.obj, label="eta=" + str(eta)) 77 | 78 | plt.xlabel("CPU time") 79 | plt.ylabel("Objective value") 80 | plt.legend() 81 | 82 | plt.figure() 83 | 84 | for n_inner in n_inners: 85 | print("n_inner =", n_inner) 86 | cb = Callback(X, y) 87 | clf = SVRGClassifier(loss="squared_hinge", alpha=1e-5, eta=1e-4, 88 | n_inner=n_inner, max_iter=20, random_state=0, 89 | callback=cb) 90 | clf.fit(X, y) 91 | plt.plot(cb.times, cb.obj, label="n_inner=" + str(n_inner)) 92 | 93 | plt.xlabel("CPU time") 94 | plt.ylabel("Objective value") 95 | plt.legend() 96 | 97 | plt.show() 98 | -------------------------------------------------------------------------------- /examples/trace.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========== 3 | Trace norm 4 | =========== 5 | 6 | """ 7 | print(__doc__) 8 | import numpy as np 9 | from scipy.linalg import svd 10 | 11 | from sklearn.datasets import fetch_20newsgroups_vectorized 12 
| from sklearn.feature_selection import SelectKBest, chi2 13 | 14 | from lightning.classification import FistaClassifier 15 | 16 | def rank(M, eps=1e-9): 17 | U, s, V = svd(M, full_matrices=False) 18 | return np.sum(s > eps) 19 | 20 | 21 | bunch = fetch_20newsgroups_vectorized(subset="train") 22 | X_train = bunch.data 23 | y_train = bunch.target 24 | 25 | # Reduces dimensionality to make the example faster 26 | ch2 = SelectKBest(chi2, k=5000) 27 | X_train = ch2.fit_transform(X_train, y_train) 28 | 29 | bunch = fetch_20newsgroups_vectorized(subset="test") 30 | X_test = bunch.data 31 | y_test = bunch.target 32 | X_test = ch2.transform(X_test) 33 | 34 | clf = FistaClassifier(C=1.0 / X_train.shape[0], 35 | max_iter=200, 36 | penalty="trace", 37 | multiclass=True) 38 | 39 | print(f"{'alpha': <10}| {'score': <25}| {'rank': <5}") 40 | for alpha in (1e-3, 1e-2, 0.1, 0.2, 0.3): 41 | clf.alpha = alpha 42 | clf.fit(X_train, y_train) 43 | print(f"{alpha: <10}| {clf.score(X_test, y_test): <25}| {rank(clf.coef_): <5}") 44 | -------------------------------------------------------------------------------- /lightning/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.6.3.dev0" 2 | -------------------------------------------------------------------------------- /lightning/classification.py: -------------------------------------------------------------------------------- 1 | from .impl.adagrad import AdaGradClassifier 2 | from .impl.primal_cd import CDClassifier 3 | from .impl.fista import FistaClassifier 4 | from .impl.sag import SAGClassifier 5 | from .impl.sag import SAGAClassifier 6 | from .impl.sdca import SDCAClassifier 7 | from .impl.sgd import SGDClassifier 8 | from .impl.svrg import SVRGClassifier 9 | from .impl.dual_cd import LinearSVC 10 | from .impl.primal_newton import KernelSVC 11 | -------------------------------------------------------------------------------- /lightning/datasets.py: -------------------------------------------------------------------------------- 1 | from .impl.datasets.loaders import get_data_home 2 | from .impl.datasets.loaders import load_news20 3 | from .impl.datasets.loaders import load_usps 4 | from .impl.datasets.loaders import load_mnist 5 | from .impl.datasets.loaders import get_loader 6 | from .impl.datasets.loaders import load_dataset 7 | from .impl.datasets.utils import make_ovo 8 | from .impl.datasets.samples_generator import make_nn_regression 9 | from .impl.datasets.samples_generator import make_classification 10 | -------------------------------------------------------------------------------- /lightning/impl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/lightning/dbbe833963280e675c124bbd5caadfcb13d89bd7/lightning/impl/__init__.py -------------------------------------------------------------------------------- /lightning/impl/adagrad.py: -------------------------------------------------------------------------------- 1 | # Author: Mathieu Blondel 2 | # License: BSD 3 | 4 | import numpy as np 5 | 6 | from sklearn.utils import check_random_state 7 | 8 | from .base import BaseClassifier, BaseRegressor 9 | from .dataset_fast import get_dataset 10 | from .adagrad_fast import _adagrad_fit 11 | 12 | from .sgd_fast import ModifiedHuber 13 | from .sgd_fast import Hinge 14 | from .sgd_fast import SmoothHinge 15 | from .sgd_fast import SquaredHinge 16 | from .sgd_fast import Log 17 | from .sgd_fast import 
SquaredLoss 18 | from .sgd_fast import EpsilonInsensitive 19 | from .sgd_fast import Huber 20 | 21 | 22 | class _BaseAdagrad(object): 23 | 24 | def _fit(self, X, Y): 25 | n_samples, n_features = X.shape 26 | rng = check_random_state(self.random_state) 27 | n_vectors = Y.shape[1] 28 | ds = get_dataset(X, order="c") 29 | 30 | self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64) 31 | self.g_sum_ = np.zeros((n_vectors, n_features), dtype=np.float64) 32 | self.g_norms_ = np.zeros((n_vectors, n_features), dtype=np.float64) 33 | 34 | delta = 0 35 | alpha1 = self.l1_ratio * self.alpha 36 | alpha2 = (1 - self.l1_ratio) * self.alpha 37 | loss = self._get_loss() 38 | n_calls = n_samples if self.n_calls is None else self.n_calls 39 | 40 | for i in range(n_vectors): 41 | _adagrad_fit(self, ds, Y[:, i], self.coef_[i], self.g_sum_[i], 42 | self.g_norms_[i], loss, self.eta, delta, alpha1, 43 | alpha2, self.n_iter, self.shuffle, self.callback, 44 | n_calls, rng) 45 | 46 | return self 47 | 48 | 49 | class AdaGradClassifier(BaseClassifier, _BaseAdagrad): 50 | r""" 51 | Estimator for learning linear classifiers by AdaGrad. 52 | 53 | Solves the following objective: 54 | 55 | .. code-block:: 56 | 57 | minimize_w 1 / n_samples * \sum_i loss(w^T x_i, y_i) 58 | + alpha * l1_ratio * ||w||_1 59 | + alpha * (1 - l1_ratio) * 0.5 * ||w||^2_2 60 | """ 61 | 62 | def __init__(self, eta=1.0, alpha=1.0, l1_ratio=0, loss="hinge", gamma=1.0, 63 | n_iter=10, shuffle=True, callback=None, n_calls=None, 64 | random_state=None): 65 | self.eta = eta 66 | self.alpha = alpha 67 | self.l1_ratio = l1_ratio 68 | self.loss = loss 69 | self.gamma = gamma 70 | self.n_iter = n_iter 71 | self.shuffle = shuffle 72 | self.callback = callback 73 | self.n_calls = n_calls 74 | self.random_state = random_state 75 | 76 | def _get_loss(self): 77 | losses = { 78 | "modified_huber": ModifiedHuber(), 79 | "hinge": Hinge(1.0), 80 | "smooth_hinge": SmoothHinge(self.gamma), 81 | "squared_hinge": SquaredHinge(1.0), 82 | "perceptron": Hinge(0.0), 83 | "log": Log(), 84 | "squared": SquaredLoss(), 85 | } 86 | return losses[self.loss] 87 | 88 | def fit(self, X, y): 89 | self._set_label_transformers(y) 90 | Y = np.asfortranarray(self.label_binarizer_.transform(y), 91 | dtype=np.float64) 92 | return self._fit(X, Y) 93 | 94 | 95 | class AdaGradRegressor(BaseRegressor, _BaseAdagrad): 96 | r""" 97 | Estimator for learning linear regressors by AdaGrad. 98 | 99 | Solves the following objective: 100 | 101 | .. 
code-block:: 102 | 103 | minimize_w 1 / n_samples * \sum_i loss(w^T x_i, y_i) 104 | + alpha * l1_ratio * ||w||_1 105 | + alpha * (1 - l1_ratio) * 0.5 * ||w||^2_2 106 | """ 107 | 108 | def __init__(self, eta=1.0, alpha=1.0, l1_ratio=0, loss="squared", 109 | gamma=1.0, epsilon=0, n_iter=10, shuffle=True, callback=None, 110 | n_calls=None, random_state=None): 111 | self.eta = eta 112 | self.alpha = alpha 113 | self.l1_ratio = l1_ratio 114 | self.loss = loss 115 | self.gamma = gamma 116 | self.epsilon = epsilon 117 | self.n_iter = n_iter 118 | self.shuffle = shuffle 119 | self.callback = callback 120 | self.n_calls = n_calls 121 | self.random_state = random_state 122 | 123 | def _get_loss(self): 124 | losses = { 125 | "squared": SquaredLoss(), 126 | "huber": Huber(self.epsilon), 127 | "epsilon_insensitive": EpsilonInsensitive(self.epsilon), 128 | "absolute": EpsilonInsensitive(0) 129 | } 130 | return losses[self.loss] 131 | 132 | def fit(self, X, y): 133 | self.outputs_2d_ = len(y.shape) > 1 134 | Y = y.reshape(-1, 1) if not self.outputs_2d_ else y 135 | Y = Y.astype(np.float64) 136 | return self._fit(X, Y) 137 | -------------------------------------------------------------------------------- /lightning/impl/adagrad_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # cython: language_level=3 6 | # 7 | # Author: Mathieu Blondel 8 | # License: BSD 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | ctypedef np.int64_t LONG 14 | 15 | from libc.math cimport sqrt 16 | 17 | from lightning.impl.dataset_fast cimport RowDataset 18 | from lightning.impl.sgd_fast cimport LossFunction 19 | 20 | 21 | cdef double _pred(double* data, 22 | int* indices, 23 | int n_nz, 24 | double* w): 25 | 26 | cdef int j, jj 27 | cdef double dot = 0 28 | 29 | for jj in range(n_nz): 30 | j = indices[jj] 31 | dot += w[j] * data[jj] 32 | 33 | return dot 34 | 35 | 36 | cdef double _proj_elastic(double eta, 37 | LONG t, 38 | double g_sum, 39 | double g_norm, 40 | double alpha1, 41 | double alpha2, 42 | double delta): 43 | 44 | cdef double eta_t = eta * t 45 | cdef double denom = (delta + sqrt(g_norm) + eta_t * alpha2) 46 | cdef double wj_new1 = eta_t * (-g_sum / t - alpha1) / denom 47 | cdef double wj_new2 = eta_t * (-g_sum / t + alpha1) / denom 48 | 49 | if wj_new1 > 0: 50 | return wj_new1 51 | elif wj_new2 < 0: 52 | return wj_new2 53 | else: 54 | return 0 55 | 56 | 57 | cpdef _proj_elastic_all(double eta, 58 | LONG t, 59 | np.ndarray[double, ndim=1] g_sum, 60 | np.ndarray[double, ndim=1] g_norms, 61 | double alpha1, 62 | double alpha2, 63 | double delta, 64 | np.ndarray[double, ndim=1] w): 65 | cdef int n_features = w.shape[0] 66 | cdef int j 67 | for j in range(n_features): 68 | if g_norms[j] != 0: 69 | w[j] = _proj_elastic(eta, t, g_sum[j], g_norms[j], alpha1, alpha2, 70 | delta) 71 | 72 | 73 | def _adagrad_fit(self, 74 | RowDataset X, 75 | np.ndarray[double, ndim=1]y, 76 | np.ndarray[double, ndim=1]coef, 77 | np.ndarray[double, ndim=1]g_sum, 78 | np.ndarray[double, ndim=1]g_norms, 79 | LossFunction loss, 80 | double eta, 81 | double delta, 82 | double alpha1, 83 | double alpha2, 84 | int n_iter, 85 | int shuffle, 86 | callback, 87 | int n_calls, 88 | rng): 89 | 90 | cdef int n_samples = X.get_n_samples() 91 | cdef int n_features = X.get_n_features() 92 | 93 | # Variables 94 | cdef LONG t 95 | cdef int it, ii, i, jj, j 96 | cdef double y_pred, tmp, scale 97 | 
cdef np.ndarray[int, ndim=1] sindices 98 | sindices = np.arange(n_samples, dtype=np.int32) 99 | cdef int has_callback = callback is not None 100 | 101 | # Data pointers. 102 | cdef double* data 103 | cdef int* indices 104 | cdef int n_nz 105 | 106 | # Pointers 107 | cdef double* w = <double*> coef.data 108 | 109 | t = 1 110 | for it in range(n_iter): 111 | 112 | # Shuffle sample indices. 113 | if shuffle: 114 | rng.shuffle(sindices) 115 | 116 | for ii in range(n_samples): 117 | i = sindices[ii] 118 | 119 | # Retrieve sample i. 120 | X.get_row_ptr(i, &indices, &data, &n_nz) 121 | 122 | # Update w lazily. 123 | if t > 1: 124 | for jj in range(n_nz): 125 | j = indices[jj] 126 | if g_norms[j] != 0: 127 | w[j] = _proj_elastic(eta, t - 1, g_sum[j], g_norms[j], 128 | alpha1, alpha2, delta) 129 | 130 | # Make prediction. 131 | y_pred = _pred(data, indices, n_nz, w) 132 | 133 | # A subgradient is given by scale * X[i]. 134 | scale = -loss.get_update(y_pred, y[i]) 135 | 136 | # Update g_sum and g_norms. 137 | if scale != 0: 138 | for jj in range(n_nz): 139 | j = indices[jj] 140 | tmp = scale * data[jj] 141 | g_sum[j] += tmp 142 | g_norms[j] += tmp * tmp 143 | 144 | # Update w by naive implementation: very slow. 145 | # for j in range(n_features): 146 | # w[j] = _proj_elastic(eta, t, g_sum[j], g_norms[j], alpha1, 147 | # alpha2, delta) 148 | 149 | # Callback. 150 | if has_callback and t % n_calls == 0: 151 | ret = callback(self, t) 152 | if ret is not None: 153 | break 154 | 155 | t += 1 156 | 157 | 158 | # Finalize. 159 | _proj_elastic_all(eta, t - 1, g_sum, g_norms, alpha1, alpha2, delta, coef) 160 | -------------------------------------------------------------------------------- /lightning/impl/base.py: -------------------------------------------------------------------------------- 1 | # Author: Mathieu Blondel 2 | # License: BSD 3 | 4 | import numpy as np 5 | import scipy.sparse as sp 6 | import scipy.special 7 | 8 | from sklearn.base import BaseEstimator as _BaseEstimator 9 | from sklearn.base import ClassifierMixin, RegressorMixin 10 | from sklearn.utils.extmath import safe_sparse_dot 11 | from sklearn.preprocessing import LabelBinarizer 12 | from sklearn.preprocessing import LabelEncoder 13 | 14 | from .randomkit import RandomState 15 | 16 | 17 | class BaseEstimator(_BaseEstimator): 18 | 19 | def _get_random_state(self): 20 | return RandomState(seed=self.random_state) 21 | 22 | def n_nonzero(self, percentage=False): 23 | if hasattr(self, "coef_"): 24 | coef = self.coef_ 25 | else: 26 | coef = self.dual_coef_ 27 | 28 | n_nz = np.sum(np.sum(coef != 0, axis=0, dtype=bool)) 29 | 30 | if percentage: 31 | if hasattr(self, "support_vectors_") and \ 32 | self.support_vectors_ is not None: 33 | n_nz /= self.n_samples_ 34 | else: 35 | n_nz /= coef.shape[1] 36 | 37 | return n_nz 38 | 39 | 40 | class BaseClassifier(BaseEstimator, ClassifierMixin): 41 | 42 | @property 43 | def predict_proba(self): 44 | if self.loss not in ("log", "modified_huber"): 45 | raise AttributeError("predict_proba only supported when" 46 | " loss='log' or loss='modified_huber' " 47 | "(%s given)" % self.loss) 48 | return self._predict_proba 49 | 50 | def _predict_proba(self, X): 51 | if len(self.classes_) != 2: 52 | raise NotImplementedError("predict_proba only supported" 53 | " for binary classification") 54 | if self.loss == "log": 55 | df = self.decision_function(X).ravel() 56 | prob = scipy.special.expit(df) 57 | elif self.loss == "modified_huber": 58 | df = self.decision_function(X).ravel() 59 | prob = np.minimum(1, np.maximum(-1, df))
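# Clipped decision values lie in [-1, 1]; the shift-and-halve below maps them to [0, 1].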
60 | prob += 1 61 | prob /= 2 62 | else: 63 | raise NotImplementedError("predict_proba only supported when" 64 | " loss='log' or loss='modified_huber' " 65 | "(%s given)" % self.loss) 66 | 67 | out = np.zeros((X.shape[0], 2), dtype=np.float64) 68 | out[:, 1] = prob 69 | out[:, 0] = 1 - prob 70 | 71 | return out 72 | 73 | def _set_label_transformers(self, y, reencode=False, neg_label=-1): 74 | if reencode: 75 | self.label_encoder_ = LabelEncoder() 76 | y = self.label_encoder_.fit_transform(y).astype(np.int32) 77 | else: 78 | y = y.astype(np.int32) 79 | 80 | self.label_binarizer_ = LabelBinarizer(neg_label=neg_label, 81 | pos_label=1) 82 | self.label_binarizer_.fit(y) 83 | self.classes_ = self.label_binarizer_.classes_.astype(np.int32) 84 | n_classes = len(self.label_binarizer_.classes_) 85 | n_vectors = 1 if n_classes <= 2 else n_classes 86 | 87 | return y, n_classes, n_vectors 88 | 89 | def decision_function(self, X): 90 | pred = safe_sparse_dot(X, self.coef_.T) 91 | if hasattr(self, "intercept_"): 92 | pred += self.intercept_ 93 | return pred 94 | 95 | def predict(self, X): 96 | pred = self.decision_function(X) 97 | out = self.label_binarizer_.inverse_transform(pred) 98 | 99 | if hasattr(self, "label_encoder_"): 100 | out = self.label_encoder_.inverse_transform(out) 101 | 102 | return out 103 | 104 | 105 | class BaseRegressor(BaseEstimator, RegressorMixin): 106 | 107 | def predict(self, X): 108 | pred = safe_sparse_dot(X, self.coef_.T) 109 | 110 | if hasattr(self, "intercept_"): 111 | pred += self.intercept_ 112 | 113 | if not self.outputs_2d_: 114 | pred = pred.ravel() 115 | 116 | return pred 117 | -------------------------------------------------------------------------------- /lightning/impl/dataset_fast.pxd: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # cython: language_level=3 6 | # 7 | # Author: Mathieu Blondel 8 | # License: BSD 9 | 10 | cdef class Dataset: 11 | 12 | cdef int n_samples 13 | cdef int n_features 14 | 15 | cpdef int get_n_samples(self) 16 | cpdef int get_n_features(self) 17 | 18 | 19 | cdef class RowDataset(Dataset): 20 | 21 | cdef void get_row_ptr(self, 22 | int i, 23 | int** indices, 24 | double** data, 25 | int* n_nz) nogil 26 | 27 | cpdef get_row(self, int i) 28 | 29 | 30 | cdef class ColumnDataset(Dataset): 31 | 32 | cdef void get_column_ptr(self, 33 | int j, 34 | int** indices, 35 | double** data, 36 | int* n_nz) nogil 37 | 38 | cpdef get_column(self, int j) 39 | 40 | 41 | cdef class ContiguousDataset(RowDataset): 42 | 43 | cdef int* indices 44 | cdef double* data 45 | cdef object X 46 | 47 | cdef void get_row_ptr(self, 48 | int i, 49 | int** indices, 50 | double** data, 51 | int* n_nz) nogil 52 | 53 | 54 | cdef class FortranDataset(ColumnDataset): 55 | 56 | cdef int* indices 57 | cdef double* data 58 | cdef object X 59 | 60 | cdef void get_column_ptr(self, 61 | int j, 62 | int** indices, 63 | double** data, 64 | int* n_nz) nogil 65 | 66 | 67 | cdef class CSRDataset(RowDataset): 68 | 69 | cdef int* indices 70 | cdef double* data 71 | cdef int* indptr 72 | cdef object X 73 | 74 | cdef void get_row_ptr(self, 75 | int i, 76 | int** indices, 77 | double** data, 78 | int* n_nz) nogil 79 | 80 | 81 | cdef class CSCDataset(ColumnDataset): 82 | 83 | cdef int* indices 84 | cdef double* data 85 | cdef int* indptr 86 | cdef object X 87 | 88 | cdef void get_column_ptr(self, 89 | int j, 90 | int** indices, 91 | double** 
data, 92 | int* n_nz) nogil 93 | -------------------------------------------------------------------------------- /lightning/impl/dataset_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # cython: language_level=3 6 | # 7 | # Author: Mathieu Blondel 8 | # License: BSD 9 | 10 | from libc cimport stdlib 11 | 12 | import numpy as np 13 | cimport numpy as np 14 | np.import_array() 15 | 16 | import scipy.sparse as sp 17 | 18 | cdef class Dataset: 19 | 20 | cpdef int get_n_samples(self): 21 | return self.n_samples 22 | 23 | cpdef int get_n_features(self): 24 | return self.n_features 25 | 26 | 27 | cdef class RowDataset(Dataset): 28 | 29 | cdef void get_row_ptr(self, 30 | int i, 31 | int** indices, 32 | double** data, 33 | int* n_nz) nogil: 34 | pass 35 | 36 | cpdef get_row(self, int i): 37 | cdef double* data 38 | cdef int* indices 39 | cdef int n_nz 40 | cdef np.npy_intp shape[1] 41 | 42 | self.get_row_ptr(i, &indices, &data, &n_nz) 43 | 44 | shape[0] = self.n_features 45 | indices_ = np.PyArray_SimpleNewFromData(1, shape, np.NPY_INT, indices) 46 | data_ = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, data) 47 | 48 | return indices_, data_, n_nz 49 | 50 | 51 | cdef class ColumnDataset(Dataset): 52 | 53 | cdef void get_column_ptr(self, 54 | int j, 55 | int** indices, 56 | double** data, 57 | int* n_nz) nogil: 58 | pass 59 | 60 | cpdef get_column(self, int j): 61 | cdef double* data 62 | cdef int* indices 63 | cdef int n_nz 64 | cdef np.npy_intp shape[1] 65 | 66 | self.get_column_ptr(j, &indices, &data, &n_nz) 67 | 68 | shape[0] = self.n_samples 69 | indices_ = np.PyArray_SimpleNewFromData(1, shape, np.NPY_INT, indices) 70 | data_ = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, data) 71 | 72 | return indices_, data_, n_nz 73 | 74 | 75 | cdef class ContiguousDataset(RowDataset): 76 | 77 | def __init__(self, np.ndarray[double, ndim=2, mode='c'] X): 78 | self.n_samples = X.shape[0] 79 | self.n_features = X.shape[1] 80 | self.data = <double*> X.data 81 | self.X = X 82 | 83 | def __cinit__(self, np.ndarray[double, ndim=2, mode='c'] X): 84 | cdef int j 85 | cdef int n_features = X.shape[1] 86 | self.indices = <int*> stdlib.malloc(sizeof(int) * n_features) 87 | for j in range(n_features): 88 | self.indices[j] = j 89 | 90 | def __dealloc__(self): 91 | stdlib.free(self.indices) 92 | 93 | # This is used to reconstruct the object in order to make it picklable. 94 | def __reduce__(self): 95 | return (ContiguousDataset, (self.X, )) 96 | 97 | cdef void get_row_ptr(self, 98 | int i, 99 | int** indices, 100 | double** data, 101 | int* n_nz) nogil: 102 | indices[0] = self.indices 103 | data[0] = self.data + i * self.n_features 104 | n_nz[0] = self.n_features 105 | 106 | 107 | cdef class FortranDataset(ColumnDataset): 108 | 109 | def __init__(self, np.ndarray[double, ndim=2, mode='fortran'] X): 110 | self.n_samples = X.shape[0] 111 | self.n_features = X.shape[1] 112 | self.data = <double*> X.data 113 | self.X = X 114 | 115 | def __cinit__(self, np.ndarray[double, ndim=2, mode='fortran'] X): 116 | cdef int i 117 | cdef int n_samples = X.shape[0] 118 | self.indices = <int*> stdlib.malloc(sizeof(int) * n_samples) 119 | for i in range(n_samples): 120 | self.indices[i] = i 121 | 122 | def __dealloc__(self): 123 | stdlib.free(self.indices) 124 | 125 | # This is used to reconstruct the object in order to make it picklable. 
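# Unpickling calls FortranDataset(self.X) again, which re-derives the C pointers from X.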
126 | def __reduce__(self): 127 | return (FortranDataset, (self.X, )) 128 | 129 | cdef void get_column_ptr(self, 130 | int j, 131 | int** indices, 132 | double** data, 133 | int* n_nz) nogil: 134 | indices[0] = self.indices 135 | data[0] = self.data + j * self.n_samples 136 | n_nz[0] = self.n_samples 137 | 138 | 139 | cdef class CSRDataset(RowDataset): 140 | 141 | def __init__(self, X): 142 | cdef np.ndarray[double, ndim=1, mode='c'] X_data = X.data 143 | cdef np.ndarray[int, ndim=1, mode='c'] X_indices = X.indices 144 | cdef np.ndarray[int, ndim=1, mode='c'] X_indptr = X.indptr 145 | 146 | self.n_samples = X.shape[0] 147 | self.n_features = X.shape[1] 148 | self.data = <double*> X_data.data 149 | self.indices = <int*> X_indices.data 150 | self.indptr = <int*> X_indptr.data 151 | 152 | self.X = X 153 | 154 | # This is used to reconstruct the object in order to make it picklable. 155 | def __reduce__(self): 156 | return (CSRDataset, (self.X, )) 157 | 158 | cdef void get_row_ptr(self, 159 | int i, 160 | int** indices, 161 | double** data, 162 | int* n_nz) nogil: 163 | indices[0] = self.indices + self.indptr[i] 164 | data[0] = self.data + self.indptr[i] 165 | n_nz[0] = self.indptr[i + 1] - self.indptr[i] 166 | 167 | 168 | cdef class CSCDataset(ColumnDataset): 169 | 170 | def __init__(self, X): 171 | cdef np.ndarray[double, ndim=1, mode='c'] X_data = X.data 172 | cdef np.ndarray[int, ndim=1, mode='c'] X_indices = X.indices 173 | cdef np.ndarray[int, ndim=1, mode='c'] X_indptr = X.indptr 174 | 175 | self.n_samples = X.shape[0] 176 | self.n_features = X.shape[1] 177 | self.data = <double*> X_data.data 178 | self.indices = <int*> X_indices.data 179 | self.indptr = <int*> X_indptr.data 180 | 181 | self.X = X 182 | 183 | # This is used to reconstruct the object in order to make it picklable. 184 | def __reduce__(self): 185 | return (CSCDataset, (self.X, )) 186 | 187 | cdef void get_column_ptr(self, 188 | int j, 189 | int** indices, 190 | double** data, 191 | int* n_nz) nogil: 192 | indices[0] = self.indices + self.indptr[j] 193 | data[0] = self.data + self.indptr[j] 194 | n_nz[0] = self.indptr[j + 1] - self.indptr[j] 195 | 196 | 197 | def get_dataset(X, order="c"): 198 | if isinstance(X, Dataset): 199 | return X 200 | 201 | if sp.isspmatrix(X): 202 | if order == "fortran": 203 | X = X.tocsc() 204 | ds = CSCDataset(X) 205 | else: 206 | X = X.tocsr() 207 | ds = CSRDataset(X) 208 | else: 209 | if order == "fortran": 210 | X = np.asfortranarray(X, dtype=np.float64) 211 | ds = FortranDataset(X) 212 | else: 213 | X = np.ascontiguousarray(X, dtype=np.float64) 214 | ds = ContiguousDataset(X) 215 | return ds 216 | -------------------------------------------------------------------------------- /lightning/impl/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/lightning/dbbe833963280e675c124bbd5caadfcb13d89bd7/lightning/impl/datasets/__init__.py -------------------------------------------------------------------------------- /lightning/impl/datasets/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/lightning/dbbe833963280e675c124bbd5caadfcb13d89bd7/lightning/impl/datasets/tests/__init__.py -------------------------------------------------------------------------------- /lightning/impl/datasets/tests/test_samples_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from 
lightning.impl.datasets.samples_generator import make_nn_regression 4 | 5 | 6 | def test_make_nn_regression(): 7 | X, y, w = make_nn_regression(n_samples=10, n_features=50, n_informative=5) 8 | assert X.shape[0] == 10 9 | assert X.shape[1] == 50 10 | assert y.shape[0] == 10 11 | assert w.shape[0] == 50 12 | assert np.sum(X.data != 0) == 10 * 5 13 | 14 | X, y, w = make_nn_regression(n_samples=10, n_features=50, n_informative=50) 15 | assert np.sum(X.data != 0) == 10 * 50 16 | -------------------------------------------------------------------------------- /lightning/impl/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def make_ovo(X, y, class1, class2): 4 | classes = np.unique(y) 5 | 6 | if len(classes) <= 2: 7 | return X, y 8 | 9 | c1 = classes[class1] 10 | c2 = classes[class2] 11 | cond = np.logical_or(y == c1, y == c2) 12 | y = y[cond] 13 | y[y == c1] = 0 14 | y[y == c2] = 1 15 | ind = np.arange(X.shape[0]) 16 | 17 | return X[ind[cond]], y 18 | -------------------------------------------------------------------------------- /lightning/impl/dual_cd.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================== 3 | Dual Coordinate Descent Solvers 4 | ========================================== 5 | 6 | This module provides coordinate descent solvers for support vector machines 7 | (SVMs) and support vector regression (SVR) with L2 regularization. 8 | """ 9 | 10 | # Author: Mathieu Blondel 11 | # License: BSD 12 | 13 | import numpy as np 14 | 15 | from sklearn.preprocessing import add_dummy_feature 16 | 17 | from .base import BaseClassifier, BaseRegressor 18 | from .dataset_fast import get_dataset 19 | from .dual_cd_fast import _dual_cd 20 | from .dual_cd_fast import _dual_cd_auc 21 | from .dual_cd_fast import _dual_cd_svr 22 | 23 | 24 | class LinearSVC(BaseClassifier): 25 | r"""Estimator for learning a linear support vector machine by coordinate 26 | descent in the dual. 27 | 28 | Parameters 29 | ---------- 30 | loss : str, 'hinge', 'squared_hinge' 31 | The loss function to be used. 32 | 33 | criterion : str, 'accuracy', 'auc' 34 | Whether to optimize for classification accuracy or AUC. 35 | 36 | C : float 37 | Weight of the loss term. 38 | 39 | max_iter : int 40 | Maximum number of iterations to perform. 41 | 42 | tol : float 43 | Tolerance of the stopping criterion. 44 | 45 | shrinking : bool 46 | Whether to activate shrinking or not. 47 | 48 | warm_start : bool 49 | Whether to activate warm-start or not. 50 | 51 | permute : bool 52 | Whether to permute coordinates or not before cycling. 53 | 54 | callback : callable 55 | Callback function. 56 | 57 | n_calls : int 58 | Frequency with which `callback` must be called. 59 | 60 | random_state : RandomState or int 61 | The seed of the pseudo random number generator to use. 62 | 63 | verbose : int 64 | Verbosity level. 
65 | 66 | Examples 67 | -------- 68 | The following example demonstrates how to learn a classification 69 | model: 70 | 71 | >>> from sklearn.datasets import fetch_20newsgroups_vectorized 72 | >>> from lightning.classification import LinearSVC 73 | >>> bunch = fetch_20newsgroups_vectorized(subset="all") 74 | >>> X, y = bunch.data, bunch.target 75 | >>> clf = LinearSVC().fit(X, y) 76 | >>> accuracy = clf.score(X, y) 77 | """ 78 | 79 | def __init__(self, C=1.0, loss="hinge", criterion="accuracy", 80 | max_iter=1000, tol=1e-3, 81 | permute=True, shrinking=True, warm_start=False, 82 | random_state=None, callback=None, n_calls=100, verbose=0): 83 | self.C = C 84 | self.loss = loss 85 | self.criterion = criterion 86 | self.max_iter = max_iter 87 | self.tol = tol 88 | self.permute = permute 89 | self.shrinking = shrinking 90 | self.warm_start = warm_start 91 | self.random_state = random_state 92 | self.callback = callback 93 | self.n_calls = n_calls 94 | self.verbose = verbose 95 | self.coef_ = None 96 | 97 | def _get_loss(self): 98 | loss = {"l1": 1, 99 | "hinge": 1, 100 | "l2": 2, 101 | "squared_hinge": 2} 102 | return loss[self.loss] 103 | 104 | def fit(self, X, y): 105 | """Fit model according to X and y. 106 | 107 | Parameters 108 | ---------- 109 | X : array-like, shape = [n_samples, n_features] 110 | Training vectors, where n_samples is the number of samples 111 | and n_features is the number of features. 112 | 113 | y : array-like, shape = [n_samples] 114 | Target values. 115 | 116 | Returns 117 | ------- 118 | self : classifier 119 | Returns self. 120 | """ 121 | n_samples, n_features = X.shape 122 | rs = self._get_random_state() 123 | 124 | self._set_label_transformers(y) 125 | Y = np.asfortranarray(self.label_binarizer_.transform(y), 126 | dtype=np.float64) 127 | n_vectors = Y.shape[1] 128 | 129 | ds = get_dataset(X) 130 | 131 | if not self.warm_start or self.coef_ is None: 132 | self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64) 133 | if self.criterion == "accuracy": 134 | self.dual_coef_ = np.zeros((n_vectors, n_samples), 135 | dtype=np.float64) 136 | 137 | for i in range(n_vectors): 138 | if self.criterion == "accuracy": 139 | _dual_cd(self, self.coef_[i], self.dual_coef_[i], 140 | ds, Y[:, i], self.permute, 141 | self.C, self._get_loss(), self.max_iter, rs, self.tol, 142 | self.shrinking, self.callback, self.n_calls, 143 | verbose=self.verbose) 144 | else: 145 | _dual_cd_auc(self, self.coef_[i], ds, Y[:, i], 146 | self.C, self._get_loss(), self.max_iter, rs, 147 | self.verbose) 148 | 149 | return self 150 | 151 | 152 | class LinearSVR(BaseRegressor): 153 | r"""Estimator for learning a linear support vector regressor by coordinate 154 | descent in the dual. 155 | 156 | Parameters 157 | ---------- 158 | loss : str, 'epsilon_insensitive', 'squared_epsilon_insensitive' 159 | The loss function to be used. 160 | 161 | C : float 162 | Weight of the loss term. 163 | 164 | epsilon : float 165 | Parameter of the epsilon-insensitive loss. 166 | 167 | max_iter : int 168 | Maximum number of iterations to perform. 169 | 170 | tol : float 171 | Tolerance of the stopping criterion. 172 | 173 | fit_intercept : bool 174 | Whether to fit an intercept term or not. 175 | 176 | warm_start : bool 177 | Whether to activate warm-start or not. 178 | 179 | permute : bool 180 | Whether to permute coordinates or not before cycling. 181 | 182 | callback : callable 183 | Callback function. 184 | 185 | n_calls : int 186 | Frequency with which `callback` must be called. 
187 | 188 | random_state : RandomState or int 189 | The seed of the pseudo random number generator to use. 190 | 191 | verbose : int 192 | Verbosity level. 193 | """ 194 | 195 | def __init__(self, C=1.0, epsilon=0, loss="epsilon_insensitive", 196 | max_iter=1000, tol=1e-3, fit_intercept=False, 197 | permute=True, warm_start=False, 198 | random_state=None, callback=None, n_calls=100, verbose=0): 199 | self.C = C 200 | self.epsilon = epsilon 201 | self.loss = loss 202 | self.max_iter = max_iter 203 | self.tol = tol 204 | self.fit_intercept = fit_intercept 205 | self.permute = permute 206 | self.warm_start = warm_start 207 | self.random_state = random_state 208 | self.callback = callback 209 | self.n_calls = n_calls 210 | self.verbose = verbose 211 | self.coef_ = None 212 | 213 | def _get_loss(self): 214 | loss = {"l1": 1, 215 | "epsilon_insensitive": 1, 216 | "l2": 2, 217 | "squared_epsilon_insensitive": 2} 218 | return loss[self.loss] 219 | 220 | def fit(self, X, y): 221 | """Fit model according to X and y. 222 | 223 | Parameters 224 | ---------- 225 | X : array-like, shape = [n_samples, n_features] 226 | Training vectors, where n_samples is the number of samples 227 | and n_features is the number of features. 228 | 229 | y : array-like, shape = [n_samples] 230 | Target values. 231 | 232 | Returns 233 | ------- 234 | self : regressor 235 | Returns self. 236 | """ 237 | if self.fit_intercept: 238 | X = add_dummy_feature(X) 239 | 240 | n_samples, n_features = X.shape 241 | rs = self._get_random_state() 242 | 243 | self.outputs_2d_ = len(y.shape) == 2 244 | if self.outputs_2d_: 245 | Y = y 246 | else: 247 | Y = y.reshape(-1, 1) 248 | Y = np.asfortranarray(Y) 249 | n_vectors = Y.shape[1] 250 | 251 | ds = get_dataset(X) 252 | 253 | if not self.warm_start or self.coef_ is None: 254 | self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64) 255 | self.dual_coef_ = np.zeros((n_vectors, n_samples), 256 | dtype=np.float64) 257 | 258 | for i in range(n_vectors): 259 | _dual_cd_svr(self, self.coef_[i], self.dual_coef_[i], 260 | ds, Y[:, i], self.permute, 261 | self.C, self.epsilon, self._get_loss(), 262 | self.max_iter, rs, self.tol, 263 | self.callback, self.n_calls, 264 | verbose=self.verbose) 265 | 266 | if self.fit_intercept: 267 | self.intercept_ = self.coef_[:, 0] 268 | self.coef_ = self.coef_[:, 1:] 269 | 270 | return self 271 | -------------------------------------------------------------------------------- /lightning/impl/loss_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # cython: language_level=3 6 | # 7 | # Author: Mathieu Blondel 8 | # License: BSD 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | from libc.math cimport exp, log, sqrt 14 | 15 | from lightning.impl.dataset_fast cimport RowDataset 16 | 17 | DEF LOWER = 1e-2 18 | DEF UPPER = 1e9 19 | 20 | 21 | cdef double _l2_norm_sums(RowDataset X, int squared): 22 | cdef int i, j, jj 23 | cdef int n_samples = X.get_n_samples() 24 | cdef double norm, G = 0 25 | 26 | cdef double* data 27 | cdef int* indices 28 | cdef int n_nz 29 | 30 | for i in range(n_samples): 31 | X.get_row_ptr(i, &indices, &data, &n_nz) 32 | 33 | norm = 0 34 | for jj in range(n_nz): 35 | norm += data[jj] * data[jj] 36 | 37 | if squared: 38 | G += norm 39 | else: 40 | G += sqrt(norm) 41 | 42 | return G 43 | 44 | 45 | cdef class Squared: 46 | 47 | cpdef gradient(self, 48 | np.ndarray[double, 
ndim=2] df, 49 | RowDataset X, 50 | np.ndarray[double, ndim=2, mode='fortran'] y, 51 | np.ndarray[double, ndim=2, mode='c'] G): 52 | 53 | cdef double* data 54 | cdef int* indices 55 | cdef int n_nz 56 | 57 | cdef int n_samples = df.shape[0] 58 | cdef int n_vectors = df.shape[1] 59 | cdef int i, k, j, jj 60 | cdef double residual 61 | 62 | for i in range(n_samples): 63 | for k in range(n_vectors): 64 | residual = y[i, k] - df[i, k] 65 | X.get_row_ptr(i, &indices, &data, &n_nz) 66 | for jj in range(n_nz): 67 | j = indices[jj] 68 | G[k, j] -= residual * data[jj] 69 | 70 | cpdef objective(self, 71 | np.ndarray[double, ndim=2] df, 72 | np.ndarray[double, ndim=2, mode='fortran'] y): 73 | 74 | cdef int n_samples = df.shape[0] 75 | cdef int n_vectors = df.shape[1] 76 | 77 | cdef int i, k 78 | cdef double obj, residual 79 | 80 | obj = 0 81 | 82 | for i in range(n_samples): 83 | for k in range(n_vectors): 84 | residual = y[i, k] - df[i, k] 85 | obj += residual * residual 86 | 87 | return 0.5 * obj 88 | 89 | cpdef double lipschitz_constant(self, RowDataset X, int n_vectors): 90 | return 1.0 # TODO: return maximum eigen value of X^T X 91 | 92 | 93 | cdef class SquaredHinge: 94 | 95 | cpdef gradient(self, 96 | np.ndarray[double, ndim=2] df, 97 | RowDataset X, 98 | np.ndarray[double, ndim=2, mode='fortran'] y, 99 | np.ndarray[double, ndim=2, mode='c'] G): 100 | 101 | cdef double* data 102 | cdef int* indices 103 | cdef int n_nz 104 | 105 | cdef int n_samples = df.shape[0] 106 | cdef int n_vectors = df.shape[1] 107 | cdef int i, k, j, jj 108 | cdef double tmp 109 | 110 | for i in range(n_samples): 111 | for k in range(n_vectors): 112 | tmp = 1 - y[i, k] * df[i, k] 113 | if tmp > 0: 114 | tmp *= 2 * y[i, k] 115 | X.get_row_ptr(i, &indices, &data, &n_nz) 116 | for jj in range(n_nz): 117 | j = indices[jj] 118 | G[k, j] -= tmp * data[jj] 119 | 120 | cpdef objective(self, 121 | np.ndarray[double, ndim=2] df, 122 | np.ndarray[double, ndim=2, mode='fortran'] y): 123 | 124 | cdef int n_samples = df.shape[0] 125 | cdef int n_vectors = df.shape[1] 126 | 127 | cdef int i, k 128 | cdef double obj, value 129 | 130 | obj = 0 131 | 132 | for i in range(n_samples): 133 | for k in range(n_vectors): 134 | value = max(1 - y[i, k] * df[i, k], 0) 135 | obj += value * value 136 | 137 | return obj 138 | 139 | cpdef double lipschitz_constant(self, RowDataset X, int n_vectors): 140 | return 2 * n_vectors * _l2_norm_sums(X, True) 141 | 142 | 143 | cdef class MulticlassSquaredHinge: 144 | 145 | cpdef gradient(self, 146 | np.ndarray[double, ndim=2] df, 147 | RowDataset X, 148 | np.ndarray[int, ndim=1, mode='c'] y, 149 | np.ndarray[double, ndim=2, mode='c'] G): 150 | 151 | cdef double* data 152 | cdef int* indices 153 | cdef int n_nz 154 | 155 | cdef int n_samples = df.shape[0] 156 | cdef int n_vectors = df.shape[1] 157 | cdef int i, k, j, jj 158 | cdef double update, tmp 159 | 160 | for i in range(n_samples): 161 | X.get_row_ptr(i, &indices, &data, &n_nz) 162 | 163 | for k in range(n_vectors): 164 | if y[i] == k: 165 | continue 166 | 167 | update = max(1 - df[i, y[i]] + df[i, k], 0) 168 | if update != 0: 169 | update *= 2 170 | for jj in range(n_nz): 171 | j = indices[jj] 172 | tmp = update * data[jj] 173 | G[y[i], j] -= tmp 174 | G[k, j] += tmp 175 | 176 | cpdef objective(self, 177 | np.ndarray[double, ndim=2] df, 178 | np.ndarray[int, ndim=1, mode='c'] y): 179 | 180 | cdef int n_samples = df.shape[0] 181 | cdef int n_vectors = df.shape[1] 182 | 183 | cdef int i, k 184 | cdef double obj, value 185 | 186 | obj = 0 187 | 188 | for i 
in range(n_samples): 189 | for k in range(n_vectors): 190 | if y[i] == k: 191 | continue 192 | value = max(1 - df[i, y[i]] + df[i, k], 0) 193 | obj += value * value 194 | 195 | return obj 196 | 197 | cpdef double lipschitz_constant(self, RowDataset X, int n_vectors): 198 | return 4 * (n_vectors - 1) * _l2_norm_sums(X, True) 199 | 200 | 201 | cdef class MulticlassLog: 202 | 203 | cdef int margin 204 | 205 | def __init__(self, int margin=0): 206 | self.margin = margin 207 | 208 | cpdef gradient(self, 209 | np.ndarray[double, ndim=2] df, 210 | RowDataset X, 211 | np.ndarray[int, ndim=1, mode='c'] y, 212 | np.ndarray[double, ndim=2, mode='c'] G): 213 | 214 | cdef double* data 215 | cdef int* indices 216 | cdef int n_nz 217 | 218 | cdef int n_samples = df.shape[0] 219 | cdef int n_vectors = df.shape[1] 220 | cdef int i, k, j, jj 221 | cdef double tmp, Z 222 | cdef np.ndarray[double, ndim=1, mode='c'] scores 223 | scores = np.zeros(n_vectors, dtype=np.float64) 224 | 225 | for i in range(n_samples): 226 | X.get_row_ptr(i, &indices, &data, &n_nz) 227 | 228 | Z = 0 229 | for k in range(n_vectors): 230 | tmp = df[i, k] - df[i, y[i]] 231 | if self.margin and k != y[i]: 232 | tmp += 1 233 | tmp = exp(tmp) 234 | scores[k] = tmp 235 | Z += tmp 236 | 237 | for k in range(n_vectors): 238 | tmp = scores[k] / Z 239 | if k == y[i]: 240 | tmp -= 1 241 | 242 | for jj in range(n_nz): 243 | j = indices[jj] 244 | G[k, j] += tmp * data[jj] 245 | 246 | cpdef objective(self, 247 | np.ndarray[double, ndim=2] df, 248 | np.ndarray[int, ndim=1, mode='c'] y): 249 | 250 | cdef int n_samples = df.shape[0] 251 | cdef int n_vectors = df.shape[1] 252 | 253 | cdef int i, k 254 | cdef double obj, s 255 | 256 | obj = 0 257 | 258 | for i in range(n_samples): 259 | s = 1 260 | for k in range(n_vectors): 261 | tmp = df[i, k] - df[i, y[i]] 262 | if self.margin and k != y[i]: 263 | tmp += 1 264 | s += exp(tmp) 265 | obj += log(s) 266 | 267 | return obj 268 | 269 | cpdef double lipschitz_constant(self, RowDataset X, int n_vectors): 270 | return 0.5 * _l2_norm_sums(X, True) 271 | -------------------------------------------------------------------------------- /lightning/impl/penalty.py: -------------------------------------------------------------------------------- 1 | # Author: Mathieu Blondel 2 | # License: BSD 3 | 4 | import numpy as np 5 | from scipy.linalg import svd 6 | from lightning.impl.prox_fast import prox_tv1d 7 | 8 | 9 | class L1Penalty(object): 10 | 11 | def projection(self, coef, alpha, L): 12 | return np.sign(coef) * np.maximum(np.abs(coef) - alpha / L, 0) 13 | 14 | def regularization(self, coef): 15 | return np.sum(np.abs(coef)) 16 | 17 | 18 | class L1L2Penalty(object): 19 | 20 | def projection(self, coef, alpha, L): 21 | l2norms = np.sqrt(np.sum(coef ** 2, axis=0)) 22 | scales = np.maximum(1.0 - alpha / (L * l2norms), 0) 23 | coef *= scales 24 | return coef 25 | 26 | def regularization(self, coef): 27 | return np.sum(np.sqrt(np.sum(coef ** 2, axis=0))) 28 | 29 | 30 | class TracePenalty(object): 31 | 32 | def projection(self, coef, alpha, L): 33 | U, s, V = svd(coef, full_matrices=False) 34 | s = np.maximum(s - alpha / L, 0) 35 | #return np.dot(np.dot(U, np.diag(s)), V) 36 | U *= s 37 | return np.dot(U, V) 38 | 39 | def regularization(self, coef): 40 | U, s, V = svd(coef, full_matrices=False) 41 | return np.sum(s) 42 | 43 | 44 | class NNConstraint(object): 45 | 46 | def projection(self, coef, alpha, L): 47 | return np.maximum(0, coef) 48 | 49 | def regularization(self, coef): 50 | return 0 51 | 52 | 53 | # See 
https://gist.github.com/mblondel/6f3b7aaad90606b98f71 54 | # for more algorithms. 55 | def project_simplex(v, z=1): 56 | if np.sum(v) <= z: 57 | return v 58 | 59 | n_features = v.shape[0] 60 | u = np.sort(v)[::-1] 61 | cssv = np.cumsum(u) - z 62 | ind = np.arange(n_features) + 1 63 | cond = u - cssv / ind > 0 64 | rho = ind[cond][-1] 65 | theta = cssv[cond][-1] / rho 66 | w = np.maximum(v - theta, 0) 67 | return w 68 | 69 | 70 | class SimplexConstraint(object): 71 | 72 | def projection(self, coef, alpha, L): 73 | return project_simplex(coef[0]).reshape(1,-1) 74 | 75 | def regularization(self, coef): 76 | return 0 77 | 78 | 79 | def project_l1_ball(v, z=1): 80 | return np.sign(v) * project_simplex(np.abs(v), z) 81 | 82 | 83 | class L1BallConstraint(object): 84 | 85 | def projection(self, coef, alpha, L): 86 | return project_l1_ball(coef[0], alpha).reshape(1,-1) 87 | 88 | def regularization(self, coef): 89 | return 0 90 | 91 | 92 | class TotalVariation1DPenalty(object): 93 | def projection(self, coef, alpha, L): 94 | tmp = coef.copy() 95 | for i in range(tmp.shape[0]): 96 | prox_tv1d(tmp[i, :], alpha / L) # operates inplace 97 | return tmp 98 | 99 | def regularization(self, coef): 100 | return np.sum(np.abs(np.diff(coef))) 101 | -------------------------------------------------------------------------------- /lightning/impl/prank.py: -------------------------------------------------------------------------------- 1 | # Author: Mathieu Blondel 2 | # License: BSD 3 | 4 | import numpy as np 5 | 6 | from sklearn.preprocessing import LabelEncoder 7 | from sklearn.utils.extmath import safe_sparse_dot 8 | from sklearn.metrics.pairwise import pairwise_kernels 9 | 10 | from .base import BaseEstimator 11 | from .dataset_fast import get_dataset 12 | from .prank_fast import _prank_fit 13 | from .prank_fast import _prank_fit_kernel 14 | from .prank_fast import _prank_predict 15 | 16 | 17 | class _BasePRank(BaseEstimator): 18 | 19 | def score(self, X, y): 20 | y_pred = self.predict(X) 21 | return np.mean(np.abs(y - y_pred)) 22 | 23 | @property 24 | def classes_(self): 25 | return self._label_encoder.classes_ 26 | 27 | 28 | class PRank(_BasePRank): 29 | r"""Online algorithm for learning an ordinal regression model. 30 | 31 | Parameters 32 | ---------- 33 | n_iter : int 34 | Number of iterations to run. 35 | 36 | shuffle : boolean 37 | Whether to shuffle data. 38 | 39 | random_state : RandomState or int 40 | The seed of the pseudo random number generator to use. 41 | 42 | Attributes 43 | ---------- 44 | coef_ : array, shape=[n_features] 45 | Estimated weights. 46 | 47 | thresholds_ : array, shape=[n_classes] 48 | Estimated thresholds. 49 | 50 | References 51 | ---------- 52 | Pranking with Ranking 53 | Koby Crammer, Yoram Singer 54 | NIPS 2001 55 | """ 56 | 57 | def __init__(self, n_iter=10, shuffle=True, random_state=None): 58 | self.n_iter = n_iter 59 | self.shuffle = shuffle 60 | self.random_state = random_state 61 | 62 | def fit(self, X, y): 63 | """Fit model according to X and y. 64 | 65 | Parameters 66 | ---------- 67 | X : array-like, shape = [n_samples, n_features] 68 | Training vectors, where n_samples is the number of samples 69 | and n_features is the number of features. 70 | 71 | y : array-like, shape = [n_samples] 72 | Target values. 73 | 74 | Returns 75 | ------- 76 | self : classifier 77 | Returns self. 
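Examples
--------
A minimal usage sketch on a tiny synthetic ordinal dataset (data
and parameter values below are illustrative only):

>>> import numpy as np
>>> from lightning.impl.prank import PRank
>>> rng = np.random.RandomState(0)
>>> X = rng.randn(20, 5)
>>> y = np.repeat(np.arange(4), 5)  # four ordered classes, 5 samples each
>>> clf = PRank(n_iter=10, random_state=0).fit(X, y)
>>> y_pred = clf.predict(X)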
78 | """ 79 | n_samples, n_features = X.shape 80 | rs = self._get_random_state() 81 | 82 | self._label_encoder = LabelEncoder() 83 | y = self._label_encoder.fit_transform(y).astype(np.int32) 84 | n_classes = len(self.classes_) 85 | 86 | 87 | self.coef_ = np.zeros(n_features, dtype=np.float64) 88 | self.thresholds_ = np.zeros(n_classes, dtype=np.float64) 89 | self.thresholds_[-1] = np.inf 90 | 91 | ds = get_dataset(X) 92 | 93 | _prank_fit(self.coef_, self.thresholds_, ds, y, n_classes, 94 | self.n_iter, rs, self.shuffle) 95 | 96 | return self 97 | 98 | def predict(self, X): 99 | n_samples = X.shape[0] 100 | dot = safe_sparse_dot(X, self.coef_) 101 | out = np.zeros(n_samples, dtype=np.int32) 102 | _prank_predict(dot, self.thresholds_, len(self.classes_), out) 103 | return self._label_encoder.inverse_transform(out) 104 | 105 | 106 | class KernelPRank(_BasePRank): 107 | r"""Kernelized online algorithm for learning an ordinal regression model. 108 | 109 | Parameters 110 | ---------- 111 | n_iter : int 112 | Number of iterations to run. 113 | 114 | shuffle : boolean 115 | Whether to shuffle data. 116 | 117 | random_state : RandomState or int 118 | The seed of the pseudo random number generator to use. 119 | 120 | kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed" 121 | Kernel. 122 | Default: "linear" 123 | 124 | degree : int, default=3 125 | Degree for poly kernels. Ignored by other kernels. 126 | 127 | gamma : float, optional 128 | Kernel coefficient for rbf and poly kernels. Default: 1/n_features. 129 | Ignored by other kernels. 130 | 131 | coef0 : float, optional 132 | Independent term in poly and sigmoid kernels. 133 | Ignored by other kernels. 134 | 135 | kernel_params : mapping of string to any, optional 136 | Parameters (keyword arguments) and values for kernel passed as 137 | callable object. Ignored by other kernels. 138 | 139 | Attributes 140 | ---------- 141 | dual_coef_ : array, shape=[n_samples] 142 | Estimated weights. 143 | 144 | thresholds_ : array, shape=[n_classes] 145 | Estimated thresholds. 146 | 147 | References 148 | ---------- 149 | Pranking with Ranking 150 | Koby Crammer, Yoram Singer 151 | NIPS 2001 152 | """ 153 | 154 | def __init__(self, n_iter=10, shuffle=True, random_state=None, 155 | kernel="linear", gamma=None, degree=3, coef0=1, 156 | kernel_params=None): 157 | self.n_iter = n_iter 158 | self.shuffle = shuffle 159 | self.random_state = random_state 160 | self.kernel = kernel 161 | self.gamma = gamma 162 | self.degree = degree 163 | self.coef0 = coef0 164 | self.kernel_params = kernel_params 165 | 166 | @property 167 | def _pairwise(self): 168 | return self.kernel == "precomputed" 169 | 170 | def _get_kernel(self, X, Y=None): 171 | if callable(self.kernel): 172 | params = self.kernel_params or {} 173 | else: 174 | params = {"gamma": self.gamma, 175 | "degree": self.degree, 176 | "coef0": self.coef0} 177 | return pairwise_kernels(X, Y, metric=self.kernel, 178 | filter_params=True, **params) 179 | 180 | def fit(self, X, y): 181 | """Fit model according to X and y. 182 | 183 | Parameters 184 | ---------- 185 | X : array-like, shape = [n_samples, n_features] 186 | Training vectors, where n_samples is the number of samples 187 | and n_features is the number of features. 188 | 189 | y : array-like, shape = [n_samples] 190 | Target values. 191 | 192 | Returns 193 | ------- 194 | self : classifier 195 | Returns self. 
196 | """ 197 | n_samples, n_features = X.shape 198 | rs = self._get_random_state() 199 | 200 | self._label_encoder = LabelEncoder() 201 | y = self._label_encoder.fit_transform(y).astype(np.int32) 202 | n_classes = len(self.classes_) 203 | 204 | K = self._get_kernel(X) 205 | self.X_train_ = X 206 | 207 | self.dual_coef_ = np.zeros(n_samples, dtype=np.float64) 208 | self.thresholds_ = np.zeros(n_classes, dtype=np.float64) 209 | self.thresholds_[-1] = np.inf 210 | 211 | _prank_fit_kernel(self.dual_coef_, self.thresholds_, K, y, n_classes, 212 | self.n_iter, rs, self.shuffle) 213 | 214 | return self 215 | 216 | def predict(self, X): 217 | K = self._get_kernel(X, self.X_train_) 218 | n_samples = X.shape[0] 219 | dot = np.dot(K, self.dual_coef_) 220 | out = np.zeros(n_samples, dtype=np.int32) 221 | _prank_predict(dot, self.thresholds_, len(self.classes_), out) 222 | return self._label_encoder.inverse_transform(out) 223 | -------------------------------------------------------------------------------- /lightning/impl/prank_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # cython: language_level=3 6 | # 7 | # Author: Mathieu Blondel 8 | # License: BSD 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | from lightning.impl.randomkit.random_fast cimport RandomState 14 | from lightning.impl.dataset_fast cimport RowDataset 15 | 16 | 17 | cdef int _predict(double dot, 18 | np.ndarray[double, ndim=1, mode='c'] b, 19 | int n_classes): 20 | cdef int r 21 | cdef int y_hat = 0 22 | 23 | for r in range(n_classes): 24 | if dot - b[r] < 0: 25 | y_hat = r 26 | break 27 | 28 | return y_hat 29 | 30 | 31 | cdef int _update_thresholds(double dot, 32 | np.ndarray[double, ndim=1, mode='c'] b, 33 | int y, 34 | int n_classes): 35 | cdef int tau = 0 36 | cdef int r, yr 37 | 38 | for r in range(n_classes - 1): 39 | if y <= r: 40 | yr = -1 41 | else: 42 | yr = 1 43 | 44 | if yr * (dot - b[r]) <= 0: 45 | tau += yr 46 | b[r] -= yr 47 | 48 | return tau 49 | 50 | 51 | def _prank_fit(np.ndarray[double, ndim=1, mode='c'] w, 52 | np.ndarray[double, ndim=1, mode='c'] b, 53 | RowDataset X, 54 | np.ndarray[int, ndim=1] y, 55 | int n_classes, 56 | int n_iter, 57 | RandomState rs, 58 | int shuffle): 59 | 60 | cdef int n_samples = X.get_n_samples() 61 | cdef int n_features = X.get_n_features() 62 | 63 | cdef int n, i, ii, j, jj, y_hat, tau 64 | cdef double dot 65 | 66 | # Data pointers. 67 | cdef double* data 68 | cdef int* indices 69 | cdef int n_nz 70 | 71 | # Data indices. 72 | cdef np.ndarray[int, ndim=1] ind 73 | ind = np.arange(n_samples, dtype=np.int32) 74 | 75 | for n in range(n_iter): 76 | if shuffle: 77 | rs.shuffle(ind) 78 | 79 | for ii in range(n_samples): 80 | i = ind[ii] 81 | 82 | # Retrieve row. 83 | X.get_row_ptr(i, &indices, &data, &n_nz) 84 | 85 | # Compute dot product. 86 | dot = 0 87 | for jj in range(n_nz): 88 | j = indices[jj] 89 | dot += w[j] * data[jj] 90 | 91 | y_hat = _predict(dot, b, n_classes) 92 | 93 | # Nothing to do if prediction was correct. 94 | if y_hat == y[i]: 95 | continue 96 | 97 | tau = _update_thresholds(dot, b, y[i], n_classes) 98 | 99 | # Update w. 
100 | for jj in range(n_nz): 101 | j = indices[jj] 102 | w[j] += tau * data[jj] 103 | 104 | 105 | def _prank_fit_kernel(np.ndarray[double, ndim=1, mode='c'] alpha, 106 | np.ndarray[double, ndim=1, mode='c'] b, 107 | np.ndarray[double, ndim=2] K, 108 | np.ndarray[int, ndim=1] y, 109 | int n_classes, 110 | int n_iter, 111 | RandomState rs, 112 | int shuffle): 113 | 114 | cdef int n_samples = K.shape[0] 115 | 116 | cdef int n, i, ii, j, y_hat, tau 117 | cdef double dot 118 | 119 | # Data indices. 120 | cdef np.ndarray[int, ndim=1] ind 121 | ind = np.arange(n_samples, dtype=np.int32) 122 | 123 | for n in range(n_iter): 124 | if shuffle: 125 | rs.shuffle(ind) 126 | 127 | for ii in range(n_samples): 128 | i = ind[ii] 129 | 130 | # Compute dot product. 131 | dot = 0 132 | for j in range(n_samples): 133 | dot += alpha[j] * K[i, j] 134 | 135 | y_hat = _predict(dot, b, n_classes) 136 | 137 | # Nothing to do if prediction was correct. 138 | if y_hat == y[i]: 139 | continue 140 | 141 | tau = _update_thresholds(dot, b, y[i], n_classes) 142 | 143 | # Update alpha. 144 | alpha[i] += tau 145 | 146 | 147 | def _prank_predict(np.ndarray[double, ndim=1, mode='c'] dot, 148 | np.ndarray[double, ndim=1, mode='c'] b, 149 | int n_classes, 150 | np.ndarray[int, ndim=1, mode='c'] out): 151 | 152 | cdef int n_samples = dot.shape[0] 153 | cdef int i 154 | 155 | for i in range(n_samples): 156 | out[i] = _predict(dot[i], b, n_classes) 157 | -------------------------------------------------------------------------------- /lightning/impl/primal_newton.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================= 3 | Kernel SVM by Newton's method 4 | ============================= 5 | 6 | This module provides a kernel SVM solver using Newton's method. 7 | 8 | """ 9 | # Author: Mathieu Blondel 10 | # License: BSD 11 | 12 | import numpy as np 13 | 14 | from scipy.sparse.linalg import cg 15 | from scipy.linalg import solve 16 | 17 | from sklearn.utils import safe_mask 18 | from sklearn.preprocessing import LabelBinarizer 19 | from sklearn.utils import check_random_state 20 | from sklearn.metrics.pairwise import pairwise_kernels 21 | 22 | from .base import BaseClassifier 23 | 24 | 25 | class KernelSVC(BaseClassifier): 26 | r"""Estimator for learning kernel SVMs by Newton's method. 27 | 28 | Parameters 29 | ---------- 30 | alpha : float 31 | Weight of the penalty term. 32 | 33 | solver : str, 'cg', 'dense' 34 | Whether to solve the Newton system by conjugate gradient ('cg') or by a direct dense solver ('dense'). 35 | max_iter : int 36 | Maximum number of iterations to perform. 37 | 38 | tol : float 39 | Tolerance of the stopping criterion. 40 | 41 | kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed" 42 | Kernel to use. Default: "linear" 43 | 44 | degree : int, default=4 45 | Degree for poly kernels. Ignored by other kernels. 46 | 47 | gamma : float, default=0.1 48 | Kernel coefficient for rbf and poly kernels. 49 | Ignored by other kernels. 50 | 51 | coef0 : float, optional 52 | Independent term in poly and sigmoid kernels. 53 | Ignored by other kernels. 54 | 55 | random_state : RandomState or int 56 | The seed of the pseudo random number generator to use. 57 | 58 | verbose : int 59 | Verbosity level. 60 | 61 | n_jobs : int 62 | Number of jobs to use to compute the kernel matrix. 
63 | 64 | Examples 65 | -------- 66 | >>> from sklearn.datasets import make_classification 67 | >>> from lightning.classification import KernelSVC 68 | >>> X, y = make_classification() 69 | >>> clf = KernelSVC().fit(X, y) 70 | >>> accuracy = clf.score(X, y) 71 | """ 72 | 73 | def __init__(self, alpha=1.0, solver="cg", 74 | max_iter=50, tol=1e-3, 75 | kernel="linear", gamma=0.1, coef0=1, degree=4, 76 | random_state=None, verbose=0, n_jobs=1): 77 | self.alpha = alpha 78 | self.solver = solver 79 | self.max_iter = max_iter 80 | self.tol = tol 81 | self.kernel = kernel 82 | self.gamma = gamma 83 | self.coef0 = coef0 84 | self.degree = degree 85 | self.random_state = random_state 86 | self.verbose = verbose 87 | self.n_jobs = n_jobs 88 | 89 | def _kernel_params(self): 90 | return {"gamma": self.gamma, 91 | "degree": self.degree, 92 | "coef0": self.coef0} 93 | 94 | def _solve(self, A, b): 95 | if self.solver == "cg": 96 | x, info = cg(A, b, tol=self.tol) 97 | elif self.solver == "dense": 98 | x = solve(A, b, sym_pos=True) 99 | return x 100 | 101 | def _fit_binary(self, K, y, rs): 102 | n_samples = K.shape[0] 103 | coef = np.zeros(n_samples) 104 | if n_samples < 1000: 105 | sv = np.ones(n_samples, dtype=bool) 106 | else: 107 | sv = np.zeros(n_samples, dtype=bool) 108 | sv[:1000] = True 109 | rs.shuffle(sv) 110 | 111 | for t in range(1, self.max_iter + 1): 112 | if self.verbose: 113 | print("Iteration", t, "#SV=", np.sum(sv)) 114 | 115 | K_sv = K[sv][:, sv] 116 | I = np.diag(self.alpha * np.ones(K_sv.shape[0])) 117 | 118 | coef_sv = self._solve(K_sv + I, y[sv]) 119 | 120 | coef *= 0 121 | coef[sv] = coef_sv 122 | pred = np.dot(K, coef) 123 | errors = 1 - y * pred 124 | last_sv = sv 125 | sv = errors > 0 126 | 127 | if np.array_equal(last_sv, sv): 128 | if self.verbose: 129 | print("Converged at iteration", t) 130 | break 131 | 132 | return coef 133 | 134 | def _post_process(self, X): 135 | # We can't know the support vectors when using precomputed kernels. 136 | if self.kernel != "precomputed": 137 | sv = np.sum(self.coef_ != 0, axis=0, dtype=bool) 138 | if np.sum(sv) > 0: 139 | self.coef_ = np.ascontiguousarray(self.coef_[:, sv]) 140 | mask = safe_mask(X, sv) 141 | self.support_vectors_ = np.ascontiguousarray(X[mask]) 142 | self.support_indices_ = np.arange(X.shape[0], 143 | dtype=np.int32)[sv] 144 | self.n_samples_ = X.shape[0] 145 | 146 | if self.verbose >= 1: 147 | print("Number of support vectors:", np.sum(sv)) 148 | 149 | def fit(self, X, y): 150 | """Fit model according to X and y. 151 | 152 | Parameters 153 | ---------- 154 | X : array-like, shape = [n_samples, n_features] 155 | Training vectors, where n_samples is the number of samples 156 | and n_features is the number of features. 157 | 158 | y : array-like, shape = [n_samples] 159 | Target values. 160 | 161 | Returns 162 | ------- 163 | self : classifier 164 | Returns self. 
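
        Notes
        -----
        Each outer iteration solves the regularized linear system
        (K[sv][:, sv] + alpha * I) coef = y[sv] restricted to the current
        active set, then recomputes the active set as the samples with a
        positive margin error (1 - y * pred > 0). Fitting stops early as
        soon as the active set no longer changes between iterations.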
165 | """ 166 | n_samples, n_features = X.shape 167 | rs = check_random_state(self.random_state) 168 | 169 | self.label_binarizer_ = LabelBinarizer(neg_label=-1, pos_label=1) 170 | Y = self.label_binarizer_.fit_transform(y) 171 | self.classes_ = self.label_binarizer_.classes_.astype(np.int32) 172 | n_vectors = Y.shape[1] 173 | 174 | if self.verbose: 175 | print("Pre-computing kernel matrix...") 176 | 177 | K = pairwise_kernels(X, filter_params=True, n_jobs=self.n_jobs, 178 | metric=self.kernel, **self._kernel_params()) 179 | 180 | coef = [self._fit_binary(K, Y[:, i], rs) for i in range(n_vectors)] 181 | self.coef_ = np.array(coef) 182 | self.intercept_ = np.zeros(n_vectors, dtype=np.float64) 183 | 184 | self._post_process(X) 185 | 186 | return self 187 | 188 | def decision_function(self, X): 189 | """ 190 | Return the decision function for test vectors X. 191 | 192 | Parameters 193 | ---------- 194 | X : array-like, shape = [n_samples, n_features] 195 | 196 | Returns 197 | ------- 198 | P : array, shape = [n_classes, n_samples] 199 | Decision function for X 200 | """ 201 | K = pairwise_kernels(X, self.support_vectors_, filter_params=True, 202 | n_jobs=self.n_jobs, metric=self.kernel, 203 | **self._kernel_params()) 204 | return np.dot(K, self.coef_.T) 205 | -------------------------------------------------------------------------------- /lightning/impl/prox_fast.pxd: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # cython: language_level=3 6 | # 7 | # Authors: Fabian Pedregosa 8 | # License: BSD 9 | 10 | cimport numpy as np 11 | from cython cimport floating 12 | 13 | cpdef prox_tv1d(np.ndarray[ndim=1, dtype=floating] w, floating stepsize) 14 | -------------------------------------------------------------------------------- /lightning/impl/prox_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # cython: language_level=3 6 | # 7 | # Authors: Fabian Pedregosa 8 | # License: BSD 9 | 10 | """ 11 | These are some helper functions to compute the proximal operator of some common penalties 12 | """ 13 | 14 | cimport numpy as np 15 | from cython cimport floating 16 | 17 | cpdef prox_tv1d(np.ndarray[ndim=1, dtype=floating] w, floating stepsize): 18 | """ 19 | Computes the proximal operator of the 1-dimensional total variation operator. 20 | 21 | This solves a problem of the form 22 | 23 | argmin_x TV(x) + (1/(2 stepsize)) ||x - w||^2 24 | 25 | where TV(x) is the one-dimensional total variation 26 | 27 | Parameters 28 | ---------- 29 | w: array 30 | vector of coefficieents 31 | stepsize: float 32 | step size (sometimes denoted gamma) in proximal objective function 33 | 34 | References 35 | ---------- 36 | Condat, Laurent. "A direct algorithm for 1D total variation denoising." 
37 | IEEE Signal Processing Letters (2013) 38 | """ 39 | cdef long width, k, k0, kplus, kminus 40 | cdef floating umin, umax, vmin, vmax, twolambda, minlambda 41 | width = w.size 42 | 43 | # /to avoid invalid memory access to input[0] and invalid lambda values 44 | if width > 0 and stepsize >= 0: 45 | k, k0 = 0, 0 # k: current sample location, k0: beginning of current segment 46 | umin = stepsize # u is the dual variable 47 | umax = - stepsize 48 | vmin = w[0] - stepsize 49 | vmax = w[0] + stepsize # bounds for the segment's value 50 | kplus = 0 51 | kminus = 0 # last positions where umax=-lambda, umin=lambda, respectively 52 | twolambda = 2.0 * stepsize # auxiliary variable 53 | minlambda = -stepsize # auxiliary variable 54 | while True: # simple loop, the exit test is inside 55 | while k >= width-1: # we use the right boundary condition 56 | if umin < 0.0: # vmin is too high -> negative jump necessary 57 | while True: 58 | w[k0] = vmin 59 | k0 += 1 60 | if k0 > kminus: 61 | break 62 | k = k0 63 | kminus = k 64 | vmin = w[kminus] 65 | umin = stepsize 66 | umax = vmin + umin - vmax 67 | elif umax > 0.0: # vmax is too low -> positive jump necessary 68 | while True: 69 | w[k0] = vmax 70 | k0 += 1 71 | if k0 > kplus: 72 | break 73 | k = k0 74 | kplus = k 75 | vmax = w[kplus] 76 | umax = minlambda 77 | umin = vmax + umax -vmin 78 | else: 79 | vmin += umin / (k-k0+1) 80 | while True: 81 | w[k0] = vmin 82 | k0 += 1 83 | if k0 > k: 84 | break 85 | return 86 | umin += w[k + 1] - vmin 87 | if umin < minlambda: # negative jump necessary 88 | while True: 89 | w[k0] = vmin 90 | k0 += 1 91 | if k0 > kminus: 92 | break 93 | k = k0 94 | kminus = k 95 | kplus = kminus 96 | vmin = w[kplus] 97 | vmax = vmin + twolambda 98 | umin = stepsize 99 | umax = minlambda 100 | else: 101 | umax += w[k + 1] - vmax 102 | if umax > stepsize: 103 | while True: 104 | w[k0] = vmax 105 | k0 += 1 106 | if k0 > kplus: 107 | break 108 | k = k0 109 | kminus = k 110 | kplus = kminus 111 | vmax = w[kplus] 112 | vmin = vmax - twolambda 113 | umin = stepsize 114 | umax = minlambda 115 | else: # no jump necessary, we continue 116 | k += 1 117 | if umin >= stepsize: # update of vmin 118 | kminus = k 119 | vmin += (umin - stepsize) / (kminus - k0 + 1) 120 | umin = stepsize 121 | if umax <= minlambda: # update of vmax 122 | kplus = k 123 | vmax += (umax + stepsize) / (kplus - k0 + 1) 124 | umax = minlambda 125 | -------------------------------------------------------------------------------- /lightning/impl/randomkit/__init__.py: -------------------------------------------------------------------------------- 1 | from .random_fast import RandomState 2 | -------------------------------------------------------------------------------- /lightning/impl/randomkit/random_fast.pxd: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # cython: language_level=3 6 | # 7 | # Copyright 2005 Robert Kern (robert.kern@gmail.com) 8 | 9 | cdef extern from "randomkit.h": 10 | 11 | ctypedef struct rk_state: 12 | unsigned long key[624] 13 | int pos 14 | int has_gauss 15 | double gauss 16 | 17 | cdef class RandomState: 18 | 19 | cdef rk_state *internal_state 20 | cdef object initial_seed 21 | cpdef long randint(self, unsigned long high) 22 | -------------------------------------------------------------------------------- /lightning/impl/randomkit/random_fast.pyx: 
-------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # cython: language_level=3 6 | # 7 | # Copyright 2005 Robert Kern (robert.kern@gmail.com) 8 | 9 | from libc cimport stdlib 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | cdef extern from "randomkit.h": 15 | 16 | ctypedef struct rk_state: 17 | unsigned long key[624] 18 | int pos 19 | int has_gauss 20 | double gauss 21 | 22 | ctypedef enum rk_error: 23 | RK_NOERR = 0 24 | RK_ENODEV = 1 25 | RK_ERR_MAX = 2 26 | 27 | char *rk_strerror[2] 28 | 29 | # 0xFFFFFFFFUL 30 | unsigned long RK_MAX 31 | 32 | void rk_seed(unsigned long seed, rk_state *state) 33 | rk_error rk_randomseed(rk_state *state) 34 | unsigned long rk_random(rk_state *state) 35 | long rk_long(rk_state *state) 36 | unsigned long rk_ulong(rk_state *state) 37 | unsigned long rk_interval(unsigned long max, rk_state *state) 38 | double rk_double(rk_state *state) 39 | void rk_fill(void *buffer, size_t size, rk_state *state) 40 | rk_error rk_devfill(void *buffer, size_t size, int strong) 41 | rk_error rk_altfill(void *buffer, size_t size, int strong, 42 | rk_state *state) 43 | double rk_gauss(rk_state *state) 44 | 45 | cdef class RandomState: 46 | 47 | def __init__(self, seed=None): 48 | self.internal_state = stdlib.malloc(sizeof(rk_state)) 49 | self.initial_seed = seed 50 | self.seed(seed) 51 | 52 | def __dealloc__(self): 53 | if self.internal_state != NULL: 54 | stdlib.free(self.internal_state) 55 | self.internal_state = NULL 56 | 57 | def seed(self, seed=None): 58 | cdef rk_error errcode 59 | if seed is None: 60 | errcode = rk_randomseed(self.internal_state) 61 | elif type(seed) is int: 62 | rk_seed(seed, self.internal_state) 63 | elif isinstance(seed, np.integer): 64 | iseed = int(seed) 65 | rk_seed(iseed, self.internal_state) 66 | else: 67 | raise ValueError("Wrong seed") 68 | 69 | cpdef long randint(self, unsigned long high): 70 | return rk_interval(high, self.internal_state) 71 | 72 | def shuffle(self, object x): 73 | cdef int i, j 74 | cdef int copy 75 | 76 | i = len(x) - 1 77 | try: 78 | j = len(x[0]) 79 | except: 80 | j = 0 81 | 82 | if (j == 0): 83 | # adaptation of random.shuffle() 84 | while i > 0: 85 | j = rk_interval(i, self.internal_state) 86 | x[i], x[j] = x[j], x[i] 87 | i = i - 1 88 | else: 89 | # make copies 90 | copy = hasattr(x[0], 'copy') 91 | if copy: 92 | while(i > 0): 93 | j = rk_interval(i, self.internal_state) 94 | x[i], x[j] = x[j].copy(), x[i].copy() 95 | i = i - 1 96 | else: 97 | while(i > 0): 98 | j = rk_interval(i, self.internal_state) 99 | x[i], x[j] = x[j][:], x[i][:] 100 | i = i - 1 101 | 102 | def __reduce__(self): 103 | return (RandomState, (self.initial_seed, )) 104 | -------------------------------------------------------------------------------- /lightning/impl/randomkit/randomkit.h: -------------------------------------------------------------------------------- 1 | /* Random kit 1.3 */ 2 | 3 | /* 4 | * Copyright (c) 2003-2005, Jean-Sebastien Roy (js@jeannot.org) 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the 8 | * "Software"), to deal in the Software without restriction, including 9 | * without limitation the rights to use, copy, modify, merge, publish, 10 | * distribute, sublicense, and/or sell copies of the Software, and to 11 | * permit persons to whom the Software is furnished 
to do so, subject to 12 | * the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included 15 | * in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 | */ 25 | 26 | /* @(#) $Jeannot: randomkit.h,v 1.24 2005/07/21 22:14:09 js Exp $ */ 27 | 28 | /* 29 | * Typical use: 30 | * 31 | * { 32 | * rk_state state; 33 | * unsigned long seed = 1, random_value; 34 | * 35 | * rk_seed(seed, &state); // Initialize the RNG 36 | * ... 37 | * random_value = rk_random(&state); // Generate random values in [0..RK_MAX] 38 | * } 39 | * 40 | * Instead of rk_seed, you can use rk_randomseed which will get a random seed 41 | * from /dev/urandom (or the clock, if /dev/urandom is unavailable): 42 | * 43 | * { 44 | * rk_state state; 45 | * unsigned long random_value; 46 | * 47 | * rk_randomseed(&state); // Initialize the RNG with a random seed 48 | * ... 49 | * random_value = rk_random(&state); // Generate random values in [0..RK_MAX] 50 | * } 51 | */ 52 | 53 | /* 54 | * Useful macro: 55 | * RK_DEV_RANDOM: the device used for random seeding. 56 | * defaults to "/dev/urandom" 57 | */ 58 | 59 | #include 60 | 61 | #ifndef _RANDOMKIT_ 62 | #define _RANDOMKIT_ 63 | 64 | #define RK_STATE_LEN 624 65 | 66 | typedef struct rk_state_ 67 | { 68 | unsigned long key[RK_STATE_LEN]; 69 | int pos; 70 | int has_gauss; /* !=0: gauss contains a gaussian deviate */ 71 | double gauss; 72 | 73 | /* The rk_state structure has been extended to store the following 74 | * information for the binomial generator. If the input values of n or p 75 | * are different than nsave and psave, then the other parameters will be 76 | * recomputed. RTK 2005-09-02 */ 77 | 78 | int has_binomial; /* !=0: following parameters initialized for 79 | binomial */ 80 | double psave; 81 | long nsave; 82 | double r; 83 | double q; 84 | double fm; 85 | long m; 86 | double p1; 87 | double xm; 88 | double xl; 89 | double xr; 90 | double c; 91 | double laml; 92 | double lamr; 93 | double p2; 94 | double p3; 95 | double p4; 96 | 97 | } 98 | rk_state; 99 | 100 | typedef enum { 101 | RK_NOERR = 0, /* no error */ 102 | RK_ENODEV = 1, /* no RK_DEV_RANDOM device */ 103 | RK_ERR_MAX = 2 104 | } rk_error; 105 | 106 | /* error strings */ 107 | extern char *rk_strerror[RK_ERR_MAX]; 108 | 109 | /* Maximum generated random value */ 110 | #define RK_MAX 0xFFFFFFFFUL 111 | 112 | #ifdef __cplusplus 113 | extern "C" { 114 | #endif 115 | 116 | /* 117 | * Initialize the RNG state using the given seed. 118 | */ 119 | extern void rk_seed(unsigned long seed, rk_state *state); 120 | 121 | /* 122 | * Initialize the RNG state using a random seed. 123 | * Uses /dev/random or, when unavailable, the clock (see randomkit.c). 124 | * Returns RK_NOERR when no errors occurs. 125 | * Returns RK_ENODEV when the use of RK_DEV_RANDOM failed (for example because 126 | * there is no such device). In this case, the RNG was initialized using the 127 | * clock. 
128 | */ 129 | extern rk_error rk_randomseed(rk_state *state); 130 | 131 | /* 132 | * Returns a random unsigned long between 0 and RK_MAX inclusive 133 | */ 134 | extern unsigned long rk_random(rk_state *state); 135 | 136 | /* 137 | * Returns a random long between 0 and LONG_MAX inclusive 138 | */ 139 | extern long rk_long(rk_state *state); 140 | 141 | /* 142 | * Returns a random unsigned long between 0 and ULONG_MAX inclusive 143 | */ 144 | extern unsigned long rk_ulong(rk_state *state); 145 | 146 | /* 147 | * Returns a random unsigned long between 0 and max inclusive. 148 | */ 149 | extern unsigned long rk_interval(unsigned long max, rk_state *state); 150 | 151 | /* 152 | * Returns a random double between 0.0 and 1.0, 1.0 excluded. 153 | */ 154 | extern double rk_double(rk_state *state); 155 | 156 | /* 157 | * fill the buffer with size random bytes 158 | */ 159 | extern void rk_fill(void *buffer, size_t size, rk_state *state); 160 | 161 | /* 162 | * fill the buffer with randombytes from the random device 163 | * Returns RK_ENODEV if the device is unavailable, or RK_NOERR if it is 164 | * On Unix, if strong is defined, RK_DEV_RANDOM is used. If not, RK_DEV_URANDOM 165 | * is used instead. This parameter has no effect on Windows. 166 | * Warning: on most unixes RK_DEV_RANDOM will wait for enough entropy to answer 167 | * which can take a very long time on quiet systems. 168 | */ 169 | extern rk_error rk_devfill(void *buffer, size_t size, int strong); 170 | 171 | /* 172 | * fill the buffer using rk_devfill if the random device is available and using 173 | * rk_fill if is is not 174 | * parameters have the same meaning as rk_fill and rk_devfill 175 | * Returns RK_ENODEV if the device is unavailable, or RK_NOERR if it is 176 | */ 177 | extern rk_error rk_altfill(void *buffer, size_t size, int strong, 178 | rk_state *state); 179 | 180 | /* 181 | * return a random gaussian deviate with variance unity and zero mean. 
182 |  */
183 | extern double rk_gauss(rk_state *state);
184 | 
185 | #ifdef __cplusplus
186 | }
187 | #endif
188 | 
189 | #endif /* _RANDOMKIT_ */
190 | 
--------------------------------------------------------------------------------
/lightning/impl/randomkit/setup.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | from numpy import get_include
 4 | from numpy.distutils.core import setup
 5 | from numpy.distutils.misc_util import Configuration
 6 | 
 7 | 
 8 | def configuration(parent_package='', top_path=None):
 9 |     rnd_kit = 'randomkit'
10 |     rnd_fast = 'random_fast'
11 |     config = Configuration(rnd_kit, parent_package, top_path)
12 |     libs = []
13 |     if sys.platform == 'win32':
14 |         libs.append('Advapi32')
15 | 
16 |     config.add_extension(
17 |         rnd_fast,
18 |         sources=[f'{rnd_fast}.pyx', f'{rnd_kit}.c'],
19 |         language='c++',
20 |         libraries=libs,
21 |         include_dirs=[get_include()]
22 |     )
23 | 
24 |     config.add_subpackage('tests')
25 |     config.add_data_files(f'{rnd_fast}.pxd')
26 |     config.add_data_files(f'{rnd_kit}.h')
27 | 
28 |     return config
29 | 
30 | if __name__ == '__main__':
31 |     setup(**configuration(top_path='').todict())
32 | 
--------------------------------------------------------------------------------
/lightning/impl/randomkit/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/lightning/dbbe833963280e675c124bbd5caadfcb13d89bd7/lightning/impl/randomkit/tests/__init__.py
--------------------------------------------------------------------------------
/lightning/impl/randomkit/tests/test_random.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | import numpy as np
 3 | 
 4 | from lightning.impl.randomkit import RandomState
 5 | 
 6 | 
 7 | def test_randint():
 8 |     rs = RandomState(seed=0)
 9 |     vals = [rs.randint(10) for t in range(10000)]
10 |     np.testing.assert_almost_equal(np.mean(vals), 5.018)
11 | 
12 | 
13 | def test_shuffle():
14 |     ind = np.arange(10)
15 |     rs = RandomState(seed=0)
16 |     rs.shuffle(ind)
17 |     np.testing.assert_array_equal(ind, [2, 8, 4, 9, 1, 6, 7, 3, 0, 5])
18 | 
19 | 
20 | def test_random_state_pickle():
21 |     rs = RandomState(seed=0)
22 |     random_integer = rs.randint(5)
23 |     pickle_rs = pickle.dumps(rs)
24 |     pickle_rs = pickle.loads(pickle_rs)
25 |     pickle_random_integer = pickle_rs.randint(5)
26 |     assert random_integer == pickle_random_integer
27 | 
--------------------------------------------------------------------------------
/lightning/impl/sag_fast.pxd:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | # cython: cdivision=True
 3 | # cython: boundscheck=False
 4 | # cython: wraparound=False
 5 | # cython: language_level=3
 6 | #
 7 | # Authors: Mathieu Blondel
 8 | #          Fabian Pedregosa
 9 | #          Arnaud Rachez
10 | # License: BSD
11 | 
12 | cimport numpy as np
13 | 
14 | cdef class Penalty:
15 | 
16 |     # Whether it supports lagged updates. 0 indicates no support,
17 |     # and anything else indicates support.
18 | # In the case of no support, only the method 19 | # projection will be used and not projection_lagged 20 | cdef bint support_lagged 21 | 22 | cdef void projection(self, 23 | double* w, 24 | int* indices, 25 | double stepsize, 26 | int n_nz) 27 | 28 | cdef void projection_lagged(self, 29 | int t, 30 | double* w, 31 | double* g_sum, 32 | int* indices, 33 | double stepsize_prox, 34 | double stepsize_grad, 35 | double* lag_scaling, 36 | int n_nz, 37 | int* last, 38 | double* scaling_seq) 39 | 40 | cdef double regularization(self, np.ndarray[double, ndim=1]coef) 41 | -------------------------------------------------------------------------------- /lightning/impl/sdca.py: -------------------------------------------------------------------------------- 1 | # Author: Mathieu Blondel 2 | # License: BSD 3 | 4 | import numpy as np 5 | 6 | from sklearn.utils import check_random_state 7 | 8 | from .base import BaseClassifier, BaseRegressor 9 | from .dataset_fast import get_dataset 10 | from .sdca_fast import _prox_sdca_fit 11 | 12 | 13 | class _BaseSDCA(object): 14 | 15 | def _get_alpha2_lasso(self, y, alpha1): 16 | if self.loss == "squared": 17 | y_bar = 0.5 * np.mean(y ** 2) 18 | 19 | elif self.loss == "absolute": 20 | y_bar = np.mean(np.abs(y)) 21 | 22 | elif self.loss in ("hinge", "squared_hinge"): 23 | y_bar = 1.0 24 | 25 | elif self.loss == "smooth_hinge": 26 | if self.gamma < 1: 27 | y_bar = 1 - 0.5 * self.gamma 28 | else: 29 | y_bar = 0.5 / self.gamma 30 | 31 | else: 32 | raise ValueError("Unknown loss.") 33 | 34 | return self.tol * (alpha1 / y_bar) ** 2 35 | 36 | def _fit(self, X, Y): 37 | n_samples, n_features = X.shape 38 | n_vectors = Y.shape[1] 39 | 40 | ds = get_dataset(X, order="c") 41 | self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64) 42 | self.dual_coef_ = np.zeros((n_vectors, n_samples), dtype=np.float64) 43 | 44 | alpha1 = self.l1_ratio * self.alpha 45 | alpha2 = (1 - self.l1_ratio) * self.alpha 46 | 47 | if self.loss == "squared_hinge": 48 | # For consistency with the rest of lightning. 49 | alpha1 *= 0.5 50 | alpha2 *= 0.5 51 | 52 | tol = self.tol 53 | n_calls = n_samples if self.n_calls is None else self.n_calls 54 | rng = check_random_state(self.random_state) 55 | loss = self._get_loss() 56 | 57 | for i in range(n_vectors): 58 | y = Y[:, i] 59 | 60 | if self.l1_ratio == 1.0: 61 | # Prox-SDCA needs a strongly convex regularizer so adds some 62 | # L2 penalty (see paper). 63 | alpha2 = self._get_alpha2_lasso(y, alpha1) 64 | tol = self.tol * 0.5 65 | 66 | _prox_sdca_fit(self, ds, y, self.coef_[i], self.dual_coef_[i], 67 | alpha1, alpha2, loss, self.gamma, self.max_iter, 68 | tol, self.callback, n_calls, self.verbose, rng) 69 | 70 | return self 71 | 72 | 73 | class SDCAClassifier(BaseClassifier, _BaseSDCA): 74 | r""" 75 | Estimator for learning linear classifiers by (proximal) SDCA. 76 | 77 | Solves the following objective: 78 | 79 | .. code-block:: 80 | 81 | minimize_w 1 / n_samples * \sum_i loss(w^T x_i, y_i) 82 | + alpha * l1_ratio * ||w||_1 83 | + alpha * (1 - l1_ratio) * 0.5 * ||w||^2_2 84 | 85 | Parameters 86 | ---------- 87 | loss: string, {'squared', 'absolute', 'hinge', 'smooth_hinge', 'squared_hinge'} 88 | Loss function to use in the model. 89 | alpha: float 90 | Amount of regularization (see model formulation above). 91 | l1_ratio: float 92 | Ratio between the L1 and L2 regularization (see model formulation above). 
93 | gamma : float 94 | gamma parameter in the "smooth_hinge" loss (not used for other 95 | loss functions) 96 | tol : float 97 | stopping criterion tolerance. 98 | max_iter : int 99 | maximum number of outer iterations (also known as epochs). 100 | verbose : int 101 | verbosity level. Set positive to print progress information. 102 | callback : callable or None 103 | if given, callback(self) will be called on each outer iteration 104 | (epoch). 105 | random_state: int or RandomState 106 | Pseudo-random number generator state used for random sampling. 107 | """ 108 | 109 | def __init__(self, alpha=1.0, l1_ratio=0, loss="hinge", gamma=1.0, 110 | max_iter=100, tol=1e-3, callback=None, n_calls=None, verbose=0, 111 | random_state=None): 112 | self.alpha = alpha 113 | self.l1_ratio = l1_ratio 114 | self.loss = loss 115 | self.gamma = gamma 116 | self.max_iter = max_iter 117 | self.tol = tol 118 | self.callback = callback 119 | self.n_calls = n_calls 120 | self.verbose = verbose 121 | self.random_state = random_state 122 | 123 | def _get_loss(self): 124 | losses = { 125 | "squared": 0, 126 | "absolute": 1, 127 | "hinge": 2, 128 | "smooth_hinge": 3, 129 | "squared_hinge": 4, 130 | } 131 | return losses[self.loss] 132 | 133 | def fit(self, X, y): 134 | self._set_label_transformers(y) 135 | Y = np.asfortranarray(self.label_binarizer_.transform(y), 136 | dtype=np.float64) 137 | return self._fit(X, Y) 138 | 139 | 140 | class SDCARegressor(BaseRegressor, _BaseSDCA): 141 | r""" 142 | Estimator for learning linear regressors by (proximal) SDCA. 143 | 144 | Solves the following objective: 145 | 146 | .. code-block:: 147 | 148 | minimize_w 1 / n_samples * \sum_i loss(w^T x_i, y_i) 149 | + alpha * l1_ratio * ||w||_1 150 | + alpha * (1 - l1_ratio) * 0.5 * ||w||^2_2 151 | 152 | Parameters 153 | ---------- 154 | loss: string, {'squared', 'absolute'} 155 | Loss function to use in the model. 156 | alpha: float 157 | Amount of regularization (see model formulation above). 158 | l1_ratio: float 159 | Ratio between the L1 and L2 regularization (see model formulation above). 160 | tol : float 161 | stopping criterion tolerance. 162 | max_iter : int 163 | maximum number of outer iterations (also known as epochs). 164 | verbose : int 165 | verbosity level. Set positive to print progress information. 166 | callback : callable or None 167 | if given, callback(self) will be called on each outer iteration 168 | (epoch). 169 | random_state: int or RandomState 170 | Pseudo-random number generator state used for random sampling. 
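
    Examples
    --------
    >>> from sklearn.datasets import make_regression
    >>> from lightning.regression import SDCARegressor
    >>> X, y = make_regression(random_state=0)
    >>> reg = SDCARegressor().fit(X, y)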
171 | """ 172 | 173 | def __init__(self, alpha=1.0, l1_ratio=0, loss="squared", 174 | max_iter=100, tol=1e-3, callback=None, n_calls=None, verbose=0, 175 | random_state=None): 176 | self.alpha = alpha 177 | self.l1_ratio = l1_ratio 178 | self.loss = loss 179 | self.gamma = 1.0 180 | self.max_iter = max_iter 181 | self.tol = tol 182 | self.callback = callback 183 | self.n_calls = n_calls 184 | self.verbose = verbose 185 | self.random_state = random_state 186 | 187 | def _get_loss(self): 188 | losses = { 189 | "squared": 0, 190 | "absolute": 1, 191 | } 192 | return losses[self.loss] 193 | 194 | def fit(self, X, y): 195 | self.outputs_2d_ = len(y.shape) > 1 196 | Y = y.reshape(-1, 1) if not self.outputs_2d_ else y 197 | Y = Y.astype(np.float64) 198 | return self._fit(X, Y) 199 | -------------------------------------------------------------------------------- /lightning/impl/sdca_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # cython: language_level=3 6 | # 7 | # Author: Mathieu Blondel 8 | # License: BSD 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | ctypedef np.int64_t LONG 14 | 15 | from libc.math cimport fabs 16 | 17 | from lightning.impl.dataset_fast cimport RowDataset 18 | 19 | 20 | cdef _add_l2(double* data, 21 | int* indices, 22 | int n_nz, 23 | double* w, 24 | double update, 25 | double* regul): 26 | 27 | cdef int j, jj 28 | cdef double delta, w_old 29 | 30 | for jj in range(n_nz): 31 | j = indices[jj] 32 | delta = update * data[jj] 33 | w_old = w[j] 34 | w[j] += delta 35 | regul[0] += delta * (2 * w_old + delta) 36 | 37 | 38 | cdef inline double _truncate(double v, 39 | double sigma): 40 | if v > sigma: 41 | return v - sigma 42 | elif v < -sigma: 43 | return v + sigma 44 | else: 45 | return 0 46 | 47 | 48 | cdef _add_elastic(double* data, 49 | int* indices, 50 | int n_nz, 51 | double*w, 52 | double* v, 53 | double update, 54 | double* regul, 55 | double sigma): 56 | 57 | cdef int j, jj 58 | cdef double delta, w_old, v_old 59 | 60 | for jj in range(n_nz): 61 | j = indices[jj] 62 | delta = update * data[jj] 63 | v_old = v[j] 64 | w_old = w[j] 65 | v[j] += delta 66 | w[j] = _truncate(v[j], sigma) 67 | regul[0] -= v_old * w_old 68 | regul[0] += v[j] * w[j] 69 | 70 | 71 | cdef _sqnorms(RowDataset X, 72 | np.ndarray[double, ndim=1, mode='c'] sqnorms): 73 | 74 | cdef int n_samples = X.get_n_samples() 75 | cdef int i, j 76 | cdef double dot 77 | 78 | # Data pointers. 
79 | cdef double* data 80 | cdef int* indices 81 | cdef int n_nz 82 | 83 | for i in range(n_samples): 84 | X.get_row_ptr(i, &indices, &data, &n_nz) 85 | dot = 0 86 | for jj in range(n_nz): 87 | dot += data[jj] * data[jj] 88 | sqnorms[i] = dot 89 | 90 | 91 | cdef double _pred(double* data, 92 | int* indices, 93 | int n_nz, 94 | double* w): 95 | 96 | cdef int j, jj 97 | cdef double dot = 0 98 | 99 | for jj in range(n_nz): 100 | j = indices[jj] 101 | dot += w[j] * data[jj] 102 | 103 | return dot 104 | 105 | 106 | cdef _solve_subproblem(double*data, 107 | int* indices, 108 | int n_nz, 109 | double y, 110 | double* w, 111 | double* v, 112 | double* dcoef, 113 | int loss_func, 114 | double sqnorm, 115 | double scale, 116 | double sigma, 117 | double gamma, 118 | double* primal, 119 | double* dual, 120 | double* regul): 121 | 122 | cdef double pred, dcoef_old, residual, error, loss, update 123 | 124 | pred = _pred(data, indices, n_nz, w) 125 | 126 | dcoef_old = dcoef[0] 127 | 128 | if loss_func == 0: # square loss 129 | residual = pred - y 130 | loss = 0.5 * residual * residual 131 | update = -(dcoef_old + residual) / (1 + sqnorm * scale) 132 | dual[0] += update * (y - dcoef_old - 0.5 * update) 133 | 134 | elif loss_func == 1: # absolute loss 135 | residual = y - pred 136 | loss = fabs(residual) 137 | update = residual / (sqnorm * scale) + dcoef_old 138 | update = min(1.0, update) 139 | update = max(-1.0, update) 140 | update -= dcoef_old 141 | dual[0] += y * update 142 | 143 | elif loss_func == 2: # hinge loss 144 | error = 1 - y * pred 145 | loss = max(0.0, error) 146 | update = error / (sqnorm * scale) + dcoef_old * y 147 | update = min(1.0, update) 148 | update = max(0.0, update) 149 | update = y * update - dcoef_old 150 | dual[0] += y * update 151 | 152 | elif loss_func == 3: # smooth hinge loss 153 | error = 1 - y * pred 154 | 155 | if error < 0: 156 | loss = 0 157 | elif error > gamma: 158 | loss = error - 0.5 * gamma 159 | else: 160 | loss = 0.5 / gamma * error * error 161 | 162 | update = (error - gamma * dcoef_old * y) / (sqnorm * scale + gamma) 163 | update += dcoef_old * y 164 | update = min(1.0, update) 165 | update = max(0.0, update) 166 | update = y * update - dcoef_old 167 | dual[0] += y * update 168 | dual[0] -= gamma * dcoef_old * update 169 | dual[0] -= 0.5 * gamma * update * update 170 | 171 | elif loss_func == 4: # squared hinge loss 172 | # Update is the same as squared loss but with a truncation. 173 | residual = pred - y 174 | update = -(dcoef_old + residual) / (1 + sqnorm * scale) 175 | if (dcoef_old + update) * y < 0: 176 | update = -dcoef_old 177 | 178 | error = 1 - y * pred 179 | if error >= 0: 180 | loss = residual * residual 181 | 182 | dual[0] += (y - dcoef_old) * update - 0.5 * update * update 183 | 184 | # Use accumulated loss rather than true primal objective value, which is 185 | # expensive to compute. 
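    # The "gap" tested in the outer loop is therefore only a cheap surrogate
    # for the true duality gap: `primal` accumulates each sample's loss at
    # the iterate current when that sample was visited, while `dual` and
    # `regul` are maintained incrementally by the updates above.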
186 | primal[0] += loss 187 | 188 | if update != 0: 189 | dcoef[0] += update 190 | if sigma > 0: 191 | _add_elastic(data, indices, n_nz, w, v, update * scale, regul, 192 | sigma) 193 | else: 194 | _add_l2(data, indices, n_nz, w, update * scale, regul) 195 | 196 | 197 | def _prox_sdca_fit(self, 198 | RowDataset X, 199 | np.ndarray[double, ndim=1]y, 200 | np.ndarray[double, ndim=1]coef, 201 | np.ndarray[double, ndim=1]dual_coef, 202 | double alpha1, 203 | double alpha2, 204 | int loss_func, 205 | double gamma, 206 | int max_iter, 207 | double tol, 208 | callback, 209 | int n_calls, 210 | int verbose, 211 | rng): 212 | 213 | cdef int n_samples = X.get_n_samples() 214 | cdef int n_features = X.get_n_features() 215 | 216 | # Variables 217 | cdef double sigma, scale, primal, dual, regul, gap 218 | cdef int it, ii, i 219 | cdef int has_callback = callback is not None 220 | cdef LONG t 221 | 222 | # Pre-compute square norms. 223 | cdef np.ndarray[double, ndim=1, mode='c'] sqnorms 224 | sqnorms = np.zeros(n_samples, dtype=np.float64) 225 | _sqnorms(X, sqnorms) 226 | 227 | # Pointers 228 | cdef double* w = coef.data 229 | cdef double* dcoef = dual_coef.data 230 | cdef np.ndarray[double, ndim=1] v_data 231 | v_data = np.zeros(n_features, dtype=np.float64) 232 | cdef double* v = v_data.data 233 | cdef np.ndarray[int, ndim=1] sindices 234 | sindices = np.arange(n_samples, dtype=np.int32) 235 | 236 | # Data pointers. 237 | cdef double* data 238 | cdef int* indices 239 | cdef int n_nz 240 | 241 | if alpha1 > 0: # Elastic-net case 242 | sigma = alpha1 / alpha2 243 | else: # L2-only case 244 | sigma = 0 245 | 246 | scale = 1. / (alpha2 * n_samples) 247 | 248 | dual = 0 249 | regul = 0 250 | 251 | t = 0 252 | for it in range(max_iter): 253 | primal = 0 254 | 255 | rng.shuffle(sindices) 256 | 257 | for ii in range(n_samples): 258 | 259 | i = sindices[ii] 260 | 261 | if sqnorms[i] == 0: 262 | continue 263 | 264 | # Retrieve row. 
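                # (Samples with zero squared norm were skipped above: an
                # all-zero row cannot change the predictions, and several of
                # the closed-form updates in _solve_subproblem divide by
                # sqnorm * scale.)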
265 | X.get_row_ptr(i, &indices, &data, &n_nz) 266 | 267 | _solve_subproblem(data, indices, n_nz, y[i], w, v, dcoef + i, 268 | loss_func, sqnorms[i], scale, sigma, gamma, 269 | &primal, &dual, ®ul) 270 | 271 | if has_callback and t % n_calls == 0: 272 | ret = callback(self) 273 | if ret is not None: 274 | break 275 | 276 | t += 1 277 | 278 | # end for ii in range(n_samples) 279 | 280 | gap = (primal - dual) / n_samples + alpha2 * regul 281 | gap = fabs(gap) 282 | 283 | if verbose: 284 | print("iter", it + 1, gap) 285 | 286 | if gap <= tol: 287 | if verbose: 288 | print("Converged") 289 | break 290 | 291 | # for it in range(max_iter) 292 | 293 | for i in range(n_samples): 294 | dcoef[i] *= scale 295 | -------------------------------------------------------------------------------- /lightning/impl/setup.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | from numpy import get_include 4 | from numpy.distutils.core import setup 5 | from numpy.distutils.misc_util import Configuration 6 | 7 | 8 | def configuration(parent_package='', top_path=None): 9 | config = Configuration('impl', parent_package, top_path) 10 | 11 | randomdir = os.path.join(top_path, "lightning", "impl", "randomkit") 12 | currdir = os.path.dirname(os.path.abspath(__file__)) 13 | 14 | files = [ 15 | 'adagrad_fast', 16 | 'dataset_fast', 17 | 'dual_cd_fast', 18 | 'loss_fast', 19 | 'prank_fast', 20 | 'primal_cd_fast', 21 | 'prox_fast', 22 | 'sag_fast', 23 | 'sdca_fast', 24 | 'sgd_fast', 25 | 'svrg_fast', 26 | ] 27 | for f in files: 28 | config.add_extension(f, 29 | sources=[f'{f}.pyx'], 30 | language='c++', 31 | include_dirs=[get_include(), randomdir]) 32 | 33 | # add .pxd files to be re-used by third party software 34 | pxd_file = os.path.join(currdir, f'{f}.pxd') 35 | if os.path.exists(pxd_file): 36 | config.add_data_files(f'{f}.pxd') 37 | 38 | config.add_subpackage('datasets') 39 | config.add_subpackage('randomkit') 40 | config.add_subpackage('tests') 41 | 42 | return config 43 | 44 | if __name__ == '__main__': 45 | setup(**configuration(top_path='').todict()) 46 | -------------------------------------------------------------------------------- /lightning/impl/sgd_fast.pxd: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # cython: language_level=3 6 | # 7 | # Author: Mathieu Blondel 8 | # Peter Prettenhofer (loss functions) 9 | # License: BSD 10 | 11 | cdef class LossFunction: 12 | 13 | cpdef double loss(self, double p, double y) 14 | cpdef double get_update(self, double p, double y) 15 | 16 | 17 | cdef class ModifiedHuber(LossFunction): 18 | 19 | cpdef double loss(self, double p, double y) 20 | 21 | cpdef double get_update(self, double p, double y) 22 | 23 | 24 | cdef class Hinge(LossFunction): 25 | 26 | cdef double threshold 27 | 28 | cpdef double loss(self, double p, double y) 29 | 30 | cpdef double get_update(self, double p, double y) 31 | 32 | 33 | cdef class SmoothHinge(LossFunction): 34 | 35 | cdef double gamma 36 | 37 | cpdef double loss(self, double p, double y) 38 | 39 | cpdef double get_update(self, double p, double y) 40 | 41 | 42 | cdef class SquaredHinge(LossFunction): 43 | 44 | cdef double threshold 45 | 46 | cpdef double loss(self, double p, double y) 47 | 48 | cpdef double get_update(self, double p, double y) 49 | 50 | 51 | cdef class Log(LossFunction): 52 | 53 | cpdef double loss(self, double p, double 
y) 54 | 55 | cpdef double get_update(self, double p, double y) 56 | 57 | 58 | cdef class SquaredLoss(LossFunction): 59 | 60 | cpdef double loss(self, double p, double y) 61 | 62 | cpdef double get_update(self, double p, double y) 63 | 64 | 65 | cdef class Huber(LossFunction): 66 | 67 | cdef double c 68 | 69 | cpdef double loss(self, double p, double y) 70 | 71 | cpdef double get_update(self, double p, double y) 72 | 73 | 74 | cdef class EpsilonInsensitive(LossFunction): 75 | 76 | cdef double epsilon 77 | 78 | cpdef double loss(self, double p, double y) 79 | 80 | cpdef double get_update(self, double p, double y) 81 | 82 | -------------------------------------------------------------------------------- /lightning/impl/svrg.py: -------------------------------------------------------------------------------- 1 | # Author: Mathieu Blondel 2 | # License: BSD 3 | 4 | import numpy as np 5 | 6 | from .base import BaseClassifier, BaseRegressor 7 | from .dataset_fast import get_dataset 8 | from .svrg_fast import _svrg_fit 9 | 10 | from .sgd_fast import ModifiedHuber 11 | from .sgd_fast import SmoothHinge 12 | from .sgd_fast import SquaredHinge 13 | from .sgd_fast import Log 14 | from .sgd_fast import SquaredLoss 15 | 16 | 17 | class _BaseSVRG(object): 18 | 19 | def _finalize_coef(self): 20 | self.coef_ *= self.coef_scale_ 21 | self.coef_scale_.fill(1.0) 22 | 23 | def _fit(self, X, Y): 24 | n_samples, n_features = X.shape 25 | rng = self._get_random_state() 26 | loss = self._get_loss() 27 | n_vectors = Y.shape[1] 28 | n_inner = int(self.n_inner * n_samples) 29 | ds = get_dataset(X, order="c") 30 | 31 | self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64) 32 | full_grad = np.zeros_like(self.coef_) 33 | grad = np.zeros((n_vectors, n_samples), dtype=np.float64) 34 | self.coef_scale_ = np.ones(n_vectors, dtype=np.float64) 35 | 36 | for i in range(n_vectors): 37 | y = Y[:, i] 38 | 39 | _svrg_fit(self, ds, y, self.coef_[i], self.coef_scale_[i:], 40 | full_grad[i], grad[i], self.eta, self.alpha, loss, 41 | self.max_iter, n_inner, self.tol, self.verbose, 42 | self.callback, rng) 43 | 44 | return self 45 | 46 | 47 | class SVRGClassifier(BaseClassifier, _BaseSVRG): 48 | r""" 49 | Estimator for learning linear classifiers by SVRG. 50 | 51 | Solves the following objective: 52 | 53 | .. code-block:: 54 | 55 | minimize_w 1 / n_samples * \sum_i loss(w^T x_i, y_i) 56 | + alpha * 0.5 * ||w||^2_2 57 | """ 58 | 59 | def __init__(self, eta=1.0, alpha=1.0, loss="smooth_hinge", gamma=1.0, 60 | max_iter=10, n_inner=1.0, tol=1e-3, verbose=0, 61 | callback=None, random_state=None): 62 | self.eta = eta 63 | self.alpha = alpha 64 | self.loss = loss 65 | self.gamma = gamma 66 | self.max_iter = max_iter 67 | self.n_inner = n_inner 68 | self.tol = tol 69 | self.verbose = verbose 70 | self.callback = callback 71 | self.random_state = random_state 72 | 73 | def _get_loss(self): 74 | losses = { 75 | "modified_huber": ModifiedHuber(), 76 | "smooth_hinge": SmoothHinge(self.gamma), 77 | "squared_hinge": SquaredHinge(1.0), 78 | "log": Log(), 79 | "squared": SquaredLoss(), 80 | } 81 | return losses[self.loss] 82 | 83 | def fit(self, X, y): 84 | self._set_label_transformers(y) 85 | Y = np.asfortranarray(self.label_binarizer_.transform(y), 86 | dtype=np.float64) 87 | return self._fit(X, Y) 88 | 89 | 90 | class SVRGRegressor(BaseRegressor, _BaseSVRG): 91 | r""" 92 | Estimator for learning linear regressors by SVRG. 93 | 94 | Solves the following objective: 95 | 96 | .. 
code-block:: 97 | 98 | minimize_w 1 / n_samples * \sum_i loss(w^T x_i, y_i) 99 | + alpha * 0.5 * ||w||^2_2 100 | """ 101 | 102 | def __init__(self, eta=1.0, alpha=1.0, loss="squared", gamma=1.0, 103 | max_iter=10, n_inner=1.0, tol=1e-3, verbose=0, 104 | callback=None, random_state=None): 105 | self.eta = eta 106 | self.alpha = alpha 107 | self.loss = loss 108 | self.gamma = gamma 109 | self.max_iter = max_iter 110 | self.n_inner = n_inner 111 | self.tol = tol 112 | self.verbose = verbose 113 | self.callback = callback 114 | self.random_state = random_state 115 | 116 | def _get_loss(self): 117 | losses = { 118 | "squared": SquaredLoss(), 119 | } 120 | return losses[self.loss] 121 | 122 | def fit(self, X, y): 123 | self.outputs_2d_ = len(y.shape) > 1 124 | Y = y.reshape(-1, 1) if not self.outputs_2d_ else y 125 | Y = Y.astype(np.float64) 126 | return self._fit(X, Y) 127 | -------------------------------------------------------------------------------- /lightning/impl/svrg_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: cdivision=True 3 | # cython: boundscheck=False 4 | # cython: wraparound=False 5 | # cython: language_level=3 6 | # 7 | # Author: Mathieu Blondel 8 | # License: BSD 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | ctypedef np.int64_t LONG 14 | 15 | from libc.math cimport sqrt 16 | 17 | from lightning.impl.randomkit.random_fast cimport RandomState 18 | from lightning.impl.dataset_fast cimport RowDataset 19 | from lightning.impl.sgd_fast cimport LossFunction 20 | 21 | 22 | cdef double _pred(double* data, 23 | int* indices, 24 | int n_nz, 25 | double* w): 26 | 27 | cdef int j, jj 28 | cdef double dot = 0 29 | 30 | for jj in range(n_nz): 31 | j = indices[jj] 32 | dot += w[j] * data[jj] 33 | 34 | return dot 35 | 36 | 37 | cdef void _add(double* data, 38 | int* indices, 39 | int n_nz, 40 | double scale, 41 | double* w): 42 | cdef int jj, j 43 | 44 | for jj in range(n_nz): 45 | j = indices[jj] 46 | w[j] += scale * data[jj] 47 | 48 | 49 | def _svrg_fit(self, 50 | RowDataset X, 51 | np.ndarray[double, ndim=1]y, 52 | np.ndarray[double, ndim=1]coef, 53 | np.ndarray[double, ndim=1]coef_scale, 54 | np.ndarray[double, ndim=1]full_grad, 55 | np.ndarray[double, ndim=1]grad, 56 | double eta, 57 | double alpha, 58 | LossFunction loss, 59 | int max_iter, 60 | int n_inner, 61 | double tol, 62 | int verbose, 63 | callback, 64 | RandomState rng): 65 | 66 | cdef int n_samples = X.get_n_samples() 67 | cdef int n_features = X.get_n_features() 68 | 69 | # Variables. 70 | cdef int i, jj, j, it, t 71 | cdef double y_pred, scale, tmp, alpha_scaled 72 | cdef double violation, violation_init, violation_ratio 73 | cdef double eta_avg = eta / n_samples 74 | cdef double eta_alpha = eta * alpha 75 | cdef int has_callback = callback is not None 76 | 77 | # Data pointers. 78 | cdef double* data 79 | cdef int* indices 80 | cdef int n_nz 81 | 82 | # Buffers and pointers. 83 | cdef np.ndarray[int, ndim=1]last = np.zeros(n_features, dtype=np.int32) 84 | cdef double* w = coef.data 85 | cdef double* w_scale = coef_scale.data 86 | cdef double* fg = full_grad.data 87 | cdef double* g = grad.data 88 | 89 | for it in range(max_iter): 90 | 91 | # Reset full gradient 92 | for j in range(n_features): 93 | fg[j] = 0 94 | 95 | # Compute full gradient. 96 | for i in range(n_samples): 97 | 98 | # Retrieve sample i. 99 | X.get_row_ptr(i, &indices, &data, &n_nz) 100 | 101 | # Make prediction. 
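            # Note: this prediction is taken at the epoch's snapshot point.
            # g[i] caches the loss-gradient scalar of sample i there, and fg
            # accumulates the unnormalized full gradient, so the inner loop
            # can form the variance-reduced step
            #     grad_i(w) - grad_i(w_snapshot) + mean gradient
            # without another full sweep over the data.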
102 | y_pred = _pred(data, indices, n_nz, w) * w_scale[0] 103 | 104 | # A gradient is given by g[i] * X[i]. 105 | g[i] = -loss.get_update(y_pred, y[i]) 106 | 107 | _add(data, indices, n_nz, g[i], fg) 108 | 109 | # Compute optimality violation. 110 | violation = 0 111 | alpha_scaled = alpha * w_scale[0] 112 | for j in range(n_features): 113 | tmp = fg[j] / n_samples + alpha_scaled * w[j] 114 | violation += tmp * tmp 115 | violation = sqrt(violation) 116 | 117 | # Convergence monitoring. 118 | if it == 0: 119 | violation_init = violation 120 | 121 | violation_ratio = violation / violation_init 122 | 123 | if verbose: 124 | print(it + 1, violation_ratio) 125 | 126 | if violation_ratio <= tol: 127 | if verbose: 128 | print("Converged") 129 | break 130 | 131 | # Inner loop. 132 | for t in range(n_inner): 133 | i = rng.randint(n_samples - 1) 134 | 135 | # Retrieve sample i. 136 | X.get_row_ptr(i, &indices, &data, &n_nz) 137 | 138 | # Add deterministic part, just in time. 139 | if t > 0: 140 | for jj in range(n_nz): 141 | j = indices[jj] 142 | w[j] -= eta_avg / w_scale[0] * (t - last[j]) * fg[j] 143 | last[j] = t 144 | 145 | # Make prediction. 146 | y_pred = _pred(data, indices, n_nz, w) * w_scale[0] 147 | 148 | # A gradient is given by scale * X[i]. 149 | scale = -loss.get_update(y_pred, y[i]) 150 | 151 | w_scale[0] *= (1 - eta_alpha) 152 | 153 | # Add deterministic part. 154 | #for j in range(n_features): 155 | #w[j] -= eta_avg / w_scale * fg[j] 156 | 157 | # Add stochastic part. 158 | _add(data, indices, n_nz, eta * (g[i] - scale) / w_scale[0], w) 159 | 160 | # Take care of possible underflows. 161 | if w_scale[0] < 1e-9: 162 | for j in range(n_features): 163 | w[j] *= w_scale[0] 164 | w_scale[0] = 1.0 165 | 166 | # Finalize. 167 | for j in range(n_features): 168 | w[j] -= eta_avg / w_scale[0] * (n_inner - last[j]) * fg[j] 169 | last[j] = 0 170 | 171 | # Callback. 172 | if has_callback: 173 | ret = callback(self) 174 | if ret is not None: 175 | break 176 | 177 | # Rescale coefficients. 
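    # Fold the lazily maintained scale back into the coefficients so that
    # callers see the true weights: during the inner loop the L2 decay was
    # applied as the O(1) update w_scale[0] *= (1 - eta_alpha) instead of
    # touching all n_features entries.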
178 | for j in range(n_features): 179 | w[j] *= w_scale[0] 180 | w_scale[0] = 1.0 181 | -------------------------------------------------------------------------------- /lightning/impl/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/lightning/dbbe833963280e675c124bbd5caadfcb13d89bd7/lightning/impl/tests/__init__.py -------------------------------------------------------------------------------- /lightning/impl/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import scipy.sparse as sp 3 | 4 | from sklearn.datasets import load_iris 5 | 6 | from lightning.impl.datasets.samples_generator import make_classification 7 | 8 | 9 | @pytest.fixture(scope="module") 10 | def train_data(): 11 | iris = load_iris() 12 | return iris.data, iris.target 13 | 14 | 15 | @pytest.fixture(scope="module") 16 | def bin_train_data(train_data): 17 | X, y = train_data 18 | X_bin = X[y <= 1] 19 | y_bin = y[y <= 1] * 2 - 1 20 | return X_bin, y_bin 21 | 22 | 23 | @pytest.fixture(scope="module") 24 | def bin_dense_train_data(): 25 | bin_dense, bin_target = make_classification(n_samples=200, n_features=100, 26 | n_informative=5, 27 | n_classes=2, random_state=0) 28 | return bin_dense, bin_target 29 | 30 | 31 | @pytest.fixture(scope="module") 32 | def bin_sparse_train_data(bin_dense_train_data): 33 | bin_dense, bin_target = bin_dense_train_data 34 | bin_csr = sp.csr_matrix(bin_dense) 35 | return bin_csr, bin_target 36 | 37 | 38 | @pytest.fixture(scope="module") 39 | def mult_dense_train_data(): 40 | mult_dense, mult_target = make_classification(n_samples=300, n_features=100, 41 | n_informative=5, 42 | n_classes=3, random_state=0) 43 | return mult_dense, mult_target 44 | 45 | 46 | @pytest.fixture(scope="module") 47 | def mult_sparse_train_data(mult_dense_train_data): 48 | mult_dense, mult_target = mult_dense_train_data 49 | mult_sparse = sp.csr_matrix(mult_dense) 50 | return mult_sparse, mult_target 51 | -------------------------------------------------------------------------------- /lightning/impl/tests/test_adagrad.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from lightning.classification import AdaGradClassifier 5 | from lightning.regression import AdaGradRegressor 6 | from lightning.impl.adagrad_fast import _proj_elastic_all 7 | from lightning.impl.tests.utils import check_predict_proba 8 | 9 | 10 | def test_adagrad_elastic_hinge(bin_train_data): 11 | X_bin, y_bin = bin_train_data 12 | clf = AdaGradClassifier(alpha=0.5, l1_ratio=0.85, n_iter=10, random_state=0) 13 | clf.fit(X_bin, y_bin) 14 | assert not hasattr(clf, "predict_proba") 15 | assert clf.score(X_bin, y_bin) == 1.0 16 | 17 | 18 | def test_adagrad_elastic_smooth_hinge(bin_train_data): 19 | X_bin, y_bin = bin_train_data 20 | clf = AdaGradClassifier(alpha=0.5, l1_ratio=0.85, loss="smooth_hinge", 21 | n_iter=10, random_state=0) 22 | clf.fit(X_bin, y_bin) 23 | assert not hasattr(clf, "predict_proba") 24 | assert clf.score(X_bin, y_bin) == 1.0 25 | 26 | 27 | def test_adagrad_elastic_log(bin_train_data): 28 | X_bin, y_bin = bin_train_data 29 | clf = AdaGradClassifier(alpha=0.1, l1_ratio=0.85, loss="log", n_iter=10, 30 | random_state=0) 31 | clf.fit(X_bin, y_bin) 32 | assert clf.score(X_bin, y_bin) == 1.0 33 | check_predict_proba(clf, X_bin) 34 | 35 | 36 | def test_adagrad_hinge_multiclass(train_data): 37 
| X, y = train_data 38 | clf = AdaGradClassifier(alpha=1e-2, n_iter=100, loss="hinge", random_state=0) 39 | clf.fit(X, y) 40 | assert not hasattr(clf, "predict_proba") 41 | np.testing.assert_almost_equal(clf.score(X, y), 0.940, 3) 42 | 43 | 44 | def test_adagrad_classes_binary(bin_train_data): 45 | X_bin, y_bin = bin_train_data 46 | clf = AdaGradClassifier() 47 | assert not hasattr(clf, 'classes_') 48 | clf.fit(X_bin, y_bin) 49 | assert list(clf.classes_) == [-1, 1] 50 | 51 | 52 | def test_adagrad_classes_multiclass(train_data): 53 | X, y = train_data 54 | clf = AdaGradClassifier() 55 | assert not hasattr(clf, 'classes_') 56 | clf.fit(X, y) 57 | assert list(clf.classes_) == [0, 1, 2] 58 | 59 | 60 | def test_adagrad_callback(bin_train_data): 61 | class Callback(object): 62 | 63 | def __init__(self, X, y): 64 | self.X = X 65 | self.y = y 66 | self.acc = [] 67 | 68 | def __call__(self, clf, t): 69 | alpha1 = clf.l1_ratio * clf.alpha 70 | alpha2 = (1 - clf.l1_ratio) * clf.alpha 71 | _proj_elastic_all(clf.eta, t, clf.g_sum_[0], clf.g_norms_[0], 72 | alpha1, alpha2, 0, clf.coef_[0]) 73 | score = clf.score(self.X, self.y) 74 | self.acc.append(score) 75 | 76 | X_bin, y_bin = bin_train_data 77 | cb = Callback(X_bin, y_bin) 78 | clf = AdaGradClassifier(alpha=0.5, l1_ratio=0.85, n_iter=10, 79 | callback=cb, random_state=0) 80 | clf.fit(X_bin, y_bin) 81 | assert cb.acc[-1] == 1.0 82 | 83 | 84 | @pytest.mark.parametrize("loss", ["squared", "absolute"]) 85 | def test_adagrad_regression(loss, bin_train_data): 86 | X_bin, y_bin = bin_train_data 87 | reg = AdaGradRegressor(loss=loss) 88 | reg.fit(X_bin, y_bin) 89 | y_pred = np.sign(reg.predict(X_bin)) 90 | assert np.mean(y_bin == y_pred) == 1.0 91 | -------------------------------------------------------------------------------- /lightning/impl/tests/test_dataset.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import numpy as np 4 | import pytest 5 | import scipy.sparse as sp 6 | 7 | from sklearn.datasets import make_classification 8 | from sklearn.utils import check_random_state 9 | 10 | from lightning.impl.dataset_fast import ContiguousDataset 11 | from lightning.impl.dataset_fast import FortranDataset 12 | from lightning.impl.dataset_fast import CSRDataset 13 | from lightning.impl.dataset_fast import CSCDataset 14 | 15 | 16 | @pytest.fixture(scope="module") 17 | def test_data(): 18 | X, _ = make_classification(n_samples=20, n_features=100, 19 | n_informative=5, n_classes=2, random_state=0) 20 | X2, _ = make_classification(n_samples=10, n_features=100, 21 | n_informative=5, n_classes=2, random_state=0) 22 | 23 | # Sparsify datasets. 
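    # Zero out every entry below 0.3 (including all negative values) so the
    # CSR/CSC variants built below are genuinely sparse.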
24 | X[X < 0.3] = 0 25 | 26 | X_csr = sp.csr_matrix(X) 27 | X_csc = sp.csc_matrix(X) 28 | 29 | rs = check_random_state(0) 30 | cds = ContiguousDataset(X) 31 | fds = FortranDataset(np.asfortranarray(X)) 32 | csr_ds = CSRDataset(X_csr) 33 | csc_ds = CSCDataset(X_csc) 34 | 35 | return { 36 | "X": X, 37 | "X_csr": X_csr, 38 | "X_csc": X_csc, 39 | "contiguous_dataset": cds, 40 | "fortran_dataset": fds, 41 | "dataset_csr": csr_ds, 42 | "dataset_csc": csc_ds 43 | } 44 | 45 | 46 | def test_contiguous_get_row(test_data): 47 | X = test_data["X"] 48 | cds = test_data["contiguous_dataset"] 49 | ind = np.arange(X.shape[1]) 50 | for i in range(X.shape[0]): 51 | indices, data, n_nz = cds.get_row(i) 52 | np.testing.assert_array_equal(indices, ind) 53 | np.testing.assert_array_equal(data, X[i]) 54 | assert n_nz == X.shape[1] 55 | 56 | 57 | def test_csr_get_row(test_data): 58 | X = test_data["X"] 59 | csr_ds = test_data["dataset_csr"] 60 | for i in range(X.shape[0]): 61 | indices, data, n_nz = csr_ds.get_row(i) 62 | for jj in range(n_nz): 63 | j = indices[jj] 64 | assert X[i, j] == data[jj] 65 | 66 | 67 | def test_fortran_get_column(test_data): 68 | X = test_data["X"] 69 | fds = test_data["fortran_dataset"] 70 | ind = np.arange(X.shape[0]) 71 | for j in range(X.shape[1]): 72 | indices, data, n_nz = fds.get_column(j) 73 | np.testing.assert_array_equal(indices, ind) 74 | np.testing.assert_array_equal(data, X[:, j]) 75 | assert n_nz == X.shape[0] 76 | 77 | 78 | def test_csc_get_column(test_data): 79 | X = test_data["X"] 80 | csc_ds = test_data["dataset_csc"] 81 | for j in range(X.shape[1]): 82 | indices, data, n_nz = csc_ds.get_column(j) 83 | for ii in range(n_nz): 84 | i = indices[ii] 85 | assert X[i, j] == data[ii] 86 | 87 | 88 | def test_picklable_datasets(test_data): 89 | # Test that the datasets are picklable. 
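    # Round-trip each dataset wrapper through pickle and check that the
    # sample/feature counts survive reconstruction.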
90 | X = test_data["X"] 91 | for dataset in [ 92 | test_data["contiguous_dataset"], 93 | test_data["dataset_csr"], 94 | test_data["fortran_dataset"], 95 | test_data["dataset_csc"] 96 | ]: 97 | pds = pickle.dumps(dataset) 98 | dataset = pickle.loads(pds) 99 | assert dataset.get_n_samples() == X.shape[0] 100 | assert dataset.get_n_features() == X.shape[1] 101 | -------------------------------------------------------------------------------- /lightning/impl/tests/test_dual_cd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from sklearn.metrics.pairwise import linear_kernel 5 | from sklearn.datasets import make_regression 6 | 7 | from lightning.impl.dual_cd import LinearSVC 8 | from lightning.impl.dual_cd import LinearSVR 9 | from lightning.impl.dual_cd_fast import sparse_dot 10 | from lightning.impl.dataset_fast import get_dataset 11 | 12 | 13 | @pytest.fixture(scope="module") 14 | def reg_train_data(): 15 | reg_dense, reg_target = make_regression(n_samples=200, n_features=100, 16 | n_informative=5, random_state=0) 17 | return reg_dense, reg_target 18 | 19 | 20 | @pytest.mark.parametrize("data", ["bin_dense_train_data", "bin_sparse_train_data"]) 21 | def test_sparse_dot(data, request): 22 | X, _ = request.getfixturevalue(data) 23 | K = linear_kernel(X) 24 | K2 = np.zeros_like(K) 25 | ds = get_dataset(X) 26 | 27 | for i in range(X.shape[0]): 28 | for j in range(i, X.shape[0]): 29 | K2[i, j] = sparse_dot(ds, i, j) 30 | K2[j, i] = K[i, j] 31 | 32 | np.testing.assert_array_almost_equal(K, K2) 33 | 34 | 35 | @pytest.mark.parametrize("data", ["bin_dense_train_data", "bin_sparse_train_data"]) 36 | @pytest.mark.parametrize("loss", ["l1", "l2"]) 37 | def test_fit_linear_binary(data, loss, request): 38 | X, y = request.getfixturevalue(data) 39 | clf = LinearSVC(loss=loss, random_state=0, max_iter=10) 40 | clf.fit(X, y) 41 | assert list(clf.classes_) == [0, 1] 42 | assert clf.score(X, y) == 1.0 43 | y_pred = clf.decision_function(X).ravel() 44 | 45 | 46 | @pytest.mark.parametrize("data", ["bin_dense_train_data", "bin_sparse_train_data"]) 47 | @pytest.mark.parametrize("loss", ["l1", "l2"]) 48 | def test_fit_linear_binary_auc(data, loss, request): 49 | X, y = request.getfixturevalue(data) 50 | clf = LinearSVC(loss=loss, criterion="auc", random_state=0, max_iter=25) 51 | clf.fit(X, y) 52 | assert clf.score(X, y) == 1.0 53 | 54 | 55 | @pytest.mark.parametrize("data", ["mult_dense_train_data", "mult_sparse_train_data"]) 56 | def test_fit_linear_multi(data, request): 57 | X, y = request.getfixturevalue(data) 58 | clf = LinearSVC(random_state=0) 59 | clf.fit(X, y) 60 | assert list(clf.classes_) == [0, 1, 2] 61 | y_pred = clf.predict(X) 62 | acc = np.mean(y_pred == y) 63 | assert acc > 0.85 64 | 65 | 66 | @pytest.mark.parametrize("C", [0.1, 0.2]) 67 | def test_warm_start(bin_dense_train_data, C): 68 | bin_dense, bin_target = bin_dense_train_data 69 | clf = LinearSVC(warm_start=True, loss="l1", random_state=0, max_iter=100) 70 | clf.C = C 71 | clf.fit(bin_dense, bin_target) 72 | acc = clf.score(bin_dense, bin_target) 73 | assert acc > 0.99 74 | 75 | 76 | @pytest.mark.parametrize("fit_intercept", [True, False]) 77 | @pytest.mark.parametrize("loss", ["epsilon_insensitive", "l2"]) 78 | def test_linear_svr(reg_train_data, fit_intercept, loss): 79 | reg_dense, reg_target = reg_train_data 80 | reg = LinearSVR(random_state=0, fit_intercept=fit_intercept, loss=loss) 81 | reg.fit(reg_dense, reg_target) 82 | assert reg.score(reg_dense, 
94 | def test_linear_svr_warm_start(reg_train_data):
95 |     reg_dense, reg_target = reg_train_data
96 |     reg = LinearSVR(C=1e-3, random_state=0, warm_start=True)
97 |     reg.fit(reg_dense, reg_target)
98 |     assert reg.score(reg_dense, reg_target) > 0.96
99 |     # raise C and refit, starting from the previous solution
100 |     reg.C = 1
101 |     reg.fit(reg_dense, reg_target)
102 |     assert reg.score(reg_dense, reg_target) > 0.99
103 | 
--------------------------------------------------------------------------------
/lightning/impl/tests/test_fista.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | 
4 | from scipy.linalg import svd, diagsvd
5 | 
6 | from lightning.classification import FistaClassifier
7 | from lightning.regression import FistaRegressor
8 | from lightning.impl.penalty import project_simplex, L1Penalty
9 | 
10 | 
11 | @pytest.fixture(scope="module")
12 | def bin_dense_train_data(bin_dense_train_data):
13 |     bin_dense, bin_target = bin_dense_train_data
14 |     bin_target = bin_target * 2 - 1  # the FISTA tests expect labels in {-1, 1}
15 |     return bin_dense, bin_target
16 | 
17 | 
18 | @pytest.mark.parametrize("data", ["mult_dense_train_data", "mult_sparse_train_data"])
19 | def test_fista_multiclass_l1l2(data, request):
20 |     X, y = request.getfixturevalue(data)
21 |     clf = FistaClassifier(max_iter=200, penalty="l1/l2", multiclass=True)
22 |     clf.fit(X, y)
23 |     np.testing.assert_almost_equal(clf.score(X, y), 0.99, 2)
24 | 
25 | 
26 | @pytest.mark.parametrize("data", ["mult_dense_train_data", "mult_sparse_train_data"])
27 | def test_fista_multiclass_l1l2_log(data, request):
28 |     X, y = request.getfixturevalue(data)
29 |     clf = FistaClassifier(max_iter=200, penalty="l1/l2", loss="log",
30 |                           multiclass=True)
31 |     clf.fit(X, y)
32 |     np.testing.assert_almost_equal(clf.score(X, y), 0.90, 2)
33 | 
34 | 
35 | @pytest.mark.parametrize("data", ["mult_dense_train_data", "mult_sparse_train_data"])
36 | def test_fista_multiclass_l1l2_log_margin(data, request):
37 |     X, y = request.getfixturevalue(data)
38 |     clf = FistaClassifier(max_iter=200, penalty="l1/l2", loss="log_margin",
39 |                           multiclass=True)
40 |     clf.fit(X, y)
41 |     np.testing.assert_almost_equal(clf.score(X, y), 0.93, 2)
42 | 
43 | 
44 | @pytest.mark.parametrize("data", ["mult_dense_train_data", "mult_sparse_train_data"])
45 | def test_fista_multiclass_l1(data, request):
46 |     X, y = request.getfixturevalue(data)
47 |     clf = FistaClassifier(max_iter=200, penalty="l1", multiclass=True)
48 |     clf.fit(X, y)
49 |     np.testing.assert_almost_equal(clf.score(X, y), 0.98, 2)
50 | 
51 | 
52 | @pytest.mark.parametrize("data", ["mult_dense_train_data", "mult_sparse_train_data"])
53 | def test_fista_multiclass_tv1d(data, request):
54 |     X, y = request.getfixturevalue(data)
55 |     clf = FistaClassifier(max_iter=200, penalty="tv1d", multiclass=True)
56 |     clf.fit(X, y)
57 |     np.testing.assert_almost_equal(clf.score(X, y), 0.97, 2)
58 | 
59 |     # with a lot of regularization, coef_ should be constant
60 |     clf = FistaClassifier(max_iter=200, penalty="tv1d", multiclass=True, alpha=1e6)
61 |     clf.fit(X, y)
62 |     for i in range(clf.coef_.shape[0]):
63 |         np.testing.assert_array_almost_equal(
64 |             clf.coef_[i], np.mean(clf.coef_[i]) * np.ones(X.shape[1]))
65 | 
66 | 
67 | @pytest.mark.parametrize("data", ["mult_dense_train_data", "mult_sparse_train_data"])
68 | @pytest.mark.parametrize("penalty", ["l1/l2", "l1"])
69 | def test_fista_multiclass_no_line_search(data, penalty, request):
70 |     X, y = request.getfixturevalue(data)
71 |     clf = FistaClassifier(max_iter=500, 
penalty=penalty, multiclass=True, 72 | max_steps=0) 73 | clf.fit(X, y) 74 | np.testing.assert_almost_equal(clf.score(X, y), 0.94, 2) 75 | 76 | 77 | @pytest.mark.parametrize("data", ["bin_dense_train_data", "bin_sparse_train_data"]) 78 | def test_fista_bin_l1(data, request): 79 | X, y = request.getfixturevalue(data) 80 | clf = FistaClassifier(max_iter=200, penalty="l1") 81 | clf.fit(X, y) 82 | np.testing.assert_almost_equal(clf.score(X, y), 1.0, 2) 83 | 84 | 85 | @pytest.mark.parametrize("data", ["bin_dense_train_data", "bin_sparse_train_data"]) 86 | def test_fista_bin_l1_no_line_search(data, request): 87 | X, y = request.getfixturevalue(data) 88 | clf = FistaClassifier(max_iter=500, penalty="l1", max_steps=0) 89 | clf.fit(X, y) 90 | np.testing.assert_almost_equal(clf.score(X, y), 1.0, 2) 91 | 92 | 93 | @pytest.mark.parametrize("data", ["mult_dense_train_data", "mult_sparse_train_data"]) 94 | def test_fista_multiclass_trace(data, request): 95 | X, y = request.getfixturevalue(data) 96 | clf = FistaClassifier(max_iter=100, penalty="trace", multiclass=True) 97 | clf.fit(X, y) 98 | np.testing.assert_almost_equal(clf.score(X, y), 0.96, 2) 99 | 100 | 101 | def test_fista_bin_classes(bin_dense_train_data): 102 | X, y = bin_dense_train_data 103 | clf = FistaClassifier() 104 | clf.fit(X, y) 105 | assert list(clf.classes_) == [0, 1] 106 | 107 | 108 | def test_fista_multiclass_classes(mult_dense_train_data): 109 | X, y = mult_dense_train_data 110 | clf = FistaClassifier() 111 | clf.fit(X, y) 112 | assert list(clf.classes_) == [0, 1, 2] 113 | 114 | 115 | def test_fista_regression(bin_dense_train_data): 116 | X, y = bin_dense_train_data 117 | reg = FistaRegressor(max_iter=100, verbose=0) 118 | reg.fit(X, y) 119 | y_pred = np.sign(reg.predict(X)) 120 | np.testing.assert_almost_equal(np.mean(y == y_pred), 0.985) 121 | 122 | 123 | def test_fista_regression_simplex(): 124 | rng = np.random.RandomState(0) 125 | w = project_simplex(rng.rand(10)) 126 | X = rng.randn(1000, 10) 127 | y = np.dot(X, w) 128 | 129 | reg = FistaRegressor(penalty="simplex", max_iter=100, verbose=0) 130 | reg.fit(X, y) 131 | y_pred = reg.predict(X) 132 | error = np.sqrt(np.mean((y - y_pred) ** 2)) 133 | np.testing.assert_almost_equal(error, 0.000, 3) 134 | assert np.all(reg.coef_ >= -1e-12) 135 | np.testing.assert_almost_equal(np.sum(reg.coef_), 1.0, 3) 136 | 137 | 138 | def test_fista_regression_l1_ball(): 139 | rng = np.random.RandomState(0) 140 | alpha = 5.0 141 | w = project_simplex(rng.randn(10), alpha) 142 | X = rng.randn(1000, 10) 143 | y = np.dot(X, w) 144 | 145 | reg = FistaRegressor(penalty="l1-ball", alpha=alpha, max_iter=100, verbose=0) 146 | reg.fit(X, y) 147 | y_pred = reg.predict(X) 148 | error = np.sqrt(np.mean((y - y_pred) ** 2)) 149 | np.testing.assert_almost_equal(error, 0.000, 3) 150 | np.testing.assert_almost_equal(np.sum(np.abs(reg.coef_)), alpha, 3) 151 | 152 | 153 | def test_fista_regression_trace(): 154 | rng = np.random.RandomState(0) 155 | 156 | def _make_data(n_samples, n_features, n_tasks, n_components): 157 | W = rng.rand(n_tasks, n_features) - 0.5 158 | U, S, V = svd(W, full_matrices=True) 159 | S[n_components:] = 0 160 | S = diagsvd(S, U.shape[0], V.shape[0]) 161 | W = np.dot(np.dot(U, S), V) 162 | X = rng.rand(n_samples, n_features) - 0.5 163 | Y = np.dot(X, W.T) 164 | return X, Y, W 165 | 166 | X, Y, W = _make_data(200, 50, 30, 5) 167 | reg = FistaRegressor(max_iter=15, verbose=0) 168 | reg.fit(X, Y) 169 | Y_pred = reg.predict(X) 170 | error = (Y_pred - Y).ravel() 171 | error = np.dot(error, error) 172 
| np.testing.assert_almost_equal(error, 77.44, 2) 173 | 174 | 175 | @pytest.mark.parametrize("data", ["bin_dense_train_data", "bin_sparse_train_data"]) 176 | def test_fista_custom_prox(data, request): 177 | # test FISTA with a custom prox 178 | l1_pen = L1Penalty() 179 | X, y = request.getfixturevalue(data) 180 | clf = FistaClassifier(max_iter=500, penalty="l1", max_steps=0) 181 | clf.fit(X, y) 182 | 183 | clf2 = FistaClassifier(max_iter=500, penalty=l1_pen, max_steps=0) 184 | clf2.fit(X, y) 185 | np.testing.assert_array_almost_equal_nulp(clf.coef_.ravel(), clf2.coef_.ravel()) 186 | -------------------------------------------------------------------------------- /lightning/impl/tests/test_penalty.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from lightning.impl.penalty import project_l1_ball, project_simplex 5 | 6 | 7 | def project_simplex_bisection(v, z=1, tau=0.0001, max_iter=1000): 8 | lower = 0 9 | upper = np.max(v) 10 | current = np.inf 11 | 12 | for it in range(max_iter): 13 | if np.abs(current) / z < tau and current < 0: 14 | break 15 | 16 | theta = (upper + lower) / 2.0 17 | w = np.maximum(v - theta, 0) 18 | current = np.sum(w) - z 19 | if current <= 0: 20 | upper = theta 21 | else: 22 | lower = theta 23 | return w 24 | 25 | 26 | @pytest.mark.parametrize("size, z", [(100, 10), 27 | (3, 1), 28 | (2, 1)]) 29 | def test_proj_simplex(size, z): 30 | rng = np.random.RandomState(0) 31 | 32 | v = rng.rand(size) 33 | w = project_simplex(v, z=z) 34 | w2 = project_simplex_bisection(v, z=z, max_iter=100) 35 | np.testing.assert_array_almost_equal(w, w2, 3) 36 | 37 | 38 | def test_proj_l1_ball(): 39 | rng = np.random.RandomState(0) 40 | v = rng.randn(100) 41 | w = project_l1_ball(v, z=50) 42 | np.testing.assert_almost_equal(np.sum(np.abs(w)), 50) 43 | -------------------------------------------------------------------------------- /lightning/impl/tests/test_prank.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from sklearn.datasets import load_diabetes 5 | 6 | from lightning.ranking import PRank 7 | from lightning.ranking import KernelPRank 8 | 9 | 10 | @pytest.fixture(scope="module") 11 | def train_data(): 12 | bunch = load_diabetes() 13 | X, y = bunch.data, bunch.target 14 | y = np.round(y, decimals=-2) 15 | return X, y 16 | 17 | 18 | def test_prank(train_data): 19 | X, y = train_data 20 | est = PRank(n_iter=10, shuffle=False, random_state=0) 21 | est.fit(X, y) 22 | np.testing.assert_almost_equal(est.score(X, y), 41.86, 2) 23 | 24 | est = PRank(n_iter=10, shuffle=True, random_state=0) 25 | est.fit(X, y) 26 | np.testing.assert_almost_equal(est.score(X, y), 71.04, 2) 27 | 28 | 29 | def test_prank_linear_kernel(train_data): 30 | X, y = train_data 31 | est = KernelPRank(kernel="linear", n_iter=10, shuffle=False, 32 | random_state=0) 33 | est.fit(X, y) 34 | np.testing.assert_almost_equal(est.score(X, y), 41.86, 2) 35 | 36 | 37 | def test_prank_rbf_kernel(train_data): 38 | X, y = train_data 39 | est = KernelPRank(kernel="rbf", gamma=100, n_iter=10, shuffle=False, 40 | random_state=0) 41 | est.fit(X, y) 42 | np.testing.assert_almost_equal(est.score(X, y), 15.84, 2) 43 | -------------------------------------------------------------------------------- /lightning/impl/tests/test_primal_newton.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from 
lightning.impl.primal_newton import KernelSVC
4 | 
5 | 
6 | def test_kernel_svc(bin_dense_train_data):
7 |     bin_dense, bin_target = bin_dense_train_data
8 |     clf = KernelSVC(kernel="rbf", gamma=0.1, random_state=0, verbose=0)
9 |     clf.fit(bin_dense, bin_target)
10 |     np.testing.assert_almost_equal(clf.score(bin_dense, bin_target), 1.0)
11 |     assert list(clf.classes_) == [0, 1]
12 | 
--------------------------------------------------------------------------------
/lightning/impl/tests/test_prox.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | 
4 | from lightning.impl import prox_fast
5 | 
6 | 
7 | def test_tv1_denoise():
8 |     # Test the prox of 1D total variation. Since it's not trivial to check
9 |     # the KKT conditions directly, we check instead that the proximal point
10 |     # algorithm (applying the prox over and over) converges to a minimizer
11 |     # of the TV term, i.e. a constant vector.
12 |     n_iter = 100
13 |     n_features = 100
14 | 
15 |     # repeat the test 10 times with different (but reproducible) inputs
16 |     for nrun in range(10):
17 |         x = np.random.RandomState(nrun).randn(n_features)
18 |         for _ in range(n_iter):
19 |             prox_fast.prox_tv1d(x, 1.0)
20 |         # check that the solution is flat
21 |         np.testing.assert_allclose(x, x.mean() * np.ones(n_features))
22 | 
23 | 
24 | @pytest.mark.parametrize("dtype", [np.float32, np.float64])
25 | def test_tv1d_dtype(dtype):
26 |     # check that prox_tv1d preserves the input dtype (including 32-bit floats)
27 |     x = np.arange(5)
28 |     y = x.astype(dtype, copy=True)
29 |     prox_fast.prox_tv1d(y, 0.01)
30 |     assert y.dtype == dtype
31 | 
--------------------------------------------------------------------------------
/lightning/impl/tests/test_sdca.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | 
4 | from lightning.classification import SDCAClassifier
5 | from lightning.regression import SDCARegressor
6 | 
7 | 
8 | def test_sdca_hinge(bin_train_data):
9 |     X_bin, y_bin = bin_train_data
10 |     clf = SDCAClassifier(loss="hinge", random_state=0)
11 |     clf.fit(X_bin, y_bin)
12 |     assert not hasattr(clf, 'predict_proba')
13 |     assert clf.score(X_bin, y_bin) == 1.0
14 | 
15 | 
16 | def test_sdca_hinge_multiclass(train_data):
17 |     X, y = train_data
18 |     clf = SDCAClassifier(alpha=1e-2, max_iter=100, loss="hinge",
19 |                          random_state=0)
20 |     clf.fit(X, y)
21 |     np.testing.assert_almost_equal(clf.score(X, y), 0.933, 3)
22 | 
23 | 
24 | def test_sdca_squared(bin_train_data):
25 |     X_bin, y_bin = bin_train_data
26 |     clf = SDCAClassifier(loss="squared", random_state=0)
27 |     clf.fit(X_bin, y_bin)
28 |     assert not hasattr(clf, 'predict_proba')
29 |     assert clf.score(X_bin, y_bin) == 1.0
30 | 
31 | 
32 | def test_sdca_absolute(bin_train_data):
33 |     X_bin, y_bin = bin_train_data
34 |     clf = SDCAClassifier(loss="absolute", random_state=0)
35 |     clf.fit(X_bin, y_bin)
36 |     assert not hasattr(clf, 'predict_proba')
37 |     assert clf.score(X_bin, y_bin) == 1.0
38 | 
39 | 
40 | def test_sdca_hinge_elastic(bin_train_data):
41 |     X_bin, y_bin = bin_train_data
42 |     clf = SDCAClassifier(alpha=0.5, l1_ratio=0.85, loss="hinge",
43 |                          random_state=0)
44 |     clf.fit(X_bin, y_bin)
45 |     assert clf.score(X_bin, y_bin) == 1.0
46 | 
47 | 
48 | def test_sdca_smooth_hinge_elastic(bin_train_data):
49 |     X_bin, y_bin = bin_train_data
50 |     clf = SDCAClassifier(alpha=0.5, l1_ratio=0.85, loss="smooth_hinge",
51 |                          random_state=0)
52 |     clf.fit(X_bin, y_bin)
53 |     assert not hasattr(clf, 'predict_proba')
54 |     assert clf.score(X_bin, y_bin) == 1.0
55 | 
56 | 
57 | def test_sdca_squared_hinge_elastic(bin_train_data):
58 |     X_bin, y_bin = bin_train_data
59 |     clf = 
SDCAClassifier(alpha=0.5, l1_ratio=0.85, loss="squared_hinge", 60 | random_state=0) 61 | clf.fit(X_bin, y_bin) 62 | assert clf.score(X_bin, y_bin) == 1.0 63 | 64 | 65 | def test_sdca_hinge_l1_only(bin_train_data): 66 | X_bin, y_bin = bin_train_data 67 | clf = SDCAClassifier(alpha=0.5, l1_ratio=1.0, loss="hinge", tol=1e-2, 68 | max_iter=200, random_state=0) 69 | clf.fit(X_bin, y_bin) 70 | assert clf.score(X_bin, y_bin) == 1.0 71 | 72 | 73 | def test_sdca_smooth_hinge_l1_only(bin_train_data): 74 | X_bin, y_bin = bin_train_data 75 | clf = SDCAClassifier(alpha=0.5, l1_ratio=1.0, loss="smooth_hinge", 76 | tol=1e-2, max_iter=200, random_state=0) 77 | clf.fit(X_bin, y_bin) 78 | assert clf.score(X_bin, y_bin) == 1.0 79 | 80 | 81 | def test_sdca_squared_l1_only(bin_train_data): 82 | X_bin, y_bin = bin_train_data 83 | clf = SDCAClassifier(alpha=0.5, l1_ratio=1.0, loss="squared", tol=1e-2, 84 | max_iter=100, random_state=0) 85 | clf.fit(X_bin, y_bin) 86 | assert clf.score(X_bin, y_bin) == 1.0 87 | 88 | 89 | def test_sdca_absolute_l1_only(bin_train_data): 90 | X_bin, y_bin = bin_train_data 91 | clf = SDCAClassifier(alpha=0.5, l1_ratio=1.0, loss="absolute", 92 | tol=1e-2, max_iter=200, random_state=0) 93 | clf.fit(X_bin, y_bin) 94 | assert clf.score(X_bin, y_bin) == 1.0 95 | 96 | 97 | def test_sdca_callback(bin_train_data): 98 | class Callback(object): 99 | 100 | def __init__(self, X, y): 101 | self.X = X 102 | self.y = y 103 | self.acc = [] 104 | 105 | def __call__(self, clf): 106 | score = clf.score(self.X, self.y) 107 | self.acc.append(score) 108 | 109 | X_bin, y_bin = bin_train_data 110 | cb = Callback(X_bin, y_bin) 111 | clf = SDCAClassifier(alpha=0.5, l1_ratio=0.85, loss="hinge", 112 | callback=cb, random_state=0) 113 | clf.fit(X_bin, y_bin) 114 | assert cb.acc[0] == 0.5 115 | assert cb.acc[-1] == 1.0 116 | 117 | 118 | def test_bin_classes(bin_train_data): 119 | X_bin, y_bin = bin_train_data 120 | clf = SDCAClassifier() 121 | clf.fit(X_bin, y_bin) 122 | assert list(clf.classes_) == [-1, 1] 123 | 124 | 125 | def test_multiclass_classes(train_data): 126 | X, y = train_data 127 | clf = SDCAClassifier() 128 | clf.fit(X, y) 129 | assert list(clf.classes_) == [0, 1, 2] 130 | 131 | 132 | @pytest.mark.parametrize("loss", ["squared", "absolute"]) 133 | def test_sdca_regression(bin_train_data, loss): 134 | X_bin, y_bin = bin_train_data 135 | reg = SDCARegressor(loss=loss) 136 | reg.fit(X_bin, y_bin) 137 | y_pred = np.sign(reg.predict(X_bin)) 138 | assert np.mean(y_bin == y_pred) == 1.0 139 | -------------------------------------------------------------------------------- /lightning/impl/tests/test_sgd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from sklearn.datasets import make_regression 5 | 6 | from lightning.impl.datasets.samples_generator import make_nn_regression 7 | from lightning.impl.sgd import SGDClassifier 8 | from lightning.impl.sgd import SGDRegressor 9 | from lightning.impl.tests.utils import check_predict_proba 10 | 11 | 12 | @pytest.fixture(scope="module") 13 | def reg_train_data(): 14 | X, y = make_regression(n_samples=100, n_features=10, n_informative=8, 15 | random_state=0) 16 | return X, y 17 | 18 | 19 | @pytest.fixture(scope="module") 20 | def reg_nn_train_data(): 21 | X, y, _ = make_nn_regression(n_samples=100, n_features=10, n_informative=8, 22 | random_state=0) 23 | return X, y 24 | 25 | 26 | @pytest.mark.parametrize("data", ["bin_dense_train_data", "bin_sparse_train_data"]) 27 | 
@pytest.mark.parametrize("clf", [SGDClassifier(random_state=0, loss="hinge", 28 | fit_intercept=True, learning_rate="pegasos"), 29 | SGDClassifier(random_state=0, loss="hinge", 30 | fit_intercept=False, learning_rate="pegasos"), 31 | SGDClassifier(random_state=0, loss="hinge", 32 | fit_intercept=True, learning_rate="invscaling"), 33 | SGDClassifier(random_state=0, loss="hinge", 34 | fit_intercept=True, learning_rate="constant"), 35 | SGDClassifier(random_state=0, loss="squared_hinge", 36 | eta0=1e-2, 37 | fit_intercept=True, learning_rate="constant"), 38 | SGDClassifier(random_state=0, loss="log", 39 | fit_intercept=True, learning_rate="constant"), 40 | SGDClassifier(random_state=0, loss="modified_huber", 41 | fit_intercept=True, learning_rate="constant")]) 42 | def test_binary_linear_sgd(data, clf, request): 43 | X, y = request.getfixturevalue(data) 44 | clf.fit(X, y) 45 | assert clf.score(X, y) > 0.934 46 | assert list(clf.classes_) == [0, 1] 47 | if clf.loss in {'log', 'modified_huber'}: 48 | check_predict_proba(clf, X) 49 | else: 50 | assert not hasattr(clf, 'predict_proba') 51 | 52 | 53 | def test_multiclass_sgd(mult_dense_train_data): 54 | mult_dense, mult_target = mult_dense_train_data 55 | clf = SGDClassifier(random_state=0) 56 | clf.fit(mult_dense, mult_target) 57 | assert clf.score(mult_dense, mult_target) > 0.80 58 | assert list(clf.classes_) == [0, 1, 2] 59 | 60 | 61 | @pytest.mark.parametrize("data", ["mult_dense_train_data", "mult_sparse_train_data"]) 62 | @pytest.mark.parametrize("fit_intercept", [True, False]) 63 | def test_multiclass_hinge_sgd(data, fit_intercept, request): 64 | X, y = request.getfixturevalue(data) 65 | clf = SGDClassifier(loss="hinge", multiclass=True, 66 | fit_intercept=fit_intercept, random_state=0) 67 | clf.fit(X, y) 68 | assert clf.score(X, y) > 0.78 69 | 70 | 71 | @pytest.mark.parametrize("data", ["mult_dense_train_data", "mult_sparse_train_data"]) 72 | def test_multiclass_hinge_sgd_l1l2(data, request): 73 | X, y = request.getfixturevalue(data) 74 | clf = SGDClassifier(loss="hinge", penalty="l1/l2", 75 | multiclass=True, random_state=0) 76 | clf.fit(X, y) 77 | assert clf.score(X, y) > 0.75 78 | 79 | 80 | @pytest.mark.parametrize("data", ["mult_dense_train_data", "mult_sparse_train_data"]) 81 | @pytest.mark.parametrize("fit_intercept", [True, False]) 82 | def test_multiclass_squared_hinge_sgd(data, fit_intercept, request): 83 | X, y = request.getfixturevalue(data) 84 | clf = SGDClassifier(loss="squared_hinge", multiclass=True, 85 | learning_rate="constant", eta0=1e-3, 86 | fit_intercept=fit_intercept, random_state=0) 87 | clf.fit(X, y) 88 | assert clf.score(X, y) > 0.78 89 | 90 | 91 | @pytest.mark.parametrize("data", ["mult_dense_train_data", "mult_sparse_train_data"]) 92 | @pytest.mark.parametrize("fit_intercept", [True, False]) 93 | def test_multiclass_log_sgd(data, fit_intercept, request): 94 | X, y = request.getfixturevalue(data) 95 | clf = SGDClassifier(loss="log", multiclass=True, 96 | fit_intercept=fit_intercept, 97 | random_state=0) 98 | clf.fit(X, y) 99 | assert clf.score(X, y) > 0.78 100 | 101 | 102 | def test_regression_squared_loss(reg_train_data): 103 | X, y = reg_train_data 104 | reg = SGDRegressor(loss="squared", penalty="l2", learning_rate="constant", 105 | eta0=1e-2, random_state=0) 106 | 107 | reg.fit(X, y) 108 | pred = reg.predict(X) 109 | np.testing.assert_almost_equal(np.mean((pred - y) ** 2), 4.749, 3) 110 | 111 | 112 | @pytest.mark.parametrize("alpha", [0, 1e-6]) 113 | def test_regression_squared_loss_nn_l1(reg_nn_train_data, 
alpha): 114 | X, y = reg_nn_train_data 115 | reg = SGDRegressor(loss="squared", penalty="nn", learning_rate="constant", 116 | eta0=1e-1, alpha=alpha, random_state=0) 117 | 118 | reg.fit(X, y) 119 | pred = reg.predict(X) 120 | np.testing.assert_almost_equal(np.mean((pred - y) ** 2), 0.016, 3) 121 | assert (reg.coef_ >= 0).all() 122 | 123 | 124 | def test_regression_squared_loss_nn_l2(reg_nn_train_data): 125 | X, y = reg_nn_train_data 126 | 127 | reg = SGDRegressor(loss="squared", penalty="nnl2", learning_rate="constant", 128 | eta0=1e-1, alpha=1e-4, random_state=0) 129 | 130 | reg.fit(X, y) 131 | pred = reg.predict(X) 132 | np.testing.assert_almost_equal(np.mean((pred - y) ** 2), 0.016, 3) 133 | np.testing.assert_almost_equal(reg.coef_.sum(), 2.131, 3) 134 | assert (reg.coef_ >= 0).all() 135 | 136 | 137 | def test_regression_squared_loss_multiple_output(reg_train_data): 138 | X, y = reg_train_data 139 | reg = SGDRegressor(loss="squared", penalty="l2", learning_rate="constant", 140 | eta0=1e-2, random_state=0, max_iter=10) 141 | Y = np.zeros((len(y), 2)) 142 | Y[:, 0] = y 143 | Y[:, 1] = y 144 | reg.fit(X, Y) 145 | pred = reg.predict(X) 146 | np.testing.assert_almost_equal(np.mean((pred - Y) ** 2), 4.397, 3) 147 | -------------------------------------------------------------------------------- /lightning/impl/tests/test_svrg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from lightning.classification import SVRGClassifier 4 | from lightning.regression import SVRGRegressor 5 | 6 | 7 | def test_svrg(bin_train_data): 8 | X_bin, y_bin = bin_train_data 9 | clf = SVRGClassifier(eta=1e-3, max_iter=20, random_state=0, verbose=0) 10 | clf.fit(X_bin, y_bin) 11 | assert not hasattr(clf, 'predict_proba') 12 | assert clf.score(X_bin, y_bin) == 1.0 13 | 14 | 15 | def test_svrg_callback(bin_train_data): 16 | class Callback(object): 17 | 18 | def __init__(self, X, y): 19 | self.X = X 20 | self.y = y 21 | self.obj = [] 22 | 23 | def __call__(self, clf): 24 | clf._finalize_coef() 25 | y_pred = clf.decision_function(self.X).ravel() 26 | loss = (np.maximum(1 - self.y * y_pred, 0) ** 2).mean() 27 | coef = clf.coef_.ravel() 28 | regul = 0.5 * clf.alpha * np.dot(coef, coef) 29 | self.obj.append(loss + regul) 30 | 31 | X_bin, y_bin = bin_train_data 32 | cb = Callback(X_bin, y_bin) 33 | clf = SVRGClassifier(loss="squared_hinge", eta=1e-3, max_iter=20, 34 | random_state=0, callback=cb) 35 | clf.fit(X_bin, y_bin) 36 | assert np.all(np.diff(cb.obj) <= 0) 37 | 38 | 39 | def test_svrg_regression(bin_train_data): 40 | X_bin, y_bin = bin_train_data 41 | reg = SVRGRegressor(eta=1e-3) 42 | reg.fit(X_bin, y_bin) 43 | y_pred = np.sign(reg.predict(X_bin)) 44 | assert np.mean(y_bin == y_pred) == 1.0 45 | 46 | 47 | def test_bin_classes(bin_train_data): 48 | X_bin, y_bin = bin_train_data 49 | clf = SVRGClassifier() 50 | clf.fit(X_bin, y_bin) 51 | assert list(clf.classes_) == [-1, 1] 52 | 53 | 54 | def test_multiclass_classes(train_data): 55 | X, y = train_data 56 | clf = SVRGClassifier() 57 | clf.fit(X, y) 58 | assert list(clf.classes_) == [0, 1, 2] 59 | -------------------------------------------------------------------------------- /lightning/impl/tests/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | 4 | 5 | def check_predict_proba(clf, X): 6 | y_pred = clf.predict(X) 7 | n_samples = y_pred.shape[0] 8 | # normalize negative class to 0 (it is sometimes 0, sometimes 
-1)
9 |     y_pred = (y_pred == 1)
10 | 
11 |     # check that predict_proba results agree with predict
12 |     y_proba = clf.predict_proba(X)
13 |     assert y_proba.shape == (n_samples, 2)
14 |     y_proba_best = (y_proba.argmax(axis=1) == 1)
15 |     np.testing.assert_array_equal(y_proba_best, y_pred)
16 | 
17 |     # check that y_proba looks like probabilities
18 |     assert not (y_proba > 1).any()
19 |     assert not (y_proba < 0).any()
20 |     np.testing.assert_allclose(y_proba.sum(axis=1), 1.0)
21 | 
--------------------------------------------------------------------------------
/lightning/ranking.py:
--------------------------------------------------------------------------------
1 | from .impl.prank import PRank
2 | from .impl.prank import KernelPRank
--------------------------------------------------------------------------------
/lightning/regression.py:
--------------------------------------------------------------------------------
1 | from .impl.adagrad import AdaGradRegressor
2 | from .impl.dual_cd import LinearSVR
3 | from .impl.primal_cd import CDRegressor
4 | from .impl.fista import FistaRegressor
5 | from .impl.sag import SAGRegressor
6 | from .impl.sag import SAGARegressor
7 | from .impl.sdca import SDCARegressor
8 | from .impl.sgd import SGDRegressor
9 | from .impl.svrg import SVRGRegressor
--------------------------------------------------------------------------------
/lightning/setup.py:
--------------------------------------------------------------------------------
1 | import sys
2 | 
3 | from numpy.distutils.core import setup
4 | from numpy.distutils.misc_util import Configuration
5 | 
6 | 
7 | def cythonize_extensions(top_path, config):
8 |     try:
9 |         from Cython.Build import cythonize
10 |     except ModuleNotFoundError as e:
11 |         raise ModuleNotFoundError(
12 |             'Please install Cython in order to build lightning from source.') from e
13 | 
14 |     config.ext_modules = cythonize(config.ext_modules,
15 |                                    compiler_directives={'language_level': 3})
16 | 
17 | 
18 | def configuration(parent_package='', top_path=None):
19 |     config = Configuration('lightning', parent_package, top_path)
20 | 
21 |     config.add_subpackage('impl')
22 | 
23 |     # Skip cythonization during sdist: we do not want to ship the generated
24 |     # C/C++ files in release tarballs, since they are not necessarily forward
25 |     # compatible with future versions of Python.
26 |     if 'sdist' not in sys.argv:
27 |         cythonize_extensions(top_path, config)
28 | 
29 |     return config
30 | 
31 | if __name__ == '__main__':
32 |     setup(**configuration(top_path='').todict())
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | joblib
2 | numpy
3 | scikit-learn
4 | scipy
--------------------------------------------------------------------------------
/requirements_build.txt:
--------------------------------------------------------------------------------
1 | cython
2 | numpy
--------------------------------------------------------------------------------
/requirements_test.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 | pytest
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Copyright (C) 2012 Mathieu Blondel
4 | 
5 | import re
6 | import sys
7 | import os
8 | import setuptools
9 | 
10 | from distutils.command.sdist import sdist
11 | 
12 | from numpy.distutils.core import setup
13 | from numpy.distutils.misc_util import Configuration
14 | 
15 | 
16 | DISTNAME = 'sklearn-contrib-lightning'
17 | DESCRIPTION = ("Large-scale sparse linear classification, "
18 |                "regression and ranking in Python")
19 | with open('README.rst', encoding='utf-8') as f:
20 |     LONG_DESCRIPTION = f.read()
21 | MAINTAINER = 'Mathieu Blondel'
22 | MAINTAINER_EMAIL = 'mathieu@mblondel.org'
23 | URL = 'https://github.com/scikit-learn-contrib/lightning'
24 | LICENSE = 'new BSD'
25 | with open(os.path.join('lightning', '__init__.py'), encoding='utf-8') as f:
26 |     match = re.search(r'__version__[ ]*=[ ]*[\"\'](?P<version>.+)[\"\']',
27 |                       f.read())
28 | VERSION = match.group('version').strip()
29 | MIN_PYTHON_VERSION = '3.7'
30 | with open('requirements.txt', encoding='utf-8') as f:
31 |     REQUIREMENTS = [
32 |         line.strip()
33 |         for line in f.read().splitlines()
34 |         if line.strip()
35 |     ]
36 | 
37 | 
38 | def configuration(parent_package='', top_path=None):
39 |     if os.path.exists('MANIFEST'):
40 |         os.remove('MANIFEST')
41 | 
42 |     config = Configuration(None, parent_package, top_path)
43 | 
44 |     # Avoid a non-useful message:
45 |     # "Ignoring attempt to set 'name' (from ... "
46 |     config.set_options(ignore_setup_xxx_py=True,
47 |                        assume_default_configuration=True,
48 |                        delegate_options_to_subpackages=True,
49 |                        quiet=True)
50 | 
51 |     config.add_subpackage('lightning')
52 | 
53 |     return config
54 | 
55 | if __name__ == "__main__":
56 | 
57 |     old_path = os.getcwd()
58 |     local_path = os.path.dirname(os.path.abspath(sys.argv[0]))
59 | 
60 |     os.chdir(local_path)
61 |     sys.path.insert(0, local_path)
62 | 
63 |     setup(configuration=configuration,
64 |           name=DISTNAME,
65 |           maintainer=MAINTAINER,
66 |           python_requires=f'>={MIN_PYTHON_VERSION}',
67 |           install_requires=REQUIREMENTS,
68 |           include_package_data=True,
69 |           maintainer_email=MAINTAINER_EMAIL,
70 |           description=DESCRIPTION,
71 |           license=LICENSE,
72 |           url=URL,
73 |           version=VERSION,
74 |           download_url=URL,
75 |           long_description=LONG_DESCRIPTION,
76 |           zip_safe=False,  # not zip-safe: compiled extensions must be installed unzipped
77 |           cmdclass={"sdist": sdist},
78 |           classifiers=[
79 |               'Intended Audience :: Science/Research',
80 |               'Intended Audience :: Developers',
81 |               'License :: OSI Approved',
82 |               'Programming Language :: C',
83 |               'Programming Language :: Python :: 3',
84 |               'Topic :: Software Development',
85 |               'Topic :: Scientific/Engineering',
86 |               'Operating System :: Microsoft :: Windows',
87 |               'Operating System :: POSIX',
88 |               'Operating System :: Unix',
89 |               'Operating System :: MacOS'
90 |           ]
91 |           )
92 | 
--------------------------------------------------------------------------------