├── .circleci └── config.yml ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── pull_request_template.md └── workflows │ ├── circleci.yml │ ├── flake8.yml │ └── main.yml ├── .gitignore ├── CITATION.bib ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── doc ├── Makefile ├── _static │ ├── images │ │ ├── cox-tutorial │ │ │ ├── A_dot_v.png │ │ │ ├── A_transpose_dot_v.png │ │ │ └── structure_matrix_A.png │ │ ├── landingpage │ │ │ ├── compatible.png │ │ │ ├── compatible.webp │ │ │ ├── ease.png │ │ │ ├── ease.webp │ │ │ ├── energy.png │ │ │ ├── energy.webp │ │ │ ├── finance.png │ │ │ ├── finance.webp │ │ │ ├── healthcare.png │ │ │ ├── healthcare.webp │ │ │ ├── hero.png │ │ │ ├── hero.webp │ │ │ ├── inrialogo.png │ │ │ ├── inrialogo.webp │ │ │ ├── modular.png │ │ │ ├── modular.webp │ │ │ ├── performance.png │ │ │ └── performance.webp │ │ └── logo.svg │ ├── scripts │ │ ├── asciimath-defines.js │ │ ├── instantpage.min.js │ │ └── lazyload.js │ ├── style.css │ └── switcher.json ├── _templates │ ├── layout.html │ └── sidebar │ │ └── version_toggler.html ├── api.rst ├── changes │ ├── 0.1.rst │ ├── 0.2.rst │ ├── 0.3.rst │ ├── 0.4.rst │ ├── 0.5.rst │ └── whats_new.rst ├── conf.py ├── contribute.rst ├── getting_started.rst ├── index.rst ├── robots.txt ├── sphinxext │ ├── gh_substitutions.py │ └── github_link.py └── tutorials │ ├── add_datafit.rst │ ├── add_penalty.rst │ ├── alpha_max.rst │ ├── cox_datafit.rst │ ├── intercept.rst │ ├── intercept2.md │ ├── prox_nn_group_lasso.rst │ └── tutorials.rst ├── examples ├── README.txt ├── plot_compare_time.py ├── plot_group_logistic_regression.py ├── plot_lasso_vs_weighted.py ├── plot_logreg_various_penalties.py ├── plot_pen_prox.py ├── plot_reweighted_l1.py ├── plot_sparse_group_lasso.py ├── plot_sparse_recovery.py ├── plot_survival_analysis.py ├── plot_ucurve.py └── plot_zero_weights_lasso.py ├── pyproject.toml └── skglm ├── __init__.py ├── datafits ├── __init__.py ├── base.py ├── group.py ├── multi_task.py └── single_task.py ├── estimators.py ├── experimental ├── __init__.py ├── _plot_sqrt_lasso.py ├── pdcd_ws.py ├── quantile_regression.py ├── reweighted.py ├── sqrt_lasso.py └── tests │ ├── test_quantile_regression.py │ ├── test_reweighted.py │ └── test_sqrt_lasso.py ├── penalties ├── __init__.py ├── base.py ├── block_separable.py ├── non_separable.py └── separable.py ├── solvers ├── __init__.py ├── anderson_cd.py ├── base.py ├── common.py ├── fista.py ├── gram_cd.py ├── group_bcd.py ├── group_prox_newton.py ├── lbfgs.py ├── multitask_bcd.py └── prox_newton.py ├── tests ├── test_datafits.py ├── test_docstring_parameters.py ├── test_estimators.py ├── test_fista.py ├── test_gram_solver.py ├── test_group.py ├── test_lbfgs_solver.py ├── test_penalties.py ├── test_prox_newton.py ├── test_sparse_ops.py └── test_validation.py └── utils ├── __init__.py ├── anderson.py ├── data.py ├── jit_compilation.py ├── prox_funcs.py ├── sparse_ops.py └── validation.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | build_docs: 4 | docker: 5 | - image: cimg/python:3.10 6 | steps: 7 | - checkout 8 | - run: 9 | name: Set BASH_ENV 10 | command: | 11 | echo "set -e" >> $BASH_ENV 12 | echo "export DISPLAY=:99" >> $BASH_ENV 13 | echo "export OPENBLAS_NUM_THREADS=4" >> $BASH_ENV 14 | echo "export LIBSVMDATA_HOME=$HOME/celer_data/" >> $BASH_ENV 15 | echo "BASH_ENV:" 16 | cat $BASH_ENV 17 | 18 | - run: 19 | name: Merge with upstream 20 | command: | 21 | echo $(git log -1 
--pretty=%B) | tee gitlog.txt 22 | echo ${CI_PULL_REQUEST//*pull\//} | tee merge.txt 23 | if [[ $(cat merge.txt) != "" ]]; then 24 | echo "Merging $(cat merge.txt)"; 25 | git remote add upstream https://github.com/scikit-learn-contrib/skglm.git; 26 | git pull --ff-only upstream "refs/pull/$(cat merge.txt)/merge"; 27 | git fetch upstream main; 28 | fi 29 | 30 | # If both keys are in the same command only one is restored 31 | - restore_cache: 32 | keys: 33 | - pip-cache 34 | 35 | # Install Xvfb and related dependencies 36 | - run: 37 | name: Install Xvfb and dependencies 38 | command: | 39 | sudo apt-get update 40 | sudo apt-get install -y xvfb 41 | 42 | - run: 43 | name: Spin up Xvfb 44 | command: | 45 | /sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -screen 0 1400x900x24 -ac +extension GLX +render -noreset; 46 | 47 | - run: 48 | name: Get Python running 49 | command: | 50 | python -m pip install --user --upgrade --progress-bar off pip 51 | python -m pip install --user -e . 52 | python -m pip install --user .[doc] 53 | 54 | - save_cache: 55 | key: pip-cache 56 | paths: 57 | - ~/.cache/pip 58 | 59 | # Look at what we have and fail early if there is some library conflict 60 | - run: 61 | name: Check installation 62 | command: | 63 | which python 64 | python -c "import skglm" 65 | 66 | # Build docs 67 | - run: 68 | name: make html 69 | no_output_timeout: 120m 70 | command: | 71 | cd doc; 72 | make clean; 73 | make SPHINXOPTS=-v html; 74 | cd ..; 75 | 76 | 77 | # Add stable doc 78 | - run: 79 | name: add stable doc 80 | command: | 81 | set -e 82 | mkdir -p ~/.ssh 83 | echo -e "Host *\nStrictHostKeyChecking no" > ~/.ssh/config 84 | chmod og= ~/.ssh/config 85 | cd doc; 86 | make add-stable-doc; 87 | 88 | 89 | # upload to gh-pages 90 | - run: 91 | name: deploy 92 | command: | 93 | if [[ ${CIRCLE_BRANCH} == "main" ]]; then 94 | cd doc; 95 | pip install ghp-import; 96 | make install 97 | fi 98 | 99 | 100 | # Save the outputs 101 | - store_artifacts: 102 | path: doc/_build/html/ 103 | destination: dev 104 | - persist_to_workspace: 105 | root: doc/_build 106 | paths: 107 | - html 108 | 109 | 110 | workflows: 111 | version: 2 112 | 113 | default: 114 | jobs: 115 | - build_docs 116 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Report a problem 4 | --- 5 | 6 | ## Describe the bug 7 | 8 | 9 | 10 | ## Steps to reproduce 11 | 12 | 13 | 14 | 15 | **Additional context** 16 | 17 | 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest a new feature 4 | title: FEAT - Give a title to the feature request 5 | --- 6 | 7 | ## Description of the feature 8 | 9 | 10 | 11 | 12 | ## Considered alternatives 13 | 14 | 15 | 16 | 17 | **Additional context** 18 | 19 | 20 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Context of the PR 2 | 3 | 9 | 10 | ## Contributions of the PR 11 | 12 | 13 | 14 | 15 | ### Checks before merging PR 16 | 17 | - [ ] added documentation for any new feature 18 | 
- [ ] added unit tests 19 | - [ ] edited the [what's new](../doc/changes/whats_new.rst) (if applicable) 20 | -------------------------------------------------------------------------------- /.github/workflows/circleci.yml: -------------------------------------------------------------------------------- 1 | name: circleci-redirector 2 | on: [status] 3 | jobs: 4 | circleci_artifacts_redirector_job: 5 | runs-on: ubuntu-latest 6 | name: Run CircleCI artifacts redirector 7 | steps: 8 | - name: GitHub Action step 9 | uses: larsoner/circleci-artifacts-redirector-action@master 10 | with: 11 | api-token: ${{ secrets.CIRCLE_TOKEN }} 12 | repo-token: ${{ secrets.GITHUB_TOKEN }} 13 | artifact-path: 0/dev/index.html 14 | circleci-jobs: build_docs 15 | -------------------------------------------------------------------------------- /.github/workflows/flake8.yml: -------------------------------------------------------------------------------- 1 | name: linter 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'main' 7 | pull_request: 8 | branches: 9 | - 'main' 10 | 11 | jobs: 12 | lint: 13 | name: Lint code base 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - name: Checkout code 18 | uses: actions/checkout@v2 19 | 20 | - name: Setup Python 3.8 21 | uses: actions/setup-python@v2 22 | with: 23 | python-version: 3.8 24 | 25 | - name: Lint with flake 26 | run: | 27 | pip install flake8 28 | flake8 skglm/ --max-line-length=88 29 | 30 | - name: Check doc style with pydocstyle 31 | run: | 32 | pip install pydocstyle 33 | pydocstyle skglm --ignore='D100',D102,'D104','D105','D107','D203','D213','D413', 34 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: pytest 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'main' 7 | 8 | pull_request: 9 | 10 | jobs: 11 | test: 12 | name: Test Code 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Set up Python 3.10 17 | uses: actions/setup-python@v3 18 | with: 19 | python-version: "3.10" 20 | - name: Set up R 21 | uses: r-lib/actions/setup-r@v2 22 | - name: Install package and testing tools 23 | run: | 24 | python -m pip install --upgrade pip 25 | pip install . 26 | pip install .[test] 27 | python --version 28 | - name: Install other dependencies 29 | run: | 30 | pip install rpy2 31 | pip install statsmodels cvxopt 32 | pip install sortedl1 33 | # for testing Cox estimator 34 | pip install lifelines 35 | pip install pandas 36 | - name: Test with pytest 37 | run: pytest -v skglm/ 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.pyc 3 | 4 | /doc/_build/ 5 | /doc/gen_modules/ 6 | /doc/generated/ 7 | /doc/auto_examples/ 8 | /doc/sg_execution_times.rst 9 | 10 | __pycache__ 11 | __cache__ 12 | *_cache 13 | **/.DS_Store -------------------------------------------------------------------------------- /CITATION.bib: -------------------------------------------------------------------------------- 1 | @inproceedings{skglm, 2 | title = {Beyond L1: Faster and better sparse models with skglm}, 3 | author = {Q. Bertrand and Q. Klopfenstein and P.-A. Bannier and G. Gidel and M. 
Massias}, 4 | booktitle = {NeurIPS}, 5 | year = {2022}, 6 | } 7 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | As part of the [scikit-learn-contrib](https://github.com/scikit-learn-contrib) GitHub organization, we adopt the scikit-learn [code of conduct](https://github.com/scikit-learn/scikit-learn/blob/main/CODE_OF_CONDUCT.md). 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, scikit-learn-contrib 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | skglm logo 4 | 5 | 6 | ## A fast ⚡ and modular ⚒️ scikit-learn replacement for sparse GLMs 7 | 8 | ![build](https://github.com/scikit-learn-contrib/skglm/workflows/pytest/badge.svg) 9 | ![License](https://img.shields.io/badge/License-BSD_3--Clause-blue.svg) 10 | [![Downloads](https://static.pepy.tech/badge/skglm)](https://pepy.tech/project/skglm) 11 | [![Downloads](https://static.pepy.tech/badge/skglm/month)](https://pepy.tech/project/skglm) 12 | [![PyPI version](https://badge.fury.io/py/skglm.svg)](https://pypi.org/project/skglm/) 13 | 14 | 15 |
16 | 17 | 18 | ``skglm`` is a Python package that offers **fast estimators** for sparse Generalized Linear Models (GLMs) that are **100% compatible with ``scikit-learn``**. It is **highly flexible** and supports a wide range of GLMs. 19 | You get to choose from ``skglm``'s already-made estimators or **customize your own** by combining the available datafits and penalties. 20 | 21 | Excited to have a tour on ``skglm`` [documentation](https://contrib.scikit-learn.org/skglm/)? 22 | 23 | # Cite 24 | 25 | ``skglm`` is the result of perseverant research. It is licensed under [BSD 3-Clause](https://github.com/scikit-learn-contrib/skglm/blob/main/LICENSE). You are free to use it and if you do so, please cite 26 | 27 | ```bibtex 28 | @inproceedings{skglm, 29 | title = {Beyond L1: Faster and better sparse models with skglm}, 30 | author = {Q. Bertrand and Q. Klopfenstein and P.-A. Bannier and G. Gidel and M. Massias}, 31 | booktitle = {NeurIPS}, 32 | year = {2022}, 33 | } 34 | 35 | @article{moufad2023skglm, 36 | title={skglm: improving scikit-learn for regularized Generalized Linear Models}, 37 | author={Moufad, Badr and Bannier, Pierre-Antoine and Bertrand, Quentin and Klopfenstein, Quentin and Massias, Mathurin}, 38 | year={2023} 39 | } 40 | ``` 41 | 42 | # Why ``skglm``? 43 | 44 | ``skglm`` is specifically conceived to solve sparse GLMs. 45 | It supports many missing models in ``scikit-learn`` and ensures high performance. 46 | There are several reasons to opt for ``skglm`` among which: 47 | 48 | | | | 49 | | ----- | -------------- | 50 | | **Speed** | Fast solvers able to tackle large datasets, either dense or sparse, with millions of features **up to 100 times faster** than ``scikit-learn``| 51 | | **Modularity** | User-friendly API that enables **composing custom estimators** with any combination of its existing datafits and penalties | 52 | | **Extensibility** | Flexible design that makes it **simple and easy to implement new datafits and penalties**, a matter of few lines of code 53 | | **Compatibility** | Estimators **fully compatible with the ``scikit-learn`` API** and drop-in replacements of its GLM estimators 54 | | | | 55 | 56 | 57 | # Get started with ``skglm`` 58 | 59 | ## Installing ``skglm`` 60 | 61 | ``skglm`` is available on PyPi. Run the following command to get the latest version of the package 62 | 63 | ```shell 64 | pip install -U skglm 65 | ``` 66 | 67 | It is also available on conda-forge and can be installed using, for instance: 68 | 69 | ```shell 70 | conda install -c conda-forge skglm 71 | ``` 72 | 73 | ## First steps with ``skglm`` 74 | 75 | Once you installed ``skglm``, you can run the following code snippet to fit a MCP Regression model on a toy dataset 76 | 77 | ```python 78 | # import model to fit 79 | from skglm.estimators import MCPRegression 80 | # import util to create a toy dataset 81 | from skglm.utils.data import make_correlated_data 82 | 83 | # generate a toy dataset 84 | X, y, _ = make_correlated_data(n_samples=10, n_features=100) 85 | 86 | # init and fit estimator 87 | estimator = MCPRegression() 88 | estimator.fit(X, y) 89 | 90 | # print R² 91 | print(estimator.score(X, y)) 92 | ``` 93 | You can refer to the documentation to explore the list of ``skglm``'s already-made estimators. 94 | 95 | Didn't find one that suits you? you can still compose your own. 96 | Here is a code snippet that fits a MCP-regularized problem with Huber loss. 
97 | 98 | ```python 99 | # import datafit, penalty and GLM estimator 100 | from skglm.datafits import Huber 101 | from skglm.penalties import MCPenalty 102 | from skglm.estimators import GeneralizedLinearEstimator 103 | 104 | from skglm.utils.data import make_correlated_data 105 | from skglm.solvers import AndersonCD 106 | 107 | X, y, _ = make_correlated_data(n_samples=10, n_features=100) 108 | # create and fit GLM estimator with Huber loss and MCP penalty 109 | estimator = GeneralizedLinearEstimator( 110 | datafit=Huber(delta=1.), 111 | penalty=MCPenalty(alpha=1e-2, gamma=3), 112 | solver=AndersonCD() 113 | ) 114 | estimator.fit(X, y) 115 | ``` 116 | 117 | You will find detailed description on the supported datafits and penalties and how to combine them in the API section of the documentation. 118 | You can also take our tutorial to learn how to create your own datafit and penalty. 119 | 120 | 121 | # Contribute to ``skglm`` 122 | 123 | ``skglm`` is a continuous endeavour that relies on the community efforts to last and evolve. Your contribution is welcome and highly valuable. It can be 124 | 125 | - **bug report**: you may encounter a bug while using ``skglm``. Don't hesitate to report it on the [issue section](https://github.com/scikit-learn-contrib/skglm/issues). 126 | - **feature request**: you may want to extend/add new features to ``skglm``. You can use [the issue section](https://github.com/scikit-learn-contrib/skglm/issues) to make suggestions. 127 | - **pull request**: you may have fixed a bug, added a features, or even fixed a small typo in the documentation, ... you can submit a [pull request](https://github.com/scikit-learn-contrib/skglm/pulls) and we will reach out to you asap. 128 | 129 | 130 | 131 | 132 | 133 | # Useful links 134 | 135 | - link to documentation: https://contrib.scikit-learn.org/skglm/ 136 | - link to ``skglm`` arXiv article: https://arxiv.org/pdf/2204.07826.pdf 137 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | GITHUB_PAGES_BRANCH = gh-pages 11 | OUTPUTDIR = _build/html 12 | STABLE_DOC_DIR = stable 13 | 14 | # User-friendly check for sphinx-build 15 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 16 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 17 | endif 18 | 19 | # Internal variables. 20 | PAPEROPT_a4 = -D latex_paper_size=a4 21 | PAPEROPT_letter = -D latex_paper_size=letter 22 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 23 | # the i18n builder cannot share the environment and doctrees with the others 24 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
25 | 26 | .PHONY: help 27 | help: 28 | @echo "Please use \`make ' where is one of" 29 | @echo " html-noplot to make standalone HTML files, without plotting anything" 30 | @echo " html to make standalone HTML files" 31 | @echo " dirhtml to make HTML files named index.html in directories" 32 | @echo " singlehtml to make a single large HTML file" 33 | @echo " pickle to make pickle files" 34 | @echo " htmlhelp to make HTML files and a HTML help project" 35 | @echo " qthelp to make HTML files and a qthelp project" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " changes to make an overview of all changed/added/deprecated items" 39 | @echo " linkcheck to check all external links for integrity" 40 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 41 | @echo " coverage to run coverage check of the documentation (if enabled)" 42 | @echo " install to make the html and push it online" 43 | 44 | .PHONY: clean 45 | 46 | clean: 47 | rm -rf $(BUILDDIR)/* 48 | rm -rf auto_examples/ 49 | rm -rf generated/* 50 | rm -rf modules/* 51 | 52 | html-noplot: 53 | $(SPHINXBUILD) -D plot_gallery=0 -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | .PHONY: html 58 | html: 59 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 60 | @echo 61 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 62 | 63 | .PHONY: dirhtml 64 | dirhtml: 65 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 66 | @echo 67 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 68 | 69 | .PHONY: singlehtml 70 | singlehtml: 71 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 72 | @echo 73 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 74 | 75 | .PHONY: pickle 76 | pickle: 77 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 78 | @echo 79 | @echo "Build finished; now you can process the pickle files." 80 | 81 | .PHONY: htmlhelp 82 | htmlhelp: 83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 84 | @echo 85 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 86 | ".hhp project file in $(BUILDDIR)/htmlhelp." 87 | 88 | .PHONY: qthelp 89 | qthelp: 90 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 91 | @echo 92 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 93 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 94 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/skglm.qhcp" 95 | @echo "To view the help file:" 96 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/skglms.qhc" 97 | 98 | .PHONY: latex 99 | latex: 100 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 101 | @echo 102 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 103 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 104 | "(use \`make latexpdf' here to do that automatically)." 105 | 106 | .PHONY: latexpdf 107 | latexpdf: 108 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 109 | @echo "Running LaTeX files through pdflatex..." 110 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 111 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 
112 | 113 | .PHONY: changes 114 | changes: 115 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 116 | @echo 117 | @echo "The overview file is in $(BUILDDIR)/changes." 118 | 119 | .PHONY: linkcheck 120 | linkcheck: 121 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 122 | @echo 123 | @echo "Link check complete; look for any errors in the above output " \ 124 | "or in $(BUILDDIR)/linkcheck/output.txt." 125 | 126 | .PHONY: doctest 127 | doctest: 128 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 129 | @echo "Testing of doctests in the sources finished, look at the " \ 130 | "results in $(BUILDDIR)/doctest/output.txt." 131 | 132 | .PHONY: coverage 133 | coverage: 134 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 135 | @echo "Testing of coverage in the sources finished, look at the " \ 136 | "results in $(BUILDDIR)/coverage/python.txt." 137 | 138 | install: 139 | touch $(OUTPUTDIR)/.nojekyll 140 | ghp-import -m "Generate Pelican site [ci skip]" -b $(GITHUB_PAGES_BRANCH) $(OUTPUTDIR) 141 | git push origin $(GITHUB_PAGES_BRANCH) 142 | 143 | .PHONY: add-stable-doc 144 | add-stable-doc: 145 | # switch to GITHUB_PAGES_BRANCH where stable build is located 146 | git fetch origin $(GITHUB_PAGES_BRANCH) 147 | git checkout $(GITHUB_PAGES_BRANCH) 148 | git pull origin $(GITHUB_PAGES_BRANCH) 149 | # move the content of the stable build to the output dir 150 | mv ../$(STABLE_DOC_DIR) $(OUTPUTDIR) 151 | # switch back to main and get to doc directory 152 | git checkout main 153 | -------------------------------------------------------------------------------- /doc/_static/images/cox-tutorial/A_dot_v.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/cox-tutorial/A_dot_v.png -------------------------------------------------------------------------------- /doc/_static/images/cox-tutorial/A_transpose_dot_v.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/cox-tutorial/A_transpose_dot_v.png -------------------------------------------------------------------------------- /doc/_static/images/cox-tutorial/structure_matrix_A.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/cox-tutorial/structure_matrix_A.png -------------------------------------------------------------------------------- /doc/_static/images/landingpage/compatible.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/compatible.png -------------------------------------------------------------------------------- /doc/_static/images/landingpage/compatible.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/compatible.webp -------------------------------------------------------------------------------- /doc/_static/images/landingpage/ease.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/ease.png -------------------------------------------------------------------------------- /doc/_static/images/landingpage/ease.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/ease.webp -------------------------------------------------------------------------------- /doc/_static/images/landingpage/energy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/energy.png -------------------------------------------------------------------------------- /doc/_static/images/landingpage/energy.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/energy.webp -------------------------------------------------------------------------------- /doc/_static/images/landingpage/finance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/finance.png -------------------------------------------------------------------------------- /doc/_static/images/landingpage/finance.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/finance.webp -------------------------------------------------------------------------------- /doc/_static/images/landingpage/healthcare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/healthcare.png -------------------------------------------------------------------------------- /doc/_static/images/landingpage/healthcare.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/healthcare.webp -------------------------------------------------------------------------------- /doc/_static/images/landingpage/hero.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/hero.png -------------------------------------------------------------------------------- /doc/_static/images/landingpage/hero.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/hero.webp -------------------------------------------------------------------------------- /doc/_static/images/landingpage/inrialogo.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/inrialogo.png -------------------------------------------------------------------------------- /doc/_static/images/landingpage/inrialogo.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/inrialogo.webp -------------------------------------------------------------------------------- /doc/_static/images/landingpage/modular.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/modular.png -------------------------------------------------------------------------------- /doc/_static/images/landingpage/modular.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/modular.webp -------------------------------------------------------------------------------- /doc/_static/images/landingpage/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/performance.png -------------------------------------------------------------------------------- /doc/_static/images/landingpage/performance.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/performance.webp -------------------------------------------------------------------------------- /doc/_static/scripts/asciimath-defines.js: -------------------------------------------------------------------------------- 1 | window.MathJax.startup = { 2 | ready: () => { 3 | AM = MathJax.InputJax.AsciiMath.AM; 4 | AM.newsymbol({ input: "ell", tag: "mi", output: "\u2113", tex: "ell", ttype: AM.TOKEN.CONST }); 5 | AM.newsymbol({ input: "||", tag: "mi", output: "\u2225", tex: "Vert", ttype: AM.TOKEN.CONST }); 6 | AM.newsymbol({ input: "triangleq", tag: "mo", output: "\u225C", tex: "triangleq", ttype: AM.TOKEN.CONST }); 7 | MathJax.startup.defaultReady(); 8 | } 9 | }; 10 | -------------------------------------------------------------------------------- /doc/_static/scripts/instantpage.min.js: -------------------------------------------------------------------------------- 1 | /*! 
instant.page v5.1.0 - (C) 2019 Alexandre Dieulot - https://instant.page/license */ 2 | document.addEventListener("DOMContentLoaded", function () { 3 | const supportsPrefetch = document.createElement("link").relList.supports("prefetch"); 4 | if (!supportsPrefetch) return; 5 | const links = document.querySelectorAll("a[href]"); 6 | links.forEach(link => { 7 | link.addEventListener("mouseover", () => { 8 | const prefetch = document.createElement("link"); 9 | prefetch.rel = "prefetch"; 10 | prefetch.href = link.href; 11 | document.head.appendChild(prefetch); 12 | }); 13 | }); 14 | }); -------------------------------------------------------------------------------- /doc/_static/scripts/lazyload.js: -------------------------------------------------------------------------------- 1 | document.addEventListener("DOMContentLoaded", function () { 2 | document.querySelectorAll("img").forEach(function (img) { 3 | const src = img.getAttribute("src") || ""; 4 | if ( 5 | src.includes("logo.svg") || 6 | img.classList.contains("hero-gallery-img") 7 | ) { 8 | // Don't lazy-load logo or hero image 9 | return; 10 | } 11 | img.setAttribute("loading", "lazy"); 12 | }); 13 | document.body.classList.add("ready"); 14 | }); -------------------------------------------------------------------------------- /doc/_static/switcher.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "version": "dev", 4 | "name": "0.5dev", 5 | "url": "https://contrib.scikit-learn.org/skglm/", 6 | "preferred": true 7 | }, 8 | { 9 | "version": "0.3.1", 10 | "name": "v0.3.1", 11 | "url": "https://contrib.scikit-learn.org/skglm/stable/" 12 | } 13 | ] 14 | -------------------------------------------------------------------------------- /doc/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | 3 | {% block extrahead %} 4 | {{ super() }} 5 | 6 | 7 | {% endblock %} -------------------------------------------------------------------------------- /doc/_templates/sidebar/version_toggler.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/api.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | .. meta:: 4 | :description: Browse the skglm API documentation covering estimators (Lasso, ElasticNet, Cox), penalties (L1, SCAD, MCP), datafits (Logistic, Poisson), and optimized solvers. 5 | 6 | ================= 7 | API 8 | ================= 9 | 10 | .. currentmodule:: skglm 11 | 12 | Estimators 13 | ========== 14 | 15 | .. currentmodule:: skglm 16 | 17 | .. autosummary:: 18 | :toctree: generated/ 19 | 20 | GeneralizedLinearEstimator 21 | CoxEstimator 22 | ElasticNet 23 | GroupLasso 24 | Lasso 25 | LinearSVC 26 | SparseLogisticRegression 27 | MCPRegression 28 | MultiTaskLasso 29 | WeightedLasso 30 | 31 | 32 | Penalties 33 | ========= 34 | 35 | 36 | .. currentmodule:: skglm.penalties 37 | 38 | .. autosummary:: 39 | :toctree: generated/ 40 | 41 | IndicatorBox 42 | L0_5 43 | L1 44 | L1_plus_L2 45 | L2 46 | L2_3 47 | LogSumPenalty 48 | MCPenalty 49 | PositiveConstraint 50 | WeightedL1 51 | WeightedGroupL2 52 | WeightedMCPenalty 53 | SCAD 54 | BlockSCAD 55 | SLOPE 56 | 57 | 58 | Datafits 59 | ======== 60 | 61 | .. currentmodule:: skglm.datafits 62 | 63 | .. 
autosummary:: 64 | :toctree: generated/ 65 | 66 | Cox 67 | Gamma 68 | Huber 69 | Logistic 70 | LogisticGroup 71 | Poisson 72 | Quadratic 73 | QuadraticGroup 74 | QuadraticHessian 75 | QuadraticSVC 76 | WeightedQuadratic 77 | 78 | 79 | Solvers 80 | ======= 81 | 82 | .. currentmodule:: skglm.solvers 83 | 84 | .. autosummary:: 85 | :toctree: generated/ 86 | 87 | AndersonCD 88 | FISTA 89 | GramCD 90 | GroupBCD 91 | GroupProxNewton 92 | LBFGS 93 | MultiTaskBCD 94 | ProxNewton 95 | 96 | 97 | Experimental 98 | ============ 99 | 100 | .. currentmodule:: skglm.experimental 101 | 102 | .. autosummary:: 103 | :toctree: generated/ 104 | 105 | IterativeReweightedL1 106 | PDCD_WS 107 | Pinball 108 | SqrtQuadratic 109 | SqrtLasso 110 | -------------------------------------------------------------------------------- /doc/changes/0.1.rst: -------------------------------------------------------------------------------- 1 | .. _changes_0_1: 2 | 3 | Version 0.1 (2022/04/25) 4 | ------------------------ 5 | 6 | - Initial release (initial commit: 2022/04/20) 7 | -------------------------------------------------------------------------------- /doc/changes/0.2.rst: -------------------------------------------------------------------------------- 1 | .. _changes_0_2: 2 | 3 | Version 0.2 (2022/10/18) 4 | ------------------------ 5 | 6 | - Experimental :ref:`Square root Lasso ` class with ProxNewton or Chambolle-Pock solver (PR :gh:`57`) 7 | 8 | - Accelerated block coordinate descent solver :ref:`GroupBCD ` with working sets for problems with group penalties (PR :gh:`29`, :gh:`28`, and :gh:`26`) 9 | 10 | - Proximal Newton solver :ref:`ProxNewton ` with working sets for problems with non-quadratic datafits (PR :gh:`51`) 11 | 12 | - :ref:`Huber ` datafit (PR :gh:`14`) 13 | 14 | - Added :ref:`SCAD ` and :ref:`BlockSCAD ` penalties (PR :gh:`12`) 15 | 16 | - Fitting intercept for :ref:`Quadratic `, :ref:`Logistic ` and :ref:`Huber ` datafits (PR :gh:`55`) 17 | 18 | - Added :ref:`GramCD `, a Gram-based coordinate descent solver for problems with ``n_features`` :math:`\ll` ``n_samples`` (PR :gh:`59`) 19 | 20 | - Transform solvers from functions to classes (PR :gh:`63`) 21 | -------------------------------------------------------------------------------- /doc/changes/0.3.rst: -------------------------------------------------------------------------------- 1 | .. _changes_0_3: 2 | 3 | Version 0.3 (2023/07/01) 4 | ------------------------ 5 | 6 | - Add :ref:`Cox Estimator ` with support of L1, L2, and Elastic regularization (PR: :gh:`171`) 7 | - Reduce time overhead when fitting :ref:`Lasso Estimator ` (PR: :gh:`129`) 8 | - Add :ref:`Cox ` datafit for survival analysis (PR :gh:`180`, :gh:`168`, :gh:`159`, :gh:`157`) 9 | - Experimental :ref:`Pinball ` datafit (PR: :gh:`134`) 10 | - Add :ref:`Gamma ` datafit (PR: :gh:`113`) 11 | - Add Positivity constraint to :ref:`L1 `, :ref:`L1_plus_L2 `, :ref:`WeightedL1 ` (PR: :gh:`110`) 12 | - Add :ref:`PositiveConstraint ` (PR: :gh:`126`) 13 | - Add :ref:`L-BFGS ` solver for problems with smooth datafits and penalties (PR: :gh:`165`, :gh:`173`) 14 | - Experimental :ref:`Primal-dual coordinate descent solve ` for problems with non-smooth datafits (PR: :gh:`131`) 15 | - Add support of ``float32`` in :ref:`ProxNewton ` 16 | -------------------------------------------------------------------------------- /doc/changes/0.4.rst: -------------------------------------------------------------------------------- 1 | .. 
_changes_0_4: 2 | 3 | Version 0.4 (2025/04/08) 4 | ------------------------- 5 | - Add :ref:`GroupLasso Estimator ` (PR: :gh:`228`) 6 | - Add support and tutorial for positive coefficients to :ref:`Group Lasso Penalty ` (PR: :gh:`221`) 7 | - Check compatibility with datafit and penalty in solver (PR :gh:`137`) 8 | - Add support to weight samples in the quadratic datafit :ref:`Weighted Quadratic Datafit ` (PR: :gh:`258`) 9 | - Add support for ElasticNet regularization (`penalty="l1_plus_l2"`) to :ref:`SparseLogisticRegression ` (PR: :gh:`244`) 10 | 11 | Version 0.3.1 (2023/12/21) 12 | -------------------------- 13 | - Add support for weights and positive coefficients to :ref:`MCPRegression Estimator ` (PR: :gh:`184`) 14 | - Move solver specific computations from ``Datafit.initialize()`` to separate ``Datafit`` methods to ease ``Solver`` - ``Datafit`` compatibility check (PR: :gh:`192`) 15 | - Add :ref:`LogSumPenalty ` (PR: :gh:`#127`) 16 | - Remove abstract methods in ``BaseDatafit`` and ``BasePenalty`` to make solver/penalty/datafit compatibility check easier (PR :gh:`#205`) 17 | - Add fixed-point distance to build working sets in :ref:`ProxNewton ` solver (:gh:`138`) 18 | -------------------------------------------------------------------------------- /doc/changes/0.5.rst: -------------------------------------------------------------------------------- 1 | .. _changes_0_5: 2 | 3 | Version 0.5 (in progress) 4 | ------------------------- 5 | - Add support for fitting an intercept in :ref:`SqrtLasso ` (PR: :gh:`298`) 6 | -------------------------------------------------------------------------------- /doc/changes/whats_new.rst: -------------------------------------------------------------------------------- 1 | .. _whats_new: 2 | 3 | What's new 4 | ========== 5 | 6 | .. currentmodule:: skglm 7 | 8 | .. include:: 0.5.rst 9 | 10 | .. include:: 0.4.rst 11 | 12 | .. include:: 0.3.rst 13 | 14 | .. include:: 0.2.rst 15 | 16 | .. include:: 0.1.rst 17 | -------------------------------------------------------------------------------- /doc/contribute.rst: -------------------------------------------------------------------------------- 1 | .. _contribute: 2 | 3 | .. meta:: 4 | :description: Contribute to skglm by reporting bugs, suggesting features, or submitting pull requests. Join us in making skglm even better! 5 | :og:title: Contribute to skglm 6 | 7 | Contribute 8 | ======================= 9 | 10 | ``skglm`` is a continuous endeavour that relies on community efforts to last and evolve. 11 | Your contribution is welcome and highly valuable. You can help with 12 | 13 | **bug report** 14 | ``skglm`` runs unit tests on the codebase to prevent bugs. 15 | Help us tighten these tests by reporting any bug that you encounter. 16 | To do so, use the `issue section `_. 17 | 18 | **feature request** 19 | We are constantly improving ``skglm`` and we would like to align that with the user needs. 20 | We highly appreciate any suggestion to extend or add new features to ``skglm``. 21 | You can use the `the issue section `_ to make suggestions. 22 | 23 | **pull request** 24 | You may have fixed a bug, added a feature, or even fixed a small typo in the documentation... 25 | You can submit a `pull request `_ 26 | to integrate your changes and we will reach out to you shortly. 27 | If this is your first pull request, you can refer to `this scikit-learn guide `_. 28 | 29 | As part of the `scikit-learn-contrib `_ GitHub organization, we adopt the scikit-learn `code of conduct `_. 30 | 31 | .. 
note:: 32 | 33 | If you are willing to contribute with code to ``skglm``, check the section below to learn how to install the development version. 34 | 35 | 36 | 37 | Setup ``skglm`` on your local machine 38 | --------------------------------------- 39 | 40 | Here are the key steps to help you setup ``skglm`` on your machine in case you want to 41 | contribute with code or documentation. 42 | 43 | 1. `Fork the repository `_ and run the following command to clone it on your local machine, make sure to replace ``{YOUR_GITHUB_USERNAME}`` with your GitHub username 44 | 45 | .. code-block:: shell 46 | 47 | $ git clone https://github.com/{YOUR_GITHUB_USERNAME}/skglm 48 | 49 | 50 | 2. ``cd`` to ``skglm`` directory and install it in edit mode by running 51 | 52 | .. code-block:: shell 53 | 54 | $ cd skglm 55 | $ pip install -e . 56 | 57 | 58 | 3. To build the documentation locally, run 59 | 60 | .. tab-set:: 61 | 62 | .. tab-item:: with plots in the example gallery 63 | 64 | .. code-block:: shell 65 | 66 | $ cd doc 67 | $ pip install .[doc] 68 | $ make html 69 | 70 | .. tab-item:: without plots in the example gallery 71 | 72 | .. code-block:: shell 73 | 74 | $ cd doc 75 | $ pip install .[doc] 76 | $ make html-noplot 77 | -------------------------------------------------------------------------------- /doc/getting_started.rst: -------------------------------------------------------------------------------- 1 | .. _getting_started: 2 | 3 | .. meta:: 4 | :description: Learn how to fit Lasso and custom GLM estimators with skglm, a modular Python library compatible with scikit-learn. Includes examples and code snippets. 5 | 6 | =============== 7 | Start 8 | =============== 9 | --------------- 10 | 11 | This page provides a starter example to get familiar with ``skglm`` and explore some of its features. 12 | 13 | In the first section, we fit a Lasso estimator on a high dimensional 14 | toy dataset (number of features is largely greater than the number of samples). Linear models don't generalize well 15 | for unseen dataset. By adding a penalty, :math:`\ell_1` penalty, we can train estimator that overcome this drawback. 16 | 17 | The last section, we explore other combinations of datafit and penalty to create a custom estimator that achieves a lower prediction error, 18 | in the sequel :math:`\ell_1` Huber regression. We show that ``skglm`` is perfectly adapted to these experiments thanks to its modular design. 19 | 20 | Beforehand, make sure that you have already installed ``skglm`` 21 | 22 | .. code-block:: shell 23 | 24 | # Installing from PyPI using pip 25 | pip install -U skglm 26 | 27 | # Installing from conda-forge using conda 28 | conda install -c conda-forge skglm 29 | 30 | ------------------------- 31 | 32 | 33 | Fitting a Lasso estimator 34 | ------------------------- 35 | 36 | Let's start first by generating a toy dataset and splitting it to train and test sets. 37 | For that, we will use ``scikit-learn`` 38 | `make_regression `_ 39 | 40 | .. code-block:: python 41 | 42 | # imports 43 | from sklearn.datasets import make_regression 44 | from sklearn.model_selection import train_test_split 45 | 46 | # generate toy data 47 | X, y = make_regression(n_samples=100, n_features=1000) 48 | 49 | # split data 50 | X_train, X_test, y_train, y_test = train_test_split(X, y) 51 | 52 | Then let's fit ``skglm`` :ref:`Lasso ` estimator and prints its score on the test set. 53 | 54 | .. 
code-block:: python 55 | 56 | # import estimator 57 | from skglm import Lasso 58 | 59 | # init and fit 60 | estimator = Lasso() 61 | estimator.fit(X_train, y_train) 62 | 63 | # compute R² 64 | estimator.score(X_test, y_test) 65 | 66 | 67 | .. note:: 68 | 69 | - The first fit after importing ``skglm`` has an overhead as ``skglm`` uses `Numba `_ 70 | The subsequent fits will achieve top speed since Numba compilation is cached. 71 | 72 | ``skglm`` has several other ``scikit-learn`` compatible estimators. 73 | Check the :ref:`API ` for more information about the available estimators. 74 | 75 | 76 | Fitting :math:`\ell_1` Huber regression 77 | --------------------------------------- 78 | 79 | Suppose that the latter dataset contains outliers and we would like to mitigate their effects on the learned coefficients 80 | while having an estimator that generalizes well to unseen data. Ideally, we would like to fit a :math:`\ell_1` Huber regressor. 81 | 82 | ``skglm`` offers high flexibility to compose custom estimators. Through a simple API, it is possible to combine any 83 | ``skglm`` :ref:`datafit ` and :ref:`penalty `. 84 | 85 | .. note:: 86 | 87 | - :math:`\ell_1` regularization is not supported in ``scikit-learn`` for HuberRegressor 88 | 89 | Let's explore how to achieve that. 90 | 91 | 92 | Generate corrupt data 93 | ********************* 94 | 95 | We will use the same script as before except that we will take 10 samples and corrupt their values. 96 | 97 | .. code-block:: python 98 | 99 | # imports 100 | import numpy as np 101 | from sklearn.datasets import make_regression 102 | from sklearn.model_selection import train_test_split 103 | 104 | # generate toy data 105 | X, y = make_regression(n_samples=100, n_features=1000) 106 | 107 | # select and corrupt 10 random samples 108 | y[np.random.choice(n_samples, 10)] = 100 * y.max() 109 | 110 | # split data 111 | X_train, X_test, y_train, y_test = train_test_split(X, y) 112 | 113 | 114 | Now let's compose a custom estimator using :ref:`GeneralizedLinearEstimator `. 115 | It's the go-to way to create custom estimator by combining a datafit and a penalty. 116 | 117 | .. code-block:: python 118 | 119 | # import penalty and datafit 120 | from skglm.penalties import L1 121 | from skglm.datafits import Huber 122 | 123 | # import GLM estimator 124 | from skglm import GeneralizedLinearEstimator 125 | 126 | # build and fit estimator 127 | estimator = GeneralizedLinearEstimator( 128 | Huber(1.), 129 | L1(alpha=1.) 130 | ) 131 | estimator.fit(X_train, y_train) 132 | 133 | 134 | .. note:: 135 | 136 | - Here the arguments given to the datafit and penalty are arbitrary and given just for sake of illustration. 137 | 138 | ``GeneralizedLinearEstimator`` allows to combine any penalties and datafits implemented in ``skglm``. 139 | If you don't find an estimator in the ``estimators`` module, you can build it by combining the appropriate datafit and penalty 140 | and pass it to ``GeneralizedLinearEstimator``. Explore the list of supported :ref:`datafits ` and :ref:`penalties `. 141 | 142 | .. important:: 143 | 144 | - It is possible to create your own datafit and penalties. Check the tutorials on :ref:`how to add a custom datafit ` 145 | and :ref:`how to add a custom penalty `. 146 | 147 | 148 | Explore further advanced topics and get hands-on examples on the :ref:`tutorials page ` -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. 
skglm documentation master file, created by 3 | sphinx-quickstart on Mon May 23 16:22:52 2016. 4 | You can adapt this file completely to your liking, but it should at least 5 | contain the root `toctree` directive. 6 | 7 | .. meta:: 8 | :og:title: skglm: Fast, Scalable & Flexible Regularized GLMs and Sparse Modeling for Python 9 | :description: skglm is the fastest, most modular Python library for regularized GLMs—fully scikit-learn compatible for advanced statistical modeling. 10 | :og:image: _static/images/logo.svg 11 | :og:url: https://contrib.scikit-learn.org/skglm/ 12 | :keywords: Generalized Linear Models, GLM, scikit-learn, Lasso, ElasticNet, Cox, modular, efficient, regularized 13 | :page-layout: full 14 | 15 | skglm 16 | ====== 17 | .. container:: hero-container 18 | 19 | .. container:: hero-text 20 | 21 | .. rubric:: skglm 22 | :class: hero-title 23 | 24 | 25 | .. container:: hero-description 26 | 27 | .. raw:: html 28 | 29 | The fastest and most modular Python package for regularized Generalized Linear Models — designed for researchers and engineers who demand speed, structure, and scikit-learn compatibility. 30 | 31 | .. container:: hero-buttons 32 | 33 | `Get Started `_ 34 | 35 | .. container:: hero-gallery 36 | 37 | .. image:: _static/images/landingpage/hero.webp 38 | :alt: Illustration showing modeling in skglm logo style 39 | :class: hero-gallery-img 40 | :target: auto_examples/index.html 41 | 42 | 43 | .. container:: section-spacer 44 | 45 | .. container:: section-intro 46 | 47 | .. rubric:: Simple. Modular. Powerful. 48 | :class: section-title 49 | 50 | .. container:: section-subtitle 51 | 52 | Everything you need to build fast, flexible, and scalable GLMs — in one modular library. 53 | 54 | .. container:: features-grid 55 | 56 | .. container:: feature-box 57 | 58 | .. image:: _static/images/landingpage/ease.webp 59 | :alt: Ease icon 60 | :class: feature-icon 61 | 62 | .. container:: feature-text 63 | 64 | .. rubric:: Easy to Use 65 | :class: feature-title 66 | 67 | Get started in minutes with an intuitive API, comprehensive examples, and out-of-the-box estimators. 68 | 69 | .. container:: feature-box 70 | 71 | .. image:: _static/images/landingpage/modular.webp 72 | :alt: Modular icon 73 | :class: feature-icon 74 | 75 | .. container:: feature-text 76 | 77 | .. rubric:: Modular Design 78 | :class: feature-title 79 | 80 | Compose custom estimators from interchangeable datafits and penalties tailored to your use case. 81 | 82 | .. container:: feature-box 83 | 84 | .. image:: _static/images/landingpage/performance.webp 85 | :alt: Performance icon 86 | :class: feature-icon 87 | 88 | .. container:: feature-text 89 | 90 | .. rubric:: Speed 91 | :class: feature-title 92 | 93 | Solve large-scale problems with lightning-fast solvers — up to 100× faster than ``scikit-learn``. 94 | 95 | .. container:: feature-box 96 | 97 | .. image:: _static/images/landingpage/compatible.webp 98 | :alt: Compatibility icon 99 | :class: feature-icon 100 | 101 | .. container:: feature-text 102 | 103 | .. rubric:: Plug & Extend 104 | :class: feature-title 105 | 106 | Fully scikit-learn compatible and ready for custom research and production workflows. 107 | 108 | .. container:: section-spacer 109 | 110 | .. container:: section-intro 111 | 112 | .. rubric:: Support Us 113 | :class: section-title 114 | 115 | .. container:: support-box 116 | 117 | .. rubric:: Citation 118 | :class: support-title 119 | Using ``skglm`` in your work? You are free to use it. It is licensed under 120 | `BSD 3-Clause `_. 
121 | As the result of perseverant academic research, the best way to support its development is by citing it. 122 | :: 123 | @inproceedings{skglm, 124 | title = {Beyond L1: Faster and better sparse models with skglm}, 125 | author = {Q. Bertrand and Q. Klopfenstein and P.-A. Bannier 126 | and G. Gidel and M. Massias}, 127 | booktitle = {NeurIPS}, 128 | year = {2022}, 129 | } 130 | 131 | @article{moufad2023skglm, 132 | title = {skglm: improving scikit-learn for regularized Generalized Linear Models}, 133 | author = {Moufad, Badr and Bannier, Pierre-Antoine and Bertrand, Quentin 134 | and Klopfenstein, Quentin and Massias, Mathurin}, 135 | year = {2023} 136 | } 137 | 138 | .. container:: support-box 139 | 140 | .. rubric:: Contributions 141 | :class: support-title 142 | Contributions, improvements, and bug reports are always welcome. Help us make ``skglm`` better! 143 | 144 | .. container:: hero-buttons 145 | 146 | `How to Contribute `_ 147 | 148 | .. container:: section-spacer 149 | 150 | .. container:: section-intro 151 | 152 | .. rubric:: Real-World Applications 153 | :class: section-title 154 | 155 | .. container:: section-subtitle 156 | 157 | ``skglm`` drives impactful solutions across diverse sectors with its fast, modular approach to regularized GLMs and sparse modeling. 158 | Find various advanced topics in our `Tutorials `_ and `Examples `_ sections. 159 | 160 | .. container:: applications-grid 161 | 162 | .. container:: application-box 163 | 164 | .. image:: _static/images/landingpage/healthcare.webp 165 | :alt: Healthcare icon 166 | :class: application-icon 167 | 168 | .. container:: application-text 169 | 170 | .. rubric:: Healthcare 171 | :class: application-title 172 | 173 | Enhance clinical trial analytics and early biomarker discovery by efficiently analyzing high-dimensional biological data and features like cox regression modeling. 174 | 175 | .. container:: application-box 176 | 177 | .. image:: _static/images/landingpage/finance.webp 178 | :alt: Finance icon 179 | :class: application-icon 180 | 181 | .. container:: application-text 182 | 183 | .. rubric:: Finance 184 | :class: application-title 185 | 186 | Conduct transparent and interpretable risk modeling with scalable, robust sparse regression across vast datasets. 187 | 188 | .. container:: application-box 189 | 190 | .. image:: _static/images/landingpage/energy.webp 191 | :alt: Energy icon 192 | :class: application-icon 193 | 194 | .. container:: application-text 195 | 196 | .. rubric:: Energy 197 | :class: application-title 198 | 199 | Optimize real-time electricity forecasting and load analysis by processing large time-series datasets for predictive maintenance and anomaly detection. 200 | 201 | .. container:: sponsor-banner 202 | 203 | .. container:: sponsor-inline 204 | 205 | 206 | This project is made possible thanks to the support of 207 | 208 | .. image:: _static/images/landingpage/inrialogo.webp 209 | :alt: Inria logo 210 | :class: sponsor-logo 211 | :target: https://www.inria.fr/en 212 | 213 | 214 | .. it is mandatory to keep the toctree here although it doesn't show up in the page 215 | .. when adding/modifying pages, don't forget to update the toctree 216 | 217 | .. 
toctree:: 218 | :maxdepth: 1 219 | :hidden: 220 | :includehidden: 221 | 222 | getting_started.rst 223 | tutorials/tutorials.rst 224 | auto_examples/index.rst 225 | api.rst 226 | contribute.rst 227 | changes/whats_new.rst 228 | -------------------------------------------------------------------------------- /doc/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: https://contrib.scikit-learn.org/skglm/sitemap.xml -------------------------------------------------------------------------------- /doc/sphinxext/gh_substitutions.py: -------------------------------------------------------------------------------- 1 | """Provide a convenient way to link to GitHub issues and pull requests. 2 | 3 | Adapted from: 4 | https://doughellmann.com/blog/2010/05/09/defining-custom-roles-in-sphinx/ 5 | """ 6 | from docutils.nodes import reference 7 | from docutils.parsers.rst.roles import set_classes 8 | 9 | 10 | def gh_role(name, rawtext, pr_number, lineno, inliner, options={}, content=[]): 11 | """Link to a GitHub pull request.""" 12 | ref = f'https://github.com/scikit-learn-contrib/skglm/pull/{pr_number}' 13 | set_classes(options) 14 | node = reference(rawtext, '#' + pr_number, refuri=ref, **options) 15 | return [node], [] 16 | 17 | 18 | def setup(app): 19 | """Do setup.""" 20 | app.add_role('gh', gh_role) 21 | return 22 | -------------------------------------------------------------------------------- /doc/sphinxext/github_link.py: -------------------------------------------------------------------------------- 1 | # this code is a copy/paste of 2 | # https://github.com/scikit-learn/scikit-learn/blob/ 3 | # b0b8a39d8bb80611398e4c57895420d5cb1dfe09/doc/sphinxext/github_link.py 4 | 5 | from operator import attrgetter 6 | import inspect 7 | import subprocess 8 | import os 9 | import sys 10 | from functools import partial 11 | 12 | REVISION_CMD = "git rev-parse --short HEAD" 13 | 14 | 15 | def _get_git_revision(): 16 | try: 17 | revision = subprocess.check_output(REVISION_CMD.split()).strip() 18 | except (subprocess.CalledProcessError, OSError): 19 | print("Failed to execute git to get revision") 20 | return None 21 | return revision.decode("utf-8") 22 | 23 | 24 | def _linkcode_resolve(domain, info, package, url_fmt, revision): 25 | """Determine a link to online source for a class/method/function 26 | This is called by sphinx.ext.linkcode 27 | An example with a long-untouched module that everyone has 28 | >>> _linkcode_resolve('py', {'module': 'tty', 29 | ... 'fullname': 'setraw'}, 30 | ... package='tty', 31 | ... url_fmt='http://hg.python.org/cpython/file/' 32 | ... '{revision}/Lib/{package}/{path}#L{lineno}', 33 | ... 
revision='xxxx') 34 | 'http://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18' 35 | """ 36 | 37 | if revision is None: 38 | return 39 | if domain not in ("py", "pyx"): 40 | return 41 | if not info.get("module") or not info.get("fullname"): 42 | return 43 | 44 | class_name = info["fullname"].split(".")[0] 45 | module = __import__(info["module"], fromlist=[class_name]) 46 | obj = attrgetter(info["fullname"])(module) 47 | 48 | # Unwrap the object to get the correct source 49 | # file in case that is wrapped by a decorator 50 | obj = inspect.unwrap(obj) 51 | 52 | try: 53 | fn = inspect.getsourcefile(obj) 54 | except Exception: 55 | fn = None 56 | if not fn: 57 | try: 58 | fn = inspect.getsourcefile(sys.modules[obj.__module__]) 59 | except Exception: 60 | fn = None 61 | if not fn: 62 | return 63 | 64 | fn = os.path.relpath(fn, start=os.path.dirname(__import__(package).__file__)) 65 | try: 66 | lineno = inspect.getsourcelines(obj)[1] 67 | except Exception: 68 | lineno = "" 69 | return url_fmt.format(revision=revision, package=package, path=fn, lineno=lineno) 70 | 71 | 72 | def make_linkcode_resolve(package, url_fmt): 73 | """Returns a linkcode_resolve function for the given URL format 74 | revision is a git commit reference (hash or name) 75 | package is the name of the root module of the package 76 | url_fmt is along the lines of ('https://github.com/USER/PROJECT/' 77 | 'blob/{revision}/{package}/' 78 | '{path}#L{lineno}') 79 | """ 80 | revision = _get_git_revision() 81 | return partial( 82 | _linkcode_resolve, revision=revision, package=package, url_fmt=url_fmt 83 | ) 84 | -------------------------------------------------------------------------------- /doc/tutorials/add_datafit.rst: -------------------------------------------------------------------------------- 1 | .. _how_to_add_custom_datafit: 2 | 3 | .. meta:: 4 | :description: Tutorial on creating and implementing a custom datafit in skglm. Step-by-step guide includes deriving gradients, Hessians, and an example with Poisson datafit. 5 | 6 | How to Add a Custom Datafit 7 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8 | 9 | Motivated by generalized linear models but not limited to it, ``skglm`` solves problems of the form 10 | 11 | .. math:: 12 | \hat{\beta} \in 13 | \arg\min_{\beta \in \mathbb{R}^p} 14 | F(X\beta) + \Omega(\beta) 15 | := \sum_{i=1}^n f_i([X\beta]_i) + \sum_{j=1}^p \Omega_j(\beta_j) 16 | \ . 17 | 18 | 19 | Here, :math:`X \in \mathbb{R}^{n \times p}` denotes the design matrix with :math:`n` samples and :math:`p` features, 20 | and :math:`\beta \in \mathbb{R}^p` is the coefficient vector. 21 | 22 | skglm can solve any problems of this form with arbitrary smooth datafit :math:`F` and arbitrary penalty :math:`\Omega` whose proximal operator can be evaluated explicitly, by defining two classes: a ``Penalty`` and a ``Datafit``. 23 | 24 | They can then be passed to a :class:`~skglm.GeneralizedLinearEstimator`. 25 | 26 | .. code-block:: python 27 | 28 | clf = GeneralizedLinearEstimator( 29 | MyDatafit(), 30 | MyPenalty(), 31 | ) 32 | 33 | 34 | A ``Datafit`` is a jitclass that must inherit from the ``BaseDatafit`` class: 35 | 36 | .. literalinclude:: ../../skglm/datafits/base.py 37 | :pyobject: BaseDatafit 38 | 39 | 40 | To define a custom datafit, you need to inherit from ``BaseDatafit`` class and implement methods required by the targeted solver. 41 | These methods can be found in the solver documentation. 42 | Optionally, overloading the methods with the suffix ``_sparse`` adds support for sparse datasets (CSC matrix). 
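For illustration, here is what the skeleton of such a custom datafit could look like, using the method names discussed in this tutorial (a simplified, hypothetical sketch built on a least-squares loss; a real ``skglm`` datafit is additionally a Numba jitclass whose attributes are typed through ``get_spec`` and ``params_to_dict``):

.. code-block:: python

    import numpy as np

    from skglm.datafits.base import BaseDatafit


    class LeastSquaresSketch(BaseDatafit):
        """Toy datafit f(Xw) = ||y - Xw||^2 / (2 n_samples), for illustration only."""

        def get_spec(self):
            # no attribute to type: this datafit has no parameter
            return ()

        def params_to_dict(self):
            return dict()

        def initialize(self, X, y):
            # pre-compute and store quantities depending on X and y, if any
            pass

        def value(self, y, w, Xw):
            return np.sum((y - Xw) ** 2) / (2 * len(y))

        def raw_grad(self, y, Xw):
            # gradient of f at Xw, required by ProxNewton
            return (Xw - y) / len(y)

        def raw_hessian(self, y, Xw):
            # diagonal of the Hessian of f at Xw, required by ProxNewton
            return np.full(Xw.shape[0], 1 / len(y))

        def gradient_scalar(self, X, y, w, Xw, j):
            # derivative with respect to the j-th coordinate, required by AndersonCD
            return X[:, j] @ (Xw - y) / len(y)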
43 |
44 | This tutorial shows how to implement the :ref:`Poisson ` datafit and fit it with the :ref:`ProxNewton ` solver.
45 |
46 |
47 | A case in point: defining Poisson datafit
48 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
49 |
50 | First, this requires deriving some quantities used by the solvers, such as the gradient and the Hessian matrix of the datafit.
51 | With :math:`y \in \mathbb{R}^n` the target vector, the Poisson datafit reads
52 |
53 | .. math::
54 |     f(X\beta) = \frac{1}{n}\sum_{i=1}^n \exp([X\beta]_i) - y_i[X\beta]_i
55 |     \ .
56 |
57 |
58 | Let's define some useful quantities to simplify our computations. For :math:`z \in \mathbb{R}^n` and :math:`\beta \in \mathbb{R}^p`,
59 |
60 | .. math::
61 |     f(z) = \sum_{i=1}^n f_i(z_i) \qquad F(\beta) = f(X\beta)
62 |     \ .
63 |
64 |
65 | Computing the gradient of :math:`F` and its Hessian matrix yields
66 |
67 | .. math::
68 |     \nabla F(\beta) = X^{\top} \underbrace{\nabla f(X\beta)}_"raw grad" \qquad \nabla^2 F(\beta) = X^{\top} \underbrace{\nabla^2 f(X\beta)}_"raw hessian" X
69 |     \ .
70 |
71 |
72 | Besides, it directly follows that
73 |
74 | .. math::
75 |     \nabla f(z) = (f_i^'(z_i))_{1 \leq i \leq n} \qquad \nabla^2 f(z) = "diag"(f_i^('')(z_i))_{1 \leq i \leq n}
76 |     \ .
77 |
78 |
79 | We can now apply these definitions to the Poisson datafit:
80 |
81 | .. math::
82 |     f_i(z_i) = \frac{1}{n} \left(\exp(z_i) - y_iz_i\right)
83 |     \ .
84 |
85 |
86 | Therefore,
87 |
88 | .. math::
89 |     f_i^'(z_i) = \frac{1}{n}(\exp(z_i) - y_i) \qquad f_i^('')(z_i) = \frac{1}{n}\exp(z_i)
90 |     \ .
91 |
92 |
93 | Computing ``raw_grad`` and ``raw_hessian`` for the Poisson datafit yields
94 |
95 | .. math::
96 |     \nabla f(X\beta) = \frac{1}{n}(\exp([X\beta]_i) - y_i)_{1 \leq i \leq n} \qquad \nabla^2 f(X\beta) = \frac{1}{n}"diag"(\exp([X\beta]_i))_{1 \leq i \leq n}
97 |     \ .
98 |
99 |
100 | Both ``raw_grad`` and ``raw_hessian`` are methods used by the ``ProxNewton`` solver.
101 | But other optimizers require different methods to be implemented. For instance, ``AndersonCD`` uses the ``gradient_scalar`` method:
102 | it is the derivative of the datafit with respect to the :math:`j`-th coordinate of :math:`\beta`.
103 |
104 | For the Poisson datafit, this yields
105 |
106 | .. math::
107 |     \frac{\partial F(\beta)}{\partial \beta_j} = \frac{1}{n}
108 |     \sum_{i=1}^n X_{i,j} \left(
109 |         \exp([X\beta]_i) - y_i
110 |     \right)
111 |     \ .
112 |
113 |
114 | When implementing these quantities in the ``Poisson`` datafit class, this gives:
115 |
116 | .. literalinclude:: ../../skglm/datafits/single_task.py
117 |     :pyobject: Poisson
118 |
119 |
120 | Note that we have not initialized any quantities in the ``initialize`` method.
121 | Usually, it serves to compute datafit attributes specific to a dataset ``X, y`` for computational efficiency, for example the computation of ``X.T @ y`` in the :ref:`Quadratic ` datafit.
122 | -------------------------------------------------------------------------------- /doc/tutorials/add_penalty.rst: --------------------------------------------------------------------------------
1 | :orphan:
2 |
3 | .. _how_to_add_custom_penalty:
4 |
5 | .. meta::
6 |     :description: Step-by-step tutorial on adding custom penalties in skglm. Covers implementation details, proximal operators, and optimality conditions using the L1 penalty.
7 |
8 | How to Add a Custom Penalty
9 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
10 |
11 | skglm supports any arbitrary proximable penalty.
12 |
13 |
14 | A penalty is implemented as a jitclass which must inherit from the ``BasePenalty`` class:
15 |
16 | ..
literalinclude:: ../../skglm/penalties/base.py 17 | :pyobject: BasePenalty 18 | 19 | To implement your own penalty, you only need to define a new jitclass, inheriting from ``BasePenalty`` and implement the methods required by the targeted solver. 20 | Theses methods can be found in the solver documentation. 21 | 22 | 23 | A case in point: defining L1 penalty 24 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | We detail how the :math:`\ell_1` penalty is implemented in skglm. 27 | For a vector :math:`\beta \in \mathbb{R}^p`, the :math:`\ell_1` penalty is defined as follows: 28 | 29 | .. math:: 30 | || \beta ||_1 = \sum_{i=1}^p |\beta _i| \ . 31 | 32 | 33 | The regularization level is controlled by the hyperparameter :math:`\lambda \in bb(R)^+`, that is defined and initialized in the constructor of the class. 34 | 35 | The method ``get_spec`` allows to strongly type the attributes of the penalty object, thus allowing Numba to JIT-compile the class. 36 | It should return an iterable of tuples, the first element being the name of the attribute, the second its Numba type (e.g. ``float64``, ``bool_``). 37 | Additionally, a penalty should implement ``params_to_dict``, a helper method to get all the parameters of a penalty returned in a dictionary. 38 | 39 | To optimize an objective with a given penalty, skglm needs at least the proximal operator of the penalty applied to the :math:`j`-th coordinate. 40 | For the ``L1`` penalty, it is the well-known soft-thresholding operator: 41 | 42 | .. math:: 43 | "ST"(\beta , \lambda) = "max"(0, |\beta| - \lambda) "sgn"(\beta)\ . 44 | 45 | 46 | Note that skglm expects the threshold level to be the regularization hyperparameter :math:`\lambda \in \mathbb{R}^+` **scaled by** the stepsize. 47 | 48 | 49 | Besides, by default all solvers in skglm have ``ws_strategy`` turned on to ``subdiff``. 50 | This means that the optimality conditions (thus the stopping criterion) is computed using the method ``subdiff_distance`` of the penalty. 51 | If not implemented, the user should set ``ws_strategy`` to ``fixpoint``. 52 | 53 | For the :math:`\ell_1` penalty, the distance of the negative gradient of the datafit :math:`F` to the subdifferential of the penalty reads 54 | 55 | .. math:: 56 | "dist"(-\nabla_j F(\beta), \partial |\beta_j|) = 57 | {("max"(0, | -\nabla_j F(\beta) | - \lambda),), 58 | (| -\nabla_j F(\beta) - \lambda "sgn"(\beta_j) |,):} 59 | \ . 60 | 61 | 62 | The method ``is_penalized`` returns a binary mask with the penalized features. 63 | For the :math:`\ell_1` penalty, all the coefficients are penalized. 64 | Finally, ``generalized_support`` returns the generalized support of the penalty for some coefficient vector ``w``. 65 | It is typically the non-zero coefficients of the solution vector for :math:`\ell_1`. 66 | 67 | 68 | Optionally, a penalty might implement ``alpha_max`` which returns the smallest :math:`\lambda` for which the optimal solution is a null vector. 69 | Note that since ``lambda`` is a reserved keyword in Python, ``alpha`` in skglm codebase corresponds to :math:`\lambda`. 70 | 71 | When putting all together, this gives the implementation of the ``L1`` penalty: 72 | 73 | 74 | .. literalinclude:: ../../skglm/penalties/separable.py 75 | :pyobject: L1 76 | 77 | -------------------------------------------------------------------------------- /doc/tutorials/alpha_max.rst: -------------------------------------------------------------------------------- 1 | .. _alpha_max: 2 | 3 | .. 
meta:: 4 | :description: Tutorial explaining the critical regularization strength (alpha_max) in skglm. Learn conditions for zero solutions in L1-regularized optimization problems. 5 | 6 | ========================================================== 7 | Critical Regularization Strength above which Solution is 0 8 | ========================================================== 9 | 10 | This tutorial shows that for :math:`\lambda \geq \lambda_{\text{max}} = || \nabla f(0) ||_{\infty}`, the solution to 11 | :math:`\min f(x) + \lambda || x ||_1` is 0. 12 | 13 | In skglm, we thus frequently use 14 | 15 | .. code-block:: 16 | 17 | alpha_max = np.max(np.abs(gradient0)) 18 | 19 | and choose for the regularization strength :\math:`\alpha` a fraction of this critical value, e.g. ``alpha = 0.01 * alpha_max``. 20 | 21 | Problem setup 22 | ============= 23 | 24 | Consider the optimization problem: 25 | 26 | .. math:: 27 | \min_x f(x) + \lambda || x||_1 28 | 29 | where: 30 | 31 | - :math:`f: \mathbb{R}^d \to \mathbb{R}` is a convex differentiable function, 32 | - :math:`|| x ||_1` is the L1 norm of :math:`x`, 33 | - :math:`\lambda > 0` is the regularization parameter. 34 | 35 | We aim to determine the conditions under which the solution to this problem is :math:`x = 0`. 36 | 37 | Theoretical background 38 | ====================== 39 | 40 | 41 | Let 42 | 43 | .. math:: 44 | 45 | g(x) = f(x) + \lambda || x||_1 46 | 47 | According to Fermat's rule, 0 is the minimizer of :math:`g` if and only if 0 is in the subdifferential of :math:`g` at 0. 48 | The subdifferential of :math:`|| x ||_1` at 0 is the L-infinity unit ball: 49 | 50 | .. math:: 51 | \partial || \cdot ||_1 (0) = \{ u \in \mathbb{R}^d : ||u||_{\infty} \leq 1 \} 52 | 53 | Thus, 54 | 55 | .. math:: 56 | :nowrap: 57 | 58 | \begin{equation} 59 | \begin{aligned} 60 | 0 \in \text{argmin} ~ g(x) 61 | &\Leftrightarrow 0 \in \partial g(0) \\ 62 | &\Leftrightarrow 63 | 0 \in \nabla f(0) + \lambda \partial || \cdot ||_1 (0) \\ 64 | &\Leftrightarrow - \nabla f(0) \in \lambda \{ u \in \mathbb{R}^d : ||u||_{\infty} \leq 1 \} \\ 65 | &\Leftrightarrow || \nabla f(0) ||_\infty \leq \lambda 66 | \end{aligned} 67 | \end{equation} 68 | 69 | 70 | We have just shown that the minimizer of :math:`g = f + \lambda || \cdot ||_1` is 0 if and only if :math:`\lambda \geq ||\nabla f(0)||_{\infty}`. 71 | 72 | Example 73 | ======= 74 | 75 | Consider the loss function for Ordinary Least Squares :math:`f(x) = \frac{1}{2n} ||Ax - b||_2^2`, where :math:`n` is the number of samples. We have: 76 | 77 | .. math:: 78 | \nabla f(x) = \frac{1}{n}A^T (Ax - b) 79 | 80 | At :math:`x=0`: 81 | 82 | .. math:: 83 | \nabla f(0) = -\frac{1}{n}A^T b 84 | 85 | The infinity norm of the gradient at 0 is: 86 | 87 | .. math:: 88 | ||\nabla f(0)||_{\infty} = \frac{1}{n}||A^T b||_{\infty} 89 | 90 | For :math:`\lambda \geq \frac{1}{n}||A^T b||_{\infty}`, the solution to :math:`\min_x \frac{1}{2n} ||Ax - b||_2^2 + \lambda || x||_1` is :math:`x=0`. 91 | 92 | 93 | 94 | References 95 | ========== 96 | 97 | Refer to Section 3.1 and Proposition 4 in particular of [1] for more details. 98 | 99 | .. _1: 100 | 101 | [1] Eugene Ndiaye, Olivier Fercoq, Alexandre Gramfort, and Joseph Salmon. 2017. Gap safe screening rules for sparsity enforcing penalties. J. Mach. Learn. Res. 18, 1 (January 2017), 4671–4703. 102 | -------------------------------------------------------------------------------- /doc/tutorials/intercept.rst: -------------------------------------------------------------------------------- 1 | .. 
_maths_unpenalized_intercept: 2 | 3 | .. meta:: 4 | :description: In-depth guide on intercept handling in skglm solvers. Covers mathematical derivations, gradient updates, Lipschitz constants, and examples for quadratic, logistic, and Huber datafits. 5 | 6 | Computation of the Intercept 7 | ============================ 8 | 9 | .. currentmodule:: skglm 10 | 11 | .. include:: intercept2.md 12 | :parser: myst_parser.sphinx_ 13 | -------------------------------------------------------------------------------- /doc/tutorials/intercept2.md: -------------------------------------------------------------------------------- 1 | This note gives insights and guidance for the handling of an intercept coefficient within the `skglm` solvers. 2 | 3 | Let the design matrix be $X in RR^{n times p}$ where $n$ is the number of samples and $p$ the number of features. 4 | We denote $beta in RR^p$ the coefficients of the Generalized Linear Model and $beta_0$ its intercept. 5 | In many packages such as `liblinear`, the intercept is handled by adding an extra column of ones in the design matrix. This is costly in memory, and may lead to different solutions if all coefficients are penalized, as the intercept $beta_0$ is usually not. 6 | `skglm` follows a different route and solves directly: 7 | 8 | ```{math} 9 | beta^star, beta_0^star 10 | in 11 | underset(beta in RR^p, beta_0 in RR)("argmin") 12 | Phi(beta) 13 | triangleq 14 | underbrace(F(X beta + beta_0 bb"1"_n))_(triangleq f(beta, beta_0)) 15 | + sum_(j=1)^p g_j(beta_j) 16 | \ , 17 | ``` 18 | 19 | 20 | where $bb"1"_{n}$ is the vector of size $n$ composed only of ones. 21 | 22 | 23 | The solvers of `skglm` update the intercept after each update of $beta$ by doing a (1 dimensional) gradient descent update: 24 | 25 | ```{math} 26 | beta_0^((k+1)) = beta_0^((k)) - 1/(L_0) nabla_(beta_0)F(X beta^((k)) + beta_0^((k)) bb"1"_{n}) 27 | \ , 28 | ``` 29 | 30 | where $L_0$ is the Lipschitz constant associated to the intercept. 31 | The local Lipschitz constant $L_0$ statisfies the following inequality 32 | 33 | $$ 34 | \forall x, x_0 in RR^p times RR, \forall h in RR, |nabla_(x_0) f(x, x_0 + h) - nabla_(x_0) f(x, x_0)| <= L_0 |h| \ . 35 | $$ 36 | 37 | This update rule should be implemented in the `intercept_update_step` method of the datafit class. 38 | 39 | The convergence criterion computed for the gradient is then only the absolute value of the gradient with respect to $beta_0$ since the intercept optimality condition, for a solution $beta^star$, $beta_0^star$ is: 40 | 41 | ```{math} 42 | nabla_(beta_0)F(X beta^star + beta_0^star bb"1"_n) = 0 43 | \ , 44 | ``` 45 | 46 | Moreover, we have that 47 | 48 | ```{math} 49 | nabla_(beta_0) F(X beta + beta_0 bb"1"_n) = bb"1"_n^\top nabla_beta F(X beta + beta_0 bb"1"_n) 50 | \ . 51 | ``` 52 | 53 | 54 | We will now derive the update used in Equation 2 for three different datafitting functions. 55 | 56 | --- 57 | 58 | ## The Quadratic datafit 59 | 60 | We define 61 | 62 | ```{math} 63 | F(X beta + beta_0 bb"1"_n) = 1/(2n) norm(y - X beta - beta_0 bb"1"_{n})_2^2 64 | \ . 65 | ``` 66 | 67 | In this case $nabla f(z) = 1/n (z - y)$ hence Eq. 4 is equal to: 68 | 69 | ```{math} 70 | nabla_(beta_0) F(X beta + beta_0 bb"1"_n) = 1/n sum_(i=1)^n (X_( i: ) beta + beta_0 - y_i) 71 | \ . 72 | ``` 73 | 74 | Finally, the Lipschitz constant is $L_0 = 1/n sum_(i=1)^n 1^2 = 1$. 
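As a concrete illustration, the gradient step of Equation 2 for the quadratic datafit boils down to a couple of NumPy lines (a minimal sketch with made-up variable names; in `skglm` this logic is provided by the datafit's `intercept_update_step` method):

```python
import numpy as np

def quadratic_intercept_step(y, Xw, intercept):
    """One gradient descent step on the intercept for the quadratic datafit."""
    # gradient w.r.t. the intercept: the mean of the residuals
    grad_intercept = np.mean(Xw + intercept - y)
    lipschitz_intercept = 1.0  # L_0 = 1 for the quadratic datafit
    return intercept - grad_intercept / lipschitz_intercept
```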
75 | 76 | 77 | 78 | --- 79 | 80 | ## The Logistic datafit 81 | 82 | In this case, 83 | 84 | ```{math} 85 | F(X beta + beta_0 bb"1"_{n}) = 1/n sum_(i=1)^n log(1 + exp(-y_i(X_( i: ) beta + beta_0 bb"1"_n)) 86 | ``` 87 | 88 | 89 | We can then write 90 | 91 | ```{math} 92 | nabla_(beta_0) F(X beta + beta_0 bb"1"_n) = 1/n sum_(i=1)^n (-y_i)/(1 + exp(-y_i(X_( i: ) beta + beta_0 bb"1"_n))) \ . 93 | ``` 94 | 95 | 96 | Finally, the Lipschitz constant is $L_0 = 1/(4n) sum_(i=1)^n 1^2 = 1/4$. 97 | 98 | --- 99 | 100 | ## The Huber datafit 101 | 102 | In this case, 103 | 104 | ```{math} 105 | F(X beta + beta_0 bb"1"_{n}) = 1/n sum_(i=1)^n f_(delta) (y_i - X_( i: ) beta - beta_0 bb"1"_n) \ , 106 | ``` 107 | 108 | where 109 | 110 | ```{math} 111 | f_delta(x) = { 112 | (1/2 x^2, if x <= delta), 113 | (delta |x| - 1/2 delta^2, if x > delta) 114 | :} \ . 115 | ``` 116 | 117 | 118 | Let $r_i = y_i - X_( i: ) beta - beta_0 bb"1"_n$. We can then write 119 | 120 | ```{math} 121 | nabla_(beta_0) F(X beta + beta_0 bb"1"_{n}) = 1/n sum_(i=1)^n r_i bbb"1"_({|r_i| <= delta}) + "sign"(r_i) delta bbb"1"_({|r_i| > delta}) \ , 122 | ``` 123 | 124 | where $bbb"1"_({x > delta})$ is the classical indicator function. 125 | 126 | Finally, the Lipschitz constant is $L_0 = 1/n sum_(i=1)^n 1^2 = 1$. 127 | -------------------------------------------------------------------------------- /doc/tutorials/prox_nn_group_lasso.rst: -------------------------------------------------------------------------------- 1 | .. _prox_nn_group_lasso: 2 | .. meta:: 3 | :description: Detailed tutorial on deriving the proximity operator and subdifferential for the positive group Lasso penalty in skglm. Includes mathematical proofs and examples. 4 | 5 | =================================== 6 | Details on the Positive Group Lasso 7 | =================================== 8 | 9 | This tutorial presents how to derive the proximity operator and subdifferential of the :math:`l_2`-penalty, and the :math:`l_2`-penalty with nonnegative constraints. 10 | 11 | 12 | Proximity operator of the group Lasso 13 | ===================================== 14 | 15 | Let 16 | 17 | .. math:: 18 | g:x \mapsto \norm{x}_2 19 | , 20 | 21 | then its Fenchel-Legendre conjugate is 22 | 23 | .. math:: 24 | :label: fenchel 25 | 26 | g^{\star}:x \mapsto i_{\norm{x}_2 \leq 1} 27 | , 28 | 29 | and for all :math:`x \in \mathbb{R}^p` 30 | 31 | .. math:: 32 | :label: prox_projection 33 | 34 | \text{prox}_{g^{\star}}(x) 35 | = 36 | \text{proj}_{\mathcal{B}_2}(x) = \frac{x}{\max(\norm{x}_2, 1)} 37 | . 38 | 39 | Using the Moreau decomposition, Equations :eq:`fenchel` and :eq:`prox_projection`, one has 40 | 41 | 42 | .. math:: 43 | 44 | \text{prox}_{\lambda g}(x) 45 | = 46 | x 47 | - \lambda \text{prox}_{g^\star/\lambda }(x/\lambda) 48 | 49 | .. math:: 50 | 51 | = x 52 | - \lambda \text{prox}_{g^\star}(x/\lambda) 53 | 54 | .. math:: 55 | 56 | = x 57 | - \lambda \frac{x/\lambda}{\max(\norm{x/\lambda}_2, 1)} 58 | 59 | .. math:: 60 | 61 | = x 62 | - \frac{\lambda x}{\max(\norm{x}_2, \lambda)} 63 | 64 | .. math:: 65 | 66 | = (1 - \frac{\lambda}{\norm{x}})_{+} x 67 | . 68 | 69 | A similar formula can be derived for the group Lasso with nonnegative constraints. 70 | 71 | 72 | Proximity operator of the group Lasso with positivity constraints 73 | ================================================================= 74 | 75 | Let 76 | 77 | .. math:: 78 | h:x \mapsto \norm{x}_2 79 | + i_{x \geq 0} 80 | . 
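Before deriving the proximity operator of :math:`h`, note that the block soft-thresholding formula obtained above for the plain group Lasso, as well as the nonnegative variant derived below, translate into a few lines of NumPy (an illustrative sketch with hypothetical helper names, not ``skglm``'s internal implementation):

.. code-block:: python

    import numpy as np

    def block_soft_thresholding(x, level):
        """Proximal operator of level * ||.||_2: (1 - level / ||x||)_+ x."""
        norm_x = np.linalg.norm(x)
        if norm_x <= level:
            return np.zeros_like(x)
        return (1 - level / norm_x) * x

    def positive_block_soft_thresholding(x, level):
        """Proximal operator of level * ||.||_2 plus the indicator of {x >= 0}."""
        # zero out the negative coordinates, then block soft-threshold the rest
        return block_soft_thresholding(np.maximum(x, 0.0), level)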
81 | 82 | Let :math:`x \in \mathbb{R}^p` and :math:`S = \{ j \in 1, ..., p | x_j > 0 \} \in \mathbb{R}^p`, then 83 | 84 | 85 | .. math:: 86 | :label: fenchel_nn 87 | 88 | h^{\star} :x \mapsto i_{\norm{x_S}_2 \leq 1} 89 | , 90 | 91 | and 92 | 93 | .. math:: 94 | :label: prox_projection_nn_Sc 95 | 96 | \text{prox}_{h^{\star}}(x)_{S^c} 97 | = 98 | x_{S^c} 99 | 100 | 101 | .. math:: 102 | :label: prox_projection_nn_S 103 | 104 | \text{prox}_{h^{\star}}(x)_S 105 | = 106 | \text{proj}_{\mathcal{B}_2}(x_S) = \frac{x_S}{\max(\norm{x_S}_2, 1)} 107 | . 108 | 109 | As before, using the Moreau decomposition and Equation :eq:`fenchel_nn` yields 110 | 111 | 112 | .. math:: 113 | 114 | \text{prox}_{\lambda h}(x) 115 | = 116 | x 117 | - \lambda \text{prox}_{h^\star / \lambda }(x/\lambda) 118 | 119 | .. math:: 120 | 121 | = x 122 | - \lambda \text{prox}_{h^\star}(x/\lambda) 123 | , 124 | 125 | and thus, combined with Equations :eq:`prox_projection_nn_Sc` and :eq:`prox_projection_nn_S` it leads to 126 | 127 | .. math:: 128 | 129 | \text{prox}_{\lambda h}(x)_{S^c} = 0 130 | 131 | .. math:: 132 | 133 | \text{prox}_{\lambda h}(x)_{S} 134 | = 135 | (1 - \frac{\lambda}{\norm{x_S}})_{+} x_S 136 | . 137 | 138 | 139 | 140 | .. _subdiff_positive_group_lasso: 141 | 142 | Subdifferential of the positive Group Lasso penalty 143 | =================================================== 144 | 145 | For the ``subdiff_diff`` working set strategy, we compute the distance :math:`D(v)` for some :math:`v` to the subdifferential of the :math:`h` penalty at a point :math:`w`. 146 | Since the penalty is group-separable, we reduce the case where :math:`w` is a block of variables in :math:`\mathbb{R}^g`. 147 | 148 | Case :math:`w \notin \mathbb{R}_+^g` 149 | ------------------------------------ 150 | 151 | If any component of :math:`w` is strictly negative, the subdifferential is empty, and the distance is :math:`+ \infty`. 152 | 153 | .. math:: 154 | 155 | D(v) = + \infty, \quad \forall v \in \mathbb{R}^g 156 | . 157 | 158 | Case :math:`w = 0` 159 | ------------------ 160 | 161 | At :math:`w = 0`, the subdifferential is: 162 | 163 | .. math:: 164 | 165 | \lambda \partial || \cdot ||_2 + \partial \iota_{x \geq 0} = \lambda \mathcal{B}_2 + \mathbb{R}_-^g 166 | , 167 | 168 | where :math:`\mathcal{B}_2` is the unit ball. 169 | 170 | Therefore, the distance to the subdifferential writes 171 | 172 | .. math:: 173 | 174 | D(v) = \min_{u \in \lambda \mathcal{B}_2, n \in \mathbb{R}_{-}^g} \ || u + n - v || 175 | . 176 | 177 | Minimizing over :math:`n` then over :math:`u`, thanks to [`1 `_], yields 178 | 179 | .. math:: 180 | 181 | D(v) = \max(0, ||v^+|| - \lambda) 182 | , 183 | 184 | where :math:`v^+` is :math:`v` restricted to its positive coordinates. 185 | Intuitively, it is clear that if :math:`v_i < 0`, we can cancel it exactly in the objective function by taking :math:`n_i = - v_i` and :math:`u_i = 0`; on the other hand, if :math:`v_i>0`, taking a non zero :math:`n_i` will only increase the quantity that :math:`u_i` needs to bring closer to 0. 186 | 187 | For a rigorous derivation of this, introduce the Lagrangian on a squared objective 188 | 189 | .. math:: 190 | 191 | \mathcal{L}(u, n, \nu, \mu) = 192 | \frac{1}{2}\norm{u + n - v}^2 + \nu(\frac{1}{2} \norm{u}^2 - \lambda^2 / 2) + \langle \mu, n \rangle 193 | , 194 | 195 | and write down the optimality condition with respect to :math:`u` and :math:`n`. 
196 | Treat the case :math:`\nu = 0` separately; in the other case, show that :math:`u` must be positive and that :math:`v = (1 + \nu) u + n`, and combine this with :math:`u = \mu / \nu` and complementary slackness to reach the conclusion.
197 |
198 | Case :math:`|| w || \ne 0`
199 | ---------------------------
200 | The subdifferential in that case is :math:`\lambda w / {|| w ||} + C_1 \times \ldots \times C_g` where :math:`C_j = \{0\}` if :math:`w_j > 0` and :math:`C_j = \mathbb{R}_-` otherwise (:math:`w_j = 0`).
201 |
202 | Letting :math:`p` denote the projection of :math:`v` onto this set,
203 | one has
204 |
205 | .. math::
206 |
207 |     p_j = \lambda \frac{w_j}{||w||} \text{ if } w_j > 0
208 |
209 | and
210 |
211 | .. math::
212 |
213 |     p_j = \min(v_j, 0) \text{ otherwise}.
214 |
215 | The distance to the subdifferential is then:
216 |
217 | .. math::
218 |
219 |     D(v) = || v - p || = \sqrt{\sum_{j, w_j > 0} (v_j - \lambda \frac{w_j}{||w||})^2 + \sum_{j, w_j=0} \max(0, v_j)^2}
220 |
221 | since :math:`v_j - \min(v_j, 0) = v_j + \max(-v_j, 0) = \max(0, v_j)`.
222 |
223 |
224 |
225 | References
226 | ==========
227 |
228 | [1] ``_
229 | -------------------------------------------------------------------------------- /doc/tutorials/tutorials.rst: --------------------------------------------------------------------------------
1 | .. _tutorials:
2 |
3 | .. meta::
4 |     :description: Step-by-step skglm tutorials covering custom datafits, penalties, intercept computations, Cox datafit mathematics, group Lasso details, and regularization strategies.
5 |
6 | =========
7 | Tutorials
8 | =========
9 |
10 | .. grid:: 1 1 2 2
11 |     :gutter: 2
12 |
13 |     .. grid-item-card:: How to Add a Custom Datafit
14 |         :link: add_datafit.html
15 |         :text-align: left
16 |
17 |         Learn to add a custom datafit through a hands-on example: implementing a Poisson datafit.
18 |
19 |     .. grid-item-card:: How to Add a Custom Penalty
20 |         :link: add_penalty.html
21 |         :text-align: left
22 |
23 |         Learn to add a custom penalty by implementing the :math:`\ell_1` penalty.
24 |
25 |     .. grid-item-card:: Computation of the Intercept
26 |         :link: intercept.html
27 |         :text-align: left
28 |
29 |         Explore how ``skglm`` fits an unpenalized intercept.
30 |
31 |     .. grid-item-card:: Mathematics behind Cox Datafit
32 |         :link: cox_datafit.html
33 |         :text-align: left
34 |
35 |         Understand the mathematical foundation of the Cox datafit and its applications in survival analysis.
36 |
37 |     .. grid-item-card:: Details on the Group Lasso
38 |         :link: prox_nn_group_lasso.html
39 |         :text-align: left
40 |
41 |         Mathematical details about the group Lasso, in particular with nonnegativity constraints.
42 |
43 |     .. grid-item-card:: Understanding `alpha_max`
44 |         :link: alpha_max.html
45 |         :text-align: left
46 |
47 |         Learn how to choose the regularization strength in :math:`\ell_1`-regularized problems.
48 |
49 | .. toctree::
50 |     :hidden:
51 |
52 |     add_datafit
53 |     add_penalty
54 |     intercept
55 |     cox_datafit
56 |     prox_nn_group_lasso
57 |     alpha_max
58 | -------------------------------------------------------------------------------- /examples/README.txt: --------------------------------------------------------------------------------
1 | .. _general_examples:
2 |
3 | .. title:: Examples
4 |
5 | Examples
6 | ========
7 |
8 | ..
toctree:: 9 | :maxdepth: 1 10 | 11 | auto_examples/index 12 | -------------------------------------------------------------------------------- /examples/plot_compare_time.py: -------------------------------------------------------------------------------- 1 | # Authors: Quentin Klopfenstein 2 | # Mathurin Massias 3 | """ 4 | ============================================= 5 | Timing comparison with scikit-learn for Lasso 6 | ============================================= 7 | Compare time to solve large scale Lasso problems with scikit-learn. 8 | """ 9 | 10 | 11 | import time 12 | import warnings 13 | import numpy as np 14 | from numpy.linalg import norm 15 | import matplotlib.pyplot as plt 16 | from libsvmdata import fetch_libsvm 17 | 18 | from sklearn.exceptions import ConvergenceWarning 19 | from sklearn.linear_model import Lasso as Lasso_sklearn 20 | from sklearn.linear_model import ElasticNet as Enet_sklearn 21 | 22 | from skglm import Lasso, ElasticNet 23 | 24 | warnings.filterwarnings('ignore', category=ConvergenceWarning) 25 | 26 | 27 | def compute_obj(X, y, w, alpha, l1_ratio=1): 28 | loss = norm(y - X @ w) ** 2 / (2 * len(y)) 29 | penalty = (alpha * l1_ratio * np.sum(np.abs(w)) 30 | + 0.5 * alpha * (1 - l1_ratio) * norm(w) ** 2) 31 | return loss + penalty 32 | 33 | 34 | X, y = fetch_libsvm("news20.binary" 35 | ) 36 | alpha = np.max(np.abs(X.T @ y)) / len(y) / 10 37 | 38 | dict_sklearn = {} 39 | dict_sklearn["lasso"] = Lasso_sklearn( 40 | alpha=alpha, fit_intercept=False, tol=1e-12) 41 | 42 | dict_sklearn["enet"] = Enet_sklearn( 43 | alpha=alpha, fit_intercept=False, tol=1e-12, l1_ratio=0.5) 44 | 45 | dict_ours = {} 46 | dict_ours["lasso"] = Lasso( 47 | alpha=alpha, fit_intercept=False, tol=1e-12) 48 | dict_ours["enet"] = ElasticNet( 49 | alpha=alpha, fit_intercept=False, tol=1e-12, l1_ratio=0.5) 50 | 51 | models = ["lasso", "enet"] 52 | 53 | fig, axarr = plt.subplots(2, 1, constrained_layout=True) 54 | 55 | for ax, model, l1_ratio in zip(axarr, models, [1, 0.5]): 56 | pobj_dict = {} 57 | pobj_dict["sklearn"] = list() 58 | pobj_dict["us"] = list() 59 | 60 | time_dict = {} 61 | time_dict["sklearn"] = list() 62 | time_dict["us"] = list() 63 | 64 | # Remove compilation time 65 | dict_ours[model].max_iter = 10_000 66 | w_star = dict_ours[model].fit(X, y).coef_ 67 | pobj_star = compute_obj(X, y, w_star, alpha, l1_ratio) 68 | for n_iter_sklearn in np.unique(np.geomspace(1, 50, num=15).astype(int)): 69 | dict_sklearn[model].max_iter = n_iter_sklearn 70 | 71 | t_start = time.time() 72 | w_sklearn = dict_sklearn[model].fit(X, y).coef_ 73 | time_dict["sklearn"].append(time.time() - t_start) 74 | pobj_dict["sklearn"].append(compute_obj(X, y, w_sklearn, alpha, l1_ratio)) 75 | 76 | for n_iter_us in range(1, 10): 77 | dict_ours[model].max_iter = n_iter_us 78 | t_start = time.time() 79 | w = dict_ours[model].fit(X, y).coef_ 80 | time_dict["us"].append(time.time() - t_start) 81 | pobj_dict["us"].append(compute_obj(X, y, w, alpha, l1_ratio)) 82 | 83 | ax.semilogy( 84 | time_dict["sklearn"], pobj_dict["sklearn"] - pobj_star, label='sklearn') 85 | ax.semilogy( 86 | time_dict["us"], pobj_dict["us"] - pobj_star, label='skglm') 87 | 88 | ax.set_ylim((1e-10, 1)) 89 | ax.set_title(model) 90 | ax.legend() 91 | ax.set_ylabel("Objective suboptimality") 92 | 93 | axarr[1].set_xlabel("Time (s)") 94 | plt.show(block=False) 95 | -------------------------------------------------------------------------------- /examples/plot_group_logistic_regression.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | =================================== 3 | Group Logistic regression in python 4 | =================================== 5 | Scikit-learn is missing a Group Logistic regression estimator. We show how to implement 6 | one with ``skglm``. 7 | """ 8 | 9 | # Author: Mathurin Massias 10 | 11 | import numpy as np 12 | 13 | from skglm import GeneralizedLinearEstimator 14 | from skglm.datafits import LogisticGroup 15 | from skglm.penalties import WeightedGroupL2 16 | from skglm.solvers import GroupProxNewton 17 | from skglm.utils.data import make_correlated_data, grp_converter 18 | 19 | import matplotlib.pyplot as plt 20 | 21 | n_features = 30 22 | X, y, _ = make_correlated_data( 23 | n_samples=10, n_features=30, random_state=0) 24 | y = np.sign(y) 25 | 26 | 27 | # %% 28 | # Classifier creation: combination of penalty, datafit and solver. 29 | # 30 | grp_size = 3 # groups are made of groups of 3 consecutive features 31 | n_groups = n_features // grp_size 32 | grp_indices, grp_ptr = grp_converter(grp_size, n_features=n_features) 33 | alpha = 0.01 34 | weights = np.ones(n_groups) 35 | penalty = WeightedGroupL2(alpha, weights, grp_ptr, grp_indices) 36 | datafit = LogisticGroup(grp_ptr, grp_indices) 37 | solver = GroupProxNewton(verbose=2) 38 | 39 | # %% 40 | # Train the model 41 | clf = GeneralizedLinearEstimator(datafit, penalty, solver) 42 | clf.fit(X, y) 43 | 44 | # %% 45 | # Fit check that groups are either all 0 or all non zero 46 | print(clf.coef_.reshape(-1, grp_size)) 47 | 48 | # %% 49 | # Visualise group-level sparsity 50 | 51 | coef_by_group = clf.coef_.reshape(-1, grp_size) 52 | group_norms = np.linalg.norm(coef_by_group, axis=1) 53 | 54 | plt.figure(figsize=(8, 4)) 55 | plt.bar(np.arange(n_groups), group_norms) 56 | plt.xlabel("Group index") 57 | plt.ylabel("L2 norm of coefficients") 58 | plt.title("Group Sparsity Pattern") 59 | plt.tight_layout() 60 | plt.show() 61 | 62 | # %% 63 | # This plot shows the L2 norm of the coefficients for each group. 64 | # Groups with a zero norm have been set inactive by the model, 65 | # illustrating how Group Logistic Regression enforces sparsity at the group level. 66 | # (Note: This example uses a tiny synthetic dataset, so the pattern has limited interpretability.) 67 | -------------------------------------------------------------------------------- /examples/plot_lasso_vs_weighted.py: -------------------------------------------------------------------------------- 1 | """ 2 | ====================================== 3 | Comparison of Lasso and Weighted Lasso 4 | ====================================== 5 | Illustrate the importance of feature normalization when penalizing. 
6 | """ 7 | 8 | # Author: Mathurin Massias 9 | # Quentin Bertrand 10 | 11 | import numpy as np 12 | from numpy.linalg import norm 13 | import matplotlib.pyplot as plt 14 | 15 | from skglm import Lasso, WeightedLasso 16 | from skglm.utils.data import make_correlated_data 17 | 18 | n_features = 30 19 | X, _, _ = make_correlated_data( 20 | n_samples=50, n_features=n_features, random_state=0) 21 | w_true = np.zeros(n_features) 22 | 23 | nnz = 5 24 | w_true[:nnz] = 1 25 | 26 | # assume for some reason important features have a smaller norm than the other 27 | X[:, :nnz] *= 0.1 28 | noise = np.random.randn(X.shape[0]) 29 | # use a signal-to-noise ratio of 2 30 | y = X @ w_true + 0.5 * norm(X @ w_true) / norm(noise) * noise 31 | 32 | 33 | # the Lasso does not select small norm features, while the weighted Lasso does: 34 | alpha_max = np.max(np.abs(X.T @ y)) / len(y) 35 | alpha = alpha_max / 10 36 | las = Lasso(alpha=alpha, fit_intercept=False).fit(X, y) 37 | wei = WeightedLasso( 38 | alpha=alpha, weights=norm(X, axis=0), fit_intercept=False).fit(X, y) 39 | 40 | 41 | fig, axarr = plt.subplots(1, 3, sharey=True, figsize=(10, 2.4)) 42 | axarr[0].stem(w_true) 43 | axarr[0].set_title("True coeffs") 44 | axarr[1].stem(las.coef_) 45 | axarr[1].set_title("Lasso") 46 | axarr[2].stem(wei.coef_) 47 | axarr[2].set_title("Weighted Lasso") 48 | plt.show(block=False) 49 | -------------------------------------------------------------------------------- /examples/plot_logreg_various_penalties.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================================================== 3 | Logistic regression with Elastic net and minimax concave penalties 4 | ================================================================== 5 | Illustrate the modularity of ``skglm`` by using ``GeneralizedLinearEstimator`` with one datafit and one penalty. 
6 | """ 7 | 8 | # Author: Pierre-Antoine Bannier 9 | 10 | import numpy as np 11 | from numpy.linalg import norm 12 | import matplotlib.pyplot as plt 13 | 14 | from sklearn.metrics import f1_score 15 | 16 | from skglm import GeneralizedLinearEstimator 17 | from skglm.datafits import Logistic 18 | from skglm.penalties import L1_plus_L2, MCPenalty 19 | from skglm.utils.data import make_correlated_data 20 | 21 | 22 | n_samples, n_features = 50, 100 23 | X, y, w_star = make_correlated_data( 24 | n_samples=n_samples, n_features=n_features, random_state=0) 25 | y_ind = np.sign(y) 26 | 27 | # standardize for MCP 28 | X /= norm(X, axis=0) / np.sqrt(len(X)) 29 | 30 | # Split data in train set and test set 31 | X_train, y_train = X[: n_samples // 2], y_ind[: n_samples // 2] 32 | X_test, y_test = X[n_samples // 2:], y_ind[n_samples // 2:] 33 | 34 | 35 | alpha = 0.005 36 | gamma = 3.0 37 | l1_ratio = 0.3 38 | clf_enet = GeneralizedLinearEstimator( 39 | Logistic(), 40 | L1_plus_L2(alpha, l1_ratio), 41 | ) 42 | y_pred_enet = clf_enet.fit(X_train, y_train).predict(X_test) 43 | f1_score_enet = f1_score(y_test, y_pred_enet) 44 | 45 | clf_mcp = GeneralizedLinearEstimator( 46 | Logistic(), 47 | MCPenalty(alpha, gamma), 48 | ) 49 | y_pred_mcp = clf_mcp.fit(X_train, y_train).predict(X_test) 50 | f1_score_mcp = f1_score(y_test, y_pred_mcp) 51 | 52 | 53 | m, s, _ = plt.stem( 54 | np.where(clf_enet.coef_.ravel())[0], 55 | clf_enet.coef_[clf_enet.coef_ != 0], 56 | markerfmt="x", 57 | label="Elastic net coefficients", 58 | ) 59 | plt.setp([m, s], color="#2ca02c") 60 | m, s, _ = plt.stem( 61 | np.where(clf_mcp.coef_.ravel())[0], 62 | clf_mcp.coef_[clf_mcp.coef_ != 0], 63 | markerfmt="x", 64 | label="MCP coefficients", 65 | ) 66 | plt.setp([m, s], color="#ff7f0e") 67 | plt.stem( 68 | np.where(w_star)[0], 69 | w_star[w_star != 0], 70 | label="true coefficients", 71 | markerfmt="bx", 72 | ) 73 | 74 | plt.legend(loc="best") 75 | plt.title("MCP F1: %.3f, Elastic Net F1: %.3f" % (f1_score_mcp, f1_score_enet)) 76 | plt.show() 77 | -------------------------------------------------------------------------------- /examples/plot_pen_prox.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================= 3 | Value and proximal operators of penalties 4 | ========================================= 5 | Illustrate the value and proximal operators of some sparse penalties. 
6 | """ 7 | # Author: Mathurin Massias 8 | 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | 12 | from skglm.penalties import WeightedL1, L1, L1_plus_L2, MCPenalty, SCAD, L0_5, L2_3 13 | 14 | 15 | penalties = [ 16 | WeightedL1(alpha=1, weights=np.array([2.])), 17 | L1(alpha=1), 18 | L1_plus_L2(alpha=1, l1_ratio=0.7), 19 | MCPenalty(alpha=1, gamma=3.), 20 | SCAD(alpha=1, gamma=3.), 21 | L0_5(alpha=1), 22 | L2_3(alpha=1), 23 | ] 24 | 25 | 26 | x_range = np.linspace(-4, 4, num=300) 27 | 28 | fig, axarr = plt.subplots(1, 2, figsize=(8, 3), constrained_layout=True) 29 | 30 | for pen in penalties: 31 | axarr[0].plot(x_range, 32 | [pen.value(np.array([x])) for x in x_range], 33 | label=pen.__class__.__name__) 34 | axarr[1].plot(x_range, 35 | [pen.prox_1d(x, 1, 0) for x in x_range], 36 | label=pen.__class__.__name__) 37 | 38 | axarr[0].legend() 39 | axarr[0].set_title("Penalty value") 40 | axarr[1].set_title("Proximal operator of penalty") 41 | plt.show(block=False) 42 | -------------------------------------------------------------------------------- /examples/plot_reweighted_l1.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================================================= 3 | Timing comparison between direct prox computation and reweighting 4 | ================================================================= 5 | Compare time and objective value of L0_5-regularized problem with 6 | direct proximal computation and iterative reweighting. 7 | """ 8 | # Author: Pierre-Antoine Bannier 9 | 10 | import time 11 | import numpy as np 12 | import pandas as pd 13 | from numpy.linalg import norm 14 | import matplotlib.pyplot as plt 15 | 16 | from skglm.penalties.separable import L0_5 17 | from skglm.utils.data import make_correlated_data 18 | from skglm.estimators import GeneralizedLinearEstimator 19 | from skglm.experimental import IterativeReweightedL1 20 | from skglm.solvers import AndersonCD 21 | 22 | 23 | n_samples, n_features = 200, 500 24 | X, y, w_true = make_correlated_data( 25 | n_samples=n_samples, n_features=n_features, random_state=24) 26 | 27 | alpha_max = norm(X.T @ y, ord=np.inf) / n_samples 28 | alphas = [alpha_max / 10, alpha_max / 100, alpha_max / 1000] 29 | tol = 1e-10 30 | 31 | 32 | def _obj(w): 33 | return (np.sum((y - X @ w) ** 2) / (2 * n_samples) 34 | + alpha * np.sum(np.sqrt(np.abs(w)))) 35 | 36 | 37 | def fit_l05(alpha): 38 | start = time.time() 39 | iterative_l05 = IterativeReweightedL1( 40 | penalty=L0_5(alpha), 41 | solver=AndersonCD(tol=tol, fit_intercept=False)).fit(X, y) 42 | iterative_time = time.time() - start 43 | 44 | # `subdiff` strategy for WS is uninformative for L0_5 45 | start = time.time() 46 | direct_l05 = GeneralizedLinearEstimator( 47 | penalty=L0_5(alpha), 48 | solver=AndersonCD(tol=tol, fit_intercept=False, 49 | ws_strategy="fixpoint")).fit(X, y) 50 | direct_time = time.time() - start 51 | 52 | results = { 53 | "iterative": (iterative_l05, iterative_time), 54 | "direct": (direct_l05, direct_time), 55 | } 56 | return results 57 | 58 | 59 | # caching Numba compilation 60 | fit_l05(alpha_max/10) 61 | 62 | time_results = np.zeros((2, len(alphas))) 63 | obj_results = np.zeros((2, len(alphas))) 64 | 65 | # actual run 66 | for i, alpha in enumerate(alphas): 67 | results = fit_l05(alpha=alpha) 68 | iterative_l05, iterative_time = results["iterative"] 69 | direct_l05, direct_time = results["direct"] 70 | 71 | iterative_obj = _obj(iterative_l05.coef_) 72 | direct_obj = _obj(direct_l05.coef_) 
73 | 74 | obj_results[:, i] = np.array([iterative_obj, direct_obj]) 75 | time_results[:, i] = np.array([iterative_time, direct_time]) 76 | 77 | time_df = pd.DataFrame(time_results.T, columns=["Iterative", "Direct"]) 78 | obj_df = pd.DataFrame(obj_results.T, columns=["Iterative", "Direct"]) 79 | 80 | time_df.index = [1e-1, 1e-2, 1e-3] 81 | obj_df.index = [1e-1, 1e-2, 1e-3] 82 | 83 | fig, axarr = plt.subplots(1, 2, figsize=(8, 3.5), constrained_layout=True) 84 | ax = axarr[0] 85 | time_df.plot.bar(rot=0, ax=ax) 86 | ax.set_xlabel(r"$\lambda/\lambda_{max}$") 87 | ax.set_ylabel("time (in s)") 88 | ax.set_title("Time to fit") 89 | 90 | ax = axarr[1] 91 | obj_df.plot.bar(rot=0, ax=ax) 92 | ax.set_xlabel(r"$\lambda/\lambda_{max}$") 93 | ax.set_ylabel("obj. value") 94 | ax.set_title("Objective at solution") 95 | plt.show(block=False) 96 | -------------------------------------------------------------------------------- /examples/plot_sparse_group_lasso.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================= 3 | Fast Sparse Group Lasso in python 4 | ================================= 5 | Scikit-learn is missing a Sparse Group Lasso regression estimator. We show how to 6 | implement one with ``skglm``. 7 | """ 8 | 9 | # Author: Mathurin Massias 10 | 11 | # %% 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | 15 | from skglm.solvers import GroupBCD 16 | from skglm.datafits import QuadraticGroup 17 | from skglm import GeneralizedLinearEstimator 18 | from skglm.penalties import WeightedL1GroupL2 19 | from skglm.utils.data import make_correlated_data, grp_converter 20 | 21 | n_features = 30 22 | X, y, _ = make_correlated_data( 23 | n_samples=10, n_features=30, random_state=0) 24 | 25 | 26 | # %% 27 | # Model creation: combination of penalty, datafit and solver. 28 | # 29 | # penalty: 30 | grp_size = 10 # take groups of 10 consecutive features 31 | n_groups = n_features // grp_size 32 | grp_indices, grp_ptr = grp_converter(grp_size, n_features) 33 | n_groups = len(grp_ptr) - 1 34 | weights_g = np.ones(n_groups, dtype=np.float64) 35 | weights_f = 0.5 * np.ones(n_features) 36 | penalty = WeightedL1GroupL2( 37 | alpha=0.5, weights_groups=weights_g, 38 | weights_features=weights_f, grp_indices=grp_indices, grp_ptr=grp_ptr) 39 | 40 | # %% Datafit and solver 41 | datafit = QuadraticGroup(grp_ptr, grp_indices) 42 | solver = GroupBCD(ws_strategy="fixpoint", verbose=1, fit_intercept=False, tol=1e-10) 43 | 44 | model = GeneralizedLinearEstimator(datafit, penalty, solver=solver) 45 | 46 | # %% 47 | # Train the model 48 | clf = GeneralizedLinearEstimator(datafit, penalty, solver) 49 | clf.fit(X, y) 50 | 51 | # %% 52 | # Some groups are fully 0, and inside non zero groups, 53 | # some values are 0 too 54 | plt.imshow(clf.coef_.reshape(-1, grp_size) != 0, cmap='Greys') 55 | plt.title("Non zero values (in black) in model coefficients") 56 | plt.ylabel('Group index') 57 | plt.xlabel('Feature index inside group') 58 | plt.xticks(np.arange(grp_size)) 59 | plt.yticks(np.arange(n_groups)); 60 | 61 | # %% 62 | -------------------------------------------------------------------------------- /examples/plot_sparse_recovery.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================= 3 | Sparse recovery with non-convex penalties 4 | ========================================= 5 | Illustrate the superior performance of penalties for sparse recovery. 
6 | """ 7 | 8 | # Author: Mathurin Massias 9 | # Quentin Bertrand 10 | # Quentin Klopfenstein 11 | 12 | import numpy as np 13 | from numpy.linalg import norm 14 | import matplotlib.pyplot as plt 15 | from sklearn.model_selection import train_test_split 16 | from sklearn.metrics import f1_score, mean_squared_error 17 | 18 | from skglm.utils.data import make_correlated_data 19 | from skglm.solvers import AndersonCD 20 | from skglm.datafits import Quadratic 21 | from skglm.penalties import L1, MCPenalty, L0_5, L2_3, SCAD 22 | 23 | cmap = plt.get_cmap('tab10') 24 | 25 | # Simulate sparse data 26 | n_features = 1000 27 | density = 0.1 28 | np.random.seed(0) 29 | supp = np.random.choice(n_features, size=int(density * n_features), 30 | replace=False) 31 | w_true = np.zeros(n_features) 32 | w_true[supp] = 1 33 | X_, y_, w_true = make_correlated_data( 34 | n_samples=1000, n_features=1000, snr=5, random_state=2, 35 | rho=0.5, w_true=w_true) 36 | 37 | # standardize for MCP 38 | X_ /= norm(X_, axis=0) / np.sqrt(len(X_)) 39 | X, X_test, y, y_test = train_test_split(X_, y_, test_size=0.5) 40 | 41 | 42 | # Compute l1 penalty value which leads to 0 as solution 43 | alpha_max = norm(X.T @ y, ord=np.inf) / len(y) 44 | 45 | # Define a range of penalty values 46 | n_alphas = 30 47 | alphas = alpha_max * np.geomspace(1, 1e-2, num=n_alphas) 48 | 49 | datafit = Quadratic() 50 | 51 | penalties = {} 52 | penalties['lasso'] = L1(alpha=1) 53 | penalties['mcp'] = MCPenalty(alpha=1, gamma=3) 54 | penalties['scad'] = SCAD(alpha=1, gamma=3) 55 | penalties['l05'] = L0_5(alpha=1) 56 | penalties['l23'] = L2_3(alpha=1) 57 | 58 | colors = {} 59 | colors['lasso'] = cmap(0) 60 | colors['mcp'] = cmap(1) 61 | colors['scad'] = cmap(2) 62 | colors['l05'] = cmap(3) 63 | colors['l23'] = cmap(4) 64 | 65 | f1 = {} 66 | estimation_error = {} 67 | prediction_error = {} 68 | l0 = {} 69 | mse_ref = mean_squared_error(np.zeros_like(y_test), y_test) 70 | 71 | solver = AndersonCD(ws_strategy="fixpoint", fit_intercept=False) 72 | 73 | for idx, estimator in enumerate(penalties.keys()): 74 | print(f'Running {estimator}...') 75 | estimator_path = solver.path( 76 | X, y, datafit, penalties[estimator], 77 | alphas=alphas) 78 | 79 | f1_temp = np.zeros(n_alphas) 80 | prediction_error_temp = np.zeros(n_alphas) 81 | 82 | for j, w in enumerate(estimator_path[1].T): 83 | f1_temp[j] = f1_score(w != 0, w_true != 0) 84 | prediction_error_temp[j] = mean_squared_error(X_test @ w, y_test) / mse_ref 85 | 86 | f1[estimator] = f1_temp 87 | prediction_error[estimator] = prediction_error_temp 88 | 89 | name_estimators = {'lasso': "Lasso"} 90 | name_estimators['mcp'] = r"MCP, $\gamma=%s$" % 3 91 | name_estimators['scad'] = r"SCAD, $\gamma=%s$" % 3 92 | name_estimators['l05'] = r"$\ell_{1/2}$" 93 | name_estimators['l23'] = r"$\ell_{2/3}$" 94 | 95 | 96 | plt.close('all') 97 | fig, axarr = plt.subplots(2, 1, sharex=True, sharey=False, figsize=[ 98 | 6.3, 4], constrained_layout=True) 99 | 100 | for idx, estimator in enumerate(penalties.keys()): 101 | 102 | axarr[0].semilogx( 103 | alphas / alphas[0], f1[estimator], label=name_estimators[estimator], 104 | c=colors[estimator]) 105 | 106 | axarr[1].semilogx( 107 | alphas / alphas[0], prediction_error[estimator], 108 | label=name_estimators[estimator], c=colors[estimator]) 109 | 110 | max_f1 = np.argmax(f1[estimator]) 111 | axarr[0].vlines( 112 | x=alphas[max_f1] / alphas[0], ymin=0, 113 | ymax=np.max(f1[estimator]), 114 | color=colors[estimator], linestyle='--') 115 | line1 = axarr[0].plot( 116 | [alphas[max_f1] / 
alphas[0]], 0, clip_on=False, 117 | marker='X', color=colors[estimator], markersize=12) 118 | 119 | min_error = np.argmin(prediction_error[estimator]) 120 | 121 | lims = axarr[1].get_ylim() 122 | axarr[1].vlines( 123 | x=alphas[min_error] / alphas[0], ymin=0, 124 | ymax=np.min(prediction_error[estimator]), 125 | color=colors[estimator], linestyle='--') 126 | 127 | line2 = axarr[1].plot( 128 | [alphas[min_error] / alphas[0]], 0, clip_on=False, 129 | marker='X', color=colors[estimator], markersize=12) 130 | axarr[1].set_xlabel(r"$\lambda / \lambda_{\mathrm{max}}$") 131 | axarr[0].set_ylabel("F1-score") 132 | axarr[0].set_ylim(ymin=0, ymax=1.0) 133 | axarr[1].set_ylim(ymin=0, ymax=lims[1]) 134 | axarr[1].set_ylabel("pred. RMSE left-out") 135 | axarr[0].legend( 136 | bbox_to_anchor=(0, 1.02, 1, 0.2), loc="lower left", 137 | mode="expand", borderaxespad=0, ncol=5) 138 | 139 | plt.show(block=False) 140 | -------------------------------------------------------------------------------- /examples/plot_ucurve.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================== 3 | Show U-curve of regularization 4 | ============================== 5 | Illustrate the sweet spot of regularization: not too much, not too little. 6 | We showcase that for the Lasso estimator on the ``rcv1.binary`` dataset. 7 | """ 8 | 9 | import numpy as np 10 | from numpy.linalg import norm 11 | import matplotlib.pyplot as plt 12 | from libsvmdata import fetch_libsvm 13 | 14 | from sklearn.model_selection import train_test_split 15 | from sklearn.metrics import mean_squared_error 16 | 17 | from skglm import Lasso 18 | 19 | # %% 20 | # First, we load the dataset and keep 2000 features. 21 | # We also retrain 2000 samples in training dataset. 22 | X, y = fetch_libsvm("rcv1.binary") 23 | 24 | X = X[:, :2000] 25 | X_train, X_test, y_train, y_test = train_test_split(X, y) 26 | X_train, y_train = X_train[:2000], y_train[:2000] 27 | 28 | # %% 29 | # Next, we define the regularization path. 30 | # For Lasso, it is well know that there is an ``alpha_max`` above which the optimal solution is the zero vector. 31 | alpha_max = norm(X_train.T @ y_train, ord=np.inf) / len(y_train) 32 | alphas = alpha_max * np.geomspace(1, 1e-4) 33 | 34 | # %% 35 | # Let's train the estimator along the regularization path and then compute the MSE on train and test data. 36 | mse_train = [] 37 | mse_test = [] 38 | 39 | clf = Lasso(fit_intercept=False, tol=1e-8, warm_start=True) 40 | for idx, alpha in enumerate(alphas): 41 | clf.alpha = alpha 42 | clf.fit(X_train, y_train) 43 | 44 | mse_train.append(mean_squared_error(y_train, clf.predict(X_train))) 45 | mse_test.append(mean_squared_error(y_test, clf.predict(X_test))) 46 | 47 | # %% 48 | # Finally, we can plot the train and test MSE. 49 | # Notice the "sweet spot" at around ``1e-4``, which sits at the boundary between underfitting and overfitting. 
50 | plt.close('all') 51 | plt.semilogx(alphas, mse_train, label='train MSE') 52 | plt.semilogx(alphas, mse_test, label='test MSE') 53 | plt.legend() 54 | plt.title("Mean squared error") 55 | plt.xlabel(r"Lasso regularization strength $\lambda$") 56 | plt.show(block=False) 57 | -------------------------------------------------------------------------------- /examples/plot_zero_weights_lasso.py: -------------------------------------------------------------------------------- 1 | """ 2 | ===================================== 3 | Weighted Lasso with some zero weights 4 | ===================================== 5 | 6 | This example demonstrates how to use a weighted lasso with some vanishing 7 | weights. The fast solver is adapted to use primal Anderson acceleration, 8 | allowing it to not compute the dual and handle 0 weights. 9 | """ 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | 14 | from skglm import WeightedLasso 15 | from skglm.utils.data import make_correlated_data 16 | 17 | n_features = 100 18 | w_true = np.zeros(n_features) 19 | np.random.seed() 20 | w_true[np.random.choice(n_features, 10, replace=False)] = np.random.choice([-1, 1], 10) 21 | X, y, w_true = make_correlated_data( 22 | n_samples=100, n_features=n_features, random_state=0, w_true=w_true) 23 | 24 | 25 | weights = np.empty(n_features) 26 | # unpenalize the first 10 features: 27 | weights[:10] = 0 28 | # put large penalty on the 10-50 features 29 | weights[10:50] = 5 30 | # put small penalty on last 50 features 31 | weights[50:] = 1 32 | 33 | alpha_max = np.max(np.abs(X[:, weights != 0].T @ y / weights[weights != 0])) / len(y) 34 | clf = WeightedLasso( 35 | alpha=alpha_max/50, weights=weights, fit_intercept=False).fit(X, y) 36 | 37 | 38 | fig, axarr = plt.subplots(1, 3, sharey=True, figsize=( 39 | 10.5, 3.5), constrained_layout=True) 40 | axarr[0].stem(np.arange(10), clf.coef_[:10]) 41 | axarr[0].set_title(r"unpenalized coefs: all $\neq 0$") 42 | axarr[1].stem(np.arange(10, 50), clf.coef_[10:50]) 43 | axarr[1].set_title(r"heavily penalized coefs: few $\neq 0$") 44 | axarr[2].stem(np.arange(50, 100), clf.coef_[50:]) 45 | axarr[2].set_title(r"lightly penalized coefs: many $\neq 0$") 46 | 47 | axarr[1].set_xlabel("feature index") 48 | plt.show(block=False) 49 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "skglm" 7 | description = "A fast and modular scikit-learn replacement for generalized linear models" 8 | authors = [ 9 | {name = "Mathurin Massias", email = "mathurin.massias@gmail.com"}, 10 | {name = "Badr Moufad", email = "badr.moufad@emines.um6p.ma"}, 11 | {name = "Pierre-Antoine Bannier", email = "pierreantoine.bannier@gmail.com"}, 12 | {name = "Quentin Bertrand", email = "quentin.bertrand@mila.quebec"}, 13 | {name = "Quentin Klopfenstein", email = "quentin.klopfenstein@uni.lu"} 14 | ] 15 | license = {text = "BSD (3-Clause)"} 16 | readme = {file = "README.md", content-type = "text/markdown"} 17 | dependencies = [ 18 | "numpy>=1.12", 19 | "numba", 20 | "scikit-learn>=1.6", 21 | "scipy>=0.18.0", 22 | ] 23 | dynamic = ["version"] 24 | 25 | requires-python = ">=3.9" 26 | 27 | classifiers = [ 28 | "Programming Language :: Python :: 3 :: Only", 29 | "Programming Language :: Python :: 3.9", 30 | "Programming Language :: Python :: 3.10", 31 | "Programming 
Language :: Python :: 3.11", 32 | "Programming Language :: Python :: 3.12", 33 | "Programming Language :: Python :: 3.13", 34 | ] 35 | 36 | [tool.setuptools.dynamic] 37 | version = {attr = "skglm.__version__"} 38 | 39 | 40 | [project.urls] 41 | Homepage = "https://contrib.scikit-learn.org/skglm" 42 | Source = "https://github.com/scikit-learn-contrib/skglm.git" 43 | 44 | 45 | [project.optional-dependencies] 46 | test = [ 47 | "pytest", 48 | "flake8", 49 | "coverage", 50 | "numpydoc", 51 | "celer", 52 | ] 53 | 54 | doc = [ 55 | "benchopt", 56 | "libsvmdata>=0.2", 57 | "matplotlib>=2.0.0", 58 | "myst_parser", 59 | "numpydoc", 60 | "pillow", 61 | "sphinx-bootstrap-theme", 62 | "sphinx_copybutton", 63 | "sphinx-gallery", 64 | "sphinx-design", 65 | "pytest", 66 | "lifelines", 67 | "pydata_sphinx_theme", 68 | "sphinx-sitemap", 69 | "sphinxext-opengraph", 70 | ] 71 | 72 | 73 | [tool.setuptools] 74 | license-files = [] 75 | -------------------------------------------------------------------------------- /skglm/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.5dev' 2 | 3 | from skglm.estimators import ( # noqa F401 4 | Lasso, WeightedLasso, ElasticNet, MCPRegression, MultiTaskLasso, LinearSVC, 5 | SparseLogisticRegression, GeneralizedLinearEstimator, CoxEstimator, GroupLasso, 6 | ) 7 | -------------------------------------------------------------------------------- /skglm/datafits/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDatafit, BaseMultitaskDatafit 2 | from .single_task import (Quadratic, QuadraticSVC, Logistic, Huber, Poisson, Gamma, 3 | Cox, WeightedQuadratic, QuadraticHessian,) 4 | from .multi_task import QuadraticMultiTask 5 | from .group import QuadraticGroup, LogisticGroup 6 | 7 | 8 | __all__ = [ 9 | BaseDatafit, BaseMultitaskDatafit, 10 | Quadratic, QuadraticSVC, Logistic, Huber, Poisson, Gamma, Cox, 11 | QuadraticMultiTask, 12 | QuadraticGroup, LogisticGroup, WeightedQuadratic, 13 | QuadraticHessian 14 | ] 15 | -------------------------------------------------------------------------------- /skglm/datafits/base.py: -------------------------------------------------------------------------------- 1 | 2 | class BaseDatafit: 3 | """Base class for datafits.""" 4 | 5 | def get_spec(self): 6 | """Specify the numba types of the class attributes. 7 | 8 | Returns 9 | ------- 10 | spec: Tuple of (attribute_name, dtype) 11 | spec to be passed to Numba jitclass to compile the class. 12 | """ 13 | 14 | def params_to_dict(self): 15 | """Get the parameters to initialize an instance of the class. 16 | 17 | Returns 18 | ------- 19 | dict_of_params : dict 20 | The parameters to instantiate an object of the class. 21 | """ 22 | 23 | def initialize(self, X, y): 24 | """Pre-computations before fitting on X and y. 25 | 26 | Parameters 27 | ---------- 28 | X : array, shape (n_samples, n_features) 29 | Design matrix. 30 | 31 | y : array, shape (n_samples,) 32 | Target vector. 33 | """ 34 | 35 | def initialize_sparse(self, X_data, X_indptr, X_indices, y): 36 | """Pre-computations before fitting on X and y when X is a sparse matrix. 37 | 38 | Parameters 39 | ---------- 40 | X_data : array, shape (n_elements,) 41 | `data` attribute of the sparse CSC matrix X. 42 | 43 | X_indptr : array, shape (n_features + 1,) 44 | `indptr` attribute of the sparse CSC matrix X. 45 | 46 | X_indices : array, shape (n_elements,) 47 | `indices` attribute of the sparse CSC matrix X. 
48 | 49 | y : array, shape (n_samples,) 50 | Target vector. 51 | """ 52 | 53 | def value(self, y, w, Xw): 54 | """Value of datafit at vector w. 55 | 56 | Parameters 57 | ---------- 58 | y : array_like, shape (n_samples,) 59 | Target vector. 60 | 61 | w : array_like, shape (n_features,) 62 | Coefficient vector. 63 | 64 | Xw: array_like, shape (n_samples,) 65 | Model fit. 66 | 67 | Returns 68 | ------- 69 | value : float 70 | The datafit value at vector w. 71 | """ 72 | 73 | 74 | class BaseMultitaskDatafit: 75 | """Base class for multitask datafits.""" 76 | 77 | def get_spec(self): 78 | """Specify the numba types of the class attributes. 79 | 80 | Returns 81 | ------- 82 | spec: Tuple of (attribute_name, dtype) 83 | spec to be passed to Numba jitclass to compile the class. 84 | """ 85 | 86 | def params_to_dict(self): 87 | """Get the parameters to initialize an instance of the class. 88 | 89 | Returns 90 | ------- 91 | dict_of_params : dict 92 | The parameters to instantiate an object of the class. 93 | """ 94 | 95 | def initialize(self, X, Y): 96 | """Store useful values before fitting on X and Y. 97 | 98 | Parameters 99 | ---------- 100 | X : array, shape (n_samples, n_features) 101 | Design matrix. 102 | 103 | Y : array, shape (n_samples, n_tasks) 104 | Multitask target. 105 | """ 106 | 107 | def initialize_sparse(self, X_data, X_indptr, X_indices, Y): 108 | """Store useful values before fitting on X and Y, when X is sparse. 109 | 110 | Parameters 111 | ---------- 112 | X_data : array-like 113 | `data` attribute of the sparse CSC matrix X. 114 | 115 | X_indptr : array-like 116 | `indptr` attribute of the sparse CSC matrix X. 117 | 118 | X_indices : array-like 119 | `indices` attribute of the sparse CSC matrix X. 120 | 121 | Y : array, shape (n_samples, n_tasks) 122 | Target matrix. 123 | """ 124 | 125 | def value(self, Y, W, XW): 126 | """Value of datafit at matrix W. 127 | 128 | Parameters 129 | ---------- 130 | Y : array_like, shape (n_samples, n_tasks) 131 | Target matrix. 132 | 133 | W : array_like, shape (n_features, n_tasks) 134 | Coefficient matrix. 135 | 136 | XW: array_like, shape (n_samples, n_tasks) 137 | Model fit. 138 | 139 | Returns 140 | ------- 141 | value : float 142 | The datafit value evaluated at matrix W. 143 | """ 144 | -------------------------------------------------------------------------------- /skglm/datafits/group.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import norm 3 | from numba import int32, float64 4 | 5 | from skglm.datafits.base import BaseDatafit 6 | from skglm.datafits.single_task import Logistic 7 | from skglm.utils.sparse_ops import spectral_norm, sparse_columns_slice 8 | 9 | 10 | class QuadraticGroup(BaseDatafit): 11 | r"""Quadratic datafit used with group penalties. 12 | 13 | The datafit reads: 14 | 15 | .. math:: 1 / (2 xx n_"samples") ||y - Xw||_2 ^ 2 16 | 17 | Attributes 18 | ---------- 19 | grp_indices : array, shape (n_features,) 20 | The group indices stacked contiguously 21 | ([grp1_indices, grp2_indices, ...]). 22 | 23 | grp_ptr : array, shape (n_groups + 1,) 24 | The group pointers such that two consecutive elements delimit 25 | the indices of a group in ``grp_indices``. 
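
    Examples
    --------
    A minimal sketch of the expected group encoding (hypothetical values):
    four features split into two contiguous groups ``[0, 1]`` and ``[2, 3]``,
    so that group ``g`` spans ``grp_indices[grp_ptr[g]:grp_ptr[g + 1]]``.

    >>> import numpy as np
    >>> from skglm.datafits import QuadraticGroup
    >>> grp_indices = np.array([0, 1, 2, 3], dtype=np.int32)
    >>> grp_ptr = np.array([0, 2, 4], dtype=np.int32)
    >>> datafit = QuadraticGroup(grp_ptr, grp_indices)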
26 | """ 27 | 28 | def __init__(self, grp_ptr, grp_indices): 29 | self.grp_ptr, self.grp_indices = grp_ptr, grp_indices 30 | 31 | def get_spec(self): 32 | spec = ( 33 | ('grp_ptr', int32[:]), 34 | ('grp_indices', int32[:]), 35 | ) 36 | return spec 37 | 38 | def params_to_dict(self): 39 | return dict(grp_ptr=self.grp_ptr, 40 | grp_indices=self.grp_indices) 41 | 42 | def get_lipschitz(self, X, y): 43 | grp_ptr, grp_indices = self.grp_ptr, self.grp_indices 44 | n_groups = len(grp_ptr) - 1 45 | 46 | lipschitz = np.zeros(n_groups) 47 | for g in range(n_groups): 48 | grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]] 49 | X_g = X[:, grp_g_indices] 50 | lipschitz[g] = norm(X_g, ord=2) ** 2 / len(y) 51 | 52 | return lipschitz 53 | 54 | def get_lipschitz_sparse(self, X_data, X_indptr, X_indices, y): 55 | grp_ptr, grp_indices = self.grp_ptr, self.grp_indices 56 | n_groups = len(grp_ptr) - 1 57 | 58 | lipschitz = np.zeros(n_groups, dtype=X_data.dtype) 59 | for g in range(n_groups): 60 | grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]] 61 | X_data_g, X_indptr_g, X_indices_g = sparse_columns_slice( 62 | grp_g_indices, X_data, X_indptr, X_indices) 63 | lipschitz[g] = spectral_norm( 64 | X_data_g, X_indptr_g, X_indices_g, len(y)) ** 2 / len(y) 65 | 66 | return lipschitz 67 | 68 | def value(self, y, w, Xw): 69 | return norm(y - Xw) ** 2 / (2 * len(y)) 70 | 71 | def gradient_g(self, X, y, w, Xw, g): 72 | grp_ptr, grp_indices = self.grp_ptr, self.grp_indices 73 | grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]] 74 | 75 | grad_g = np.zeros(len(grp_g_indices)) 76 | for idx, j in enumerate(grp_g_indices): 77 | grad_g[idx] = self.gradient_scalar(X, y, w, Xw, j) 78 | 79 | return grad_g 80 | 81 | def gradient_g_sparse(self, X_data, X_indptr, X_indices, y, w, Xw, g): 82 | grp_ptr, grp_indices = self.grp_ptr, self.grp_indices 83 | grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]] 84 | 85 | grad_g = np.zeros(len(grp_g_indices)) 86 | for idx, j in enumerate(grp_g_indices): 87 | grad_g[idx] = self.gradient_scalar_sparse( 88 | X_data, X_indptr, X_indices, y, w, Xw, j) 89 | 90 | return grad_g 91 | 92 | def gradient_scalar_sparse(self, X_data, X_indptr, X_indices, y, w, Xw, j): 93 | grad_j = 0. 94 | for i in range(X_indptr[j], X_indptr[j+1]): 95 | grad_j += X_data[i] * (Xw[X_indices[i]] - y[X_indices[i]]) 96 | 97 | return grad_j / len(y) 98 | 99 | def gradient_scalar(self, X, y, w, Xw, j): 100 | return X[:, j] @ (Xw - y) / len(y) 101 | 102 | def intercept_update_step(self, y, Xw): 103 | return np.mean(Xw - y) 104 | 105 | 106 | class LogisticGroup(Logistic): 107 | r"""Logistic datafit used with group penalties. 108 | 109 | The datafit reads: 110 | 111 | .. math:: 1 / n_"samples" sum_(i=1)^(n_"samples") log(1 + exp(-y_i (Xw)_i)) 112 | 113 | Attributes 114 | ---------- 115 | grp_indices : array, shape (n_features,) 116 | The group indices stacked contiguously 117 | ``[grp1_indices, grp2_indices, ...]``. 118 | 119 | grp_ptr : array, shape (n_groups + 1,) 120 | The group pointers such that two consecutive elements delimit 121 | the indices of a group in ``grp_indices``. 122 | 123 | lipschitz : array, shape (n_groups,) 124 | The lipschitz constants for each group. 
125 | """ 126 | 127 | def __init__(self, grp_ptr, grp_indices): 128 | self.grp_ptr, self.grp_indices = grp_ptr, grp_indices 129 | 130 | def get_spec(self): 131 | spec = ( 132 | ('grp_ptr', int32[:]), 133 | ('grp_indices', int32[:]), 134 | ('lipschitz', float64[:]) 135 | ) 136 | return spec 137 | 138 | def params_to_dict(self): 139 | return dict(grp_ptr=self.grp_ptr, 140 | grp_indices=self.grp_indices) 141 | 142 | def initialize(self, X, y): 143 | grp_ptr, grp_indices = self.grp_ptr, self.grp_indices 144 | n_groups = len(grp_ptr) - 1 145 | 146 | lipschitz = np.zeros(n_groups) 147 | for g in range(n_groups): 148 | grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]] 149 | X_g = X[:, grp_g_indices] 150 | lipschitz[g] = norm(X_g, ord=2) ** 2 / (4 * len(y)) 151 | 152 | self.lipschitz = lipschitz 153 | 154 | def gradient_g(self, X, y, w, Xw, g): 155 | grp_ptr, grp_indices = self.grp_ptr, self.grp_indices 156 | grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]] 157 | raw_grad_val = self.raw_grad(y, Xw) 158 | 159 | grad_g = np.zeros(len(grp_g_indices)) 160 | for idx, j in enumerate(grp_g_indices): 161 | grad_g[idx] = X[:, j] @ raw_grad_val 162 | 163 | return grad_g 164 | -------------------------------------------------------------------------------- /skglm/datafits/multi_task.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import norm 3 | from numba import float64 4 | 5 | from skglm.datafits.base import BaseMultitaskDatafit 6 | 7 | 8 | class QuadraticMultiTask(BaseMultitaskDatafit): 9 | """Quadratic datafit used for multi-task regression. 10 | 11 | The datafit reads: 12 | 13 | .. math: 1 / (2 xx n_"samples") ||Y - XW||_F ^ 2 14 | 15 | Attributes 16 | ---------- 17 | XtY : array, shape (n_features, n_tasks) 18 | Pre-computed quantity used during the gradient evaluation. 19 | """ 20 | 21 | def __init__(self): 22 | pass 23 | 24 | def get_spec(self): 25 | spec = ( 26 | ('XtY', float64[:, :]), 27 | ) 28 | return spec 29 | 30 | def params_to_dict(self): 31 | return dict() 32 | 33 | def get_lipschitz(self, X, Y): 34 | n_samples, n_features = X.shape 35 | 36 | lipschitz = np.zeros(n_features) 37 | for j in range(n_features): 38 | lipschitz[j] = norm(X[:, j]) ** 2 / n_samples 39 | 40 | return lipschitz 41 | 42 | def get_lipschitz_sparse(self, X_data, X_indptr, X_indices, Y): 43 | n_samples, n_tasks = Y.shape 44 | n_features = len(X_indptr) - 1 45 | 46 | lipschitz = np.zeros(n_features) 47 | for j in range(n_features): 48 | nrm2 = 0. 
49 | for idx in range(X_indptr[j], X_indptr[j + 1]): 50 | nrm2 += X_data[idx] ** 2 51 | 52 | lipschitz[j] = nrm2 / n_samples 53 | 54 | return lipschitz 55 | 56 | def initialize(self, X, Y): 57 | """Compute optimization quantities before fitting on X and Y.""" 58 | self.XtY = X.T @ Y 59 | 60 | def initialize_sparse(self, X_data, X_indptr, X_indices, Y): 61 | """Pre-computations before fitting on X and Y, when X is sparse.""" 62 | _, n_tasks = Y.shape 63 | n_features = len(X_indptr) - 1 64 | 65 | self.XtY = np.zeros((n_features, n_tasks)) 66 | for j in range(n_features): 67 | xtY = np.zeros(n_tasks) 68 | for idx in range(X_indptr[j], X_indptr[j + 1]): 69 | for t in range(n_tasks): 70 | xtY[t] += X_data[idx] * Y[X_indices[idx], t] 71 | 72 | self.XtY[j, :] = xtY 73 | 74 | def value(self, Y, W, XW): 75 | """Value of datafit at matrix W.""" 76 | n_samples = Y.shape[0] 77 | return np.sum((Y - XW) ** 2) / (2 * n_samples) 78 | 79 | def gradient_j(self, X, Y, W, XW, j): 80 | """Gradient with respect to j-th coordinate of W.""" 81 | n_samples = X.shape[0] 82 | return (X[:, j] @ XW - self.XtY[j, :]) / n_samples 83 | 84 | def gradient_j_sparse(self, X_data, X_indptr, X_indices, Y, XW, j): 85 | """Gradient with respect to j-th coordinate of W when X is sparse.""" 86 | n_samples, n_tasks = Y.shape 87 | XjTXW = np.zeros(n_tasks) 88 | for t in range(n_tasks): 89 | for i in range(X_indptr[j], X_indptr[j+1]): 90 | XjTXW[t] += X_data[i] * XW[X_indices[i], t] 91 | return (XjTXW - self.XtY[j, :]) / n_samples 92 | 93 | def full_grad_sparse(self, X_data, X_indptr, X_indices, Y, XW): 94 | """Compute the full gradient when X is sparse.""" 95 | n_features = X_indptr.shape[0] - 1 96 | n_samples, n_tasks = Y.shape 97 | grad = np.zeros((n_features, n_tasks)) 98 | for j in range(n_features): 99 | XjTXW = np.zeros(n_tasks) 100 | for t in range(n_tasks): 101 | for i in range(X_indptr[j], X_indptr[j+1]): 102 | XjTXW[t] += X_data[i] * XW[X_indices[i], t] 103 | grad[j, :] = (XjTXW - self.XtY[j, :]) / n_samples 104 | return grad 105 | 106 | def intercept_update_step(self, Y, XW): 107 | return np.sum(XW - Y, axis=0) / len(Y) 108 | -------------------------------------------------------------------------------- /skglm/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | from .reweighted import IterativeReweightedL1 2 | from .sqrt_lasso import SqrtLasso, SqrtQuadratic 3 | from .pdcd_ws import PDCD_WS 4 | from .quantile_regression import Pinball 5 | 6 | __all__ = [ 7 | IterativeReweightedL1, 8 | PDCD_WS, 9 | Pinball, 10 | SqrtQuadratic, 11 | SqrtLasso, 12 | ] 13 | -------------------------------------------------------------------------------- /skglm/experimental/_plot_sqrt_lasso.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | from numpy.linalg import norm 4 | import matplotlib.pyplot as plt 5 | from skglm.utils.data import make_correlated_data 6 | from skglm.experimental.sqrt_lasso import SqrtLasso, _chambolle_pock_sqrt 7 | 8 | X, y, _ = make_correlated_data(n_samples=200, n_features=100, random_state=24) 9 | 10 | n_samples, n_features = X.shape 11 | alpha_max = norm(X.T @ y, ord=np.inf) / (norm(y) * np.sqrt(n_samples)) 12 | 13 | alpha = alpha_max / 10 14 | 15 | 16 | max_iter = 1000 17 | obj_freq = 10 18 | w, _, objs = _chambolle_pock_sqrt(X, y, alpha, max_iter=max_iter, obj_freq=obj_freq) 19 | 20 | 21 | # no convergence issue if n_features < n_samples, can use ProxNewton 22 | # clf = 
SqrtLasso(alpha=alpha / np.sqrt(n_samples), verbose=2, tol=1e-10) 23 | clf = SqrtLasso(alpha=alpha, verbose=2, tol=1e-10) 24 | clf.fit(X, y) 25 | 26 | # consider that our solver has converged 27 | w_star = clf.coef_ 28 | p_star = norm(X @ w_star - y) / np.sqrt(n_samples) + alpha * norm(w_star, ord=1) 29 | 30 | plt.close("all") 31 | plt.semilogy(np.arange(1, max_iter+1, obj_freq), np.array(objs) - p_star) 32 | plt.xlabel("CP iteration") 33 | plt.ylabel("$F(x) - F(x^*)$") 34 | plt.show(block=False) 35 | -------------------------------------------------------------------------------- /skglm/experimental/pdcd_ws.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import numpy as np 4 | from numpy.linalg import norm 5 | from scipy.sparse import issparse 6 | 7 | from numba import njit 8 | from skglm.solvers import BaseSolver 9 | 10 | from sklearn.exceptions import ConvergenceWarning 11 | 12 | 13 | class PDCD_WS(BaseSolver): 14 | r"""Primal-Dual Coordinate Descent solver with working sets. 15 | 16 | It solves 17 | 18 | .. math:: 19 | 20 | \min_w F(Xw) + G(w) 21 | 22 | 23 | using a primal-dual method on the saddle point problem 24 | 25 | .. math:: 26 | 27 | min_w max_z (:Xw, z:) + G(w) - F^**(z) 28 | 29 | where :math:`F` is the datafit term (:math:`F^**` its Fenchel conjugate) 30 | and :math:`G` is the penalty term. 31 | 32 | The datafit is required to be convex and proximable. Also, the penalty 33 | is required to be convex, separable, and proximable. 34 | 35 | The solver is an adaptation of algorithm [1]_ to working sets [2]_. 36 | The working sets are built using a fixed point distance strategy 37 | where each feature is assigned a score based on how much its coefficient varies 38 | when performing a primal update 39 | 40 | .. math:: 41 | 42 | "score"_j = abs(w_j - "prox"_(tau_j, G_j)(w_j - tau_j (:X_j, z:))) 43 | 44 | where :math:`tau_j` is the primal step associated with the j-th feature. 45 | 46 | Parameters 47 | ---------- 48 | max_iter : int, optional 49 | The maximum number of iterations or, equivalently, 50 | the maximum number of solved subproblems. 51 | 52 | max_epochs : int, optional 53 | Maximum number of primal CD epochs on each subproblem. 54 | 55 | dual_init : array, shape (n_samples,) default None 56 | The initialization of dual variables. 57 | If ``None``, they are initialized as the 0 vector ``np.zeros(n_samples)``. 58 | 59 | p0 : int, optional 60 | First working set size. 61 | 62 | tol : float, optional 63 | The tolerance for the optimization. 64 | 65 | verbose : bool or int, default False 66 | Amount of verbosity. 0/False is silent. 67 | 68 | References 69 | ---------- 70 | .. [1] Olivier Fercoq and Pascal Bianchi, 71 | "A Coordinate-Descent Primal-Dual Algorithm with Large Step Size and Possibly 72 | Nonseparable Functions", SIAM Journal on Optimization, 2020, 73 | https://epubs.siam.org/doi/10.1137/18M1168480, 74 | code: https://github.com/Badr-MOUFAD/Fercoq-Bianchi-solver 75 | 76 | .. [2] Bertrand, Q. and Klopfenstein, Q. and Bannier, P.-A. and Gidel, G. 77 | and Massias, M.
78 | "Beyond L1: Faster and Better Sparse Models with skglm", NeurIPS, 2022 79 | https://arxiv.org/abs/2204.07826 80 | """ 81 | 82 | _datafit_required_attr = ('prox_conjugate',) 83 | _penalty_required_attr = ("prox_1d",) 84 | 85 | def __init__( 86 | self, max_iter=1000, max_epochs=1000, dual_init=None, p0=100, tol=1e-6, 87 | fit_intercept=False, warm_start=True, verbose=False 88 | ): 89 | self.max_iter = max_iter 90 | self.max_epochs = max_epochs 91 | self.dual_init = dual_init 92 | self.p0 = p0 93 | self.tol = tol 94 | self.fit_intercept = fit_intercept # TODO not handled 95 | self.warm_start = warm_start # TODO not handled 96 | self.verbose = verbose 97 | 98 | def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): 99 | n_samples, n_features = X.shape 100 | 101 | # init steps 102 | # Despite violating the conditions mentioned in [1] 103 | # this choice of steps yield in practice a convergent algorithm 104 | # with better speed of convergence 105 | dual_step = 1 / norm(X, ord=2) 106 | primal_steps = 1 / norm(X, axis=0, ord=2) 107 | 108 | # primal vars 109 | w = np.zeros(n_features) if w_init is None else w_init 110 | Xw = np.zeros(n_samples) if Xw_init is None else Xw_init 111 | 112 | # dual vars 113 | if self.dual_init is None: 114 | z = np.zeros(n_samples) 115 | z_bar = np.zeros(n_samples) 116 | else: 117 | z = self.dual_init.copy() 118 | z_bar = self.dual_init.copy() 119 | 120 | p_objs = [] 121 | stop_crit = 0. 122 | all_features = np.arange(n_features) 123 | 124 | for iteration in range(self.max_iter): 125 | 126 | # check convergence using fixed-point criteria on both dual and primal 127 | opts_primal = _scores_primal(X, w, z, penalty, primal_steps, all_features) 128 | opt_dual = _score_dual(y, z, Xw, datafit, dual_step) 129 | 130 | stop_crit = max(max(opts_primal), opt_dual) 131 | 132 | if self.verbose: 133 | current_p_obj = datafit.value(y, w, Xw) + penalty.value(w) 134 | print( 135 | f"Iteration {iteration+1}: {current_p_obj:.10f}, " 136 | f"stopping crit: {stop_crit:.2e}") 137 | 138 | if stop_crit <= self.tol: 139 | break 140 | 141 | # build ws 142 | gsupp_size = (w != 0).sum() 143 | ws_size = max(min(self.p0, n_features), 144 | min(n_features, 2 * gsupp_size)) 145 | 146 | # similar to np.argsort()[-ws_size:] but without full sort 147 | ws = np.argpartition(opts_primal, -ws_size)[-ws_size:] 148 | 149 | # solve sub problem 150 | # inplace update of w, Xw, z, z_bar 151 | PDCD_WS._solve_subproblem( 152 | y, X, w, Xw, z, z_bar, datafit, penalty, 153 | primal_steps, dual_step, ws, self.max_epochs, tol_in=0.3*stop_crit) 154 | 155 | current_p_obj = datafit.value(y, w, Xw) + penalty.value(w) 156 | p_objs.append(current_p_obj) 157 | else: 158 | warnings.warn( 159 | f"PDCD_WS did not converge for tol={self.tol:.3e} " 160 | f"and max_iter={self.max_iter}.\n" 161 | "Considering increasing `max_iter` or `tol`.", 162 | category=ConvergenceWarning 163 | ) 164 | 165 | return w, np.asarray(p_objs), stop_crit 166 | 167 | @staticmethod 168 | @njit 169 | def _solve_subproblem(y, X, w, Xw, z, z_bar, datafit, penalty, 170 | primal_steps, dual_step, ws, max_epochs, tol_in): 171 | n_features = X.shape[1] 172 | 173 | for epoch in range(max_epochs): 174 | 175 | for j in ws: 176 | # update primal 177 | old_w_j = w[j] 178 | pseudo_grad = X[:, j] @ (2 * z_bar - z) 179 | w[j] = penalty.prox_1d( 180 | old_w_j - primal_steps[j] * pseudo_grad, 181 | primal_steps[j], j) 182 | 183 | # keep Xw syncr with X @ w 184 | delta_w_j = w[j] - old_w_j 185 | if delta_w_j: 186 | Xw += delta_w_j * X[:, j] 187 | 188 
| # update dual 189 | z_bar[:] = datafit.prox_conjugate(z + dual_step * Xw, 190 | dual_step, y) 191 | z += (z_bar - z) / n_features 192 | 193 | # check convergence using fixed-point criteria on both dual and primal 194 | if epoch % 10 == 0: 195 | opts_primal_in = _scores_primal(X, w, z, penalty, primal_steps, ws) 196 | opt_dual_in = _score_dual(y, z, Xw, datafit, dual_step) 197 | 198 | stop_crit_in = max(max(opts_primal_in), opt_dual_in) 199 | 200 | if stop_crit_in <= tol_in: 201 | break 202 | 203 | def custom_checks(self, X, y, datafit, penalty): 204 | if issparse(X): 205 | raise ValueError( 206 | "Sparse matrices are not yet supported in `PDCD_WS` solver." 207 | ) 208 | 209 | 210 | @njit 211 | def _scores_primal(X, w, z, penalty, primal_steps, ws): 212 | scores_ws = np.zeros(len(ws)) 213 | 214 | for idx, j in enumerate(ws): 215 | next_w_j = penalty.prox_1d(w[j] - primal_steps[j] * X[:, j] @ z, 216 | primal_steps[j], j) 217 | scores_ws[idx] = abs(w[j] - next_w_j) 218 | 219 | return scores_ws 220 | 221 | 222 | @njit 223 | def _score_dual(y, z, Xw, datafit, dual_step): 224 | next_z = datafit.prox_conjugate(z + dual_step * Xw, 225 | dual_step, y) 226 | return norm(z - next_z, ord=np.inf) 227 | -------------------------------------------------------------------------------- /skglm/experimental/quantile_regression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import float64 3 | from skglm.datafits import BaseDatafit 4 | from skglm.utils.prox_funcs import ST_vec 5 | 6 | 7 | class Pinball(BaseDatafit): 8 | r"""Pinball datafit. 9 | 10 | The datafit reads:: 11 | 12 | sum_i quantile_level * max(y_i - Xw_i, 0) + 13 | (1 - quantile_level) * max(Xw_i - y_i, 0) 14 | 15 | with ``quantile_level`` in [0, 1]. 16 | 17 | Parameters 18 | ---------- 19 | quantile_level : float 20 | Quantile level must be in [0, 1]. When ``quantile_level=0.5``, 21 | the datafit becomes a Least Absolute Deviation (LAD) datafit. 22 | """ 23 | 24 | def __init__(self, quantile_level): 25 | self.quantile_level = quantile_level 26 | 27 | def value(self, y, w, Xw): 28 | # implementation taken from 29 | # github.com/benchopt/benchmark_quantile_regression/blob/main/objective.py 30 | quantile_level = self.quantile_level 31 | 32 | residual = y - Xw 33 | sign = residual >= 0 34 | 35 | loss = (quantile_level * sign * residual - 36 | (1 - quantile_level) * (1 - sign) * residual) 37 | return np.sum(loss) 38 | 39 | def prox(self, w, step, y): 40 | """Prox of ``step * pinball``.""" 41 | shift_cst = (self.quantile_level - 1/2) * step 42 | return y - ST_vec(y - w - shift_cst, step / 2) 43 | 44 | def prox_conjugate(self, z, step, y): 45 | """Prox of ``step * pinball^*``.""" 46 | # using Moreau decomposition 47 | inv_step = 1 / step 48 | return z - step * self.prox(inv_step * z, inv_step, y) 49 | 50 | def subdiff_distance(self, Xw, z, y): 51 | """Distance of ``z`` to subdiff of pinball at ``Xw``.""" 52 | # computation note: \partial ||y - . ||_1(Xw) = -\partial || . ||_1(y - Xw) 53 | y_minus_Xw = y - Xw 54 | shift_cst = self.quantile_level - 1/2 55 | 56 | max_distance = 0. 
57 | for i in range(len(y)): 58 | 59 | if y_minus_Xw[i] == 0.: 60 | distance_i = max(0, abs(z[i] - shift_cst) - 1) 61 | else: 62 | distance_i = abs(z[i] + shift_cst + np.sign(y_minus_Xw[i])) 63 | 64 | max_distance = max(max_distance, distance_i) 65 | 66 | return max_distance 67 | 68 | def get_spec(self): 69 | spec = ( 70 | ('quantile_level', float64), 71 | ) 72 | return spec 73 | 74 | def params_to_dict(self): 75 | return dict(quantile_level=self.quantile_level) 76 | -------------------------------------------------------------------------------- /skglm/experimental/reweighted.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from skglm.datafits import Quadratic 3 | from skglm.estimators import GeneralizedLinearEstimator 4 | from skglm.penalties import WeightedL1, L0_5 5 | from skglm.utils.jit_compilation import compiled_clone 6 | 7 | 8 | class IterativeReweightedL1(GeneralizedLinearEstimator): 9 | r"""Reweighted L1-norm estimator. 10 | 11 | This estimator solves a non-convex problems by iteratively solving 12 | convex surrogates involving weighted L1 norms. 13 | 14 | Parameters 15 | ---------- 16 | datafit : instance of BaseDatafit, optional 17 | Datafit. If None, ``datafit`` is initialized as a :class:`.Quadratic` datafit. 18 | ``datafit`` is replaced by a JIT-compiled instance when calling fit. 19 | 20 | penalty : instance of BasePenalty, optional 21 | Penalty. If None, `penalty` is initialized as a :class:`.L0_5` penalty. 22 | `penalty` is replaced by a JIT-compiled instance when calling fit. 23 | 24 | solver : instance of BaseSolver, optional 25 | Solver. If None, ``solver`` is initialized as an :class:`.AndersonCD` solver. 26 | 27 | n_reweights : int, optional 28 | Number of reweighting performed (convex surrogates solved). 29 | 30 | Attributes 31 | ---------- 32 | coef_ : array, shape (n_features,) 33 | Parameter vector (:math:`w` in the cost function formula). 34 | 35 | loss_history_ : list 36 | Objective history after every reweighting. 37 | 38 | References 39 | ---------- 40 | .. [1] Candès et al. (2007), Enhancing sparsity by reweighted l1 minimization 41 | https://web.stanford.edu/~boyd/papers/pdf/rwl1.pdf 42 | """ 43 | 44 | def __init__(self, datafit=Quadratic(), penalty=L0_5(1.), solver=None, 45 | n_reweights=5): 46 | super().__init__(datafit=datafit, penalty=penalty, solver=solver) 47 | self.n_reweights = n_reweights 48 | 49 | def fit(self, X, y): 50 | """Fit the model according to the given training data. 51 | 52 | Parameters 53 | ---------- 54 | X : array-like, shape (n_samples, n_features) 55 | Training data, where n_samples is the number of samples and 56 | n_features is the number of features. 57 | 58 | y : array-like, shape (n_samples,) 59 | Target vector relative to X. 60 | 61 | Returns 62 | ------- 63 | self : 64 | Fitted estimator. 65 | """ 66 | if not hasattr(self.penalty, "derivative"): 67 | raise ValueError( 68 | "Missing `derivative` method. 
Reweighting is not implemented for " + 69 | f"penalty {self.penalty.__class__.__name__}") 70 | 71 | n_features = X.shape[1] 72 | # we need to compile this as it is not passed to solver.solve: 73 | self.penalty = compiled_clone(self.penalty) 74 | _penalty = WeightedL1(self.penalty.alpha, np.ones(n_features)) 75 | 76 | self.loss_history_ = [] 77 | 78 | for iter_reweight in range(self.n_reweights): 79 | coef_ = self.solver.solve(X, y, self.datafit, _penalty)[0] 80 | _penalty.weights = self.penalty.derivative(coef_) 81 | 82 | loss = (self.datafit.value(y, coef_, X @ coef_) 83 | + self.penalty.value(coef_)) 84 | self.loss_history_.append(loss) 85 | 86 | if self.solver.verbose: 87 | print(f"Reweight {iter_reweight}/{self.n_reweights}, objective {loss}") 88 | 89 | self.coef_ = coef_ 90 | 91 | return self 92 | -------------------------------------------------------------------------------- /skglm/experimental/tests/test_quantile_regression.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.linalg import norm 4 | 5 | from skglm.penalties import L1 6 | from skglm import GeneralizedLinearEstimator 7 | from skglm.experimental.pdcd_ws import PDCD_WS 8 | from skglm.experimental.quantile_regression import Pinball 9 | 10 | from skglm.utils.data import make_correlated_data 11 | from sklearn.linear_model import QuantileRegressor 12 | 13 | 14 | @pytest.mark.parametrize('quantile_level', [0.3, 0.5, 0.7]) 15 | def test_PDCD_WS(quantile_level): 16 | n_samples, n_features = 50, 10 17 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=123) 18 | 19 | # optimality condition for w = 0. 20 | # for all g in subdiff pinball(y), g must be in subdiff ||.||_1(0) 21 | # hint: use max(x, 0) = (x + |x|) / 2 to get subdiff pinball 22 | alpha_max = norm(X.T @ (np.sign(y)/2 + (quantile_level - 0.5)), ord=np.inf) 23 | alpha = alpha_max / 5 24 | 25 | datafit = Pinball(quantile_level) 26 | penalty = L1(alpha) 27 | 28 | w = PDCD_WS( 29 | dual_init=np.sign(y)/2 + (quantile_level - 0.5) 30 | ).solve(X, y, datafit, penalty)[0] 31 | 32 | clf = QuantileRegressor( 33 | quantile=quantile_level, 34 | alpha=alpha/n_samples, 35 | fit_intercept=False, 36 | solver='highs', 37 | ).fit(X, y) 38 | 39 | np.testing.assert_allclose(w, clf.coef_, atol=1e-5) 40 | # test compatibility when inside GLM: 41 | estimator = GeneralizedLinearEstimator( 42 | datafit=Pinball(.2), 43 | penalty=L1(alpha=1.), 44 | solver=PDCD_WS(), 45 | ) 46 | estimator.fit(X, y) 47 | 48 | 49 | if __name__ == '__main__': 50 | pass 51 | -------------------------------------------------------------------------------- /skglm/experimental/tests/test_reweighted.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import norm 3 | 4 | from skglm.penalties.separable import L0_5 5 | from skglm.utils.data import make_correlated_data 6 | from skglm.experimental import IterativeReweightedL1 7 | from skglm.solvers import AndersonCD 8 | 9 | 10 | n_samples, n_features = 20, 50 11 | X, y, w_true = make_correlated_data( 12 | n_samples=n_samples, n_features=n_features, random_state=24) 13 | 14 | alpha_max = norm(X.T @ y, ord=np.inf) / n_samples 15 | alpha = alpha_max / 100 16 | tol = 1e-10 17 | 18 | 19 | def test_decreasing_loss(): 20 | # reweighting can't increase the L0.5 objective 21 | iterative_l05 = IterativeReweightedL1( 22 | penalty=L0_5(alpha), 23 | solver=AndersonCD(tol=tol, fit_intercept=False)).fit(X, y) 
24 | np.testing.assert_array_less( 25 | iterative_l05.loss_history_[-1], iterative_l05.loss_history_[0]) 26 | diffs = np.diff(iterative_l05.loss_history_) 27 | np.testing.assert_array_less(diffs, 1e-5) 28 | -------------------------------------------------------------------------------- /skglm/experimental/tests/test_sqrt_lasso.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.linalg import norm 4 | 5 | from skglm.penalties import L1 6 | from skglm.utils.data import make_correlated_data 7 | from skglm.experimental.sqrt_lasso import (SqrtLasso, SqrtQuadratic, 8 | _chambolle_pock_sqrt) 9 | from skglm.experimental.pdcd_ws import PDCD_WS 10 | from skglm import Lasso 11 | 12 | 13 | def test_alpha_max(): 14 | n_samples, n_features = 50, 10 15 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=0) 16 | alpha_max = norm(X.T @ y, ord=np.inf) / norm(y) 17 | 18 | sqrt_lasso = SqrtLasso(alpha=alpha_max).fit(X, y) 19 | 20 | if sqrt_lasso.fit_intercept: 21 | np.testing.assert_equal(sqrt_lasso.coef_[:-1], 0) 22 | else: 23 | np.testing.assert_equal(sqrt_lasso.coef_, 0) 24 | 25 | 26 | def test_vs_statsmodels(): 27 | try: 28 | from statsmodels.regression import linear_model # noqa 29 | except ImportError: 30 | pytest.xfail("This test requires statsmodels to run.") 31 | n_samples, n_features = 50, 10 32 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=0) 33 | 34 | alpha_max = norm(X.T @ y, ord=np.inf) / norm(y) 35 | n_alphas = 3 36 | alphas = alpha_max * np.geomspace(1, 1e-2, n_alphas+1)[1:] 37 | 38 | sqrt_lasso = SqrtLasso(tol=1e-9, fit_intercept=False) 39 | coefs_skglm = sqrt_lasso.path(X, y, alphas)[1] 40 | 41 | coefs_statsmodels = np.zeros((len(alphas), n_features)) 42 | 43 | # fit statsmodels on path 44 | for i in range(n_alphas): 45 | alpha = alphas[i] 46 | # statsmodels solves: ||y - Xw||_2 + alpha * ||w||_1 / sqrt(n_samples) 47 | model = linear_model.OLS(y, X) 48 | model = model.fit_regularized(method='sqrt_lasso', L1_wt=1., 49 | alpha=np.sqrt(n_samples) * alpha) 50 | coefs_statsmodels[i] = model.params 51 | 52 | np.testing.assert_almost_equal(coefs_skglm, coefs_statsmodels, decimal=4) 53 | 54 | 55 | def test_prox_newton_cp(): 56 | n_samples, n_features = 50, 10 57 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=0) 58 | 59 | alpha_max = norm(X.T @ y, ord=np.inf) / norm(y) 60 | alpha = alpha_max / 10 61 | clf = SqrtLasso(alpha=alpha, fit_intercept=False, tol=1e-12).fit(X, y) 62 | w, _, _ = _chambolle_pock_sqrt(X, y, alpha, max_iter=1000) 63 | np.testing.assert_allclose(clf.coef_, w) 64 | 65 | 66 | @pytest.mark.parametrize('with_dual_init', [True, False]) 67 | def test_PDCD_WS(with_dual_init): 68 | n_samples, n_features = 50, 10 69 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=0) 70 | 71 | alpha_max = norm(X.T @ y, ord=np.inf) / norm(y) 72 | alpha = alpha_max / 10 73 | 74 | dual_init = y / norm(y) if with_dual_init else None 75 | 76 | datafit = SqrtQuadratic() 77 | penalty = L1(alpha) 78 | 79 | w = PDCD_WS(dual_init=dual_init).solve(X, y, datafit, penalty)[0] 80 | clf = SqrtLasso(alpha=alpha, fit_intercept=False, tol=1e-12).fit(X, y) 81 | np.testing.assert_allclose(clf.coef_, w, atol=1e-6) 82 | 83 | 84 | @pytest.mark.parametrize("fit_intercept", [True, False]) 85 | def test_lasso_sqrt_lasso_equivalence(fit_intercept): 86 | n_samples, n_features = 50, 10 87 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=0) 88 | 
89 | alpha_max = norm(X.T @ y, ord=np.inf) / norm(y) 90 | alpha = alpha_max / 10 91 | 92 | lasso = Lasso(alpha=alpha, fit_intercept=fit_intercept, tol=1e-8).fit(X, y) 93 | 94 | scal = n_samples / norm(y - lasso.predict(X)) 95 | sqrt = SqrtLasso( 96 | alpha=alpha * scal, fit_intercept=fit_intercept, tol=1e-8).fit(X, y) 97 | 98 | np.testing.assert_allclose(sqrt.coef_, lasso.coef_, rtol=1e-6) 99 | 100 | 101 | if __name__ == '__main__': 102 | pass 103 | -------------------------------------------------------------------------------- /skglm/penalties/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BasePenalty 2 | from .separable import ( 3 | L1_plus_L2, L0_5, L1, L2, L2_3, MCPenalty, WeightedMCPenalty, SCAD, 4 | WeightedL1, IndicatorBox, PositiveConstraint, LogSumPenalty 5 | ) 6 | from .block_separable import ( 7 | L2_05, L2_1, BlockMCPenalty, BlockSCAD, WeightedGroupL2, WeightedL1GroupL2 8 | ) 9 | 10 | from .non_separable import SLOPE 11 | 12 | 13 | __all__ = [ 14 | BasePenalty, 15 | L1_plus_L2, L0_5, L1, L2, L2_3, MCPenalty, WeightedMCPenalty, SCAD, WeightedL1, 16 | IndicatorBox, PositiveConstraint, L2_05, L2_1, BlockMCPenalty, BlockSCAD, 17 | WeightedGroupL2, WeightedL1GroupL2, SLOPE, LogSumPenalty 18 | ] 19 | -------------------------------------------------------------------------------- /skglm/penalties/base.py: -------------------------------------------------------------------------------- 1 | 2 | class BasePenalty: 3 | """Base class for penalty subclasses.""" 4 | 5 | def get_spec(self): 6 | """Specify the numba types of the class attributes. 7 | 8 | Returns 9 | ------- 10 | spec: Tuple of (attribute_name, dtype) 11 | spec to be passed to Numba jitclass to compile the class. 12 | """ 13 | 14 | def params_to_dict(self): 15 | """Get the parameters to initialize an instance of the class. 16 | 17 | Returns 18 | ------- 19 | dict_of_params : dict 20 | The parameters to instantiate an object of the class. 21 | """ 22 | 23 | def value(self, w): 24 | """Value of penalty at vector w.""" 25 | 26 | def is_penalized(self, n_features): 27 | """Return a binary mask with the penalized features.""" 28 | 29 | def generalized_support(self, w): 30 | """Return a mask which is True for coefficients in the generalized support.""" 31 | -------------------------------------------------------------------------------- /skglm/penalties/non_separable.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import float64 3 | 4 | from skglm.penalties.base import BasePenalty 5 | from skglm.utils.prox_funcs import prox_SLOPE 6 | 7 | 8 | class SLOPE(BasePenalty): 9 | """Sorted L-One Penalized Estimation (SLOPE) penalty. 10 | 11 | Attributes 12 | ---------- 13 | alphas : array, shape (n_features,) 14 | Contain regularization levels for every feature. 15 | When ``alphas`` contain a single unique value, ``SLOPE`` 16 | is equivalent to the ``L1``penalty. 17 | 18 | References 19 | ---------- 20 | .. [1] M. Bogdan, E. van den Berg, C. Sabatti, W. Su, E. 
Candes 21 | "SLOPE - Adaptive Variable Selection via Convex Optimization", 22 | The Annals of Applied Statistics 9 (3): 1103-40 23 | https://doi.org/10.1214/15-AOAS842 24 | """ 25 | 26 | def __init__(self, alphas): 27 | self.alphas = alphas 28 | 29 | def get_spec(self): 30 | spec = ( 31 | ('alphas', float64[:]), 32 | ) 33 | return spec 34 | 35 | def params_to_dict(self): 36 | return dict(alphas=self.alphas) 37 | 38 | def value(self, w): 39 | """Compute the value of SLOPE at w.""" 40 | return np.sum(np.sort(np.abs(w)) * self.alphas[::-1]) 41 | 42 | def prox_vec(self, x, stepsize): 43 | alphas = self.alphas 44 | prox = np.zeros_like(x) 45 | 46 | abs_x = np.abs(x) 47 | sorted_indices = np.argsort(abs_x)[::-1] 48 | prox[sorted_indices] = prox_SLOPE(abs_x[sorted_indices], alphas * stepsize) 49 | 50 | return np.sign(x) * prox 51 | -------------------------------------------------------------------------------- /skglm/solvers/__init__.py: -------------------------------------------------------------------------------- 1 | from .anderson_cd import AndersonCD 2 | from .base import BaseSolver 3 | from .fista import FISTA 4 | from .gram_cd import GramCD 5 | from .group_bcd import GroupBCD 6 | from .multitask_bcd import MultiTaskBCD 7 | from .prox_newton import ProxNewton 8 | from .group_prox_newton import GroupProxNewton 9 | from .lbfgs import LBFGS 10 | 11 | 12 | __all__ = [AndersonCD, BaseSolver, FISTA, GramCD, GroupBCD, MultiTaskBCD, ProxNewton, 13 | GroupProxNewton, LBFGS] 14 | -------------------------------------------------------------------------------- /skglm/solvers/base.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from abc import abstractmethod, ABC 3 | 4 | import numpy as np 5 | 6 | from skglm.utils.validation import check_attrs 7 | from skglm.utils.jit_compilation import compiled_clone 8 | 9 | 10 | class BaseSolver(ABC): 11 | """Base class for solvers. 12 | 13 | Attributes 14 | ---------- 15 | _datafit_required_attr : list 16 | List of attributes that must be implemented in Datafit. 17 | 18 | _penalty_required_attr : list 19 | List of attributes that must be implemented in Penalty. 20 | 21 | Notes 22 | ----- 23 | For required attributes, if an attribute is given as a list of attributes 24 | it means at least one of them should be implemented. 25 | For instance, if 26 | 27 | _datafit_required_attr = ( 28 | "get_global_lipschitz", 29 | ("gradient", "gradient_scalar") 30 | ) 31 | 32 | it means datafit must implement the methods ``get_global_lipschitz`` 33 | and (``gradient`` or ``gradient_scalar``). 34 | """ 35 | 36 | _datafit_required_attr: list 37 | _penalty_required_attr: list 38 | 39 | @abstractmethod 40 | def _solve(self, X, y, datafit, penalty, w_init, Xw_init): 41 | """Solve an optimization problem. 42 | 43 | Parameters 44 | ---------- 45 | X : array, shape (n_samples, n_features) 46 | Training data. 47 | 48 | y : array, shape (n_samples,) 49 | Target values. 50 | 51 | datafit : instance of Datafit class 52 | Datafitting term. 53 | 54 | penalty : instance of Penalty class 55 | Penalty used in the model. 56 | 57 | w_init : array, shape (n_features,) 58 | Coefficient vector. 59 | 60 | Xw_init : array, shape (n_samples,) 61 | Model fit. 62 | 63 | Returns 64 | ------- 65 | coefs : array, shape (n_features + fit_intercept, n_alphas) 66 | Coefficients along the path. 67 | 68 | obj_out : array, shape (n_iter,) 69 | The objective values at every outer iteration.
70 | 71 | stop_crit : float 72 | Value of stopping criterion at convergence. 73 | """ 74 | 75 | def custom_checks(self, X, y, datafit, penalty): 76 | """Ensure the solver is suited for the `datafit` + `penalty` problem. 77 | 78 | This method includes extra checks to perform 79 | aside from checking attributes compatibility. 80 | 81 | Parameters 82 | ---------- 83 | X : array, shape (n_samples, n_features) 84 | Training data. 85 | 86 | y : array, shape (n_samples,) 87 | Target values. 88 | 89 | datafit : instance of BaseDatafit 90 | Datafit. 91 | 92 | penalty : instance of BasePenalty 93 | Penalty. 94 | """ 95 | pass 96 | 97 | def solve( 98 | self, X, y, datafit, penalty, w_init=None, Xw_init=None, *, run_checks=True 99 | ): 100 | """Solve the optimization problem after validating its compatibility. 101 | 102 | A proxy of ``_solve`` method that implicitly ensures the compatibility 103 | of ``datafit`` and ``penalty`` with the solver. 104 | 105 | Examples 106 | -------- 107 | >>> ... 108 | >>> coefs, obj_out, stop_crit = solver.solve(X, y, datafit, penalty) 109 | """ 110 | # TODO check for datafit/penalty being jit-compiled properly 111 | # instead of searching for a string 112 | if "jitclass" in str(type(datafit)): 113 | warnings.warn( 114 | "Passing in a compiled datafit is deprecated since skglm v0.5 " 115 | "Compilation is now done inside solver." 116 | "This will raise an error starting skglm v0.6 onwards." 117 | ) 118 | elif datafit is not None: 119 | datafit = compiled_clone(datafit, to_float32=X.dtype == np.float32) 120 | 121 | if "jitclass" in str(type(penalty)): 122 | warnings.warn( 123 | "Passing in a compiled penalty is deprecated since skglm v0.5 " 124 | "Compilation is now done inside solver. " 125 | "This will raise an error starting skglm v0.6 onwards." 126 | ) 127 | elif penalty is not None: 128 | penalty = compiled_clone(penalty) 129 | # TODO add support for bool spec in compiled_clone 130 | # currently, doing so break the code 131 | # penalty = compiled_clone(penalty, to_float32=X.dtype == np.float32) 132 | 133 | if run_checks: 134 | self._validate(X, y, datafit, penalty) 135 | 136 | return self._solve(X, y, datafit, penalty, w_init, Xw_init) 137 | 138 | def _validate(self, X, y, datafit, penalty): 139 | # execute: `custom_checks` then check attributes 140 | self.custom_checks(X, y, datafit, penalty) 141 | 142 | # do not check for sparse support here, make the check at the solver level 143 | # some solvers like ProxNewton don't require methods for sparse support 144 | check_attrs(datafit, self, self._datafit_required_attr) 145 | check_attrs(penalty, self, self._penalty_required_attr) 146 | -------------------------------------------------------------------------------- /skglm/solvers/common.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import njit 3 | from numpy.linalg import norm 4 | 5 | 6 | @njit 7 | def dist_fix_point_cd(w, grad_ws, lipschitz_ws, datafit, penalty, ws): 8 | """Compute the violation of the fixed point iterate scheme for CD. 9 | 10 | Parameters 11 | ---------- 12 | w : array, shape (n_features,) 13 | Coefficient vector. 14 | 15 | grad_ws : array, shape (ws_size,) 16 | Gradient restricted to the working set. 17 | 18 | lipschitz_ws : array, shape (len(ws),) 19 | Coordinatewise gradient Lipschitz constants, restricted to working set. 20 | 21 | datafit: instance of BaseDatafit 22 | Datafit. 23 | 24 | penalty: instance of BasePenalty 25 | Penalty. 
26 | 27 | ws : array, shape (len(ws),) 28 | The working set. 29 | 30 | Returns 31 | ------- 32 | dist : array, shape (n_features,) 33 | Violation score for every feature. 34 | """ 35 | dist = np.zeros(ws.shape[0], dtype=w.dtype) 36 | 37 | for idx, j in enumerate(ws): 38 | if lipschitz_ws[idx] == 0.: 39 | continue 40 | 41 | step_j = 1 / lipschitz_ws[idx] 42 | dist[idx] = np.abs( 43 | w[j] - penalty.prox_1d(w[j] - step_j * grad_ws[idx], step_j, j) 44 | ) 45 | return dist 46 | 47 | 48 | @njit 49 | def dist_fix_point_bcd(w, grad_ws, lipschitz_ws, datafit, penalty, ws): 50 | """Compute the violation of the fixed point iterate scheme for BCD. 51 | 52 | Parameters 53 | ---------- 54 | w : array, shape (n_features,) 55 | Coefficient vector. 56 | 57 | grad_ws : array, shape (ws_size,) 58 | Gradient restricted to the working set. 59 | 60 | lipschitz_ws : array, shape (len(ws),) 61 | Coordinatewise gradient Lipschitz constants, restricted to working set. 62 | 63 | datafit: instance of BaseDatafit 64 | Datafit. 65 | 66 | penalty: instance of BasePenalty 67 | Penalty. 68 | 69 | ws : array, shape (len(ws),) 70 | The working set. 71 | 72 | Returns 73 | ------- 74 | dist : array, shape (n_groups,) 75 | Violation score for every group. 76 | 77 | Note: 78 | ---- 79 | ``grad_ws`` is a stacked array of gradients ``[grad_ws_1, grad_ws_2, ...]``. 80 | """ 81 | n_groups = len(penalty.grp_ptr) - 1 82 | dist = np.zeros(n_groups, dtype=w.dtype) 83 | 84 | grad_ptr = 0 85 | for idx, g in enumerate(ws): 86 | if lipschitz_ws[idx] == 0.: 87 | continue 88 | grp_g_indices = penalty.grp_indices[penalty.grp_ptr[g]: penalty.grp_ptr[g+1]] 89 | 90 | grad_g = grad_ws[grad_ptr: grad_ptr + len(grp_g_indices)] 91 | grad_ptr += len(grp_g_indices) 92 | 93 | step_g = 1 / lipschitz_ws[idx] 94 | w_g = w[grp_g_indices] 95 | dist[idx] = norm( 96 | w_g - penalty.prox_1group(w_g - grad_g * step_g, step_g, g) 97 | ) 98 | return dist 99 | 100 | 101 | @njit 102 | def construct_grad(X, y, w, Xw, datafit, ws): 103 | """Compute the gradient of the datafit restricted to the working set. 104 | 105 | Parameters 106 | ---------- 107 | X : array, shape (n_samples, n_features) 108 | Design matrix. 109 | 110 | y : array, shape (n_samples,) 111 | Target vector. 112 | 113 | w : array, shape (n_features,) 114 | Coefficient vector. 115 | 116 | Xw : array, shape (n_samples, ) 117 | Model fit. 118 | 119 | datafit : Datafit 120 | Datafit. 121 | 122 | ws : array, shape (ws_size,) 123 | The working set. 124 | 125 | Returns 126 | ------- 127 | grad : array, shape (ws_size, n_tasks) 128 | The gradient restricted to the working set. 129 | """ 130 | grad = np.zeros(ws.shape[0]) 131 | for idx, j in enumerate(ws): 132 | grad[idx] = datafit.gradient_scalar(X, y, w, Xw, j) 133 | return grad 134 | 135 | 136 | @njit 137 | def construct_grad_sparse(data, indptr, indices, y, w, Xw, datafit, ws): 138 | """Compute the gradient of the datafit restricted to the working set. 139 | 140 | Parameters 141 | ---------- 142 | data : array-like 143 | Data array of the matrix in CSC format. 144 | 145 | indptr : array-like 146 | CSC format index point array. 147 | 148 | indices : array-like 149 | CSC format index array. 150 | 151 | y : array, shape (n_samples, ) 152 | Target matrix. 153 | 154 | w : array, shape (n_features,) 155 | Coefficient matrix. 156 | 157 | Xw : array, shape (n_samples, ) 158 | Model fit. 159 | 160 | datafit : Datafit 161 | Datafit. 162 | 163 | ws : array, shape (ws_size,) 164 | The working set. 
165 | 166 | Returns 167 | ------- 168 | grad : array, shape (ws_size, n_tasks) 169 | The gradient restricted to the working set. 170 | """ 171 | grad = np.zeros(ws.shape[0]) 172 | for idx, j in enumerate(ws): 173 | grad[idx] = datafit.gradient_scalar_sparse( 174 | data, indptr, indices, y, Xw, j) 175 | return grad 176 | -------------------------------------------------------------------------------- /skglm/solvers/fista.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.sparse import issparse 3 | from skglm.solvers.base import BaseSolver 4 | from skglm.solvers.common import construct_grad, construct_grad_sparse 5 | from skglm.utils.prox_funcs import _prox_vec 6 | from skglm.utils.validation import check_attrs 7 | 8 | 9 | class FISTA(BaseSolver): 10 | r"""ISTA solver with Nesterov acceleration (FISTA). 11 | 12 | Attributes 13 | ---------- 14 | max_iter : int, default 100 15 | Maximum number of iterations. 16 | 17 | tol : float, default 1e-4 18 | Tolerance for convergence. 19 | 20 | verbose : bool, default False 21 | Amount of verbosity. 0/False is silent. 22 | 23 | References 24 | ---------- 25 | .. [1] Beck, A. and Teboulle M. 26 | "A Fast Iterative Shrinkage-Thresholding Algorithm for Linear Inverse 27 | problems", 2009, SIAM J. Imaging Sci. 28 | https://epubs.siam.org/doi/10.1137/080716542 29 | """ 30 | 31 | _datafit_required_attr = ("get_global_lipschitz", ("gradient", "gradient_scalar")) 32 | _penalty_required_attr = (("prox_1d", "prox_vec"),) 33 | 34 | def __init__(self, max_iter=100, tol=1e-4, opt_strategy="subdiff", verbose=0): 35 | self.max_iter = max_iter 36 | self.tol = tol 37 | self.verbose = verbose 38 | self.opt_strategy = opt_strategy 39 | self.fit_intercept = False # needed to be passed to GeneralizedLinearEstimator 40 | self.warm_start = False 41 | 42 | def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): 43 | p_objs_out = [] 44 | n_samples, n_features = X.shape 45 | all_features = np.arange(n_features) 46 | X_is_sparse = issparse(X) 47 | t_new = 1. 48 | 49 | w = w_init.copy() if w_init is not None else np.zeros(n_features) 50 | z = w_init.copy() if w_init is not None else np.zeros(n_features) 51 | Xw = Xw_init.copy() if Xw_init is not None else np.zeros(n_samples) 52 | 53 | if X_is_sparse: 54 | datafit.initialize_sparse(X.data, X.indptr, X.indices, y) 55 | lipschitz = datafit.get_global_lipschitz_sparse( 56 | X.data, X.indptr, X.indices, y 57 | ) 58 | else: 59 | datafit.initialize(X, y) 60 | lipschitz = datafit.get_global_lipschitz(X, y) 61 | 62 | for n_iter in range(self.max_iter): 63 | t_old = t_new 64 | t_new = (1 + np.sqrt(1 + 4 * t_old ** 2)) / 2 65 | w_old = w.copy() 66 | 67 | if X_is_sparse: 68 | if hasattr(datafit, "gradient_sparse"): 69 | grad = datafit.gradient_sparse( 70 | X.data, X.indptr, X.indices, y, X @ z) 71 | else: 72 | grad = construct_grad_sparse( 73 | X.data, X.indptr, X.indices, y, z, X @ z, datafit, all_features) 74 | else: 75 | if hasattr(datafit, "gradient"): 76 | grad = datafit.gradient(X, y, X @ z) 77 | else: 78 | grad = construct_grad(X, y, z, X @ z, datafit, all_features) 79 | 80 | step = 1 / lipschitz 81 | z -= step * grad 82 | if hasattr(penalty, "prox_vec"): 83 | w = penalty.prox_vec(z, step) 84 | else: 85 | w = _prox_vec(w, z, penalty, step) 86 | Xw = X @ w 87 | z = w + (t_old - 1.) 
/ t_new * (w - w_old) 88 | 89 | if self.opt_strategy == "subdiff": 90 | opt = penalty.subdiff_distance(w, grad, all_features) 91 | elif self.opt_strategy == "fixpoint": 92 | opt = np.abs(w - penalty.prox_vec(w - grad / lipschitz, 1 / lipschitz)) 93 | else: 94 | raise ValueError( 95 | "Unknown error optimality strategy. Expected " 96 | f"`subdiff` or `fixpoint`. Got {self.opt_strategy}") 97 | 98 | stop_crit = np.max(opt) 99 | 100 | p_obj = datafit.value(y, w, Xw) + penalty.value(w) 101 | p_objs_out.append(p_obj) 102 | if self.verbose: 103 | print( 104 | f"Iteration {n_iter+1}: {p_obj:.10f}, " 105 | f"stopping crit: {stop_crit:.2e}" 106 | ) 107 | 108 | if stop_crit < self.tol: 109 | if self.verbose: 110 | print(f"Stopping criterion max violation: {stop_crit:.2e}") 111 | break 112 | return w, np.array(p_objs_out), stop_crit 113 | 114 | def custom_checks(self, X, y, datafit, penalty): 115 | # check datafit support sparse data 116 | check_attrs( 117 | datafit, solver=self, 118 | required_attr=self._datafit_required_attr, 119 | support_sparse=issparse(X) 120 | ) 121 | 122 | # optimality check 123 | if self.opt_strategy == "subdiff" and not hasattr(penalty, "subdiff_distance"): 124 | raise AttributeError( 125 | "Penalty must implement `subdiff_distance` " 126 | "to use `opt_strategy='subdiff'` in Fista solver." 127 | ) 128 | -------------------------------------------------------------------------------- /skglm/solvers/gram_cd.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import numpy as np 3 | from numba import njit 4 | from scipy.sparse import issparse 5 | 6 | from skglm.solvers.base import BaseSolver 7 | from skglm.utils.anderson import AndersonAcceleration 8 | 9 | 10 | class GramCD(BaseSolver): 11 | r"""Coordinate descent solver keeping the gradients up-to-date with Gram updates. 12 | 13 | This solver should be used when ``n_features`` < ``n_samples``, and computes the 14 | (``n_features``, ``n_features``) Gram matrix which comes with an overhead. It is 15 | only suited to Quadratic datafits. 16 | 17 | It minimizes: 18 | 19 | .. math:: 1 / (2 xx n_"samples") ||y - Xw||^2 + "penalty"(w) 20 | 21 | which can be rewritten as: 22 | 23 | .. math:: 1 / (2 xx n_"samples") w^T Q w - 1 / n_"samples" q^T w + "penalty"(w) 24 | 25 | where: 26 | 27 | .. math:: Q = X^T X " (gram matrix), and " q = X^T y 28 | 29 | Attributes 30 | ---------- 31 | max_iter : int, default 100 32 | Maximum number of iterations. 33 | 34 | w_init : array, shape (n_features,), default None 35 | Initial value of coefficients. 36 | If set to ``None``, a zero vector is used instead. 37 | 38 | use_acc : bool, default False 39 | Extrapolate the iterates based on the past 5 iterates if set to ``True``. 40 | Can only be used when ``greedy_cd`` is ``False``. 41 | 42 | greedy_cd : bool, default True 43 | Use a greedy strategy to select features to update in coordinate descent epochs 44 | if set to ``True``. A cyclic strategy is used otherwise. 45 | 46 | tol : float, default 1e-4 47 | Tolerance for convergence. 48 | 49 | verbose : bool, default False 50 | Amount of verbosity. 0/False is silent. 
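
    Examples
    --------
    A minimal usage sketch (hypothetical random data). ``GramCD`` fits the
    Quadratic datafit implicitly, so ``datafit`` is passed as ``None``:

    >>> import numpy as np
    >>> from skglm.penalties import L1
    >>> from skglm.solvers import GramCD
    >>> X, y = np.random.randn(50, 10), np.random.randn(50)
    >>> w, p_objs, stop_crit = GramCD(tol=1e-6).solve(X, y, None, L1(alpha=0.1))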
51 | """ 52 | 53 | _datafit_required_attr = () 54 | _penalty_required_attr = ("prox_1d", "subdiff_distance") 55 | 56 | def __init__(self, max_iter=100, use_acc=False, greedy_cd=True, tol=1e-4, 57 | fit_intercept=True, warm_start=False, verbose=0): 58 | self.max_iter = max_iter 59 | self.use_acc = use_acc 60 | self.greedy_cd = greedy_cd 61 | self.tol = tol 62 | self.fit_intercept = fit_intercept 63 | self.warm_start = warm_start 64 | self.verbose = verbose 65 | 66 | def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): 67 | # we don't pass Xw_init as the solver uses Gram updates 68 | # to keep the gradient up-to-date instead of Xw 69 | n_samples, n_features = X.shape 70 | 71 | if issparse(X): 72 | scaled_gram = X.T.dot(X) 73 | scaled_gram = scaled_gram.toarray() / n_samples 74 | scaled_Xty = X.T.dot(y) / n_samples 75 | else: 76 | scaled_gram = X.T @ X / n_samples 77 | scaled_Xty = X.T @ y / n_samples 78 | 79 | # TODO potential improvement: allow to pass scaled_gram 80 | # (e.g. for path computation) 81 | scaled_y_norm2 = np.linalg.norm(y) ** 2 / (2 * n_samples) 82 | 83 | all_features = np.arange(n_features) 84 | stop_crit = np.inf # prevent ref before assign 85 | p_objs_out = [] 86 | 87 | w = np.zeros(n_features) if w_init is None else w_init 88 | grad = - scaled_Xty if w_init is None else scaled_gram @ w_init - scaled_Xty 89 | opt = penalty.subdiff_distance(w, grad, all_features) 90 | 91 | if self.use_acc: 92 | if self.greedy_cd: 93 | warnings.warn( 94 | "Anderson acceleration does not work with greedy_cd, " + 95 | "set use_acc=False", UserWarning) 96 | accelerator = AndersonAcceleration(K=5) 97 | w_acc = np.zeros(n_features) 98 | grad_acc = np.zeros(n_features) 99 | 100 | for t in range(self.max_iter): 101 | # check convergences 102 | stop_crit = np.max(opt) 103 | if self.verbose: 104 | p_obj = (0.5 * w @ (scaled_gram @ w) - scaled_Xty @ w + 105 | scaled_y_norm2 + penalty.value(w)) 106 | print( 107 | f"Iteration {t+1}: {p_obj:.10f}, " 108 | f"stopping crit: {stop_crit:.2e}" 109 | ) 110 | 111 | if stop_crit <= self.tol: 112 | if self.verbose: 113 | print(f"Stopping criterion max violation: {stop_crit:.2e}") 114 | break 115 | 116 | # inplace update of w, grad 117 | opt = _gram_cd_epoch(scaled_gram, w, grad, penalty, self.greedy_cd) 118 | 119 | # perform Anderson extrapolation 120 | if self.use_acc: 121 | w_acc, grad_acc, is_extrapolated = accelerator.extrapolate(w, grad) 122 | 123 | if is_extrapolated: 124 | # omit constant term for comparison 125 | p_obj_acc = (0.5 * w_acc @ (scaled_gram @ w_acc) - 126 | scaled_Xty @ w_acc + penalty.value(w_acc)) 127 | p_obj = (0.5 * w @ (scaled_gram @ w) - scaled_Xty @ w 128 | + penalty.value(w)) 129 | if p_obj_acc < p_obj: 130 | w[:] = w_acc 131 | grad[:] = grad_acc 132 | 133 | # store p_obj 134 | p_obj = (0.5 * w @ (scaled_gram @ w) - scaled_Xty @ w + scaled_y_norm2 + 135 | penalty.value(w)) 136 | p_objs_out.append(p_obj) 137 | return w, np.array(p_objs_out), stop_crit 138 | 139 | def custom_checks(self, X, y, datafit, penalty): 140 | if datafit is not None: 141 | raise AttributeError( 142 | "`GramCD` supports only `Quadratic` datafit and fits it implicitly, " 143 | f"argument `datafit` must be `None`, got {datafit.__class__.__name__}." 
144 | )
145 | 
146 | 
147 | @njit
148 | def _gram_cd_epoch(scaled_gram, w, grad, penalty, greedy_cd):
149 | all_features = np.arange(len(w))
150 | for cd_iter in all_features:
151 | # select feature j
152 | if greedy_cd:
153 | opt = penalty.subdiff_distance(w, grad, all_features)
154 | j = np.argmax(opt)
155 | else: # cyclic
156 | j = cd_iter
157 | 
158 | # update w_j
159 | old_w_j = w[j]
160 | step = 1 / scaled_gram[j, j] # 1 / lipschitz_j
161 | w[j] = penalty.prox_1d(old_w_j - step * grad[j], step, j)
162 | 
163 | # gradient update with Gram matrix
164 | if w[j] != old_w_j:
165 | grad += (w[j] - old_w_j) * scaled_gram[:, j]
166 | 
167 | return penalty.subdiff_distance(w, grad, all_features)
168 | 
--------------------------------------------------------------------------------
/skglm/solvers/lbfgs.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from sklearn.exceptions import ConvergenceWarning
3 | 
4 | import numpy as np
5 | import scipy.optimize
6 | from numpy.linalg import norm
7 | from scipy.sparse import issparse
8 | 
9 | from skglm.solvers import BaseSolver
10 | from skglm.utils.validation import check_attrs
11 | 
12 | 
13 | class LBFGS(BaseSolver):
14 | """A wrapper for scipy L-BFGS solver.
15 | 
16 | Refer to `scipy L-BFGS-B <https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html>`_
17 | documentation for details.
18 | 
19 | Parameters
20 | ----------
21 | max_iter : int, default 50
22 | Maximum number of iterations.
23 | 
24 | tol : float, default 1e-4
25 | Tolerance for convergence.
26 | 
27 | verbose : bool, default False
28 | Amount of verbosity. 0/False is silent.
29 | """
30 | 
31 | _datafit_required_attr = ("gradient",)
32 | _penalty_required_attr = ("gradient",)
33 | 
34 | def __init__(self, max_iter=50, tol=1e-4, verbose=False):
35 | self.max_iter = max_iter
36 | self.tol = tol
37 | self.verbose = verbose
38 | 
39 | def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
40 | 
41 | # TODO: to be isolated in a separate method
42 | is_sparse = issparse(X)
43 | if is_sparse:
44 | datafit.initialize_sparse(X.data, X.indptr, X.indices, y)
45 | else:
46 | datafit.initialize(X, y)
47 | 
48 | def objective(w):
49 | Xw = X @ w
50 | datafit_value = datafit.value(y, w, Xw)
51 | penalty_value = penalty.value(w)
52 | 
53 | return datafit_value + penalty_value
54 | 
55 | def d_jac(w):
56 | Xw = X @ w
57 | datafit_grad = datafit.gradient(X, y, Xw)
58 | penalty_grad = penalty.gradient(w)
59 | 
60 | return datafit_grad + penalty_grad
61 | 
62 | def s_jac(w):
63 | Xw = X @ w
64 | datafit_grad = datafit.gradient_sparse(X.data, X.indptr, X.indices, y, Xw)
65 | penalty_grad = penalty.gradient(w)
66 | 
67 | return datafit_grad + penalty_grad
68 | 
69 | def callback_post_iter(w_k):
70 | # save p_obj
71 | p_obj = objective(w_k)
72 | p_objs_out.append(p_obj)
73 | 
74 | if self.verbose:
75 | grad = jac(w_k)
76 | stop_crit = norm(grad, ord=np.inf)
77 | 
78 | it = len(p_objs_out)
79 | print(
80 | f"Iteration {it}: {p_obj:.10f}, " f"stopping crit: {stop_crit:.2e}"
81 | )
82 | 
83 | n_features = X.shape[1]
84 | w = np.zeros(n_features) if w_init is None else w_init
85 | jac = s_jac if issparse(X) else d_jac
86 | p_objs_out = []
87 | 
88 | result = scipy.optimize.minimize(
89 | fun=objective,
90 | jac=jac,
91 | x0=w,
92 | method="L-BFGS-B",
93 | options=dict(
94 | maxiter=self.max_iter,
95 | gtol=self.tol,
96 | ftol=0.0, # set ftol=0. 
to control convergence using only gtol 97 | ), 98 | callback=callback_post_iter, 99 | ) 100 | 101 | if not result.success: 102 | warnings.warn( 103 | f"`LBFGS` did not converge for tol={self.tol:.3e} " 104 | f"and max_iter={self.max_iter}.\n" 105 | "Consider increasing `max_iter` and/or `tol`.", 106 | category=ConvergenceWarning, 107 | ) 108 | 109 | w = result.x 110 | # scipy LBFGS uses || projected gradient ||_oo to check convergence, cf. `gtol` 111 | # in https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html 112 | stop_crit = norm(result.jac, ord=np.inf) 113 | 114 | return w, np.asarray(p_objs_out), stop_crit 115 | 116 | def custom_checks(self, X, y, datafit, penalty): 117 | # check datafit support sparse data 118 | check_attrs( 119 | datafit, 120 | solver=self, 121 | required_attr=self._datafit_required_attr, 122 | support_sparse=issparse(X), 123 | ) 124 | -------------------------------------------------------------------------------- /skglm/tests/test_docstring_parameters.py: -------------------------------------------------------------------------------- 1 | """ 2 | Source: 3 | https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/tests/test_docstrings.py 4 | With a few minor tweaks (line 31 - 32) 5 | """ 6 | from inspect import signature, getmembers, isclass 7 | import pkgutil 8 | import inspect 9 | import importlib 10 | from typing import Optional 11 | 12 | import pytest 13 | import skglm 14 | 15 | numpydoc_validation = pytest.importorskip("numpydoc.validate") 16 | 17 | FUNCTION_DOCSTRING_IGNORE_LIST = [ 18 | "skglm.plot_utils.configure_plt", 19 | "skglm.utils.ST", 20 | "skglm.utils.ST_vec", 21 | "skglm.utils.BST", 22 | "skglm.utils.box_proj", 23 | "skglm.utils.BST_vec", 24 | "skglm.utils.prox_05", 25 | "skglm.utils.prox_block_2_05", 26 | "skglm.utils.prox_2_3" 27 | ] 28 | FUNCTION_DOCSTRING_IGNORE_LIST = set(FUNCTION_DOCSTRING_IGNORE_LIST) 29 | 30 | 31 | def get_all_methods(): 32 | estimators = getmembers(skglm.estimators, isclass) 33 | estimators = [estimator for estimator in estimators if estimator[1].__module__ == 34 | "skglm.estimators"] 35 | for name, Estimator in estimators: 36 | if name.startswith("_"): 37 | # skip private classes 38 | continue 39 | methods = [] 40 | for name in dir(Estimator): 41 | if name.startswith("_"): 42 | continue 43 | method_obj = getattr(Estimator, name) 44 | if hasattr(method_obj, "__call__") or isinstance(method_obj, property): 45 | methods.append(name) 46 | methods.append(None) 47 | 48 | for method in sorted(methods, key=lambda x: str(x)): 49 | yield Estimator, method 50 | 51 | 52 | def _is_checked_function(item): 53 | if not inspect.isfunction(item): 54 | return False 55 | 56 | if item.__name__.startswith("_"): 57 | return False 58 | 59 | mod = item.__module__ 60 | if not mod.startswith("skglm.") or mod.endswith("estimator_checks"): 61 | return False 62 | 63 | return True 64 | 65 | 66 | def get_all_functions_names(): 67 | """Get all public functions define in the skglm module""" 68 | modules_to_ignore = { 69 | "tests", 70 | "profile", 71 | "expes", 72 | "data", 73 | } 74 | 75 | all_functions_names = set() 76 | for module_finder, module_name, ispkg in pkgutil.walk_packages( 77 | path=skglm.__path__, prefix="skglm." 
78 | ): 79 | module_parts = module_name.split(".") 80 | if ( 81 | any(part in modules_to_ignore for part in module_parts) 82 | or "._" in module_name 83 | ): 84 | continue 85 | 86 | module = importlib.import_module(module_name) 87 | functions = inspect.getmembers(module, _is_checked_function) 88 | for name, func in functions: 89 | full_name = f"{func.__module__}.{func.__name__}" 90 | all_functions_names.add(full_name) 91 | 92 | return sorted(all_functions_names) 93 | 94 | 95 | def filter_errors(errors, method, Estimator=None): 96 | """ 97 | Ignore some errors based on the method type. 98 | These rules are specific for scikit-learn.""" 99 | for code, message in errors: 100 | # We ignore following error code, 101 | # - RT02: The first line of the Returns section 102 | # should contain only the type, .. 103 | # (as we may need refer to the name of the returned 104 | # object) 105 | # - GL01: Docstring text (summary) should start in the line 106 | # immediately after the opening quotes (not in the same line, 107 | # or leaving a blank line in between) 108 | # - GL02: If there's a blank line, it should be before the 109 | # first line of the Returns section, not after (it allows to have 110 | # short docstrings for properties). 111 | 112 | if code in ["RT02", "GL01", "GL02"]: 113 | continue 114 | 115 | # skglm specific: we ignore: 116 | # - SA01: See Also section not found. 117 | # - EX01: No examples section found. 118 | if code in ['SA01', 'EX01']: 119 | continue 120 | 121 | # Ignore PR02: Unknown parameters for properties. We sometimes use 122 | # properties for ducktyping, i.e. SGDClassifier.predict_proba 123 | if code == "PR02" and Estimator is not None and method is not None: 124 | method_obj = getattr(Estimator, method) 125 | if isinstance(method_obj, property): 126 | continue 127 | 128 | # Following codes are only taken into account for the 129 | # top level class docstrings: 130 | # - ES01: No extended summary found 131 | # - SA01: See Also section not found 132 | # - EX01: No examples section found 133 | 134 | if method is not None and code in ["EX01", "SA01", "ES01"]: 135 | continue 136 | yield code, message 137 | 138 | 139 | def repr_errors(res, estimator=None, method: Optional[str] = None) -> str: 140 | """Pretty print original docstring and the obtained errors 141 | Parameters 142 | ---------- 143 | res : dict 144 | result of numpydoc.validate.validate 145 | estimator : {estimator, None} 146 | estimator object or None 147 | method : str 148 | if estimator is not None, either the method name or None. 149 | Returns 150 | ------- 151 | str 152 | String representation of the error. 153 | """ 154 | if method is None: 155 | if hasattr(estimator, "__init__"): 156 | method = "__init__" 157 | elif estimator is None: 158 | raise ValueError("At least one of estimator, method should be provided") 159 | else: 160 | raise NotImplementedError 161 | 162 | if estimator is not None: 163 | obj = getattr(estimator, method) 164 | try: 165 | obj_signature = str(signature(obj)) 166 | except TypeError: 167 | # In particular we can't parse the signature of properties 168 | obj_signature = ( 169 | "\nParsing of the method signature failed, " 170 | "possibly because this is a property." 171 | ) 172 | 173 | obj_name = estimator.__name__ + "." 
+ method 174 | else: 175 | obj_signature = "" 176 | obj_name = method 177 | 178 | msg = "\n\n" + "\n\n".join( 179 | [ 180 | str(res["file"]), 181 | obj_name + obj_signature, 182 | res["docstring"], 183 | "# Errors", 184 | "\n".join( 185 | " - {}: {}".format(code, message) for code, message in res["errors"] 186 | ), 187 | ] 188 | ) 189 | return msg 190 | 191 | 192 | @pytest.mark.parametrize("function_name", get_all_functions_names()) 193 | def test_function_docstring(function_name, request): 194 | """Check function docstrings using numpydoc.""" 195 | if function_name in FUNCTION_DOCSTRING_IGNORE_LIST: 196 | request.applymarker( 197 | pytest.mark.xfail(run=False, reason="TODO pass numpydoc validation") 198 | ) 199 | 200 | res = numpydoc_validation.validate(function_name) 201 | 202 | res["errors"] = list(filter_errors(res["errors"], method="function")) 203 | 204 | if res["errors"]: 205 | msg = repr_errors(res, method=f"Tested function: {function_name}") 206 | 207 | raise ValueError(msg) 208 | 209 | 210 | @pytest.mark.parametrize("Estimator, method", get_all_methods()) 211 | def test_docstring(Estimator, method, request): 212 | base_import_path = Estimator.__module__ 213 | import_path = [base_import_path, Estimator.__name__] 214 | if method is not None: 215 | import_path.append(method) 216 | 217 | import_path = ".".join(import_path) 218 | 219 | res = numpydoc_validation.validate(import_path) 220 | 221 | res["errors"] = list(filter_errors(res["errors"], method, Estimator=Estimator)) 222 | 223 | if res["errors"]: 224 | msg = repr_errors(res, Estimator, method) 225 | 226 | raise ValueError(msg) 227 | 228 | 229 | if __name__ == "__main__": 230 | pass 231 | -------------------------------------------------------------------------------- /skglm/tests/test_fista.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | from numpy.linalg import norm 5 | 6 | from scipy.sparse import csc_matrix 7 | 8 | from skglm.penalties import L1 9 | from skglm.solvers import FISTA, AndersonCD 10 | from skglm.datafits import Quadratic, Logistic 11 | 12 | from skglm.utils.data import make_correlated_data 13 | 14 | 15 | random_state = 113 16 | n_samples, n_features = 50, 60 17 | 18 | rng = np.random.RandomState(random_state) 19 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=rng) 20 | rng.seed(random_state) 21 | X_sparse = csc_matrix(X * np.random.binomial(1, 0.5, X.shape)) 22 | y_classif = np.sign(y) 23 | 24 | alpha_max = norm(X.T @ y, ord=np.inf) / len(y) 25 | alpha = alpha_max / 10 26 | 27 | tol = 1e-10 28 | 29 | 30 | @pytest.mark.parametrize("X", [X, X_sparse]) 31 | @pytest.mark.parametrize("Datafit, Penalty", [ 32 | (Quadratic, L1), 33 | (Logistic, L1), 34 | # (QuadraticSVC, IndicatorBox), 35 | ]) 36 | def test_fista_solver(X, Datafit, Penalty): 37 | _y = y if isinstance(Datafit, Quadratic) else y_classif 38 | datafit = Datafit() 39 | penalty = Penalty(alpha) 40 | 41 | solver = FISTA(max_iter=1000, tol=tol) 42 | w_fista = solver.solve(X, _y, datafit, penalty)[0] 43 | 44 | solver_cd = AndersonCD(tol=tol, fit_intercept=False) 45 | w_cd = solver_cd.solve(X, _y, datafit, penalty)[0] 46 | 47 | np.testing.assert_allclose(w_fista, w_cd, atol=1e-7) 48 | 49 | 50 | if __name__ == '__main__': 51 | pass 52 | -------------------------------------------------------------------------------- /skglm/tests/test_gram_solver.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from 
itertools import product 3 | 4 | import numpy as np 5 | from numpy.linalg import norm 6 | from sklearn.linear_model import Lasso 7 | 8 | from skglm.penalties import L1 9 | from skglm.solvers import GramCD 10 | 11 | from skglm.utils.data import make_correlated_data 12 | 13 | 14 | @pytest.mark.parametrize("rho, X_density, greedy_cd", 15 | product([1e-1, 1e-3], [1., 0.8], [True, False])) 16 | def test_vs_lasso_sklearn(rho, X_density, greedy_cd): 17 | X, y, _ = make_correlated_data( 18 | n_samples=18, n_features=8, random_state=0, X_density=X_density) 19 | alpha_max = norm(X.T @ y, ord=np.inf) / len(y) 20 | alpha = rho * alpha_max 21 | 22 | sk_lasso = Lasso(alpha, fit_intercept=False, tol=1e-9) 23 | sk_lasso.fit(X, y) 24 | 25 | l1_penalty = L1(alpha) 26 | w = GramCD(tol=1e-9, max_iter=1000, greedy_cd=greedy_cd).solve( 27 | X, y, None, l1_penalty)[0] 28 | np.testing.assert_allclose(w, sk_lasso.coef_.flatten(), rtol=1e-7, atol=1e-7) 29 | 30 | 31 | if __name__ == '__main__': 32 | pass 33 | -------------------------------------------------------------------------------- /skglm/tests/test_lbfgs_solver.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from skglm.penalties import L2 6 | from skglm.solvers import LBFGS 7 | from skglm.datafits import Logistic, Cox 8 | 9 | from sklearn.linear_model import LogisticRegression 10 | 11 | from skglm.utils.data import make_correlated_data, make_dummy_survival_data 12 | 13 | 14 | @pytest.mark.parametrize("X_sparse", [True, False]) 15 | def test_lbfgs_L2_logreg(X_sparse): 16 | reg = 1.0 17 | X_density = 1.0 if not X_sparse else 0.5 18 | n_samples, n_features = 100, 50 19 | 20 | X, y, _ = make_correlated_data( 21 | n_samples, 22 | n_features, 23 | random_state=0, 24 | X_density=X_density, 25 | ) 26 | y = np.sign(y) 27 | 28 | # fit L-BFGS 29 | datafit = Logistic() 30 | penalty = L2(reg) 31 | w, *_ = LBFGS(tol=1e-12).solve(X, y, datafit, penalty) 32 | 33 | # fit scikit learn 34 | estimator = LogisticRegression( 35 | penalty="l2", 36 | C=1 / (n_samples * reg), 37 | fit_intercept=False, 38 | tol=1e-12, 39 | ).fit(X, y) 40 | 41 | np.testing.assert_allclose(w, estimator.coef_.flatten(), atol=1e-5) 42 | 43 | 44 | @pytest.mark.parametrize("use_efron", [True, False]) 45 | def test_L2_Cox(use_efron): 46 | try: 47 | from lifelines import CoxPHFitter 48 | except ModuleNotFoundError: 49 | pytest.xfail( 50 | "Testing L2 Cox Estimator requires `lifelines` packages\n" 51 | "Run `pip install lifelines`" 52 | ) 53 | 54 | alpha = 10.0 55 | n_samples, n_features = 100, 50 56 | 57 | X, y = make_dummy_survival_data( 58 | n_samples, n_features, normalize=True, with_ties=use_efron, random_state=0 59 | ) 60 | 61 | datafit = Cox(use_efron) 62 | penalty = L2(alpha) 63 | 64 | # XXX: intialize is needed here although it is done in LBFGS 65 | # is used to evaluate the objective 66 | datafit.initialize(X, y) 67 | w, *_ = LBFGS().solve(X, y, datafit, penalty) 68 | 69 | # fit lifeline estimator 70 | stacked_y_X = np.hstack((y, X)) 71 | df = pd.DataFrame(stacked_y_X) 72 | 73 | estimator = CoxPHFitter(penalizer=alpha, l1_ratio=0.0).fit( 74 | df, duration_col=0, event_col=1 75 | ) 76 | w_ll = estimator.params_.values 77 | 78 | p_obj_skglm = datafit.value(y, w, X @ w) + penalty.value(w) 79 | p_obj_ll = datafit.value(y, w_ll, X @ w_ll) + penalty.value(w_ll) 80 | 81 | # despite increasing tol in lifelines, solutions are quite far apart 82 | # suspecting lifelines 
https://github.com/CamDavidsonPilon/lifelines/pull/1534 83 | # as our solution gives the lowest objective value 84 | np.testing.assert_allclose(w, w_ll, rtol=1e-1) 85 | np.testing.assert_allclose(p_obj_skglm, p_obj_ll, rtol=1e-6) 86 | 87 | 88 | if __name__ == "__main__": 89 | pass 90 | -------------------------------------------------------------------------------- /skglm/tests/test_penalties.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from numpy.linalg import norm 5 | from numpy.testing import assert_array_less 6 | 7 | from sklearn.linear_model import LinearRegression 8 | 9 | from skglm.datafits import Quadratic, QuadraticMultiTask 10 | from skglm.penalties import ( 11 | L1, L1_plus_L2, WeightedL1, MCPenalty, SCAD, IndicatorBox, L0_5, L2_3, SLOPE, 12 | LogSumPenalty, PositiveConstraint, L2_1, L2_05, BlockMCPenalty, BlockSCAD) 13 | from skglm import GeneralizedLinearEstimator, Lasso 14 | from skglm.solvers import AndersonCD, MultiTaskBCD, FISTA 15 | from skglm.utils.data import make_correlated_data 16 | 17 | from skglm.utils.prox_funcs import prox_log_sum, _log_sum_prox_val 18 | 19 | 20 | n_samples = 20 21 | n_features = 10 22 | n_tasks = 10 23 | X, Y, _ = make_correlated_data( 24 | n_samples=n_samples, n_features=n_features, n_tasks=n_tasks, density=0.5, 25 | random_state=0) 26 | y = Y[:, 0] 27 | 28 | n_samples, n_features = X.shape 29 | alpha_max = norm(X.T @ y, ord=np.inf) / n_samples 30 | alpha = alpha_max / 1000 31 | 32 | tol = 1e-10 33 | 34 | penalties = [ 35 | L1(alpha=alpha), 36 | L1_plus_L2(alpha=alpha, l1_ratio=0.5), 37 | WeightedL1(alpha=1, weights=np.arange(n_features)), 38 | MCPenalty(alpha=alpha, gamma=4), 39 | SCAD(alpha=alpha, gamma=4), 40 | IndicatorBox(alpha=alpha), 41 | L0_5(alpha), 42 | L2_3(alpha), 43 | LogSumPenalty(alpha=alpha, eps=1e-2) 44 | ] 45 | 46 | block_penalties = [ 47 | L2_1(alpha=alpha), L2_05(alpha=alpha), 48 | BlockMCPenalty(alpha=alpha, gamma=4), 49 | BlockSCAD(alpha=alpha, gamma=4) 50 | ] 51 | 52 | 53 | @pytest.mark.parametrize('penalty', penalties) 54 | def test_subdiff_diff(penalty): 55 | # tol=1e-14 is too low when coefs are of order 1. 
square roots are computed in 56 | # some penalties and precision is lost 57 | est = GeneralizedLinearEstimator( 58 | datafit=Quadratic(), 59 | penalty=penalty, 60 | solver=AndersonCD(tol=tol) 61 | ).fit(X, y) 62 | # assert the stopping criterion is satisfied 63 | assert_array_less(est.stop_crit_, tol) 64 | 65 | 66 | @pytest.mark.parametrize('block_penalty', block_penalties) 67 | def test_subdiff_diff_block(block_penalty): 68 | est = GeneralizedLinearEstimator( 69 | datafit=QuadraticMultiTask(), 70 | penalty=block_penalty, 71 | solver=MultiTaskBCD(tol=tol) 72 | ).fit(X, Y) 73 | # assert the stopping criterion is satisfied 74 | assert_array_less(est.stop_crit_, est.solver.tol) 75 | 76 | 77 | def test_slope_lasso(): 78 | # check that when alphas = [alpha, ..., alpha], SLOPE and L1 solutions are equal 79 | alphas = np.full(n_features, alpha) 80 | est = GeneralizedLinearEstimator( 81 | penalty=SLOPE(alphas), 82 | solver=FISTA(max_iter=1000, tol=tol, opt_strategy="fixpoint"), 83 | ).fit(X, y) 84 | lasso = Lasso(alpha, fit_intercept=False, tol=tol).fit(X, y) 85 | np.testing.assert_allclose(est.coef_, lasso.coef_, rtol=1e-5) 86 | 87 | 88 | def test_slope(): 89 | # compare solutions with `sortedl1`: https://github.com/jolars/sortedl1 90 | try: 91 | from sortedl1 import Slope as SlopeEst # noqa 92 | except ImportError: 93 | pytest.xfail( 94 | "This test requires slope to run.\n" 95 | "https://github.com/jolars/sortedl1") 96 | 97 | # q = 0.1 98 | # alphas = lambda_sequence( 99 | # X, y, fit_intercept=False, reg=alpha / alpha_max, q=q) 100 | clf = SlopeEst( 101 | alpha=0.01, fit_intercept=False, tol=1e-6 102 | ).fit(X, y) 103 | alphas = clf.lambda_ 104 | ours = GeneralizedLinearEstimator( 105 | penalty=SLOPE(clf.alpha * alphas), 106 | solver=FISTA(max_iter=1000, tol=tol, opt_strategy="fixpoint"), 107 | ).fit(X, y) 108 | np.testing.assert_allclose(ours.coef_, np.squeeze(clf.coef_), rtol=1e-3) 109 | 110 | 111 | @pytest.mark.parametrize("fit_intercept", [True, False]) 112 | def test_nnls(fit_intercept): 113 | # compare solutions with sklearn's LinearRegression, note that n_samples >= 114 | # n_features for the design matrix to be injective, hence the solution unique 115 | clf = GeneralizedLinearEstimator( 116 | datafit=Quadratic(), 117 | penalty=PositiveConstraint(), 118 | solver=AndersonCD(tol=tol, fit_intercept=fit_intercept), 119 | ).fit(X, y) 120 | reg_nnls = LinearRegression(positive=True, fit_intercept=fit_intercept).fit(X, y) 121 | 122 | np.testing.assert_allclose(clf.coef_, reg_nnls.coef_) 123 | np.testing.assert_allclose(clf.intercept_, reg_nnls.intercept_) 124 | 125 | 126 | def test_logsum_prox(): 127 | alpha = 1. 
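# sanity check: the value returned by `prox_log_sum` should be a global minimizer
# of the scalar prox objective `_log_sum_prox_val`; this is verified below by
# comparing the objective at the prox against a dense grid of candidate points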
128 | 129 | grid_z = np.linspace(-2, 2, num=10) 130 | grid_test = np.linspace(-5, 5, num=100) 131 | grid_eps = np.linspace(0, 5, num=10 + 1)[1:] 132 | 133 | for z, eps in zip(grid_z, grid_eps): 134 | prox = prox_log_sum(z, alpha, eps) 135 | obj_at_prox = _log_sum_prox_val(prox, z, alpha, eps) 136 | 137 | is_lowest = all( 138 | obj_at_prox <= _log_sum_prox_val(x, z, alpha, eps) for x in grid_test 139 | ) 140 | 141 | np.testing.assert_equal(is_lowest, True) 142 | 143 | 144 | if __name__ == "__main__": 145 | pass 146 | -------------------------------------------------------------------------------- /skglm/tests/test_prox_newton.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from sklearn.linear_model import LogisticRegression 4 | 5 | from skglm.penalties import L1 6 | from skglm.datafits import Logistic 7 | from skglm.solvers.prox_newton import ProxNewton 8 | 9 | from skglm.utils.data import make_correlated_data 10 | 11 | 12 | @pytest.mark.parametrize("X_density", [1., 0.5]) 13 | @pytest.mark.parametrize("fit_intercept", [True, False]) 14 | @pytest.mark.parametrize("ws_strategy", ["subdiff", "fixpoint"]) 15 | def test_pn_vs_sklearn(X_density, fit_intercept, ws_strategy): 16 | n_samples, n_features = 12, 25 17 | rho = 1e-1 18 | 19 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=0, 20 | X_density=X_density) 21 | y = np.sign(y) 22 | 23 | alpha_max = np.linalg.norm(X.T @ y, ord=np.inf) / (2 * n_samples) 24 | alpha = rho * alpha_max 25 | 26 | sk_log_reg = LogisticRegression(penalty='l1', C=1/(n_samples * alpha), 27 | fit_intercept=fit_intercept, random_state=0, 28 | tol=1e-12, solver='saga', max_iter=1_000_000) 29 | sk_log_reg.fit(X, y) 30 | 31 | log_datafit = Logistic() 32 | l1_penalty = L1(alpha) 33 | prox_solver = ProxNewton( 34 | fit_intercept=fit_intercept, tol=1e-12, ws_strategy=ws_strategy) 35 | w = prox_solver.solve(X, y, log_datafit, l1_penalty)[0] 36 | 37 | np.testing.assert_allclose(w[:n_features], sk_log_reg.coef_.flatten()) 38 | if fit_intercept: 39 | np.testing.assert_allclose(w[-1], sk_log_reg.intercept_) 40 | 41 | 42 | if __name__ == '__main__': 43 | pass 44 | -------------------------------------------------------------------------------- /skglm/tests/test_sparse_ops.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse 3 | 4 | from skglm.utils.sparse_ops import spectral_norm, sparse_columns_slice 5 | 6 | 7 | def test_spectral_norm(): 8 | n_samples, n_features = 50, 60 9 | A_sparse = scipy.sparse.random( 10 | n_samples, n_features, density=0.7, format='csc', random_state=37) 11 | 12 | A_bundles = (A_sparse.data, A_sparse.indptr, A_sparse.indices) 13 | spectral_norm_our = spectral_norm(*A_bundles, n_samples=A_sparse.shape[0]) 14 | spectral_norm_sp = scipy.sparse.linalg.svds(A_sparse, k=1)[1] 15 | 16 | np.testing.assert_allclose(spectral_norm_our, spectral_norm_sp) 17 | 18 | 19 | def test_slice_cols_sparse(): 20 | n_samples, n_features = 20, 50 21 | rng = np.random.RandomState(546) 22 | 23 | M = scipy.sparse.random( 24 | n_samples, n_features, density=0.9, format="csc", random_state=rng) 25 | cols = rng.choice(n_features, size=n_features // 10, replace=False) 26 | 27 | sub_M_data, sub_M_indptr, sub_M_indices = sparse_columns_slice( 28 | cols, M.data, M.indptr, M.indices) 29 | sub_M = scipy.sparse.csc_matrix( 30 | (sub_M_data, sub_M_indices, sub_M_indptr), shape=(n_samples, len(cols))) 31 | 32 | 
np.testing.assert_array_equal(sub_M.toarray(), M.toarray()[:, cols]) 33 | 34 | 35 | if __name__ == "__main__": 36 | pass 37 | -------------------------------------------------------------------------------- /skglm/tests/test_validation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from scipy import sparse 4 | 5 | from skglm.penalties import L1, WeightedL1GroupL2, WeightedGroupL2 6 | from skglm.datafits import Poisson, Huber, QuadraticGroup, LogisticGroup 7 | from skglm.solvers import FISTA, ProxNewton, GroupBCD, GramCD, GroupProxNewton 8 | 9 | from skglm.utils.data import grp_converter 10 | from skglm.utils.data import make_correlated_data 11 | 12 | 13 | def test_datafit_penalty_solver_compatibility(): 14 | grp_size, n_features = 3, 9 15 | n_samples = 10 16 | X, y, _ = make_correlated_data(n_samples, n_features) 17 | X_sparse = sparse.csc_array(X) 18 | 19 | n_groups = n_features // grp_size 20 | weights_groups = np.ones(n_groups) 21 | weights_features = np.ones(n_features) 22 | grp_indices, grp_ptr = grp_converter(grp_size, n_features) 23 | 24 | # basic compatibility checks 25 | with pytest.raises( 26 | AttributeError, match="Missing `raw_grad` and `raw_hessian`" 27 | ): 28 | ProxNewton()._validate( 29 | X, y, Huber(1.), L1(1.) 30 | ) 31 | with pytest.raises( 32 | AttributeError, match="Missing `get_global_lipschitz`" 33 | ): 34 | FISTA()._validate( 35 | X, y, Poisson(), L1(1.) 36 | ) 37 | with pytest.raises( 38 | AttributeError, match="Missing `get_global_lipschitz`" 39 | ): 40 | FISTA()._validate( 41 | X, y, Poisson(), L1(1.) 42 | ) 43 | # check Gram Solver 44 | with pytest.raises( 45 | AttributeError, match="`GramCD` supports only `Quadratic` datafit" 46 | ): 47 | GramCD()._validate( 48 | X, y, Poisson(), L1(1.) 49 | ) 50 | # check working set strategy subdiff 51 | with pytest.raises( 52 | AttributeError, match="Penalty must implement `subdiff_distance`" 53 | ): 54 | GroupBCD()._validate( 55 | X, y, 56 | datafit=QuadraticGroup(grp_ptr, grp_indices), 57 | penalty=WeightedL1GroupL2( 58 | 1., weights_groups, weights_features, grp_ptr, grp_indices) 59 | ) 60 | # checks for sparsity 61 | with pytest.raises( 62 | ValueError, 63 | match="Sparse matrices are not yet supported in `GroupProxNewton` solver." 64 | ): 65 | GroupProxNewton()._validate( 66 | X_sparse, y, 67 | datafit=QuadraticGroup(grp_ptr, grp_indices), 68 | penalty=WeightedL1GroupL2( 69 | 1., weights_groups, weights_features, grp_ptr, grp_indices) 70 | ) 71 | with pytest.raises( 72 | AttributeError, 73 | match="LogisticGroup is not compatible with solver GroupBCD with sparse data." 74 | ): 75 | GroupBCD()._validate( 76 | X_sparse, y, 77 | datafit=LogisticGroup(grp_ptr, grp_indices), 78 | penalty=WeightedGroupL2(1., weights_groups, grp_ptr, grp_indices) 79 | ) 80 | 81 | 82 | if __name__ == "__main__": 83 | pass 84 | -------------------------------------------------------------------------------- /skglm/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/skglm/utils/__init__.py -------------------------------------------------------------------------------- /skglm/utils/anderson.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class AndersonAcceleration: 5 | """Abstraction of Anderson Acceleration. 
6 | 7 | Extrapolate the asymptotic VAR ``w`` and ``Xw`` 8 | based on ``K`` previous iterations. 9 | 10 | Parameters 11 | ---------- 12 | K : int 13 | Number of previous iterates to consider for extrapolation. 14 | """ 15 | 16 | def __init__(self, K): 17 | self.K, self.current_iter = K, 0 18 | self.arr_w_, self.arr_Xw_ = None, None 19 | 20 | def extrapolate(self, w, Xw): 21 | """Return w, Xw, and a bool indicating whether they were extrapolated.""" 22 | if self.arr_w_ is None or self.arr_Xw_ is None: 23 | self.arr_w_ = np.zeros((w.shape[0], self.K+1)) 24 | self.arr_Xw_ = np.zeros((Xw.shape[0], self.K+1)) 25 | 26 | if self.current_iter <= self.K: 27 | self.arr_w_[:, self.current_iter] = w 28 | self.arr_Xw_[:, self.current_iter] = Xw 29 | self.current_iter += 1 30 | return w, Xw, False 31 | 32 | U = np.diff(self.arr_w_, axis=1) # compute residuals 33 | 34 | # compute extrapolation coefs 35 | try: 36 | inv_UTU_ones = np.linalg.solve(U.T @ U, np.ones(self.K)) 37 | except np.linalg.LinAlgError: 38 | return w, Xw, False 39 | finally: 40 | self.current_iter = 0 41 | 42 | # extrapolate 43 | C = inv_UTU_ones / np.sum(inv_UTU_ones) 44 | # floating point errors may cause w and Xw to disagree 45 | return self.arr_w_[:, 1:] @ C, self.arr_Xw_[:, 1:] @ C, True 46 | -------------------------------------------------------------------------------- /skglm/utils/jit_compilation.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | 3 | import numba 4 | from numba import float32, float64 5 | from numba.experimental import jitclass 6 | 7 | 8 | def spec_to_float32(spec): 9 | """Convert a numba specification to an equivalent float32 one. 10 | 11 | Parameters 12 | ---------- 13 | spec : list 14 | A list of (name, dtype) for every attribute of a jitclass. 15 | 16 | Returns 17 | ------- 18 | spec32 : list 19 | A list of (name, dtype) for every attribute of a jitclass, where float64 20 | have been replaced by float32. 21 | """ 22 | spec32 = [] 23 | for name, dtype in spec: 24 | if dtype == float64: 25 | dtype32 = float32 26 | elif isinstance(dtype, numba.core.types.npytypes.Array): 27 | if dtype.dtype == float64: 28 | dtype32 = dtype.copy(dtype=float32) 29 | else: 30 | dtype32 = dtype 31 | else: 32 | # raise ValueError(f"Unknown spec type {dtype}") 33 | # bool types and others are not affected: 34 | dtype32 = dtype 35 | spec32.append((name, dtype32)) 36 | return spec32 37 | 38 | 39 | @lru_cache() 40 | def jit_cached_compile(klass, spec, to_float32=False): 41 | """Jit compile class and cache compilation. 42 | 43 | Parameters 44 | ---------- 45 | klass : class 46 | Un instantiated Datafit or Penalty. 47 | 48 | spec : tuple 49 | A tuple of (name, dtype) for every attribute of a jitclass. 50 | 51 | to_float32 : bool, optional 52 | If ``True``converts float64 types to float32, by default False. 53 | 54 | Returns 55 | ------- 56 | Instance of Datafit or penalty 57 | Return a jitclass. 58 | """ 59 | if to_float32: 60 | spec = spec_to_float32(spec) 61 | 62 | return jitclass(spec)(klass) 63 | 64 | 65 | def compiled_clone(instance, to_float32=False): 66 | """Compile instance to a jitclass. 67 | 68 | Parameters 69 | ---------- 70 | instance : Instance of Datafit or Penalty 71 | Datafit or Penalty object. 72 | 73 | to_float32 : bool, optional 74 | If ``True``converts float64 types to float32, by default False. 75 | 76 | Returns 77 | ------- 78 | Instance of Datafit or penalty 79 | Return a jitclass. 
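Examples
--------
A minimal sketch, assuming an ``L1`` penalty instance (any datafit or penalty
exposing ``get_spec`` and ``params_to_dict``, as skglm objects do, works the
same way):

>>> from skglm.penalties import L1
>>> compiled_penalty = compiled_clone(L1(alpha=0.1))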
80 | """ 81 | return jit_cached_compile( 82 | instance.__class__, 83 | instance.get_spec(), 84 | to_float32, 85 | )(**instance.params_to_dict()) 86 | -------------------------------------------------------------------------------- /skglm/utils/sparse_ops.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import norm 3 | from numba import njit 4 | 5 | 6 | @njit 7 | def spectral_norm(X_data, X_indptr, X_indices, n_samples, 8 | max_iter=100, tol=1e-6): 9 | """Compute the spectral norm of sparse matrix ``X`` with power method. 10 | 11 | Parameters 12 | ---------- 13 | X_data : array, shape (n_elements,) 14 | ``data`` attribute of the sparse CSC matrix ``X``. 15 | 16 | X_indptr : array, shape (n_features + 1,) 17 | ``indptr`` attribute of the sparse CSC matrix ``X``. 18 | 19 | X_indices : array, shape (n_elements,) 20 | ``indices`` attribute of the sparse CSC matrix ``X``. 21 | 22 | n_samples : int 23 | number of rows of ``X``. 24 | 25 | max_iter : int, default 20 26 | Maximum number of power method iterations. 27 | 28 | tol : float, default 1e-6 29 | Tolerance for convergence. 30 | 31 | Returns 32 | ------- 33 | eigenvalue : float 34 | The largest singular value of ``X``. 35 | 36 | References 37 | ---------- 38 | .. [1] Alfio Quarteroni, Riccardo Sacco, Fausto Saleri "Numerical Mathematics", 39 | chapter 5, page 192-195. 40 | """ 41 | # init vec with norm(vec) == 1. 42 | eigenvector = np.random.randn(n_samples) 43 | eigenvector /= norm(eigenvector) 44 | eigenvalue = 1. 45 | 46 | for _ in range(max_iter): 47 | vec = _XXT_dot_vec(X_data, X_indptr, X_indices, eigenvector, n_samples) 48 | norm_vec = norm(vec) 49 | eigenvalue = vec @ eigenvector 50 | 51 | # norm(X @ X.T @ eigenvector - eigenvalue * eigenvector) <= tol 52 | # inequality (5.25) in ref [1] is squared 53 | if norm_vec ** 2 - eigenvalue ** 2 <= tol ** 2: 54 | break 55 | 56 | eigenvector = vec / norm_vec 57 | 58 | return np.sqrt(eigenvalue) 59 | 60 | 61 | @njit 62 | def sparse_columns_slice(cols, X_data, X_indptr, X_indices): 63 | """Select a sub matrix from CSC sparse matrix. 64 | 65 | Similar to ``X[:, cols]`` but for ``X`` a CSC sparse matrix. 66 | 67 | Parameters 68 | ---------- 69 | cols : array of int 70 | Columns to select in matrix ``X``. 71 | 72 | X_data : array, shape (n_elements,) 73 | ``data`` attribute of the sparse CSC matrix ``X``. 74 | 75 | X_indptr : array, shape (n_features + 1,) 76 | ``indptr`` attribute of the sparse CSC matrix ``X``. 77 | 78 | X_indices : array, shape (n_elements,) 79 | ``indices`` attribute of the sparse CSC matrix ``X``. 80 | 81 | Returns 82 | ------- 83 | sub_X_data, sub_X_indptr, sub_X_indices 84 | The ``data``, ``indptr``, and ``indices`` attributes of the sub matrix. 
85 | """ 86 | nnz = sum([X_indptr[j+1] - X_indptr[j] for j in cols]) 87 | 88 | sub_X_indptr = np.zeros(len(cols) + 1, dtype=cols.dtype) 89 | sub_X_indices = np.zeros(nnz, dtype=X_indices.dtype) 90 | sub_X_data = np.zeros(nnz, dtype=X_data.dtype) 91 | 92 | for idx, j in enumerate(cols): 93 | n_elements = X_indptr[j+1] - X_indptr[j] 94 | sub_X_indptr[idx + 1] = sub_X_indptr[idx] + n_elements 95 | 96 | col_j_slice = slice(X_indptr[j], X_indptr[j+1]) 97 | col_idx_slice = slice(sub_X_indptr[idx], sub_X_indptr[idx+1]) 98 | 99 | sub_X_indices[col_idx_slice] = X_indices[col_j_slice] 100 | sub_X_data[col_idx_slice] = X_data[col_j_slice] 101 | 102 | return sub_X_data, sub_X_indptr, sub_X_indices 103 | 104 | 105 | @njit 106 | def _XXT_dot_vec(X_data, X_indptr, X_indices, vec, n_samples): 107 | # computes X @ X.T @ vec, with X csc encoded 108 | return _X_dot_vec(X_data, X_indptr, X_indices, 109 | _XT_dot_vec(X_data, X_indptr, X_indices, vec), n_samples) 110 | 111 | 112 | @njit 113 | def _X_dot_vec(X_data, X_indptr, X_indices, vec, n_samples): 114 | # compute X @ vec, with X csc encoded 115 | result = np.zeros(n_samples) 116 | 117 | # loop over features 118 | for j in range(len(X_indptr) - 1): 119 | if vec[j] == 0: 120 | continue 121 | 122 | col_j_rows_idx = slice(X_indptr[j], X_indptr[j+1]) 123 | result[X_indices[col_j_rows_idx]] += vec[j] * X_data[col_j_rows_idx] 124 | 125 | return result 126 | 127 | 128 | @njit 129 | def _XT_dot_vec(X_data, X_indptr, X_indices, vec): 130 | # compute X.T @ vec, with X csc encoded 131 | n_features = len(X_indptr) - 1 132 | result = np.zeros(n_features) 133 | 134 | for j in range(n_features): 135 | for idx in range(X_indptr[j], X_indptr[j+1]): 136 | result[j] += X_data[idx] * vec[X_indices[idx]] 137 | 138 | return result 139 | 140 | 141 | @njit(fastmath=True) 142 | def _sparse_xj_dot(X_data, X_indptr, X_indices, j, other): 143 | # Compute X[:, j] @ other in case X sparse 144 | res = 0. 145 | for i in range(X_indptr[j], X_indptr[j+1]): 146 | res += X_data[i] * other[X_indices[i]] 147 | return res 148 | -------------------------------------------------------------------------------- /skglm/utils/validation.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | SPARSE_SUFFIX = "_sparse" 5 | 6 | 7 | def check_group_compatible(obj): 8 | """Check whether ``obj`` is compatible with ``bcd_solver``. 9 | 10 | Parameters 11 | ---------- 12 | obj : instance of BaseDatafit or BasePenalty 13 | Object to check. 14 | 15 | Raises 16 | ------ 17 | ValueError 18 | if the ``obj`` doesn't have a ``grp_ptr`` and ``grp_indices`` 19 | attributes. 20 | """ 21 | obj_name = obj.__class__.__name__ 22 | group_attrs = ('grp_ptr', 'grp_indices') 23 | 24 | for attr in group_attrs: 25 | if not hasattr(obj, attr): 26 | raise ValueError( 27 | f"datafit and penalty must be compatible with 'bcd_solver'.\n" 28 | f"'{obj_name}' is not block-separable. " 29 | f"Missing '{attr}' attribute." 30 | ) 31 | 32 | 33 | def check_attrs(obj, solver, required_attr, support_sparse=False): 34 | """Check whether datafit or penalty is compatible with solver. 35 | 36 | Parameters 37 | ---------- 38 | obj : Instance of Datafit or Penalty 39 | The instance Datafit (or Penalty) to check. 40 | 41 | solver : Instance of Solver 42 | The instance of Solver to check. 43 | 44 | required_attr : List or tuple of strings 45 | The attributes that ``obj`` must have. 
46 | 47 | support_sparse : bool, default False 48 | If ``True`` adds a ``SPARSE_SUFFIX`` to check compatibility with sparse data. 49 | 50 | Raises 51 | ------ 52 | AttributeError 53 | if any of the attribute in ``required_attr`` is missing 54 | from ``obj`` attributes. 55 | """ 56 | missing_attrs = [] 57 | suffix = SPARSE_SUFFIX if support_sparse else "" 58 | 59 | # if `attr` is a list, check that at least one of them 60 | # is within `obj` attributes 61 | for attr in required_attr: 62 | attributes = attr if not isinstance(attr, str) else (attr,) 63 | 64 | for a in attributes: 65 | if hasattr(obj, f"{a}{suffix}"): 66 | break 67 | else: 68 | missing_attrs.append(_join_attrs_with_or(attributes, suffix)) 69 | 70 | if len(missing_attrs): 71 | required_attr = [_join_attrs_with_or(attrs, suffix) for attrs in required_attr] 72 | 73 | # get name obj and solver 74 | name_matcher = re.compile(r"\.(\w+)'>") 75 | 76 | obj_name = name_matcher.search(str(obj.__class__)).group(1) 77 | solver_name = name_matcher.search(str(solver.__class__)).group(1) 78 | 79 | if not support_sparse: 80 | err_message = f"{obj_name} is not compatible with solver {solver_name}." 81 | else: 82 | err_message = (f"{obj_name} is not compatible with solver {solver_name} " 83 | "with sparse data.") 84 | 85 | err_message += (f" It must implement {' and '.join(required_attr)}.\n" 86 | f"Missing {' and '.join(missing_attrs)}.") 87 | 88 | raise AttributeError(err_message) 89 | 90 | 91 | def _join_attrs_with_or(attrs, suffix=""): 92 | if isinstance(attrs, str): 93 | return f"`{attrs}{suffix}`" 94 | 95 | if len(attrs) == 1: 96 | return f"`{attrs[0]}{suffix}`" 97 | 98 | out = " or ".join([f"`{a}{suffix}`" for a in attrs]) 99 | return f"({out})" 100 | --------------------------------------------------------------------------------