├── .circleci
│   └── config.yml
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   ├── pull_request_template.md
│   └── workflows
│       ├── circleci.yml
│       ├── flake8.yml
│       └── main.yml
├── .gitignore
├── CITATION.bib
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── doc
│   ├── Makefile
│   ├── _static
│   │   ├── images
│   │   │   ├── cox-tutorial
│   │   │   │   ├── A_dot_v.png
│   │   │   │   ├── A_transpose_dot_v.png
│   │   │   │   └── structure_matrix_A.png
│   │   │   ├── landingpage
│   │   │   │   ├── compatible.png
│   │   │   │   ├── compatible.webp
│   │   │   │   ├── ease.png
│   │   │   │   ├── ease.webp
│   │   │   │   ├── energy.png
│   │   │   │   ├── energy.webp
│   │   │   │   ├── finance.png
│   │   │   │   ├── finance.webp
│   │   │   │   ├── healthcare.png
│   │   │   │   ├── healthcare.webp
│   │   │   │   ├── hero.png
│   │   │   │   ├── hero.webp
│   │   │   │   ├── inrialogo.png
│   │   │   │   ├── inrialogo.webp
│   │   │   │   ├── modular.png
│   │   │   │   ├── modular.webp
│   │   │   │   ├── performance.png
│   │   │   │   └── performance.webp
│   │   │   └── logo.svg
│   │   ├── scripts
│   │   │   ├── asciimath-defines.js
│   │   │   ├── instantpage.min.js
│   │   │   └── lazyload.js
│   │   ├── style.css
│   │   └── switcher.json
│   ├── _templates
│   │   ├── layout.html
│   │   └── sidebar
│   │       └── version_toggler.html
│   ├── api.rst
│   ├── changes
│   │   ├── 0.1.rst
│   │   ├── 0.2.rst
│   │   ├── 0.3.rst
│   │   ├── 0.4.rst
│   │   ├── 0.5.rst
│   │   └── whats_new.rst
│   ├── conf.py
│   ├── contribute.rst
│   ├── getting_started.rst
│   ├── index.rst
│   ├── robots.txt
│   ├── sphinxext
│   │   ├── gh_substitutions.py
│   │   └── github_link.py
│   └── tutorials
│       ├── add_datafit.rst
│       ├── add_penalty.rst
│       ├── alpha_max.rst
│       ├── cox_datafit.rst
│       ├── intercept.rst
│       ├── intercept2.md
│       ├── prox_nn_group_lasso.rst
│       └── tutorials.rst
├── examples
│   ├── README.txt
│   ├── plot_compare_time.py
│   ├── plot_group_logistic_regression.py
│   ├── plot_lasso_vs_weighted.py
│   ├── plot_logreg_various_penalties.py
│   ├── plot_pen_prox.py
│   ├── plot_reweighted_l1.py
│   ├── plot_sparse_group_lasso.py
│   ├── plot_sparse_recovery.py
│   ├── plot_survival_analysis.py
│   ├── plot_ucurve.py
│   └── plot_zero_weights_lasso.py
├── pyproject.toml
└── skglm
    ├── __init__.py
    ├── datafits
    │   ├── __init__.py
    │   ├── base.py
    │   ├── group.py
    │   ├── multi_task.py
    │   └── single_task.py
    ├── estimators.py
    ├── experimental
    │   ├── __init__.py
    │   ├── _plot_sqrt_lasso.py
    │   ├── pdcd_ws.py
    │   ├── quantile_regression.py
    │   ├── reweighted.py
    │   ├── sqrt_lasso.py
    │   └── tests
    │       ├── test_quantile_regression.py
    │       ├── test_reweighted.py
    │       └── test_sqrt_lasso.py
    ├── penalties
    │   ├── __init__.py
    │   ├── base.py
    │   ├── block_separable.py
    │   ├── non_separable.py
    │   └── separable.py
    ├── solvers
    │   ├── __init__.py
    │   ├── anderson_cd.py
    │   ├── base.py
    │   ├── common.py
    │   ├── fista.py
    │   ├── gram_cd.py
    │   ├── group_bcd.py
    │   ├── group_prox_newton.py
    │   ├── lbfgs.py
    │   ├── multitask_bcd.py
    │   └── prox_newton.py
    ├── tests
    │   ├── test_datafits.py
    │   ├── test_docstring_parameters.py
    │   ├── test_estimators.py
    │   ├── test_fista.py
    │   ├── test_gram_solver.py
    │   ├── test_group.py
    │   ├── test_lbfgs_solver.py
    │   ├── test_penalties.py
    │   ├── test_prox_newton.py
    │   ├── test_sparse_ops.py
    │   └── test_validation.py
    └── utils
        ├── __init__.py
        ├── anderson.py
        ├── data.py
        ├── jit_compilation.py
        ├── prox_funcs.py
        ├── sparse_ops.py
        └── validation.py
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | jobs:
3 |   build_docs:
4 |     docker:
5 |       - image: cimg/python:3.10
6 |     steps:
7 |       - checkout
8 |       - run:
9 |           name: Set BASH_ENV
10 |           command: |
11 |             echo "set -e" >> $BASH_ENV
12 |             echo "export DISPLAY=:99" >> $BASH_ENV
13 |             echo "export OPENBLAS_NUM_THREADS=4" >> $BASH_ENV
14 |             echo "export LIBSVMDATA_HOME=$HOME/celer_data/" >> $BASH_ENV
15 |             echo "BASH_ENV:"
16 |             cat $BASH_ENV
17 |
18 |       - run:
19 |           name: Merge with upstream
20 |           command: |
21 |             echo $(git log -1 --pretty=%B) | tee gitlog.txt
22 |             echo ${CI_PULL_REQUEST//*pull\//} | tee merge.txt
23 |             if [[ $(cat merge.txt) != "" ]]; then
24 |               echo "Merging $(cat merge.txt)";
25 |               git remote add upstream https://github.com/scikit-learn-contrib/skglm.git;
26 |               git pull --ff-only upstream "refs/pull/$(cat merge.txt)/merge";
27 |               git fetch upstream main;
28 |             fi
29 |
30 |       # If both keys are in the same command only one is restored
31 |       - restore_cache:
32 |           keys:
33 |             - pip-cache
34 |
35 |       # Install Xvfb and related dependencies
36 |       - run:
37 |           name: Install Xvfb and dependencies
38 |           command: |
39 |             sudo apt-get update
40 |             sudo apt-get install -y xvfb
41 |
42 |       - run:
43 |           name: Spin up Xvfb
44 |           command: |
45 |             /sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -screen 0 1400x900x24 -ac +extension GLX +render -noreset;
46 |
47 |       - run:
48 |           name: Get Python running
49 |           command: |
50 |             python -m pip install --user --upgrade --progress-bar off pip
51 |             python -m pip install --user -e .
52 |             python -m pip install --user .[doc]
53 |
54 |       - save_cache:
55 |           key: pip-cache
56 |           paths:
57 |             - ~/.cache/pip
58 |
59 |       # Look at what we have and fail early if there is some library conflict
60 |       - run:
61 |           name: Check installation
62 |           command: |
63 |             which python
64 |             python -c "import skglm"
65 |
66 |       # Build docs
67 |       - run:
68 |           name: make html
69 |           no_output_timeout: 120m
70 |           command: |
71 |             cd doc;
72 |             make clean;
73 |             make SPHINXOPTS=-v html;
74 |             cd ..;
75 |
76 |
77 |       # Add stable doc
78 |       - run:
79 |           name: add stable doc
80 |           command: |
81 |             set -e
82 |             mkdir -p ~/.ssh
83 |             echo -e "Host *\nStrictHostKeyChecking no" > ~/.ssh/config
84 |             chmod og= ~/.ssh/config
85 |             cd doc;
86 |             make add-stable-doc;
87 |
88 |
89 |       # upload to gh-pages
90 |       - run:
91 |           name: deploy
92 |           command: |
93 |             if [[ ${CIRCLE_BRANCH} == "main" ]]; then
94 |               cd doc;
95 |               pip install ghp-import;
96 |               make install
97 |             fi
98 |
99 |
100 |       # Save the outputs
101 |       - store_artifacts:
102 |           path: doc/_build/html/
103 |           destination: dev
104 |       - persist_to_workspace:
105 |           root: doc/_build
106 |           paths:
107 |             - html
108 |
109 |
110 | workflows:
111 |   version: 2
112 |
113 |   default:
114 |     jobs:
115 |       - build_docs
116 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Report a problem
4 | ---
5 |
6 | ## Describe the bug
7 |
8 |
9 |
10 | ## Steps to reproduce
11 |
12 |
13 |
14 |
15 | **Additional context**
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest a new feature
4 | title: FEAT - Give a title to the feature request
5 | ---
6 |
7 | ## Description of the feature
8 |
9 |
10 |
11 |
12 | ## Considered alternatives
13 |
14 |
15 |
16 |
17 | **Additional context**
18 |
19 |
20 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | ## Context of the PR
2 |
3 |
9 |
10 | ## Contributions of the PR
11 |
12 |
13 |
14 |
15 | ### Checks before merging PR
16 |
17 | - [ ] added documentation for any new feature
18 | - [ ] added unit tests
19 | - [ ] edited the [what's new](../doc/changes/whats_new.rst) (if applicable)
20 |
--------------------------------------------------------------------------------
/.github/workflows/circleci.yml:
--------------------------------------------------------------------------------
1 | name: circleci-redirector
2 | on: [status]
3 | jobs:
4 |   circleci_artifacts_redirector_job:
5 |     runs-on: ubuntu-latest
6 |     name: Run CircleCI artifacts redirector
7 |     steps:
8 |       - name: GitHub Action step
9 |         uses: larsoner/circleci-artifacts-redirector-action@master
10 |         with:
11 |           api-token: ${{ secrets.CIRCLE_TOKEN }}
12 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
13 |           artifact-path: 0/dev/index.html
14 |           circleci-jobs: build_docs
15 |
--------------------------------------------------------------------------------
/.github/workflows/flake8.yml:
--------------------------------------------------------------------------------
1 | name: linter
2 |
3 | on:
4 |   push:
5 |     branches:
6 |       - 'main'
7 |   pull_request:
8 |     branches:
9 |       - 'main'
10 |
11 | jobs:
12 |   lint:
13 |     name: Lint code base
14 |     runs-on: ubuntu-latest
15 |
16 |     steps:
17 |       - name: Checkout code
18 |         uses: actions/checkout@v2
19 |
20 |       - name: Setup Python 3.8
21 |         uses: actions/setup-python@v2
22 |         with:
23 |           python-version: 3.8
24 |
25 |       - name: Lint with flake8
26 |         run: |
27 |           pip install flake8
28 |           flake8 skglm/ --max-line-length=88
29 |
30 |       - name: Check doc style with pydocstyle
31 |         run: |
32 |           pip install pydocstyle
33 |           pydocstyle skglm --ignore='D100',D102,'D104','D105','D107','D203','D213','D413',
34 |
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | name: pytest
2 |
3 | on:
4 |   push:
5 |     branches:
6 |       - 'main'
7 |
8 |   pull_request:
9 |
10 | jobs:
11 |   test:
12 |     name: Test Code
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - uses: actions/checkout@v3
16 |       - name: Set up Python 3.10
17 |         uses: actions/setup-python@v3
18 |         with:
19 |           python-version: "3.10"
20 |       - name: Set up R
21 |         uses: r-lib/actions/setup-r@v2
22 |       - name: Install package and testing tools
23 |         run: |
24 |           python -m pip install --upgrade pip
25 |           pip install .
26 |           pip install .[test]
27 |           python --version
28 |       - name: Install other dependencies
29 |         run: |
30 |           pip install rpy2
31 |           pip install statsmodels cvxopt
32 |           pip install sortedl1
33 |           # for testing Cox estimator
34 |           pip install lifelines
35 |           pip install pandas
36 |       - name: Test with pytest
37 |         run: pytest -v skglm/
38 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info
2 | *.pyc
3 |
4 | /doc/_build/
5 | /doc/gen_modules/
6 | /doc/generated/
7 | /doc/auto_examples/
8 | /doc/sg_execution_times.rst
9 |
10 | __pycache__
11 | __cache__
12 | *_cache
13 | **/.DS_Store
--------------------------------------------------------------------------------
/CITATION.bib:
--------------------------------------------------------------------------------
1 | @inproceedings{skglm,
2 | title = {Beyond L1: Faster and better sparse models with skglm},
3 | author = {Q. Bertrand and Q. Klopfenstein and P.-A. Bannier and G. Gidel and M. Massias},
4 | booktitle = {NeurIPS},
5 | year = {2022},
6 | }
7 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 |
3 | As part of the [scikit-learn-contrib](https://github.com/scikit-learn-contrib) GitHub organization, we adopt the scikit-learn [code of conduct](https://github.com/scikit-learn/scikit-learn/blob/main/CODE_OF_CONDUCT.md).
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2023, scikit-learn-contrib
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | ## A fast ⚡ and modular ⚒️ scikit-learn replacement for sparse GLMs
7 |
8 | 
9 | 
10 | [](https://pepy.tech/project/skglm)
11 | [](https://pepy.tech/project/skglm)
12 | [](https://pypi.org/project/skglm/)
13 |
14 |
15 |
16 |
17 |
18 | ``skglm`` is a Python package that offers **fast estimators** for sparse Generalized Linear Models (GLMs) that are **100% compatible with ``scikit-learn``**. It is **highly flexible** and supports a wide range of GLMs.
19 | You get to choose from ``skglm``'s already-made estimators or **customize your own** by combining the available datafits and penalties.
20 |
21 | Excited to take a tour of the ``skglm`` [documentation](https://contrib.scikit-learn.org/skglm/)?
22 |
23 | # Cite
24 |
25 | ``skglm`` is the result of perseverant research. It is licensed under [BSD 3-Clause](https://github.com/scikit-learn-contrib/skglm/blob/main/LICENSE). You are free to use it and if you do so, please cite
26 |
27 | ```bibtex
28 | @inproceedings{skglm,
29 | title = {Beyond L1: Faster and better sparse models with skglm},
30 | author = {Q. Bertrand and Q. Klopfenstein and P.-A. Bannier and G. Gidel and M. Massias},
31 | booktitle = {NeurIPS},
32 | year = {2022},
33 | }
34 |
35 | @article{moufad2023skglm,
36 | title={skglm: improving scikit-learn for regularized Generalized Linear Models},
37 | author={Moufad, Badr and Bannier, Pierre-Antoine and Bertrand, Quentin and Klopfenstein, Quentin and Massias, Mathurin},
38 | year={2023}
39 | }
40 | ```
41 |
42 | # Why ``skglm``?
43 |
44 | ``skglm`` is specifically conceived to solve sparse GLMs.
45 | It supports many models missing from ``scikit-learn`` and ensures high performance.
46 | There are several reasons to opt for ``skglm``, among which:
47 |
48 | | | |
49 | | ----- | -------------- |
50 | | **Speed** | Fast solvers able to tackle large datasets, either dense or sparse, with millions of features **up to 100 times faster** than ``scikit-learn``|
51 | | **Modularity** | User-friendly API that enables **composing custom estimators** with any combination of its existing datafits and penalties |
52 | | **Extensibility** | Flexible design that makes it **simple and easy to implement new datafits and penalties**, a matter of a few lines of code |
53 | | **Compatibility** | Estimators **fully compatible with the ``scikit-learn`` API** and drop-in replacements of its GLM estimators |
54 | | | |
55 |
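As a quick illustration of the sparse-data claim above, the same estimator API runs unchanged on a `scipy.sparse` design matrix. This is a minimal sketch rather than a benchmark, and the ``alpha`` value is arbitrary:

```python
import numpy as np
from scipy import sparse

from skglm import Lasso

# sparse design matrix in CSC format (a layout that suits coordinate descent)
X = sparse.random(100, 1_000, density=0.01, format="csc", random_state=0)
y = np.random.default_rng(0).standard_normal(100)

# fit exactly as with a dense array
Lasso(alpha=0.1).fit(X, y)
```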
56 |
57 | # Get started with ``skglm``
58 |
59 | ## Installing ``skglm``
60 |
61 | ``skglm`` is available on PyPI. Run the following command to get the latest version of the package
62 |
63 | ```shell
64 | pip install -U skglm
65 | ```
66 |
67 | It is also available on conda-forge and can be installed using, for instance:
68 |
69 | ```shell
70 | conda install -c conda-forge skglm
71 | ```
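
To check that the installation succeeded, you can print the installed version (assuming the package exposes ``__version__``, as most Python packages do):

```shell
python -c "import skglm; print(skglm.__version__)"
```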
72 |
73 | ## First steps with ``skglm``
74 |
75 | Once you have installed ``skglm``, you can run the following code snippet to fit an MCPRegression model on a toy dataset
76 |
77 | ```python
78 | # import model to fit
79 | from skglm.estimators import MCPRegression
80 | # import util to create a toy dataset
81 | from skglm.utils.data import make_correlated_data
82 |
83 | # generate a toy dataset
84 | X, y, _ = make_correlated_data(n_samples=10, n_features=100)
85 |
86 | # init and fit estimator
87 | estimator = MCPRegression()
88 | estimator.fit(X, y)
89 |
90 | # print R²
91 | print(estimator.score(X, y))
92 | ```
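
Because the estimators follow the ``scikit-learn`` API, they also plug directly into its model-selection tools. Here is a minimal sketch (the ``alpha`` grid is arbitrary and only for illustration):

```python
from sklearn.model_selection import GridSearchCV

from skglm.estimators import MCPRegression
from skglm.utils.data import make_correlated_data

X, y, _ = make_correlated_data(n_samples=50, n_features=100)

# cross-validate the regularization strength like with any scikit-learn estimator
search = GridSearchCV(MCPRegression(), {"alpha": [1e-3, 1e-2, 1e-1]}, cv=3)
search.fit(X, y)
print(search.best_params_)
```
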
93 | You can refer to the documentation to explore the list of ``skglm``'s already-made estimators.
94 |
95 | Didn't find one that suits you? You can still compose your own.
96 | Here is a code snippet that fits an MCP-regularized problem with a Huber loss.
97 |
98 | ```python
99 | # import datafit, penalty and GLM estimator
100 | from skglm.datafits import Huber
101 | from skglm.penalties import MCPenalty
102 | from skglm.estimators import GeneralizedLinearEstimator
103 |
104 | from skglm.utils.data import make_correlated_data
105 | from skglm.solvers import AndersonCD
106 |
107 | X, y, _ = make_correlated_data(n_samples=10, n_features=100)
108 | # create and fit GLM estimator with Huber loss and MCP penalty
109 | estimator = GeneralizedLinearEstimator(
110 | datafit=Huber(delta=1.),
111 | penalty=MCPenalty(alpha=1e-2, gamma=3),
112 | solver=AndersonCD()
113 | )
114 | estimator.fit(X, y)
115 | ```
116 |
117 | You will find a detailed description of the supported datafits and penalties, and how to combine them, in the API section of the documentation.
118 | You can also follow our tutorials to learn how to create your own datafit and penalty.
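
To give a flavour of what that tutorial covers, here is a rough sketch of a custom penalty. It is illustrative only: the exact methods a penalty must implement, and their signatures, are spelled out in the tutorial; the ones below are assumptions modelled on ``skglm``'s own ``L1`` penalty.

```python
import numpy as np

# import path assumed from the repository layout (skglm/penalties/base.py)
from skglm.penalties.base import BasePenalty


class NaiveL1(BasePenalty):
    """Toy re-implementation of the L1 penalty, for illustration only."""

    def __init__(self, alpha):
        self.alpha = alpha

    def value(self, w):
        # penalty value at w: alpha * ||w||_1
        return self.alpha * np.sum(np.abs(w))

    def prox_1d(self, value, stepsize, j):
        # proximal operator of alpha * |.| for one coordinate (soft-thresholding);
        # signature assumed to mirror skglm's L1, see the tutorial for the real one
        return np.sign(value) * max(np.abs(value) - self.alpha * stepsize, 0.)
```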
119 |
120 |
121 | # Contribute to ``skglm``
122 |
123 | ``skglm`` is a continuous endeavour that relies on community efforts to last and evolve. Your contribution is welcome and highly valuable. It can be
124 |
125 | - **bug report**: you may encounter a bug while using ``skglm``. Don't hesitate to report it on the [issue section](https://github.com/scikit-learn-contrib/skglm/issues).
126 | - **feature request**: you may want to extend/add new features to ``skglm``. You can use [the issue section](https://github.com/scikit-learn-contrib/skglm/issues) to make suggestions.
127 | - **pull request**: you may have fixed a bug, added a feature, or even fixed a small typo in the documentation... You can submit a [pull request](https://github.com/scikit-learn-contrib/skglm/pulls) and we will reach out to you as soon as possible.
128 |
129 |
130 |
131 |
132 |
133 | # Useful links
134 |
135 | - link to documentation: https://contrib.scikit-learn.org/skglm/
136 | - link to ``skglm`` arXiv article: https://arxiv.org/pdf/2204.07826.pdf
137 |
--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = _build
9 |
10 | GITHUB_PAGES_BRANCH = gh-pages
11 | OUTPUTDIR = _build/html
12 | STABLE_DOC_DIR = stable
13 |
14 | # User-friendly check for sphinx-build
15 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
16 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
17 | endif
18 |
19 | # Internal variables.
20 | PAPEROPT_a4 = -D latex_paper_size=a4
21 | PAPEROPT_letter = -D latex_paper_size=letter
22 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
23 | # the i18n builder cannot share the environment and doctrees with the others
24 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
25 |
26 | .PHONY: help
27 | help:
28 | @echo "Please use \`make <target>' where <target> is one of"
29 | @echo " html-noplot to make standalone HTML files, without plotting anything"
30 | @echo " html to make standalone HTML files"
31 | @echo " dirhtml to make HTML files named index.html in directories"
32 | @echo " singlehtml to make a single large HTML file"
33 | @echo " pickle to make pickle files"
34 | @echo " htmlhelp to make HTML files and a HTML help project"
35 | @echo " qthelp to make HTML files and a qthelp project"
36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
37 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
38 | @echo " changes to make an overview of all changed/added/deprecated items"
39 | @echo " linkcheck to check all external links for integrity"
40 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
41 | @echo " coverage to run coverage check of the documentation (if enabled)"
42 | @echo " install to make the html and push it online"
43 |
44 | .PHONY: clean
45 |
46 | clean:
47 | rm -rf $(BUILDDIR)/*
48 | rm -rf auto_examples/
49 | rm -rf generated/*
50 | rm -rf modules/*
51 |
52 | html-noplot:
53 | $(SPHINXBUILD) -D plot_gallery=0 -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
54 | @echo
55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
56 |
57 | .PHONY: html
58 | html:
59 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
60 | @echo
61 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
62 |
63 | .PHONY: dirhtml
64 | dirhtml:
65 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
66 | @echo
67 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
68 |
69 | .PHONY: singlehtml
70 | singlehtml:
71 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
72 | @echo
73 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
74 |
75 | .PHONY: pickle
76 | pickle:
77 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
78 | @echo
79 | @echo "Build finished; now you can process the pickle files."
80 |
81 | .PHONY: htmlhelp
82 | htmlhelp:
83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
84 | @echo
85 | @echo "Build finished; now you can run HTML Help Workshop with the" \
86 | ".hhp project file in $(BUILDDIR)/htmlhelp."
87 |
88 | .PHONY: qthelp
89 | qthelp:
90 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
91 | @echo
92 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
93 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
94 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/skglm.qhcp"
95 | @echo "To view the help file:"
96 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/skglms.qhc"
97 |
98 | .PHONY: latex
99 | latex:
100 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
101 | @echo
102 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
103 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
104 | "(use \`make latexpdf' here to do that automatically)."
105 |
106 | .PHONY: latexpdf
107 | latexpdf:
108 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
109 | @echo "Running LaTeX files through pdflatex..."
110 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
111 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
112 |
113 | .PHONY: changes
114 | changes:
115 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
116 | @echo
117 | @echo "The overview file is in $(BUILDDIR)/changes."
118 |
119 | .PHONY: linkcheck
120 | linkcheck:
121 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
122 | @echo
123 | @echo "Link check complete; look for any errors in the above output " \
124 | "or in $(BUILDDIR)/linkcheck/output.txt."
125 |
126 | .PHONY: doctest
127 | doctest:
128 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
129 | @echo "Testing of doctests in the sources finished, look at the " \
130 | "results in $(BUILDDIR)/doctest/output.txt."
131 |
132 | .PHONY: coverage
133 | coverage:
134 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
135 | @echo "Testing of coverage in the sources finished, look at the " \
136 | "results in $(BUILDDIR)/coverage/python.txt."
137 |
138 | install:
139 | touch $(OUTPUTDIR)/.nojekyll
140 | ghp-import -m "Generate Pelican site [ci skip]" -b $(GITHUB_PAGES_BRANCH) $(OUTPUTDIR)
141 | git push origin $(GITHUB_PAGES_BRANCH)
142 |
143 | .PHONY: add-stable-doc
144 | add-stable-doc:
145 | # switch to GITHUB_PAGES_BRANCH where stable build is located
146 | git fetch origin $(GITHUB_PAGES_BRANCH)
147 | git checkout $(GITHUB_PAGES_BRANCH)
148 | git pull origin $(GITHUB_PAGES_BRANCH)
149 | # move the content of the stable build to the output dir
150 | mv ../$(STABLE_DOC_DIR) $(OUTPUTDIR)
151 | # switch back to main and get to doc directory
152 | git checkout main
153 |
--------------------------------------------------------------------------------
/doc/_static/images/cox-tutorial/A_dot_v.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/cox-tutorial/A_dot_v.png
--------------------------------------------------------------------------------
/doc/_static/images/cox-tutorial/A_transpose_dot_v.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/cox-tutorial/A_transpose_dot_v.png
--------------------------------------------------------------------------------
/doc/_static/images/cox-tutorial/structure_matrix_A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/cox-tutorial/structure_matrix_A.png
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/compatible.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/compatible.png
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/compatible.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/compatible.webp
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/ease.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/ease.png
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/ease.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/ease.webp
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/energy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/energy.png
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/energy.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/energy.webp
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/finance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/finance.png
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/finance.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/finance.webp
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/healthcare.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/healthcare.png
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/healthcare.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/healthcare.webp
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/hero.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/hero.png
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/hero.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/hero.webp
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/inrialogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/inrialogo.png
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/inrialogo.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/inrialogo.webp
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/modular.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/modular.png
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/modular.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/modular.webp
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/performance.png
--------------------------------------------------------------------------------
/doc/_static/images/landingpage/performance.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/doc/_static/images/landingpage/performance.webp
--------------------------------------------------------------------------------
/doc/_static/scripts/asciimath-defines.js:
--------------------------------------------------------------------------------
1 | window.MathJax.startup = {
2 | ready: () => {
3 | AM = MathJax.InputJax.AsciiMath.AM;
4 | AM.newsymbol({ input: "ell", tag: "mi", output: "\u2113", tex: "ell", ttype: AM.TOKEN.CONST });
5 | AM.newsymbol({ input: "||", tag: "mi", output: "\u2225", tex: "Vert", ttype: AM.TOKEN.CONST });
6 | AM.newsymbol({ input: "triangleq", tag: "mo", output: "\u225C", tex: "triangleq", ttype: AM.TOKEN.CONST });
7 | MathJax.startup.defaultReady();
8 | }
9 | };
10 |
--------------------------------------------------------------------------------
/doc/_static/scripts/instantpage.min.js:
--------------------------------------------------------------------------------
1 | /*! instant.page v5.1.0 - (C) 2019 Alexandre Dieulot - https://instant.page/license */
2 | document.addEventListener("DOMContentLoaded", function () {
3 | const supportsPrefetch = document.createElement("link").relList.supports("prefetch");
4 | if (!supportsPrefetch) return;
5 | const links = document.querySelectorAll("a[href]");
6 | links.forEach(link => {
7 | link.addEventListener("mouseover", () => {
8 | const prefetch = document.createElement("link");
9 | prefetch.rel = "prefetch";
10 | prefetch.href = link.href;
11 | document.head.appendChild(prefetch);
12 | });
13 | });
14 | });
--------------------------------------------------------------------------------
/doc/_static/scripts/lazyload.js:
--------------------------------------------------------------------------------
1 | document.addEventListener("DOMContentLoaded", function () {
2 | document.querySelectorAll("img").forEach(function (img) {
3 | const src = img.getAttribute("src") || "";
4 | if (
5 | src.includes("logo.svg") ||
6 | img.classList.contains("hero-gallery-img")
7 | ) {
8 | // Don't lazy-load logo or hero image
9 | return;
10 | }
11 | img.setAttribute("loading", "lazy");
12 | });
13 | document.body.classList.add("ready");
14 | });
--------------------------------------------------------------------------------
/doc/_static/switcher.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "version": "dev",
4 | "name": "0.5dev",
5 | "url": "https://contrib.scikit-learn.org/skglm/",
6 | "preferred": true
7 | },
8 | {
9 | "version": "0.3.1",
10 | "name": "v0.3.1",
11 | "url": "https://contrib.scikit-learn.org/skglm/stable/"
12 | }
13 | ]
14 |
--------------------------------------------------------------------------------
/doc/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "!layout.html" %}
2 |
3 | {% block extrahead %}
4 | {{ super() }}
5 |
6 |
7 | {% endblock %}
--------------------------------------------------------------------------------
/doc/_templates/sidebar/version_toggler.html:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/doc/api.rst:
--------------------------------------------------------------------------------
1 | .. _api:
2 |
3 | .. meta::
4 | :description: Browse the skglm API documentation covering estimators (Lasso, ElasticNet, Cox), penalties (L1, SCAD, MCP), datafits (Logistic, Poisson), and optimized solvers.
5 |
6 | =================
7 | API
8 | =================
9 |
10 | .. currentmodule:: skglm
11 |
12 | Estimators
13 | ==========
14 |
15 | .. currentmodule:: skglm
16 |
17 | .. autosummary::
18 | :toctree: generated/
19 |
20 | GeneralizedLinearEstimator
21 | CoxEstimator
22 | ElasticNet
23 | GroupLasso
24 | Lasso
25 | LinearSVC
26 | SparseLogisticRegression
27 | MCPRegression
28 | MultiTaskLasso
29 | WeightedLasso
30 |
31 |
32 | Penalties
33 | =========
34 |
35 |
36 | .. currentmodule:: skglm.penalties
37 |
38 | .. autosummary::
39 | :toctree: generated/
40 |
41 | IndicatorBox
42 | L0_5
43 | L1
44 | L1_plus_L2
45 | L2
46 | L2_3
47 | LogSumPenalty
48 | MCPenalty
49 | PositiveConstraint
50 | WeightedL1
51 | WeightedGroupL2
52 | WeightedMCPenalty
53 | SCAD
54 | BlockSCAD
55 | SLOPE
56 |
57 |
58 | Datafits
59 | ========
60 |
61 | .. currentmodule:: skglm.datafits
62 |
63 | .. autosummary::
64 | :toctree: generated/
65 |
66 | Cox
67 | Gamma
68 | Huber
69 | Logistic
70 | LogisticGroup
71 | Poisson
72 | Quadratic
73 | QuadraticGroup
74 | QuadraticHessian
75 | QuadraticSVC
76 | WeightedQuadratic
77 |
78 |
79 | Solvers
80 | =======
81 |
82 | .. currentmodule:: skglm.solvers
83 |
84 | .. autosummary::
85 | :toctree: generated/
86 |
87 | AndersonCD
88 | FISTA
89 | GramCD
90 | GroupBCD
91 | GroupProxNewton
92 | LBFGS
93 | MultiTaskBCD
94 | ProxNewton
95 |
96 |
97 | Experimental
98 | ============
99 |
100 | .. currentmodule:: skglm.experimental
101 |
102 | .. autosummary::
103 | :toctree: generated/
104 |
105 | IterativeReweightedL1
106 | PDCD_WS
107 | Pinball
108 | SqrtQuadratic
109 | SqrtLasso
110 |
--------------------------------------------------------------------------------
/doc/changes/0.1.rst:
--------------------------------------------------------------------------------
1 | .. _changes_0_1:
2 |
3 | Version 0.1 (2022/04/25)
4 | ------------------------
5 |
6 | - Initial release (initial commit: 2022/04/20)
7 |
--------------------------------------------------------------------------------
/doc/changes/0.2.rst:
--------------------------------------------------------------------------------
1 | .. _changes_0_2:
2 |
3 | Version 0.2 (2022/10/18)
4 | ------------------------
5 |
6 | - Experimental :ref:`Square root Lasso ` class with ProxNewton or Chambolle-Pock solver (PR :gh:`57`)
7 |
8 | - Accelerated block coordinate descent solver :ref:`GroupBCD ` with working sets for problems with group penalties (PR :gh:`29`, :gh:`28`, and :gh:`26`)
9 |
10 | - Proximal Newton solver :ref:`ProxNewton ` with working sets for problems with non-quadratic datafits (PR :gh:`51`)
11 |
12 | - :ref:`Huber ` datafit (PR :gh:`14`)
13 |
14 | - Added :ref:`SCAD ` and :ref:`BlockSCAD ` penalties (PR :gh:`12`)
15 |
16 | - Fitting intercept for :ref:`Quadratic `, :ref:`Logistic ` and :ref:`Huber ` datafits (PR :gh:`55`)
17 |
18 | - Added :ref:`GramCD `, a Gram-based coordinate descent solver for problems with ``n_features`` :math:`\ll` ``n_samples`` (PR :gh:`59`)
19 |
20 | - Transform solvers from functions to classes (PR :gh:`63`)
21 |
--------------------------------------------------------------------------------
/doc/changes/0.3.rst:
--------------------------------------------------------------------------------
1 | .. _changes_0_3:
2 |
3 | Version 0.3 (2023/07/01)
4 | ------------------------
5 |
6 | - Add :ref:`Cox Estimator ` with support of L1, L2, and Elastic regularization (PR: :gh:`171`)
7 | - Reduce time overhead when fitting :ref:`Lasso Estimator ` (PR: :gh:`129`)
8 | - Add :ref:`Cox ` datafit for survival analysis (PR :gh:`180`, :gh:`168`, :gh:`159`, :gh:`157`)
9 | - Experimental :ref:`Pinball ` datafit (PR: :gh:`134`)
10 | - Add :ref:`Gamma ` datafit (PR: :gh:`113`)
11 | - Add Positivity constraint to :ref:`L1 `, :ref:`L1_plus_L2 `, :ref:`WeightedL1 ` (PR: :gh:`110`)
12 | - Add :ref:`PositiveConstraint ` (PR: :gh:`126`)
13 | - Add :ref:`L-BFGS ` solver for problems with smooth datafits and penalties (PR: :gh:`165`, :gh:`173`)
14 | - Experimental :ref:`Primal-dual coordinate descent solver ` for problems with non-smooth datafits (PR: :gh:`131`)
15 | - Add support of ``float32`` in :ref:`ProxNewton `
16 |
--------------------------------------------------------------------------------
/doc/changes/0.4.rst:
--------------------------------------------------------------------------------
1 | .. _changes_0_4:
2 |
3 | Version 0.4 (2025/04/08)
4 | -------------------------
5 | - Add :ref:`GroupLasso Estimator ` (PR: :gh:`228`)
6 | - Add support and tutorial for positive coefficients to :ref:`Group Lasso Penalty ` (PR: :gh:`221`)
7 | - Check compatibility with datafit and penalty in solver (PR :gh:`137`)
8 | - Add support to weight samples in the quadratic datafit :ref:`Weighted Quadratic Datafit ` (PR: :gh:`258`)
9 | - Add support for ElasticNet regularization (`penalty="l1_plus_l2"`) to :ref:`SparseLogisticRegression ` (PR: :gh:`244`)
10 |
11 | Version 0.3.1 (2023/12/21)
12 | --------------------------
13 | - Add support for weights and positive coefficients to :ref:`MCPRegression Estimator ` (PR: :gh:`184`)
14 | - Move solver specific computations from ``Datafit.initialize()`` to separate ``Datafit`` methods to ease ``Solver`` - ``Datafit`` compatibility check (PR: :gh:`192`)
15 | - Add :ref:`LogSumPenalty ` (PR: :gh:`127`)
16 | - Remove abstract methods in ``BaseDatafit`` and ``BasePenalty`` to make the solver/penalty/datafit compatibility check easier (PR :gh:`205`)
17 | - Add fixed-point distance to build working sets in :ref:`ProxNewton ` solver (:gh:`138`)
18 |
--------------------------------------------------------------------------------
/doc/changes/0.5.rst:
--------------------------------------------------------------------------------
1 | .. _changes_0_5:
2 |
3 | Version 0.5 (in progress)
4 | -------------------------
5 | - Add support for fitting an intercept in :ref:`SqrtLasso ` (PR: :gh:`298`)
6 |
--------------------------------------------------------------------------------
/doc/changes/whats_new.rst:
--------------------------------------------------------------------------------
1 | .. _whats_new:
2 |
3 | What's new
4 | ==========
5 |
6 | .. currentmodule:: skglm
7 |
8 | .. include:: 0.5.rst
9 |
10 | .. include:: 0.4.rst
11 |
12 | .. include:: 0.3.rst
13 |
14 | .. include:: 0.2.rst
15 |
16 | .. include:: 0.1.rst
17 |
--------------------------------------------------------------------------------
/doc/contribute.rst:
--------------------------------------------------------------------------------
1 | .. _contribute:
2 |
3 | .. meta::
4 | :description: Contribute to skglm by reporting bugs, suggesting features, or submitting pull requests. Join us in making skglm even better!
5 | :og:title: Contribute to skglm
6 |
7 | Contribute
8 | =======================
9 |
10 | ``skglm`` is a continuous endeavour that relies on community efforts to last and evolve.
11 | Your contribution is welcome and highly valuable. You can help with
12 |
13 | **bug report**
14 | ``skglm`` runs unit tests on the codebase to prevent bugs.
15 | Help us tighten these tests by reporting any bug that you encounter.
16 | To do so, use the `issue section `_.
17 |
18 | **feature request**
19 | We are constantly improving ``skglm`` and we would like to align that with the user needs.
20 | We highly appreciate any suggestion to extend or add new features to ``skglm``.
21 | You can use the `issue section `_ to make suggestions.
22 |
23 | **pull request**
24 | You may have fixed a bug, added a feature, or even fixed a small typo in the documentation...
25 | You can submit a `pull request `_
26 | to integrate your changes and we will reach out to you shortly.
27 | If this is your first pull request, you can refer to `this scikit-learn guide `_.
28 |
29 | As part of the `scikit-learn-contrib `_ GitHub organization, we adopt the scikit-learn `code of conduct `_.
30 |
31 | .. note::
32 |
33 | If you are willing to contribute code to ``skglm``, check the section below to learn how to install the development version.
34 |
35 |
36 |
37 | Setup ``skglm`` on your local machine
38 | ---------------------------------------
39 |
40 | Here are the key steps to help you set up ``skglm`` on your machine in case you want to
41 | contribute code or documentation.
42 |
43 | 1. `Fork the repository `_ and run the following command to clone it on your local machine, make sure to replace ``{YOUR_GITHUB_USERNAME}`` with your GitHub username
44 |
45 | .. code-block:: shell
46 |
47 | $ git clone https://github.com/{YOUR_GITHUB_USERNAME}/skglm
48 |
49 |
50 | 2. ``cd`` to the ``skglm`` directory and install it in editable mode by running
51 |
52 | .. code-block:: shell
53 |
54 | $ cd skglm
55 | $ pip install -e .
56 |
57 |
58 | 3. To build the documentation locally, run
59 |
60 | .. tab-set::
61 |
62 | .. tab-item:: with plots in the example gallery
63 |
64 | .. code-block:: shell
65 |
66 | $ cd doc
67 | $ pip install .[doc]
68 | $ make html
69 |
70 | .. tab-item:: without plots in the example gallery
71 |
72 | .. code-block:: shell
73 |
74 | $ cd doc
75 | $ pip install .[doc]
76 | $ make html-noplot
77 |
--------------------------------------------------------------------------------
/doc/getting_started.rst:
--------------------------------------------------------------------------------
1 | .. _getting_started:
2 |
3 | .. meta::
4 | :description: Learn how to fit Lasso and custom GLM estimators with skglm, a modular Python library compatible with scikit-learn. Includes examples and code snippets.
5 |
6 | ===============
7 | Start
8 | ===============
9 | ---------------
10 |
11 | This page provides a starter example to get familiar with ``skglm`` and explore some of its features.
12 |
13 | In the first section, we fit a Lasso estimator on a high-dimensional
14 | toy dataset (the number of features is much greater than the number of samples). Unpenalized linear models don't generalize well
15 | to unseen data. By adding an :math:`\ell_1` penalty, we can train estimators that overcome this drawback.
16 |
17 | In the last section, we explore other combinations of datafit and penalty to create a custom estimator that achieves a lower prediction error,
18 | namely an :math:`\ell_1` Huber regression. We show that ``skglm`` is perfectly adapted to these experiments thanks to its modular design.
19 |
20 | Beforehand, make sure that you have already installed ``skglm``
21 |
22 | .. code-block:: shell
23 |
24 | # Installing from PyPI using pip
25 | pip install -U skglm
26 |
27 | # Installing from conda-forge using conda
28 | conda install -c conda-forge skglm
29 |
30 | -------------------------
31 |
32 |
33 | Fitting a Lasso estimator
34 | -------------------------
35 |
36 | Let's start by generating a toy dataset and splitting it into train and test sets.
37 | For that, we will use ``scikit-learn``'s
38 | `make_regression `_
39 |
40 | .. code-block:: python
41 |
42 | # imports
43 | from sklearn.datasets import make_regression
44 | from sklearn.model_selection import train_test_split
45 |
46 | # generate toy data
47 | X, y = make_regression(n_samples=100, n_features=1000)
48 |
49 | # split data
50 | X_train, X_test, y_train, y_test = train_test_split(X, y)
51 |
52 | Then let's fit ``skglm``'s :ref:`Lasso ` estimator and print its score on the test set.
53 |
54 | .. code-block:: python
55 |
56 | # import estimator
57 | from skglm import Lasso
58 |
59 | # init and fit
60 | estimator = Lasso()
61 | estimator.fit(X_train, y_train)
62 |
63 | # compute R²
64 | estimator.score(X_test, y_test)
65 |
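The :math:`\ell_1` penalty drives part of the coefficients to exactly zero; how many depends on the regularization strength ``alpha``. You can check this directly on the fitted estimator (``coef_`` follows the usual ``scikit-learn`` convention):

.. code-block:: python

    import numpy as np

    # number of non-zero coefficients among the 1000 features
    print(np.sum(estimator.coef_ != 0))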
66 |
67 | .. note::
68 |
69 | - The first fit after importing ``skglm`` has a compilation overhead, as ``skglm`` uses `Numba `_.
70 |   Subsequent fits achieve top speed since the Numba compilation is cached.
71 |
72 | ``skglm`` has several other ``scikit-learn`` compatible estimators.
73 | Check the :ref:`API ` for more information about the available estimators.
74 |
75 |
76 | Fitting :math:`\ell_1` Huber regression
77 | ---------------------------------------
78 |
79 | Suppose that this dataset contains outliers and we would like to mitigate their effect on the learned coefficients
80 | while keeping an estimator that generalizes well to unseen data. Ideally, we would like to fit an :math:`\ell_1` Huber regressor.
81 |
82 | ``skglm`` offers high flexibility to compose custom estimators. Through a simple API, it is possible to combine any
83 | ``skglm`` :ref:`datafit ` and :ref:`penalty `.
84 |
85 | .. note::
86 |
87 | - :math:`\ell_1` regularization is not supported in ``scikit-learn`` for HuberRegressor
88 |
89 | Let's explore how to achieve that.
90 |
91 |
92 | Generate corrupt data
93 | *********************
94 |
95 | We will use the same script as before except that we will take 10 samples and corrupt their values.
96 |
97 | .. code-block:: python
98 |
99 | # imports
100 | import numpy as np
101 | from sklearn.datasets import make_regression
102 | from sklearn.model_selection import train_test_split
103 |
104 | # generate toy data
105 | X, y = make_regression(n_samples=100, n_features=1000)
106 |
107 | # select and corrupt 10 random samples
108 | y[np.random.choice(y.shape[0], 10)] = 100 * y.max()
109 |
110 | # split data
111 | X_train, X_test, y_train, y_test = train_test_split(X, y)
112 |
113 |
114 | Now let's compose a custom estimator using :ref:`GeneralizedLinearEstimator `.
115 | It's the go-to way to create a custom estimator by combining a datafit and a penalty.
116 |
117 | .. code-block:: python
118 |
119 | # import penalty and datafit
120 | from skglm.penalties import L1
121 | from skglm.datafits import Huber
122 |
123 | # import GLM estimator
124 | from skglm import GeneralizedLinearEstimator
125 |
126 | # build and fit estimator
127 | estimator = GeneralizedLinearEstimator(
128 | Huber(1.),
129 | L1(alpha=1.)
130 | )
131 | estimator.fit(X_train, y_train)
132 |
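You can then evaluate the custom estimator on the held-out set just as before. This is only an illustrative check: the score depends on the randomly generated data and on the arbitrary hyperparameters above.

.. code-block:: python

    from sklearn.metrics import mean_absolute_error

    # prediction error on the held-out, partly corrupted data
    y_pred = estimator.predict(X_test)
    print(mean_absolute_error(y_test, y_pred))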
133 |
134 | .. note::
135 |
136 | - Here the arguments given to the datafit and penalty are arbitrary, chosen just for the sake of illustration.
137 |
138 | ``GeneralizedLinearEstimator`` allows combining any penalty and datafit implemented in ``skglm``.
139 | If you don't find an estimator in the ``estimators`` module, you can build your own by combining the appropriate datafit and penalty
140 | and passing them to ``GeneralizedLinearEstimator``. Explore the list of supported :ref:`datafits ` and :ref:`penalties `.
141 |
142 | .. important::
143 |
144 | - It is possible to create your own datafit and penalties. Check the tutorials on :ref:`how to add a custom datafit `
145 | and :ref:`how to add a custom penalty `.
146 |
147 |
148 | Explore further advanced topics and get hands-on examples on the :ref:`tutorials page `
--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. skglm documentation master file, created by
3 | sphinx-quickstart on Mon May 23 16:22:52 2016.
4 | You can adapt this file completely to your liking, but it should at least
5 | contain the root `toctree` directive.
6 |
7 | .. meta::
8 | :og:title: skglm: Fast, Scalable & Flexible Regularized GLMs and Sparse Modeling for Python
9 | :description: skglm is the fastest, most modular Python library for regularized GLMs—fully scikit-learn compatible for advanced statistical modeling.
10 | :og:image: _static/images/logo.svg
11 | :og:url: https://contrib.scikit-learn.org/skglm/
12 | :keywords: Generalized Linear Models, GLM, scikit-learn, Lasso, ElasticNet, Cox, modular, efficient, regularized
13 | :page-layout: full
14 |
15 | skglm
16 | ======
17 | .. container:: hero-container
18 |
19 | .. container:: hero-text
20 |
21 | .. rubric:: skglm
22 | :class: hero-title
23 |
24 |
25 | .. container:: hero-description
26 |
27 | .. raw:: html
28 |
29 | The fastest and most modular Python package for regularized Generalized Linear Models — designed for researchers and engineers who demand speed, structure, and scikit-learn compatibility.
30 |
31 | .. container:: hero-buttons
32 |
33 | `Get Started `_
34 |
35 | .. container:: hero-gallery
36 |
37 | .. image:: _static/images/landingpage/hero.webp
38 | :alt: Illustration showing modeling in skglm logo style
39 | :class: hero-gallery-img
40 | :target: auto_examples/index.html
41 |
42 |
43 | .. container:: section-spacer
44 |
45 | .. container:: section-intro
46 |
47 | .. rubric:: Simple. Modular. Powerful.
48 | :class: section-title
49 |
50 | .. container:: section-subtitle
51 |
52 | Everything you need to build fast, flexible, and scalable GLMs — in one modular library.
53 |
54 | .. container:: features-grid
55 |
56 | .. container:: feature-box
57 |
58 | .. image:: _static/images/landingpage/ease.webp
59 | :alt: Ease icon
60 | :class: feature-icon
61 |
62 | .. container:: feature-text
63 |
64 | .. rubric:: Easy to Use
65 | :class: feature-title
66 |
67 | Get started in minutes with an intuitive API, comprehensive examples, and out-of-the-box estimators.
68 |
69 | .. container:: feature-box
70 |
71 | .. image:: _static/images/landingpage/modular.webp
72 | :alt: Modular icon
73 | :class: feature-icon
74 |
75 | .. container:: feature-text
76 |
77 | .. rubric:: Modular Design
78 | :class: feature-title
79 |
80 | Compose custom estimators from interchangeable datafits and penalties tailored to your use case.
81 |
82 | .. container:: feature-box
83 |
84 | .. image:: _static/images/landingpage/performance.webp
85 | :alt: Performance icon
86 | :class: feature-icon
87 |
88 | .. container:: feature-text
89 |
90 | .. rubric:: Speed
91 | :class: feature-title
92 |
93 | Solve large-scale problems with lightning-fast solvers — up to 100× faster than ``scikit-learn``.
94 |
95 | .. container:: feature-box
96 |
97 | .. image:: _static/images/landingpage/compatible.webp
98 | :alt: Compatibility icon
99 | :class: feature-icon
100 |
101 | .. container:: feature-text
102 |
103 | .. rubric:: Plug & Extend
104 | :class: feature-title
105 |
106 | Fully scikit-learn compatible and ready for custom research and production workflows.
107 |
108 | .. container:: section-spacer
109 |
110 | .. container:: section-intro
111 |
112 | .. rubric:: Support Us
113 | :class: section-title
114 |
115 | .. container:: support-box
116 |
117 | .. rubric:: Citation
118 | :class: support-title
119 | Using ``skglm`` in your work? You are free to use it. It is licensed under
120 | `BSD 3-Clause `_.
121 | As the result of perseverant academic research, the best way to support its development is by citing it.
122 | ::
123 | @inproceedings{skglm,
124 | title = {Beyond L1: Faster and better sparse models with skglm},
125 | author = {Q. Bertrand and Q. Klopfenstein and P.-A. Bannier
126 | and G. Gidel and M. Massias},
127 | booktitle = {NeurIPS},
128 | year = {2022},
129 | }
130 |
131 | @article{moufad2023skglm,
132 | title = {skglm: improving scikit-learn for regularized Generalized Linear Models},
133 | author = {Moufad, Badr and Bannier, Pierre-Antoine and Bertrand, Quentin
134 | and Klopfenstein, Quentin and Massias, Mathurin},
135 | year = {2023}
136 | }
137 |
138 | .. container:: support-box
139 |
140 | .. rubric:: Contributions
141 | :class: support-title
142 | Contributions, improvements, and bug reports are always welcome. Help us make ``skglm`` better!
143 |
144 | .. container:: hero-buttons
145 |
146 | `How to Contribute `_
147 |
148 | .. container:: section-spacer
149 |
150 | .. container:: section-intro
151 |
152 | .. rubric:: Real-World Applications
153 | :class: section-title
154 |
155 | .. container:: section-subtitle
156 |
157 | ``skglm`` drives impactful solutions across diverse sectors with its fast, modular approach to regularized GLMs and sparse modeling.
158 | Find various advanced topics in our `Tutorials `_ and `Examples `_ sections.
159 |
160 | .. container:: applications-grid
161 |
162 | .. container:: application-box
163 |
164 | .. image:: _static/images/landingpage/healthcare.webp
165 | :alt: Healthcare icon
166 | :class: application-icon
167 |
168 | .. container:: application-text
169 |
170 | .. rubric:: Healthcare
171 | :class: application-title
172 |
173 | Enhance clinical trial analytics and early biomarker discovery by efficiently analyzing high-dimensional biological data, with features such as Cox regression modeling.
174 |
175 | .. container:: application-box
176 |
177 | .. image:: _static/images/landingpage/finance.webp
178 | :alt: Finance icon
179 | :class: application-icon
180 |
181 | .. container:: application-text
182 |
183 | .. rubric:: Finance
184 | :class: application-title
185 |
186 | Conduct transparent and interpretable risk modeling with scalable, robust sparse regression across vast datasets.
187 |
188 | .. container:: application-box
189 |
190 | .. image:: _static/images/landingpage/energy.webp
191 | :alt: Energy icon
192 | :class: application-icon
193 |
194 | .. container:: application-text
195 |
196 | .. rubric:: Energy
197 | :class: application-title
198 |
199 | Optimize real-time electricity forecasting and load analysis by processing large time-series datasets for predictive maintenance and anomaly detection.
200 |
201 | .. container:: sponsor-banner
202 |
203 | .. container:: sponsor-inline
204 |
205 |
206 | This project is made possible thanks to the support of
207 |
208 | .. image:: _static/images/landingpage/inrialogo.webp
209 | :alt: Inria logo
210 | :class: sponsor-logo
211 | :target: https://www.inria.fr/en
212 |
213 |
214 | .. it is mandatory to keep the toctree here although it doesn't show up in the page
215 | .. when adding/modifying pages, don't forget to update the toctree
216 |
217 | .. toctree::
218 | :maxdepth: 1
219 | :hidden:
220 | :includehidden:
221 |
222 | getting_started.rst
223 | tutorials/tutorials.rst
224 | auto_examples/index.rst
225 | api.rst
226 | contribute.rst
227 | changes/whats_new.rst
228 |
--------------------------------------------------------------------------------
/doc/robots.txt:
--------------------------------------------------------------------------------
1 | User-agent: *
2 | Sitemap: https://contrib.scikit-learn.org/skglm/sitemap.xml
--------------------------------------------------------------------------------
/doc/sphinxext/gh_substitutions.py:
--------------------------------------------------------------------------------
1 | """Provide a convenient way to link to GitHub issues and pull requests.
2 |
3 | Adapted from:
4 | https://doughellmann.com/blog/2010/05/09/defining-custom-roles-in-sphinx/
5 | """
6 | from docutils.nodes import reference
7 | from docutils.parsers.rst.roles import set_classes
8 |
9 |
10 | def gh_role(name, rawtext, pr_number, lineno, inliner, options={}, content=[]):
11 | """Link to a GitHub pull request."""
12 | ref = f'https://github.com/scikit-learn-contrib/skglm/pull/{pr_number}'
13 | set_classes(options)
14 | node = reference(rawtext, '#' + pr_number, refuri=ref, **options)
15 | return [node], []
16 |
17 |
18 | def setup(app):
19 | """Do setup."""
20 | app.add_role('gh', gh_role)
21 | return
22 |
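23 | # Example use in the documentation (illustrative): once `setup` has registered
24 | # the role, writing ":gh:`1234`" in an .rst page renders as "#1234", linking to
25 | # https://github.com/scikit-learn-contrib/skglm/pull/1234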
--------------------------------------------------------------------------------
/doc/sphinxext/github_link.py:
--------------------------------------------------------------------------------
1 | # this code is a copy/paste of
2 | # https://github.com/scikit-learn/scikit-learn/blob/
3 | # b0b8a39d8bb80611398e4c57895420d5cb1dfe09/doc/sphinxext/github_link.py
4 |
5 | from operator import attrgetter
6 | import inspect
7 | import subprocess
8 | import os
9 | import sys
10 | from functools import partial
11 |
12 | REVISION_CMD = "git rev-parse --short HEAD"
13 |
14 |
15 | def _get_git_revision():
16 | try:
17 | revision = subprocess.check_output(REVISION_CMD.split()).strip()
18 | except (subprocess.CalledProcessError, OSError):
19 | print("Failed to execute git to get revision")
20 | return None
21 | return revision.decode("utf-8")
22 |
23 |
24 | def _linkcode_resolve(domain, info, package, url_fmt, revision):
25 | """Determine a link to online source for a class/method/function
26 | This is called by sphinx.ext.linkcode
27 | An example with a long-untouched module that everyone has
28 | >>> _linkcode_resolve('py', {'module': 'tty',
29 | ... 'fullname': 'setraw'},
30 | ... package='tty',
31 | ... url_fmt='http://hg.python.org/cpython/file/'
32 | ... '{revision}/Lib/{package}/{path}#L{lineno}',
33 | ... revision='xxxx')
34 | 'http://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18'
35 | """
36 |
37 | if revision is None:
38 | return
39 | if domain not in ("py", "pyx"):
40 | return
41 | if not info.get("module") or not info.get("fullname"):
42 | return
43 |
44 | class_name = info["fullname"].split(".")[0]
45 | module = __import__(info["module"], fromlist=[class_name])
46 | obj = attrgetter(info["fullname"])(module)
47 |
48 | # Unwrap the object to get the correct source
49 | # file in case that is wrapped by a decorator
50 | obj = inspect.unwrap(obj)
51 |
52 | try:
53 | fn = inspect.getsourcefile(obj)
54 | except Exception:
55 | fn = None
56 | if not fn:
57 | try:
58 | fn = inspect.getsourcefile(sys.modules[obj.__module__])
59 | except Exception:
60 | fn = None
61 | if not fn:
62 | return
63 |
64 | fn = os.path.relpath(fn, start=os.path.dirname(__import__(package).__file__))
65 | try:
66 | lineno = inspect.getsourcelines(obj)[1]
67 | except Exception:
68 | lineno = ""
69 | return url_fmt.format(revision=revision, package=package, path=fn, lineno=lineno)
70 |
71 |
72 | def make_linkcode_resolve(package, url_fmt):
73 | """Returns a linkcode_resolve function for the given URL format
74 | revision is a git commit reference (hash or name)
75 | package is the name of the root module of the package
76 | url_fmt is along the lines of ('https://github.com/USER/PROJECT/'
77 | 'blob/{revision}/{package}/'
78 | '{path}#L{lineno}')
79 | """
80 | revision = _get_git_revision()
81 | return partial(
82 | _linkcode_resolve, revision=revision, package=package, url_fmt=url_fmt
83 | )
84 |
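85 | # Typical use (illustrative sketch): in conf.py one would register, e.g.,
86 | # linkcode_resolve = make_linkcode_resolve(
87 | #     "skglm",
88 | #     "https://github.com/scikit-learn-contrib/skglm/"
89 | #     "blob/{revision}/{package}/{path}#L{lineno}",
90 | # )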
--------------------------------------------------------------------------------
/doc/tutorials/add_datafit.rst:
--------------------------------------------------------------------------------
1 | .. _how_to_add_custom_datafit:
2 |
3 | .. meta::
4 | :description: Tutorial on creating and implementing a custom datafit in skglm. Step-by-step guide includes deriving gradients, Hessians, and an example with Poisson datafit.
5 |
6 | How to Add a Custom Datafit
7 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
8 |
9 | Motivated by generalized linear models but not limited to them, ``skglm`` solves problems of the form
10 |
11 | .. math::
12 | \hat{\beta} \in
13 | \arg\min_{\beta \in \mathbb{R}^p}
14 | F(X\beta) + \Omega(\beta)
15 | := \sum_{i=1}^n f_i([X\beta]_i) + \sum_{j=1}^p \Omega_j(\beta_j)
16 | \ .
17 |
18 |
19 | Here, :math:`X \in \mathbb{R}^{n \times p}` denotes the design matrix with :math:`n` samples and :math:`p` features,
20 | and :math:`\beta \in \mathbb{R}^p` is the coefficient vector.
21 |
22 | skglm can solve any problem of this form with an arbitrary smooth datafit :math:`F` and an arbitrary penalty :math:`\Omega` whose proximal operator can be evaluated explicitly, by defining two classes: a ``Penalty`` and a ``Datafit``.
23 |
24 | They can then be passed to a :class:`~skglm.GeneralizedLinearEstimator`.
25 |
26 | .. code-block:: python
27 |
28 | clf = GeneralizedLinearEstimator(
29 | MyDatafit(),
30 | MyPenalty(),
31 | )
32 |
33 |
34 | A ``Datafit`` is a jitclass that must inherit from the ``BaseDatafit`` class:
35 |
36 | .. literalinclude:: ../../skglm/datafits/base.py
37 | :pyobject: BaseDatafit
38 |
39 |
40 | To define a custom datafit, you need to inherit from the ``BaseDatafit`` class and implement the methods required by the targeted solver.
41 | These methods can be found in the solver documentation.
42 | Optionally, overloading the methods with the suffix ``_sparse`` adds support for sparse datasets (CSC matrix).
43 |
44 | This tutorial shows how to implement the :ref:`Poisson ` datafit and fit it with the :ref:`ProxNewton ` solver.
45 |
46 |
47 | A case in point: defining Poisson datafit
48 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
49 |
50 | First, this requires deriving some quantities used by the solvers, such as the gradient and the Hessian matrix of the datafit.
51 | With :math:`y \in \mathbb{R}^n` the target vector, the Poisson datafit reads
52 |
53 | .. math::
54 | f(X\beta) = \frac{1}{n}\sum_{i=1}^n \exp([X\beta]_i) - y_i[X\beta]_i
55 | \ .
56 |
57 |
58 | Let's define some useful quantities to simplify our computations. For :math:`z \in \mathbb{R}^n` and :math:`\beta \in \mathbb{R}^p`,
59 |
60 | .. math::
61 | f(z) = \sum_{i=1}^n f_i(z_i) \qquad F(\beta) = f(X\beta)
62 | \ .
63 |
64 |
65 | Computing the gradient of :math:`F` and its Hessian matrix yields
66 |
67 | .. math::
68 |     \nabla F(\beta) = X^{\top} \underbrace{\nabla f(X\beta)}_{\text{raw grad}} \qquad \nabla^2 F(\beta) = X^{\top} \underbrace{\nabla^2 f(X\beta)}_{\text{raw hessian}} X
69 | \ .
70 |
71 |
72 | Besides, it directly follows that
73 |
74 | .. math::
75 |     \nabla f(z) = (f_i'(z_i))_{1 \leq i \leq n} \qquad \nabla^2 f(z) = \text{diag}(f_i''(z_i))_{1 \leq i \leq n}
76 | \ .
77 |
78 |
79 | We can now apply these definitions to the Poisson datafit:
80 |
81 | .. math::
82 | f_i(z_i) = \frac{1}{n} \left(\exp(z_i) - y_iz_i\right)
83 | \ .
84 |
85 |
86 | Therefore,
87 |
88 | .. math::
89 |     f_i'(z_i) = \frac{1}{n}(\exp(z_i) - y_i) \qquad f_i''(z_i) = \frac{1}{n}\exp(z_i)
90 | \ .
91 |
92 |
93 | Computing ``raw_grad`` and ``raw_hessian`` for the Poisson datafit yields
94 |
95 | .. math::
96 |     \nabla f(X\beta) = \frac{1}{n}(\exp([X\beta]_i) - y_i)_{1 \leq i \leq n} \qquad \nabla^2 f(X\beta) = \frac{1}{n}\text{diag}(\exp([X\beta]_i))_{1 \leq i \leq n}
97 | \ .
98 |
99 |
100 | Both ``raw_grad`` and ``raw_hessian`` are methods used by the ``ProxNewton`` solver.
101 | But other optimizers require different methods to be implemented. For instance, ``AndersonCD`` uses the ``gradient_scalar`` method:
102 | it is the derivative of the datafit with respect to the :math:`j`-th coordinate of :math:`\beta`.
103 |
104 | For the Poisson datafit, this yields
105 |
106 | .. math::
107 | \frac{\partial F(\beta)}{\partial \beta_j} = \frac{1}{n}
108 | \sum_{i=1}^n X_{i,j} \left(
109 |     \exp([X\beta]_i) - y_i
110 | \right)
111 | \ .
112 |
113 |
114 | When implementing these quantities in the ``Poisson`` datafit class, this gives:
115 |
116 | .. literalinclude:: ../../skglm/datafits/single_task.py
117 | :pyobject: Poisson
118 |
119 |
120 | Note that we have not initialized any quantities in the ``initialize`` method.
121 | Usually, it serves to compute datafit attributes specific to a dataset ``X, y`` for computational efficiency, for example the computation of ``X.T @ y`` in the :ref:`Quadratic ` datafit.
122 |
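123 | As a quick end-to-end sketch (synthetic data, an arbitrary regularization level, and classes from the skglm API), the Poisson datafit can be fitted with the ProxNewton solver as follows:
124 | 
125 | .. code-block:: python
126 | 
127 |     import numpy as np
128 | 
129 |     from skglm import GeneralizedLinearEstimator
130 |     from skglm.datafits import Poisson
131 |     from skglm.penalties import L1
132 |     from skglm.solvers import ProxNewton
133 | 
134 |     rng = np.random.default_rng(0)
135 |     X = rng.standard_normal((100, 40))
136 |     # strictly positive targets, a safe choice for the Poisson datafit
137 |     y = 1.0 + rng.poisson(1.0, size=100)
138 | 
139 |     clf = GeneralizedLinearEstimator(Poisson(), L1(alpha=0.01), solver=ProxNewton())
140 |     clf.fit(X, y)
141 |     print(f"{np.sum(clf.coef_ != 0)} nonzero coefficients")
142 | 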
--------------------------------------------------------------------------------
/doc/tutorials/add_penalty.rst:
--------------------------------------------------------------------------------
1 | :orphan:
2 |
3 | .. _how_to_add_custom_penalty:
4 |
5 | .. meta::
6 | :description: Step-by-step tutorial on adding custom penalties in skglm. Covers implementation details, proximal operators, and optimality conditions using the L1 penalty.
7 |
8 | How to Add a Custom Penalty
9 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
10 |
11 | skglm supports any arbitrary proximable penalty.
12 |
13 |
14 | It is implemented as a jitclass which must inherit from the ``BasePenalty`` class:
15 |
16 | .. literalinclude:: ../../skglm/penalties/base.py
17 | :pyobject: BasePenalty
18 |
19 | To implement your own penalty, you only need to define a new jitclass, inheriting from ``BasePenalty``, and implement the methods required by the targeted solver.
20 | These methods can be found in the solver documentation.
21 |
22 |
23 | A case in point: defining L1 penalty
24 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
25 |
26 | We detail how the :math:`\ell_1` penalty is implemented in skglm.
27 | For a vector :math:`\beta \in \mathbb{R}^p`, the :math:`\ell_1` penalty is defined as follows:
28 |
29 | .. math::
30 | || \beta ||_1 = \sum_{i=1}^p |\beta _i| \ .
31 |
32 |
33 | The regularization level is controlled by the hyperparameter :math:`\lambda \in \mathbb{R}^+`, which is defined and initialized in the constructor of the class.
34 |
35 | The method ``get_spec`` is used to strongly type the attributes of the penalty object, thus allowing Numba to JIT-compile the class.
36 | It should return an iterable of tuples, the first element being the name of the attribute, the second its Numba type (e.g. ``float64``, ``bool_``).
37 | Additionally, a penalty should implement ``params_to_dict``, a helper method that returns all the parameters of the penalty as a dictionary.
38 |
39 | To optimize an objective with a given penalty, skglm needs at least the proximal operator of the penalty applied to the :math:`j`-th coordinate.
40 | For the ``L1`` penalty, it is the well-known soft-thresholding operator:
41 |
42 | .. math::
43 |     \text{ST}(\beta, \lambda) = \max(0, |\beta| - \lambda) \, \text{sgn}(\beta) \ .
44 |
45 |
46 | Note that skglm expects the threshold level to be the regularization hyperparameter :math:`\lambda \in \mathbb{R}^+` **scaled by** the stepsize.
47 |
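48 | For intuition, here is a minimal NumPy sketch of this operator acting on a single coordinate (an illustration, not the exact ``prox_1d`` implementation):
49 | 
50 | .. code-block:: python
51 | 
52 |     import numpy as np
53 | 
54 |     def soft_threshold(x, threshold):
55 |         # prox of threshold * |.| evaluated at the scalar x
56 |         return np.sign(x) * max(0., abs(x) - threshold)
57 | 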
48 |
49 | Besides, by default all solvers in skglm have ``ws_strategy`` set to ``subdiff``.
50 | This means that the optimality conditions (and thus the stopping criterion) are computed using the method ``subdiff_distance`` of the penalty.
51 | If not implemented, the user should set ``ws_strategy`` to ``fixpoint``.
52 |
53 | For the :math:`\ell_1` penalty, the distance of the negative gradient of the datafit :math:`F` to the subdifferential of the penalty reads
54 |
55 | .. math::
56 |     \text{dist}(-\nabla_j F(\beta), \partial |\beta_j|) =
57 |     \begin{cases} \max(0, | \nabla_j F(\beta) | - \lambda) & \text{if } \beta_j = 0 \\
58 |     | -\nabla_j F(\beta) - \lambda \, \text{sgn}(\beta_j) | & \text{otherwise} \end{cases}
59 |     \ .
60 |
61 |
62 | The method ``is_penalized`` returns a binary mask with the penalized features.
63 | For the :math:`\ell_1` penalty, all the coefficients are penalized.
64 | Finally, ``generalized_support`` returns the generalized support of the penalty for some coefficient vector ``w``.
65 | It is typically the non-zero coefficients of the solution vector for :math:`\ell_1`.
66 |
67 |
68 | Optionally, a penalty might implement ``alpha_max`` which returns the smallest :math:`\lambda` for which the optimal solution is a null vector.
69 | Note that since ``lambda`` is a reserved keyword in Python, ``alpha`` in skglm codebase corresponds to :math:`\lambda`.
70 |
71 | Putting it all together, this gives the implementation of the ``L1`` penalty:
72 |
73 |
74 | .. literalinclude:: ../../skglm/penalties/separable.py
75 | :pyobject: L1
76 |
77 |
--------------------------------------------------------------------------------
/doc/tutorials/alpha_max.rst:
--------------------------------------------------------------------------------
1 | .. _alpha_max:
2 |
3 | .. meta::
4 | :description: Tutorial explaining the critical regularization strength (alpha_max) in skglm. Learn conditions for zero solutions in L1-regularized optimization problems.
5 |
6 | ==========================================================
7 | Critical Regularization Strength above which Solution is 0
8 | ==========================================================
9 |
10 | This tutorial shows that for :math:`\lambda \geq \lambda_{\text{max}} = || \nabla f(0) ||_{\infty}`, the solution to
11 | :math:`\min f(x) + \lambda || x ||_1` is 0.
12 |
13 | In skglm, we thus frequently use
14 |
15 | .. code-block::
16 |
17 | alpha_max = np.max(np.abs(gradient0))
18 |
19 | and choose the regularization strength :math:`\alpha` as a fraction of this critical value, e.g. ``alpha = 0.01 * alpha_max``.
20 |
21 | Problem setup
22 | =============
23 |
24 | Consider the optimization problem:
25 |
26 | .. math::
27 | \min_x f(x) + \lambda || x||_1
28 |
29 | where:
30 |
31 | - :math:`f: \mathbb{R}^d \to \mathbb{R}` is a convex differentiable function,
32 | - :math:`|| x ||_1` is the L1 norm of :math:`x`,
33 | - :math:`\lambda > 0` is the regularization parameter.
34 |
35 | We aim to determine the conditions under which the solution to this problem is :math:`x = 0`.
36 |
37 | Theoretical background
38 | ======================
39 |
40 |
41 | Let
42 |
43 | .. math::
44 |
45 | g(x) = f(x) + \lambda || x||_1
46 |
47 | According to Fermat's rule, 0 is the minimizer of :math:`g` if and only if 0 is in the subdifferential of :math:`g` at 0.
48 | The subdifferential of :math:`|| x ||_1` at 0 is the L-infinity unit ball:
49 |
50 | .. math::
51 | \partial || \cdot ||_1 (0) = \{ u \in \mathbb{R}^d : ||u||_{\infty} \leq 1 \}
52 |
53 | Thus,
54 |
55 | .. math::
56 | :nowrap:
57 |
58 | \begin{equation}
59 | \begin{aligned}
60 | 0 \in \text{argmin} ~ g(x)
61 | &\Leftrightarrow 0 \in \partial g(0) \\
62 | &\Leftrightarrow
63 | 0 \in \nabla f(0) + \lambda \partial || \cdot ||_1 (0) \\
64 | &\Leftrightarrow - \nabla f(0) \in \lambda \{ u \in \mathbb{R}^d : ||u||_{\infty} \leq 1 \} \\
65 | &\Leftrightarrow || \nabla f(0) ||_\infty \leq \lambda
66 | \end{aligned}
67 | \end{equation}
68 |
69 |
70 | We have just shown that the minimizer of :math:`g = f + \lambda || \cdot ||_1` is 0 if and only if :math:`\lambda \geq ||\nabla f(0)||_{\infty}`.
71 |
72 | Example
73 | =======
74 |
75 | Consider the loss function for Ordinary Least Squares :math:`f(x) = \frac{1}{2n} ||Ax - b||_2^2`, where :math:`n` is the number of samples. We have:
76 |
77 | .. math::
78 | \nabla f(x) = \frac{1}{n}A^T (Ax - b)
79 |
80 | At :math:`x=0`:
81 |
82 | .. math::
83 | \nabla f(0) = -\frac{1}{n}A^T b
84 |
85 | The infinity norm of the gradient at 0 is:
86 |
87 | .. math::
88 | ||\nabla f(0)||_{\infty} = \frac{1}{n}||A^T b||_{\infty}
89 |
90 | For :math:`\lambda \geq \frac{1}{n}||A^T b||_{\infty}`, the solution to :math:`\min_x \frac{1}{2n} ||Ax - b||_2^2 + \lambda || x||_1` is :math:`x=0`.
91 |
92 |
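93 | As a quick numerical check (a sketch on synthetic data, using the ``Lasso`` estimator from skglm), the coefficients returned at ``alpha = alpha_max`` should all be zero, up to the solver tolerance:
94 | 
95 | .. code-block:: python
96 | 
97 |     import numpy as np
98 |     from skglm import Lasso
99 | 
100 |     rng = np.random.default_rng(0)
101 |     A = rng.standard_normal((50, 100))
102 |     b = rng.standard_normal(50)
103 |     n_samples = A.shape[0]
104 | 
105 |     alpha_max = np.max(np.abs(A.T @ b)) / n_samples
106 |     w = Lasso(alpha=alpha_max, fit_intercept=False).fit(A, b).coef_
107 |     print(np.allclose(w, 0.))  # expected: True
108 | 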
93 |
94 | References
95 | ==========
96 |
97 | Refer in particular to Section 3.1 and Proposition 4 of [1] for more details.
98 |
99 | .. _1:
100 |
101 | [1] Eugene Ndiaye, Olivier Fercoq, Alexandre Gramfort, and Joseph Salmon. 2017. Gap safe screening rules for sparsity enforcing penalties. J. Mach. Learn. Res. 18, 1 (January 2017), 4671–4703.
102 |
--------------------------------------------------------------------------------
/doc/tutorials/intercept.rst:
--------------------------------------------------------------------------------
1 | .. _maths_unpenalized_intercept:
2 |
3 | .. meta::
4 | :description: In-depth guide on intercept handling in skglm solvers. Covers mathematical derivations, gradient updates, Lipschitz constants, and examples for quadratic, logistic, and Huber datafits.
5 |
6 | Computation of the Intercept
7 | ============================
8 |
9 | .. currentmodule:: skglm
10 |
11 | .. include:: intercept2.md
12 | :parser: myst_parser.sphinx_
13 |
--------------------------------------------------------------------------------
/doc/tutorials/intercept2.md:
--------------------------------------------------------------------------------
1 | This note gives insights and guidance for the handling of an intercept coefficient within the `skglm` solvers.
2 |
3 | Let the design matrix be $X in RR^{n times p}$ where $n$ is the number of samples and $p$ the number of features.
4 | We denote $beta in RR^p$ the coefficients of the Generalized Linear Model and $beta_0$ its intercept.
5 | In many packages such as `liblinear`, the intercept is handled by adding an extra column of ones in the design matrix. This is costly in memory, and may lead to different solutions if all coefficients are penalized, as the intercept $beta_0$ is usually not.
6 | `skglm` follows a different route and solves directly:
7 |
8 | ```{math}
9 | beta^star, beta_0^star
10 | in
11 | underset(beta in RR^p, beta_0 in RR)("argmin")
12 | Phi(beta)
13 | triangleq
14 | underbrace(F(X beta + beta_0 bb"1"_n))_(triangleq f(beta, beta_0))
15 | + sum_(j=1)^p g_j(beta_j)
16 | \ ,
17 | ```
18 |
19 |
20 | where $bb"1"_{n}$ is the vector of size $n$ composed only of ones.
21 |
22 |
23 | The solvers of `skglm` update the intercept after each update of $beta$ by doing a (1 dimensional) gradient descent update:
24 |
25 | ```{math}
26 | beta_0^((k+1)) = beta_0^((k)) - 1/(L_0) nabla_(beta_0)F(X beta^((k)) + beta_0^((k)) bb"1"_{n})
27 | \ ,
28 | ```
29 |
30 | where $L_0$ is the Lipschitz constant associated to the intercept.
31 | The local Lipschitz constant $L_0$ satisfies the following inequality:
32 |
33 | $$
34 | \forall x, x_0 in RR^p times RR, \forall h in RR, |nabla_(x_0) f(x, x_0 + h) - nabla_(x_0) f(x, x_0)| <= L_0 |h| \ .
35 | $$
36 |
37 | This update rule should be implemented in the `intercept_update_step` method of the datafit class.
38 |
39 | The convergence criterion for the intercept is then simply the absolute value of the gradient with respect to $beta_0$, since the optimality condition at a solution $(beta^star, beta_0^star)$ is:
40 |
41 | ```{math}
42 | nabla_(beta_0)F(X beta^star + beta_0^star bb"1"_n) = 0
43 | \ ,
44 | ```
45 |
46 | Moreover, we have that
47 |
48 | ```{math}
49 | nabla_(beta_0) F(X beta + beta_0 bb"1"_n) = bb"1"_n^\top nabla_beta F(X beta + beta_0 bb"1"_n)
50 | \ .
51 | ```
52 |
53 |
54 | We will now derive the update used in Equation 2 for three different datafitting functions.
55 |
56 | ---
57 |
58 | ## The Quadratic datafit
59 |
60 | We define
61 |
62 | ```{math}
63 | F(X beta + beta_0 bb"1"_n) = 1/(2n) norm(y - X beta - beta_0 bb"1"_{n})_2^2
64 | \ .
65 | ```
66 |
67 | In this case $nabla f(z) = 1/n (z - y)$ hence Eq. 4 is equal to:
68 |
69 | ```{math}
70 | nabla_(beta_0) F(X beta + beta_0 bb"1"_n) = 1/n sum_(i=1)^n (X_( i: ) beta + beta_0 - y_i)
71 | \ .
72 | ```
73 |
74 | Finally, the Lipschitz constant is $L_0 = 1/n sum_(i=1)^n 1^2 = 1$.
75 |
76 |
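77 | In code, this intercept step (Eq. 2 with $L_0 = 1$) can be sketched as below; the function name is illustrative and only loosely mirrors the `intercept_update_step` method mentioned above:
78 | 
79 | ```python
80 | import numpy as np
81 | 
82 | def quadratic_intercept_step(y, Xw):
83 |     # gradient of F w.r.t. the intercept, divided by L_0 = 1
84 |     return np.mean(Xw - y)
85 | 
86 | # one update: beta_0 = beta_0 - quadratic_intercept_step(y, X @ beta + beta_0)
87 | ```
88 | 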
77 |
78 | ---
79 |
80 | ## The Logistic datafit
81 |
82 | In this case,
83 |
84 | ```{math}
85 | F(X beta + beta_0 bb"1"_{n}) = 1/n sum_(i=1)^n log(1 + exp(-y_i(X_( i: ) beta + beta_0)))
86 | ```
87 |
88 |
89 | We can then write
90 |
91 | ```{math}
92 | nabla_(beta_0) F(X beta + beta_0 bb"1"_n) = 1/n sum_(i=1)^n (-y_i)/(1 + exp(y_i(X_( i: ) beta + beta_0))) \ .
93 | ```
94 |
95 |
96 | Finally, the Lipschitz constant is $L_0 = 1/(4n) sum_(i=1)^n 1^2 = 1/4$.
97 |
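98 | A corresponding sketch for the logistic case (Eq. 2 with $L_0 = 1/4$, illustrative naming):
99 | 
100 | ```python
101 | import numpy as np
102 | 
103 | def logistic_intercept_step(y, Xw):
104 |     # gradient of F w.r.t. the intercept, divided by L_0 = 1/4
105 |     return 4. * np.mean(-y / (1. + np.exp(y * Xw)))
106 | ```
107 | 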
98 | ---
99 |
100 | ## The Huber datafit
101 |
102 | In this case,
103 |
104 | ```{math}
105 | F(X beta + beta_0 bb"1"_{n}) = 1/n sum_(i=1)^n f_(delta) (y_i - X_( i: ) beta - beta_0) \ ,
106 | ```
107 |
108 | where
109 |
110 | ```{math}
111 | f_delta(x) = {
112 |     (1/2 x^2, if |x| <= delta),
113 |     (delta |x| - 1/2 delta^2, if |x| > delta)
114 | :} \ .
115 | ```
116 |
117 |
118 | Let $r_i = y_i - X_( i: ) beta - beta_0$. We can then write
119 |
120 | ```{math}
121 | nabla_(beta_0) F(X beta + beta_0 bb"1"_{n}) = -1/n sum_(i=1)^n (r_i bbb"1"_({|r_i| <= delta}) + "sign"(r_i) delta bbb"1"_({|r_i| > delta})) \ ,
122 | ```
123 |
124 | where $bbb"1"_({x > delta})$ is the classical indicator function.
125 |
126 | Finally, the Lipschitz constant is $L_0 = 1/n sum_(i=1)^n 1^2 = 1$.
127 |
--------------------------------------------------------------------------------
/doc/tutorials/prox_nn_group_lasso.rst:
--------------------------------------------------------------------------------
1 | .. _prox_nn_group_lasso:
2 | .. meta::
3 | :description: Detailed tutorial on deriving the proximity operator and subdifferential for the positive group Lasso penalty in skglm. Includes mathematical proofs and examples.
4 |
5 | ===================================
6 | Details on the Positive Group Lasso
7 | ===================================
8 |
9 | This tutorial presents how to derive the proximity operator and subdifferential of the :math:`l_2`-penalty, and the :math:`l_2`-penalty with nonnegative constraints.
10 |
11 |
12 | Proximity operator of the group Lasso
13 | =====================================
14 |
15 | Let
16 |
17 | .. math::
18 | g:x \mapsto \norm{x}_2
19 | ,
20 |
21 | then its Fenchel-Legendre conjugate is
22 |
23 | .. math::
24 | :label: fenchel
25 |
26 | g^{\star}:x \mapsto i_{\norm{x}_2 \leq 1}
27 | ,
28 |
29 | and for all :math:`x \in \mathbb{R}^p`
30 |
31 | .. math::
32 | :label: prox_projection
33 |
34 | \text{prox}_{g^{\star}}(x)
35 | =
36 | \text{proj}_{\mathcal{B}_2}(x) = \frac{x}{\max(\norm{x}_2, 1)}
37 | .
38 |
39 | Using the Moreau decomposition, Equations :eq:`fenchel` and :eq:`prox_projection`, one has
40 |
41 |
42 | .. math::
43 |
44 | \text{prox}_{\lambda g}(x)
45 | =
46 | x
47 | - \lambda \text{prox}_{g^\star/\lambda }(x/\lambda)
48 |
49 | .. math::
50 |
51 | = x
52 | - \lambda \text{prox}_{g^\star}(x/\lambda)
53 |
54 | .. math::
55 |
56 | = x
57 | - \lambda \frac{x/\lambda}{\max(\norm{x/\lambda}_2, 1)}
58 |
59 | .. math::
60 |
61 | = x
62 | - \frac{\lambda x}{\max(\norm{x}_2, \lambda)}
63 |
64 | .. math::
65 |
66 | = (1 - \frac{\lambda}{\norm{x}})_{+} x
67 | .
68 |
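69 | In code, this block soft-thresholding can be sketched as follows (a NumPy illustration, not the skglm implementation):
70 | 
71 | .. code-block:: python
72 | 
73 |     import numpy as np
74 | 
75 |     def prox_group_lasso(x, lam):
76 |         # prox of lam * ||.||_2: shrink the whole block towards 0
77 |         norm_x = np.linalg.norm(x)
78 |         if norm_x <= lam:
79 |             return np.zeros_like(x)
80 |         return (1. - lam / norm_x) * x
81 | 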
69 | A similar formula can be derived for the group Lasso with nonnegative constraints.
70 |
71 |
72 | Proximity operator of the group Lasso with positivity constraints
73 | =================================================================
74 |
75 | Let
76 |
77 | .. math::
78 | h:x \mapsto \norm{x}_2
79 | + i_{x \geq 0}
80 | .
81 |
82 | Let :math:`x \in \mathbb{R}^p` and let :math:`S = \{ j \in 1, \ldots, p \ | \ x_j > 0 \}` be the set of its strictly positive coordinates, then
83 |
84 |
85 | .. math::
86 | :label: fenchel_nn
87 |
88 | h^{\star} :x \mapsto i_{\norm{x_S}_2 \leq 1}
89 | ,
90 |
91 | and
92 |
93 | .. math::
94 | :label: prox_projection_nn_Sc
95 |
96 | \text{prox}_{h^{\star}}(x)_{S^c}
97 | =
98 | x_{S^c}
99 |
100 |
101 | .. math::
102 | :label: prox_projection_nn_S
103 |
104 | \text{prox}_{h^{\star}}(x)_S
105 | =
106 | \text{proj}_{\mathcal{B}_2}(x_S) = \frac{x_S}{\max(\norm{x_S}_2, 1)}
107 | .
108 |
109 | As before, using the Moreau decomposition and Equation :eq:`fenchel_nn` yields
110 |
111 |
112 | .. math::
113 |
114 | \text{prox}_{\lambda h}(x)
115 | =
116 | x
117 | - \lambda \text{prox}_{h^\star / \lambda }(x/\lambda)
118 |
119 | .. math::
120 |
121 | = x
122 | - \lambda \text{prox}_{h^\star}(x/\lambda)
123 | ,
124 |
125 | and thus, combined with Equations :eq:`prox_projection_nn_Sc` and :eq:`prox_projection_nn_S` it leads to
126 |
127 | .. math::
128 |
129 | \text{prox}_{\lambda h}(x)_{S^c} = 0
130 |
131 | .. math::
132 |
133 | \text{prox}_{\lambda h}(x)_{S}
134 | =
135 | (1 - \frac{\lambda}{\norm{x_S}})_{+} x_S
136 | .
137 |
138 |
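139 | A NumPy sketch of this nonnegatively constrained proximal operator (illustrative, not the skglm implementation):
140 | 
141 | .. code-block:: python
142 | 
143 |     import numpy as np
144 | 
145 |     def prox_pos_group_lasso(x, lam):
146 |         # zero out non-positive entries, then block soft-threshold the rest
147 |         x_pos = np.maximum(x, 0.)
148 |         norm_pos = np.linalg.norm(x_pos)
149 |         if norm_pos <= lam:
150 |             return np.zeros_like(x)
151 |         return (1. - lam / norm_pos) * x_pos
152 | 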
139 |
140 | .. _subdiff_positive_group_lasso:
141 |
142 | Subdifferential of the positive Group Lasso penalty
143 | ===================================================
144 |
145 | For the ``subdiff_diff`` working set strategy, we compute the distance :math:`D(v)` for some :math:`v` to the subdifferential of the :math:`h` penalty at a point :math:`w`.
146 | Since the penalty is group-separable, we reduce to the case where :math:`w` is a single block of variables in :math:`\mathbb{R}^g`.
147 |
148 | Case :math:`w \notin \mathbb{R}_+^g`
149 | ------------------------------------
150 |
151 | If any component of :math:`w` is strictly negative, the subdifferential is empty, and the distance is :math:`+ \infty`.
152 |
153 | .. math::
154 |
155 | D(v) = + \infty, \quad \forall v \in \mathbb{R}^g
156 | .
157 |
158 | Case :math:`w = 0`
159 | ------------------
160 |
161 | At :math:`w = 0`, the subdifferential is:
162 |
163 | .. math::
164 |
165 | \lambda \partial || \cdot ||_2 + \partial \iota_{x \geq 0} = \lambda \mathcal{B}_2 + \mathbb{R}_-^g
166 | ,
167 |
168 | where :math:`\mathcal{B}_2` is the unit ball.
169 |
170 | Therefore, the distance to the subdifferential writes
171 |
172 | .. math::
173 |
174 | D(v) = \min_{u \in \lambda \mathcal{B}_2, n \in \mathbb{R}_{-}^g} \ || u + n - v ||
175 | .
176 |
177 | Minimizing over :math:`n` then over :math:`u`, thanks to [`1 `_], yields
178 |
179 | .. math::
180 |
181 | D(v) = \max(0, ||v^+|| - \lambda)
182 | ,
183 |
184 | where :math:`v^+` is :math:`v` restricted to its positive coordinates.
185 | Intuitively, it is clear that if :math:`v_i < 0`, we can cancel it exactly in the objective function by taking :math:`n_i = - v_i` and :math:`u_i = 0`; on the other hand, if :math:`v_i > 0`, taking a nonzero :math:`n_i` will only increase the quantity that :math:`u_i` needs to bring closer to 0.
186 |
187 | For a rigorous derivation of this, introduce the Lagrangian on a squared objective
188 |
189 | .. math::
190 |
191 | \mathcal{L}(u, n, \nu, \mu) =
192 | \frac{1}{2}\norm{u + n - v}^2 + \nu(\frac{1}{2} \norm{u}^2 - \lambda^2 / 2) + \langle \mu, n \rangle
193 | ,
194 |
195 | and write down the optimality condition with respect to :math:`u` and :math:`n`.
196 | Treat the case :math:`\nu = 0` separately; in the other case, show that :math:`u` must be positive and that :math:`v = (1 + \nu) u + n`; combine this with :math:`u = \mu / \nu` and complementary slackness to reach the conclusion.
197 |
198 | Case :math:`|| w || \ne 0`
199 | ---------------------------
200 | The subdifferential in that case is :math:`\lambda w / {|| w ||} + C_1 \times \ldots \times C_g`, where :math:`C_j = \{0\}` if :math:`w_j > 0` and :math:`C_j = \mathbb{R}_-` otherwise (:math:`w_j = 0`).
201 |
202 | By letting :math:`p` denote the projection of :math:`v` onto this set,
203 | one has
204 |
205 | .. math::
206 |
207 | p_j = \lambda \frac{w_j}{||w||} \text{ if } w_j > 0
208 |
209 | and
210 |
211 | .. math::
212 |
213 | p_j = \min(v_j, 0) \text{ otherwise}.
214 |
215 | The distance to the subdifferential is then:
216 |
217 | .. math::
218 |
219 |     D(v) = || v - p || = \sqrt{\sum_{j, w_j > 0} (v_j - \lambda \frac{w_j}{||w||})^2 + \sum_{j, w_j=0} \max(0, v_j)^2}
220 |
221 | since :math:`v_j - \min(v_j, 0) = v_j + \max(-v_j, 0) = \max(0, v_j)`.
222 |
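223 | Putting the three cases together, the distance computation can be sketched in NumPy as follows (an illustration of the formulas above; the actual skglm code lives in the ``subdiff_distance`` method of the penalty):
224 | 
225 | .. code-block:: python
226 | 
227 |     import numpy as np
228 | 
229 |     def dist_subdiff_pos_group_lasso(v, w, lam):
230 |         if np.any(w < 0):          # subdifferential is empty
231 |             return np.inf
232 |         if not np.any(w):          # w = 0
233 |             return max(0., np.linalg.norm(np.maximum(v, 0.)) - lam)
234 |         # w has at least one strictly positive coordinate
235 |         proj = np.where(w > 0, lam * w / np.linalg.norm(w), np.minimum(v, 0.))
236 |         return np.linalg.norm(v - proj)
237 | 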
223 |
224 |
225 | References
226 | ==========
227 |
228 | [1] ``_
229 |
--------------------------------------------------------------------------------
/doc/tutorials/tutorials.rst:
--------------------------------------------------------------------------------
1 | .. _tutorials:
2 |
3 | .. meta::
4 | :description: Step-by-step skglm tutorials covering custom datafits, penalties, intercept computations, Cox datafit mathematics, group Lasso details, and regularization strategies.
5 |
6 | =========
7 | Tutorials
8 | =========
9 |
10 | .. grid:: 1 1 2 2
11 | :gutter: 2
12 |
13 | .. grid-item-card:: How to Add a Custom Datafit
14 | :link: add_datafit.html
15 | :text-align: left
16 |
17 |       Learn to add a custom datafit through a hands-on example: implementing a Poisson datafit.
18 |
19 | .. grid-item-card:: How to Add a Custom Penalty
20 | :link: add_penalty.html
21 | :text-align: left
22 |
23 | Learn to add a custom penalty by implementing the :math:`\ell_1` penalty.
24 |
25 | .. grid-item-card:: Computation of the Intercept
26 | :link: intercept.html
27 | :text-align: left
28 |
29 | Explore how ``skglm`` fits an unpenalized intercept.
30 |
31 | .. grid-item-card:: Mathematics behind Cox Datafit
32 | :link: cox_datafit.html
33 | :text-align: left
34 |
35 | Understand the mathematical foundation of Cox datafit and its applications in survival analysis.
36 |
37 | .. grid-item-card:: Details on the Group Lasso
38 | :link: prox_nn_group_lasso.html
39 | :text-align: left
40 |
41 | Mathematical details about the group Lasso, in particular with nonnegativity constraints.
42 |
43 | .. grid-item-card:: Understanding `alpha_max`
44 | :link: alpha_max.html
45 | :text-align: left
46 |
47 |       Learn how to choose the regularization strength in :math:`\ell_1`-regularized problems.
48 |
49 | .. toctree::
50 | :hidden:
51 |
52 | add_datafit
53 | add_penalty
54 | intercept
55 | cox_datafit
56 | prox_nn_group_lasso
57 | alpha_max
58 |
--------------------------------------------------------------------------------
/examples/README.txt:
--------------------------------------------------------------------------------
1 | .. _general_examples:
2 |
3 | .. title:: Examples
4 |
5 | Examples
6 | ========
7 |
8 | .. toctree::
9 | :maxdepth: 1
10 |
11 | auto_examples/index
12 |
--------------------------------------------------------------------------------
/examples/plot_compare_time.py:
--------------------------------------------------------------------------------
1 | # Authors: Quentin Klopfenstein
2 | # Mathurin Massias
3 | """
4 | =============================================
5 | Timing comparison with scikit-learn for Lasso
6 | =============================================
7 | Compare time to solve large scale Lasso problems with scikit-learn.
8 | """
9 |
10 |
11 | import time
12 | import warnings
13 | import numpy as np
14 | from numpy.linalg import norm
15 | import matplotlib.pyplot as plt
16 | from libsvmdata import fetch_libsvm
17 |
18 | from sklearn.exceptions import ConvergenceWarning
19 | from sklearn.linear_model import Lasso as Lasso_sklearn
20 | from sklearn.linear_model import ElasticNet as Enet_sklearn
21 |
22 | from skglm import Lasso, ElasticNet
23 |
24 | warnings.filterwarnings('ignore', category=ConvergenceWarning)
25 |
26 |
27 | def compute_obj(X, y, w, alpha, l1_ratio=1):
28 | loss = norm(y - X @ w) ** 2 / (2 * len(y))
29 | penalty = (alpha * l1_ratio * np.sum(np.abs(w))
30 | + 0.5 * alpha * (1 - l1_ratio) * norm(w) ** 2)
31 | return loss + penalty
32 |
33 |
34 | X, y = fetch_libsvm("news20.binary")
35 | 
36 | alpha = np.max(np.abs(X.T @ y)) / len(y) / 10
37 |
38 | dict_sklearn = {}
39 | dict_sklearn["lasso"] = Lasso_sklearn(
40 | alpha=alpha, fit_intercept=False, tol=1e-12)
41 |
42 | dict_sklearn["enet"] = Enet_sklearn(
43 | alpha=alpha, fit_intercept=False, tol=1e-12, l1_ratio=0.5)
44 |
45 | dict_ours = {}
46 | dict_ours["lasso"] = Lasso(
47 | alpha=alpha, fit_intercept=False, tol=1e-12)
48 | dict_ours["enet"] = ElasticNet(
49 | alpha=alpha, fit_intercept=False, tol=1e-12, l1_ratio=0.5)
50 |
51 | models = ["lasso", "enet"]
52 |
53 | fig, axarr = plt.subplots(2, 1, constrained_layout=True)
54 |
55 | for ax, model, l1_ratio in zip(axarr, models, [1, 0.5]):
56 | pobj_dict = {}
57 | pobj_dict["sklearn"] = list()
58 | pobj_dict["us"] = list()
59 |
60 | time_dict = {}
61 | time_dict["sklearn"] = list()
62 | time_dict["us"] = list()
63 |
64 | # Remove compilation time
65 | dict_ours[model].max_iter = 10_000
66 | w_star = dict_ours[model].fit(X, y).coef_
67 | pobj_star = compute_obj(X, y, w_star, alpha, l1_ratio)
68 | for n_iter_sklearn in np.unique(np.geomspace(1, 50, num=15).astype(int)):
69 | dict_sklearn[model].max_iter = n_iter_sklearn
70 |
71 | t_start = time.time()
72 | w_sklearn = dict_sklearn[model].fit(X, y).coef_
73 | time_dict["sklearn"].append(time.time() - t_start)
74 | pobj_dict["sklearn"].append(compute_obj(X, y, w_sklearn, alpha, l1_ratio))
75 |
76 | for n_iter_us in range(1, 10):
77 | dict_ours[model].max_iter = n_iter_us
78 | t_start = time.time()
79 | w = dict_ours[model].fit(X, y).coef_
80 | time_dict["us"].append(time.time() - t_start)
81 | pobj_dict["us"].append(compute_obj(X, y, w, alpha, l1_ratio))
82 |
83 |     ax.semilogy(
84 |         time_dict["sklearn"], np.array(pobj_dict["sklearn"]) - pobj_star, label='sklearn')
85 |     ax.semilogy(
86 |         time_dict["us"], np.array(pobj_dict["us"]) - pobj_star, label='skglm')
87 |
88 | ax.set_ylim((1e-10, 1))
89 | ax.set_title(model)
90 | ax.legend()
91 | ax.set_ylabel("Objective suboptimality")
92 |
93 | axarr[1].set_xlabel("Time (s)")
94 | plt.show(block=False)
95 |
--------------------------------------------------------------------------------
/examples/plot_group_logistic_regression.py:
--------------------------------------------------------------------------------
1 | """
2 | ===================================
3 | Group Logistic regression in python
4 | ===================================
5 | Scikit-learn is missing a Group Logistic regression estimator. We show how to implement
6 | one with ``skglm``.
7 | """
8 |
9 | # Author: Mathurin Massias
10 |
11 | import numpy as np
12 |
13 | from skglm import GeneralizedLinearEstimator
14 | from skglm.datafits import LogisticGroup
15 | from skglm.penalties import WeightedGroupL2
16 | from skglm.solvers import GroupProxNewton
17 | from skglm.utils.data import make_correlated_data, grp_converter
18 |
19 | import matplotlib.pyplot as plt
20 |
21 | n_features = 30
22 | X, y, _ = make_correlated_data(
23 | n_samples=10, n_features=30, random_state=0)
24 | y = np.sign(y)
25 |
26 |
27 | # %%
28 | # Classifier creation: combination of penalty, datafit and solver.
29 | #
30 | grp_size = 3  # groups are made of 3 consecutive features
31 | n_groups = n_features // grp_size
32 | grp_indices, grp_ptr = grp_converter(grp_size, n_features=n_features)
33 | alpha = 0.01
34 | weights = np.ones(n_groups)
35 | penalty = WeightedGroupL2(alpha, weights, grp_ptr, grp_indices)
36 | datafit = LogisticGroup(grp_ptr, grp_indices)
37 | solver = GroupProxNewton(verbose=2)
38 |
39 | # %%
40 | # Train the model
41 | clf = GeneralizedLinearEstimator(datafit, penalty, solver)
42 | clf.fit(X, y)
43 |
44 | # %%
45 | # Check that fitted groups are either entirely zero or entirely nonzero
46 | print(clf.coef_.reshape(-1, grp_size))
47 |
48 | # %%
49 | # Visualise group-level sparsity
50 |
51 | coef_by_group = clf.coef_.reshape(-1, grp_size)
52 | group_norms = np.linalg.norm(coef_by_group, axis=1)
53 |
54 | plt.figure(figsize=(8, 4))
55 | plt.bar(np.arange(n_groups), group_norms)
56 | plt.xlabel("Group index")
57 | plt.ylabel("L2 norm of coefficients")
58 | plt.title("Group Sparsity Pattern")
59 | plt.tight_layout()
60 | plt.show()
61 |
62 | # %%
63 | # This plot shows the L2 norm of the coefficients for each group.
64 | # Groups with a zero norm have been set inactive by the model,
65 | # illustrating how Group Logistic Regression enforces sparsity at the group level.
66 | # (Note: This example uses a tiny synthetic dataset, so the pattern has limited interpretability.)
67 |
--------------------------------------------------------------------------------
/examples/plot_lasso_vs_weighted.py:
--------------------------------------------------------------------------------
1 | """
2 | ======================================
3 | Comparison of Lasso and Weighted Lasso
4 | ======================================
5 | Illustrate the importance of feature normalization when penalizing.
6 | """
7 |
8 | # Author: Mathurin Massias
9 | # Quentin Bertrand
10 |
11 | import numpy as np
12 | from numpy.linalg import norm
13 | import matplotlib.pyplot as plt
14 |
15 | from skglm import Lasso, WeightedLasso
16 | from skglm.utils.data import make_correlated_data
17 |
18 | n_features = 30
19 | X, _, _ = make_correlated_data(
20 | n_samples=50, n_features=n_features, random_state=0)
21 | w_true = np.zeros(n_features)
22 |
23 | nnz = 5
24 | w_true[:nnz] = 1
25 |
26 | # assume that, for some reason, important features have a smaller norm than the others
27 | X[:, :nnz] *= 0.1
28 | noise = np.random.randn(X.shape[0])
29 | # use a signal-to-noise ratio of 2
30 | y = X @ w_true + 0.5 * norm(X @ w_true) / norm(noise) * noise
31 |
32 |
33 | # the Lasso does not select small norm features, while the weighted Lasso does:
34 | alpha_max = np.max(np.abs(X.T @ y)) / len(y)
35 | alpha = alpha_max / 10
36 | las = Lasso(alpha=alpha, fit_intercept=False).fit(X, y)
37 | wei = WeightedLasso(
38 | alpha=alpha, weights=norm(X, axis=0), fit_intercept=False).fit(X, y)
39 |
40 |
41 | fig, axarr = plt.subplots(1, 3, sharey=True, figsize=(10, 2.4))
42 | axarr[0].stem(w_true)
43 | axarr[0].set_title("True coeffs")
44 | axarr[1].stem(las.coef_)
45 | axarr[1].set_title("Lasso")
46 | axarr[2].stem(wei.coef_)
47 | axarr[2].set_title("Weighted Lasso")
48 | plt.show(block=False)
49 |
--------------------------------------------------------------------------------
/examples/plot_logreg_various_penalties.py:
--------------------------------------------------------------------------------
1 | """
2 | ==================================================================
3 | Logistic regression with Elastic net and minimax concave penalties
4 | ==================================================================
5 | Illustrate the modularity of ``skglm`` by using ``GeneralizedLinearEstimator`` with one datafit and one penalty.
6 | """
7 |
8 | # Author: Pierre-Antoine Bannier
9 |
10 | import numpy as np
11 | from numpy.linalg import norm
12 | import matplotlib.pyplot as plt
13 |
14 | from sklearn.metrics import f1_score
15 |
16 | from skglm import GeneralizedLinearEstimator
17 | from skglm.datafits import Logistic
18 | from skglm.penalties import L1_plus_L2, MCPenalty
19 | from skglm.utils.data import make_correlated_data
20 |
21 |
22 | n_samples, n_features = 50, 100
23 | X, y, w_star = make_correlated_data(
24 | n_samples=n_samples, n_features=n_features, random_state=0)
25 | y_ind = np.sign(y)
26 |
27 | # standardize for MCP
28 | X /= norm(X, axis=0) / np.sqrt(len(X))
29 |
30 | # Split data in train set and test set
31 | X_train, y_train = X[: n_samples // 2], y_ind[: n_samples // 2]
32 | X_test, y_test = X[n_samples // 2:], y_ind[n_samples // 2:]
33 |
34 |
35 | alpha = 0.005
36 | gamma = 3.0
37 | l1_ratio = 0.3
38 | clf_enet = GeneralizedLinearEstimator(
39 | Logistic(),
40 | L1_plus_L2(alpha, l1_ratio),
41 | )
42 | y_pred_enet = clf_enet.fit(X_train, y_train).predict(X_test)
43 | f1_score_enet = f1_score(y_test, y_pred_enet)
44 |
45 | clf_mcp = GeneralizedLinearEstimator(
46 | Logistic(),
47 | MCPenalty(alpha, gamma),
48 | )
49 | y_pred_mcp = clf_mcp.fit(X_train, y_train).predict(X_test)
50 | f1_score_mcp = f1_score(y_test, y_pred_mcp)
51 |
52 |
53 | m, s, _ = plt.stem(
54 | np.where(clf_enet.coef_.ravel())[0],
55 | clf_enet.coef_[clf_enet.coef_ != 0],
56 | markerfmt="x",
57 | label="Elastic net coefficients",
58 | )
59 | plt.setp([m, s], color="#2ca02c")
60 | m, s, _ = plt.stem(
61 | np.where(clf_mcp.coef_.ravel())[0],
62 | clf_mcp.coef_[clf_mcp.coef_ != 0],
63 | markerfmt="x",
64 | label="MCP coefficients",
65 | )
66 | plt.setp([m, s], color="#ff7f0e")
67 | plt.stem(
68 | np.where(w_star)[0],
69 | w_star[w_star != 0],
70 | label="true coefficients",
71 | markerfmt="bx",
72 | )
73 |
74 | plt.legend(loc="best")
75 | plt.title("MCP F1: %.3f, Elastic Net F1: %.3f" % (f1_score_mcp, f1_score_enet))
76 | plt.show()
77 |
--------------------------------------------------------------------------------
/examples/plot_pen_prox.py:
--------------------------------------------------------------------------------
1 | """
2 | =========================================
3 | Value and proximal operators of penalties
4 | =========================================
5 | Illustrate the value and proximal operators of some sparse penalties.
6 | """
7 | # Author: Mathurin Massias
8 |
9 | import numpy as np
10 | import matplotlib.pyplot as plt
11 |
12 | from skglm.penalties import WeightedL1, L1, L1_plus_L2, MCPenalty, SCAD, L0_5, L2_3
13 |
14 |
15 | penalties = [
16 | WeightedL1(alpha=1, weights=np.array([2.])),
17 | L1(alpha=1),
18 | L1_plus_L2(alpha=1, l1_ratio=0.7),
19 | MCPenalty(alpha=1, gamma=3.),
20 | SCAD(alpha=1, gamma=3.),
21 | L0_5(alpha=1),
22 | L2_3(alpha=1),
23 | ]
24 |
25 |
26 | x_range = np.linspace(-4, 4, num=300)
27 |
28 | fig, axarr = plt.subplots(1, 2, figsize=(8, 3), constrained_layout=True)
29 |
30 | for pen in penalties:
31 | axarr[0].plot(x_range,
32 | [pen.value(np.array([x])) for x in x_range],
33 | label=pen.__class__.__name__)
34 | axarr[1].plot(x_range,
35 | [pen.prox_1d(x, 1, 0) for x in x_range],
36 | label=pen.__class__.__name__)
37 |
38 | axarr[0].legend()
39 | axarr[0].set_title("Penalty value")
40 | axarr[1].set_title("Proximal operator of penalty")
41 | plt.show(block=False)
42 |
--------------------------------------------------------------------------------
/examples/plot_reweighted_l1.py:
--------------------------------------------------------------------------------
1 | """
2 | =================================================================
3 | Timing comparison between direct prox computation and reweighting
4 | =================================================================
5 | Compare time and objective value of L0_5-regularized problem with
6 | direct proximal computation and iterative reweighting.
7 | """
8 | # Author: Pierre-Antoine Bannier
9 |
10 | import time
11 | import numpy as np
12 | import pandas as pd
13 | from numpy.linalg import norm
14 | import matplotlib.pyplot as plt
15 |
16 | from skglm.penalties.separable import L0_5
17 | from skglm.utils.data import make_correlated_data
18 | from skglm.estimators import GeneralizedLinearEstimator
19 | from skglm.experimental import IterativeReweightedL1
20 | from skglm.solvers import AndersonCD
21 |
22 |
23 | n_samples, n_features = 200, 500
24 | X, y, w_true = make_correlated_data(
25 | n_samples=n_samples, n_features=n_features, random_state=24)
26 |
27 | alpha_max = norm(X.T @ y, ord=np.inf) / n_samples
28 | alphas = [alpha_max / 10, alpha_max / 100, alpha_max / 1000]
29 | tol = 1e-10
30 |
31 |
32 | def _obj(w):
33 | return (np.sum((y - X @ w) ** 2) / (2 * n_samples)
34 | + alpha * np.sum(np.sqrt(np.abs(w))))
35 |
36 |
37 | def fit_l05(alpha):
38 | start = time.time()
39 | iterative_l05 = IterativeReweightedL1(
40 | penalty=L0_5(alpha),
41 | solver=AndersonCD(tol=tol, fit_intercept=False)).fit(X, y)
42 | iterative_time = time.time() - start
43 |
44 | # `subdiff` strategy for WS is uninformative for L0_5
45 | start = time.time()
46 | direct_l05 = GeneralizedLinearEstimator(
47 | penalty=L0_5(alpha),
48 | solver=AndersonCD(tol=tol, fit_intercept=False,
49 | ws_strategy="fixpoint")).fit(X, y)
50 | direct_time = time.time() - start
51 |
52 | results = {
53 | "iterative": (iterative_l05, iterative_time),
54 | "direct": (direct_l05, direct_time),
55 | }
56 | return results
57 |
58 |
59 | # caching Numba compilation
60 | fit_l05(alpha_max/10)
61 |
62 | time_results = np.zeros((2, len(alphas)))
63 | obj_results = np.zeros((2, len(alphas)))
64 |
65 | # actual run
66 | for i, alpha in enumerate(alphas):
67 | results = fit_l05(alpha=alpha)
68 | iterative_l05, iterative_time = results["iterative"]
69 | direct_l05, direct_time = results["direct"]
70 |
71 | iterative_obj = _obj(iterative_l05.coef_)
72 | direct_obj = _obj(direct_l05.coef_)
73 |
74 | obj_results[:, i] = np.array([iterative_obj, direct_obj])
75 | time_results[:, i] = np.array([iterative_time, direct_time])
76 |
77 | time_df = pd.DataFrame(time_results.T, columns=["Iterative", "Direct"])
78 | obj_df = pd.DataFrame(obj_results.T, columns=["Iterative", "Direct"])
79 |
80 | time_df.index = [1e-1, 1e-2, 1e-3]
81 | obj_df.index = [1e-1, 1e-2, 1e-3]
82 |
83 | fig, axarr = plt.subplots(1, 2, figsize=(8, 3.5), constrained_layout=True)
84 | ax = axarr[0]
85 | time_df.plot.bar(rot=0, ax=ax)
86 | ax.set_xlabel(r"$\lambda/\lambda_{max}$")
87 | ax.set_ylabel("time (in s)")
88 | ax.set_title("Time to fit")
89 |
90 | ax = axarr[1]
91 | obj_df.plot.bar(rot=0, ax=ax)
92 | ax.set_xlabel(r"$\lambda/\lambda_{max}$")
93 | ax.set_ylabel("obj. value")
94 | ax.set_title("Objective at solution")
95 | plt.show(block=False)
96 |
--------------------------------------------------------------------------------
/examples/plot_sparse_group_lasso.py:
--------------------------------------------------------------------------------
1 | """
2 | =================================
3 | Fast Sparse Group Lasso in python
4 | =================================
5 | Scikit-learn is missing a Sparse Group Lasso regression estimator. We show how to
6 | implement one with ``skglm``.
7 | """
8 |
9 | # Author: Mathurin Massias
10 |
11 | # %%
12 | import numpy as np
13 | import matplotlib.pyplot as plt
14 |
15 | from skglm.solvers import GroupBCD
16 | from skglm.datafits import QuadraticGroup
17 | from skglm import GeneralizedLinearEstimator
18 | from skglm.penalties import WeightedL1GroupL2
19 | from skglm.utils.data import make_correlated_data, grp_converter
20 |
21 | n_features = 30
22 | X, y, _ = make_correlated_data(
23 | n_samples=10, n_features=30, random_state=0)
24 |
25 |
26 | # %%
27 | # Model creation: combination of penalty, datafit and solver.
28 | #
29 | # penalty:
30 | grp_size = 10 # take groups of 10 consecutive features
31 | n_groups = n_features // grp_size
32 | grp_indices, grp_ptr = grp_converter(grp_size, n_features)
33 | n_groups = len(grp_ptr) - 1
34 | weights_g = np.ones(n_groups, dtype=np.float64)
35 | weights_f = 0.5 * np.ones(n_features)
36 | penalty = WeightedL1GroupL2(
37 | alpha=0.5, weights_groups=weights_g,
38 | weights_features=weights_f, grp_indices=grp_indices, grp_ptr=grp_ptr)
39 |
40 | # %% Datafit and solver
41 | datafit = QuadraticGroup(grp_ptr, grp_indices)
42 | solver = GroupBCD(ws_strategy="fixpoint", verbose=1, fit_intercept=False, tol=1e-10)
43 |
44 | model = GeneralizedLinearEstimator(datafit, penalty, solver=solver)
45 |
46 | # %%
47 | # Train the model
48 | clf = GeneralizedLinearEstimator(datafit, penalty, solver)
49 | clf.fit(X, y)
50 |
51 | # %%
52 | # Some groups are fully 0, and inside non zero groups,
53 | # some values are 0 too
54 | plt.imshow(clf.coef_.reshape(-1, grp_size) != 0, cmap='Greys')
55 | plt.title("Non zero values (in black) in model coefficients")
56 | plt.ylabel('Group index')
57 | plt.xlabel('Feature index inside group')
58 | plt.xticks(np.arange(grp_size))
59 | plt.yticks(np.arange(n_groups));
60 |
61 | # %%
62 |
--------------------------------------------------------------------------------
/examples/plot_sparse_recovery.py:
--------------------------------------------------------------------------------
1 | """
2 | =========================================
3 | Sparse recovery with non-convex penalties
4 | =========================================
5 | Illustrate the superior performance of penalties for sparse recovery.
6 | """
7 |
8 | # Author: Mathurin Massias
9 | # Quentin Bertrand
10 | # Quentin Klopfenstein
11 |
12 | import numpy as np
13 | from numpy.linalg import norm
14 | import matplotlib.pyplot as plt
15 | from sklearn.model_selection import train_test_split
16 | from sklearn.metrics import f1_score, mean_squared_error
17 |
18 | from skglm.utils.data import make_correlated_data
19 | from skglm.solvers import AndersonCD
20 | from skglm.datafits import Quadratic
21 | from skglm.penalties import L1, MCPenalty, L0_5, L2_3, SCAD
22 |
23 | cmap = plt.get_cmap('tab10')
24 |
25 | # Simulate sparse data
26 | n_features = 1000
27 | density = 0.1
28 | np.random.seed(0)
29 | supp = np.random.choice(n_features, size=int(density * n_features),
30 | replace=False)
31 | w_true = np.zeros(n_features)
32 | w_true[supp] = 1
33 | X_, y_, w_true = make_correlated_data(
34 | n_samples=1000, n_features=1000, snr=5, random_state=2,
35 | rho=0.5, w_true=w_true)
36 |
37 | # standardize for MCP
38 | X_ /= norm(X_, axis=0) / np.sqrt(len(X_))
39 | X, X_test, y, y_test = train_test_split(X_, y_, test_size=0.5)
40 |
41 |
42 | # Compute l1 penalty value which leads to 0 as solution
43 | alpha_max = norm(X.T @ y, ord=np.inf) / len(y)
44 |
45 | # Define a range of penalty values
46 | n_alphas = 30
47 | alphas = alpha_max * np.geomspace(1, 1e-2, num=n_alphas)
48 |
49 | datafit = Quadratic()
50 |
51 | penalties = {}
52 | penalties['lasso'] = L1(alpha=1)
53 | penalties['mcp'] = MCPenalty(alpha=1, gamma=3)
54 | penalties['scad'] = SCAD(alpha=1, gamma=3)
55 | penalties['l05'] = L0_5(alpha=1)
56 | penalties['l23'] = L2_3(alpha=1)
57 |
58 | colors = {}
59 | colors['lasso'] = cmap(0)
60 | colors['mcp'] = cmap(1)
61 | colors['scad'] = cmap(2)
62 | colors['l05'] = cmap(3)
63 | colors['l23'] = cmap(4)
64 |
65 | f1 = {}
66 | estimation_error = {}
67 | prediction_error = {}
68 | l0 = {}
69 | mse_ref = mean_squared_error(np.zeros_like(y_test), y_test)
70 |
71 | solver = AndersonCD(ws_strategy="fixpoint", fit_intercept=False)
72 |
73 | for idx, estimator in enumerate(penalties.keys()):
74 | print(f'Running {estimator}...')
75 | estimator_path = solver.path(
76 | X, y, datafit, penalties[estimator],
77 | alphas=alphas)
78 |
79 | f1_temp = np.zeros(n_alphas)
80 | prediction_error_temp = np.zeros(n_alphas)
81 |
82 | for j, w in enumerate(estimator_path[1].T):
83 | f1_temp[j] = f1_score(w != 0, w_true != 0)
84 | prediction_error_temp[j] = mean_squared_error(X_test @ w, y_test) / mse_ref
85 |
86 | f1[estimator] = f1_temp
87 | prediction_error[estimator] = prediction_error_temp
88 |
89 | name_estimators = {'lasso': "Lasso"}
90 | name_estimators['mcp'] = r"MCP, $\gamma=%s$" % 3
91 | name_estimators['scad'] = r"SCAD, $\gamma=%s$" % 3
92 | name_estimators['l05'] = r"$\ell_{1/2}$"
93 | name_estimators['l23'] = r"$\ell_{2/3}$"
94 |
95 |
96 | plt.close('all')
97 | fig, axarr = plt.subplots(2, 1, sharex=True, sharey=False, figsize=[
98 | 6.3, 4], constrained_layout=True)
99 |
100 | for idx, estimator in enumerate(penalties.keys()):
101 |
102 | axarr[0].semilogx(
103 | alphas / alphas[0], f1[estimator], label=name_estimators[estimator],
104 | c=colors[estimator])
105 |
106 | axarr[1].semilogx(
107 | alphas / alphas[0], prediction_error[estimator],
108 | label=name_estimators[estimator], c=colors[estimator])
109 |
110 | max_f1 = np.argmax(f1[estimator])
111 | axarr[0].vlines(
112 | x=alphas[max_f1] / alphas[0], ymin=0,
113 | ymax=np.max(f1[estimator]),
114 | color=colors[estimator], linestyle='--')
115 | line1 = axarr[0].plot(
116 | [alphas[max_f1] / alphas[0]], 0, clip_on=False,
117 | marker='X', color=colors[estimator], markersize=12)
118 |
119 | min_error = np.argmin(prediction_error[estimator])
120 |
121 | lims = axarr[1].get_ylim()
122 | axarr[1].vlines(
123 | x=alphas[min_error] / alphas[0], ymin=0,
124 | ymax=np.min(prediction_error[estimator]),
125 | color=colors[estimator], linestyle='--')
126 |
127 | line2 = axarr[1].plot(
128 | [alphas[min_error] / alphas[0]], 0, clip_on=False,
129 | marker='X', color=colors[estimator], markersize=12)
130 | axarr[1].set_xlabel(r"$\lambda / \lambda_{\mathrm{max}}$")
131 | axarr[0].set_ylabel("F1-score")
132 | axarr[0].set_ylim(ymin=0, ymax=1.0)
133 | axarr[1].set_ylim(ymin=0, ymax=lims[1])
134 | axarr[1].set_ylabel("pred. RMSE left-out")
135 | axarr[0].legend(
136 | bbox_to_anchor=(0, 1.02, 1, 0.2), loc="lower left",
137 | mode="expand", borderaxespad=0, ncol=5)
138 |
139 | plt.show(block=False)
140 |
--------------------------------------------------------------------------------
/examples/plot_ucurve.py:
--------------------------------------------------------------------------------
1 | """
2 | ==============================
3 | Show U-curve of regularization
4 | ==============================
5 | Illustrate the sweet spot of regularization: not too much, not too little.
6 | We showcase that for the Lasso estimator on the ``rcv1.binary`` dataset.
7 | """
8 |
9 | import numpy as np
10 | from numpy.linalg import norm
11 | import matplotlib.pyplot as plt
12 | from libsvmdata import fetch_libsvm
13 |
14 | from sklearn.model_selection import train_test_split
15 | from sklearn.metrics import mean_squared_error
16 |
17 | from skglm import Lasso
18 |
19 | # %%
20 | # First, we load the dataset and keep 2000 features.
21 | # We also retain only 2000 samples in the training dataset.
22 | X, y = fetch_libsvm("rcv1.binary")
23 |
24 | X = X[:, :2000]
25 | X_train, X_test, y_train, y_test = train_test_split(X, y)
26 | X_train, y_train = X_train[:2000], y_train[:2000]
27 |
28 | # %%
29 | # Next, we define the regularization path.
30 | # For Lasso, it is well known that there is an ``alpha_max`` above which the optimal solution is the zero vector.
31 | alpha_max = norm(X_train.T @ y_train, ord=np.inf) / len(y_train)
32 | alphas = alpha_max * np.geomspace(1, 1e-4)
33 |
34 | # %%
35 | # Let's train the estimator along the regularization path and then compute the MSE on train and test data.
36 | mse_train = []
37 | mse_test = []
38 |
39 | clf = Lasso(fit_intercept=False, tol=1e-8, warm_start=True)
40 | for idx, alpha in enumerate(alphas):
41 | clf.alpha = alpha
42 | clf.fit(X_train, y_train)
43 |
44 | mse_train.append(mean_squared_error(y_train, clf.predict(X_train)))
45 | mse_test.append(mean_squared_error(y_test, clf.predict(X_test)))
46 |
47 | # %%
48 | # Finally, we can plot the train and test MSE.
49 | # Notice the "sweet spot" at around ``1e-4``, which sits at the boundary between underfitting and overfitting.
50 | plt.close('all')
51 | plt.semilogx(alphas, mse_train, label='train MSE')
52 | plt.semilogx(alphas, mse_test, label='test MSE')
53 | plt.legend()
54 | plt.title("Mean squared error")
55 | plt.xlabel(r"Lasso regularization strength $\lambda$")
56 | plt.show(block=False)
57 |
--------------------------------------------------------------------------------
/examples/plot_zero_weights_lasso.py:
--------------------------------------------------------------------------------
1 | """
2 | =====================================
3 | Weighted Lasso with some zero weights
4 | =====================================
5 |
6 | This example demonstrates how to use a weighted lasso with some vanishing
7 | weights. The fast solver is adapted to use primal Anderson acceleration,
8 | allowing it to not compute the dual and handle 0 weights.
9 | """
10 |
11 | import numpy as np
12 | import matplotlib.pyplot as plt
13 |
14 | from skglm import WeightedLasso
15 | from skglm.utils.data import make_correlated_data
16 |
17 | n_features = 100
18 | w_true = np.zeros(n_features)
19 | np.random.seed(0)
20 | w_true[np.random.choice(n_features, 10, replace=False)] = np.random.choice([-1, 1], 10)
21 | X, y, w_true = make_correlated_data(
22 | n_samples=100, n_features=n_features, random_state=0, w_true=w_true)
23 |
24 |
25 | weights = np.empty(n_features)
26 | # unpenalize the first 10 features:
27 | weights[:10] = 0
28 | # put a large penalty on features 10 to 49
29 | weights[10:50] = 5
30 | # put a small penalty on the last 50 features
31 | weights[50:] = 1
32 |
33 | alpha_max = np.max(np.abs(X[:, weights != 0].T @ y / weights[weights != 0])) / len(y)
34 | clf = WeightedLasso(
35 | alpha=alpha_max/50, weights=weights, fit_intercept=False).fit(X, y)
36 |
37 |
38 | fig, axarr = plt.subplots(1, 3, sharey=True, figsize=(
39 | 10.5, 3.5), constrained_layout=True)
40 | axarr[0].stem(np.arange(10), clf.coef_[:10])
41 | axarr[0].set_title(r"unpenalized coefs: all $\neq 0$")
42 | axarr[1].stem(np.arange(10, 50), clf.coef_[10:50])
43 | axarr[1].set_title(r"heavily penalized coefs: few $\neq 0$")
44 | axarr[2].stem(np.arange(50, 100), clf.coef_[50:])
45 | axarr[2].set_title(r"lightly penalized coefs: many $\neq 0$")
46 |
47 | axarr[1].set_xlabel("feature index")
48 | plt.show(block=False)
49 |
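50 | # Illustrative check (a sketch, not part of the original example): with zero
51 | # weights the first 10 features are unpenalized, so their coefficients are
52 | # expected to all be nonzero, while the heavily penalized block stays sparse.
53 | n_nonzero_unpenalized = np.sum(clf.coef_[:10] != 0)
54 | n_nonzero_heavy = np.sum(clf.coef_[10:50] != 0)
55 | print(f"nonzeros in unpenalized block: {n_nonzero_unpenalized} / 10")
56 | print(f"nonzeros in heavily penalized block: {n_nonzero_heavy} / 40")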
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "skglm"
7 | description = "A fast and modular scikit-learn replacement for generalized linear models"
8 | authors = [
9 | {name = "Mathurin Massias", email = "mathurin.massias@gmail.com"},
10 | {name = "Badr Moufad", email = "badr.moufad@emines.um6p.ma"},
11 | {name = "Pierre-Antoine Bannier", email = "pierreantoine.bannier@gmail.com"},
12 | {name = "Quentin Bertrand", email = "quentin.bertrand@mila.quebec"},
13 | {name = "Quentin Klopfenstein", email = "quentin.klopfenstein@uni.lu"}
14 | ]
15 | license = {text = "BSD (3-Clause)"}
16 | readme = {file = "README.md", content-type = "text/markdown"}
17 | dependencies = [
18 | "numpy>=1.12",
19 | "numba",
20 | "scikit-learn>=1.6",
21 | "scipy>=0.18.0",
22 | ]
23 | dynamic = ["version"]
24 |
25 | requires-python = ">=3.9"
26 |
27 | classifiers = [
28 | "Programming Language :: Python :: 3 :: Only",
29 | "Programming Language :: Python :: 3.9",
30 | "Programming Language :: Python :: 3.10",
31 | "Programming Language :: Python :: 3.11",
32 | "Programming Language :: Python :: 3.12",
33 | "Programming Language :: Python :: 3.13",
34 | ]
35 |
36 | [tool.setuptools.dynamic]
37 | version = {attr = "skglm.__version__"}
38 |
39 |
40 | [project.urls]
41 | Homepage = "https://contrib.scikit-learn.org/skglm"
42 | Source = "https://github.com/scikit-learn-contrib/skglm.git"
43 |
44 |
45 | [project.optional-dependencies]
46 | test = [
47 | "pytest",
48 | "flake8",
49 | "coverage",
50 | "numpydoc",
51 | "celer",
52 | ]
53 |
54 | doc = [
55 | "benchopt",
56 | "libsvmdata>=0.2",
57 | "matplotlib>=2.0.0",
58 | "myst_parser",
59 | "numpydoc",
60 | "pillow",
61 | "sphinx-bootstrap-theme",
62 | "sphinx_copybutton",
63 | "sphinx-gallery",
64 | "sphinx-design",
65 | "pytest",
66 | "lifelines",
67 | "pydata_sphinx_theme",
68 | "sphinx-sitemap",
69 | "sphinxext-opengraph",
70 | ]
71 |
72 |
73 | [tool.setuptools]
74 | license-files = []
75 |
--------------------------------------------------------------------------------
/skglm/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.5dev'
2 |
3 | from skglm.estimators import ( # noqa F401
4 | Lasso, WeightedLasso, ElasticNet, MCPRegression, MultiTaskLasso, LinearSVC,
5 | SparseLogisticRegression, GeneralizedLinearEstimator, CoxEstimator, GroupLasso,
6 | )
7 |
--------------------------------------------------------------------------------
/skglm/datafits/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BaseDatafit, BaseMultitaskDatafit
2 | from .single_task import (Quadratic, QuadraticSVC, Logistic, Huber, Poisson, Gamma,
3 | Cox, WeightedQuadratic, QuadraticHessian,)
4 | from .multi_task import QuadraticMultiTask
5 | from .group import QuadraticGroup, LogisticGroup
6 |
7 |
8 | __all__ = [
9 | BaseDatafit, BaseMultitaskDatafit,
10 | Quadratic, QuadraticSVC, Logistic, Huber, Poisson, Gamma, Cox,
11 | QuadraticMultiTask,
12 | QuadraticGroup, LogisticGroup, WeightedQuadratic,
13 | QuadraticHessian
14 | ]
15 |
--------------------------------------------------------------------------------
/skglm/datafits/base.py:
--------------------------------------------------------------------------------
1 |
2 | class BaseDatafit:
3 | """Base class for datafits."""
4 |
5 | def get_spec(self):
6 | """Specify the numba types of the class attributes.
7 |
8 | Returns
9 | -------
10 | spec: Tuple of (attribute_name, dtype)
11 | spec to be passed to Numba jitclass to compile the class.
12 | """
13 |
14 | def params_to_dict(self):
15 | """Get the parameters to initialize an instance of the class.
16 |
17 | Returns
18 | -------
19 | dict_of_params : dict
20 | The parameters to instantiate an object of the class.
21 | """
22 |
23 | def initialize(self, X, y):
24 | """Pre-computations before fitting on X and y.
25 |
26 | Parameters
27 | ----------
28 | X : array, shape (n_samples, n_features)
29 | Design matrix.
30 |
31 | y : array, shape (n_samples,)
32 | Target vector.
33 | """
34 |
35 | def initialize_sparse(self, X_data, X_indptr, X_indices, y):
36 | """Pre-computations before fitting on X and y when X is a sparse matrix.
37 |
38 | Parameters
39 | ----------
40 | X_data : array, shape (n_elements,)
41 | `data` attribute of the sparse CSC matrix X.
42 |
43 | X_indptr : array, shape (n_features + 1,)
44 | `indptr` attribute of the sparse CSC matrix X.
45 |
46 | X_indices : array, shape (n_elements,)
47 | `indices` attribute of the sparse CSC matrix X.
48 |
49 | y : array, shape (n_samples,)
50 | Target vector.
51 | """
52 |
53 | def value(self, y, w, Xw):
54 | """Value of datafit at vector w.
55 |
56 | Parameters
57 | ----------
58 | y : array_like, shape (n_samples,)
59 | Target vector.
60 |
61 | w : array_like, shape (n_features,)
62 | Coefficient vector.
63 |
64 | Xw: array_like, shape (n_samples,)
65 | Model fit.
66 |
67 | Returns
68 | -------
69 | value : float
70 | The datafit value at vector w.
71 | """
72 |
73 |
74 | class BaseMultitaskDatafit:
75 | """Base class for multitask datafits."""
76 |
77 | def get_spec(self):
78 | """Specify the numba types of the class attributes.
79 |
80 | Returns
81 | -------
82 | spec: Tuple of (attribute_name, dtype)
83 | spec to be passed to Numba jitclass to compile the class.
84 | """
85 |
86 | def params_to_dict(self):
87 | """Get the parameters to initialize an instance of the class.
88 |
89 | Returns
90 | -------
91 | dict_of_params : dict
92 | The parameters to instantiate an object of the class.
93 | """
94 |
95 | def initialize(self, X, Y):
96 | """Store useful values before fitting on X and Y.
97 |
98 | Parameters
99 | ----------
100 | X : array, shape (n_samples, n_features)
101 | Design matrix.
102 |
103 | Y : array, shape (n_samples, n_tasks)
104 | Multitask target.
105 | """
106 |
107 | def initialize_sparse(self, X_data, X_indptr, X_indices, Y):
108 | """Store useful values before fitting on X and Y, when X is sparse.
109 |
110 | Parameters
111 | ----------
112 | X_data : array-like
113 | `data` attribute of the sparse CSC matrix X.
114 |
115 | X_indptr : array-like
116 | `indptr` attribute of the sparse CSC matrix X.
117 |
118 | X_indices : array-like
119 | `indices` attribute of the sparse CSC matrix X.
120 |
121 | Y : array, shape (n_samples, n_tasks)
122 | Target matrix.
123 | """
124 |
125 | def value(self, Y, W, XW):
126 | """Value of datafit at matrix W.
127 |
128 | Parameters
129 | ----------
130 | Y : array_like, shape (n_samples, n_tasks)
131 | Target matrix.
132 |
133 | W : array_like, shape (n_features, n_tasks)
134 | Coefficient matrix.
135 |
136 | XW: array_like, shape (n_samples, n_tasks)
137 | Model fit.
138 |
139 | Returns
140 | -------
141 | value : float
142 | The datafit value evaluated at matrix W.
143 | """
144 |
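145 |
146 |
147 | # Illustrative sketch (not part of the library API): a minimal datafit
148 | # implementing the interface above, namely the unscaled least-squares loss
149 | # 0.5 * ||y - Xw||^2. Concrete datafits additionally expose gradient-related
150 | # methods required by the solver they are paired with.
151 | class _ExampleHalfSquaredLoss(BaseDatafit):
152 |     def get_spec(self):
153 |         # no attributes to type for Numba
154 |         return ()
155 |
156 |     def params_to_dict(self):
157 |         # no parameters needed to re-instantiate the class
158 |         return dict()
159 |
160 |     def initialize(self, X, y):
161 |         # nothing to pre-compute for this toy datafit
162 |         pass
163 |
164 |     def value(self, y, w, Xw):
165 |         return 0.5 * ((y - Xw) ** 2).sum()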
--------------------------------------------------------------------------------
/skglm/datafits/group.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy.linalg import norm
3 | from numba import int32, float64
4 |
5 | from skglm.datafits.base import BaseDatafit
6 | from skglm.datafits.single_task import Logistic
7 | from skglm.utils.sparse_ops import spectral_norm, sparse_columns_slice
8 |
9 |
10 | class QuadraticGroup(BaseDatafit):
11 | r"""Quadratic datafit used with group penalties.
12 |
13 | The datafit reads:
14 |
15 | .. math:: 1 / (2 xx n_"samples") ||y - Xw||_2 ^ 2
16 |
17 | Attributes
18 | ----------
19 | grp_indices : array, shape (n_features,)
20 | The group indices stacked contiguously
21 | ([grp1_indices, grp2_indices, ...]).
22 |
23 | grp_ptr : array, shape (n_groups + 1,)
24 | The group pointers such that two consecutive elements delimit
25 | the indices of a group in ``grp_indices``.
26 | """
27 |
28 | def __init__(self, grp_ptr, grp_indices):
29 | self.grp_ptr, self.grp_indices = grp_ptr, grp_indices
30 |
31 | def get_spec(self):
32 | spec = (
33 | ('grp_ptr', int32[:]),
34 | ('grp_indices', int32[:]),
35 | )
36 | return spec
37 |
38 | def params_to_dict(self):
39 | return dict(grp_ptr=self.grp_ptr,
40 | grp_indices=self.grp_indices)
41 |
42 | def get_lipschitz(self, X, y):
43 | grp_ptr, grp_indices = self.grp_ptr, self.grp_indices
44 | n_groups = len(grp_ptr) - 1
45 |
46 | lipschitz = np.zeros(n_groups)
47 | for g in range(n_groups):
48 | grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]]
49 | X_g = X[:, grp_g_indices]
50 | lipschitz[g] = norm(X_g, ord=2) ** 2 / len(y)
51 |
52 | return lipschitz
53 |
54 | def get_lipschitz_sparse(self, X_data, X_indptr, X_indices, y):
55 | grp_ptr, grp_indices = self.grp_ptr, self.grp_indices
56 | n_groups = len(grp_ptr) - 1
57 |
58 | lipschitz = np.zeros(n_groups, dtype=X_data.dtype)
59 | for g in range(n_groups):
60 | grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]]
61 | X_data_g, X_indptr_g, X_indices_g = sparse_columns_slice(
62 | grp_g_indices, X_data, X_indptr, X_indices)
63 | lipschitz[g] = spectral_norm(
64 | X_data_g, X_indptr_g, X_indices_g, len(y)) ** 2 / len(y)
65 |
66 | return lipschitz
67 |
68 | def value(self, y, w, Xw):
69 | return norm(y - Xw) ** 2 / (2 * len(y))
70 |
71 | def gradient_g(self, X, y, w, Xw, g):
72 | grp_ptr, grp_indices = self.grp_ptr, self.grp_indices
73 | grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]]
74 |
75 | grad_g = np.zeros(len(grp_g_indices))
76 | for idx, j in enumerate(grp_g_indices):
77 | grad_g[idx] = self.gradient_scalar(X, y, w, Xw, j)
78 |
79 | return grad_g
80 |
81 | def gradient_g_sparse(self, X_data, X_indptr, X_indices, y, w, Xw, g):
82 | grp_ptr, grp_indices = self.grp_ptr, self.grp_indices
83 | grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]]
84 |
85 | grad_g = np.zeros(len(grp_g_indices))
86 | for idx, j in enumerate(grp_g_indices):
87 | grad_g[idx] = self.gradient_scalar_sparse(
88 | X_data, X_indptr, X_indices, y, w, Xw, j)
89 |
90 | return grad_g
91 |
92 | def gradient_scalar_sparse(self, X_data, X_indptr, X_indices, y, w, Xw, j):
93 | grad_j = 0.
94 | for i in range(X_indptr[j], X_indptr[j+1]):
95 | grad_j += X_data[i] * (Xw[X_indices[i]] - y[X_indices[i]])
96 |
97 | return grad_j / len(y)
98 |
99 | def gradient_scalar(self, X, y, w, Xw, j):
100 | return X[:, j] @ (Xw - y) / len(y)
101 |
102 | def intercept_update_step(self, y, Xw):
103 | return np.mean(Xw - y)
104 |
105 |
106 | class LogisticGroup(Logistic):
107 | r"""Logistic datafit used with group penalties.
108 |
109 | The datafit reads:
110 |
111 | .. math:: 1 / n_"samples" sum_(i=1)^(n_"samples") log(1 + exp(-y_i (Xw)_i))
112 |
113 | Attributes
114 | ----------
115 | grp_indices : array, shape (n_features,)
116 | The group indices stacked contiguously
117 | ``[grp1_indices, grp2_indices, ...]``.
118 |
119 | grp_ptr : array, shape (n_groups + 1,)
120 | The group pointers such that two consecutive elements delimit
121 | the indices of a group in ``grp_indices``.
122 |
123 | lipschitz : array, shape (n_groups,)
124 | The lipschitz constants for each group.
125 | """
126 |
127 | def __init__(self, grp_ptr, grp_indices):
128 | self.grp_ptr, self.grp_indices = grp_ptr, grp_indices
129 |
130 | def get_spec(self):
131 | spec = (
132 | ('grp_ptr', int32[:]),
133 | ('grp_indices', int32[:]),
134 | ('lipschitz', float64[:])
135 | )
136 | return spec
137 |
138 | def params_to_dict(self):
139 | return dict(grp_ptr=self.grp_ptr,
140 | grp_indices=self.grp_indices)
141 |
142 | def initialize(self, X, y):
143 | grp_ptr, grp_indices = self.grp_ptr, self.grp_indices
144 | n_groups = len(grp_ptr) - 1
145 |
146 | lipschitz = np.zeros(n_groups)
147 | for g in range(n_groups):
148 | grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]]
149 | X_g = X[:, grp_g_indices]
150 | lipschitz[g] = norm(X_g, ord=2) ** 2 / (4 * len(y))
151 |
152 | self.lipschitz = lipschitz
153 |
154 | def gradient_g(self, X, y, w, Xw, g):
155 | grp_ptr, grp_indices = self.grp_ptr, self.grp_indices
156 | grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]]
157 | raw_grad_val = self.raw_grad(y, Xw)
158 |
159 | grad_g = np.zeros(len(grp_g_indices))
160 | for idx, j in enumerate(grp_g_indices):
161 | grad_g[idx] = X[:, j] @ raw_grad_val
162 |
163 | return grad_g
164 |
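165 |
166 | if __name__ == "__main__":
167 |     # Illustrative sketch (not part of the library): ``grp_ptr`` and
168 |     # ``grp_indices`` follow a CSR-like layout, the g-th group being
169 |     # ``grp_indices[grp_ptr[g]:grp_ptr[g + 1]]``. Here 6 features are split
170 |     # into the groups [0, 1, 2], [3, 4] and [5].
171 |     grp_indices = np.array([0, 1, 2, 3, 4, 5], dtype=np.int32)
172 |     grp_ptr = np.array([0, 3, 5, 6], dtype=np.int32)
173 |     datafit = QuadraticGroup(grp_ptr, grp_indices)
174 |
175 |     rng = np.random.default_rng(0)
176 |     X = rng.standard_normal((10, 6))
177 |     y = rng.standard_normal(10)
178 |     w = np.zeros(6)
179 |     # with w = 0 the quadratic datafit value is ||y||^2 / (2 * n_samples)
180 |     print(datafit.value(y, w, X @ w), norm(y) ** 2 / (2 * len(y)))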
--------------------------------------------------------------------------------
/skglm/datafits/multi_task.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy.linalg import norm
3 | from numba import float64
4 |
5 | from skglm.datafits.base import BaseMultitaskDatafit
6 |
7 |
8 | class QuadraticMultiTask(BaseMultitaskDatafit):
9 | """Quadratic datafit used for multi-task regression.
10 |
11 | The datafit reads:
12 |
13 |     .. math:: 1 / (2 xx n_"samples") ||Y - XW||_F ^ 2
14 |
15 | Attributes
16 | ----------
17 | XtY : array, shape (n_features, n_tasks)
18 | Pre-computed quantity used during the gradient evaluation.
19 | """
20 |
21 | def __init__(self):
22 | pass
23 |
24 | def get_spec(self):
25 | spec = (
26 | ('XtY', float64[:, :]),
27 | )
28 | return spec
29 |
30 | def params_to_dict(self):
31 | return dict()
32 |
33 | def get_lipschitz(self, X, Y):
34 | n_samples, n_features = X.shape
35 |
36 | lipschitz = np.zeros(n_features)
37 | for j in range(n_features):
38 | lipschitz[j] = norm(X[:, j]) ** 2 / n_samples
39 |
40 | return lipschitz
41 |
42 | def get_lipschitz_sparse(self, X_data, X_indptr, X_indices, Y):
43 | n_samples, n_tasks = Y.shape
44 | n_features = len(X_indptr) - 1
45 |
46 | lipschitz = np.zeros(n_features)
47 | for j in range(n_features):
48 | nrm2 = 0.
49 | for idx in range(X_indptr[j], X_indptr[j + 1]):
50 | nrm2 += X_data[idx] ** 2
51 |
52 | lipschitz[j] = nrm2 / n_samples
53 |
54 | return lipschitz
55 |
56 | def initialize(self, X, Y):
57 | """Compute optimization quantities before fitting on X and Y."""
58 | self.XtY = X.T @ Y
59 |
60 | def initialize_sparse(self, X_data, X_indptr, X_indices, Y):
61 | """Pre-computations before fitting on X and Y, when X is sparse."""
62 | _, n_tasks = Y.shape
63 | n_features = len(X_indptr) - 1
64 |
65 | self.XtY = np.zeros((n_features, n_tasks))
66 | for j in range(n_features):
67 | xtY = np.zeros(n_tasks)
68 | for idx in range(X_indptr[j], X_indptr[j + 1]):
69 | for t in range(n_tasks):
70 | xtY[t] += X_data[idx] * Y[X_indices[idx], t]
71 |
72 | self.XtY[j, :] = xtY
73 |
74 | def value(self, Y, W, XW):
75 | """Value of datafit at matrix W."""
76 | n_samples = Y.shape[0]
77 | return np.sum((Y - XW) ** 2) / (2 * n_samples)
78 |
79 | def gradient_j(self, X, Y, W, XW, j):
80 | """Gradient with respect to j-th coordinate of W."""
81 | n_samples = X.shape[0]
82 | return (X[:, j] @ XW - self.XtY[j, :]) / n_samples
83 |
84 | def gradient_j_sparse(self, X_data, X_indptr, X_indices, Y, XW, j):
85 | """Gradient with respect to j-th coordinate of W when X is sparse."""
86 | n_samples, n_tasks = Y.shape
87 | XjTXW = np.zeros(n_tasks)
88 | for t in range(n_tasks):
89 | for i in range(X_indptr[j], X_indptr[j+1]):
90 | XjTXW[t] += X_data[i] * XW[X_indices[i], t]
91 | return (XjTXW - self.XtY[j, :]) / n_samples
92 |
93 | def full_grad_sparse(self, X_data, X_indptr, X_indices, Y, XW):
94 | """Compute the full gradient when X is sparse."""
95 | n_features = X_indptr.shape[0] - 1
96 | n_samples, n_tasks = Y.shape
97 | grad = np.zeros((n_features, n_tasks))
98 | for j in range(n_features):
99 | XjTXW = np.zeros(n_tasks)
100 | for t in range(n_tasks):
101 | for i in range(X_indptr[j], X_indptr[j+1]):
102 | XjTXW[t] += X_data[i] * XW[X_indices[i], t]
103 | grad[j, :] = (XjTXW - self.XtY[j, :]) / n_samples
104 | return grad
105 |
106 | def intercept_update_step(self, Y, XW):
107 | return np.sum(XW - Y, axis=0) / len(Y)
108 |
--------------------------------------------------------------------------------
/skglm/experimental/__init__.py:
--------------------------------------------------------------------------------
1 | from .reweighted import IterativeReweightedL1
2 | from .sqrt_lasso import SqrtLasso, SqrtQuadratic
3 | from .pdcd_ws import PDCD_WS
4 | from .quantile_regression import Pinball
5 |
6 | __all__ = [
7 | IterativeReweightedL1,
8 | PDCD_WS,
9 | Pinball,
10 | SqrtQuadratic,
11 | SqrtLasso,
12 | ]
13 |
--------------------------------------------------------------------------------
/skglm/experimental/_plot_sqrt_lasso.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | from numpy.linalg import norm
4 | import matplotlib.pyplot as plt
5 | from skglm.utils.data import make_correlated_data
6 | from skglm.experimental.sqrt_lasso import SqrtLasso, _chambolle_pock_sqrt
7 |
8 | X, y, _ = make_correlated_data(n_samples=200, n_features=100, random_state=24)
9 |
10 | n_samples, n_features = X.shape
11 | alpha_max = norm(X.T @ y, ord=np.inf) / (norm(y) * np.sqrt(n_samples))
12 |
13 | alpha = alpha_max / 10
14 |
15 |
16 | max_iter = 1000
17 | obj_freq = 10
18 | w, _, objs = _chambolle_pock_sqrt(X, y, alpha, max_iter=max_iter, obj_freq=obj_freq)
19 |
20 |
21 | # no convergence issue if n_features < n_samples, can use ProxNewton
22 | # clf = SqrtLasso(alpha=alpha / np.sqrt(n_samples), verbose=2, tol=1e-10)
23 | clf = SqrtLasso(alpha=alpha, verbose=2, tol=1e-10)
24 | clf.fit(X, y)
25 |
26 | # consider that our solver has converged
27 | w_star = clf.coef_
28 | p_star = norm(X @ w_star - y) / np.sqrt(n_samples) + alpha * norm(w_star, ord=1)
29 |
30 | plt.close("all")
31 | plt.semilogy(np.arange(1, max_iter+1, obj_freq), np.array(objs) - p_star)
32 | plt.xlabel("CP iteration")
33 | plt.ylabel("$F(x) - F(x^*)$")
34 | plt.show(block=False)
35 |
--------------------------------------------------------------------------------
/skglm/experimental/pdcd_ws.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | import numpy as np
4 | from numpy.linalg import norm
5 | from scipy.sparse import issparse
6 |
7 | from numba import njit
8 | from skglm.solvers import BaseSolver
9 |
10 | from sklearn.exceptions import ConvergenceWarning
11 |
12 |
13 | class PDCD_WS(BaseSolver):
14 | r"""Primal-Dual Coordinate Descent solver with working sets.
15 |
16 | It solves
17 |
18 | .. math::
19 |
20 | \min_w F(Xw) + G(w)
21 |
22 |
23 | using a primal-dual method on the saddle point problem
24 |
25 | .. math::
26 |
27 | min_w max_z (:Xw, z:) + G(w) - F^**(z)
28 |
29 | where :math:`F` is the datafit term (:math:`F^**` its Fenchel conjugate)
30 | and :math:`G` is the penalty term.
31 |
32 | The datafit is required to be convex and proximable. Also, the penalty
33 | is required to be convex, separable, and proximable.
34 |
35 | The solver is an adaptation of algorithm [1]_ to working sets [2]_.
36 | The working sets are built using a fixed point distance strategy
37 |     where each feature is assigned a score based on how much its coefficient varies
38 | when performing a primal update
39 |
40 | .. math::
41 |
42 | "score"_j = abs(w_j - "prox"_(tau_j, G_j)(w_j - tau_j (:X_j, z:)))
43 |
44 | where :math:`tau_j` is the primal step associated with the j-th feature.
45 |
46 | Parameters
47 | ----------
48 | max_iter : int, optional
49 |         The maximum number of iterations, or equivalently the
50 |         maximum number of solved subproblems.
51 |
52 | max_epochs : int, optional
53 | Maximum number of primal CD epochs on each subproblem.
54 |
55 | dual_init : array, shape (n_samples,) default None
56 | The initialization of dual variables.
57 | If ``None``, they are initialized as the 0 vector ``np.zeros(n_samples)``.
58 |
59 | p0 : int, optional
60 | First working set size.
61 |
62 | tol : float, optional
63 | The tolerance for the optimization.
64 |
65 | verbose : bool or int, default False
66 | Amount of verbosity. 0/False is silent.
67 |
68 | References
69 | ----------
70 | .. [1] Olivier Fercoq and Pascal Bianchi,
71 | "A Coordinate-Descent Primal-Dual Algorithm with Large Step Size and Possibly
72 | Nonseparable Functions", SIAM Journal on Optimization, 2020,
73 | https://epubs.siam.org/doi/10.1137/18M1168480,
74 | code: https://github.com/Badr-MOUFAD/Fercoq-Bianchi-solver
75 |
76 | .. [2] Bertrand, Q. and Klopfenstein, Q. and Bannier, P.-A. and Gidel, G.
77 | and Massias, M.
78 | "Beyond L1: Faster and Better Sparse Models with skglm", NeurIPS, 2022
79 | https://arxiv.org/abs/2204.07826
80 | """
81 |
82 | _datafit_required_attr = ('prox_conjugate',)
83 | _penalty_required_attr = ("prox_1d",)
84 |
85 | def __init__(
86 | self, max_iter=1000, max_epochs=1000, dual_init=None, p0=100, tol=1e-6,
87 | fit_intercept=False, warm_start=True, verbose=False
88 | ):
89 | self.max_iter = max_iter
90 | self.max_epochs = max_epochs
91 | self.dual_init = dual_init
92 | self.p0 = p0
93 | self.tol = tol
94 | self.fit_intercept = fit_intercept # TODO not handled
95 | self.warm_start = warm_start # TODO not handled
96 | self.verbose = verbose
97 |
98 | def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
99 | n_samples, n_features = X.shape
100 |
101 | # init steps
102 |         # Despite violating the conditions mentioned in [1],
103 |         # this choice of steps yields in practice a convergent algorithm
104 |         # with a better speed of convergence.
105 | dual_step = 1 / norm(X, ord=2)
106 | primal_steps = 1 / norm(X, axis=0, ord=2)
107 |
108 | # primal vars
109 | w = np.zeros(n_features) if w_init is None else w_init
110 | Xw = np.zeros(n_samples) if Xw_init is None else Xw_init
111 |
112 | # dual vars
113 | if self.dual_init is None:
114 | z = np.zeros(n_samples)
115 | z_bar = np.zeros(n_samples)
116 | else:
117 | z = self.dual_init.copy()
118 | z_bar = self.dual_init.copy()
119 |
120 | p_objs = []
121 | stop_crit = 0.
122 | all_features = np.arange(n_features)
123 |
124 | for iteration in range(self.max_iter):
125 |
126 | # check convergence using fixed-point criteria on both dual and primal
127 | opts_primal = _scores_primal(X, w, z, penalty, primal_steps, all_features)
128 | opt_dual = _score_dual(y, z, Xw, datafit, dual_step)
129 |
130 | stop_crit = max(max(opts_primal), opt_dual)
131 |
132 | if self.verbose:
133 | current_p_obj = datafit.value(y, w, Xw) + penalty.value(w)
134 | print(
135 | f"Iteration {iteration+1}: {current_p_obj:.10f}, "
136 | f"stopping crit: {stop_crit:.2e}")
137 |
138 | if stop_crit <= self.tol:
139 | break
140 |
141 | # build ws
142 | gsupp_size = (w != 0).sum()
143 | ws_size = max(min(self.p0, n_features),
144 | min(n_features, 2 * gsupp_size))
145 |
146 | # similar to np.argsort()[-ws_size:] but without full sort
147 | ws = np.argpartition(opts_primal, -ws_size)[-ws_size:]
148 |
149 | # solve sub problem
150 | # inplace update of w, Xw, z, z_bar
151 | PDCD_WS._solve_subproblem(
152 | y, X, w, Xw, z, z_bar, datafit, penalty,
153 | primal_steps, dual_step, ws, self.max_epochs, tol_in=0.3*stop_crit)
154 |
155 | current_p_obj = datafit.value(y, w, Xw) + penalty.value(w)
156 | p_objs.append(current_p_obj)
157 | else:
158 | warnings.warn(
159 | f"PDCD_WS did not converge for tol={self.tol:.3e} "
160 | f"and max_iter={self.max_iter}.\n"
161 |                 "Consider increasing `max_iter` or `tol`.",
162 | category=ConvergenceWarning
163 | )
164 |
165 | return w, np.asarray(p_objs), stop_crit
166 |
167 | @staticmethod
168 | @njit
169 | def _solve_subproblem(y, X, w, Xw, z, z_bar, datafit, penalty,
170 | primal_steps, dual_step, ws, max_epochs, tol_in):
171 | n_features = X.shape[1]
172 |
173 | for epoch in range(max_epochs):
174 |
175 | for j in ws:
176 | # update primal
177 | old_w_j = w[j]
178 | pseudo_grad = X[:, j] @ (2 * z_bar - z)
179 | w[j] = penalty.prox_1d(
180 | old_w_j - primal_steps[j] * pseudo_grad,
181 | primal_steps[j], j)
182 |
183 |                 # keep Xw in sync with X @ w
184 | delta_w_j = w[j] - old_w_j
185 | if delta_w_j:
186 | Xw += delta_w_j * X[:, j]
187 |
188 | # update dual
189 | z_bar[:] = datafit.prox_conjugate(z + dual_step * Xw,
190 | dual_step, y)
191 | z += (z_bar - z) / n_features
192 |
193 | # check convergence using fixed-point criteria on both dual and primal
194 | if epoch % 10 == 0:
195 | opts_primal_in = _scores_primal(X, w, z, penalty, primal_steps, ws)
196 | opt_dual_in = _score_dual(y, z, Xw, datafit, dual_step)
197 |
198 | stop_crit_in = max(max(opts_primal_in), opt_dual_in)
199 |
200 | if stop_crit_in <= tol_in:
201 | break
202 |
203 | def custom_checks(self, X, y, datafit, penalty):
204 | if issparse(X):
205 | raise ValueError(
206 | "Sparse matrices are not yet supported in `PDCD_WS` solver."
207 | )
208 |
209 |
210 | @njit
211 | def _scores_primal(X, w, z, penalty, primal_steps, ws):
212 | scores_ws = np.zeros(len(ws))
213 |
214 | for idx, j in enumerate(ws):
215 | next_w_j = penalty.prox_1d(w[j] - primal_steps[j] * X[:, j] @ z,
216 | primal_steps[j], j)
217 | scores_ws[idx] = abs(w[j] - next_w_j)
218 |
219 | return scores_ws
220 |
221 |
222 | @njit
223 | def _score_dual(y, z, Xw, datafit, dual_step):
224 | next_z = datafit.prox_conjugate(z + dual_step * Xw,
225 | dual_step, y)
226 | return norm(z - next_z, ord=np.inf)
227 |
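228 |
229 | # Illustrative usage sketch (assumptions: the ``Pinball`` datafit and ``L1``
230 | # penalty shipped with skglm, which expose the required ``prox_conjugate``
231 | # and ``prox_1d`` methods):
232 | #
233 | #     from skglm.experimental.quantile_regression import Pinball
234 | #     from skglm.penalties import L1
235 | #
236 | #     solver = PDCD_WS(tol=1e-6)
237 | #     w, p_objs, stop_crit = solver.solve(X, y, Pinball(0.5), L1(alpha=0.1))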
--------------------------------------------------------------------------------
/skglm/experimental/quantile_regression.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numba import float64
3 | from skglm.datafits import BaseDatafit
4 | from skglm.utils.prox_funcs import ST_vec
5 |
6 |
7 | class Pinball(BaseDatafit):
8 | r"""Pinball datafit.
9 |
10 | The datafit reads::
11 |
12 | sum_i quantile_level * max(y_i - Xw_i, 0) +
13 | (1 - quantile_level) * max(Xw_i - y_i, 0)
14 |
15 | with ``quantile_level`` in [0, 1].
16 |
17 | Parameters
18 | ----------
19 | quantile_level : float
20 | Quantile level must be in [0, 1]. When ``quantile_level=0.5``,
21 | the datafit becomes a Least Absolute Deviation (LAD) datafit.
22 | """
23 |
24 | def __init__(self, quantile_level):
25 | self.quantile_level = quantile_level
26 |
27 | def value(self, y, w, Xw):
28 | # implementation taken from
29 | # github.com/benchopt/benchmark_quantile_regression/blob/main/objective.py
30 | quantile_level = self.quantile_level
31 |
32 | residual = y - Xw
33 | sign = residual >= 0
34 |
35 | loss = (quantile_level * sign * residual -
36 | (1 - quantile_level) * (1 - sign) * residual)
37 | return np.sum(loss)
38 |
39 | def prox(self, w, step, y):
40 | """Prox of ``step * pinball``."""
41 | shift_cst = (self.quantile_level - 1/2) * step
42 | return y - ST_vec(y - w - shift_cst, step / 2)
43 |
44 | def prox_conjugate(self, z, step, y):
45 | """Prox of ``step * pinball^*``."""
46 | # using Moreau decomposition
47 | inv_step = 1 / step
48 | return z - step * self.prox(inv_step * z, inv_step, y)
49 |
50 | def subdiff_distance(self, Xw, z, y):
51 | """Distance of ``z`` to subdiff of pinball at ``Xw``."""
52 | # computation note: \partial ||y - . ||_1(Xw) = -\partial || . ||_1(y - Xw)
53 | y_minus_Xw = y - Xw
54 | shift_cst = self.quantile_level - 1/2
55 |
56 | max_distance = 0.
57 | for i in range(len(y)):
58 |
59 | if y_minus_Xw[i] == 0.:
60 |                 distance_i = max(0, abs(z[i] + shift_cst) - 1/2)
61 |             else:
62 |                 distance_i = abs(z[i] + shift_cst + np.sign(y_minus_Xw[i]) / 2)
63 |
64 | max_distance = max(max_distance, distance_i)
65 |
66 | return max_distance
67 |
68 | def get_spec(self):
69 | spec = (
70 | ('quantile_level', float64),
71 | )
72 | return spec
73 |
74 | def params_to_dict(self):
75 | return dict(quantile_level=self.quantile_level)
76 |
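77 |
78 | if __name__ == "__main__":
79 |     # Illustrative sanity check (a sketch, not part of the library): compare
80 |     # the closed-form prox of ``step * pinball`` with a brute-force
81 |     # minimization of ``pinball(u) + (u - w)^2 / (2 * step)`` on a fine 1D grid.
82 |     quantile_level, step = 0.3, 0.7
83 |     datafit = Pinball(quantile_level)
84 |     y, w = np.array([1.0]), np.array([-0.5])
85 |
86 |     grid = np.linspace(-3.0, 3.0, 200001)
87 |     objs = (quantile_level * np.maximum(y - grid, 0.0)
88 |             + (1 - quantile_level) * np.maximum(grid - y, 0.0)
89 |             + (grid - w) ** 2 / (2 * step))
90 |     # both printed values are expected to match up to the grid resolution
91 |     print(datafit.prox(w, step, y)[0], grid[np.argmin(objs)])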
--------------------------------------------------------------------------------
/skglm/experimental/reweighted.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from skglm.datafits import Quadratic
3 | from skglm.estimators import GeneralizedLinearEstimator
4 | from skglm.penalties import WeightedL1, L0_5
5 | from skglm.utils.jit_compilation import compiled_clone
6 |
7 |
8 | class IterativeReweightedL1(GeneralizedLinearEstimator):
9 | r"""Reweighted L1-norm estimator.
10 |
11 |     This estimator solves non-convex problems by iteratively solving
12 | convex surrogates involving weighted L1 norms.
13 |
14 | Parameters
15 | ----------
16 | datafit : instance of BaseDatafit, optional
17 | Datafit. If None, ``datafit`` is initialized as a :class:`.Quadratic` datafit.
18 | ``datafit`` is replaced by a JIT-compiled instance when calling fit.
19 |
20 | penalty : instance of BasePenalty, optional
21 | Penalty. If None, `penalty` is initialized as a :class:`.L0_5` penalty.
22 | `penalty` is replaced by a JIT-compiled instance when calling fit.
23 |
24 | solver : instance of BaseSolver, optional
25 | Solver. If None, ``solver`` is initialized as an :class:`.AndersonCD` solver.
26 |
27 | n_reweights : int, optional
28 |         Number of reweightings performed (i.e., convex surrogates solved).
29 |
30 | Attributes
31 | ----------
32 | coef_ : array, shape (n_features,)
33 | Parameter vector (:math:`w` in the cost function formula).
34 |
35 | loss_history_ : list
36 | Objective history after every reweighting.
37 |
38 | References
39 | ----------
40 | .. [1] Candès et al. (2007), Enhancing sparsity by reweighted l1 minimization
41 | https://web.stanford.edu/~boyd/papers/pdf/rwl1.pdf
42 | """
43 |
44 | def __init__(self, datafit=Quadratic(), penalty=L0_5(1.), solver=None,
45 | n_reweights=5):
46 | super().__init__(datafit=datafit, penalty=penalty, solver=solver)
47 | self.n_reweights = n_reweights
48 |
49 | def fit(self, X, y):
50 | """Fit the model according to the given training data.
51 |
52 | Parameters
53 | ----------
54 | X : array-like, shape (n_samples, n_features)
55 | Training data, where n_samples is the number of samples and
56 | n_features is the number of features.
57 |
58 | y : array-like, shape (n_samples,)
59 | Target vector relative to X.
60 |
61 | Returns
62 | -------
63 | self :
64 | Fitted estimator.
65 | """
66 | if not hasattr(self.penalty, "derivative"):
67 | raise ValueError(
68 | "Missing `derivative` method. Reweighting is not implemented for " +
69 | f"penalty {self.penalty.__class__.__name__}")
70 |
71 | n_features = X.shape[1]
72 | # we need to compile this as it is not passed to solver.solve:
73 | self.penalty = compiled_clone(self.penalty)
74 | _penalty = WeightedL1(self.penalty.alpha, np.ones(n_features))
75 |
76 | self.loss_history_ = []
77 |
78 | for iter_reweight in range(self.n_reweights):
79 | coef_ = self.solver.solve(X, y, self.datafit, _penalty)[0]
80 | _penalty.weights = self.penalty.derivative(coef_)
81 |
82 | loss = (self.datafit.value(y, coef_, X @ coef_)
83 | + self.penalty.value(coef_))
84 | self.loss_history_.append(loss)
85 |
86 | if self.solver.verbose:
87 | print(f"Reweight {iter_reweight}/{self.n_reweights}, objective {loss}")
88 |
89 | self.coef_ = coef_
90 |
91 | return self
92 |
--------------------------------------------------------------------------------
/skglm/experimental/tests/test_quantile_regression.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | from numpy.linalg import norm
4 |
5 | from skglm.penalties import L1
6 | from skglm import GeneralizedLinearEstimator
7 | from skglm.experimental.pdcd_ws import PDCD_WS
8 | from skglm.experimental.quantile_regression import Pinball
9 |
10 | from skglm.utils.data import make_correlated_data
11 | from sklearn.linear_model import QuantileRegressor
12 |
13 |
14 | @pytest.mark.parametrize('quantile_level', [0.3, 0.5, 0.7])
15 | def test_PDCD_WS(quantile_level):
16 | n_samples, n_features = 50, 10
17 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=123)
18 |
19 | # optimality condition for w = 0.
20 | # for all g in subdiff pinball(y), g must be in subdiff ||.||_1(0)
21 | # hint: use max(x, 0) = (x + |x|) / 2 to get subdiff pinball
22 | alpha_max = norm(X.T @ (np.sign(y)/2 + (quantile_level - 0.5)), ord=np.inf)
23 | alpha = alpha_max / 5
24 |
25 | datafit = Pinball(quantile_level)
26 | penalty = L1(alpha)
27 |
28 | w = PDCD_WS(
29 | dual_init=np.sign(y)/2 + (quantile_level - 0.5)
30 | ).solve(X, y, datafit, penalty)[0]
31 |
32 | clf = QuantileRegressor(
33 | quantile=quantile_level,
34 | alpha=alpha/n_samples,
35 | fit_intercept=False,
36 | solver='highs',
37 | ).fit(X, y)
38 |
39 | np.testing.assert_allclose(w, clf.coef_, atol=1e-5)
40 | # test compatibility when inside GLM:
41 | estimator = GeneralizedLinearEstimator(
42 | datafit=Pinball(.2),
43 | penalty=L1(alpha=1.),
44 | solver=PDCD_WS(),
45 | )
46 | estimator.fit(X, y)
47 |
48 |
49 | if __name__ == '__main__':
50 | pass
51 |
--------------------------------------------------------------------------------
/skglm/experimental/tests/test_reweighted.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy.linalg import norm
3 |
4 | from skglm.penalties.separable import L0_5
5 | from skglm.utils.data import make_correlated_data
6 | from skglm.experimental import IterativeReweightedL1
7 | from skglm.solvers import AndersonCD
8 |
9 |
10 | n_samples, n_features = 20, 50
11 | X, y, w_true = make_correlated_data(
12 | n_samples=n_samples, n_features=n_features, random_state=24)
13 |
14 | alpha_max = norm(X.T @ y, ord=np.inf) / n_samples
15 | alpha = alpha_max / 100
16 | tol = 1e-10
17 |
18 |
19 | def test_decreasing_loss():
20 | # reweighting can't increase the L0.5 objective
21 | iterative_l05 = IterativeReweightedL1(
22 | penalty=L0_5(alpha),
23 | solver=AndersonCD(tol=tol, fit_intercept=False)).fit(X, y)
24 | np.testing.assert_array_less(
25 | iterative_l05.loss_history_[-1], iterative_l05.loss_history_[0])
26 | diffs = np.diff(iterative_l05.loss_history_)
27 | np.testing.assert_array_less(diffs, 1e-5)
28 |
--------------------------------------------------------------------------------
/skglm/experimental/tests/test_sqrt_lasso.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | from numpy.linalg import norm
4 |
5 | from skglm.penalties import L1
6 | from skglm.utils.data import make_correlated_data
7 | from skglm.experimental.sqrt_lasso import (SqrtLasso, SqrtQuadratic,
8 | _chambolle_pock_sqrt)
9 | from skglm.experimental.pdcd_ws import PDCD_WS
10 | from skglm import Lasso
11 |
12 |
13 | def test_alpha_max():
14 | n_samples, n_features = 50, 10
15 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=0)
16 | alpha_max = norm(X.T @ y, ord=np.inf) / norm(y)
17 |
18 | sqrt_lasso = SqrtLasso(alpha=alpha_max).fit(X, y)
19 |
20 | if sqrt_lasso.fit_intercept:
21 | np.testing.assert_equal(sqrt_lasso.coef_[:-1], 0)
22 | else:
23 | np.testing.assert_equal(sqrt_lasso.coef_, 0)
24 |
25 |
26 | def test_vs_statsmodels():
27 | try:
28 | from statsmodels.regression import linear_model # noqa
29 | except ImportError:
30 | pytest.xfail("This test requires statsmodels to run.")
31 | n_samples, n_features = 50, 10
32 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=0)
33 |
34 | alpha_max = norm(X.T @ y, ord=np.inf) / norm(y)
35 | n_alphas = 3
36 | alphas = alpha_max * np.geomspace(1, 1e-2, n_alphas+1)[1:]
37 |
38 | sqrt_lasso = SqrtLasso(tol=1e-9, fit_intercept=False)
39 | coefs_skglm = sqrt_lasso.path(X, y, alphas)[1]
40 |
41 | coefs_statsmodels = np.zeros((len(alphas), n_features))
42 |
43 | # fit statsmodels on path
44 | for i in range(n_alphas):
45 | alpha = alphas[i]
46 | # statsmodels solves: ||y - Xw||_2 + alpha * ||w||_1 / sqrt(n_samples)
47 | model = linear_model.OLS(y, X)
48 | model = model.fit_regularized(method='sqrt_lasso', L1_wt=1.,
49 | alpha=np.sqrt(n_samples) * alpha)
50 | coefs_statsmodels[i] = model.params
51 |
52 | np.testing.assert_almost_equal(coefs_skglm, coefs_statsmodels, decimal=4)
53 |
54 |
55 | def test_prox_newton_cp():
56 | n_samples, n_features = 50, 10
57 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=0)
58 |
59 | alpha_max = norm(X.T @ y, ord=np.inf) / norm(y)
60 | alpha = alpha_max / 10
61 | clf = SqrtLasso(alpha=alpha, fit_intercept=False, tol=1e-12).fit(X, y)
62 | w, _, _ = _chambolle_pock_sqrt(X, y, alpha, max_iter=1000)
63 | np.testing.assert_allclose(clf.coef_, w)
64 |
65 |
66 | @pytest.mark.parametrize('with_dual_init', [True, False])
67 | def test_PDCD_WS(with_dual_init):
68 | n_samples, n_features = 50, 10
69 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=0)
70 |
71 | alpha_max = norm(X.T @ y, ord=np.inf) / norm(y)
72 | alpha = alpha_max / 10
73 |
74 | dual_init = y / norm(y) if with_dual_init else None
75 |
76 | datafit = SqrtQuadratic()
77 | penalty = L1(alpha)
78 |
79 | w = PDCD_WS(dual_init=dual_init).solve(X, y, datafit, penalty)[0]
80 | clf = SqrtLasso(alpha=alpha, fit_intercept=False, tol=1e-12).fit(X, y)
81 | np.testing.assert_allclose(clf.coef_, w, atol=1e-6)
82 |
83 |
84 | @pytest.mark.parametrize("fit_intercept", [True, False])
85 | def test_lasso_sqrt_lasso_equivalence(fit_intercept):
86 | n_samples, n_features = 50, 10
87 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=0)
88 |
89 | alpha_max = norm(X.T @ y, ord=np.inf) / norm(y)
90 | alpha = alpha_max / 10
91 |
92 | lasso = Lasso(alpha=alpha, fit_intercept=fit_intercept, tol=1e-8).fit(X, y)
93 |
94 | scal = n_samples / norm(y - lasso.predict(X))
95 | sqrt = SqrtLasso(
96 | alpha=alpha * scal, fit_intercept=fit_intercept, tol=1e-8).fit(X, y)
97 |
98 | np.testing.assert_allclose(sqrt.coef_, lasso.coef_, rtol=1e-6)
99 |
100 |
101 | if __name__ == '__main__':
102 | pass
103 |
--------------------------------------------------------------------------------
/skglm/penalties/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BasePenalty
2 | from .separable import (
3 | L1_plus_L2, L0_5, L1, L2, L2_3, MCPenalty, WeightedMCPenalty, SCAD,
4 | WeightedL1, IndicatorBox, PositiveConstraint, LogSumPenalty
5 | )
6 | from .block_separable import (
7 | L2_05, L2_1, BlockMCPenalty, BlockSCAD, WeightedGroupL2, WeightedL1GroupL2
8 | )
9 |
10 | from .non_separable import SLOPE
11 |
12 |
13 | __all__ = [
14 | BasePenalty,
15 | L1_plus_L2, L0_5, L1, L2, L2_3, MCPenalty, WeightedMCPenalty, SCAD, WeightedL1,
16 | IndicatorBox, PositiveConstraint, L2_05, L2_1, BlockMCPenalty, BlockSCAD,
17 | WeightedGroupL2, WeightedL1GroupL2, SLOPE, LogSumPenalty
18 | ]
19 |
--------------------------------------------------------------------------------
/skglm/penalties/base.py:
--------------------------------------------------------------------------------
1 |
2 | class BasePenalty:
3 | """Base class for penalty subclasses."""
4 |
5 | def get_spec(self):
6 | """Specify the numba types of the class attributes.
7 |
8 | Returns
9 | -------
10 | spec: Tuple of (attribute_name, dtype)
11 | spec to be passed to Numba jitclass to compile the class.
12 | """
13 |
14 | def params_to_dict(self):
15 | """Get the parameters to initialize an instance of the class.
16 |
17 | Returns
18 | -------
19 | dict_of_params : dict
20 | The parameters to instantiate an object of the class.
21 | """
22 |
23 | def value(self, w):
24 | """Value of penalty at vector w."""
25 |
26 | def is_penalized(self, n_features):
27 | """Return a binary mask with the penalized features."""
28 |
29 | def generalized_support(self, w):
30 | """Return a mask which is True for coefficients in the generalized support."""
31 |
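32 |
33 | # Illustrative sketch (not part of the library): a minimal custom penalty
34 | # implementing the interface above, here the ridge penalty
35 | # ``alpha / 2 * ||w||^2`` (every feature is penalized and belongs to the
36 | # generalized support). Solvers typically also require prox-related methods
37 | # such as ``prox_1d`` or ``subdiff_distance``.
38 | #
39 | #     import numpy as np
40 | #     from numba import float64
41 | #
42 | #     class _ExampleRidge(BasePenalty):
43 | #         def __init__(self, alpha):
44 | #             self.alpha = alpha
45 | #
46 | #         def get_spec(self):
47 | #             return (('alpha', float64),)
48 | #
49 | #         def params_to_dict(self):
50 | #             return dict(alpha=self.alpha)
51 | #
52 | #         def value(self, w):
53 | #             return 0.5 * self.alpha * np.sum(w ** 2)
54 | #
55 | #         def is_penalized(self, n_features):
56 | #             return np.ones(n_features, dtype=np.bool_)
57 | #
58 | #         def generalized_support(self, w):
59 | #             return np.ones(len(w), dtype=np.bool_)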
--------------------------------------------------------------------------------
/skglm/penalties/non_separable.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numba import float64
3 |
4 | from skglm.penalties.base import BasePenalty
5 | from skglm.utils.prox_funcs import prox_SLOPE
6 |
7 |
8 | class SLOPE(BasePenalty):
9 | """Sorted L-One Penalized Estimation (SLOPE) penalty.
10 |
11 | Attributes
12 | ----------
13 | alphas : array, shape (n_features,)
14 |         Contains the regularization level of every feature.
15 |         When ``alphas`` contains a single unique value, ``SLOPE``
16 |         is equivalent to the ``L1`` penalty.
17 |
18 | References
19 | ----------
20 | .. [1] M. Bogdan, E. van den Berg, C. Sabatti, W. Su, E. Candes
21 | "SLOPE - Adaptive Variable Selection via Convex Optimization",
22 | The Annals of Applied Statistics 9 (3): 1103-40
23 | https://doi.org/10.1214/15-AOAS842
24 | """
25 |
26 | def __init__(self, alphas):
27 | self.alphas = alphas
28 |
29 | def get_spec(self):
30 | spec = (
31 | ('alphas', float64[:]),
32 | )
33 | return spec
34 |
35 | def params_to_dict(self):
36 | return dict(alphas=self.alphas)
37 |
38 | def value(self, w):
39 | """Compute the value of SLOPE at w."""
40 | return np.sum(np.sort(np.abs(w)) * self.alphas[::-1])
41 |
42 | def prox_vec(self, x, stepsize):
43 | alphas = self.alphas
44 | prox = np.zeros_like(x)
45 |
46 | abs_x = np.abs(x)
47 | sorted_indices = np.argsort(abs_x)[::-1]
48 | prox[sorted_indices] = prox_SLOPE(abs_x[sorted_indices], alphas * stepsize)
49 |
50 | return np.sign(x) * prox
51 |
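52 |
53 | if __name__ == "__main__":
54 |     # Illustrative check (a sketch, not part of the library): with a constant
55 |     # ``alphas`` vector, SLOPE reduces to the L1 penalty, so its value equals
56 |     # ``alpha * ||w||_1`` as stated in the docstring above.
57 |     rng = np.random.default_rng(0)
58 |     w = rng.standard_normal(10)
59 |     alpha = 0.5
60 |     slope = SLOPE(alpha * np.ones(10))
61 |     # the two printed values are expected to coincide
62 |     print(slope.value(w), alpha * np.abs(w).sum())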
--------------------------------------------------------------------------------
/skglm/solvers/__init__.py:
--------------------------------------------------------------------------------
1 | from .anderson_cd import AndersonCD
2 | from .base import BaseSolver
3 | from .fista import FISTA
4 | from .gram_cd import GramCD
5 | from .group_bcd import GroupBCD
6 | from .multitask_bcd import MultiTaskBCD
7 | from .prox_newton import ProxNewton
8 | from .group_prox_newton import GroupProxNewton
9 | from .lbfgs import LBFGS
10 |
11 |
12 | __all__ = [AndersonCD, BaseSolver, FISTA, GramCD, GroupBCD, MultiTaskBCD, ProxNewton,
13 | GroupProxNewton, LBFGS]
14 |
--------------------------------------------------------------------------------
/skglm/solvers/base.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from abc import abstractmethod, ABC
3 |
4 | import numpy as np
5 |
6 | from skglm.utils.validation import check_attrs
7 | from skglm.utils.jit_compilation import compiled_clone
8 |
9 |
10 | class BaseSolver(ABC):
11 | """Base class for solvers.
12 |
13 | Attributes
14 | ----------
15 | _datafit_required_attr : list
16 | List of attributes that must be implemented in Datafit.
17 |
18 | _penalty_required_attr : list
19 | List of attributes that must be implemented in Penalty.
20 |
21 | Notes
22 | -----
23 | For required attributes, if an attribute is given as a list of attributes
24 | it means at least one of them should be implemented.
25 | For instance, if
26 |
27 | _datafit_required_attr = (
28 | "get_global_lipschitz",
29 | ("gradient", "gradient_scalar")
30 | )
31 |
32 |     it means the datafit must implement the method ``get_global_lipschitz``
33 |     and either ``gradient`` or ``gradient_scalar``.
34 | """
35 |
36 | _datafit_required_attr: list
37 | _penalty_required_attr: list
38 |
39 | @abstractmethod
40 | def _solve(self, X, y, datafit, penalty, w_init, Xw_init):
41 | """Solve an optimization problem.
42 |
43 | Parameters
44 | ----------
45 | X : array, shape (n_samples, n_features)
46 | Training data.
47 |
48 | y : array, shape (n_samples,)
49 | Target values.
50 |
51 | datafit : instance of Datafit class
52 | Datafitting term.
53 |
54 | penalty : instance of Penalty class
55 | Penalty used in the model.
56 |
57 | w_init : array, shape (n_features,)
58 | Coefficient vector.
59 |
60 | Xw_init : array, shape (n_samples,)
61 | Model fit.
62 |
63 | Returns
64 | -------
65 | coefs : array, shape (n_features + fit_intercept, n_alphas)
66 | Coefficients along the path.
67 |
68 | obj_out : array, shape (n_iter,)
69 | The objective values at every outer iteration.
70 |
71 | stop_crit : float
72 | Value of stopping criterion at convergence.
73 | """
74 |
75 | def custom_checks(self, X, y, datafit, penalty):
76 | """Ensure the solver is suited for the `datafit` + `penalty` problem.
77 |
78 | This method includes extra checks to perform
79 | aside from checking attributes compatibility.
80 |
81 | Parameters
82 | ----------
83 | X : array, shape (n_samples, n_features)
84 | Training data.
85 |
86 | y : array, shape (n_samples,)
87 | Target values.
88 |
89 | datafit : instance of BaseDatafit
90 | Datafit.
91 |
92 | penalty : instance of BasePenalty
93 | Penalty.
94 | """
95 | pass
96 |
97 | def solve(
98 | self, X, y, datafit, penalty, w_init=None, Xw_init=None, *, run_checks=True
99 | ):
100 | """Solve the optimization problem after validating its compatibility.
101 |
102 | A proxy of ``_solve`` method that implicitly ensures the compatibility
103 | of ``datafit`` and ``penalty`` with the solver.
104 |
105 | Examples
106 | --------
107 | >>> ...
108 | >>> coefs, obj_out, stop_crit = solver.solve(X, y, datafit, penalty)
109 | """
110 | # TODO check for datafit/penalty being jit-compiled properly
111 | # instead of searching for a string
112 | if "jitclass" in str(type(datafit)):
113 | warnings.warn(
114 |                 "Passing in a compiled datafit is deprecated since skglm v0.5. "
115 |                 "Compilation is now done inside the solver. "
116 |                 "This will raise an error starting from skglm v0.6 onwards."
117 | )
118 | elif datafit is not None:
119 | datafit = compiled_clone(datafit, to_float32=X.dtype == np.float32)
120 |
121 | if "jitclass" in str(type(penalty)):
122 | warnings.warn(
123 |                 "Passing in a compiled penalty is deprecated since skglm v0.5. "
124 |                 "Compilation is now done inside the solver. "
125 |                 "This will raise an error starting from skglm v0.6 onwards."
126 | )
127 | elif penalty is not None:
128 | penalty = compiled_clone(penalty)
129 | # TODO add support for bool spec in compiled_clone
130 | # currently, doing so break the code
131 | # penalty = compiled_clone(penalty, to_float32=X.dtype == np.float32)
132 |
133 | if run_checks:
134 | self._validate(X, y, datafit, penalty)
135 |
136 | return self._solve(X, y, datafit, penalty, w_init, Xw_init)
137 |
138 | def _validate(self, X, y, datafit, penalty):
139 | # execute: `custom_checks` then check attributes
140 | self.custom_checks(X, y, datafit, penalty)
141 |
142 | # do not check for sparse support here, make the check at the solver level
143 | # some solvers like ProxNewton don't require methods for sparse support
144 | check_attrs(datafit, self, self._datafit_required_attr)
145 | check_attrs(penalty, self, self._penalty_required_attr)
146 |
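147 |
148 | # Illustrative usage sketch (assumptions: the ``Quadratic`` datafit, ``L1``
149 | # penalty and ``AndersonCD`` solver shipped with skglm). Instances are passed
150 | # uncompiled; ``solve`` JIT-compiles clones of them before calling ``_solve``:
151 | #
152 | #     from skglm.datafits import Quadratic
153 | #     from skglm.penalties import L1
154 | #     from skglm.solvers import AndersonCD
155 | #
156 | #     solver = AndersonCD(tol=1e-8, fit_intercept=False)
157 | #     w, obj_out, stop_crit = solver.solve(X, y, Quadratic(), L1(alpha=0.1))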
--------------------------------------------------------------------------------
/skglm/solvers/common.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numba import njit
3 | from numpy.linalg import norm
4 |
5 |
6 | @njit
7 | def dist_fix_point_cd(w, grad_ws, lipschitz_ws, datafit, penalty, ws):
8 | """Compute the violation of the fixed point iterate scheme for CD.
9 |
10 | Parameters
11 | ----------
12 | w : array, shape (n_features,)
13 | Coefficient vector.
14 |
15 | grad_ws : array, shape (ws_size,)
16 | Gradient restricted to the working set.
17 |
18 | lipschitz_ws : array, shape (len(ws),)
19 | Coordinatewise gradient Lipschitz constants, restricted to working set.
20 |
21 | datafit: instance of BaseDatafit
22 | Datafit.
23 |
24 | penalty: instance of BasePenalty
25 | Penalty.
26 |
27 | ws : array, shape (len(ws),)
28 | The working set.
29 |
30 | Returns
31 | -------
32 |     dist : array, shape (len(ws),)
33 |         Violation score for every feature in the working set.
34 | """
35 | dist = np.zeros(ws.shape[0], dtype=w.dtype)
36 |
37 | for idx, j in enumerate(ws):
38 | if lipschitz_ws[idx] == 0.:
39 | continue
40 |
41 | step_j = 1 / lipschitz_ws[idx]
42 | dist[idx] = np.abs(
43 | w[j] - penalty.prox_1d(w[j] - step_j * grad_ws[idx], step_j, j)
44 | )
45 | return dist
46 |
47 |
48 | @njit
49 | def dist_fix_point_bcd(w, grad_ws, lipschitz_ws, datafit, penalty, ws):
50 | """Compute the violation of the fixed point iterate scheme for BCD.
51 |
52 | Parameters
53 | ----------
54 | w : array, shape (n_features,)
55 | Coefficient vector.
56 |
57 | grad_ws : array, shape (ws_size,)
58 | Gradient restricted to the working set.
59 |
60 | lipschitz_ws : array, shape (len(ws),)
61 | Coordinatewise gradient Lipschitz constants, restricted to working set.
62 |
63 | datafit: instance of BaseDatafit
64 | Datafit.
65 |
66 | penalty: instance of BasePenalty
67 | Penalty.
68 |
69 | ws : array, shape (len(ws),)
70 | The working set.
71 |
72 | Returns
73 | -------
74 | dist : array, shape (n_groups,)
75 | Violation score for every group.
76 |
77 |     Notes
78 |     -----
79 | ``grad_ws`` is a stacked array of gradients ``[grad_ws_1, grad_ws_2, ...]``.
80 | """
81 | n_groups = len(penalty.grp_ptr) - 1
82 | dist = np.zeros(n_groups, dtype=w.dtype)
83 |
84 | grad_ptr = 0
85 | for idx, g in enumerate(ws):
86 | if lipschitz_ws[idx] == 0.:
87 | continue
88 | grp_g_indices = penalty.grp_indices[penalty.grp_ptr[g]: penalty.grp_ptr[g+1]]
89 |
90 | grad_g = grad_ws[grad_ptr: grad_ptr + len(grp_g_indices)]
91 | grad_ptr += len(grp_g_indices)
92 |
93 | step_g = 1 / lipschitz_ws[idx]
94 | w_g = w[grp_g_indices]
95 | dist[idx] = norm(
96 | w_g - penalty.prox_1group(w_g - grad_g * step_g, step_g, g)
97 | )
98 | return dist
99 |
100 |
101 | @njit
102 | def construct_grad(X, y, w, Xw, datafit, ws):
103 | """Compute the gradient of the datafit restricted to the working set.
104 |
105 | Parameters
106 | ----------
107 | X : array, shape (n_samples, n_features)
108 | Design matrix.
109 |
110 | y : array, shape (n_samples,)
111 | Target vector.
112 |
113 | w : array, shape (n_features,)
114 | Coefficient vector.
115 |
116 | Xw : array, shape (n_samples, )
117 | Model fit.
118 |
119 | datafit : Datafit
120 | Datafit.
121 |
122 | ws : array, shape (ws_size,)
123 | The working set.
124 |
125 | Returns
126 | -------
127 |     grad : array, shape (ws_size,)
128 | The gradient restricted to the working set.
129 | """
130 | grad = np.zeros(ws.shape[0])
131 | for idx, j in enumerate(ws):
132 | grad[idx] = datafit.gradient_scalar(X, y, w, Xw, j)
133 | return grad
134 |
135 |
136 | @njit
137 | def construct_grad_sparse(data, indptr, indices, y, w, Xw, datafit, ws):
138 | """Compute the gradient of the datafit restricted to the working set.
139 |
140 | Parameters
141 | ----------
142 | data : array-like
143 | Data array of the matrix in CSC format.
144 |
145 | indptr : array-like
146 | CSC format index point array.
147 |
148 | indices : array-like
149 | CSC format index array.
150 |
151 | y : array, shape (n_samples, )
152 |         Target vector.
153 |
154 | w : array, shape (n_features,)
155 |         Coefficient vector.
156 |
157 | Xw : array, shape (n_samples, )
158 | Model fit.
159 |
160 | datafit : Datafit
161 | Datafit.
162 |
163 | ws : array, shape (ws_size,)
164 | The working set.
165 |
166 | Returns
167 | -------
168 |     grad : array, shape (ws_size,)
169 | The gradient restricted to the working set.
170 | """
171 | grad = np.zeros(ws.shape[0])
172 | for idx, j in enumerate(ws):
173 | grad[idx] = datafit.gradient_scalar_sparse(
174 | data, indptr, indices, y, Xw, j)
175 | return grad
176 |
--------------------------------------------------------------------------------
/skglm/solvers/fista.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.sparse import issparse
3 | from skglm.solvers.base import BaseSolver
4 | from skglm.solvers.common import construct_grad, construct_grad_sparse
5 | from skglm.utils.prox_funcs import _prox_vec
6 | from skglm.utils.validation import check_attrs
7 |
8 |
9 | class FISTA(BaseSolver):
10 | r"""ISTA solver with Nesterov acceleration (FISTA).
11 |
12 | Attributes
13 | ----------
14 | max_iter : int, default 100
15 | Maximum number of iterations.
16 |
17 | tol : float, default 1e-4
18 | Tolerance for convergence.
19 |
20 | verbose : bool, default False
21 | Amount of verbosity. 0/False is silent.
22 |
23 | References
24 | ----------
25 | .. [1] Beck, A. and Teboulle M.
26 | "A Fast Iterative Shrinkage-Thresholding Algorithm for Linear Inverse
27 | problems", 2009, SIAM J. Imaging Sci.
28 | https://epubs.siam.org/doi/10.1137/080716542
29 | """
30 |
31 | _datafit_required_attr = ("get_global_lipschitz", ("gradient", "gradient_scalar"))
32 | _penalty_required_attr = (("prox_1d", "prox_vec"),)
33 |
34 | def __init__(self, max_iter=100, tol=1e-4, opt_strategy="subdiff", verbose=0):
35 | self.max_iter = max_iter
36 | self.tol = tol
37 | self.verbose = verbose
38 | self.opt_strategy = opt_strategy
39 | self.fit_intercept = False # needed to be passed to GeneralizedLinearEstimator
40 | self.warm_start = False
41 |
42 | def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
43 | p_objs_out = []
44 | n_samples, n_features = X.shape
45 | all_features = np.arange(n_features)
46 | X_is_sparse = issparse(X)
47 | t_new = 1.
48 |
49 | w = w_init.copy() if w_init is not None else np.zeros(n_features)
50 | z = w_init.copy() if w_init is not None else np.zeros(n_features)
51 | Xw = Xw_init.copy() if Xw_init is not None else np.zeros(n_samples)
52 |
53 | if X_is_sparse:
54 | datafit.initialize_sparse(X.data, X.indptr, X.indices, y)
55 | lipschitz = datafit.get_global_lipschitz_sparse(
56 | X.data, X.indptr, X.indices, y
57 | )
58 | else:
59 | datafit.initialize(X, y)
60 | lipschitz = datafit.get_global_lipschitz(X, y)
61 |
62 | for n_iter in range(self.max_iter):
63 | t_old = t_new
64 | t_new = (1 + np.sqrt(1 + 4 * t_old ** 2)) / 2
65 | w_old = w.copy()
66 |
67 | if X_is_sparse:
68 | if hasattr(datafit, "gradient_sparse"):
69 | grad = datafit.gradient_sparse(
70 | X.data, X.indptr, X.indices, y, X @ z)
71 | else:
72 | grad = construct_grad_sparse(
73 | X.data, X.indptr, X.indices, y, z, X @ z, datafit, all_features)
74 | else:
75 | if hasattr(datafit, "gradient"):
76 | grad = datafit.gradient(X, y, X @ z)
77 | else:
78 | grad = construct_grad(X, y, z, X @ z, datafit, all_features)
79 |
80 | step = 1 / lipschitz
81 | z -= step * grad
82 | if hasattr(penalty, "prox_vec"):
83 | w = penalty.prox_vec(z, step)
84 | else:
85 | w = _prox_vec(w, z, penalty, step)
86 | Xw = X @ w
87 | z = w + (t_old - 1.) / t_new * (w - w_old)
88 |
89 | if self.opt_strategy == "subdiff":
90 | opt = penalty.subdiff_distance(w, grad, all_features)
91 | elif self.opt_strategy == "fixpoint":
92 | opt = np.abs(w - penalty.prox_vec(w - grad / lipschitz, 1 / lipschitz))
93 | else:
94 | raise ValueError(
95 |                     "Unknown optimality strategy. Expected "
96 | f"`subdiff` or `fixpoint`. Got {self.opt_strategy}")
97 |
98 | stop_crit = np.max(opt)
99 |
100 | p_obj = datafit.value(y, w, Xw) + penalty.value(w)
101 | p_objs_out.append(p_obj)
102 | if self.verbose:
103 | print(
104 | f"Iteration {n_iter+1}: {p_obj:.10f}, "
105 | f"stopping crit: {stop_crit:.2e}"
106 | )
107 |
108 | if stop_crit < self.tol:
109 | if self.verbose:
110 | print(f"Stopping criterion max violation: {stop_crit:.2e}")
111 | break
112 | return w, np.array(p_objs_out), stop_crit
113 |
114 | def custom_checks(self, X, y, datafit, penalty):
115 | # check datafit support sparse data
116 | check_attrs(
117 | datafit, solver=self,
118 | required_attr=self._datafit_required_attr,
119 | support_sparse=issparse(X)
120 | )
121 |
122 | # optimality check
123 | if self.opt_strategy == "subdiff" and not hasattr(penalty, "subdiff_distance"):
124 | raise AttributeError(
125 | "Penalty must implement `subdiff_distance` "
126 | "to use `opt_strategy='subdiff'` in Fista solver."
127 | )
128 |
--------------------------------------------------------------------------------
/skglm/solvers/gram_cd.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | import numpy as np
3 | from numba import njit
4 | from scipy.sparse import issparse
5 |
6 | from skglm.solvers.base import BaseSolver
7 | from skglm.utils.anderson import AndersonAcceleration
8 |
9 |
10 | class GramCD(BaseSolver):
11 | r"""Coordinate descent solver keeping the gradients up-to-date with Gram updates.
12 |
13 | This solver should be used when ``n_features`` < ``n_samples``, and computes the
14 | (``n_features``, ``n_features``) Gram matrix which comes with an overhead. It is
15 | only suited to Quadratic datafits.
16 |
17 | It minimizes:
18 |
19 | .. math:: 1 / (2 xx n_"samples") ||y - Xw||^2 + "penalty"(w)
20 |
21 | which can be rewritten as:
22 |
23 | .. math:: 1 / (2 xx n_"samples") w^T Q w - 1 / n_"samples" q^T w + "penalty"(w)
24 |
25 | where:
26 |
27 | .. math:: Q = X^T X " (Gram matrix), and " q = X^T y
28 |
29 | Attributes
30 | ----------
31 | max_iter : int, default 100
32 | Maximum number of iterations.
33 |
34 | w_init : array, shape (n_features,), default None
35 | Initial value of coefficients.
36 | If set to ``None``, a zero vector is used instead.
37 |
38 | use_acc : bool, default False
39 | Extrapolate the iterates based on the past 5 iterates if set to ``True``.
40 | Can only be used when ``greedy_cd`` is ``False``.
41 |
42 | greedy_cd : bool, default True
43 | Use a greedy strategy to select features to update in coordinate descent epochs
44 | if set to ``True``. A cyclic strategy is used otherwise.
45 |
46 | tol : float, default 1e-4
47 | Tolerance for convergence.
48 |
49 | verbose : bool, default False
50 | Amount of verbosity. 0/False is silent.
51 | """
52 |
53 | _datafit_required_attr = ()
54 | _penalty_required_attr = ("prox_1d", "subdiff_distance")
55 |
56 | def __init__(self, max_iter=100, use_acc=False, greedy_cd=True, tol=1e-4,
57 | fit_intercept=True, warm_start=False, verbose=0):
58 | self.max_iter = max_iter
59 | self.use_acc = use_acc
60 | self.greedy_cd = greedy_cd
61 | self.tol = tol
62 | self.fit_intercept = fit_intercept
63 | self.warm_start = warm_start
64 | self.verbose = verbose
65 |
66 | def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
67 | # we don't pass Xw_init as the solver uses Gram updates
68 | # to keep the gradient up-to-date instead of Xw
69 | n_samples, n_features = X.shape
70 |
71 | if issparse(X):
72 | scaled_gram = X.T.dot(X)
73 | scaled_gram = scaled_gram.toarray() / n_samples
74 | scaled_Xty = X.T.dot(y) / n_samples
75 | else:
76 | scaled_gram = X.T @ X / n_samples
77 | scaled_Xty = X.T @ y / n_samples
78 |
79 | # TODO potential improvement: allow passing scaled_gram
80 | # (e.g. for path computation)
81 | scaled_y_norm2 = np.linalg.norm(y) ** 2 / (2 * n_samples)
82 |
83 | all_features = np.arange(n_features)
84 | stop_crit = np.inf # prevent ref before assign
85 | p_objs_out = []
86 |
87 | w = np.zeros(n_features) if w_init is None else w_init
88 | grad = - scaled_Xty if w_init is None else scaled_gram @ w_init - scaled_Xty
89 | opt = penalty.subdiff_distance(w, grad, all_features)
90 |
91 | if self.use_acc:
92 | if self.greedy_cd:
93 | warnings.warn(
94 | "Anderson acceleration does not work with greedy_cd, " +
95 | "set use_acc=False", UserWarning)
96 | accelerator = AndersonAcceleration(K=5)
97 | w_acc = np.zeros(n_features)
98 | grad_acc = np.zeros(n_features)
99 |
100 | for t in range(self.max_iter):
101 | # check convergences
102 | stop_crit = np.max(opt)
103 | if self.verbose:
104 | p_obj = (0.5 * w @ (scaled_gram @ w) - scaled_Xty @ w +
105 | scaled_y_norm2 + penalty.value(w))
106 | print(
107 | f"Iteration {t+1}: {p_obj:.10f}, "
108 | f"stopping crit: {stop_crit:.2e}"
109 | )
110 |
111 | if stop_crit <= self.tol:
112 | if self.verbose:
113 | print(f"Stopping criterion max violation: {stop_crit:.2e}")
114 | break
115 |
116 | # inplace update of w, grad
117 | opt = _gram_cd_epoch(scaled_gram, w, grad, penalty, self.greedy_cd)
118 |
119 | # perform Anderson extrapolation
120 | if self.use_acc:
121 | w_acc, grad_acc, is_extrapolated = accelerator.extrapolate(w, grad)
122 |
123 | if is_extrapolated:
124 | # omit constant term for comparison
125 | p_obj_acc = (0.5 * w_acc @ (scaled_gram @ w_acc) -
126 | scaled_Xty @ w_acc + penalty.value(w_acc))
127 | p_obj = (0.5 * w @ (scaled_gram @ w) - scaled_Xty @ w
128 | + penalty.value(w))
129 | if p_obj_acc < p_obj:
130 | w[:] = w_acc
131 | grad[:] = grad_acc
132 |
133 | # store p_obj
134 | p_obj = (0.5 * w @ (scaled_gram @ w) - scaled_Xty @ w + scaled_y_norm2 +
135 | penalty.value(w))
136 | p_objs_out.append(p_obj)
137 | return w, np.array(p_objs_out), stop_crit
138 |
139 | def custom_checks(self, X, y, datafit, penalty):
140 | if datafit is not None:
141 | raise AttributeError(
142 | "`GramCD` supports only `Quadratic` datafit and fits it implicitly, "
143 | f"argument `datafit` must be `None`, got {datafit.__class__.__name__}."
144 | )
145 |
146 |
147 | @njit
148 | def _gram_cd_epoch(scaled_gram, w, grad, penalty, greedy_cd):
149 | all_features = np.arange(len(w))
150 | for cd_iter in all_features:
151 | # select feature j
152 | if greedy_cd:
153 | opt = penalty.subdiff_distance(w, grad, all_features)
154 | j = np.argmax(opt)
155 | else: # cyclic
156 | j = cd_iter
157 |
158 | # update w_j
159 | old_w_j = w[j]
160 | step = 1 / scaled_gram[j, j] # 1 / lipschitz_j
161 | w[j] = penalty.prox_1d(old_w_j - step * grad[j], step, j)
162 |
163 | # gradient update with Gram matrix
164 | if w[j] != old_w_j:
165 | grad += (w[j] - old_w_j) * scaled_gram[:, j]
166 |
167 | return penalty.subdiff_distance(w, grad, all_features)
168 |
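A minimal usage sketch, mirroring skglm/tests/test_gram_solver.py further down. GramCD fits the Quadratic datafit implicitly, so `datafit` is passed as None:

    import numpy as np
    from skglm.penalties import L1
    from skglm.solvers import GramCD
    from skglm.utils.data import make_correlated_data

    X, y, _ = make_correlated_data(n_samples=100, n_features=30, random_state=0)
    alpha = 0.1 * np.linalg.norm(X.T @ y, ord=np.inf) / len(y)

    # the scaled Gram matrix X.T @ X / n_samples is formed internally
    w, p_objs, stop_crit = GramCD(tol=1e-9, max_iter=1000).solve(X, y, None, L1(alpha))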
--------------------------------------------------------------------------------
/skglm/solvers/lbfgs.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from sklearn.exceptions import ConvergenceWarning
3 |
4 | import numpy as np
5 | import scipy.optimize
6 | from numpy.linalg import norm
7 | from scipy.sparse import issparse
8 |
9 | from skglm.solvers import BaseSolver
10 | from skglm.utils.validation import check_attrs
11 |
12 |
13 | class LBFGS(BaseSolver):
14 | """A wrapper for scipy L-BFGS solver.
15 |
16 |     Refer to `scipy L-BFGS-B <https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html>`_
17 |     documentation for details.
18 |
19 | Parameters
20 | ----------
21 | max_iter : int, default 50
22 | Maximum number of iterations.
23 |
24 | tol : float, default 1e-4
25 | Tolerance for convergence.
26 |
27 | verbose : bool, default False
28 | Amount of verbosity. 0/False is silent.
29 | """
30 |
31 | _datafit_required_attr = ("gradient",)
32 | _penalty_required_attr = ("gradient",)
33 |
34 | def __init__(self, max_iter=50, tol=1e-4, verbose=False):
35 | self.max_iter = max_iter
36 | self.tol = tol
37 | self.verbose = verbose
38 |
39 | def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
40 |
41 | # TODO: to be isolated in a separate method
42 | is_sparse = issparse(X)
43 | if is_sparse:
44 | datafit.initialize_sparse(X.data, X.indptr, X.indices, y)
45 | else:
46 | datafit.initialize(X, y)
47 |
48 | def objective(w):
49 | Xw = X @ w
50 | datafit_value = datafit.value(y, w, Xw)
51 | penalty_value = penalty.value(w)
52 |
53 | return datafit_value + penalty_value
54 |
55 | def d_jac(w):
56 | Xw = X @ w
57 | datafit_grad = datafit.gradient(X, y, Xw)
58 | penalty_grad = penalty.gradient(w)
59 |
60 | return datafit_grad + penalty_grad
61 |
62 | def s_jac(w):
63 | Xw = X @ w
64 | datafit_grad = datafit.gradient_sparse(X.data, X.indptr, X.indices, y, Xw)
65 | penalty_grad = penalty.gradient(w)
66 |
67 | return datafit_grad + penalty_grad
68 |
69 | def callback_post_iter(w_k):
70 | # save p_obj
71 | p_obj = objective(w_k)
72 | p_objs_out.append(p_obj)
73 |
74 | if self.verbose:
75 | grad = jac(w_k)
76 | stop_crit = norm(grad, ord=np.inf)
77 |
78 | it = len(p_objs_out)
79 | print(
80 | f"Iteration {it}: {p_obj:.10f}, " f"stopping crit: {stop_crit:.2e}"
81 | )
82 |
83 | n_features = X.shape[1]
84 | w = np.zeros(n_features) if w_init is None else w_init
85 | jac = s_jac if issparse(X) else d_jac
86 | p_objs_out = []
87 |
88 | result = scipy.optimize.minimize(
89 | fun=objective,
90 | jac=jac,
91 | x0=w,
92 | method="L-BFGS-B",
93 | options=dict(
94 | maxiter=self.max_iter,
95 | gtol=self.tol,
96 | ftol=0.0, # set ftol=0. to control convergence using only gtol
97 | ),
98 | callback=callback_post_iter,
99 | )
100 |
101 | if not result.success:
102 | warnings.warn(
103 | f"`LBFGS` did not converge for tol={self.tol:.3e} "
104 | f"and max_iter={self.max_iter}.\n"
105 | "Consider increasing `max_iter` and/or `tol`.",
106 | category=ConvergenceWarning,
107 | )
108 |
109 | w = result.x
110 | # scipy LBFGS uses || projected gradient ||_oo to check convergence, cf. `gtol`
111 | # in https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
112 | stop_crit = norm(result.jac, ord=np.inf)
113 |
114 | return w, np.asarray(p_objs_out), stop_crit
115 |
116 | def custom_checks(self, X, y, datafit, penalty):
117 | # check datafit support sparse data
118 | check_attrs(
119 | datafit,
120 | solver=self,
121 | required_attr=self._datafit_required_attr,
122 | support_sparse=issparse(X),
123 | )
124 |
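A minimal usage sketch, following skglm/tests/test_lbfgs_solver.py further down. LBFGS handles smooth problems only, so both the datafit and the penalty must expose a `gradient`, e.g. Logistic with an L2 penalty:

    import numpy as np
    from skglm.datafits import Logistic
    from skglm.penalties import L2
    from skglm.solvers import LBFGS
    from skglm.utils.data import make_correlated_data

    X, y, _ = make_correlated_data(n_samples=100, n_features=50, random_state=0)
    y = np.sign(y)  # binary labels for the logistic datafit

    w, p_objs, stop_crit = LBFGS(max_iter=100, tol=1e-10).solve(X, y, Logistic(), L2(1.0))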
--------------------------------------------------------------------------------
/skglm/tests/test_docstring_parameters.py:
--------------------------------------------------------------------------------
1 | """
2 | Source:
3 | https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/tests/test_docstrings.py
4 | With a few minor tweaks (line 31 - 32)
5 | """
6 | from inspect import signature, getmembers, isclass
7 | import pkgutil
8 | import inspect
9 | import importlib
10 | from typing import Optional
11 |
12 | import pytest
13 | import skglm
14 |
15 | numpydoc_validation = pytest.importorskip("numpydoc.validate")
16 |
17 | FUNCTION_DOCSTRING_IGNORE_LIST = [
18 | "skglm.plot_utils.configure_plt",
19 | "skglm.utils.ST",
20 | "skglm.utils.ST_vec",
21 | "skglm.utils.BST",
22 | "skglm.utils.box_proj",
23 | "skglm.utils.BST_vec",
24 | "skglm.utils.prox_05",
25 | "skglm.utils.prox_block_2_05",
26 | "skglm.utils.prox_2_3"
27 | ]
28 | FUNCTION_DOCSTRING_IGNORE_LIST = set(FUNCTION_DOCSTRING_IGNORE_LIST)
29 |
30 |
31 | def get_all_methods():
32 | estimators = getmembers(skglm.estimators, isclass)
33 | estimators = [estimator for estimator in estimators if estimator[1].__module__ ==
34 | "skglm.estimators"]
35 | for name, Estimator in estimators:
36 | if name.startswith("_"):
37 | # skip private classes
38 | continue
39 | methods = []
40 | for name in dir(Estimator):
41 | if name.startswith("_"):
42 | continue
43 | method_obj = getattr(Estimator, name)
44 | if hasattr(method_obj, "__call__") or isinstance(method_obj, property):
45 | methods.append(name)
46 | methods.append(None)
47 |
48 | for method in sorted(methods, key=lambda x: str(x)):
49 | yield Estimator, method
50 |
51 |
52 | def _is_checked_function(item):
53 | if not inspect.isfunction(item):
54 | return False
55 |
56 | if item.__name__.startswith("_"):
57 | return False
58 |
59 | mod = item.__module__
60 | if not mod.startswith("skglm.") or mod.endswith("estimator_checks"):
61 | return False
62 |
63 | return True
64 |
65 |
66 | def get_all_functions_names():
67 | """Get all public functions defined in the skglm module."""
68 | modules_to_ignore = {
69 | "tests",
70 | "profile",
71 | "expes",
72 | "data",
73 | }
74 |
75 | all_functions_names = set()
76 | for module_finder, module_name, ispkg in pkgutil.walk_packages(
77 | path=skglm.__path__, prefix="skglm."
78 | ):
79 | module_parts = module_name.split(".")
80 | if (
81 | any(part in modules_to_ignore for part in module_parts)
82 | or "._" in module_name
83 | ):
84 | continue
85 |
86 | module = importlib.import_module(module_name)
87 | functions = inspect.getmembers(module, _is_checked_function)
88 | for name, func in functions:
89 | full_name = f"{func.__module__}.{func.__name__}"
90 | all_functions_names.add(full_name)
91 |
92 | return sorted(all_functions_names)
93 |
94 |
95 | def filter_errors(errors, method, Estimator=None):
96 | """
97 | Ignore some errors based on the method type.
98 | These rules are specific to scikit-learn."""
99 | for code, message in errors:
100 | # We ignore the following error codes:
101 | # - RT02: The first line of the Returns section
102 | # should contain only the type, ..
103 | # (as we may need refer to the name of the returned
104 | # object)
105 | # - GL01: Docstring text (summary) should start in the line
106 | # immediately after the opening quotes (not in the same line,
107 | # or leaving a blank line in between)
108 | # - GL02: If there's a blank line, it should be before the
109 | # first line of the Returns section, not after (it allows to have
110 | # short docstrings for properties).
111 |
112 | if code in ["RT02", "GL01", "GL02"]:
113 | continue
114 |
115 | # skglm specific: we ignore:
116 | # - SA01: See Also section not found.
117 | # - EX01: No examples section found.
118 | if code in ['SA01', 'EX01']:
119 | continue
120 |
121 | # Ignore PR02: Unknown parameters for properties. We sometimes use
122 | # properties for ducktyping, i.e. SGDClassifier.predict_proba
123 | if code == "PR02" and Estimator is not None and method is not None:
124 | method_obj = getattr(Estimator, method)
125 | if isinstance(method_obj, property):
126 | continue
127 |
128 | # Following codes are only taken into account for the
129 | # top level class docstrings:
130 | # - ES01: No extended summary found
131 | # - SA01: See Also section not found
132 | # - EX01: No examples section found
133 |
134 | if method is not None and code in ["EX01", "SA01", "ES01"]:
135 | continue
136 | yield code, message
137 |
138 |
139 | def repr_errors(res, estimator=None, method: Optional[str] = None) -> str:
140 | """Pretty print original docstring and the obtained errors
141 | Parameters
142 | ----------
143 | res : dict
144 | result of numpydoc.validate.validate
145 | estimator : {estimator, None}
146 | estimator object or None
147 | method : str
148 | if estimator is not None, either the method name or None.
149 | Returns
150 | -------
151 | str
152 | String representation of the error.
153 | """
154 | if method is None:
155 | if hasattr(estimator, "__init__"):
156 | method = "__init__"
157 | elif estimator is None:
158 | raise ValueError("At least one of estimator, method should be provided")
159 | else:
160 | raise NotImplementedError
161 |
162 | if estimator is not None:
163 | obj = getattr(estimator, method)
164 | try:
165 | obj_signature = str(signature(obj))
166 | except TypeError:
167 | # In particular we can't parse the signature of properties
168 | obj_signature = (
169 | "\nParsing of the method signature failed, "
170 | "possibly because this is a property."
171 | )
172 |
173 | obj_name = estimator.__name__ + "." + method
174 | else:
175 | obj_signature = ""
176 | obj_name = method
177 |
178 | msg = "\n\n" + "\n\n".join(
179 | [
180 | str(res["file"]),
181 | obj_name + obj_signature,
182 | res["docstring"],
183 | "# Errors",
184 | "\n".join(
185 | " - {}: {}".format(code, message) for code, message in res["errors"]
186 | ),
187 | ]
188 | )
189 | return msg
190 |
191 |
192 | @pytest.mark.parametrize("function_name", get_all_functions_names())
193 | def test_function_docstring(function_name, request):
194 | """Check function docstrings using numpydoc."""
195 | if function_name in FUNCTION_DOCSTRING_IGNORE_LIST:
196 | request.applymarker(
197 | pytest.mark.xfail(run=False, reason="TODO pass numpydoc validation")
198 | )
199 |
200 | res = numpydoc_validation.validate(function_name)
201 |
202 | res["errors"] = list(filter_errors(res["errors"], method="function"))
203 |
204 | if res["errors"]:
205 | msg = repr_errors(res, method=f"Tested function: {function_name}")
206 |
207 | raise ValueError(msg)
208 |
209 |
210 | @pytest.mark.parametrize("Estimator, method", get_all_methods())
211 | def test_docstring(Estimator, method, request):
212 | base_import_path = Estimator.__module__
213 | import_path = [base_import_path, Estimator.__name__]
214 | if method is not None:
215 | import_path.append(method)
216 |
217 | import_path = ".".join(import_path)
218 |
219 | res = numpydoc_validation.validate(import_path)
220 |
221 | res["errors"] = list(filter_errors(res["errors"], method, Estimator=Estimator))
222 |
223 | if res["errors"]:
224 | msg = repr_errors(res, Estimator, method)
225 |
226 | raise ValueError(msg)
227 |
228 |
229 | if __name__ == "__main__":
230 | pass
231 |
--------------------------------------------------------------------------------
/skglm/tests/test_fista.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 | from numpy.linalg import norm
5 |
6 | from scipy.sparse import csc_matrix
7 |
8 | from skglm.penalties import L1
9 | from skglm.solvers import FISTA, AndersonCD
10 | from skglm.datafits import Quadratic, Logistic
11 |
12 | from skglm.utils.data import make_correlated_data
13 |
14 |
15 | random_state = 113
16 | n_samples, n_features = 50, 60
17 |
18 | rng = np.random.RandomState(random_state)
19 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=rng)
20 | rng.seed(random_state)
21 | X_sparse = csc_matrix(X * np.random.binomial(1, 0.5, X.shape))
22 | y_classif = np.sign(y)
23 |
24 | alpha_max = norm(X.T @ y, ord=np.inf) / len(y)
25 | alpha = alpha_max / 10
26 |
27 | tol = 1e-10
28 |
29 |
30 | @pytest.mark.parametrize("X", [X, X_sparse])
31 | @pytest.mark.parametrize("Datafit, Penalty", [
32 | (Quadratic, L1),
33 | (Logistic, L1),
34 | # (QuadraticSVC, IndicatorBox),
35 | ])
36 | def test_fista_solver(X, Datafit, Penalty):
37 | _y = y if Datafit is Quadratic else y_classif
38 | datafit = Datafit()
39 | penalty = Penalty(alpha)
40 |
41 | solver = FISTA(max_iter=1000, tol=tol)
42 | w_fista = solver.solve(X, _y, datafit, penalty)[0]
43 |
44 | solver_cd = AndersonCD(tol=tol, fit_intercept=False)
45 | w_cd = solver_cd.solve(X, _y, datafit, penalty)[0]
46 |
47 | np.testing.assert_allclose(w_fista, w_cd, atol=1e-7)
48 |
49 |
50 | if __name__ == '__main__':
51 | pass
52 |
--------------------------------------------------------------------------------
/skglm/tests/test_gram_solver.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from itertools import product
3 |
4 | import numpy as np
5 | from numpy.linalg import norm
6 | from sklearn.linear_model import Lasso
7 |
8 | from skglm.penalties import L1
9 | from skglm.solvers import GramCD
10 |
11 | from skglm.utils.data import make_correlated_data
12 |
13 |
14 | @pytest.mark.parametrize("rho, X_density, greedy_cd",
15 | product([1e-1, 1e-3], [1., 0.8], [True, False]))
16 | def test_vs_lasso_sklearn(rho, X_density, greedy_cd):
17 | X, y, _ = make_correlated_data(
18 | n_samples=18, n_features=8, random_state=0, X_density=X_density)
19 | alpha_max = norm(X.T @ y, ord=np.inf) / len(y)
20 | alpha = rho * alpha_max
21 |
22 | sk_lasso = Lasso(alpha, fit_intercept=False, tol=1e-9)
23 | sk_lasso.fit(X, y)
24 |
25 | l1_penalty = L1(alpha)
26 | w = GramCD(tol=1e-9, max_iter=1000, greedy_cd=greedy_cd).solve(
27 | X, y, None, l1_penalty)[0]
28 | np.testing.assert_allclose(w, sk_lasso.coef_.flatten(), rtol=1e-7, atol=1e-7)
29 |
30 |
31 | if __name__ == '__main__':
32 | pass
33 |
--------------------------------------------------------------------------------
/skglm/tests/test_lbfgs_solver.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | import pandas as pd
4 |
5 | from skglm.penalties import L2
6 | from skglm.solvers import LBFGS
7 | from skglm.datafits import Logistic, Cox
8 |
9 | from sklearn.linear_model import LogisticRegression
10 |
11 | from skglm.utils.data import make_correlated_data, make_dummy_survival_data
12 |
13 |
14 | @pytest.mark.parametrize("X_sparse", [True, False])
15 | def test_lbfgs_L2_logreg(X_sparse):
16 | reg = 1.0
17 | X_density = 1.0 if not X_sparse else 0.5
18 | n_samples, n_features = 100, 50
19 |
20 | X, y, _ = make_correlated_data(
21 | n_samples,
22 | n_features,
23 | random_state=0,
24 | X_density=X_density,
25 | )
26 | y = np.sign(y)
27 |
28 | # fit L-BFGS
29 | datafit = Logistic()
30 | penalty = L2(reg)
31 | w, *_ = LBFGS(tol=1e-12).solve(X, y, datafit, penalty)
32 |
33 | # fit scikit learn
34 | estimator = LogisticRegression(
35 | penalty="l2",
36 | C=1 / (n_samples * reg),
37 | fit_intercept=False,
38 | tol=1e-12,
39 | ).fit(X, y)
40 |
41 | np.testing.assert_allclose(w, estimator.coef_.flatten(), atol=1e-5)
42 |
43 |
44 | @pytest.mark.parametrize("use_efron", [True, False])
45 | def test_L2_Cox(use_efron):
46 | try:
47 | from lifelines import CoxPHFitter
48 | except ModuleNotFoundError:
49 | pytest.xfail(
50 | "Testing L2 Cox Estimator requires `lifelines` packages\n"
51 | "Run `pip install lifelines`"
52 | )
53 |
54 | alpha = 10.0
55 | n_samples, n_features = 100, 50
56 |
57 | X, y = make_dummy_survival_data(
58 | n_samples, n_features, normalize=True, with_ties=use_efron, random_state=0
59 | )
60 |
61 | datafit = Cox(use_efron)
62 | penalty = L2(alpha)
63 |
64 | # XXX: initialize is needed here, although it is also done in LBFGS,
65 | # because the datafit is used below to evaluate the objective
66 | datafit.initialize(X, y)
67 | w, *_ = LBFGS().solve(X, y, datafit, penalty)
68 |
69 | # fit lifeline estimator
70 | stacked_y_X = np.hstack((y, X))
71 | df = pd.DataFrame(stacked_y_X)
72 |
73 | estimator = CoxPHFitter(penalizer=alpha, l1_ratio=0.0).fit(
74 | df, duration_col=0, event_col=1
75 | )
76 | w_ll = estimator.params_.values
77 |
78 | p_obj_skglm = datafit.value(y, w, X @ w) + penalty.value(w)
79 | p_obj_ll = datafit.value(y, w_ll, X @ w_ll) + penalty.value(w_ll)
80 |
81 | # despite increasing tol in lifelines, solutions are quite far apart
82 | # suspecting a lifelines issue (https://github.com/CamDavidsonPilon/lifelines/pull/1534)
83 | # since our solution gives the lower objective value
84 | np.testing.assert_allclose(w, w_ll, rtol=1e-1)
85 | np.testing.assert_allclose(p_obj_skglm, p_obj_ll, rtol=1e-6)
86 |
87 |
88 | if __name__ == "__main__":
89 | pass
90 |
--------------------------------------------------------------------------------
/skglm/tests/test_penalties.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from numpy.linalg import norm
5 | from numpy.testing import assert_array_less
6 |
7 | from sklearn.linear_model import LinearRegression
8 |
9 | from skglm.datafits import Quadratic, QuadraticMultiTask
10 | from skglm.penalties import (
11 | L1, L1_plus_L2, WeightedL1, MCPenalty, SCAD, IndicatorBox, L0_5, L2_3, SLOPE,
12 | LogSumPenalty, PositiveConstraint, L2_1, L2_05, BlockMCPenalty, BlockSCAD)
13 | from skglm import GeneralizedLinearEstimator, Lasso
14 | from skglm.solvers import AndersonCD, MultiTaskBCD, FISTA
15 | from skglm.utils.data import make_correlated_data
16 |
17 | from skglm.utils.prox_funcs import prox_log_sum, _log_sum_prox_val
18 |
19 |
20 | n_samples = 20
21 | n_features = 10
22 | n_tasks = 10
23 | X, Y, _ = make_correlated_data(
24 | n_samples=n_samples, n_features=n_features, n_tasks=n_tasks, density=0.5,
25 | random_state=0)
26 | y = Y[:, 0]
27 |
28 | n_samples, n_features = X.shape
29 | alpha_max = norm(X.T @ y, ord=np.inf) / n_samples
30 | alpha = alpha_max / 1000
31 |
32 | tol = 1e-10
33 |
34 | penalties = [
35 | L1(alpha=alpha),
36 | L1_plus_L2(alpha=alpha, l1_ratio=0.5),
37 | WeightedL1(alpha=1, weights=np.arange(n_features)),
38 | MCPenalty(alpha=alpha, gamma=4),
39 | SCAD(alpha=alpha, gamma=4),
40 | IndicatorBox(alpha=alpha),
41 | L0_5(alpha),
42 | L2_3(alpha),
43 | LogSumPenalty(alpha=alpha, eps=1e-2)
44 | ]
45 |
46 | block_penalties = [
47 | L2_1(alpha=alpha), L2_05(alpha=alpha),
48 | BlockMCPenalty(alpha=alpha, gamma=4),
49 | BlockSCAD(alpha=alpha, gamma=4)
50 | ]
51 |
52 |
53 | @pytest.mark.parametrize('penalty', penalties)
54 | def test_subdiff_diff(penalty):
55 | # tol=1e-14 is too strict when coefs are of order 1: square roots are computed
56 | # in some penalties and precision is lost
57 | est = GeneralizedLinearEstimator(
58 | datafit=Quadratic(),
59 | penalty=penalty,
60 | solver=AndersonCD(tol=tol)
61 | ).fit(X, y)
62 | # assert the stopping criterion is satisfied
63 | assert_array_less(est.stop_crit_, tol)
64 |
65 |
66 | @pytest.mark.parametrize('block_penalty', block_penalties)
67 | def test_subdiff_diff_block(block_penalty):
68 | est = GeneralizedLinearEstimator(
69 | datafit=QuadraticMultiTask(),
70 | penalty=block_penalty,
71 | solver=MultiTaskBCD(tol=tol)
72 | ).fit(X, Y)
73 | # assert the stopping criterion is satisfied
74 | assert_array_less(est.stop_crit_, est.solver.tol)
75 |
76 |
77 | def test_slope_lasso():
78 | # check that when alphas = [alpha, ..., alpha], SLOPE and L1 solutions are equal
79 | alphas = np.full(n_features, alpha)
80 | est = GeneralizedLinearEstimator(
81 | penalty=SLOPE(alphas),
82 | solver=FISTA(max_iter=1000, tol=tol, opt_strategy="fixpoint"),
83 | ).fit(X, y)
84 | lasso = Lasso(alpha, fit_intercept=False, tol=tol).fit(X, y)
85 | np.testing.assert_allclose(est.coef_, lasso.coef_, rtol=1e-5)
86 |
87 |
88 | def test_slope():
89 | # compare solutions with `sortedl1`: https://github.com/jolars/sortedl1
90 | try:
91 | from sortedl1 import Slope as SlopeEst # noqa
92 | except ImportError:
93 | pytest.xfail(
94 | "This test requires slope to run.\n"
95 | "https://github.com/jolars/sortedl1")
96 |
97 | # q = 0.1
98 | # alphas = lambda_sequence(
99 | # X, y, fit_intercept=False, reg=alpha / alpha_max, q=q)
100 | clf = SlopeEst(
101 | alpha=0.01, fit_intercept=False, tol=1e-6
102 | ).fit(X, y)
103 | alphas = clf.lambda_
104 | ours = GeneralizedLinearEstimator(
105 | penalty=SLOPE(clf.alpha * alphas),
106 | solver=FISTA(max_iter=1000, tol=tol, opt_strategy="fixpoint"),
107 | ).fit(X, y)
108 | np.testing.assert_allclose(ours.coef_, np.squeeze(clf.coef_), rtol=1e-3)
109 |
110 |
111 | @pytest.mark.parametrize("fit_intercept", [True, False])
112 | def test_nnls(fit_intercept):
113 | # compare solutions with sklearn's LinearRegression, note that n_samples >=
114 | # n_features for the design matrix to be injective, hence the solution unique
115 | clf = GeneralizedLinearEstimator(
116 | datafit=Quadratic(),
117 | penalty=PositiveConstraint(),
118 | solver=AndersonCD(tol=tol, fit_intercept=fit_intercept),
119 | ).fit(X, y)
120 | reg_nnls = LinearRegression(positive=True, fit_intercept=fit_intercept).fit(X, y)
121 |
122 | np.testing.assert_allclose(clf.coef_, reg_nnls.coef_)
123 | np.testing.assert_allclose(clf.intercept_, reg_nnls.intercept_)
124 |
125 |
126 | def test_logsum_prox():
127 | alpha = 1.
128 |
129 | grid_z = np.linspace(-2, 2, num=10)
130 | grid_test = np.linspace(-5, 5, num=100)
131 | grid_eps = np.linspace(0, 5, num=10 + 1)[1:]
132 |
133 | for z, eps in zip(grid_z, grid_eps):
134 | prox = prox_log_sum(z, alpha, eps)
135 | obj_at_prox = _log_sum_prox_val(prox, z, alpha, eps)
136 |
137 | is_lowest = all(
138 | obj_at_prox <= _log_sum_prox_val(x, z, alpha, eps) for x in grid_test
139 | )
140 |
141 | np.testing.assert_equal(is_lowest, True)
142 |
143 |
144 | if __name__ == "__main__":
145 | pass
146 |
--------------------------------------------------------------------------------
/skglm/tests/test_prox_newton.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | from sklearn.linear_model import LogisticRegression
4 |
5 | from skglm.penalties import L1
6 | from skglm.datafits import Logistic
7 | from skglm.solvers.prox_newton import ProxNewton
8 |
9 | from skglm.utils.data import make_correlated_data
10 |
11 |
12 | @pytest.mark.parametrize("X_density", [1., 0.5])
13 | @pytest.mark.parametrize("fit_intercept", [True, False])
14 | @pytest.mark.parametrize("ws_strategy", ["subdiff", "fixpoint"])
15 | def test_pn_vs_sklearn(X_density, fit_intercept, ws_strategy):
16 | n_samples, n_features = 12, 25
17 | rho = 1e-1
18 |
19 | X, y, _ = make_correlated_data(n_samples, n_features, random_state=0,
20 | X_density=X_density)
21 | y = np.sign(y)
22 |
23 | alpha_max = np.linalg.norm(X.T @ y, ord=np.inf) / (2 * n_samples)
24 | alpha = rho * alpha_max
25 |
26 | sk_log_reg = LogisticRegression(penalty='l1', C=1/(n_samples * alpha),
27 | fit_intercept=fit_intercept, random_state=0,
28 | tol=1e-12, solver='saga', max_iter=1_000_000)
29 | sk_log_reg.fit(X, y)
30 |
31 | log_datafit = Logistic()
32 | l1_penalty = L1(alpha)
33 | prox_solver = ProxNewton(
34 | fit_intercept=fit_intercept, tol=1e-12, ws_strategy=ws_strategy)
35 | w = prox_solver.solve(X, y, log_datafit, l1_penalty)[0]
36 |
37 | np.testing.assert_allclose(w[:n_features], sk_log_reg.coef_.flatten())
38 | if fit_intercept:
39 | np.testing.assert_allclose(w[-1], sk_log_reg.intercept_)
40 |
41 |
42 | if __name__ == '__main__':
43 | pass
44 |
--------------------------------------------------------------------------------
/skglm/tests/test_sparse_ops.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.sparse
3 |
4 | from skglm.utils.sparse_ops import spectral_norm, sparse_columns_slice
5 |
6 |
7 | def test_spectral_norm():
8 | n_samples, n_features = 50, 60
9 | A_sparse = scipy.sparse.random(
10 | n_samples, n_features, density=0.7, format='csc', random_state=37)
11 |
12 | A_bundles = (A_sparse.data, A_sparse.indptr, A_sparse.indices)
13 | spectral_norm_our = spectral_norm(*A_bundles, n_samples=A_sparse.shape[0])
14 | spectral_norm_sp = scipy.sparse.linalg.svds(A_sparse, k=1)[1]
15 |
16 | np.testing.assert_allclose(spectral_norm_our, spectral_norm_sp)
17 |
18 |
19 | def test_slice_cols_sparse():
20 | n_samples, n_features = 20, 50
21 | rng = np.random.RandomState(546)
22 |
23 | M = scipy.sparse.random(
24 | n_samples, n_features, density=0.9, format="csc", random_state=rng)
25 | cols = rng.choice(n_features, size=n_features // 10, replace=False)
26 |
27 | sub_M_data, sub_M_indptr, sub_M_indices = sparse_columns_slice(
28 | cols, M.data, M.indptr, M.indices)
29 | sub_M = scipy.sparse.csc_matrix(
30 | (sub_M_data, sub_M_indices, sub_M_indptr), shape=(n_samples, len(cols)))
31 |
32 | np.testing.assert_array_equal(sub_M.toarray(), M.toarray()[:, cols])
33 |
34 |
35 | if __name__ == "__main__":
36 | pass
37 |
--------------------------------------------------------------------------------
/skglm/tests/test_validation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | from scipy import sparse
4 |
5 | from skglm.penalties import L1, WeightedL1GroupL2, WeightedGroupL2
6 | from skglm.datafits import Poisson, Huber, QuadraticGroup, LogisticGroup
7 | from skglm.solvers import FISTA, ProxNewton, GroupBCD, GramCD, GroupProxNewton
8 |
9 | from skglm.utils.data import grp_converter
10 | from skglm.utils.data import make_correlated_data
11 |
12 |
13 | def test_datafit_penalty_solver_compatibility():
14 | grp_size, n_features = 3, 9
15 | n_samples = 10
16 | X, y, _ = make_correlated_data(n_samples, n_features)
17 | X_sparse = sparse.csc_array(X)
18 |
19 | n_groups = n_features // grp_size
20 | weights_groups = np.ones(n_groups)
21 | weights_features = np.ones(n_features)
22 | grp_indices, grp_ptr = grp_converter(grp_size, n_features)
23 |
24 | # basic compatibility checks
25 | with pytest.raises(
26 | AttributeError, match="Missing `raw_grad` and `raw_hessian`"
27 | ):
28 | ProxNewton()._validate(
29 | X, y, Huber(1.), L1(1.)
30 | )
31 | with pytest.raises(
32 | AttributeError, match="Missing `get_global_lipschitz`"
33 | ):
34 | FISTA()._validate(
35 | X, y, Poisson(), L1(1.)
36 | )
37 | with pytest.raises(
38 | AttributeError, match="Missing `get_global_lipschitz`"
39 | ):
40 | FISTA()._validate(
41 | X, y, Poisson(), L1(1.)
42 | )
43 | # check Gram Solver
44 | with pytest.raises(
45 | AttributeError, match="`GramCD` supports only `Quadratic` datafit"
46 | ):
47 | GramCD()._validate(
48 | X, y, Poisson(), L1(1.)
49 | )
50 | # check working set strategy subdiff
51 | with pytest.raises(
52 | AttributeError, match="Penalty must implement `subdiff_distance`"
53 | ):
54 | GroupBCD()._validate(
55 | X, y,
56 | datafit=QuadraticGroup(grp_ptr, grp_indices),
57 | penalty=WeightedL1GroupL2(
58 | 1., weights_groups, weights_features, grp_ptr, grp_indices)
59 | )
60 | # checks for sparsity
61 | with pytest.raises(
62 | ValueError,
63 | match="Sparse matrices are not yet supported in `GroupProxNewton` solver."
64 | ):
65 | GroupProxNewton()._validate(
66 | X_sparse, y,
67 | datafit=QuadraticGroup(grp_ptr, grp_indices),
68 | penalty=WeightedL1GroupL2(
69 | 1., weights_groups, weights_features, grp_ptr, grp_indices)
70 | )
71 | with pytest.raises(
72 | AttributeError,
73 | match="LogisticGroup is not compatible with solver GroupBCD with sparse data."
74 | ):
75 | GroupBCD()._validate(
76 | X_sparse, y,
77 | datafit=LogisticGroup(grp_ptr, grp_indices),
78 | penalty=WeightedGroupL2(1., weights_groups, grp_ptr, grp_indices)
79 | )
80 |
81 |
82 | if __name__ == "__main__":
83 | pass
84 |
--------------------------------------------------------------------------------
/skglm/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/skglm/d57e3614cf9246b319192101f51959b7377c4514/skglm/utils/__init__.py
--------------------------------------------------------------------------------
/skglm/utils/anderson.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class AndersonAcceleration:
5 | """Abstraction of Anderson Acceleration.
6 |
7 | Extrapolate the asymptotic VAR ``w`` and ``Xw``
8 | based on ``K`` previous iterations.
9 |
10 | Parameters
11 | ----------
12 | K : int
13 | Number of previous iterates to consider for extrapolation.
14 | """
15 |
16 | def __init__(self, K):
17 | self.K, self.current_iter = K, 0
18 | self.arr_w_, self.arr_Xw_ = None, None
19 |
20 | def extrapolate(self, w, Xw):
21 | """Return w, Xw, and a bool indicating whether they were extrapolated."""
22 | if self.arr_w_ is None or self.arr_Xw_ is None:
23 | self.arr_w_ = np.zeros((w.shape[0], self.K+1))
24 | self.arr_Xw_ = np.zeros((Xw.shape[0], self.K+1))
25 |
26 | if self.current_iter <= self.K:
27 | self.arr_w_[:, self.current_iter] = w
28 | self.arr_Xw_[:, self.current_iter] = Xw
29 | self.current_iter += 1
30 | return w, Xw, False
31 |
32 | U = np.diff(self.arr_w_, axis=1) # compute residuals
33 |
34 | # compute extrapolation coefs
35 | try:
36 | inv_UTU_ones = np.linalg.solve(U.T @ U, np.ones(self.K))
37 | except np.linalg.LinAlgError:
38 | return w, Xw, False
39 | finally:
40 | self.current_iter = 0
41 |
42 | # extrapolate
43 | C = inv_UTU_ones / np.sum(inv_UTU_ones)
44 | # floating point errors may cause w and Xw to disagree
45 | return self.arr_w_[:, 1:] @ C, self.arr_Xw_[:, 1:] @ C, True
46 |
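A small sketch of the extrapolation API on a toy fixed-point iteration; the contraction map below is purely illustrative and not part of skglm:

    import numpy as np
    from skglm.utils.anderson import AndersonAcceleration

    rng = np.random.RandomState(0)
    A, b = 0.5 * np.eye(3), rng.randn(3)  # toy contraction: w <- A @ w + b
    X = rng.randn(5, 3)                   # kept only so that Xw is updated alongside w

    acc = AndersonAcceleration(K=5)
    w = np.zeros(3)
    for _ in range(30):
        w = A @ w + b
        w_acc, Xw_acc, extrapolated = acc.extrapolate(w, X @ w)
        if extrapolated:
            w = w_acc  # accept the extrapolated point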
--------------------------------------------------------------------------------
/skglm/utils/jit_compilation.py:
--------------------------------------------------------------------------------
1 | from functools import lru_cache
2 |
3 | import numba
4 | from numba import float32, float64
5 | from numba.experimental import jitclass
6 |
7 |
8 | def spec_to_float32(spec):
9 | """Convert a numba specification to an equivalent float32 one.
10 |
11 | Parameters
12 | ----------
13 | spec : list
14 | A list of (name, dtype) for every attribute of a jitclass.
15 |
16 | Returns
17 | -------
18 | spec32 : list
19 | A list of (name, dtype) for every attribute of a jitclass, where float64
20 | have been replaced by float32.
21 | """
22 | spec32 = []
23 | for name, dtype in spec:
24 | if dtype == float64:
25 | dtype32 = float32
26 | elif isinstance(dtype, numba.core.types.npytypes.Array):
27 | if dtype.dtype == float64:
28 | dtype32 = dtype.copy(dtype=float32)
29 | else:
30 | dtype32 = dtype
31 | else:
32 | # raise ValueError(f"Unknown spec type {dtype}")
33 | # bool types and others are not affected:
34 | dtype32 = dtype
35 | spec32.append((name, dtype32))
36 | return spec32
37 |
38 |
39 | @lru_cache()
40 | def jit_cached_compile(klass, spec, to_float32=False):
41 | """Jit compile class and cache compilation.
42 |
43 | Parameters
44 | ----------
45 | klass : class
46 | An uninstantiated Datafit or Penalty class.
47 |
48 | spec : tuple
49 | A tuple of (name, dtype) for every attribute of a jitclass.
50 |
51 | to_float32 : bool, optional
52 | If ``True``, converts float64 types to float32, by default False.
53 |
54 | Returns
55 | -------
56 | Instance of Datafit or penalty
57 | Return a jitclass.
58 | """
59 | if to_float32:
60 | spec = spec_to_float32(spec)
61 |
62 | return jitclass(spec)(klass)
63 |
64 |
65 | def compiled_clone(instance, to_float32=False):
66 | """Compile instance to a jitclass.
67 |
68 | Parameters
69 | ----------
70 | instance : Instance of Datafit or Penalty
71 | Datafit or Penalty object.
72 |
73 | to_float32 : bool, optional
74 | If ``True``, converts float64 types to float32, by default False.
75 |
76 | Returns
77 | -------
78 | Instance of Datafit or penalty
79 | Return a jitclass.
80 | """
81 | return jit_cached_compile(
82 | instance.__class__,
83 | instance.get_spec(),
84 | to_float32,
85 | )(**instance.params_to_dict())
86 |
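A brief sketch of compiled_clone, assuming (as elsewhere in the code base) that datafit and penalty classes expose get_spec and params_to_dict:

    from skglm.datafits import Quadratic
    from skglm.utils.jit_compilation import compiled_clone

    quad_jit = compiled_clone(Quadratic())                   # numba jitclass instance
    quad_f32 = compiled_clone(Quadratic(), to_float32=True)  # float32 spec for float32 data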
--------------------------------------------------------------------------------
/skglm/utils/sparse_ops.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy.linalg import norm
3 | from numba import njit
4 |
5 |
6 | @njit
7 | def spectral_norm(X_data, X_indptr, X_indices, n_samples,
8 | max_iter=100, tol=1e-6):
9 | """Compute the spectral norm of sparse matrix ``X`` with power method.
10 |
11 | Parameters
12 | ----------
13 | X_data : array, shape (n_elements,)
14 | ``data`` attribute of the sparse CSC matrix ``X``.
15 |
16 | X_indptr : array, shape (n_features + 1,)
17 | ``indptr`` attribute of the sparse CSC matrix ``X``.
18 |
19 | X_indices : array, shape (n_elements,)
20 | ``indices`` attribute of the sparse CSC matrix ``X``.
21 |
22 | n_samples : int
23 | number of rows of ``X``.
24 |
25 | max_iter : int, default 100
26 | Maximum number of power method iterations.
27 |
28 | tol : float, default 1e-6
29 | Tolerance for convergence.
30 |
31 | Returns
32 | -------
33 | eigenvalue : float
34 | The largest singular value of ``X``.
35 |
36 | References
37 | ----------
38 | .. [1] Alfio Quarteroni, Riccardo Sacco, Fausto Saleri "Numerical Mathematics",
39 | chapter 5, page 192-195.
40 | """
41 | # init vec with norm(vec) == 1.
42 | eigenvector = np.random.randn(n_samples)
43 | eigenvector /= norm(eigenvector)
44 | eigenvalue = 1.
45 |
46 | for _ in range(max_iter):
47 | vec = _XXT_dot_vec(X_data, X_indptr, X_indices, eigenvector, n_samples)
48 | norm_vec = norm(vec)
49 | eigenvalue = vec @ eigenvector
50 |
51 | # norm(X @ X.T @ eigenvector - eigenvalue * eigenvector) <= tol
52 | # inequality (5.25) in ref [1] is squared
53 | if norm_vec ** 2 - eigenvalue ** 2 <= tol ** 2:
54 | break
55 |
56 | eigenvector = vec / norm_vec
57 |
58 | return np.sqrt(eigenvalue)
59 |
60 |
61 | @njit
62 | def sparse_columns_slice(cols, X_data, X_indptr, X_indices):
63 | """Select a sub matrix from CSC sparse matrix.
64 |
65 | Similar to ``X[:, cols]`` but for ``X`` a CSC sparse matrix.
66 |
67 | Parameters
68 | ----------
69 | cols : array of int
70 | Columns to select in matrix ``X``.
71 |
72 | X_data : array, shape (n_elements,)
73 | ``data`` attribute of the sparse CSC matrix ``X``.
74 |
75 | X_indptr : array, shape (n_features + 1,)
76 | ``indptr`` attribute of the sparse CSC matrix ``X``.
77 |
78 | X_indices : array, shape (n_elements,)
79 | ``indices`` attribute of the sparse CSC matrix ``X``.
80 |
81 | Returns
82 | -------
83 | sub_X_data, sub_X_indptr, sub_X_indices
84 | The ``data``, ``indptr``, and ``indices`` attributes of the sub matrix.
85 | """
86 | nnz = sum([X_indptr[j+1] - X_indptr[j] for j in cols])
87 |
88 | sub_X_indptr = np.zeros(len(cols) + 1, dtype=cols.dtype)
89 | sub_X_indices = np.zeros(nnz, dtype=X_indices.dtype)
90 | sub_X_data = np.zeros(nnz, dtype=X_data.dtype)
91 |
92 | for idx, j in enumerate(cols):
93 | n_elements = X_indptr[j+1] - X_indptr[j]
94 | sub_X_indptr[idx + 1] = sub_X_indptr[idx] + n_elements
95 |
96 | col_j_slice = slice(X_indptr[j], X_indptr[j+1])
97 | col_idx_slice = slice(sub_X_indptr[idx], sub_X_indptr[idx+1])
98 |
99 | sub_X_indices[col_idx_slice] = X_indices[col_j_slice]
100 | sub_X_data[col_idx_slice] = X_data[col_j_slice]
101 |
102 | return sub_X_data, sub_X_indptr, sub_X_indices
103 |
104 |
105 | @njit
106 | def _XXT_dot_vec(X_data, X_indptr, X_indices, vec, n_samples):
107 | # computes X @ X.T @ vec, with X csc encoded
108 | return _X_dot_vec(X_data, X_indptr, X_indices,
109 | _XT_dot_vec(X_data, X_indptr, X_indices, vec), n_samples)
110 |
111 |
112 | @njit
113 | def _X_dot_vec(X_data, X_indptr, X_indices, vec, n_samples):
114 | # compute X @ vec, with X csc encoded
115 | result = np.zeros(n_samples)
116 |
117 | # loop over features
118 | for j in range(len(X_indptr) - 1):
119 | if vec[j] == 0:
120 | continue
121 |
122 | col_j_rows_idx = slice(X_indptr[j], X_indptr[j+1])
123 | result[X_indices[col_j_rows_idx]] += vec[j] * X_data[col_j_rows_idx]
124 |
125 | return result
126 |
127 |
128 | @njit
129 | def _XT_dot_vec(X_data, X_indptr, X_indices, vec):
130 | # compute X.T @ vec, with X csc encoded
131 | n_features = len(X_indptr) - 1
132 | result = np.zeros(n_features)
133 |
134 | for j in range(n_features):
135 | for idx in range(X_indptr[j], X_indptr[j+1]):
136 | result[j] += X_data[idx] * vec[X_indices[idx]]
137 |
138 | return result
139 |
140 |
141 | @njit(fastmath=True)
142 | def _sparse_xj_dot(X_data, X_indptr, X_indices, j, other):
143 | # Compute X[:, j] @ other in case X sparse
144 | res = 0.
145 | for i in range(X_indptr[j], X_indptr[j+1]):
146 | res += X_data[i] * other[X_indices[i]]
147 | return res
148 |
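A short sketch of spectral_norm on a CSC matrix; squaring the result and dividing by n_samples gives, for instance, a global Lipschitz constant for the quadratic datafit 1 / (2 n_samples) ||y - Xw||^2:

    import scipy.sparse
    from skglm.utils.sparse_ops import spectral_norm

    X = scipy.sparse.random(50, 30, density=0.5, format="csc", random_state=0)
    sigma_max = spectral_norm(X.data, X.indptr, X.indices, n_samples=X.shape[0])

    lipschitz = sigma_max ** 2 / X.shape[0]  # Lipschitz constant of the quadratic gradient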
--------------------------------------------------------------------------------
/skglm/utils/validation.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 |
4 | SPARSE_SUFFIX = "_sparse"
5 |
6 |
7 | def check_group_compatible(obj):
8 | """Check whether ``obj`` is compatible with ``bcd_solver``.
9 |
10 | Parameters
11 | ----------
12 | obj : instance of BaseDatafit or BasePenalty
13 | Object to check.
14 |
15 | Raises
16 | ------
17 | ValueError
18 | if ``obj`` doesn't have the ``grp_ptr`` and ``grp_indices``
19 | attributes.
20 | """
21 | obj_name = obj.__class__.__name__
22 | group_attrs = ('grp_ptr', 'grp_indices')
23 |
24 | for attr in group_attrs:
25 | if not hasattr(obj, attr):
26 | raise ValueError(
27 | f"datafit and penalty must be compatible with 'bcd_solver'.\n"
28 | f"'{obj_name}' is not block-separable. "
29 | f"Missing '{attr}' attribute."
30 | )
31 |
32 |
33 | def check_attrs(obj, solver, required_attr, support_sparse=False):
34 | """Check whether datafit or penalty is compatible with solver.
35 |
36 | Parameters
37 | ----------
38 | obj : Instance of Datafit or Penalty
39 | The instance Datafit (or Penalty) to check.
40 |
41 | solver : Instance of Solver
42 | The instance of Solver to check.
43 |
44 | required_attr : List or tuple of strings
45 | The attributes that ``obj`` must have.
46 |
47 | support_sparse : bool, default False
48 | If ``True`` adds a ``SPARSE_SUFFIX`` to check compatibility with sparse data.
49 |
50 | Raises
51 | ------
52 | AttributeError
53 | if any of the attributes in ``required_attr`` is missing
54 | from ``obj`` attributes.
55 | """
56 | missing_attrs = []
57 | suffix = SPARSE_SUFFIX if support_sparse else ""
58 |
59 | # if `attr` is a list, check that at least one of them
60 | # is within `obj` attributes
61 | for attr in required_attr:
62 | attributes = attr if not isinstance(attr, str) else (attr,)
63 |
64 | for a in attributes:
65 | if hasattr(obj, f"{a}{suffix}"):
66 | break
67 | else:
68 | missing_attrs.append(_join_attrs_with_or(attributes, suffix))
69 |
70 | if len(missing_attrs):
71 | required_attr = [_join_attrs_with_or(attrs, suffix) for attrs in required_attr]
72 |
73 | # get name obj and solver
74 | name_matcher = re.compile(r"\.(\w+)'>")
75 |
76 | obj_name = name_matcher.search(str(obj.__class__)).group(1)
77 | solver_name = name_matcher.search(str(solver.__class__)).group(1)
78 |
79 | if not support_sparse:
80 | err_message = f"{obj_name} is not compatible with solver {solver_name}."
81 | else:
82 | err_message = (f"{obj_name} is not compatible with solver {solver_name} "
83 | "with sparse data.")
84 |
85 | err_message += (f" It must implement {' and '.join(required_attr)}.\n"
86 | f"Missing {' and '.join(missing_attrs)}.")
87 |
88 | raise AttributeError(err_message)
89 |
90 |
91 | def _join_attrs_with_or(attrs, suffix=""):
92 | if isinstance(attrs, str):
93 | return f"`{attrs}{suffix}`"
94 |
95 | if len(attrs) == 1:
96 | return f"`{attrs[0]}{suffix}`"
97 |
98 | out = " or ".join([f"`{a}{suffix}`" for a in attrs])
99 | return f"({out})"
100 |
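A small sketch of check_attrs, using a combination the test suite above asserts is invalid: the Poisson datafit lacks `get_global_lipschitz`, which FISTA requires:

    from skglm.datafits import Poisson
    from skglm.solvers import FISTA
    from skglm.utils.validation import check_attrs

    try:
        check_attrs(Poisson(), solver=FISTA(),
                    required_attr=("get_global_lipschitz",))
    except AttributeError as err:
        print(err)  # e.g. "Poisson is not compatible with solver FISTA. ..."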
--------------------------------------------------------------------------------