├── .github
├── dependabot.yml
├── release.yml
└── workflows
│ ├── build.yml
│ ├── docs.yml
│ ├── lint.yml
│ ├── release.yml
│ ├── test.yml
│ └── update-precommit.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── docs
├── _static
│ ├── logo-light.png
│ └── logo.png
├── api.rst
├── conf.py
├── contributing.rst
├── index.rst
├── install.rst
├── license.rst
├── sparselm.model.rst
├── sparselm.model_selection.rst
├── sparselm.stepwise.rst
└── sparselm.tools.rst
├── examples
├── README.rst
├── corr.npy
├── energy.npy
├── plot_adaptive.py
├── plot_chull.py
├── plot_gl_sgl.py
├── plot_line_search.py
├── plot_one_std.py
├── plot_sparse_signal.py
├── plot_stepwise.py
└── structures.json
├── pyproject.toml
├── requirements.txt
├── src
├── requirements.txt
└── sparselm
│ ├── __init__.py
│ ├── _utils
│ ├── __init__.py
│ └── validation.py
│ ├── dataset.py
│ ├── model
│ ├── __init__.py
│ ├── _adaptive_lasso.py
│ ├── _base.py
│ ├── _lasso.py
│ ├── _miqp
│ │ ├── __init__.py
│ │ ├── _base.py
│ │ ├── _best_subset.py
│ │ └── _regularized_l0.py
│ └── _ols.py
│ ├── model_selection.py
│ ├── stepwise.py
│ └── tools.py
└── tests
├── conftest.py
├── pytest.ini
├── test_common.py
├── test_dataset.py
├── test_lasso.py
├── test_miqp.py
├── test_model_selection.py
├── test_ols.py
├── test_stepwise.py
└── test_tools.py
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |
4 | # Maintain dependencies for GitHub Actions
5 | - package-ecosystem: github-actions
6 | directory: "/"
7 | schedule:
8 | interval: weekly
9 |
10 | # Python dependencies
11 | - package-ecosystem: pip
12 | directory: "/"
13 | schedule:
14 | interval: weekly
15 | allow:
16 | - dependency-type: direct
17 | - dependency-type: indirect
18 |
--------------------------------------------------------------------------------
/.github/release.yml:
--------------------------------------------------------------------------------
1 | changelog:
2 | exclude:
3 | authors: [dependabot, github-actions, pre-commit-ci]
4 | categories:
5 | - title: 🎉 New Features
6 | labels: [feature]
7 | - title: 🐛 Bug Fixes
8 | labels: [fix]
9 | - title: 🛠 Enhancements
10 | labels: [enhancement]
11 | - title: 📖 Documentation
12 | labels: [documentation]
13 | - title: 💡 Refactoring
14 | labels: [refactor]
15 | - title: 🧪 Tests
16 | labels: [tests]
17 | - title: 💥 Breaking Changes
18 | labels: [breaking]
19 | - title: 🔒 Security Fixes
20 | labels: [security]
21 | - title: 🤷♂️ Other Changes
22 | labels: ["*"]
23 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: build
2 |
3 | on: [workflow_dispatch, workflow_call]
4 |
5 | jobs:
6 |
7 | build-sdist:
8 | name: Build sdist
9 | runs-on: ubuntu-latest
10 |
11 | steps:
12 | - uses: actions/checkout@v4
13 | with:
14 | fetch-depth: 0 # Optional, use if you use setuptools_scm
15 |
16 | - name: Build
17 | run: pipx run build --sdist
18 |
19 | - uses: actions/upload-artifact@v4
20 | with:
21 | path: dist/*.tar.gz
22 |
--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
1 | name: build-documentation
2 |
3 | on: [workflow_dispatch, workflow_call]
4 |
5 | jobs:
6 | build-deploy:
7 | runs-on: ubuntu-latest
8 |
9 | steps:
10 | - uses: actions/checkout@v4
11 |
12 | - name: Install pandoc
13 | run: sudo apt-get install pandoc
14 |
15 | - uses: actions/setup-python@v5
16 | with:
17 | python-version: 3.11
18 |
19 | - name: Install dependencies
20 | run: |
21 | python -m pip install --upgrade pip
22 | pip install .[docs]
23 |
24 | - name: Build docs
25 | run: sphinx-build docs docs_build
26 |
27 | - name: Deploy
28 | uses: peaceiris/actions-gh-pages@v4
29 | with:
30 | github_token: ${{ secrets.GITHUB_TOKEN }}
31 | publish_dir: ./docs_build
32 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: lint
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 |
8 | pull_request:
9 | branches:
10 | - main
11 |
12 | jobs:
13 | lint:
14 | runs-on: ubuntu-latest
15 | strategy:
16 | max-parallel: 6
17 |
18 | steps:
19 | - uses: actions/checkout@v4
20 | - name: Set up Python
21 | uses: actions/setup-python@v5
22 | with:
23 | python-version: 3.11
24 | - name: Install dependencies
25 | run: |
26 | python -m pip install --upgrade pip
27 | pip install .[dev]
28 | - name: flake8
29 | run: |
30 | flake8 --version
31 | flake8 --count --show-source --statistics src/sparselm
32 | # exit-zero treats all errors as warnings.
33 | flake8 --count --exit-zero --max-complexity=20 --statistics src/sparselm
34 | - name: black
35 | run: |
36 | black --version
37 | black --check --diff --color src/sparselm
38 | - name: pydocstyle
39 | run: |
40 | pydocstyle --version
41 | pydocstyle --count src/sparselm
42 | # Not in shape for this yet
43 | # - name: pylint
44 | # run: |
45 | # pylint sparselm
46 | #- name: mypy
47 | # run: |
48 | # mypy --version
49 | # rm -rf .mypy_cache
50 | # mypy sparselm
51 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: release
2 |
3 | on:
4 | release:
5 | types: [published]
6 | inputs:
7 | release-pypi:
8 | required: true
9 | type: boolean
10 | default: true
11 | workflow_dispatch:
12 | inputs:
13 | release-pypi:
14 | required: true
15 | type: boolean
16 | description: "if true a release is made on PyPI"
17 |
18 | jobs:
19 | test:
20 | uses: ./.github/workflows/test.yml
21 | secrets: inherit
22 |
23 | build:
24 | needs: test
25 | uses: ./.github/workflows/build.yml
26 |
27 | docs:
28 | needs: test
29 | uses: ./.github/workflows/docs.yml
30 | secrets: inherit
31 |
32 | release-pypi:
33 | needs: [build]
34 | runs-on: ubuntu-latest
35 | if: github.event.inputs.release-pypi == 'true'
36 |
37 | steps:
38 | - uses: actions/download-artifact@v4
39 | with:
40 | name: artifact
41 | path: dist
42 |
43 | - uses: pypa/gh-action-pypi-publish@release/v1
44 | with:
45 | verbose: true
46 | user: __token__
47 | password: ${{ secrets.PYPI_API_TOKEN }}
48 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: test
2 |
3 | env:
4 | scip-version: 8.0.0
5 |
6 | on:
7 | push:
8 | branches:
9 | - main
10 |
11 | pull_request:
12 | branches:
13 | - main
14 |
15 | workflow_call:
16 |
17 | jobs:
18 | test:
19 | runs-on: ubuntu-20.04
20 | strategy:
21 | max-parallel: 10
22 | matrix:
23 | python_version: ["3.9", "3.10", "3.11"]
24 |
25 | steps:
26 | - uses: actions/checkout@v4
27 |
28 | - name: Install dependencies
29 | run: |
30 | wget --quiet --no-check-certificate https://scipopt.org/download/release/SCIPOptSuite-${{ env.scip-version }}-Linux-ubuntu.deb
31 | sudo apt-get update && sudo apt install -y ./SCIPOptSuite-${{ env.scip-version }}-Linux-ubuntu.deb
32 |
33 | - name: Set up Python ${{ matrix.python_version }}
34 | uses: actions/setup-python@v5
35 | with:
36 | python-version: ${{ matrix.python_version }}
37 |
38 | - name: Install dependencies and package
39 | run: |
40 | python -m pip install --upgrade pip
41 | pip install cython
42 | pip install .[tests,dev]
43 |
44 | - name: Test with pytest
45 | run: |
46 | pytest tests --cov=sparselm --cov-report=xml
47 |
48 | - if: ${{ matrix.python_version == 3.11 && github.event_name == 'push' }}
49 | name: codacy-coverage-reporter
50 | uses: codacy/codacy-coverage-reporter-action@v1
51 | with:
52 | project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
53 | coverage-reports: coverage.xml
54 |
--------------------------------------------------------------------------------
/.github/workflows/update-precommit.yaml:
--------------------------------------------------------------------------------
1 | name: pre-commit-auto-update
2 |
3 | on:
4 | # midnight twice a month
5 | schedule:
6 | - cron: '0 0 14,28 * *'
7 |
8 | jobs:
9 | auto-update:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v4
13 |
14 | - name: Set up Python
15 | uses: actions/setup-python@v5
16 | with:
17 | python-version: 3.9
18 |
19 | - name: Install pre-commit
20 | run: pip install pre-commit
21 |
22 | - name: Run pre-commit autoupdate
23 | run: pre-commit autoupdate
24 |
25 | - name: Create Pull Request
26 | uses: peter-evans/create-pull-request@v6.0.5
27 | with:
28 | token: ${{ secrets.GITHUB_TOKEN }}
29 | branch: update/pre-commit-autoupdate
30 | title: auto-update pre-commit hooks
31 | commit-message: auto-update pre-commit hooks
32 | body: Update versions of tools in pre-commit hooks to latest versions.
33 | labels: dependencies
34 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | .idea
132 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # See https://pre-commit.com for more information
2 | # See https://pre-commit.com/hooks.html for more hooks
3 |
4 | ci:
5 | autoupdate_schedule: monthly
6 |
7 | repos:
8 | - repo: https://github.com/pre-commit/pre-commit-hooks
9 | rev: v5.0.0
10 | hooks:
11 | - id: check-yaml
12 | - id: fix-encoding-pragma
13 | args:
14 | - --remove
15 | - id: end-of-file-fixer
16 | - id: trailing-whitespace
17 | - id: check-added-large-files
18 | args: ['--maxkb=500']
19 |
20 | - repo: https://github.com/psf/black
21 | rev: 24.10.0
22 | hooks:
23 | - id: black
24 |
25 | - repo: https://github.com/asottile/blacken-docs
26 | rev: 1.19.1
27 | hooks:
28 | - id: blacken-docs
29 | additional_dependencies: [black==23.1.0]
30 | exclude: README.md
31 |
32 | - repo: https://github.com/pycqa/isort
33 | rev: 6.0.0
34 | hooks:
35 | - id: isort
36 | name: isort (python)
37 | args:
38 | - --profile=black
39 |
40 | - repo: https://github.com/asottile/pyupgrade
41 | rev: v3.19.1
42 | hooks:
43 | - id: pyupgrade
44 | args: [--py38-plus]
45 |
46 | - repo: https://github.com/PyCQA/autoflake
47 | rev: v2.3.1
48 | hooks:
49 | - id: autoflake
50 | args:
51 | - --in-place
52 | - --remove-unused-variables
53 | - --remove-all-unused-imports
54 | - --expand-star-imports
55 | - --ignore-init-module-imports
56 |
57 | - repo: https://github.com/pycqa/pydocstyle
58 | rev: 6.3.0 # pick a git hash / tag to point to
59 | hooks:
60 | - id: pydocstyle
61 | files: ^src/sparselm/
62 | args:
63 | - --convention=google
64 | - --add-ignore=D107
65 |
66 | - repo: https://github.com/pre-commit/pygrep-hooks
67 | rev: v1.10.0
68 | hooks:
69 | - id: rst-backticks
70 | - id: rst-directive-colons
71 | - id: rst-inline-touching-normal
72 |
73 | - repo: https://github.com/pre-commit/mirrors-mypy
74 | rev: 'v1.14.1' # Use the sha / tag you want to point at
75 | hooks:
76 | - id: mypy
77 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | ::
2 |
3 | sparse-lm Copyright (c) 2022, The Regents of the University of California, through
4 | Lawrence Berkeley National Laboratory (subject to receipt of any required approvals
5 | from the U.S. Dept. of Energy) and the University of California, Berkeley.
6 | All rights reserved.
7 |
8 | Redistribution and use in source and binary forms, with or without
9 | modification, are permitted provided that the following conditions are met:
10 |
11 | (1) Redistributions of source code must retain the above copyright notice,
12 | this list of conditions and the following disclaimer.
13 |
14 | (2) Redistributions in binary form must reproduce the above copyright
15 | notice, this list of conditions and the following disclaimer in the
16 | documentation and/or other materials provided with the distribution.
17 |
18 | (3) Neither the name of the University of California, Lawrence Berkeley
19 | National Laboratory, U.S. Dept. of Energy, University of California,
20 | Berkeley nor the names of its contributors may be used to endorse or
21 | promote products derived from this software without specific prior written
22 | permission.
23 |
24 |
25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 | POSSIBILITY OF SUCH DAMAGE.
36 |
37 | You are under no obligation whatsoever to provide any bug fixes, patches,
38 | or upgrades to the features, functionality or performance of the source
39 | code ("Enhancements") to anyone; however, if you choose to make your
40 | Enhancements available either publicly, or directly to Lawrence Berkeley
41 | National Laboratory, without imposing a separate written license agreement
42 | for such Enhancements, then you hereby grant the following license: a
43 | non-exclusive, royalty-free perpetual license to install, use, modify,
44 | prepare derivative works, incorporate into other computer software,
45 | distribute, and sublicense such enhancements or derivative works thereof,
46 | in binary and source code form.
47 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | Sparse Linear Regression Models
4 | ===============================
5 |
6 | [](https://github.com/CederGroupHub/sparse-lm/actions/workflows/test.yml)
7 | [](https://www.codacy.com/gh/CederGroupHub/sparse-lm/dashboard?utm_source=github.com&utm_medium=referral&utm_content=CederGroupHub/sparse-lm&utm_campaign=Badge_Coverage)
8 | [](https://results.pre-commit.ci/latest/github/CederGroupHub/sparse-lm/main)
9 | [](https://pypi.org/project/sparse-lm)
10 | [](https://www.python.org/downloads/)
11 | [](https://doi.org/10.21105/joss.05867)
12 |
13 |
14 | **sparse-lm** includes several (structured) sparse linear regression estimators that are absent in the
15 | `sklearn.linear_model` module. The estimators in **sparse-lm** are designed to fit right into
16 | [scikit-learn](https://scikit-learn.org/stable/index.html), but the underlying optimization problem is expressed and
17 | solved by leveraging [cvxpy](https://www.cvxpy.org/).
18 |
19 | ---------------------------------------------------------------------------------------
20 |
21 | Available regression models
22 | ---------------------------
23 | - Lasso, Group Lasso, Overlap Group Lasso, Sparse Group Lasso & Ridged Group Lasso.
24 | - Adaptive versions of Lasso, Group Lasso, Overlap Group Lasso, Sparse Group Lasso & Ridged Group Lasso.
25 | - Best Subset Selection, Ridged Best Subset, L0, L1L0 & L2L0 (all with optional grouping of parameters)
26 |
27 | Installation
28 | ------------
29 | **sparse-lm** is available on [PyPI](https://pypi.org/project/sparse-lm/), and can be installed via pip:
30 |
31 | ```bash
32 | pip install sparse-lm
33 | ```
34 |
35 | Additional information on installation can be found the documentation [here](https://cedergrouphub.github.io/sparse-lm/install.html).
36 |
37 | Basic usage
38 | -----------
39 | If you already use **scikit-learn**, using **sparse-lm** will be very easy. Just use any
40 | model like you would any linear model in **scikit-learn**:
41 |
42 | ```python
43 | import numpy as np
44 | from sklearn.datasets import make_regression
45 | from sklearn.model_selection import GridSearchCV
46 | from sparselm.model import AdaptiveLasso
47 |
48 | X, y = make_regression(n_samples=100, n_features=80, n_informative=10, random_state=0)
49 | alasso = AdaptiveLasso(fit_intercept=False)
50 | param_grid = {'alpha': np.logspace(-8, 2, 10)}
51 |
52 | cvsearch = GridSearchCV(alasso, param_grid)
53 | cvsearch.fit(X, y)
54 | print(cvsearch.best_params_)
55 | ```
56 |
57 | For more details on use and functionality have a look at the
58 | [examples](https://cedergrouphub.github.io/sparse-lm/auto_examples/index.html) and
59 | [API](https://cedergrouphub.github.io/sparse-lm/api.html) sections of the documentation.
60 |
61 | Contributing
62 | ------------
63 |
64 | We welcome any contributions that you think may improve the package! Please have a look at the
65 | [contribution guidelines](https://cedergrouphub.github.io/sparse-lm/contributing.html) in the documentation.
66 |
--------------------------------------------------------------------------------
/docs/_static/logo-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CederGroupHub/sparse-lm/220cbbad4a5ac98d1a52326c525aadb95f2c5b18/docs/_static/logo-light.png
--------------------------------------------------------------------------------
/docs/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CederGroupHub/sparse-lm/220cbbad4a5ac98d1a52326c525aadb95f2c5b18/docs/_static/logo.png
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | API Documentation
2 | =================
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 |
7 | sparselm.model
8 | sparselm.stepwise
9 | sparselm.model_selection
10 | sparselm.tools
11 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # -- Path setup --------------------------------------------------------------
2 |
3 | # If extensions (or modules to document with autodoc) are in another directory,
4 | # add these directories to sys.path here. If the directory is relative to the
5 | # documentation root, use os.path.abspath to make it absolute, like shown here.
6 |
7 | import os
8 | import sys
9 |
10 | # import typing
11 | # typing.TYPE_CHECKING = True
12 | from sparselm import __version__
13 |
14 | sys.path.insert(0, os.path.abspath("../../"))
15 |
16 | # -- Project information -----------------------------------------------------
17 |
18 | project = "sparse-lm"
19 | copyright = "2022-2023, Ceder Group"
20 | author = "Luis Barroso-Luque"
21 |
22 | # The short X.Y version
23 | version = __version__
24 | # The full version, including alpha/beta/rc tags
25 | release = __version__
26 |
27 | # -- General configuration ---------------------------------------------------
28 |
29 | # Add any Sphinx extension module names here, as strings. They can be
30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
31 | # ones.
32 | extensions = [
33 | "sphinx.ext.autodoc",
34 | "sphinx.ext.napoleon",
35 | "sphinx.ext.intersphinx",
36 | "sphinx.ext.viewcode",
37 | "sphinx.ext.autosummary",
38 | "sphinx.ext.mathjax",
39 | "m2r2",
40 | "sphinx_gallery.gen_gallery",
41 | ]
42 |
43 | # Add any paths that contain templates here, relative to this directory.
44 | templates_path = ["_templates"]
45 |
46 | # List of patterns, relative to source directory, that match files and
47 | # directories to ignore when looking for source files.
48 | # This pattern also affects html_static_path and html_extra_path.
49 | exclude_patterns = ["Thumbs.db", ".DS_Store", "test*.py"]
50 |
51 | # use type hints
52 | autodoc_typehints = "description"
53 | autoclass_content = "both"
54 | autodoc_member_order = "bysource"
55 |
56 | # better napoleon support
57 | napoleon_use_param = True
58 | napoleon_use_rtype = True
59 | napoleon_use_ivar = True
60 |
61 | # The suffix(es) of source filenames.
62 | source_suffix = [".rst", ".md"]
63 |
64 | # -- Options for HTML output -------------------------------------------------
65 |
66 | # The theme to use for HTML and HTML Help pages. See the documentation for
67 | # a list of builtin themes.
68 | #
69 | html_theme = "furo"
70 |
71 | # hide sphinx footer
72 | html_show_sphinx = False
73 | html_show_sourcelink = False
74 |
75 | # Add any paths that contain custom static files (such as style sheets) here,
76 | # relative to this directory. They are copied after the builtin static files,
77 | # so a file named "default.css" will overwrite the builtin "default.css".
78 | fonts = [
79 | "Lato",
80 | "-apple-system",
81 | "BlinkMacSystemFont",
82 | "Segoe UI",
83 | "Helvetica",
84 | "Arial",
85 | "sans-serif",
86 | "Apple Color Emoji",
87 | "Segoe UI Emoji",
88 | ]
89 | html_static_path = ["_static"]
90 | html_css_files = ["custom.css"]
91 | html_favicon = "_static/favicon.ico"
92 | html_theme_options = {
93 | "light_css_variables": {
94 | "admonition-font-size": "92%",
95 | "admonition-title-font-size": "92%",
96 | "font-stack": ",".join(fonts),
97 | "font-size--small": "92%",
98 | "font-size--small--2": "87.5%",
99 | "font-size--small--3": "87.5%",
100 | "font-size--small--4": "87.5%",
101 | },
102 | "dark_css_variables": {
103 | "admonition-font-size": "92%",
104 | "admonition-title-font-size": "92%",
105 | "font-stack": ",".join(fonts),
106 | "font-size--small": "92%",
107 | "font-size--small--2": "87.5%",
108 | "font-size--small--3": "87.5%",
109 | "font-size--small--4": "87.5%",
110 | },
111 | }
112 | html_title = "sparse-lm"
113 |
114 | # code highlighting
115 | pygments_style = "sphinx"
116 | pygments_dark_style = "monokai"
117 |
118 | # -- Options for intersphinx extension ---------------------------------------
119 |
120 | # Example configuration for intersphinx: refer to the Python standard library.
121 | intersphinx_mapping = {
122 | "python": ("https://docs.python.org/3.9", None),
123 | "scikit-learn": ("https://scikit-learn.org/stable", None),
124 | "numpy": ("https://numpy.org/doc/stable/", None),
125 | "cvxpy": ("https://www.cvxpy.org/en/latest/", None),
126 | }
127 |
128 | # -- Options for sphinx gallery extension ---------------------------------------
129 |
130 | sphinx_gallery_conf = {
131 | "examples_dirs": "../examples", # path to your example scripts
132 | "gallery_dirs": "auto_examples", # path to where to save gallery generated output
133 | }
134 |
--------------------------------------------------------------------------------
/docs/contributing.rst:
--------------------------------------------------------------------------------
1 | Contributing
2 | ============
3 |
4 | We welcome all forms of contribution, please consider contributing in any way you can!
5 |
6 | Bugs, issues, input, and questions
7 | ----------------------------------
8 | Please use the
9 | `issue tracker `_ to share any
10 | of the following:
11 |
12 | - Bugs
13 | - Issues
14 | - Questions
15 | - Feature requests
16 | - Ideas
17 | - Input
18 |
19 | Having these reported and saved in the issue tracker is very helpful to make
20 | sure that they are properly addressed. Please make sure to be as descriptive
21 | and neat as possible when opening up an issue.
22 |
23 | Developing guidelines
24 | ---------------------
25 | If you have written code or want to start writing new code that you think will improve **sparse-lm** then please follow
26 | the steps below to make a contribution.
27 |
28 | * All code should have unit tests.
29 | * Code should be well documented following `google style `_ docstrings.
30 | * All code should pass the pre-commit hook. The code follows the `black code style `_.
31 | * Estimators should follow scikit-learn's `developing estimator guidelines `_.
32 |
33 | Adding code contributions
34 | -------------------------
35 |
36 | #. If you are contributing for the first time:
37 |
38 | * *Fork* the repository and then *clone* your fork to your local workspace.
39 | * Make sure to add the *upstream* repository as a remote::
40 |
41 | git remote add upstream https://github.com/CederGroupHub/sparse-lm.git
42 |
43 | * You should always keep your ``main`` branch or any feature branch up to date
44 | with the upstream repository ``main`` branch. Be good about doing *fast forward*
45 | merges of the upstream ``main`` into your fork branches while developing.
46 |
47 | #. In order to have changes available without having to re-install the package:
48 |
49 | * Install the package in *editable* mode::
50 |
51 | pip install -e .
52 |
53 | #. To develop your contributions you are free to do so in your *main* branch or any feature
54 | branch in your fork.
55 |
56 | * We recommend to only your forks *main* branch for short/easy fixes and additions.
57 | * For more complex features, try to use a feature branch with a descriptive name.
58 | * For very complex feautres feel free to open up a PR even before your contribution is finished with
59 | [WIP] in its name, and optionally mark it as a *draft*.
60 |
61 | #. While developing we recommend you use the pre-commit hook that is setup to ensure that your
62 | code will satisfy all lint, documentation and black requirements. To do so install pre-commit, and run
63 | in your clones top directory::
64 |
65 | pre-commit install
66 |
67 | * All code should use `google style `_ docstrings
68 | and `black `_ style formatting.
69 |
70 | #. Make sure to test your contribution and write unit tests for any new features. All tests should go in the
71 | ``sparse-lm\tests`` directory. The CI will run tests upon opening a PR, but running them locally will help find
72 | problems before::
73 |
74 | pytests tests
75 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. toctree::
3 | :caption: Getting Started
4 | :hidden:
5 |
6 | install
7 | auto_examples/index
8 |
9 | .. toctree::
10 | :caption: Information
11 | :hidden:
12 |
13 | contributing
14 | license
15 | GitHub
16 |
17 |
18 | .. toctree::
19 | :caption: Reference
20 | :maxdepth: -1
21 | :hidden:
22 |
23 | API
24 | genindex
25 |
26 |
27 | .. image:: _static/logo.png
28 | :width: 700
29 | :class: only-dark
30 |
31 | .. image:: _static/logo-light.png
32 | :width: 700
33 | :class: only-light
34 |
35 | ===============================
36 | Sparse Linear Regression Models
37 | ===============================
38 |
39 | .. mdinclude:: ../README.md
40 | :start-line: 4
41 |
--------------------------------------------------------------------------------
/docs/install.rst:
--------------------------------------------------------------------------------
1 | Install
2 | =======
3 |
4 | **sparse-lm** can be installed from PyPI or from source using pip.
5 |
6 | PyPI
7 | ----
8 |
9 | You can install **sparse-lm** using pip::
10 |
11 | pip install sparse-lm
12 |
13 |
14 | Install from source
15 | -------------------
16 |
17 | To install **sparse-lm** from source, (fork and) clone the repository from `github
18 | `_::
19 |
20 | git clone https://github.com/CederGroupHub/sparse-lm
21 | cd sparselm
22 | pip install .
23 |
24 | Installing MIQP solvers
25 | -----------------------
26 |
27 | Since **cvxpy** is used to specify and solve regression optimization problems, any of
28 | `supported solvers `_
29 | can be used with **sparse-lm** estimators. **cvxpy** is shipped with open source solvers
30 | (OSQP, SCS, and ECOS) which are usually enough to solve most convex regression problems.
31 |
32 | However, for the mixed integer quadratic programming (MIQP) formulations used in
33 | :class:`BestSubsetSelection` and :class:`RegularizedL0` based classes we highly
34 | recommend installing an MIQP capable solver. ECOS_BB can be used to solve MIQP problems,
35 | but it can be very slow and more importantly has recurring correctness issues. See the
36 | `mixed-integer program section `_
37 | in the cvxpy documentation for more details.
38 |
39 | Gurobi
40 | ^^^^^^
41 |
42 | For using **sparse-lm** with MIQP solvers, we highly recommend installing **Gurobi**.
43 | It can be installed directly from PyPi::
44 |
45 | pip install gurobipy
46 |
47 | Without a license, a free trial **Gurobi** can be used to solve small problems. For
48 | larger problems a license is required. **Gurobi** grants
49 | `free academic licenses `_
50 | to students and academic researchers.
51 |
52 | SCIP
53 | ^^^^
54 |
55 | If installing a licensed solver is not an option, **SCIP** can be used as a free
56 | alternative. To use **SCIP**, the python interface **PySCIPOpt** must also be installed.
57 | **PySCIPOpt** can be installed from PyPi, however this requires building SCIP from
58 | source. See installation details `here `_.
59 |
60 | If you use conda, we recommend installing **SCIP** and **PySCIPOpt** using their
61 | conda-forge channel::
62 |
63 | conda install -c conda-forge scipopt pyscipopt
64 |
65 | The above command will install **PySCIPOpt** with a pre-built version of **SCIP**, and
66 | so you will not need to build it from source.
67 |
68 | Testing
69 | -------
70 |
71 | Unit tests can be run from the source folder using ``pytest``. First, the requirements
72 | to run tests must be installed::
73 |
74 | pip install .[tests]
75 |
76 | Then run the tests using::
77 |
78 | pytest tests
79 |
--------------------------------------------------------------------------------
/docs/license.rst:
--------------------------------------------------------------------------------
1 | =======
2 | License
3 | =======
4 |
5 | **sparse-lm** is distributed under a modified 3-clause BSD licence.
6 |
7 | .. include:: ../LICENSE
8 |
--------------------------------------------------------------------------------
/docs/sparselm.model.rst:
--------------------------------------------------------------------------------
1 | sparselm.model
2 | ==============
3 |
4 | .. automodule:: sparselm.model
5 | :members:
6 | :inherited-members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/sparselm.model_selection.rst:
--------------------------------------------------------------------------------
1 | sparselm.model_selection
2 | ========================
3 |
4 | .. automodule:: sparselm.model_selection
5 | :members:
6 | :inherited-members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/sparselm.stepwise.rst:
--------------------------------------------------------------------------------
1 | sparselm.stepwise
2 | ========================
3 |
4 | .. automodule:: sparselm.stepwise
5 | :members:
6 | :inherited-members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/docs/sparselm.tools.rst:
--------------------------------------------------------------------------------
1 | sparselm.tools
2 | ==============
3 |
4 | .. automodule:: sparselm.tools
5 | :members:
6 | :inherited-members:
7 | :undoc-members:
8 | :show-inheritance:
9 |
--------------------------------------------------------------------------------
/examples/README.rst:
--------------------------------------------------------------------------------
1 | Examples
2 | ========
3 |
4 | This is set of simple examples using sparse linear regression models implemented in
5 | **sparse-lm**. For the the vast majority of cases, the **sparse-lm** models can be
6 | used in the same way as the linear regression models in **scikit-learn**.
7 |
--------------------------------------------------------------------------------
/examples/corr.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CederGroupHub/sparse-lm/220cbbad4a5ac98d1a52326c525aadb95f2c5b18/examples/corr.npy
--------------------------------------------------------------------------------
/examples/energy.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CederGroupHub/sparse-lm/220cbbad4a5ac98d1a52326c525aadb95f2c5b18/examples/energy.npy
--------------------------------------------------------------------------------
/examples/plot_adaptive.py:
--------------------------------------------------------------------------------
1 | """
2 | ==============================
3 | Using adaptive regularization
4 | ==============================
5 |
6 | Adaptive or iteratively re-weighted regularization is a technique that can improve
7 | feature selection properties over the standard Lasso and Group Lasso extensions. In
8 | this example we compare the performance of the standard Lasso with adaptive Lasso.
9 | """
10 |
11 | import matplotlib.pyplot as plt
12 | import numpy as np
13 | from sklearn.datasets import make_regression
14 | from sklearn.linear_model import Lasso
15 | from sklearn.metrics import mean_squared_error, r2_score
16 | from sklearn.model_selection import GridSearchCV, KFold, train_test_split
17 |
18 | from sparselm.model import AdaptiveLasso
19 |
20 | X, y, coef = make_regression(
21 | n_samples=200,
22 | n_features=100,
23 | n_informative=10,
24 | noise=40.0,
25 | bias=-15.0,
26 | coef=True,
27 | random_state=0,
28 | )
29 |
30 | X_train, X_test, y_train, y_test = train_test_split(
31 | X, y, test_size=0.25, random_state=0
32 | )
33 |
34 | # create estimators
35 | lasso = Lasso(fit_intercept=True)
36 | alasso = AdaptiveLasso(max_iter=5, fit_intercept=True)
37 |
38 | # create cv search objects for each estimator
39 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0)
40 | params = {"alpha": np.logspace(-1, 1, 10)}
41 |
42 | lasso_cv = GridSearchCV(lasso, params, cv=cv5, n_jobs=-1)
43 | alasso_cv = GridSearchCV(alasso, params, cv=cv5, n_jobs=-1)
44 |
45 | # fit models on training data
46 | lasso_cv.fit(X_train, y_train)
47 | alasso_cv.fit(X_train, y_train)
48 |
49 | # calculate model performance on test and train data
50 | lasso_train = {
51 | "r2": r2_score(y_train, lasso_cv.predict(X_train)),
52 | "rmse": np.sqrt(mean_squared_error(y_train, lasso_cv.predict(X_train))),
53 | }
54 |
55 | lasso_test = {
56 | "r2": r2_score(y_test, lasso_cv.predict(X_test)),
57 | "rmse": np.sqrt(mean_squared_error(y_test, lasso_cv.predict(X_test))),
58 | }
59 |
60 | alasso_train = {
61 | "r2": r2_score(y_train, alasso_cv.predict(X_train)),
62 | "rmse": np.sqrt(mean_squared_error(y_train, alasso_cv.predict(X_train))),
63 | }
64 |
65 | alasso_test = {
66 | "r2": r2_score(y_test, alasso_cv.predict(X_test)),
67 | "rmse": np.sqrt(mean_squared_error(y_test, alasso_cv.predict(X_test))),
68 | }
69 |
70 | print("Lasso performance metrics:")
71 | print(f" train r2: {lasso_train['r2']:.3f}")
72 | print(f" test r2: {lasso_test['r2']:.3f}")
73 | print(f" train rmse: {lasso_train['rmse']:.3f}")
74 | print(f" test rmse: {lasso_test['rmse']:.3f}")
75 |
76 | print("Adaptive Lasso performance metrics:")
77 | print(f" train r2: {alasso_train['r2']:.3f}")
78 | print(f" test r2: {alasso_test['r2']:.3f}")
79 | print(f" train rmse: {alasso_train['rmse']:.3f}")
80 | print(f" test rmse: {alasso_test['rmse']:.3f}")
81 |
82 | # plot model coefficients
83 | fig, ax = plt.subplots()
84 | ax.plot(coef, "o", label="True coefficients")
85 | ax.plot(lasso_cv.best_estimator_.coef_, "o", label="Lasso", alpha=0.5)
86 | ax.plot(alasso_cv.best_estimator_.coef_, "o", label="Adaptive Lasso", alpha=0.5)
87 | ax.set_xlabel("covariate index")
88 | ax.set_ylabel("coefficient value")
89 | ax.legend()
90 | fig.show()
91 |
92 | # plot predicted values
93 | fig, ax = plt.subplots()
94 | ax.plot(y_test, lasso_cv.predict(X_test), "o", label="lasso", alpha=0.5)
95 | ax.plot(y_test, alasso_cv.predict(X_test), "o", label="adaptive lasso", alpha=0.5)
96 | ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--")
97 | ax.set_xlabel("true values")
98 | ax.set_ylabel("predicted values")
99 | ax.legend()
100 | fig.show()
101 |
--------------------------------------------------------------------------------
/examples/plot_chull.py:
--------------------------------------------------------------------------------
1 | """
2 | ===========================
3 | Adding solution constraints
4 | ===========================
5 |
6 | **sparse-lm** allows including external solution constraints to the regression objective
7 | by exposing the underlying **cvxpy** problem objects. This is useful to solve regression
8 | problems with additional constraints, such as non-negativity.
9 |
10 | **NOTE**: That this functionality does not fully align with the requirements for
11 | compatible scikit-learn estimators, meaning that using an estimator with additional
12 | constraints added in a ski-kit learn pipeline or model selection is not supported.
13 |
14 | To show how to include constraints, we will solve a common problem in materials science:
15 | predicting the formation energy of many configurations of an alloy. In such problems,
16 | it is usually very important to ensure that the predicted formation energies for
17 | "ground-states" (i.e. energies that define the lower convex-hull of the energy vs
18 | composition graph) remain on the convex-hull. Similarly, it is often important to
19 | ensure that the predicted formation energies that are not "ground-states" in the
20 | training data remain above the predicted convex-hull.
21 |
22 | The example follows the methodology described in this paper:
23 | https://www.nature.com/articles/s41524-017-0032-0
24 |
25 | This example requires the **pymatgen** materials analysis package to be
26 | installed to easily plot convex-hulls: https://pymatgen.org/installation.html
27 |
28 | The training data used in this example is taken from this
29 | tutorial: https://icet.materialsmodeling.org/tutorial.zip for the
30 | **icet** cluster expansion Python package (https://icet.materialsmodeling.org/).
31 | """
32 |
33 | import json
34 |
35 | import matplotlib.pyplot as plt
36 | import numpy as np
37 | import pymatgen.analysis.phase_diagram as pd
38 | from pymatgen.core import Structure
39 | from sklearn.linear_model import Lasso
40 | from sklearn.metrics import mean_squared_error
41 |
42 | from sparselm.model import L2L0
43 |
44 | # load training data
45 | X, y = np.load("corr.npy"), np.load("energy.npy")
46 |
47 | # load corresponding structure objects
48 | with open("structures.json") as fp:
49 | structures = json.load(fp)
50 |
51 | structures = [Structure.from_dict(s) for s in structures]
52 |
53 | # create regressors (the hyperparameters have already been tuned)
54 | lasso_regressor = Lasso(fit_intercept=True, alpha=1.29e-5)
55 | # alpha is the pseudo-l0 norm hyperparameter and eta is the l2-norm hyperparameter
56 | l2l0_regressor = L2L0(
57 | fit_intercept=True,
58 | alpha=3.16e-7,
59 | eta=1.66e-6,
60 | solver="GUROBI",
61 | solver_options={"Threads": 4},
62 | )
63 |
64 | # fit models
65 | lasso_regressor.fit(X, y)
66 | l2l0_regressor.fit(X, y)
67 |
68 | # create phase diagram entries with training data
69 | training_entries = []
70 | for i, structure in enumerate(structures):
71 | corrs = X[
72 | i
73 | ] # in this problem the features of a sample are referred to as correlation vectors
74 | energy = y[i] * len(
75 | structure
76 | ) # the energy must be scaled by size to create the phase diagram
77 | entry = pd.PDEntry(
78 | structure.composition,
79 | energy,
80 | attribute={"corrs": corrs, "size": len(structure)},
81 | )
82 | training_entries.append(entry)
83 |
84 | # plot the training (true) phase diagram
85 | training_pd = pd.PhaseDiagram(training_entries)
86 | pplotter = pd.PDPlotter(training_pd, backend="matplotlib", show_unstable=0)
87 | pplotter.show(label_unstable=False)
88 |
89 | # plot the phase diagram based on the energies predicted by the Lasso fit
90 | lasso_y = lasso_regressor.predict(X)
91 | lasso_pd = pd.PhaseDiagram(
92 | [
93 | pd.PDEntry(s_i.composition, y_i * len(s_i))
94 | for s_i, y_i in zip(structures, lasso_y)
95 | ]
96 | )
97 | pplotter = pd.PDPlotter(lasso_pd, backend="matplotlib", show_unstable=0)
98 | pplotter.show(label_unstable=False)
99 |
100 | # plot the phase diagram based on the energies predicted by the L2L0 fit
101 | l2l0_y = l2l0_regressor.predict(X)
102 | l2l0_pd = pd.PhaseDiagram(
103 | [
104 | pd.PDEntry(s_i.composition, y_i * len(s_i))
105 | for s_i, y_i in zip(structures, l2l0_y)
106 | ]
107 | )
108 | pplotter = pd.PDPlotter(l2l0_pd, backend="matplotlib", show_unstable=0)
109 | pplotter.show(label_unstable=False)
110 |
111 | # we notice that both the Lasso fit and the L2L0 fit miss the ground-state Ag5Pd3
112 | # and also add spurious ground-states not present in the training convex hull
113 |
114 |
115 | # create matrices for two types of contraints to keep the predicted hull unchanged
116 | # 1) keep non-ground states above the hull
117 | # 2) ensure ground-states stay on the hull
118 |
119 | # 1) compute the correlation matrix for unstable structures and
120 | # the weighted correlation matrix of the decomposition products
121 | X_unstable = np.zeros(shape=(len(training_pd.unstable_entries), X.shape[1]))
122 | X_decomp = np.zeros_like(X_unstable)
123 | for i, entry in enumerate(training_pd.unstable_entries):
124 | if entry.is_element:
125 | continue
126 | X_unstable[i] = entry.attribute["corrs"]
127 | decomp_entries, ehull = training_pd.get_decomp_and_e_above_hull(entry)
128 | for dentry, amount in decomp_entries.items():
129 | ratio = (
130 | amount
131 | * (entry.composition.num_atoms / dentry.composition.num_atoms)
132 | * dentry.attribute["size"]
133 | / entry.attribute["size"]
134 | )
135 | X_decomp[i] += ratio * dentry.attribute["corrs"]
136 |
137 | # 2) compute the ground-state correlation matrix
138 | # and the weighted correlation matrix of decomposition products if the ground state was not a ground-state
139 | X_stable = np.zeros(shape=(len(training_pd.stable_entries), X.shape[1]))
140 | X_gsdecomp = np.zeros_like(X_stable)
141 | gs_pd = pd.PhaseDiagram(training_pd.stable_entries)
142 | for i, entry in enumerate(gs_pd.stable_entries):
143 | if entry.is_element:
144 | continue
145 | X_stable[i] = entry.attribute["corrs"]
146 | decomp_entries, ehull = gs_pd.get_decomp_and_phase_separation_energy(entry)
147 | for dentry, amount in decomp_entries.items():
148 | ratio = (
149 | amount
150 | * (entry.composition.num_atoms / dentry.composition.num_atoms)
151 | * dentry.attribute["size"]
152 | / entry.attribute["size"]
153 | )
154 | X_gsdecomp[i] += ratio * dentry.attribute["corrs"]
155 |
156 |
157 | constrained_regressor = L2L0(
158 | fit_intercept=True,
159 | alpha=3.16e-7,
160 | eta=1.66e-6,
161 | solver="GUROBI",
162 | solver_options={"Threads": 4},
163 | )
164 |
165 | # now create the constraints by accessing the underlying cvxpy objects
166 | # if regressor.fit has not been called with the gigen data, we must call generate_problem to generate
167 | # the cvxpy objects that represent the regressino objective
168 | constrained_regressor.generate_problem(X, y)
169 | J = (
170 | constrained_regressor.canonicals_.beta
171 | ) # this is the cvxpy variable representing the coefficients
172 |
173 | # 1) add constraint to keep unstable structures above hull, ie no new ground states
174 | epsilon = 0.0005 # solutions will be very sensitive to the size of this margin
175 | constrained_regressor.add_constraints([X_unstable @ J >= X_decomp @ J + epsilon])
176 |
177 | # 2) add constraint to keep all ground-states on the hull
178 | epsilon = 1e-6
179 | constrained_regressor.add_constraints([X_stable @ J <= X_gsdecomp @ J - epsilon])
180 |
181 |
182 | # fit the constrained regressor
183 | constrained_regressor.fit(X, y)
184 |
185 | # look at the phase diagram based on the energies predicted by the L2L0 fit
186 | l2l0c_y = constrained_regressor.predict(X)
187 | constrained_pd = pd.PhaseDiagram(
188 | [
189 | pd.PDEntry(s_i.composition, y_i * len(s_i))
190 | for s_i, y_i in zip(structures, l2l0c_y)
191 | ]
192 | )
193 | pplotter = pd.PDPlotter(constrained_pd, backend="matplotlib", show_unstable=0)
194 | pplotter.show(label_unstable=False)
195 | # the constraints now force the fitted model to respect the trainind convex-hull
196 |
197 | # Plot the different estimated coefficients
198 | fig, ax = plt.subplots()
199 | ax.plot(lasso_regressor.coef_[1:])
200 | ax.plot(l2l0_regressor.coef_[1:])
201 | ax.plot(constrained_regressor.coef_[1:])
202 | ax.set_xlabel("covariate index")
203 | ax.set_ylabel("coefficient value")
204 | ax.legend(["lasso", "l2l0", "l2l0 constrained"])
205 | fig.show()
206 |
207 | # print the resulting training RMSE from the different fits
208 | lasso_rmse = np.sqrt(mean_squared_error(y, lasso_regressor.predict(X)))
209 | l2l0_rmse = np.sqrt(mean_squared_error(y, l2l0_regressor.predict(X)))
210 | l2l0c_rmse = np.sqrt(mean_squared_error(y, constrained_regressor.predict(X)))
211 |
212 | print(f"Lasso train RMSE: {lasso_rmse:.4f}")
213 | print(f"L2L0 train RMSE: {l2l0_rmse:.4f}")
214 | print(f"L2L0 with constraings train RMSE: {l2l0c_rmse:.4f}")
215 |
--------------------------------------------------------------------------------
/examples/plot_gl_sgl.py:
--------------------------------------------------------------------------------
1 | """
2 | =========================
3 | (Sparse) Group regression
4 | =========================
5 |
6 | This examples shows how to use group lasso and sparse group lasso to fit a simulated
7 | dataset with group-level sparsity and within-group sparsity.
8 | """
9 |
10 | import warnings
11 |
12 | import matplotlib.pyplot as plt
13 | import numpy as np
14 | from sklearn.linear_model import Lasso
15 | from sklearn.metrics import mean_squared_error, r2_score
16 | from sklearn.model_selection import GridSearchCV, KFold, train_test_split
17 |
18 | from sparselm.dataset import make_group_regression
19 | from sparselm.model import GroupLasso, SparseGroupLasso
20 |
21 | warnings.filterwarnings("ignore", category=UserWarning) # ignore convergence warnings
22 |
23 | # generate a dataset with group-level sparsity only
24 | X, y, groups, coefs = make_group_regression(
25 | n_samples=400,
26 | n_groups=10,
27 | n_features_per_group=10,
28 | n_informative_groups=5,
29 | frac_informative_in_group=1.0,
30 | bias=-10.0,
31 | noise=200.0,
32 | coef=True,
33 | random_state=0,
34 | )
35 |
36 | # split data into train and test sets
37 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
38 |
39 | # create estimators
40 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0)
41 | lasso_cv = GridSearchCV(
42 | Lasso(fit_intercept=True), {"alpha": np.logspace(0, 2, 5)}, cv=cv5, n_jobs=-1
43 | )
44 | lasso_cv.fit(X_train, y_train)
45 | glasso_cv = GridSearchCV(
46 | GroupLasso(groups=groups, fit_intercept=True),
47 | {"alpha": np.logspace(0, 2, 5)},
48 | cv=cv5,
49 | n_jobs=-1,
50 | )
51 | glasso_cv.fit(X_train, y_train)
52 |
53 | # Plot predicted values
54 | fig, ax = plt.subplots()
55 | ax.plot(
56 | y_test, glasso_cv.predict(X_test), marker="o", ls="", alpha=0.5, label="group lasso"
57 | )
58 | ax.plot(y_test, lasso_cv.predict(X_test), marker="o", ls="", alpha=0.5, label="lasso")
59 | ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--")
60 | ax.legend()
61 | ax.set_xlabel("true values")
62 | ax.set_ylabel("predicted values")
63 | fig.show()
64 |
65 | # calculate model performance on test and train data
66 | lasso_train = {
67 | "r2": r2_score(y_train, lasso_cv.predict(X_train)),
68 | "rmse": np.sqrt(mean_squared_error(y_train, lasso_cv.predict(X_train))),
69 | }
70 |
71 | lasso_test = {
72 | "r2": r2_score(y_test, lasso_cv.predict(X_test)),
73 | "rmse": np.sqrt(mean_squared_error(y_test, lasso_cv.predict(X_test))),
74 | }
75 |
76 | glasso_train = {
77 | "r2": r2_score(y_train, glasso_cv.predict(X_train)),
78 | "rmse": np.sqrt(mean_squared_error(y_train, glasso_cv.predict(X_train))),
79 | }
80 |
81 | glasso_test = {
82 | "r2": r2_score(y_test, glasso_cv.predict(X_test)),
83 | "rmse": np.sqrt(mean_squared_error(y_test, glasso_cv.predict(X_test))),
84 | }
85 |
86 | print("------- Performance metrics for signal with group-level sparsity only -------\n")
87 |
88 | print("Lasso performance metrics:")
89 | print(f" train r2: {lasso_train['r2']:.3f}")
90 | print(f" test r2: {lasso_test['r2']:.3f}")
91 | print(f" train rmse: {lasso_train['rmse']:.3f}")
92 | print(f" test rmse: {lasso_test['rmse']:.3f}")
93 |
94 | print("Group Lasso performance metrics:")
95 | print(f" train r2: {glasso_train['r2']:.3f}")
96 | print(f" test r2: {glasso_test['r2']:.3f}")
97 | print(f" train rmse: {glasso_train['rmse']:.3f}")
98 | print(f" test rmse: {glasso_test['rmse']:.3f}")
99 |
100 | # generate a dataset with group-level sparsity and within-group sparsity
101 | X, y, groups, coefs = make_group_regression(
102 | n_samples=400,
103 | n_groups=10,
104 | n_features_per_group=10,
105 | n_informative_groups=5,
106 | frac_informative_in_group=0.5,
107 | bias=-10.0,
108 | noise=100.0,
109 | coef=True,
110 | random_state=0,
111 | )
112 |
113 | # split data into train and test sets
114 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
115 |
116 | glasso_cv = GridSearchCV(
117 | GroupLasso(groups=groups, fit_intercept=True),
118 | {"alpha": np.logspace(0, 2, 5)},
119 | cv=cv5,
120 | n_jobs=-1,
121 | )
122 | sglasso_cv = GridSearchCV(
123 | SparseGroupLasso(groups=groups, fit_intercept=True),
124 | {"alpha": np.logspace(0, 2, 5), "l1_ratio": np.arange(0.3, 0.8, 0.1)},
125 | cv=cv5,
126 | n_jobs=-1,
127 | )
128 | glasso_cv.fit(X_train, y_train)
129 | sglasso_cv.fit(X_train, y_train)
130 |
131 | # Plot predicted values
132 | fig, ax = plt.subplots()
133 | ax.plot(
134 | y_test, glasso_cv.predict(X_test), marker="o", ls="", alpha=0.5, label="group lasso"
135 | )
136 | ax.plot(
137 | y_test,
138 | sglasso_cv.predict(X_test),
139 | marker="o",
140 | ls="",
141 | alpha=0.5,
142 | label="sparse group lasso",
143 | )
144 | ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--")
145 | ax.legend()
146 | ax.set_xlabel("true values")
147 | ax.set_ylabel("predicted values")
148 | fig.show()
149 |
150 | # calculate model performance on test and train data
151 | glasso_train = {
152 | "r2": r2_score(y_train, glasso_cv.predict(X_train)),
153 | "rmse": np.sqrt(mean_squared_error(y_train, glasso_cv.predict(X_train))),
154 | }
155 |
156 | glasso_test = {
157 | "r2": r2_score(y_test, glasso_cv.predict(X_test)),
158 | "rmse": np.sqrt(mean_squared_error(y_test, glasso_cv.predict(X_test))),
159 | }
160 |
161 | sglasso_train = {
162 | "r2": r2_score(y_train, sglasso_cv.predict(X_train)),
163 | "rmse": np.sqrt(mean_squared_error(y_train, sglasso_cv.predict(X_train))),
164 | }
165 |
166 | sglasso_test = {
167 | "r2": r2_score(y_test, sglasso_cv.predict(X_test)),
168 | "rmse": np.sqrt(mean_squared_error(y_test, sglasso_cv.predict(X_test))),
169 | }
170 |
171 |
172 | print(
173 | "------- Performance metrics for signal with group and within group sparsity -------\n"
174 | )
175 |
176 | print("Group Lasso performance metrics:")
177 | print(f" train r2: {glasso_train['r2']:.3f}")
178 | print(f" test r2: {glasso_test['r2']:.3f}")
179 | print(f" train rmse: {glasso_train['rmse']:.3f}")
180 | print(f" test rmse: {glasso_test['rmse']:.3f}")
181 |
182 | print("Sparse Group Lasso performance metrics:")
183 | print(f" train r2: {sglasso_train['r2']:.3f}")
184 | print(f" test r2: {sglasso_test['r2']:.3f}")
185 | print(f" train rmse: {sglasso_train['rmse']:.3f}")
186 | print(f" test rmse: {sglasso_test['rmse']:.3f}")
187 |
--------------------------------------------------------------------------------
/examples/plot_line_search.py:
--------------------------------------------------------------------------------
1 | """
2 | =======================================
3 | Tuning hyperparameters with line search
4 | =======================================
5 |
6 | Line search can typically be used in optimizing regressors with multiple weakly or
7 | uncorrelated hyperparameters.
8 |
9 | This example also showcases the usage of mixed L0 regressor where using a standard
10 | grid search can be too computationally expensive..
11 | """
12 |
13 | import numpy as np
14 | from sklearn.datasets import make_regression
15 | from sklearn.metrics import mean_squared_error, r2_score
16 | from sklearn.model_selection import KFold, train_test_split
17 |
18 | from sparselm.model import L2L0
19 | from sparselm.model_selection import LineSearchCV
20 |
21 | X, y, coef = make_regression(
22 | n_samples=60,
23 | n_features=30,
24 | n_informative=8,
25 | noise=40.0,
26 | bias=-15.0,
27 | coef=True,
28 | random_state=0,
29 | )
30 |
31 | X_train, X_test, y_train, y_test = train_test_split(
32 | X, y, test_size=0.25, random_state=0
33 | )
34 |
35 | # create an l2l0 estimator.
36 | # Groups for parameters must be provided each coefficient is in a singleton group.
37 | groups = np.arange(30, dtype=int)
38 | l2l0 = L2L0(groups, fit_intercept=True, solver="GUROBI", solver_options={"Threads": 4})
39 |
40 | # create cv search objects for each estimator
41 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0)
42 | # LineSearchCV requires the parameters grid to be provided in a list of tuple format,
43 | # with order of parameters in the list being the order of them getting searched per
44 | # iteration.
45 | # The following example specifies the parameter alpha to be scanned first, then the
46 | # parameter eta.
47 | params = [("alpha", np.logspace(-6, 1, 5)), ("eta", np.logspace(-7, -1, 5))]
48 |
49 | l2l0_cv = LineSearchCV(l2l0, params, cv=cv5, n_jobs=4)
50 |
51 | # fit models on training data
52 | l2l0_cv.fit(X_train, y_train)
53 |
54 | # calculate model performance on test and train data
55 | l2l0_train = {
56 | "r2": r2_score(y_train, l2l0_cv.predict(X_train)),
57 | "rmse": np.sqrt(mean_squared_error(y_train, l2l0_cv.predict(X_train))),
58 | }
59 |
60 | l2l0_test = {
61 | "r2": r2_score(y_test, l2l0_cv.predict(X_test)),
62 | "rmse": np.sqrt(mean_squared_error(y_test, l2l0_cv.predict(X_test))),
63 | }
64 |
65 | print("Performance metrics:")
66 | print(f" train r2: {l2l0_train['r2']:.3f}")
67 | print(f" test r2: {l2l0_test['r2']:.3f}")
68 | print(f" train rmse: {l2l0_train['rmse']:.3f}")
69 | print(f" test rmse: {l2l0_test['rmse']:.3f}")
70 |
--------------------------------------------------------------------------------
/examples/plot_one_std.py:
--------------------------------------------------------------------------------
1 | """
2 | =========================================
3 | Hyperparameters selection with 1-std rule
4 | =========================================
5 |
6 | One-standard-deviation rule is a technique to promote model robustness when
7 | cross validation results are noisy. The hyperparameter is chosen to
8 | be equal to the maximum value that yields:
9 | CV = minimum CV + 1 * std(CV at minimum).
10 |
11 | One-standard-deviation rule is available in both GridSearchCV and LineSearchCV
12 | under sparselm.model_selection.
13 | """
14 |
15 | import matplotlib.pyplot as plt
16 | import numpy as np
17 | from sklearn.datasets import make_regression
18 | from sklearn.linear_model import Lasso
19 | from sklearn.metrics import mean_squared_error, r2_score
20 | from sklearn.model_selection import KFold, train_test_split
21 |
22 | from sparselm.model_selection import GridSearchCV
23 |
24 | X, y, coef = make_regression(
25 | n_samples=200,
26 | n_features=100,
27 | n_informative=10,
28 | noise=40.0,
29 | bias=-15.0,
30 | coef=True,
31 | random_state=0,
32 | )
33 |
34 | X_train, X_test, y_train, y_test = train_test_split(
35 | X, y, test_size=0.25, random_state=0
36 | )
37 |
38 | # create estimators
39 | lasso = Lasso(fit_intercept=True)
40 |
41 | # create cv search objects for each estimator
42 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0)
43 | params = {"alpha": np.logspace(-1, 1.5, 20)}
44 |
45 | lasso_cv_std = GridSearchCV(
46 | lasso, params, opt_selection_method="one_std_score", cv=cv5, n_jobs=-1
47 | )
48 | lasso_cv_opt = GridSearchCV(
49 | lasso, params, opt_selection_method="max_score", cv=cv5, n_jobs=-1
50 | )
51 |
52 | # fit models on training data
53 | lasso_cv_std.fit(X_train, y_train)
54 | lasso_cv_opt.fit(X_train, y_train)
55 |
56 | # calculate model performance on test and train data
57 | lasso_std_train = {
58 | "r2": r2_score(y_train, lasso_cv_std.predict(X_train)),
59 | "rmse": np.sqrt(mean_squared_error(y_train, lasso_cv_std.predict(X_train))),
60 | }
61 |
62 | lasso_std_test = {
63 | "r2": r2_score(y_test, lasso_cv_std.predict(X_test)),
64 | "rmse": np.sqrt(mean_squared_error(y_test, lasso_cv_std.predict(X_test))),
65 | }
66 |
67 | print("Lasso with 1-std:")
68 | print(f" alpha value: {lasso_cv_std.best_params_['alpha']}")
69 | print(f" train r2: {lasso_std_train['r2']:.3f}")
70 | print(f" test r2: {lasso_std_test['r2']:.3f}")
71 | print(f" train rmse: {lasso_std_train['rmse']:.3f}")
72 | print(f" test rmse: {lasso_std_test['rmse']:.3f}")
73 | print(f" sparsity: {sum(abs(lasso_cv_std.best_estimator_.coef_) > 1E-8)}")
74 |
75 | lasso_opt_train = {
76 | "r2": r2_score(y_train, lasso_cv_opt.predict(X_train)),
77 | "rmse": np.sqrt(mean_squared_error(y_train, lasso_cv_opt.predict(X_train))),
78 | }
79 |
80 | lasso_opt_test = {
81 | "r2": r2_score(y_test, lasso_cv_opt.predict(X_test)),
82 | "rmse": np.sqrt(mean_squared_error(y_test, lasso_cv_opt.predict(X_test))),
83 | }
84 |
85 | print("Lasso performance:")
86 | print(f" alpha value: {lasso_cv_std.best_params_['alpha']}")
87 | print(f" train r2: {lasso_opt_train['r2']:.3f}")
88 | print(f" test r2: {lasso_opt_test['r2']:.3f}")
89 | print(f" train rmse: {lasso_opt_train['rmse']:.3f}")
90 | print(f" test rmse: {lasso_opt_test['rmse']:.3f}")
91 | print(f" sparsity: {sum(abs(lasso_cv_opt.best_estimator_.coef_) > 1E-8)}")
92 |
93 | # plot cross validation scores
94 | fig, ax = plt.subplots()
95 | ax.plot(
96 | lasso_cv_std.cv_results_["param_alpha"].data,
97 | -lasso_cv_std.cv_results_["mean_test_score"],
98 | "o-",
99 | label="One std",
100 | )
101 | ax.plot(
102 | lasso_cv_std.cv_results_["param_alpha"].data,
103 | -lasso_cv_opt.cv_results_["mean_test_score"]
104 | + lasso_cv_std.cv_results_["std_test_score"],
105 | "k--",
106 | alpha=0.5,
107 | )
108 | ax.plot(
109 | lasso_cv_std.cv_results_["param_alpha"].data,
110 | -lasso_cv_opt.cv_results_["mean_test_score"]
111 | - lasso_cv_std.cv_results_["std_test_score"],
112 | "k--",
113 | alpha=0.5,
114 | )
115 | ax.set_xlabel("alpha")
116 | ax.set_ylabel("rmse")
117 | ax.legend(["mean", "std"])
118 | fig.show()
119 |
120 | # plot model coefficients
121 | fig, ax = plt.subplots()
122 | ax.plot(coef, "o", label="True coefficients")
123 | ax.plot(lasso_cv_std.best_estimator_.coef_, "o", label="One std", alpha=0.5)
124 | ax.plot(lasso_cv_opt.best_estimator_.coef_, "o", label="Max score", alpha=0.5)
125 | ax.set_xlabel("covariate index")
126 | ax.set_ylabel("coefficient value")
127 | ax.legend()
128 | fig.show()
129 |
--------------------------------------------------------------------------------
/examples/plot_sparse_signal.py:
--------------------------------------------------------------------------------
1 | """
2 | =========================
3 | Recovering sparse signals
4 | =========================
5 |
6 | In this example we compare the results obtained from `BestSubsetSelection` with
7 | those obtained using the `OrthogonalMatchingPursuit` regressor from **scikit-learn**.
8 |
9 | Note that although using best subset selection tend to give more accurate results,
10 | `OrthogonalMatchingPursuit` scales much better to larger problems.
11 |
12 | This example is adapted from the scikit-learn documentation:
13 | https://scikit-learn.org/stable/auto_examples/linear_model/plot_omp.html#sphx-glr-auto-examples-linear-model-plot-omp-py
14 | """
15 |
16 | import matplotlib.pyplot as plt
17 | import numpy as np
18 | from sklearn.datasets import make_sparse_coded_signal
19 | from sklearn.linear_model import OrthogonalMatchingPursuit
20 |
21 | from sparselm.model import BestSubsetSelection
22 |
23 | n_components, n_features = 50, 20
24 | n_nonzero_coefs = 8
25 |
26 | # generate the data
27 | y, X, w = make_sparse_coded_signal(
28 | n_samples=1,
29 | n_components=n_components,
30 | n_features=n_features,
31 | n_nonzero_coefs=n_nonzero_coefs,
32 | random_state=0,
33 | )
34 | X = X.T
35 | (idx,) = w.nonzero()
36 |
37 | # distort the clean signal
38 | y_noisy = y + 0.005 * np.random.randn(len(y))
39 |
40 | # plot the sparse signal
41 | plt.figure(figsize=(14, 7))
42 | plt.subplot(3, 2, (1, 2))
43 | plt.xlim(0, n_components)
44 | plt.title("Sparse signal")
45 | plt.stem(idx, w[idx])
46 |
47 | # plot the noise-free reconstruction
48 | omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
49 | omp.fit(X, y)
50 | coef = omp.coef_
51 | (idx_r,) = coef.nonzero()
52 | plt.subplot(3, 2, 3)
53 | plt.xlim(0, n_components)
54 | plt.title("Orthogonal Matching Pursuit (noise-free measurements)")
55 | plt.stem(idx_r, coef[idx_r])
56 |
57 | bss = BestSubsetSelection(
58 | sparse_bound=n_nonzero_coefs, solver="GUROBI", solver_options={"Threads": 8}
59 | )
60 | bss.fit(X, y)
61 | coef = bss.coef_
62 | (idx_r,) = coef.nonzero()
63 | plt.subplot(3, 2, 4)
64 | plt.xlim(0, n_components)
65 | plt.title("Best Subset Selection (noise-free measurements)")
66 | plt.stem(idx_r, coef[idx_r])
67 |
68 | # plot the noisy reconstruction
69 | omp.fit(X, y_noisy)
70 | coef = omp.coef_
71 | (idx_r,) = coef.nonzero()
72 | plt.subplot(3, 2, 5)
73 | plt.xlim(0, n_components)
74 | plt.title("Orthogonal Matching Pursuit recovery (noisy measurements)")
75 | plt.stem(idx_r, coef[idx_r])
76 |
77 | bss.fit(X, y_noisy)
78 | coef = bss.coef_
79 | (idx_r,) = coef.nonzero()
80 | plt.subplot(3, 2, 6)
81 | plt.xlim(0, n_components)
82 | plt.title("Best Subset Selection (noisy measurements)")
83 | plt.stem(idx_r, coef[idx_r])
84 |
85 | plt.tight_layout()
86 | plt.show()
87 |
--------------------------------------------------------------------------------
/examples/plot_stepwise.py:
--------------------------------------------------------------------------------
1 | """
2 | ========================
3 | Using stepwise estimator
4 | ========================
5 |
6 | Stepwise estimator can be used to implement stepwise fitting. It comprises several
7 | regressor, each responsible for fitting specific rows of the feature matrix to
8 | the target vector and passing the residual values down to be fitted by the subsequent
9 | regressors.
10 |
11 | This example is purely for demonstration purpose and we do not expect any meaningful
12 | performance improvement.
13 |
14 | However, stepwise fitting can be useful in certain problems where groups of covariates
15 | have substantially different effects on the target vector.
16 |
17 | For example, in fitting the atomic configration energy of an crystalline solid using a
18 | cluster expansion of an ionic system, one might want to fit the energy to single site
19 | features first then subtract those main effects from the target, and fit the residual
20 | of energy to other cluster interactions.
21 | """
22 |
23 | import matplotlib.pyplot as plt
24 | import numpy as np
25 | from sklearn.datasets import make_regression
26 | from sklearn.linear_model import Lasso, Ridge
27 | from sklearn.metrics import mean_squared_error, r2_score
28 | from sklearn.model_selection import KFold, train_test_split
29 |
30 | from sparselm.model_selection import GridSearchCV
31 | from sparselm.stepwise import StepwiseEstimator
32 |
33 | X, y, coef = make_regression(
34 | n_samples=200,
35 | n_features=100,
36 | n_informative=10,
37 | noise=40.0,
38 | bias=-15.0,
39 | coef=True,
40 | random_state=0,
41 | )
42 |
43 | X_train, X_test, y_train, y_test = train_test_split(
44 | X, y, test_size=0.25, random_state=0
45 | )
46 |
47 | # Create estimators for each step.
48 | # Only the first estimator is allowed to fit_intercept!
49 | ridge = Ridge(fit_intercept=True)
50 | lasso = Lasso(fit_intercept=False)
51 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0)
52 | params = {"alpha": np.logspace(-1, 1, 10)}
53 | estimator1 = GridSearchCV(ridge, params, cv=cv5, n_jobs=-1)
54 | estimator2 = GridSearchCV(lasso, params, cv=cv5, n_jobs=-1)
55 |
56 | # Create a StepwiseEstimator. It can be composed of either
57 | # regressors or GridSearchCV and LineSearchCV optimizers.
58 | # In this case, we first fit the target vector to the first 3
59 | # and the last feature, then fit the residual vector to the rest
60 | # of the features with GridSearchCV to optimize the Lasso
61 | # hyperparameter.
62 | stepwise = StepwiseEstimator(
63 | [("est", estimator1), ("est2", estimator2)], ((0, 1, 2, 99), tuple(range(3, 99)))
64 | )
65 |
66 | # fit models on training data
67 | stepwise.fit(X_train, y_train)
68 |
69 | # calculate model performance on test and train data
70 | stepwise_train = {
71 | "r2": r2_score(y_train, stepwise.predict(X_train)),
72 | "rmse": np.sqrt(mean_squared_error(y_train, stepwise.predict(X_train))),
73 | }
74 |
75 | stepwise_test = {
76 | "r2": r2_score(y_test, stepwise.predict(X_test)),
77 | "rmse": np.sqrt(mean_squared_error(y_test, stepwise.predict(X_test))),
78 | }
79 |
80 | print("Lasso performance metrics:")
81 | print(f" train r2: {stepwise_train['r2']:.3f}")
82 | print(f" test r2: {stepwise_test['r2']:.3f}")
83 | print(f" train rmse: {stepwise_train['rmse']:.3f}")
84 | print(f" test rmse: {stepwise_test['rmse']:.3f}")
85 |
86 | # plot model coefficients
87 | fig, ax = plt.subplots()
88 | ax.plot(coef, "o", label="True coefficients")
89 | ax.plot(stepwise.coef_[[0, 1, 2, 99]], "o", label="Stepwise (ridge)", alpha=0.5)
90 | ax.plot(stepwise.coef_[range(3, 99)], "o", label="Stepwise (lasso)", alpha=0.5)
91 | ax.set_xlabel("covariate index")
92 | ax.set_ylabel("coefficient value")
93 | ax.legend()
94 | fig.show()
95 |
96 | # plot predicted values
97 | fig, ax = plt.subplots()
98 | ax.plot(y_test, stepwise.predict(X_test), "o", label="Stepwise", alpha=0.5)
99 | ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--")
100 | ax.set_xlabel("true values")
101 | ax.set_ylabel("predicted values")
102 | ax.legend()
103 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=45", "setuptools-scm[toml]>=6.2"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "sparse-lm"
7 | description = "Sparse linear regression models"
8 | authors = [
9 | {name = "Luis Barroso-Luque", email = "lbluque@berkeley.edu"}
10 | ]
11 | readme = "README.md"
12 | license = {text = "BSD 3-Clause License"}
13 | dynamic = ["version"]
14 | dependencies = [
15 | "numpy >=1.23", "cvxpy >=1.2", "scikit-learn >=1.2.1",
16 | "scipy >=1.9", "joblib"
17 | ]
18 | classifiers = [
19 | "Development Status :: 3 - Alpha",
20 | "Programming Language :: Python :: 3 :: Only",
21 | "Programming Language :: Python :: 3.9",
22 | "Programming Language :: Python :: 3.10",
23 | "Intended Audience :: Science/Research",
24 | "License :: OSI Approved :: BSD License",
25 | "Operating System :: OS Independent",
26 | "Topic :: Scientific/Engineering :: Information Analysis",
27 | "Topic :: Scientific/Engineering :: Mathematics",
28 | "Topic :: Software Development :: Libraries :: Python Modules"
29 | ]
30 |
31 | [project.optional-dependencies]
32 | dev = ["pre-commit", "black", "isort", "flake8", "pylint", "pydocstyle", "flake8-pyproject"]
33 | # Gurobipy needed by mixedL0 tests, pandas needed by sklearn convention checks.
34 | tests = ["pytest >=7.2.0", "pytest-cov >=4.0.0", "coverage", "pandas", "gurobipy", "pyscipopt"]
35 | docs = ["sphinx>=5.3", "furo", "m2r2", "sphinx-gallery", "matplotlib", "gurobipy", "pymatgen"]
36 | optional = ["gurobipy"]
37 |
38 | # pyproject.toml
39 | [tool.setuptools_scm]
40 |
41 | # linting tools, etc
42 | [tool.pytest.ini_options]
43 | minversion = "6.0"
44 | addopts = "-x --durations = 30 --quiet -rxXs --color = yes"
45 | filterwarnings = [
46 | 'ignore::UserWarning',
47 | 'ignore::FutureWarning',
48 | 'ignore::RuntimeWarning'
49 | ]
50 |
51 | [tool.flake8]
52 | exclude = ['docs', 'tests']
53 | ignore = ['E203', 'E501', 'W503']
54 | max-line-length = 88
55 |
56 | [tool.pylint.main]
57 | ignore = ["tests"]
58 |
59 | [tool.pylint.basic]
60 | argument-naming-style = "snake_case"
61 | attr-naming-style = "snake_case"
62 | method-naming-style = "snake_case"
63 | function-naming-style = "snake_case"
64 | class-naming-style = "PascalCase"
65 | good-names = ['id', 'kB', 'i', 'j', 'k', 'f']
66 |
67 | [too.pylint."messages control"]
68 | disable = ['W0511', 'R0904', 'R0903', 'R0913', 'R0902', 'R0914', 'C0415']
69 |
70 | [tool.codespell]
71 | skip = "*.c,./.*"
72 | count = ''
73 | quiet-level = 3
74 | ignore-words-list = ['nd', 'tread']
75 |
76 | [tool.coverage.run]
77 | source = ["src/sparselm"]
78 | omit = ["*/__init__.py"]
79 |
80 | [tool.pydocstyle]
81 | convention = "google"
82 | add_ignore = ["D107"]
83 |
84 | [[tool.mypy.overrides]]
85 | module = ["sklearn.*", "scipy.linalg"]
86 | ignore_missing_imports = true
87 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy >=1.23
2 | cvxpy >=1.2
3 | scikit-learn > 1.2
4 | scipy >=1.9
5 | joblib
6 |
--------------------------------------------------------------------------------
/src/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scikit-learn
3 | cvxpy
4 | scipy
5 | joblib
6 |
--------------------------------------------------------------------------------
/src/sparselm/__init__.py:
--------------------------------------------------------------------------------
1 | """Classes implementing generalized linear regression Regressors."""
2 |
3 | from importlib.metadata import PackageNotFoundError, version
4 |
5 | try:
6 | __version__ = version("sparse-lm")
7 | except PackageNotFoundError:
8 | # package is not installed
9 | __version__ = ""
10 |
--------------------------------------------------------------------------------
/src/sparselm/_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CederGroupHub/sparse-lm/220cbbad4a5ac98d1a52326c525aadb95f2c5b18/src/sparselm/_utils/__init__.py
--------------------------------------------------------------------------------
/src/sparselm/_utils/validation.py:
--------------------------------------------------------------------------------
1 | """Data and hyper-parameters validation utilities."""
2 |
3 | from __future__ import annotations
4 |
5 | import numpy as np
6 | from numpy.typing import NDArray
7 |
8 |
9 | def _check_groups(
10 | groups: NDArray[np.floating | np.integer] | list[int | float] | None,
11 | n_features: int,
12 | ) -> None:
13 | """Check that groups are 1D and of the correct length.
14 |
15 | Args:
16 | groups (NDArray):
17 | List of group labels
18 | n_features (int):
19 | Number of features/covariates being fit
20 |
21 | """
22 | if groups is None:
23 | return
24 |
25 | if not isinstance(groups, (list, np.ndarray)):
26 | raise TypeError("groups must be a list or ndarray")
27 |
28 | groups = np.asarray(groups).astype(int)
29 | if groups.ndim != 1:
30 | raise ValueError("groups must be a 1D array")
31 |
32 | if len(groups) != n_features:
33 | raise ValueError(
34 | f"groups must be the same length as the number of features {n_features}"
35 | )
36 |
37 |
38 | def _check_group_weights(
39 | group_weights: NDArray[np.floating] | None, n_groups: int
40 | ) -> None:
41 | """Check that group weights are 1D and of the correct length.
42 |
43 | Args:
44 | group_weights (NDArray):
45 | List of group weights
46 | n_groups (int):
47 | Number of groups
48 | """
49 | if group_weights is None:
50 | return
51 |
52 | if not isinstance(group_weights, (list, np.ndarray)):
53 | raise TypeError("group_weights must be a list or ndarray")
54 |
55 | group_weights = np.asarray(group_weights)
56 | if len(group_weights) != n_groups:
57 | raise ValueError(
58 | f"group_weights must be the same length as the number of groups {len(group_weights)} != {n_groups}"
59 | )
60 |
--------------------------------------------------------------------------------
/src/sparselm/dataset.py:
--------------------------------------------------------------------------------
1 | """Generate synthemetic datasets akin to sklearn.datasets."""
2 |
3 | from __future__ import annotations
4 |
5 | import warnings
6 | from typing import Sequence
7 |
8 | import numpy as np
9 | from numpy.random import RandomState
10 | from sklearn.datasets import make_regression
11 | from sklearn.utils import check_random_state
12 |
13 |
14 | def make_group_regression(
15 | n_samples: int = 100,
16 | n_groups: int = 20,
17 | n_features_per_group: int | Sequence = 10,
18 | n_informative_groups: int = 5,
19 | frac_informative_in_group: float = 1.0,
20 | bias: float = 0.0,
21 | effective_rank: int | None = None,
22 | tail_strength: float = 0.5,
23 | noise: float = 0.0,
24 | shuffle: bool = True,
25 | coef: bool = False,
26 | random_state: int | RandomState | None = None,
27 | ) -> tuple[np.ndarray, ...]:
28 | """Generate a random regression problem with grouped covariates.
29 |
30 | Args:
31 | n_samples (int, optional):
32 | Number of samples to generate.
33 | n_groups (int, optional):
34 | Number of groups to generate.
35 | n_features_per_group (int | Sequence, optional):
36 | Number of features per group to generate. If a sequence is passed the
37 | length must be equal to n_groups then each element will be the number of
38 | features in the corresponding group.
39 | n_informative_groups (int, optional):
40 | Number of informative groups.
41 | frac_informative_in_group (float, optional):
42 | Fraction of informative features in each group
43 | The number of features will be rounded to nearest int.
44 | bias (float, optional):
45 | Bias added to the decision function.
46 | effective_rank ([type], optional):
47 | Approximate number of singular vectors
48 | required to explain most of the input data by linear combinations.
49 | tail_strength (float, optional):
50 | Relative importance of the fat noisy tail
51 | of the singular values profile if `effective_rank` is not None.
52 | noise (float, optional):
53 | Standard deviation of the gaussian noise applied to the output.
54 | shuffle (bool, optional):
55 | Shuffle the samples and the features. Defaults to True.
56 | coef (bool, optional):
57 | If True, the coefficients of the underlying linear model are returned.
58 | random_state ([type], optional):
59 | Random state for dataset generation.
60 |
61 | Returns:
62 | tuple[np.ndarray, np.ndarray, np.ndarray, ...]:
63 | X, y, groups, coefficients (optional)
64 | """
65 | generator = check_random_state(random_state)
66 |
67 | informative_groups = list(range(n_informative_groups))
68 |
69 | if isinstance(n_features_per_group, int):
70 | n_features = n_features_per_group * n_groups
71 | n_informative_in_group = round(frac_informative_in_group * n_features_per_group)
72 | n_informative = n_informative_in_group * n_informative_groups
73 | # make n_features_per_group a list of length n_groups
74 | n_features_per_group = [n_features_per_group] * n_groups
75 | n_informative_per_group = [n_informative_in_group] * n_informative_groups
76 | else:
77 | if len(n_features_per_group) == n_groups:
78 | n_features = sum(n_features_per_group)
79 | n_informative_per_group = [
80 | round(frac_informative_in_group * n_features_per_group[i])
81 | for i in informative_groups
82 | ]
83 | n_informative = sum(n_informative_per_group)
84 | else:
85 | raise ValueError(
86 | "If passing a sequence of n_features_per_group, the length must be "
87 | "equal to n_groups."
88 | )
89 |
90 | if any(n < 1 for n in n_informative_per_group):
91 | warnings.warn(
92 | "The number of features and fraction of informative features per group resulted in "
93 | "informative groups having no informative features.",
94 | UserWarning,
95 | )
96 |
97 | X, y, coefs = make_regression(
98 | n_samples=n_samples,
99 | n_features=n_features,
100 | n_informative=n_informative,
101 | bias=bias,
102 | effective_rank=effective_rank,
103 | tail_strength=tail_strength,
104 | noise=noise,
105 | shuffle=shuffle,
106 | coef=True,
107 | random_state=generator,
108 | )
109 |
110 | # assign coefficients to groups
111 | groups = np.zeros(n_features, dtype=int)
112 | informative_coef_inds = np.nonzero(coefs > noise)[0].tolist()
113 | other_coef_inds = np.nonzero(coefs <= noise)[0].tolist()
114 |
115 | for i, nfg in enumerate(n_features_per_group):
116 | if i in informative_groups:
117 | nifg = n_informative_per_group[informative_groups.index(i)]
118 | ii = informative_coef_inds[:nifg] + other_coef_inds[: nfg - nifg]
119 | # remove assigned indices
120 | informative_coef_inds = informative_coef_inds[nifg:]
121 | other_coef_inds = other_coef_inds[nfg - nifg :]
122 | else:
123 | ii = other_coef_inds[:nfg]
124 | other_coef_inds = other_coef_inds[nfg:]
125 |
126 | # assign group ids
127 | groups[ii] = i
128 |
129 | if shuffle:
130 | indices = np.arange(n_features)
131 | generator.shuffle(indices)
132 | X[:, :] = X[:, indices]
133 | groups = groups[indices]
134 | coefs = coefs[indices]
135 |
136 | if coef:
137 | return X, y, groups, coefs
138 | else:
139 | return X, y, groups
140 |
--------------------------------------------------------------------------------
/src/sparselm/model/__init__.py:
--------------------------------------------------------------------------------
1 | """Classes implementing generalized linear regression Regressors."""
2 |
3 | from ._adaptive_lasso import (
4 | AdaptiveGroupLasso,
5 | AdaptiveLasso,
6 | AdaptiveOverlapGroupLasso,
7 | AdaptiveRidgedGroupLasso,
8 | AdaptiveSparseGroupLasso,
9 | )
10 | from ._lasso import (
11 | GroupLasso,
12 | Lasso,
13 | OverlapGroupLasso,
14 | RidgedGroupLasso,
15 | SparseGroupLasso,
16 | )
17 | from ._miqp import (
18 | L1L0,
19 | L2L0,
20 | BestSubsetSelection,
21 | RegularizedL0,
22 | RidgedBestSubsetSelection,
23 | )
24 | from ._ols import OrdinaryLeastSquares
25 |
26 | __all__ = [
27 | "OrdinaryLeastSquares",
28 | "Lasso",
29 | "BestSubsetSelection",
30 | "RidgedBestSubsetSelection",
31 | "RegularizedL0",
32 | "L1L0",
33 | "L2L0",
34 | "GroupLasso",
35 | "OverlapGroupLasso",
36 | "SparseGroupLasso",
37 | "RidgedGroupLasso",
38 | "AdaptiveLasso",
39 | "AdaptiveGroupLasso",
40 | "AdaptiveOverlapGroupLasso",
41 | "AdaptiveSparseGroupLasso",
42 | "AdaptiveRidgedGroupLasso",
43 | ]
44 |
--------------------------------------------------------------------------------
/src/sparselm/model/_base.py:
--------------------------------------------------------------------------------
1 | """Base classes for in-house linear regression Regressors.
2 |
3 | The classes make use of and follow the scikit-learn API.
4 | """
5 |
6 | from __future__ import annotations
7 |
8 | __author__ = "Luis Barroso-Luque, Fengyu Xie"
9 |
10 | import warnings
11 | from abc import ABCMeta, abstractmethod
12 | from collections.abc import Sequence
13 | from numbers import Integral
14 | from types import SimpleNamespace
15 | from typing import Any, NamedTuple
16 |
17 | import cvxpy as cp
18 | import numpy as np
19 | from numpy.typing import NDArray
20 | from sklearn.base import RegressorMixin
21 | from sklearn.linear_model._base import (
22 | LinearModel,
23 | _check_sample_weight,
24 | _preprocess_data,
25 | _rescale_data,
26 | )
27 | from sklearn.utils._param_validation import (
28 | Interval,
29 | Options,
30 | _ArrayLikes,
31 | _Booleans,
32 | _InstancesOf,
33 | make_constraint,
34 | validate_parameter_constraints,
35 | )
36 |
37 |
38 | class CVXCanonicals(NamedTuple):
39 | """CVXpy Canonical objects representing the underlying optimization problem.
40 |
41 | Attributes:
42 | objective (cp.Problem):
43 | Objective function.
44 | objective (cp.Expression):
45 | Objective function.
46 | beta (cp.Variable):
47 | Variable to be optimized (corresponds to the estimated coef_ attribute).
48 | parameters (SimpleNamespace of cp.Parameter or NDArray):
49 | SimpleNamespace with named cp.Parameter objects or NDArray of parameters.
50 | The namespace should be defined by the Regressor generating it.
51 | auxiliaries (SimpleNamespace of cp.Variable or cp.Expression):
52 | SimpleNamespace with auxiliary cp.Variable or cp.Expression objects.
53 | The namespace should be defined by the Regressor generating it.
54 | constraints (list of cp.Constaint):
55 | List of constraints intrinsic to regression problem.
56 | user_constraints (list of cp.Constaint):
57 | List of user-defined constraints.
58 | """
59 |
60 | problem: cp.Problem
61 | objective: cp.Expression
62 | beta: cp.Variable
63 | parameters: SimpleNamespace | None
64 | auxiliaries: SimpleNamespace | None
65 | constraints: list[cp.Constraint]
66 | user_constraints: list[cp.Constraint]
67 |
68 |
69 | class CVXRegressor(RegressorMixin, LinearModel, metaclass=ABCMeta):
70 | r"""Abstract base class for Regressors using cvxpy with a sklearn interface.
71 |
72 | Note cvxpy can use one of many 3rd party solvers, default is most often
73 | CVXOPT or ECOS. For integer and mixed integer problems options include
74 | SCIP (open source) and Gurobi, among other commercial solvers.
75 |
76 | The solver can be specified by setting the solver keyword argument.
77 | And can solver specific settings can be set by passing a dictionary of
78 | solver_options.
79 |
80 | See "Setting solver options" in documentation for details of available options:
81 | https://www.cvxpy.org/tutorial/advanced/index.html#advanced
82 |
83 | Args:
84 | fit_intercept (bool):
85 | Whether the intercept should be estimated or not.
86 | If False, the data is assumed to be already centered.
87 | copy_X (bool):
88 | If True, X will be copied; else, it may be overwritten.
89 | warm_start (bool):
90 | When set to True, reuse the solution of the previous call to
91 | fit as initialization, otherwise, just erase the previous
92 | solution.
93 | solver (str):
94 | cvxpy backend solver to use. Supported solvers are listed here:
95 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
96 | solver_options (dict):
97 | dictionary of keyword arguments passed to cvxpy solve.
98 | See docs linked above for more information.
99 |
100 | Attributes:
101 | coef_ (NDArray):
102 | Parameter vector (:math:`\beta` in the cost function formula) of shape
103 | (n_features,).
104 | intercept_ (float):
105 | Independent term in decision function.
106 | canonicals_ (SimpleNamespace):
107 | Namespace that contains underlying cvxpy objects used to define
108 | the optimization problem. The objects included are the following:
109 | - objective - the objective function.
110 | - beta - variable to be optimized (corresponds to the estimated
111 | coef_ attribute).
112 | - parameters - hyper-parameters
113 | - auxiliaries - auxiliary variables and expressions
114 | - constraints - solution constraints
115 | """
116 |
117 | # parameter constraints that do not need any cvxpy Parameter object
118 | _parameter_constraints: dict[str, list[Any]] = {
119 | "fit_intercept": ["boolean"],
120 | "copy_X": ["boolean"],
121 | "warm_start": ["boolean"],
122 | "solver": [Options(type=str, options=set(cp.installed_solvers())), None],
123 | "solver_options": [dict, None],
124 | }
125 | # parameter constraints that require a cvxpy Parameter object in problem definition
126 | _cvx_parameter_constraints: dict[str, list[Any]] | None = None
127 |
128 | def __init__(
129 | self,
130 | fit_intercept: bool = False,
131 | copy_X: bool = True,
132 | warm_start: bool = False,
133 | solver: str | None = None,
134 | solver_options: dict[str, Any] | None = None,
135 | ):
136 | self.fit_intercept = fit_intercept
137 | self.copy_X = copy_X
138 | self.warm_start = warm_start
139 | self.solver = solver
140 | self.solver_options = solver_options
141 |
142 | def fit(
143 | self,
144 | X: NDArray,
145 | y: NDArray,
146 | sample_weight: NDArray[np.floating] | None = None,
147 | *args,
148 | **kwargs,
149 | ):
150 | """Fit the linear model coefficients.
151 |
152 | Prepares the fit data input, generates cvxpy objects to represent the
153 | minimization objective, and solves the regression problem using the given
154 | solver.
155 |
156 | Args:
157 | X (NDArray):
158 | Training data of shape (n_samples, n_features).
159 | y (NDArray):
160 | Target values. Will be cast to X's dtype if necessary
161 | of shape (n_samples,) or (n_samples, n_targets)
162 | sample_weight (NDArray):
163 | Individual weights for each sample of shape (n_samples,)
164 | default=None
165 | *args:
166 | Positional arguments passed to solve method
167 | **kwargs:
168 | Keyword arguments passed to solve method
169 |
170 | Returns:
171 | instance of self
172 | """
173 | X, y = self._validate_data(
174 | X, y, accept_sparse=False, y_numeric=True, multi_output=False
175 | )
176 |
177 | X, y, X_offset, y_offset, X_scale = self._preprocess_data(X, y, sample_weight)
178 |
179 | self._validate_params(X, y)
180 |
181 | # TODO test theses cases
182 | if not hasattr(self, "canonicals_"):
183 | self.generate_problem(X, y, preprocess_data=False)
184 | elif not np.array_equal(self.cached_X_, X) or not np.array_equal(
185 | self.cached_y_, y
186 | ):
187 | if self.canonicals_.user_constraints:
188 | warnings.warn(
189 | "User constraints are set on a problem with different data (X, y). "
190 | "These constraints will be ignored.",
191 | UserWarning,
192 | )
193 | self.generate_problem(X, y, preprocess_data=False)
194 | else:
195 | self._set_param_values() # set parameter values
196 |
197 | solver_options = self.solver_options if self.solver_options is not None else {}
198 | if not isinstance(solver_options, dict):
199 | raise TypeError("solver_options must be a dictionary")
200 |
201 | self.coef_ = self._solve(X, y, solver_options, *args, **kwargs)
202 | self._set_intercept(X_offset, y_offset, X_scale)
203 |
204 | # return self for chaining fit and predict calls
205 | return self
206 |
207 | def _preprocess_data(
208 | self, X: NDArray, y: NDArray, sample_weight: NDArray[np.floating] | None = None
209 | ) -> tuple[NDArray, NDArray, NDArray, NDArray, NDArray]:
210 | """Preprocess data for fitting."""
211 | if sample_weight is not None:
212 | sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
213 | # rescale sample_weight to sum to number of samples
214 | sample_weight = sample_weight * (X.shape[0] / np.sum(sample_weight)) # type: ignore
215 |
216 | X, y, X_offset, y_offset, X_scale = _preprocess_data(
217 | X,
218 | y,
219 | copy=self.copy_X,
220 | fit_intercept=self.fit_intercept,
221 | sample_weight=sample_weight,
222 | )
223 |
224 | if sample_weight is not None:
225 | X, y, _ = _rescale_data(X, y, sample_weight)
226 |
227 | return X, y, X_offset, y_offset, X_scale
228 |
229 | def _validate_params(self, X: NDArray, y: NDArray) -> None:
230 | """Validate hyperparameter values.
231 |
232 | Implement this in an Regressor for additional parameter value validation.
233 | """
234 | if self._cvx_parameter_constraints is None:
235 | parameter_constraints = self._parameter_constraints
236 | else:
237 | parameter_constraints = {
238 | **self._parameter_constraints,
239 | **self._cvx_parameter_constraints,
240 | }
241 | validate_parameter_constraints(
242 | parameter_constraints,
243 | self.get_params(deep=False),
244 | caller_name=self.__class__.__name__,
245 | )
246 |
247 | def _set_param_values(self) -> None:
248 | """Set the values of cvxpy parameters from param attributes for warm starts."""
249 | if self._cvx_parameter_constraints is None:
250 | return
251 |
252 | for parameter, value in self.get_params(deep=False).items():
253 | if parameter in self._cvx_parameter_constraints:
254 | cvx_parameter = getattr(self.canonicals_.parameters, parameter)
255 | # check for parameters that take a scalar or an array
256 | if isinstance(value, np.ndarray) or isinstance(value, Sequence):
257 | if len(value) == 1:
258 | value = value * np.ones_like(cvx_parameter.value)
259 | else:
260 | value = np.asarray(value)
261 | cvx_parameter.value = value
262 |
263 | def _generate_params(self, X: NDArray, y: NDArray) -> SimpleNamespace:
264 | """Return the named tuple of cvxpy parameters for optimization problem.
265 |
266 | The cvxpy Parameters must be given values when generating.
267 |
268 | Args:
269 | X (NDArray):
270 | Covariate/Feature matrix
271 | y (NDArray):
272 | Target vector
273 |
274 | Returns:
275 | NamedTuple of cvxpy parameters
276 | """
277 | cvx_parameters = {}
278 | cvx_constraints = (
279 | {}
280 | if self._cvx_parameter_constraints is None
281 | else self._cvx_parameter_constraints
282 | )
283 | for param_name, param_val in self.get_params(deep=False).items():
284 | if param_name not in cvx_constraints:
285 | continue
286 |
287 | # make constraints sklearn constraint objects
288 | constraints = [
289 | make_constraint(constraint)
290 | for constraint in cvx_constraints[param_name]
291 | ]
292 |
293 | # For now we will only set nonneg, nonpos, neg, pos, integer, boolean and/or
294 | # shape of the cvxpy Parameter objects.
295 | # TODO cxvpy only allows a single one of these to be set (except bool and integer)
296 | param_kwargs = {}
297 | for constraint in constraints:
298 | if isinstance(constraint, _ArrayLikes):
299 | if not hasattr(param_val, "shape"):
300 | param_val = np.asarray(param_val)
301 |
302 | param_kwargs["shape"] = param_val.shape
303 |
304 | if isinstance(constraint, _Booleans):
305 | param_kwargs["boolean"] = True
306 |
307 | if isinstance(constraint, _InstancesOf):
308 | if constraint.is_satisfied_by(True): # is it boolean
309 | param_kwargs["boolean"] = True
310 | elif constraint.is_satisfied_by(5): # is it integer
311 | param_kwargs["integer"] = True
312 |
313 | if isinstance(constraint, Interval):
314 | if constraint.type is Integral:
315 | param_kwargs["integer"] = True
316 | if constraint.left is not None:
317 | if constraint.left == 0:
318 | if constraint.closed in ("left", "both"):
319 | param_kwargs["nonneg"] = True
320 | else:
321 | param_kwargs["pos"] = True
322 | elif constraint.left > 0:
323 | param_kwargs["pos"] = True
324 | if constraint.right is not None:
325 | if constraint.right == 0:
326 | if constraint.closed in ("right", "both"):
327 | param_kwargs["nonpos"] = True
328 | else:
329 | param_kwargs["neg"] = True
330 | elif constraint.right < 0:
331 | param_kwargs["neg"] = True
332 | cvx_parameters[param_name] = cp.Parameter(
333 | value=param_val, **param_kwargs
334 | )
335 |
336 | return SimpleNamespace(**cvx_parameters)
337 |
338 | def _generate_auxiliaries(
339 | self, X: NDArray, y: NDArray, beta: cp.Variable, parameters: SimpleNamespace
340 | ) -> SimpleNamespace | None:
341 | """Generate any auxiliary variables/expressions necessary to define objective.
342 |
343 | Args:
344 | X (NDArray):
345 | Covariate/Feature matrix
346 | y (NDArray):
347 | Target vector
348 | beta (cp.Variable):
349 | cp.Variable representing the estimated coefs_
350 | parameters (SimpleNamespace):
351 | SimpleNamespace of cvxpy parameters.
352 |
353 | Returns:
354 | SimpleNamespace of cp.Variable for auxiliary variables
355 | """
356 | return None
357 |
358 | @abstractmethod
359 | def _generate_objective(
360 | self,
361 | X: NDArray,
362 | y: NDArray,
363 | beta: cp.Variable,
364 | parameters: SimpleNamespace | None = None,
365 | auxiliaries: SimpleNamespace | None = None,
366 | ) -> cp.Expression:
367 | """Define the cvxpy objective function represeting regression model.
368 |
369 | The objective must be stated for a minimization problem.
370 |
371 | Args:
372 | X (NDArray):
373 | Covariate/Feature matrix
374 | y (NDArray):
375 | Target vector
376 | beta (cp.Variable):
377 | cp.Variable representing the estimated coefs_
378 | parameters (SimpleNamespace): optional
379 | SimpleNamespace with cp.Parameter objects
380 | auxiliaries (SimpleNamespace): optional
381 | SimpleNamespace with auxiliary cvxpy objects
382 |
383 | Returns:
384 | cvxpy Expression
385 | """
386 |
387 | def _generate_constraints(
388 | self,
389 | X: NDArray,
390 | y: NDArray,
391 | beta: cp.Variable,
392 | parameters: SimpleNamespace | None = None,
393 | auxiliaries: SimpleNamespace | None = None,
394 | ) -> list[cp.Constraint]:
395 | """Generate constraints for optimization problem.
396 |
397 | Args:
398 | X (NDArray):
399 | Covariate/Feature matrix
400 | y (NDArray):
401 | Target vector
402 | beta (cp.Variable):
403 | cp.Variable representing the estimated coefs_
404 | parameters (SimpleNamespace): optional
405 | SimpleNamespace with cp.Parameter objects
406 | auxiliaries (SimpleNamespace): optional
407 | SimpleNamespace with auxiliary cvxpy objects
408 |
409 | Returns:
410 | list of cvxpy constraints
411 | """
412 | return []
413 |
414 | def generate_problem(
415 | self,
416 | X: NDArray,
417 | y: NDArray,
418 | preprocess_data: bool = True,
419 | sample_weight: NDArray[np.floating] | None = None,
420 | ) -> None:
421 | """Generate regression problem and auxiliary cvxpy objects.
422 |
423 | This initializes the minimization problem, the objective, coefficient variable
424 | (beta), problem parameters, solution constraints, and auxiliary variables/terms.
425 |
426 | This is (almost always) called in the fit method, and not directly. However, it
427 | can be called directly if further control over the problem is needed by
428 | accessing the canonicals_ objects. For example to add additional constraints on
429 | problem variables.
430 |
431 | Args:
432 | X (NDArray):
433 | Covariate/Feature matrix
434 | y (NDArray):
435 | Target vector
436 | preprocess_data (bool):
437 | Whether to preprocess the data before generating the problem. If calling
438 | generate_problem directly, this should be kept as True to ensure the
439 | problem is generated correctly for a subsequent call to fit.
440 | sample_weight (NDArray):
441 | Individual weights for each sample of shape (n_samples,)
442 | default=None. Only used if preprocess_data=True to rescale the data
443 | accordingly.
444 | """
445 | if preprocess_data is True:
446 | X, y, _, _, _ = self._preprocess_data(X, y, sample_weight)
447 |
448 | # X, y are cached to avoid re-generating problem if fit is called again with
449 | # same data
450 | self.cached_X_ = X
451 | self.cached_y_ = y
452 |
453 | beta = cp.Variable(X.shape[1])
454 | parameters = self._generate_params(X, y)
455 | auxiliaries = self._generate_auxiliaries(X, y, beta, parameters)
456 | objective = self._generate_objective(X, y, beta, parameters, auxiliaries)
457 | constraints = self._generate_constraints(X, y, beta, parameters, auxiliaries)
458 | problem = cp.Problem(cp.Minimize(objective), constraints)
459 | self.canonicals_ = CVXCanonicals(
460 | problem=problem,
461 | objective=objective,
462 | beta=beta,
463 | parameters=parameters,
464 | auxiliaries=auxiliaries,
465 | constraints=constraints,
466 | user_constraints=[],
467 | )
468 |
469 | def add_constraints(self, constraints: list[cp.Constraint]) -> None:
470 | """Add a constraint to the problem.
471 |
472 | .. Warning::
473 | Adding constraints will not work with any sklearn class that relies on
474 | cloning the estimator (ie GridSearchCV, etc) . This is because a new cvxpy
475 | problem is generated for any cloned estimator.
476 |
477 | Args:
478 | constraints (list of cp.constraint or cp.expressions):
479 | cvxpy constraint to add to the problem
480 | """
481 | if not hasattr(self, "canonicals_"):
482 | raise RuntimeError(
483 | "Problem has not been generated. Please call generate_problem before"
484 | " adding constraints."
485 | )
486 | self.canonicals_.user_constraints.extend(list(constraints))
487 | # need to reset problem to update constraints
488 | self._reset_problem()
489 |
490 | def _reset_problem(self) -> None:
491 | """Reset the cvxpy problem."""
492 | if not hasattr(self, "canonicals_"):
493 | raise RuntimeError(
494 | "Problem has not been generated. Please call generate_problem before"
495 | " resetting."
496 | )
497 |
498 | problem = cp.Problem(
499 | cp.Minimize(self.canonicals_.objective),
500 | self.canonicals_.constraints + self.canonicals_.user_constraints,
501 | )
502 | self.canonicals_ = CVXCanonicals(
503 | problem=problem,
504 | objective=self.canonicals_.objective,
505 | beta=self.canonicals_.beta,
506 | parameters=self.canonicals_.parameters,
507 | auxiliaries=self.canonicals_.auxiliaries,
508 | constraints=self.canonicals_.constraints,
509 | user_constraints=self.canonicals_.user_constraints,
510 | )
511 |
512 | def _solve(
513 | self, X: NDArray, y: NDArray, solver_options: dict, *args, **kwargs
514 | ) -> NDArray[np.floating]:
515 | """Solve the cvxpy problem."""
516 | self.canonicals_.problem.solve(
517 | solver=self.solver, warm_start=self.warm_start, **solver_options
518 | )
519 | return self.canonicals_.beta.value
520 |
521 |
522 | class TikhonovMixin:
523 | """Mixin class to add a Tihhonov/ridge regularization term.
524 |
525 | When using this Mixin, a cvxpy parameter named "eta" should be saved in the
526 | parameters SimpleNamespace an attribute tikhonov_w can be added to allow a matrix
527 | otherwise simple l2/Ridge is used.
528 | """
529 |
530 | def _generate_objective(
531 | self,
532 | X: NDArray,
533 | y: NDArray,
534 | beta: cp.Variable,
535 | parameters: SimpleNamespace | None = None,
536 | auxiliaries: SimpleNamespace | None = None,
537 | ) -> cp.Expression:
538 | """Add a Tikhnonov regularization term to the objective function."""
539 | if hasattr(self, "tikhonov_w") and self.tikhonov_w is not None:
540 | tikhonov_w = self.tikhonov_w
541 | else:
542 | tikhonov_w = np.eye(X.shape[1])
543 | assert parameters is not None and hasattr(parameters, "eta")
544 | c0 = 2 * X.shape[0] # keeps hyperparameter scale independent
545 | objective = super()._generate_objective(X, y, beta, parameters, auxiliaries) # type: ignore
546 | objective += c0 * parameters.eta * cp.sum_squares(tikhonov_w @ beta)
547 |
548 | return objective
549 |
--------------------------------------------------------------------------------
/src/sparselm/model/_miqp/__init__.py:
--------------------------------------------------------------------------------
1 | """MIQP based regression Regressors."""
2 |
3 | from ._best_subset import BestSubsetSelection, RidgedBestSubsetSelection
4 | from ._regularized_l0 import L1L0, L2L0, RegularizedL0
5 |
6 | __all__ = [
7 | "BestSubsetSelection",
8 | "RidgedBestSubsetSelection",
9 | "RegularizedL0",
10 | "L1L0",
11 | "L2L0",
12 | ]
13 |
--------------------------------------------------------------------------------
/src/sparselm/model/_miqp/_base.py:
--------------------------------------------------------------------------------
1 | """Base class for mixed-integer quadratic programming l0 pseudo norm based Regressors."""
2 |
3 | from __future__ import annotations
4 |
5 | __author__ = "Luis Barroso-Luque"
6 |
7 | from abc import ABCMeta, abstractmethod
8 | from numbers import Real
9 | from types import SimpleNamespace
10 | from typing import Any
11 |
12 | import cvxpy as cp
13 | import numpy as np
14 | from cvxpy.atoms.affine.wraps import psd_wrap
15 | from numpy.typing import NDArray
16 | from sklearn.utils._param_validation import Interval
17 |
18 | from ..._utils.validation import _check_groups
19 | from .._base import CVXRegressor
20 |
21 |
22 | class MIQPl0(CVXRegressor, metaclass=ABCMeta):
23 | r"""Base class for mixed-integer quadratic programming (MIQP) Regressors.
24 |
25 | Generalized l0 formulation that allows grouping coefficients, based on:
26 |
27 | https://doi.org/10.1287/opre.2015.1436
28 |
29 | Args:
30 | groups (list or ndarray):
31 | array-like of integers specifying groups. Length should be the
32 | same as model, where each integer entry specifies the group
33 | each parameter corresponds to. If no grouping is required, simply
34 | pass a list of all different numbers, i.e. using range.
35 | big_M (float):
36 | Upper bound on the norm of coefficients associated with each
37 | groups of coefficients :math:`||\beta_c||_2`.
38 | hierarchy (list):
39 | A list of lists of integers storing hierarchy relations between
40 | coefficients.
41 | Each sublist contains indices of other coefficients
42 | on which the coefficient associated with each element of
43 | the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
44 | coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no
45 | dependence.
46 | ignore_psd_check (bool):
47 | Whether to ignore cvxpy's PSD checks of matrix used in quadratic
48 | form. Default is True to avoid raising errors for poorly
49 | conditioned matrices. But if you want to be strict set to False.
50 | fit_intercept (bool):
51 | Whether the intercept should be estimated or not.
52 | If False, the data is assumed to be already centered.
53 | copy_X (bool):
54 | If True, X will be copied; else, it may be overwritten.
55 | warm_start (bool):
56 | When set to True, reuse the solution of the previous call to
57 | fit as initialization, otherwise, just erase the previous
58 | solution.
59 | solver (str):
60 | cvxpy backend solver to use. Supported solvers are listed here:
61 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
62 | solver_options (dict):
63 | dictionary of keyword arguments passed to cvxpy solve.
64 | See docs in CVXRegressor for more information.
65 | """
66 |
67 | _parameter_constraints: dict[str, list[Any]] = {
68 | "ignore_psd_check": ["boolean"],
69 | **CVXRegressor._parameter_constraints,
70 | }
71 |
72 | _cvx_parameter_constraints: dict[str, list[Any]] = {
73 | "big_M": [Interval(type=Real, left=0.0, right=None, closed="left")]
74 | }
75 |
76 | @abstractmethod # force inspect.isabstract to return True
77 | def __init__(
78 | self,
79 | groups: NDArray[np.floating | np.integer] | None = None,
80 | big_M: int = 100,
81 | hierarchy: list[list[int]] | None = None,
82 | ignore_psd_check: bool = True,
83 | fit_intercept: bool = False,
84 | copy_X: bool = True,
85 | warm_start: bool = False,
86 | solver: str | None = None,
87 | solver_options: dict | None = None,
88 | ):
89 | super().__init__(
90 | fit_intercept=fit_intercept,
91 | copy_X=copy_X,
92 | warm_start=warm_start,
93 | solver=solver,
94 | solver_options=solver_options,
95 | )
96 |
97 | self.hierarchy = hierarchy
98 | self.ignore_psd_check = ignore_psd_check
99 | self.groups = groups
100 | self.big_M = big_M
101 |
102 | def _validate_params(self, X: NDArray, y: NDArray) -> None:
103 | """Validate parameters."""
104 | super()._validate_params(X, y)
105 | _check_groups(self.groups, X.shape[1])
106 |
107 | def _generate_auxiliaries(
108 | self, X: NDArray, y: NDArray, beta: cp.Variable, parameters: SimpleNamespace
109 | ) -> SimpleNamespace | None:
110 | """Generate the boolean slack variable."""
111 | n_groups = X.shape[1] if self.groups is None else len(np.unique(self.groups))
112 | return SimpleNamespace(z0=cp.Variable(n_groups, boolean=True))
113 |
114 | def _generate_objective(
115 | self,
116 | X: NDArray,
117 | y: NDArray,
118 | beta: cp.Variable,
119 | parameters: SimpleNamespace | None = None,
120 | auxiliaries: SimpleNamespace | None = None,
121 | ) -> cp.Expression:
122 | """Generate the quadratic form portion of objective."""
123 | # psd_wrap will ignore cvxpy PSD checks, without it errors will
124 | # likely be raised since correlation matrices are usually very
125 | # poorly conditioned
126 | XTX = psd_wrap(X.T @ X) if self.ignore_psd_check else X.T @ X
127 | objective = cp.quad_form(beta, XTX) - 2 * y.T @ X @ beta
128 | # objective = cp.sum_squares(X @ self.beta_ - y)
129 | return objective
130 |
131 | def _generate_constraints(
132 | self,
133 | X: NDArray,
134 | y: NDArray,
135 | beta: cp.Variable,
136 | parameters: SimpleNamespace | None = None,
137 | auxiliaries: SimpleNamespace | None = None,
138 | ) -> list[cp.Constraint]:
139 | """Generate the constraints used to solve l0 regularization."""
140 | assert auxiliaries is not None and parameters is not None
141 | groups = np.arange(X.shape[1]) if self.groups is None else self.groups
142 | group_masks = [groups == i for i in np.sort(np.unique(groups))]
143 | constraints = []
144 | for i, mask in enumerate(group_masks):
145 | constraints += [
146 | -parameters.big_M * auxiliaries.z0[i] <= beta[mask],
147 | beta[mask] <= parameters.big_M * auxiliaries.z0[i],
148 | ]
149 |
150 | if self.hierarchy is not None:
151 | constraints += self._generate_hierarchy_constraints(groups, auxiliaries.z0)
152 |
153 | return constraints
154 |
155 | def _generate_hierarchy_constraints(
156 | self, groups: NDArray, z0: cp.Variable
157 | ) -> list[cp.Constraint]:
158 | """Generate single feature hierarchy constraints."""
159 | assert self.hierarchy is not None
160 | group_ids = np.sort(np.unique(groups))
161 | z0_index = {gid: i for i, gid in enumerate(group_ids)}
162 | constraints = [
163 | z0[z0_index[high_id]] <= z0[z0_index[sub_id]]
164 | for high_id, sub_ids in zip(group_ids, self.hierarchy)
165 | for sub_id in sub_ids
166 | ]
167 | return constraints
168 |
--------------------------------------------------------------------------------
/src/sparselm/model/_miqp/_best_subset.py:
--------------------------------------------------------------------------------
1 | """MIQP based solvers for Best Subset Selection solutions.
2 |
3 | Allows hierarchy constraints similar to mixed L0 solvers.
4 | """
5 |
6 | from __future__ import annotations
7 |
8 | __author__ = "Luis Barroso-Luque"
9 |
10 | from numbers import Real
11 | from types import SimpleNamespace
12 | from typing import Any
13 |
14 | import cvxpy as cp
15 | import numpy as np
16 | from numpy.typing import NDArray
17 | from sklearn.utils._param_validation import Interval
18 |
19 | from sparselm.model._base import TikhonovMixin
20 |
21 | from ._base import MIQPl0
22 |
23 |
24 | class BestSubsetSelection(MIQPl0):
25 | r"""MIQP Best Subset Selection Regressor.
26 |
27 | Generalized best subset that allows grouping subsets.
28 |
29 | Args:
30 | groups (NDArray):
31 | array-like of integers specifying groups. Length should be the
32 | same as model, where each integer entry specifies the group
33 | each parameter corresponds to. If no grouping is required,
34 | simply pass a list of all different numbers, i.e. using range.
35 | sparse_bound (int):
36 | Upper bound on sparsity. The upper bound on total number of
37 | nonzero coefficients.
38 | big_M (float):
39 | Upper bound on the norm of coefficients associated with each
40 | groups of coefficients :math:`||\beta_c||_2`.
41 | hierarchy (list):
42 | A list of lists of integers storing hierarchy relations between
43 | coefficients.
44 | Each sublist contains indices of other coefficients
45 | on which the coefficient associated with each element of
46 | the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
47 | coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no
48 | dependence.
49 | ignore_psd_check (bool):
50 | Whether to ignore cvxpy's PSD checks of matrix used in
51 | quadratic form. Default is True to avoid raising errors for
52 | poorly conditioned matrices. But if you want to be strict set
53 | to False.
54 | fit_intercept (bool):
55 | Whether the intercept should be estimated or not.
56 | If False, the data is assumed to be already centered.
57 | copy_X (bool):
58 | If True, X will be copied; else, it may be overwritten.
59 | warm_start (bool):
60 | When set to True, reuse the solution of the previous call to
61 | fit as initialization, otherwise, just erase the previous
62 | solution.
63 | solver (str):
64 | cvxpy backend solver to use. Supported solvers are listed here:
65 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
66 | solver_options (dict):
67 | dictionary of keyword arguments passed to cvxpy solve.
68 | See docs in CVXRegressor for more information.
69 |
70 | Attributes:
71 | coef_ (NDArray):
72 | Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,).
73 | intercept_ (float):
74 | Independent term in decision function.
75 | canonicals_ (SimpleNamespace):
76 | Namespace that contains underlying cvxpy objects used to define
77 | the optimization problem. The objects included are the following:
78 | - objective - the objective function.
79 | - beta - variable to be optimized (corresponds to the estimated coef_ attribute).
80 | - parameters - hyper-parameters
81 | - auxiliaries - auxiliary variables and expressions
82 | - constraints - solution constraints
83 |
84 | Note:
85 | Installation of Gurobi is not a must, but highly recommended. An open source alternative
86 | is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
87 | See the Mixed-integer programs section of the cvxpy docs:
88 | https://www.cvxpy.org/tutorial/advanced/index.html
89 |
90 | Warning:
91 | Even with gurobi solver, this can take a very long time to converge for large problems and under-determined
92 | problems.
93 | """
94 |
95 | _cvx_parameter_constraints: dict[str, list[Any]] = {
96 | "sparse_bound": [Interval(type=Real, left=0, right=None, closed="left")],
97 | **MIQPl0._cvx_parameter_constraints,
98 | }
99 |
100 | def __init__(
101 | self,
102 | groups: NDArray[np.floating | np.integer] | None = None,
103 | sparse_bound=100,
104 | big_M: int = 100,
105 | hierarchy: list[list[int]] | None = None,
106 | ignore_psd_check: bool = True,
107 | fit_intercept: bool = False,
108 | copy_X: bool = True,
109 | warm_start: bool = False,
110 | solver: str | None = None,
111 | solver_options: dict | None = None,
112 | ):
113 | super().__init__(
114 | groups=groups,
115 | big_M=big_M,
116 | hierarchy=hierarchy,
117 | ignore_psd_check=ignore_psd_check,
118 | fit_intercept=fit_intercept,
119 | copy_X=copy_X,
120 | warm_start=warm_start,
121 | solver=solver,
122 | solver_options=solver_options,
123 | )
124 | self.sparse_bound = sparse_bound
125 |
126 | def _generate_constraints(
127 | self,
128 | X: NDArray,
129 | y: NDArray,
130 | beta: cp.Variable,
131 | parameters: SimpleNamespace | None = None,
132 | auxiliaries: SimpleNamespace | None = None,
133 | ) -> list[cp.Constraint]:
134 | """Generate the constraints for best subset selection."""
135 | assert parameters is not None
136 | assert auxiliaries is not None
137 | constraints = super()._generate_constraints(X, y, beta, parameters, auxiliaries)
138 | constraints += [cp.sum(auxiliaries.z0) <= parameters.sparse_bound]
139 | return constraints
140 |
141 |
142 | class RidgedBestSubsetSelection(TikhonovMixin, BestSubsetSelection):
143 | r"""MIQP best subset selection Regressor with Ridge/Tihkonov regularization.
144 |
145 | Args:
146 | groups (NDArray):
147 | array-like of integers specifying groups. Length should be the
148 | same as model, where each integer entry specifies the group
149 | each parameter corresponds to. If no grouping is required,
150 | simply pass a list of all different numbers, i.e. using range.
151 | sparse_bound (int):
152 | Upper bound on sparsity. The upper bound on total number of
153 | nonzero coefficients.
154 | eta (float):
155 | L2 regularization hyper-parameter.
156 | big_M (float):
157 | Upper bound on the norm of coefficients associated with each
158 | groups of coefficients :math:`||\beta_c||_2`.
159 | hierarchy (list):
160 | A list of lists of integers storing hierarchy relations between
161 | coefficients.
162 | Each sublist contains indices of other coefficients
163 | on which the coefficient associated with each element of
164 | the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
165 | coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no
166 | dependence.
167 | tikhonov_w (np.array):
168 | Matrix to add weights to L2 regularization.
169 | ignore_psd_check (bool):
170 | Whether to ignore cvxpy's PSD checks of matrix used in
171 | quadratic form. Default is True to avoid raising errors for
172 | poorly conditioned matrices. But if you want to be strict set
173 | to False.
174 | fit_intercept (bool):
175 | Whether the intercept should be estimated or not.
176 | If False, the data is assumed to be already centered.
177 | copy_X (bool):
178 | If True, X will be copied; else, it may be overwritten.
179 | warm_start (bool):
180 | When set to True, reuse the solution of the previous call to
181 | fit as initialization, otherwise, just erase the previous
182 | solution.
183 | solver (str):
184 | cvxpy backend solver to use. Supported solvers are listed here:
185 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
186 | solver_options (dict):
187 | dictionary of keyword arguments passed to cvxpy solve.
188 | See docs in CVXRegressor for more information.
189 |
190 | Attributes:
191 | coef_ (NDArray):
192 | Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,).
193 | intercept_ (float):
194 | Independent term in decision function.
195 | canonicals_ (SimpleNamespace):
196 | Namespace that contains underlying cvxpy objects used to define
197 | the optimization problem. The objects included are the following:
198 | - objective - the objective function.
199 | - beta - variable to be optimized (corresponds to the estimated coef_ attribute).
200 | - parameters - hyper-parameters
201 | - auxiliaries - auxiliary variables and expressions
202 | - constraints - solution constraints
203 |
204 | Note:
205 | Installation of Gurobi is not a must, but highly recommended. An open source alternative
206 | is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
207 | See the Mixed-integer programs section of the cvxpy docs:
208 | https://www.cvxpy.org/tutorial/advanced/index.html
209 |
210 | Warning:
211 | Even with gurobi solver, this can take a very long time to converge for large problems and under-determined
212 | problems.
213 | """
214 |
215 | _cvx_parameter_constraints: dict[str, list[Any]] = {
216 | "eta": [Interval(type=Real, left=0.0, right=None, closed="left")],
217 | **BestSubsetSelection._cvx_parameter_constraints,
218 | }
219 |
220 | def __init__(
221 | self,
222 | groups: NDArray[np.floating | np.integer] | None = None,
223 | sparse_bound: int = 100,
224 | eta: float = 1.0,
225 | big_M: int = 100,
226 | hierarchy: list[list[int]] | None = None,
227 | tikhonov_w: NDArray[np.floating] | None = None,
228 | ignore_psd_check: bool = True,
229 | fit_intercept: bool = False,
230 | copy_X: bool = True,
231 | warm_start: bool = False,
232 | solver: str | None = None,
233 | solver_options: dict | None = None,
234 | ):
235 | super().__init__(
236 | groups=groups,
237 | sparse_bound=sparse_bound,
238 | big_M=big_M,
239 | hierarchy=hierarchy,
240 | ignore_psd_check=ignore_psd_check,
241 | fit_intercept=fit_intercept,
242 | copy_X=copy_X,
243 | warm_start=warm_start,
244 | solver=solver,
245 | solver_options=solver_options,
246 | )
247 | self.tikhonov_w = tikhonov_w
248 | self.eta = eta
249 |
--------------------------------------------------------------------------------
/src/sparselm/model/_miqp/_regularized_l0.py:
--------------------------------------------------------------------------------
1 | """MIQP based solvers for sparse solutions with hierarchical constraints.
2 |
3 | Generalized regularized l0 solvers that allow grouping parameters as detailed in:
4 |
5 | https://doi.org/10.1287/opre.2015.1436
6 |
7 | L1L0 proposed by Wenxuan Huang:
8 |
9 | https://arxiv.org/abs/1807.10753
10 |
11 | L2L0 proposed by Peichen Zhong:
12 |
13 | https://journals.aps.org/prb/abstract/10.1103/PhysRevB.106.024203
14 |
15 | Regressors allow optional inclusion of hierarchical constraints at the single coefficient
16 | or group of coefficients level.
17 | """
18 |
19 | from __future__ import annotations
20 |
21 | __author__ = "Luis Barroso-Luque, Fengyu Xie"
22 |
23 |
24 | from abc import ABCMeta, abstractmethod
25 | from numbers import Real
26 | from types import SimpleNamespace
27 | from typing import Any
28 |
29 | import cvxpy as cp
30 | import numpy as np
31 | from numpy.typing import NDArray
32 | from sklearn.utils._param_validation import Interval
33 |
34 | from sparselm.model._base import TikhonovMixin
35 |
36 | from ._base import MIQPl0
37 |
38 |
39 | class RegularizedL0(MIQPl0):
40 | r"""Implementation of mixed-integer quadratic programming l0 regularized Regressor.
41 |
42 | Supports grouping parameters and group-level hierarchy, but requires groups as a
43 | compulsory argument.
44 |
45 | Regularized regression objective:
46 |
47 | .. math::
48 |
49 | \min_{\beta} || X \beta - y ||^2_2 + \alpha \sum_{G} z_G
50 |
51 | Where G represents groups of features/coefficients and :math:`z_G` is are boolean
52 | valued slack variables.
53 |
54 | Args:
55 | groups (NDArray):
56 | 1D array-like of integers specifying groups. Length should be the
57 | same as model, where each integer entry specifies the group
58 | each parameter corresponds to. If no grouping is needed pass a list
59 | of all distinct numbers (ie range(len(coefs)) to create singleton groups
60 | for each parameter.
61 | alpha (float):
62 | L0 pseudo-norm regularization hyper-parameter.
63 | big_M (float):
64 | Upper bound on the norm of coefficients associated with each
65 | groups of coefficients :math:`||\beta_c||_2`.
66 | hierarchy (list):
67 | A list of lists of integers storing hierarchy relations between
68 | groups.
69 | Each sublist contains indices of other groups
70 | on which the group associated with each element of
71 | the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
72 | group 0 depends on 1, and 2; 1 depends on 0, and 2 has no
73 | dependence.
74 | ignore_psd_check (bool):
75 | Whether to ignore cvxpy's PSD checks of matrix used in quadratic
76 | form. Default is True to avoid raising errors for poorly
77 | conditioned matrices. But if you want to be strict set to False.
78 | fit_intercept (bool):
79 | Whether the intercept should be estimated or not.
80 | If False, the data is assumed to be already centered.
81 | copy_X (bool):
82 | If True, X will be copied; else, it may be overwritten.
83 | warm_start (bool):
84 | When set to True, reuse the solution of the previous call to
85 | fit as initialization, otherwise, just erase the previous
86 | solution.
87 | solver (str):
88 | cvxpy backend solver to use. Supported solvers are listed here:
89 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
90 | solver_options (dict):
91 | dictionary of keyword arguments passed to cvxpy solve.
92 | See docs in CVXRegressor for more information.
93 |
94 | Attributes:
95 | coef_ (NDArray):
96 | Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,).
97 | intercept_ (float):
98 | Independent term in decision function.
99 | canonicals_ (SimpleNamespace):
100 | Namespace that contains underlying cvxpy objects used to define
101 | the optimization problem. The objects included are the following:
102 | - objective - the objective function.
103 | - beta - variable to be optimized (corresponds to the estimated coef_ attribute).
104 | - parameters - hyper-parameters
105 | - auxiliaries - auxiliary variables and expressions
106 | - constraints - solution constraints
107 |
108 | Note:
109 | Installation of Gurobi is not a must, but highly recommended. An open source alternative
110 | is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
111 | See the Mixed-integer programs section of the cvxpy docs:
112 | https://www.cvxpy.org/tutorial/advanced/index.html
113 | """
114 |
115 | _cvx_parameter_constraints: dict[str, list[Any]] = {
116 | "alpha": [Interval(type=Real, left=0.0, right=None, closed="left")],
117 | **MIQPl0._cvx_parameter_constraints,
118 | }
119 |
120 | def __init__(
121 | self,
122 | groups: NDArray[np.floating | np.integer] | None = None,
123 | alpha: float = 1.0,
124 | big_M: int = 100,
125 | hierarchy: list[list[int]] | None = None,
126 | ignore_psd_check: bool = True,
127 | fit_intercept: bool = False,
128 | copy_X: bool = True,
129 | warm_start: bool = False,
130 | solver: str | None = None,
131 | solver_options: dict | None = None,
132 | ):
133 | super().__init__(
134 | groups=groups,
135 | big_M=big_M,
136 | hierarchy=hierarchy,
137 | ignore_psd_check=ignore_psd_check,
138 | fit_intercept=fit_intercept,
139 | copy_X=copy_X,
140 | warm_start=warm_start,
141 | solver=solver,
142 | solver_options=solver_options,
143 | )
144 | self.alpha = alpha
145 |
146 | def _generate_objective(
147 | self,
148 | X: NDArray,
149 | y: NDArray,
150 | beta: cp.Variable,
151 | parameters: SimpleNamespace | None = None,
152 | auxiliaries: SimpleNamespace | None = None,
153 | ) -> cp.Expression:
154 | """Generate the quadratic form and l0 regularization portion of objective."""
155 | assert parameters is not None
156 | assert auxiliaries is not None
157 | c0 = 2 * X.shape[0] # keeps hyperparameter scale independent
158 | objective = super()._generate_objective(
159 | X, y, beta, parameters, auxiliaries
160 | ) + c0 * parameters.alpha * cp.sum(auxiliaries.z0)
161 | return objective
162 |
163 |
164 | class MixedL0(RegularizedL0, metaclass=ABCMeta):
165 | """Abstract base class for mixed L0 regularization models: L1L0 and L2L0."""
166 |
167 | _cvx_parameter_constraints: dict[str, list[Any]] = {
168 | "eta": [Interval(type=Real, left=0.0, right=None, closed="left")],
169 | **RegularizedL0._cvx_parameter_constraints,
170 | }
171 |
172 | def __init__(
173 | self,
174 | groups: NDArray[np.floating | np.integer] | None = None,
175 | alpha: float = 1.0,
176 | eta: float = 1.0,
177 | big_M: int = 100,
178 | hierarchy: list[list[int]] | None = None,
179 | ignore_psd_check: bool = True,
180 | fit_intercept: bool = False,
181 | copy_X: bool = True,
182 | warm_start: bool = False,
183 | solver: str | None = None,
184 | solver_options: dict | None = None,
185 | ):
186 | """Initialize Regressor.
187 |
188 | Args:
189 | groups (NDArray):
190 | 1D array-like of integers specifying groups. Length should be the
191 | same as model, where each integer entry specifies the group
192 | each parameter corresponds to. If no grouping is needed pass a list
193 | of all distinct numbers (ie range(len(coefs)) to create singleton groups
194 | for each parameter.
195 | alpha (float):
196 | L0 pseudo-norm regularization hyper-parameter.
197 | eta (float):
198 | standard norm regularization hyper-parameter (usually l1 or l2).
199 | big_M (float):
200 | Upper bound on the norm of coefficients associated with each
201 |
202 | hierarchy (list):
203 | A list of lists of integers storing hierarchy relations between
204 | coefficients.
205 | Each sublist contains indices of other coefficients
206 | on which the coefficient associated with each element of
207 | the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
208 | coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no
209 | dependence.
210 | ignore_psd_check (bool):
211 | Whether to ignore cvxpy's PSD checks of matrix used in quadratic
212 | form. Default is True to avoid raising errors for poorly
213 | conditioned matrices. But if you want to be strict set to False.
214 | fit_intercept (bool):
215 | Whether the intercept should be estimated or not.
216 | If False, the data is assumed to be already centered.
217 | copy_X (bool):
218 | If True, X will be copied; else, it may be overwritten.
219 | warm_start (bool):
220 | When set to True, reuse the solution of the previous call to
221 | fit as initialization, otherwise, just erase the previous
222 | solution.
223 | solver (str):
224 | cvxpy backend solver to use. Supported solvers are listed here:
225 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
226 | solver_options (dict):
227 | dictionary of keyword arguments passed to cvxpy solve.
228 | See docs in CVXRegressor for more information.
229 | """
230 | super().__init__(
231 | groups=groups,
232 | alpha=alpha,
233 | big_M=big_M,
234 | hierarchy=hierarchy,
235 | ignore_psd_check=ignore_psd_check,
236 | fit_intercept=fit_intercept,
237 | copy_X=copy_X,
238 | warm_start=warm_start,
239 | solver=solver,
240 | solver_options=solver_options,
241 | )
242 | self.eta = eta
243 |
244 | @abstractmethod
245 | def _generate_objective(
246 | self,
247 | X: NDArray,
248 | y: NDArray,
249 | beta: cp.Variable,
250 | parameters: SimpleNamespace | None = None,
251 | auxiliaries: SimpleNamespace | None = None,
252 | ) -> cp.Expression:
253 | """Generate optimization objective."""
254 | # implement in derived classes using super to call MIQP_L0 objective
255 | return super()._generate_objective(X, y, beta, parameters, auxiliaries)
256 |
257 |
258 | class L1L0(MixedL0):
259 | r"""L1L0 regularized Regressor.
260 |
261 | Regressor with L1L0 regularization solved with mixed integer programming
262 | as discussed in:
263 |
264 | https://arxiv.org/abs/1807.10753
265 |
266 | Extended to allow grouping of coefficients and group-level hierarchy as described
267 | in:
268 |
269 | https://doi.org/10.1287/opre.2015.1436
270 |
271 | Regularized regression objective:
272 |
273 | .. math::
274 |
275 | \min_{\beta} || X \beta - y ||^2_2 + \alpha \sum_{G} z_G + \eta ||\beta||_1
276 |
277 | Where G represents groups of features/coefficients and :math:`z_G` is are boolean
278 | valued slack variables.
279 |
280 | Args:
281 | groups (NDArray):
282 | 1D array-like of integers specifying groups. Length should be the
283 | same as model, where each integer entry specifies the group
284 | each parameter corresponds to. If no grouping is needed pass a list
285 | of all distinct numbers (ie range(len(coefs)) to create singleton groups
286 | for each parameter.
287 | alpha (float):
288 | L0 pseudo-norm regularization hyper-parameter.
289 | eta (float):
290 | L1 regularization hyper-parameter.
291 | big_M (float):
292 | Upper bound on the norm of coefficients associated with each
293 | groups of coefficients :math:`||\beta_c||_2`.
294 | hierarchy (list):
295 | A list of lists of integers storing hierarchy relations between
296 | coefficients.
297 | Each sublist contains indices of other coefficients
298 | on which the coefficient associated with each element of
299 | the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
300 | coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no
301 | dependence.
302 | ignore_psd_check (bool):
303 | Whether to ignore cvxpy's PSD checks of matrix used in quadratic
304 | form. Default is True to avoid raising errors for poorly
305 | conditioned matrices. But if you want to be strict set to False.
306 | fit_intercept (bool):
307 | Whether the intercept should be estimated or not.
308 | If False, the data is assumed to be already centered.
309 | copy_X (bool):
310 | If True, X will be copied; else, it may be overwritten.
311 | warm_start (bool):
312 | When set to True, reuse the solution of the previous call to
313 | fit as initialization, otherwise, just erase the previous
314 | solution.
315 | solver (str):
316 | cvxpy backend solver to use. Supported solvers are listed here:
317 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
318 | solver_options (dict):
319 | dictionary of keyword arguments passed to cvxpy solve.
320 | See docs in CVXRegressor for more information.
321 |
322 | Attributes:
323 | coef_ (NDArray):
324 | Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,).
325 | intercept_ (float):
326 | Independent term in decision function.
327 | canonicals_ (SimpleNamespace):
328 | Namespace that contains underlying cvxpy objects used to define
329 | the optimization problem. The objects included are the following:
330 | - objective - the objective function.
331 | - beta - variable to be optimized (corresponds to the estimated coef_ attribute).
332 | - parameters - hyper-parameters
333 | - auxiliaries - auxiliary variables and expressions
334 | - constraints - solution constraints
335 |
336 | Note:
337 | Installation of Gurobi is not a must, but highly recommended. An open source alternative
338 | is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
339 | See the Mixed-integer programs section of the cvxpy docs:
340 | https://www.cvxpy.org/tutorial/advanced/index.html
341 | """
342 |
343 | def __init__(
344 | self,
345 | groups: NDArray[np.floating | np.integer] | None = None,
346 | alpha: float = 1.0,
347 | eta: float = 1.0,
348 | big_M: int = 100,
349 | hierarchy: list[list[int]] | None = None,
350 | ignore_psd_check: bool = True,
351 | fit_intercept: bool = False,
352 | copy_X: bool = True,
353 | warm_start: bool = False,
354 | solver: str | None = None,
355 | solver_options: dict | None = None,
356 | ):
357 | super().__init__(
358 | groups=groups,
359 | eta=eta,
360 | alpha=alpha,
361 | big_M=big_M,
362 | hierarchy=hierarchy,
363 | ignore_psd_check=ignore_psd_check,
364 | fit_intercept=fit_intercept,
365 | copy_X=copy_X,
366 | warm_start=warm_start,
367 | solver=solver,
368 | solver_options=solver_options,
369 | )
370 |
371 | def _generate_auxiliaries(
372 | self, X: NDArray, y: NDArray, beta: cp.Variable, parameters: SimpleNamespace
373 | ) -> SimpleNamespace | None:
374 | """Generate the boolean slack variable."""
375 | auxiliaries = super()._generate_auxiliaries(X, y, beta, parameters)
376 | X.shape[1] if self.groups is None else len(np.unique(self.groups))
377 | auxiliaries.z1 = cp.Variable(X.shape[1]) # type: ignore
378 | return auxiliaries
379 |
380 | def _generate_constraints(
381 | self,
382 | X: NDArray,
383 | y: NDArray,
384 | beta: cp.Variable,
385 | parameters: SimpleNamespace | None = None,
386 | auxiliaries: SimpleNamespace | None = None,
387 | ) -> list[cp.Constraint]:
388 | """Generate the constraints used to solve l1l0 regularization."""
389 | assert auxiliaries is not None
390 | constraints = super()._generate_constraints(X, y, beta, parameters, auxiliaries)
391 | # L1 constraints (why not do an l1 norm in the objective instead?)
392 | constraints += [-auxiliaries.z1 <= beta, beta <= auxiliaries.z1]
393 | return constraints
394 |
395 | def _generate_objective(
396 | self,
397 | X: NDArray,
398 | y: NDArray,
399 | beta: cp.Variable,
400 | parameters: SimpleNamespace | None = None,
401 | auxiliaries: SimpleNamespace | None = None,
402 | ) -> cp.Expression:
403 | """Generate the objective function used in l1l0 regression model."""
404 | assert parameters is not None
405 | assert auxiliaries is not None
406 | c0 = 2 * X.shape[0] # keeps hyperparameter scale independent
407 | objective = super()._generate_objective(X, y, beta, parameters, auxiliaries)
408 | # L1 term
409 | objective += c0 * parameters.eta * cp.sum(auxiliaries.z1)
410 | return objective
411 |
412 |
413 | class L2L0(TikhonovMixin, MixedL0):
414 | r"""L2L0 regularized Regressor.
415 |
416 | Based on Regressor with L2L0 regularization solved with mixed integer programming
417 | proposed in:
418 |
419 | https://arxiv.org/abs/2204.13789
420 |
421 | Extended to allow grouping of coefficients and group-level hierarchy as described
422 | in:
423 |
424 | https://doi.org/10.1287/opre.2015.1436
425 |
426 | And allows using a Tihkonov matrix in the l2 term.
427 |
428 | Regularized regression objective:
429 |
430 | .. math::
431 |
432 | \min_{\beta} || X \beta - y ||^2_2 + \alpha \sum_{G} z_G + \eta ||W\beta||^2_2
433 |
434 | Where G represents groups of features/coefficients and :math:`z_G` is are boolean
435 | valued slack variables. W is a Tikhonov matrix.
436 |
437 | Args:
438 | groups (NDArray):
439 | 1D array-like of integers specifying groups. Length should be the
440 | same as model, where each integer entry specifies the group
441 | each parameter corresponds to. If no grouping is needed pass a list
442 | of all distinct numbers (ie range(len(coefs)) to create singleton groups
443 | for each parameter.
444 | alpha (float):
445 | L0 pseudo-norm regularization hyper-parameter.
446 | eta (float):
447 | L2 regularization hyper-parameter.
448 | big_M (float):
449 | Upper bound on the norm of coefficients associated with each
450 | groups of coefficients :math:`||\beta_c||_2`.
451 | hierarchy (list):
452 | A list of lists of integers storing hierarchy relations between
453 | coefficients.
454 | Each sublist contains indices of other coefficients
455 | on which the coefficient associated with each element of
456 | the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
457 | coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no
458 | dependence.
459 | tikhonov_w (np.array):
460 | Matrix to add weights to L2 regularization.
461 | ignore_psd_check (bool):
462 | Wether to ignore cvxpy's PSD checks of matrix used in quadratic
463 | form. Default is True to avoid raising errors for poorly
464 | conditioned matrices. But if you want to be strict set to False.
465 | fit_intercept (bool):
466 | Whether the intercept should be estimated or not.
467 | If False, the data is assumed to be already centered.
468 | copy_X (bool):
469 | If True, X will be copied; else, it may be overwritten.
470 | warm_start (bool):
471 | When set to True, reuse the solution of the previous call to
472 | fit as initialization, otherwise, just erase the previous
473 | solution.
474 | solver (str):
475 | cvxpy backend solver to use. Supported solvers are listed here:
476 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
477 | solver_options (dict):
478 | dictionary of keyword arguments passed to cvxpy solve.
479 | See docs in CVXEstimator for more information.
480 |
481 | Attributes:
482 | coef_ (NDArray):
483 | Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,).
484 | intercept_ (float):
485 | Independent term in decision function.
486 | canonicals_ (SimpleNamespace):
487 | Namespace that contains underlying cvxpy objects used to define
488 | the optimization problem. The objects included are the following:
489 | - objective - the objective function.
490 | - beta - variable to be optimized (corresponds to the estimated coef_ attribute).
491 | - parameters - hyper-parameters
492 | - auxiliaries - auxiliary variables and expressions
493 | - constraints - solution constraints
494 |
495 | Note:
496 | Installation of Gurobi is not a must, but highly recommended. An open source alternative
497 | is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
498 | See the Mixed-integer programs section of the cvxpy docs:
499 | https://www.cvxpy.org/tutorial/advanced/index.html
500 | """
501 |
502 | def __init__(
503 | self,
504 | groups: NDArray[np.floating | np.integer] | None = None,
505 | alpha: float = 1.0,
506 | eta: float = 1.0,
507 | big_M: int = 100,
508 | hierarchy: list[list[int]] | None = None,
509 | tikhonov_w: NDArray[np.floating] | None = None,
510 | ignore_psd_check: bool = True,
511 | fit_intercept: bool = False,
512 | copy_X: bool = True,
513 | warm_start: bool = False,
514 | solver: str | None = None,
515 | solver_options: dict | None = None,
516 | ):
517 | super().__init__(
518 | groups=groups,
519 | alpha=alpha,
520 | eta=eta,
521 | big_M=big_M,
522 | hierarchy=hierarchy,
523 | ignore_psd_check=ignore_psd_check,
524 | fit_intercept=fit_intercept,
525 | copy_X=copy_X,
526 | warm_start=warm_start,
527 | solver=solver,
528 | solver_options=solver_options,
529 | )
530 | self.tikhonov_w = tikhonov_w
531 |
--------------------------------------------------------------------------------
/src/sparselm/model/_ols.py:
--------------------------------------------------------------------------------
1 | """Ordinary least squares cvxpy solver."""
2 |
3 | from __future__ import annotations
4 |
5 | __author__ = "Fengyu Xie, Luis Barroso-Luque"
6 |
7 |
8 | from types import SimpleNamespace
9 |
10 | import cvxpy as cp
11 | from numpy.typing import NDArray
12 |
13 | from ._base import CVXRegressor
14 |
15 |
16 | class OrdinaryLeastSquares(CVXRegressor):
17 | r"""Ordinary Least Squares Linear Regression.
18 |
19 | Regression objective:
20 |
21 | .. math::
22 |
23 | \min_{\beta} || X \beta - y ||^2_2
24 |
25 | Args:
26 | fit_intercept (bool):
27 | Whether the intercept should be estimated or not.
28 | If False, the data is assumed to be already centered.
29 | copy_X (bool):
30 | If True, X will be copied; else, it may be overwritten.
31 | warm_start (bool):
32 | When set to True, reuse the solution of the previous call to
33 | fit as initialization, otherwise, just erase the previous
34 | solution.
35 | solver (str):
36 | cvxpy backend solver to use. Supported solvers are listed here:
37 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
38 | solver_options (dict):
39 | dictionary of keyword arguments passed to cvxpy solve.
40 | See docs linked above for more information.
41 |
42 | Attributes:
43 | coef_ (NDArray):
44 | Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,).
45 | intercept_ (float):
46 | Independent term in decision function.
47 | canonicals_ (SimpleNamespace):
48 | Namespace that contains underlying cvxpy objects used to define
49 | the optimization problem. The objects included are the following:
50 | - objective - the objective function.
51 | - beta - variable to be optimized (corresponds to the estimated coef_ attribute).
52 | - parameters - hyper-parameters
53 | - auxiliaries - auxiliary variables and expressions
54 | - constraints - solution constraints
55 | """
56 |
57 | def _generate_objective(
58 | self,
59 | X: NDArray,
60 | y: NDArray,
61 | beta: cp.Variable,
62 | parameters: SimpleNamespace | None = None,
63 | auxiliaries: SimpleNamespace | None = None,
64 | ) -> cp.Expression:
65 | return 1 / (2 * X.shape[0]) * cp.sum_squares(X @ beta - y)
66 |
--------------------------------------------------------------------------------
/src/sparselm/stepwise.py:
--------------------------------------------------------------------------------
1 | """Stepwise model selection for piece-wise fitting."""
2 |
3 | from __future__ import annotations
4 |
5 | __author__ = "Fengyu Xie"
6 |
7 | from itertools import chain
8 |
9 | import numpy as np
10 | from numpy.typing import NDArray
11 | from sklearn.base import RegressorMixin
12 | from sklearn.linear_model._base import LinearModel, _check_sample_weight
13 | from sklearn.utils._param_validation import InvalidParameterError
14 | from sklearn.utils.metaestimators import _BaseComposition
15 | from sklearn.utils.validation import check_is_fitted
16 |
17 |
18 | # BaseComposition makes sure that StepwiseEstimator can be correctly cloned.
19 | def _indices_no_overlap_and_continuous(indices):
20 | scope = sorted(set(chain(*indices)))
21 | return sorted(chain(*indices)) == scope and scope == list(range(len(scope)))
22 |
23 |
24 | def _first_step_fit_intercept_only(steps):
25 | for sid, (_, estimator) in enumerate(steps):
26 | if hasattr(estimator, "estimator"):
27 | # Is a searcher such as GridSearchCV.
28 | fit_intercept = estimator.estimator.fit_intercept
29 | else:
30 | fit_intercept = estimator.fit_intercept
31 | if fit_intercept and sid > 0:
32 | return False
33 | return True
34 |
35 |
36 | def _no_nested_stepwise(steps):
37 | for _, estimator in steps:
38 | if isinstance(estimator, StepwiseEstimator):
39 | return False
40 | return True
41 |
42 |
43 | class StepwiseEstimator(_BaseComposition, RegressorMixin, LinearModel):
44 | """A composite estimator used to do stepwise fitting.
45 |
46 | The first estimator in the composite will be used to fit
47 | certain features (a piece of the feature matrix) to the
48 | target vector, and the residuals are fitted to the rest
49 | of features by using the next estimators in the composite.
50 |
51 | Each estimator can be either a CVXEstimator, a GridSearchCV or
52 | a LineSearchCV.
53 |
54 | Args:
55 | steps (list[(str, CVXEstimator)]):
56 | A list of step names and the CVXEstimators to use
57 | for each step. StepwiseEstimator cannot be used as
58 | a member of StepwiseEstimator.
59 | An estimator will fit the residuals of the previous
60 | estimator fits in the list.
61 | estimator_feature_indices (tuple[tuple[int]]):
62 | Scope of each estimator, which means the indices of
63 | features in the scope (features[:, scope]) will be
64 | fitted to the residual using the corresponding estimator.
65 | Notice:
66 | If estimators in the composite requires hierarchy
67 | or groups, the indices in the groups or hierarchy
68 | must be adjusted such that they correspond to the groups
69 | or hierarchy relations in the part of features sliced
70 | by scope.
71 | For example, consider original groups = [0, 1, 1, 2, 2],
72 | and an estimator has scope = [3, 4], then the estimator
73 | should be initialized with group = [0, 0].
74 | You are fully responsible to initialize the estimators
75 | with correct hierarchy, groups and other parameters before
76 | wrapping them up with the composite!
77 |
78 | Note:
79 | 1. Do not use GridSearchCV or LineSearchCV to search a StepwiseEstimator!
80 |
81 | 2. No nesting is allowed for StepwiseEstimator, which means no step of a
82 | StepwiseEstimator can be a StepwiseEstimator.
83 |
84 | 3. Since stepwise estimator requires specifying a list of feature indices for
85 | each step estimator, it requires fixing n_features_in_ before fitting, which
86 | violates sklearn convention for a regressor. Therefore, StepwiseEstimator is
87 | not checked by sklearn check_estimator method, and there is no guarantee that it
88 | is fully compatible with all scikit-learn features.
89 | """
90 |
91 | def __init__(
92 | self,
93 | steps,
94 | estimator_feature_indices,
95 | ):
96 | self.steps = steps
97 | # The estimator_feature_indices saved must be tuple because in
98 | # sklearn.base.clone, a cloned object is checked by pointer, rather than
99 | # by value.
100 | self.estimator_feature_indices = estimator_feature_indices
101 |
102 | # These parameters settings does not need to be called externally.
103 | def get_params(self, deep=True):
104 | """Get parameters of all estimators in the composite.
105 |
106 | Args:
107 | deep(bool):
108 | If True, will return the parameters for estimators in
109 | composite, and their contained sub-objects if they are
110 | also estimators.
111 | """
112 | return self._get_params("steps", deep=deep)
113 |
114 | def set_params(self, **params):
115 | """Set parameters for each estimator in the composite.
116 |
117 | This will be called when model selection optimizes
118 | all hyper parameters.
119 |
120 | Args:
121 | params: A Dictionary of parameters. Each parameter
122 | name must end with an underscore and a number to specify
123 | on which estimator in the composite the parameter is
124 | going to be set.
125 | Remember only to set params you wish to optimize!
126 | """
127 | self._set_params("steps", **params)
128 | return self
129 |
130 | @staticmethod
131 | def _get_estimator_coef(estimator):
132 | check_is_fitted(estimator)
133 | if hasattr(estimator, "best_estimator_"):
134 | return estimator.best_estimator_.coef_.copy()
135 | elif hasattr(estimator, "coef_"):
136 | return estimator.coef_.copy()
137 | else:
138 | raise ValueError(f"Estimator {estimator} is not a valid linear model!")
139 |
140 | @staticmethod
141 | def _get_estimator_intercept(estimator):
142 | check_is_fitted(estimator)
143 | if hasattr(estimator, "best_estimator_"):
144 | return estimator.best_estimator_.intercept_
145 | elif hasattr(estimator, "intercept_"):
146 | return estimator.intercept_
147 | else:
148 | raise ValueError(f"Estimator {estimator} is not a valid linear model!")
149 |
150 | def fit(
151 | self,
152 | X: NDArray,
153 | y: NDArray,
154 | sample_weight: NDArray[np.floating] | None = None,
155 | *args,
156 | **kwargs,
157 | ):
158 | """Prepare fit input with sklearn help then call fit method.
159 |
160 | Args:
161 | X (NDArray):
162 | Training data of shape (n_samples, n_features).
163 | y (NDArray):
164 | Target values. Will be cast to X's dtype if necessary
165 | of shape (n_samples,) or (n_samples, n_targets)
166 | sample_weight (NDArray):
167 | Individual weights for each sample of shape (n_samples,)
168 | default=None
169 | *args:
170 | Positional arguments passed to _fit method
171 | **kwargs:
172 | Keyword arguments passed to _fit method
173 | Returns:
174 | instance of self
175 | """
176 | # Check estimators and feature indices.
177 | if not _indices_no_overlap_and_continuous(self.estimator_feature_indices):
178 | raise InvalidParameterError(
179 | f"Given feature indices:"
180 | f" {self.estimator_feature_indices}"
181 | f" are not continuous and non-overlapping"
182 | f" series starting from 0!"
183 | )
184 | if not _first_step_fit_intercept_only(self.steps):
185 | raise InvalidParameterError(
186 | "Only the first estimator in steps is allowed" " to fit intercept!"
187 | )
188 | if not _no_nested_stepwise(self.steps):
189 | raise InvalidParameterError(
190 | "StepwiseEstimator should not be nested with"
191 | " another StepwiseEstimator!"
192 | )
193 |
194 | self.n_features_in_ = len(list(chain(*self.estimator_feature_indices)))
195 |
196 | # Set ensute_2d to True and reset to False so that it triggers number of
197 | # features checking.
198 | X, y = self._validate_data(
199 | X,
200 | y,
201 | accept_sparse=False,
202 | ensure_2d=True,
203 | y_numeric=True,
204 | multi_output=True,
205 | reset=False,
206 | )
207 |
208 | if sample_weight is not None:
209 | sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
210 |
211 | residuals = y.copy()
212 |
213 | self.coef_ = np.empty(X.shape[1])
214 | self.coef_.fill(np.nan)
215 | for (_, estimator), scope in zip(self.steps, self.estimator_feature_indices):
216 | # np.array indices should not be tuple.
217 | estimator.fit(
218 | X[:, list(scope)],
219 | residuals,
220 | *args,
221 | sample_weight=sample_weight,
222 | **kwargs,
223 | )
224 | self.coef_[list(scope)] = self._get_estimator_coef(estimator)
225 | residuals = residuals - estimator.predict(X[:, list(scope)])
226 | # Only the first estimator is allowed to fit intercept.
227 | if hasattr(self.steps[0][1], "estimator"):
228 | fit_intercept = self.steps[0][1].estimator.fit_intercept
229 | else:
230 | fit_intercept = self.steps[0][1].fit_intercept
231 | if fit_intercept:
232 | self.intercept_ = self._get_estimator_intercept(self.steps[0][1])
233 | else:
234 | self.intercept_ = 0.0
235 |
236 | # return self for chaining fit and predict calls
237 | return self
238 |
--------------------------------------------------------------------------------
/src/sparselm/tools.py:
--------------------------------------------------------------------------------
1 | """A variety of tools for fitting linear regression models to polish CE."""
2 |
3 | from __future__ import annotations
4 |
5 | __author__ = "Luis Barroso-Luque"
6 |
7 | import warnings
8 | from functools import wraps
9 |
10 | import numpy as np
11 | from numpy.typing import NDArray
12 |
13 |
14 | def constrain_coefficients(
15 | indices: NDArray,
16 | high: NDArray[np.floating] | float | None = None,
17 | low: NDArray[np.floating] | float | None = None,
18 | ):
19 | """Constrain a fit method to keep coefficients within a specified range.
20 |
21 | Use this as a standard decorator with parameters:
22 | - At runtime:
23 | coefs = constrain_coefficients(indices, high, low)(fit_method)(X, y)
24 | - In fit_method definitions:
25 | @constrain_coefficients(indices, high, low)
26 | def your_fit_method(X, y):
27 |
28 | Args:
29 | indices (array or list):
30 | indices of coefficients to constrain
31 | high (float or array):
32 | upper bound for indices,
33 | low (float or array):
34 | lower bounds for indices
35 | """
36 | indices = np.array(indices)
37 | if high is not None:
38 | high = (
39 | high * np.ones(len(indices))
40 | if isinstance(high, (int, float))
41 | else np.array(high)
42 | )
43 | else:
44 | high = np.inf * np.ones(len(indices))
45 | if low is not None:
46 | low = (
47 | low * np.ones(len(indices))
48 | if isinstance(low, (int, float))
49 | else np.array(low)
50 | )
51 | else:
52 | low = -np.inf * np.ones(len(indices))
53 |
54 | def decorate_fit_method(fit_method):
55 | """Decorate a fit method to constrain "dielectric constant".
56 |
57 | Args:
58 | fit_method (callable):
59 | the fit_method you will use to fit your regression model.
60 | Must take the feature matrix X and target vector y as first
61 | arguments. (i.e. fit_method(X, y, *args, **kwargs)
62 | """
63 |
64 | @wraps(fit_method)
65 | def wrapped(X, y, *args, **kwargs):
66 | coefs = fit_method(X, y, *args, **kwargs)
67 | above_range = coefs[indices] > high
68 | below_range = coefs[indices] < low
69 |
70 | # TODO do not set features to zero, do the fit without them instead
71 | if sum(above_range) > 0 or sum(below_range) > 0:
72 | X_, y_ = X.copy(), y.copy()
73 | y_ -= np.sum(X_[:, indices[above_range]] * high[above_range], axis=1)
74 | X_[:, indices[above_range]] = 0.0
75 | y_ -= np.sum(X_[:, indices[below_range]] * low[below_range], axis=1)
76 | X_[:, indices[below_range]] = 0.0
77 | coefs = fit_method(X_, y_, *args, **kwargs)
78 | coefs[indices[above_range]] = high[above_range]
79 | coefs[indices[below_range]] = low[below_range]
80 |
81 | # check if new coeficients are now out of range
82 | above_range = coefs[indices] > high
83 | below_range = coefs[indices] < low
84 | if sum(above_range) > 0 or sum(below_range) > 0:
85 | warnings.warn(
86 | "Running the constrained fit has resulted in new out of"
87 | " range coefficients that were not so in the unconstrained"
88 | " fit.\n"
89 | "Double check the sensibility of the bounds you provided!",
90 | RuntimeWarning,
91 | )
92 |
93 | return coefs
94 |
95 | return wrapped
96 |
97 | return decorate_fit_method
98 |
99 |
100 | def r2_score_to_cv_error(
101 | score: float,
102 | y: NDArray,
103 | y_pred: NDArray,
104 | weights: NDArray[np.floating] | None = None,
105 | ):
106 | """Convert r2 score to cross-validation error.
107 |
108 | Args:
109 | score (float):
110 | An r2 score obtained from cross validation.
111 | y (NDArray): 1D
112 | The target vector.
113 | y_pred (NDArray): 1D
114 | The fitted vector.
115 | weights (NDArray): 1D
116 | The weights of each sample. Default to 1.
117 |
118 | Returns:
119 | float:
120 | The CV error
121 | """
122 | if weights is None:
123 | weights = np.ones(len(y))
124 | weights = np.array(weights)
125 | if len(weights) != len(y):
126 | raise ValueError("Weights given but not the same length as sample.")
127 | if np.any(weights < 0) or np.allclose(weights, 0):
128 | raise ValueError("Weights can not be negative or all zero.")
129 |
130 | denominator = (weights * (y - y_pred) ** 2).sum() / weights.sum()
131 | return np.sqrt((1 - score) * denominator)
132 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | from sklearn.datasets import make_regression, make_sparse_coded_signal
4 |
5 | SEED = 0
6 |
7 | # A few solvers to test for convex problems
8 | # ECOS sometimes fails for Adaptive group estimators, but is fast
9 | # SCS and CXVOPT are reliable, but slower
10 | # GUROBI is best
11 | CONVEX_SOLVERS = ["GUROBI", "ECOS"] # SCS, GUROBI, CVXOPT
12 |
13 | # ECOS_BB is open source alternative, but much slower, and can get things wrong
14 | MIQP_SOLVERS = ["GUROBI"] # SCIP fails some tests...
15 |
16 | # Set to small values bc gurobi non-commercial can not solver large model.
17 | N_FEATURES = [20, 30] # an overdetermined and underdetermined case
18 | N_SAMPLES = 25
19 | N_INFORMATIVE = 10
20 |
21 |
22 | @pytest.fixture(scope="package")
23 | def rng():
24 | """Seed and return an RNG for test reproducibility"""
25 | return np.random.default_rng(SEED)
26 |
27 |
28 | @pytest.fixture(params=CONVEX_SOLVERS)
29 | def solver(request):
30 | return request.param
31 |
32 |
33 | @pytest.fixture(params=MIQP_SOLVERS)
34 | def miqp_solver(request):
35 | return request.param
36 |
37 |
38 | @pytest.fixture(scope="package", params=N_FEATURES)
39 | def random_model(rng, request):
40 | """Returns a fully random set of X, y, and beta representing a linear model."""
41 | X, y, beta = make_regression(
42 | n_samples=N_SAMPLES,
43 | n_features=request.param,
44 | n_informative=N_INFORMATIVE,
45 | coef=True,
46 | random_state=rng.integers(0, 2**32 - 1),
47 | bias=10 * rng.random(),
48 | )
49 | return X, y, beta
50 |
51 |
52 | @pytest.fixture(scope="package", params=N_FEATURES)
53 | def random_energy_model(rng, request):
54 | """Returns a random set of X, y, and beta with added gaussian noise for a linear
55 | model with sparse coefficients beta decay (on average) exponentially with the index
56 | of the coefficient.
57 | """
58 | X = rng.random((N_SAMPLES, request.param))
59 | beta = np.zeros(request.param) # coefficients
60 | non_zero_ids = rng.choice(request.param, size=N_INFORMATIVE, replace=False)
61 | non_zero_ids = np.array(np.round(non_zero_ids), dtype=int)
62 |
63 | for idx in non_zero_ids:
64 | eci = 0
65 | mag = np.exp(-0.5 * idx)
66 | while np.isclose(eci, 0):
67 | eci = (rng.random() - 0.5) * 2 * mag
68 | beta[idx] = eci
69 | y = X @ beta + rng.normal(size=N_SAMPLES) * 2e-3 # fake energies
70 | return X, y, beta
71 |
72 |
73 | @pytest.fixture(scope="package")
74 | def sparse_coded_signal(rng):
75 | n_components, n_features, n_nonzero = 24, 12, 6
76 | y, X, beta = make_sparse_coded_signal(
77 | n_samples=1,
78 | n_components=n_components,
79 | n_features=n_features,
80 | n_nonzero_coefs=n_nonzero,
81 | random_state=rng.integers(0, 2**32 - 1),
82 | )
83 | return X, y, beta
84 |
85 |
86 | @pytest.fixture(params=[4, 6], scope="package")
87 | def random_model_with_groups(random_model, rng, request):
88 | """Add a correct set of groups to model."""
89 | X, y, beta = random_model
90 | n_groups = request.param
91 | n_active_groups = n_groups // 3 + 1
92 |
93 | n_features_per_group = len(beta) // n_groups
94 | active_group_inds = rng.choice(range(n_groups), size=n_active_groups, replace=False)
95 | inactive_group_inds = np.setdiff1d(range(n_groups), active_group_inds)
96 |
97 | groups = np.zeros(len(beta), dtype=int)
98 | active_feature_inds = np.where(abs(beta) > 0)[0]
99 | inactive_feature_inds = np.setdiff1d(np.arange(len(beta)), active_feature_inds)
100 |
101 | # set active groups
102 | for i in active_group_inds:
103 | if len(active_feature_inds) > n_features_per_group:
104 | group_inds = rng.choice(
105 | active_feature_inds, size=n_features_per_group, replace=False
106 | )
107 | else:
108 | group_inds = active_feature_inds
109 | groups[group_inds] = i
110 | active_feature_inds = np.setdiff1d(active_feature_inds, group_inds)
111 |
112 | # set inactive_groups
113 | for i in inactive_group_inds:
114 | if len(inactive_feature_inds) > n_features_per_group:
115 | group_inds = rng.choice(
116 | inactive_feature_inds, size=n_features_per_group, replace=False
117 | )
118 | else:
119 | group_inds = inactive_feature_inds
120 | groups[group_inds] = i
121 | inactive_feature_inds = np.setdiff1d(inactive_feature_inds, group_inds)
122 |
123 | return X, y, beta, groups
124 |
--------------------------------------------------------------------------------
/tests/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | minversion = 5.3.0
3 |
--------------------------------------------------------------------------------
/tests/test_common.py:
--------------------------------------------------------------------------------
1 | """General tests for all linear models.
2 |
3 | Simply check that they execute successfully on random data.
4 | """
5 |
6 | from inspect import getmembers, isclass, signature
7 |
8 | import cvxpy as cp
9 | import numpy as np
10 | import pytest
11 | from cvxpy.error import SolverError
12 | from sklearn.utils.estimator_checks import check_estimator
13 | from sklearn.utils.fixes import threadpool_info
14 |
15 | import sparselm.model as spm
16 | from sparselm.model._miqp._base import MIQPl0
17 |
18 | ESTIMATORS = getmembers(spm, isclass)
19 | ESTIMATOR_NAMES = [est[0] for est in ESTIMATORS]
20 | ESTIMATORS = [est[1] for est in ESTIMATORS] # type: ignore
21 |
22 |
23 | @pytest.fixture(params=ESTIMATORS, ids=ESTIMATOR_NAMES)
24 | def estimator(request):
25 | estimator_cls = request.param
26 | if issubclass(estimator_cls, MIQPl0):
27 | regressor = estimator_cls(fit_intercept=True, solver="SCIP")
28 | if hasattr(regressor, "eta"):
29 | regressor.eta = 0.01
30 | return regressor
31 | return estimator_cls(fit_intercept=True, solver="ECOS")
32 |
33 |
34 | @pytest.mark.parametrize("estimator_cls", ESTIMATORS)
35 | def test_general_fit(estimator_cls, random_model, rng):
36 | X, y, beta = random_model
37 |
38 | # instantiate the estimator
39 | sig = signature(estimator_cls)
40 |
41 | # check for necessary parameters
42 | args = {}
43 | if "groups" in sig.parameters:
44 | args["groups"] = rng.integers(0, 5, size=len(beta))
45 | if "group_list" in sig.parameters:
46 | args["group_list"] = [
47 | np.sort(rng.choice(range(5), replace=False, size=rng.integers(1, 5)))
48 | for _ in range(len(beta))
49 | ]
50 | if "sparse_bound" in sig.parameters:
51 | args["sparse_bound"] = 12
52 |
53 | estimator = estimator_cls(**args)
54 | estimator.fit(X, y)
55 | # assert a value of coefficients has been set correctly
56 | assert isinstance(estimator.coef_, np.ndarray)
57 | assert len(estimator.coef_) == len(beta)
58 | assert len(estimator.predict(X)) == len(y)
59 | assert estimator.intercept_ == 0.0
60 |
61 | estimator = estimator_cls(fit_intercept=True, **args)
62 | estimator.fit(X, y)
63 | # assert a value of coefficients has been set correctly
64 | assert isinstance(estimator.coef_, np.ndarray)
65 | assert len(estimator.coef_) == len(beta)
66 | assert len(estimator.predict(X)) == len(y)
67 | assert estimator.intercept_ != 0.0
68 |
69 |
70 | @pytest.mark.xfail(raises=SolverError)
71 | def test_add_constraints(estimator, random_model, rng):
72 | with pytest.raises(RuntimeError):
73 | estimator.add_constraints([cp.Variable(1) >= 0])
74 |
75 | X, y, beta = random_model
76 | estimator.generate_problem(X, y)
77 | n_constraints = len(estimator.canonicals_.constraints)
78 | # a dummy constraint
79 | estimator.add_constraints([estimator.canonicals_.beta >= 0.0])
80 | assert len(estimator.canonicals_.problem.constraints) == n_constraints + 1
81 | assert len(estimator.canonicals_.user_constraints) == 1
82 | assert len(estimator.canonicals_.constraints) == n_constraints
83 |
84 | # force cache data
85 | # ( solving the model sometimes fails and we only want to check that a warning is
86 | # raised )
87 | estimator.cached_X_ = X
88 | estimator.cached_y_ = y
89 |
90 | new_X = rng.random(X.shape)
91 | with pytest.warns(UserWarning):
92 | estimator.fit(new_X, y)
93 |
94 |
95 | @pytest.mark.xfail(
96 | any(
97 | True
98 | for info in threadpool_info()
99 | if info["internal_api"] == "openblas"
100 | # Prudently assume Prescott might be the architecture if it is unknown.
101 | and info.get("architecture", "prescott").lower() == "prescott"
102 | ),
103 | reason="On Github runner above is true and sklearn will throw an error by trying to create_mmemap_backed_arrays "
104 | "with an estimator.",
105 | )
106 | def test_sklearn_compatible(estimator):
107 | """Test sklearn compatibility with no parameter inputs."""
108 | check_estimator(estimator)
109 |
--------------------------------------------------------------------------------
/tests/test_dataset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import numpy.testing as npt
3 | import pytest
4 |
5 | from sparselm.dataset import make_group_regression
6 |
7 |
8 | @pytest.mark.parametrize("n_informative_groups", [5, 20])
9 | @pytest.mark.parametrize("n_features_per_group", [5, 4 * list(range(2, 7))])
10 | @pytest.mark.parametrize("frac_informative_in_group", [1.0, 0.5])
11 | @pytest.mark.parametrize("shuffle", [True, False])
12 | @pytest.mark.parametrize("coef", [True, False])
13 | def test_make_group_regression(
14 | n_informative_groups, n_features_per_group, frac_informative_in_group, shuffle, coef
15 | ):
16 | model = make_group_regression(
17 | n_informative_groups=n_informative_groups,
18 | n_features_per_group=n_features_per_group,
19 | frac_informative_in_group=frac_informative_in_group,
20 | shuffle=shuffle,
21 | coef=coef,
22 | )
23 |
24 | assert len(model) == 4 if coef else 3
25 |
26 | if coef:
27 | X, y, groups, coefs = model
28 | else:
29 | X, y, groups = model
30 |
31 | if not isinstance(n_features_per_group, list):
32 | n_features_per_group = [n_features_per_group] * 20
33 |
34 | n_features = (
35 | sum(n_features_per_group)
36 | if isinstance(n_features_per_group, list)
37 | else 20 * n_features_per_group
38 | )
39 |
40 | assert X.shape == (100, n_features)
41 | assert y.shape == (100,)
42 | assert groups.shape == (n_features,)
43 | assert len(np.unique(groups)) == 20
44 |
45 | if coef:
46 | n_informative = sum(
47 | round(frac_informative_in_group * n_features_per_group[i])
48 | for i in range(n_informative_groups)
49 | )
50 |
51 | assert coefs.shape == (n_features,)
52 | assert sum(coef > 0 for coef in coefs) == n_informative
53 | npt.assert_array_almost_equal(np.dot(X, coefs), y)
54 |
55 | if shuffle:
56 | # check that not all groups are lumped together
57 | assert sum(np.diff(groups) == 0) < 20 - 1
58 |
59 | # check warning
60 | with pytest.warns(UserWarning):
61 | make_group_regression(frac_informative_in_group=1 / 100)
62 |
--------------------------------------------------------------------------------
/tests/test_lasso.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import numpy.testing as npt
3 | import pytest
4 | from cvxpy.error import SolverError
5 |
6 | from sparselm.model import (
7 | AdaptiveGroupLasso,
8 | AdaptiveLasso,
9 | AdaptiveOverlapGroupLasso,
10 | AdaptiveRidgedGroupLasso,
11 | AdaptiveSparseGroupLasso,
12 | GroupLasso,
13 | Lasso,
14 | OverlapGroupLasso,
15 | SparseGroupLasso,
16 | )
17 |
18 | ADAPTIVE_ESTIMATORS = [
19 | AdaptiveLasso,
20 | AdaptiveGroupLasso,
21 | AdaptiveSparseGroupLasso,
22 | AdaptiveOverlapGroupLasso,
23 | AdaptiveRidgedGroupLasso,
24 | ]
25 |
26 | THRESHOLD = 1e-8 # relative threshold
27 |
28 |
29 | def test_lasso_toy():
30 | # Borrowed from sklearn tests
31 | # Test Lasso on a toy example for various values of alpha.
32 | # When validating this against glmnet notice that glmnet divides it
33 | # against nobs.
34 |
35 | X = [[-1], [0], [1]]
36 | Y = [-1, 0, 1] # just a straight line
37 | T = [[2], [3], [4]] # test sample
38 |
39 | lasso = Lasso(alpha=1e-8)
40 | lasso.fit(X, Y)
41 | pred = lasso.predict(T)
42 | npt.assert_array_almost_equal(lasso.coef_, [1])
43 | npt.assert_array_almost_equal(pred, [2, 3, 4])
44 |
45 | lasso = Lasso(alpha=0.1)
46 | lasso.fit(X, Y)
47 | pred = lasso.predict(T)
48 | npt.assert_array_almost_equal(lasso.coef_, [0.85])
49 | npt.assert_array_almost_equal(pred, [1.7, 2.55, 3.4])
50 |
51 | lasso = Lasso(alpha=0.5)
52 | lasso.fit(X, Y)
53 | pred = lasso.predict(T)
54 | npt.assert_array_almost_equal(lasso.coef_, [0.25])
55 | npt.assert_array_almost_equal(pred, [0.5, 0.75, 1.0])
56 |
57 | lasso = Lasso(alpha=1.0)
58 | lasso.fit(X, Y)
59 | pred = lasso.predict(T)
60 | npt.assert_array_almost_equal(lasso.coef_, [0.0])
61 | npt.assert_array_almost_equal(pred, [0, 0, 0])
62 |
63 |
64 | def test_lasso_non_float_y():
65 | # Borrowed from sklearn tests
66 | X = [[0, 0], [1, 1], [-1, -1]]
67 | y = [0, 1, 2]
68 | y_float = [0.0, 1.0, 2.0]
69 |
70 | lasso = Lasso(fit_intercept=False)
71 | lasso.fit(X, y)
72 | lasso_float = Lasso(fit_intercept=False)
73 | lasso_float.fit(X, y_float)
74 | npt.assert_array_equal(lasso.coef_, lasso_float.coef_)
75 |
76 |
77 | def test_adaptive_lasso_sparser(random_model):
78 | X, y, _ = random_model
79 | lasso = Lasso(fit_intercept=True)
80 | alasso = AdaptiveLasso(fit_intercept=True)
81 |
82 | lasso.fit(X, y)
83 | alasso.fit(X, y)
84 |
85 | assert sum(abs(lasso.coef_) > THRESHOLD) >= sum(abs(alasso.coef_) > THRESHOLD)
86 |
87 |
88 | # TODO flakey test, depends on THRESHOLD value
89 | @pytest.mark.xfail(raises=SolverError)
90 | @pytest.mark.parametrize(
91 | "standardize",
92 | [True, False],
93 | ) # standardize=False leads to failures
94 | def test_group_lasso(random_model_with_groups, solver, standardize):
95 | X, y, _, groups = random_model_with_groups
96 |
97 | aglasso = AdaptiveGroupLasso(
98 | groups=groups,
99 | alpha=0.1,
100 | fit_intercept=True,
101 | standardize=standardize,
102 | solver=solver,
103 | )
104 | aglasso.fit(X, y)
105 |
106 | # check that if all coefs in groups are consistent
107 | for gid in np.unique(groups):
108 | m = np.max(abs(aglasso.coef_))
109 | all_active = (abs(aglasso.coef_[groups == gid]) > m * THRESHOLD).all()
110 | all_inactive = (abs(aglasso.coef_[groups == gid]) <= m * THRESHOLD).all()
111 | assert all_active or all_inactive
112 |
113 |
114 | @pytest.mark.xfail(raises=SolverError)
115 | @pytest.mark.parametrize(
116 | "standardize",
117 | [True, False],
118 | )
119 | def test_group_lasso_weights(random_model_with_groups, solver, standardize):
120 | X, y, _, groups = random_model_with_groups
121 |
122 | group_weights = np.ones(len(np.unique(groups)))
123 |
124 | aglasso = AdaptiveGroupLasso(
125 | groups=groups,
126 | alpha=0.1,
127 | group_weights=group_weights,
128 | fit_intercept=True,
129 | standardize=standardize,
130 | solver=solver,
131 | )
132 | aglasso.fit(X, y)
133 |
134 | rglasso = AdaptiveRidgedGroupLasso(
135 | groups=groups,
136 | alpha=0.1,
137 | group_weights=group_weights,
138 | fit_intercept=True,
139 | standardize=standardize,
140 | solver=solver,
141 | )
142 | rglasso.fit(X, y)
143 |
144 | # check that if all coefs in groups are consistent
145 | for gid in np.unique(groups):
146 | m = np.max(abs(aglasso.coef_))
147 |
148 | all_active = (abs(aglasso.coef_[groups == gid]) > m * THRESHOLD).all()
149 | all_inactive = (abs(aglasso.coef_[groups == gid]) <= m * THRESHOLD).all()
150 | assert all_active or all_inactive
151 |
152 | m = np.max(abs(rglasso.coef_))
153 | all_active = (abs(rglasso.coef_[groups == gid]) > m * THRESHOLD).all()
154 | all_inactive = (abs(rglasso.coef_[groups == gid]) <= m * THRESHOLD).all()
155 | assert all_active or all_inactive
156 |
157 |
158 | @pytest.mark.xfail(raises=SolverError)
159 | @pytest.mark.parametrize("estimator_cls", ADAPTIVE_ESTIMATORS)
160 | def test_adaptive_weights(estimator_cls, random_model_with_groups, solver, rng):
161 | X, y, beta, groups = random_model_with_groups
162 |
163 | if estimator_cls.__name__ == "AdaptiveLasso":
164 | estimator = estimator_cls(solver=solver)
165 | elif estimator_cls.__name__ == "AdaptiveOverlapGroupLasso":
166 | gids = np.unique(groups)
167 | group_list = [
168 | rng.choice(gids, replace=False, size=rng.integers(1, 3))
169 | for _ in range(len(beta))
170 | ]
171 | estimator = estimator_cls(group_list=group_list, solver=solver)
172 | else:
173 | estimator = estimator_cls(groups=groups, solver=solver)
174 |
175 | # force generating weights
176 | estimator.generate_problem(X, y)
177 |
178 | if estimator_cls.__name__ == "AdaptiveSparseGroupLasso":
179 | weights = [
180 | estimator.canonicals_.parameters.adaptive_coef_weights.value.copy(),
181 | estimator.canonicals_.parameters.adaptive_group_weights.value.copy(),
182 | ]
183 | else:
184 | weights = [estimator.canonicals_.parameters.adaptive_weights.value.copy()]
185 |
186 | estimator.fit(X, y)
187 |
188 | if estimator_cls.__name__ == "AdaptiveSparseGroupLasso":
189 | new_weights = [
190 | estimator.canonicals_.parameters.adaptive_coef_weights.value.copy(),
191 | estimator.canonicals_.parameters.adaptive_group_weights.value.copy(),
192 | ]
193 | else:
194 | new_weights = [estimator.canonicals_.parameters.adaptive_weights.value.copy()]
195 |
196 | # simply check that the weights are updated.
197 | # TODO a better check would be to check that weights for active groups/coefs
198 | # are smaller than those of inactive ones
199 | for nw, w in zip(new_weights, weights):
200 | assert not any(nw_i == pytest.approx(w_i) for nw_i, w_i in zip(nw, w))
201 |
202 |
203 | def test_bad_inputs(random_model_with_groups, rng):
204 | X, y, beta, groups = random_model_with_groups
205 | bad_groups = rng.integers(0, 6, size=len(beta) - 1)
206 | group_weights = np.ones(len(np.unique(bad_groups)))
207 |
208 | # test that warns when no groups given
209 | with pytest.warns(UserWarning):
210 | gl = GroupLasso()
211 | gl.fit(X, y)
212 |
213 | with pytest.warns(UserWarning):
214 | gl = OverlapGroupLasso()
215 | gl.fit(X, y)
216 |
217 | # bad groups
218 | with pytest.raises(ValueError):
219 | gl = GroupLasso(bad_groups, group_weights=group_weights)
220 | gl.fit(X, y)
221 |
222 | with pytest.raises(TypeError):
223 | gl = GroupLasso("groups", group_weights=group_weights)
224 | gl.fit(X, y)
225 |
226 | # bad group_weights
227 | with pytest.raises(ValueError):
228 | group_weights = np.ones(len(np.unique(bad_groups)) - 1)
229 | gl = GroupLasso(bad_groups, group_weights=group_weights)
230 | gl.fit(X, y)
231 |
232 | with pytest.raises(TypeError):
233 | gl = GroupLasso(groups, group_weights="weights")
234 | gl.fit(X, y)
235 |
236 | # bad l1_ratio
237 | lasso = SparseGroupLasso(groups)
238 | with pytest.raises(ValueError):
239 | lasso.l1_ratio = -1.0
240 | lasso.fit(X, y)
241 |
242 | with pytest.raises(ValueError):
243 | lasso.l1_ratio = 2.0
244 | lasso.fit(X, y)
245 |
246 | with pytest.raises(ValueError):
247 | sgl = SparseGroupLasso(groups, l1_ratio=-1.0)
248 | sgl.fit(X, y)
249 |
250 | with pytest.raises(ValueError):
251 | sgl = SparseGroupLasso(groups, l1_ratio=2.0)
252 | sgl.fit(X, y)
253 |
254 | # test that it warns
255 | with pytest.warns(UserWarning):
256 | sgl = SparseGroupLasso(groups, l1_ratio=0.0)
257 | sgl.fit(X, y)
258 | with pytest.warns(UserWarning):
259 | sgl = SparseGroupLasso(groups, l1_ratio=1.0)
260 | sgl.fit(X, y)
261 |
262 |
263 | @pytest.mark.parametrize("estimator_cls", ADAPTIVE_ESTIMATORS)
264 | def test_set_parameters(estimator_cls, random_model_with_groups, rng):
265 | X, y, beta, groups = random_model_with_groups
266 |
267 | if estimator_cls.__name__ == "AdaptiveLasso":
268 | estimator = estimator_cls()
269 | elif estimator_cls.__name__ == "AdaptiveOverlapGroupLasso":
270 | gids = np.unique(groups)
271 | group_list = [
272 | rng.choice(gids, replace=False, size=rng.integers(1, 3))
273 | for _ in range(len(beta))
274 | ]
275 | estimator = estimator_cls(group_list=group_list)
276 | else:
277 | estimator = estimator_cls(groups=groups)
278 |
279 | estimator.alpha = 0.5
280 | assert estimator.alpha == 0.5
281 | estimator.generate_problem(X, y)
282 | assert estimator.canonicals_.parameters.alpha.value == 0.5
283 |
284 | if hasattr(estimator, "l1_ratio"):
285 | # default l1_ratio is 0.5
286 | assert estimator.canonicals_.parameters.lambda1.value == 0.5 * 0.5
287 | assert estimator.canonicals_.parameters.lambda2.value == 0.5 * 0.5
288 |
289 | estimator.l1_ratio = 0.25
290 | estimator._set_param_values()
291 | assert estimator.l1_ratio == 0.25
292 | assert estimator.canonicals_.parameters.lambda1.value == 0.25 * 0.5
293 | assert estimator.canonicals_.parameters.lambda2.value == 0.75 * 0.5
294 |
295 | if hasattr(estimator, "delta"):
296 | estimator.delta = (4.0,)
297 | estimator._set_param_values()
298 | npt.assert_array_equal(
299 | estimator.canonicals_.parameters.delta.value,
300 | 4.0 * np.ones(len(np.unique(groups))),
301 | )
302 |
303 | estimator.delta = 3.0 * np.ones(len(np.unique(groups)))
304 | estimator._set_param_values()
305 | npt.assert_array_equal(estimator.delta, 3.0 * np.ones(len(np.unique(groups))))
306 | npt.assert_array_equal(
307 | estimator.canonicals_.parameters.delta.value,
308 | 3.0 * np.ones(len(np.unique(groups))),
309 | )
310 |
--------------------------------------------------------------------------------
/tests/test_miqp.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import numpy.testing as npt
3 | import pytest
4 |
5 | from sparselm.model import (
6 | L2L0,
7 | BestSubsetSelection,
8 | RegularizedL0,
9 | RidgedBestSubsetSelection,
10 | )
11 |
12 | # exclude L1L0 since it breaks hierarchy constraints...
13 | MIQP_estimators = [
14 | BestSubsetSelection,
15 | RidgedBestSubsetSelection,
16 | RegularizedL0,
17 | L2L0,
18 | ]
19 |
20 | THRESHOLD = 1e-12
21 |
22 |
23 | def assert_hierarchy_respected(coef, slack_z, hierarchy, groups=None):
24 | groups = groups if groups is not None else np.arange(len(coef))
25 | group_ids = np.unique(groups)
26 | for grp_id, active, parents in zip(group_ids, slack_z, hierarchy):
27 | if active == 1: # all parents must also be active
28 | assert all(
29 | (abs(coef[groups == parent]) >= THRESHOLD).all() for parent in parents
30 | )
31 |
32 |
33 | def test_perfect_signal_recovery(sparse_coded_signal):
34 | X, y, beta = sparse_coded_signal
35 | X = X.T
36 |
37 | (idx,) = beta.nonzero()
38 |
39 | estimator = BestSubsetSelection(sparse_bound=np.count_nonzero(beta))
40 | estimator.fit(X, y)
41 |
42 | npt.assert_array_equal(idx, np.flatnonzero(estimator.coef_))
43 | npt.assert_array_almost_equal(beta, estimator.coef_)
44 |
45 | r_estimator = RidgedBestSubsetSelection(sparse_bound=np.count_nonzero(beta))
46 |
47 | # very low regularization should be the same
48 | r_estimator.eta = 1e-16
49 | r_estimator.fit(X, y)
50 | npt.assert_array_almost_equal(beta, r_estimator.coef_)
51 | npt.assert_array_equal(idx, np.flatnonzero(r_estimator.coef_))
52 | assert all(i in np.flatnonzero(r_estimator.coef_) for i in idx)
53 |
54 | # a bit higher regularization, check shrinkage
55 | coef = r_estimator.coef_.copy()
56 | r_estimator.eta = 1e-4
57 | r_estimator.fit(X, y)
58 | npt.assert_array_almost_equal(beta, r_estimator.coef_, decimal=1)
59 | assert np.linalg.norm(coef) > np.linalg.norm(r_estimator.coef_)
60 |
61 | # very sensitive to the value of alpha for exact results
62 | estimator = RegularizedL0(alpha=0.0008)
63 | estimator.fit(X, y)
64 |
65 | npt.assert_array_equal(idx, np.flatnonzero(estimator.coef_))
66 | npt.assert_array_almost_equal(beta, estimator.coef_, decimal=2)
67 |
68 |
69 | @pytest.mark.parametrize("estimator_cls", MIQP_estimators)
70 | def test_slack_variables(estimator_cls, random_model_with_groups, miqp_solver, rng):
71 | X, y, beta, groups = random_model_with_groups
72 |
73 | # ignore groups
74 | if "Subset" in estimator_cls.__name__:
75 | estimator = estimator_cls(sparse_bound=len(beta) // 2, solver=miqp_solver)
76 | else:
77 | estimator = estimator_cls(alpha=3.0, solver=miqp_solver)
78 |
79 | estimator.fit(X, y)
80 | for coef, active in zip(
81 | estimator.coef_, estimator.canonicals_.auxiliaries.z0.value
82 | ):
83 | if active == 1:
84 | assert abs(coef) >= THRESHOLD
85 | else:
86 | assert abs(coef) < THRESHOLD
87 |
88 | # now group hierarchy
89 | group_ids = np.sort(np.unique(groups))
90 | if "Subset" in estimator_cls.__name__:
91 | estimator = estimator_cls(
92 | groups, sparse_bound=len(group_ids) // 2, solver=miqp_solver
93 | )
94 | else:
95 | estimator = estimator_cls(groups, alpha=2.0, solver=miqp_solver)
96 |
97 | estimator.fit(X, y)
98 | for gid, active in zip(group_ids, estimator.canonicals_.auxiliaries.z0.value):
99 | if active:
100 | assert all(abs(estimator.coef_[groups == gid]) >= THRESHOLD)
101 | else:
102 | assert all(abs(estimator.coef_[groups == gid]) < THRESHOLD)
103 |
104 |
105 | @pytest.mark.parametrize("estimator_cls", MIQP_estimators)
106 | def test_singleton_hierarchy(estimator_cls, random_model, miqp_solver, rng):
107 | X, y, beta = random_model
108 | (idx,) = beta.nonzero()
109 |
110 | # ignore groups, single covariate hierarchy
111 | if "Subset" in estimator_cls.__name__:
112 | estimator = estimator_cls(sparse_bound=len(beta) // 2, solver=miqp_solver)
113 | else:
114 | estimator = estimator_cls(alpha=2.0, solver=miqp_solver)
115 |
116 | fully_chained = [[len(beta) - 1]] + [[i] for i in range(0, len(beta) - 1)]
117 | estimator.hierarchy = fully_chained
118 | estimator.fit(X, y)
119 |
120 | # bound is set lower than number of coefs so all must be zero in BestSubset
121 | if any(estimator.coef_ == 0):
122 | assert all(estimator.coef_ == 0)
123 | else:
124 | assert all(estimator.coef_ != 0)
125 | assert_hierarchy_respected(
126 | estimator.coef_, estimator.canonicals_.auxiliaries.z0.value, fully_chained
127 | )
128 |
129 | hierarchy = []
130 | for i in range(len(beta)):
131 | # everything depends on 1st nonzero coef
132 | if i != idx[0]:
133 | hierarchy.append([idx[0]])
134 | else:
135 | hierarchy.append([])
136 | # first half of remaining depends on 2nd nonzero
137 | if 0 < i < len(beta) // 2 and i != idx[1]:
138 | hierarchy[i].append(idx[1])
139 | # second half of remaining on 3rd nonzero
140 | if len(beta) // 2 <= i and i != idx[2]:
141 | hierarchy[i].append(idx[2])
142 |
143 | estimator.hierarchy = hierarchy
144 | # TODO make hierarchy and other non cp.Parameter params reset problem if reset
145 | estimator.problem = None
146 | estimator.fit(X, y)
147 | assert_hierarchy_respected(
148 | estimator.coef_, estimator.canonicals_.auxiliaries.z0.value, hierarchy
149 | )
150 |
151 |
152 | @pytest.mark.parametrize("estimator_cls", MIQP_estimators)
153 | def test_group_hierarchy(estimator_cls, random_model_with_groups, miqp_solver, rng):
154 | X, y, beta, groups = random_model_with_groups
155 | (idx,) = beta.nonzero()
156 |
157 | # now group hierarchy
158 | group_ids = np.unique(groups)
159 | if "Subset" in estimator_cls.__name__:
160 | estimator = estimator_cls(
161 | groups, sparse_bound=len(group_ids) // 2, solver=miqp_solver
162 | )
163 | else:
164 | estimator = estimator_cls(groups, alpha=3.0, solver=miqp_solver)
165 |
166 | fully_chained = [[group_ids[-1]]] + [
167 | [group_ids[i]] for i in range(0, len(group_ids) - 1)
168 | ]
169 | estimator.hierarchy = fully_chained
170 | estimator.fit(X, y)
171 |
172 | # bound is set lower than number of coefs so all must be zero in BestSubset
173 | if any(estimator.coef_ == 0):
174 | assert all(estimator.coef_ == 0)
175 | else:
176 | assert all(estimator.coef_ != 0)
177 |
178 | assert_hierarchy_respected(
179 | estimator.coef_,
180 | estimator.canonicals_.auxiliaries.z0.value,
181 | fully_chained,
182 | groups=groups,
183 | )
184 |
185 | # pick two groups with nozero coefs
186 | grp1 = groups[idx[0]]
187 | while (grp2 := groups[rng.choice(idx)]) == grp1:
188 | pass
189 |
190 | hierarchy = []
191 | for i in range(len(group_ids)):
192 | # everything depends on 1st nonzero coef
193 | if i != grp1:
194 | hierarchy.append([grp1])
195 | else:
196 | hierarchy.append([])
197 | # first half of remaining depends on 2nd nonzero
198 | if 0 < i < len(group_ids) // 2 and i not in [grp1, grp2]:
199 | hierarchy[i].append(grp2)
200 |
201 | estimator.problem = None # TODO also remove this...
202 | estimator.hierarchy = hierarchy
203 | estimator.fit(X, y)
204 |
205 | assert_hierarchy_respected(
206 | estimator.coef_,
207 | estimator.canonicals_.auxiliaries.z0.value,
208 | hierarchy,
209 | groups=groups,
210 | )
211 |
212 |
213 | def test_set_parameters(random_model):
214 | X, y, beta = random_model
215 | estimator = RidgedBestSubsetSelection(sparse_bound=1, eta=1.0)
216 | estimator.sparse_bound = 2
217 | estimator.fit(X, y)
218 | assert estimator.canonicals_.parameters.sparse_bound.value == 2
219 | assert estimator.canonicals_.parameters.eta.value == 1.0
220 |
221 | estimator.eta = 0.5
222 | estimator.fit(X, y)
223 | assert estimator.canonicals_.parameters.eta.value == 0.5
224 |
225 |
226 | def test_bad_input(random_model):
227 | X, y, beta = random_model
228 |
229 | # bad sparse_bound
230 | estimator = BestSubsetSelection(sparse_bound=-1)
231 | with pytest.raises(ValueError):
232 | estimator.fit(X, y)
233 |
234 | # bad eta
235 | estimator = RidgedBestSubsetSelection(eta=-1.0)
236 | with pytest.raises(ValueError):
237 | estimator.fit(X, y)
238 |
--------------------------------------------------------------------------------
/tests/test_model_selection.py:
--------------------------------------------------------------------------------
1 | import cvxpy as cp
2 | import numpy as np
3 | import pytest
4 | from sklearn.datasets import make_regression
5 | from sklearn.linear_model import Lasso
6 | from sklearn.model_selection import KFold, train_test_split
7 |
8 | from sparselm.model import L1L0, L2L0
9 | from sparselm.model_selection import GridSearchCV, LineSearchCV
10 |
11 | ALL_CRITERION = ["max_score", "one_std_score"]
12 | # Currently we will only test on mixedL0
13 | ALL_ESTIMATORS = [L2L0, L1L0]
14 | ONLY_L2L0 = [L2L0]
15 |
16 |
17 | @pytest.fixture(scope="module")
18 | def param_grid():
19 | # Test on multiple grids
20 | return [
21 | {"alpha": [0.01, 0.1], "eta": [0.03, 0.3]},
22 | {"alpha": [0.02, 0.2], "eta": [0.04, 0.4]},
23 | ]
24 |
25 |
26 | def test_solver():
27 | # Check that your solvers can work well.
28 | # Non-academic, non-commercial Gurobi can not solve large scale model > 100 params.
29 | # ECOS_BB is significantly slower, so use gurobi if possible!
30 | x = cp.Variable(10, integer=True)
31 | obj = cp.sum_squares(x)
32 | cons = [x <= 3, x >= -3]
33 | prob = cp.Problem(cp.Minimize(obj), cons)
34 |
35 | if "GUROBI" in cp.installed_solvers():
36 | result = prob.solve(solver="GUROBI")
37 | else:
38 | result = prob.solve(solver="ECOS_BB")
39 |
40 | assert x.value is not None
41 | assert result is not None
42 |
43 |
44 | @pytest.fixture(scope="module", params=ALL_ESTIMATORS)
45 | def estimator(random_energy_model, request):
46 | ecis = random_energy_model[2]
47 | # Each correlation function as its own group. Doing ordinary hierarchy.
48 | groups = list(range(len(ecis)))
49 | if "GUROBI" in cp.installed_solvers():
50 | return request.param(groups=groups, solver="GUROBI")
51 | else:
52 | return request.param(groups=groups, solver="ECOS_BB")
53 | # return request.param(solver="ECOS_BB")
54 |
55 |
56 | @pytest.fixture(scope="module", params=ONLY_L2L0)
57 | def mixed_l2l0_est(random_energy_model, request):
58 | ecis = random_energy_model[2]
59 | # Each correlation function as its own group. Doing ordinary hierarchy.
60 | groups = list(range(len(ecis)))
61 | if "GUROBI" in cp.installed_solvers():
62 | return request.param(groups=groups, solver="GUROBI")
63 | else:
64 | return request.param(groups=groups, solver="ECOS_BB")
65 | # return request.param(solver="ECOS_BB")
66 |
67 |
68 | def test_mixed_l0_wts(random_energy_model, mixed_l2l0_est, rng):
69 | femat, energies, _ = random_energy_model
70 | mixed_l2l0_est.eta = 1e-5
71 | mixed_l2l0_est.fit(X=femat, y=energies)
72 | energies_pred = mixed_l2l0_est.predict(femat)
73 | assert energies_pred is not None
74 | mixed_l2l0_est.tikhonov_w = 1000 * rng.random(femat.shape[1])
75 | mixed_l2l0_est.fit(X=femat, y=energies)
76 | energies_pred_wtd = mixed_l2l0_est.predict(femat)
77 | assert energies_pred_wtd is not None
78 |
79 |
80 | @pytest.fixture(scope="module", params=ALL_CRITERION)
81 | def grid_search(estimator, param_grid, request):
82 | grid_searcher = GridSearchCV(
83 | estimator, param_grid, opt_selection_method=request.param
84 | )
85 | return grid_searcher
86 |
87 |
88 | @pytest.fixture(scope="module", params=ALL_CRITERION)
89 | def line_search(estimator, param_grid, request):
90 | # Multi-grids not supported in line search mode.
91 | param_grid_lines = sorted((key, values) for key, values in param_grid[0].items())
92 | line_searcher = LineSearchCV(
93 | estimator,
94 | param_grid_lines,
95 | opt_selection_method=request.param,
96 | n_iter=3,
97 | )
98 | return line_searcher
99 |
100 |
101 | def test_grid_search(random_energy_model, grid_search):
102 | femat, energies, _ = random_energy_model
103 | n_samples, n_features = femat.shape
104 | grid_search.fit(X=femat, y=energies)
105 | assert "best_params_" in vars(grid_search)
106 | best_params = grid_search.best_params_
107 | assert "alpha" in best_params and "eta" in best_params
108 | assert best_params["alpha"] in [0.01, 0.1, 0.02, 0.2]
109 | assert best_params["eta"] in [0.03, 0.3, 0.04, 0.4]
110 |
111 | assert grid_search.best_score_ <= 1
112 | assert "coef_" in vars(grid_search.best_estimator_)
113 | assert "intercept_" in vars(grid_search.best_estimator_)
114 | energies_pred = grid_search.predict(femat)
115 | rmse = np.sum((energies - energies_pred) ** 2) / len(energies)
116 | # Overfit.
117 | if n_samples < n_features:
118 | assert -grid_search.best_score_ >= rmse
119 |
120 |
121 | # Guarantees that one-std rule always select larger params than max score.
122 | def test_onestd():
123 | success = 0
124 | for _ in range(10):
125 | X, y, coef = make_regression(
126 | n_samples=200,
127 | n_features=100,
128 | n_informative=10,
129 | noise=40.0,
130 | bias=-15.0,
131 | coef=True,
132 | random_state=0,
133 | )
134 |
135 | X_train, X_test, y_train, y_test = train_test_split(
136 | X, y, test_size=0.25, random_state=0
137 | )
138 |
139 | # create estimators
140 | lasso = Lasso(fit_intercept=True)
141 |
142 | # create cv search objects for each estimator
143 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0)
144 | params = {"alpha": np.logspace(-1, 1, 10)}
145 |
146 | lasso_cv_std = GridSearchCV(
147 | lasso, params, opt_selection_method="one_std_score", cv=cv5, n_jobs=-1
148 | )
149 | lasso_cv_opt = GridSearchCV(
150 | lasso, params, opt_selection_method="max_score", cv=cv5, n_jobs=-1
151 | )
152 |
153 | # fit models on training data
154 | lasso_cv_std.fit(X_train, y_train)
155 | lasso_cv_opt.fit(X_train, y_train)
156 |
157 | correct_params = (
158 | lasso_cv_opt.best_params_["alpha"] <= lasso_cv_std.best_params_["alpha"]
159 | )
160 | sparsity_opt = np.sum(np.abs(lasso_cv_opt.best_estimator_.coef_) >= 1e-6)
161 | sparsity_std = np.sum(np.abs(lasso_cv_std.best_estimator_.coef_) >= 1e-6)
162 |
163 | if correct_params and sparsity_opt >= sparsity_std:
164 | success += 1
165 |
166 | # Allow some failure caused by randomness of CV splits.
167 | assert success >= 8
168 |
169 |
170 | def test_line_search(random_energy_model, line_search):
171 | femat, energies, _ = random_energy_model
172 | n_samples, n_features = femat.shape
173 | line_search.fit(X=femat, y=energies)
174 | assert "best_params_" in vars(line_search)
175 | best_params = line_search.best_params_
176 | assert "alpha" in best_params and "eta" in best_params
177 | assert best_params["alpha"] in [0.01, 0.1]
178 | assert best_params["eta"] in [0.03, 0.3]
179 |
180 | assert line_search.best_score_ <= 1
181 | assert "coef_" in vars(line_search.best_estimator_)
182 | assert "intercept_" in vars(line_search.best_estimator_)
183 | energies_pred = line_search.predict(femat)
184 | rmse = np.sum((energies - energies_pred) ** 2) / len(energies)
185 | # Overfit.
186 | if n_samples < n_features:
187 | assert -line_search.best_score_ >= rmse
188 |
--------------------------------------------------------------------------------
/tests/test_ols.py:
--------------------------------------------------------------------------------
1 | """Sanity checks: literally just copied from sklearn tests... """
2 |
3 | import numpy as np
4 | import numpy.testing as npt
5 | import pytest
6 | from sklearn.preprocessing import add_dummy_feature
7 |
8 | from sparselm.model import OrdinaryLeastSquares
9 |
10 |
11 | def test_linear_regression():
12 | # Test OrdinaryLeastSquares on a simple dataset.
13 | # a simple dataset
14 | X = [[1], [2]]
15 | Y = [1, 2]
16 |
17 | reg = OrdinaryLeastSquares()
18 | reg.fit(X, Y)
19 |
20 | npt.assert_array_almost_equal(reg.coef_, [1])
21 | npt.assert_array_almost_equal(reg.intercept_, [0])
22 | npt.assert_array_almost_equal(reg.predict(X), [1, 2])
23 |
24 | # test it also for degenerate input
25 | X = [[1]]
26 | Y = [0]
27 |
28 | reg = OrdinaryLeastSquares()
29 | reg.fit(X, Y)
30 | npt.assert_array_almost_equal(reg.coef_, [0])
31 | npt.assert_array_almost_equal(reg.intercept_, [0])
32 | npt.assert_array_almost_equal(reg.predict(X), [0])
33 |
34 |
35 | @pytest.mark.parametrize("fit_intercept", [True, False])
36 | def test_linear_regression_sample_weights(fit_intercept, rng):
37 | # It would not work with under-determined systems
38 | n_samples, n_features = 10, 8
39 |
40 | X = rng.normal(size=(n_samples, n_features))
41 | y = rng.normal(size=n_samples)
42 |
43 | sample_weight = 1.0 + rng.uniform(size=n_samples)
44 |
45 | # OLS with explicit sample_weight
46 | reg = OrdinaryLeastSquares(fit_intercept=fit_intercept)
47 | reg.fit(X, y, sample_weight=sample_weight)
48 | coefs1 = reg.coef_
49 | inter1 = reg.intercept_
50 |
51 | assert reg.coef_.shape == (X.shape[1],) # sanity checks
52 |
53 | # Closed form of the weighted least square
54 | # theta = (X^T W X)^(-1) @ X^T W y
55 | W = np.diag(sample_weight)
56 | X_aug = X if not fit_intercept else add_dummy_feature(X)
57 |
58 | Xw = X_aug.T @ W @ X_aug
59 | yw = X_aug.T @ W @ y
60 | coefs2 = np.linalg.solve(Xw, yw)
61 |
62 | if not fit_intercept:
63 | npt.assert_allclose(coefs1, coefs2)
64 | else:
65 | npt.assert_allclose(coefs1, coefs2[1:])
66 | npt.assert_allclose(inter1, coefs2[0])
67 |
68 |
69 | def test_fit_intercept():
70 | # Test assertions on betas shape.
71 | X2 = np.array([[0.38349978, 0.61650022], [0.58853682, 0.41146318]])
72 | X3 = np.array(
73 | [
74 | [0.27677969, 0.70693172, 0.01628859],
75 | [0.08385139, 0.20692515, 0.70922346],
76 | ]
77 | )
78 | y = np.array([1, 1])
79 |
80 | lr2_without_intercept = OrdinaryLeastSquares(fit_intercept=False).fit(X2, y)
81 | lr2_with_intercept = OrdinaryLeastSquares().fit(X2, y)
82 |
83 | lr3_without_intercept = OrdinaryLeastSquares(fit_intercept=False).fit(X3, y)
84 | lr3_with_intercept = OrdinaryLeastSquares().fit(X3, y)
85 |
86 | assert lr2_with_intercept.coef_.shape == lr2_without_intercept.coef_.shape
87 | assert lr3_with_intercept.coef_.shape == lr3_without_intercept.coef_.shape
88 | assert lr2_without_intercept.coef_.ndim == lr3_without_intercept.coef_.ndim
89 |
--------------------------------------------------------------------------------
/tests/test_stepwise.py:
--------------------------------------------------------------------------------
1 | """Test composite estimator class."""
2 |
3 | import numpy as np
4 | import numpy.testing as npt
5 | import pytest
6 | from sklearn.base import clone
7 | from sklearn.utils._param_validation import InvalidParameterError
8 |
9 | from sparselm.model import L2L0, Lasso
10 | from sparselm.model_selection import GridSearchCV
11 | from sparselm.stepwise import StepwiseEstimator
12 |
13 |
14 | def test_make_composite():
15 | # Test making a composite estimator.
16 | lasso1 = Lasso(fit_intercept=True, alpha=1.0)
17 | lasso2 = Lasso(fit_intercept=False, alpha=2.0)
18 | l2l0 = L2L0(groups=[0, 0, 1, 2], alpha=0.1, eta=4.0)
19 | steps = [("lasso1", lasso1), ("lasso2", lasso2), ("l2l0", l2l0)]
20 |
21 | scope1 = [0, 1, 8]
22 | scope2 = [2, 3]
23 | scope3 = [4, 5, 6, 7]
24 | estimator = StepwiseEstimator(steps, [scope1, scope2, scope3])
25 | # sklearn convention tests, need pandas.
26 | # Currently, not passing because conventional sklearn estimator should not have
27 | # fixed number of features.
28 | # check_estimator(estimator)
29 | assert estimator.steps[0][1].fit_intercept
30 | assert not estimator.steps[1][1].fit_intercept
31 | assert not estimator.steps[2][1].fit_intercept
32 |
33 | # check parameters. Nested estimator case not tested yet.
34 | params = estimator.get_params(deep=True)
35 | assert params["lasso1"].get_params(deep=True)["alpha"] == 1.0
36 | assert params["lasso2"].get_params(deep=True)["alpha"] == 2.0
37 | assert params["l2l0"].get_params(deep=True)["alpha"] == 0.1
38 | assert params["l2l0"].get_params(deep=True)["eta"] == 4.0
39 | assert params["lasso1__alpha"] == 1.0
40 | assert params["lasso2__alpha"] == 2.0
41 | assert params["l2l0__alpha"] == 0.1
42 | assert params["l2l0__eta"] == 4.0
43 |
44 | estimator.set_params(lasso2__alpha=0.5, l2l0__alpha=0.2, l2l0__eta=3.0)
45 | params = estimator.get_params(deep=True)
46 | assert params["lasso1"].get_params(deep=True)["alpha"] == 1.0
47 | assert params["lasso2"].get_params(deep=True)["alpha"] == 0.5
48 | assert params["l2l0"].get_params(deep=True)["alpha"] == 0.2
49 | assert params["l2l0"].get_params(deep=True)["eta"] == 3.0
50 | assert params["lasso1__alpha"] == 1.0
51 | assert params["lasso2__alpha"] == 0.5
52 | assert params["l2l0__alpha"] == 0.2
53 | assert params["l2l0__eta"] == 3.0
54 |
55 | # Test unsafe clone, such that composite can be used in the optimizers.
56 | # Currently, have to mute sanity check from origianl sklearn clone.
57 | cloned = clone(estimator)
58 | params = cloned.get_params(deep=True)
59 | assert params["lasso1"].get_params(deep=True)["alpha"] == 1.0
60 | assert params["lasso2"].get_params(deep=True)["alpha"] == 0.5
61 | assert params["l2l0"].get_params(deep=True)["alpha"] == 0.2
62 | assert params["l2l0"].get_params(deep=True)["eta"] == 3.0
63 | assert params["lasso1__alpha"] == 1.0
64 | assert params["lasso2__alpha"] == 0.5
65 | assert params["l2l0__alpha"] == 0.2
66 | assert params["l2l0__eta"] == 3.0
67 |
68 | # A searcher can also be put into stepwise.
69 | grid = GridSearchCV(lasso2, {"alpha": [0.01, 0.1, 1.0]})
70 | steps = [("lasso1", lasso1), ("lasso2", grid), ("l2l0", l2l0)]
71 | estimator = StepwiseEstimator(steps, [scope1, scope2, scope3])
72 | # check_estimator(estimator)
73 | params = estimator.get_params(deep=True)
74 | assert params["lasso1__alpha"] == 1.0
75 | assert params["l2l0__alpha"] == 0.2
76 | assert params["l2l0__eta"] == 3.0
77 | assert "lasso2__alpha" not in params
78 | assert params["lasso2__estimator__alpha"] == 0.5
79 |
80 |
81 | def test_toy_composite():
82 | lasso1 = Lasso(fit_intercept=True, alpha=1e-6)
83 | lasso2 = Lasso(fit_intercept=False, alpha=1e-6)
84 | grid = GridSearchCV(clone(lasso2), {"alpha": [1e-8, 1e-7, 1e-6]})
85 | bad_lasso2 = Lasso(fit_intercept=True, alpha=1e-6)
86 | l2l0 = L2L0(groups=[0, 0, 1, 2], alpha=0, eta=1e-9)
87 | steps = [("lasso1", lasso1), ("lasso2", lasso2), ("l2l0", l2l0)]
88 | steps2 = [("lasso1", clone(lasso1)), ("lasso2", grid), ("l2l0", clone(l2l0))]
89 | bad_steps = [("lasso1", lasso1), ("lasso2", bad_lasso2), ("l2l0", l2l0)]
90 |
91 | scope1 = [0, 1, 8]
92 | scope2 = [2, 3]
93 | scope3 = [4, 5, 6, 7]
94 | estimator = StepwiseEstimator(steps, [scope1, scope2, scope3])
95 | # Use grid search on lasso2.
96 | estimator2 = StepwiseEstimator(steps2, [scope1, scope2, scope3])
97 |
98 | bad_scope1 = [0, 1]
99 | bad_scope2 = [3, 4]
100 | bad_scope3 = [5, 6, 7, 8]
101 | bad_estimator1 = StepwiseEstimator(steps, [bad_scope1, bad_scope2, bad_scope3])
102 | bad_estimator2 = StepwiseEstimator(bad_steps, [scope1, scope2, scope3])
103 |
104 | w_test = np.random.normal(scale=2, size=9) * 0.2
105 | w_test[0] = 10
106 | w_test[-1] = 0.5
107 | # A bad feature matrix with too many features.
108 | bad_X = np.random.random(size=(20, 12))
109 | bad_X[:, 0] = 1
110 | with pytest.raises(ValueError):
111 | estimator.fit(bad_X, np.random.random(size=20))
112 | X = np.random.random(size=(20, 9))
113 | X[:, 0] = 1
114 | X[:, -1] = -8 * np.random.random(size=20)
115 | y = np.dot(X, w_test) + np.random.normal(scale=0.01, size=20)
116 |
117 | # Bad scopes.
118 | with pytest.raises(InvalidParameterError):
119 | bad_estimator1.fit(X, y)
120 | # Allow fit intercept in beyond the first estimator.
121 | with pytest.raises(InvalidParameterError):
122 | bad_estimator2.fit(X, y)
123 | # A correct estimator.
124 |
125 | def run_estimator_test(estimator_test):
126 | estimator_test.fit(X, y)
127 | # print("intercept:", estimator_test.intercept_)
128 | # print("coef:", estimator_test.coef_)
129 |
130 | assert estimator_test.intercept_ == estimator_test.steps[0][1].intercept_
131 | assert not np.any(np.isnan(estimator_test.coef_))
132 |
133 | assert not np.isclose(estimator_test.intercept_, 0)
134 |
135 | for (_, sub), scope in zip(
136 | estimator_test.steps, estimator_test.estimator_feature_indices
137 | ):
138 | if hasattr(sub, "estimator"):
139 | sub_coef = sub.best_estimator_.coef_
140 | else:
141 | sub_coef = sub.coef_
142 | npt.assert_array_almost_equal(sub_coef, estimator_test.coef_[scope])
143 | coef_1 = estimator_test.coef_.copy()
144 | intercept_1 = estimator_test.intercept_
145 |
146 | # Now do not fit intercept.
147 | estimator_test.steps[0][1].fit_intercept = False
148 | estimator_test.fit(X, y)
149 | coef_2 = estimator_test.coef_.copy()
150 | intercept_2 = estimator_test.intercept_
151 | assert np.isclose(intercept_2, 0)
152 |
153 | # Do some naive assertion on the fitted coefficients.
154 | assert abs(coef_1[0] + intercept_1 - 10) / 10 <= 0.1
155 | assert abs(coef_2[0] - 10) / 10 <= 0.1
156 | # assert np.linalg.norm(coef_2 - w_test) / np.linalg.norm(w_test) <= 0.4
157 |
158 | total_y = np.zeros(len(y))
159 | for (_, sub_estimator_test), sub_scope in zip(
160 | estimator_test.steps, estimator_test.estimator_feature_indices
161 | ):
162 | total_y += sub_estimator_test.predict(X[:, sub_scope])
163 | npt.assert_array_almost_equal(estimator_test.predict(X), total_y)
164 | npt.assert_array_almost_equal(
165 | np.dot(X, estimator_test.coef_) + estimator_test.intercept_, total_y
166 | )
167 |
168 | # Either estimators should be able to work.
169 | run_estimator_test(estimator)
170 | run_estimator_test(estimator2)
171 |
--------------------------------------------------------------------------------
/tests/test_tools.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from functools import partial
3 |
4 | import numpy.testing as npt
5 | import pytest
6 |
7 | from sparselm.model import OrdinaryLeastSquares
8 | from sparselm.tools import constrain_coefficients
9 |
10 |
11 | @pytest.mark.parametrize("test_number", range(5)) # run the test 5 times
12 | def test_constrain_coefficients(test_number, rng):
13 | n_samples, n_features = 10, 8
14 | X = rng.normal(size=(n_samples, n_features))
15 | y = rng.normal(size=n_samples)
16 | reg = OrdinaryLeastSquares(fit_intercept=True)
17 | reg.fit(X, y)
18 | coefs = reg.coef_
19 |
20 | def fit(X, y, reg):
21 | reg.fit(X, y)
22 | return reg.coef_
23 |
24 | # Test uniform low and high values
25 | inds = rng.choice(n_features, size=3, replace=False)
26 |
27 | with warnings.catch_warnings(record=True) as w:
28 | cstr_coefs = constrain_coefficients(inds, 2, 0)(partial(fit, reg=reg))(X, y)
29 |
30 | assert cstr_coefs.shape == coefs.shape
31 |
32 | # Check if warning was raised, meaning coefficients were not within range
33 | # in that case just test that the indeed that warning was raised.
34 | if len(w) > 0:
35 | with pytest.warns(RuntimeWarning):
36 | cstr_coefs = constrain_coefficients(inds, 2, 0)(partial(fit, reg=reg))(X, y)
37 | else:
38 | for i in inds:
39 | assert 0 <= cstr_coefs[i] <= 2
40 |
41 | @constrain_coefficients(inds, 2, 0)
42 | def fit_constrained1(X, y, reg):
43 | reg.fit(X, y)
44 | return reg.coef_
45 |
46 | cstr_coefs2 = fit_constrained1(X, y, reg=reg)
47 | npt.assert_almost_equal(cstr_coefs, cstr_coefs2)
48 |
49 | # Test different low and high values
50 | low = rng.random(size=3) - 0.5
51 | high = rng.random(size=3) + low
52 |
53 | with warnings.catch_warnings(record=True) as w:
54 | cstr_coefs = constrain_coefficients(inds, high, low)(partial(fit, reg=reg))(
55 | X, y
56 | )
57 |
58 | assert cstr_coefs.shape == coefs.shape
59 |
60 | # Check if warning was raised, meaning coefficients were not within range
61 | # in that case just test that the indeed that warning was raised.
62 | if len(w) > 0:
63 | with pytest.warns(RuntimeWarning):
64 | cstr_coefs = constrain_coefficients(inds, high, low)(partial(fit, reg=reg))(
65 | X, y
66 | )
67 | else:
68 | for i, l, h in zip(inds, low, high):
69 | assert l <= cstr_coefs[i] <= h
70 |
71 | @constrain_coefficients(inds, high, low)
72 | def fit_constrained2(X, y, reg):
73 | reg.fit(X, y)
74 | return reg.coef_
75 |
76 | cstr_coefs2 = fit_constrained2(X, y, reg=reg)
77 | npt.assert_almost_equal(cstr_coefs, cstr_coefs2)
78 |
79 | # just use high value
80 | with warnings.catch_warnings(record=True) as w:
81 | cstr_coefs = constrain_coefficients(inds, high=high)(partial(fit, reg=reg))(
82 | X, y
83 | )
84 |
85 | assert cstr_coefs.shape == coefs.shape
86 |
87 | # Check if warning was raised, meaning coefficients were not within range
88 | # in that case just test that the indeed that warning was raised.
89 | if len(w) > 0:
90 | with pytest.warns(RuntimeWarning):
91 | cstr_coefs = constrain_coefficients(inds, high=high)(partial(fit, reg=reg))(
92 | X, y
93 | )
94 | else:
95 | for i, h in zip(inds, high):
96 | assert cstr_coefs[i] <= h
97 |
98 | # just use low value
99 | with warnings.catch_warnings(record=True) as w:
100 | cstr_coefs = constrain_coefficients(inds, low=low)(partial(fit, reg=reg))(X, y)
101 |
102 | assert cstr_coefs.shape == coefs.shape
103 |
104 | # Check if warning was raised, meaning coefficients were not within range
105 | # in that case just test that the indeed that warning was raised.
106 | if len(w) > 0:
107 | with pytest.warns(RuntimeWarning):
108 | cstr_coefs = constrain_coefficients(inds, low=low)(partial(fit, reg=reg))(
109 | X, y
110 | )
111 | else:
112 | for i, l in zip(inds, low):
113 | assert l <= cstr_coefs[i]
114 |
115 |
116 | # TODO write this test
117 | def test_r2_score_to_cv_error():
118 | pass
119 |
--------------------------------------------------------------------------------