├── .github ├── dependabot.yml ├── release.yml └── workflows │ ├── build.yml │ ├── docs.yml │ ├── lint.yml │ ├── release.yml │ ├── test.yml │ └── update-precommit.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── docs ├── _static │ ├── logo-light.png │ └── logo.png ├── api.rst ├── conf.py ├── contributing.rst ├── index.rst ├── install.rst ├── license.rst ├── sparselm.model.rst ├── sparselm.model_selection.rst ├── sparselm.stepwise.rst └── sparselm.tools.rst ├── examples ├── README.rst ├── corr.npy ├── energy.npy ├── plot_adaptive.py ├── plot_chull.py ├── plot_gl_sgl.py ├── plot_line_search.py ├── plot_one_std.py ├── plot_sparse_signal.py ├── plot_stepwise.py └── structures.json ├── pyproject.toml ├── requirements.txt ├── src ├── requirements.txt └── sparselm │ ├── __init__.py │ ├── _utils │ ├── __init__.py │ └── validation.py │ ├── dataset.py │ ├── model │ ├── __init__.py │ ├── _adaptive_lasso.py │ ├── _base.py │ ├── _lasso.py │ ├── _miqp │ │ ├── __init__.py │ │ ├── _base.py │ │ ├── _best_subset.py │ │ └── _regularized_l0.py │ └── _ols.py │ ├── model_selection.py │ ├── stepwise.py │ └── tools.py └── tests ├── conftest.py ├── pytest.ini ├── test_common.py ├── test_dataset.py ├── test_lasso.py ├── test_miqp.py ├── test_model_selection.py ├── test_ols.py ├── test_stepwise.py └── test_tools.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | 4 | # Maintain dependencies for GitHub Actions 5 | - package-ecosystem: github-actions 6 | directory: "/" 7 | schedule: 8 | interval: weekly 9 | 10 | # Python dependencies 11 | - package-ecosystem: pip 12 | directory: "/" 13 | schedule: 14 | interval: weekly 15 | allow: 16 | - dependency-type: direct 17 | - dependency-type: indirect 18 | -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | changelog: 2 | exclude: 3 | authors: [dependabot, github-actions, pre-commit-ci] 4 | categories: 5 | - title: 🎉 New Features 6 | labels: [feature] 7 | - title: 🐛 Bug Fixes 8 | labels: [fix] 9 | - title: 🛠 Enhancements 10 | labels: [enhancement] 11 | - title: 📖 Documentation 12 | labels: [documentation] 13 | - title: 💡 Refactoring 14 | labels: [refactor] 15 | - title: 🧪 Tests 16 | labels: [tests] 17 | - title: 💥 Breaking Changes 18 | labels: [breaking] 19 | - title: 🔒 Security Fixes 20 | labels: [security] 21 | - title: 🤷‍♂️ Other Changes 22 | labels: ["*"] 23 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: [workflow_dispatch, workflow_call] 4 | 5 | jobs: 6 | 7 | build-sdist: 8 | name: Build sdist 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v4 13 | with: 14 | fetch-depth: 0 # Optional, use if you use setuptools_scm 15 | 16 | - name: Build 17 | run: pipx run build --sdist 18 | 19 | - uses: actions/upload-artifact@v4 20 | with: 21 | path: dist/*.tar.gz 22 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: build-documentation 2 | 3 | on: [workflow_dispatch, workflow_call] 4 | 5 | jobs: 6 | build-deploy: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v4 
11 | 12 | - name: Install pandoc 13 | run: sudo apt-get install pandoc 14 | 15 | - uses: actions/setup-python@v5 16 | with: 17 | python-version: 3.11 18 | 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install .[docs] 23 | 24 | - name: Build docs 25 | run: sphinx-build docs docs_build 26 | 27 | - name: Deploy 28 | uses: peaceiris/actions-gh-pages@v4 29 | with: 30 | github_token: ${{ secrets.GITHUB_TOKEN }} 31 | publish_dir: ./docs_build 32 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | pull_request: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | lint: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | max-parallel: 6 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: Set up Python 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: 3.11 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install .[dev] 28 | - name: flake8 29 | run: | 30 | flake8 --version 31 | flake8 --count --show-source --statistics src/sparselm 32 | # exit-zero treats all errors as warnings. 33 | flake8 --count --exit-zero --max-complexity=20 --statistics src/sparselm 34 | - name: black 35 | run: | 36 | black --version 37 | black --check --diff --color src/sparselm 38 | - name: pydocstyle 39 | run: | 40 | pydocstyle --version 41 | pydocstyle --count src/sparselm 42 | # Not in shape for this yet 43 | # - name: pylint 44 | # run: | 45 | # pylint sparselm 46 | #- name: mypy 47 | # run: | 48 | # mypy --version 49 | # rm -rf .mypy_cache 50 | # mypy sparselm 51 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | inputs: 7 | release-pypi: 8 | required: true 9 | type: boolean 10 | default: true 11 | workflow_dispatch: 12 | inputs: 13 | release-pypi: 14 | required: true 15 | type: boolean 16 | description: "if true a release is made on PyPI" 17 | 18 | jobs: 19 | test: 20 | uses: ./.github/workflows/test.yml 21 | secrets: inherit 22 | 23 | build: 24 | needs: test 25 | uses: ./.github/workflows/build.yml 26 | 27 | docs: 28 | needs: test 29 | uses: ./.github/workflows/docs.yml 30 | secrets: inherit 31 | 32 | release-pypi: 33 | needs: [build] 34 | runs-on: ubuntu-latest 35 | if: github.event.inputs.release-pypi == 'true' 36 | 37 | steps: 38 | - uses: actions/download-artifact@v4 39 | with: 40 | name: artifact 41 | path: dist 42 | 43 | - uses: pypa/gh-action-pypi-publish@release/v1 44 | with: 45 | verbose: true 46 | user: __token__ 47 | password: ${{ secrets.PYPI_API_TOKEN }} 48 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | env: 4 | scip-version: 8.0.0 5 | 6 | on: 7 | push: 8 | branches: 9 | - main 10 | 11 | pull_request: 12 | branches: 13 | - main 14 | 15 | workflow_call: 16 | 17 | jobs: 18 | test: 19 | runs-on: ubuntu-20.04 20 | strategy: 21 | max-parallel: 10 22 | matrix: 23 | python_version: ["3.9", "3.10", "3.11"] 24 | 25 | steps: 26 | - uses: actions/checkout@v4 27 | 28 | - name: Install dependencies 29 | run: | 30 | wget 
--quiet --no-check-certificate https://scipopt.org/download/release/SCIPOptSuite-${{ env.scip-version }}-Linux-ubuntu.deb 31 | sudo apt-get update && sudo apt install -y ./SCIPOptSuite-${{ env.scip-version }}-Linux-ubuntu.deb 32 | 33 | - name: Set up Python ${{ matrix.python_version }} 34 | uses: actions/setup-python@v5 35 | with: 36 | python-version: ${{ matrix.python_version }} 37 | 38 | - name: Install dependencies and package 39 | run: | 40 | python -m pip install --upgrade pip 41 | pip install cython 42 | pip install .[tests,dev] 43 | 44 | - name: Test with pytest 45 | run: | 46 | pytest tests --cov=sparselm --cov-report=xml 47 | 48 | - if: ${{ matrix.python_version == 3.11 && github.event_name == 'push' }} 49 | name: codacy-coverage-reporter 50 | uses: codacy/codacy-coverage-reporter-action@v1 51 | with: 52 | project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} 53 | coverage-reports: coverage.xml 54 | -------------------------------------------------------------------------------- /.github/workflows/update-precommit.yaml: -------------------------------------------------------------------------------- 1 | name: pre-commit-auto-update 2 | 3 | on: 4 | # midnight twice a month 5 | schedule: 6 | - cron: '0 0 14,28 * *' 7 | 8 | jobs: 9 | auto-update: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | 14 | - name: Set up Python 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: 3.9 18 | 19 | - name: Install pre-commit 20 | run: pip install pre-commit 21 | 22 | - name: Run pre-commit autoupdate 23 | run: pre-commit autoupdate 24 | 25 | - name: Create Pull Request 26 | uses: peter-evans/create-pull-request@v6.0.5 27 | with: 28 | token: ${{ secrets.GITHUB_TOKEN }} 29 | branch: update/pre-commit-autoupdate 30 | title: auto-update pre-commit hooks 31 | commit-message: auto-update pre-commit hooks 32 | body: Update versions of tools in pre-commit hooks to latest versions. 33 | labels: dependencies 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .idea 132 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | 4 | ci: 5 | autoupdate_schedule: monthly 6 | 7 | repos: 8 | - repo: https://github.com/pre-commit/pre-commit-hooks 9 | rev: v5.0.0 10 | hooks: 11 | - id: check-yaml 12 | - id: fix-encoding-pragma 13 | args: 14 | - --remove 15 | - id: end-of-file-fixer 16 | - id: trailing-whitespace 17 | - id: check-added-large-files 18 | args: ['--maxkb=500'] 19 | 20 | - repo: https://github.com/psf/black 21 | rev: 24.10.0 22 | hooks: 23 | - id: black 24 | 25 | - repo: https://github.com/asottile/blacken-docs 26 | rev: 1.19.1 27 | hooks: 28 | - id: blacken-docs 29 | additional_dependencies: [black==23.1.0] 30 | exclude: README.md 31 | 32 | - repo: https://github.com/pycqa/isort 33 | rev: 6.0.0 34 | hooks: 35 | - id: isort 36 | name: isort (python) 37 | args: 38 | - --profile=black 39 | 40 | - repo: https://github.com/asottile/pyupgrade 41 | rev: v3.19.1 42 | hooks: 43 | - id: pyupgrade 44 | args: [--py38-plus] 45 | 46 | - repo: https://github.com/PyCQA/autoflake 47 | rev: v2.3.1 48 | hooks: 49 | - id: autoflake 50 | args: 51 | - --in-place 52 | - --remove-unused-variables 53 | - --remove-all-unused-imports 54 | - --expand-star-imports 55 | - --ignore-init-module-imports 56 | 57 | - repo: https://github.com/pycqa/pydocstyle 58 | rev: 6.3.0 # pick a git hash / tag to point to 59 | hooks: 60 | - id: pydocstyle 61 | files: ^src/sparselm/ 62 | args: 63 | - --convention=google 64 | - --add-ignore=D107 65 | 66 
| - repo: https://github.com/pre-commit/pygrep-hooks 67 | rev: v1.10.0 68 | hooks: 69 | - id: rst-backticks 70 | - id: rst-directive-colons 71 | - id: rst-inline-touching-normal 72 | 73 | - repo: https://github.com/pre-commit/mirrors-mypy 74 | rev: 'v1.14.1' # Use the sha / tag you want to point at 75 | hooks: 76 | - id: mypy 77 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | :: 2 | 3 | sparse-lm Copyright (c) 2022, The Regents of the University of California, through 4 | Lawrence Berkeley National Laboratory (subject to receipt of any required approvals 5 | from the U.S. Dept. of Energy) and the University of California, Berkeley. 6 | All rights reserved. 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are met: 10 | 11 | (1) Redistributions of source code must retain the above copyright notice, 12 | this list of conditions and the following disclaimer. 13 | 14 | (2) Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | 18 | (3) Neither the name of the University of California, Lawrence Berkeley 19 | National Laboratory, U.S. Dept. of Energy, University of California, 20 | Berkeley nor the names of its contributors may be used to endorse or 21 | promote products derived from this software without specific prior written 22 | permission. 23 | 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 26 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 29 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 | POSSIBILITY OF SUCH DAMAGE. 36 | 37 | You are under no obligation whatsoever to provide any bug fixes, patches, 38 | or upgrades to the features, functionality or performance of the source 39 | code ("Enhancements") to anyone; however, if you choose to make your 40 | Enhancements available either publicly, or directly to Lawrence Berkeley 41 | National Laboratory, without imposing a separate written license agreement 42 | for such Enhancements, then you hereby grant the following license: a 43 | non-exclusive, royalty-free perpetual license to install, use, modify, 44 | prepare derivative works, incorporate into other computer software, 45 | distribute, and sublicense such enhancements or derivative works thereof, 46 | in binary and source code form. 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------

Sparse Linear Regression Models
===============================

[![test](https://github.com/CederGroupHub/sparse-lm/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/CederGroupHub/sparse-lm/actions/workflows/test.yml)
[![Codacy Badge](https://app.codacy.com/project/badge/Coverage/9b72db506d9c49b2a6c849348de8945e)](https://www.codacy.com/gh/CederGroupHub/sparse-lm/dashboard?utm_source=github.com&utm_medium=referral&utm_content=CederGroupHub/sparse-lm&utm_campaign=Badge_Coverage)
[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/CederGroupHub/sparse-lm/main.svg)](https://results.pre-commit.ci/latest/github/CederGroupHub/sparse-lm/main)
[![pypi version](https://img.shields.io/pypi/v/sparse-lm?color=blue)](https://pypi.org/project/sparse-lm)
[![Static Badge](https://img.shields.io/badge/python-3.9%2B-blue)](https://www.python.org/downloads/)
[![DOI](https://joss.theoj.org/papers/10.21105/joss.05867/status.svg)](https://doi.org/10.21105/joss.05867)


**sparse-lm** includes several (structured) sparse linear regression estimators that are absent in the
`sklearn.linear_model` module. The estimators in **sparse-lm** are designed to fit right into
[scikit-learn](https://scikit-learn.org/stable/index.html), but the underlying optimization problem is expressed and
solved by leveraging [cvxpy](https://www.cvxpy.org/).

---------------------------------------------------------------------------------------

Available regression models
---------------------------
- Lasso, Group Lasso, Overlap Group Lasso, Sparse Group Lasso & Ridged Group Lasso.
- Adaptive versions of Lasso, Group Lasso, Overlap Group Lasso, Sparse Group Lasso & Ridged Group Lasso.
- Best Subset Selection, Ridged Best Subset, L0, L1L0 & L2L0 (all with optional grouping of parameters).

Installation
------------
**sparse-lm** is available on [PyPI](https://pypi.org/project/sparse-lm/), and can be installed via pip:

```bash
pip install sparse-lm
```

Additional information on installation can be found in the documentation [here](https://cedergrouphub.github.io/sparse-lm/install.html).

Basic usage
-----------
If you already use **scikit-learn**, using **sparse-lm** will be very easy. Just use any
model like you would any linear model in **scikit-learn**:

```python
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import GridSearchCV
from sparselm.model import AdaptiveLasso

X, y = make_regression(n_samples=100, n_features=80, n_informative=10, random_state=0)
alasso = AdaptiveLasso(fit_intercept=False)
param_grid = {'alpha': np.logspace(-8, 2, 10)}

cvsearch = GridSearchCV(alasso, param_grid)
cvsearch.fit(X, y)
print(cvsearch.best_params_)
```

For more details on use and functionality have a look at the
[examples](https://cedergrouphub.github.io/sparse-lm/auto_examples/index.html) and
[API](https://cedergrouphub.github.io/sparse-lm/api.html) sections of the documentation.

Contributing
------------

We welcome any contributions that you think may improve the package!
Please have a look at the 65 | [contribution guidelines](https://cedergrouphub.github.io/sparse-lm/contributing.html) in the documentation. 66 | -------------------------------------------------------------------------------- /docs/_static/logo-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CederGroupHub/sparse-lm/220cbbad4a5ac98d1a52326c525aadb95f2c5b18/docs/_static/logo-light.png -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CederGroupHub/sparse-lm/220cbbad4a5ac98d1a52326c525aadb95f2c5b18/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API Documentation 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | sparselm.model 8 | sparselm.stepwise 9 | sparselm.model_selection 10 | sparselm.tools 11 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -- Path setup -------------------------------------------------------------- 2 | 3 | # If extensions (or modules to document with autodoc) are in another directory, 4 | # add these directories to sys.path here. If the directory is relative to the 5 | # documentation root, use os.path.abspath to make it absolute, like shown here. 6 | 7 | import os 8 | import sys 9 | 10 | # import typing 11 | # typing.TYPE_CHECKING = True 12 | from sparselm import __version__ 13 | 14 | sys.path.insert(0, os.path.abspath("../../")) 15 | 16 | # -- Project information ----------------------------------------------------- 17 | 18 | project = "sparse-lm" 19 | copyright = "2022-2023, Ceder Group" 20 | author = "Luis Barroso-Luque" 21 | 22 | # The short X.Y version 23 | version = __version__ 24 | # The full version, including alpha/beta/rc tags 25 | release = __version__ 26 | 27 | # -- General configuration --------------------------------------------------- 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | "sphinx.ext.autodoc", 34 | "sphinx.ext.napoleon", 35 | "sphinx.ext.intersphinx", 36 | "sphinx.ext.viewcode", 37 | "sphinx.ext.autosummary", 38 | "sphinx.ext.mathjax", 39 | "m2r2", 40 | "sphinx_gallery.gen_gallery", 41 | ] 42 | 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ["_templates"] 45 | 46 | # List of patterns, relative to source directory, that match files and 47 | # directories to ignore when looking for source files. 48 | # This pattern also affects html_static_path and html_extra_path. 49 | exclude_patterns = ["Thumbs.db", ".DS_Store", "test*.py"] 50 | 51 | # use type hints 52 | autodoc_typehints = "description" 53 | autoclass_content = "both" 54 | autodoc_member_order = "bysource" 55 | 56 | # better napoleon support 57 | napoleon_use_param = True 58 | napoleon_use_rtype = True 59 | napoleon_use_ivar = True 60 | 61 | # The suffix(es) of source filenames. 
62 | source_suffix = [".rst", ".md"] 63 | 64 | # -- Options for HTML output ------------------------------------------------- 65 | 66 | # The theme to use for HTML and HTML Help pages. See the documentation for 67 | # a list of builtin themes. 68 | # 69 | html_theme = "furo" 70 | 71 | # hide sphinx footer 72 | html_show_sphinx = False 73 | html_show_sourcelink = False 74 | 75 | # Add any paths that contain custom static files (such as style sheets) here, 76 | # relative to this directory. They are copied after the builtin static files, 77 | # so a file named "default.css" will overwrite the builtin "default.css". 78 | fonts = [ 79 | "Lato", 80 | "-apple-system", 81 | "BlinkMacSystemFont", 82 | "Segoe UI", 83 | "Helvetica", 84 | "Arial", 85 | "sans-serif", 86 | "Apple Color Emoji", 87 | "Segoe UI Emoji", 88 | ] 89 | html_static_path = ["_static"] 90 | html_css_files = ["custom.css"] 91 | html_favicon = "_static/favicon.ico" 92 | html_theme_options = { 93 | "light_css_variables": { 94 | "admonition-font-size": "92%", 95 | "admonition-title-font-size": "92%", 96 | "font-stack": ",".join(fonts), 97 | "font-size--small": "92%", 98 | "font-size--small--2": "87.5%", 99 | "font-size--small--3": "87.5%", 100 | "font-size--small--4": "87.5%", 101 | }, 102 | "dark_css_variables": { 103 | "admonition-font-size": "92%", 104 | "admonition-title-font-size": "92%", 105 | "font-stack": ",".join(fonts), 106 | "font-size--small": "92%", 107 | "font-size--small--2": "87.5%", 108 | "font-size--small--3": "87.5%", 109 | "font-size--small--4": "87.5%", 110 | }, 111 | } 112 | html_title = "sparse-lm" 113 | 114 | # code highlighting 115 | pygments_style = "sphinx" 116 | pygments_dark_style = "monokai" 117 | 118 | # -- Options for intersphinx extension --------------------------------------- 119 | 120 | # Example configuration for intersphinx: refer to the Python standard library. 121 | intersphinx_mapping = { 122 | "python": ("https://docs.python.org/3.9", None), 123 | "scikit-learn": ("https://scikit-learn.org/stable", None), 124 | "numpy": ("https://numpy.org/doc/stable/", None), 125 | "cvxpy": ("https://www.cvxpy.org/en/latest/", None), 126 | } 127 | 128 | # -- Options for sphinx gallery extension --------------------------------------- 129 | 130 | sphinx_gallery_conf = { 131 | "examples_dirs": "../examples", # path to your example scripts 132 | "gallery_dirs": "auto_examples", # path to where to save gallery generated output 133 | } 134 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | We welcome all forms of contribution, please consider contributing in any way you can! 5 | 6 | Bugs, issues, input, and questions 7 | ---------------------------------- 8 | Please use the 9 | `issue tracker `_ to share any 10 | of the following: 11 | 12 | - Bugs 13 | - Issues 14 | - Questions 15 | - Feature requests 16 | - Ideas 17 | - Input 18 | 19 | Having these reported and saved in the issue tracker is very helpful to make 20 | sure that they are properly addressed. Please make sure to be as descriptive 21 | and neat as possible when opening up an issue. 22 | 23 | Developing guidelines 24 | --------------------- 25 | If you have written code or want to start writing new code that you think will improve **sparse-lm** then please follow 26 | the steps below to make a contribution. 27 | 28 | * All code should have unit tests. 
* Code should be well documented following `google style <https://google.github.io/styleguide/pyguide.html>`_ docstrings.
* All code should pass the pre-commit hooks. The code follows the `black code style <https://black.readthedocs.io/en/stable/>`_.
* Estimators should follow scikit-learn's `developing estimator guidelines <https://scikit-learn.org/stable/developers/develop.html>`_.

Adding code contributions
-------------------------

#. If you are contributing for the first time:

   * *Fork* the repository and then *clone* your fork to your local workspace.
   * Make sure to add the *upstream* repository as a remote::

       git remote add upstream https://github.com/CederGroupHub/sparse-lm.git

   * You should always keep your ``main`` branch or any feature branch up to date
     with the upstream repository ``main`` branch. Be good about doing *fast forward*
     merges of the upstream ``main`` into your fork branches while developing.

#. In order to have changes available without having to re-install the package:

   * Install the package in *editable* mode::

       pip install -e .

#. To develop your contributions you are free to do so in your *main* branch or any feature
   branch in your fork.

   * We recommend using your fork's *main* branch only for short/easy fixes and additions.
   * For more complex features, try to use a feature branch with a descriptive name.
   * For very complex features feel free to open up a PR even before your contribution is finished,
     with [WIP] in its name, and optionally mark it as a *draft*.

#. While developing we recommend you use the pre-commit hook that is set up to ensure that your
   code will satisfy all lint, documentation and black requirements. To do so install pre-commit, and run
   in your clone's top directory::

       pre-commit install

   * All code should use `google style <https://google.github.io/styleguide/pyguide.html>`_ docstrings
     and `black <https://black.readthedocs.io/en/stable/>`_ style formatting.

#. Make sure to test your contribution and write unit tests for any new features. All tests should go in the
   ``sparse-lm/tests`` directory. The CI will run tests upon opening a PR, but running them locally will help find
   problems beforehand::

       pytest tests
-------------------------------------------------------------------------------- /docs/index.rst: --------------------------------------------------------------------------------

.. toctree::
   :caption: Getting Started
   :hidden:

   install
   auto_examples/index

.. toctree::
   :caption: Information
   :hidden:

   contributing
   license
   GitHub <https://github.com/CederGroupHub/sparse-lm>


.. toctree::
   :caption: Reference
   :maxdepth: -1
   :hidden:

   API <api>
   genindex


.. image:: _static/logo.png
   :width: 700
   :class: only-dark

.. image:: _static/logo-light.png
   :width: 700
   :class: only-light

===============================
Sparse Linear Regression Models
===============================

.. mdinclude:: ../README.md
   :start-line: 4
-------------------------------------------------------------------------------- /docs/install.rst: --------------------------------------------------------------------------------
Install
=======

**sparse-lm** can be installed from PyPI or from source using pip.
5 | 6 | PyPI 7 | ---- 8 | 9 | You can install **sparse-lm** using pip:: 10 | 11 | pip install sparse-lm 12 | 13 | 14 | Install from source 15 | ------------------- 16 | 17 | To install **sparse-lm** from source, (fork and) clone the repository from `github 18 | `_:: 19 | 20 | git clone https://github.com/CederGroupHub/sparse-lm 21 | cd sparselm 22 | pip install . 23 | 24 | Installing MIQP solvers 25 | ----------------------- 26 | 27 | Since **cvxpy** is used to specify and solve regression optimization problems, any of 28 | `supported solvers `_ 29 | can be used with **sparse-lm** estimators. **cvxpy** is shipped with open source solvers 30 | (OSQP, SCS, and ECOS) which are usually enough to solve most convex regression problems. 31 | 32 | However, for the mixed integer quadratic programming (MIQP) formulations used in 33 | :class:`BestSubsetSelection` and :class:`RegularizedL0` based classes we highly 34 | recommend installing an MIQP capable solver. ECOS_BB can be used to solve MIQP problems, 35 | but it can be very slow and more importantly has recurring correctness issues. See the 36 | `mixed-integer program section `_ 37 | in the cvxpy documentation for more details. 38 | 39 | Gurobi 40 | ^^^^^^ 41 | 42 | For using **sparse-lm** with MIQP solvers, we highly recommend installing **Gurobi**. 43 | It can be installed directly from PyPi:: 44 | 45 | pip install gurobipy 46 | 47 | Without a license, a free trial **Gurobi** can be used to solve small problems. For 48 | larger problems a license is required. **Gurobi** grants 49 | `free academic licenses `_ 50 | to students and academic researchers. 51 | 52 | SCIP 53 | ^^^^ 54 | 55 | If installing a licensed solver is not an option, **SCIP** can be used as a free 56 | alternative. To use **SCIP**, the python interface **PySCIPOpt** must also be installed. 57 | **PySCIPOpt** can be installed from PyPi, however this requires building SCIP from 58 | source. See installation details `here `_. 59 | 60 | If you use conda, we recommend installing **SCIP** and **PySCIPOpt** using their 61 | conda-forge channel:: 62 | 63 | conda install -c conda-forge scipopt pyscipopt 64 | 65 | The above command will install **PySCIPOpt** with a pre-built version of **SCIP**, and 66 | so you will not need to build it from source. 67 | 68 | Testing 69 | ------- 70 | 71 | Unit tests can be run from the source folder using ``pytest``. First, the requirements 72 | to run tests must be installed:: 73 | 74 | pip install .[tests] 75 | 76 | Then run the tests using:: 77 | 78 | pytest tests 79 | -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | License 3 | ======= 4 | 5 | **sparse-lm** is distributed under a modified 3-clause BSD licence. 6 | 7 | .. include:: ../LICENSE 8 | -------------------------------------------------------------------------------- /docs/sparselm.model.rst: -------------------------------------------------------------------------------- 1 | sparselm.model 2 | ============== 3 | 4 | .. automodule:: sparselm.model 5 | :members: 6 | :inherited-members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/sparselm.model_selection.rst: -------------------------------------------------------------------------------- 1 | sparselm.model_selection 2 | ======================== 3 | 4 | .. 
automodule:: sparselm.model_selection
   :members:
   :inherited-members:
   :undoc-members:
   :show-inheritance:
-------------------------------------------------------------------------------- /docs/sparselm.stepwise.rst: --------------------------------------------------------------------------------
sparselm.stepwise
=================

.. automodule:: sparselm.stepwise
   :members:
   :inherited-members:
   :undoc-members:
   :show-inheritance:
-------------------------------------------------------------------------------- /docs/sparselm.tools.rst: --------------------------------------------------------------------------------
sparselm.tools
==============

.. automodule:: sparselm.tools
   :members:
   :inherited-members:
   :undoc-members:
   :show-inheritance:
-------------------------------------------------------------------------------- /examples/README.rst: --------------------------------------------------------------------------------
Examples
========

This is a set of simple examples using the sparse linear regression models implemented in
**sparse-lm**. For the vast majority of cases, the **sparse-lm** models can be
used in the same way as the linear regression models in **scikit-learn**.
-------------------------------------------------------------------------------- /examples/corr.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CederGroupHub/sparse-lm/220cbbad4a5ac98d1a52326c525aadb95f2c5b18/examples/corr.npy -------------------------------------------------------------------------------- /examples/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CederGroupHub/sparse-lm/220cbbad4a5ac98d1a52326c525aadb95f2c5b18/examples/energy.npy -------------------------------------------------------------------------------- /examples/plot_adaptive.py: --------------------------------------------------------------------------------
"""
==============================
Using adaptive regularization
==============================

Adaptive or iteratively re-weighted regularization is a technique that can improve
feature selection properties over the standard Lasso and Group Lasso extensions. In
this example we compare the performance of the standard Lasso with the adaptive Lasso.
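The adaptive Lasso repeatedly re-solves a weighted Lasso problem, updating each coefficient's
penalty weight from the previous estimate so that larger coefficients are penalized less on the
next iteration; in this example the ``max_iter`` argument passed to the estimator sets the number
of re-weighting iterations.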
9 | """ 10 | 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | from sklearn.datasets import make_regression 14 | from sklearn.linear_model import Lasso 15 | from sklearn.metrics import mean_squared_error, r2_score 16 | from sklearn.model_selection import GridSearchCV, KFold, train_test_split 17 | 18 | from sparselm.model import AdaptiveLasso 19 | 20 | X, y, coef = make_regression( 21 | n_samples=200, 22 | n_features=100, 23 | n_informative=10, 24 | noise=40.0, 25 | bias=-15.0, 26 | coef=True, 27 | random_state=0, 28 | ) 29 | 30 | X_train, X_test, y_train, y_test = train_test_split( 31 | X, y, test_size=0.25, random_state=0 32 | ) 33 | 34 | # create estimators 35 | lasso = Lasso(fit_intercept=True) 36 | alasso = AdaptiveLasso(max_iter=5, fit_intercept=True) 37 | 38 | # create cv search objects for each estimator 39 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0) 40 | params = {"alpha": np.logspace(-1, 1, 10)} 41 | 42 | lasso_cv = GridSearchCV(lasso, params, cv=cv5, n_jobs=-1) 43 | alasso_cv = GridSearchCV(alasso, params, cv=cv5, n_jobs=-1) 44 | 45 | # fit models on training data 46 | lasso_cv.fit(X_train, y_train) 47 | alasso_cv.fit(X_train, y_train) 48 | 49 | # calculate model performance on test and train data 50 | lasso_train = { 51 | "r2": r2_score(y_train, lasso_cv.predict(X_train)), 52 | "rmse": np.sqrt(mean_squared_error(y_train, lasso_cv.predict(X_train))), 53 | } 54 | 55 | lasso_test = { 56 | "r2": r2_score(y_test, lasso_cv.predict(X_test)), 57 | "rmse": np.sqrt(mean_squared_error(y_test, lasso_cv.predict(X_test))), 58 | } 59 | 60 | alasso_train = { 61 | "r2": r2_score(y_train, alasso_cv.predict(X_train)), 62 | "rmse": np.sqrt(mean_squared_error(y_train, alasso_cv.predict(X_train))), 63 | } 64 | 65 | alasso_test = { 66 | "r2": r2_score(y_test, alasso_cv.predict(X_test)), 67 | "rmse": np.sqrt(mean_squared_error(y_test, alasso_cv.predict(X_test))), 68 | } 69 | 70 | print("Lasso performance metrics:") 71 | print(f" train r2: {lasso_train['r2']:.3f}") 72 | print(f" test r2: {lasso_test['r2']:.3f}") 73 | print(f" train rmse: {lasso_train['rmse']:.3f}") 74 | print(f" test rmse: {lasso_test['rmse']:.3f}") 75 | 76 | print("Adaptive Lasso performance metrics:") 77 | print(f" train r2: {alasso_train['r2']:.3f}") 78 | print(f" test r2: {alasso_test['r2']:.3f}") 79 | print(f" train rmse: {alasso_train['rmse']:.3f}") 80 | print(f" test rmse: {alasso_test['rmse']:.3f}") 81 | 82 | # plot model coefficients 83 | fig, ax = plt.subplots() 84 | ax.plot(coef, "o", label="True coefficients") 85 | ax.plot(lasso_cv.best_estimator_.coef_, "o", label="Lasso", alpha=0.5) 86 | ax.plot(alasso_cv.best_estimator_.coef_, "o", label="Adaptive Lasso", alpha=0.5) 87 | ax.set_xlabel("covariate index") 88 | ax.set_ylabel("coefficient value") 89 | ax.legend() 90 | fig.show() 91 | 92 | # plot predicted values 93 | fig, ax = plt.subplots() 94 | ax.plot(y_test, lasso_cv.predict(X_test), "o", label="lasso", alpha=0.5) 95 | ax.plot(y_test, alasso_cv.predict(X_test), "o", label="adaptive lasso", alpha=0.5) 96 | ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--") 97 | ax.set_xlabel("true values") 98 | ax.set_ylabel("predicted values") 99 | ax.legend() 100 | fig.show() 101 | -------------------------------------------------------------------------------- /examples/plot_chull.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================== 3 | Adding solution constraints 4 | =========================== 5 | 6 | 
**sparse-lm** allows adding external solution constraints to the regression objective
by exposing the underlying **cvxpy** problem objects. This is useful to solve regression
problems with additional constraints, such as non-negativity.

**NOTE**: This functionality does not fully align with the requirements for
compatible scikit-learn estimators, meaning that using an estimator with additional
constraints added in a scikit-learn pipeline or model selection is not supported.

To show how to include constraints, we will solve a common problem in materials science:
predicting the formation energy of many configurations of an alloy. In such problems,
it is usually very important to ensure that the predicted formation energies for
"ground-states" (i.e. energies that define the lower convex-hull of the energy vs
composition graph) remain on the convex-hull. Similarly, it is often important to
ensure that the predicted formation energies that are not "ground-states" in the
training data remain above the predicted convex-hull.

The example follows the methodology described in this paper:
https://www.nature.com/articles/s41524-017-0032-0

This example requires the **pymatgen** materials analysis package to be
installed to easily plot convex-hulls: https://pymatgen.org/installation.html

The training data used in this example is taken from this
tutorial: https://icet.materialsmodeling.org/tutorial.zip for the
**icet** cluster expansion Python package (https://icet.materialsmodeling.org/).
"""

import json

import matplotlib.pyplot as plt
import numpy as np
import pymatgen.analysis.phase_diagram as pd
from pymatgen.core import Structure
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error

from sparselm.model import L2L0

# load training data
X, y = np.load("corr.npy"), np.load("energy.npy")

# load corresponding structure objects
with open("structures.json") as fp:
    structures = json.load(fp)

structures = [Structure.from_dict(s) for s in structures]

# create regressors (the hyperparameters have already been tuned)
lasso_regressor = Lasso(fit_intercept=True, alpha=1.29e-5)
# alpha is the pseudo-l0 norm hyperparameter and eta is the l2-norm hyperparameter
l2l0_regressor = L2L0(
    fit_intercept=True,
    alpha=3.16e-7,
    eta=1.66e-6,
    solver="GUROBI",
    solver_options={"Threads": 4},
)

# fit models
lasso_regressor.fit(X, y)
l2l0_regressor.fit(X, y)

# create phase diagram entries with training data
training_entries = []
for i, structure in enumerate(structures):
    corrs = X[
        i
    ]  # in this problem the features of a sample are referred to as correlation vectors
    energy = y[i] * len(
        structure
    )  # the energy must be scaled by size to create the phase diagram
    entry = pd.PDEntry(
        structure.composition,
        energy,
        attribute={"corrs": corrs, "size": len(structure)},
    )
    training_entries.append(entry)

# plot the training (true) phase diagram
training_pd = pd.PhaseDiagram(training_entries)
pplotter = pd.PDPlotter(training_pd, backend="matplotlib", show_unstable=0)
pplotter.show(label_unstable=False)

# plot the phase diagram based on the energies predicted by the Lasso fit
lasso_y = lasso_regressor.predict(X)
lasso_pd = pd.PhaseDiagram(
    [
        pd.PDEntry(s_i.composition, y_i * len(s_i))
        for s_i, y_i in zip(structures, lasso_y)
    ]
)
pplotter = pd.PDPlotter(lasso_pd, backend="matplotlib", show_unstable=0)
pplotter.show(label_unstable=False)

# plot the phase diagram based on the energies predicted by the L2L0 fit
l2l0_y = l2l0_regressor.predict(X)
l2l0_pd = pd.PhaseDiagram(
    [
        pd.PDEntry(s_i.composition, y_i * len(s_i))
        for s_i, y_i in zip(structures, l2l0_y)
    ]
)
pplotter = pd.PDPlotter(l2l0_pd, backend="matplotlib", show_unstable=0)
pplotter.show(label_unstable=False)

# we notice that both the Lasso fit and the L2L0 fit miss the ground-state Ag5Pd3
# and also add spurious ground-states not present in the training convex hull


# create matrices for two types of constraints to keep the predicted hull unchanged
# 1) keep non-ground states above the hull
# 2) ensure ground-states stay on the hull

# 1) compute the correlation matrix for unstable structures and
# the weighted correlation matrix of the decomposition products
X_unstable = np.zeros(shape=(len(training_pd.unstable_entries), X.shape[1]))
X_decomp = np.zeros_like(X_unstable)
for i, entry in enumerate(training_pd.unstable_entries):
    if entry.is_element:
        continue
    X_unstable[i] = entry.attribute["corrs"]
    decomp_entries, ehull = training_pd.get_decomp_and_e_above_hull(entry)
    for dentry, amount in decomp_entries.items():
        ratio = (
            amount
            * (entry.composition.num_atoms / dentry.composition.num_atoms)
            * dentry.attribute["size"]
            / entry.attribute["size"]
        )
        X_decomp[i] += ratio * dentry.attribute["corrs"]

# 2) compute the ground-state correlation matrix
# and the weighted correlation matrix of its decomposition products if it were not a ground-state
X_stable = np.zeros(shape=(len(training_pd.stable_entries), X.shape[1]))
X_gsdecomp = np.zeros_like(X_stable)
gs_pd = pd.PhaseDiagram(training_pd.stable_entries)
for i, entry in enumerate(gs_pd.stable_entries):
    if entry.is_element:
        continue
    X_stable[i] = entry.attribute["corrs"]
    decomp_entries, ehull = gs_pd.get_decomp_and_phase_separation_energy(entry)
    for dentry, amount in decomp_entries.items():
        ratio = (
            amount
            * (entry.composition.num_atoms / dentry.composition.num_atoms)
            * dentry.attribute["size"]
            / entry.attribute["size"]
        )
        X_gsdecomp[i] += ratio * dentry.attribute["corrs"]


constrained_regressor = L2L0(
    fit_intercept=True,
    alpha=3.16e-7,
    eta=1.66e-6,
    solver="GUROBI",
    solver_options={"Threads": 4},
)

# now create the constraints by accessing the underlying cvxpy objects
# if regressor.fit has not been called with the given data, we must call generate_problem to generate
# the cvxpy objects that represent the regression objective
constrained_regressor.generate_problem(X, y)
J = (
    constrained_regressor.canonicals_.beta
)  # this is the cvxpy variable representing the coefficients

# 1) add constraint to keep unstable structures above the hull, i.e. no new ground states
epsilon = 0.0005  # solutions will be very sensitive to the size of this margin
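# Here X_unstable @ J gives the model's predicted energy for each structure that lies above
# the training hull, and X_decomp @ J gives the predicted energy of its decomposition into
# stable phases; requiring the former to exceed the latter by the margin epsilon keeps these
# structures off the predicted convex hull.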
constrained_regressor.add_constraints([X_unstable @ J >= X_decomp @ J + epsilon])

# 2) add constraint to keep all ground-states on the hull
epsilon = 1e-6
constrained_regressor.add_constraints([X_stable @ J <= X_gsdecomp @ J - epsilon])


# fit the constrained regressor
constrained_regressor.fit(X, y)

# look at the phase diagram based on the energies predicted by the constrained L2L0 fit
l2l0c_y = constrained_regressor.predict(X)
constrained_pd = pd.PhaseDiagram(
    [
        pd.PDEntry(s_i.composition, y_i * len(s_i))
        for s_i, y_i in zip(structures, l2l0c_y)
    ]
)
pplotter = pd.PDPlotter(constrained_pd, backend="matplotlib", show_unstable=0)
pplotter.show(label_unstable=False)
# the constraints now force the fitted model to respect the training convex-hull

# Plot the different estimated coefficients
fig, ax = plt.subplots()
ax.plot(lasso_regressor.coef_[1:])
ax.plot(l2l0_regressor.coef_[1:])
ax.plot(constrained_regressor.coef_[1:])
ax.set_xlabel("covariate index")
ax.set_ylabel("coefficient value")
ax.legend(["lasso", "l2l0", "l2l0 constrained"])
fig.show()

# print the resulting training RMSE from the different fits
lasso_rmse = np.sqrt(mean_squared_error(y, lasso_regressor.predict(X)))
l2l0_rmse = np.sqrt(mean_squared_error(y, l2l0_regressor.predict(X)))
l2l0c_rmse = np.sqrt(mean_squared_error(y, constrained_regressor.predict(X)))

print(f"Lasso train RMSE: {lasso_rmse:.4f}")
print(f"L2L0 train RMSE: {l2l0_rmse:.4f}")
print(f"L2L0 with constraints train RMSE: {l2l0c_rmse:.4f}")
-------------------------------------------------------------------------------- /examples/plot_gl_sgl.py: --------------------------------------------------------------------------------
"""
=========================
(Sparse) Group regression
=========================

This example shows how to use the group lasso and sparse group lasso to fit a simulated
dataset with group-level sparsity and within-group sparsity.
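The group lasso penalizes the l2 norm of each group of coefficients, so whole groups are
selected or discarded together; the sparse group lasso adds an additional l1 penalty
(balanced by the ``l1_ratio`` parameter) so that individual coefficients within a selected
group can also be driven to zero.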
8 | """ 9 | 10 | import warnings 11 | 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | from sklearn.linear_model import Lasso 15 | from sklearn.metrics import mean_squared_error, r2_score 16 | from sklearn.model_selection import GridSearchCV, KFold, train_test_split 17 | 18 | from sparselm.dataset import make_group_regression 19 | from sparselm.model import GroupLasso, SparseGroupLasso 20 | 21 | warnings.filterwarnings("ignore", category=UserWarning) # ignore convergence warnings 22 | 23 | # generate a dataset with group-level sparsity only 24 | X, y, groups, coefs = make_group_regression( 25 | n_samples=400, 26 | n_groups=10, 27 | n_features_per_group=10, 28 | n_informative_groups=5, 29 | frac_informative_in_group=1.0, 30 | bias=-10.0, 31 | noise=200.0, 32 | coef=True, 33 | random_state=0, 34 | ) 35 | 36 | # split data into train and test sets 37 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) 38 | 39 | # create estimators 40 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0) 41 | lasso_cv = GridSearchCV( 42 | Lasso(fit_intercept=True), {"alpha": np.logspace(0, 2, 5)}, cv=cv5, n_jobs=-1 43 | ) 44 | lasso_cv.fit(X_train, y_train) 45 | glasso_cv = GridSearchCV( 46 | GroupLasso(groups=groups, fit_intercept=True), 47 | {"alpha": np.logspace(0, 2, 5)}, 48 | cv=cv5, 49 | n_jobs=-1, 50 | ) 51 | glasso_cv.fit(X_train, y_train) 52 | 53 | # Plot predicted values 54 | fig, ax = plt.subplots() 55 | ax.plot( 56 | y_test, glasso_cv.predict(X_test), marker="o", ls="", alpha=0.5, label="group lasso" 57 | ) 58 | ax.plot(y_test, lasso_cv.predict(X_test), marker="o", ls="", alpha=0.5, label="lasso") 59 | ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--") 60 | ax.legend() 61 | ax.set_xlabel("true values") 62 | ax.set_ylabel("predicted values") 63 | fig.show() 64 | 65 | # calculate model performance on test and train data 66 | lasso_train = { 67 | "r2": r2_score(y_train, lasso_cv.predict(X_train)), 68 | "rmse": np.sqrt(mean_squared_error(y_train, lasso_cv.predict(X_train))), 69 | } 70 | 71 | lasso_test = { 72 | "r2": r2_score(y_test, lasso_cv.predict(X_test)), 73 | "rmse": np.sqrt(mean_squared_error(y_test, lasso_cv.predict(X_test))), 74 | } 75 | 76 | glasso_train = { 77 | "r2": r2_score(y_train, glasso_cv.predict(X_train)), 78 | "rmse": np.sqrt(mean_squared_error(y_train, glasso_cv.predict(X_train))), 79 | } 80 | 81 | glasso_test = { 82 | "r2": r2_score(y_test, glasso_cv.predict(X_test)), 83 | "rmse": np.sqrt(mean_squared_error(y_test, glasso_cv.predict(X_test))), 84 | } 85 | 86 | print("------- Performance metrics for signal with group-level sparsity only -------\n") 87 | 88 | print("Lasso performance metrics:") 89 | print(f" train r2: {lasso_train['r2']:.3f}") 90 | print(f" test r2: {lasso_test['r2']:.3f}") 91 | print(f" train rmse: {lasso_train['rmse']:.3f}") 92 | print(f" test rmse: {lasso_test['rmse']:.3f}") 93 | 94 | print("Group Lasso performance metrics:") 95 | print(f" train r2: {glasso_train['r2']:.3f}") 96 | print(f" test r2: {glasso_test['r2']:.3f}") 97 | print(f" train rmse: {glasso_train['rmse']:.3f}") 98 | print(f" test rmse: {glasso_test['rmse']:.3f}") 99 | 100 | # generate a dataset with group-level sparsity and within-group sparsity 101 | X, y, groups, coefs = make_group_regression( 102 | n_samples=400, 103 | n_groups=10, 104 | n_features_per_group=10, 105 | n_informative_groups=5, 106 | frac_informative_in_group=0.5, 107 | bias=-10.0, 108 | noise=100.0, 109 | coef=True, 110 | random_state=0, 111 | ) 
112 | 113 | # split data into train and test sets 114 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) 115 | 116 | glasso_cv = GridSearchCV( 117 | GroupLasso(groups=groups, fit_intercept=True), 118 | {"alpha": np.logspace(0, 2, 5)}, 119 | cv=cv5, 120 | n_jobs=-1, 121 | ) 122 | sglasso_cv = GridSearchCV( 123 | SparseGroupLasso(groups=groups, fit_intercept=True), 124 | {"alpha": np.logspace(0, 2, 5), "l1_ratio": np.arange(0.3, 0.8, 0.1)}, 125 | cv=cv5, 126 | n_jobs=-1, 127 | ) 128 | glasso_cv.fit(X_train, y_train) 129 | sglasso_cv.fit(X_train, y_train) 130 | 131 | # Plot predicted values 132 | fig, ax = plt.subplots() 133 | ax.plot( 134 | y_test, glasso_cv.predict(X_test), marker="o", ls="", alpha=0.5, label="group lasso" 135 | ) 136 | ax.plot( 137 | y_test, 138 | sglasso_cv.predict(X_test), 139 | marker="o", 140 | ls="", 141 | alpha=0.5, 142 | label="sparse group lasso", 143 | ) 144 | ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--") 145 | ax.legend() 146 | ax.set_xlabel("true values") 147 | ax.set_ylabel("predicted values") 148 | fig.show() 149 | 150 | # calculate model performance on test and train data 151 | glasso_train = { 152 | "r2": r2_score(y_train, glasso_cv.predict(X_train)), 153 | "rmse": np.sqrt(mean_squared_error(y_train, glasso_cv.predict(X_train))), 154 | } 155 | 156 | glasso_test = { 157 | "r2": r2_score(y_test, glasso_cv.predict(X_test)), 158 | "rmse": np.sqrt(mean_squared_error(y_test, glasso_cv.predict(X_test))), 159 | } 160 | 161 | sglasso_train = { 162 | "r2": r2_score(y_train, sglasso_cv.predict(X_train)), 163 | "rmse": np.sqrt(mean_squared_error(y_train, sglasso_cv.predict(X_train))), 164 | } 165 | 166 | sglasso_test = { 167 | "r2": r2_score(y_test, sglasso_cv.predict(X_test)), 168 | "rmse": np.sqrt(mean_squared_error(y_test, sglasso_cv.predict(X_test))), 169 | } 170 | 171 | 172 | print( 173 | "------- Performance metrics for signal with group and within group sparsity -------\n" 174 | ) 175 | 176 | print("Group Lasso performance metrics:") 177 | print(f" train r2: {glasso_train['r2']:.3f}") 178 | print(f" test r2: {glasso_test['r2']:.3f}") 179 | print(f" train rmse: {glasso_train['rmse']:.3f}") 180 | print(f" test rmse: {glasso_test['rmse']:.3f}") 181 | 182 | print("Sparse Group Lasso performance metrics:") 183 | print(f" train r2: {sglasso_train['r2']:.3f}") 184 | print(f" test r2: {sglasso_test['r2']:.3f}") 185 | print(f" train rmse: {sglasso_train['rmse']:.3f}") 186 | print(f" test rmse: {sglasso_test['rmse']:.3f}") 187 | -------------------------------------------------------------------------------- /examples/plot_line_search.py: -------------------------------------------------------------------------------- 1 | """ 2 | ======================================= 3 | Tuning hyperparameters with line search 4 | ======================================= 5 | 6 | Line search can typically be used in optimizing regressors with multiple weakly or 7 | uncorrelated hyperparameters. 8 | 9 | This example also showcases the usage of mixed L0 regressor where using a standard 10 | grid search can be too computationally expensive.. 
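Rather than evaluating the full Cartesian grid of hyperparameter combinations, line search
scans one hyperparameter at a time while holding the others fixed, cycling through the
parameters in the order they are listed.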
11 | """ 12 | 13 | import numpy as np 14 | from sklearn.datasets import make_regression 15 | from sklearn.metrics import mean_squared_error, r2_score 16 | from sklearn.model_selection import KFold, train_test_split 17 | 18 | from sparselm.model import L2L0 19 | from sparselm.model_selection import LineSearchCV 20 | 21 | X, y, coef = make_regression( 22 | n_samples=60, 23 | n_features=30, 24 | n_informative=8, 25 | noise=40.0, 26 | bias=-15.0, 27 | coef=True, 28 | random_state=0, 29 | ) 30 | 31 | X_train, X_test, y_train, y_test = train_test_split( 32 | X, y, test_size=0.25, random_state=0 33 | ) 34 | 35 | # create an l2l0 estimator. 36 | # Groups for parameters must be provided each coefficient is in a singleton group. 37 | groups = np.arange(30, dtype=int) 38 | l2l0 = L2L0(groups, fit_intercept=True, solver="GUROBI", solver_options={"Threads": 4}) 39 | 40 | # create cv search objects for each estimator 41 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0) 42 | # LineSearchCV requires the parameters grid to be provided in a list of tuple format, 43 | # with order of parameters in the list being the order of them getting searched per 44 | # iteration. 45 | # The following example specifies the parameter alpha to be scanned first, then the 46 | # parameter eta. 47 | params = [("alpha", np.logspace(-6, 1, 5)), ("eta", np.logspace(-7, -1, 5))] 48 | 49 | l2l0_cv = LineSearchCV(l2l0, params, cv=cv5, n_jobs=4) 50 | 51 | # fit models on training data 52 | l2l0_cv.fit(X_train, y_train) 53 | 54 | # calculate model performance on test and train data 55 | l2l0_train = { 56 | "r2": r2_score(y_train, l2l0_cv.predict(X_train)), 57 | "rmse": np.sqrt(mean_squared_error(y_train, l2l0_cv.predict(X_train))), 58 | } 59 | 60 | l2l0_test = { 61 | "r2": r2_score(y_test, l2l0_cv.predict(X_test)), 62 | "rmse": np.sqrt(mean_squared_error(y_test, l2l0_cv.predict(X_test))), 63 | } 64 | 65 | print("Performance metrics:") 66 | print(f" train r2: {l2l0_train['r2']:.3f}") 67 | print(f" test r2: {l2l0_test['r2']:.3f}") 68 | print(f" train rmse: {l2l0_train['rmse']:.3f}") 69 | print(f" test rmse: {l2l0_test['rmse']:.3f}") 70 | -------------------------------------------------------------------------------- /examples/plot_one_std.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================= 3 | Hyperparameters selection with 1-std rule 4 | ========================================= 5 | 6 | One-standard-deviation rule is a technique to promote model robustness when 7 | cross validation results are noisy. The hyperparameter is chosen to 8 | be equal to the maximum value that yields: 9 | CV = minimum CV + 1 * std(CV at minimum). 10 | 11 | One-standard-deviation rule is available in both GridSearchCV and LineSearchCV 12 | under sparselm.model_selection. 
13 | """ 14 | 15 | import matplotlib.pyplot as plt 16 | import numpy as np 17 | from sklearn.datasets import make_regression 18 | from sklearn.linear_model import Lasso 19 | from sklearn.metrics import mean_squared_error, r2_score 20 | from sklearn.model_selection import KFold, train_test_split 21 | 22 | from sparselm.model_selection import GridSearchCV 23 | 24 | X, y, coef = make_regression( 25 | n_samples=200, 26 | n_features=100, 27 | n_informative=10, 28 | noise=40.0, 29 | bias=-15.0, 30 | coef=True, 31 | random_state=0, 32 | ) 33 | 34 | X_train, X_test, y_train, y_test = train_test_split( 35 | X, y, test_size=0.25, random_state=0 36 | ) 37 | 38 | # create estimators 39 | lasso = Lasso(fit_intercept=True) 40 | 41 | # create cv search objects for each estimator 42 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0) 43 | params = {"alpha": np.logspace(-1, 1.5, 20)} 44 | 45 | lasso_cv_std = GridSearchCV( 46 | lasso, params, opt_selection_method="one_std_score", cv=cv5, n_jobs=-1 47 | ) 48 | lasso_cv_opt = GridSearchCV( 49 | lasso, params, opt_selection_method="max_score", cv=cv5, n_jobs=-1 50 | ) 51 | 52 | # fit models on training data 53 | lasso_cv_std.fit(X_train, y_train) 54 | lasso_cv_opt.fit(X_train, y_train) 55 | 56 | # calculate model performance on test and train data 57 | lasso_std_train = { 58 | "r2": r2_score(y_train, lasso_cv_std.predict(X_train)), 59 | "rmse": np.sqrt(mean_squared_error(y_train, lasso_cv_std.predict(X_train))), 60 | } 61 | 62 | lasso_std_test = { 63 | "r2": r2_score(y_test, lasso_cv_std.predict(X_test)), 64 | "rmse": np.sqrt(mean_squared_error(y_test, lasso_cv_std.predict(X_test))), 65 | } 66 | 67 | print("Lasso with 1-std:") 68 | print(f" alpha value: {lasso_cv_std.best_params_['alpha']}") 69 | print(f" train r2: {lasso_std_train['r2']:.3f}") 70 | print(f" test r2: {lasso_std_test['r2']:.3f}") 71 | print(f" train rmse: {lasso_std_train['rmse']:.3f}") 72 | print(f" test rmse: {lasso_std_test['rmse']:.3f}") 73 | print(f" sparsity: {sum(abs(lasso_cv_std.best_estimator_.coef_) > 1E-8)}") 74 | 75 | lasso_opt_train = { 76 | "r2": r2_score(y_train, lasso_cv_opt.predict(X_train)), 77 | "rmse": np.sqrt(mean_squared_error(y_train, lasso_cv_opt.predict(X_train))), 78 | } 79 | 80 | lasso_opt_test = { 81 | "r2": r2_score(y_test, lasso_cv_opt.predict(X_test)), 82 | "rmse": np.sqrt(mean_squared_error(y_test, lasso_cv_opt.predict(X_test))), 83 | } 84 | 85 | print("Lasso performance:") 86 | print(f" alpha value: {lasso_cv_std.best_params_['alpha']}") 87 | print(f" train r2: {lasso_opt_train['r2']:.3f}") 88 | print(f" test r2: {lasso_opt_test['r2']:.3f}") 89 | print(f" train rmse: {lasso_opt_train['rmse']:.3f}") 90 | print(f" test rmse: {lasso_opt_test['rmse']:.3f}") 91 | print(f" sparsity: {sum(abs(lasso_cv_opt.best_estimator_.coef_) > 1E-8)}") 92 | 93 | # plot cross validation scores 94 | fig, ax = plt.subplots() 95 | ax.plot( 96 | lasso_cv_std.cv_results_["param_alpha"].data, 97 | -lasso_cv_std.cv_results_["mean_test_score"], 98 | "o-", 99 | label="One std", 100 | ) 101 | ax.plot( 102 | lasso_cv_std.cv_results_["param_alpha"].data, 103 | -lasso_cv_opt.cv_results_["mean_test_score"] 104 | + lasso_cv_std.cv_results_["std_test_score"], 105 | "k--", 106 | alpha=0.5, 107 | ) 108 | ax.plot( 109 | lasso_cv_std.cv_results_["param_alpha"].data, 110 | -lasso_cv_opt.cv_results_["mean_test_score"] 111 | - lasso_cv_std.cv_results_["std_test_score"], 112 | "k--", 113 | alpha=0.5, 114 | ) 115 | ax.set_xlabel("alpha") 116 | ax.set_ylabel("rmse") 117 | ax.legend(["mean", "std"]) 118 | 
fig.show() 119 | 120 | # plot model coefficients 121 | fig, ax = plt.subplots() 122 | ax.plot(coef, "o", label="True coefficients") 123 | ax.plot(lasso_cv_std.best_estimator_.coef_, "o", label="One std", alpha=0.5) 124 | ax.plot(lasso_cv_opt.best_estimator_.coef_, "o", label="Max score", alpha=0.5) 125 | ax.set_xlabel("covariate index") 126 | ax.set_ylabel("coefficient value") 127 | ax.legend() 128 | fig.show() 129 | -------------------------------------------------------------------------------- /examples/plot_sparse_signal.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================= 3 | Recovering sparse signals 4 | ========================= 5 | 6 | In this example we compare the results obtained from `BestSubsetSelection` with 7 | those obtained using the `OrthogonalMatchingPursuit` regressor from **scikit-learn**. 8 | 9 | Note that although using best subset selection tend to give more accurate results, 10 | `OrthogonalMatchingPursuit` scales much better to larger problems. 11 | 12 | This example is adapted from the scikit-learn documentation: 13 | https://scikit-learn.org/stable/auto_examples/linear_model/plot_omp.html#sphx-glr-auto-examples-linear-model-plot-omp-py 14 | """ 15 | 16 | import matplotlib.pyplot as plt 17 | import numpy as np 18 | from sklearn.datasets import make_sparse_coded_signal 19 | from sklearn.linear_model import OrthogonalMatchingPursuit 20 | 21 | from sparselm.model import BestSubsetSelection 22 | 23 | n_components, n_features = 50, 20 24 | n_nonzero_coefs = 8 25 | 26 | # generate the data 27 | y, X, w = make_sparse_coded_signal( 28 | n_samples=1, 29 | n_components=n_components, 30 | n_features=n_features, 31 | n_nonzero_coefs=n_nonzero_coefs, 32 | random_state=0, 33 | ) 34 | X = X.T 35 | (idx,) = w.nonzero() 36 | 37 | # distort the clean signal 38 | y_noisy = y + 0.005 * np.random.randn(len(y)) 39 | 40 | # plot the sparse signal 41 | plt.figure(figsize=(14, 7)) 42 | plt.subplot(3, 2, (1, 2)) 43 | plt.xlim(0, n_components) 44 | plt.title("Sparse signal") 45 | plt.stem(idx, w[idx]) 46 | 47 | # plot the noise-free reconstruction 48 | omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs) 49 | omp.fit(X, y) 50 | coef = omp.coef_ 51 | (idx_r,) = coef.nonzero() 52 | plt.subplot(3, 2, 3) 53 | plt.xlim(0, n_components) 54 | plt.title("Orthogonal Matching Pursuit (noise-free measurements)") 55 | plt.stem(idx_r, coef[idx_r]) 56 | 57 | bss = BestSubsetSelection( 58 | sparse_bound=n_nonzero_coefs, solver="GUROBI", solver_options={"Threads": 8} 59 | ) 60 | bss.fit(X, y) 61 | coef = bss.coef_ 62 | (idx_r,) = coef.nonzero() 63 | plt.subplot(3, 2, 4) 64 | plt.xlim(0, n_components) 65 | plt.title("Best Subset Selection (noise-free measurements)") 66 | plt.stem(idx_r, coef[idx_r]) 67 | 68 | # plot the noisy reconstruction 69 | omp.fit(X, y_noisy) 70 | coef = omp.coef_ 71 | (idx_r,) = coef.nonzero() 72 | plt.subplot(3, 2, 5) 73 | plt.xlim(0, n_components) 74 | plt.title("Orthogonal Matching Pursuit recovery (noisy measurements)") 75 | plt.stem(idx_r, coef[idx_r]) 76 | 77 | bss.fit(X, y_noisy) 78 | coef = bss.coef_ 79 | (idx_r,) = coef.nonzero() 80 | plt.subplot(3, 2, 6) 81 | plt.xlim(0, n_components) 82 | plt.title("Best Subset Selection (noisy measurements)") 83 | plt.stem(idx_r, coef[idx_r]) 84 | 85 | plt.tight_layout() 86 | plt.show() 87 | -------------------------------------------------------------------------------- /examples/plot_stepwise.py: 
--------------------------------------------------------------------------------
1 | """
2 | ========================
3 | Using stepwise estimator
4 | ========================
5 |
6 | A stepwise estimator can be used to implement stepwise fitting. It comprises several
7 | regressors, each responsible for fitting a specific set of columns of the feature matrix
8 | to the target vector and passing the residual values down to be fitted by the subsequent
9 | regressors.
10 |
11 | This example is purely for demonstration purposes and we do not expect any meaningful
12 | performance improvement.
13 |
14 | However, stepwise fitting can be useful in certain problems where groups of covariates
15 | have substantially different effects on the target vector.
16 |
17 | For example, in fitting the atomic configuration energy of a crystalline solid using a
18 | cluster expansion of an ionic system, one might want to fit the energy to single-site
19 | features first, then subtract those main effects from the target and fit the residual
20 | of the energy to the remaining cluster interactions.
21 | """
22 |
23 | import matplotlib.pyplot as plt
24 | import numpy as np
25 | from sklearn.datasets import make_regression
26 | from sklearn.linear_model import Lasso, Ridge
27 | from sklearn.metrics import mean_squared_error, r2_score
28 | from sklearn.model_selection import KFold, train_test_split
29 |
30 | from sparselm.model_selection import GridSearchCV
31 | from sparselm.stepwise import StepwiseEstimator
32 |
33 | X, y, coef = make_regression(
34 | n_samples=200,
35 | n_features=100,
36 | n_informative=10,
37 | noise=40.0,
38 | bias=-15.0,
39 | coef=True,
40 | random_state=0,
41 | )
42 |
43 | X_train, X_test, y_train, y_test = train_test_split(
44 | X, y, test_size=0.25, random_state=0
45 | )
46 |
47 | # Create estimators for each step.
48 | # Only the first estimator is allowed to fit_intercept!
49 | ridge = Ridge(fit_intercept=True)
50 | lasso = Lasso(fit_intercept=False)
51 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0)
52 | params = {"alpha": np.logspace(-1, 1, 10)}
53 | estimator1 = GridSearchCV(ridge, params, cv=cv5, n_jobs=-1)
54 | estimator2 = GridSearchCV(lasso, params, cv=cv5, n_jobs=-1)
55 |
56 | # Create a StepwiseEstimator. It can be composed of either
57 | # regressors or GridSearchCV and LineSearchCV optimizers.
58 | # In this case, we first fit the target vector to the first 3
59 | # and the last feature, then fit the residual vector to the rest
60 | # of the features with GridSearchCV to optimize the Lasso
61 | # hyperparameter.
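# Note: the second argument to StepwiseEstimator below lists the feature (column)
# indices handled by each step; in this example features 0, 1, 2 and 99 go to the
# ridge step and features 3-98 go to the lasso step, covering every feature once.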
62 | stepwise = StepwiseEstimator( 63 | [("est", estimator1), ("est2", estimator2)], ((0, 1, 2, 99), tuple(range(3, 99))) 64 | ) 65 | 66 | # fit models on training data 67 | stepwise.fit(X_train, y_train) 68 | 69 | # calculate model performance on test and train data 70 | stepwise_train = { 71 | "r2": r2_score(y_train, stepwise.predict(X_train)), 72 | "rmse": np.sqrt(mean_squared_error(y_train, stepwise.predict(X_train))), 73 | } 74 | 75 | stepwise_test = { 76 | "r2": r2_score(y_test, stepwise.predict(X_test)), 77 | "rmse": np.sqrt(mean_squared_error(y_test, stepwise.predict(X_test))), 78 | } 79 | 80 | print("Lasso performance metrics:") 81 | print(f" train r2: {stepwise_train['r2']:.3f}") 82 | print(f" test r2: {stepwise_test['r2']:.3f}") 83 | print(f" train rmse: {stepwise_train['rmse']:.3f}") 84 | print(f" test rmse: {stepwise_test['rmse']:.3f}") 85 | 86 | # plot model coefficients 87 | fig, ax = plt.subplots() 88 | ax.plot(coef, "o", label="True coefficients") 89 | ax.plot(stepwise.coef_[[0, 1, 2, 99]], "o", label="Stepwise (ridge)", alpha=0.5) 90 | ax.plot(stepwise.coef_[range(3, 99)], "o", label="Stepwise (lasso)", alpha=0.5) 91 | ax.set_xlabel("covariate index") 92 | ax.set_ylabel("coefficient value") 93 | ax.legend() 94 | fig.show() 95 | 96 | # plot predicted values 97 | fig, ax = plt.subplots() 98 | ax.plot(y_test, stepwise.predict(X_test), "o", label="Stepwise", alpha=0.5) 99 | ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--") 100 | ax.set_xlabel("true values") 101 | ax.set_ylabel("predicted values") 102 | ax.legend() 103 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=45", "setuptools-scm[toml]>=6.2"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "sparse-lm" 7 | description = "Sparse linear regression models" 8 | authors = [ 9 | {name = "Luis Barroso-Luque", email = "lbluque@berkeley.edu"} 10 | ] 11 | readme = "README.md" 12 | license = {text = "BSD 3-Clause License"} 13 | dynamic = ["version"] 14 | dependencies = [ 15 | "numpy >=1.23", "cvxpy >=1.2", "scikit-learn >=1.2.1", 16 | "scipy >=1.9", "joblib" 17 | ] 18 | classifiers = [ 19 | "Development Status :: 3 - Alpha", 20 | "Programming Language :: Python :: 3 :: Only", 21 | "Programming Language :: Python :: 3.9", 22 | "Programming Language :: Python :: 3.10", 23 | "Intended Audience :: Science/Research", 24 | "License :: OSI Approved :: BSD License", 25 | "Operating System :: OS Independent", 26 | "Topic :: Scientific/Engineering :: Information Analysis", 27 | "Topic :: Scientific/Engineering :: Mathematics", 28 | "Topic :: Software Development :: Libraries :: Python Modules" 29 | ] 30 | 31 | [project.optional-dependencies] 32 | dev = ["pre-commit", "black", "isort", "flake8", "pylint", "pydocstyle", "flake8-pyproject"] 33 | # Gurobipy needed by mixedL0 tests, pandas needed by sklearn convention checks. 
34 | tests = ["pytest >=7.2.0", "pytest-cov >=4.0.0", "coverage", "pandas", "gurobipy", "pyscipopt"] 35 | docs = ["sphinx>=5.3", "furo", "m2r2", "sphinx-gallery", "matplotlib", "gurobipy", "pymatgen"] 36 | optional = ["gurobipy"] 37 | 38 | # pyproject.toml 39 | [tool.setuptools_scm] 40 | 41 | # linting tools, etc 42 | [tool.pytest.ini_options] 43 | minversion = "6.0" 44 | addopts = "-x --durations = 30 --quiet -rxXs --color = yes" 45 | filterwarnings = [ 46 | 'ignore::UserWarning', 47 | 'ignore::FutureWarning', 48 | 'ignore::RuntimeWarning' 49 | ] 50 | 51 | [tool.flake8] 52 | exclude = ['docs', 'tests'] 53 | ignore = ['E203', 'E501', 'W503'] 54 | max-line-length = 88 55 | 56 | [tool.pylint.main] 57 | ignore = ["tests"] 58 | 59 | [tool.pylint.basic] 60 | argument-naming-style = "snake_case" 61 | attr-naming-style = "snake_case" 62 | method-naming-style = "snake_case" 63 | function-naming-style = "snake_case" 64 | class-naming-style = "PascalCase" 65 | good-names = ['id', 'kB', 'i', 'j', 'k', 'f'] 66 | 67 | [too.pylint."messages control"] 68 | disable = ['W0511', 'R0904', 'R0903', 'R0913', 'R0902', 'R0914', 'C0415'] 69 | 70 | [tool.codespell] 71 | skip = "*.c,./.*" 72 | count = '' 73 | quiet-level = 3 74 | ignore-words-list = ['nd', 'tread'] 75 | 76 | [tool.coverage.run] 77 | source = ["src/sparselm"] 78 | omit = ["*/__init__.py"] 79 | 80 | [tool.pydocstyle] 81 | convention = "google" 82 | add_ignore = ["D107"] 83 | 84 | [[tool.mypy.overrides]] 85 | module = ["sklearn.*", "scipy.linalg"] 86 | ignore_missing_imports = true 87 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy >=1.23 2 | cvxpy >=1.2 3 | scikit-learn > 1.2 4 | scipy >=1.9 5 | joblib 6 | -------------------------------------------------------------------------------- /src/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scikit-learn 3 | cvxpy 4 | scipy 5 | joblib 6 | -------------------------------------------------------------------------------- /src/sparselm/__init__.py: -------------------------------------------------------------------------------- 1 | """Classes implementing generalized linear regression Regressors.""" 2 | 3 | from importlib.metadata import PackageNotFoundError, version 4 | 5 | try: 6 | __version__ = version("sparse-lm") 7 | except PackageNotFoundError: 8 | # package is not installed 9 | __version__ = "" 10 | -------------------------------------------------------------------------------- /src/sparselm/_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CederGroupHub/sparse-lm/220cbbad4a5ac98d1a52326c525aadb95f2c5b18/src/sparselm/_utils/__init__.py -------------------------------------------------------------------------------- /src/sparselm/_utils/validation.py: -------------------------------------------------------------------------------- 1 | """Data and hyper-parameters validation utilities.""" 2 | 3 | from __future__ import annotations 4 | 5 | import numpy as np 6 | from numpy.typing import NDArray 7 | 8 | 9 | def _check_groups( 10 | groups: NDArray[np.floating | np.integer] | list[int | float] | None, 11 | n_features: int, 12 | ) -> None: 13 | """Check that groups are 1D and of the correct length. 
14 | 15 | Args: 16 | groups (NDArray): 17 | List of group labels 18 | n_features (int): 19 | Number of features/covariates being fit 20 | 21 | """ 22 | if groups is None: 23 | return 24 | 25 | if not isinstance(groups, (list, np.ndarray)): 26 | raise TypeError("groups must be a list or ndarray") 27 | 28 | groups = np.asarray(groups).astype(int) 29 | if groups.ndim != 1: 30 | raise ValueError("groups must be a 1D array") 31 | 32 | if len(groups) != n_features: 33 | raise ValueError( 34 | f"groups must be the same length as the number of features {n_features}" 35 | ) 36 | 37 | 38 | def _check_group_weights( 39 | group_weights: NDArray[np.floating] | None, n_groups: int 40 | ) -> None: 41 | """Check that group weights are 1D and of the correct length. 42 | 43 | Args: 44 | group_weights (NDArray): 45 | List of group weights 46 | n_groups (int): 47 | Number of groups 48 | """ 49 | if group_weights is None: 50 | return 51 | 52 | if not isinstance(group_weights, (list, np.ndarray)): 53 | raise TypeError("group_weights must be a list or ndarray") 54 | 55 | group_weights = np.asarray(group_weights) 56 | if len(group_weights) != n_groups: 57 | raise ValueError( 58 | f"group_weights must be the same length as the number of groups {len(group_weights)} != {n_groups}" 59 | ) 60 | -------------------------------------------------------------------------------- /src/sparselm/dataset.py: -------------------------------------------------------------------------------- 1 | """Generate synthemetic datasets akin to sklearn.datasets.""" 2 | 3 | from __future__ import annotations 4 | 5 | import warnings 6 | from typing import Sequence 7 | 8 | import numpy as np 9 | from numpy.random import RandomState 10 | from sklearn.datasets import make_regression 11 | from sklearn.utils import check_random_state 12 | 13 | 14 | def make_group_regression( 15 | n_samples: int = 100, 16 | n_groups: int = 20, 17 | n_features_per_group: int | Sequence = 10, 18 | n_informative_groups: int = 5, 19 | frac_informative_in_group: float = 1.0, 20 | bias: float = 0.0, 21 | effective_rank: int | None = None, 22 | tail_strength: float = 0.5, 23 | noise: float = 0.0, 24 | shuffle: bool = True, 25 | coef: bool = False, 26 | random_state: int | RandomState | None = None, 27 | ) -> tuple[np.ndarray, ...]: 28 | """Generate a random regression problem with grouped covariates. 29 | 30 | Args: 31 | n_samples (int, optional): 32 | Number of samples to generate. 33 | n_groups (int, optional): 34 | Number of groups to generate. 35 | n_features_per_group (int | Sequence, optional): 36 | Number of features per group to generate. If a sequence is passed the 37 | length must be equal to n_groups then each element will be the number of 38 | features in the corresponding group. 39 | n_informative_groups (int, optional): 40 | Number of informative groups. 41 | frac_informative_in_group (float, optional): 42 | Fraction of informative features in each group 43 | The number of features will be rounded to nearest int. 44 | bias (float, optional): 45 | Bias added to the decision function. 46 | effective_rank ([type], optional): 47 | Approximate number of singular vectors 48 | required to explain most of the input data by linear combinations. 49 | tail_strength (float, optional): 50 | Relative importance of the fat noisy tail 51 | of the singular values profile if `effective_rank` is not None. 52 | noise (float, optional): 53 | Standard deviation of the gaussian noise applied to the output. 54 | shuffle (bool, optional): 55 | Shuffle the samples and the features. 
Defaults to True. 56 | coef (bool, optional): 57 | If True, the coefficients of the underlying linear model are returned. 58 | random_state ([type], optional): 59 | Random state for dataset generation. 60 | 61 | Returns: 62 | tuple[np.ndarray, np.ndarray, np.ndarray, ...]: 63 | X, y, groups, coefficients (optional) 64 | """ 65 | generator = check_random_state(random_state) 66 | 67 | informative_groups = list(range(n_informative_groups)) 68 | 69 | if isinstance(n_features_per_group, int): 70 | n_features = n_features_per_group * n_groups 71 | n_informative_in_group = round(frac_informative_in_group * n_features_per_group) 72 | n_informative = n_informative_in_group * n_informative_groups 73 | # make n_features_per_group a list of length n_groups 74 | n_features_per_group = [n_features_per_group] * n_groups 75 | n_informative_per_group = [n_informative_in_group] * n_informative_groups 76 | else: 77 | if len(n_features_per_group) == n_groups: 78 | n_features = sum(n_features_per_group) 79 | n_informative_per_group = [ 80 | round(frac_informative_in_group * n_features_per_group[i]) 81 | for i in informative_groups 82 | ] 83 | n_informative = sum(n_informative_per_group) 84 | else: 85 | raise ValueError( 86 | "If passing a sequence of n_features_per_group, the length must be " 87 | "equal to n_groups." 88 | ) 89 | 90 | if any(n < 1 for n in n_informative_per_group): 91 | warnings.warn( 92 | "The number of features and fraction of informative features per group resulted in " 93 | "informative groups having no informative features.", 94 | UserWarning, 95 | ) 96 | 97 | X, y, coefs = make_regression( 98 | n_samples=n_samples, 99 | n_features=n_features, 100 | n_informative=n_informative, 101 | bias=bias, 102 | effective_rank=effective_rank, 103 | tail_strength=tail_strength, 104 | noise=noise, 105 | shuffle=shuffle, 106 | coef=True, 107 | random_state=generator, 108 | ) 109 | 110 | # assign coefficients to groups 111 | groups = np.zeros(n_features, dtype=int) 112 | informative_coef_inds = np.nonzero(coefs > noise)[0].tolist() 113 | other_coef_inds = np.nonzero(coefs <= noise)[0].tolist() 114 | 115 | for i, nfg in enumerate(n_features_per_group): 116 | if i in informative_groups: 117 | nifg = n_informative_per_group[informative_groups.index(i)] 118 | ii = informative_coef_inds[:nifg] + other_coef_inds[: nfg - nifg] 119 | # remove assigned indices 120 | informative_coef_inds = informative_coef_inds[nifg:] 121 | other_coef_inds = other_coef_inds[nfg - nifg :] 122 | else: 123 | ii = other_coef_inds[:nfg] 124 | other_coef_inds = other_coef_inds[nfg:] 125 | 126 | # assign group ids 127 | groups[ii] = i 128 | 129 | if shuffle: 130 | indices = np.arange(n_features) 131 | generator.shuffle(indices) 132 | X[:, :] = X[:, indices] 133 | groups = groups[indices] 134 | coefs = coefs[indices] 135 | 136 | if coef: 137 | return X, y, groups, coefs 138 | else: 139 | return X, y, groups 140 | -------------------------------------------------------------------------------- /src/sparselm/model/__init__.py: -------------------------------------------------------------------------------- 1 | """Classes implementing generalized linear regression Regressors.""" 2 | 3 | from ._adaptive_lasso import ( 4 | AdaptiveGroupLasso, 5 | AdaptiveLasso, 6 | AdaptiveOverlapGroupLasso, 7 | AdaptiveRidgedGroupLasso, 8 | AdaptiveSparseGroupLasso, 9 | ) 10 | from ._lasso import ( 11 | GroupLasso, 12 | Lasso, 13 | OverlapGroupLasso, 14 | RidgedGroupLasso, 15 | SparseGroupLasso, 16 | ) 17 | from ._miqp import ( 18 | L1L0, 19 | L2L0, 20 | 
BestSubsetSelection,
21 | RegularizedL0,
22 | RidgedBestSubsetSelection,
23 | )
24 | from ._ols import OrdinaryLeastSquares
25 |
26 | __all__ = [
27 | "OrdinaryLeastSquares",
28 | "Lasso",
29 | "BestSubsetSelection",
30 | "RidgedBestSubsetSelection",
31 | "RegularizedL0",
32 | "L1L0",
33 | "L2L0",
34 | "GroupLasso",
35 | "OverlapGroupLasso",
36 | "SparseGroupLasso",
37 | "RidgedGroupLasso",
38 | "AdaptiveLasso",
39 | "AdaptiveGroupLasso",
40 | "AdaptiveOverlapGroupLasso",
41 | "AdaptiveSparseGroupLasso",
42 | "AdaptiveRidgedGroupLasso",
43 | ]
44 |
--------------------------------------------------------------------------------
/src/sparselm/model/_base.py:
--------------------------------------------------------------------------------
1 | """Base classes for in-house linear regression Regressors.
2 |
3 | The classes make use of and follow the scikit-learn API.
4 | """
5 |
6 | from __future__ import annotations
7 |
8 | __author__ = "Luis Barroso-Luque, Fengyu Xie"
9 |
10 | import warnings
11 | from abc import ABCMeta, abstractmethod
12 | from collections.abc import Sequence
13 | from numbers import Integral
14 | from types import SimpleNamespace
15 | from typing import Any, NamedTuple
16 |
17 | import cvxpy as cp
18 | import numpy as np
19 | from numpy.typing import NDArray
20 | from sklearn.base import RegressorMixin
21 | from sklearn.linear_model._base import (
22 | LinearModel,
23 | _check_sample_weight,
24 | _preprocess_data,
25 | _rescale_data,
26 | )
27 | from sklearn.utils._param_validation import (
28 | Interval,
29 | Options,
30 | _ArrayLikes,
31 | _Booleans,
32 | _InstancesOf,
33 | make_constraint,
34 | validate_parameter_constraints,
35 | )
36 |
37 |
38 | class CVXCanonicals(NamedTuple):
39 | """CVXpy Canonical objects representing the underlying optimization problem.
40 |
41 | Attributes:
42 | problem (cp.Problem):
43 | The cvxpy optimization problem.
44 | objective (cp.Expression):
45 | Objective function.
46 | beta (cp.Variable):
47 | Variable to be optimized (corresponds to the estimated coef_ attribute).
48 | parameters (SimpleNamespace of cp.Parameter or NDArray):
49 | SimpleNamespace with named cp.Parameter objects or NDArray of parameters.
50 | The namespace should be defined by the Regressor generating it.
51 | auxiliaries (SimpleNamespace of cp.Variable or cp.Expression):
52 | SimpleNamespace with auxiliary cp.Variable or cp.Expression objects.
53 | The namespace should be defined by the Regressor generating it.
54 | constraints (list of cp.Constraint):
55 | List of constraints intrinsic to the regression problem.
56 | user_constraints (list of cp.Constraint):
57 | List of user-defined constraints.
58 | """
59 |
60 | problem: cp.Problem
61 | objective: cp.Expression
62 | beta: cp.Variable
63 | parameters: SimpleNamespace | None
64 | auxiliaries: SimpleNamespace | None
65 | constraints: list[cp.Constraint]
66 | user_constraints: list[cp.Constraint]
67 |
68 |
69 | class CVXRegressor(RegressorMixin, LinearModel, metaclass=ABCMeta):
70 | r"""Abstract base class for Regressors using cvxpy with a sklearn interface.
71 |
72 | Note that cvxpy can use one of many third-party solvers; the default is most often
73 | CVXOPT or ECOS. For integer and mixed integer problems options include
74 | SCIP (open source) and Gurobi, among other commercial solvers.
75 |
76 | The solver can be specified by setting the solver keyword argument, and
77 | solver-specific settings can be set by passing a dictionary of
78 | solver_options.
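For example, a quick illustrative usage sketch with one of the concrete regressors
(assuming the ECOS solver is installed; max_iters is an ECOS-specific option)::

    import numpy as np
    from sparselm.model import Lasso

    X, y = np.random.rand(20, 5), np.random.rand(20)
    lasso = Lasso(
        alpha=0.1, fit_intercept=True, solver="ECOS", solver_options={"max_iters": 500}
    )
    lasso.fit(X, y)
    print(lasso.coef_, lasso.intercept_)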
79 | 80 | See "Setting solver options" in documentation for details of available options: 81 | https://www.cvxpy.org/tutorial/advanced/index.html#advanced 82 | 83 | Args: 84 | fit_intercept (bool): 85 | Whether the intercept should be estimated or not. 86 | If False, the data is assumed to be already centered. 87 | copy_X (bool): 88 | If True, X will be copied; else, it may be overwritten. 89 | warm_start (bool): 90 | When set to True, reuse the solution of the previous call to 91 | fit as initialization, otherwise, just erase the previous 92 | solution. 93 | solver (str): 94 | cvxpy backend solver to use. Supported solvers are listed here: 95 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options 96 | solver_options (dict): 97 | dictionary of keyword arguments passed to cvxpy solve. 98 | See docs linked above for more information. 99 | 100 | Attributes: 101 | coef_ (NDArray): 102 | Parameter vector (:math:`\beta` in the cost function formula) of shape 103 | (n_features,). 104 | intercept_ (float): 105 | Independent term in decision function. 106 | canonicals_ (SimpleNamespace): 107 | Namespace that contains underlying cvxpy objects used to define 108 | the optimization problem. The objects included are the following: 109 | - objective - the objective function. 110 | - beta - variable to be optimized (corresponds to the estimated 111 | coef_ attribute). 112 | - parameters - hyper-parameters 113 | - auxiliaries - auxiliary variables and expressions 114 | - constraints - solution constraints 115 | """ 116 | 117 | # parameter constraints that do not need any cvxpy Parameter object 118 | _parameter_constraints: dict[str, list[Any]] = { 119 | "fit_intercept": ["boolean"], 120 | "copy_X": ["boolean"], 121 | "warm_start": ["boolean"], 122 | "solver": [Options(type=str, options=set(cp.installed_solvers())), None], 123 | "solver_options": [dict, None], 124 | } 125 | # parameter constraints that require a cvxpy Parameter object in problem definition 126 | _cvx_parameter_constraints: dict[str, list[Any]] | None = None 127 | 128 | def __init__( 129 | self, 130 | fit_intercept: bool = False, 131 | copy_X: bool = True, 132 | warm_start: bool = False, 133 | solver: str | None = None, 134 | solver_options: dict[str, Any] | None = None, 135 | ): 136 | self.fit_intercept = fit_intercept 137 | self.copy_X = copy_X 138 | self.warm_start = warm_start 139 | self.solver = solver 140 | self.solver_options = solver_options 141 | 142 | def fit( 143 | self, 144 | X: NDArray, 145 | y: NDArray, 146 | sample_weight: NDArray[np.floating] | None = None, 147 | *args, 148 | **kwargs, 149 | ): 150 | """Fit the linear model coefficients. 151 | 152 | Prepares the fit data input, generates cvxpy objects to represent the 153 | minimization objective, and solves the regression problem using the given 154 | solver. 155 | 156 | Args: 157 | X (NDArray): 158 | Training data of shape (n_samples, n_features). 159 | y (NDArray): 160 | Target values. 
Will be cast to X's dtype if necessary 161 | of shape (n_samples,) or (n_samples, n_targets) 162 | sample_weight (NDArray): 163 | Individual weights for each sample of shape (n_samples,) 164 | default=None 165 | *args: 166 | Positional arguments passed to solve method 167 | **kwargs: 168 | Keyword arguments passed to solve method 169 | 170 | Returns: 171 | instance of self 172 | """ 173 | X, y = self._validate_data( 174 | X, y, accept_sparse=False, y_numeric=True, multi_output=False 175 | ) 176 | 177 | X, y, X_offset, y_offset, X_scale = self._preprocess_data(X, y, sample_weight) 178 | 179 | self._validate_params(X, y) 180 | 181 | # TODO test theses cases 182 | if not hasattr(self, "canonicals_"): 183 | self.generate_problem(X, y, preprocess_data=False) 184 | elif not np.array_equal(self.cached_X_, X) or not np.array_equal( 185 | self.cached_y_, y 186 | ): 187 | if self.canonicals_.user_constraints: 188 | warnings.warn( 189 | "User constraints are set on a problem with different data (X, y). " 190 | "These constraints will be ignored.", 191 | UserWarning, 192 | ) 193 | self.generate_problem(X, y, preprocess_data=False) 194 | else: 195 | self._set_param_values() # set parameter values 196 | 197 | solver_options = self.solver_options if self.solver_options is not None else {} 198 | if not isinstance(solver_options, dict): 199 | raise TypeError("solver_options must be a dictionary") 200 | 201 | self.coef_ = self._solve(X, y, solver_options, *args, **kwargs) 202 | self._set_intercept(X_offset, y_offset, X_scale) 203 | 204 | # return self for chaining fit and predict calls 205 | return self 206 | 207 | def _preprocess_data( 208 | self, X: NDArray, y: NDArray, sample_weight: NDArray[np.floating] | None = None 209 | ) -> tuple[NDArray, NDArray, NDArray, NDArray, NDArray]: 210 | """Preprocess data for fitting.""" 211 | if sample_weight is not None: 212 | sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype) 213 | # rescale sample_weight to sum to number of samples 214 | sample_weight = sample_weight * (X.shape[0] / np.sum(sample_weight)) # type: ignore 215 | 216 | X, y, X_offset, y_offset, X_scale = _preprocess_data( 217 | X, 218 | y, 219 | copy=self.copy_X, 220 | fit_intercept=self.fit_intercept, 221 | sample_weight=sample_weight, 222 | ) 223 | 224 | if sample_weight is not None: 225 | X, y, _ = _rescale_data(X, y, sample_weight) 226 | 227 | return X, y, X_offset, y_offset, X_scale 228 | 229 | def _validate_params(self, X: NDArray, y: NDArray) -> None: 230 | """Validate hyperparameter values. 231 | 232 | Implement this in an Regressor for additional parameter value validation. 
233 | """ 234 | if self._cvx_parameter_constraints is None: 235 | parameter_constraints = self._parameter_constraints 236 | else: 237 | parameter_constraints = { 238 | **self._parameter_constraints, 239 | **self._cvx_parameter_constraints, 240 | } 241 | validate_parameter_constraints( 242 | parameter_constraints, 243 | self.get_params(deep=False), 244 | caller_name=self.__class__.__name__, 245 | ) 246 | 247 | def _set_param_values(self) -> None: 248 | """Set the values of cvxpy parameters from param attributes for warm starts.""" 249 | if self._cvx_parameter_constraints is None: 250 | return 251 | 252 | for parameter, value in self.get_params(deep=False).items(): 253 | if parameter in self._cvx_parameter_constraints: 254 | cvx_parameter = getattr(self.canonicals_.parameters, parameter) 255 | # check for parameters that take a scalar or an array 256 | if isinstance(value, np.ndarray) or isinstance(value, Sequence): 257 | if len(value) == 1: 258 | value = value * np.ones_like(cvx_parameter.value) 259 | else: 260 | value = np.asarray(value) 261 | cvx_parameter.value = value 262 | 263 | def _generate_params(self, X: NDArray, y: NDArray) -> SimpleNamespace: 264 | """Return the named tuple of cvxpy parameters for optimization problem. 265 | 266 | The cvxpy Parameters must be given values when generating. 267 | 268 | Args: 269 | X (NDArray): 270 | Covariate/Feature matrix 271 | y (NDArray): 272 | Target vector 273 | 274 | Returns: 275 | NamedTuple of cvxpy parameters 276 | """ 277 | cvx_parameters = {} 278 | cvx_constraints = ( 279 | {} 280 | if self._cvx_parameter_constraints is None 281 | else self._cvx_parameter_constraints 282 | ) 283 | for param_name, param_val in self.get_params(deep=False).items(): 284 | if param_name not in cvx_constraints: 285 | continue 286 | 287 | # make constraints sklearn constraint objects 288 | constraints = [ 289 | make_constraint(constraint) 290 | for constraint in cvx_constraints[param_name] 291 | ] 292 | 293 | # For now we will only set nonneg, nonpos, neg, pos, integer, boolean and/or 294 | # shape of the cvxpy Parameter objects. 
295 | # TODO cxvpy only allows a single one of these to be set (except bool and integer) 296 | param_kwargs = {} 297 | for constraint in constraints: 298 | if isinstance(constraint, _ArrayLikes): 299 | if not hasattr(param_val, "shape"): 300 | param_val = np.asarray(param_val) 301 | 302 | param_kwargs["shape"] = param_val.shape 303 | 304 | if isinstance(constraint, _Booleans): 305 | param_kwargs["boolean"] = True 306 | 307 | if isinstance(constraint, _InstancesOf): 308 | if constraint.is_satisfied_by(True): # is it boolean 309 | param_kwargs["boolean"] = True 310 | elif constraint.is_satisfied_by(5): # is it integer 311 | param_kwargs["integer"] = True 312 | 313 | if isinstance(constraint, Interval): 314 | if constraint.type is Integral: 315 | param_kwargs["integer"] = True 316 | if constraint.left is not None: 317 | if constraint.left == 0: 318 | if constraint.closed in ("left", "both"): 319 | param_kwargs["nonneg"] = True 320 | else: 321 | param_kwargs["pos"] = True 322 | elif constraint.left > 0: 323 | param_kwargs["pos"] = True 324 | if constraint.right is not None: 325 | if constraint.right == 0: 326 | if constraint.closed in ("right", "both"): 327 | param_kwargs["nonpos"] = True 328 | else: 329 | param_kwargs["neg"] = True 330 | elif constraint.right < 0: 331 | param_kwargs["neg"] = True 332 | cvx_parameters[param_name] = cp.Parameter( 333 | value=param_val, **param_kwargs 334 | ) 335 | 336 | return SimpleNamespace(**cvx_parameters) 337 | 338 | def _generate_auxiliaries( 339 | self, X: NDArray, y: NDArray, beta: cp.Variable, parameters: SimpleNamespace 340 | ) -> SimpleNamespace | None: 341 | """Generate any auxiliary variables/expressions necessary to define objective. 342 | 343 | Args: 344 | X (NDArray): 345 | Covariate/Feature matrix 346 | y (NDArray): 347 | Target vector 348 | beta (cp.Variable): 349 | cp.Variable representing the estimated coefs_ 350 | parameters (SimpleNamespace): 351 | SimpleNamespace of cvxpy parameters. 352 | 353 | Returns: 354 | SimpleNamespace of cp.Variable for auxiliary variables 355 | """ 356 | return None 357 | 358 | @abstractmethod 359 | def _generate_objective( 360 | self, 361 | X: NDArray, 362 | y: NDArray, 363 | beta: cp.Variable, 364 | parameters: SimpleNamespace | None = None, 365 | auxiliaries: SimpleNamespace | None = None, 366 | ) -> cp.Expression: 367 | """Define the cvxpy objective function represeting regression model. 368 | 369 | The objective must be stated for a minimization problem. 370 | 371 | Args: 372 | X (NDArray): 373 | Covariate/Feature matrix 374 | y (NDArray): 375 | Target vector 376 | beta (cp.Variable): 377 | cp.Variable representing the estimated coefs_ 378 | parameters (SimpleNamespace): optional 379 | SimpleNamespace with cp.Parameter objects 380 | auxiliaries (SimpleNamespace): optional 381 | SimpleNamespace with auxiliary cvxpy objects 382 | 383 | Returns: 384 | cvxpy Expression 385 | """ 386 | 387 | def _generate_constraints( 388 | self, 389 | X: NDArray, 390 | y: NDArray, 391 | beta: cp.Variable, 392 | parameters: SimpleNamespace | None = None, 393 | auxiliaries: SimpleNamespace | None = None, 394 | ) -> list[cp.Constraint]: 395 | """Generate constraints for optimization problem. 
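The base implementation returns an empty list; subclasses override this method to
add constraints, such as the group selection and hierarchy constraints used by the
MIQP regressors.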
396 | 397 | Args: 398 | X (NDArray): 399 | Covariate/Feature matrix 400 | y (NDArray): 401 | Target vector 402 | beta (cp.Variable): 403 | cp.Variable representing the estimated coefs_ 404 | parameters (SimpleNamespace): optional 405 | SimpleNamespace with cp.Parameter objects 406 | auxiliaries (SimpleNamespace): optional 407 | SimpleNamespace with auxiliary cvxpy objects 408 | 409 | Returns: 410 | list of cvxpy constraints 411 | """ 412 | return [] 413 | 414 | def generate_problem( 415 | self, 416 | X: NDArray, 417 | y: NDArray, 418 | preprocess_data: bool = True, 419 | sample_weight: NDArray[np.floating] | None = None, 420 | ) -> None: 421 | """Generate regression problem and auxiliary cvxpy objects. 422 | 423 | This initializes the minimization problem, the objective, coefficient variable 424 | (beta), problem parameters, solution constraints, and auxiliary variables/terms. 425 | 426 | This is (almost always) called in the fit method, and not directly. However, it 427 | can be called directly if further control over the problem is needed by 428 | accessing the canonicals_ objects. For example to add additional constraints on 429 | problem variables. 430 | 431 | Args: 432 | X (NDArray): 433 | Covariate/Feature matrix 434 | y (NDArray): 435 | Target vector 436 | preprocess_data (bool): 437 | Whether to preprocess the data before generating the problem. If calling 438 | generate_problem directly, this should be kept as True to ensure the 439 | problem is generated correctly for a subsequent call to fit. 440 | sample_weight (NDArray): 441 | Individual weights for each sample of shape (n_samples,) 442 | default=None. Only used if preprocess_data=True to rescale the data 443 | accordingly. 444 | """ 445 | if preprocess_data is True: 446 | X, y, _, _, _ = self._preprocess_data(X, y, sample_weight) 447 | 448 | # X, y are cached to avoid re-generating problem if fit is called again with 449 | # same data 450 | self.cached_X_ = X 451 | self.cached_y_ = y 452 | 453 | beta = cp.Variable(X.shape[1]) 454 | parameters = self._generate_params(X, y) 455 | auxiliaries = self._generate_auxiliaries(X, y, beta, parameters) 456 | objective = self._generate_objective(X, y, beta, parameters, auxiliaries) 457 | constraints = self._generate_constraints(X, y, beta, parameters, auxiliaries) 458 | problem = cp.Problem(cp.Minimize(objective), constraints) 459 | self.canonicals_ = CVXCanonicals( 460 | problem=problem, 461 | objective=objective, 462 | beta=beta, 463 | parameters=parameters, 464 | auxiliaries=auxiliaries, 465 | constraints=constraints, 466 | user_constraints=[], 467 | ) 468 | 469 | def add_constraints(self, constraints: list[cp.Constraint]) -> None: 470 | """Add a constraint to the problem. 471 | 472 | .. Warning:: 473 | Adding constraints will not work with any sklearn class that relies on 474 | cloning the estimator (ie GridSearchCV, etc) . This is because a new cvxpy 475 | problem is generated for any cloned estimator. 476 | 477 | Args: 478 | constraints (list of cp.constraint or cp.expressions): 479 | cvxpy constraint to add to the problem 480 | """ 481 | if not hasattr(self, "canonicals_"): 482 | raise RuntimeError( 483 | "Problem has not been generated. Please call generate_problem before" 484 | " adding constraints." 
485 | )
486 | self.canonicals_.user_constraints.extend(list(constraints))
487 | # need to reset problem to update constraints
488 | self._reset_problem()
489 |
490 | def _reset_problem(self) -> None:
491 | """Reset the cvxpy problem."""
492 | if not hasattr(self, "canonicals_"):
493 | raise RuntimeError(
494 | "Problem has not been generated. Please call generate_problem before"
495 | " resetting."
496 | )
497 |
498 | problem = cp.Problem(
499 | cp.Minimize(self.canonicals_.objective),
500 | self.canonicals_.constraints + self.canonicals_.user_constraints,
501 | )
502 | self.canonicals_ = CVXCanonicals(
503 | problem=problem,
504 | objective=self.canonicals_.objective,
505 | beta=self.canonicals_.beta,
506 | parameters=self.canonicals_.parameters,
507 | auxiliaries=self.canonicals_.auxiliaries,
508 | constraints=self.canonicals_.constraints,
509 | user_constraints=self.canonicals_.user_constraints,
510 | )
511 |
512 | def _solve(
513 | self, X: NDArray, y: NDArray, solver_options: dict, *args, **kwargs
514 | ) -> NDArray[np.floating]:
515 | """Solve the cvxpy problem."""
516 | self.canonicals_.problem.solve(
517 | solver=self.solver, warm_start=self.warm_start, **solver_options
518 | )
519 | return self.canonicals_.beta.value
520 |
521 |
522 | class TikhonovMixin:
523 | """Mixin class to add a Tikhonov/ridge regularization term.
524 |
525 | When using this Mixin, a cvxpy parameter named "eta" should be saved in the
526 | parameters SimpleNamespace. An attribute tikhonov_w can be set to supply a weight
527 | matrix; otherwise a simple l2/Ridge regularization is used.
528 | """
529 |
530 | def _generate_objective(
531 | self,
532 | X: NDArray,
533 | y: NDArray,
534 | beta: cp.Variable,
535 | parameters: SimpleNamespace | None = None,
536 | auxiliaries: SimpleNamespace | None = None,
537 | ) -> cp.Expression:
538 | """Add a Tikhonov regularization term to the objective function."""
539 | if hasattr(self, "tikhonov_w") and self.tikhonov_w is not None:
540 | tikhonov_w = self.tikhonov_w
541 | else:
542 | tikhonov_w = np.eye(X.shape[1])
543 | assert parameters is not None and hasattr(parameters, "eta")
544 | c0 = 2 * X.shape[0] # keeps hyperparameter scale independent
545 | objective = super()._generate_objective(X, y, beta, parameters, auxiliaries) # type: ignore
546 | objective += c0 * parameters.eta * cp.sum_squares(tikhonov_w @ beta)
547 |
548 | return objective
549 |
--------------------------------------------------------------------------------
/src/sparselm/model/_miqp/__init__.py:
--------------------------------------------------------------------------------
1 | """MIQP based regression Regressors."""
2 |
3 | from ._best_subset import BestSubsetSelection, RidgedBestSubsetSelection
4 | from ._regularized_l0 import L1L0, L2L0, RegularizedL0
5 |
6 | __all__ = [
7 | "BestSubsetSelection",
8 | "RidgedBestSubsetSelection",
9 | "RegularizedL0",
10 | "L1L0",
11 | "L2L0",
12 | ]
13 |
--------------------------------------------------------------------------------
/src/sparselm/model/_miqp/_base.py:
--------------------------------------------------------------------------------
1 | """Base class for mixed-integer quadratic programming l0 pseudo norm based Regressors."""
2 |
3 | from __future__ import annotations
4 |
5 | __author__ = "Luis Barroso-Luque"
6 |
7 | from abc import ABCMeta, abstractmethod
8 | from numbers import Real
9 | from types import SimpleNamespace
10 | from typing import Any
11 |
12 | import cvxpy as cp
13 | import numpy as np
14 | from cvxpy.atoms.affine.wraps import psd_wrap
15 | from
numpy.typing import NDArray 16 | from sklearn.utils._param_validation import Interval 17 | 18 | from ..._utils.validation import _check_groups 19 | from .._base import CVXRegressor 20 | 21 | 22 | class MIQPl0(CVXRegressor, metaclass=ABCMeta): 23 | r"""Base class for mixed-integer quadratic programming (MIQP) Regressors. 24 | 25 | Generalized l0 formulation that allows grouping coefficients, based on: 26 | 27 | https://doi.org/10.1287/opre.2015.1436 28 | 29 | Args: 30 | groups (list or ndarray): 31 | array-like of integers specifying groups. Length should be the 32 | same as model, where each integer entry specifies the group 33 | each parameter corresponds to. If no grouping is required, simply 34 | pass a list of all different numbers, i.e. using range. 35 | big_M (float): 36 | Upper bound on the norm of coefficients associated with each 37 | groups of coefficients :math:`||\beta_c||_2`. 38 | hierarchy (list): 39 | A list of lists of integers storing hierarchy relations between 40 | coefficients. 41 | Each sublist contains indices of other coefficients 42 | on which the coefficient associated with each element of 43 | the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that 44 | coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no 45 | dependence. 46 | ignore_psd_check (bool): 47 | Whether to ignore cvxpy's PSD checks of matrix used in quadratic 48 | form. Default is True to avoid raising errors for poorly 49 | conditioned matrices. But if you want to be strict set to False. 50 | fit_intercept (bool): 51 | Whether the intercept should be estimated or not. 52 | If False, the data is assumed to be already centered. 53 | copy_X (bool): 54 | If True, X will be copied; else, it may be overwritten. 55 | warm_start (bool): 56 | When set to True, reuse the solution of the previous call to 57 | fit as initialization, otherwise, just erase the previous 58 | solution. 59 | solver (str): 60 | cvxpy backend solver to use. Supported solvers are listed here: 61 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options 62 | solver_options (dict): 63 | dictionary of keyword arguments passed to cvxpy solve. 64 | See docs in CVXRegressor for more information. 
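Example:
    Grouping and hierarchy can, for instance, be specified as follows using one of
    the concrete subclasses in this package (illustrative values only)::

        from sparselm.model import L2L0

        # coefficients 0 and 1 form group 0; coefficients 2 and 3 are the singleton
        # groups 1 and 2. hierarchy=[[1], [], []] means group 0 may only be selected
        # when group 1 is selected.
        regressor = L2L0(groups=[0, 0, 1, 2], alpha=1e-4, eta=1e-6, hierarchy=[[1], [], []])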
65 | """ 66 | 67 | _parameter_constraints: dict[str, list[Any]] = { 68 | "ignore_psd_check": ["boolean"], 69 | **CVXRegressor._parameter_constraints, 70 | } 71 | 72 | _cvx_parameter_constraints: dict[str, list[Any]] = { 73 | "big_M": [Interval(type=Real, left=0.0, right=None, closed="left")] 74 | } 75 | 76 | @abstractmethod # force inspect.isabstract to return True 77 | def __init__( 78 | self, 79 | groups: NDArray[np.floating | np.integer] | None = None, 80 | big_M: int = 100, 81 | hierarchy: list[list[int]] | None = None, 82 | ignore_psd_check: bool = True, 83 | fit_intercept: bool = False, 84 | copy_X: bool = True, 85 | warm_start: bool = False, 86 | solver: str | None = None, 87 | solver_options: dict | None = None, 88 | ): 89 | super().__init__( 90 | fit_intercept=fit_intercept, 91 | copy_X=copy_X, 92 | warm_start=warm_start, 93 | solver=solver, 94 | solver_options=solver_options, 95 | ) 96 | 97 | self.hierarchy = hierarchy 98 | self.ignore_psd_check = ignore_psd_check 99 | self.groups = groups 100 | self.big_M = big_M 101 | 102 | def _validate_params(self, X: NDArray, y: NDArray) -> None: 103 | """Validate parameters.""" 104 | super()._validate_params(X, y) 105 | _check_groups(self.groups, X.shape[1]) 106 | 107 | def _generate_auxiliaries( 108 | self, X: NDArray, y: NDArray, beta: cp.Variable, parameters: SimpleNamespace 109 | ) -> SimpleNamespace | None: 110 | """Generate the boolean slack variable.""" 111 | n_groups = X.shape[1] if self.groups is None else len(np.unique(self.groups)) 112 | return SimpleNamespace(z0=cp.Variable(n_groups, boolean=True)) 113 | 114 | def _generate_objective( 115 | self, 116 | X: NDArray, 117 | y: NDArray, 118 | beta: cp.Variable, 119 | parameters: SimpleNamespace | None = None, 120 | auxiliaries: SimpleNamespace | None = None, 121 | ) -> cp.Expression: 122 | """Generate the quadratic form portion of objective.""" 123 | # psd_wrap will ignore cvxpy PSD checks, without it errors will 124 | # likely be raised since correlation matrices are usually very 125 | # poorly conditioned 126 | XTX = psd_wrap(X.T @ X) if self.ignore_psd_check else X.T @ X 127 | objective = cp.quad_form(beta, XTX) - 2 * y.T @ X @ beta 128 | # objective = cp.sum_squares(X @ self.beta_ - y) 129 | return objective 130 | 131 | def _generate_constraints( 132 | self, 133 | X: NDArray, 134 | y: NDArray, 135 | beta: cp.Variable, 136 | parameters: SimpleNamespace | None = None, 137 | auxiliaries: SimpleNamespace | None = None, 138 | ) -> list[cp.Constraint]: 139 | """Generate the constraints used to solve l0 regularization.""" 140 | assert auxiliaries is not None and parameters is not None 141 | groups = np.arange(X.shape[1]) if self.groups is None else self.groups 142 | group_masks = [groups == i for i in np.sort(np.unique(groups))] 143 | constraints = [] 144 | for i, mask in enumerate(group_masks): 145 | constraints += [ 146 | -parameters.big_M * auxiliaries.z0[i] <= beta[mask], 147 | beta[mask] <= parameters.big_M * auxiliaries.z0[i], 148 | ] 149 | 150 | if self.hierarchy is not None: 151 | constraints += self._generate_hierarchy_constraints(groups, auxiliaries.z0) 152 | 153 | return constraints 154 | 155 | def _generate_hierarchy_constraints( 156 | self, groups: NDArray, z0: cp.Variable 157 | ) -> list[cp.Constraint]: 158 | """Generate single feature hierarchy constraints.""" 159 | assert self.hierarchy is not None 160 | group_ids = np.sort(np.unique(groups)) 161 | z0_index = {gid: i for i, gid in enumerate(group_ids)} 162 | constraints = [ 163 | z0[z0_index[high_id]] <= 
z0[z0_index[sub_id]] 164 | for high_id, sub_ids in zip(group_ids, self.hierarchy) 165 | for sub_id in sub_ids 166 | ] 167 | return constraints 168 | -------------------------------------------------------------------------------- /src/sparselm/model/_miqp/_best_subset.py: -------------------------------------------------------------------------------- 1 | """MIQP based solvers for Best Subset Selection solutions. 2 | 3 | Allows hierarchy constraints similar to mixed L0 solvers. 4 | """ 5 | 6 | from __future__ import annotations 7 | 8 | __author__ = "Luis Barroso-Luque" 9 | 10 | from numbers import Real 11 | from types import SimpleNamespace 12 | from typing import Any 13 | 14 | import cvxpy as cp 15 | import numpy as np 16 | from numpy.typing import NDArray 17 | from sklearn.utils._param_validation import Interval 18 | 19 | from sparselm.model._base import TikhonovMixin 20 | 21 | from ._base import MIQPl0 22 | 23 | 24 | class BestSubsetSelection(MIQPl0): 25 | r"""MIQP Best Subset Selection Regressor. 26 | 27 | Generalized best subset that allows grouping subsets. 28 | 29 | Args: 30 | groups (NDArray): 31 | array-like of integers specifying groups. Length should be the 32 | same as model, where each integer entry specifies the group 33 | each parameter corresponds to. If no grouping is required, 34 | simply pass a list of all different numbers, i.e. using range. 35 | sparse_bound (int): 36 | Upper bound on sparsity. The upper bound on total number of 37 | nonzero coefficients. 38 | big_M (float): 39 | Upper bound on the norm of coefficients associated with each 40 | groups of coefficients :math:`||\beta_c||_2`. 41 | hierarchy (list): 42 | A list of lists of integers storing hierarchy relations between 43 | coefficients. 44 | Each sublist contains indices of other coefficients 45 | on which the coefficient associated with each element of 46 | the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that 47 | coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no 48 | dependence. 49 | ignore_psd_check (bool): 50 | Whether to ignore cvxpy's PSD checks of matrix used in 51 | quadratic form. Default is True to avoid raising errors for 52 | poorly conditioned matrices. But if you want to be strict set 53 | to False. 54 | fit_intercept (bool): 55 | Whether the intercept should be estimated or not. 56 | If False, the data is assumed to be already centered. 57 | copy_X (bool): 58 | If True, X will be copied; else, it may be overwritten. 59 | warm_start (bool): 60 | When set to True, reuse the solution of the previous call to 61 | fit as initialization, otherwise, just erase the previous 62 | solution. 63 | solver (str): 64 | cvxpy backend solver to use. Supported solvers are listed here: 65 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options 66 | solver_options (dict): 67 | dictionary of keyword arguments passed to cvxpy solve. 68 | See docs in CVXRegressor for more information. 69 | 70 | Attributes: 71 | coef_ (NDArray): 72 | Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,). 73 | intercept_ (float): 74 | Independent term in decision function. 75 | canonicals_ (SimpleNamespace): 76 | Namespace that contains underlying cvxpy objects used to define 77 | the optimization problem. The objects included are the following: 78 | - objective - the objective function. 79 | - beta - variable to be optimized (corresponds to the estimated coef_ attribute). 
80 | - parameters - hyper-parameters 81 | - auxiliaries - auxiliary variables and expressions 82 | - constraints - solution constraints 83 | 84 | Note: 85 | Installation of Gurobi is not a must, but highly recommended. An open source alternative 86 | is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues. 87 | See the Mixed-integer programs section of the cvxpy docs: 88 | https://www.cvxpy.org/tutorial/advanced/index.html 89 | 90 | Warning: 91 | Even with gurobi solver, this can take a very long time to converge for large problems and under-determined 92 | problems. 93 | """ 94 | 95 | _cvx_parameter_constraints: dict[str, list[Any]] = { 96 | "sparse_bound": [Interval(type=Real, left=0, right=None, closed="left")], 97 | **MIQPl0._cvx_parameter_constraints, 98 | } 99 | 100 | def __init__( 101 | self, 102 | groups: NDArray[np.floating | np.integer] | None = None, 103 | sparse_bound=100, 104 | big_M: int = 100, 105 | hierarchy: list[list[int]] | None = None, 106 | ignore_psd_check: bool = True, 107 | fit_intercept: bool = False, 108 | copy_X: bool = True, 109 | warm_start: bool = False, 110 | solver: str | None = None, 111 | solver_options: dict | None = None, 112 | ): 113 | super().__init__( 114 | groups=groups, 115 | big_M=big_M, 116 | hierarchy=hierarchy, 117 | ignore_psd_check=ignore_psd_check, 118 | fit_intercept=fit_intercept, 119 | copy_X=copy_X, 120 | warm_start=warm_start, 121 | solver=solver, 122 | solver_options=solver_options, 123 | ) 124 | self.sparse_bound = sparse_bound 125 | 126 | def _generate_constraints( 127 | self, 128 | X: NDArray, 129 | y: NDArray, 130 | beta: cp.Variable, 131 | parameters: SimpleNamespace | None = None, 132 | auxiliaries: SimpleNamespace | None = None, 133 | ) -> list[cp.Constraint]: 134 | """Generate the constraints for best subset selection.""" 135 | assert parameters is not None 136 | assert auxiliaries is not None 137 | constraints = super()._generate_constraints(X, y, beta, parameters, auxiliaries) 138 | constraints += [cp.sum(auxiliaries.z0) <= parameters.sparse_bound] 139 | return constraints 140 | 141 | 142 | class RidgedBestSubsetSelection(TikhonovMixin, BestSubsetSelection): 143 | r"""MIQP best subset selection Regressor with Ridge/Tihkonov regularization. 144 | 145 | Args: 146 | groups (NDArray): 147 | array-like of integers specifying groups. Length should be the 148 | same as model, where each integer entry specifies the group 149 | each parameter corresponds to. If no grouping is required, 150 | simply pass a list of all different numbers, i.e. using range. 151 | sparse_bound (int): 152 | Upper bound on sparsity. The upper bound on total number of 153 | nonzero coefficients. 154 | eta (float): 155 | L2 regularization hyper-parameter. 156 | big_M (float): 157 | Upper bound on the norm of coefficients associated with each 158 | groups of coefficients :math:`||\beta_c||_2`. 159 | hierarchy (list): 160 | A list of lists of integers storing hierarchy relations between 161 | coefficients. 162 | Each sublist contains indices of other coefficients 163 | on which the coefficient associated with each element of 164 | the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that 165 | coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no 166 | dependence. 167 | tikhonov_w (np.array): 168 | Matrix to add weights to L2 regularization. 169 | ignore_psd_check (bool): 170 | Whether to ignore cvxpy's PSD checks of matrix used in 171 | quadratic form. 
Default is True to avoid raising errors for 172 | poorly conditioned matrices. But if you want to be strict set 173 | to False. 174 | fit_intercept (bool): 175 | Whether the intercept should be estimated or not. 176 | If False, the data is assumed to be already centered. 177 | copy_X (bool): 178 | If True, X will be copied; else, it may be overwritten. 179 | warm_start (bool): 180 | When set to True, reuse the solution of the previous call to 181 | fit as initialization, otherwise, just erase the previous 182 | solution. 183 | solver (str): 184 | cvxpy backend solver to use. Supported solvers are listed here: 185 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options 186 | solver_options (dict): 187 | dictionary of keyword arguments passed to cvxpy solve. 188 | See docs in CVXRegressor for more information. 189 | 190 | Attributes: 191 | coef_ (NDArray): 192 | Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,). 193 | intercept_ (float): 194 | Independent term in decision function. 195 | canonicals_ (SimpleNamespace): 196 | Namespace that contains underlying cvxpy objects used to define 197 | the optimization problem. The objects included are the following: 198 | - objective - the objective function. 199 | - beta - variable to be optimized (corresponds to the estimated coef_ attribute). 200 | - parameters - hyper-parameters 201 | - auxiliaries - auxiliary variables and expressions 202 | - constraints - solution constraints 203 | 204 | Note: 205 | Installation of Gurobi is not a must, but highly recommended. An open source alternative 206 | is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues. 207 | See the Mixed-integer programs section of the cvxpy docs: 208 | https://www.cvxpy.org/tutorial/advanced/index.html 209 | 210 | Warning: 211 | Even with gurobi solver, this can take a very long time to converge for large problems and under-determined 212 | problems. 213 | """ 214 | 215 | _cvx_parameter_constraints: dict[str, list[Any]] = { 216 | "eta": [Interval(type=Real, left=0.0, right=None, closed="left")], 217 | **BestSubsetSelection._cvx_parameter_constraints, 218 | } 219 | 220 | def __init__( 221 | self, 222 | groups: NDArray[np.floating | np.integer] | None = None, 223 | sparse_bound: int = 100, 224 | eta: float = 1.0, 225 | big_M: int = 100, 226 | hierarchy: list[list[int]] | None = None, 227 | tikhonov_w: NDArray[np.floating] | None = None, 228 | ignore_psd_check: bool = True, 229 | fit_intercept: bool = False, 230 | copy_X: bool = True, 231 | warm_start: bool = False, 232 | solver: str | None = None, 233 | solver_options: dict | None = None, 234 | ): 235 | super().__init__( 236 | groups=groups, 237 | sparse_bound=sparse_bound, 238 | big_M=big_M, 239 | hierarchy=hierarchy, 240 | ignore_psd_check=ignore_psd_check, 241 | fit_intercept=fit_intercept, 242 | copy_X=copy_X, 243 | warm_start=warm_start, 244 | solver=solver, 245 | solver_options=solver_options, 246 | ) 247 | self.tikhonov_w = tikhonov_w 248 | self.eta = eta 249 | -------------------------------------------------------------------------------- /src/sparselm/model/_miqp/_regularized_l0.py: -------------------------------------------------------------------------------- 1 | """MIQP based solvers for sparse solutions with hierarchical constraints. 
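# A minimal usage sketch for the best subset regressors defined above (the data,
# sparse_bound, big_M and hierarchy below are made up for illustration, and the
# fit call needs a cvxpy solver with mixed-integer support, e.g. SCIP or GUROBI).
import numpy as np
from sklearn.datasets import make_regression
from sparselm.model import BestSubsetSelection

X, y = make_regression(n_samples=50, n_features=10, n_informative=4, random_state=0)

# With groups=None each coefficient forms its own group, so hierarchy entries
# refer to individual coefficients: here coefficient 1 may only be nonzero if
# coefficient 0 is, and coefficient 2 only if coefficients 0 and 1 are.
hierarchy = [[], [0], [0, 1]] + [[] for _ in range(7)]

regressor = BestSubsetSelection(
    sparse_bound=4, big_M=1000, hierarchy=hierarchy, fit_intercept=True
)
regressor.fit(X, y)
print(np.flatnonzero(regressor.coef_))  # indices of the selected coefficients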
2 | 3 | Generalized regularized l0 solvers that allow grouping parameters as detailed in: 4 | 5 | https://doi.org/10.1287/opre.2015.1436 6 | 7 | L1L0 proposed by Wenxuan Huang: 8 | 9 | https://arxiv.org/abs/1807.10753 10 | 11 | L2L0 proposed by Peichen Zhong: 12 | 13 | https://journals.aps.org/prb/abstract/10.1103/PhysRevB.106.024203 14 | 15 | Regressors allow optional inclusion of hierarchical constraints at the single coefficient 16 | or group of coefficients level. 17 | """ 18 | 19 | from __future__ import annotations 20 | 21 | __author__ = "Luis Barroso-Luque, Fengyu Xie" 22 | 23 | 24 | from abc import ABCMeta, abstractmethod 25 | from numbers import Real 26 | from types import SimpleNamespace 27 | from typing import Any 28 | 29 | import cvxpy as cp 30 | import numpy as np 31 | from numpy.typing import NDArray 32 | from sklearn.utils._param_validation import Interval 33 | 34 | from sparselm.model._base import TikhonovMixin 35 | 36 | from ._base import MIQPl0 37 | 38 | 39 | class RegularizedL0(MIQPl0): 40 | r"""Implementation of mixed-integer quadratic programming l0 regularized Regressor. 41 | 42 | Supports grouping parameters and group-level hierarchy, but requires groups as a 43 | compulsory argument. 44 | 45 | Regularized regression objective: 46 | 47 | .. math:: 48 | 49 | \min_{\beta} || X \beta - y ||^2_2 + \alpha \sum_{G} z_G 50 | 51 | Where G represents groups of features/coefficients and :math:`z_G` is are boolean 52 | valued slack variables. 53 | 54 | Args: 55 | groups (NDArray): 56 | 1D array-like of integers specifying groups. Length should be the 57 | same as model, where each integer entry specifies the group 58 | each parameter corresponds to. If no grouping is needed pass a list 59 | of all distinct numbers (ie range(len(coefs)) to create singleton groups 60 | for each parameter. 61 | alpha (float): 62 | L0 pseudo-norm regularization hyper-parameter. 63 | big_M (float): 64 | Upper bound on the norm of coefficients associated with each 65 | groups of coefficients :math:`||\beta_c||_2`. 66 | hierarchy (list): 67 | A list of lists of integers storing hierarchy relations between 68 | groups. 69 | Each sublist contains indices of other groups 70 | on which the group associated with each element of 71 | the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that 72 | group 0 depends on 1, and 2; 1 depends on 0, and 2 has no 73 | dependence. 74 | ignore_psd_check (bool): 75 | Whether to ignore cvxpy's PSD checks of matrix used in quadratic 76 | form. Default is True to avoid raising errors for poorly 77 | conditioned matrices. But if you want to be strict set to False. 78 | fit_intercept (bool): 79 | Whether the intercept should be estimated or not. 80 | If False, the data is assumed to be already centered. 81 | copy_X (bool): 82 | If True, X will be copied; else, it may be overwritten. 83 | warm_start (bool): 84 | When set to True, reuse the solution of the previous call to 85 | fit as initialization, otherwise, just erase the previous 86 | solution. 87 | solver (str): 88 | cvxpy backend solver to use. Supported solvers are listed here: 89 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options 90 | solver_options (dict): 91 | dictionary of keyword arguments passed to cvxpy solve. 92 | See docs in CVXRegressor for more information. 93 | 94 | Attributes: 95 | coef_ (NDArray): 96 | Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,). 97 | intercept_ (float): 98 | Independent term in decision function. 
99 | canonicals_ (SimpleNamespace): 100 | Namespace that contains underlying cvxpy objects used to define 101 | the optimization problem. The objects included are the following: 102 | - objective - the objective function. 103 | - beta - variable to be optimized (corresponds to the estimated coef_ attribute). 104 | - parameters - hyper-parameters 105 | - auxiliaries - auxiliary variables and expressions 106 | - constraints - solution constraints 107 | 108 | Note: 109 | Installation of Gurobi is not a must, but highly recommended. An open source alternative 110 | is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues. 111 | See the Mixed-integer programs section of the cvxpy docs: 112 | https://www.cvxpy.org/tutorial/advanced/index.html 113 | """ 114 | 115 | _cvx_parameter_constraints: dict[str, list[Any]] = { 116 | "alpha": [Interval(type=Real, left=0.0, right=None, closed="left")], 117 | **MIQPl0._cvx_parameter_constraints, 118 | } 119 | 120 | def __init__( 121 | self, 122 | groups: NDArray[np.floating | np.integer] | None = None, 123 | alpha: float = 1.0, 124 | big_M: int = 100, 125 | hierarchy: list[list[int]] | None = None, 126 | ignore_psd_check: bool = True, 127 | fit_intercept: bool = False, 128 | copy_X: bool = True, 129 | warm_start: bool = False, 130 | solver: str | None = None, 131 | solver_options: dict | None = None, 132 | ): 133 | super().__init__( 134 | groups=groups, 135 | big_M=big_M, 136 | hierarchy=hierarchy, 137 | ignore_psd_check=ignore_psd_check, 138 | fit_intercept=fit_intercept, 139 | copy_X=copy_X, 140 | warm_start=warm_start, 141 | solver=solver, 142 | solver_options=solver_options, 143 | ) 144 | self.alpha = alpha 145 | 146 | def _generate_objective( 147 | self, 148 | X: NDArray, 149 | y: NDArray, 150 | beta: cp.Variable, 151 | parameters: SimpleNamespace | None = None, 152 | auxiliaries: SimpleNamespace | None = None, 153 | ) -> cp.Expression: 154 | """Generate the quadratic form and l0 regularization portion of objective.""" 155 | assert parameters is not None 156 | assert auxiliaries is not None 157 | c0 = 2 * X.shape[0] # keeps hyperparameter scale independent 158 | objective = super()._generate_objective( 159 | X, y, beta, parameters, auxiliaries 160 | ) + c0 * parameters.alpha * cp.sum(auxiliaries.z0) 161 | return objective 162 | 163 | 164 | class MixedL0(RegularizedL0, metaclass=ABCMeta): 165 | """Abstract base class for mixed L0 regularization models: L1L0 and L2L0.""" 166 | 167 | _cvx_parameter_constraints: dict[str, list[Any]] = { 168 | "eta": [Interval(type=Real, left=0.0, right=None, closed="left")], 169 | **RegularizedL0._cvx_parameter_constraints, 170 | } 171 | 172 | def __init__( 173 | self, 174 | groups: NDArray[np.floating | np.integer] | None = None, 175 | alpha: float = 1.0, 176 | eta: float = 1.0, 177 | big_M: int = 100, 178 | hierarchy: list[list[int]] | None = None, 179 | ignore_psd_check: bool = True, 180 | fit_intercept: bool = False, 181 | copy_X: bool = True, 182 | warm_start: bool = False, 183 | solver: str | None = None, 184 | solver_options: dict | None = None, 185 | ): 186 | """Initialize Regressor. 187 | 188 | Args: 189 | groups (NDArray): 190 | 1D array-like of integers specifying groups. Length should be the 191 | same as model, where each integer entry specifies the group 192 | each parameter corresponds to. If no grouping is needed pass a list 193 | of all distinct numbers (ie range(len(coefs)) to create singleton groups 194 | for each parameter. 
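# An illustrative sketch of the groups convention described above (synthetic
# data and a made-up alpha). np.arange(n_features) gives one singleton group per
# coefficient, while repeated integers tie coefficients into shared groups that
# are switched on and off together by the z_G slack variables.
import numpy as np
from sparselm.model import RegularizedL0

rng = np.random.default_rng(0)
X = rng.random((30, 8))
y = X @ np.array([1.5, -2.0, 0.0, 0.0, 0.7, 0.0, 0.0, 0.0]) + 0.01 * rng.standard_normal(30)

singleton = RegularizedL0(groups=np.arange(8), alpha=1e-3)  # one z_G per coefficient
grouped = RegularizedL0(groups=np.array([0, 0, 1, 1, 2, 2, 3, 3]), alpha=1e-3)  # one z_G per pair
# Fitting either regressor requires a mixed-integer capable solver:
# singleton.fit(X, y); grouped.fit(X, y)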
195 | alpha (float): 196 | L0 pseudo-norm regularization hyper-parameter. 197 | eta (float): 198 | standard norm regularization hyper-parameter (usually l1 or l2). 199 | big_M (float): 200 | Upper bound on the norm of coefficients associated with each 201 | 202 | hierarchy (list): 203 | A list of lists of integers storing hierarchy relations between 204 | coefficients. 205 | Each sublist contains indices of other coefficients 206 | on which the coefficient associated with each element of 207 | the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that 208 | coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no 209 | dependence. 210 | ignore_psd_check (bool): 211 | Whether to ignore cvxpy's PSD checks of matrix used in quadratic 212 | form. Default is True to avoid raising errors for poorly 213 | conditioned matrices. But if you want to be strict set to False. 214 | fit_intercept (bool): 215 | Whether the intercept should be estimated or not. 216 | If False, the data is assumed to be already centered. 217 | copy_X (bool): 218 | If True, X will be copied; else, it may be overwritten. 219 | warm_start (bool): 220 | When set to True, reuse the solution of the previous call to 221 | fit as initialization, otherwise, just erase the previous 222 | solution. 223 | solver (str): 224 | cvxpy backend solver to use. Supported solvers are listed here: 225 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options 226 | solver_options (dict): 227 | dictionary of keyword arguments passed to cvxpy solve. 228 | See docs in CVXRegressor for more information. 229 | """ 230 | super().__init__( 231 | groups=groups, 232 | alpha=alpha, 233 | big_M=big_M, 234 | hierarchy=hierarchy, 235 | ignore_psd_check=ignore_psd_check, 236 | fit_intercept=fit_intercept, 237 | copy_X=copy_X, 238 | warm_start=warm_start, 239 | solver=solver, 240 | solver_options=solver_options, 241 | ) 242 | self.eta = eta 243 | 244 | @abstractmethod 245 | def _generate_objective( 246 | self, 247 | X: NDArray, 248 | y: NDArray, 249 | beta: cp.Variable, 250 | parameters: SimpleNamespace | None = None, 251 | auxiliaries: SimpleNamespace | None = None, 252 | ) -> cp.Expression: 253 | """Generate optimization objective.""" 254 | # implement in derived classes using super to call MIQP_L0 objective 255 | return super()._generate_objective(X, y, beta, parameters, auxiliaries) 256 | 257 | 258 | class L1L0(MixedL0): 259 | r"""L1L0 regularized Regressor. 260 | 261 | Regressor with L1L0 regularization solved with mixed integer programming 262 | as discussed in: 263 | 264 | https://arxiv.org/abs/1807.10753 265 | 266 | Extended to allow grouping of coefficients and group-level hierarchy as described 267 | in: 268 | 269 | https://doi.org/10.1287/opre.2015.1436 270 | 271 | Regularized regression objective: 272 | 273 | .. math:: 274 | 275 | \min_{\beta} || X \beta - y ||^2_2 + \alpha \sum_{G} z_G + \eta ||\beta||_1 276 | 277 | Where G represents groups of features/coefficients and :math:`z_G` is are boolean 278 | valued slack variables. 279 | 280 | Args: 281 | groups (NDArray): 282 | 1D array-like of integers specifying groups. Length should be the 283 | same as model, where each integer entry specifies the group 284 | each parameter corresponds to. If no grouping is needed pass a list 285 | of all distinct numbers (ie range(len(coefs)) to create singleton groups 286 | for each parameter. 287 | alpha (float): 288 | L0 pseudo-norm regularization hyper-parameter. 289 | eta (float): 290 | L1 regularization hyper-parameter. 
291 | big_M (float): 292 | Upper bound on the norm of coefficients associated with each 293 | groups of coefficients :math:`||\beta_c||_2`. 294 | hierarchy (list): 295 | A list of lists of integers storing hierarchy relations between 296 | coefficients. 297 | Each sublist contains indices of other coefficients 298 | on which the coefficient associated with each element of 299 | the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that 300 | coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no 301 | dependence. 302 | ignore_psd_check (bool): 303 | Whether to ignore cvxpy's PSD checks of matrix used in quadratic 304 | form. Default is True to avoid raising errors for poorly 305 | conditioned matrices. But if you want to be strict set to False. 306 | fit_intercept (bool): 307 | Whether the intercept should be estimated or not. 308 | If False, the data is assumed to be already centered. 309 | copy_X (bool): 310 | If True, X will be copied; else, it may be overwritten. 311 | warm_start (bool): 312 | When set to True, reuse the solution of the previous call to 313 | fit as initialization, otherwise, just erase the previous 314 | solution. 315 | solver (str): 316 | cvxpy backend solver to use. Supported solvers are listed here: 317 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options 318 | solver_options (dict): 319 | dictionary of keyword arguments passed to cvxpy solve. 320 | See docs in CVXRegressor for more information. 321 | 322 | Attributes: 323 | coef_ (NDArray): 324 | Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,). 325 | intercept_ (float): 326 | Independent term in decision function. 327 | canonicals_ (SimpleNamespace): 328 | Namespace that contains underlying cvxpy objects used to define 329 | the optimization problem. The objects included are the following: 330 | - objective - the objective function. 331 | - beta - variable to be optimized (corresponds to the estimated coef_ attribute). 332 | - parameters - hyper-parameters 333 | - auxiliaries - auxiliary variables and expressions 334 | - constraints - solution constraints 335 | 336 | Note: 337 | Installation of Gurobi is not a must, but highly recommended. An open source alternative 338 | is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues. 
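# Choosing a mixed-integer capable backend, as discussed in the note above.
# The solver string and solver_options are forwarded to cvxpy's solve(); the
# TimeLimit key below is a Gurobi-specific parameter shown only as an
# illustration, and each backend must actually be installed before it can be used.
from sparselm.model import L1L0

open_source = L1L0(alpha=1e-3, eta=1e-2, solver="SCIP")
commercial = L1L0(alpha=1e-3, eta=1e-2, solver="GUROBI", solver_options={"TimeLimit": 60})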
339 | See the Mixed-integer programs section of the cvxpy docs: 340 | https://www.cvxpy.org/tutorial/advanced/index.html 341 | """ 342 | 343 | def __init__( 344 | self, 345 | groups: NDArray[np.floating | np.integer] | None = None, 346 | alpha: float = 1.0, 347 | eta: float = 1.0, 348 | big_M: int = 100, 349 | hierarchy: list[list[int]] | None = None, 350 | ignore_psd_check: bool = True, 351 | fit_intercept: bool = False, 352 | copy_X: bool = True, 353 | warm_start: bool = False, 354 | solver: str | None = None, 355 | solver_options: dict | None = None, 356 | ): 357 | super().__init__( 358 | groups=groups, 359 | eta=eta, 360 | alpha=alpha, 361 | big_M=big_M, 362 | hierarchy=hierarchy, 363 | ignore_psd_check=ignore_psd_check, 364 | fit_intercept=fit_intercept, 365 | copy_X=copy_X, 366 | warm_start=warm_start, 367 | solver=solver, 368 | solver_options=solver_options, 369 | ) 370 | 371 | def _generate_auxiliaries( 372 | self, X: NDArray, y: NDArray, beta: cp.Variable, parameters: SimpleNamespace 373 | ) -> SimpleNamespace | None: 374 | """Generate the boolean slack variable.""" 375 | auxiliaries = super()._generate_auxiliaries(X, y, beta, parameters) 376 | X.shape[1] if self.groups is None else len(np.unique(self.groups)) 377 | auxiliaries.z1 = cp.Variable(X.shape[1]) # type: ignore 378 | return auxiliaries 379 | 380 | def _generate_constraints( 381 | self, 382 | X: NDArray, 383 | y: NDArray, 384 | beta: cp.Variable, 385 | parameters: SimpleNamespace | None = None, 386 | auxiliaries: SimpleNamespace | None = None, 387 | ) -> list[cp.Constraint]: 388 | """Generate the constraints used to solve l1l0 regularization.""" 389 | assert auxiliaries is not None 390 | constraints = super()._generate_constraints(X, y, beta, parameters, auxiliaries) 391 | # L1 constraints (why not do an l1 norm in the objective instead?) 392 | constraints += [-auxiliaries.z1 <= beta, beta <= auxiliaries.z1] 393 | return constraints 394 | 395 | def _generate_objective( 396 | self, 397 | X: NDArray, 398 | y: NDArray, 399 | beta: cp.Variable, 400 | parameters: SimpleNamespace | None = None, 401 | auxiliaries: SimpleNamespace | None = None, 402 | ) -> cp.Expression: 403 | """Generate the objective function used in l1l0 regression model.""" 404 | assert parameters is not None 405 | assert auxiliaries is not None 406 | c0 = 2 * X.shape[0] # keeps hyperparameter scale independent 407 | objective = super()._generate_objective(X, y, beta, parameters, auxiliaries) 408 | # L1 term 409 | objective += c0 * parameters.eta * cp.sum(auxiliaries.z1) 410 | return objective 411 | 412 | 413 | class L2L0(TikhonovMixin, MixedL0): 414 | r"""L2L0 regularized Regressor. 415 | 416 | Based on Regressor with L2L0 regularization solved with mixed integer programming 417 | proposed in: 418 | 419 | https://arxiv.org/abs/2204.13789 420 | 421 | Extended to allow grouping of coefficients and group-level hierarchy as described 422 | in: 423 | 424 | https://doi.org/10.1287/opre.2015.1436 425 | 426 | And allows using a Tihkonov matrix in the l2 term. 427 | 428 | Regularized regression objective: 429 | 430 | .. math:: 431 | 432 | \min_{\beta} || X \beta - y ||^2_2 + \alpha \sum_{G} z_G + \eta ||W\beta||^2_2 433 | 434 | Where G represents groups of features/coefficients and :math:`z_G` is are boolean 435 | valued slack variables. W is a Tikhonov matrix. 436 | 437 | Args: 438 | groups (NDArray): 439 | 1D array-like of integers specifying groups. 
Length should be the 440 | same as model, where each integer entry specifies the group 441 | each parameter corresponds to. If no grouping is needed pass a list 442 | of all distinct numbers (i.e. range(len(coefs))) to create singleton groups 443 | for each parameter. 444 | alpha (float): 445 | L0 pseudo-norm regularization hyper-parameter. 446 | eta (float): 447 | L2 regularization hyper-parameter. 448 | big_M (float): 449 | Upper bound on the norm of coefficients associated with each 450 | group of coefficients :math:`||\beta_c||_2`. 451 | hierarchy (list): 452 | A list of lists of integers storing hierarchy relations between 453 | coefficients. 454 | Each sublist contains indices of other coefficients 455 | on which the coefficient associated with each element of 456 | the list depends. i.e. hierarchy = [[1, 2], [0], []] means that 457 | coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no 458 | dependence. 459 | tikhonov_w (np.array): 460 | Matrix to add weights to L2 regularization. 461 | ignore_psd_check (bool): 462 | Whether to ignore cvxpy's PSD checks of matrix used in quadratic 463 | form. Default is True to avoid raising errors for poorly 464 | conditioned matrices. But if you want to be strict set to False. 465 | fit_intercept (bool): 466 | Whether the intercept should be estimated or not. 467 | If False, the data is assumed to be already centered. 468 | copy_X (bool): 469 | If True, X will be copied; else, it may be overwritten. 470 | warm_start (bool): 471 | When set to True, reuse the solution of the previous call to 472 | fit as initialization, otherwise, just erase the previous 473 | solution. 474 | solver (str): 475 | cvxpy backend solver to use. Supported solvers are listed here: 476 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options 477 | solver_options (dict): 478 | dictionary of keyword arguments passed to cvxpy solve. 479 | See docs in CVXRegressor for more information. 480 | 481 | Attributes: 482 | coef_ (NDArray): 483 | Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,). 484 | intercept_ (float): 485 | Independent term in decision function. 486 | canonicals_ (SimpleNamespace): 487 | Namespace that contains underlying cvxpy objects used to define 488 | the optimization problem. The objects included are the following: 489 | - objective - the objective function. 490 | - beta - variable to be optimized (corresponds to the estimated coef_ attribute). 491 | - parameters - hyper-parameters 492 | - auxiliaries - auxiliary variables and expressions 493 | - constraints - solution constraints 494 | 495 | Note: 496 | Installation of Gurobi is not a must, but highly recommended. An open source alternative 497 | is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
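# A sketch of the Tikhonov-weighted L2 term in the L2L0 objective documented
# above. The diagonal W below is made up for illustration (it penalizes later
# coefficients more strongly); any weight matrix with n_features columns can be
# supplied.
import numpy as np
from sparselm.model import L2L0

n_features = 10
W = np.diag(np.linspace(1.0, 5.0, n_features))
regressor = L2L0(alpha=1e-4, eta=1e-3, tikhonov_w=W)
# regressor.fit(X, y) again requires a mixed-integer capable solver.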
498 | See the Mixed-integer programs section of the cvxpy docs: 499 | https://www.cvxpy.org/tutorial/advanced/index.html 500 | """ 501 | 502 | def __init__( 503 | self, 504 | groups: NDArray[np.floating | np.integer] | None = None, 505 | alpha: float = 1.0, 506 | eta: float = 1.0, 507 | big_M: int = 100, 508 | hierarchy: list[list[int]] | None = None, 509 | tikhonov_w: NDArray[np.floating] | None = None, 510 | ignore_psd_check: bool = True, 511 | fit_intercept: bool = False, 512 | copy_X: bool = True, 513 | warm_start: bool = False, 514 | solver: str | None = None, 515 | solver_options: dict | None = None, 516 | ): 517 | super().__init__( 518 | groups=groups, 519 | alpha=alpha, 520 | eta=eta, 521 | big_M=big_M, 522 | hierarchy=hierarchy, 523 | ignore_psd_check=ignore_psd_check, 524 | fit_intercept=fit_intercept, 525 | copy_X=copy_X, 526 | warm_start=warm_start, 527 | solver=solver, 528 | solver_options=solver_options, 529 | ) 530 | self.tikhonov_w = tikhonov_w 531 | -------------------------------------------------------------------------------- /src/sparselm/model/_ols.py: -------------------------------------------------------------------------------- 1 | """Ordinary least squares cvxpy solver.""" 2 | 3 | from __future__ import annotations 4 | 5 | __author__ = "Fengyu Xie, Luis Barroso-Luque" 6 | 7 | 8 | from types import SimpleNamespace 9 | 10 | import cvxpy as cp 11 | from numpy.typing import NDArray 12 | 13 | from ._base import CVXRegressor 14 | 15 | 16 | class OrdinaryLeastSquares(CVXRegressor): 17 | r"""Ordinary Least Squares Linear Regression. 18 | 19 | Regression objective: 20 | 21 | .. math:: 22 | 23 | \min_{\beta} || X \beta - y ||^2_2 24 | 25 | Args: 26 | fit_intercept (bool): 27 | Whether the intercept should be estimated or not. 28 | If False, the data is assumed to be already centered. 29 | copy_X (bool): 30 | If True, X will be copied; else, it may be overwritten. 31 | warm_start (bool): 32 | When set to True, reuse the solution of the previous call to 33 | fit as initialization, otherwise, just erase the previous 34 | solution. 35 | solver (str): 36 | cvxpy backend solver to use. Supported solvers are listed here: 37 | https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options 38 | solver_options (dict): 39 | dictionary of keyword arguments passed to cvxpy solve. 40 | See docs linked above for more information. 41 | 42 | Attributes: 43 | coef_ (NDArray): 44 | Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,). 45 | intercept_ (float): 46 | Independent term in decision function. 47 | canonicals_ (SimpleNamespace): 48 | Namespace that contains underlying cvxpy objects used to define 49 | the optimization problem. The objects included are the following: 50 | - objective - the objective function. 51 | - beta - variable to be optimized (corresponds to the estimated coef_ attribute). 
52 | - parameters - hyper-parameters 53 | - auxiliaries - auxiliary variables and expressions 54 | - constraints - solution constraints 55 | """ 56 | 57 | def _generate_objective( 58 | self, 59 | X: NDArray, 60 | y: NDArray, 61 | beta: cp.Variable, 62 | parameters: SimpleNamespace | None = None, 63 | auxiliaries: SimpleNamespace | None = None, 64 | ) -> cp.Expression: 65 | return 1 / (2 * X.shape[0]) * cp.sum_squares(X @ beta - y) 66 | -------------------------------------------------------------------------------- /src/sparselm/stepwise.py: -------------------------------------------------------------------------------- 1 | """Stepwise model selection for piece-wise fitting.""" 2 | 3 | from __future__ import annotations 4 | 5 | __author__ = "Fengyu Xie" 6 | 7 | from itertools import chain 8 | 9 | import numpy as np 10 | from numpy.typing import NDArray 11 | from sklearn.base import RegressorMixin 12 | from sklearn.linear_model._base import LinearModel, _check_sample_weight 13 | from sklearn.utils._param_validation import InvalidParameterError 14 | from sklearn.utils.metaestimators import _BaseComposition 15 | from sklearn.utils.validation import check_is_fitted 16 | 17 | 18 | # BaseComposition makes sure that StepwiseEstimator can be correctly cloned. 19 | def _indices_no_overlap_and_continuous(indices): 20 | scope = sorted(set(chain(*indices))) 21 | return sorted(chain(*indices)) == scope and scope == list(range(len(scope))) 22 | 23 | 24 | def _first_step_fit_intercept_only(steps): 25 | for sid, (_, estimator) in enumerate(steps): 26 | if hasattr(estimator, "estimator"): 27 | # Is a searcher such as GridSearchCV. 28 | fit_intercept = estimator.estimator.fit_intercept 29 | else: 30 | fit_intercept = estimator.fit_intercept 31 | if fit_intercept and sid > 0: 32 | return False 33 | return True 34 | 35 | 36 | def _no_nested_stepwise(steps): 37 | for _, estimator in steps: 38 | if isinstance(estimator, StepwiseEstimator): 39 | return False 40 | return True 41 | 42 | 43 | class StepwiseEstimator(_BaseComposition, RegressorMixin, LinearModel): 44 | """A composite estimator used to do stepwise fitting. 45 | 46 | The first estimator in the composite will be used to fit 47 | certain features (a piece of the feature matrix) to the 48 | target vector, and the residuals are fitted to the rest 49 | of features by using the next estimators in the composite. 50 | 51 | Each estimator can be either a CVXEstimator, a GridSearchCV or 52 | a LineSearchCV. 53 | 54 | Args: 55 | steps (list[(str, CVXEstimator)]): 56 | A list of step names and the CVXEstimators to use 57 | for each step. StepwiseEstimator cannot be used as 58 | a member of StepwiseEstimator. 59 | An estimator will fit the residuals of the previous 60 | estimator fits in the list. 61 | estimator_feature_indices (tuple[tuple[int]]): 62 | Scope of each estimator, which means the indices of 63 | features in the scope (features[:, scope]) will be 64 | fitted to the residual using the corresponding estimator. 65 | Notice: 66 | If estimators in the composite requires hierarchy 67 | or groups, the indices in the groups or hierarchy 68 | must be adjusted such that they correspond to the groups 69 | or hierarchy relations in the part of features sliced 70 | by scope. 71 | For example, consider original groups = [0, 1, 1, 2, 2], 72 | and an estimator has scope = [3, 4], then the estimator 73 | should be initialized with group = [0, 0]. 
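# An illustrative sketch of the scope convention described above (step names,
# estimators and feature indices are made up). The first step fits features
# 0-2 plus the intercept; the second step fits the residual using features 3-4.
from sparselm.model import Lasso, OrdinaryLeastSquares
from sparselm.stepwise import StepwiseEstimator

steps = [
    ("main", OrdinaryLeastSquares(fit_intercept=True)),
    ("correction", Lasso(alpha=1e-3, fit_intercept=False)),
]
estimator = StepwiseEstimator(steps, estimator_feature_indices=((0, 1, 2), (3, 4)))
# estimator.fit(X, y) expects X to have exactly 5 columns, and only the first
# step is allowed to fit an intercept.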
74 | You are fully responsible to initialize the estimators 75 | with correct hierarchy, groups and other parameters before 76 | wrapping them up with the composite! 77 | 78 | Note: 79 | 1. Do not use GridSearchCV or LineSearchCV to search a StepwiseEstimator! 80 | 81 | 2. No nesting is allowed for StepwiseEstimator, which means no step of a 82 | StepwiseEstimator can be a StepwiseEstimator. 83 | 84 | 3. Since stepwise estimator requires specifying a list of feature indices for 85 | each step estimator, it requires fixing n_features_in_ before fitting, which 86 | violates sklearn convention for a regressor. Therefore, StepwiseEstimator is 87 | not checked by sklearn check_estimator method, and there is no guarantee that it 88 | is fully compatible with all scikit-learn features. 89 | """ 90 | 91 | def __init__( 92 | self, 93 | steps, 94 | estimator_feature_indices, 95 | ): 96 | self.steps = steps 97 | # The estimator_feature_indices saved must be tuple because in 98 | # sklearn.base.clone, a cloned object is checked by pointer, rather than 99 | # by value. 100 | self.estimator_feature_indices = estimator_feature_indices 101 | 102 | # These parameters settings does not need to be called externally. 103 | def get_params(self, deep=True): 104 | """Get parameters of all estimators in the composite. 105 | 106 | Args: 107 | deep(bool): 108 | If True, will return the parameters for estimators in 109 | composite, and their contained sub-objects if they are 110 | also estimators. 111 | """ 112 | return self._get_params("steps", deep=deep) 113 | 114 | def set_params(self, **params): 115 | """Set parameters for each estimator in the composite. 116 | 117 | This will be called when model selection optimizes 118 | all hyper parameters. 119 | 120 | Args: 121 | params: A Dictionary of parameters. Each parameter 122 | name must end with an underscore and a number to specify 123 | on which estimator in the composite the parameter is 124 | going to be set. 125 | Remember only to set params you wish to optimize! 126 | """ 127 | self._set_params("steps", **params) 128 | return self 129 | 130 | @staticmethod 131 | def _get_estimator_coef(estimator): 132 | check_is_fitted(estimator) 133 | if hasattr(estimator, "best_estimator_"): 134 | return estimator.best_estimator_.coef_.copy() 135 | elif hasattr(estimator, "coef_"): 136 | return estimator.coef_.copy() 137 | else: 138 | raise ValueError(f"Estimator {estimator} is not a valid linear model!") 139 | 140 | @staticmethod 141 | def _get_estimator_intercept(estimator): 142 | check_is_fitted(estimator) 143 | if hasattr(estimator, "best_estimator_"): 144 | return estimator.best_estimator_.intercept_ 145 | elif hasattr(estimator, "intercept_"): 146 | return estimator.intercept_ 147 | else: 148 | raise ValueError(f"Estimator {estimator} is not a valid linear model!") 149 | 150 | def fit( 151 | self, 152 | X: NDArray, 153 | y: NDArray, 154 | sample_weight: NDArray[np.floating] | None = None, 155 | *args, 156 | **kwargs, 157 | ): 158 | """Prepare fit input with sklearn help then call fit method. 159 | 160 | Args: 161 | X (NDArray): 162 | Training data of shape (n_samples, n_features). 163 | y (NDArray): 164 | Target values. 
Will be cast to X's dtype if necessary 165 | of shape (n_samples,) or (n_samples, n_targets) 166 | sample_weight (NDArray): 167 | Individual weights for each sample of shape (n_samples,) 168 | default=None 169 | *args: 170 | Positional arguments passed to _fit method 171 | **kwargs: 172 | Keyword arguments passed to _fit method 173 | Returns: 174 | instance of self 175 | """ 176 | # Check estimators and feature indices. 177 | if not _indices_no_overlap_and_continuous(self.estimator_feature_indices): 178 | raise InvalidParameterError( 179 | f"Given feature indices:" 180 | f" {self.estimator_feature_indices}" 181 | f" are not continuous and non-overlapping" 182 | f" series starting from 0!" 183 | ) 184 | if not _first_step_fit_intercept_only(self.steps): 185 | raise InvalidParameterError( 186 | "Only the first estimator in steps is allowed" " to fit intercept!" 187 | ) 188 | if not _no_nested_stepwise(self.steps): 189 | raise InvalidParameterError( 190 | "StepwiseEstimator should not be nested with" 191 | " another StepwiseEstimator!" 192 | ) 193 | 194 | self.n_features_in_ = len(list(chain(*self.estimator_feature_indices))) 195 | 196 | # Set ensute_2d to True and reset to False so that it triggers number of 197 | # features checking. 198 | X, y = self._validate_data( 199 | X, 200 | y, 201 | accept_sparse=False, 202 | ensure_2d=True, 203 | y_numeric=True, 204 | multi_output=True, 205 | reset=False, 206 | ) 207 | 208 | if sample_weight is not None: 209 | sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype) 210 | 211 | residuals = y.copy() 212 | 213 | self.coef_ = np.empty(X.shape[1]) 214 | self.coef_.fill(np.nan) 215 | for (_, estimator), scope in zip(self.steps, self.estimator_feature_indices): 216 | # np.array indices should not be tuple. 217 | estimator.fit( 218 | X[:, list(scope)], 219 | residuals, 220 | *args, 221 | sample_weight=sample_weight, 222 | **kwargs, 223 | ) 224 | self.coef_[list(scope)] = self._get_estimator_coef(estimator) 225 | residuals = residuals - estimator.predict(X[:, list(scope)]) 226 | # Only the first estimator is allowed to fit intercept. 227 | if hasattr(self.steps[0][1], "estimator"): 228 | fit_intercept = self.steps[0][1].estimator.fit_intercept 229 | else: 230 | fit_intercept = self.steps[0][1].fit_intercept 231 | if fit_intercept: 232 | self.intercept_ = self._get_estimator_intercept(self.steps[0][1]) 233 | else: 234 | self.intercept_ = 0.0 235 | 236 | # return self for chaining fit and predict calls 237 | return self 238 | -------------------------------------------------------------------------------- /src/sparselm/tools.py: -------------------------------------------------------------------------------- 1 | """A variety of tools for fitting linear regression models to polish CE.""" 2 | 3 | from __future__ import annotations 4 | 5 | __author__ = "Luis Barroso-Luque" 6 | 7 | import warnings 8 | from functools import wraps 9 | 10 | import numpy as np 11 | from numpy.typing import NDArray 12 | 13 | 14 | def constrain_coefficients( 15 | indices: NDArray, 16 | high: NDArray[np.floating] | float | None = None, 17 | low: NDArray[np.floating] | float | None = None, 18 | ): 19 | """Constrain a fit method to keep coefficients within a specified range. 
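# A concrete sketch of the decorator usage documented below (the fit function,
# indices and bounds here are made up). If any constrained coefficient falls
# outside [low, high] after the first fit, the wrapped fit is re-run with those
# coefficients fixed at the violated bound.
from sparselm.model import OrdinaryLeastSquares
from sparselm.tools import constrain_coefficients

@constrain_coefficients(indices=[0, 2], high=5.0, low=0.0)
def fit_ols(X, y):
    est = OrdinaryLeastSquares(fit_intercept=True)
    est.fit(X, y)
    return est.coef_

# coefs = fit_ols(X, y)  # coefficients 0 and 2 are kept within [0, 5]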
20 | 21 | Use this as a standard decorator with parameters: 22 | - At runtime: 23 | coefs = constrain_coefficients(indices, high, low)(fit_method)(X, y) 24 | - In fit_method definitions: 25 | @constrain_coefficients(indices, high, low) 26 | def your_fit_method(X, y): 27 | 28 | Args: 29 | indices (array or list): 30 | indices of coefficients to constrain 31 | high (float or array): 32 | upper bound for indices, 33 | low (float or array): 34 | lower bounds for indices 35 | """ 36 | indices = np.array(indices) 37 | if high is not None: 38 | high = ( 39 | high * np.ones(len(indices)) 40 | if isinstance(high, (int, float)) 41 | else np.array(high) 42 | ) 43 | else: 44 | high = np.inf * np.ones(len(indices)) 45 | if low is not None: 46 | low = ( 47 | low * np.ones(len(indices)) 48 | if isinstance(low, (int, float)) 49 | else np.array(low) 50 | ) 51 | else: 52 | low = -np.inf * np.ones(len(indices)) 53 | 54 | def decorate_fit_method(fit_method): 55 | """Decorate a fit method to constrain "dielectric constant". 56 | 57 | Args: 58 | fit_method (callable): 59 | the fit_method you will use to fit your regression model. 60 | Must take the feature matrix X and target vector y as first 61 | arguments. (i.e. fit_method(X, y, *args, **kwargs) 62 | """ 63 | 64 | @wraps(fit_method) 65 | def wrapped(X, y, *args, **kwargs): 66 | coefs = fit_method(X, y, *args, **kwargs) 67 | above_range = coefs[indices] > high 68 | below_range = coefs[indices] < low 69 | 70 | # TODO do not set features to zero, do the fit without them instead 71 | if sum(above_range) > 0 or sum(below_range) > 0: 72 | X_, y_ = X.copy(), y.copy() 73 | y_ -= np.sum(X_[:, indices[above_range]] * high[above_range], axis=1) 74 | X_[:, indices[above_range]] = 0.0 75 | y_ -= np.sum(X_[:, indices[below_range]] * low[below_range], axis=1) 76 | X_[:, indices[below_range]] = 0.0 77 | coefs = fit_method(X_, y_, *args, **kwargs) 78 | coefs[indices[above_range]] = high[above_range] 79 | coefs[indices[below_range]] = low[below_range] 80 | 81 | # check if new coeficients are now out of range 82 | above_range = coefs[indices] > high 83 | below_range = coefs[indices] < low 84 | if sum(above_range) > 0 or sum(below_range) > 0: 85 | warnings.warn( 86 | "Running the constrained fit has resulted in new out of" 87 | " range coefficients that were not so in the unconstrained" 88 | " fit.\n" 89 | "Double check the sensibility of the bounds you provided!", 90 | RuntimeWarning, 91 | ) 92 | 93 | return coefs 94 | 95 | return wrapped 96 | 97 | return decorate_fit_method 98 | 99 | 100 | def r2_score_to_cv_error( 101 | score: float, 102 | y: NDArray, 103 | y_pred: NDArray, 104 | weights: NDArray[np.floating] | None = None, 105 | ): 106 | """Convert r2 score to cross-validation error. 107 | 108 | Args: 109 | score (float): 110 | An r2 score obtained from cross validation. 111 | y (NDArray): 1D 112 | The target vector. 113 | y_pred (NDArray): 1D 114 | The fitted vector. 115 | weights (NDArray): 1D 116 | The weights of each sample. Default to 1. 
117 | 118 | Returns: 119 | float: 120 | The CV error 121 | """ 122 | if weights is None: 123 | weights = np.ones(len(y)) 124 | weights = np.array(weights) 125 | if len(weights) != len(y): 126 | raise ValueError("Weights given but not the same length as sample.") 127 | if np.any(weights < 0) or np.allclose(weights, 0): 128 | raise ValueError("Weights can not be negative or all zero.") 129 | 130 | denominator = (weights * (y - y_pred) ** 2).sum() / weights.sum() 131 | return np.sqrt((1 - score) * denominator) 132 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from sklearn.datasets import make_regression, make_sparse_coded_signal 4 | 5 | SEED = 0 6 | 7 | # A few solvers to test for convex problems 8 | # ECOS sometimes fails for Adaptive group estimators, but is fast 9 | # SCS and CXVOPT are reliable, but slower 10 | # GUROBI is best 11 | CONVEX_SOLVERS = ["GUROBI", "ECOS"] # SCS, GUROBI, CVXOPT 12 | 13 | # ECOS_BB is open source alternative, but much slower, and can get things wrong 14 | MIQP_SOLVERS = ["GUROBI"] # SCIP fails some tests... 15 | 16 | # Set to small values bc gurobi non-commercial can not solver large model. 17 | N_FEATURES = [20, 30] # an overdetermined and underdetermined case 18 | N_SAMPLES = 25 19 | N_INFORMATIVE = 10 20 | 21 | 22 | @pytest.fixture(scope="package") 23 | def rng(): 24 | """Seed and return an RNG for test reproducibility""" 25 | return np.random.default_rng(SEED) 26 | 27 | 28 | @pytest.fixture(params=CONVEX_SOLVERS) 29 | def solver(request): 30 | return request.param 31 | 32 | 33 | @pytest.fixture(params=MIQP_SOLVERS) 34 | def miqp_solver(request): 35 | return request.param 36 | 37 | 38 | @pytest.fixture(scope="package", params=N_FEATURES) 39 | def random_model(rng, request): 40 | """Returns a fully random set of X, y, and beta representing a linear model.""" 41 | X, y, beta = make_regression( 42 | n_samples=N_SAMPLES, 43 | n_features=request.param, 44 | n_informative=N_INFORMATIVE, 45 | coef=True, 46 | random_state=rng.integers(0, 2**32 - 1), 47 | bias=10 * rng.random(), 48 | ) 49 | return X, y, beta 50 | 51 | 52 | @pytest.fixture(scope="package", params=N_FEATURES) 53 | def random_energy_model(rng, request): 54 | """Returns a random set of X, y, and beta with added gaussian noise for a linear 55 | model with sparse coefficients beta decay (on average) exponentially with the index 56 | of the coefficient. 
57 | """ 58 | X = rng.random((N_SAMPLES, request.param)) 59 | beta = np.zeros(request.param) # coefficients 60 | non_zero_ids = rng.choice(request.param, size=N_INFORMATIVE, replace=False) 61 | non_zero_ids = np.array(np.round(non_zero_ids), dtype=int) 62 | 63 | for idx in non_zero_ids: 64 | eci = 0 65 | mag = np.exp(-0.5 * idx) 66 | while np.isclose(eci, 0): 67 | eci = (rng.random() - 0.5) * 2 * mag 68 | beta[idx] = eci 69 | y = X @ beta + rng.normal(size=N_SAMPLES) * 2e-3 # fake energies 70 | return X, y, beta 71 | 72 | 73 | @pytest.fixture(scope="package") 74 | def sparse_coded_signal(rng): 75 | n_components, n_features, n_nonzero = 24, 12, 6 76 | y, X, beta = make_sparse_coded_signal( 77 | n_samples=1, 78 | n_components=n_components, 79 | n_features=n_features, 80 | n_nonzero_coefs=n_nonzero, 81 | random_state=rng.integers(0, 2**32 - 1), 82 | ) 83 | return X, y, beta 84 | 85 | 86 | @pytest.fixture(params=[4, 6], scope="package") 87 | def random_model_with_groups(random_model, rng, request): 88 | """Add a correct set of groups to model.""" 89 | X, y, beta = random_model 90 | n_groups = request.param 91 | n_active_groups = n_groups // 3 + 1 92 | 93 | n_features_per_group = len(beta) // n_groups 94 | active_group_inds = rng.choice(range(n_groups), size=n_active_groups, replace=False) 95 | inactive_group_inds = np.setdiff1d(range(n_groups), active_group_inds) 96 | 97 | groups = np.zeros(len(beta), dtype=int) 98 | active_feature_inds = np.where(abs(beta) > 0)[0] 99 | inactive_feature_inds = np.setdiff1d(np.arange(len(beta)), active_feature_inds) 100 | 101 | # set active groups 102 | for i in active_group_inds: 103 | if len(active_feature_inds) > n_features_per_group: 104 | group_inds = rng.choice( 105 | active_feature_inds, size=n_features_per_group, replace=False 106 | ) 107 | else: 108 | group_inds = active_feature_inds 109 | groups[group_inds] = i 110 | active_feature_inds = np.setdiff1d(active_feature_inds, group_inds) 111 | 112 | # set inactive_groups 113 | for i in inactive_group_inds: 114 | if len(inactive_feature_inds) > n_features_per_group: 115 | group_inds = rng.choice( 116 | inactive_feature_inds, size=n_features_per_group, replace=False 117 | ) 118 | else: 119 | group_inds = inactive_feature_inds 120 | groups[group_inds] = i 121 | inactive_feature_inds = np.setdiff1d(inactive_feature_inds, group_inds) 122 | 123 | return X, y, beta, groups 124 | -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | minversion = 5.3.0 3 | -------------------------------------------------------------------------------- /tests/test_common.py: -------------------------------------------------------------------------------- 1 | """General tests for all linear models. 2 | 3 | Simply check that they execute successfully on random data. 
4 | """ 5 | 6 | from inspect import getmembers, isclass, signature 7 | 8 | import cvxpy as cp 9 | import numpy as np 10 | import pytest 11 | from cvxpy.error import SolverError 12 | from sklearn.utils.estimator_checks import check_estimator 13 | from sklearn.utils.fixes import threadpool_info 14 | 15 | import sparselm.model as spm 16 | from sparselm.model._miqp._base import MIQPl0 17 | 18 | ESTIMATORS = getmembers(spm, isclass) 19 | ESTIMATOR_NAMES = [est[0] for est in ESTIMATORS] 20 | ESTIMATORS = [est[1] for est in ESTIMATORS] # type: ignore 21 | 22 | 23 | @pytest.fixture(params=ESTIMATORS, ids=ESTIMATOR_NAMES) 24 | def estimator(request): 25 | estimator_cls = request.param 26 | if issubclass(estimator_cls, MIQPl0): 27 | regressor = estimator_cls(fit_intercept=True, solver="SCIP") 28 | if hasattr(regressor, "eta"): 29 | regressor.eta = 0.01 30 | return regressor 31 | return estimator_cls(fit_intercept=True, solver="ECOS") 32 | 33 | 34 | @pytest.mark.parametrize("estimator_cls", ESTIMATORS) 35 | def test_general_fit(estimator_cls, random_model, rng): 36 | X, y, beta = random_model 37 | 38 | # instantiate the estimator 39 | sig = signature(estimator_cls) 40 | 41 | # check for necessary parameters 42 | args = {} 43 | if "groups" in sig.parameters: 44 | args["groups"] = rng.integers(0, 5, size=len(beta)) 45 | if "group_list" in sig.parameters: 46 | args["group_list"] = [ 47 | np.sort(rng.choice(range(5), replace=False, size=rng.integers(1, 5))) 48 | for _ in range(len(beta)) 49 | ] 50 | if "sparse_bound" in sig.parameters: 51 | args["sparse_bound"] = 12 52 | 53 | estimator = estimator_cls(**args) 54 | estimator.fit(X, y) 55 | # assert a value of coefficients has been set correctly 56 | assert isinstance(estimator.coef_, np.ndarray) 57 | assert len(estimator.coef_) == len(beta) 58 | assert len(estimator.predict(X)) == len(y) 59 | assert estimator.intercept_ == 0.0 60 | 61 | estimator = estimator_cls(fit_intercept=True, **args) 62 | estimator.fit(X, y) 63 | # assert a value of coefficients has been set correctly 64 | assert isinstance(estimator.coef_, np.ndarray) 65 | assert len(estimator.coef_) == len(beta) 66 | assert len(estimator.predict(X)) == len(y) 67 | assert estimator.intercept_ != 0.0 68 | 69 | 70 | @pytest.mark.xfail(raises=SolverError) 71 | def test_add_constraints(estimator, random_model, rng): 72 | with pytest.raises(RuntimeError): 73 | estimator.add_constraints([cp.Variable(1) >= 0]) 74 | 75 | X, y, beta = random_model 76 | estimator.generate_problem(X, y) 77 | n_constraints = len(estimator.canonicals_.constraints) 78 | # a dummy constraint 79 | estimator.add_constraints([estimator.canonicals_.beta >= 0.0]) 80 | assert len(estimator.canonicals_.problem.constraints) == n_constraints + 1 81 | assert len(estimator.canonicals_.user_constraints) == 1 82 | assert len(estimator.canonicals_.constraints) == n_constraints 83 | 84 | # force cache data 85 | # ( solving the model sometimes fails and we only want to check that a warning is 86 | # raised ) 87 | estimator.cached_X_ = X 88 | estimator.cached_y_ = y 89 | 90 | new_X = rng.random(X.shape) 91 | with pytest.warns(UserWarning): 92 | estimator.fit(new_X, y) 93 | 94 | 95 | @pytest.mark.xfail( 96 | any( 97 | True 98 | for info in threadpool_info() 99 | if info["internal_api"] == "openblas" 100 | # Prudently assume Prescott might be the architecture if it is unknown. 
101 | and info.get("architecture", "prescott").lower() == "prescott" 102 | ), 103 | reason="On Github runner above is true and sklearn will throw an error by trying to create_mmemap_backed_arrays " 104 | "with an estimator.", 105 | ) 106 | def test_sklearn_compatible(estimator): 107 | """Test sklearn compatibility with no parameter inputs.""" 108 | check_estimator(estimator) 109 | -------------------------------------------------------------------------------- /tests/test_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.testing as npt 3 | import pytest 4 | 5 | from sparselm.dataset import make_group_regression 6 | 7 | 8 | @pytest.mark.parametrize("n_informative_groups", [5, 20]) 9 | @pytest.mark.parametrize("n_features_per_group", [5, 4 * list(range(2, 7))]) 10 | @pytest.mark.parametrize("frac_informative_in_group", [1.0, 0.5]) 11 | @pytest.mark.parametrize("shuffle", [True, False]) 12 | @pytest.mark.parametrize("coef", [True, False]) 13 | def test_make_group_regression( 14 | n_informative_groups, n_features_per_group, frac_informative_in_group, shuffle, coef 15 | ): 16 | model = make_group_regression( 17 | n_informative_groups=n_informative_groups, 18 | n_features_per_group=n_features_per_group, 19 | frac_informative_in_group=frac_informative_in_group, 20 | shuffle=shuffle, 21 | coef=coef, 22 | ) 23 | 24 | assert len(model) == 4 if coef else 3 25 | 26 | if coef: 27 | X, y, groups, coefs = model 28 | else: 29 | X, y, groups = model 30 | 31 | if not isinstance(n_features_per_group, list): 32 | n_features_per_group = [n_features_per_group] * 20 33 | 34 | n_features = ( 35 | sum(n_features_per_group) 36 | if isinstance(n_features_per_group, list) 37 | else 20 * n_features_per_group 38 | ) 39 | 40 | assert X.shape == (100, n_features) 41 | assert y.shape == (100,) 42 | assert groups.shape == (n_features,) 43 | assert len(np.unique(groups)) == 20 44 | 45 | if coef: 46 | n_informative = sum( 47 | round(frac_informative_in_group * n_features_per_group[i]) 48 | for i in range(n_informative_groups) 49 | ) 50 | 51 | assert coefs.shape == (n_features,) 52 | assert sum(coef > 0 for coef in coefs) == n_informative 53 | npt.assert_array_almost_equal(np.dot(X, coefs), y) 54 | 55 | if shuffle: 56 | # check that not all groups are lumped together 57 | assert sum(np.diff(groups) == 0) < 20 - 1 58 | 59 | # check warning 60 | with pytest.warns(UserWarning): 61 | make_group_regression(frac_informative_in_group=1 / 100) 62 | -------------------------------------------------------------------------------- /tests/test_lasso.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.testing as npt 3 | import pytest 4 | from cvxpy.error import SolverError 5 | 6 | from sparselm.model import ( 7 | AdaptiveGroupLasso, 8 | AdaptiveLasso, 9 | AdaptiveOverlapGroupLasso, 10 | AdaptiveRidgedGroupLasso, 11 | AdaptiveSparseGroupLasso, 12 | GroupLasso, 13 | Lasso, 14 | OverlapGroupLasso, 15 | SparseGroupLasso, 16 | ) 17 | 18 | ADAPTIVE_ESTIMATORS = [ 19 | AdaptiveLasso, 20 | AdaptiveGroupLasso, 21 | AdaptiveSparseGroupLasso, 22 | AdaptiveOverlapGroupLasso, 23 | AdaptiveRidgedGroupLasso, 24 | ] 25 | 26 | THRESHOLD = 1e-8 # relative threshold 27 | 28 | 29 | def test_lasso_toy(): 30 | # Borrowed from sklearn tests 31 | # Test Lasso on a toy example for various values of alpha. 32 | # When validating this against glmnet notice that glmnet divides it 33 | # against nobs. 
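# Worked check of the expected coefficients: for this toy data the objective
# reduces to (1 - w)**2 / 3 + alpha * |w| under the glmnet-style
# 1 / (2 * n_samples) scaling mentioned above, so the minimizer is
# w = max(0, 1 - 1.5 * alpha), matching the asserted values (~1, 0.85, 0.25, 0).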
34 | 35 | X = [[-1], [0], [1]] 36 | Y = [-1, 0, 1] # just a straight line 37 | T = [[2], [3], [4]] # test sample 38 | 39 | lasso = Lasso(alpha=1e-8) 40 | lasso.fit(X, Y) 41 | pred = lasso.predict(T) 42 | npt.assert_array_almost_equal(lasso.coef_, [1]) 43 | npt.assert_array_almost_equal(pred, [2, 3, 4]) 44 | 45 | lasso = Lasso(alpha=0.1) 46 | lasso.fit(X, Y) 47 | pred = lasso.predict(T) 48 | npt.assert_array_almost_equal(lasso.coef_, [0.85]) 49 | npt.assert_array_almost_equal(pred, [1.7, 2.55, 3.4]) 50 | 51 | lasso = Lasso(alpha=0.5) 52 | lasso.fit(X, Y) 53 | pred = lasso.predict(T) 54 | npt.assert_array_almost_equal(lasso.coef_, [0.25]) 55 | npt.assert_array_almost_equal(pred, [0.5, 0.75, 1.0]) 56 | 57 | lasso = Lasso(alpha=1.0) 58 | lasso.fit(X, Y) 59 | pred = lasso.predict(T) 60 | npt.assert_array_almost_equal(lasso.coef_, [0.0]) 61 | npt.assert_array_almost_equal(pred, [0, 0, 0]) 62 | 63 | 64 | def test_lasso_non_float_y(): 65 | # Borrowed from sklearn tests 66 | X = [[0, 0], [1, 1], [-1, -1]] 67 | y = [0, 1, 2] 68 | y_float = [0.0, 1.0, 2.0] 69 | 70 | lasso = Lasso(fit_intercept=False) 71 | lasso.fit(X, y) 72 | lasso_float = Lasso(fit_intercept=False) 73 | lasso_float.fit(X, y_float) 74 | npt.assert_array_equal(lasso.coef_, lasso_float.coef_) 75 | 76 | 77 | def test_adaptive_lasso_sparser(random_model): 78 | X, y, _ = random_model 79 | lasso = Lasso(fit_intercept=True) 80 | alasso = AdaptiveLasso(fit_intercept=True) 81 | 82 | lasso.fit(X, y) 83 | alasso.fit(X, y) 84 | 85 | assert sum(abs(lasso.coef_) > THRESHOLD) >= sum(abs(alasso.coef_) > THRESHOLD) 86 | 87 | 88 | # TODO flakey test, depends on THRESHOLD value 89 | @pytest.mark.xfail(raises=SolverError) 90 | @pytest.mark.parametrize( 91 | "standardize", 92 | [True, False], 93 | ) # standardize=False leads to failures 94 | def test_group_lasso(random_model_with_groups, solver, standardize): 95 | X, y, _, groups = random_model_with_groups 96 | 97 | aglasso = AdaptiveGroupLasso( 98 | groups=groups, 99 | alpha=0.1, 100 | fit_intercept=True, 101 | standardize=standardize, 102 | solver=solver, 103 | ) 104 | aglasso.fit(X, y) 105 | 106 | # check that if all coefs in groups are consistent 107 | for gid in np.unique(groups): 108 | m = np.max(abs(aglasso.coef_)) 109 | all_active = (abs(aglasso.coef_[groups == gid]) > m * THRESHOLD).all() 110 | all_inactive = (abs(aglasso.coef_[groups == gid]) <= m * THRESHOLD).all() 111 | assert all_active or all_inactive 112 | 113 | 114 | @pytest.mark.xfail(raises=SolverError) 115 | @pytest.mark.parametrize( 116 | "standardize", 117 | [True, False], 118 | ) 119 | def test_group_lasso_weights(random_model_with_groups, solver, standardize): 120 | X, y, _, groups = random_model_with_groups 121 | 122 | group_weights = np.ones(len(np.unique(groups))) 123 | 124 | aglasso = AdaptiveGroupLasso( 125 | groups=groups, 126 | alpha=0.1, 127 | group_weights=group_weights, 128 | fit_intercept=True, 129 | standardize=standardize, 130 | solver=solver, 131 | ) 132 | aglasso.fit(X, y) 133 | 134 | rglasso = AdaptiveRidgedGroupLasso( 135 | groups=groups, 136 | alpha=0.1, 137 | group_weights=group_weights, 138 | fit_intercept=True, 139 | standardize=standardize, 140 | solver=solver, 141 | ) 142 | rglasso.fit(X, y) 143 | 144 | # check that if all coefs in groups are consistent 145 | for gid in np.unique(groups): 146 | m = np.max(abs(aglasso.coef_)) 147 | 148 | all_active = (abs(aglasso.coef_[groups == gid]) > m * THRESHOLD).all() 149 | all_inactive = (abs(aglasso.coef_[groups == gid]) <= m * THRESHOLD).all() 150 | assert all_active 
or all_inactive 151 | 152 | m = np.max(abs(rglasso.coef_)) 153 | all_active = (abs(rglasso.coef_[groups == gid]) > m * THRESHOLD).all() 154 | all_inactive = (abs(rglasso.coef_[groups == gid]) <= m * THRESHOLD).all() 155 | assert all_active or all_inactive 156 | 157 | 158 | @pytest.mark.xfail(raises=SolverError) 159 | @pytest.mark.parametrize("estimator_cls", ADAPTIVE_ESTIMATORS) 160 | def test_adaptive_weights(estimator_cls, random_model_with_groups, solver, rng): 161 | X, y, beta, groups = random_model_with_groups 162 | 163 | if estimator_cls.__name__ == "AdaptiveLasso": 164 | estimator = estimator_cls(solver=solver) 165 | elif estimator_cls.__name__ == "AdaptiveOverlapGroupLasso": 166 | gids = np.unique(groups) 167 | group_list = [ 168 | rng.choice(gids, replace=False, size=rng.integers(1, 3)) 169 | for _ in range(len(beta)) 170 | ] 171 | estimator = estimator_cls(group_list=group_list, solver=solver) 172 | else: 173 | estimator = estimator_cls(groups=groups, solver=solver) 174 | 175 | # force generating weights 176 | estimator.generate_problem(X, y) 177 | 178 | if estimator_cls.__name__ == "AdaptiveSparseGroupLasso": 179 | weights = [ 180 | estimator.canonicals_.parameters.adaptive_coef_weights.value.copy(), 181 | estimator.canonicals_.parameters.adaptive_group_weights.value.copy(), 182 | ] 183 | else: 184 | weights = [estimator.canonicals_.parameters.adaptive_weights.value.copy()] 185 | 186 | estimator.fit(X, y) 187 | 188 | if estimator_cls.__name__ == "AdaptiveSparseGroupLasso": 189 | new_weights = [ 190 | estimator.canonicals_.parameters.adaptive_coef_weights.value.copy(), 191 | estimator.canonicals_.parameters.adaptive_group_weights.value.copy(), 192 | ] 193 | else: 194 | new_weights = [estimator.canonicals_.parameters.adaptive_weights.value.copy()] 195 | 196 | # simply check that the weights are updated. 
197 | # TODO a better check would be to check that weights for active groups/coefs 198 | # are smaller than those of inactive ones 199 | for nw, w in zip(new_weights, weights): 200 | assert not any(nw_i == pytest.approx(w_i) for nw_i, w_i in zip(nw, w)) 201 | 202 | 203 | def test_bad_inputs(random_model_with_groups, rng): 204 | X, y, beta, groups = random_model_with_groups 205 | bad_groups = rng.integers(0, 6, size=len(beta) - 1) 206 | group_weights = np.ones(len(np.unique(bad_groups))) 207 | 208 | # test that warns when no groups given 209 | with pytest.warns(UserWarning): 210 | gl = GroupLasso() 211 | gl.fit(X, y) 212 | 213 | with pytest.warns(UserWarning): 214 | gl = OverlapGroupLasso() 215 | gl.fit(X, y) 216 | 217 | # bad groups 218 | with pytest.raises(ValueError): 219 | gl = GroupLasso(bad_groups, group_weights=group_weights) 220 | gl.fit(X, y) 221 | 222 | with pytest.raises(TypeError): 223 | gl = GroupLasso("groups", group_weights=group_weights) 224 | gl.fit(X, y) 225 | 226 | # bad group_weights 227 | with pytest.raises(ValueError): 228 | group_weights = np.ones(len(np.unique(bad_groups)) - 1) 229 | gl = GroupLasso(bad_groups, group_weights=group_weights) 230 | gl.fit(X, y) 231 | 232 | with pytest.raises(TypeError): 233 | gl = GroupLasso(groups, group_weights="weights") 234 | gl.fit(X, y) 235 | 236 | # bad l1_ratio 237 | lasso = SparseGroupLasso(groups) 238 | with pytest.raises(ValueError): 239 | lasso.l1_ratio = -1.0 240 | lasso.fit(X, y) 241 | 242 | with pytest.raises(ValueError): 243 | lasso.l1_ratio = 2.0 244 | lasso.fit(X, y) 245 | 246 | with pytest.raises(ValueError): 247 | sgl = SparseGroupLasso(groups, l1_ratio=-1.0) 248 | sgl.fit(X, y) 249 | 250 | with pytest.raises(ValueError): 251 | sgl = SparseGroupLasso(groups, l1_ratio=2.0) 252 | sgl.fit(X, y) 253 | 254 | # test that it warns 255 | with pytest.warns(UserWarning): 256 | sgl = SparseGroupLasso(groups, l1_ratio=0.0) 257 | sgl.fit(X, y) 258 | with pytest.warns(UserWarning): 259 | sgl = SparseGroupLasso(groups, l1_ratio=1.0) 260 | sgl.fit(X, y) 261 | 262 | 263 | @pytest.mark.parametrize("estimator_cls", ADAPTIVE_ESTIMATORS) 264 | def test_set_parameters(estimator_cls, random_model_with_groups, rng): 265 | X, y, beta, groups = random_model_with_groups 266 | 267 | if estimator_cls.__name__ == "AdaptiveLasso": 268 | estimator = estimator_cls() 269 | elif estimator_cls.__name__ == "AdaptiveOverlapGroupLasso": 270 | gids = np.unique(groups) 271 | group_list = [ 272 | rng.choice(gids, replace=False, size=rng.integers(1, 3)) 273 | for _ in range(len(beta)) 274 | ] 275 | estimator = estimator_cls(group_list=group_list) 276 | else: 277 | estimator = estimator_cls(groups=groups) 278 | 279 | estimator.alpha = 0.5 280 | assert estimator.alpha == 0.5 281 | estimator.generate_problem(X, y) 282 | assert estimator.canonicals_.parameters.alpha.value == 0.5 283 | 284 | if hasattr(estimator, "l1_ratio"): 285 | # default l1_ratio is 0.5 286 | assert estimator.canonicals_.parameters.lambda1.value == 0.5 * 0.5 287 | assert estimator.canonicals_.parameters.lambda2.value == 0.5 * 0.5 288 | 289 | estimator.l1_ratio = 0.25 290 | estimator._set_param_values() 291 | assert estimator.l1_ratio == 0.25 292 | assert estimator.canonicals_.parameters.lambda1.value == 0.25 * 0.5 293 | assert estimator.canonicals_.parameters.lambda2.value == 0.75 * 0.5 294 | 295 | if hasattr(estimator, "delta"): 296 | estimator.delta = (4.0,) 297 | estimator._set_param_values() 298 | npt.assert_array_equal( 299 | estimator.canonicals_.parameters.delta.value, 300 | 4.0 * 
np.ones(len(np.unique(groups))), 301 | ) 302 | 303 | estimator.delta = 3.0 * np.ones(len(np.unique(groups))) 304 | estimator._set_param_values() 305 | npt.assert_array_equal(estimator.delta, 3.0 * np.ones(len(np.unique(groups)))) 306 | npt.assert_array_equal( 307 | estimator.canonicals_.parameters.delta.value, 308 | 3.0 * np.ones(len(np.unique(groups))), 309 | ) 310 | -------------------------------------------------------------------------------- /tests/test_miqp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.testing as npt 3 | import pytest 4 | 5 | from sparselm.model import ( 6 | L2L0, 7 | BestSubsetSelection, 8 | RegularizedL0, 9 | RidgedBestSubsetSelection, 10 | ) 11 | 12 | # exclude L1L0 since it breaks hierarchy constraints... 13 | MIQP_estimators = [ 14 | BestSubsetSelection, 15 | RidgedBestSubsetSelection, 16 | RegularizedL0, 17 | L2L0, 18 | ] 19 | 20 | THRESHOLD = 1e-12 21 | 22 | 23 | def assert_hierarchy_respected(coef, slack_z, hierarchy, groups=None): 24 | groups = groups if groups is not None else np.arange(len(coef)) 25 | group_ids = np.unique(groups) 26 | for grp_id, active, parents in zip(group_ids, slack_z, hierarchy): 27 | if active == 1: # all parents must also be active 28 | assert all( 29 | (abs(coef[groups == parent]) >= THRESHOLD).all() for parent in parents 30 | ) 31 | 32 | 33 | def test_perfect_signal_recovery(sparse_coded_signal): 34 | X, y, beta = sparse_coded_signal 35 | X = X.T 36 | 37 | (idx,) = beta.nonzero() 38 | 39 | estimator = BestSubsetSelection(sparse_bound=np.count_nonzero(beta)) 40 | estimator.fit(X, y) 41 | 42 | npt.assert_array_equal(idx, np.flatnonzero(estimator.coef_)) 43 | npt.assert_array_almost_equal(beta, estimator.coef_) 44 | 45 | r_estimator = RidgedBestSubsetSelection(sparse_bound=np.count_nonzero(beta)) 46 | 47 | # very low regularization should be the same 48 | r_estimator.eta = 1e-16 49 | r_estimator.fit(X, y) 50 | npt.assert_array_almost_equal(beta, r_estimator.coef_) 51 | npt.assert_array_equal(idx, np.flatnonzero(r_estimator.coef_)) 52 | assert all(i in np.flatnonzero(r_estimator.coef_) for i in idx) 53 | 54 | # a bit higher regularization, check shrinkage 55 | coef = r_estimator.coef_.copy() 56 | r_estimator.eta = 1e-4 57 | r_estimator.fit(X, y) 58 | npt.assert_array_almost_equal(beta, r_estimator.coef_, decimal=1) 59 | assert np.linalg.norm(coef) > np.linalg.norm(r_estimator.coef_) 60 | 61 | # very sensitive to the value of alpha for exact results 62 | estimator = RegularizedL0(alpha=0.0008) 63 | estimator.fit(X, y) 64 | 65 | npt.assert_array_equal(idx, np.flatnonzero(estimator.coef_)) 66 | npt.assert_array_almost_equal(beta, estimator.coef_, decimal=2) 67 | 68 | 69 | @pytest.mark.parametrize("estimator_cls", MIQP_estimators) 70 | def test_slack_variables(estimator_cls, random_model_with_groups, miqp_solver, rng): 71 | X, y, beta, groups = random_model_with_groups 72 | 73 | # ignore groups 74 | if "Subset" in estimator_cls.__name__: 75 | estimator = estimator_cls(sparse_bound=len(beta) // 2, solver=miqp_solver) 76 | else: 77 | estimator = estimator_cls(alpha=3.0, solver=miqp_solver) 78 | 79 | estimator.fit(X, y) 80 | for coef, active in zip( 81 | estimator.coef_, estimator.canonicals_.auxiliaries.z0.value 82 | ): 83 | if active == 1: 84 | assert abs(coef) >= THRESHOLD 85 | else: 86 | assert abs(coef) < THRESHOLD 87 | 88 | # now group hierarchy 89 | group_ids = np.sort(np.unique(groups)) 90 | if "Subset" in estimator_cls.__name__: 91 | estimator = 
estimator_cls( 92 | groups, sparse_bound=len(group_ids) // 2, solver=miqp_solver 93 | ) 94 | else: 95 | estimator = estimator_cls(groups, alpha=2.0, solver=miqp_solver) 96 | 97 | estimator.fit(X, y) 98 | for gid, active in zip(group_ids, estimator.canonicals_.auxiliaries.z0.value): 99 | if active: 100 | assert all(abs(estimator.coef_[groups == gid]) >= THRESHOLD) 101 | else: 102 | assert all(abs(estimator.coef_[groups == gid]) < THRESHOLD) 103 | 104 | 105 | @pytest.mark.parametrize("estimator_cls", MIQP_estimators) 106 | def test_singleton_hierarchy(estimator_cls, random_model, miqp_solver, rng): 107 | X, y, beta = random_model 108 | (idx,) = beta.nonzero() 109 | 110 | # ignore groups, single covariate hierarchy 111 | if "Subset" in estimator_cls.__name__: 112 | estimator = estimator_cls(sparse_bound=len(beta) // 2, solver=miqp_solver) 113 | else: 114 | estimator = estimator_cls(alpha=2.0, solver=miqp_solver) 115 | 116 | fully_chained = [[len(beta) - 1]] + [[i] for i in range(0, len(beta) - 1)] 117 | estimator.hierarchy = fully_chained 118 | estimator.fit(X, y) 119 | 120 | # bound is set lower than number of coefs so all must be zero in BestSubset 121 | if any(estimator.coef_ == 0): 122 | assert all(estimator.coef_ == 0) 123 | else: 124 | assert all(estimator.coef_ != 0) 125 | assert_hierarchy_respected( 126 | estimator.coef_, estimator.canonicals_.auxiliaries.z0.value, fully_chained 127 | ) 128 | 129 | hierarchy = [] 130 | for i in range(len(beta)): 131 | # everything depends on 1st nonzero coef 132 | if i != idx[0]: 133 | hierarchy.append([idx[0]]) 134 | else: 135 | hierarchy.append([]) 136 | # first half of remaining depends on 2nd nonzero 137 | if 0 < i < len(beta) // 2 and i != idx[1]: 138 | hierarchy[i].append(idx[1]) 139 | # second half of remaining on 3rd nonzero 140 | if len(beta) // 2 <= i and i != idx[2]: 141 | hierarchy[i].append(idx[2]) 142 | 143 | estimator.hierarchy = hierarchy 144 | # TODO make hierarchy and other non cp.Parameter params reset problem if reset 145 | estimator.problem = None 146 | estimator.fit(X, y) 147 | assert_hierarchy_respected( 148 | estimator.coef_, estimator.canonicals_.auxiliaries.z0.value, hierarchy 149 | ) 150 | 151 | 152 | @pytest.mark.parametrize("estimator_cls", MIQP_estimators) 153 | def test_group_hierarchy(estimator_cls, random_model_with_groups, miqp_solver, rng): 154 | X, y, beta, groups = random_model_with_groups 155 | (idx,) = beta.nonzero() 156 | 157 | # now group hierarchy 158 | group_ids = np.unique(groups) 159 | if "Subset" in estimator_cls.__name__: 160 | estimator = estimator_cls( 161 | groups, sparse_bound=len(group_ids) // 2, solver=miqp_solver 162 | ) 163 | else: 164 | estimator = estimator_cls(groups, alpha=3.0, solver=miqp_solver) 165 | 166 | fully_chained = [[group_ids[-1]]] + [ 167 | [group_ids[i]] for i in range(0, len(group_ids) - 1) 168 | ] 169 | estimator.hierarchy = fully_chained 170 | estimator.fit(X, y) 171 | 172 | # bound is set lower than number of coefs so all must be zero in BestSubset 173 | if any(estimator.coef_ == 0): 174 | assert all(estimator.coef_ == 0) 175 | else: 176 | assert all(estimator.coef_ != 0) 177 | 178 | assert_hierarchy_respected( 179 | estimator.coef_, 180 | estimator.canonicals_.auxiliaries.z0.value, 181 | fully_chained, 182 | groups=groups, 183 | ) 184 | 185 | # pick two groups with nozero coefs 186 | grp1 = groups[idx[0]] 187 | while (grp2 := groups[rng.choice(idx)]) == grp1: 188 | pass 189 | 190 | hierarchy = [] 191 | for i in range(len(group_ids)): 192 | # everything depends on 1st nonzero 
coef
193 | if i != grp1:
194 | hierarchy.append([grp1])
195 | else:
196 | hierarchy.append([])
197 | # first half of remaining depends on 2nd nonzero
198 | if 0 < i < len(group_ids) // 2 and i not in [grp1, grp2]:
199 | hierarchy[i].append(grp2)
200 |
201 | estimator.problem = None # TODO also remove this...
202 | estimator.hierarchy = hierarchy
203 | estimator.fit(X, y)
204 |
205 | assert_hierarchy_respected(
206 | estimator.coef_,
207 | estimator.canonicals_.auxiliaries.z0.value,
208 | hierarchy,
209 | groups=groups,
210 | )
211 |
212 |
213 | def test_set_parameters(random_model):
214 | X, y, beta = random_model
215 | estimator = RidgedBestSubsetSelection(sparse_bound=1, eta=1.0)
216 | estimator.sparse_bound = 2
217 | estimator.fit(X, y)
218 | assert estimator.canonicals_.parameters.sparse_bound.value == 2
219 | assert estimator.canonicals_.parameters.eta.value == 1.0
220 |
221 | estimator.eta = 0.5
222 | estimator.fit(X, y)
223 | assert estimator.canonicals_.parameters.eta.value == 0.5
224 |
225 |
226 | def test_bad_input(random_model):
227 | X, y, beta = random_model
228 |
229 | # bad sparse_bound
230 | estimator = BestSubsetSelection(sparse_bound=-1)
231 | with pytest.raises(ValueError):
232 | estimator.fit(X, y)
233 |
234 | # bad eta
235 | estimator = RidgedBestSubsetSelection(eta=-1.0)
236 | with pytest.raises(ValueError):
237 | estimator.fit(X, y)
238 |
--------------------------------------------------------------------------------
/tests/test_model_selection.py:
--------------------------------------------------------------------------------
1 | import cvxpy as cp
2 | import numpy as np
3 | import pytest
4 | from sklearn.datasets import make_regression
5 | from sklearn.linear_model import Lasso
6 | from sklearn.model_selection import KFold, train_test_split
7 |
8 | from sparselm.model import L1L0, L2L0
9 | from sparselm.model_selection import GridSearchCV, LineSearchCV
10 |
11 | ALL_CRITERION = ["max_score", "one_std_score"]
12 | # Currently we only test on the mixed-L0 estimators
13 | ALL_ESTIMATORS = [L2L0, L1L0]
14 | ONLY_L2L0 = [L2L0]
15 |
16 |
17 | @pytest.fixture(scope="module")
18 | def param_grid():
19 | # Test on multiple grids
20 | return [
21 | {"alpha": [0.01, 0.1], "eta": [0.03, 0.3]},
22 | {"alpha": [0.02, 0.2], "eta": [0.04, 0.4]},
23 | ]
24 |
25 |
26 | def test_solver():
27 | # Check that the installed solvers can handle a small integer problem.
28 | # A non-academic, non-commercial Gurobi license cannot solve large-scale models (> 100 parameters).
29 | # ECOS_BB is significantly slower, so use Gurobi if possible!
30 | x = cp.Variable(10, integer=True)
31 | obj = cp.sum_squares(x)
32 | cons = [x <= 3, x >= -3]
33 | prob = cp.Problem(cp.Minimize(obj), cons)
34 |
35 | if "GUROBI" in cp.installed_solvers():
36 | result = prob.solve(solver="GUROBI")
37 | else:
38 | result = prob.solve(solver="ECOS_BB")
39 |
40 | assert x.value is not None
41 | assert result is not None
42 |
43 |
44 | @pytest.fixture(scope="module", params=ALL_ESTIMATORS)
45 | def estimator(random_energy_model, request):
46 | ecis = random_energy_model[2]
47 | # Each correlation function is treated as its own group, i.e. an ordinary hierarchy.
48 | groups = list(range(len(ecis)))
49 | if "GUROBI" in cp.installed_solvers():
50 | return request.param(groups=groups, solver="GUROBI")
51 | else:
52 | return request.param(groups=groups, solver="ECOS_BB")
53 | # return request.param(solver="ECOS_BB")
54 |
55 |
56 | @pytest.fixture(scope="module", params=ONLY_L2L0)
57 | def mixed_l2l0_est(random_energy_model, request):
58 | ecis = random_energy_model[2]
59 | # Each correlation function is treated as its own group, i.e. an ordinary hierarchy.
60 | groups = list(range(len(ecis)))
61 | if "GUROBI" in cp.installed_solvers():
62 | return request.param(groups=groups, solver="GUROBI")
63 | else:
64 | return request.param(groups=groups, solver="ECOS_BB")
65 | # return request.param(solver="ECOS_BB")
66 |
67 |
68 | def test_mixed_l0_wts(random_energy_model, mixed_l2l0_est, rng):
69 | femat, energies, _ = random_energy_model
70 | mixed_l2l0_est.eta = 1e-5
71 | mixed_l2l0_est.fit(X=femat, y=energies)
72 | energies_pred = mixed_l2l0_est.predict(femat)
73 | assert energies_pred is not None
74 | mixed_l2l0_est.tikhonov_w = 1000 * rng.random(femat.shape[1])
75 | mixed_l2l0_est.fit(X=femat, y=energies)
76 | energies_pred_wtd = mixed_l2l0_est.predict(femat)
77 | assert energies_pred_wtd is not None
78 |
79 |
80 | @pytest.fixture(scope="module", params=ALL_CRITERION)
81 | def grid_search(estimator, param_grid, request):
82 | grid_searcher = GridSearchCV(
83 | estimator, param_grid, opt_selection_method=request.param
84 | )
85 | return grid_searcher
86 |
87 |
88 | @pytest.fixture(scope="module", params=ALL_CRITERION)
89 | def line_search(estimator, param_grid, request):
90 | # Multiple grids are not supported in line search mode.
91 | param_grid_lines = sorted((key, values) for key, values in param_grid[0].items())
92 | line_searcher = LineSearchCV(
93 | estimator,
94 | param_grid_lines,
95 | opt_selection_method=request.param,
96 | n_iter=3,
97 | )
98 | return line_searcher
99 |
100 |
101 | def test_grid_search(random_energy_model, grid_search):
102 | femat, energies, _ = random_energy_model
103 | n_samples, n_features = femat.shape
104 | grid_search.fit(X=femat, y=energies)
105 | assert "best_params_" in vars(grid_search)
106 | best_params = grid_search.best_params_
107 | assert "alpha" in best_params and "eta" in best_params
108 | assert best_params["alpha"] in [0.01, 0.1, 0.02, 0.2]
109 | assert best_params["eta"] in [0.03, 0.3, 0.04, 0.4]
110 |
111 | assert grid_search.best_score_ <= 1
112 | assert "coef_" in vars(grid_search.best_estimator_)
113 | assert "intercept_" in vars(grid_search.best_estimator_)
114 | energies_pred = grid_search.predict(femat)
115 | rmse = np.sum((energies - energies_pred) ** 2) / len(energies)
116 | # Overfit.
117 | if n_samples < n_features:
118 | assert -grid_search.best_score_ >= rmse
119 |
120 |
121 | # Guarantees that the one-std rule always selects larger regularization parameters than max score.
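# (The one-standard-error rule picks the most regularized model whose mean CV score is
# still within one standard deviation of the best mean score, rather than the argmax
# itself. For example, with mean scores [-0.90, -0.70, -0.65, -0.68, -0.80] over
# increasing alphas and a std of 0.05 at the best point, the threshold is -0.65 - 0.05 = -0.70,
# so the largest alpha scoring >= -0.70 (the fourth) is selected instead of the third (argmax).
# Exact tie-breaking may differ in the implementation, but the selected alpha should never
# be smaller than the max_score one.)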
122 | def test_onestd(): 123 | success = 0 124 | for _ in range(10): 125 | X, y, coef = make_regression( 126 | n_samples=200, 127 | n_features=100, 128 | n_informative=10, 129 | noise=40.0, 130 | bias=-15.0, 131 | coef=True, 132 | random_state=0, 133 | ) 134 | 135 | X_train, X_test, y_train, y_test = train_test_split( 136 | X, y, test_size=0.25, random_state=0 137 | ) 138 | 139 | # create estimators 140 | lasso = Lasso(fit_intercept=True) 141 | 142 | # create cv search objects for each estimator 143 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0) 144 | params = {"alpha": np.logspace(-1, 1, 10)} 145 | 146 | lasso_cv_std = GridSearchCV( 147 | lasso, params, opt_selection_method="one_std_score", cv=cv5, n_jobs=-1 148 | ) 149 | lasso_cv_opt = GridSearchCV( 150 | lasso, params, opt_selection_method="max_score", cv=cv5, n_jobs=-1 151 | ) 152 | 153 | # fit models on training data 154 | lasso_cv_std.fit(X_train, y_train) 155 | lasso_cv_opt.fit(X_train, y_train) 156 | 157 | correct_params = ( 158 | lasso_cv_opt.best_params_["alpha"] <= lasso_cv_std.best_params_["alpha"] 159 | ) 160 | sparsity_opt = np.sum(np.abs(lasso_cv_opt.best_estimator_.coef_) >= 1e-6) 161 | sparsity_std = np.sum(np.abs(lasso_cv_std.best_estimator_.coef_) >= 1e-6) 162 | 163 | if correct_params and sparsity_opt >= sparsity_std: 164 | success += 1 165 | 166 | # Allow some failure caused by randomness of CV splits. 167 | assert success >= 8 168 | 169 | 170 | def test_line_search(random_energy_model, line_search): 171 | femat, energies, _ = random_energy_model 172 | n_samples, n_features = femat.shape 173 | line_search.fit(X=femat, y=energies) 174 | assert "best_params_" in vars(line_search) 175 | best_params = line_search.best_params_ 176 | assert "alpha" in best_params and "eta" in best_params 177 | assert best_params["alpha"] in [0.01, 0.1] 178 | assert best_params["eta"] in [0.03, 0.3] 179 | 180 | assert line_search.best_score_ <= 1 181 | assert "coef_" in vars(line_search.best_estimator_) 182 | assert "intercept_" in vars(line_search.best_estimator_) 183 | energies_pred = line_search.predict(femat) 184 | rmse = np.sum((energies - energies_pred) ** 2) / len(energies) 185 | # Overfit. 186 | if n_samples < n_features: 187 | assert -line_search.best_score_ >= rmse 188 | -------------------------------------------------------------------------------- /tests/test_ols.py: -------------------------------------------------------------------------------- 1 | """Sanity checks: literally just copied from sklearn tests... """ 2 | 3 | import numpy as np 4 | import numpy.testing as npt 5 | import pytest 6 | from sklearn.preprocessing import add_dummy_feature 7 | 8 | from sparselm.model import OrdinaryLeastSquares 9 | 10 | 11 | def test_linear_regression(): 12 | # Test OrdinaryLeastSquares on a simple dataset. 
13 | # a simple dataset 14 | X = [[1], [2]] 15 | Y = [1, 2] 16 | 17 | reg = OrdinaryLeastSquares() 18 | reg.fit(X, Y) 19 | 20 | npt.assert_array_almost_equal(reg.coef_, [1]) 21 | npt.assert_array_almost_equal(reg.intercept_, [0]) 22 | npt.assert_array_almost_equal(reg.predict(X), [1, 2]) 23 | 24 | # test it also for degenerate input 25 | X = [[1]] 26 | Y = [0] 27 | 28 | reg = OrdinaryLeastSquares() 29 | reg.fit(X, Y) 30 | npt.assert_array_almost_equal(reg.coef_, [0]) 31 | npt.assert_array_almost_equal(reg.intercept_, [0]) 32 | npt.assert_array_almost_equal(reg.predict(X), [0]) 33 | 34 | 35 | @pytest.mark.parametrize("fit_intercept", [True, False]) 36 | def test_linear_regression_sample_weights(fit_intercept, rng): 37 | # It would not work with under-determined systems 38 | n_samples, n_features = 10, 8 39 | 40 | X = rng.normal(size=(n_samples, n_features)) 41 | y = rng.normal(size=n_samples) 42 | 43 | sample_weight = 1.0 + rng.uniform(size=n_samples) 44 | 45 | # OLS with explicit sample_weight 46 | reg = OrdinaryLeastSquares(fit_intercept=fit_intercept) 47 | reg.fit(X, y, sample_weight=sample_weight) 48 | coefs1 = reg.coef_ 49 | inter1 = reg.intercept_ 50 | 51 | assert reg.coef_.shape == (X.shape[1],) # sanity checks 52 | 53 | # Closed form of the weighted least square 54 | # theta = (X^T W X)^(-1) @ X^T W y 55 | W = np.diag(sample_weight) 56 | X_aug = X if not fit_intercept else add_dummy_feature(X) 57 | 58 | Xw = X_aug.T @ W @ X_aug 59 | yw = X_aug.T @ W @ y 60 | coefs2 = np.linalg.solve(Xw, yw) 61 | 62 | if not fit_intercept: 63 | npt.assert_allclose(coefs1, coefs2) 64 | else: 65 | npt.assert_allclose(coefs1, coefs2[1:]) 66 | npt.assert_allclose(inter1, coefs2[0]) 67 | 68 | 69 | def test_fit_intercept(): 70 | # Test assertions on betas shape. 71 | X2 = np.array([[0.38349978, 0.61650022], [0.58853682, 0.41146318]]) 72 | X3 = np.array( 73 | [ 74 | [0.27677969, 0.70693172, 0.01628859], 75 | [0.08385139, 0.20692515, 0.70922346], 76 | ] 77 | ) 78 | y = np.array([1, 1]) 79 | 80 | lr2_without_intercept = OrdinaryLeastSquares(fit_intercept=False).fit(X2, y) 81 | lr2_with_intercept = OrdinaryLeastSquares().fit(X2, y) 82 | 83 | lr3_without_intercept = OrdinaryLeastSquares(fit_intercept=False).fit(X3, y) 84 | lr3_with_intercept = OrdinaryLeastSquares().fit(X3, y) 85 | 86 | assert lr2_with_intercept.coef_.shape == lr2_without_intercept.coef_.shape 87 | assert lr3_with_intercept.coef_.shape == lr3_without_intercept.coef_.shape 88 | assert lr2_without_intercept.coef_.ndim == lr3_without_intercept.coef_.ndim 89 | -------------------------------------------------------------------------------- /tests/test_stepwise.py: -------------------------------------------------------------------------------- 1 | """Test composite estimator class.""" 2 | 3 | import numpy as np 4 | import numpy.testing as npt 5 | import pytest 6 | from sklearn.base import clone 7 | from sklearn.utils._param_validation import InvalidParameterError 8 | 9 | from sparselm.model import L2L0, Lasso 10 | from sparselm.model_selection import GridSearchCV 11 | from sparselm.stepwise import StepwiseEstimator 12 | 13 | 14 | def test_make_composite(): 15 | # Test making a composite estimator. 
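# As exercised below, a StepwiseEstimator is built from (name, estimator) steps plus a
# list of feature-index scopes, one scope per step: each step is fit on its own slice of
# the feature matrix, only the first step may fit an intercept, and the full coef_ is
# assembled from the per-step coefficients.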
16 | lasso1 = Lasso(fit_intercept=True, alpha=1.0)
17 | lasso2 = Lasso(fit_intercept=False, alpha=2.0)
18 | l2l0 = L2L0(groups=[0, 0, 1, 2], alpha=0.1, eta=4.0)
19 | steps = [("lasso1", lasso1), ("lasso2", lasso2), ("l2l0", l2l0)]
20 |
21 | scope1 = [0, 1, 8]
22 | scope2 = [2, 3]
23 | scope3 = [4, 5, 6, 7]
24 | estimator = StepwiseEstimator(steps, [scope1, scope2, scope3])
25 | # sklearn convention tests (they require pandas).
26 | # Currently not passing, because a conventional sklearn estimator should not have a
27 | # fixed number of features.
28 | # check_estimator(estimator)
29 | assert estimator.steps[0][1].fit_intercept
30 | assert not estimator.steps[1][1].fit_intercept
31 | assert not estimator.steps[2][1].fit_intercept
32 |
33 | # Check parameters. The nested estimator case is not tested yet.
34 | params = estimator.get_params(deep=True)
35 | assert params["lasso1"].get_params(deep=True)["alpha"] == 1.0
36 | assert params["lasso2"].get_params(deep=True)["alpha"] == 2.0
37 | assert params["l2l0"].get_params(deep=True)["alpha"] == 0.1
38 | assert params["l2l0"].get_params(deep=True)["eta"] == 4.0
39 | assert params["lasso1__alpha"] == 1.0
40 | assert params["lasso2__alpha"] == 2.0
41 | assert params["l2l0__alpha"] == 0.1
42 | assert params["l2l0__eta"] == 4.0
43 |
44 | estimator.set_params(lasso2__alpha=0.5, l2l0__alpha=0.2, l2l0__eta=3.0)
45 | params = estimator.get_params(deep=True)
46 | assert params["lasso1"].get_params(deep=True)["alpha"] == 1.0
47 | assert params["lasso2"].get_params(deep=True)["alpha"] == 0.5
48 | assert params["l2l0"].get_params(deep=True)["alpha"] == 0.2
49 | assert params["l2l0"].get_params(deep=True)["eta"] == 3.0
50 | assert params["lasso1__alpha"] == 1.0
51 | assert params["lasso2__alpha"] == 0.5
52 | assert params["l2l0__alpha"] == 0.2
53 | assert params["l2l0__eta"] == 3.0
54 |
55 | # Test unsafe clone, so that the composite can be used in the optimizers.
56 | # Currently we have to mute the sanity check from the original sklearn clone.
57 | cloned = clone(estimator)
58 | params = cloned.get_params(deep=True)
59 | assert params["lasso1"].get_params(deep=True)["alpha"] == 1.0
60 | assert params["lasso2"].get_params(deep=True)["alpha"] == 0.5
61 | assert params["l2l0"].get_params(deep=True)["alpha"] == 0.2
62 | assert params["l2l0"].get_params(deep=True)["eta"] == 3.0
63 | assert params["lasso1__alpha"] == 1.0
64 | assert params["lasso2__alpha"] == 0.5
65 | assert params["l2l0__alpha"] == 0.2
66 | assert params["l2l0__eta"] == 3.0
67 |
68 | # A searcher can also be used as a step in a StepwiseEstimator.
69 | grid = GridSearchCV(lasso2, {"alpha": [0.01, 0.1, 1.0]})
70 | steps = [("lasso1", lasso1), ("lasso2", grid), ("l2l0", l2l0)]
71 | estimator = StepwiseEstimator(steps, [scope1, scope2, scope3])
72 | # check_estimator(estimator)
73 | params = estimator.get_params(deep=True)
74 | assert params["lasso1__alpha"] == 1.0
75 | assert params["l2l0__alpha"] == 0.2
76 | assert params["l2l0__eta"] == 3.0
77 | assert "lasso2__alpha" not in params
78 | assert params["lasso2__estimator__alpha"] == 0.5
79 |
80 |
81 | def test_toy_composite():
82 | lasso1 = Lasso(fit_intercept=True, alpha=1e-6)
83 | lasso2 = Lasso(fit_intercept=False, alpha=1e-6)
84 | grid = GridSearchCV(clone(lasso2), {"alpha": [1e-8, 1e-7, 1e-6]})
85 | bad_lasso2 = Lasso(fit_intercept=True, alpha=1e-6)
86 | l2l0 = L2L0(groups=[0, 0, 1, 2], alpha=0, eta=1e-9)
87 | steps = [("lasso1", lasso1), ("lasso2", lasso2), ("l2l0", l2l0)]
88 | steps2 = [("lasso1", clone(lasso1)), ("lasso2", grid), ("l2l0", clone(l2l0))]
89 | bad_steps = [("lasso1", lasso1), ("lasso2", bad_lasso2), ("l2l0", l2l0)]
90 |
91 | scope1 = [0, 1, 8]
92 | scope2 = [2, 3]
93 | scope3 = [4, 5, 6, 7]
94 | estimator = StepwiseEstimator(steps, [scope1, scope2, scope3])
95 | # Use grid search on lasso2.
96 | estimator2 = StepwiseEstimator(steps2, [scope1, scope2, scope3])
97 |
98 | bad_scope1 = [0, 1]
99 | bad_scope2 = [3, 4]
100 | bad_scope3 = [5, 6, 7, 8]
101 | bad_estimator1 = StepwiseEstimator(steps, [bad_scope1, bad_scope2, bad_scope3])
102 | bad_estimator2 = StepwiseEstimator(bad_steps, [scope1, scope2, scope3])
103 |
104 | w_test = np.random.normal(scale=2, size=9) * 0.2
105 | w_test[0] = 10
106 | w_test[-1] = 0.5
107 | # A bad feature matrix with too many features.
108 | bad_X = np.random.random(size=(20, 12))
109 | bad_X[:, 0] = 1
110 | with pytest.raises(ValueError):
111 | estimator.fit(bad_X, np.random.random(size=20))
112 | X = np.random.random(size=(20, 9))
113 | X[:, 0] = 1
114 | X[:, -1] = -8 * np.random.random(size=20)
115 | y = np.dot(X, w_test) + np.random.normal(scale=0.01, size=20)
116 |
117 | # Bad scopes.
118 | with pytest.raises(InvalidParameterError):
119 | bad_estimator1.fit(X, y)
120 | # Fitting an intercept is not allowed beyond the first estimator.
121 | with pytest.raises(InvalidParameterError):
122 | bad_estimator2.fit(X, y)
123 | # A correctly constructed estimator.
124 |
125 | def run_estimator_test(estimator_test):
126 | estimator_test.fit(X, y)
127 | # print("intercept:", estimator_test.intercept_)
128 | # print("coef:", estimator_test.coef_)
129 |
130 | assert estimator_test.intercept_ == estimator_test.steps[0][1].intercept_
131 | assert not np.any(np.isnan(estimator_test.coef_))
132 |
133 | assert not np.isclose(estimator_test.intercept_, 0)
134 |
135 | for (_, sub), scope in zip(
136 | estimator_test.steps, estimator_test.estimator_feature_indices
137 | ):
138 | if hasattr(sub, "estimator"):
139 | sub_coef = sub.best_estimator_.coef_
140 | else:
141 | sub_coef = sub.coef_
142 | npt.assert_array_almost_equal(sub_coef, estimator_test.coef_[scope])
143 | coef_1 = estimator_test.coef_.copy()
144 | intercept_1 = estimator_test.intercept_
145 |
146 | # Now do not fit intercept.
147 | estimator_test.steps[0][1].fit_intercept = False
148 | estimator_test.fit(X, y)
149 | coef_2 = estimator_test.coef_.copy()
150 | intercept_2 = estimator_test.intercept_
151 | assert np.isclose(intercept_2, 0)
152 |
153 | # Do some naive assertions on the fitted coefficients.
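# (The first feature column is constant 1 and w_test[0] = 10, so the constant signal is
# split between coef_[0] and the intercept when an intercept is fit; their sum should
# recover ~10. Without an intercept, coef_[0] alone should be ~10.)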
154 | assert abs(coef_1[0] + intercept_1 - 10) / 10 <= 0.1
155 | assert abs(coef_2[0] - 10) / 10 <= 0.1
156 | # assert np.linalg.norm(coef_2 - w_test) / np.linalg.norm(w_test) <= 0.4
157 |
158 | total_y = np.zeros(len(y))
159 | for (_, sub_estimator_test), sub_scope in zip(
160 | estimator_test.steps, estimator_test.estimator_feature_indices
161 | ):
162 | total_y += sub_estimator_test.predict(X[:, sub_scope])
163 | npt.assert_array_almost_equal(estimator_test.predict(X), total_y)
164 | npt.assert_array_almost_equal(
165 | np.dot(X, estimator_test.coef_) + estimator_test.intercept_, total_y
166 | )
167 |
168 | # Either estimator should work.
169 | run_estimator_test(estimator)
170 | run_estimator_test(estimator2)
171 |
--------------------------------------------------------------------------------
/tests/test_tools.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from functools import partial
3 |
4 | import numpy.testing as npt
5 | import pytest
6 |
7 | from sparselm.model import OrdinaryLeastSquares
8 | from sparselm.tools import constrain_coefficients
9 |
10 |
11 | @pytest.mark.parametrize("test_number", range(5)) # run the test 5 times
12 | def test_constrain_coefficients(test_number, rng):
13 | n_samples, n_features = 10, 8
14 | X = rng.normal(size=(n_samples, n_features))
15 | y = rng.normal(size=n_samples)
16 | reg = OrdinaryLeastSquares(fit_intercept=True)
17 | reg.fit(X, y)
18 | coefs = reg.coef_
19 |
20 | def fit(X, y, reg):
21 | reg.fit(X, y)
22 | return reg.coef_
23 |
24 | # Test uniform low and high values
25 | inds = rng.choice(n_features, size=3, replace=False)
26 |
27 | with warnings.catch_warnings(record=True) as w:
28 | cstr_coefs = constrain_coefficients(inds, 2, 0)(partial(fit, reg=reg))(X, y)
29 |
30 | assert cstr_coefs.shape == coefs.shape
31 |
32 | # Check if a warning was raised, meaning the coefficients were not within range;
33 | # in that case just check that the warning was indeed raised.
34 | if len(w) > 0:
35 | with pytest.warns(RuntimeWarning):
36 | cstr_coefs = constrain_coefficients(inds, 2, 0)(partial(fit, reg=reg))(X, y)
37 | else:
38 | for i in inds:
39 | assert 0 <= cstr_coefs[i] <= 2
40 |
41 | @constrain_coefficients(inds, 2, 0)
42 | def fit_constrained1(X, y, reg):
43 | reg.fit(X, y)
44 | return reg.coef_
45 |
46 | cstr_coefs2 = fit_constrained1(X, y, reg=reg)
47 | npt.assert_almost_equal(cstr_coefs, cstr_coefs2)
48 |
49 | # Test different low and high values
50 | low = rng.random(size=3) - 0.5
51 | high = rng.random(size=3) + low
52 |
53 | with warnings.catch_warnings(record=True) as w:
54 | cstr_coefs = constrain_coefficients(inds, high, low)(partial(fit, reg=reg))(
55 | X, y
56 | )
57 |
58 | assert cstr_coefs.shape == coefs.shape
59 |
60 | # Check if a warning was raised, meaning the coefficients were not within range;
61 | # in that case just check that the warning was indeed raised.
62 | if len(w) > 0:
63 | with pytest.warns(RuntimeWarning):
64 | cstr_coefs = constrain_coefficients(inds, high, low)(partial(fit, reg=reg))(
65 | X, y
66 | )
67 | else:
68 | for i, l, h in zip(inds, low, high):
69 | assert l <= cstr_coefs[i] <= h
70 |
71 | @constrain_coefficients(inds, high, low)
72 | def fit_constrained2(X, y, reg):
73 | reg.fit(X, y)
74 | return reg.coef_
75 |
76 | cstr_coefs2 = fit_constrained2(X, y, reg=reg)
77 | npt.assert_almost_equal(cstr_coefs, cstr_coefs2)
78 |
79 | # just use a high value
80 | with warnings.catch_warnings(record=True) as w:
81 | cstr_coefs = constrain_coefficients(inds, high=high)(partial(fit, reg=reg))(
82 | X, y
83 | )
84 |
85 | assert cstr_coefs.shape == coefs.shape
86 |
87 | # Check if a warning was raised, meaning the coefficients were not within range;
88 | # in that case just check that the warning was indeed raised.
89 | if len(w) > 0:
90 | with pytest.warns(RuntimeWarning):
91 | cstr_coefs = constrain_coefficients(inds, high=high)(partial(fit, reg=reg))(
92 | X, y
93 | )
94 | else:
95 | for i, h in zip(inds, high):
96 | assert cstr_coefs[i] <= h
97 |
98 | # just use a low value
99 | with warnings.catch_warnings(record=True) as w:
100 | cstr_coefs = constrain_coefficients(inds, low=low)(partial(fit, reg=reg))(X, y)
101 |
102 | assert cstr_coefs.shape == coefs.shape
103 |
104 | # Check if a warning was raised, meaning the coefficients were not within range;
105 | # in that case just check that the warning was indeed raised.
106 | if len(w) > 0:
107 | with pytest.warns(RuntimeWarning):
108 | cstr_coefs = constrain_coefficients(inds, low=low)(partial(fit, reg=reg))(
109 | X, y
110 | )
111 | else:
112 | for i, l in zip(inds, low):
113 | assert l <= cstr_coefs[i]
114 |
115 |
116 | # TODO write this test
117 | def test_r2_score_to_cv_error():
118 | pass
119 |
--------------------------------------------------------------------------------
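A note on the test_r2_score_to_cv_error stub above: whatever the exact signature of the sparselm.tools helper turns out to be (it is not assumed here), the identity such a test would exercise follows directly from the definition of R^2 = 1 - SS_res / SS_tot, so the mean squared error can be recovered as (1 - R^2) times the variance of y. A minimal, self-contained sketch of that identity using only numpy and scikit-learn:

import numpy as np
from sklearn.metrics import r2_score

rng = np.random.default_rng(0)
y_true = rng.normal(size=50)
y_pred = y_true + rng.normal(scale=0.3, size=50)

r2 = r2_score(y_true, y_pred)
# R^2 = 1 - SS_res / SS_tot, so the (biased) MSE is recoverable from R^2 and Var(y):
mse_from_r2 = (1.0 - r2) * np.mean((y_true - y_true.mean()) ** 2)
mse_direct = np.mean((y_true - y_pred) ** 2)
assert np.isclose(mse_from_r2, mse_direct)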