├── .github
    ├── dependabot.yml
    ├── release.yml
    └── workflows
    │   ├── build.yml
    │   ├── docs.yml
    │   ├── lint.yml
    │   ├── release.yml
    │   ├── test.yml
    │   └── update-precommit.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── docs
    ├── _static
    │   ├── logo-light.png
    │   └── logo.png
    ├── api.rst
    ├── conf.py
    ├── contributing.rst
    ├── index.rst
    ├── install.rst
    ├── license.rst
    ├── sparselm.model.rst
    ├── sparselm.model_selection.rst
    ├── sparselm.stepwise.rst
    └── sparselm.tools.rst
├── examples
    ├── README.rst
    ├── corr.npy
    ├── energy.npy
    ├── plot_adaptive.py
    ├── plot_chull.py
    ├── plot_gl_sgl.py
    ├── plot_line_search.py
    ├── plot_one_std.py
    ├── plot_sparse_signal.py
    ├── plot_stepwise.py
    └── structures.json
├── pyproject.toml
├── requirements.txt
├── src
    ├── requirements.txt
    └── sparselm
    │   ├── __init__.py
    │   ├── _utils
    │       ├── __init__.py
    │       └── validation.py
    │   ├── dataset.py
    │   ├── model
    │       ├── __init__.py
    │       ├── _adaptive_lasso.py
    │       ├── _base.py
    │       ├── _lasso.py
    │       ├── _miqp
    │       │   ├── __init__.py
    │       │   ├── _base.py
    │       │   ├── _best_subset.py
    │       │   └── _regularized_l0.py
    │       └── _ols.py
    │   ├── model_selection.py
    │   ├── stepwise.py
    │   └── tools.py
└── tests
    ├── conftest.py
    ├── pytest.ini
    ├── test_common.py
    ├── test_dataset.py
    ├── test_lasso.py
    ├── test_miqp.py
    ├── test_model_selection.py
    ├── test_ols.py
    ├── test_stepwise.py
    └── test_tools.py


/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 | 
 4 | # Maintain dependencies for GitHub Actions
 5 | - package-ecosystem: github-actions
 6 |   directory: "/"
 7 |   schedule:
 8 |     interval: weekly
 9 | 
10 | # Python dependencies
11 | - package-ecosystem: pip
12 |   directory: "/"
13 |   schedule:
14 |     interval: weekly
15 |   allow:
16 |   - dependency-type: direct
17 |   - dependency-type: indirect
18 | 


--------------------------------------------------------------------------------
/.github/release.yml:
--------------------------------------------------------------------------------
 1 | changelog:
 2 |   exclude:
 3 |     authors: [dependabot, github-actions, pre-commit-ci]
 4 |   categories:
 5 |     - title: 🎉 New Features
 6 |       labels: [feature]
 7 |     - title: 🐛 Bug Fixes
 8 |       labels: [fix]
 9 |     - title: 🛠 Enhancements
10 |       labels: [enhancement]
11 |     - title: 📖 Documentation
12 |       labels: [documentation]
13 |     - title: 💡 Refactoring
14 |       labels: [refactor]
15 |     - title: 🧪 Tests
16 |       labels: [tests]
17 |     - title: 💥 Breaking Changes
18 |       labels: [breaking]
19 |     - title: 🔒 Security Fixes
20 |       labels: [security]
21 |     - title: 🤷‍♂️ Other Changes
22 |       labels: ["*"]
23 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: build
 2 | 
 3 | on: [workflow_dispatch, workflow_call]
 4 | 
 5 | jobs:
 6 | 
 7 |   build-sdist:
 8 |     name: Build sdist
 9 |     runs-on: ubuntu-latest
10 | 
11 |     steps:
12 |       - uses: actions/checkout@v4
13 |         with:
14 |           fetch-depth: 0 # Optional, use if you use setuptools_scm
15 | 
16 |       - name: Build
17 |         run: pipx run build --sdist
18 | 
19 |       - uses: actions/upload-artifact@v4
20 |         with:
21 |           path: dist/*.tar.gz
22 | 


--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
 1 | name: build-documentation
 2 | 
 3 | on: [workflow_dispatch, workflow_call]
 4 | 
 5 | jobs:
 6 |   build-deploy:
 7 |     runs-on: ubuntu-latest
 8 | 
 9 |     steps:
10 |     - uses: actions/checkout@v4
11 | 
12 |     - name: Install pandoc
13 |       run: sudo apt-get install pandoc
14 | 
15 |     - uses: actions/setup-python@v5
16 |       with:
17 |         python-version: 3.11
18 | 
19 |     - name: Install dependencies
20 |       run: |
21 |         python -m pip install --upgrade pip
22 |         pip install .[docs]
23 | 
24 |     - name: Build docs
25 |       run: sphinx-build docs docs_build
26 | 
27 |     - name: Deploy
28 |       uses: peaceiris/actions-gh-pages@v4
29 |       with:
30 |           github_token: ${{ secrets.GITHUB_TOKEN }}
31 |           publish_dir: ./docs_build
32 | 


--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
 1 | name: lint
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 | 
 8 |   pull_request:
 9 |     branches:
10 |       - main
11 | 
12 | jobs:
13 |   lint:
14 |     runs-on: ubuntu-latest
15 |     strategy:
16 |       max-parallel: 6
17 | 
18 |     steps:
19 |       - uses: actions/checkout@v4
20 |       - name: Set up Python
21 |         uses: actions/setup-python@v5
22 |         with:
23 |           python-version: 3.11
24 |       - name: Install dependencies
25 |         run: |
26 |           python -m pip install --upgrade pip
27 |           pip install .[dev]
28 |       - name: flake8
29 |         run: |
30 |             flake8 --version
31 |             flake8 --count --show-source --statistics src/sparselm
32 |             # exit-zero treats all errors as warnings.
33 |             flake8 --count --exit-zero --max-complexity=20 --statistics src/sparselm
34 |       - name: black
35 |         run: |
36 |           black --version
37 |           black --check --diff --color src/sparselm
38 |       - name: pydocstyle
39 |         run: |
40 |           pydocstyle --version
41 |           pydocstyle --count src/sparselm
42 |       # Not in shape for this yet
43 |       # - name: pylint
44 |       #  run: |
45 |       #     pylint sparselm
46 |       #- name: mypy
47 |        #   run: |
48 |         #    mypy --version
49 |          #   rm -rf .mypy_cache
50 |           #  mypy sparselm
51 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: release
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [published]
 6 |     inputs:
 7 |       release-pypi:
 8 |         required: true
 9 |         type: boolean
10 |         default: true
11 |   workflow_dispatch:
12 |     inputs:
13 |       release-pypi:
14 |         required: true
15 |         type: boolean
16 |         description: "if true a release is made on PyPI"
17 | 
18 | jobs:
19 |   test:
20 |     uses: ./.github/workflows/test.yml
21 |     secrets: inherit
22 | 
23 |   build:
24 |     needs: test
25 |     uses: ./.github/workflows/build.yml
26 | 
27 |   docs:
28 |     needs: test
29 |     uses: ./.github/workflows/docs.yml
30 |     secrets: inherit
31 | 
32 |   release-pypi:
33 |     needs: [build]
34 |     runs-on: ubuntu-latest
35 |     if: github.event.inputs.release-pypi == 'true'
36 | 
37 |     steps:
38 |       - uses: actions/download-artifact@v4
39 |         with:
40 |           name: artifact
41 |           path: dist
42 | 
43 |       - uses: pypa/gh-action-pypi-publish@release/v1
44 |         with:
45 |           verbose: true
46 |           user: __token__
47 |           password: ${{ secrets.PYPI_API_TOKEN }}
48 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: test
 2 | 
 3 | env:
 4 |   scip-version: 8.0.0
 5 | 
 6 | on:
 7 |   push:
 8 |     branches:
 9 |       - main
10 | 
11 |   pull_request:
12 |     branches:
13 |       - main
14 | 
15 |   workflow_call:
16 | 
17 | jobs:
18 |   test:
19 |     runs-on: ubuntu-20.04
20 |     strategy:
21 |       max-parallel: 10
22 |       matrix:
23 |         python_version: ["3.9", "3.10", "3.11"]
24 | 
25 |     steps:
26 |       - uses: actions/checkout@v4
27 | 
28 |       - name: Install dependencies
29 |         run: |
30 |           wget --quiet --no-check-certificate https://scipopt.org/download/release/SCIPOptSuite-${{ env.scip-version }}-Linux-ubuntu.deb
31 |           sudo apt-get update && sudo apt install -y ./SCIPOptSuite-${{ env.scip-version }}-Linux-ubuntu.deb
32 | 
33 |       - name: Set up Python ${{ matrix.python_version }}
34 |         uses: actions/setup-python@v5
35 |         with:
36 |           python-version:  ${{ matrix.python_version }}
37 | 
38 |       - name: Install dependencies and package
39 |         run: |
40 |           python -m pip install --upgrade pip
41 |           pip install cython
42 |           pip install .[tests,dev]
43 | 
44 |       - name: Test with pytest
45 |         run: |
46 |           pytest tests --cov=sparselm --cov-report=xml
47 | 
48 |       - if: ${{ matrix.python_version == 3.11 && github.event_name == 'push' }}
49 |         name: codacy-coverage-reporter
50 |         uses: codacy/codacy-coverage-reporter-action@v1
51 |         with:
52 |             project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
53 |             coverage-reports: coverage.xml
54 | 


--------------------------------------------------------------------------------
/.github/workflows/update-precommit.yaml:
--------------------------------------------------------------------------------
 1 | name: pre-commit-auto-update
 2 | 
 3 | on:
 4 |   # midnight twice a month
 5 |   schedule:
 6 |     - cron: '0 0 14,28 * *'
 7 | 
 8 | jobs:
 9 |   auto-update:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - uses: actions/checkout@v4
13 | 
14 |       - name: Set up Python
15 |         uses: actions/setup-python@v5
16 |         with:
17 |           python-version: 3.9
18 | 
19 |       - name: Install pre-commit
20 |         run: pip install pre-commit
21 | 
22 |       - name: Run pre-commit autoupdate
23 |         run: pre-commit autoupdate
24 | 
25 |       - name: Create Pull Request
26 |         uses: peter-evans/create-pull-request@v6.0.5
27 |         with:
28 |           token: ${{ secrets.GITHUB_TOKEN }}
29 |           branch: update/pre-commit-autoupdate
30 |           title: auto-update pre-commit hooks
31 |           commit-message: auto-update pre-commit hooks
32 |           body: Update versions of tools in pre-commit hooks to latest versions.
33 |           labels: dependencies
34 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | .idea
132 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # See https://pre-commit.com for more information
 2 | # See https://pre-commit.com/hooks.html for more hooks
 3 | 
 4 | ci:
 5 |   autoupdate_schedule: monthly
 6 | 
 7 | repos:
 8 | - repo: https://github.com/pre-commit/pre-commit-hooks
 9 |   rev: v5.0.0
10 |   hooks:
11 |   - id: check-yaml
12 |   - id: fix-encoding-pragma
13 |     args:
14 |     - --remove
15 |   - id: end-of-file-fixer
16 |   - id: trailing-whitespace
17 |   - id: check-added-large-files
18 |     args: ['--maxkb=500']
19 | 
20 | - repo: https://github.com/psf/black
21 |   rev: 24.10.0
22 |   hooks:
23 |   - id: black
24 | 
25 | - repo: https://github.com/asottile/blacken-docs
26 |   rev: 1.19.1
27 |   hooks:
28 |   - id: blacken-docs
29 |     additional_dependencies: [black==23.1.0]
30 |     exclude: README.md
31 | 
32 | - repo: https://github.com/pycqa/isort
33 |   rev: 6.0.0
34 |   hooks:
35 |   - id: isort
36 |     name: isort (python)
37 |     args:
38 |     - --profile=black
39 | 
40 | - repo: https://github.com/asottile/pyupgrade
41 |   rev: v3.19.1
42 |   hooks:
43 |     - id: pyupgrade
44 |       args: [--py38-plus]
45 | 
46 | - repo: https://github.com/PyCQA/autoflake
47 |   rev: v2.3.1
48 |   hooks:
49 |     - id: autoflake
50 |       args:
51 |         - --in-place
52 |         - --remove-unused-variables
53 |         - --remove-all-unused-imports
54 |         - --expand-star-imports
55 |         - --ignore-init-module-imports
56 | 
57 | - repo: https://github.com/pycqa/pydocstyle
58 |   rev: 6.3.0  # pick a git hash / tag to point to
59 |   hooks:
60 |   - id: pydocstyle
61 |     files: ^src/sparselm/
62 |     args:
63 |       - --convention=google
64 |       - --add-ignore=D107
65 | 
66 | - repo: https://github.com/pre-commit/pygrep-hooks
67 |   rev: v1.10.0
68 |   hooks:
69 |   - id: rst-backticks
70 |   - id: rst-directive-colons
71 |   - id: rst-inline-touching-normal
72 | 
73 | -   repo: https://github.com/pre-commit/mirrors-mypy
74 |     rev: 'v1.14.1'  # Use the sha / tag you want to point at
75 |     hooks:
76 |     -   id: mypy
77 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | ::
 2 | 
 3 |     sparse-lm Copyright (c) 2022, The Regents of the University of California, through
 4 |     Lawrence Berkeley National Laboratory (subject to receipt of any required approvals
 5 |     from the U.S. Dept. of Energy) and the University of California, Berkeley.
 6 |     All rights reserved.
 7 | 
 8 |     Redistribution and use in source and binary forms, with or without
 9 |     modification, are permitted provided that the following conditions are met:
10 | 
11 |     (1) Redistributions of source code must retain the above copyright notice,
12 |     this list of conditions and the following disclaimer.
13 | 
14 |     (2) Redistributions in binary form must reproduce the above copyright
15 |     notice, this list of conditions and the following disclaimer in the
16 |     documentation and/or other materials provided with the distribution.
17 | 
18 |     (3) Neither the name of the University of California, Lawrence Berkeley
19 |     National Laboratory, U.S. Dept. of Energy, University of California,
20 |     Berkeley nor the names of its contributors may be used to endorse or
21 |     promote products derived from this software without specific prior written
22 |     permission.
23 | 
24 | 
25 |     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 |     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 |     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 |     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 |     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 |     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 |     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 |     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 |     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 |     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 |     POSSIBILITY OF SUCH DAMAGE.
36 | 
37 |     You are under no obligation whatsoever to provide any bug fixes, patches,
38 |     or upgrades to the features, functionality or performance of the source
39 |     code ("Enhancements") to anyone; however, if you choose to make your
40 |     Enhancements available either publicly, or directly to Lawrence Berkeley
41 |     National Laboratory, without imposing a separate written license agreement
42 |     for such Enhancements, then you hereby grant the following license: a
43 |     non-exclusive, royalty-free perpetual license to install, use, modify,
44 |     prepare derivative works, incorporate into other computer software,
45 |     distribute, and sublicense such enhancements or derivative works thereof,
46 |     in binary and source code form.
47 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <img src="docs/_static/logo.png" width="500px" alt=" ">
 2 | 
 3 | Sparse Linear Regression Models
 4 | ===============================
 5 | 
 6 | [![test](https://github.com/CederGroupHub/sparse-lm/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/CederGroupHub/sparse-lm/actions/workflows/test.yml)
 7 | [![Codacy Badge](https://app.codacy.com/project/badge/Coverage/9b72db506d9c49b2a6c849348de8945e)](https://www.codacy.com/gh/CederGroupHub/sparse-lm/dashboard?utm_source=github.com&utm_medium=referral&utm_content=CederGroupHub/sparse-lm&utm_campaign=Badge_Coverage)
 8 | [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/CederGroupHub/sparse-lm/main.svg)](https://results.pre-commit.ci/latest/github/CederGroupHub/sparse-lm/main)
 9 | [![pypi version](https://img.shields.io/pypi/v/sparse-lm?color=blue)](https://pypi.org/project/sparse-lm)
10 | [![Static Badge](https://img.shields.io/badge/python-3.9%2B-blue)](https://www.python.org/downloads/)
11 | [![DOI](https://joss.theoj.org/papers/10.21105/joss.05867/status.svg)](https://doi.org/10.21105/joss.05867)
12 | 
13 | 
14 | **sparse-lm**  includes several (structured) sparse linear regression estimators that are absent in the
15 | `sklearn.linear_model` module. The estimators in **sparse-lm** are designed to fit right into
16 | [scikit-learn](https://scikit-learn.org/stable/index.html), but the underlying optimization problem is expressed and
17 | solved by leveraging [cvxpy](https://www.cvxpy.org/).
18 | 
19 | ---------------------------------------------------------------------------------------
20 | 
21 | Available regression models
22 | ---------------------------
23 | - Lasso, Group Lasso, Overlap Group Lasso, Sparse Group Lasso & Ridged Group Lasso.
24 | - Adaptive versions of Lasso, Group Lasso, Overlap Group Lasso, Sparse Group Lasso & Ridged Group Lasso.
25 | - Best Subset Selection, Ridged Best Subset, L0, L1L0 & L2L0 (all with optional grouping of parameters)
26 | 
27 | Installation
28 | ------------
29 | **sparse-lm** is available on [PyPI](https://pypi.org/project/sparse-lm/), and can be installed via pip:
30 | 
31 | ```bash
32 | pip install sparse-lm
33 | ```
34 | 
35 | Additional information on installation can be found the documentation [here](https://cedergrouphub.github.io/sparse-lm/install.html).
36 | 
37 | Basic usage
38 | -----------
39 | If you already use **scikit-learn**, using **sparse-lm** will be very easy. Just use any
40 | model like you would any linear model in **scikit-learn**:
41 | 
42 | ```python
43 | import numpy as np
44 | from sklearn.datasets import make_regression
45 | from sklearn.model_selection import GridSearchCV
46 | from sparselm.model import AdaptiveLasso
47 | 
48 | X, y = make_regression(n_samples=100, n_features=80, n_informative=10, random_state=0)
49 | alasso = AdaptiveLasso(fit_intercept=False)
50 | param_grid = {'alpha': np.logspace(-8, 2, 10)}
51 | 
52 | cvsearch = GridSearchCV(alasso, param_grid)
53 | cvsearch.fit(X, y)
54 | print(cvsearch.best_params_)
55 | ```
56 | 
57 | For more details on use and functionality have a look at the
58 | [examples](https://cedergrouphub.github.io/sparse-lm/auto_examples/index.html) and
59 | [API](https://cedergrouphub.github.io/sparse-lm/api.html) sections of the documentation.
60 | 
61 | Contributing
62 | ------------
63 | 
64 | We welcome any contributions that you think may improve the package! Please have a look at the
65 | [contribution guidelines](https://cedergrouphub.github.io/sparse-lm/contributing.html) in the documentation.
66 | 


--------------------------------------------------------------------------------
/docs/_static/logo-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CederGroupHub/sparse-lm/220cbbad4a5ac98d1a52326c525aadb95f2c5b18/docs/_static/logo-light.png


--------------------------------------------------------------------------------
/docs/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CederGroupHub/sparse-lm/220cbbad4a5ac98d1a52326c525aadb95f2c5b18/docs/_static/logo.png


--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
 1 | API Documentation
 2 | =================
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 2
 6 | 
 7 |    sparselm.model
 8 |    sparselm.stepwise
 9 |    sparselm.model_selection
10 |    sparselm.tools
11 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -- Path setup --------------------------------------------------------------
  2 | 
  3 | # If extensions (or modules to document with autodoc) are in another directory,
  4 | # add these directories to sys.path here. If the directory is relative to the
  5 | # documentation root, use os.path.abspath to make it absolute, like shown here.
  6 | 
  7 | import os
  8 | import sys
  9 | 
 10 | # import typing
 11 | # typing.TYPE_CHECKING = True
 12 | from sparselm import __version__
 13 | 
 14 | sys.path.insert(0, os.path.abspath("../../"))
 15 | 
 16 | # -- Project information -----------------------------------------------------
 17 | 
 18 | project = "sparse-lm"
 19 | copyright = "2022-2023, Ceder Group"
 20 | author = "Luis Barroso-Luque"
 21 | 
 22 | # The short X.Y version
 23 | version = __version__
 24 | # The full version, including alpha/beta/rc tags
 25 | release = __version__
 26 | 
 27 | # -- General configuration ---------------------------------------------------
 28 | 
 29 | # Add any Sphinx extension module names here, as strings. They can be
 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 31 | # ones.
 32 | extensions = [
 33 |     "sphinx.ext.autodoc",
 34 |     "sphinx.ext.napoleon",
 35 |     "sphinx.ext.intersphinx",
 36 |     "sphinx.ext.viewcode",
 37 |     "sphinx.ext.autosummary",
 38 |     "sphinx.ext.mathjax",
 39 |     "m2r2",
 40 |     "sphinx_gallery.gen_gallery",
 41 | ]
 42 | 
 43 | # Add any paths that contain templates here, relative to this directory.
 44 | templates_path = ["_templates"]
 45 | 
 46 | # List of patterns, relative to source directory, that match files and
 47 | # directories to ignore when looking for source files.
 48 | # This pattern also affects html_static_path and html_extra_path.
 49 | exclude_patterns = ["Thumbs.db", ".DS_Store", "test*.py"]
 50 | 
 51 | # use type hints
 52 | autodoc_typehints = "description"
 53 | autoclass_content = "both"
 54 | autodoc_member_order = "bysource"
 55 | 
 56 | # better napoleon support
 57 | napoleon_use_param = True
 58 | napoleon_use_rtype = True
 59 | napoleon_use_ivar = True
 60 | 
 61 | # The suffix(es) of source filenames.
 62 | source_suffix = [".rst", ".md"]
 63 | 
 64 | # -- Options for HTML output -------------------------------------------------
 65 | 
 66 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 67 | # a list of builtin themes.
 68 | #
 69 | html_theme = "furo"
 70 | 
 71 | # hide sphinx footer
 72 | html_show_sphinx = False
 73 | html_show_sourcelink = False
 74 | 
 75 | # Add any paths that contain custom static files (such as style sheets) here,
 76 | # relative to this directory. They are copied after the builtin static files,
 77 | # so a file named "default.css" will overwrite the builtin "default.css".
 78 | fonts = [
 79 |     "Lato",
 80 |     "-apple-system",
 81 |     "BlinkMacSystemFont",
 82 |     "Segoe UI",
 83 |     "Helvetica",
 84 |     "Arial",
 85 |     "sans-serif",
 86 |     "Apple Color Emoji",
 87 |     "Segoe UI Emoji",
 88 | ]
 89 | html_static_path = ["_static"]
 90 | html_css_files = ["custom.css"]
 91 | html_favicon = "_static/favicon.ico"
 92 | html_theme_options = {
 93 |     "light_css_variables": {
 94 |         "admonition-font-size": "92%",
 95 |         "admonition-title-font-size": "92%",
 96 |         "font-stack": ",".join(fonts),
 97 |         "font-size--small": "92%",
 98 |         "font-size--small--2": "87.5%",
 99 |         "font-size--small--3": "87.5%",
100 |         "font-size--small--4": "87.5%",
101 |     },
102 |     "dark_css_variables": {
103 |         "admonition-font-size": "92%",
104 |         "admonition-title-font-size": "92%",
105 |         "font-stack": ",".join(fonts),
106 |         "font-size--small": "92%",
107 |         "font-size--small--2": "87.5%",
108 |         "font-size--small--3": "87.5%",
109 |         "font-size--small--4": "87.5%",
110 |     },
111 | }
112 | html_title = "sparse-lm"
113 | 
114 | # code highlighting
115 | pygments_style = "sphinx"
116 | pygments_dark_style = "monokai"
117 | 
118 | # -- Options for intersphinx extension ---------------------------------------
119 | 
120 | # Example configuration for intersphinx: refer to the Python standard library.
121 | intersphinx_mapping = {
122 |     "python": ("https://docs.python.org/3.9", None),
123 |     "scikit-learn": ("https://scikit-learn.org/stable", None),
124 |     "numpy": ("https://numpy.org/doc/stable/", None),
125 |     "cvxpy": ("https://www.cvxpy.org/en/latest/", None),
126 | }
127 | 
128 | # -- Options for sphinx gallery extension  ---------------------------------------
129 | 
130 | sphinx_gallery_conf = {
131 |     "examples_dirs": "../examples",  # path to your example scripts
132 |     "gallery_dirs": "auto_examples",  # path to where to save gallery generated output
133 | }
134 | 


--------------------------------------------------------------------------------
/docs/contributing.rst:
--------------------------------------------------------------------------------
 1 | Contributing
 2 | ============
 3 | 
 4 | We welcome all forms of contribution, please consider contributing in any way you can!
 5 | 
 6 | Bugs, issues, input, and questions
 7 | ----------------------------------
 8 | Please use the
 9 | `issue tracker <https://github.com/CederGroupHub/sparse-lm/issues>`_ to share any
10 | of the following:
11 | 
12 | -   Bugs
13 | -   Issues
14 | -   Questions
15 | -   Feature requests
16 | -   Ideas
17 | -   Input
18 | 
19 | Having these reported and saved in the issue tracker is very helpful to make
20 | sure that they are properly addressed. Please make sure to be as descriptive
21 | and neat as possible when opening up an issue.
22 | 
23 | Developing guidelines
24 | ---------------------
25 | If you have written code or want to start writing new code that you think will improve **sparse-lm** then please follow
26 | the steps below to make a contribution.
27 | 
28 | * All code should have unit tests.
29 | * Code should be well documented following `google style <https://google.github.io/styleguide/pyguide.html>`_  docstrings.
30 | * All code should pass the pre-commit hook. The code follows the `black code style <https://black.readthedocs.io/en/stable/>`_.
31 | * Estimators should follow scikit-learn's `developing estimator guidelines <https://scikit-learn.org/stable/developers/develop.html>`_.
32 | 
33 | Adding code contributions
34 | -------------------------
35 | 
36 | #.  If you are contributing for the first time:
37 | 
38 |     * *Fork* the repository and then *clone* your fork to your local workspace.
39 |     * Make sure to add the *upstream* repository as a remote::
40 | 
41 |         git remote add upstream https://github.com/CederGroupHub/sparse-lm.git
42 | 
43 |     * You should always keep your ``main`` branch or any feature branch up to date
44 |       with the upstream repository ``main`` branch. Be good about doing *fast forward*
45 |       merges of the upstream ``main`` into your fork branches while developing.
46 | 
47 | #.  In order to have changes available without having to re-install the package:
48 | 
49 |     * Install the package in *editable* mode::
50 | 
51 |          pip install -e .
52 | 
53 | #.  To develop your contributions you are free to do so in your *main* branch or any feature
54 |     branch in your fork.
55 | 
56 |     * We recommend to only your forks *main* branch for short/easy fixes and additions.
57 |     * For more complex features, try to use a feature branch with a descriptive name.
58 |     * For very complex feautres feel free to open up a PR even before your contribution is finished with
59 |       [WIP] in its name, and optionally mark it as a *draft*.
60 | 
61 | #.  While developing we recommend you use the pre-commit hook that is setup to ensure that your
62 |     code will satisfy all lint, documentation and black requirements. To do so install pre-commit, and run
63 |     in your clones top directory::
64 | 
65 |         pre-commit install
66 | 
67 |     *  All code should use `google style <https://google.github.io/styleguide/pyguide.html>`_ docstrings
68 |        and `black <https://black.readthedocs.io/en/stable/?badge=stable>`_ style formatting.
69 | 
70 | #.  Make sure to test your contribution and write unit tests for any new features. All tests should go in the
71 |     ``sparse-lm\tests`` directory. The CI will run tests upon opening a PR, but running them locally will help find
72 |     problems before::
73 | 
74 |         pytests tests
75 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | .. toctree::
 3 |    :caption: Getting Started
 4 |    :hidden:
 5 | 
 6 |    install
 7 |    auto_examples/index
 8 | 
 9 | .. toctree::
10 |    :caption: Information
11 |    :hidden:
12 | 
13 |    contributing
14 |    license
15 |    GitHub <https://github.com/CederGroupHub/sparse-lm>
16 | 
17 | 
18 | .. toctree::
19 |    :caption: Reference
20 |    :maxdepth: -1
21 |    :hidden:
22 | 
23 |    API <api>
24 |    genindex
25 | 
26 | 
27 | .. image:: _static/logo.png
28 |     :width: 700
29 |     :class: only-dark
30 | 
31 | .. image:: _static/logo-light.png
32 |    :width: 700
33 |    :class: only-light
34 | 
35 | ===============================
36 | Sparse Linear Regression Models
37 | ===============================
38 | 
39 | .. mdinclude:: ../README.md
40 |    :start-line: 4
41 | 


--------------------------------------------------------------------------------
/docs/install.rst:
--------------------------------------------------------------------------------
 1 | Install
 2 | =======
 3 | 
 4 | **sparse-lm** can be installed from PyPI or from source using pip.
 5 | 
 6 | PyPI
 7 | ----
 8 | 
 9 | You can install **sparse-lm** using pip::
10 | 
11 |    pip install sparse-lm
12 | 
13 | 
14 | Install from source
15 | -------------------
16 | 
17 | To install **sparse-lm** from source, (fork and) clone the repository from `github
18 | <https://github.com/CederGroupHub/sparse-lm>`_::
19 | 
20 |     git clone https://github.com/CederGroupHub/sparse-lm
21 |     cd sparselm
22 |     pip install .
23 | 
24 | Installing MIQP solvers
25 | -----------------------
26 | 
27 | Since **cvxpy** is used to specify and solve regression optimization problems, any of
28 | `supported solvers <https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options>`_
29 | can be used with **sparse-lm** estimators. **cvxpy** is shipped with open source solvers
30 | (OSQP, SCS, and ECOS) which are usually enough to solve most convex regression problems.
31 | 
32 | However, for the mixed integer quadratic programming (MIQP) formulations used in
33 | :class:`BestSubsetSelection` and :class:`RegularizedL0` based classes we highly
34 | recommend installing an MIQP capable solver. ECOS_BB can be used to solve MIQP problems,
35 | but it can be very slow and more importantly has recurring correctness issues. See the
36 | `mixed-integer program section <https://www.cvxpy.org/version/1.2/tutorial/advanced/index.html#mixed-integer-programs>`_
37 | in the cvxpy documentation for more details.
38 | 
39 | Gurobi
40 | ^^^^^^
41 | 
42 | For using **sparse-lm** with MIQP solvers, we highly recommend installing **Gurobi**.
43 | It can be installed directly from PyPi::
44 | 
45 |     pip install gurobipy
46 | 
47 | Without a license, a free trial **Gurobi** can be used to solve small problems. For
48 | larger problems a license is required. **Gurobi** grants
49 | `free academic licenses <https://www.gurobi.com/academia/academic-program-and-licenses/>`_
50 | to students and academic researchers.
51 | 
52 | SCIP
53 | ^^^^
54 | 
55 | If installing a licensed solver is not an option, **SCIP** can be used as a free
56 | alternative. To use **SCIP**, the python interface **PySCIPOpt** must also be installed.
57 | **PySCIPOpt** can be installed from PyPi, however this requires building SCIP from
58 | source. See installation details `here <https://github.com/scipopt/PySCIPOpt>`_.
59 | 
60 | If you use conda, we recommend installing **SCIP** and **PySCIPOpt** using their
61 | conda-forge channel::
62 | 
63 |     conda install -c conda-forge scipopt pyscipopt
64 | 
65 | The above command will install **PySCIPOpt** with a pre-built version of **SCIP**, and
66 | so you will not need to build it from source.
67 | 
68 | Testing
69 | -------
70 | 
71 | Unit tests can be run from the source folder using ``pytest``. First, the requirements
72 | to run tests must be installed::
73 | 
74 |     pip install .[tests]
75 | 
76 | Then run the tests using::
77 | 
78 |     pytest tests
79 | 


--------------------------------------------------------------------------------
/docs/license.rst:
--------------------------------------------------------------------------------
1 | =======
2 | License
3 | =======
4 | 
5 | **sparse-lm** is distributed under a modified 3-clause BSD licence.
6 | 
7 | .. include:: ../LICENSE
8 | 


--------------------------------------------------------------------------------
/docs/sparselm.model.rst:
--------------------------------------------------------------------------------
1 | sparselm.model
2 | ==============
3 | 
4 | .. automodule:: sparselm.model
5 |    :members:
6 |    :inherited-members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/sparselm.model_selection.rst:
--------------------------------------------------------------------------------
1 | sparselm.model_selection
2 | ========================
3 | 
4 | .. automodule:: sparselm.model_selection
5 |    :members:
6 |    :inherited-members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/sparselm.stepwise.rst:
--------------------------------------------------------------------------------
1 | sparselm.stepwise
2 | ========================
3 | 
4 | .. automodule:: sparselm.stepwise
5 |    :members:
6 |    :inherited-members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/docs/sparselm.tools.rst:
--------------------------------------------------------------------------------
1 | sparselm.tools
2 | ==============
3 | 
4 | .. automodule:: sparselm.tools
5 |    :members:
6 |    :inherited-members:
7 |    :undoc-members:
8 |    :show-inheritance:
9 | 


--------------------------------------------------------------------------------
/examples/README.rst:
--------------------------------------------------------------------------------
1 | Examples
2 | ========
3 | 
4 | This is set of simple examples using sparse linear regression models implemented in
5 | **sparse-lm**. For the the vast majority of cases, the **sparse-lm** models can be
6 | used in the same way as the linear regression models in **scikit-learn**.
7 | 


--------------------------------------------------------------------------------
/examples/corr.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CederGroupHub/sparse-lm/220cbbad4a5ac98d1a52326c525aadb95f2c5b18/examples/corr.npy


--------------------------------------------------------------------------------
/examples/energy.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CederGroupHub/sparse-lm/220cbbad4a5ac98d1a52326c525aadb95f2c5b18/examples/energy.npy


--------------------------------------------------------------------------------
/examples/plot_adaptive.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ==============================
  3 | Using adaptive regularization
  4 | ==============================
  5 | 
  6 | Adaptive or iteratively re-weighted regularization is a technique that can improve
  7 | feature selection properties over the standard Lasso and Group Lasso extensions. In
  8 | this example we compare the performance of the standard Lasso with adaptive Lasso.
  9 | """
 10 | 
 11 | import matplotlib.pyplot as plt
 12 | import numpy as np
 13 | from sklearn.datasets import make_regression
 14 | from sklearn.linear_model import Lasso
 15 | from sklearn.metrics import mean_squared_error, r2_score
 16 | from sklearn.model_selection import GridSearchCV, KFold, train_test_split
 17 | 
 18 | from sparselm.model import AdaptiveLasso
 19 | 
 20 | X, y, coef = make_regression(
 21 |     n_samples=200,
 22 |     n_features=100,
 23 |     n_informative=10,
 24 |     noise=40.0,
 25 |     bias=-15.0,
 26 |     coef=True,
 27 |     random_state=0,
 28 | )
 29 | 
 30 | X_train, X_test, y_train, y_test = train_test_split(
 31 |     X, y, test_size=0.25, random_state=0
 32 | )
 33 | 
 34 | # create estimators
 35 | lasso = Lasso(fit_intercept=True)
 36 | alasso = AdaptiveLasso(max_iter=5, fit_intercept=True)
 37 | 
 38 | # create cv search objects for each estimator
 39 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0)
 40 | params = {"alpha": np.logspace(-1, 1, 10)}
 41 | 
 42 | lasso_cv = GridSearchCV(lasso, params, cv=cv5, n_jobs=-1)
 43 | alasso_cv = GridSearchCV(alasso, params, cv=cv5, n_jobs=-1)
 44 | 
 45 | # fit models on training data
 46 | lasso_cv.fit(X_train, y_train)
 47 | alasso_cv.fit(X_train, y_train)
 48 | 
 49 | # calculate model performance on test and train data
 50 | lasso_train = {
 51 |     "r2": r2_score(y_train, lasso_cv.predict(X_train)),
 52 |     "rmse": np.sqrt(mean_squared_error(y_train, lasso_cv.predict(X_train))),
 53 | }
 54 | 
 55 | lasso_test = {
 56 |     "r2": r2_score(y_test, lasso_cv.predict(X_test)),
 57 |     "rmse": np.sqrt(mean_squared_error(y_test, lasso_cv.predict(X_test))),
 58 | }
 59 | 
 60 | alasso_train = {
 61 |     "r2": r2_score(y_train, alasso_cv.predict(X_train)),
 62 |     "rmse": np.sqrt(mean_squared_error(y_train, alasso_cv.predict(X_train))),
 63 | }
 64 | 
 65 | alasso_test = {
 66 |     "r2": r2_score(y_test, alasso_cv.predict(X_test)),
 67 |     "rmse": np.sqrt(mean_squared_error(y_test, alasso_cv.predict(X_test))),
 68 | }
 69 | 
 70 | print("Lasso performance metrics:")
 71 | print(f"    train r2: {lasso_train['r2']:.3f}")
 72 | print(f"    test r2: {lasso_test['r2']:.3f}")
 73 | print(f"    train rmse: {lasso_train['rmse']:.3f}")
 74 | print(f"    test rmse: {lasso_test['rmse']:.3f}")
 75 | 
 76 | print("Adaptive Lasso performance metrics:")
 77 | print(f"    train r2: {alasso_train['r2']:.3f}")
 78 | print(f"    test r2: {alasso_test['r2']:.3f}")
 79 | print(f"    train rmse: {alasso_train['rmse']:.3f}")
 80 | print(f"    test rmse: {alasso_test['rmse']:.3f}")
 81 | 
 82 | # plot model coefficients
 83 | fig, ax = plt.subplots()
 84 | ax.plot(coef, "o", label="True coefficients")
 85 | ax.plot(lasso_cv.best_estimator_.coef_, "o", label="Lasso", alpha=0.5)
 86 | ax.plot(alasso_cv.best_estimator_.coef_, "o", label="Adaptive Lasso", alpha=0.5)
 87 | ax.set_xlabel("covariate index")
 88 | ax.set_ylabel("coefficient value")
 89 | ax.legend()
 90 | fig.show()
 91 | 
 92 | # plot predicted values
 93 | fig, ax = plt.subplots()
 94 | ax.plot(y_test, lasso_cv.predict(X_test), "o", label="lasso", alpha=0.5)
 95 | ax.plot(y_test, alasso_cv.predict(X_test), "o", label="adaptive lasso", alpha=0.5)
 96 | ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--")
 97 | ax.set_xlabel("true values")
 98 | ax.set_ylabel("predicted values")
 99 | ax.legend()
100 | fig.show()
101 | 


--------------------------------------------------------------------------------
/examples/plot_chull.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ===========================
  3 | Adding solution constraints
  4 | ===========================
  5 | 
  6 | **sparse-lm** allows including external solution constraints to the regression objective
  7 | by exposing the underlying **cvxpy** problem objects. This is useful to solve regression
  8 | problems with additional constraints, such as non-negativity.
  9 | 
 10 | **NOTE**: That this functionality does not fully align with the requirements for
 11 | compatible scikit-learn estimators, meaning that using an estimator with additional
 12 | constraints added in a ski-kit learn pipeline or model selection is not supported.
 13 | 
 14 | To show how to include constraints, we will solve a common problem in materials science:
 15 | predicting the formation energy of many configurations of an alloy. In such problems,
 16 | it is usually very important to ensure that the predicted formation energies for
 17 | "ground-states" (i.e. energies that define the lower convex-hull of the energy vs
 18 | composition graph) remain on the convex-hull. Similarly, it is often important to
 19 | ensure that the predicted formation energies that are not "ground-states" in the
 20 | training data remain above the predicted convex-hull.
 21 | 
 22 | The example follows the methodology described in this paper:
 23 | https://www.nature.com/articles/s41524-017-0032-0
 24 | 
 25 | This example requires the **pymatgen** materials analysis package to be
 26 | installed to easily plot convex-hulls: https://pymatgen.org/installation.html
 27 | 
 28 | The training data used in this example is taken from this
 29 | tutorial: https://icet.materialsmodeling.org/tutorial.zip for the
 30 | **icet** cluster expansion Python package (https://icet.materialsmodeling.org/).
 31 | """
 32 | 
 33 | import json
 34 | 
 35 | import matplotlib.pyplot as plt
 36 | import numpy as np
 37 | import pymatgen.analysis.phase_diagram as pd
 38 | from pymatgen.core import Structure
 39 | from sklearn.linear_model import Lasso
 40 | from sklearn.metrics import mean_squared_error
 41 | 
 42 | from sparselm.model import L2L0
 43 | 
 44 | # load training data
 45 | X, y = np.load("corr.npy"), np.load("energy.npy")
 46 | 
 47 | # load corresponding structure objects
 48 | with open("structures.json") as fp:
 49 |     structures = json.load(fp)
 50 | 
 51 | structures = [Structure.from_dict(s) for s in structures]
 52 | 
 53 | # create regressors (the hyperparameters have already been tuned)
 54 | lasso_regressor = Lasso(fit_intercept=True, alpha=1.29e-5)
 55 | # alpha is the pseudo-l0 norm hyperparameter and eta is the l2-norm hyperparameter
 56 | l2l0_regressor = L2L0(
 57 |     fit_intercept=True,
 58 |     alpha=3.16e-7,
 59 |     eta=1.66e-6,
 60 |     solver="GUROBI",
 61 |     solver_options={"Threads": 4},
 62 | )
 63 | 
 64 | # fit models
 65 | lasso_regressor.fit(X, y)
 66 | l2l0_regressor.fit(X, y)
 67 | 
 68 | # create phase diagram entries with training data
 69 | training_entries = []
 70 | for i, structure in enumerate(structures):
 71 |     corrs = X[
 72 |         i
 73 |     ]  # in this problem the features of a sample are referred to as correlation vectors
 74 |     energy = y[i] * len(
 75 |         structure
 76 |     )  # the energy must be scaled by size to create the phase diagram
 77 |     entry = pd.PDEntry(
 78 |         structure.composition,
 79 |         energy,
 80 |         attribute={"corrs": corrs, "size": len(structure)},
 81 |     )
 82 |     training_entries.append(entry)
 83 | 
 84 | # plot the training (true) phase diagram
 85 | training_pd = pd.PhaseDiagram(training_entries)
 86 | pplotter = pd.PDPlotter(training_pd, backend="matplotlib", show_unstable=0)
 87 | pplotter.show(label_unstable=False)
 88 | 
 89 | # plot the phase diagram based on the energies predicted by the Lasso fit
 90 | lasso_y = lasso_regressor.predict(X)
 91 | lasso_pd = pd.PhaseDiagram(
 92 |     [
 93 |         pd.PDEntry(s_i.composition, y_i * len(s_i))
 94 |         for s_i, y_i in zip(structures, lasso_y)
 95 |     ]
 96 | )
 97 | pplotter = pd.PDPlotter(lasso_pd, backend="matplotlib", show_unstable=0)
 98 | pplotter.show(label_unstable=False)
 99 | 
100 | # plot the phase diagram based on the energies predicted by the L2L0 fit
101 | l2l0_y = l2l0_regressor.predict(X)
102 | l2l0_pd = pd.PhaseDiagram(
103 |     [
104 |         pd.PDEntry(s_i.composition, y_i * len(s_i))
105 |         for s_i, y_i in zip(structures, l2l0_y)
106 |     ]
107 | )
108 | pplotter = pd.PDPlotter(l2l0_pd, backend="matplotlib", show_unstable=0)
109 | pplotter.show(label_unstable=False)
110 | 
111 | # we notice that both the Lasso fit and the L2L0 fit miss the ground-state Ag5Pd3
112 | # and also add spurious ground-states not present in the training convex hull
113 | 
114 | 
115 | # create matrices for two types of contraints to keep the predicted hull unchanged
116 | # 1) keep non-ground states above the hull
117 | # 2) ensure ground-states stay on the hull
118 | 
119 | # 1) compute the correlation matrix for unstable structures and
120 | # the weighted correlation matrix of the decomposition products
121 | X_unstable = np.zeros(shape=(len(training_pd.unstable_entries), X.shape[1]))
122 | X_decomp = np.zeros_like(X_unstable)
123 | for i, entry in enumerate(training_pd.unstable_entries):
124 |     if entry.is_element:
125 |         continue
126 |     X_unstable[i] = entry.attribute["corrs"]
127 |     decomp_entries, ehull = training_pd.get_decomp_and_e_above_hull(entry)
128 |     for dentry, amount in decomp_entries.items():
129 |         ratio = (
130 |             amount
131 |             * (entry.composition.num_atoms / dentry.composition.num_atoms)
132 |             * dentry.attribute["size"]
133 |             / entry.attribute["size"]
134 |         )
135 |         X_decomp[i] += ratio * dentry.attribute["corrs"]
136 | 
137 | # 2) compute the ground-state correlation matrix
138 | # and the weighted correlation matrix of decomposition products if the ground state was not a ground-state
139 | X_stable = np.zeros(shape=(len(training_pd.stable_entries), X.shape[1]))
140 | X_gsdecomp = np.zeros_like(X_stable)
141 | gs_pd = pd.PhaseDiagram(training_pd.stable_entries)
142 | for i, entry in enumerate(gs_pd.stable_entries):
143 |     if entry.is_element:
144 |         continue
145 |     X_stable[i] = entry.attribute["corrs"]
146 |     decomp_entries, ehull = gs_pd.get_decomp_and_phase_separation_energy(entry)
147 |     for dentry, amount in decomp_entries.items():
148 |         ratio = (
149 |             amount
150 |             * (entry.composition.num_atoms / dentry.composition.num_atoms)
151 |             * dentry.attribute["size"]
152 |             / entry.attribute["size"]
153 |         )
154 |         X_gsdecomp[i] += ratio * dentry.attribute["corrs"]
155 | 
156 | 
157 | constrained_regressor = L2L0(
158 |     fit_intercept=True,
159 |     alpha=3.16e-7,
160 |     eta=1.66e-6,
161 |     solver="GUROBI",
162 |     solver_options={"Threads": 4},
163 | )
164 | 
165 | # now create the constraints by accessing the underlying cvxpy objects
166 | # if regressor.fit has not been called with the gigen data, we must call generate_problem to generate
167 | # the cvxpy objects that represent the regressino objective
168 | constrained_regressor.generate_problem(X, y)
169 | J = (
170 |     constrained_regressor.canonicals_.beta
171 | )  # this is the cvxpy variable representing the coefficients
172 | 
173 | # 1) add constraint to keep unstable structures above hull, ie no new ground states
174 | epsilon = 0.0005  # solutions will be very sensitive to the size of this margin
175 | constrained_regressor.add_constraints([X_unstable @ J >= X_decomp @ J + epsilon])
176 | 
177 | # 2) add constraint to keep all ground-states on the hull
178 | epsilon = 1e-6
179 | constrained_regressor.add_constraints([X_stable @ J <= X_gsdecomp @ J - epsilon])
180 | 
181 | 
182 | # fit the constrained regressor
183 | constrained_regressor.fit(X, y)
184 | 
185 | # look at the phase diagram based on the energies predicted by the L2L0 fit
186 | l2l0c_y = constrained_regressor.predict(X)
187 | constrained_pd = pd.PhaseDiagram(
188 |     [
189 |         pd.PDEntry(s_i.composition, y_i * len(s_i))
190 |         for s_i, y_i in zip(structures, l2l0c_y)
191 |     ]
192 | )
193 | pplotter = pd.PDPlotter(constrained_pd, backend="matplotlib", show_unstable=0)
194 | pplotter.show(label_unstable=False)
195 | # the constraints now force the fitted model to respect the trainind convex-hull
196 | 
197 | # Plot the different estimated coefficients
198 | fig, ax = plt.subplots()
199 | ax.plot(lasso_regressor.coef_[1:])
200 | ax.plot(l2l0_regressor.coef_[1:])
201 | ax.plot(constrained_regressor.coef_[1:])
202 | ax.set_xlabel("covariate index")
203 | ax.set_ylabel("coefficient value")
204 | ax.legend(["lasso", "l2l0", "l2l0 constrained"])
205 | fig.show()
206 | 
207 | # print the resulting training RMSE from the different fits
208 | lasso_rmse = np.sqrt(mean_squared_error(y, lasso_regressor.predict(X)))
209 | l2l0_rmse = np.sqrt(mean_squared_error(y, l2l0_regressor.predict(X)))
210 | l2l0c_rmse = np.sqrt(mean_squared_error(y, constrained_regressor.predict(X)))
211 | 
212 | print(f"Lasso train RMSE:                 {lasso_rmse:.4f}")
213 | print(f"L2L0 train RMSE:                  {l2l0_rmse:.4f}")
214 | print(f"L2L0 with constraings train RMSE: {l2l0c_rmse:.4f}")
215 | 


--------------------------------------------------------------------------------
/examples/plot_gl_sgl.py:
--------------------------------------------------------------------------------
  1 | """
  2 | =========================
  3 | (Sparse) Group regression
  4 | =========================
  5 | 
  6 | This examples shows how to use group lasso and sparse group lasso to fit a simulated
  7 | dataset with group-level sparsity and within-group sparsity.
  8 | """
  9 | 
 10 | import warnings
 11 | 
 12 | import matplotlib.pyplot as plt
 13 | import numpy as np
 14 | from sklearn.linear_model import Lasso
 15 | from sklearn.metrics import mean_squared_error, r2_score
 16 | from sklearn.model_selection import GridSearchCV, KFold, train_test_split
 17 | 
 18 | from sparselm.dataset import make_group_regression
 19 | from sparselm.model import GroupLasso, SparseGroupLasso
 20 | 
 21 | warnings.filterwarnings("ignore", category=UserWarning)  # ignore convergence warnings
 22 | 
 23 | # generate a dataset with group-level sparsity only
 24 | X, y, groups, coefs = make_group_regression(
 25 |     n_samples=400,
 26 |     n_groups=10,
 27 |     n_features_per_group=10,
 28 |     n_informative_groups=5,
 29 |     frac_informative_in_group=1.0,
 30 |     bias=-10.0,
 31 |     noise=200.0,
 32 |     coef=True,
 33 |     random_state=0,
 34 | )
 35 | 
 36 | # split data into train and test sets
 37 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
 38 | 
 39 | # create estimators
 40 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0)
 41 | lasso_cv = GridSearchCV(
 42 |     Lasso(fit_intercept=True), {"alpha": np.logspace(0, 2, 5)}, cv=cv5, n_jobs=-1
 43 | )
 44 | lasso_cv.fit(X_train, y_train)
 45 | glasso_cv = GridSearchCV(
 46 |     GroupLasso(groups=groups, fit_intercept=True),
 47 |     {"alpha": np.logspace(0, 2, 5)},
 48 |     cv=cv5,
 49 |     n_jobs=-1,
 50 | )
 51 | glasso_cv.fit(X_train, y_train)
 52 | 
 53 | # Plot predicted values
 54 | fig, ax = plt.subplots()
 55 | ax.plot(
 56 |     y_test, glasso_cv.predict(X_test), marker="o", ls="", alpha=0.5, label="group lasso"
 57 | )
 58 | ax.plot(y_test, lasso_cv.predict(X_test), marker="o", ls="", alpha=0.5, label="lasso")
 59 | ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--")
 60 | ax.legend()
 61 | ax.set_xlabel("true values")
 62 | ax.set_ylabel("predicted values")
 63 | fig.show()
 64 | 
 65 | # calculate model performance on test and train data
 66 | lasso_train = {
 67 |     "r2": r2_score(y_train, lasso_cv.predict(X_train)),
 68 |     "rmse": np.sqrt(mean_squared_error(y_train, lasso_cv.predict(X_train))),
 69 | }
 70 | 
 71 | lasso_test = {
 72 |     "r2": r2_score(y_test, lasso_cv.predict(X_test)),
 73 |     "rmse": np.sqrt(mean_squared_error(y_test, lasso_cv.predict(X_test))),
 74 | }
 75 | 
 76 | glasso_train = {
 77 |     "r2": r2_score(y_train, glasso_cv.predict(X_train)),
 78 |     "rmse": np.sqrt(mean_squared_error(y_train, glasso_cv.predict(X_train))),
 79 | }
 80 | 
 81 | glasso_test = {
 82 |     "r2": r2_score(y_test, glasso_cv.predict(X_test)),
 83 |     "rmse": np.sqrt(mean_squared_error(y_test, glasso_cv.predict(X_test))),
 84 | }
 85 | 
 86 | print("------- Performance metrics for signal with group-level sparsity only -------\n")
 87 | 
 88 | print("Lasso performance metrics:")
 89 | print(f"    train r2: {lasso_train['r2']:.3f}")
 90 | print(f"    test r2: {lasso_test['r2']:.3f}")
 91 | print(f"    train rmse: {lasso_train['rmse']:.3f}")
 92 | print(f"    test rmse: {lasso_test['rmse']:.3f}")
 93 | 
 94 | print("Group Lasso performance metrics:")
 95 | print(f"    train r2: {glasso_train['r2']:.3f}")
 96 | print(f"    test r2: {glasso_test['r2']:.3f}")
 97 | print(f"    train rmse: {glasso_train['rmse']:.3f}")
 98 | print(f"    test rmse: {glasso_test['rmse']:.3f}")
 99 | 
100 | # generate a dataset with group-level sparsity and within-group sparsity
101 | X, y, groups, coefs = make_group_regression(
102 |     n_samples=400,
103 |     n_groups=10,
104 |     n_features_per_group=10,
105 |     n_informative_groups=5,
106 |     frac_informative_in_group=0.5,
107 |     bias=-10.0,
108 |     noise=100.0,
109 |     coef=True,
110 |     random_state=0,
111 | )
112 | 
113 | # split data into train and test sets
114 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
115 | 
116 | glasso_cv = GridSearchCV(
117 |     GroupLasso(groups=groups, fit_intercept=True),
118 |     {"alpha": np.logspace(0, 2, 5)},
119 |     cv=cv5,
120 |     n_jobs=-1,
121 | )
122 | sglasso_cv = GridSearchCV(
123 |     SparseGroupLasso(groups=groups, fit_intercept=True),
124 |     {"alpha": np.logspace(0, 2, 5), "l1_ratio": np.arange(0.3, 0.8, 0.1)},
125 |     cv=cv5,
126 |     n_jobs=-1,
127 | )
128 | glasso_cv.fit(X_train, y_train)
129 | sglasso_cv.fit(X_train, y_train)
130 | 
131 | # Plot predicted values
132 | fig, ax = plt.subplots()
133 | ax.plot(
134 |     y_test, glasso_cv.predict(X_test), marker="o", ls="", alpha=0.5, label="group lasso"
135 | )
136 | ax.plot(
137 |     y_test,
138 |     sglasso_cv.predict(X_test),
139 |     marker="o",
140 |     ls="",
141 |     alpha=0.5,
142 |     label="sparse group lasso",
143 | )
144 | ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--")
145 | ax.legend()
146 | ax.set_xlabel("true values")
147 | ax.set_ylabel("predicted values")
148 | fig.show()
149 | 
150 | # calculate model performance on test and train data
151 | glasso_train = {
152 |     "r2": r2_score(y_train, glasso_cv.predict(X_train)),
153 |     "rmse": np.sqrt(mean_squared_error(y_train, glasso_cv.predict(X_train))),
154 | }
155 | 
156 | glasso_test = {
157 |     "r2": r2_score(y_test, glasso_cv.predict(X_test)),
158 |     "rmse": np.sqrt(mean_squared_error(y_test, glasso_cv.predict(X_test))),
159 | }
160 | 
161 | sglasso_train = {
162 |     "r2": r2_score(y_train, sglasso_cv.predict(X_train)),
163 |     "rmse": np.sqrt(mean_squared_error(y_train, sglasso_cv.predict(X_train))),
164 | }
165 | 
166 | sglasso_test = {
167 |     "r2": r2_score(y_test, sglasso_cv.predict(X_test)),
168 |     "rmse": np.sqrt(mean_squared_error(y_test, sglasso_cv.predict(X_test))),
169 | }
170 | 
171 | 
172 | print(
173 |     "------- Performance metrics for signal with group and within group sparsity -------\n"
174 | )
175 | 
176 | print("Group Lasso performance metrics:")
177 | print(f"    train r2: {glasso_train['r2']:.3f}")
178 | print(f"    test r2: {glasso_test['r2']:.3f}")
179 | print(f"    train rmse: {glasso_train['rmse']:.3f}")
180 | print(f"    test rmse: {glasso_test['rmse']:.3f}")
181 | 
182 | print("Sparse Group Lasso performance metrics:")
183 | print(f"    train r2: {sglasso_train['r2']:.3f}")
184 | print(f"    test r2: {sglasso_test['r2']:.3f}")
185 | print(f"    train rmse: {sglasso_train['rmse']:.3f}")
186 | print(f"    test rmse: {sglasso_test['rmse']:.3f}")
187 | 


--------------------------------------------------------------------------------
/examples/plot_line_search.py:
--------------------------------------------------------------------------------
 1 | """
 2 | =======================================
 3 | Tuning hyperparameters with line search
 4 | =======================================
 5 | 
 6 | Line search can typically be used in optimizing regressors with multiple weakly or
 7 | uncorrelated hyperparameters.
 8 | 
 9 | This example also showcases the usage of mixed L0 regressor where using a standard
10 | grid search can be too computationally expensive..
11 | """
12 | 
13 | import numpy as np
14 | from sklearn.datasets import make_regression
15 | from sklearn.metrics import mean_squared_error, r2_score
16 | from sklearn.model_selection import KFold, train_test_split
17 | 
18 | from sparselm.model import L2L0
19 | from sparselm.model_selection import LineSearchCV
20 | 
21 | X, y, coef = make_regression(
22 |     n_samples=60,
23 |     n_features=30,
24 |     n_informative=8,
25 |     noise=40.0,
26 |     bias=-15.0,
27 |     coef=True,
28 |     random_state=0,
29 | )
30 | 
31 | X_train, X_test, y_train, y_test = train_test_split(
32 |     X, y, test_size=0.25, random_state=0
33 | )
34 | 
35 | # create an l2l0 estimator.
36 | # Groups for parameters must be provided each coefficient is in a singleton group.
37 | groups = np.arange(30, dtype=int)
38 | l2l0 = L2L0(groups, fit_intercept=True, solver="GUROBI", solver_options={"Threads": 4})
39 | 
40 | # create cv search objects for each estimator
41 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0)
42 | # LineSearchCV requires the parameters grid to be provided in a list of tuple format,
43 | # with order of parameters in the list being the order of them getting searched per
44 | # iteration.
45 | # The following example specifies the parameter alpha to be scanned first, then the
46 | # parameter eta.
47 | params = [("alpha", np.logspace(-6, 1, 5)), ("eta", np.logspace(-7, -1, 5))]
48 | 
49 | l2l0_cv = LineSearchCV(l2l0, params, cv=cv5, n_jobs=4)
50 | 
51 | # fit models on training data
52 | l2l0_cv.fit(X_train, y_train)
53 | 
54 | # calculate model performance on test and train data
55 | l2l0_train = {
56 |     "r2": r2_score(y_train, l2l0_cv.predict(X_train)),
57 |     "rmse": np.sqrt(mean_squared_error(y_train, l2l0_cv.predict(X_train))),
58 | }
59 | 
60 | l2l0_test = {
61 |     "r2": r2_score(y_test, l2l0_cv.predict(X_test)),
62 |     "rmse": np.sqrt(mean_squared_error(y_test, l2l0_cv.predict(X_test))),
63 | }
64 | 
65 | print("Performance metrics:")
66 | print(f"    train r2: {l2l0_train['r2']:.3f}")
67 | print(f"    test r2: {l2l0_test['r2']:.3f}")
68 | print(f"    train rmse: {l2l0_train['rmse']:.3f}")
69 | print(f"    test rmse: {l2l0_test['rmse']:.3f}")
70 | 


--------------------------------------------------------------------------------
/examples/plot_one_std.py:
--------------------------------------------------------------------------------
  1 | """
  2 | =========================================
  3 | Hyperparameters selection with 1-std rule
  4 | =========================================
  5 | 
  6 | One-standard-deviation rule is a technique to promote model robustness when
  7 | cross validation results are noisy. The hyperparameter is chosen to
  8 | be equal to the maximum value that yields:
  9 |      CV = minimum CV + 1 * std(CV at minimum).
 10 | 
 11 | One-standard-deviation rule is available in both GridSearchCV and LineSearchCV
 12 | under sparselm.model_selection.
 13 | """
 14 | 
 15 | import matplotlib.pyplot as plt
 16 | import numpy as np
 17 | from sklearn.datasets import make_regression
 18 | from sklearn.linear_model import Lasso
 19 | from sklearn.metrics import mean_squared_error, r2_score
 20 | from sklearn.model_selection import KFold, train_test_split
 21 | 
 22 | from sparselm.model_selection import GridSearchCV
 23 | 
 24 | X, y, coef = make_regression(
 25 |     n_samples=200,
 26 |     n_features=100,
 27 |     n_informative=10,
 28 |     noise=40.0,
 29 |     bias=-15.0,
 30 |     coef=True,
 31 |     random_state=0,
 32 | )
 33 | 
 34 | X_train, X_test, y_train, y_test = train_test_split(
 35 |     X, y, test_size=0.25, random_state=0
 36 | )
 37 | 
 38 | # create estimators
 39 | lasso = Lasso(fit_intercept=True)
 40 | 
 41 | # create cv search objects for each estimator
 42 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0)
 43 | params = {"alpha": np.logspace(-1, 1.5, 20)}
 44 | 
 45 | lasso_cv_std = GridSearchCV(
 46 |     lasso, params, opt_selection_method="one_std_score", cv=cv5, n_jobs=-1
 47 | )
 48 | lasso_cv_opt = GridSearchCV(
 49 |     lasso, params, opt_selection_method="max_score", cv=cv5, n_jobs=-1
 50 | )
 51 | 
 52 | # fit models on training data
 53 | lasso_cv_std.fit(X_train, y_train)
 54 | lasso_cv_opt.fit(X_train, y_train)
 55 | 
 56 | # calculate model performance on test and train data
 57 | lasso_std_train = {
 58 |     "r2": r2_score(y_train, lasso_cv_std.predict(X_train)),
 59 |     "rmse": np.sqrt(mean_squared_error(y_train, lasso_cv_std.predict(X_train))),
 60 | }
 61 | 
 62 | lasso_std_test = {
 63 |     "r2": r2_score(y_test, lasso_cv_std.predict(X_test)),
 64 |     "rmse": np.sqrt(mean_squared_error(y_test, lasso_cv_std.predict(X_test))),
 65 | }
 66 | 
 67 | print("Lasso with 1-std:")
 68 | print(f"    alpha value: {lasso_cv_std.best_params_['alpha']}")
 69 | print(f"    train r2: {lasso_std_train['r2']:.3f}")
 70 | print(f"    test r2: {lasso_std_test['r2']:.3f}")
 71 | print(f"    train rmse: {lasso_std_train['rmse']:.3f}")
 72 | print(f"    test rmse: {lasso_std_test['rmse']:.3f}")
 73 | print(f"    sparsity: {sum(abs(lasso_cv_std.best_estimator_.coef_) > 1E-8)}")
 74 | 
 75 | lasso_opt_train = {
 76 |     "r2": r2_score(y_train, lasso_cv_opt.predict(X_train)),
 77 |     "rmse": np.sqrt(mean_squared_error(y_train, lasso_cv_opt.predict(X_train))),
 78 | }
 79 | 
 80 | lasso_opt_test = {
 81 |     "r2": r2_score(y_test, lasso_cv_opt.predict(X_test)),
 82 |     "rmse": np.sqrt(mean_squared_error(y_test, lasso_cv_opt.predict(X_test))),
 83 | }
 84 | 
 85 | print("Lasso performance:")
 86 | print(f"    alpha value: {lasso_cv_std.best_params_['alpha']}")
 87 | print(f"    train r2: {lasso_opt_train['r2']:.3f}")
 88 | print(f"    test r2: {lasso_opt_test['r2']:.3f}")
 89 | print(f"    train rmse: {lasso_opt_train['rmse']:.3f}")
 90 | print(f"    test rmse: {lasso_opt_test['rmse']:.3f}")
 91 | print(f"    sparsity: {sum(abs(lasso_cv_opt.best_estimator_.coef_) > 1E-8)}")
 92 | 
 93 | # plot cross validation scores
 94 | fig, ax = plt.subplots()
 95 | ax.plot(
 96 |     lasso_cv_std.cv_results_["param_alpha"].data,
 97 |     -lasso_cv_std.cv_results_["mean_test_score"],
 98 |     "o-",
 99 |     label="One std",
100 | )
101 | ax.plot(
102 |     lasso_cv_std.cv_results_["param_alpha"].data,
103 |     -lasso_cv_opt.cv_results_["mean_test_score"]
104 |     + lasso_cv_std.cv_results_["std_test_score"],
105 |     "k--",
106 |     alpha=0.5,
107 | )
108 | ax.plot(
109 |     lasso_cv_std.cv_results_["param_alpha"].data,
110 |     -lasso_cv_opt.cv_results_["mean_test_score"]
111 |     - lasso_cv_std.cv_results_["std_test_score"],
112 |     "k--",
113 |     alpha=0.5,
114 | )
115 | ax.set_xlabel("alpha")
116 | ax.set_ylabel("rmse")
117 | ax.legend(["mean", "std"])
118 | fig.show()
119 | 
120 | # plot model coefficients
121 | fig, ax = plt.subplots()
122 | ax.plot(coef, "o", label="True coefficients")
123 | ax.plot(lasso_cv_std.best_estimator_.coef_, "o", label="One std", alpha=0.5)
124 | ax.plot(lasso_cv_opt.best_estimator_.coef_, "o", label="Max score", alpha=0.5)
125 | ax.set_xlabel("covariate index")
126 | ax.set_ylabel("coefficient value")
127 | ax.legend()
128 | fig.show()
129 | 


--------------------------------------------------------------------------------
/examples/plot_sparse_signal.py:
--------------------------------------------------------------------------------
 1 | """
 2 | =========================
 3 | Recovering sparse signals
 4 | =========================
 5 | 
 6 | In this example we compare the results obtained from `BestSubsetSelection` with
 7 | those obtained using the `OrthogonalMatchingPursuit` regressor from **scikit-learn**.
 8 | 
 9 | Note that although using best subset selection tend to give more accurate results,
10 | `OrthogonalMatchingPursuit` scales much better to larger problems.
11 | 
12 | This example is adapted from the scikit-learn documentation:
13 | https://scikit-learn.org/stable/auto_examples/linear_model/plot_omp.html#sphx-glr-auto-examples-linear-model-plot-omp-py
14 | """
15 | 
16 | import matplotlib.pyplot as plt
17 | import numpy as np
18 | from sklearn.datasets import make_sparse_coded_signal
19 | from sklearn.linear_model import OrthogonalMatchingPursuit
20 | 
21 | from sparselm.model import BestSubsetSelection
22 | 
23 | n_components, n_features = 50, 20
24 | n_nonzero_coefs = 8
25 | 
26 | # generate the data
27 | y, X, w = make_sparse_coded_signal(
28 |     n_samples=1,
29 |     n_components=n_components,
30 |     n_features=n_features,
31 |     n_nonzero_coefs=n_nonzero_coefs,
32 |     random_state=0,
33 | )
34 | X = X.T
35 | (idx,) = w.nonzero()
36 | 
37 | # distort the clean signal
38 | y_noisy = y + 0.005 * np.random.randn(len(y))
39 | 
40 | # plot the sparse signal
41 | plt.figure(figsize=(14, 7))
42 | plt.subplot(3, 2, (1, 2))
43 | plt.xlim(0, n_components)
44 | plt.title("Sparse signal")
45 | plt.stem(idx, w[idx])
46 | 
47 | # plot the noise-free reconstruction
48 | omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
49 | omp.fit(X, y)
50 | coef = omp.coef_
51 | (idx_r,) = coef.nonzero()
52 | plt.subplot(3, 2, 3)
53 | plt.xlim(0, n_components)
54 | plt.title("Orthogonal Matching Pursuit (noise-free measurements)")
55 | plt.stem(idx_r, coef[idx_r])
56 | 
57 | bss = BestSubsetSelection(
58 |     sparse_bound=n_nonzero_coefs, solver="GUROBI", solver_options={"Threads": 8}
59 | )
60 | bss.fit(X, y)
61 | coef = bss.coef_
62 | (idx_r,) = coef.nonzero()
63 | plt.subplot(3, 2, 4)
64 | plt.xlim(0, n_components)
65 | plt.title("Best Subset Selection (noise-free measurements)")
66 | plt.stem(idx_r, coef[idx_r])
67 | 
68 | # plot the noisy reconstruction
69 | omp.fit(X, y_noisy)
70 | coef = omp.coef_
71 | (idx_r,) = coef.nonzero()
72 | plt.subplot(3, 2, 5)
73 | plt.xlim(0, n_components)
74 | plt.title("Orthogonal Matching Pursuit recovery (noisy measurements)")
75 | plt.stem(idx_r, coef[idx_r])
76 | 
77 | bss.fit(X, y_noisy)
78 | coef = bss.coef_
79 | (idx_r,) = coef.nonzero()
80 | plt.subplot(3, 2, 6)
81 | plt.xlim(0, n_components)
82 | plt.title("Best Subset Selection (noisy measurements)")
83 | plt.stem(idx_r, coef[idx_r])
84 | 
85 | plt.tight_layout()
86 | plt.show()
87 | 


--------------------------------------------------------------------------------
/examples/plot_stepwise.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ========================
  3 | Using stepwise estimator
  4 | ========================
  5 | 
  6 | Stepwise estimator can be used to implement stepwise fitting. It comprises several
  7 | regressor, each responsible for fitting specific rows of the feature matrix to
  8 | the target vector and passing the residual values down to be fitted by the subsequent
  9 | regressors.
 10 | 
 11 | This example is purely for demonstration purpose and we do not expect any meaningful
 12 | performance improvement.
 13 | 
 14 | However, stepwise fitting can be useful in certain problems where groups of covariates
 15 | have substantially different effects on the target vector.
 16 | 
 17 | For example, in fitting the atomic configration energy of an crystalline solid using a
 18 | cluster expansion of an ionic system, one might want to fit the energy to single site
 19 | features first then subtract those main effects from the target, and fit the residual
 20 | of energy to other cluster interactions.
 21 | """
 22 | 
 23 | import matplotlib.pyplot as plt
 24 | import numpy as np
 25 | from sklearn.datasets import make_regression
 26 | from sklearn.linear_model import Lasso, Ridge
 27 | from sklearn.metrics import mean_squared_error, r2_score
 28 | from sklearn.model_selection import KFold, train_test_split
 29 | 
 30 | from sparselm.model_selection import GridSearchCV
 31 | from sparselm.stepwise import StepwiseEstimator
 32 | 
 33 | X, y, coef = make_regression(
 34 |     n_samples=200,
 35 |     n_features=100,
 36 |     n_informative=10,
 37 |     noise=40.0,
 38 |     bias=-15.0,
 39 |     coef=True,
 40 |     random_state=0,
 41 | )
 42 | 
 43 | X_train, X_test, y_train, y_test = train_test_split(
 44 |     X, y, test_size=0.25, random_state=0
 45 | )
 46 | 
 47 | # Create estimators for each step.
 48 | # Only the first estimator is allowed to fit_intercept!
 49 | ridge = Ridge(fit_intercept=True)
 50 | lasso = Lasso(fit_intercept=False)
 51 | cv5 = KFold(n_splits=5, shuffle=True, random_state=0)
 52 | params = {"alpha": np.logspace(-1, 1, 10)}
 53 | estimator1 = GridSearchCV(ridge, params, cv=cv5, n_jobs=-1)
 54 | estimator2 = GridSearchCV(lasso, params, cv=cv5, n_jobs=-1)
 55 | 
 56 | # Create a StepwiseEstimator. It can be composed of either
 57 | # regressors or GridSearchCV and LineSearchCV optimizers.
 58 | # In this case, we first fit the target vector to the first 3
 59 | # and the last feature, then fit the residual vector to the rest
 60 | # of the features with GridSearchCV to optimize the Lasso
 61 | # hyperparameter.
 62 | stepwise = StepwiseEstimator(
 63 |     [("est", estimator1), ("est2", estimator2)], ((0, 1, 2, 99), tuple(range(3, 99)))
 64 | )
 65 | 
 66 | # fit models on training data
 67 | stepwise.fit(X_train, y_train)
 68 | 
 69 | # calculate model performance on test and train data
 70 | stepwise_train = {
 71 |     "r2": r2_score(y_train, stepwise.predict(X_train)),
 72 |     "rmse": np.sqrt(mean_squared_error(y_train, stepwise.predict(X_train))),
 73 | }
 74 | 
 75 | stepwise_test = {
 76 |     "r2": r2_score(y_test, stepwise.predict(X_test)),
 77 |     "rmse": np.sqrt(mean_squared_error(y_test, stepwise.predict(X_test))),
 78 | }
 79 | 
 80 | print("Lasso performance metrics:")
 81 | print(f"    train r2: {stepwise_train['r2']:.3f}")
 82 | print(f"    test r2: {stepwise_test['r2']:.3f}")
 83 | print(f"    train rmse: {stepwise_train['rmse']:.3f}")
 84 | print(f"    test rmse: {stepwise_test['rmse']:.3f}")
 85 | 
 86 | # plot model coefficients
 87 | fig, ax = plt.subplots()
 88 | ax.plot(coef, "o", label="True coefficients")
 89 | ax.plot(stepwise.coef_[[0, 1, 2, 99]], "o", label="Stepwise (ridge)", alpha=0.5)
 90 | ax.plot(stepwise.coef_[range(3, 99)], "o", label="Stepwise (lasso)", alpha=0.5)
 91 | ax.set_xlabel("covariate index")
 92 | ax.set_ylabel("coefficient value")
 93 | ax.legend()
 94 | fig.show()
 95 | 
 96 | # plot predicted values
 97 | fig, ax = plt.subplots()
 98 | ax.plot(y_test, stepwise.predict(X_test), "o", label="Stepwise", alpha=0.5)
 99 | ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--")
100 | ax.set_xlabel("true values")
101 | ax.set_ylabel("predicted values")
102 | ax.legend()
103 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=45", "setuptools-scm[toml]>=6.2"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "sparse-lm"
 7 | description = "Sparse linear regression models"
 8 | authors = [
 9 |     {name = "Luis Barroso-Luque", email = "lbluque@berkeley.edu"}
10 | ]
11 | readme = "README.md"
12 | license = {text = "BSD 3-Clause License"}
13 | dynamic = ["version"]
14 | dependencies = [
15 |     "numpy >=1.23", "cvxpy >=1.2", "scikit-learn >=1.2.1",
16 |     "scipy >=1.9", "joblib"
17 |     ]
18 | classifiers = [
19 |     "Development Status :: 3 - Alpha",
20 |     "Programming Language :: Python :: 3 :: Only",
21 |     "Programming Language :: Python :: 3.9",
22 |     "Programming Language :: Python :: 3.10",
23 |     "Intended Audience :: Science/Research",
24 |     "License :: OSI Approved :: BSD License",
25 |     "Operating System :: OS Independent",
26 |     "Topic :: Scientific/Engineering :: Information Analysis",
27 |     "Topic :: Scientific/Engineering :: Mathematics",
28 |     "Topic :: Software Development :: Libraries :: Python Modules"
29 | ]
30 | 
31 | [project.optional-dependencies]
32 | dev = ["pre-commit", "black", "isort", "flake8", "pylint", "pydocstyle", "flake8-pyproject"]
33 | # Gurobipy needed by mixedL0 tests, pandas needed by sklearn convention checks.
34 | tests = ["pytest >=7.2.0", "pytest-cov >=4.0.0", "coverage", "pandas", "gurobipy", "pyscipopt"]
35 | docs = ["sphinx>=5.3", "furo", "m2r2", "sphinx-gallery", "matplotlib", "gurobipy", "pymatgen"]
36 | optional = ["gurobipy"]
37 | 
38 | # pyproject.toml
39 | [tool.setuptools_scm]
40 | 
41 | # linting tools, etc
42 | [tool.pytest.ini_options]
43 | minversion = "6.0"
44 | addopts  =  "-x --durations = 30 --quiet -rxXs --color = yes"
45 | filterwarnings  = [
46 |     'ignore::UserWarning',
47 |     'ignore::FutureWarning',
48 |     'ignore::RuntimeWarning'
49 |     ]
50 | 
51 | [tool.flake8]
52 | exclude  =  ['docs', 'tests']
53 | ignore  =  ['E203', 'E501', 'W503']
54 | max-line-length  =  88
55 | 
56 | [tool.pylint.main]
57 | ignore = ["tests"]
58 | 
59 | [tool.pylint.basic]
60 | argument-naming-style = "snake_case"
61 | attr-naming-style = "snake_case"
62 | method-naming-style = "snake_case"
63 | function-naming-style = "snake_case"
64 | class-naming-style = "PascalCase"
65 | good-names  =  ['id', 'kB', 'i', 'j', 'k', 'f']
66 | 
67 | [too.pylint."messages control"]
68 | disable  =  ['W0511', 'R0904', 'R0903', 'R0913', 'R0902', 'R0914', 'C0415']
69 | 
70 | [tool.codespell]
71 | skip = "*.c,./.*"
72 | count = ''
73 | quiet-level = 3
74 | ignore-words-list = ['nd', 'tread']
75 | 
76 | [tool.coverage.run]
77 | source = ["src/sparselm"]
78 | omit = ["*/__init__.py"]
79 | 
80 | [tool.pydocstyle]
81 | convention = "google"
82 | add_ignore = ["D107"]
83 | 
84 | [[tool.mypy.overrides]]
85 | module = ["sklearn.*", "scipy.linalg"]
86 | ignore_missing_imports = true
87 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy >=1.23
2 | cvxpy >=1.2
3 | scikit-learn > 1.2
4 | scipy >=1.9
5 | joblib
6 | 


--------------------------------------------------------------------------------
/src/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scikit-learn
3 | cvxpy
4 | scipy
5 | joblib
6 | 


--------------------------------------------------------------------------------
/src/sparselm/__init__.py:
--------------------------------------------------------------------------------
 1 | """Classes implementing generalized linear regression Regressors."""
 2 | 
 3 | from importlib.metadata import PackageNotFoundError, version
 4 | 
 5 | try:
 6 |     __version__ = version("sparse-lm")
 7 | except PackageNotFoundError:
 8 |     # package is not installed
 9 |     __version__ = ""
10 | 


--------------------------------------------------------------------------------
/src/sparselm/_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CederGroupHub/sparse-lm/220cbbad4a5ac98d1a52326c525aadb95f2c5b18/src/sparselm/_utils/__init__.py


--------------------------------------------------------------------------------
/src/sparselm/_utils/validation.py:
--------------------------------------------------------------------------------
 1 | """Data and hyper-parameters validation utilities."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import numpy as np
 6 | from numpy.typing import NDArray
 7 | 
 8 | 
 9 | def _check_groups(
10 |     groups: NDArray[np.floating | np.integer] | list[int | float] | None,
11 |     n_features: int,
12 | ) -> None:
13 |     """Check that groups are 1D and of the correct length.
14 | 
15 |     Args:
16 |         groups (NDArray):
17 |             List of group labels
18 |         n_features (int):
19 |             Number of features/covariates being fit
20 | 
21 |     """
22 |     if groups is None:
23 |         return
24 | 
25 |     if not isinstance(groups, (list, np.ndarray)):
26 |         raise TypeError("groups must be a list or ndarray")
27 | 
28 |     groups = np.asarray(groups).astype(int)
29 |     if groups.ndim != 1:
30 |         raise ValueError("groups must be a 1D array")
31 | 
32 |     if len(groups) != n_features:
33 |         raise ValueError(
34 |             f"groups must be the same length as the number of features {n_features}"
35 |         )
36 | 
37 | 
38 | def _check_group_weights(
39 |     group_weights: NDArray[np.floating] | None, n_groups: int
40 | ) -> None:
41 |     """Check that group weights are 1D and of the correct length.
42 | 
43 |     Args:
44 |         group_weights (NDArray):
45 |             List of group weights
46 |         n_groups (int):
47 |             Number of groups
48 |     """
49 |     if group_weights is None:
50 |         return
51 | 
52 |     if not isinstance(group_weights, (list, np.ndarray)):
53 |         raise TypeError("group_weights must be a list or ndarray")
54 | 
55 |     group_weights = np.asarray(group_weights)
56 |     if len(group_weights) != n_groups:
57 |         raise ValueError(
58 |             f"group_weights must be the same length as the number of groups {len(group_weights)} != {n_groups}"
59 |         )
60 | 


--------------------------------------------------------------------------------
/src/sparselm/dataset.py:
--------------------------------------------------------------------------------
  1 | """Generate synthemetic datasets akin to sklearn.datasets."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import warnings
  6 | from typing import Sequence
  7 | 
  8 | import numpy as np
  9 | from numpy.random import RandomState
 10 | from sklearn.datasets import make_regression
 11 | from sklearn.utils import check_random_state
 12 | 
 13 | 
 14 | def make_group_regression(
 15 |     n_samples: int = 100,
 16 |     n_groups: int = 20,
 17 |     n_features_per_group: int | Sequence = 10,
 18 |     n_informative_groups: int = 5,
 19 |     frac_informative_in_group: float = 1.0,
 20 |     bias: float = 0.0,
 21 |     effective_rank: int | None = None,
 22 |     tail_strength: float = 0.5,
 23 |     noise: float = 0.0,
 24 |     shuffle: bool = True,
 25 |     coef: bool = False,
 26 |     random_state: int | RandomState | None = None,
 27 | ) -> tuple[np.ndarray, ...]:
 28 |     """Generate a random regression problem with grouped covariates.
 29 | 
 30 |     Args:
 31 |         n_samples (int, optional):
 32 |             Number of samples to generate.
 33 |         n_groups (int, optional):
 34 |             Number of groups to generate.
 35 |         n_features_per_group (int | Sequence, optional):
 36 |             Number of features per group to generate. If a sequence is passed the
 37 |             length must be equal to n_groups then each element will be the number of
 38 |             features in the corresponding group.
 39 |         n_informative_groups (int, optional):
 40 |             Number of informative groups.
 41 |         frac_informative_in_group (float, optional):
 42 |             Fraction of informative features in each group
 43 |             The number of features will be rounded to nearest int.
 44 |         bias (float, optional):
 45 |             Bias added to the decision function.
 46 |         effective_rank ([type], optional):
 47 |             Approximate number of singular vectors
 48 |             required to explain most of the input data by linear combinations.
 49 |         tail_strength (float, optional):
 50 |             Relative importance of the fat noisy tail
 51 |             of the singular values profile if `effective_rank` is not None.
 52 |         noise (float, optional):
 53 |             Standard deviation of the gaussian noise applied to the output.
 54 |         shuffle (bool, optional):
 55 |             Shuffle the samples and the features. Defaults to True.
 56 |         coef (bool, optional):
 57 |             If True, the coefficients of the underlying linear model are returned.
 58 |         random_state ([type], optional):
 59 |             Random state for dataset generation.
 60 | 
 61 |     Returns:
 62 |         tuple[np.ndarray, np.ndarray, np.ndarray, ...]:
 63 |             X, y, groups, coefficients (optional)
 64 |     """
 65 |     generator = check_random_state(random_state)
 66 | 
 67 |     informative_groups = list(range(n_informative_groups))
 68 | 
 69 |     if isinstance(n_features_per_group, int):
 70 |         n_features = n_features_per_group * n_groups
 71 |         n_informative_in_group = round(frac_informative_in_group * n_features_per_group)
 72 |         n_informative = n_informative_in_group * n_informative_groups
 73 |         # make n_features_per_group a list of length n_groups
 74 |         n_features_per_group = [n_features_per_group] * n_groups
 75 |         n_informative_per_group = [n_informative_in_group] * n_informative_groups
 76 |     else:
 77 |         if len(n_features_per_group) == n_groups:
 78 |             n_features = sum(n_features_per_group)
 79 |             n_informative_per_group = [
 80 |                 round(frac_informative_in_group * n_features_per_group[i])
 81 |                 for i in informative_groups
 82 |             ]
 83 |             n_informative = sum(n_informative_per_group)
 84 |         else:
 85 |             raise ValueError(
 86 |                 "If passing a sequence of n_features_per_group, the length must be "
 87 |                 "equal to n_groups."
 88 |             )
 89 | 
 90 |     if any(n < 1 for n in n_informative_per_group):
 91 |         warnings.warn(
 92 |             "The number of features and fraction of informative features per group resulted in "
 93 |             "informative groups having no informative features.",
 94 |             UserWarning,
 95 |         )
 96 | 
 97 |     X, y, coefs = make_regression(
 98 |         n_samples=n_samples,
 99 |         n_features=n_features,
100 |         n_informative=n_informative,
101 |         bias=bias,
102 |         effective_rank=effective_rank,
103 |         tail_strength=tail_strength,
104 |         noise=noise,
105 |         shuffle=shuffle,
106 |         coef=True,
107 |         random_state=generator,
108 |     )
109 | 
110 |     # assign coefficients to groups
111 |     groups = np.zeros(n_features, dtype=int)
112 |     informative_coef_inds = np.nonzero(coefs > noise)[0].tolist()
113 |     other_coef_inds = np.nonzero(coefs <= noise)[0].tolist()
114 | 
115 |     for i, nfg in enumerate(n_features_per_group):
116 |         if i in informative_groups:
117 |             nifg = n_informative_per_group[informative_groups.index(i)]
118 |             ii = informative_coef_inds[:nifg] + other_coef_inds[: nfg - nifg]
119 |             # remove assigned indices
120 |             informative_coef_inds = informative_coef_inds[nifg:]
121 |             other_coef_inds = other_coef_inds[nfg - nifg :]
122 |         else:
123 |             ii = other_coef_inds[:nfg]
124 |             other_coef_inds = other_coef_inds[nfg:]
125 | 
126 |         # assign group ids
127 |         groups[ii] = i
128 | 
129 |     if shuffle:
130 |         indices = np.arange(n_features)
131 |         generator.shuffle(indices)
132 |         X[:, :] = X[:, indices]
133 |         groups = groups[indices]
134 |         coefs = coefs[indices]
135 | 
136 |     if coef:
137 |         return X, y, groups, coefs
138 |     else:
139 |         return X, y, groups
140 | 


--------------------------------------------------------------------------------
/src/sparselm/model/__init__.py:
--------------------------------------------------------------------------------
 1 | """Classes implementing generalized linear regression Regressors."""
 2 | 
 3 | from ._adaptive_lasso import (
 4 |     AdaptiveGroupLasso,
 5 |     AdaptiveLasso,
 6 |     AdaptiveOverlapGroupLasso,
 7 |     AdaptiveRidgedGroupLasso,
 8 |     AdaptiveSparseGroupLasso,
 9 | )
10 | from ._lasso import (
11 |     GroupLasso,
12 |     Lasso,
13 |     OverlapGroupLasso,
14 |     RidgedGroupLasso,
15 |     SparseGroupLasso,
16 | )
17 | from ._miqp import (
18 |     L1L0,
19 |     L2L0,
20 |     BestSubsetSelection,
21 |     RegularizedL0,
22 |     RidgedBestSubsetSelection,
23 | )
24 | from ._ols import OrdinaryLeastSquares
25 | 
26 | __all__ = [
27 |     "OrdinaryLeastSquares",
28 |     "Lasso",
29 |     "BestSubsetSelection",
30 |     "RidgedBestSubsetSelection",
31 |     "RegularizedL0",
32 |     "L1L0",
33 |     "L2L0",
34 |     "GroupLasso",
35 |     "OverlapGroupLasso",
36 |     "SparseGroupLasso",
37 |     "RidgedGroupLasso",
38 |     "AdaptiveLasso",
39 |     "AdaptiveGroupLasso",
40 |     "AdaptiveOverlapGroupLasso",
41 |     "AdaptiveSparseGroupLasso",
42 |     "AdaptiveRidgedGroupLasso",
43 | ]
44 | 


--------------------------------------------------------------------------------
/src/sparselm/model/_base.py:
--------------------------------------------------------------------------------
  1 | """Base classes for in-house linear regression Regressors.
  2 | 
  3 | The classes make use of and follow the scikit-learn API.
  4 | """
  5 | 
  6 | from __future__ import annotations
  7 | 
  8 | __author__ = "Luis Barroso-Luque, Fengyu Xie"
  9 | 
 10 | import warnings
 11 | from abc import ABCMeta, abstractmethod
 12 | from collections.abc import Sequence
 13 | from numbers import Integral
 14 | from types import SimpleNamespace
 15 | from typing import Any, NamedTuple
 16 | 
 17 | import cvxpy as cp
 18 | import numpy as np
 19 | from numpy.typing import NDArray
 20 | from sklearn.base import RegressorMixin
 21 | from sklearn.linear_model._base import (
 22 |     LinearModel,
 23 |     _check_sample_weight,
 24 |     _preprocess_data,
 25 |     _rescale_data,
 26 | )
 27 | from sklearn.utils._param_validation import (
 28 |     Interval,
 29 |     Options,
 30 |     _ArrayLikes,
 31 |     _Booleans,
 32 |     _InstancesOf,
 33 |     make_constraint,
 34 |     validate_parameter_constraints,
 35 | )
 36 | 
 37 | 
 38 | class CVXCanonicals(NamedTuple):
 39 |     """CVXpy Canonical objects representing the underlying optimization problem.
 40 | 
 41 |     Attributes:
 42 |         objective (cp.Problem):
 43 |             Objective function.
 44 |         objective (cp.Expression):
 45 |             Objective function.
 46 |         beta (cp.Variable):
 47 |             Variable to be optimized (corresponds to the estimated coef_ attribute).
 48 |         parameters (SimpleNamespace of cp.Parameter or NDArray):
 49 |             SimpleNamespace with named cp.Parameter objects or NDArray of parameters.
 50 |             The namespace should be defined by the Regressor generating it.
 51 |         auxiliaries (SimpleNamespace of cp.Variable or cp.Expression):
 52 |             SimpleNamespace with auxiliary cp.Variable or cp.Expression objects.
 53 |             The namespace should be defined by the Regressor generating it.
 54 |         constraints (list of cp.Constaint):
 55 |             List of constraints intrinsic to regression problem.
 56 |         user_constraints (list of cp.Constaint):
 57 |             List of user-defined constraints.
 58 |     """
 59 | 
 60 |     problem: cp.Problem
 61 |     objective: cp.Expression
 62 |     beta: cp.Variable
 63 |     parameters: SimpleNamespace | None
 64 |     auxiliaries: SimpleNamespace | None
 65 |     constraints: list[cp.Constraint]
 66 |     user_constraints: list[cp.Constraint]
 67 | 
 68 | 
 69 | class CVXRegressor(RegressorMixin, LinearModel, metaclass=ABCMeta):
 70 |     r"""Abstract base class for Regressors using cvxpy with a sklearn interface.
 71 | 
 72 |     Note cvxpy can use one of many 3rd party solvers, default is most often
 73 |     CVXOPT or ECOS. For integer and mixed integer problems options include
 74 |     SCIP (open source) and Gurobi, among other commercial solvers.
 75 | 
 76 |     The solver can be specified by setting the solver keyword argument.
 77 |     And can solver specific settings can be set by passing a dictionary of
 78 |     solver_options.
 79 | 
 80 |     See "Setting solver options" in documentation for details of available options:
 81 |     https://www.cvxpy.org/tutorial/advanced/index.html#advanced
 82 | 
 83 |     Args:
 84 |         fit_intercept (bool):
 85 |             Whether the intercept should be estimated or not.
 86 |             If False, the data is assumed to be already centered.
 87 |         copy_X (bool):
 88 |             If True, X will be copied; else, it may be overwritten.
 89 |         warm_start (bool):
 90 |             When set to True, reuse the solution of the previous call to
 91 |             fit as initialization, otherwise, just erase the previous
 92 |             solution.
 93 |         solver (str):
 94 |             cvxpy backend solver to use. Supported solvers are listed here:
 95 |             https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
 96 |         solver_options (dict):
 97 |             dictionary of keyword arguments passed to cvxpy solve.
 98 |             See docs linked above for more information.
 99 | 
100 |     Attributes:
101 |         coef_ (NDArray):
102 |             Parameter vector (:math:`\beta` in the cost function formula) of shape
103 |             (n_features,).
104 |         intercept_ (float):
105 |             Independent term in decision function.
106 |         canonicals_ (SimpleNamespace):
107 |             Namespace that contains underlying cvxpy objects used to define
108 |             the optimization problem. The objects included are the following:
109 |                 - objective - the objective function.
110 |                 - beta - variable to be optimized (corresponds to the estimated
111 |                          coef_ attribute).
112 |                 - parameters - hyper-parameters
113 |                 - auxiliaries - auxiliary variables and expressions
114 |                 - constraints - solution constraints
115 |     """
116 | 
117 |     # parameter constraints that do not need any cvxpy Parameter object
118 |     _parameter_constraints: dict[str, list[Any]] = {
119 |         "fit_intercept": ["boolean"],
120 |         "copy_X": ["boolean"],
121 |         "warm_start": ["boolean"],
122 |         "solver": [Options(type=str, options=set(cp.installed_solvers())), None],
123 |         "solver_options": [dict, None],
124 |     }
125 |     # parameter constraints that require a cvxpy Parameter object in problem definition
126 |     _cvx_parameter_constraints: dict[str, list[Any]] | None = None
127 | 
128 |     def __init__(
129 |         self,
130 |         fit_intercept: bool = False,
131 |         copy_X: bool = True,
132 |         warm_start: bool = False,
133 |         solver: str | None = None,
134 |         solver_options: dict[str, Any] | None = None,
135 |     ):
136 |         self.fit_intercept = fit_intercept
137 |         self.copy_X = copy_X
138 |         self.warm_start = warm_start
139 |         self.solver = solver
140 |         self.solver_options = solver_options
141 | 
142 |     def fit(
143 |         self,
144 |         X: NDArray,
145 |         y: NDArray,
146 |         sample_weight: NDArray[np.floating] | None = None,
147 |         *args,
148 |         **kwargs,
149 |     ):
150 |         """Fit the linear model coefficients.
151 | 
152 |         Prepares the  fit data input, generates cvxpy objects to represent the
153 |         minimization objective, and solves the regression problem using the given
154 |         solver.
155 | 
156 |         Args:
157 |             X (NDArray):
158 |                 Training data of shape (n_samples, n_features).
159 |             y (NDArray):
160 |                 Target values. Will be cast to X's dtype if necessary
161 |                 of shape (n_samples,) or (n_samples, n_targets)
162 |             sample_weight (NDArray):
163 |                 Individual weights for each sample of shape (n_samples,)
164 |                 default=None
165 |             *args:
166 |                 Positional arguments passed to solve method
167 |             **kwargs:
168 |                 Keyword arguments passed to solve method
169 | 
170 |         Returns:
171 |             instance of self
172 |         """
173 |         X, y = self._validate_data(
174 |             X, y, accept_sparse=False, y_numeric=True, multi_output=False
175 |         )
176 | 
177 |         X, y, X_offset, y_offset, X_scale = self._preprocess_data(X, y, sample_weight)
178 | 
179 |         self._validate_params(X, y)
180 | 
181 |         # TODO test theses cases
182 |         if not hasattr(self, "canonicals_"):
183 |             self.generate_problem(X, y, preprocess_data=False)
184 |         elif not np.array_equal(self.cached_X_, X) or not np.array_equal(
185 |             self.cached_y_, y
186 |         ):
187 |             if self.canonicals_.user_constraints:
188 |                 warnings.warn(
189 |                     "User constraints are set on a problem with different data (X, y). "
190 |                     "These constraints will be ignored.",
191 |                     UserWarning,
192 |                 )
193 |             self.generate_problem(X, y, preprocess_data=False)
194 |         else:
195 |             self._set_param_values()  # set parameter values
196 | 
197 |         solver_options = self.solver_options if self.solver_options is not None else {}
198 |         if not isinstance(solver_options, dict):
199 |             raise TypeError("solver_options must be a dictionary")
200 | 
201 |         self.coef_ = self._solve(X, y, solver_options, *args, **kwargs)
202 |         self._set_intercept(X_offset, y_offset, X_scale)
203 | 
204 |         # return self for chaining fit and predict calls
205 |         return self
206 | 
207 |     def _preprocess_data(
208 |         self, X: NDArray, y: NDArray, sample_weight: NDArray[np.floating] | None = None
209 |     ) -> tuple[NDArray, NDArray, NDArray, NDArray, NDArray]:
210 |         """Preprocess data for fitting."""
211 |         if sample_weight is not None:
212 |             sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
213 |             # rescale sample_weight to sum to number of samples
214 |             sample_weight = sample_weight * (X.shape[0] / np.sum(sample_weight))  # type: ignore
215 | 
216 |         X, y, X_offset, y_offset, X_scale = _preprocess_data(
217 |             X,
218 |             y,
219 |             copy=self.copy_X,
220 |             fit_intercept=self.fit_intercept,
221 |             sample_weight=sample_weight,
222 |         )
223 | 
224 |         if sample_weight is not None:
225 |             X, y, _ = _rescale_data(X, y, sample_weight)
226 | 
227 |         return X, y, X_offset, y_offset, X_scale
228 | 
229 |     def _validate_params(self, X: NDArray, y: NDArray) -> None:
230 |         """Validate hyperparameter values.
231 | 
232 |         Implement this in an Regressor for additional parameter value validation.
233 |         """
234 |         if self._cvx_parameter_constraints is None:
235 |             parameter_constraints = self._parameter_constraints
236 |         else:
237 |             parameter_constraints = {
238 |                 **self._parameter_constraints,
239 |                 **self._cvx_parameter_constraints,
240 |             }
241 |         validate_parameter_constraints(
242 |             parameter_constraints,
243 |             self.get_params(deep=False),
244 |             caller_name=self.__class__.__name__,
245 |         )
246 | 
247 |     def _set_param_values(self) -> None:
248 |         """Set the values of cvxpy parameters from param attributes for warm starts."""
249 |         if self._cvx_parameter_constraints is None:
250 |             return
251 | 
252 |         for parameter, value in self.get_params(deep=False).items():
253 |             if parameter in self._cvx_parameter_constraints:
254 |                 cvx_parameter = getattr(self.canonicals_.parameters, parameter)
255 |                 # check for parameters that take a scalar or an array
256 |                 if isinstance(value, np.ndarray) or isinstance(value, Sequence):
257 |                     if len(value) == 1:
258 |                         value = value * np.ones_like(cvx_parameter.value)
259 |                     else:
260 |                         value = np.asarray(value)
261 |                 cvx_parameter.value = value
262 | 
263 |     def _generate_params(self, X: NDArray, y: NDArray) -> SimpleNamespace:
264 |         """Return the named tuple of cvxpy parameters for optimization problem.
265 | 
266 |         The cvxpy Parameters must be given values when generating.
267 | 
268 |         Args:
269 |             X (NDArray):
270 |                 Covariate/Feature matrix
271 |             y (NDArray):
272 |                 Target vector
273 | 
274 |         Returns:
275 |             NamedTuple of cvxpy parameters
276 |         """
277 |         cvx_parameters = {}
278 |         cvx_constraints = (
279 |             {}
280 |             if self._cvx_parameter_constraints is None
281 |             else self._cvx_parameter_constraints
282 |         )
283 |         for param_name, param_val in self.get_params(deep=False).items():
284 |             if param_name not in cvx_constraints:
285 |                 continue
286 | 
287 |             # make constraints sklearn constraint objects
288 |             constraints = [
289 |                 make_constraint(constraint)
290 |                 for constraint in cvx_constraints[param_name]
291 |             ]
292 | 
293 |             # For now we will only set nonneg, nonpos, neg, pos, integer, boolean and/or
294 |             # shape of the cvxpy Parameter objects.
295 |             # TODO cxvpy only allows a single one of these to be set (except bool and integer)
296 |             param_kwargs = {}
297 |             for constraint in constraints:
298 |                 if isinstance(constraint, _ArrayLikes):
299 |                     if not hasattr(param_val, "shape"):
300 |                         param_val = np.asarray(param_val)
301 | 
302 |                     param_kwargs["shape"] = param_val.shape
303 | 
304 |                 if isinstance(constraint, _Booleans):
305 |                     param_kwargs["boolean"] = True
306 | 
307 |                 if isinstance(constraint, _InstancesOf):
308 |                     if constraint.is_satisfied_by(True):  # is it boolean
309 |                         param_kwargs["boolean"] = True
310 |                     elif constraint.is_satisfied_by(5):  # is it integer
311 |                         param_kwargs["integer"] = True
312 | 
313 |                 if isinstance(constraint, Interval):
314 |                     if constraint.type is Integral:
315 |                         param_kwargs["integer"] = True
316 |                     if constraint.left is not None:
317 |                         if constraint.left == 0:
318 |                             if constraint.closed in ("left", "both"):
319 |                                 param_kwargs["nonneg"] = True
320 |                             else:
321 |                                 param_kwargs["pos"] = True
322 |                         elif constraint.left > 0:
323 |                             param_kwargs["pos"] = True
324 |                     if constraint.right is not None:
325 |                         if constraint.right == 0:
326 |                             if constraint.closed in ("right", "both"):
327 |                                 param_kwargs["nonpos"] = True
328 |                             else:
329 |                                 param_kwargs["neg"] = True
330 |                         elif constraint.right < 0:
331 |                             param_kwargs["neg"] = True
332 |                 cvx_parameters[param_name] = cp.Parameter(
333 |                     value=param_val, **param_kwargs
334 |                 )
335 | 
336 |         return SimpleNamespace(**cvx_parameters)
337 | 
338 |     def _generate_auxiliaries(
339 |         self, X: NDArray, y: NDArray, beta: cp.Variable, parameters: SimpleNamespace
340 |     ) -> SimpleNamespace | None:
341 |         """Generate any auxiliary variables/expressions necessary to define objective.
342 | 
343 |         Args:
344 |             X (NDArray):
345 |                 Covariate/Feature matrix
346 |             y (NDArray):
347 |                 Target vector
348 |             beta (cp.Variable):
349 |                 cp.Variable representing the estimated coefs_
350 |             parameters (SimpleNamespace):
351 |                 SimpleNamespace of cvxpy parameters.
352 | 
353 |         Returns:
354 |             SimpleNamespace of cp.Variable for auxiliary variables
355 |         """
356 |         return None
357 | 
358 |     @abstractmethod
359 |     def _generate_objective(
360 |         self,
361 |         X: NDArray,
362 |         y: NDArray,
363 |         beta: cp.Variable,
364 |         parameters: SimpleNamespace | None = None,
365 |         auxiliaries: SimpleNamespace | None = None,
366 |     ) -> cp.Expression:
367 |         """Define the cvxpy objective function represeting regression model.
368 | 
369 |         The objective must be stated for a minimization problem.
370 | 
371 |         Args:
372 |             X (NDArray):
373 |                 Covariate/Feature matrix
374 |             y (NDArray):
375 |                 Target vector
376 |             beta (cp.Variable):
377 |                 cp.Variable representing the estimated coefs_
378 |             parameters (SimpleNamespace): optional
379 |                 SimpleNamespace with cp.Parameter objects
380 |             auxiliaries (SimpleNamespace): optional
381 |                 SimpleNamespace with auxiliary cvxpy objects
382 | 
383 |         Returns:
384 |             cvxpy Expression
385 |         """
386 | 
387 |     def _generate_constraints(
388 |         self,
389 |         X: NDArray,
390 |         y: NDArray,
391 |         beta: cp.Variable,
392 |         parameters: SimpleNamespace | None = None,
393 |         auxiliaries: SimpleNamespace | None = None,
394 |     ) -> list[cp.Constraint]:
395 |         """Generate constraints for optimization problem.
396 | 
397 |         Args:
398 |             X (NDArray):
399 |                 Covariate/Feature matrix
400 |             y (NDArray):
401 |                 Target vector
402 |             beta (cp.Variable):
403 |                 cp.Variable representing the estimated coefs_
404 |             parameters (SimpleNamespace): optional
405 |                 SimpleNamespace with cp.Parameter objects
406 |             auxiliaries (SimpleNamespace): optional
407 |                 SimpleNamespace with auxiliary cvxpy objects
408 | 
409 |         Returns:
410 |             list of cvxpy constraints
411 |         """
412 |         return []
413 | 
414 |     def generate_problem(
415 |         self,
416 |         X: NDArray,
417 |         y: NDArray,
418 |         preprocess_data: bool = True,
419 |         sample_weight: NDArray[np.floating] | None = None,
420 |     ) -> None:
421 |         """Generate regression problem and auxiliary cvxpy objects.
422 | 
423 |         This initializes the minimization problem, the objective, coefficient variable
424 |         (beta), problem parameters, solution constraints, and auxiliary variables/terms.
425 | 
426 |         This is (almost always) called in the fit method, and not directly. However, it
427 |         can be called directly if further control over the problem is needed by
428 |         accessing the canonicals_ objects. For example to add additional constraints on
429 |         problem variables.
430 | 
431 |         Args:
432 |             X (NDArray):
433 |                 Covariate/Feature matrix
434 |             y (NDArray):
435 |                 Target vector
436 |             preprocess_data (bool):
437 |                 Whether to preprocess the data before generating the problem. If calling
438 |                 generate_problem directly, this should be kept as True to ensure the
439 |                 problem is generated correctly for a subsequent call to fit.
440 |             sample_weight (NDArray):
441 |                 Individual weights for each sample of shape (n_samples,)
442 |                 default=None. Only used if preprocess_data=True to rescale the data
443 |                 accordingly.
444 |         """
445 |         if preprocess_data is True:
446 |             X, y, _, _, _ = self._preprocess_data(X, y, sample_weight)
447 | 
448 |         # X, y are cached to avoid re-generating problem if fit is called again with
449 |         # same data
450 |         self.cached_X_ = X
451 |         self.cached_y_ = y
452 | 
453 |         beta = cp.Variable(X.shape[1])
454 |         parameters = self._generate_params(X, y)
455 |         auxiliaries = self._generate_auxiliaries(X, y, beta, parameters)
456 |         objective = self._generate_objective(X, y, beta, parameters, auxiliaries)
457 |         constraints = self._generate_constraints(X, y, beta, parameters, auxiliaries)
458 |         problem = cp.Problem(cp.Minimize(objective), constraints)
459 |         self.canonicals_ = CVXCanonicals(
460 |             problem=problem,
461 |             objective=objective,
462 |             beta=beta,
463 |             parameters=parameters,
464 |             auxiliaries=auxiliaries,
465 |             constraints=constraints,
466 |             user_constraints=[],
467 |         )
468 | 
469 |     def add_constraints(self, constraints: list[cp.Constraint]) -> None:
470 |         """Add a constraint to the problem.
471 | 
472 |         .. Warning::
473 |             Adding constraints will not work with any sklearn class that relies on
474 |             cloning the estimator (ie GridSearchCV, etc) . This is because a new cvxpy
475 |             problem is generated for any cloned estimator.
476 | 
477 |         Args:
478 |             constraints (list of cp.constraint or cp.expressions):
479 |                 cvxpy constraint to add to the problem
480 |         """
481 |         if not hasattr(self, "canonicals_"):
482 |             raise RuntimeError(
483 |                 "Problem has not been generated. Please call generate_problem before"
484 |                 " adding constraints."
485 |             )
486 |         self.canonicals_.user_constraints.extend(list(constraints))
487 |         # need to reset problem to update constraints
488 |         self._reset_problem()
489 | 
490 |     def _reset_problem(self) -> None:
491 |         """Reset the cvxpy problem."""
492 |         if not hasattr(self, "canonicals_"):
493 |             raise RuntimeError(
494 |                 "Problem has not been generated. Please call generate_problem before"
495 |                 " resetting."
496 |             )
497 | 
498 |         problem = cp.Problem(
499 |             cp.Minimize(self.canonicals_.objective),
500 |             self.canonicals_.constraints + self.canonicals_.user_constraints,
501 |         )
502 |         self.canonicals_ = CVXCanonicals(
503 |             problem=problem,
504 |             objective=self.canonicals_.objective,
505 |             beta=self.canonicals_.beta,
506 |             parameters=self.canonicals_.parameters,
507 |             auxiliaries=self.canonicals_.auxiliaries,
508 |             constraints=self.canonicals_.constraints,
509 |             user_constraints=self.canonicals_.user_constraints,
510 |         )
511 | 
512 |     def _solve(
513 |         self, X: NDArray, y: NDArray, solver_options: dict, *args, **kwargs
514 |     ) -> NDArray[np.floating]:
515 |         """Solve the cvxpy problem."""
516 |         self.canonicals_.problem.solve(
517 |             solver=self.solver, warm_start=self.warm_start, **solver_options
518 |         )
519 |         return self.canonicals_.beta.value
520 | 
521 | 
522 | class TikhonovMixin:
523 |     """Mixin class to add a Tihhonov/ridge regularization term.
524 | 
525 |     When using this Mixin, a cvxpy parameter named "eta" should be saved in the
526 |     parameters SimpleNamespace an attribute tikhonov_w can be added to allow a matrix
527 |     otherwise simple l2/Ridge is used.
528 |     """
529 | 
530 |     def _generate_objective(
531 |         self,
532 |         X: NDArray,
533 |         y: NDArray,
534 |         beta: cp.Variable,
535 |         parameters: SimpleNamespace | None = None,
536 |         auxiliaries: SimpleNamespace | None = None,
537 |     ) -> cp.Expression:
538 |         """Add a Tikhnonov regularization term to the objective function."""
539 |         if hasattr(self, "tikhonov_w") and self.tikhonov_w is not None:
540 |             tikhonov_w = self.tikhonov_w
541 |         else:
542 |             tikhonov_w = np.eye(X.shape[1])
543 |         assert parameters is not None and hasattr(parameters, "eta")
544 |         c0 = 2 * X.shape[0]  # keeps hyperparameter scale independent
545 |         objective = super()._generate_objective(X, y, beta, parameters, auxiliaries)  # type: ignore
546 |         objective += c0 * parameters.eta * cp.sum_squares(tikhonov_w @ beta)
547 | 
548 |         return objective
549 | 


--------------------------------------------------------------------------------
/src/sparselm/model/_miqp/__init__.py:
--------------------------------------------------------------------------------
 1 | """MIQP based regression Regressors."""
 2 | 
 3 | from ._best_subset import BestSubsetSelection, RidgedBestSubsetSelection
 4 | from ._regularized_l0 import L1L0, L2L0, RegularizedL0
 5 | 
 6 | __all__ = [
 7 |     "BestSubsetSelection",
 8 |     "RidgedBestSubsetSelection",
 9 |     "RegularizedL0",
10 |     "L1L0",
11 |     "L2L0",
12 | ]
13 | 


--------------------------------------------------------------------------------
/src/sparselm/model/_miqp/_base.py:
--------------------------------------------------------------------------------
  1 | """Base class for mixed-integer quadratic programming l0 pseudo norm based Regressors."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | __author__ = "Luis Barroso-Luque"
  6 | 
  7 | from abc import ABCMeta, abstractmethod
  8 | from numbers import Real
  9 | from types import SimpleNamespace
 10 | from typing import Any
 11 | 
 12 | import cvxpy as cp
 13 | import numpy as np
 14 | from cvxpy.atoms.affine.wraps import psd_wrap
 15 | from numpy.typing import NDArray
 16 | from sklearn.utils._param_validation import Interval
 17 | 
 18 | from ..._utils.validation import _check_groups
 19 | from .._base import CVXRegressor
 20 | 
 21 | 
 22 | class MIQPl0(CVXRegressor, metaclass=ABCMeta):
 23 |     r"""Base class for mixed-integer quadratic programming (MIQP) Regressors.
 24 | 
 25 |     Generalized l0 formulation that allows grouping coefficients, based on:
 26 | 
 27 |     https://doi.org/10.1287/opre.2015.1436
 28 | 
 29 |     Args:
 30 |         groups (list or ndarray):
 31 |             array-like of integers specifying groups. Length should be the
 32 |             same as model, where each integer entry specifies the group
 33 |             each parameter corresponds to. If no grouping is required, simply
 34 |             pass a list of all different numbers, i.e. using range.
 35 |         big_M (float):
 36 |             Upper bound on the norm of coefficients associated with each
 37 |             groups of coefficients :math:`||\beta_c||_2`.
 38 |         hierarchy (list):
 39 |             A list of lists of integers storing hierarchy relations between
 40 |             coefficients.
 41 |             Each sublist contains indices of other coefficients
 42 |             on which the coefficient associated with each element of
 43 |             the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
 44 |             coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no
 45 |             dependence.
 46 |         ignore_psd_check (bool):
 47 |             Whether to ignore cvxpy's PSD checks  of matrix used in quadratic
 48 |             form. Default is True to avoid raising errors for poorly
 49 |             conditioned matrices. But if you want to be strict set to False.
 50 |         fit_intercept (bool):
 51 |             Whether the intercept should be estimated or not.
 52 |             If False, the data is assumed to be already centered.
 53 |         copy_X (bool):
 54 |             If True, X will be copied; else, it may be overwritten.
 55 |         warm_start (bool):
 56 |             When set to True, reuse the solution of the previous call to
 57 |             fit as initialization, otherwise, just erase the previous
 58 |             solution.
 59 |         solver (str):
 60 |             cvxpy backend solver to use. Supported solvers are listed here:
 61 |             https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
 62 |         solver_options (dict):
 63 |             dictionary of keyword arguments passed to cvxpy solve.
 64 |             See docs in CVXRegressor for more information.
 65 |     """
 66 | 
 67 |     _parameter_constraints: dict[str, list[Any]] = {
 68 |         "ignore_psd_check": ["boolean"],
 69 |         **CVXRegressor._parameter_constraints,
 70 |     }
 71 | 
 72 |     _cvx_parameter_constraints: dict[str, list[Any]] = {
 73 |         "big_M": [Interval(type=Real, left=0.0, right=None, closed="left")]
 74 |     }
 75 | 
 76 |     @abstractmethod  # force inspect.isabstract to return True
 77 |     def __init__(
 78 |         self,
 79 |         groups: NDArray[np.floating | np.integer] | None = None,
 80 |         big_M: int = 100,
 81 |         hierarchy: list[list[int]] | None = None,
 82 |         ignore_psd_check: bool = True,
 83 |         fit_intercept: bool = False,
 84 |         copy_X: bool = True,
 85 |         warm_start: bool = False,
 86 |         solver: str | None = None,
 87 |         solver_options: dict | None = None,
 88 |     ):
 89 |         super().__init__(
 90 |             fit_intercept=fit_intercept,
 91 |             copy_X=copy_X,
 92 |             warm_start=warm_start,
 93 |             solver=solver,
 94 |             solver_options=solver_options,
 95 |         )
 96 | 
 97 |         self.hierarchy = hierarchy
 98 |         self.ignore_psd_check = ignore_psd_check
 99 |         self.groups = groups
100 |         self.big_M = big_M
101 | 
102 |     def _validate_params(self, X: NDArray, y: NDArray) -> None:
103 |         """Validate parameters."""
104 |         super()._validate_params(X, y)
105 |         _check_groups(self.groups, X.shape[1])
106 | 
107 |     def _generate_auxiliaries(
108 |         self, X: NDArray, y: NDArray, beta: cp.Variable, parameters: SimpleNamespace
109 |     ) -> SimpleNamespace | None:
110 |         """Generate the boolean slack variable."""
111 |         n_groups = X.shape[1] if self.groups is None else len(np.unique(self.groups))
112 |         return SimpleNamespace(z0=cp.Variable(n_groups, boolean=True))
113 | 
114 |     def _generate_objective(
115 |         self,
116 |         X: NDArray,
117 |         y: NDArray,
118 |         beta: cp.Variable,
119 |         parameters: SimpleNamespace | None = None,
120 |         auxiliaries: SimpleNamespace | None = None,
121 |     ) -> cp.Expression:
122 |         """Generate the quadratic form portion of objective."""
123 |         # psd_wrap will ignore cvxpy PSD checks, without it errors will
124 |         # likely be raised since correlation matrices are usually very
125 |         # poorly conditioned
126 |         XTX = psd_wrap(X.T @ X) if self.ignore_psd_check else X.T @ X
127 |         objective = cp.quad_form(beta, XTX) - 2 * y.T @ X @ beta
128 |         # objective = cp.sum_squares(X @ self.beta_ - y)
129 |         return objective
130 | 
131 |     def _generate_constraints(
132 |         self,
133 |         X: NDArray,
134 |         y: NDArray,
135 |         beta: cp.Variable,
136 |         parameters: SimpleNamespace | None = None,
137 |         auxiliaries: SimpleNamespace | None = None,
138 |     ) -> list[cp.Constraint]:
139 |         """Generate the constraints used to solve l0 regularization."""
140 |         assert auxiliaries is not None and parameters is not None
141 |         groups = np.arange(X.shape[1]) if self.groups is None else self.groups
142 |         group_masks = [groups == i for i in np.sort(np.unique(groups))]
143 |         constraints = []
144 |         for i, mask in enumerate(group_masks):
145 |             constraints += [
146 |                 -parameters.big_M * auxiliaries.z0[i] <= beta[mask],
147 |                 beta[mask] <= parameters.big_M * auxiliaries.z0[i],
148 |             ]
149 | 
150 |         if self.hierarchy is not None:
151 |             constraints += self._generate_hierarchy_constraints(groups, auxiliaries.z0)
152 | 
153 |         return constraints
154 | 
155 |     def _generate_hierarchy_constraints(
156 |         self, groups: NDArray, z0: cp.Variable
157 |     ) -> list[cp.Constraint]:
158 |         """Generate single feature hierarchy constraints."""
159 |         assert self.hierarchy is not None
160 |         group_ids = np.sort(np.unique(groups))
161 |         z0_index = {gid: i for i, gid in enumerate(group_ids)}
162 |         constraints = [
163 |             z0[z0_index[high_id]] <= z0[z0_index[sub_id]]
164 |             for high_id, sub_ids in zip(group_ids, self.hierarchy)
165 |             for sub_id in sub_ids
166 |         ]
167 |         return constraints
168 | 


--------------------------------------------------------------------------------
/src/sparselm/model/_miqp/_best_subset.py:
--------------------------------------------------------------------------------
  1 | """MIQP based solvers for Best Subset Selection solutions.
  2 | 
  3 | Allows hierarchy constraints similar to mixed L0 solvers.
  4 | """
  5 | 
  6 | from __future__ import annotations
  7 | 
  8 | __author__ = "Luis Barroso-Luque"
  9 | 
 10 | from numbers import Real
 11 | from types import SimpleNamespace
 12 | from typing import Any
 13 | 
 14 | import cvxpy as cp
 15 | import numpy as np
 16 | from numpy.typing import NDArray
 17 | from sklearn.utils._param_validation import Interval
 18 | 
 19 | from sparselm.model._base import TikhonovMixin
 20 | 
 21 | from ._base import MIQPl0
 22 | 
 23 | 
 24 | class BestSubsetSelection(MIQPl0):
 25 |     r"""MIQP Best Subset Selection Regressor.
 26 | 
 27 |     Generalized best subset that allows grouping subsets.
 28 | 
 29 |     Args:
 30 |         groups (NDArray):
 31 |             array-like of integers specifying groups. Length should be the
 32 |             same as model, where each integer entry specifies the group
 33 |             each parameter corresponds to. If no grouping is required,
 34 |             simply pass a list of all different numbers, i.e. using range.
 35 |         sparse_bound (int):
 36 |             Upper bound on sparsity. The upper bound on total number of
 37 |             nonzero coefficients.
 38 |         big_M (float):
 39 |             Upper bound on the norm of coefficients associated with each
 40 |             groups of coefficients :math:`||\beta_c||_2`.
 41 |         hierarchy (list):
 42 |             A list of lists of integers storing hierarchy relations between
 43 |             coefficients.
 44 |             Each sublist contains indices of other coefficients
 45 |             on which the coefficient associated with each element of
 46 |             the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
 47 |             coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no
 48 |             dependence.
 49 |         ignore_psd_check (bool):
 50 |             Whether to ignore cvxpy's PSD checks of matrix used in
 51 |             quadratic form. Default is True to avoid raising errors for
 52 |             poorly conditioned matrices. But if you want to be strict set
 53 |             to False.
 54 |         fit_intercept (bool):
 55 |             Whether the intercept should be estimated or not.
 56 |             If False, the data is assumed to be already centered.
 57 |         copy_X (bool):
 58 |             If True, X will be copied; else, it may be overwritten.
 59 |         warm_start (bool):
 60 |             When set to True, reuse the solution of the previous call to
 61 |             fit as initialization, otherwise, just erase the previous
 62 |             solution.
 63 |         solver (str):
 64 |             cvxpy backend solver to use. Supported solvers are listed here:
 65 |             https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
 66 |         solver_options (dict):
 67 |             dictionary of keyword arguments passed to cvxpy solve.
 68 |             See docs in CVXRegressor for more information.
 69 | 
 70 |     Attributes:
 71 |         coef_ (NDArray):
 72 |             Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,).
 73 |         intercept_ (float):
 74 |             Independent term in decision function.
 75 |         canonicals_ (SimpleNamespace):
 76 |             Namespace that contains underlying cvxpy objects used to define
 77 |             the optimization problem. The objects included are the following:
 78 |             - objective - the objective function.
 79 |             - beta - variable to be optimized (corresponds to the estimated coef_ attribute).
 80 |             - parameters - hyper-parameters
 81 |             - auxiliaries - auxiliary variables and expressions
 82 |             - constraints - solution constraints
 83 | 
 84 |     Note:
 85 |         Installation of Gurobi is not a must, but highly recommended. An open source alternative
 86 |         is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
 87 |         See the Mixed-integer programs section of the cvxpy docs:
 88 |         https://www.cvxpy.org/tutorial/advanced/index.html
 89 | 
 90 |     Warning:
 91 |         Even with gurobi solver, this can take a very long time to converge for large problems and under-determined
 92 |         problems.
 93 |     """
 94 | 
 95 |     _cvx_parameter_constraints: dict[str, list[Any]] = {
 96 |         "sparse_bound": [Interval(type=Real, left=0, right=None, closed="left")],
 97 |         **MIQPl0._cvx_parameter_constraints,
 98 |     }
 99 | 
100 |     def __init__(
101 |         self,
102 |         groups: NDArray[np.floating | np.integer] | None = None,
103 |         sparse_bound=100,
104 |         big_M: int = 100,
105 |         hierarchy: list[list[int]] | None = None,
106 |         ignore_psd_check: bool = True,
107 |         fit_intercept: bool = False,
108 |         copy_X: bool = True,
109 |         warm_start: bool = False,
110 |         solver: str | None = None,
111 |         solver_options: dict | None = None,
112 |     ):
113 |         super().__init__(
114 |             groups=groups,
115 |             big_M=big_M,
116 |             hierarchy=hierarchy,
117 |             ignore_psd_check=ignore_psd_check,
118 |             fit_intercept=fit_intercept,
119 |             copy_X=copy_X,
120 |             warm_start=warm_start,
121 |             solver=solver,
122 |             solver_options=solver_options,
123 |         )
124 |         self.sparse_bound = sparse_bound
125 | 
126 |     def _generate_constraints(
127 |         self,
128 |         X: NDArray,
129 |         y: NDArray,
130 |         beta: cp.Variable,
131 |         parameters: SimpleNamespace | None = None,
132 |         auxiliaries: SimpleNamespace | None = None,
133 |     ) -> list[cp.Constraint]:
134 |         """Generate the constraints for best subset selection."""
135 |         assert parameters is not None
136 |         assert auxiliaries is not None
137 |         constraints = super()._generate_constraints(X, y, beta, parameters, auxiliaries)
138 |         constraints += [cp.sum(auxiliaries.z0) <= parameters.sparse_bound]
139 |         return constraints
140 | 
141 | 
142 | class RidgedBestSubsetSelection(TikhonovMixin, BestSubsetSelection):
143 |     r"""MIQP best subset selection Regressor with Ridge/Tihkonov regularization.
144 | 
145 |     Args:
146 |         groups (NDArray):
147 |             array-like of integers specifying groups. Length should be the
148 |             same as model, where each integer entry specifies the group
149 |             each parameter corresponds to. If no grouping is required,
150 |             simply pass a list of all different numbers, i.e. using range.
151 |         sparse_bound (int):
152 |             Upper bound on sparsity. The upper bound on total number of
153 |             nonzero coefficients.
154 |         eta (float):
155 |             L2 regularization hyper-parameter.
156 |         big_M (float):
157 |             Upper bound on the norm of coefficients associated with each
158 |             groups of coefficients :math:`||\beta_c||_2`.
159 |         hierarchy (list):
160 |             A list of lists of integers storing hierarchy relations between
161 |             coefficients.
162 |             Each sublist contains indices of other coefficients
163 |             on which the coefficient associated with each element of
164 |             the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
165 |             coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no
166 |             dependence.
167 |         tikhonov_w (np.array):
168 |             Matrix to add weights to L2 regularization.
169 |         ignore_psd_check (bool):
170 |             Whether to ignore cvxpy's PSD checks of matrix used in
171 |             quadratic form. Default is True to avoid raising errors for
172 |             poorly conditioned matrices. But if you want to be strict set
173 |             to False.
174 |         fit_intercept (bool):
175 |             Whether the intercept should be estimated or not.
176 |             If False, the data is assumed to be already centered.
177 |         copy_X (bool):
178 |             If True, X will be copied; else, it may be overwritten.
179 |         warm_start (bool):
180 |             When set to True, reuse the solution of the previous call to
181 |             fit as initialization, otherwise, just erase the previous
182 |             solution.
183 |         solver (str):
184 |             cvxpy backend solver to use. Supported solvers are listed here:
185 |             https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
186 |         solver_options (dict):
187 |             dictionary of keyword arguments passed to cvxpy solve.
188 |             See docs in CVXRegressor for more information.
189 | 
190 |     Attributes:
191 |         coef_ (NDArray):
192 |             Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,).
193 |         intercept_ (float):
194 |             Independent term in decision function.
195 |         canonicals_ (SimpleNamespace):
196 |             Namespace that contains underlying cvxpy objects used to define
197 |             the optimization problem. The objects included are the following:
198 |                 - objective - the objective function.
199 |                 - beta - variable to be optimized (corresponds to the estimated coef_ attribute).
200 |                 - parameters - hyper-parameters
201 |                 - auxiliaries - auxiliary variables and expressions
202 |                 - constraints - solution constraints
203 | 
204 |     Note:
205 |         Installation of Gurobi is not a must, but highly recommended. An open source alternative
206 |         is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
207 |         See the Mixed-integer programs section of the cvxpy docs:
208 |         https://www.cvxpy.org/tutorial/advanced/index.html
209 | 
210 |     Warning:
211 |         Even with gurobi solver, this can take a very long time to converge for large problems and under-determined
212 |         problems.
213 |     """
214 | 
215 |     _cvx_parameter_constraints: dict[str, list[Any]] = {
216 |         "eta": [Interval(type=Real, left=0.0, right=None, closed="left")],
217 |         **BestSubsetSelection._cvx_parameter_constraints,
218 |     }
219 | 
220 |     def __init__(
221 |         self,
222 |         groups: NDArray[np.floating | np.integer] | None = None,
223 |         sparse_bound: int = 100,
224 |         eta: float = 1.0,
225 |         big_M: int = 100,
226 |         hierarchy: list[list[int]] | None = None,
227 |         tikhonov_w: NDArray[np.floating] | None = None,
228 |         ignore_psd_check: bool = True,
229 |         fit_intercept: bool = False,
230 |         copy_X: bool = True,
231 |         warm_start: bool = False,
232 |         solver: str | None = None,
233 |         solver_options: dict | None = None,
234 |     ):
235 |         super().__init__(
236 |             groups=groups,
237 |             sparse_bound=sparse_bound,
238 |             big_M=big_M,
239 |             hierarchy=hierarchy,
240 |             ignore_psd_check=ignore_psd_check,
241 |             fit_intercept=fit_intercept,
242 |             copy_X=copy_X,
243 |             warm_start=warm_start,
244 |             solver=solver,
245 |             solver_options=solver_options,
246 |         )
247 |         self.tikhonov_w = tikhonov_w
248 |         self.eta = eta
249 | 


--------------------------------------------------------------------------------
/src/sparselm/model/_miqp/_regularized_l0.py:
--------------------------------------------------------------------------------
  1 | """MIQP based solvers for sparse solutions with hierarchical constraints.
  2 | 
  3 | Generalized regularized l0 solvers that allow grouping parameters as detailed in:
  4 | 
  5 |     https://doi.org/10.1287/opre.2015.1436
  6 | 
  7 | L1L0 proposed by Wenxuan Huang:
  8 | 
  9 |     https://arxiv.org/abs/1807.10753
 10 | 
 11 | L2L0 proposed by Peichen Zhong:
 12 | 
 13 |     https://journals.aps.org/prb/abstract/10.1103/PhysRevB.106.024203
 14 | 
 15 | Regressors allow optional inclusion of hierarchical constraints at the single coefficient
 16 | or group of coefficients level.
 17 | """
 18 | 
 19 | from __future__ import annotations
 20 | 
 21 | __author__ = "Luis Barroso-Luque, Fengyu Xie"
 22 | 
 23 | 
 24 | from abc import ABCMeta, abstractmethod
 25 | from numbers import Real
 26 | from types import SimpleNamespace
 27 | from typing import Any
 28 | 
 29 | import cvxpy as cp
 30 | import numpy as np
 31 | from numpy.typing import NDArray
 32 | from sklearn.utils._param_validation import Interval
 33 | 
 34 | from sparselm.model._base import TikhonovMixin
 35 | 
 36 | from ._base import MIQPl0
 37 | 
 38 | 
 39 | class RegularizedL0(MIQPl0):
 40 |     r"""Implementation of mixed-integer quadratic programming l0 regularized Regressor.
 41 | 
 42 |     Supports grouping parameters and group-level hierarchy, but requires groups as a
 43 |     compulsory argument.
 44 | 
 45 |     Regularized regression objective:
 46 | 
 47 |     .. math::
 48 | 
 49 |         \min_{\beta} || X \beta - y ||^2_2 + \alpha \sum_{G} z_G
 50 | 
 51 |     Where G represents groups of features/coefficients and :math:`z_G` is are boolean
 52 |     valued slack variables.
 53 | 
 54 |     Args:
 55 |         groups (NDArray):
 56 |             1D array-like of integers specifying groups. Length should be the
 57 |             same as model, where each integer entry specifies the group
 58 |             each parameter corresponds to. If no grouping is needed pass a list
 59 |             of all distinct numbers (ie range(len(coefs)) to create singleton groups
 60 |             for each parameter.
 61 |         alpha (float):
 62 |             L0 pseudo-norm regularization hyper-parameter.
 63 |         big_M (float):
 64 |             Upper bound on the norm of coefficients associated with each
 65 |             groups of coefficients :math:`||\beta_c||_2`.
 66 |         hierarchy (list):
 67 |             A list of lists of integers storing hierarchy relations between
 68 |             groups.
 69 |             Each sublist contains indices of other groups
 70 |             on which the group associated with each element of
 71 |             the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
 72 |             group 0 depends on 1, and 2; 1 depends on 0, and 2 has no
 73 |             dependence.
 74 |         ignore_psd_check (bool):
 75 |             Whether to ignore cvxpy's PSD checks  of matrix used in quadratic
 76 |             form. Default is True to avoid raising errors for poorly
 77 |             conditioned matrices. But if you want to be strict set to False.
 78 |         fit_intercept (bool):
 79 |             Whether the intercept should be estimated or not.
 80 |             If False, the data is assumed to be already centered.
 81 |         copy_X (bool):
 82 |             If True, X will be copied; else, it may be overwritten.
 83 |         warm_start (bool):
 84 |             When set to True, reuse the solution of the previous call to
 85 |             fit as initialization, otherwise, just erase the previous
 86 |             solution.
 87 |         solver (str):
 88 |             cvxpy backend solver to use. Supported solvers are listed here:
 89 |             https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
 90 |         solver_options (dict):
 91 |             dictionary of keyword arguments passed to cvxpy solve.
 92 |             See docs in CVXRegressor for more information.
 93 | 
 94 |     Attributes:
 95 |         coef_ (NDArray):
 96 |             Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,).
 97 |         intercept_ (float):
 98 |             Independent term in decision function.
 99 |         canonicals_ (SimpleNamespace):
100 |             Namespace that contains underlying cvxpy objects used to define
101 |             the optimization problem. The objects included are the following:
102 |                 - objective - the objective function.
103 |                 - beta - variable to be optimized (corresponds to the estimated coef_ attribute).
104 |                 - parameters - hyper-parameters
105 |                 - auxiliaries - auxiliary variables and expressions
106 |                 - constraints - solution constraints
107 | 
108 |     Note:
109 |         Installation of Gurobi is not a must, but highly recommended. An open source alternative
110 |         is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
111 |         See the Mixed-integer programs section of the cvxpy docs:
112 |         https://www.cvxpy.org/tutorial/advanced/index.html
113 |     """
114 | 
115 |     _cvx_parameter_constraints: dict[str, list[Any]] = {
116 |         "alpha": [Interval(type=Real, left=0.0, right=None, closed="left")],
117 |         **MIQPl0._cvx_parameter_constraints,
118 |     }
119 | 
120 |     def __init__(
121 |         self,
122 |         groups: NDArray[np.floating | np.integer] | None = None,
123 |         alpha: float = 1.0,
124 |         big_M: int = 100,
125 |         hierarchy: list[list[int]] | None = None,
126 |         ignore_psd_check: bool = True,
127 |         fit_intercept: bool = False,
128 |         copy_X: bool = True,
129 |         warm_start: bool = False,
130 |         solver: str | None = None,
131 |         solver_options: dict | None = None,
132 |     ):
133 |         super().__init__(
134 |             groups=groups,
135 |             big_M=big_M,
136 |             hierarchy=hierarchy,
137 |             ignore_psd_check=ignore_psd_check,
138 |             fit_intercept=fit_intercept,
139 |             copy_X=copy_X,
140 |             warm_start=warm_start,
141 |             solver=solver,
142 |             solver_options=solver_options,
143 |         )
144 |         self.alpha = alpha
145 | 
146 |     def _generate_objective(
147 |         self,
148 |         X: NDArray,
149 |         y: NDArray,
150 |         beta: cp.Variable,
151 |         parameters: SimpleNamespace | None = None,
152 |         auxiliaries: SimpleNamespace | None = None,
153 |     ) -> cp.Expression:
154 |         """Generate the quadratic form and l0 regularization portion of objective."""
155 |         assert parameters is not None
156 |         assert auxiliaries is not None
157 |         c0 = 2 * X.shape[0]  # keeps hyperparameter scale independent
158 |         objective = super()._generate_objective(
159 |             X, y, beta, parameters, auxiliaries
160 |         ) + c0 * parameters.alpha * cp.sum(auxiliaries.z0)
161 |         return objective
162 | 
163 | 
164 | class MixedL0(RegularizedL0, metaclass=ABCMeta):
165 |     """Abstract base class for mixed L0 regularization models: L1L0 and L2L0."""
166 | 
167 |     _cvx_parameter_constraints: dict[str, list[Any]] = {
168 |         "eta": [Interval(type=Real, left=0.0, right=None, closed="left")],
169 |         **RegularizedL0._cvx_parameter_constraints,
170 |     }
171 | 
172 |     def __init__(
173 |         self,
174 |         groups: NDArray[np.floating | np.integer] | None = None,
175 |         alpha: float = 1.0,
176 |         eta: float = 1.0,
177 |         big_M: int = 100,
178 |         hierarchy: list[list[int]] | None = None,
179 |         ignore_psd_check: bool = True,
180 |         fit_intercept: bool = False,
181 |         copy_X: bool = True,
182 |         warm_start: bool = False,
183 |         solver: str | None = None,
184 |         solver_options: dict | None = None,
185 |     ):
186 |         """Initialize Regressor.
187 | 
188 |         Args:
189 |             groups (NDArray):
190 |                 1D array-like of integers specifying groups. Length should be the
191 |                 same as model, where each integer entry specifies the group
192 |                 each parameter corresponds to. If no grouping is needed pass a list
193 |                 of all distinct numbers (ie range(len(coefs)) to create singleton groups
194 |                 for each parameter.
195 |             alpha (float):
196 |                 L0 pseudo-norm regularization hyper-parameter.
197 |             eta (float):
198 |                 standard norm regularization hyper-parameter (usually l1 or l2).
199 |             big_M (float):
200 |                 Upper bound on the norm of coefficients associated with each
201 | 
202 |             hierarchy (list):
203 |                 A list of lists of integers storing hierarchy relations between
204 |                 coefficients.
205 |                 Each sublist contains indices of other coefficients
206 |                 on which the coefficient associated with each element of
207 |                 the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
208 |                 coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no
209 |                 dependence.
210 |             ignore_psd_check (bool):
211 |                 Whether to ignore cvxpy's PSD checks  of matrix used in quadratic
212 |                 form. Default is True to avoid raising errors for poorly
213 |                 conditioned matrices. But if you want to be strict set to False.
214 |             fit_intercept (bool):
215 |                 Whether the intercept should be estimated or not.
216 |                 If False, the data is assumed to be already centered.
217 |             copy_X (bool):
218 |                 If True, X will be copied; else, it may be overwritten.
219 |             warm_start (bool):
220 |                 When set to True, reuse the solution of the previous call to
221 |                 fit as initialization, otherwise, just erase the previous
222 |                 solution.
223 |             solver (str):
224 |                 cvxpy backend solver to use. Supported solvers are listed here:
225 |                 https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
226 |             solver_options (dict):
227 |                 dictionary of keyword arguments passed to cvxpy solve.
228 |                 See docs in CVXRegressor for more information.
229 |         """
230 |         super().__init__(
231 |             groups=groups,
232 |             alpha=alpha,
233 |             big_M=big_M,
234 |             hierarchy=hierarchy,
235 |             ignore_psd_check=ignore_psd_check,
236 |             fit_intercept=fit_intercept,
237 |             copy_X=copy_X,
238 |             warm_start=warm_start,
239 |             solver=solver,
240 |             solver_options=solver_options,
241 |         )
242 |         self.eta = eta
243 | 
244 |     @abstractmethod
245 |     def _generate_objective(
246 |         self,
247 |         X: NDArray,
248 |         y: NDArray,
249 |         beta: cp.Variable,
250 |         parameters: SimpleNamespace | None = None,
251 |         auxiliaries: SimpleNamespace | None = None,
252 |     ) -> cp.Expression:
253 |         """Generate optimization objective."""
254 |         # implement in derived classes using super to call MIQP_L0 objective
255 |         return super()._generate_objective(X, y, beta, parameters, auxiliaries)
256 | 
257 | 
258 | class L1L0(MixedL0):
259 |     r"""L1L0 regularized Regressor.
260 | 
261 |     Regressor with L1L0 regularization solved with mixed integer programming
262 |     as discussed in:
263 | 
264 |     https://arxiv.org/abs/1807.10753
265 | 
266 |     Extended to allow grouping of coefficients and group-level hierarchy as described
267 |     in:
268 | 
269 |     https://doi.org/10.1287/opre.2015.1436
270 | 
271 |     Regularized regression objective:
272 | 
273 |     .. math::
274 | 
275 |         \min_{\beta} || X \beta - y ||^2_2 + \alpha \sum_{G} z_G + \eta ||\beta||_1
276 | 
277 |     Where G represents groups of features/coefficients and :math:`z_G` is are boolean
278 |     valued slack variables.
279 | 
280 |     Args:
281 |         groups (NDArray):
282 |             1D array-like of integers specifying groups. Length should be the
283 |             same as model, where each integer entry specifies the group
284 |             each parameter corresponds to. If no grouping is needed pass a list
285 |             of all distinct numbers (ie range(len(coefs)) to create singleton groups
286 |             for each parameter.
287 |         alpha (float):
288 |             L0 pseudo-norm regularization hyper-parameter.
289 |         eta (float):
290 |             L1 regularization hyper-parameter.
291 |         big_M (float):
292 |             Upper bound on the norm of coefficients associated with each
293 |             groups of coefficients :math:`||\beta_c||_2`.
294 |         hierarchy (list):
295 |             A list of lists of integers storing hierarchy relations between
296 |             coefficients.
297 |             Each sublist contains indices of other coefficients
298 |             on which the coefficient associated with each element of
299 |             the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
300 |             coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no
301 |             dependence.
302 |         ignore_psd_check (bool):
303 |             Whether to ignore cvxpy's PSD checks of matrix used in quadratic
304 |             form. Default is True to avoid raising errors for poorly
305 |             conditioned matrices. But if you want to be strict set to False.
306 |         fit_intercept (bool):
307 |             Whether the intercept should be estimated or not.
308 |             If False, the data is assumed to be already centered.
309 |         copy_X (bool):
310 |             If True, X will be copied; else, it may be overwritten.
311 |         warm_start (bool):
312 |             When set to True, reuse the solution of the previous call to
313 |             fit as initialization, otherwise, just erase the previous
314 |             solution.
315 |         solver (str):
316 |             cvxpy backend solver to use. Supported solvers are listed here:
317 |             https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
318 |         solver_options (dict):
319 |             dictionary of keyword arguments passed to cvxpy solve.
320 |             See docs in CVXRegressor for more information.
321 | 
322 |     Attributes:
323 |         coef_ (NDArray):
324 |             Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,).
325 |         intercept_ (float):
326 |             Independent term in decision function.
327 |         canonicals_ (SimpleNamespace):
328 |             Namespace that contains underlying cvxpy objects used to define
329 |             the optimization problem. The objects included are the following:
330 |                 - objective - the objective function.
331 |                 - beta - variable to be optimized (corresponds to the estimated coef_ attribute).
332 |                 - parameters - hyper-parameters
333 |                 - auxiliaries - auxiliary variables and expressions
334 |                 - constraints - solution constraints
335 | 
336 |     Note:
337 |         Installation of Gurobi is not a must, but highly recommended. An open source alternative
338 |         is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
339 |         See the Mixed-integer programs section of the cvxpy docs:
340 |         https://www.cvxpy.org/tutorial/advanced/index.html
341 |     """
342 | 
343 |     def __init__(
344 |         self,
345 |         groups: NDArray[np.floating | np.integer] | None = None,
346 |         alpha: float = 1.0,
347 |         eta: float = 1.0,
348 |         big_M: int = 100,
349 |         hierarchy: list[list[int]] | None = None,
350 |         ignore_psd_check: bool = True,
351 |         fit_intercept: bool = False,
352 |         copy_X: bool = True,
353 |         warm_start: bool = False,
354 |         solver: str | None = None,
355 |         solver_options: dict | None = None,
356 |     ):
357 |         super().__init__(
358 |             groups=groups,
359 |             eta=eta,
360 |             alpha=alpha,
361 |             big_M=big_M,
362 |             hierarchy=hierarchy,
363 |             ignore_psd_check=ignore_psd_check,
364 |             fit_intercept=fit_intercept,
365 |             copy_X=copy_X,
366 |             warm_start=warm_start,
367 |             solver=solver,
368 |             solver_options=solver_options,
369 |         )
370 | 
371 |     def _generate_auxiliaries(
372 |         self, X: NDArray, y: NDArray, beta: cp.Variable, parameters: SimpleNamespace
373 |     ) -> SimpleNamespace | None:
374 |         """Generate the boolean slack variable."""
375 |         auxiliaries = super()._generate_auxiliaries(X, y, beta, parameters)
376 |         X.shape[1] if self.groups is None else len(np.unique(self.groups))
377 |         auxiliaries.z1 = cp.Variable(X.shape[1])  # type: ignore
378 |         return auxiliaries
379 | 
380 |     def _generate_constraints(
381 |         self,
382 |         X: NDArray,
383 |         y: NDArray,
384 |         beta: cp.Variable,
385 |         parameters: SimpleNamespace | None = None,
386 |         auxiliaries: SimpleNamespace | None = None,
387 |     ) -> list[cp.Constraint]:
388 |         """Generate the constraints used to solve l1l0 regularization."""
389 |         assert auxiliaries is not None
390 |         constraints = super()._generate_constraints(X, y, beta, parameters, auxiliaries)
391 |         # L1 constraints (why not do an l1 norm in the objective instead?)
392 |         constraints += [-auxiliaries.z1 <= beta, beta <= auxiliaries.z1]
393 |         return constraints
394 | 
395 |     def _generate_objective(
396 |         self,
397 |         X: NDArray,
398 |         y: NDArray,
399 |         beta: cp.Variable,
400 |         parameters: SimpleNamespace | None = None,
401 |         auxiliaries: SimpleNamespace | None = None,
402 |     ) -> cp.Expression:
403 |         """Generate the objective function used in l1l0 regression model."""
404 |         assert parameters is not None
405 |         assert auxiliaries is not None
406 |         c0 = 2 * X.shape[0]  # keeps hyperparameter scale independent
407 |         objective = super()._generate_objective(X, y, beta, parameters, auxiliaries)
408 |         # L1 term
409 |         objective += c0 * parameters.eta * cp.sum(auxiliaries.z1)
410 |         return objective
411 | 
412 | 
413 | class L2L0(TikhonovMixin, MixedL0):
414 |     r"""L2L0 regularized Regressor.
415 | 
416 |     Based on Regressor with L2L0 regularization solved with mixed integer programming
417 |     proposed in:
418 | 
419 |     https://arxiv.org/abs/2204.13789
420 | 
421 |     Extended to allow grouping of coefficients and group-level hierarchy as described
422 |     in:
423 | 
424 |     https://doi.org/10.1287/opre.2015.1436
425 | 
426 |     And allows using a Tihkonov matrix in the l2 term.
427 | 
428 |     Regularized regression objective:
429 | 
430 |     .. math::
431 | 
432 |         \min_{\beta} || X \beta - y ||^2_2 + \alpha \sum_{G} z_G + \eta ||W\beta||^2_2
433 | 
434 |     Where G represents groups of features/coefficients and :math:`z_G` is are boolean
435 |     valued slack variables. W is a Tikhonov matrix.
436 | 
437 |     Args:
438 |         groups (NDArray):
439 |             1D array-like of integers specifying groups. Length should be the
440 |             same as model, where each integer entry specifies the group
441 |             each parameter corresponds to. If no grouping is needed pass a list
442 |             of all distinct numbers (ie range(len(coefs)) to create singleton groups
443 |             for each parameter.
444 |         alpha (float):
445 |             L0 pseudo-norm regularization hyper-parameter.
446 |         eta (float):
447 |             L2 regularization hyper-parameter.
448 |         big_M (float):
449 |             Upper bound on the norm of coefficients associated with each
450 |             groups of coefficients :math:`||\beta_c||_2`.
451 |         hierarchy (list):
452 |             A list of lists of integers storing hierarchy relations between
453 |             coefficients.
454 |             Each sublist contains indices of other coefficients
455 |             on which the coefficient associated with each element of
456 |             the list depends. i.e. hierarchy = [[1, 2], [0], []] mean that
457 |             coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no
458 |             dependence.
459 |         tikhonov_w (np.array):
460 |             Matrix to add weights to L2 regularization.
461 |         ignore_psd_check (bool):
462 |             Wether to ignore cvxpy's PSD checks of matrix used in quadratic
463 |             form. Default is True to avoid raising errors for poorly
464 |             conditioned matrices. But if you want to be strict set to False.
465 |         fit_intercept (bool):
466 |             Whether the intercept should be estimated or not.
467 |             If False, the data is assumed to be already centered.
468 |         copy_X (bool):
469 |             If True, X will be copied; else, it may be overwritten.
470 |         warm_start (bool):
471 |             When set to True, reuse the solution of the previous call to
472 |             fit as initialization, otherwise, just erase the previous
473 |             solution.
474 |         solver (str):
475 |             cvxpy backend solver to use. Supported solvers are listed here:
476 |             https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
477 |         solver_options (dict):
478 |             dictionary of keyword arguments passed to cvxpy solve.
479 |             See docs in CVXEstimator for more information.
480 | 
481 |     Attributes:
482 |         coef_ (NDArray):
483 |             Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,).
484 |         intercept_ (float):
485 |             Independent term in decision function.
486 |         canonicals_ (SimpleNamespace):
487 |             Namespace that contains underlying cvxpy objects used to define
488 |             the optimization problem. The objects included are the following:
489 |                 - objective - the objective function.
490 |                 - beta - variable to be optimized (corresponds to the estimated coef_ attribute).
491 |                 - parameters - hyper-parameters
492 |                 - auxiliaries - auxiliary variables and expressions
493 |                 - constraints - solution constraints
494 | 
495 |     Note:
496 |         Installation of Gurobi is not a must, but highly recommended. An open source alternative
497 |         is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
498 |         See the Mixed-integer programs section of the cvxpy docs:
499 |         https://www.cvxpy.org/tutorial/advanced/index.html
500 |     """
501 | 
502 |     def __init__(
503 |         self,
504 |         groups: NDArray[np.floating | np.integer] | None = None,
505 |         alpha: float = 1.0,
506 |         eta: float = 1.0,
507 |         big_M: int = 100,
508 |         hierarchy: list[list[int]] | None = None,
509 |         tikhonov_w: NDArray[np.floating] | None = None,
510 |         ignore_psd_check: bool = True,
511 |         fit_intercept: bool = False,
512 |         copy_X: bool = True,
513 |         warm_start: bool = False,
514 |         solver: str | None = None,
515 |         solver_options: dict | None = None,
516 |     ):
517 |         super().__init__(
518 |             groups=groups,
519 |             alpha=alpha,
520 |             eta=eta,
521 |             big_M=big_M,
522 |             hierarchy=hierarchy,
523 |             ignore_psd_check=ignore_psd_check,
524 |             fit_intercept=fit_intercept,
525 |             copy_X=copy_X,
526 |             warm_start=warm_start,
527 |             solver=solver,
528 |             solver_options=solver_options,
529 |         )
530 |         self.tikhonov_w = tikhonov_w
531 | 


--------------------------------------------------------------------------------
/src/sparselm/model/_ols.py:
--------------------------------------------------------------------------------
 1 | """Ordinary least squares cvxpy solver."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | __author__ = "Fengyu Xie, Luis Barroso-Luque"
 6 | 
 7 | 
 8 | from types import SimpleNamespace
 9 | 
10 | import cvxpy as cp
11 | from numpy.typing import NDArray
12 | 
13 | from ._base import CVXRegressor
14 | 
15 | 
16 | class OrdinaryLeastSquares(CVXRegressor):
17 |     r"""Ordinary Least Squares Linear Regression.
18 | 
19 |     Regression objective:
20 | 
21 |     .. math::
22 | 
23 |         \min_{\beta} || X \beta - y ||^2_2
24 | 
25 |     Args:
26 |         fit_intercept (bool):
27 |             Whether the intercept should be estimated or not.
28 |             If False, the data is assumed to be already centered.
29 |         copy_X (bool):
30 |             If True, X will be copied; else, it may be overwritten.
31 |         warm_start (bool):
32 |             When set to True, reuse the solution of the previous call to
33 |             fit as initialization, otherwise, just erase the previous
34 |             solution.
35 |         solver (str):
36 |             cvxpy backend solver to use. Supported solvers are listed here:
37 |             https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options
38 |         solver_options (dict):
39 |             dictionary of keyword arguments passed to cvxpy solve.
40 |             See docs linked above for more information.
41 | 
42 |     Attributes:
43 |         coef_ (NDArray):
44 |             Parameter vector (:math:`\beta` in the cost function formula) of shape (n_features,).
45 |         intercept_ (float):
46 |             Independent term in decision function.
47 |         canonicals_ (SimpleNamespace):
48 |             Namespace that contains underlying cvxpy objects used to define
49 |             the optimization problem. The objects included are the following:
50 |                 - objective - the objective function.
51 |                 - beta - variable to be optimized (corresponds to the estimated coef_ attribute).
52 |                 - parameters - hyper-parameters
53 |                 - auxiliaries - auxiliary variables and expressions
54 |                 - constraints - solution constraints
55 |     """
56 | 
57 |     def _generate_objective(
58 |         self,
59 |         X: NDArray,
60 |         y: NDArray,
61 |         beta: cp.Variable,
62 |         parameters: SimpleNamespace | None = None,
63 |         auxiliaries: SimpleNamespace | None = None,
64 |     ) -> cp.Expression:
65 |         return 1 / (2 * X.shape[0]) * cp.sum_squares(X @ beta - y)
66 | 


--------------------------------------------------------------------------------
/src/sparselm/stepwise.py:
--------------------------------------------------------------------------------
  1 | """Stepwise model selection for piece-wise fitting."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | __author__ = "Fengyu Xie"
  6 | 
  7 | from itertools import chain
  8 | 
  9 | import numpy as np
 10 | from numpy.typing import NDArray
 11 | from sklearn.base import RegressorMixin
 12 | from sklearn.linear_model._base import LinearModel, _check_sample_weight
 13 | from sklearn.utils._param_validation import InvalidParameterError
 14 | from sklearn.utils.metaestimators import _BaseComposition
 15 | from sklearn.utils.validation import check_is_fitted
 16 | 
 17 | 
 18 | # BaseComposition makes sure that StepwiseEstimator can be correctly cloned.
 19 | def _indices_no_overlap_and_continuous(indices):
 20 |     scope = sorted(set(chain(*indices)))
 21 |     return sorted(chain(*indices)) == scope and scope == list(range(len(scope)))
 22 | 
 23 | 
 24 | def _first_step_fit_intercept_only(steps):
 25 |     for sid, (_, estimator) in enumerate(steps):
 26 |         if hasattr(estimator, "estimator"):
 27 |             # Is a searcher such as GridSearchCV.
 28 |             fit_intercept = estimator.estimator.fit_intercept
 29 |         else:
 30 |             fit_intercept = estimator.fit_intercept
 31 |         if fit_intercept and sid > 0:
 32 |             return False
 33 |     return True
 34 | 
 35 | 
 36 | def _no_nested_stepwise(steps):
 37 |     for _, estimator in steps:
 38 |         if isinstance(estimator, StepwiseEstimator):
 39 |             return False
 40 |     return True
 41 | 
 42 | 
 43 | class StepwiseEstimator(_BaseComposition, RegressorMixin, LinearModel):
 44 |     """A composite estimator used to do stepwise fitting.
 45 | 
 46 |     The first estimator in the composite will be used to fit
 47 |     certain features (a piece of the feature matrix) to the
 48 |     target vector, and the residuals are fitted to the rest
 49 |     of features by using the next estimators in the composite.
 50 | 
 51 |     Each estimator can be either a CVXEstimator, a GridSearchCV or
 52 |     a LineSearchCV.
 53 | 
 54 |     Args:
 55 |         steps (list[(str, CVXEstimator)]):
 56 |             A list of step names and the CVXEstimators to use
 57 |             for each step. StepwiseEstimator cannot be used as
 58 |             a member of StepwiseEstimator.
 59 |             An estimator will fit the residuals of the previous
 60 |             estimator fits in the list.
 61 |         estimator_feature_indices (tuple[tuple[int]]):
 62 |             Scope of each estimator, which means the indices of
 63 |             features in the scope (features[:, scope]) will be
 64 |             fitted to the residual using the corresponding estimator.
 65 |             Notice:
 66 |                If estimators in the composite requires hierarchy
 67 |                or groups, the indices in the groups or hierarchy
 68 |                must be adjusted such that they correspond to the groups
 69 |                or hierarchy relations in the part of features sliced
 70 |                by scope.
 71 |                For example, consider original groups = [0, 1, 1, 2, 2],
 72 |                and an estimator has scope = [3, 4], then the estimator
 73 |                should be initialized with group = [0, 0].
 74 |                You are fully responsible to initialize the estimators
 75 |                with correct hierarchy, groups and other parameters before
 76 |                wrapping them up with the composite!
 77 | 
 78 |     Note:
 79 |         1. Do not use GridSearchCV or LineSearchCV to search a StepwiseEstimator!
 80 | 
 81 |         2. No nesting is allowed for StepwiseEstimator, which means no step of a
 82 |         StepwiseEstimator can be a StepwiseEstimator.
 83 | 
 84 |         3. Since stepwise estimator requires specifying a list of feature indices for
 85 |         each step estimator, it requires fixing n_features_in_ before fitting, which
 86 |         violates sklearn convention for a regressor. Therefore, StepwiseEstimator is
 87 |         not checked by sklearn check_estimator method, and there is no guarantee that it
 88 |         is fully compatible with all scikit-learn features.
 89 |     """
 90 | 
 91 |     def __init__(
 92 |         self,
 93 |         steps,
 94 |         estimator_feature_indices,
 95 |     ):
 96 |         self.steps = steps
 97 |         # The estimator_feature_indices saved must be tuple because in
 98 |         # sklearn.base.clone, a cloned object is checked by pointer, rather than
 99 |         # by value.
100 |         self.estimator_feature_indices = estimator_feature_indices
101 | 
102 |     # These parameters settings does not need to be called externally.
103 |     def get_params(self, deep=True):
104 |         """Get parameters of all estimators in the composite.
105 | 
106 |         Args:
107 |             deep(bool):
108 |                 If True, will return the parameters for estimators in
109 |                 composite, and their contained sub-objects if they are
110 |                 also estimators.
111 |         """
112 |         return self._get_params("steps", deep=deep)
113 | 
114 |     def set_params(self, **params):
115 |         """Set parameters for each estimator in the composite.
116 | 
117 |         This will be called when model selection optimizes
118 |         all hyper parameters.
119 | 
120 |         Args:
121 |             params: A Dictionary of parameters. Each parameter
122 |             name must end with an underscore and a number to specify
123 |             on which estimator in the composite the parameter is
124 |             going to be set.
125 |             Remember only to set params you wish to optimize!
126 |         """
127 |         self._set_params("steps", **params)
128 |         return self
129 | 
130 |     @staticmethod
131 |     def _get_estimator_coef(estimator):
132 |         check_is_fitted(estimator)
133 |         if hasattr(estimator, "best_estimator_"):
134 |             return estimator.best_estimator_.coef_.copy()
135 |         elif hasattr(estimator, "coef_"):
136 |             return estimator.coef_.copy()
137 |         else:
138 |             raise ValueError(f"Estimator {estimator} is not a valid linear model!")
139 | 
140 |     @staticmethod
141 |     def _get_estimator_intercept(estimator):
142 |         check_is_fitted(estimator)
143 |         if hasattr(estimator, "best_estimator_"):
144 |             return estimator.best_estimator_.intercept_
145 |         elif hasattr(estimator, "intercept_"):
146 |             return estimator.intercept_
147 |         else:
148 |             raise ValueError(f"Estimator {estimator} is not a valid linear model!")
149 | 
150 |     def fit(
151 |         self,
152 |         X: NDArray,
153 |         y: NDArray,
154 |         sample_weight: NDArray[np.floating] | None = None,
155 |         *args,
156 |         **kwargs,
157 |     ):
158 |         """Prepare fit input with sklearn help then call fit method.
159 | 
160 |         Args:
161 |             X (NDArray):
162 |                 Training data of shape (n_samples, n_features).
163 |             y (NDArray):
164 |                 Target values. Will be cast to X's dtype if necessary
165 |                 of shape (n_samples,) or (n_samples, n_targets)
166 |             sample_weight (NDArray):
167 |                 Individual weights for each sample of shape (n_samples,)
168 |                 default=None
169 |             *args:
170 |                 Positional arguments passed to _fit method
171 |             **kwargs:
172 |                 Keyword arguments passed to _fit method
173 |         Returns:
174 |             instance of self
175 |         """
176 |         # Check estimators and feature indices.
177 |         if not _indices_no_overlap_and_continuous(self.estimator_feature_indices):
178 |             raise InvalidParameterError(
179 |                 f"Given feature indices:"
180 |                 f" {self.estimator_feature_indices}"
181 |                 f" are not continuous and non-overlapping"
182 |                 f" series starting from 0!"
183 |             )
184 |         if not _first_step_fit_intercept_only(self.steps):
185 |             raise InvalidParameterError(
186 |                 "Only the first estimator in steps is allowed" " to fit intercept!"
187 |             )
188 |         if not _no_nested_stepwise(self.steps):
189 |             raise InvalidParameterError(
190 |                 "StepwiseEstimator should not be nested with"
191 |                 " another StepwiseEstimator!"
192 |             )
193 | 
194 |         self.n_features_in_ = len(list(chain(*self.estimator_feature_indices)))
195 | 
196 |         # Set ensute_2d to True and reset to False so that it triggers number of
197 |         # features checking.
198 |         X, y = self._validate_data(
199 |             X,
200 |             y,
201 |             accept_sparse=False,
202 |             ensure_2d=True,
203 |             y_numeric=True,
204 |             multi_output=True,
205 |             reset=False,
206 |         )
207 | 
208 |         if sample_weight is not None:
209 |             sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
210 | 
211 |         residuals = y.copy()
212 | 
213 |         self.coef_ = np.empty(X.shape[1])
214 |         self.coef_.fill(np.nan)
215 |         for (_, estimator), scope in zip(self.steps, self.estimator_feature_indices):
216 |             # np.array indices should not be tuple.
217 |             estimator.fit(
218 |                 X[:, list(scope)],
219 |                 residuals,
220 |                 *args,
221 |                 sample_weight=sample_weight,
222 |                 **kwargs,
223 |             )
224 |             self.coef_[list(scope)] = self._get_estimator_coef(estimator)
225 |             residuals = residuals - estimator.predict(X[:, list(scope)])
226 |             # Only the first estimator is allowed to fit intercept.
227 |         if hasattr(self.steps[0][1], "estimator"):
228 |             fit_intercept = self.steps[0][1].estimator.fit_intercept
229 |         else:
230 |             fit_intercept = self.steps[0][1].fit_intercept
231 |         if fit_intercept:
232 |             self.intercept_ = self._get_estimator_intercept(self.steps[0][1])
233 |         else:
234 |             self.intercept_ = 0.0
235 | 
236 |         # return self for chaining fit and predict calls
237 |         return self
238 | 


--------------------------------------------------------------------------------
/src/sparselm/tools.py:
--------------------------------------------------------------------------------
  1 | """A variety of tools for fitting linear regression models to polish CE."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | __author__ = "Luis Barroso-Luque"
  6 | 
  7 | import warnings
  8 | from functools import wraps
  9 | 
 10 | import numpy as np
 11 | from numpy.typing import NDArray
 12 | 
 13 | 
 14 | def constrain_coefficients(
 15 |     indices: NDArray,
 16 |     high: NDArray[np.floating] | float | None = None,
 17 |     low: NDArray[np.floating] | float | None = None,
 18 | ):
 19 |     """Constrain a fit method to keep coefficients within a specified range.
 20 | 
 21 |     Use this as a standard decorator with parameters:
 22 |     - At runtime:
 23 |         coefs = constrain_coefficients(indices, high, low)(fit_method)(X, y)
 24 |     - In fit_method definitions:
 25 |         @constrain_coefficients(indices, high, low)
 26 |         def your_fit_method(X, y):
 27 | 
 28 |     Args:
 29 |         indices (array or list):
 30 |             indices of coefficients to constrain
 31 |         high (float or array):
 32 |             upper bound for indices,
 33 |         low (float or array):
 34 |             lower bounds for indices
 35 |     """
 36 |     indices = np.array(indices)
 37 |     if high is not None:
 38 |         high = (
 39 |             high * np.ones(len(indices))
 40 |             if isinstance(high, (int, float))
 41 |             else np.array(high)
 42 |         )
 43 |     else:
 44 |         high = np.inf * np.ones(len(indices))
 45 |     if low is not None:
 46 |         low = (
 47 |             low * np.ones(len(indices))
 48 |             if isinstance(low, (int, float))
 49 |             else np.array(low)
 50 |         )
 51 |     else:
 52 |         low = -np.inf * np.ones(len(indices))
 53 | 
 54 |     def decorate_fit_method(fit_method):
 55 |         """Decorate a fit method to constrain "dielectric constant".
 56 | 
 57 |         Args:
 58 |             fit_method (callable):
 59 |                 the fit_method you will use to fit your regression model.
 60 |                 Must take the feature matrix X and target vector y as first
 61 |                 arguments. (i.e. fit_method(X, y, *args, **kwargs)
 62 |         """
 63 | 
 64 |         @wraps(fit_method)
 65 |         def wrapped(X, y, *args, **kwargs):
 66 |             coefs = fit_method(X, y, *args, **kwargs)
 67 |             above_range = coefs[indices] > high
 68 |             below_range = coefs[indices] < low
 69 | 
 70 |             # TODO do not set features to zero, do the fit without them instead
 71 |             if sum(above_range) > 0 or sum(below_range) > 0:
 72 |                 X_, y_ = X.copy(), y.copy()
 73 |                 y_ -= np.sum(X_[:, indices[above_range]] * high[above_range], axis=1)
 74 |                 X_[:, indices[above_range]] = 0.0
 75 |                 y_ -= np.sum(X_[:, indices[below_range]] * low[below_range], axis=1)
 76 |                 X_[:, indices[below_range]] = 0.0
 77 |                 coefs = fit_method(X_, y_, *args, **kwargs)
 78 |                 coefs[indices[above_range]] = high[above_range]
 79 |                 coefs[indices[below_range]] = low[below_range]
 80 | 
 81 |             # check if new coeficients are now out of range
 82 |             above_range = coefs[indices] > high
 83 |             below_range = coefs[indices] < low
 84 |             if sum(above_range) > 0 or sum(below_range) > 0:
 85 |                 warnings.warn(
 86 |                     "Running the constrained fit has resulted in new out of"
 87 |                     " range coefficients that were not so in the unconstrained"
 88 |                     " fit.\n"
 89 |                     "Double check the sensibility of the bounds you provided!",
 90 |                     RuntimeWarning,
 91 |                 )
 92 | 
 93 |             return coefs
 94 | 
 95 |         return wrapped
 96 | 
 97 |     return decorate_fit_method
 98 | 
 99 | 
100 | def r2_score_to_cv_error(
101 |     score: float,
102 |     y: NDArray,
103 |     y_pred: NDArray,
104 |     weights: NDArray[np.floating] | None = None,
105 | ):
106 |     """Convert r2 score to cross-validation error.
107 | 
108 |     Args:
109 |         score (float):
110 |             An r2 score obtained from cross validation.
111 |         y (NDArray): 1D
112 |             The target vector.
113 |         y_pred (NDArray): 1D
114 |             The fitted vector.
115 |         weights (NDArray): 1D
116 |             The weights of each sample. Default to 1.
117 | 
118 |     Returns:
119 |         float:
120 |             The CV error
121 |     """
122 |     if weights is None:
123 |         weights = np.ones(len(y))
124 |     weights = np.array(weights)
125 |     if len(weights) != len(y):
126 |         raise ValueError("Weights given but not the same length as sample.")
127 |     if np.any(weights < 0) or np.allclose(weights, 0):
128 |         raise ValueError("Weights can not be negative or all zero.")
129 | 
130 |     denominator = (weights * (y - y_pred) ** 2).sum() / weights.sum()
131 |     return np.sqrt((1 - score) * denominator)
132 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pytest
  3 | from sklearn.datasets import make_regression, make_sparse_coded_signal
  4 | 
  5 | SEED = 0
  6 | 
  7 | # A few solvers to test for convex problems
  8 | # ECOS sometimes fails for Adaptive group estimators, but is fast
  9 | # SCS and CXVOPT are reliable, but slower
 10 | # GUROBI is best
 11 | CONVEX_SOLVERS = ["GUROBI", "ECOS"]  # SCS, GUROBI, CVXOPT
 12 | 
 13 | # ECOS_BB is open source alternative, but much slower, and can get things wrong
 14 | MIQP_SOLVERS = ["GUROBI"]  # SCIP fails some tests...
 15 | 
 16 | # Set to small values bc gurobi non-commercial can not solver large model.
 17 | N_FEATURES = [20, 30]  # an overdetermined and underdetermined case
 18 | N_SAMPLES = 25
 19 | N_INFORMATIVE = 10
 20 | 
 21 | 
 22 | @pytest.fixture(scope="package")
 23 | def rng():
 24 |     """Seed and return an RNG for test reproducibility"""
 25 |     return np.random.default_rng(SEED)
 26 | 
 27 | 
 28 | @pytest.fixture(params=CONVEX_SOLVERS)
 29 | def solver(request):
 30 |     return request.param
 31 | 
 32 | 
 33 | @pytest.fixture(params=MIQP_SOLVERS)
 34 | def miqp_solver(request):
 35 |     return request.param
 36 | 
 37 | 
 38 | @pytest.fixture(scope="package", params=N_FEATURES)
 39 | def random_model(rng, request):
 40 |     """Returns a fully random set of X, y, and beta representing a linear model."""
 41 |     X, y, beta = make_regression(
 42 |         n_samples=N_SAMPLES,
 43 |         n_features=request.param,
 44 |         n_informative=N_INFORMATIVE,
 45 |         coef=True,
 46 |         random_state=rng.integers(0, 2**32 - 1),
 47 |         bias=10 * rng.random(),
 48 |     )
 49 |     return X, y, beta
 50 | 
 51 | 
 52 | @pytest.fixture(scope="package", params=N_FEATURES)
 53 | def random_energy_model(rng, request):
 54 |     """Returns a random set of X, y, and beta with added gaussian noise for a linear
 55 |     model with sparse coefficients beta decay (on average) exponentially with the index
 56 |     of the coefficient.
 57 |     """
 58 |     X = rng.random((N_SAMPLES, request.param))
 59 |     beta = np.zeros(request.param)  # coefficients
 60 |     non_zero_ids = rng.choice(request.param, size=N_INFORMATIVE, replace=False)
 61 |     non_zero_ids = np.array(np.round(non_zero_ids), dtype=int)
 62 | 
 63 |     for idx in non_zero_ids:
 64 |         eci = 0
 65 |         mag = np.exp(-0.5 * idx)
 66 |         while np.isclose(eci, 0):
 67 |             eci = (rng.random() - 0.5) * 2 * mag
 68 |         beta[idx] = eci
 69 |     y = X @ beta + rng.normal(size=N_SAMPLES) * 2e-3  # fake energies
 70 |     return X, y, beta
 71 | 
 72 | 
 73 | @pytest.fixture(scope="package")
 74 | def sparse_coded_signal(rng):
 75 |     n_components, n_features, n_nonzero = 24, 12, 6
 76 |     y, X, beta = make_sparse_coded_signal(
 77 |         n_samples=1,
 78 |         n_components=n_components,
 79 |         n_features=n_features,
 80 |         n_nonzero_coefs=n_nonzero,
 81 |         random_state=rng.integers(0, 2**32 - 1),
 82 |     )
 83 |     return X, y, beta
 84 | 
 85 | 
 86 | @pytest.fixture(params=[4, 6], scope="package")
 87 | def random_model_with_groups(random_model, rng, request):
 88 |     """Add a correct set of groups to model."""
 89 |     X, y, beta = random_model
 90 |     n_groups = request.param
 91 |     n_active_groups = n_groups // 3 + 1
 92 | 
 93 |     n_features_per_group = len(beta) // n_groups
 94 |     active_group_inds = rng.choice(range(n_groups), size=n_active_groups, replace=False)
 95 |     inactive_group_inds = np.setdiff1d(range(n_groups), active_group_inds)
 96 | 
 97 |     groups = np.zeros(len(beta), dtype=int)
 98 |     active_feature_inds = np.where(abs(beta) > 0)[0]
 99 |     inactive_feature_inds = np.setdiff1d(np.arange(len(beta)), active_feature_inds)
100 | 
101 |     # set active groups
102 |     for i in active_group_inds:
103 |         if len(active_feature_inds) > n_features_per_group:
104 |             group_inds = rng.choice(
105 |                 active_feature_inds, size=n_features_per_group, replace=False
106 |             )
107 |         else:
108 |             group_inds = active_feature_inds
109 |         groups[group_inds] = i
110 |         active_feature_inds = np.setdiff1d(active_feature_inds, group_inds)
111 | 
112 |     # set inactive_groups
113 |     for i in inactive_group_inds:
114 |         if len(inactive_feature_inds) > n_features_per_group:
115 |             group_inds = rng.choice(
116 |                 inactive_feature_inds, size=n_features_per_group, replace=False
117 |             )
118 |         else:
119 |             group_inds = inactive_feature_inds
120 |         groups[group_inds] = i
121 |         inactive_feature_inds = np.setdiff1d(inactive_feature_inds, group_inds)
122 | 
123 |     return X, y, beta, groups
124 | 


--------------------------------------------------------------------------------
/tests/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | minversion = 5.3.0
3 | 


--------------------------------------------------------------------------------
/tests/test_common.py:
--------------------------------------------------------------------------------
  1 | """General tests for all linear models.
  2 | 
  3 | Simply check that they execute successfully on random data.
  4 | """
  5 | 
  6 | from inspect import getmembers, isclass, signature
  7 | 
  8 | import cvxpy as cp
  9 | import numpy as np
 10 | import pytest
 11 | from cvxpy.error import SolverError
 12 | from sklearn.utils.estimator_checks import check_estimator
 13 | from sklearn.utils.fixes import threadpool_info
 14 | 
 15 | import sparselm.model as spm
 16 | from sparselm.model._miqp._base import MIQPl0
 17 | 
 18 | ESTIMATORS = getmembers(spm, isclass)
 19 | ESTIMATOR_NAMES = [est[0] for est in ESTIMATORS]
 20 | ESTIMATORS = [est[1] for est in ESTIMATORS]  # type: ignore
 21 | 
 22 | 
 23 | @pytest.fixture(params=ESTIMATORS, ids=ESTIMATOR_NAMES)
 24 | def estimator(request):
 25 |     estimator_cls = request.param
 26 |     if issubclass(estimator_cls, MIQPl0):
 27 |         regressor = estimator_cls(fit_intercept=True, solver="SCIP")
 28 |         if hasattr(regressor, "eta"):
 29 |             regressor.eta = 0.01
 30 |         return regressor
 31 |     return estimator_cls(fit_intercept=True, solver="ECOS")
 32 | 
 33 | 
 34 | @pytest.mark.parametrize("estimator_cls", ESTIMATORS)
 35 | def test_general_fit(estimator_cls, random_model, rng):
 36 |     X, y, beta = random_model
 37 | 
 38 |     # instantiate the estimator
 39 |     sig = signature(estimator_cls)
 40 | 
 41 |     # check for necessary parameters
 42 |     args = {}
 43 |     if "groups" in sig.parameters:
 44 |         args["groups"] = rng.integers(0, 5, size=len(beta))
 45 |     if "group_list" in sig.parameters:
 46 |         args["group_list"] = [
 47 |             np.sort(rng.choice(range(5), replace=False, size=rng.integers(1, 5)))
 48 |             for _ in range(len(beta))
 49 |         ]
 50 |     if "sparse_bound" in sig.parameters:
 51 |         args["sparse_bound"] = 12
 52 | 
 53 |     estimator = estimator_cls(**args)
 54 |     estimator.fit(X, y)
 55 |     # assert a value of coefficients has been set correctly
 56 |     assert isinstance(estimator.coef_, np.ndarray)
 57 |     assert len(estimator.coef_) == len(beta)
 58 |     assert len(estimator.predict(X)) == len(y)
 59 |     assert estimator.intercept_ == 0.0
 60 | 
 61 |     estimator = estimator_cls(fit_intercept=True, **args)
 62 |     estimator.fit(X, y)
 63 |     # assert a value of coefficients has been set correctly
 64 |     assert isinstance(estimator.coef_, np.ndarray)
 65 |     assert len(estimator.coef_) == len(beta)
 66 |     assert len(estimator.predict(X)) == len(y)
 67 |     assert estimator.intercept_ != 0.0
 68 | 
 69 | 
 70 | @pytest.mark.xfail(raises=SolverError)
 71 | def test_add_constraints(estimator, random_model, rng):
 72 |     with pytest.raises(RuntimeError):
 73 |         estimator.add_constraints([cp.Variable(1) >= 0])
 74 | 
 75 |     X, y, beta = random_model
 76 |     estimator.generate_problem(X, y)
 77 |     n_constraints = len(estimator.canonicals_.constraints)
 78 |     # a dummy constraint
 79 |     estimator.add_constraints([estimator.canonicals_.beta >= 0.0])
 80 |     assert len(estimator.canonicals_.problem.constraints) == n_constraints + 1
 81 |     assert len(estimator.canonicals_.user_constraints) == 1
 82 |     assert len(estimator.canonicals_.constraints) == n_constraints
 83 | 
 84 |     # force cache data
 85 |     # ( solving the model sometimes fails and we only want to check that a warning is
 86 |     # raised )
 87 |     estimator.cached_X_ = X
 88 |     estimator.cached_y_ = y
 89 | 
 90 |     new_X = rng.random(X.shape)
 91 |     with pytest.warns(UserWarning):
 92 |         estimator.fit(new_X, y)
 93 | 
 94 | 
 95 | @pytest.mark.xfail(
 96 |     any(
 97 |         True
 98 |         for info in threadpool_info()
 99 |         if info["internal_api"] == "openblas"
100 |         # Prudently assume Prescott might be the architecture if it is unknown.
101 |         and info.get("architecture", "prescott").lower() == "prescott"
102 |     ),
103 |     reason="On Github runner above is true and sklearn will throw an error by trying to create_mmemap_backed_arrays "
104 |     "with an estimator.",
105 | )
106 | def test_sklearn_compatible(estimator):
107 |     """Test sklearn compatibility with no parameter inputs."""
108 |     check_estimator(estimator)
109 | 


--------------------------------------------------------------------------------
/tests/test_dataset.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import numpy.testing as npt
 3 | import pytest
 4 | 
 5 | from sparselm.dataset import make_group_regression
 6 | 
 7 | 
 8 | @pytest.mark.parametrize("n_informative_groups", [5, 20])
 9 | @pytest.mark.parametrize("n_features_per_group", [5, 4 * list(range(2, 7))])
10 | @pytest.mark.parametrize("frac_informative_in_group", [1.0, 0.5])
11 | @pytest.mark.parametrize("shuffle", [True, False])
12 | @pytest.mark.parametrize("coef", [True, False])
13 | def test_make_group_regression(
14 |     n_informative_groups, n_features_per_group, frac_informative_in_group, shuffle, coef
15 | ):
16 |     model = make_group_regression(
17 |         n_informative_groups=n_informative_groups,
18 |         n_features_per_group=n_features_per_group,
19 |         frac_informative_in_group=frac_informative_in_group,
20 |         shuffle=shuffle,
21 |         coef=coef,
22 |     )
23 | 
24 |     assert len(model) == 4 if coef else 3
25 | 
26 |     if coef:
27 |         X, y, groups, coefs = model
28 |     else:
29 |         X, y, groups = model
30 | 
31 |     if not isinstance(n_features_per_group, list):
32 |         n_features_per_group = [n_features_per_group] * 20
33 | 
34 |     n_features = (
35 |         sum(n_features_per_group)
36 |         if isinstance(n_features_per_group, list)
37 |         else 20 * n_features_per_group
38 |     )
39 | 
40 |     assert X.shape == (100, n_features)
41 |     assert y.shape == (100,)
42 |     assert groups.shape == (n_features,)
43 |     assert len(np.unique(groups)) == 20
44 | 
45 |     if coef:
46 |         n_informative = sum(
47 |             round(frac_informative_in_group * n_features_per_group[i])
48 |             for i in range(n_informative_groups)
49 |         )
50 | 
51 |         assert coefs.shape == (n_features,)
52 |         assert sum(coef > 0 for coef in coefs) == n_informative
53 |         npt.assert_array_almost_equal(np.dot(X, coefs), y)
54 | 
55 |     if shuffle:
56 |         # check that not all groups are lumped together
57 |         assert sum(np.diff(groups) == 0) < 20 - 1
58 | 
59 |     # check warning
60 |     with pytest.warns(UserWarning):
61 |         make_group_regression(frac_informative_in_group=1 / 100)
62 | 


--------------------------------------------------------------------------------
/tests/test_lasso.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import numpy.testing as npt
  3 | import pytest
  4 | from cvxpy.error import SolverError
  5 | 
  6 | from sparselm.model import (
  7 |     AdaptiveGroupLasso,
  8 |     AdaptiveLasso,
  9 |     AdaptiveOverlapGroupLasso,
 10 |     AdaptiveRidgedGroupLasso,
 11 |     AdaptiveSparseGroupLasso,
 12 |     GroupLasso,
 13 |     Lasso,
 14 |     OverlapGroupLasso,
 15 |     SparseGroupLasso,
 16 | )
 17 | 
 18 | ADAPTIVE_ESTIMATORS = [
 19 |     AdaptiveLasso,
 20 |     AdaptiveGroupLasso,
 21 |     AdaptiveSparseGroupLasso,
 22 |     AdaptiveOverlapGroupLasso,
 23 |     AdaptiveRidgedGroupLasso,
 24 | ]
 25 | 
 26 | THRESHOLD = 1e-8  # relative threshold
 27 | 
 28 | 
 29 | def test_lasso_toy():
 30 |     # Borrowed from sklearn tests
 31 |     # Test Lasso on a toy example for various values of alpha.
 32 |     # When validating this against glmnet notice that glmnet divides it
 33 |     # against nobs.
 34 | 
 35 |     X = [[-1], [0], [1]]
 36 |     Y = [-1, 0, 1]  # just a straight line
 37 |     T = [[2], [3], [4]]  # test sample
 38 | 
 39 |     lasso = Lasso(alpha=1e-8)
 40 |     lasso.fit(X, Y)
 41 |     pred = lasso.predict(T)
 42 |     npt.assert_array_almost_equal(lasso.coef_, [1])
 43 |     npt.assert_array_almost_equal(pred, [2, 3, 4])
 44 | 
 45 |     lasso = Lasso(alpha=0.1)
 46 |     lasso.fit(X, Y)
 47 |     pred = lasso.predict(T)
 48 |     npt.assert_array_almost_equal(lasso.coef_, [0.85])
 49 |     npt.assert_array_almost_equal(pred, [1.7, 2.55, 3.4])
 50 | 
 51 |     lasso = Lasso(alpha=0.5)
 52 |     lasso.fit(X, Y)
 53 |     pred = lasso.predict(T)
 54 |     npt.assert_array_almost_equal(lasso.coef_, [0.25])
 55 |     npt.assert_array_almost_equal(pred, [0.5, 0.75, 1.0])
 56 | 
 57 |     lasso = Lasso(alpha=1.0)
 58 |     lasso.fit(X, Y)
 59 |     pred = lasso.predict(T)
 60 |     npt.assert_array_almost_equal(lasso.coef_, [0.0])
 61 |     npt.assert_array_almost_equal(pred, [0, 0, 0])
 62 | 
 63 | 
 64 | def test_lasso_non_float_y():
 65 |     # Borrowed from sklearn tests
 66 |     X = [[0, 0], [1, 1], [-1, -1]]
 67 |     y = [0, 1, 2]
 68 |     y_float = [0.0, 1.0, 2.0]
 69 | 
 70 |     lasso = Lasso(fit_intercept=False)
 71 |     lasso.fit(X, y)
 72 |     lasso_float = Lasso(fit_intercept=False)
 73 |     lasso_float.fit(X, y_float)
 74 |     npt.assert_array_equal(lasso.coef_, lasso_float.coef_)
 75 | 
 76 | 
 77 | def test_adaptive_lasso_sparser(random_model):
 78 |     X, y, _ = random_model
 79 |     lasso = Lasso(fit_intercept=True)
 80 |     alasso = AdaptiveLasso(fit_intercept=True)
 81 | 
 82 |     lasso.fit(X, y)
 83 |     alasso.fit(X, y)
 84 | 
 85 |     assert sum(abs(lasso.coef_) > THRESHOLD) >= sum(abs(alasso.coef_) > THRESHOLD)
 86 | 
 87 | 
 88 | # TODO flakey test, depends on THRESHOLD value
 89 | @pytest.mark.xfail(raises=SolverError)
 90 | @pytest.mark.parametrize(
 91 |     "standardize",
 92 |     [True, False],
 93 | )  # standardize=False leads to failures
 94 | def test_group_lasso(random_model_with_groups, solver, standardize):
 95 |     X, y, _, groups = random_model_with_groups
 96 | 
 97 |     aglasso = AdaptiveGroupLasso(
 98 |         groups=groups,
 99 |         alpha=0.1,
100 |         fit_intercept=True,
101 |         standardize=standardize,
102 |         solver=solver,
103 |     )
104 |     aglasso.fit(X, y)
105 | 
106 |     # check that if all coefs in groups are consistent
107 |     for gid in np.unique(groups):
108 |         m = np.max(abs(aglasso.coef_))
109 |         all_active = (abs(aglasso.coef_[groups == gid]) > m * THRESHOLD).all()
110 |         all_inactive = (abs(aglasso.coef_[groups == gid]) <= m * THRESHOLD).all()
111 |         assert all_active or all_inactive
112 | 
113 | 
114 | @pytest.mark.xfail(raises=SolverError)
115 | @pytest.mark.parametrize(
116 |     "standardize",
117 |     [True, False],
118 | )
119 | def test_group_lasso_weights(random_model_with_groups, solver, standardize):
120 |     X, y, _, groups = random_model_with_groups
121 | 
122 |     group_weights = np.ones(len(np.unique(groups)))
123 | 
124 |     aglasso = AdaptiveGroupLasso(
125 |         groups=groups,
126 |         alpha=0.1,
127 |         group_weights=group_weights,
128 |         fit_intercept=True,
129 |         standardize=standardize,
130 |         solver=solver,
131 |     )
132 |     aglasso.fit(X, y)
133 | 
134 |     rglasso = AdaptiveRidgedGroupLasso(
135 |         groups=groups,
136 |         alpha=0.1,
137 |         group_weights=group_weights,
138 |         fit_intercept=True,
139 |         standardize=standardize,
140 |         solver=solver,
141 |     )
142 |     rglasso.fit(X, y)
143 | 
144 |     # check that if all coefs in groups are consistent
145 |     for gid in np.unique(groups):
146 |         m = np.max(abs(aglasso.coef_))
147 | 
148 |         all_active = (abs(aglasso.coef_[groups == gid]) > m * THRESHOLD).all()
149 |         all_inactive = (abs(aglasso.coef_[groups == gid]) <= m * THRESHOLD).all()
150 |         assert all_active or all_inactive
151 | 
152 |         m = np.max(abs(rglasso.coef_))
153 |         all_active = (abs(rglasso.coef_[groups == gid]) > m * THRESHOLD).all()
154 |         all_inactive = (abs(rglasso.coef_[groups == gid]) <= m * THRESHOLD).all()
155 |         assert all_active or all_inactive
156 | 
157 | 
158 | @pytest.mark.xfail(raises=SolverError)
159 | @pytest.mark.parametrize("estimator_cls", ADAPTIVE_ESTIMATORS)
160 | def test_adaptive_weights(estimator_cls, random_model_with_groups, solver, rng):
161 |     X, y, beta, groups = random_model_with_groups
162 | 
163 |     if estimator_cls.__name__ == "AdaptiveLasso":
164 |         estimator = estimator_cls(solver=solver)
165 |     elif estimator_cls.__name__ == "AdaptiveOverlapGroupLasso":
166 |         gids = np.unique(groups)
167 |         group_list = [
168 |             rng.choice(gids, replace=False, size=rng.integers(1, 3))
169 |             for _ in range(len(beta))
170 |         ]
171 |         estimator = estimator_cls(group_list=group_list, solver=solver)
172 |     else:
173 |         estimator = estimator_cls(groups=groups, solver=solver)
174 | 
175 |     # force generating weights
176 |     estimator.generate_problem(X, y)
177 | 
178 |     if estimator_cls.__name__ == "AdaptiveSparseGroupLasso":
179 |         weights = [
180 |             estimator.canonicals_.parameters.adaptive_coef_weights.value.copy(),
181 |             estimator.canonicals_.parameters.adaptive_group_weights.value.copy(),
182 |         ]
183 |     else:
184 |         weights = [estimator.canonicals_.parameters.adaptive_weights.value.copy()]
185 | 
186 |     estimator.fit(X, y)
187 | 
188 |     if estimator_cls.__name__ == "AdaptiveSparseGroupLasso":
189 |         new_weights = [
190 |             estimator.canonicals_.parameters.adaptive_coef_weights.value.copy(),
191 |             estimator.canonicals_.parameters.adaptive_group_weights.value.copy(),
192 |         ]
193 |     else:
194 |         new_weights = [estimator.canonicals_.parameters.adaptive_weights.value.copy()]
195 | 
196 |     # simply check that the weights are updated.
197 |     # TODO a better check would be to check that weights for active groups/coefs
198 |     #  are smaller than those of inactive ones
199 |     for nw, w in zip(new_weights, weights):
200 |         assert not any(nw_i == pytest.approx(w_i) for nw_i, w_i in zip(nw, w))
201 | 
202 | 
203 | def test_bad_inputs(random_model_with_groups, rng):
204 |     X, y, beta, groups = random_model_with_groups
205 |     bad_groups = rng.integers(0, 6, size=len(beta) - 1)
206 |     group_weights = np.ones(len(np.unique(bad_groups)))
207 | 
208 |     # test that warns when no groups given
209 |     with pytest.warns(UserWarning):
210 |         gl = GroupLasso()
211 |         gl.fit(X, y)
212 | 
213 |     with pytest.warns(UserWarning):
214 |         gl = OverlapGroupLasso()
215 |         gl.fit(X, y)
216 | 
217 |     # bad groups
218 |     with pytest.raises(ValueError):
219 |         gl = GroupLasso(bad_groups, group_weights=group_weights)
220 |         gl.fit(X, y)
221 | 
222 |     with pytest.raises(TypeError):
223 |         gl = GroupLasso("groups", group_weights=group_weights)
224 |         gl.fit(X, y)
225 | 
226 |     # bad group_weights
227 |     with pytest.raises(ValueError):
228 |         group_weights = np.ones(len(np.unique(bad_groups)) - 1)
229 |         gl = GroupLasso(bad_groups, group_weights=group_weights)
230 |         gl.fit(X, y)
231 | 
232 |     with pytest.raises(TypeError):
233 |         gl = GroupLasso(groups, group_weights="weights")
234 |         gl.fit(X, y)
235 | 
236 |     # bad l1_ratio
237 |     lasso = SparseGroupLasso(groups)
238 |     with pytest.raises(ValueError):
239 |         lasso.l1_ratio = -1.0
240 |         lasso.fit(X, y)
241 | 
242 |     with pytest.raises(ValueError):
243 |         lasso.l1_ratio = 2.0
244 |         lasso.fit(X, y)
245 | 
246 |     with pytest.raises(ValueError):
247 |         sgl = SparseGroupLasso(groups, l1_ratio=-1.0)
248 |         sgl.fit(X, y)
249 | 
250 |     with pytest.raises(ValueError):
251 |         sgl = SparseGroupLasso(groups, l1_ratio=2.0)
252 |         sgl.fit(X, y)
253 | 
254 |     # test that it warns
255 |     with pytest.warns(UserWarning):
256 |         sgl = SparseGroupLasso(groups, l1_ratio=0.0)
257 |         sgl.fit(X, y)
258 |     with pytest.warns(UserWarning):
259 |         sgl = SparseGroupLasso(groups, l1_ratio=1.0)
260 |         sgl.fit(X, y)
261 | 
262 | 
263 | @pytest.mark.parametrize("estimator_cls", ADAPTIVE_ESTIMATORS)
264 | def test_set_parameters(estimator_cls, random_model_with_groups, rng):
265 |     X, y, beta, groups = random_model_with_groups
266 | 
267 |     if estimator_cls.__name__ == "AdaptiveLasso":
268 |         estimator = estimator_cls()
269 |     elif estimator_cls.__name__ == "AdaptiveOverlapGroupLasso":
270 |         gids = np.unique(groups)
271 |         group_list = [
272 |             rng.choice(gids, replace=False, size=rng.integers(1, 3))
273 |             for _ in range(len(beta))
274 |         ]
275 |         estimator = estimator_cls(group_list=group_list)
276 |     else:
277 |         estimator = estimator_cls(groups=groups)
278 | 
279 |     estimator.alpha = 0.5
280 |     assert estimator.alpha == 0.5
281 |     estimator.generate_problem(X, y)
282 |     assert estimator.canonicals_.parameters.alpha.value == 0.5
283 | 
284 |     if hasattr(estimator, "l1_ratio"):
285 |         # default l1_ratio is 0.5
286 |         assert estimator.canonicals_.parameters.lambda1.value == 0.5 * 0.5
287 |         assert estimator.canonicals_.parameters.lambda2.value == 0.5 * 0.5
288 | 
289 |         estimator.l1_ratio = 0.25
290 |         estimator._set_param_values()
291 |         assert estimator.l1_ratio == 0.25
292 |         assert estimator.canonicals_.parameters.lambda1.value == 0.25 * 0.5
293 |         assert estimator.canonicals_.parameters.lambda2.value == 0.75 * 0.5
294 | 
295 |     if hasattr(estimator, "delta"):
296 |         estimator.delta = (4.0,)
297 |         estimator._set_param_values()
298 |         npt.assert_array_equal(
299 |             estimator.canonicals_.parameters.delta.value,
300 |             4.0 * np.ones(len(np.unique(groups))),
301 |         )
302 | 
303 |         estimator.delta = 3.0 * np.ones(len(np.unique(groups)))
304 |         estimator._set_param_values()
305 |         npt.assert_array_equal(estimator.delta, 3.0 * np.ones(len(np.unique(groups))))
306 |         npt.assert_array_equal(
307 |             estimator.canonicals_.parameters.delta.value,
308 |             3.0 * np.ones(len(np.unique(groups))),
309 |         )
310 | 


--------------------------------------------------------------------------------
/tests/test_miqp.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import numpy.testing as npt
  3 | import pytest
  4 | 
  5 | from sparselm.model import (
  6 |     L2L0,
  7 |     BestSubsetSelection,
  8 |     RegularizedL0,
  9 |     RidgedBestSubsetSelection,
 10 | )
 11 | 
 12 | # exclude L1L0 since it breaks hierarchy constraints...
 13 | MIQP_estimators = [
 14 |     BestSubsetSelection,
 15 |     RidgedBestSubsetSelection,
 16 |     RegularizedL0,
 17 |     L2L0,
 18 | ]
 19 | 
 20 | THRESHOLD = 1e-12
 21 | 
 22 | 
 23 | def assert_hierarchy_respected(coef, slack_z, hierarchy, groups=None):
 24 |     groups = groups if groups is not None else np.arange(len(coef))
 25 |     group_ids = np.unique(groups)
 26 |     for grp_id, active, parents in zip(group_ids, slack_z, hierarchy):
 27 |         if active == 1:  # all parents must also be active
 28 |             assert all(
 29 |                 (abs(coef[groups == parent]) >= THRESHOLD).all() for parent in parents
 30 |             )
 31 | 
 32 | 
 33 | def test_perfect_signal_recovery(sparse_coded_signal):
 34 |     X, y, beta = sparse_coded_signal
 35 |     X = X.T
 36 | 
 37 |     (idx,) = beta.nonzero()
 38 | 
 39 |     estimator = BestSubsetSelection(sparse_bound=np.count_nonzero(beta))
 40 |     estimator.fit(X, y)
 41 | 
 42 |     npt.assert_array_equal(idx, np.flatnonzero(estimator.coef_))
 43 |     npt.assert_array_almost_equal(beta, estimator.coef_)
 44 | 
 45 |     r_estimator = RidgedBestSubsetSelection(sparse_bound=np.count_nonzero(beta))
 46 | 
 47 |     # very low regularization should be the same
 48 |     r_estimator.eta = 1e-16
 49 |     r_estimator.fit(X, y)
 50 |     npt.assert_array_almost_equal(beta, r_estimator.coef_)
 51 |     npt.assert_array_equal(idx, np.flatnonzero(r_estimator.coef_))
 52 |     assert all(i in np.flatnonzero(r_estimator.coef_) for i in idx)
 53 | 
 54 |     # a bit higher regularization, check shrinkage
 55 |     coef = r_estimator.coef_.copy()
 56 |     r_estimator.eta = 1e-4
 57 |     r_estimator.fit(X, y)
 58 |     npt.assert_array_almost_equal(beta, r_estimator.coef_, decimal=1)
 59 |     assert np.linalg.norm(coef) > np.linalg.norm(r_estimator.coef_)
 60 | 
 61 |     # very sensitive to the value of alpha for exact results
 62 |     estimator = RegularizedL0(alpha=0.0008)
 63 |     estimator.fit(X, y)
 64 | 
 65 |     npt.assert_array_equal(idx, np.flatnonzero(estimator.coef_))
 66 |     npt.assert_array_almost_equal(beta, estimator.coef_, decimal=2)
 67 | 
 68 | 
 69 | @pytest.mark.parametrize("estimator_cls", MIQP_estimators)
 70 | def test_slack_variables(estimator_cls, random_model_with_groups, miqp_solver, rng):
 71 |     X, y, beta, groups = random_model_with_groups
 72 | 
 73 |     # ignore groups
 74 |     if "Subset" in estimator_cls.__name__:
 75 |         estimator = estimator_cls(sparse_bound=len(beta) // 2, solver=miqp_solver)
 76 |     else:
 77 |         estimator = estimator_cls(alpha=3.0, solver=miqp_solver)
 78 | 
 79 |     estimator.fit(X, y)
 80 |     for coef, active in zip(
 81 |         estimator.coef_, estimator.canonicals_.auxiliaries.z0.value
 82 |     ):
 83 |         if active == 1:
 84 |             assert abs(coef) >= THRESHOLD
 85 |         else:
 86 |             assert abs(coef) < THRESHOLD
 87 | 
 88 |     # now group hierarchy
 89 |     group_ids = np.sort(np.unique(groups))
 90 |     if "Subset" in estimator_cls.__name__:
 91 |         estimator = estimator_cls(
 92 |             groups, sparse_bound=len(group_ids) // 2, solver=miqp_solver
 93 |         )
 94 |     else:
 95 |         estimator = estimator_cls(groups, alpha=2.0, solver=miqp_solver)
 96 | 
 97 |     estimator.fit(X, y)
 98 |     for gid, active in zip(group_ids, estimator.canonicals_.auxiliaries.z0.value):
 99 |         if active:
100 |             assert all(abs(estimator.coef_[groups == gid]) >= THRESHOLD)
101 |         else:
102 |             assert all(abs(estimator.coef_[groups == gid]) < THRESHOLD)
103 | 
104 | 
105 | @pytest.mark.parametrize("estimator_cls", MIQP_estimators)
106 | def test_singleton_hierarchy(estimator_cls, random_model, miqp_solver, rng):
107 |     X, y, beta = random_model
108 |     (idx,) = beta.nonzero()
109 | 
110 |     # ignore groups, single covariate hierarchy
111 |     if "Subset" in estimator_cls.__name__:
112 |         estimator = estimator_cls(sparse_bound=len(beta) // 2, solver=miqp_solver)
113 |     else:
114 |         estimator = estimator_cls(alpha=2.0, solver=miqp_solver)
115 | 
116 |     fully_chained = [[len(beta) - 1]] + [[i] for i in range(0, len(beta) - 1)]
117 |     estimator.hierarchy = fully_chained
118 |     estimator.fit(X, y)
119 | 
120 |     # bound is set lower than number of coefs so all must be zero in BestSubset
121 |     if any(estimator.coef_ == 0):
122 |         assert all(estimator.coef_ == 0)
123 |     else:
124 |         assert all(estimator.coef_ != 0)
125 |     assert_hierarchy_respected(
126 |         estimator.coef_, estimator.canonicals_.auxiliaries.z0.value, fully_chained
127 |     )
128 | 
129 |     hierarchy = []
130 |     for i in range(len(beta)):
131 |         # everything depends on 1st nonzero coef
132 |         if i != idx[0]:
133 |             hierarchy.append([idx[0]])
134 |         else:
135 |             hierarchy.append([])
136 |         # first half of remaining depends on 2nd nonzero
137 |         if 0 < i < len(beta) // 2 and i != idx[1]:
138 |             hierarchy[i].append(idx[1])
139 |         # second half of remaining on 3rd nonzero
140 |         if len(beta) // 2 <= i and i != idx[2]:
141 |             hierarchy[i].append(idx[2])
142 | 
143 |     estimator.hierarchy = hierarchy
144 |     # TODO make hierarchy and other non cp.Parameter params reset problem if reset
145 |     estimator.problem = None
146 |     estimator.fit(X, y)
147 |     assert_hierarchy_respected(
148 |         estimator.coef_, estimator.canonicals_.auxiliaries.z0.value, hierarchy
149 |     )
150 | 
151 | 
152 | @pytest.mark.parametrize("estimator_cls", MIQP_estimators)
153 | def test_group_hierarchy(estimator_cls, random_model_with_groups, miqp_solver, rng):
154 |     X, y, beta, groups = random_model_with_groups
155 |     (idx,) = beta.nonzero()
156 | 
157 |     # now group hierarchy
158 |     group_ids = np.unique(groups)
159 |     if "Subset" in estimator_cls.__name__:
160 |         estimator = estimator_cls(
161 |             groups, sparse_bound=len(group_ids) // 2, solver=miqp_solver
162 |         )
163 |     else:
164 |         estimator = estimator_cls(groups, alpha=3.0, solver=miqp_solver)
165 | 
166 |     fully_chained = [[group_ids[-1]]] + [
167 |         [group_ids[i]] for i in range(0, len(group_ids) - 1)
168 |     ]
169 |     estimator.hierarchy = fully_chained
170 |     estimator.fit(X, y)
171 | 
172 |     # bound is set lower than number of coefs so all must be zero in BestSubset
173 |     if any(estimator.coef_ == 0):
174 |         assert all(estimator.coef_ == 0)
175 |     else:
176 |         assert all(estimator.coef_ != 0)
177 | 
178 |     assert_hierarchy_respected(
179 |         estimator.coef_,
180 |         estimator.canonicals_.auxiliaries.z0.value,
181 |         fully_chained,
182 |         groups=groups,
183 |     )
184 | 
185 |     # pick two groups with nozero coefs
186 |     grp1 = groups[idx[0]]
187 |     while (grp2 := groups[rng.choice(idx)]) == grp1:
188 |         pass
189 | 
190 |     hierarchy = []
191 |     for i in range(len(group_ids)):
192 |         # everything depends on 1st nonzero coef
193 |         if i != grp1:
194 |             hierarchy.append([grp1])
195 |         else:
196 |             hierarchy.append([])
197 |         # first half of remaining depends on 2nd nonzero
198 |         if 0 < i < len(group_ids) // 2 and i not in [grp1, grp2]:
199 |             hierarchy[i].append(grp2)
200 | 
201 |     estimator.problem = None  # TODO also remove this...
202 |     estimator.hierarchy = hierarchy
203 |     estimator.fit(X, y)
204 | 
205 |     assert_hierarchy_respected(
206 |         estimator.coef_,
207 |         estimator.canonicals_.auxiliaries.z0.value,
208 |         hierarchy,
209 |         groups=groups,
210 |     )
211 | 
212 | 
213 | def test_set_parameters(random_model):
214 |     X, y, beta = random_model
215 |     estimator = RidgedBestSubsetSelection(sparse_bound=1, eta=1.0)
216 |     estimator.sparse_bound = 2
217 |     estimator.fit(X, y)
218 |     assert estimator.canonicals_.parameters.sparse_bound.value == 2
219 |     assert estimator.canonicals_.parameters.eta.value == 1.0
220 | 
221 |     estimator.eta = 0.5
222 |     estimator.fit(X, y)
223 |     assert estimator.canonicals_.parameters.eta.value == 0.5
224 | 
225 | 
226 | def test_bad_input(random_model):
227 |     X, y, beta = random_model
228 | 
229 |     # bad sparse_bound
230 |     estimator = BestSubsetSelection(sparse_bound=-1)
231 |     with pytest.raises(ValueError):
232 |         estimator.fit(X, y)
233 | 
234 |     # bad eta
235 |     estimator = RidgedBestSubsetSelection(eta=-1.0)
236 |     with pytest.raises(ValueError):
237 |         estimator.fit(X, y)
238 | 


--------------------------------------------------------------------------------
/tests/test_model_selection.py:
--------------------------------------------------------------------------------
  1 | import cvxpy as cp
  2 | import numpy as np
  3 | import pytest
  4 | from sklearn.datasets import make_regression
  5 | from sklearn.linear_model import Lasso
  6 | from sklearn.model_selection import KFold, train_test_split
  7 | 
  8 | from sparselm.model import L1L0, L2L0
  9 | from sparselm.model_selection import GridSearchCV, LineSearchCV
 10 | 
 11 | ALL_CRITERION = ["max_score", "one_std_score"]
 12 | # Currently we will only test on mixedL0
 13 | ALL_ESTIMATORS = [L2L0, L1L0]
 14 | ONLY_L2L0 = [L2L0]
 15 | 
 16 | 
 17 | @pytest.fixture(scope="module")
 18 | def param_grid():
 19 |     # Test on multiple grids
 20 |     return [
 21 |         {"alpha": [0.01, 0.1], "eta": [0.03, 0.3]},
 22 |         {"alpha": [0.02, 0.2], "eta": [0.04, 0.4]},
 23 |     ]
 24 | 
 25 | 
 26 | def test_solver():
 27 |     # Check that your solvers can work well.
 28 |     # Non-academic, non-commercial Gurobi can not solve large scale model > 100 params.
 29 |     # ECOS_BB is significantly slower, so use gurobi if possible!
 30 |     x = cp.Variable(10, integer=True)
 31 |     obj = cp.sum_squares(x)
 32 |     cons = [x <= 3, x >= -3]
 33 |     prob = cp.Problem(cp.Minimize(obj), cons)
 34 | 
 35 |     if "GUROBI" in cp.installed_solvers():
 36 |         result = prob.solve(solver="GUROBI")
 37 |     else:
 38 |         result = prob.solve(solver="ECOS_BB")
 39 | 
 40 |     assert x.value is not None
 41 |     assert result is not None
 42 | 
 43 | 
 44 | @pytest.fixture(scope="module", params=ALL_ESTIMATORS)
 45 | def estimator(random_energy_model, request):
 46 |     ecis = random_energy_model[2]
 47 |     # Each correlation function as its own group. Doing ordinary hierarchy.
 48 |     groups = list(range(len(ecis)))
 49 |     if "GUROBI" in cp.installed_solvers():
 50 |         return request.param(groups=groups, solver="GUROBI")
 51 |     else:
 52 |         return request.param(groups=groups, solver="ECOS_BB")
 53 |     # return request.param(solver="ECOS_BB")
 54 | 
 55 | 
 56 | @pytest.fixture(scope="module", params=ONLY_L2L0)
 57 | def mixed_l2l0_est(random_energy_model, request):
 58 |     ecis = random_energy_model[2]
 59 |     # Each correlation function as its own group. Doing ordinary hierarchy.
 60 |     groups = list(range(len(ecis)))
 61 |     if "GUROBI" in cp.installed_solvers():
 62 |         return request.param(groups=groups, solver="GUROBI")
 63 |     else:
 64 |         return request.param(groups=groups, solver="ECOS_BB")
 65 |     # return request.param(solver="ECOS_BB")
 66 | 
 67 | 
 68 | def test_mixed_l0_wts(random_energy_model, mixed_l2l0_est, rng):
 69 |     femat, energies, _ = random_energy_model
 70 |     mixed_l2l0_est.eta = 1e-5
 71 |     mixed_l2l0_est.fit(X=femat, y=energies)
 72 |     energies_pred = mixed_l2l0_est.predict(femat)
 73 |     assert energies_pred is not None
 74 |     mixed_l2l0_est.tikhonov_w = 1000 * rng.random(femat.shape[1])
 75 |     mixed_l2l0_est.fit(X=femat, y=energies)
 76 |     energies_pred_wtd = mixed_l2l0_est.predict(femat)
 77 |     assert energies_pred_wtd is not None
 78 | 
 79 | 
 80 | @pytest.fixture(scope="module", params=ALL_CRITERION)
 81 | def grid_search(estimator, param_grid, request):
 82 |     grid_searcher = GridSearchCV(
 83 |         estimator, param_grid, opt_selection_method=request.param
 84 |     )
 85 |     return grid_searcher
 86 | 
 87 | 
 88 | @pytest.fixture(scope="module", params=ALL_CRITERION)
 89 | def line_search(estimator, param_grid, request):
 90 |     # Multi-grids not supported in line search mode.
 91 |     param_grid_lines = sorted((key, values) for key, values in param_grid[0].items())
 92 |     line_searcher = LineSearchCV(
 93 |         estimator,
 94 |         param_grid_lines,
 95 |         opt_selection_method=request.param,
 96 |         n_iter=3,
 97 |     )
 98 |     return line_searcher
 99 | 
100 | 
101 | def test_grid_search(random_energy_model, grid_search):
102 |     femat, energies, _ = random_energy_model
103 |     n_samples, n_features = femat.shape
104 |     grid_search.fit(X=femat, y=energies)
105 |     assert "best_params_" in vars(grid_search)
106 |     best_params = grid_search.best_params_
107 |     assert "alpha" in best_params and "eta" in best_params
108 |     assert best_params["alpha"] in [0.01, 0.1, 0.02, 0.2]
109 |     assert best_params["eta"] in [0.03, 0.3, 0.04, 0.4]
110 | 
111 |     assert grid_search.best_score_ <= 1
112 |     assert "coef_" in vars(grid_search.best_estimator_)
113 |     assert "intercept_" in vars(grid_search.best_estimator_)
114 |     energies_pred = grid_search.predict(femat)
115 |     rmse = np.sum((energies - energies_pred) ** 2) / len(energies)
116 |     # Overfit.
117 |     if n_samples < n_features:
118 |         assert -grid_search.best_score_ >= rmse
119 | 
120 | 
121 | # Guarantees that one-std rule always select larger params than max score.
122 | def test_onestd():
123 |     success = 0
124 |     for _ in range(10):
125 |         X, y, coef = make_regression(
126 |             n_samples=200,
127 |             n_features=100,
128 |             n_informative=10,
129 |             noise=40.0,
130 |             bias=-15.0,
131 |             coef=True,
132 |             random_state=0,
133 |         )
134 | 
135 |         X_train, X_test, y_train, y_test = train_test_split(
136 |             X, y, test_size=0.25, random_state=0
137 |         )
138 | 
139 |         # create estimators
140 |         lasso = Lasso(fit_intercept=True)
141 | 
142 |         # create cv search objects for each estimator
143 |         cv5 = KFold(n_splits=5, shuffle=True, random_state=0)
144 |         params = {"alpha": np.logspace(-1, 1, 10)}
145 | 
146 |         lasso_cv_std = GridSearchCV(
147 |             lasso, params, opt_selection_method="one_std_score", cv=cv5, n_jobs=-1
148 |         )
149 |         lasso_cv_opt = GridSearchCV(
150 |             lasso, params, opt_selection_method="max_score", cv=cv5, n_jobs=-1
151 |         )
152 | 
153 |         # fit models on training data
154 |         lasso_cv_std.fit(X_train, y_train)
155 |         lasso_cv_opt.fit(X_train, y_train)
156 | 
157 |         correct_params = (
158 |             lasso_cv_opt.best_params_["alpha"] <= lasso_cv_std.best_params_["alpha"]
159 |         )
160 |         sparsity_opt = np.sum(np.abs(lasso_cv_opt.best_estimator_.coef_) >= 1e-6)
161 |         sparsity_std = np.sum(np.abs(lasso_cv_std.best_estimator_.coef_) >= 1e-6)
162 | 
163 |         if correct_params and sparsity_opt >= sparsity_std:
164 |             success += 1
165 | 
166 |     # Allow some failure caused by randomness of CV splits.
167 |     assert success >= 8
168 | 
169 | 
170 | def test_line_search(random_energy_model, line_search):
171 |     femat, energies, _ = random_energy_model
172 |     n_samples, n_features = femat.shape
173 |     line_search.fit(X=femat, y=energies)
174 |     assert "best_params_" in vars(line_search)
175 |     best_params = line_search.best_params_
176 |     assert "alpha" in best_params and "eta" in best_params
177 |     assert best_params["alpha"] in [0.01, 0.1]
178 |     assert best_params["eta"] in [0.03, 0.3]
179 | 
180 |     assert line_search.best_score_ <= 1
181 |     assert "coef_" in vars(line_search.best_estimator_)
182 |     assert "intercept_" in vars(line_search.best_estimator_)
183 |     energies_pred = line_search.predict(femat)
184 |     rmse = np.sum((energies - energies_pred) ** 2) / len(energies)
185 |     # Overfit.
186 |     if n_samples < n_features:
187 |         assert -line_search.best_score_ >= rmse
188 | 


--------------------------------------------------------------------------------
/tests/test_ols.py:
--------------------------------------------------------------------------------
 1 | """Sanity checks: literally just copied from sklearn tests... """
 2 | 
 3 | import numpy as np
 4 | import numpy.testing as npt
 5 | import pytest
 6 | from sklearn.preprocessing import add_dummy_feature
 7 | 
 8 | from sparselm.model import OrdinaryLeastSquares
 9 | 
10 | 
11 | def test_linear_regression():
12 |     # Test OrdinaryLeastSquares on a simple dataset.
13 |     # a simple dataset
14 |     X = [[1], [2]]
15 |     Y = [1, 2]
16 | 
17 |     reg = OrdinaryLeastSquares()
18 |     reg.fit(X, Y)
19 | 
20 |     npt.assert_array_almost_equal(reg.coef_, [1])
21 |     npt.assert_array_almost_equal(reg.intercept_, [0])
22 |     npt.assert_array_almost_equal(reg.predict(X), [1, 2])
23 | 
24 |     # test it also for degenerate input
25 |     X = [[1]]
26 |     Y = [0]
27 | 
28 |     reg = OrdinaryLeastSquares()
29 |     reg.fit(X, Y)
30 |     npt.assert_array_almost_equal(reg.coef_, [0])
31 |     npt.assert_array_almost_equal(reg.intercept_, [0])
32 |     npt.assert_array_almost_equal(reg.predict(X), [0])
33 | 
34 | 
35 | @pytest.mark.parametrize("fit_intercept", [True, False])
36 | def test_linear_regression_sample_weights(fit_intercept, rng):
37 |     # It would not work with under-determined systems
38 |     n_samples, n_features = 10, 8
39 | 
40 |     X = rng.normal(size=(n_samples, n_features))
41 |     y = rng.normal(size=n_samples)
42 | 
43 |     sample_weight = 1.0 + rng.uniform(size=n_samples)
44 | 
45 |     # OLS with explicit sample_weight
46 |     reg = OrdinaryLeastSquares(fit_intercept=fit_intercept)
47 |     reg.fit(X, y, sample_weight=sample_weight)
48 |     coefs1 = reg.coef_
49 |     inter1 = reg.intercept_
50 | 
51 |     assert reg.coef_.shape == (X.shape[1],)  # sanity checks
52 | 
53 |     # Closed form of the weighted least square
54 |     # theta = (X^T W X)^(-1) @ X^T W y
55 |     W = np.diag(sample_weight)
56 |     X_aug = X if not fit_intercept else add_dummy_feature(X)
57 | 
58 |     Xw = X_aug.T @ W @ X_aug
59 |     yw = X_aug.T @ W @ y
60 |     coefs2 = np.linalg.solve(Xw, yw)
61 | 
62 |     if not fit_intercept:
63 |         npt.assert_allclose(coefs1, coefs2)
64 |     else:
65 |         npt.assert_allclose(coefs1, coefs2[1:])
66 |         npt.assert_allclose(inter1, coefs2[0])
67 | 
68 | 
69 | def test_fit_intercept():
70 |     # Test assertions on betas shape.
71 |     X2 = np.array([[0.38349978, 0.61650022], [0.58853682, 0.41146318]])
72 |     X3 = np.array(
73 |         [
74 |             [0.27677969, 0.70693172, 0.01628859],
75 |             [0.08385139, 0.20692515, 0.70922346],
76 |         ]
77 |     )
78 |     y = np.array([1, 1])
79 | 
80 |     lr2_without_intercept = OrdinaryLeastSquares(fit_intercept=False).fit(X2, y)
81 |     lr2_with_intercept = OrdinaryLeastSquares().fit(X2, y)
82 | 
83 |     lr3_without_intercept = OrdinaryLeastSquares(fit_intercept=False).fit(X3, y)
84 |     lr3_with_intercept = OrdinaryLeastSquares().fit(X3, y)
85 | 
86 |     assert lr2_with_intercept.coef_.shape == lr2_without_intercept.coef_.shape
87 |     assert lr3_with_intercept.coef_.shape == lr3_without_intercept.coef_.shape
88 |     assert lr2_without_intercept.coef_.ndim == lr3_without_intercept.coef_.ndim
89 | 


--------------------------------------------------------------------------------
/tests/test_stepwise.py:
--------------------------------------------------------------------------------
  1 | """Test composite estimator class."""
  2 | 
  3 | import numpy as np
  4 | import numpy.testing as npt
  5 | import pytest
  6 | from sklearn.base import clone
  7 | from sklearn.utils._param_validation import InvalidParameterError
  8 | 
  9 | from sparselm.model import L2L0, Lasso
 10 | from sparselm.model_selection import GridSearchCV
 11 | from sparselm.stepwise import StepwiseEstimator
 12 | 
 13 | 
 14 | def test_make_composite():
 15 |     # Test making a composite estimator.
 16 |     lasso1 = Lasso(fit_intercept=True, alpha=1.0)
 17 |     lasso2 = Lasso(fit_intercept=False, alpha=2.0)
 18 |     l2l0 = L2L0(groups=[0, 0, 1, 2], alpha=0.1, eta=4.0)
 19 |     steps = [("lasso1", lasso1), ("lasso2", lasso2), ("l2l0", l2l0)]
 20 | 
 21 |     scope1 = [0, 1, 8]
 22 |     scope2 = [2, 3]
 23 |     scope3 = [4, 5, 6, 7]
 24 |     estimator = StepwiseEstimator(steps, [scope1, scope2, scope3])
 25 |     # sklearn convention tests, need pandas.
 26 |     # Currently, not passing because conventional sklearn estimator should not have
 27 |     # fixed number of features.
 28 |     # check_estimator(estimator)
 29 |     assert estimator.steps[0][1].fit_intercept
 30 |     assert not estimator.steps[1][1].fit_intercept
 31 |     assert not estimator.steps[2][1].fit_intercept
 32 | 
 33 |     # check parameters. Nested estimator case not tested yet.
 34 |     params = estimator.get_params(deep=True)
 35 |     assert params["lasso1"].get_params(deep=True)["alpha"] == 1.0
 36 |     assert params["lasso2"].get_params(deep=True)["alpha"] == 2.0
 37 |     assert params["l2l0"].get_params(deep=True)["alpha"] == 0.1
 38 |     assert params["l2l0"].get_params(deep=True)["eta"] == 4.0
 39 |     assert params["lasso1__alpha"] == 1.0
 40 |     assert params["lasso2__alpha"] == 2.0
 41 |     assert params["l2l0__alpha"] == 0.1
 42 |     assert params["l2l0__eta"] == 4.0
 43 | 
 44 |     estimator.set_params(lasso2__alpha=0.5, l2l0__alpha=0.2, l2l0__eta=3.0)
 45 |     params = estimator.get_params(deep=True)
 46 |     assert params["lasso1"].get_params(deep=True)["alpha"] == 1.0
 47 |     assert params["lasso2"].get_params(deep=True)["alpha"] == 0.5
 48 |     assert params["l2l0"].get_params(deep=True)["alpha"] == 0.2
 49 |     assert params["l2l0"].get_params(deep=True)["eta"] == 3.0
 50 |     assert params["lasso1__alpha"] == 1.0
 51 |     assert params["lasso2__alpha"] == 0.5
 52 |     assert params["l2l0__alpha"] == 0.2
 53 |     assert params["l2l0__eta"] == 3.0
 54 | 
 55 |     # Test unsafe clone, such that composite can be used in the optimizers.
 56 |     # Currently, have to mute sanity check from origianl sklearn clone.
 57 |     cloned = clone(estimator)
 58 |     params = cloned.get_params(deep=True)
 59 |     assert params["lasso1"].get_params(deep=True)["alpha"] == 1.0
 60 |     assert params["lasso2"].get_params(deep=True)["alpha"] == 0.5
 61 |     assert params["l2l0"].get_params(deep=True)["alpha"] == 0.2
 62 |     assert params["l2l0"].get_params(deep=True)["eta"] == 3.0
 63 |     assert params["lasso1__alpha"] == 1.0
 64 |     assert params["lasso2__alpha"] == 0.5
 65 |     assert params["l2l0__alpha"] == 0.2
 66 |     assert params["l2l0__eta"] == 3.0
 67 | 
 68 |     # A searcher can also be put into stepwise.
 69 |     grid = GridSearchCV(lasso2, {"alpha": [0.01, 0.1, 1.0]})
 70 |     steps = [("lasso1", lasso1), ("lasso2", grid), ("l2l0", l2l0)]
 71 |     estimator = StepwiseEstimator(steps, [scope1, scope2, scope3])
 72 |     # check_estimator(estimator)
 73 |     params = estimator.get_params(deep=True)
 74 |     assert params["lasso1__alpha"] == 1.0
 75 |     assert params["l2l0__alpha"] == 0.2
 76 |     assert params["l2l0__eta"] == 3.0
 77 |     assert "lasso2__alpha" not in params
 78 |     assert params["lasso2__estimator__alpha"] == 0.5
 79 | 
 80 | 
 81 | def test_toy_composite():
 82 |     lasso1 = Lasso(fit_intercept=True, alpha=1e-6)
 83 |     lasso2 = Lasso(fit_intercept=False, alpha=1e-6)
 84 |     grid = GridSearchCV(clone(lasso2), {"alpha": [1e-8, 1e-7, 1e-6]})
 85 |     bad_lasso2 = Lasso(fit_intercept=True, alpha=1e-6)
 86 |     l2l0 = L2L0(groups=[0, 0, 1, 2], alpha=0, eta=1e-9)
 87 |     steps = [("lasso1", lasso1), ("lasso2", lasso2), ("l2l0", l2l0)]
 88 |     steps2 = [("lasso1", clone(lasso1)), ("lasso2", grid), ("l2l0", clone(l2l0))]
 89 |     bad_steps = [("lasso1", lasso1), ("lasso2", bad_lasso2), ("l2l0", l2l0)]
 90 | 
 91 |     scope1 = [0, 1, 8]
 92 |     scope2 = [2, 3]
 93 |     scope3 = [4, 5, 6, 7]
 94 |     estimator = StepwiseEstimator(steps, [scope1, scope2, scope3])
 95 |     # Use grid search on lasso2.
 96 |     estimator2 = StepwiseEstimator(steps2, [scope1, scope2, scope3])
 97 | 
 98 |     bad_scope1 = [0, 1]
 99 |     bad_scope2 = [3, 4]
100 |     bad_scope3 = [5, 6, 7, 8]
101 |     bad_estimator1 = StepwiseEstimator(steps, [bad_scope1, bad_scope2, bad_scope3])
102 |     bad_estimator2 = StepwiseEstimator(bad_steps, [scope1, scope2, scope3])
103 | 
104 |     w_test = np.random.normal(scale=2, size=9) * 0.2
105 |     w_test[0] = 10
106 |     w_test[-1] = 0.5
107 |     # A bad feature matrix with too many features.
108 |     bad_X = np.random.random(size=(20, 12))
109 |     bad_X[:, 0] = 1
110 |     with pytest.raises(ValueError):
111 |         estimator.fit(bad_X, np.random.random(size=20))
112 |     X = np.random.random(size=(20, 9))
113 |     X[:, 0] = 1
114 |     X[:, -1] = -8 * np.random.random(size=20)
115 |     y = np.dot(X, w_test) + np.random.normal(scale=0.01, size=20)
116 | 
117 |     # Bad scopes.
118 |     with pytest.raises(InvalidParameterError):
119 |         bad_estimator1.fit(X, y)
120 |     # Allow fit intercept in beyond the first estimator.
121 |     with pytest.raises(InvalidParameterError):
122 |         bad_estimator2.fit(X, y)
123 |     # A correct estimator.
124 | 
125 |     def run_estimator_test(estimator_test):
126 |         estimator_test.fit(X, y)
127 |         # print("intercept:", estimator_test.intercept_)
128 |         # print("coef:", estimator_test.coef_)
129 | 
130 |         assert estimator_test.intercept_ == estimator_test.steps[0][1].intercept_
131 |         assert not np.any(np.isnan(estimator_test.coef_))
132 | 
133 |         assert not np.isclose(estimator_test.intercept_, 0)
134 | 
135 |         for (_, sub), scope in zip(
136 |             estimator_test.steps, estimator_test.estimator_feature_indices
137 |         ):
138 |             if hasattr(sub, "estimator"):
139 |                 sub_coef = sub.best_estimator_.coef_
140 |             else:
141 |                 sub_coef = sub.coef_
142 |             npt.assert_array_almost_equal(sub_coef, estimator_test.coef_[scope])
143 |         coef_1 = estimator_test.coef_.copy()
144 |         intercept_1 = estimator_test.intercept_
145 | 
146 |         # Now do not fit intercept.
147 |         estimator_test.steps[0][1].fit_intercept = False
148 |         estimator_test.fit(X, y)
149 |         coef_2 = estimator_test.coef_.copy()
150 |         intercept_2 = estimator_test.intercept_
151 |         assert np.isclose(intercept_2, 0)
152 | 
153 |         # Do some naive assertion on the fitted coefficients.
154 |         assert abs(coef_1[0] + intercept_1 - 10) / 10 <= 0.1
155 |         assert abs(coef_2[0] - 10) / 10 <= 0.1
156 |         # assert np.linalg.norm(coef_2 - w_test) / np.linalg.norm(w_test) <= 0.4
157 | 
158 |         total_y = np.zeros(len(y))
159 |         for (_, sub_estimator_test), sub_scope in zip(
160 |             estimator_test.steps, estimator_test.estimator_feature_indices
161 |         ):
162 |             total_y += sub_estimator_test.predict(X[:, sub_scope])
163 |         npt.assert_array_almost_equal(estimator_test.predict(X), total_y)
164 |         npt.assert_array_almost_equal(
165 |             np.dot(X, estimator_test.coef_) + estimator_test.intercept_, total_y
166 |         )
167 | 
168 |     # Either estimators should be able to work.
169 |     run_estimator_test(estimator)
170 |     run_estimator_test(estimator2)
171 | 


--------------------------------------------------------------------------------
/tests/test_tools.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | from functools import partial
  3 | 
  4 | import numpy.testing as npt
  5 | import pytest
  6 | 
  7 | from sparselm.model import OrdinaryLeastSquares
  8 | from sparselm.tools import constrain_coefficients
  9 | 
 10 | 
 11 | @pytest.mark.parametrize("test_number", range(5))  # run the test 5 times
 12 | def test_constrain_coefficients(test_number, rng):
 13 |     n_samples, n_features = 10, 8
 14 |     X = rng.normal(size=(n_samples, n_features))
 15 |     y = rng.normal(size=n_samples)
 16 |     reg = OrdinaryLeastSquares(fit_intercept=True)
 17 |     reg.fit(X, y)
 18 |     coefs = reg.coef_
 19 | 
 20 |     def fit(X, y, reg):
 21 |         reg.fit(X, y)
 22 |         return reg.coef_
 23 | 
 24 |     # Test uniform low and high values
 25 |     inds = rng.choice(n_features, size=3, replace=False)
 26 | 
 27 |     with warnings.catch_warnings(record=True) as w:
 28 |         cstr_coefs = constrain_coefficients(inds, 2, 0)(partial(fit, reg=reg))(X, y)
 29 | 
 30 |     assert cstr_coefs.shape == coefs.shape
 31 | 
 32 |     # Check if warning was raised, meaning coefficients were not within range
 33 |     # in that case just test that the indeed that warning was raised.
 34 |     if len(w) > 0:
 35 |         with pytest.warns(RuntimeWarning):
 36 |             cstr_coefs = constrain_coefficients(inds, 2, 0)(partial(fit, reg=reg))(X, y)
 37 |     else:
 38 |         for i in inds:
 39 |             assert 0 <= cstr_coefs[i] <= 2
 40 | 
 41 |     @constrain_coefficients(inds, 2, 0)
 42 |     def fit_constrained1(X, y, reg):
 43 |         reg.fit(X, y)
 44 |         return reg.coef_
 45 | 
 46 |     cstr_coefs2 = fit_constrained1(X, y, reg=reg)
 47 |     npt.assert_almost_equal(cstr_coefs, cstr_coefs2)
 48 | 
 49 |     # Test different low and high values
 50 |     low = rng.random(size=3) - 0.5
 51 |     high = rng.random(size=3) + low
 52 | 
 53 |     with warnings.catch_warnings(record=True) as w:
 54 |         cstr_coefs = constrain_coefficients(inds, high, low)(partial(fit, reg=reg))(
 55 |             X, y
 56 |         )
 57 | 
 58 |     assert cstr_coefs.shape == coefs.shape
 59 | 
 60 |     # Check if warning was raised, meaning coefficients were not within range
 61 |     # in that case just test that the indeed that warning was raised.
 62 |     if len(w) > 0:
 63 |         with pytest.warns(RuntimeWarning):
 64 |             cstr_coefs = constrain_coefficients(inds, high, low)(partial(fit, reg=reg))(
 65 |                 X, y
 66 |             )
 67 |     else:
 68 |         for i, l, h in zip(inds, low, high):
 69 |             assert l <= cstr_coefs[i] <= h
 70 | 
 71 |     @constrain_coefficients(inds, high, low)
 72 |     def fit_constrained2(X, y, reg):
 73 |         reg.fit(X, y)
 74 |         return reg.coef_
 75 | 
 76 |     cstr_coefs2 = fit_constrained2(X, y, reg=reg)
 77 |     npt.assert_almost_equal(cstr_coefs, cstr_coefs2)
 78 | 
 79 |     # just use high value
 80 |     with warnings.catch_warnings(record=True) as w:
 81 |         cstr_coefs = constrain_coefficients(inds, high=high)(partial(fit, reg=reg))(
 82 |             X, y
 83 |         )
 84 | 
 85 |     assert cstr_coefs.shape == coefs.shape
 86 | 
 87 |     # Check if warning was raised, meaning coefficients were not within range
 88 |     # in that case just test that the indeed that warning was raised.
 89 |     if len(w) > 0:
 90 |         with pytest.warns(RuntimeWarning):
 91 |             cstr_coefs = constrain_coefficients(inds, high=high)(partial(fit, reg=reg))(
 92 |                 X, y
 93 |             )
 94 |     else:
 95 |         for i, h in zip(inds, high):
 96 |             assert cstr_coefs[i] <= h
 97 | 
 98 |     # just use low value
 99 |     with warnings.catch_warnings(record=True) as w:
100 |         cstr_coefs = constrain_coefficients(inds, low=low)(partial(fit, reg=reg))(X, y)
101 | 
102 |     assert cstr_coefs.shape == coefs.shape
103 | 
104 |     # Check if warning was raised, meaning coefficients were not within range
105 |     # in that case just test that the indeed that warning was raised.
106 |     if len(w) > 0:
107 |         with pytest.warns(RuntimeWarning):
108 |             cstr_coefs = constrain_coefficients(inds, low=low)(partial(fit, reg=reg))(
109 |                 X, y
110 |             )
111 |     else:
112 |         for i, l in zip(inds, low):
113 |             assert l <= cstr_coefs[i]
114 | 
115 | 
116 | # TODO write this test
117 | def test_r2_score_to_cv_error():
118 |     pass
119 | 


--------------------------------------------------------------------------------