├── .gitattributes ├── MANIFEST.in ├── examples ├── README.rst ├── kernel_ridge │ ├── README.rst │ ├── plot_kernel_ridge.py │ ├── plot_kernel_ridge_cv.py │ └── plot_model_on_gpu.py.py └── multiple_kernel_ridge │ ├── README.rst │ ├── plot_mkr_3_path.py │ ├── plot_mkr_5_refine_results.py │ ├── plot_mkr_4_refit_from_deltas.py │ ├── plot_mkr_1_sklearn_api.py │ └── plot_mkr_0_random_search.py ├── setup.cfg ├── doc ├── getting_started.rst ├── README.md ├── index.rst ├── static │ ├── custom.css │ ├── logo.py │ └── logo.svg ├── Makefile ├── flowchart.rst ├── troubleshooting.rst ├── api.rst ├── changelog.rst ├── conf.py └── models.rst ├── .codespellrc ├── .gitignore ├── himalaya ├── __init__.py ├── lasso │ ├── __init__.py │ ├── tests │ │ ├── test_sklearn_api_lasso.py │ │ └── test_group_lasso.py │ └── _sklearn_api.py ├── backend │ ├── __init__.py │ ├── tests │ │ ├── test_backend_utils.py │ │ └── test_backends.py │ ├── torch_cuda.py │ ├── _utils.py │ ├── numpy.py │ └── cupy.py ├── tests │ ├── test_viz.py │ ├── test_progress_bar.py │ ├── test_utils.py │ └── test_validation.py ├── ridge │ ├── __init__.py │ ├── tests │ │ ├── test_column.py │ │ ├── test_random_search_ridge.py │ │ └── test_solvers_ridge.py │ └── _solvers.py ├── viz.py ├── kernel_ridge │ ├── tests │ │ ├── test_predictions.py │ │ ├── test_input_arrays.py │ │ ├── test_force_cpu.py │ │ └── test_random_search_kernel.py │ ├── __init__.py │ └── _predictions.py ├── progress_bar.py └── utils.py ├── .github ├── workflows │ ├── codespell.yml │ ├── build_docs.yml │ ├── deploy_pypi.yml │ └── run_tests.yml └── dependabot.yml ├── .codecov.yml ├── LICENSE.md ├── setup.py └── README.rst /.gitattributes: -------------------------------------------------------------------------------- 1 | *.svg -diff 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include LICENSE.md 3 | -------------------------------------------------------------------------------- /examples/README.rst: -------------------------------------------------------------------------------- 1 | Gallery of examples 2 | =================== 3 | -------------------------------------------------------------------------------- /examples/kernel_ridge/README.rst: -------------------------------------------------------------------------------- 1 | Kernel ridge 2 | ------------ 3 | -------------------------------------------------------------------------------- /examples/multiple_kernel_ridge/README.rst: -------------------------------------------------------------------------------- 1 | Multiple-kernel ridge 2 | --------------------- 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description_file = README.rst 3 | license_files = LICENSE.md 4 | -------------------------------------------------------------------------------- /doc/getting_started.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Getting started 3 | =============== 4 | 5 | .. 
include:: ../README.rst 6 | -------------------------------------------------------------------------------- /.codespellrc: -------------------------------------------------------------------------------- 1 | [codespell] 2 | skip = .git,*.pdf,*.svg,*.css,.codespellrc 3 | check-hidden = true 4 | # ignore-regex = 5 | ignore-words-list = fro 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info/ 3 | *.so 4 | 5 | .vscode 6 | .pytest_cache 7 | __pycache__ 8 | .coverage 9 | .idea 10 | htmlcov/ 11 | build/ 12 | dist/ 13 | 14 | # Documentation build 15 | doc/_build/ 16 | doc/_auto_examples/ 17 | doc/_generated/ 18 | -------------------------------------------------------------------------------- /himalaya/__init__.py: -------------------------------------------------------------------------------- 1 | from . import backend, kernel_ridge, lasso, progress_bar, ridge, scoring, utils, viz 2 | 3 | __version__ = '0.4.8' 4 | 5 | __all__ = [ 6 | "backend", 7 | "kernel_ridge", 8 | "lasso", 9 | "ridge", 10 | "progress_bar", 11 | "scoring", 12 | "utils", 13 | "viz", 14 | ] 15 | -------------------------------------------------------------------------------- /himalaya/lasso/__init__.py: -------------------------------------------------------------------------------- 1 | from ._group_lasso import solve_sparse_group_lasso 2 | from ._group_lasso import solve_sparse_group_lasso_cv 3 | from ._sklearn_api import SparseGroupLassoCV 4 | 5 | __all__ = [ 6 | "solve_sparse_group_lasso", 7 | "solve_sparse_group_lasso_cv", 8 | "SparseGroupLassoCV", 9 | ] 10 | -------------------------------------------------------------------------------- /himalaya/backend/__init__.py: -------------------------------------------------------------------------------- 1 | from ._utils import ALL_BACKENDS 2 | from ._utils import CURRENT_BACKEND 3 | from ._utils import set_backend 4 | from ._utils import get_backend 5 | from ._utils import force_cpu_backend 6 | 7 | __all__ = [ 8 | "ALL_BACKENDS", 9 | "CURRENT_BACKEND", 10 | "set_backend", 11 | "get_backend", 12 | "force_cpu_backend", 13 | ] 14 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | # Himalaya website 2 | 3 | ## Requirements 4 | 5 | ``` 6 | numpydoc 7 | sphinx 8 | sphinx_gallery 9 | sphinxcontrib-mermaid 10 | ``` 11 | 12 | ## Build the website 13 | 14 | ```bash 15 | make html 16 | # ignore "WARNING: autosummary: stub file not found ..." 
17 | firefox _build/html/index.html 18 | ``` 19 | 20 | ## Push the website 21 | 22 | ```bash 23 | make push-pages 24 | ``` 25 | -------------------------------------------------------------------------------- /himalaya/tests/test_viz.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from himalaya.viz import plot_alphas_diagnostic 4 | 5 | 6 | def test_smoke_viz(): 7 | alphas = np.logspace(0, 5, 6) 8 | best_alphas = np.random.choice(np.logspace(0, 5, 6), 10) 9 | plot_alphas_diagnostic(best_alphas, alphas, ax=None) 10 | plot_alphas_diagnostic(best_alphas, alphas, ax=plt.gca()) 11 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | Himalaya 2 | ======== 3 | 4 | Welcome to ``himalaya``'s documentation website. 5 | 6 | Documentation 7 | ------------- 8 | .. toctree:: 9 | :maxdepth: 1 10 | 11 | getting_started 12 | models 13 | flowchart 14 | troubleshooting 15 | _auto_examples/index 16 | 17 | 18 | Package details 19 | --------------- 20 | .. toctree:: 21 | :maxdepth: 1 22 | 23 | api 24 | changelog 25 | -------------------------------------------------------------------------------- /.github/workflows/codespell.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Codespell 3 | 4 | on: 5 | push: 6 | branches: [main] 7 | pull_request: 8 | branches: [main] 9 | 10 | permissions: 11 | contents: read 12 | 13 | jobs: 14 | codespell: 15 | name: Check for spelling errors 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@v6 21 | - name: Codespell 22 | uses: codespell-project/actions-codespell@v2 23 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "github-actions" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /himalaya/tests/test_progress_bar.py: -------------------------------------------------------------------------------- 1 | from himalaya.progress_bar import bar 2 | from himalaya.progress_bar import ProgressBar 3 | 4 | 5 | def test_progress_bar(): 6 | # simple smoke test 7 | for ii in bar(range(10), title="La barre"): 8 | pass 9 | 10 | bar_ = ProgressBar(title="La barre", max_value=10, initial_value=0, 11 | max_chars=40, progress_character='.', spinner=False, 12 | verbose_bool=True) 13 | for ii in bar_(range(10)): 14 | pass 15 | 16 | bar_ = ProgressBar(max_value=10, title="La barre") 17 | for ii in range(10): 18 | bar_.update_with_increment_value(1) 19 | bar_.close() 20 | -------------------------------------------------------------------------------- /doc/static/custom.css: -------------------------------------------------------------------------------- 1 | .sphx-glr-thumbcontainer { 2 | min-height: 230px !important; /*default = 230 */ 3 | margin: 5px !important; /*default = 0 ? */ 4 | } 5 | .sphx-glr-thumbcontainer .figure { 6 | width: 210px !important; /*default = 160 */ 7 | } 8 | .sphx-glr-thumbcontainer img { 9 | max-height: 112px !important; /*default = 112 */ 10 | max-width: 210px !important; /*default = 160 */ 11 | } 12 | .sphx-glr-thumbcontainer a.internal { 13 | padding: 150px 10px 0 !important; /*default = 150px 10px 0 */ 14 | } 15 | div.sphinxsidebar { 16 | max-height: 100%; 17 | overflow-y: auto; 18 | } 19 | div.sphx-glr-download a{ 20 | background-image: none; 21 | background-color: rgb(238, 238, 238); 22 | border-color: rgb(204, 204, 204); 23 | } 24 | -------------------------------------------------------------------------------- /himalaya/ridge/__init__.py: -------------------------------------------------------------------------------- 1 | from ._column import ColumnTransformerNoStack 2 | from ._column import make_column_transformer_no_stack 3 | from ._random_search import solve_group_ridge_random_search 4 | from ._random_search import solve_ridge_cv_svd 5 | from ._random_search import GROUP_RIDGE_SOLVERS 6 | from ._solvers import solve_ridge_svd 7 | from ._solvers import RIDGE_SOLVERS 8 | from ._sklearn_api import Ridge 9 | from ._sklearn_api import RidgeCV 10 | from ._sklearn_api import GroupRidgeCV 11 | 12 | # alternative names 13 | BandedRidgeCV = GroupRidgeCV 14 | solve_banded_ridge_random_search = solve_group_ridge_random_search 15 | BANDED_RIDGE_SOLVERS = GROUP_RIDGE_SOLVERS 16 | 17 | __all__ = [ 18 | # column transformers 19 | "ColumnTransformerNoStack", 20 | "make_column_transformer_no_stack", 21 | # group ridge solvers 22 | "solve_group_ridge_random_search", 23 | "GROUP_RIDGE_SOLVERS", 24 | # ridge solvers 25 | "solve_ridge_svd", 26 | "solve_ridge_cv_svd", 27 | "RIDGE_SOLVERS", 28 | # sklearn API 29 | "Ridge", 30 | "RidgeCV", 31 | "GroupRidgeCV", 32 | ] 33 | -------------------------------------------------------------------------------- /.github/workflows/build_docs.yml: -------------------------------------------------------------------------------- 1 | name: Build docs 2 | 3 | on: 4 | push: 5 | 
branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | build-docs: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v6 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@v6 17 | with: 18 | python-version: 3.9 19 | 20 | - uses: actions/cache@v5 21 | with: 22 | path: ~/.cache/pip 23 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 24 | restore-keys: | 25 | ${{ runner.os }}-pip- 26 | 27 | - name: Install dependencies 28 | run: | 29 | pip install -e ."[github]" 30 | pip install numpydoc sphinx sphinx_gallery sphinxcontrib-mermaid 31 | 32 | - name: Build documents 33 | run: | 34 | cd doc && make html && cd .. 35 | touch doc/_build/html/.nojekyll 36 | 37 | - name: Publish to gh-pages if tagged 38 | if: startsWith(github.ref, 'refs/tags') 39 | uses: JamesIves/github-pages-deploy-action@v4.7.6 40 | with: 41 | branch: gh-pages 42 | folder: doc/_build/html 43 | -------------------------------------------------------------------------------- /himalaya/viz.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def plot_alphas_diagnostic(best_alphas, alphas, ax=None): 5 | """Plot a diagnostic plot for the selected alphas during cross-validation. 6 | 7 | Use it to decide whether to increase the range of alphas. 8 | 9 | Parameters 10 | ---------- 11 | best_alphas : array of shape (n_targets, ) 12 | Alphas selected during cross-validation for each target. 13 | alphas : array of shape (n_alphas, ) 14 | Alphas used while fitting the model. 15 | ax : None or figure axis 16 | 17 | Returns 18 | ------- 19 | ax : figure axis 20 | """ 21 | import matplotlib.pyplot as plt 22 | alphas = np.sort(alphas) 23 | n_alphas = len(alphas) 24 | indices = np.argmin(np.abs(best_alphas[None] - alphas[:, None]), 0) 25 | hist = np.bincount(indices, minlength=n_alphas) 26 | 27 | if ax is None: 28 | fig, ax = plt.subplots(1, 1) 29 | 30 | log10alphas = np.log10(alphas) 31 | ax.plot(log10alphas, hist, '.-', markersize=12) 32 | ax.set_ylabel('Number of targets') 33 | ax.set_xlabel('log10(alpha)') 34 | ax.grid(True) 35 | return ax 36 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | # For more configuration details: 2 | # https://docs.codecov.io/docs/codecov-yaml 3 | 4 | # Check if this file is valid by running in bash: 5 | # curl -X POST --data-binary @.codecov.yml https://codecov.io/validate 6 | 7 | # Coverage configuration 8 | # ---------------------- 9 | coverage: 10 | status: 11 | project: 12 | default: 13 | threshold: 1% # complain if change in code coverage is greater than 1% 14 | patch: false 15 | range: 70..90 # First number represents red, and second represents green 16 | # (default is 70..100) 17 | round: down # up, down, or nearest 18 | precision: 2 # Number of decimal places, between 0 and 5 19 | 20 | 21 | # Ignoring Paths 22 | # -------------- 23 | # which folders/files to ignore 24 | ignore: 25 | - setup.py 26 | # GPU not available on github-actions 27 | - himalaya/backend/cupy.py 28 | - himalaya/backend/torch_cuda.py 29 | 30 | 31 | # Pull request comments: 32 | # ---------------------- 33 | # Diff is the Coverage Diff of the pull request. 
34 | # Files are the files impacted by the pull request 35 | comment: false 36 | # layout: diff, files # accepted in any order: reach, diff, flags, and/or files -------------------------------------------------------------------------------- /doc/static/logo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.decomposition import PCA 3 | 4 | import matplotlib.pyplot as plt 5 | from matplotlib.patches import Polygon 6 | 7 | from himalaya.kernel_ridge import generate_dirichlet_samples 8 | kernel_weights = generate_dirichlet_samples(10000, n_kernels=3, 9 | concentration=[1.], random_state=0) 10 | pca = PCA(2).fit(kernel_weights) 11 | 12 | darkgreen = "#446455" 13 | white = "white" 14 | 15 | 16 | def plot_simplex(bias=(0, 0), ax=None): 17 | corners = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) 18 | corners = pca.transform(corners).T 19 | 20 | if ax is None: 21 | plt.figure(figsize=(2, 2)) 22 | ax = plt.gca() 23 | 24 | # Faces 25 | ax.add_patch( 26 | Polygon(corners[:2].T + bias, closed=True, edgecolor=None, fill=True, 27 | facecolor=white, alpha=0.6)) 28 | # Edges 29 | ax.add_patch( 30 | Polygon(corners[:2].T + bias, closed=True, edgecolor=darkgreen, 31 | fill=False, alpha=1, linewidth=2)) 32 | 33 | ax.axis('equal') 34 | ax.axis('off') 35 | return ax 36 | 37 | 38 | fig, ax = plt.subplots(figsize=(2, 2)) 39 | 40 | bias = [0.4, 0.04] 41 | for factor in np.linspace(2, 0, 3): 42 | bias_ = np.array(bias) * factor 43 | plot_simplex(bias_, ax=ax) 44 | 45 | ax.text(-0.4, -1, "himalaya", fontsize=20, color=darkgreen) 46 | fig.savefig("logo.svg", bbox_inches='tight', pad_inches=0) 47 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = Tutorials 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | clean: 23 | rm -rf $(BUILDDIR)/* 24 | rm -rf _auto_examples/ 25 | rm -rf _generated/ 26 | 27 | html-noplot: 28 | $(SPHINXBUILD) -D plot_gallery=0 -b html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 29 | @echo 30 | @echo "Build finished (noplot). The HTML pages are in $(BUILDDIR)/html." 
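# Example (a sketch): "make html-noplot" builds the documentation without
# executing the gallery examples (plot_gallery=0), which is faster when only
# the prose changed.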
31 | 32 | # -b gh_pages --single-branch (to clone only one branch) 33 | # --no-checkout (just fetches the root folder without content) 34 | # --depth 1 (since we don't need the history prior to the last commit) 35 | push-pages: 36 | rm -rf _build/gh_pages 37 | git clone -b gh-pages --single-branch --no-checkout --depth 1 \ 38 | https://github.com/gallantlab/himalaya _build/gh_pages 39 | 40 | cd _build/ && \ 41 | cp -r html/* gh_pages && \ 42 | cd gh_pages && \ 43 | touch .nojekyll && \ 44 | git add * && \ 45 | git add .nojekyll && \ 46 | git commit -a -m 'Make push-pages' && \ 47 | git push 48 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, the himalaya developers 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /.github/workflows/deploy_pypi.yml: -------------------------------------------------------------------------------- 1 | name: Deploy to PyPI 2 | # Deploy to PyPI if the __version__ variable in himalaya/__init__.py 3 | # is larger than the latest version on PyPI. 
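# (More precisely, the workflow deploys whenever the two version strings
# differ: the "Get versions" step below only tests them for equality.)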
4 | 5 | on: 6 | push: 7 | branches: 8 | - main 9 | paths: 10 | # trigger workflow only on commits that change __init__.py 11 | - 'himalaya/__init__.py' 12 | 13 | jobs: 14 | deploy-pypi: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v6 18 | - uses: actions/setup-python@v6 19 | 20 | - name: Get versions 21 | # Compare the latest version on PyPI, and the current version 22 | run: | 23 | python -m pip install --upgrade -q pip 24 | pip index versions himalaya 25 | LATEST=$(pip index versions himalaya | grep 'himalaya' |awk '{print $2}' | tr -d '(' | tr -d ')') 26 | CURRENT=$(cat himalaya/__init__.py | grep "__version__" | awk '{print $3}' | tr -d "'" | tr -d '"') 27 | EQUAL=$([ "$CURRENT" = "$LATEST" ] && echo 1 || echo 0) 28 | echo "LATEST=$LATEST" >> $GITHUB_ENV 29 | echo "CURRENT=$CURRENT" >> $GITHUB_ENV 30 | echo "EQUAL=$EQUAL" >> $GITHUB_ENV 31 | 32 | - name: Print versions 33 | run: | 34 | echo ${{ env.LATEST }} 35 | echo ${{ env.CURRENT }} 36 | echo ${{ env.EQUAL }} 37 | 38 | - name: Build and publish 39 | if: ${{ env.EQUAL == 0 }} 40 | env: 41 | TWINE_USERNAME: __token__ 42 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 43 | run: | 44 | python -m pip install --upgrade pip 45 | python -m pip install setuptools wheel "twine<6.0" 46 | python setup.py sdist bdist_wheel 47 | python -m twine upload dist/* 48 | -------------------------------------------------------------------------------- /.github/workflows/run_tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | run-tests: 11 | strategy: 12 | matrix: 13 | os: [ubuntu-latest, macos-latest] 14 | python-version: [3.8, 3.9, "3.10", "3.11", "3.12"] 15 | max-parallel: 5 16 | fail-fast: false 17 | runs-on: ${{ matrix.os }} 18 | 19 | steps: 20 | - uses: actions/checkout@v6 21 | - name: Set up Python 22 | uses: actions/setup-python@v6 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | 26 | - uses: actions/cache@v5 27 | with: 28 | path: ~/.cache/pip 29 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 30 | restore-keys: | 31 | ${{ runner.os }}-pip- 32 | 33 | - name: Install dependencies 34 | run: | 35 | pip install -e ."[github]" 36 | 37 | - name: Lint with flake8 38 | run: | 39 | pip install -q flake8 40 | # stop the build if there are Python syntax errors or undefined names 41 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 42 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 43 | flake8 . --count --exit-zero --ignore=E402,C901 --max-line-length=127 --statistics 44 | 45 | - name: Test with pytest 46 | run: | 47 | pip install -q pytest pytest-cov 48 | pytest --cov=./ --reruns 2 49 | 50 | - name: Upload coverage to Codecov 51 | uses: codecov/codecov-action@v5 52 | with: 53 | env_vars: OS,PYTHON 54 | fail_ci_if_error: true 55 | token: ${{ secrets.CODECOV_TOKEN }} 56 | verbose: false 57 | -------------------------------------------------------------------------------- /examples/kernel_ridge/plot_kernel_ridge.py: -------------------------------------------------------------------------------- 1 | """ 2 | Kernel ridge 3 | ============ 4 | 5 | This example demonstrates how to solve kernel ridge regression, using 6 | himalaya's estimator ``KernelRidge`` compatible with scikit-learn's API. 
7 | """ 8 | 9 | ############################################################################### 10 | # Create a random dataset 11 | # ----------------------- 12 | import numpy as np 13 | n_samples, n_features, n_targets = 10, 20, 4 14 | X = np.random.randn(n_samples, n_features) 15 | Y = np.random.randn(n_samples, n_targets) 16 | 17 | ############################################################################### 18 | # Scikit-learn API 19 | # ---------------- 20 | # Himalaya implements a ``KernelRidge`` estimator, similar to the corresponding 21 | # scikit-learn estimator, with similar parameters and methods. 22 | import sklearn.kernel_ridge 23 | import himalaya.kernel_ridge 24 | 25 | # Fit a scikit-learn model 26 | model_skl = sklearn.kernel_ridge.KernelRidge(kernel="linear", alpha=0.1) 27 | model_skl.fit(X, Y) 28 | 29 | # Fit a himalaya model 30 | model_him = himalaya.kernel_ridge.KernelRidge(kernel="linear", alpha=0.1) 31 | model_him.fit(X, Y) 32 | 33 | Y_pred_skl = model_skl.predict(X) 34 | Y_pred_him = model_him.predict(X) 35 | 36 | # The predictions are virtually identical. 37 | print(np.max(np.abs(Y_pred_skl - Y_pred_him))) 38 | 39 | ############################################################################### 40 | # Small API difference 41 | # -------------------- 42 | # Since himalaya focuses on fitting multiple targets, the ``score`` method 43 | # returns the score on each target separately, while scikit-learn returns the 44 | # average score over targets. 45 | 46 | print(model_skl.score(X, Y)) 47 | print(model_him.score(X, Y)) 48 | print(model_him.score(X, Y).mean()) 49 | -------------------------------------------------------------------------------- /doc/flowchart.rst: -------------------------------------------------------------------------------- 1 | Model flowchart 2 | --------------- 3 | 4 | The following flowchart can be used as a guide to select the right estimator. 5 | 6 | .. mermaid:: 7 | 8 | graph TD; 9 | A(How many feature space ?) 10 | O(Data size ?) 11 | M(Data size ?) 12 | OR(Hyperparameters ?) 13 | OK(Hyperparameters ?) 14 | MR(Hyperparameters ?) 15 | MK(Hyperparameters ?) 
16 | 17 | A-- one-->O; 18 | A--multiple-->M; 19 | O--more samples-->OR; 20 | O--more features-->OK; 21 | M--more samples-->MR; 22 | M--more features-->MK; 23 | 24 | OK--known-->OKH[KernelRidge]; 25 | OK--unknown-->OKCV[KernelRidgeCV]; 26 | OR--known-->ORH[Ridge]; 27 | OR--unknown-->ORCV[RidgeCV]; 28 | MK--known-->MKH[WeightedKernelRidge]; 29 | MK--unknown-->MKCV[MultipleKernelRidgeCV]; 30 | 31 | MR--unknown-->MRCV[BandedRidgeCV]; 32 | MR--known-->MKH; 33 | 34 | classDef fork fill:#FFDC97 35 | class A,O,M,OR,OK,MR,MK fork; 36 | 37 | classDef leaf fill:#ABBBE1 38 | class ORH,OKH,MRH,MKH leaf; 39 | class ORCV,OKCV,MRCV,MKCV leaf; 40 | 41 | click ORH "https://gallantlab.github.io/himalaya/_generated/himalaya.ridge.Ridge.html" 42 | click ORCV "https://gallantlab.github.io/himalaya/_generated/himalaya.ridge.RidgeCV.html" 43 | click MRCV "https://gallantlab.github.io/himalaya/_generated/himalaya.ridge.BandedRidgeCV.html" 44 | click OKH "https://gallantlab.github.io/himalaya/_generated/himalaya.kernel_ridge.KernelRidge.html" 45 | click OKCV "https://gallantlab.github.io/himalaya/_generated/himalaya.kernel_ridge.KernelRidgeCV.html" 46 | click MKH "https://gallantlab.github.io/himalaya/_generated/himalaya.kernel_ridge.WeightedKernelRidge.html" 47 | click MKCV "https://gallantlab.github.io/himalaya/_generated/himalaya.kernel_ridge.MultipleKernelRidgeCV.html" 48 | -------------------------------------------------------------------------------- /himalaya/lasso/tests/test_sklearn_api_lasso.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sklearn.utils.estimator_checks 3 | 4 | from himalaya.backend import set_backend 5 | from himalaya.backend import get_backend 6 | from himalaya.backend import ALL_BACKENDS 7 | 8 | from himalaya.lasso import SparseGroupLassoCV 9 | 10 | ############################################################################### 11 | # scikit-learn.utils.estimator_checks 12 | 13 | 14 | class SparseGroupLassoCV_(SparseGroupLassoCV): 15 | """Cast predictions to numpy arrays, to be used in scikit-learn tests. 16 | 17 | Used for testing only. 
18 | """ 19 | 20 | def __init__(self, groups=None, l1_regs=(0, 0.1), l21_regs=(0, 0.1), 21 | solver="proximal_gradient", solver_params=None, cv=2): 22 | super().__init__(groups=groups, l1_regs=l1_regs, l21_regs=l21_regs, 23 | solver=solver, solver_params=solver_params, cv=cv) 24 | 25 | def predict(self, X): 26 | backend = get_backend() 27 | return backend.to_numpy(super().predict(X)) 28 | 29 | def score(self, X, y): 30 | from himalaya.validation import check_array 31 | from himalaya.scoring import r2_score 32 | backend = get_backend() 33 | 34 | y_pred = super().predict(X) 35 | y_true = check_array(y, dtype=self.dtype_, ndim=self.coef_.ndim) 36 | 37 | if y_true.ndim == 1: 38 | return backend.to_numpy( 39 | r2_score(y_true[:, None], y_pred[:, None])[0]) 40 | else: 41 | return backend.to_numpy(r2_score(y_true, y_pred)) 42 | 43 | 44 | @sklearn.utils.estimator_checks.parametrize_with_checks([ 45 | SparseGroupLassoCV_(), 46 | ]) 47 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 48 | def test_check_estimator(estimator, check, backend): 49 | backend = set_backend(backend) 50 | check(estimator) 51 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pathlib import Path 3 | from setuptools import find_packages, setup 4 | 5 | # get version from himalaya/__init__.py 6 | __version__ = 0.0 7 | with open('himalaya/__init__.py') as f: 8 | infos = f.readlines() 9 | for line in infos: 10 | if "__version__" in line: 11 | match = re.search(r"__version__ = '([^']*)'", line) 12 | __version__ = match.groups()[0] 13 | 14 | # read the contents of the README file 15 | this_directory = Path(__file__).parent 16 | long_description = (this_directory / "README.rst").read_text() 17 | 18 | requirements = [ 19 | "numpy", 20 | "scikit-learn", 21 | # "cupy", # optional backend 22 | # "torch", # optional backend, 1.9+ preferred 23 | # "matplotlib", # for visualization only 24 | # "pytest", # for testing only 25 | ] 26 | 27 | extras_require = { 28 | "all_backends": ["cupy", "torch"], 29 | "viz": ["matplotlib"], 30 | "test": ["pytest", "matplotlib", "cupy", "torch"], 31 | "github": ["pytest", "matplotlib", "torch", "pytest-rerunfailures"], 32 | } 33 | 34 | extras_require["all"] = sum(list(extras_require.values()), []) 35 | extras_require["doc"] = ["numpydoc", "sphinx", "sphinx_gallery", 36 | "sphinxcontrib-mermaid"] 37 | extras_require["doc"] += extras_require["viz"] + extras_require["all_backends"] 38 | 39 | if __name__ == "__main__": 40 | setup( 41 | name='himalaya', 42 | maintainer="Tom Dupre la Tour", 43 | maintainer_email="tomdlt@berkeley.edu", 44 | description="Multiple-target machine learning", 45 | license='BSD (3-clause)', 46 | version=__version__, 47 | packages=find_packages(), 48 | url="https://github.com/gallantlab/himalaya", 49 | install_requires=requirements, 50 | extras_require=extras_require, 51 | long_description=long_description, 52 | long_description_content_type='text/x-rst', 53 | ) 54 | -------------------------------------------------------------------------------- /himalaya/backend/tests/test_backend_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from himalaya.backend import set_backend 4 | from himalaya.backend import get_backend 5 | from himalaya.backend import ALL_BACKENDS 6 | from himalaya.backend import force_cpu_backend 7 | from himalaya.backend._utils import MATCHING_CPU_BACKEND 8 | 
9 | 10 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 11 | def test_set_backend_correct(backend): 12 | # test the change of backend 13 | module = set_backend(backend) 14 | assert module.__name__.split('.')[-1] == backend 15 | 16 | # test idempotence 17 | module = set_backend(set_backend(backend)) 18 | assert module.__name__.split('.')[-1] == backend 19 | 20 | # test set and get 21 | module = set_backend(get_backend()) 22 | assert module.__name__.split('.')[-1] == backend 23 | 24 | assert set_backend(backend) 25 | 26 | 27 | def test_set_backend_incorrect(): 28 | for backend in ["wrong", ["numpy"], True, None, 10]: 29 | with pytest.raises(ValueError): 30 | set_backend(backend) 31 | with pytest.raises(ValueError): 32 | set_backend(backend, on_error="raise") 33 | with pytest.warns(Warning): 34 | set_backend(backend, on_error="warn") 35 | with pytest.raises(ValueError): 36 | set_backend(backend, on_error="foo") 37 | 38 | 39 | class ToyEstimator(): 40 | def __init__(self, force_cpu): 41 | self.force_cpu = force_cpu 42 | 43 | @force_cpu_backend 44 | def get_backend_wrapped(self): 45 | return get_backend() 46 | 47 | 48 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 49 | def test_force_cpu_backend(backend): 50 | backend = set_backend(backend) 51 | 52 | est = ToyEstimator(force_cpu=True) 53 | assert est.get_backend_wrapped().name == MATCHING_CPU_BACKEND[backend.name] 54 | 55 | est = ToyEstimator(force_cpu=False) 56 | assert est.get_backend_wrapped().name == backend.name 57 | -------------------------------------------------------------------------------- /examples/kernel_ridge/plot_kernel_ridge_cv.py: -------------------------------------------------------------------------------- 1 | """ 2 | Kernel ridge with cross-validation 3 | ================================== 4 | 5 | This example demonstrates how to solve kernel ridge regression with a 6 | cross-validation of the regularization parameter, using himalaya's estimator 7 | ``KernelRidgeCV``. 8 | """ 9 | 10 | ############################################################################### 11 | # Create a random dataset 12 | # ----------------------- 13 | import numpy as np 14 | np.random.seed(0) 15 | n_samples, n_features, n_targets = 10, 20, 4 16 | X = np.random.randn(n_samples, n_features) 17 | Y = np.random.randn(n_samples, n_targets) 18 | 19 | ############################################################################### 20 | # Limit of GridSearchCV 21 | # --------------------- 22 | # In scikit-learn, one can use ``GridSearchCV`` to optimize hyperparameters 23 | # over cross-validation. 24 | 25 | import sklearn.model_selection 26 | import sklearn.kernel_ridge 27 | 28 | estimator = sklearn.kernel_ridge.KernelRidge(kernel="linear") 29 | gscv = sklearn.model_selection.GridSearchCV( 30 | estimator=estimator, 31 | param_grid=dict(alpha=np.logspace(-2, 2, 5)), 32 | ) 33 | gscv.fit(X, Y) 34 | 35 | ############################################################################### 36 | # However, since ``GridSearchCV`` optimizes the average score over all targets, 37 | # it returns a single value for alpha. 38 | gscv.best_params_ 39 | 40 | ############################################################################### 41 | # KernelRidgeCV 42 | # ------------- 43 | # To optimize each target independently, himalaya implements ``KernelRidgeCV``, 44 | # which supports any cross-validation scheme compatible with scikit-learn. 
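# For example, an explicit splitter could be passed through the ``cv``
# parameter (a sketch): ``KernelRidgeCV(alphas=alphas,
# cv=sklearn.model_selection.KFold(5))``. Below, the default scheme is used.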
45 | import himalaya.kernel_ridge 46 | 47 | model = himalaya.kernel_ridge.KernelRidgeCV(kernel="linear", 48 | alphas=np.logspace(-2, 2, 5)) 49 | model.fit(X, Y) 50 | 51 | ############################################################################### 52 | # KernelRidgeCV returns a separate best alpha per target. 53 | model.best_alphas_ 54 | -------------------------------------------------------------------------------- /doc/troubleshooting.rst: -------------------------------------------------------------------------------- 1 | Troubleshooting 2 | =============== 3 | We detail here common issues encountered with ``himalaya``, and how to fix 4 | them. 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | 10 | CUDA out of memory 11 | ------------------ 12 | 13 | The GPU memory is often smaller than the CPU memory, so it requires more 14 | attention to avoid running out of memory. Himalaya implements a series of 15 | options to limit the GPU memory, often at the cost of computational speed: 16 | 17 | - Some solvers implement computations over batches, to limit the size of 18 | intermediate arrays. See for instance ``n_targets_batch``, or 19 | ``n_alphas_batch`` in :class:`~himalaya.kernel_ridge.KernelRidgeCV`. 20 | - Some solvers implement an option to keep the input kernels or the targets in 21 | CPU memory. See for instance ``Y_in_cpu`` in 22 | :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV`. 23 | - Some estimators can also be forced to use CPU, ignoring the current backend, 24 | using the parameter ``force_cpu=True``. To limit GPU memory, some estimators 25 | in the same pipeline can use ``force_cpu=True`` and others 26 | ``force_cpu=False``. In particular, it is possible to precompute kernels on 27 | CPU, using :class:`~himalaya.kernel_ridge.Kernelizer` or 28 | :class:`~himalaya.kernel_ridge.ColumnKernelizer` with the parameter 29 | ``force_cpu=True`` before fitting a 30 | :class:`~himalaya.kernel_ridge.KernelRidgeCV` or a 31 | :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV` on GPU. 32 | 33 | A CUDA out of memory issue can also arise with ``pytorch < 1.9``, for example 34 | with :class:`~himalaya.kernel_ridge.KernelRidge`, where a solver requires 35 | ridiculously high peak memory during a broadcasting matmul operation. This 36 | `issue `_ can be fixed by 37 | updating to ``pytorch = 1.9`` or newer versions. 38 | 39 | 40 | Slow check_array 41 | ---------------- 42 | 43 | In himalaya, the scikit-learn compatible estimators validate the input data, 44 | checking the absence of NaN or infinite values. For large datasets, this check 45 | can take significant computational time. To skip this check, simply call 46 | ``sklearn.set_config(assume_finite=True)`` before fitting your models. 
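
For example, a minimal sketch (assuming ``X_train`` and ``Y_train`` have
already been validated upstream)::

    import sklearn
    from himalaya.kernel_ridge import KernelRidgeCV

    sklearn.set_config(assume_finite=True)  # skip the NaN/infinity checks
    model = KernelRidgeCV().fit(X_train, Y_train)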
47 | -------------------------------------------------------------------------------- /himalaya/kernel_ridge/tests/test_predictions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from himalaya.backend import set_backend 4 | from himalaya.backend import ALL_BACKENDS 5 | from himalaya.utils import assert_array_almost_equal 6 | 7 | from himalaya.kernel_ridge import primal_weights_weighted_kernel_ridge 8 | from himalaya.kernel_ridge import predict_weighted_kernel_ridge 9 | 10 | 11 | def _create_dataset(backend): 12 | n_samples, n_targets = 30, 3 13 | 14 | Xs = [ 15 | backend.asarray(backend.randn(n_samples, n_features), backend.float64) 16 | for n_features in [100, 200] 17 | ] 18 | Ks = backend.stack([backend.matmul(X, X.T) for X in Xs]) 19 | Y = backend.asarray(backend.randn(n_samples, n_targets), backend.float64) 20 | dual_weights = backend.asarray(backend.randn(n_samples, n_targets), 21 | backend.float64) 22 | exp_deltas = backend.asarray(backend.rand(Ks.shape[0], n_targets), 23 | backend.float64) 24 | deltas = backend.log(exp_deltas) 25 | 26 | return Xs, Ks, Y, deltas, dual_weights 27 | 28 | 29 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 30 | def test_predict_weighted_kernel_ridge(backend): 31 | backend = set_backend(backend) 32 | Xs, Ks, _, deltas, dual_weights = _create_dataset(backend) 33 | 34 | primal_weights = primal_weights_weighted_kernel_ridge( 35 | dual_weights, deltas, Xs) 36 | predictions_primal = backend.stack( 37 | [X @ backend.asarray(w) for X, w in zip(Xs, primal_weights)]).sum(0) 38 | 39 | predictions_dual = predict_weighted_kernel_ridge(Ks, dual_weights, deltas) 40 | 41 | assert_array_almost_equal(predictions_primal, predictions_dual) 42 | 43 | 44 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 45 | def test_predict_weighted_kernel_ridge_n_targets_batch(backend): 46 | backend = set_backend(backend) 47 | Xs, Ks, _, deltas, dual_weights = _create_dataset(backend) 48 | 49 | predictions_dual = predict_weighted_kernel_ridge(Ks, dual_weights, deltas) 50 | 51 | predictions_dual_n_targets_batch = predict_weighted_kernel_ridge( 52 | Ks, dual_weights, deltas, n_targets_batch=10) 53 | 54 | assert_array_almost_equal(predictions_dual, predictions_dual_n_targets_batch) 55 | -------------------------------------------------------------------------------- /examples/kernel_ridge/plot_model_on_gpu.py.py: -------------------------------------------------------------------------------- 1 | """ 2 | Fitting a model on GPU 3 | ====================== 4 | 5 | This example demonstrates how to fit a model using GPU computations. 6 | 7 | Himalaya implements different computational backends to fit the models: 8 | 9 | - "numpy" (CPU) (default) 10 | - "torch" (CPU) 11 | - "torch_cuda" (GPU) 12 | - "cupy" (GPU) 13 | 14 | Each backend is only available if you installed the corresponding package with 15 | CUDA enabled. Check the ``pytorch``/``cupy`` documentation for installation 16 | instructions. 
17 | """ 18 | 19 | ############################################################################### 20 | # Create a random dataset 21 | # ----------------------- 22 | import numpy as np 23 | n_samples, n_features, n_targets = 10, 20, 4 24 | X = np.random.randn(n_samples, n_features) 25 | Y = np.random.randn(n_samples, n_targets) 26 | 27 | ############################################################################### 28 | # Change backend 29 | # -------------- 30 | # To change the backend, you need to call the function 31 | # ``himalaya.backend.set_backend``. With the option ``on_error="warn"``, the 32 | # function does not raise an error if the new backend fails to be imported, and 33 | # the backend is kept unchanged. 34 | 35 | from himalaya.backend import set_backend 36 | backend = set_backend("cupy", on_error="warn") 37 | 38 | ############################################################################### 39 | # GPU backend 40 | # ----------- 41 | # To fit a himalaya model on GPU, you don't need to move the input arrays to 42 | # GPU, the method ``fit`` will do it for you. However, the float precision will 43 | # not be changed. 44 | # 45 | # To make the most of GPU memory and computational speed, you might want to 46 | # change the float precision to float32. 47 | X = X.astype("float32") 48 | 49 | from himalaya.kernel_ridge import KernelRidge 50 | model_him = KernelRidge(kernel="linear", alpha=0.1) 51 | model_him.fit(X, Y) 52 | 53 | ############################################################################### 54 | # The results are stored in GPU memory, using an array object specific to the 55 | # backend used. To use the results in other libraries (for example matplotlib), 56 | # you can create a numpy array using the function ``backend.to_numpy``. 
57 | scores = model_him.score(X, Y) 58 | print(scores.__class__) 59 | scores = backend.to_numpy(scores) 60 | print(scores.__class__) 61 | -------------------------------------------------------------------------------- /himalaya/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from himalaya.backend import set_backend 5 | from himalaya.backend import ALL_BACKENDS 6 | 7 | from himalaya.utils import compute_lipschitz_constants 8 | from himalaya.utils import generate_multikernel_dataset 9 | from himalaya.utils import assert_array_almost_equal 10 | 11 | 12 | @pytest.mark.parametrize('kernelize', ["XXT", "XTX", "X"]) 13 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 14 | def test_compute_lipschitz_constants(backend, kernelize): 15 | backend = set_backend(backend) 16 | 17 | Xs = backend.randn(3, 5, 6) 18 | if kernelize == "X": 19 | XTs = backend.transpose(Xs, (0, 2, 1)) 20 | Xs = backend.matmul(XTs, Xs) 21 | 22 | L = compute_lipschitz_constants(Xs) 23 | assert L.ndim == 1 24 | assert L.shape[0] == Xs.shape[0] 25 | 26 | 27 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 28 | def test_compute_lipschitz_constants_error(backend): 29 | backend = set_backend(backend) 30 | 31 | Xs = backend.randn(3, 5, 6) 32 | with pytest.raises(ValueError): 33 | compute_lipschitz_constants(Xs, "wrong") 34 | 35 | 36 | # A small number of sets of parameters 37 | _parameters = { 38 | "params_1": 39 | dict(n_kernels=4, n_targets=50, n_samples_train=100, n_samples_test=40, 40 | kernel_weights=None, n_features_list=[10, 10, 20, 5]), 41 | "params_2": 42 | dict(n_kernels=3, n_targets=40, n_samples_train=90, n_samples_test=40, 43 | kernel_weights=np.random.rand(40, 3), n_features_list=None), 44 | } 45 | 46 | 47 | @pytest.mark.parametrize("name", ["params_1", "params_2"]) 48 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 49 | def test_generate_multikernel_dataset(backend, name): 50 | backend = set_backend(backend) 51 | 52 | kwargs = _parameters[name] 53 | 54 | (X_train, X_test, Y_train, Y_test, kernel_weights, 55 | n_features_list) = generate_multikernel_dataset(**kwargs) 56 | 57 | assert X_train.shape[0] == kwargs["n_samples_train"] 58 | assert X_test.shape[0] == kwargs["n_samples_test"] 59 | assert Y_train.shape[0] == kwargs["n_samples_train"] 60 | assert Y_test.shape[0] == kwargs["n_samples_test"] 61 | assert Y_train.shape[1] == kwargs["n_targets"] 62 | assert Y_test.shape[1] == kwargs["n_targets"] 63 | assert len(n_features_list) == kwargs["n_kernels"] 64 | assert kernel_weights.shape[1] == kwargs["n_kernels"] 65 | assert kernel_weights.shape[0] == kwargs["n_targets"] 66 | 67 | if kwargs["kernel_weights"] is not None: 68 | assert_array_almost_equal(kwargs["kernel_weights"], 69 | kernel_weights) 70 | if kwargs["n_features_list"] is not None: 71 | assert np.sum(kwargs["n_features_list"]) == X_train.shape[1] 72 | assert np.sum(kwargs["n_features_list"]) == X_test.shape[1] 73 | -------------------------------------------------------------------------------- /himalaya/backend/torch_cuda.py: -------------------------------------------------------------------------------- 1 | """The "torch_cuda" GPU backend, based on PyTorch. 2 | 3 | To use this backend, call ``himalaya.backend.set_backend("torch_cuda")``. 
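
A minimal usage sketch (assuming a CUDA-enabled PyTorch installation)::

    from himalaya.backend import set_backend
    backend = set_backend("torch_cuda", on_error="warn")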
4 | """ 5 | from .torch import * # noqa 6 | import torch 7 | 8 | if not torch.cuda.is_available(): 9 | import sys 10 | if "pytest" in sys.modules: # if run through pytest 11 | import pytest 12 | pytest.skip("PyTorch with CUDA is not available.") 13 | raise RuntimeError("PyTorch with CUDA is not available.") 14 | 15 | from ._utils import _dtype_to_str 16 | from ._utils import warn_if_not_float32 17 | 18 | ############################################################################### 19 | 20 | name = "torch_cuda" 21 | 22 | 23 | def randn(*args, **kwargs): 24 | return torch.randn(*args, **kwargs).cuda() 25 | 26 | 27 | def rand(*args, **kwargs): 28 | return torch.rand(*args, **kwargs).cuda() 29 | 30 | 31 | def asarray(x, dtype=None, device="cuda"): 32 | if dtype is None: 33 | if isinstance(x, torch.Tensor): 34 | dtype = x.dtype 35 | if hasattr(x, "dtype") and hasattr(x.dtype, "name"): 36 | dtype = x.dtype.name 37 | if dtype is not None: 38 | dtype = _dtype_to_str(dtype) 39 | dtype = getattr(torch, dtype) 40 | if device is None: 41 | if isinstance(x, torch.Tensor): 42 | device = x.device 43 | else: 44 | device = "cuda" 45 | try: 46 | tensor = torch.as_tensor(x, dtype=dtype, device=device) 47 | except Exception: 48 | import numpy as np 49 | array = np.asarray(x, dtype=_dtype_to_str(dtype)) 50 | tensor = torch.as_tensor(array, dtype=dtype, device=device) 51 | return tensor 52 | 53 | 54 | def check_arrays(*all_inputs): 55 | """Change all inputs into Tensors (or list of Tensors) using the same 56 | precision and device as the first one. Some tensors can be None. 57 | """ 58 | all_tensors = [] 59 | all_tensors.append(asarray(all_inputs[0])) 60 | dtype = all_tensors[0].dtype 61 | warn_if_not_float32(dtype) 62 | device = all_tensors[0].device 63 | for tensor in all_inputs[1:]: 64 | if tensor is None: 65 | pass 66 | elif isinstance(tensor, list): 67 | tensor = [asarray(tt, dtype=dtype, device=device) for tt in tensor] 68 | else: 69 | tensor = asarray(tensor, dtype=dtype, device=device) 70 | all_tensors.append(tensor) 71 | return all_tensors 72 | 73 | 74 | def zeros(shape, dtype="float32", device="cuda"): 75 | if isinstance(shape, int): 76 | shape = (shape, ) 77 | if isinstance(dtype, str): 78 | dtype = getattr(torch, dtype) 79 | return torch.zeros(shape, dtype=dtype, device=device) 80 | 81 | 82 | def to_cpu(array): 83 | return array.cpu() 84 | 85 | 86 | def to_gpu(array, device="cuda"): 87 | return asarray(array, device=device) 88 | -------------------------------------------------------------------------------- /himalaya/kernel_ridge/tests/test_input_arrays.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import pytest 3 | 4 | import sklearn.linear_model 5 | import sklearn.model_selection 6 | 7 | from himalaya.backend import set_backend 8 | from himalaya.backend import ALL_BACKENDS 9 | 10 | from himalaya.kernel_ridge import solve_multiple_kernel_ridge_random_search 11 | from himalaya.kernel_ridge import solve_multiple_kernel_ridge_hyper_gradient 12 | 13 | 14 | def _create_dataset(backend): 15 | n_featuress = (50, 80) 16 | n_samples = 30 17 | n_targets = 2 18 | n_gammas = 3 19 | 20 | Xs = [ 21 | backend.asarray(backend.randn(n_samples, n_features), backend.float64) 22 | for n_features in n_featuress 23 | ] 24 | Ks = backend.stack([X @ X.T for X in Xs]) 25 | 26 | ws = [ 27 | backend.asarray(backend.randn(n_features, n_targets), backend.float64) 28 | for n_features in n_featuress 29 | ] 30 | Ys = backend.stack([X @ w for X, w in zip(Xs, ws)]) 
31 | Y = Ys.sum(0) 32 | 33 | gammas = backend.asarray(backend.rand(n_gammas, Ks.shape[0]), 34 | backend.float64) 35 | gammas /= gammas.sum(1)[:, None] 36 | 37 | return Ks, Y, gammas 38 | 39 | 40 | @pytest.mark.parametrize('Ks_in_cpu', [True, False]) 41 | @pytest.mark.parametrize('Y_in_cpu', [True, False]) 42 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 43 | def test_random_search(backend, Ks_in_cpu, Y_in_cpu): 44 | backend = set_backend(backend) 45 | 46 | Ks, Y, gammas = _create_dataset(backend) 47 | gammas = gammas[:1] 48 | alphas = backend.asarray_like(backend.logspace(-3, 5, 3), Ks) 49 | cv = sklearn.model_selection.check_cv(2) 50 | 51 | for Ks_, Y_, gammas_, alphas_ in itertools.product( 52 | [Ks, backend.to_numpy(Ks), 53 | backend.to_cpu(Ks)], 54 | [Y, backend.to_numpy(Y), backend.to_cpu(Y)], 55 | [gammas, backend.to_numpy(gammas), 56 | backend.to_cpu(gammas), 2], 57 | [alphas, backend.to_numpy(alphas), 58 | backend.to_cpu(alphas)], 59 | ): 60 | 61 | deltas, _, _ = solve_multiple_kernel_ridge_random_search( 62 | Ks_, Y_, n_iter=gammas_, alphas=alphas_, cv=cv, progress_bar=False, 63 | Ks_in_cpu=Ks_in_cpu, Y_in_cpu=Y_in_cpu) 64 | 65 | assert deltas.dtype == Ks.dtype 66 | assert getattr(deltas, "device", None) == getattr(Ks, "device", None) 67 | 68 | 69 | @pytest.mark.parametrize('Y_in_cpu', [True, False]) 70 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 71 | def test_hyper_gradient(backend, Y_in_cpu): 72 | backend = set_backend(backend) 73 | 74 | Ks, Y, _ = _create_dataset(backend) 75 | cv = sklearn.model_selection.check_cv(2) 76 | 77 | for Ks_, Y_ in itertools.product( 78 | [Ks, backend.to_numpy(Ks), 79 | backend.to_cpu(Ks)], 80 | [Y, backend.to_numpy(Y), backend.to_cpu(Y)], 81 | ): 82 | 83 | deltas, _, _ = solve_multiple_kernel_ridge_hyper_gradient( 84 | Ks_, Y_, max_iter=1, cv=cv, progress_bar=False, Y_in_cpu=Y_in_cpu) 85 | 86 | assert deltas.dtype == Ks.dtype 87 | assert getattr(deltas, "device", None) == getattr(Ks, "device", None) 88 | -------------------------------------------------------------------------------- /himalaya/kernel_ridge/__init__.py: -------------------------------------------------------------------------------- 1 | from ._solvers import solve_weighted_kernel_ridge_gradient_descent 2 | from ._solvers import solve_weighted_kernel_ridge_conjugate_gradient 3 | from ._solvers import solve_weighted_kernel_ridge_neumann_series 4 | from ._solvers import solve_kernel_ridge_eigenvalues 5 | from ._solvers import solve_kernel_ridge_gradient_descent 6 | from ._solvers import solve_kernel_ridge_conjugate_gradient 7 | from ._solvers import KERNEL_RIDGE_SOLVERS 8 | from ._solvers import WEIGHTED_KERNEL_RIDGE_SOLVERS 9 | from ._hyper_gradient import solve_multiple_kernel_ridge_hyper_gradient 10 | from ._hyper_gradient import MULTIPLE_KERNEL_RIDGE_SOLVERS 11 | from ._random_search import solve_multiple_kernel_ridge_random_search 12 | from ._random_search import generate_dirichlet_samples 13 | from ._random_search import solve_kernel_ridge_cv_eigenvalues 14 | from ._random_search import solve_kernel_ridge_cv_svd 15 | from ._random_search import KERNEL_RIDGE_CV_SOLVERS 16 | from ._predictions import predict_weighted_kernel_ridge 17 | from ._predictions import predict_and_score_weighted_kernel_ridge 18 | from ._predictions import primal_weights_kernel_ridge 19 | from ._predictions import primal_weights_weighted_kernel_ridge 20 | from ._sklearn_api import KernelRidge 21 | from ._sklearn_api import KernelRidgeCV 22 | from ._sklearn_api import MultipleKernelRidgeCV 23 | 
from ._sklearn_api import WeightedKernelRidge 24 | from ._kernels import PAIRWISE_KERNEL_FUNCTIONS 25 | from ._kernels import linear_kernel 26 | from ._kernels import polynomial_kernel 27 | from ._kernels import rbf_kernel 28 | from ._kernels import sigmoid_kernel 29 | from ._kernels import cosine_similarity_kernel 30 | from ._kernels import KernelCenterer 31 | from ._kernelizer import Kernelizer 32 | from ._kernelizer import ColumnKernelizer 33 | from ._kernelizer import make_column_kernelizer 34 | 35 | __all__ = [ 36 | # kernel ridge solvers 37 | "solve_weighted_kernel_ridge_gradient_descent", 38 | "solve_weighted_kernel_ridge_conjugate_gradient", 39 | "solve_weighted_kernel_ridge_neumann_series", 40 | "solve_kernel_ridge_cv_eigenvalues", 41 | "solve_kernel_ridge_cv_svd", 42 | "solve_kernel_ridge_eigenvalues", 43 | "solve_kernel_ridge_gradient_descent", 44 | "solve_kernel_ridge_conjugate_gradient", 45 | "KERNEL_RIDGE_SOLVERS", 46 | "KERNEL_RIDGE_CV_SOLVERS", 47 | "WEIGHTED_KERNEL_RIDGE_SOLVERS", 48 | # multiple kernel ridge solvers 49 | "MULTIPLE_KERNEL_RIDGE_SOLVERS", 50 | "solve_multiple_kernel_ridge_hyper_gradient", 51 | "solve_multiple_kernel_ridge_random_search", 52 | # helpers 53 | "generate_dirichlet_samples", 54 | "predict_weighted_kernel_ridge", 55 | "predict_and_score_weighted_kernel_ridge", 56 | "primal_weights_kernel_ridge", 57 | "primal_weights_weighted_kernel_ridge", 58 | # scikit-learn API 59 | "KernelRidge", 60 | "KernelRidgeCV", 61 | "MultipleKernelRidgeCV", 62 | "WeightedKernelRidge", 63 | # kernels 64 | "PAIRWISE_KERNEL_FUNCTIONS", 65 | "linear_kernel", 66 | "polynomial_kernel", 67 | "rbf_kernel", 68 | "sigmoid_kernel", 69 | "cosine_similarity_kernel", 70 | "KernelCenterer", 71 | # kernelizers 72 | "Kernelizer", 73 | "ColumnKernelizer", 74 | "make_column_kernelizer", 75 | ] 76 | -------------------------------------------------------------------------------- /himalaya/ridge/tests/test_column.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from sklearn.pipeline import make_pipeline 4 | from sklearn.preprocessing import StandardScaler 5 | 6 | from himalaya.backend import set_backend 7 | from himalaya.backend import ALL_BACKENDS 8 | from himalaya.utils import assert_array_almost_equal 9 | 10 | from himalaya.ridge import ColumnTransformerNoStack 11 | from himalaya.ridge import make_column_transformer_no_stack 12 | from himalaya.ridge import GroupRidgeCV 13 | 14 | 15 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 16 | def test_column_transformer_all_columns(backend): 17 | backend = set_backend(backend) 18 | X = np.random.randn(10, 5) 19 | 20 | ct = ColumnTransformerNoStack([("name", StandardScaler(), slice(0, 5))]) 21 | Xt = ct.fit_transform(X) 22 | assert len(Xt) == 1 23 | assert Xt[0].shape == (10, 5) 24 | 25 | 26 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 27 | def test_column_transformer_passthrough(backend): 28 | backend = set_backend(backend) 29 | X = np.random.randn(10, 5) 30 | 31 | ct = ColumnTransformerNoStack([("name", "passthrough", slice(0, 5))]) 32 | Xt = ct.fit_transform(X) 33 | assert len(Xt) == 1 34 | assert Xt[0].shape == (10, 5) 35 | assert_array_almost_equal(X, Xt[0]) 36 | 37 | 38 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 39 | def test_column_transformer_remainder(backend): 40 | backend = set_backend(backend) 41 | X = np.random.randn(10, 5) 42 | 43 | ct = ColumnTransformerNoStack([("name", "passthrough", slice(0, 0))], 44 | remainder="passthrough") 45 
| Xt = ct.fit_transform(X) 46 | assert len(Xt) == 2 47 | assert Xt[0].shape == (10, 0) 48 | assert Xt[1].shape == (10, 5) 49 | assert_array_almost_equal(X, Xt[1]) 50 | 51 | 52 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 53 | def test_column_transformer_multiple(backend): 54 | backend = set_backend(backend) 55 | X = np.random.randn(10, 5) 56 | 57 | ct = ColumnTransformerNoStack([ 58 | ("name0", StandardScaler(), [0, 1]), 59 | ("name1", StandardScaler(with_mean=False), [2, 3]), 60 | ], remainder="passthrough") 61 | Xt = ct.fit_transform(X) 62 | assert len(Xt) == 3 63 | assert Xt[0].shape == (10, 2) 64 | assert Xt[1].shape == (10, 2) 65 | assert Xt[2].shape == (10, 1) 66 | 67 | 68 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 69 | def test_make_column_transformer(backend): 70 | backend = set_backend(backend) 71 | 72 | trans = StandardScaler() 73 | ct = make_column_transformer_no_stack((trans, slice(0, 3))) 74 | 75 | assert isinstance(ct, ColumnTransformerNoStack) 76 | assert len(ct.transformers) == 1 77 | assert len(ct.transformers[0]) == 3 78 | assert ct.transformers[0][0] == "standardscaler" 79 | assert ct.transformers[0][1] == trans 80 | assert ct.transformers[0][2] == slice(0, 3) 81 | 82 | trans = StandardScaler() 83 | ct = make_column_transformer_no_stack((trans, slice(0, 3)), 84 | ("passthrough", [3, 4])) 85 | 86 | assert isinstance(ct, ColumnTransformerNoStack) 87 | assert len(ct.transformers) == 2 88 | assert len(ct.transformers[0]) == 3 89 | assert len(ct.transformers[1]) == 3 90 | assert ct.transformers[0][0] == "standardscaler" 91 | assert ct.transformers[0][1] == trans 92 | assert ct.transformers[0][2] == slice(0, 3) 93 | assert ct.transformers[1][0] == "passthrough" 94 | assert ct.transformers[1][1] == "passthrough" 95 | assert ct.transformers[1][2] == [3, 4] 96 | 97 | 98 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 99 | def test_column_transformer_in_pipeline(backend): 100 | backend = set_backend(backend) 101 | 102 | X = np.random.randn(10, 5) 103 | Y = np.random.randn(10, 3) 104 | 105 | ct = make_column_transformer_no_stack( 106 | (StandardScaler(), slice(0, 4)), 107 | (StandardScaler(), slice(4, 6)), 108 | ) 109 | pipe = make_pipeline( 110 | ct, 111 | GroupRidgeCV( 112 | groups="input", solver_params=dict(n_iter=np.ones((1, 2)), 113 | progress_bar=False))) 114 | pipe.fit(X, Y) 115 | -------------------------------------------------------------------------------- /himalaya/backend/_utils.py: -------------------------------------------------------------------------------- 1 | import types 2 | import importlib 3 | import warnings 4 | from functools import wraps 5 | 6 | ALL_BACKENDS = [ 7 | "numpy", 8 | "cupy", 9 | "torch", 10 | "torch_cuda", 11 | ] 12 | 13 | CURRENT_BACKEND = "numpy" 14 | 15 | MATCHING_CPU_BACKEND = { 16 | "numpy": "numpy", 17 | "cupy": "numpy", 18 | "torch": "torch", 19 | "torch_cuda": "torch", 20 | } 21 | 22 | 23 | def set_backend(backend, on_error="raise"): 24 | """Set the backend using a global variable, and return the backend module. 25 | 26 | Parameters 27 | ---------- 28 | backend : str or module 29 | Name or module of the backend. 30 | on_error : str in {"raise", "warn"} 31 | Define what is done if the backend fails to be loaded. 32 | If "warn", this function only warns, and keeps the previous backend. 33 | If "raise", this function raises on errors. 34 | 35 | Returns 36 | ------- 37 | module : python module 38 | Module of the backend. 
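
Examples
--------
A sketch of typical use (the name must be one of ``ALL_BACKENDS``):

>>> from himalaya.backend import set_backend
>>> backend = set_backend("numpy")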
39 | """ 40 | global CURRENT_BACKEND 41 | 42 | try: 43 | if isinstance(backend, types.ModuleType): # get name from module 44 | backend = backend.name 45 | 46 | if backend not in ALL_BACKENDS: 47 | raise ValueError("Unknown backend=%r" % (backend, )) 48 | 49 | module = importlib.import_module(__package__ + "." + backend) 50 | CURRENT_BACKEND = backend 51 | except Exception as error: 52 | if on_error == "raise": 53 | raise error 54 | elif on_error == "warn": 55 | warnings.warn(f"Setting backend to {backend} failed: {str(error)}." 56 | f"Falling back to {CURRENT_BACKEND} backend.") 57 | module = get_backend() 58 | else: 59 | raise ValueError('Unknown value on_error=%r' % (on_error, )) 60 | 61 | return module 62 | 63 | 64 | def get_backend(): 65 | """Get the current backend module. 66 | 67 | Returns 68 | ------- 69 | module : python module 70 | Module of the backend. 71 | """ 72 | module = importlib.import_module(__package__ + "." + CURRENT_BACKEND) 73 | return module 74 | 75 | 76 | def _dtype_to_str(dtype): 77 | """Cast dtype to string, such as "float32", or "float64".""" 78 | if isinstance(dtype, str): 79 | return dtype 80 | elif hasattr(dtype, "name"): # works for numpy and cupy 81 | return dtype.name 82 | elif "torch." in str(dtype): # works for torch 83 | return str(dtype)[6:] 84 | elif dtype is None: 85 | return None 86 | else: 87 | raise NotImplementedError() 88 | 89 | 90 | def force_cpu_backend(func): 91 | """Decorator to force the use of a CPU backend.""" 92 | 93 | @wraps(func) 94 | def wrapper(*args, **kwargs): 95 | # skip if the object does not force cpu use 96 | if not hasattr(args[0], "force_cpu") or not args[0].force_cpu: 97 | return func(*args, **kwargs) 98 | 99 | # set corresponding cpu backend 100 | original_backend = get_backend().name 101 | temp_backend = MATCHING_CPU_BACKEND[original_backend] 102 | set_backend(temp_backend) 103 | 104 | # run function 105 | result = func(*args, **kwargs) 106 | 107 | # set back original backend 108 | set_backend(original_backend) 109 | return result 110 | 111 | return wrapper 112 | 113 | 114 | def _add_error_message(func, msg=""): 115 | """Decorator to add a custom error message to a function.""" 116 | 117 | @wraps(func) 118 | def with_error_message(*args, **kwargs): 119 | try: 120 | return func(*args, **kwargs) 121 | except Exception as e: 122 | raise RuntimeError( 123 | f"{msg}\nOriginal error:\n{type(e).__name__}: {e}") 124 | 125 | return with_error_message 126 | 127 | 128 | _already_warned = [False] 129 | 130 | 131 | def warn_if_not_float32(dtype): 132 | """Warn if X is not float32.""" 133 | if _already_warned[0]: # avoid warning multiple times 134 | return None 135 | 136 | if _dtype_to_str(dtype) != "float32": 137 | backend = get_backend() 138 | warnings.warn( 139 | f"GPU backend {backend.name} is much faster with single " 140 | f"precision floats (float32), got input in {dtype}. 
" 141 | "Consider casting your data to float32.", UserWarning) 142 | _already_warned[0] = True 143 | -------------------------------------------------------------------------------- /himalaya/kernel_ridge/tests/test_force_cpu.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from himalaya.backend import set_backend 4 | from himalaya.kernel_ridge import KernelCenterer 5 | from himalaya.kernel_ridge import Kernelizer 6 | from himalaya.kernel_ridge import ColumnKernelizer 7 | from himalaya.kernel_ridge import make_column_kernelizer 8 | from himalaya.kernel_ridge import KernelRidgeCV 9 | from himalaya.kernel_ridge import MultipleKernelRidgeCV 10 | from himalaya.ridge import RidgeCV 11 | from himalaya.ridge import GroupRidgeCV 12 | from himalaya.lasso import SparseGroupLassoCV 13 | 14 | GPU_BACKENDS = [ 15 | "cupy", 16 | "torch_cuda", 17 | ] 18 | 19 | 20 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 21 | @pytest.mark.parametrize('force_cpu', [True, False]) 22 | def test_kernel_centerer(backend, force_cpu): 23 | backend = set_backend(backend) 24 | X = backend.randn(5, 5) 25 | K = X @ X.T 26 | 27 | Kc = KernelCenterer(force_cpu=force_cpu).fit_transform(K) 28 | assert backend.is_in_gpu(Kc) != force_cpu 29 | 30 | 31 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 32 | @pytest.mark.parametrize('force_cpu', [True, False]) 33 | def test_kernelizer(backend, force_cpu): 34 | backend = set_backend(backend) 35 | X = backend.randn(10, 5) 36 | 37 | K = Kernelizer(force_cpu=force_cpu).fit_transform(X) 38 | assert backend.is_in_gpu(K) != force_cpu 39 | 40 | 41 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 42 | @pytest.mark.parametrize('force_cpu', [True, False]) 43 | def test_column_kernelizer(backend, force_cpu): 44 | backend = set_backend(backend) 45 | X = backend.randn(10, 5) 46 | 47 | Ks = ColumnKernelizer([ 48 | ("name", Kernelizer(), slice(0, 5)), 49 | ], force_cpu=force_cpu).fit_transform(X) 50 | 51 | assert backend.is_in_gpu(Ks) != force_cpu 52 | 53 | 54 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 55 | @pytest.mark.parametrize('force_cpu', [True, False]) 56 | def test_make_column_kernelizer(backend, force_cpu): 57 | backend = set_backend(backend) 58 | X = backend.randn(10, 5) 59 | 60 | Ks = make_column_kernelizer((Kernelizer(), slice(0, 5)), 61 | force_cpu=force_cpu).fit_transform(X) 62 | assert backend.is_in_gpu(Ks) != force_cpu 63 | 64 | 65 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 66 | @pytest.mark.parametrize('force_cpu', [True, False]) 67 | def test_kernel_ridge_cv(backend, force_cpu): 68 | backend = set_backend(backend) 69 | X = backend.randn(10, 5) 70 | Y = backend.randn(10, 2) 71 | 72 | best_alphas_ = KernelRidgeCV(force_cpu=force_cpu).fit(X, Y).best_alphas_ 73 | assert backend.is_in_gpu(best_alphas_) != force_cpu 74 | 75 | 76 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 77 | @pytest.mark.parametrize('force_cpu', [True, False]) 78 | def test_multiple_kernel_ridge_cv(backend, force_cpu): 79 | backend = set_backend(backend) 80 | X = backend.randn(10, 5) 81 | Y = backend.randn(10, 2) 82 | 83 | deltas_ = MultipleKernelRidgeCV( 84 | kernels=["linear"], force_cpu=force_cpu, 85 | solver_params=dict(n_iter=2, progress_bar=False)).fit(X, Y).deltas_ 86 | assert backend.is_in_gpu(deltas_) != force_cpu 87 | 88 | 89 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 90 | @pytest.mark.parametrize('force_cpu', [True, False]) 91 | def test_ridge_cv(backend, force_cpu): 92 | backend = set_backend(backend) 93 
| X = backend.randn(10, 5) 94 | Y = backend.randn(10, 2) 95 | 96 | best_alphas_ = RidgeCV(force_cpu=force_cpu).fit(X, Y).best_alphas_ 97 | assert backend.is_in_gpu(best_alphas_) != force_cpu 98 | 99 | 100 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 101 | @pytest.mark.parametrize('force_cpu', [True, False]) 102 | def test_group_ridge_cv(backend, force_cpu): 103 | backend = set_backend(backend) 104 | X = backend.randn(10, 5) 105 | Y = backend.randn(10, 2) 106 | 107 | deltas_ = GroupRidgeCV(groups=[0, 1, 0, 1, 1], 108 | force_cpu=force_cpu, solver_params=dict( 109 | n_iter=2, progress_bar=False)).fit(X, Y).deltas_ 110 | assert backend.is_in_gpu(deltas_) != force_cpu 111 | 112 | 113 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 114 | @pytest.mark.parametrize('force_cpu', [True, False]) 115 | def test_sparse_group_lasso_cv(backend, force_cpu): 116 | backend = set_backend(backend) 117 | X = backend.randn(10, 5) 118 | Y = backend.randn(10, 2) 119 | 120 | best_l21_reg_ = SparseGroupLassoCV( 121 | groups=[0, 1, 0, 1, 1], force_cpu=force_cpu, 122 | solver_params=dict(progress_bar=False)).fit(X, Y).best_l21_reg_ 123 | assert backend.is_in_gpu(best_l21_reg_) != force_cpu 124 | -------------------------------------------------------------------------------- /himalaya/ridge/tests/test_random_search_ridge.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | import sklearn.linear_model 5 | import sklearn.model_selection 6 | 7 | from himalaya.backend import set_backend 8 | from himalaya.backend import ALL_BACKENDS 9 | from himalaya.utils import assert_array_almost_equal 10 | from himalaya.scoring import r2_score 11 | 12 | from himalaya.ridge import solve_group_ridge_random_search 13 | 14 | 15 | def _create_dataset(backend): 16 | n_featuress = (10, 20) 17 | n_samples = 80 18 | n_targets = 4 19 | n_gammas = 3 20 | 21 | Xs = [ 22 | backend.asarray(backend.randn(n_samples, n_features), backend.float64) 23 | for n_features in n_featuress 24 | ] 25 | 26 | ws = [ 27 | backend.asarray(backend.randn(n_features, n_targets), backend.float64) 28 | for n_features in n_featuress 29 | ] 30 | Ys = backend.stack([X @ w for X, w in zip(Xs, ws)]) 31 | Y = Ys.sum(0) 32 | Y += backend.asarray(backend.randn(*Y.shape), backend.float64) 33 | 34 | gammas = backend.asarray(backend.rand(n_gammas, len(Xs)), backend.float64) 35 | gammas /= gammas.sum(1)[:, None] 36 | 37 | return Xs, Y, gammas 38 | 39 | 40 | @pytest.mark.parametrize('n_targets_batch', [None, 3]) 41 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 42 | def test_solve_group_ridge_random_search_n_targets_batch( 43 | backend, n_targets_batch): 44 | _test_solve_group_ridge_random_search(backend=backend, 45 | n_targets_batch=n_targets_batch) 46 | 47 | 48 | @pytest.mark.parametrize('n_alphas_batch', [None, 2]) 49 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 50 | def test_solve_group_ridge_random_search_n_alphas_batch( 51 | backend, n_alphas_batch): 52 | _test_solve_group_ridge_random_search(backend=backend, 53 | n_alphas_batch=n_alphas_batch) 54 | 55 | 56 | def _test_solve_group_ridge_random_search(backend, n_targets_batch=None, 57 | n_alphas_batch=None): 58 | backend = set_backend(backend) 59 | 60 | Xs, Y, gammas = _create_dataset(backend) 61 | alphas = backend.asarray_like(backend.logspace(-3, 5, 9), Xs[0]) 62 | n_targets = Y.shape[1] 63 | cv = sklearn.model_selection.check_cv(10) 64 | 65 | ############ 66 | # run solver 67 | results = solve_group_ridge_random_search( 68 
| Xs, Y, n_iter=gammas, alphas=alphas, score_func=r2_score, cv=cv, 69 | n_targets_batch=n_targets_batch, progress_bar=False, 70 | return_weights=True, n_alphas_batch=n_alphas_batch, 71 | diagonalize_method="svd") 72 | best_deltas, refit_weights, cv_scores = results 73 | 74 | ######################################### 75 | # compare with sklearn.linear_model.Ridge 76 | test_scores = [] 77 | for gamma in gammas: 78 | X = backend.concatenate( [x * backend.sqrt(g) for x, g in zip(Xs, gamma)], 1) 80 | for train, test in cv.split(X): 81 | for alpha in alphas: 82 | model = sklearn.linear_model.Ridge( alpha=backend.to_numpy(alpha), fit_intercept=False) 84 | model = model.fit(backend.to_numpy(X[train]), backend.to_numpy(Y[train])) 86 | predictions = backend.asarray_like( model.predict(backend.to_numpy(X[test])), ref=Y) 88 | test_scores.append(r2_score(Y[test], predictions)) 89 | 90 | test_scores = backend.stack(test_scores) 91 | test_scores = test_scores.reshape(len(gammas), cv.get_n_splits(), len(alphas), n_targets) 93 | test_scores_mean = backend.max(test_scores.mean(1), 1) 94 | assert_array_almost_equal(cv_scores, test_scores_mean, decimal=5) 95 | 96 | ###################### 97 | # test refit_weights 98 | for tt in range(n_targets): 99 | gamma = backend.exp(best_deltas[:, tt]) 100 | alpha = 1.0 101 | 102 | # compare primal weights with sklearn.linear_model.Ridge 103 | X = backend.concatenate( [X * backend.sqrt(g) for X, g in zip(Xs, gamma)], 1) 105 | model = sklearn.linear_model.Ridge(fit_intercept=False, alpha=backend.to_numpy(alpha)) 107 | w1 = model.fit(backend.to_numpy(X), backend.to_numpy(Y[:, tt])).coef_ 108 | w1 = np.split(w1, np.cumsum([X.shape[1] for X in Xs][:-1]), axis=0) 109 | w1 = [backend.asarray(w) for w in w1] 110 | w1_scaled = backend.concatenate( [w * backend.sqrt(g) for w, g in zip(w1, gamma)]) 112 | assert_array_almost_equal(w1_scaled, refit_weights[:, tt], decimal=5) 113 | 114 | 115 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 116 | def test_different_number_of_samples(backend): 117 | backend = set_backend(backend) 118 | Xs, Y, gammas = _create_dataset(backend) 119 | 120 | with pytest.raises(ValueError, match="same number of samples"): 121 | solve_group_ridge_random_search(Xs[:4], Y[:3]) 122 | -------------------------------------------------------------------------------- /examples/multiple_kernel_ridge/plot_mkr_3_path.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multiple-kernel ridge path between two kernels 3 | ============================================== 4 | This example demonstrates the path of all possible ratios of kernel weights 5 | between two kernels, in a multiple kernel ridge regression model. Over the path 6 | of ratios, the kernels are weighted by the kernel weights, then summed, and a 7 | joint model is fit on the obtained kernel. The explained variance on a test set 8 | is then computed, and decomposed over both kernels. 
9 | """ 10 | from functools import partial 11 | 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | 15 | from himalaya.backend import set_backend 16 | from himalaya.kernel_ridge import MultipleKernelRidgeCV 17 | from himalaya.kernel_ridge import Kernelizer 18 | from himalaya.kernel_ridge import ColumnKernelizer 19 | from himalaya.progress_bar import bar 20 | from himalaya.utils import generate_multikernel_dataset 21 | 22 | from sklearn.pipeline import make_pipeline 23 | from sklearn import set_config 24 | set_config(display='diagram') 25 | 26 | ############################################################################### 27 | # In this example, we use the ``cupy`` backend. 28 | 29 | backend = set_backend("cupy", on_error="warn") 30 | 31 | ############################################################################### 32 | # Generate a random dataset 33 | # ------------------------- 34 | # - X_train : array of shape (n_samples_train, n_features) 35 | # - X_test : array of shape (n_samples_test, n_features) 36 | # - Y_train : array of shape (n_samples_train, n_targets) 37 | # - Y_test : array of shape (n_samples_test, n_targets) 38 | 39 | n_targets = 50 40 | kernel_weights = np.tile(np.array([0.6, 0.4])[None], (n_targets, 1)) 41 | 42 | (X_train, X_test, Y_train, Y_test, 43 | kernel_weights, n_features_list) = generate_multikernel_dataset( 44 | n_kernels=2, n_targets=n_targets, n_samples_train=600, 45 | n_samples_test=300, random_state=42, noise=0.31, 46 | kernel_weights=kernel_weights) 47 | 48 | feature_names = [f"Feature space {ii}" for ii in range(len(n_features_list))] 49 | 50 | ############################################################################### 51 | # Create a MultipleKernelRidgeCV model, see plot_mkr_sklearn_api.py for more 52 | # details. 53 | 54 | # Find the start and end of each feature space X in Xs. 55 | start_and_end = np.concatenate([[0], np.cumsum(n_features_list)]) 56 | slices = [ 57 | slice(start, end) 58 | for start, end in zip(start_and_end[:-1], start_and_end[1:]) 59 | ] 60 | 61 | # Create a different ``Kernelizer`` for each feature space. 62 | kernelizers = [(name, Kernelizer(), slice_) 63 | for name, slice_ in zip(feature_names, slices)] 64 | column_kernelizer = ColumnKernelizer(kernelizers) 65 | 66 | # Create a MultipleKernelRidgeCV model. 67 | solver_params = dict(alphas=np.logspace(-5, 5, 41), progress_bar=False) 68 | model = MultipleKernelRidgeCV(kernels="precomputed", solver="random_search", 69 | solver_params=solver_params, 70 | random_state=42) 71 | pipe = make_pipeline(column_kernelizer, model) 72 | pipe 73 | 74 | ############################################################################### 75 | # Then, we manually perform a hyperparameter grid search for the kernel weights. 76 | 77 | # Make the score method use `split=True` by default. 78 | model.score = partial(model.score, split=True) 79 | 80 | # Define the hyperparameter grid search. 
81 | ratios = np.logspace(-4, 4, 41) 82 | candidates = np.array([1 - ratios / (1 + ratios), ratios / (1 + ratios)]).T 83 | 84 | # Loop over hyperparameter candidates 85 | split_r2_scores = [] 86 | for candidate in bar(candidates, "Hyperparameter candidates"): 87 | # test one hyperparameter candidate at a time 88 | pipe[-1].solver_params["n_iter"] = candidate[None] 89 | pipe.fit(X_train, Y_train) 90 | 91 | # split the R2 score between both kernels 92 | scores = pipe.score(X_test, Y_test) 93 | split_r2_scores.append(backend.to_numpy(scores)) 94 | 95 | # average scores over targets for plotting 96 | split_r2_scores_avg = np.array(split_r2_scores).mean(axis=2) 97 | 98 | ############################################################################### 99 | # Plot the variance decomposition for all the hyperparameter ratios. 100 | # 101 | # For a ratio of 1e-3, feature space 0 is almost not used. For a ratio of 1e3, 102 | # feature space 1 is almost not used. The best ratio is here around 1, because 103 | # the feature spaces are used with similar scales in the simulated dataset. 104 | 105 | fig, ax = plt.subplots(figsize=(5, 4)) 106 | accumulator = np.zeros_like(ratios) 107 | for split in split_r2_scores_avg.T: 108 | ax.fill_between(ratios, accumulator, accumulator + split, alpha=0.7) 109 | accumulator += split 110 | 111 | ax.set(xscale='log') 112 | ax.set(xlabel=r"Ratio of kernel weight ($\gamma_A / \gamma_B$)") 113 | ax.set(ylabel=r"$R^2$ score (test set)") 114 | ax.set(title=r"$R^2$ score decomposition") 115 | ax.legend(feature_names, loc="upper left") 116 | ax.grid() 117 | fig.tight_layout() 118 | plt.show() 119 | -------------------------------------------------------------------------------- /examples/multiple_kernel_ridge/plot_mkr_5_refine_results.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multiple-kernel ridge refining 3 | ============================== 4 | This example demonstrates how to solve multiple-kernel ridge regression with 5 | hyperparameter random search, then refine the results with hyperparameter 6 | gradient descent. 7 | """ 8 | import numpy as np 9 | 10 | from himalaya.backend import set_backend 11 | from himalaya.kernel_ridge import MultipleKernelRidgeCV 12 | from himalaya.kernel_ridge import Kernelizer 13 | from himalaya.kernel_ridge import ColumnKernelizer 14 | from himalaya.utils import generate_multikernel_dataset 15 | 16 | from sklearn.pipeline import make_pipeline 17 | from sklearn import set_config 18 | set_config(display='diagram') 19 | 20 | ############################################################################### 21 | # In this example, we use the ``cupy`` backend (GPU). 
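# If cupy is not available, ``on_error="warn"`` makes ``set_backend`` only
# warn and keep the previous backend, so the example still runs on CPU.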
22 | 23 | backend = set_backend("cupy", on_error="warn") 24 | 25 | ############################################################################### 26 | # Generate a random dataset 27 | # ------------------------- 28 | # - X_train : array of shape (n_samples_train, n_features) 29 | # - X_test : array of shape (n_samples_test, n_features) 30 | # - Y_train : array of shape (n_samples_train, n_targets) 31 | # - Y_test : array of shape (n_samples_test, n_targets) 32 | 33 | (X_train, X_test, Y_train, Y_test, kernel_weights, 34 | n_features_list) = generate_multikernel_dataset(n_kernels=4, n_targets=50, 35 | n_samples_train=600, 36 | n_samples_test=300, 37 | random_state=42) 38 | 39 | feature_names = [f"Feature space {ii}" for ii in range(len(n_features_list))] 40 | 41 | ############################################################################### 42 | # Prepare the pipeline 43 | # -------------------- 44 | 45 | # Find the start and end of each feature space X in Xs 46 | start_and_end = np.concatenate([[0], np.cumsum(n_features_list)]) 47 | slices = [ 48 | slice(start, end) 49 | for start, end in zip(start_and_end[:-1], start_and_end[1:]) 50 | ] 51 | 52 | # Create a different ``Kernelizer`` for each feature space. 53 | kernelizers = [("space %d" % ii, Kernelizer(), slice_) 54 | for ii, slice_ in enumerate(slices)] 55 | column_kernelizer = ColumnKernelizer(kernelizers) 56 | 57 | ############################################################################### 58 | # Define the random-search model 59 | # ------------------------------ 60 | # We use very few iterations on purpose, to make the random search suboptimal, 61 | # and refine it with hyperparameter gradient descent. 62 | 63 | solver_params = dict(n_iter=5, alphas=np.logspace(-10, 10, 41)) 64 | 65 | model_1 = MultipleKernelRidgeCV(kernels="precomputed", solver="random_search", 66 | solver_params=solver_params, random_state=42) 67 | pipe_1 = make_pipeline(column_kernelizer, model_1) 68 | 69 | # Fit the model on all targets 70 | pipe_1.fit(X_train, Y_train) 71 | 72 | ############################################################################### 73 | # Define the gradient-descent model 74 | # --------------------------------- 75 | 76 | solver_params = dict(max_iter=10, hyper_gradient_method="direct", 77 | max_iter_inner_hyper=10, 78 | initial_deltas="here_will_go_the_previous_deltas") 79 | 80 | model_2 = MultipleKernelRidgeCV(kernels="precomputed", solver="hyper_gradient", 81 | solver_params=solver_params) 82 | pipe_2 = make_pipeline(column_kernelizer, model_2) 83 | 84 | ############################################################################### 85 | # Use the random-search to initialize the gradient-descent 86 | # -------------------------------------------------------- 87 | 88 | # We might want to refine only the best-predicting targets, since the 89 | # hyperparameter gradient descent is less efficient over many targets. 
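# Keep the targets whose best cross-validation score is above the 40th
# percentile, i.e. the top 60% of targets.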
90 | top = 60 # top 60% 91 | best_cv_scores = backend.to_numpy(pipe_1[-1].cv_scores_.max(0)) 92 | mask = best_cv_scores > np.percentile(best_cv_scores, 100 - top) 93 | 94 | pipe_2[-1].solver_params['initial_deltas'] = pipe_1[-1].deltas_[:, mask] 95 | pipe_2.fit(X_train, Y_train[:, mask]) 96 | 97 | ############################################################################### 98 | # Compute predictions on a test set 99 | # --------------------------------- 100 | import matplotlib.pyplot as plt 101 | 102 | # use the first model for all targets 103 | test_scores_1 = pipe_1.score(X_test, Y_test) 104 | 105 | # use the second model for the refined targets 106 | test_scores_2 = backend.copy(test_scores_1) 107 | test_scores_2[mask] = pipe_2.score(X_test, Y_test[:, mask]) 108 | 109 | test_scores_1 = backend.to_numpy(test_scores_1) 110 | test_scores_2 = backend.to_numpy(test_scores_2) 111 | plt.figure(figsize=(4, 4)) 112 | plt.scatter(test_scores_1, test_scores_2, alpha=0.3) 113 | plt.xlim(0, 1) 114 | plt.plot(plt.xlim(), plt.xlim(), color='k', lw=1) 115 | plt.xlabel(r"Base model") 116 | plt.ylabel(r"Refined model") 117 | plt.title("$R^2$ generalization score") 118 | plt.grid() 119 | plt.tight_layout() 120 | plt.show() 121 | -------------------------------------------------------------------------------- /himalaya/ridge/tests/test_solvers_ridge.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | import sklearn.linear_model 5 | import scipy.linalg 6 | 7 | from himalaya.backend import set_backend 8 | from himalaya.backend import ALL_BACKENDS 9 | from himalaya.utils import assert_array_almost_equal 10 | 11 | from himalaya.ridge import RIDGE_SOLVERS 12 | 13 | 14 | def _create_dataset(backend, many_targets=False): 15 | if many_targets: 16 | n_samples, n_features, n_targets = 10, 5, 20 17 | else: 18 | n_samples, n_features, n_targets = 30, 10, 3 19 | 20 | X = backend.asarray(backend.randn(n_samples, n_features), backend.float64) 21 | Y = backend.asarray(backend.randn(n_samples, n_targets), backend.float64) 22 | weights = backend.asarray(backend.randn(n_features, n_targets), 23 | backend.float64) 24 | 25 | return X, Y, weights 26 | 27 | 28 | @pytest.mark.parametrize('many_targets', [False, True]) 29 | @pytest.mark.parametrize('solver_name', RIDGE_SOLVERS) 30 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 31 | def test_solve_kernel_ridge(solver_name, backend, many_targets): 32 | backend = set_backend(backend) 33 | 34 | X, Y, weights = _create_dataset(backend, many_targets=many_targets) 35 | alphas = backend.asarray_like(backend.logspace(-2, 5, 7), Y) 36 | 37 | solver = RIDGE_SOLVERS[solver_name] 38 | XTX = X.T @ X 39 | XTY = X.T @ Y 40 | 41 | for alpha in alphas: 42 | alpha = backend.full_like(Y, fill_value=alpha, shape=Y.shape[1]) 43 | b2 = solver(X, Y, alpha=alpha, fit_intercept=False) 44 | b2 = backend.to_gpu(b2) 45 | assert b2.shape == (X.shape[1], Y.shape[1]) 46 | 47 | n_features, n_targets = weights.shape 48 | for ii in range(n_targets): 49 | # compare primal coefficients with scipy.linalg.solve 50 | reg = backend.asarray_like(np.eye(n_features), Y) * alpha[ii] 51 | b1 = scipy.linalg.solve(backend.to_numpy(XTX + reg), 52 | backend.to_numpy(XTY[:, ii])) 53 | assert_array_almost_equal(b1, b2[:, ii], decimal=6) 54 | 55 | # compare predictions with sklearn.linear_model.Ridge 56 | prediction = backend.matmul(X, b2[:, ii]) 57 | model = sklearn.linear_model.Ridge( 58 | alpha=backend.to_numpy(alpha[ii]), max_iter=1000, 
tol=1e-6, 59 | fit_intercept=False) 60 | model.fit(backend.to_numpy(X), backend.to_numpy(Y[:, ii])) 61 | prediction_sklearn = model.predict(backend.to_numpy(X)) 62 | assert_array_almost_equal(prediction, prediction_sklearn, 63 | decimal=6) 64 | 65 | assert_array_almost_equal(model.coef_, b2[:, ii], decimal=5) 66 | 67 | 68 | @pytest.mark.parametrize('solver_name', RIDGE_SOLVERS) 69 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 70 | def test_solve_kernel_ridge_intercept(solver_name, backend): 71 | backend = set_backend(backend) 72 | 73 | X, Y, weights = _create_dataset(backend) 74 | Y += 100 75 | X += 10 76 | alphas = backend.asarray_like(backend.logspace(-2, 5, 7), Y) 77 | 78 | solver = RIDGE_SOLVERS[solver_name] 79 | 80 | for alpha in alphas: 81 | alpha = backend.full_like(Y, fill_value=alpha, shape=Y.shape[1]) 82 | b2, i2 = solver(X, Y, alpha=alpha, fit_intercept=True) 83 | assert b2.shape == (X.shape[1], Y.shape[1]) 84 | assert i2.shape == (Y.shape[1], ) 85 | b2 = backend.to_gpu(b2) 86 | i2 = backend.to_gpu(i2) 87 | 88 | n_features, n_targets = weights.shape 89 | for ii in range(n_targets): 90 | 91 | # compare predictions with sklearn.linear_model.Ridge 92 | prediction = backend.matmul(X, b2[:, ii]) + i2[ii] 93 | model = sklearn.linear_model.Ridge( 94 | alpha=backend.to_numpy(alpha[ii]), max_iter=1000, tol=1e-6, 95 | fit_intercept=True) 96 | model.fit(backend.to_numpy(X), backend.to_numpy(Y[:, ii])) 97 | prediction_sklearn = model.predict(backend.to_numpy(X)) 98 | assert_array_almost_equal(prediction, prediction_sklearn, 99 | decimal=5) 100 | 101 | assert_array_almost_equal(model.coef_, b2[:, ii], decimal=5) 102 | 103 | 104 | @pytest.mark.parametrize('solver_name', RIDGE_SOLVERS) 105 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 106 | def test_warning_kernel_ridge_ridge(solver_name, backend): 107 | backend = set_backend(backend) 108 | X, Y, weights = _create_dataset(backend) 109 | solver = RIDGE_SOLVERS[solver_name] 110 | 111 | with pytest.warns(UserWarning, 112 | match="ridge is slower than solving kernel"): 113 | solver(X[:4], Y[:4]) 114 | 115 | 116 | @pytest.mark.parametrize('solver_name', RIDGE_SOLVERS) 117 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 118 | def test_different_number_of_samples(solver_name, backend): 119 | backend = set_backend(backend) 120 | X, Y, weights = _create_dataset(backend) 121 | solver = RIDGE_SOLVERS[solver_name] 122 | 123 | with pytest.raises(ValueError, match="same number of samples"): 124 | solver(X[:4], Y[:3]) 125 | -------------------------------------------------------------------------------- /doc/api.rst: -------------------------------------------------------------------------------- 1 | .. _api_documentation: 2 | 3 | === 4 | API 5 | === 6 | 7 | List of functions and classes in Himalaya. 8 | 9 | Backend 10 | ======= 11 | 12 | Public functions in ``himalaya.backend``. 13 | 14 | .. currentmodule:: himalaya.backend 15 | 16 | .. autosummary:: 17 | :toctree: _generated/ 18 | :nosignatures: 19 | :template: function.rst 20 | 21 | set_backend 22 | get_backend 23 | ALL_BACKENDS 24 | 25 | | 26 | 27 | _____ 28 | 29 | Kernel ridge 30 | ============ 31 | 32 | Public functions and classes in ``himalaya.kernel_ridge``. 33 | 34 | .. currentmodule:: himalaya.kernel_ridge 35 | 36 | Estimators 37 | ---------- 38 | Estimators compatible with the ``scikit-learn`` API. 39 | 40 | .. 
autosummary:: 41 | :toctree: _generated/ 42 | :nosignatures: 43 | 44 | :template: class.rst 45 | KernelRidge 46 | KernelRidgeCV 47 | WeightedKernelRidge 48 | MultipleKernelRidgeCV 49 | 50 | Kernelizer 51 | ColumnKernelizer 52 | :template: function.rst 53 | make_column_kernelizer 54 | 55 | 56 | 57 | Solver functions 58 | ---------------- 59 | .. autosummary:: 60 | :toctree: _generated/ 61 | :nosignatures: 62 | :template: function.rst 63 | 64 | KERNEL_RIDGE_SOLVERS 65 | solve_kernel_ridge_cv_eigenvalues 66 | solve_kernel_ridge_eigenvalues 67 | solve_kernel_ridge_gradient_descent 68 | solve_kernel_ridge_conjugate_gradient 69 | 70 | WEIGHTED_KERNEL_RIDGE_SOLVERS 71 | solve_weighted_kernel_ridge_gradient_descent 72 | solve_weighted_kernel_ridge_conjugate_gradient 73 | solve_weighted_kernel_ridge_neumann_series 74 | 75 | MULTIPLE_KERNEL_RIDGE_SOLVERS 76 | solve_multiple_kernel_ridge_hyper_gradient 77 | solve_multiple_kernel_ridge_random_search 78 | 79 | 80 | 81 | Helpers 82 | ------- 83 | .. autosummary:: 84 | :toctree: _generated/ 85 | :nosignatures: 86 | :template: function.rst 87 | 88 | generate_dirichlet_samples 89 | predict_weighted_kernel_ridge 90 | predict_and_score_weighted_kernel_ridge 91 | primal_weights_kernel_ridge 92 | primal_weights_weighted_kernel_ridge 93 | 94 | 95 | 96 | Kernels 97 | ------- 98 | .. autosummary:: 99 | :toctree: _generated/ 100 | :nosignatures: 101 | :template: function.rst 102 | 103 | PAIRWISE_KERNEL_FUNCTIONS 104 | linear_kernel 105 | polynomial_kernel 106 | rbf_kernel 107 | sigmoid_kernel 108 | cosine_similarity_kernel 109 | 110 | :template: class.rst 111 | KernelCenterer 112 | 113 | | 114 | 115 | _____ 116 | 117 | 118 | Lasso 119 | ===== 120 | 121 | Public functions and classes in ``himalaya.lasso``. 122 | 123 | .. currentmodule:: himalaya.lasso 124 | 125 | Estimators 126 | ---------- 127 | Estimators compatible with the ``scikit-learn`` API. 128 | 129 | .. autosummary:: 130 | :toctree: _generated/ 131 | :nosignatures: 132 | 133 | :template: class.rst 134 | SparseGroupLassoCV 135 | 136 | Solver functions 137 | ---------------- 138 | .. autosummary:: 139 | :toctree: _generated/ 140 | :nosignatures: 141 | :template: function.rst 142 | 143 | solve_sparse_group_lasso 144 | solve_sparse_group_lasso_cv 145 | 146 | 147 | | 148 | 149 | _____ 150 | 151 | Ridge 152 | ===== 153 | 154 | Public functions and classes in ``himalaya.ridge``. 155 | 156 | .. currentmodule:: himalaya.ridge 157 | 158 | Estimators 159 | ---------- 160 | Estimators compatible with the ``scikit-learn`` API. 161 | 162 | .. autosummary:: 163 | :toctree: _generated/ 164 | :nosignatures: 165 | 166 | :template: class.rst 167 | Ridge 168 | RidgeCV 169 | GroupRidgeCV 170 | BandedRidgeCV 171 | 172 | ColumnTransformerNoStack 173 | :template: function.rst 174 | make_column_transformer_no_stack 175 | 176 | Solver functions 177 | ---------------- 178 | .. autosummary:: 179 | :toctree: _generated/ 180 | :nosignatures: 181 | :template: function.rst 182 | 183 | RIDGE_SOLVERS 184 | solve_ridge_svd 185 | solve_ridge_cv_svd 186 | GROUP_RIDGE_SOLVERS 187 | BANDED_RIDGE_SOLVERS 188 | solve_group_ridge_random_search 189 | solve_banded_ridge_random_search 190 | 191 | 192 | | 193 | 194 | _____ 195 | 196 | 197 | Other modules 198 | ============= 199 | 200 | Public functions and classes in other minor modules. 201 | 202 | .. currentmodule:: himalaya 203 | 204 | Progress bar 205 | ------------ 206 | .. 
autosummary:: 207 | :toctree: _generated/ 208 | :nosignatures: 209 | 210 | :template: class.rst 211 | progress_bar.ProgressBar 212 | :template: function.rst 213 | progress_bar.bar 214 | 215 | 216 | Scoring functions 217 | ----------------- 218 | .. autosummary:: 219 | :toctree: _generated/ 220 | :nosignatures: 221 | :template: function.rst 222 | 223 | scoring.l2_neg_loss 224 | scoring.r2_score 225 | scoring.correlation_score 226 | scoring.r2_score_split 227 | scoring.r2_score_split_svd 228 | scoring.correlation_score_split 229 | 230 | 231 | Utils 232 | ----- 233 | .. autosummary:: 234 | :toctree: _generated/ 235 | :nosignatures: 236 | :template: function.rst 237 | 238 | utils.compute_lipschitz_constants 239 | utils.generate_multikernel_dataset 240 | 241 | 242 | Visualization 243 | ------------- 244 | .. autosummary:: 245 | :toctree: _generated/ 246 | :nosignatures: 247 | :template: function.rst 248 | 249 | viz.plot_alphas_diagnostic 250 | -------------------------------------------------------------------------------- /examples/multiple_kernel_ridge/plot_mkr_4_refit_from_deltas.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multiple-kernel ridge fit from fixed hyper-parameters 3 | ===================================================== 4 | This example demonstrates how to fit a multiple-kernel ridge model with fixed 5 | hyper-parameters. Here are three different use cases: 6 | 7 | - If the kernel weights hyper-parameters are known and identical across 8 | targets, the kernels can be scaled and summed, and a simple KernelRidgeCV can 9 | be used to fit the model. 10 | - If the kernel weights hyper-parameters are unknown and different across 11 | targets, a MultipleKernelRidgeCV can be used to search the best 12 | hyper-parameters per target. 13 | - If the kernel weights hyper-parameters are known and different across 14 | targets, a WeightedKernelRidge model can be used to fit the ridge models on 15 | each target independently. 16 | 17 | This method can be used for example in the following workflow: 18 | 19 | - fit a MultipleKernelRidgeCV to learn the kernel weights hyper-parameter, 20 | - save the hyper-parameters, but not the ridge weights, to save disk space, 21 | - fit a WeightedKernelRidge from the saved hyper-parameters, for further use of 22 | the model (prediction, interpretation, etc.). 23 | """ 24 | import numpy as np 25 | 26 | from himalaya.backend import set_backend 27 | from himalaya.kernel_ridge import WeightedKernelRidge 28 | from himalaya.kernel_ridge import Kernelizer 29 | from himalaya.kernel_ridge import ColumnKernelizer 30 | from himalaya.utils import generate_multikernel_dataset 31 | 32 | from sklearn.pipeline import make_pipeline 33 | from sklearn import set_config 34 | set_config(display='diagram') 35 | 36 | ############################################################################### 37 | # In this example, we use the ``torch_cuda`` backend (GPU). 
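# If no CUDA-capable GPU is available, ``on_error="warn"`` makes
# ``set_backend`` warn and keep the current backend instead of raising.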
38 | 39 | backend = set_backend("torch_cuda", on_error="warn") 40 | 41 | ############################################################################### 42 | # Generate a random dataset 43 | # ------------------------- 44 | # - X_train : array of shape (n_samples_train, n_features) 45 | # - X_test : array of shape (n_samples_test, n_features) 46 | # - Y_train : array of shape (n_samples_train, n_targets) 47 | # - Y_test : array of shape (n_samples_test, n_targets) 48 | 49 | (X_train, X_test, Y_train, Y_test, kernel_weights, 50 | n_features_list) = generate_multikernel_dataset(n_kernels=4, n_targets=500, 51 | n_samples_train=1000, 52 | n_samples_test=400, 53 | random_state=42) 54 | 55 | ############################################################################### 56 | # Prepare the pipeline 57 | # -------------------- 58 | 59 | # Find the start and end of each feature space X in Xs 60 | start_and_end = np.concatenate([[0], np.cumsum(n_features_list)]) 61 | slices = [ 62 | slice(start, end) 63 | for start, end in zip(start_and_end[:-1], start_and_end[1:]) 64 | ] 65 | 66 | # Create a different ``Kernelizer`` for each feature space. 67 | kernelizers = [("space %d" % ii, Kernelizer(), slice_) 68 | for ii, slice_ in enumerate(slices)] 69 | column_kernelizer = ColumnKernelizer(kernelizers) 70 | 71 | ############################################################################### 72 | # Define the weighted kernel ridge model 73 | # -------------------------------------- 74 | # Here we use the ground truth kernel weights for each target (deltas), but it 75 | # can be typically used with deltas obtained from a MultipleKernelRidgeCV fit. 76 | 77 | deltas = backend.log(backend.asarray(kernel_weights.T)) 78 | 79 | model_1 = WeightedKernelRidge(alpha=1, deltas=deltas, kernels="precomputed") 80 | pipe_1 = make_pipeline(column_kernelizer, model_1) 81 | 82 | # Fit the model on all targets 83 | pipe_1.fit(X_train, Y_train) 84 | 85 | ############################################################################### 86 | # compute test score 87 | test_scores_1 = pipe_1.score(X_test, Y_test) 88 | test_scores_1 = backend.to_numpy(test_scores_1) 89 | 90 | ############################################################################### 91 | # We can compare this model to a baseline model where the kernel weights are 92 | # all equal and not learnt. 
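# ``deltas="zeros"`` sets all log kernel weights to zero, i.e. all kernel
# weights equal to one.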
93 | 94 | model_2 = WeightedKernelRidge(alpha=1, deltas="zeros", kernels="precomputed") 95 | pipe_2 = make_pipeline(column_kernelizer, model_2) 96 | 97 | # Fit the model on all targets 98 | pipe_2.fit(X_train, Y_train) 99 | 100 | ############################################################################### 101 | # compute test score 102 | test_scores_2 = pipe_2.score(X_test, Y_test) 103 | test_scores_2 = backend.to_numpy(test_scores_2) 104 | 105 | ############################################################################### 106 | # Compare the predictions on a test set 107 | # ------------------------------------- 108 | import matplotlib.pyplot as plt 109 | 110 | plt.figure(figsize=(4, 3)) 111 | plt.hist(test_scores_2, np.linspace(0, 1, 30), alpha=0.7, 112 | label="Default deltas") 113 | plt.hist(test_scores_1, np.linspace(0, 1, 30), alpha=0.7, 114 | label="Ground truth deltas") 115 | plt.xlabel("$R^2$ generalization score") 116 | plt.ylabel("Number of voxels") 117 | plt.legend() 118 | plt.tight_layout() 119 | plt.show() 120 | -------------------------------------------------------------------------------- /doc/changelog.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | Development Version 5 | ------------------- 6 | 7 | - 8 | 9 | Version 0.4.5 10 | ------------- 11 | (*June 2024*) 12 | 13 | - FIX update `~himalaya.kernel_ridge.ColumnKernelizer` for scikit-learn versions >= 1.5 14 | 15 | Version 0.4.4 16 | ------------- 17 | (*March 2024*) 18 | 19 | - FIX cupy boolean dtype 20 | 21 | Version 0.4.3 22 | ------------- 23 | (*March 2024*) 24 | 25 | - FIX update `~himalaya.kernel_ridge.ColumnKernelizer` for scikit-learn versions > 1.4 26 | 27 | Version 0.4.2 28 | ------------- 29 | (*February 2023*) 30 | 31 | - ENH add better error message when ``torch.linalg.eigh`` fails. 32 | - ENH add :func:`~himalaya.kernel_ridge.solve_kernel_ridge_cv_svd` solver. It 33 | can be used with :class:`~himalaya.kernel_ridge.KernelRidgeCV` 34 | ``(solver="svd")``. 35 | 36 | Version 0.4.1 37 | ------------- 38 | (*February 2023*) 39 | 40 | - FIX avoid error in :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV` 41 | with ``solver_params(return_alphas=True)``. 42 | - ENH add ``fit_intercept`` in 43 | :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV`. 44 | - FIX torch 1.13.1 requires tensor masks to be on the same device as tensors. 45 | 46 | Version 0.4.0 47 | ------------- 48 | (*June 2022*) 49 | 50 | - DOC explain how to implement a winner-take-all model. 51 | - FIX comply with most recent scikit-learn's check_estimator. 52 | - FIX avoid an indexing error in the hypergradient solver, when early stopping 53 | after different numbers of iterations for different batches. 54 | 55 | Version 0.3.6 56 | ------------- 57 | (*April 2022*) 58 | 59 | - DOC improve documentation website, add estimator flowchart. 60 | - TST improve test robustness. 61 | - ENH add batching over targets in 62 | :func:`~himalaya.kernel_ridge.predict_weighted_kernel_ridge`. 63 | - ENH add ``solver="auto"`` in :class:`~himalaya.kernel_ridge.KernelRidge`, 64 | which switches solver based on the presence of a separate alpha per target. 65 | 66 | Version 0.3.5 67 | ------------- 68 | (*February 2022*) 69 | 70 | - MNT speed up examples on CPU, to build the doc faster on github actions. 71 | - ENH add batching over targets in :class:`~himalaya.ridge.Ridge`, 72 | :class:`~himalaya.kernel_ridge.KernelRidge`, and 73 | :class:`~himalaya.kernel_ridge.WeightedKernelRidge`. 
74 | - ENH add warnings to guide the user between using 75 | :class:`~himalaya.ridge.Ridge` or 76 | :class:`~himalaya.kernel_ridge.KernelRidge`. 77 | - ENH add user-friendly errors when the number of samples is inconsistent. 78 | - ENH raise ValueError if the indices in cross-validation exceed number of 79 | samples. 80 | 81 | Version 0.3.4 82 | ------------- 83 | (*November 2021*) 84 | 85 | - FIX :class:`~himalaya.ridge.Ridge` with ``n_samples < n_targets``. 86 | - FIX update of alphas when ``local_alpha=False`` in 87 | :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV`. 88 | - EXA refactor examples with new 89 | :func:`~himalaya.utils.generate_multikernel_dataset` function. 90 | - MNT add github actions for running tests, building and publishing the doc, 91 | and publishing to PyPI. 92 | 93 | Version 0.3.3 94 | ------------- 95 | (*November 2021*) 96 | 97 | - FIX :class:`~himalaya.kernel_ridge.KernelRidge` with 98 | ``n_samples < n_targets``. 99 | - FIX random search with single alpha in 100 | :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV`. 101 | 102 | Version 0.3.2 103 | ------------- 104 | (*November 2021*) 105 | 106 | - ENH add :func:`~himalaya.scoring.r2_score_split_svd` scoring function. 107 | - ENH add :func:`~himalaya.scoring.correlation_score_split` scoring function. 108 | - ENH add ``split`` parameter to the ``score`` method in 109 | :class:`~himalaya.kernel_ridge.WeightedKernelRidge`, 110 | :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV`, and 111 | :class:`~himalaya.ridge.GroupRidgeCV`. 112 | - ENH add ``force_cpu`` parameter in all estimators. 113 | - FIX remove deprecation warnings for cupy v9. 114 | - DOC mention that pytorch 1.9+ is preferred. 115 | 116 | Version 0.3.1 117 | ------------- 118 | (*September 2021*) 119 | 120 | - MNT Rename :class:`~himalaya.ridge.BandedRidgeCV` into 121 | :class:`~himalaya.ridge.GroupRidgeCV` (both names are available). 122 | - ENH improve robustness to noise in the cross-validation scores. 123 | - ENH start the random search with equal weights in 124 | :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV` 125 | and :class:`~himalaya.ridge.GroupRidgeCV`. 126 | - FIX remove deprecation warnings with pytorch 1.8. 127 | - TST improve test coverage. 128 | 129 | Version 0.3.0 130 | ------------- 131 | (*April 2021*) 132 | 133 | - ENH add ``fit_intercept`` parameter in :class:`~himalaya.ridge.Ridge`, 134 | :class:`~himalaya.ridge.RidgeCV`, and :class:`~himalaya.ridge.BandedRidgeCV`. 135 | - ENH add ``fit_intercept`` parameter in 136 | :class:`~himalaya.kernel_ridge.KernelRidge`, 137 | :class:`~himalaya.kernel_ridge.KernelRidgeCV`, 138 | :func:`~himalaya.kernel_ridge.solve_multiple_kernel_ridge_gradient_descent`, 139 | and :func:`~himalaya.kernel_ridge.solve_multiple_kernel_ridge_random_search`. 140 | - ENH add :class:`~himalaya.kernel_ridge.KernelCenterer`. 141 | - ENH allow change of backend midscript. 142 | - ENH Add option to return selected alpha values in 143 | :func:`~himalaya.kernel_ridge.solve_multiple_kernel_ridge_random_search`. 
144 | 145 | Version 0.2.0 146 | ------------- 147 | (*December 2020*) 148 | 149 | Version 0.1.0 150 | ------------- 151 | (*March 2020*) 152 | -------------------------------------------------------------------------------- /himalaya/ridge/_solvers.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | import warnings 3 | 4 | from ..backend import get_backend 5 | from ..utils import _batch_or_skip 6 | 7 | 8 | def solve_ridge_svd(X, Y, alpha=1., method="svd", fit_intercept=False, 9 | negative_eigenvalues="zeros", n_targets_batch=None, 10 | warn=True): 11 | """Solve ridge regression using SVD decomposition. 12 | 13 | Solve the ridge regression:: 14 | 15 | b* = argmin_b ||X @ b - Y||^2 + alpha ||b||^2 16 | 17 | Parameters 18 | ---------- 19 | X : array of shape (n_samples, n_features) 20 | Input features. 21 | Y : array of shape (n_samples, n_targets) 22 | Target data. 23 | alpha : float, or array of shape (n_targets, ) 24 | Regularization parameter. 25 | method : str in {"svd"} 26 | Method used to diagonalize the input feature matrix. 27 | fit_intercept : boolean 28 | Whether to fit an intercept. 29 | If False, X and Y must be zero-mean over samples. 30 | negative_eigenvalues : str in {"nan", "error", "zeros"} 31 | If the decomposition leads to negative eigenvalues (wrongly emerging 32 | from float32 errors): 33 | - "error" raises an error. 34 | - "zeros" replaces them with zeros. 35 | - "nan" returns nans if the regularization does not compensate 36 | twice the smallest negative value, else it ignores the problem. 37 | n_targets_batch : int or None 38 | Size of the batch over targets during cross-validation. 39 | Used for memory reasons. If None, uses all n_targets at once. 40 | warn : bool 41 | If True, warn if the number of samples is smaller than the number of 42 | features. 43 | 44 | Returns 45 | ------- 46 | weights : array of shape (n_features, n_targets) 47 | Ridge coefficients. 48 | intercept : array of shape (n_targets,) 49 | Intercept. Only returned when fit_intercept is True. 50 | """ 51 | backend = get_backend() 52 | if isinstance(alpha, numbers.Number) or alpha.ndim == 0: 53 | alpha = backend.ones_like(Y, shape=(1, )) * alpha 54 | 55 | X, Y, alpha = backend.check_arrays(X, Y, alpha) 56 | 57 | n_samples, n_features = X.shape 58 | if n_samples < n_features and warn: 59 | warnings.warn( 60 | "Solving ridge is slower than solving kernel ridge when n_samples " 61 | f"< n_features (here {n_samples} < {n_features}). " 62 | "Using a linear kernel in himalaya.kernel_ridge.KernelRidge or " 63 | "himalaya.kernel_ridge.solve_kernel_ridge_eigenvalues would be " 64 | "faster. Use warn=False to silence this warning.", UserWarning) 65 | if X.shape[0] != Y.shape[0]: 66 | raise ValueError("X and Y must have the same number of samples.") 67 | 68 | X_offset, Y_offset = None, None 69 | if fit_intercept: 70 | X_offset = X.mean(0) 71 | Y_offset = Y.mean(0) 72 | X = X - X_offset 73 | Y = Y - Y_offset 74 | 75 | if method == "svd": 76 | # SVD: X = U @ np.diag(eigenvalues) @ Vt 77 | U, eigenvalues, Vt = backend.svd(X, full_matrices=False) 78 | else: 79 | raise ValueError("Unknown method=%r." 
% (method, )) 80 | 81 | inverse = eigenvalues[:, None] / (alpha[None] + eigenvalues[:, None] ** 2) 82 | 83 | # negative eigenvalues can emerge from incorrect kernels, or from float32 84 | if eigenvalues[0] < 0: 85 | if negative_eigenvalues == "nan": 86 | if alpha < -eigenvalues[0] * 2: 87 | return backend.ones_like(Y) * backend.asarray( backend.nan, dtype=Y.dtype) 89 | else: 90 | pass 91 | 92 | elif negative_eigenvalues == "zeros": 93 | eigenvalues[eigenvalues < 0] = 0 94 | 95 | elif negative_eigenvalues == "error": 96 | raise RuntimeError( 97 | "Negative eigenvalues. Make sure the kernel is positive " 98 | "semi-definite, increase the regularization alpha, or use " 99 | "another solver.") 100 | else: 101 | raise ValueError("Unknown negative_eigenvalues=%r." % 102 | (negative_eigenvalues, )) 103 | 104 | n_samples, n_features = X.shape 105 | n_samples, n_targets = Y.shape 106 | weights = backend.zeros_like(X, shape=(n_features, n_targets), 107 | device="cpu") 108 | if n_targets_batch is None: 109 | n_targets_batch = n_targets 110 | 111 | for start in range(0, n_targets, n_targets_batch): 112 | batch = slice(start, start + n_targets_batch) 113 | 114 | iUT = _batch_or_skip(inverse, batch, 1)[:, None, :] * U.T[:, :, None] 115 | iUT = backend.transpose(iUT, (2, 0, 1)) 116 | # iUT.shape = (1 or n_targets_batch, n_samples, n_samples) 117 | 118 | if Y.shape[0] < Y.shape[1]: 119 | weights_batch = ((Vt.T @ iUT) @ Y.T[batch, :, None])[:, :, 0].T 120 | else: 121 | weights_batch = Vt.T @ (iUT @ Y.T[batch, :, None])[:, :, 0].T 122 | weights[:, batch] = backend.to_cpu(weights_batch) 123 | 124 | if fit_intercept: 125 | intercept = backend.to_cpu( Y_offset) - backend.to_cpu(X_offset) @ weights 127 | return weights, intercept 128 | else: 129 | return weights 130 | 131 | 132 | #: Dictionary with all ridge solvers 133 | RIDGE_SOLVERS = {"svd": solve_ridge_svd} 134 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Himalaya: Multiple-target linear models 2 | ======================================= 3 | 4 | |Github| |Python| |License| |Build| |Codecov| |Downloads| 5 | 6 | ``Himalaya`` [1]_ implements machine learning linear models in Python, focusing 7 | on computational efficiency for large numbers of targets. 8 | 9 | Use ``himalaya`` if you need a library that: 10 | 11 | - estimates linear models on large numbers of targets, 12 | - runs on CPU and GPU hardware, 13 | - provides estimators compatible with ``scikit-learn``'s API. 14 | 15 | ``Himalaya`` is stable (with particular care for backward compatibility) and 16 | open for public use (give it a star!). 17 | 18 | Example 19 | ======= 20 | 21 | .. code-block:: python 22 | 23 | import numpy as np 24 | n_samples, n_features, n_targets = 10, 5, 4 25 | np.random.seed(0) 26 | X = np.random.randn(n_samples, n_features) 27 | Y = np.random.randn(n_samples, n_targets) 28 | 29 | from himalaya.ridge import RidgeCV 30 | model = RidgeCV(alphas=[1, 10, 100]) 31 | model.fit(X, Y) 32 | print(model.best_alphas_) # [ 10. 100. 10. 100.] 33 | 34 | 35 | - The model ``RidgeCV`` uses the same API as ``scikit-learn`` 36 | estimators, with methods such as ``fit``, ``predict``, ``score``, etc. 37 | - The model is able to efficiently fit a large number of targets (routinely 38 | used with 100k targets). 39 | - The model selects the best hyperparameter ``alpha`` for each target 40 | independently, as shown below. 
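The fitted model can then be used like any ``scikit-learn`` estimator. As a
minimal sketch (in ``himalaya``, ``score`` returns one R^2 score per target):

.. code-block:: python

    Y_pred = model.predict(X)
    scores = model.score(X, Y)  # array of shape (n_targets, )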
41 | 42 | More examples 43 | ------------- 44 | 45 | Check more examples of use of ``himalaya`` in the `gallery of examples 46 | <https://gallantlab.github.io/himalaya/_auto_examples/index.html>`_. 47 | 48 | Tutorials using ``himalaya`` for fMRI 49 | ------------------------------------- 50 | 51 | ``Himalaya`` was designed primarily for functional magnetic resonance imaging 52 | (fMRI) encoding models. In-depth tutorials about using ``himalaya`` for fMRI 53 | encoding models can be found at `gallantlab/voxelwise_tutorials 54 | <https://github.com/gallantlab/voxelwise_tutorials>`_. 55 | 56 | Models 57 | ====== 58 | 59 | ``Himalaya`` implements the following models: 60 | 61 | - Ridge, RidgeCV 62 | - KernelRidge, KernelRidgeCV 63 | - GroupRidgeCV, MultipleKernelRidgeCV, WeightedKernelRidge 64 | - SparseGroupLassoCV 65 | 66 | 67 | See the `model descriptions 68 | <https://gallantlab.github.io/himalaya/models.html>`_ in the documentation 69 | website. 70 | 71 | Himalaya backends 72 | ================= 73 | 74 | ``Himalaya`` can be used seamlessly with different backends. 75 | The available backends are ``numpy`` (default), ``cupy``, ``torch``, and 76 | ``torch_cuda``. 77 | To change the backend, call: 78 | 79 | .. code-block:: python 80 | 81 | from himalaya.backend import set_backend 82 | backend = set_backend("torch") 83 | 84 | 85 | and give ``torch`` arrays as inputs to the ``himalaya`` solvers. For convenience, 86 | estimators implementing ``scikit-learn``'s API can cast arrays to the correct 87 | input type. 88 | 89 | GPU acceleration 90 | ---------------- 91 | 92 | To run ``himalaya`` on a graphics processing unit (GPU), you can use either 93 | the ``cupy`` or the ``torch_cuda`` backend: 94 | 95 | .. code-block:: python 96 | 97 | from himalaya.backend import set_backend 98 | backend = set_backend("cupy") # or "torch_cuda" 99 | 100 | data = backend.asarray(data) 101 | 102 | 103 | Installation 104 | ============ 105 | 106 | Dependencies 107 | ------------ 108 | 109 | - Python 3 110 | - Numpy 111 | - Scikit-learn 112 | 113 | Optional (GPU backends): 114 | 115 | - PyTorch (1.9+ preferred) 116 | - Cupy 117 | 118 | 119 | Standard installation 120 | --------------------- 121 | You may install the latest version of ``himalaya`` using the package manager 122 | ``pip``, which will automatically download ``himalaya`` from the Python Package 123 | Index (PyPI): 124 | 125 | .. code-block:: bash 126 | 127 | pip install himalaya 128 | 129 | 130 | Installation from source 131 | ------------------------ 132 | 133 | To install ``himalaya`` from the latest source (``main`` branch), you may 134 | call: 135 | 136 | .. code-block:: bash 137 | 138 | pip install git+https://github.com/gallantlab/himalaya.git 139 | 140 | 141 | Developers can also install ``himalaya`` in editable mode via: 142 | 143 | .. code-block:: bash 144 | 145 | git clone https://github.com/gallantlab/himalaya 146 | cd himalaya 147 | pip install --editable . 148 | 149 | 150 | .. |Github| image:: https://img.shields.io/badge/github-himalaya-blue 151 | :target: https://github.com/gallantlab/himalaya 152 | 153 | .. |Python| image:: https://img.shields.io/badge/python-3.7%2B-blue 154 | :target: https://www.python.org/downloads/release/python-370 155 | 156 | .. |License| image:: https://img.shields.io/badge/License-BSD%203--Clause-blue.svg 157 | :target: https://opensource.org/licenses/BSD-3-Clause 158 | 159 | .. |Build| image:: https://github.com/gallantlab/himalaya/actions/workflows/run_tests.yml/badge.svg 160 | :target: https://github.com/gallantlab/himalaya/actions/workflows/run_tests.yml 161 | 162 | .. 
|Codecov| image:: https://codecov.io/gh/gallantlab/himalaya/branch/main/graph/badge.svg?token=ECzjd9gvrw 163 | :target: https://codecov.io/gh/gallantlab/himalaya 164 | 165 | .. |Downloads| image:: https://pepy.tech/badge/himalaya 166 | :target: https://pepy.tech/project/himalaya 167 | 168 | 169 | Cite this package 170 | ================= 171 | 172 | If you use ``himalaya`` in your work, please give it a star, and cite our 173 | publication: 174 | 175 | .. [1] Dupré La Tour, T., Eickenberg, M., Nunez-Elizalde, A.O., & Gallant, J. L. (2022). 176 | Feature-space selection with banded ridge regression. `NeuroImage <https://doi.org/10.1016/j.neuroimage.2022.119728>`_. 177 | 178 | -------------------------------------------------------------------------------- /himalaya/progress_bar.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | 4 | 5 | def bar(iterable, title='', use_it=True): 6 | """Simple API for progress_bar. 7 | 8 | Parameters 9 | ---------- 10 | iterable : iterable 11 | Iterable that will make the progress bar progress. 12 | title : str 13 | Message to include at end of progress bar. 14 | use_it : bool 15 | If False, return the iterable unchanged and do not show a progress 16 | bar. 17 | 18 | Examples 19 | -------- 20 | >>> import time 21 | >>> from himalaya.progress_bar import bar 22 | >>> for ii in bar(range(10)): 23 | ...     time.sleep(0.5) 24 | """ 25 | if use_it: 26 | return ProgressBar(title=title, max_value=len(iterable))(iterable) 27 | else: 28 | return iterable 29 | 30 | 31 | class ProgressBar(): 32 | """Generate a command-line progress bar. 33 | 34 | Parameters 35 | ---------- 36 | max_value : int 37 | Maximum value of process (e.g. number of samples to process, bytes to 38 | download, etc.). 39 | initial_value : int 40 | Initial value of process, useful when resuming process from a specific 41 | value, defaults to 0. 42 | title : str 43 | Message to include at end of progress bar. 44 | max_chars : int 45 | Number of characters to use for progress bar (be sure to save some room 46 | for the message and % complete as well). 47 | progress_character : char 48 | Character in the progress bar that indicates the portion completed. 49 | spinner : bool 50 | Show a spinner. Useful for long-running processes that may not 51 | increment the progress bar very often. This provides the user with 52 | feedback that the progress has not stalled. 53 | 54 | Examples 55 | -------- 56 | >>> import time 57 | >>> from himalaya.progress_bar import ProgressBar 58 | >>> for ii in ProgressBar(title="La barre", max_value=10)(range(10)): 59 | ...     time.sleep(0.5) 60 | """ 61 | 62 | spinner_symbols = ['|', '/', '-', '\\'] 63 | template = '\r[{0}{1}] {2:0.0f}% {3} {4:.02f} sec | {5} | ' 64 | 65 | def __init__(self, title='', max_value=None, initial_value=0, max_chars=40, 66 | progress_character='.', spinner=False, verbose_bool=True): 67 | self.cur_value = initial_value 68 | self.max_value = max_value 69 | self.title = title 70 | self.max_chars = max_chars 71 | self.progress_character = progress_character 72 | self.spinner = spinner 73 | self.spinner_index = 0 74 | self.n_spinner = len(self.spinner_symbols) 75 | self._do_print = verbose_bool 76 | self.start = time.time() 77 | 78 | self.closed = False 79 | self.update(initial_value) 80 | 81 | def update(self, cur_value, title=None): 82 | """Update progressbar with current value of process. 83 | 84 | Parameters 85 | ---------- 86 | cur_value : number 87 | Current value of process. Should be <= max_value (but this is not 88 | enforced). 
89 |             (cur_value / max_value) * 100
90 |         title : str
91 |             Message to display to the right of the progressbar. If None, the
92 |             last message provided will be used. To clear the current message,
93 |             pass a null string, ''.
94 |         """
95 |         # Ensure floating-point division so we can get fractions of a percent
96 |         # for the progressbar.
97 |         self.cur_value = cur_value
98 |         max_value = self.max_value or 1
99 |         progress = min(float(self.cur_value) / max_value, 1.)
100 |         num_chars = int(progress * self.max_chars)
101 |         num_left = self.max_chars - num_chars
102 |
103 |         # Update the message
104 |         if title is not None:
105 |             self.title = title
106 |
107 |         # time from start
108 |         duration = time.time() - self.start
109 |
110 |         # The \r tells the cursor to return to the beginning of the line rather
111 |         # than starting a new line. This allows us to have a progressbar-style
112 |         # display in the console window.
113 |         bar = self.template.format(self.progress_character * num_chars,
114 |                                    ' ' * num_left, progress * 100,
115 |                                    self.spinner_symbols[self.spinner_index],
116 |                                    duration, self.title)
117 |         # Force a flush because sometimes when using bash scripts and pipes,
118 |         # the output is not printed until after the program exits.
119 |         if self._do_print:
120 |             sys.stdout.write(bar)
121 |             sys.stdout.flush()
122 |         # Increment the spinner
123 |         if self.spinner:
124 |             self.spinner_index = (self.spinner_index + 1) % self.n_spinner
125 |
126 |         if progress == 1:
127 |             self.close()
128 |
129 |     def update_with_increment_value(self, increment_value, title=None):
130 |         """Update progressbar with the value of the increment instead of the
131 |         current value of process as in update().
132 |
133 |         Parameters
134 |         ----------
135 |         increment_value : int
136 |             Value of the increment of process. The percent of the progressbar
137 |             will be computed as
138 |             ((self.cur_value + increment_value) / max_value) * 100
139 |         title : str
140 |             Message to display to the right of the progressbar. If None, the
141 |             last message provided will be used. To clear the current message,
142 |             pass a null string, ''.
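
        Examples
        --------
        A minimal sketch (hypothetical values):

        >>> pb = ProgressBar(title="steps", max_value=4)
        >>> for _ in range(4):
        ...     pb.update_with_increment_value(1)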
143 | """ 144 | self.cur_value += increment_value 145 | self.update(self.cur_value, title) 146 | 147 | def close(self): 148 | """Close the progress bar.""" 149 | if not self.closed: 150 | sys.stdout.write('\n') 151 | sys.stdout.flush() 152 | self.closed = True 153 | 154 | def __call__(self, sequence): 155 | sequence = iter(sequence) 156 | while True: 157 | try: 158 | yield next(sequence) 159 | self.update_with_increment_value(1) 160 | except StopIteration: 161 | return 162 | -------------------------------------------------------------------------------- /himalaya/lasso/tests/test_group_lasso.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import sklearn.linear_model 4 | 5 | from himalaya.backend import set_backend 6 | from himalaya.backend import ALL_BACKENDS 7 | from himalaya.utils import assert_array_almost_equal 8 | 9 | from himalaya.lasso import solve_sparse_group_lasso 10 | from himalaya.lasso import solve_sparse_group_lasso_cv 11 | 12 | 13 | def _create_dataset(backend): 14 | n_samples, n_features, n_targets = 10, 5, 3 15 | 16 | X = backend.asarray(backend.randn(n_samples, n_features), backend.float64) 17 | Y = backend.asarray(backend.randn(n_samples, n_targets), backend.float64) 18 | 19 | return X, Y 20 | 21 | 22 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 23 | def test_group_lasso_vs_ols(backend): 24 | backend = set_backend(backend) 25 | X, Y = _create_dataset(backend) 26 | 27 | coef = solve_sparse_group_lasso(X, Y, groups=None, l21_reg=0.0, l1_reg=0.0, 28 | max_iter=2000, tol=1e-8, 29 | progress_bar=False) 30 | 31 | ols = sklearn.linear_model.LinearRegression(fit_intercept=False).fit( 32 | backend.to_numpy(X), backend.to_numpy(Y)) 33 | assert_array_almost_equal(coef, ols.coef_.T, decimal=4) 34 | 35 | 36 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 37 | def test_group_lasso_decreasing(backend): 38 | backend = set_backend(backend) 39 | X, Y = _create_dataset(backend) 40 | 41 | coef, losses = solve_sparse_group_lasso(X, Y, max_iter=500, tol=1e-8, 42 | progress_bar=False, debug=True, 43 | momentum=False) 44 | 45 | assert backend.all(losses[1:] - losses[:-1] < 1e-14) 46 | 47 | 48 | @pytest.mark.parametrize('n_targets_batch', [None, 2]) 49 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 50 | def test_group_lasso_vs_lasso(backend, n_targets_batch): 51 | backend = set_backend(backend) 52 | X, Y = _create_dataset(backend) 53 | 54 | for l1_reg in backend.logspace(-5, 5, 5): 55 | 56 | coef = solve_sparse_group_lasso(X, Y, groups=None, l21_reg=0.0, 57 | l1_reg=l1_reg, max_iter=1000, tol=1e-8, 58 | progress_bar=False, debug=False, 59 | momentum=False, 60 | n_targets_batch=n_targets_batch) 61 | 62 | ols = sklearn.linear_model.Lasso(fit_intercept=False, 63 | alpha=float(l1_reg), 64 | max_iter=1000, 65 | tol=1e-8).fit(backend.to_numpy(X), 66 | backend.to_numpy(Y)) 67 | assert_array_almost_equal(coef, ols.coef_.T, decimal=5) 68 | 69 | 70 | @pytest.mark.parametrize('n_targets_batch', [None, 2]) 71 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 72 | def test_group_lasso_regularization_per_target(backend, n_targets_batch): 73 | backend = set_backend(backend) 74 | X, Y = _create_dataset(backend) 75 | 76 | n_targets = Y.shape[1] 77 | l21_reg = backend.rand(n_targets) 78 | l1_reg = backend.rand(n_targets) 79 | 80 | coef = solve_sparse_group_lasso(X, Y, groups=None, l21_reg=l21_reg, 81 | l1_reg=l1_reg, max_iter=1000, tol=1e-8, 82 | progress_bar=False, debug=False, 83 | momentum=False, 84 | 
n_targets_batch=n_targets_batch) 85 | 86 | for tt in range(n_targets): 87 | 88 | coef_tt = solve_sparse_group_lasso(X, Y[:, tt:tt + 1], groups=None, 89 | l21_reg=l21_reg[tt], 90 | l1_reg=l1_reg[tt], max_iter=1000, 91 | tol=1e-8, progress_bar=False, 92 | debug=False, momentum=False, 93 | n_targets_batch=n_targets_batch) 94 | assert_array_almost_equal(coef[:, tt:tt + 1], coef_tt) 95 | 96 | 97 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 98 | def test_group_lasso_cv(backend): 99 | backend = set_backend(backend) 100 | X, Y = _create_dataset(backend) 101 | 102 | n_targets = Y.shape[1] 103 | l21_regs = backend.rand(2) / 10 104 | l1_regs = backend.rand(3) / 10 105 | 106 | coef, best_l21_reg, best_l1_reg, all_cv_scores = \ 107 | solve_sparse_group_lasso_cv( 108 | X, Y, cv=2, groups=None, l21_regs=l21_regs, l1_regs=l1_regs, 109 | progress_bar=False, tol=1e-2, max_iter=100) 110 | 111 | assert best_l1_reg.shape == (n_targets, ) 112 | assert best_l21_reg.shape == (n_targets, ) 113 | assert coef.shape == (X.shape[1], n_targets) 114 | 115 | 116 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 117 | def test_group_lasso_group_sparsity(backend): 118 | backend = set_backend(backend) 119 | 120 | # Set dataset parameters 121 | import numpy as np 122 | group_sizes = [np.random.randint(10, 20) for i in range(5)] 123 | active_groups = [np.random.randint(2) for _ in group_sizes] 124 | active_groups[1] = 1 # make sure we have at least one active... 125 | active_groups[0] = 0 # ...and one inactive group 126 | groups = np.concatenate([size * [i] for i, size in enumerate(group_sizes)]) 127 | n_features = sum(group_sizes) 128 | n_samples = 10000 129 | noise_std = 10 130 | 131 | # Generate data matrix 132 | X = backend.randn(n_samples, n_features) 133 | w = backend.concatenate([ 134 | backend.randn(group_size) * is_active 135 | for group_size, is_active in zip(group_sizes, active_groups) 136 | ]).reshape(-1, 1) 137 | y = X @ w 138 | y = y + backend.randn(*y.shape) * noise_std 139 | 140 | # Generate estimator and train it 141 | coef = solve_sparse_group_lasso(X=X, Y=y, groups=groups, l21_reg=0.6, 142 | l1_reg=0, max_iter=100, tol=1e-4, 143 | progress_bar=False) 144 | 145 | # check the group sparsity of the result 146 | for group, active in enumerate(active_groups): 147 | if active: 148 | assert backend.all(coef[groups == group] != 0) 149 | else: 150 | assert backend.all(coef[groups == group] == 0) 151 | -------------------------------------------------------------------------------- /himalaya/backend/numpy.py: -------------------------------------------------------------------------------- 1 | """The "numpy" CPU backend, based on NumPy. 2 | 3 | To use this backend, call ``himalaya.backend.set_backend("numpy")``. 4 | """ 5 | import numpy as np 6 | try: 7 | import scipy.linalg as linalg 8 | use_scipy = True 9 | except ImportError: 10 | import numpy.linalg as linalg 11 | use_scipy = False 12 | 13 | ############################################################################### 14 | 15 | 16 | def apply_argmax(array, argmax, axis): 17 | """Apply precomputed argmax indices in multi dimension arrays 18 | 19 | array[np.argmax(array)] works fine in dimension 1, but not in higher ones. 20 | This function extends it to higher dimensions. 
21 | 22 | Examples 23 | -------- 24 | >>> import numpy as np 25 | >>> array = np.random.randn(10, 4, 8) 26 | >>> argmax = np.argmax(array, axis=1) 27 | >>> max_ = apply_argmax(array, argmax, axis=1) 28 | >>> assert np.all(max_ == np.max(array, axis=1)) 29 | """ 30 | argmax = np.expand_dims(argmax, axis=axis) 31 | max_ = np.take_along_axis(array, argmax, axis=axis) 32 | return np.take(max_, 0, axis=axis) 33 | 34 | 35 | def std_float64(array, axis=None, demean=True, keepdims=False): 36 | """Compute the standard deviation of X with double precision, 37 | and cast back the result to original dtype. 38 | """ 39 | return array.std(axis, dtype=np.float64, 40 | keepdims=keepdims).astype(array.dtype, copy=False) 41 | 42 | 43 | def mean_float64(array, axis=None, keepdims=False): 44 | """Compute the mean of X with double precision, 45 | and cast back the result to original dtype. 46 | """ 47 | return array.mean(axis, dtype=np.float64, 48 | keepdims=keepdims).astype(array.dtype, copy=False) 49 | 50 | 51 | ############################################################################### 52 | 53 | name = "numpy" 54 | argmax = np.argmax 55 | max = np.max 56 | min = np.min 57 | abs = np.abs 58 | randn = np.random.randn 59 | rand = np.random.rand 60 | matmul = np.matmul 61 | transpose = np.transpose 62 | stack = np.stack 63 | concatenate = np.concatenate 64 | sum = np.sum 65 | sqrt = np.sqrt 66 | any = np.any 67 | all = np.all 68 | nan = np.nan 69 | inf = np.inf 70 | isnan = np.isnan 71 | isinf = np.isinf 72 | logspace = np.logspace 73 | copy = np.copy 74 | bool = np.bool_ 75 | float32 = np.float32 76 | float64 = np.float64 77 | int32 = np.int32 78 | eigh = linalg.eigh 79 | norm = linalg.norm 80 | log = np.log 81 | exp = np.exp 82 | arange = np.arange 83 | flatnonzero = np.flatnonzero 84 | isin = np.isin 85 | searchsorted = np.searchsorted 86 | unique = np.unique 87 | einsum = np.einsum 88 | tanh = np.tanh 89 | power = np.power 90 | prod = np.prod 91 | zeros = np.zeros 92 | clip = np.clip 93 | sign = np.sign 94 | sort = np.sort 95 | flip = np.flip 96 | atleast_1d = np.atleast_1d 97 | finfo = np.finfo 98 | eye = np.eye 99 | 100 | 101 | def diagonal_view(array, axis1=0, axis2=1): 102 | """Return a view of the array diagonal""" 103 | assert array.ndim >= 2 104 | axis1, axis2 = min([axis1, axis2]), max([axis1, axis2]) 105 | shape = list(array.shape) 106 | new = min([shape[axis1], shape[axis2]]) 107 | shape.pop(axis1) 108 | shape.pop(axis2 - 1) 109 | shape.append(new) 110 | strides = list(array.strides) 111 | new = strides[axis1] + strides[axis2] 112 | strides.pop(axis1) 113 | strides.pop(axis2 - 1) 114 | strides.append(new) 115 | diag = np.lib.stride_tricks.as_strided(array, shape=shape, strides=strides) 116 | return diag 117 | 118 | 119 | def to_numpy(array): 120 | return array 121 | 122 | 123 | def zeros_like(array, shape=None, dtype=None, device=None): 124 | """Add a shape parameter in zeros_like.""" 125 | if shape is None: 126 | shape = array.shape 127 | if dtype is None: 128 | dtype = array.dtype 129 | return np.zeros(shape, dtype=dtype) 130 | 131 | 132 | def ones_like(array, shape=None, dtype=None, device=None): 133 | """Add a shape parameter in ones_like.""" 134 | if shape is None: 135 | shape = array.shape 136 | if dtype is None: 137 | dtype = array.dtype 138 | return np.ones(shape, dtype=dtype) 139 | 140 | 141 | def full_like(array, fill_value, shape=None, dtype=None, device=None): 142 | """Add a shape parameter in full_like.""" 143 | if shape is None: 144 | shape = array.shape 145 | if dtype is None: 
146 | dtype = array.dtype 147 | return np.full(shape, fill_value, dtype=dtype) 148 | 149 | 150 | def to_cpu(array): 151 | return array 152 | 153 | 154 | def to_gpu(array, device=None): 155 | return array 156 | 157 | 158 | def is_in_gpu(array): 159 | return False 160 | 161 | 162 | def asarray_like(x, ref): 163 | return np.asarray(x, dtype=ref.dtype) 164 | 165 | 166 | def check_arrays(*all_inputs): 167 | """Change all inputs into arrays (or list of arrays) using the same 168 | precision as the first one. Some arrays can be None. 169 | """ 170 | all_arrays = [] 171 | all_arrays.append(asarray(all_inputs[0])) 172 | dtype = all_arrays[0].dtype 173 | for tensor in all_inputs[1:]: 174 | if tensor is None: 175 | pass 176 | elif isinstance(tensor, list): 177 | tensor = [asarray(tt, dtype=dtype) for tt in tensor] 178 | else: 179 | tensor = asarray(tensor, dtype=dtype) 180 | all_arrays.append(tensor) 181 | return all_arrays 182 | 183 | 184 | def asarray(a, dtype=None, order=None, device=None): 185 | # works from numpy, lists, torch, and others 186 | try: 187 | return np.asarray(a, dtype=dtype, order=order) 188 | except Exception: 189 | pass 190 | # works from cupy 191 | try: 192 | import cupy 193 | return np.asarray(cupy.asnumpy(a), dtype=dtype, order=order) 194 | except Exception: 195 | pass 196 | # works from torch_cuda 197 | try: 198 | return np.asarray(a.cpu(), dtype=dtype, order=order) 199 | except Exception: 200 | pass 201 | 202 | return np.asarray(a, dtype=dtype, order=order) 203 | 204 | 205 | def svd(X, full_matrices=True): 206 | if X.ndim == 2 or not use_scipy: 207 | return linalg.svd(X, full_matrices=full_matrices) 208 | 209 | elif X.ndim == 3: 210 | UsV_list = [linalg.svd(Xi, full_matrices=full_matrices) for Xi in X] 211 | return map(np.stack, zip(*UsV_list)) 212 | else: 213 | raise NotImplementedError() 214 | -------------------------------------------------------------------------------- /himalaya/tests/test_validation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sklearn 3 | import pytest 4 | 5 | from himalaya.backend import set_backend 6 | from himalaya.backend import ALL_BACKENDS 7 | from himalaya.validation import _assert_all_finite 8 | from himalaya.validation import check_cv 9 | from himalaya.validation import validate_data 10 | 11 | 12 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 13 | def test_suppress_validation(backend): 14 | backend = set_backend(backend) 15 | X = backend.asarray([0, np.inf]) 16 | with pytest.raises(ValueError): 17 | _assert_all_finite(X, True) 18 | sklearn.set_config(assume_finite=True) 19 | _assert_all_finite(X, True) 20 | sklearn.set_config(assume_finite=False) 21 | with pytest.raises(ValueError): 22 | _assert_all_finite(X, True) 23 | 24 | 25 | def test_check_cv(): 26 | cv = [([0, 1], [2]), ([0, 2], [1]), ([1, 2], [0])] 27 | 28 | # works because cv does not exceed y.shape[0] 29 | y = np.zeros(4) 30 | check_cv(cv, y) 31 | # fails because cv does exceed y.shape[0] 32 | with pytest.raises(ValueError, match="exceed number of samples"): 33 | y = np.zeros(2) 34 | check_cv(cv, y) 35 | 36 | 37 | class DummyEstimator: 38 | """Dummy estimator for testing validate_data""" 39 | def __init__(self): 40 | pass 41 | 42 | 43 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 44 | def test_validate_data_X_only(backend): 45 | backend = set_backend(backend) 46 | X = backend.asarray([[1, 2], [3, 4]]) 47 | estimator = DummyEstimator() 48 | 49 | # Test reset=True (fit behavior) 50 | X_val = 
validate_data(estimator, X, reset=True, ndim=2) 51 | assert hasattr(estimator, 'n_features_in_') 52 | assert estimator.n_features_in_ == 2 53 | assert X_val.shape == (2, 2) 54 | 55 | # Test reset=False (predict behavior) - should work 56 | X_val2 = validate_data(estimator, X, reset=False, ndim=2) 57 | assert X_val2.shape == (2, 2) 58 | 59 | # Test reset=False with wrong number of features - should fail 60 | X_wrong = backend.asarray([[1, 2, 3], [4, 5, 6]]) 61 | with pytest.raises(ValueError, match="X has 3 features.*expecting 2 features"): 62 | validate_data(estimator, X_wrong, reset=False, ndim=2) 63 | 64 | 65 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 66 | def test_validate_data_X_and_y(backend): 67 | backend = set_backend(backend) 68 | X = backend.asarray([[1, 2], [3, 4]]) 69 | y = backend.asarray([1, 0]) 70 | estimator = DummyEstimator() 71 | 72 | # Test with both X and y - X gets ndim=2, y gets default [1,2] 73 | X_val, y_val = validate_data(estimator, X, y, reset=True, ndim=2) 74 | assert hasattr(estimator, 'n_features_in_') 75 | assert estimator.n_features_in_ == 2 76 | assert X_val.shape == (2, 2) 77 | assert y_val.shape == (2,) 78 | 79 | 80 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 81 | def test_validate_data_no_validation(backend): 82 | backend = set_backend(backend) 83 | estimator = DummyEstimator() 84 | 85 | # Test X='no_validation' only 86 | result = validate_data(estimator, X='no_validation', reset=True) 87 | assert result == 'no_validation' 88 | 89 | # Test y='no_validation' 90 | X = backend.asarray([[1, 2], [3, 4]]) 91 | X_val = validate_data(estimator, X, y='no_validation', reset=True, ndim=2) 92 | assert X_val.shape == (2, 2) 93 | assert hasattr(estimator, 'n_features_in_') 94 | 95 | # Test both 'no_validation' 96 | result = validate_data(estimator, X='no_validation', y='no_validation', reset=True) 97 | assert result == 'no_validation' 98 | 99 | 100 | def test_validate_data_error_without_n_features_in(): 101 | # Test that predict without prior fit doesn't crash 102 | estimator = DummyEstimator() 103 | X = np.array([[1, 2], [3, 4]]) 104 | 105 | # Should work fine if estimator doesn't have n_features_in_ yet 106 | X_val = validate_data(estimator, X, reset=False, ndim=2) 107 | assert X_val.shape == (2, 2) 108 | 109 | 110 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 111 | def test_validate_data_3d_feature_axis(backend): 112 | """Test feature axis handling for 3D arrays (precomputed kernels). 113 | 114 | This test ensures that validate_data correctly handles the feature dimension 115 | for 3D precomputed kernel arrays where: 116 | - During fit: shape (n_kernels, n_samples_train, n_samples_train) 117 | - During predict: shape (n_kernels, n_samples_test, n_samples_train) 118 | 119 | The "feature" dimension (last axis) should be consistent between fit and predict. 
120 | """ 121 | backend = set_backend(backend) 122 | estimator = DummyEstimator() 123 | 124 | # Simulate fit with 3D precomputed kernels: (n_kernels=2, n_train=600, n_train=600) 125 | X_fit = backend.asarray(np.random.randn(2, 600, 600)) 126 | X_val_fit = validate_data(estimator, X_fit, reset=True, ndim=3) 127 | 128 | # Should store n_features_in_ as last axis (600) 129 | assert hasattr(estimator, 'n_features_in_') 130 | assert estimator.n_features_in_ == 600 131 | assert X_val_fit.shape == (2, 600, 600) 132 | 133 | # Simulate predict with 3D kernels: (n_kernels=2, n_test=300, n_train=600) 134 | # The middle axis changes (test samples) but last axis stays same (training samples) 135 | X_predict = backend.asarray(np.random.randn(2, 300, 600)) 136 | X_val_predict = validate_data(estimator, X_predict, reset=False, ndim=3) 137 | 138 | # Should validate successfully - last axis (600) matches stored n_features_in_ 139 | assert X_val_predict.shape == (2, 300, 600) 140 | 141 | # Test failure case: wrong last dimension 142 | X_wrong = backend.asarray(np.random.randn(2, 300, 500)) # wrong last dim 143 | with pytest.raises(ValueError, match="X has 500 features.*expecting 600 features"): 144 | validate_data(estimator, X_wrong, reset=False, ndim=3) 145 | 146 | # Test explicit feature_axis parameter 147 | estimator2 = DummyEstimator() 148 | 149 | # Using feature_axis=1 (middle axis) for 3D - this was the old buggy behavior 150 | validate_data(estimator2, X_fit, reset=True, ndim=3, feature_axis=1) 151 | assert estimator2.n_features_in_ == 600 # middle axis 152 | 153 | # This should fail with the test data because middle axis is different (300 vs 600) 154 | with pytest.raises(ValueError, match="X has 300 features.*expecting 600 features"): 155 | validate_data(estimator2, X_predict, reset=False, ndim=3, feature_axis=1) 156 | -------------------------------------------------------------------------------- /himalaya/backend/cupy.py: -------------------------------------------------------------------------------- 1 | """The "cupy" GPU backend, based on CuPy. 2 | 3 | To use this backend, call ``himalaya.backend.set_backend("cupy")``. 4 | """ 5 | try: 6 | import cupy 7 | except ImportError as error: 8 | import sys 9 | if "pytest" in sys.modules: # if run through pytest 10 | import pytest 11 | pytest.skip("Cupy not installed.") 12 | raise ImportError("Cupy not installed.") from error 13 | 14 | from ._utils import warn_if_not_float32 15 | 16 | ############################################################################### 17 | 18 | 19 | def apply_argmax(array, argmax, axis): 20 | """Apply precomputed argmax indices in multi dimension arrays 21 | 22 | array[np.argmax(array)] works fine in dimension 1, but not in higher ones. 23 | This function extends it to higher dimensions. 24 | 25 | Examples 26 | -------- 27 | >>> import cupy 28 | >>> array = cupy.random.randn(10, 4, 8) 29 | >>> argmax = cupy.argmax(array, axis=1) 30 | >>> max_ = apply_argmax(array, argmax, axis=1) 31 | >>> assert cupy.all(max_ == cupy.max(array, axis=1)) 32 | """ 33 | argmax = cupy.expand_dims(argmax, axis=axis) 34 | max_ = cupy.take_along_axis(array, argmax, axis=axis) 35 | return cupy.take(max_, 0, axis=axis) 36 | 37 | 38 | def std_float64(array, axis=None, demean=True, keepdims=False): 39 | """Compute the standard deviation of X with double precision, 40 | and cast back the result to original dtype. 
41 | """ 42 | return array.std(axis, dtype=cupy.float64, 43 | keepdims=keepdims).astype(array.dtype, copy=False) 44 | 45 | 46 | def mean_float64(array, axis=None, keepdims=False): 47 | """Compute the mean of X with double precision, 48 | and cast back the result to original dtype. 49 | """ 50 | return array.mean(axis, dtype=cupy.float64, 51 | keepdims=keepdims).astype(array.dtype, copy=False) 52 | 53 | 54 | ############################################################################### 55 | 56 | name = "cupy" 57 | argmax = cupy.argmax 58 | max = cupy.max 59 | min = cupy.min 60 | abs = cupy.abs 61 | randn = cupy.random.randn 62 | rand = cupy.random.rand 63 | matmul = cupy.matmul 64 | transpose = cupy.transpose 65 | stack = cupy.stack 66 | concatenate = cupy.concatenate 67 | sum = cupy.sum 68 | sqrt = cupy.sqrt 69 | any = cupy.any 70 | all = cupy.all 71 | nan = cupy.nan 72 | inf = cupy.inf 73 | isnan = cupy.isnan 74 | isinf = cupy.isinf 75 | logspace = cupy.logspace 76 | copy = cupy.copy 77 | bool = cupy.bool_ 78 | float32 = cupy.float32 79 | float64 = cupy.float64 80 | int32 = cupy.int32 81 | eigh = cupy.linalg.eigh 82 | norm = cupy.linalg.norm 83 | log = cupy.log 84 | exp = cupy.exp 85 | arange = cupy.arange 86 | flatnonzero = cupy.flatnonzero 87 | unique = cupy.unique 88 | einsum = cupy.einsum 89 | tanh = cupy.tanh 90 | power = cupy.power 91 | prod = cupy.prod 92 | zeros = cupy.zeros 93 | sign = cupy.sign 94 | clip = cupy.clip 95 | sort = cupy.sort 96 | flip = cupy.flip 97 | atleast_1d = cupy.atleast_1d 98 | finfo = cupy.finfo 99 | eye = cupy.eye 100 | 101 | 102 | def diagonal_view(array, axis1=0, axis2=1): 103 | """Return a view of the array diagonal.""" 104 | return cupy.diagonal(array, 0, axis1=axis1, axis2=axis2) 105 | 106 | 107 | def to_numpy(array): 108 | return cupy.asnumpy(array) 109 | 110 | 111 | def isin(x, y): 112 | import numpy as np # XXX 113 | np_result = np.isin(to_numpy(x), to_numpy(y)) 114 | return asarray(np_result, dtype=bool) 115 | 116 | 117 | def searchsorted(x, y): 118 | import numpy as np # XXX 119 | np_result = np.searchsorted(to_numpy(x), to_numpy(y)) 120 | return asarray(np_result, dtype=cupy.int64) 121 | 122 | 123 | def zeros_like(array, shape=None, dtype=None, device=None): 124 | """Add a shape parameter in zeros_like.""" 125 | xp = cupy.get_array_module(array) 126 | if shape is None: 127 | shape = array.shape 128 | if dtype is None: 129 | dtype = array.dtype 130 | if device == "cpu": 131 | import numpy as xp 132 | return xp.zeros(shape, dtype=dtype) 133 | 134 | 135 | def ones_like(array, shape=None, dtype=None, device=None): 136 | """Add a shape parameter in ones_like.""" 137 | xp = cupy.get_array_module(array) 138 | if shape is None: 139 | shape = array.shape 140 | if dtype is None: 141 | dtype = array.dtype 142 | if device == "cpu": 143 | import numpy as xp 144 | return xp.ones(shape, dtype=dtype) 145 | 146 | 147 | def full_like(array, fill_value, shape=None, dtype=None, device=None): 148 | """Add a shape parameter in full_like.""" 149 | xp = cupy.get_array_module(array) 150 | if shape is None: 151 | shape = array.shape 152 | if dtype is None: 153 | dtype = array.dtype 154 | if device == "cpu": 155 | import numpy as xp 156 | return xp.full(shape, fill_value, dtype=dtype) 157 | 158 | 159 | def to_cpu(array): 160 | return cupy.asnumpy(array) 161 | 162 | 163 | def to_gpu(array, device=None): 164 | return cupy.asarray(array) 165 | 166 | 167 | def is_in_gpu(array): 168 | return getattr(array, "device", None) is not None 169 | 170 | 171 | def asarray(a, 
dtype=None, order=None, device=None):
172 |     if device == "cpu":
173 |         import numpy as np
174 |         return np.asarray(cupy.asnumpy(a), dtype, order)
175 |     else:
176 |         return cupy.asarray(a, dtype, order)
177 |
178 |
179 | def asarray_like(x, ref):
180 |     xp = cupy.get_array_module(ref)
181 |     return xp.asarray(x, dtype=ref.dtype)
182 |
183 |
184 | def check_arrays(*all_inputs):
185 |     """Change all inputs into arrays (or list of arrays) using the same
186 |     precision as the first one. Some arrays can be None.
187 |     """
188 |     all_arrays = []
189 |     all_arrays.append(asarray(all_inputs[0]))
190 |     dtype = all_arrays[0].dtype
191 |     warn_if_not_float32(dtype)
192 |     for tensor in all_inputs[1:]:
193 |         if tensor is None:
194 |             pass
195 |         elif isinstance(tensor, list):
196 |             tensor = [asarray(tt, dtype=dtype) for tt in tensor]
197 |         else:
198 |             tensor = asarray(tensor, dtype=dtype)
199 |         all_arrays.append(tensor)
200 |     return all_arrays
201 |
202 |
203 | def svd(X, full_matrices=True):
204 |     if X.ndim == 2:
205 |         return cupy.linalg.svd(X, full_matrices=full_matrices)
206 |     elif X.ndim == 3:
207 |         UsV_list = [
208 |             cupy.linalg.svd(Xi, full_matrices=full_matrices) for Xi in X
209 |         ]
210 |         return map(cupy.stack, zip(*UsV_list))
211 |     else:
212 |         raise NotImplementedError()
--------------------------------------------------------------------------------
/doc/static/logo.svg:
--------------------------------------------------------------------------------
1 | [SVG image: himalaya logo, generated with Matplotlib v3.3.3 (see doc/static/logo.py); vector data omitted.]
--------------------------------------------------------------------------------
/himalaya/utils.py:
--------------------------------------------------------------------------------
1 | import numbers
2 |
3 | import numpy as np
4 |
5 | from .backend import get_backend
6 | from .validation import check_random_state
7 |
8 |
9 | def compute_lipschitz_constants(Xs, kernelize="XTX", random_state=None):
10 |     """Compute Lipschitz constants of gradients of linear regression problems.
11 |
12 |     Find the largest eigenvalue of X^TX for several X, using power iteration.
13 |
14 |     Parameters
15 |     ----------
16 |     Xs : array of shape (n_kernels, n_samples, n_features) or \
17 |         (n_kernels, n_samples, n_samples)
18 |         Multiple linear features or kernels.
19 |     kernelize : str in {"XTX", "XXT", "X"}
20 |         Whether to consider X^TX, XX^T, or directly X.
21 |     random_state : int, or None
22 |         Random generator seed. Use an int for deterministic search.
23 |
24 |     Returns
25 |     -------
26 |     lipschitz : array of shape (n_kernels)
27 |         Lipschitz constants.
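
    Examples
    --------
    A minimal sketch with random inputs (illustrative shapes only):

    >>> import numpy as np
    >>> from himalaya.utils import compute_lipschitz_constants
    >>> Xs = np.random.randn(4, 10, 5)
    >>> compute_lipschitz_constants(Xs, kernelize="XTX").shape
    (4,)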
28 | """ 29 | backend = get_backend() 30 | 31 | if kernelize == "XXT": 32 | XTs = backend.transpose(Xs, (0, 2, 1)) 33 | kernels = backend.matmul(Xs, XTs) 34 | del XTs 35 | elif kernelize == "XTX": 36 | XTs = backend.transpose(Xs, (0, 2, 1)) 37 | kernels = backend.matmul(XTs, Xs) 38 | del XTs 39 | elif kernelize == "X": 40 | kernels = Xs 41 | else: 42 | raise ValueError("Unknown parameter kernelize=%r" % (kernelize, )) 43 | 44 | # check the random state 45 | random_generator = check_random_state(random_state) 46 | ys = random_generator.randn(*(kernels.shape[:2] + (1, ))) 47 | 48 | ys = backend.asarray_like(ys, Xs) 49 | for i in range(10): 50 | ys /= backend.norm(ys, axis=1, keepdims=True) + 1e-16 51 | ys = backend.matmul(kernels, ys) 52 | evs = backend.norm(ys, axis=1)[:, 0] 53 | return evs 54 | 55 | 56 | def assert_array_almost_equal(x, y, decimal=6, err_msg='', verbose=True): 57 | """Test array equality, casting all arrays to numpy.""" 58 | backend = get_backend() 59 | x = backend.to_numpy(x) 60 | y = backend.to_numpy(y) 61 | return np.testing.assert_array_almost_equal(x, y, decimal=decimal, 62 | err_msg=err_msg, 63 | verbose=verbose) 64 | 65 | 66 | def generate_multikernel_dataset(n_kernels=4, n_targets=500, 67 | n_samples_train=1000, n_samples_test=400, 68 | noise=0.1, kernel_weights=None, 69 | n_features_list=None, random_state=None): 70 | """Utility to generate datasets for the gallery of examples. 71 | 72 | Parameters 73 | ---------- 74 | n_kernels : int 75 | Number of kernels. 76 | n_targets : int 77 | Number of targets. 78 | n_samples_train : int 79 | Number of samples in the training set. 80 | n_samples_test : int 81 | Number of sample in the testing set. 82 | noise : float > 0 83 | Scale of the Gaussian white noise added to the targets. 84 | kernel_weights : array of shape (n_targets, n_kernels) or None 85 | Kernel weights used in the prediction of the targets. 86 | If None, generate random kernel weights from a Dirichlet distribution. 87 | n_features_list : list of int of length (n_kernels, ) or None 88 | Number of features in each kernel. If None, use 1000 features for each. 89 | random_state : int, or None 90 | Random generator seed use to generate the true kernel weights. 91 | 92 | Returns 93 | ------- 94 | X_train : array of shape (n_samples_train, n_features) 95 | Training features. 96 | X_test : array of shape (n_samples_test, n_features) 97 | Testing features. 98 | Y_train : array of shape (n_samples_train, n_targets) 99 | Training targets. 100 | Y_test : array of shape (n_samples_test, n_targets) 101 | Testing targets. 102 | kernel_weights : array of shape (n_targets, n_kernels) 103 | Kernel weights in the prediction of the targets. 104 | n_features_list : list of int of length (n_kernels, ) 105 | Number of features in each kernel. 106 | """ 107 | from .kernel_ridge import generate_dirichlet_samples 108 | backend = get_backend() 109 | 110 | # Create a few kernel weights if not given. 111 | if kernel_weights is None: 112 | kernel_weights = generate_dirichlet_samples(n_targets, n_kernels, 113 | concentration=[.3], 114 | random_state=random_state) 115 | kernel_weights = backend.to_numpy(kernel_weights) 116 | 117 | if n_features_list is None: 118 | n_features_list = np.full(n_kernels, fill_value=1000) 119 | 120 | rng = check_random_state(random_state) 121 | 122 | # Then, generate a random dataset, using the arbitrary scalings. 
123 | Xs_train, Xs_test = [], [] 124 | Y_train, Y_test = None, None 125 | for ii in range(n_kernels): 126 | n_features = n_features_list[ii] 127 | 128 | X_train = rng.randn(n_samples_train, n_features) 129 | X_test = rng.randn(n_samples_test, n_features) 130 | X_train -= X_train.mean(0) 131 | X_test -= X_test.mean(0) 132 | Xs_train.append(X_train) 133 | Xs_test.append(X_test) 134 | 135 | weights = rng.randn(n_features, n_targets) / n_features 136 | weights *= kernel_weights[:, ii] ** 0.5 137 | 138 | if ii == 0: 139 | Y_train = X_train @ weights 140 | Y_test = X_test @ weights 141 | else: 142 | Y_train += X_train @ weights 143 | Y_test += X_test @ weights 144 | 145 | std = Y_train.std(0)[None] 146 | Y_train /= std 147 | Y_test /= std 148 | 149 | Y_train += rng.randn(n_samples_train, n_targets) * noise 150 | Y_test += rng.randn(n_samples_test, n_targets) * noise 151 | Y_train -= Y_train.mean(0) 152 | Y_test -= Y_test.mean(0) 153 | 154 | # Concatenate the feature spaces. 155 | X_train = backend.asarray(np.concatenate(Xs_train, 1), dtype="float32") 156 | X_test = backend.asarray(np.concatenate(Xs_test, 1), dtype="float32") 157 | Y_train = backend.asarray(Y_train, dtype="float32") 158 | Y_test = backend.asarray(Y_test, dtype="float32") 159 | kernel_weights = backend.asarray(kernel_weights, dtype="float32") 160 | 161 | return X_train, X_test, Y_train, Y_test, kernel_weights, n_features_list 162 | 163 | 164 | def _batch_or_skip(array, batch, axis): 165 | """Apply a batch on given axis, or skip if the dimension is equal to 1.""" 166 | skip = (array is None or isinstance(array, numbers.Number) 167 | or array.ndim == 0 or array.shape[axis] == 1) # noqa 168 | if skip: 169 | return array 170 | else: 171 | # Not general but works with slices in `batch`. 172 | if axis == 0: 173 | return array[batch] 174 | elif axis == 1: 175 | return array[:, batch] 176 | else: 177 | raise NotImplementedError() 178 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Himalaya documentation build configuration file. 4 | # 5 | # This file is execfile()d with the current directory set to its 6 | # containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | # If extensions (or modules to document with autodoc) are in another directory, 15 | # add these directories to sys.path here. If the directory is relative to the 16 | # documentation root, use os.path.abspath to make it absolute, like shown here. 17 | # 18 | # import os 19 | # import sys 20 | # sys.path.insert(0, os.path.abspath('.')) 21 | 22 | from sphinx_gallery.sorting import FileNameSortKey 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | # 28 | # needs_sphinx = '1.0' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 
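# Each extension listed here must be importable in the build environment,
# otherwise `make html` will typically fail at startup (numpydoc,
# sphinx_gallery and sphinxcontrib-mermaid are third-party packages
# installed separately).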
33 | extensions = [ 34 | 'sphinx.ext.autodoc', 35 | 'sphinx.ext.autosummary', 36 | 'numpydoc', 37 | 'sphinx.ext.intersphinx', 38 | 'sphinx.ext.mathjax', 39 | 'sphinx.ext.viewcode', 40 | 'sphinx.ext.githubpages', 41 | 'sphinx_gallery.gen_gallery', 42 | 'sphinxcontrib.mermaid', 43 | ] 44 | 45 | # generate autosummary even if no references 46 | autosummary_generate = True 47 | 48 | # Sphinx-gallery 49 | sphinx_gallery_conf = { 50 | # path to your examples scripts 51 | 'examples_dirs': '../examples', 52 | # path where to save gallery generated examples 53 | 'gallery_dirs': '_auto_examples', 54 | # which files to execute? only those with "plot_" 55 | 'filename_pattern': 'plot_', 56 | # 'ignore_pattern': 'download', 57 | 'within_subsection_order': FileNameSortKey, 58 | 'remove_config_comments': 'True', 59 | 'plot_gallery': 'True', 60 | 'thumbnail_size': (480, 250), 61 | 'download_all_examples': False, 62 | } 63 | 64 | # Add any paths that contain templates here, relative to this directory. 65 | templates_path = ['static'] 66 | 67 | # The suffix(es) of source filenames. 68 | # You can specify multiple suffix as a list of string: 69 | # 70 | # source_suffix = ['.rst', '.md'] 71 | source_suffix = '.rst' 72 | 73 | # The main toctree document. 74 | main_doc = 'index' 75 | 76 | # General information about the project. 77 | project = u'Himalaya' 78 | copyright = u'2023, Gallant lab' 79 | author = u'Tom Dupre la Tour' 80 | 81 | # The version info for the project you're documenting, acts as replacement for 82 | # |version| and |release|, also used in various other places throughout the 83 | # built documents. 84 | # 85 | # The short X.Y version. 86 | import himalaya 87 | version = himalaya.__version__ 88 | # The full version, including alpha/beta/rc tags. 89 | release = himalaya.__version__ 90 | 91 | # The language for content autogenerated by Sphinx. Refer to documentation 92 | # for a list of supported languages. 93 | # 94 | # This is also used if you do content translation via gettext catalogs. 95 | # Usually you set "language" from the command line for these cases. 96 | language = "en" 97 | 98 | # List of patterns, relative to source directory, that match files and 99 | # directories to ignore when looking for source files. 100 | # This patterns also effect to html_static_path and html_extra_path 101 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 102 | 103 | # The name of the Pygments (syntax highlighting) style to use. 104 | pygments_style = 'sphinx' 105 | 106 | # If true, `todo` and `todoList` produce output, else they produce nothing. 107 | todo_include_todos = False 108 | 109 | # -- Options for HTML output ---------------------------------------------- 110 | 111 | # The theme to use for HTML and HTML Help pages. See the documentation for 112 | # a list of builtin themes. 113 | # 114 | html_theme = 'alabaster' 115 | 116 | # Theme options are theme-specific and customize the look and feel of a theme 117 | # further. For a list of options available for each theme, see the 118 | # documentation. 119 | html_theme_options = { 120 | 'github_user': 'gallantlab', 121 | 'github_repo': 'himalaya', 122 | 'github_type': 'star', 123 | 'page_width': '1200px', 124 | 'sidebar_width': '235px', 125 | 'logo': 'logo.svg', 126 | 'logo_name': 'himalaya', 127 | # 'description': 'Himalaya', 128 | 'fixed_sidebar': 'True', 129 | } 130 | 131 | # Add any paths that contain custom static files (such as style sheets) here, 132 | # relative to this directory. 
They are copied after the builtin static files, 133 | # so a file named "default.css" will overwrite the builtin "default.css". 134 | html_static_path = ['static'] 135 | 136 | # Custom sidebar templates, must be a dictionary that maps document names 137 | # to template names. 138 | # 139 | # This is required for the alabaster theme 140 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 141 | html_sidebars = { 142 | '**': [ 143 | 'about.html', 144 | 'relations.html', # needs 'show_related': True theme option to display 145 | 'navigation.html', 146 | 'searchbox.html', 147 | ] 148 | } 149 | 150 | # -- Options for HTMLHelp output ------------------------------------------ 151 | 152 | # Output file base name for HTML help builder. 153 | htmlhelp_basename = 'Himalayadoc' 154 | 155 | # -- Options for LaTeX output --------------------------------------------- 156 | 157 | latex_elements = { 158 | # The paper size ('letterpaper' or 'a4paper'). 159 | # 160 | # 'papersize': 'letterpaper', 161 | 162 | # The font size ('10pt', '11pt' or '12pt'). 163 | # 164 | # 'pointsize': '10pt', 165 | 166 | # Additional stuff for the LaTeX preamble. 167 | # 168 | # 'preamble': '', 169 | 170 | # Latex figure (float) alignment 171 | # 172 | # 'figure_align': 'htbp', 173 | } 174 | 175 | # Grouping the document tree into LaTeX files. List of tuples 176 | # (source start file, target name, title, 177 | # author, documentclass [howto, manual, or own class]). 178 | latex_documents = [ 179 | (main_doc, 'Himalaya.tex', u'Himalaya Documentation', u'Gallant lab', 180 | 'manual'), 181 | ] 182 | 183 | # -- Options for manual page output --------------------------------------- 184 | 185 | # One entry per manual page. List of tuples 186 | # (source start file, name, description, authors, manual section). 187 | man_pages = [(main_doc, 'himalaya', u'Himalaya Documentation', [author], 1)] 188 | 189 | # -- Options for Texinfo output ------------------------------------------- 190 | 191 | # Grouping the document tree into Texinfo files. List of tuples 192 | # (source start file, target name, title, author, 193 | # dir menu entry, description, category) 194 | texinfo_documents = [ 195 | (main_doc, 'Himalaya', u'Himalaya Documentation', author, 'Himalaya', 196 | 'One line description of project.', 'Miscellaneous'), 197 | ] 198 | 199 | # Example configuration for intersphinx: refer to the Python standard library. 200 | intersphinx_mapping = {'https://docs.python.org/': None} 201 | -------------------------------------------------------------------------------- /himalaya/lasso/_sklearn_api.py: -------------------------------------------------------------------------------- 1 | from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin 2 | from sklearn.utils.validation import check_is_fitted 3 | 4 | from ._group_lasso import solve_sparse_group_lasso_cv 5 | 6 | from ..validation import check_array 7 | from ..validation import check_cv 8 | from ..validation import validate_data 9 | from ..validation import _get_string_dtype 10 | from ..backend import get_backend 11 | from ..backend import force_cpu_backend 12 | from ..scoring import r2_score 13 | 14 | 15 | class SparseGroupLassoCV(MultiOutputMixin, RegressorMixin, BaseEstimator): 16 | """Sparse group Lasso 17 | 18 | Solved with hyperparameter grid-search over cross-validation. 19 | 20 | Parameters 21 | ---------- 22 | groups : array of shape (n_features, ) or None 23 | Encoding of the group of each feature. 
If None, all features are
24 |         gathered in one group, and the problem is equivalent to the Lasso.
25 |
26 |     l21_regs : array of shape (n_l21_regs, )
27 |         All the group Lasso regularization parameters tested.
28 |
29 |     l1_regs : array of shape (n_l1_regs, )
30 |         All the Lasso regularization parameters tested.
31 |
32 |     solver : str
33 |         Algorithm used during the fit, "proximal_gradient" only for now.
34 |
35 |     solver_params : dict or None
36 |         Additional parameters for the solver.
37 |         See more details in the docstring of the function:
38 |         ``SparseGroupLassoCV.ALL_SOLVERS[solver]``
39 |
40 |     cv : int or scikit-learn splitter
41 |         Cross-validation splitter. If an int, KFold is used.
42 |
43 |     force_cpu : bool
44 |         If True, computations will be performed on CPU, ignoring the
45 |         current backend. If False, use the current backend.
46 |
47 |     Attributes
48 |     ----------
49 |     coef_ : array of shape (n_features, ) or (n_features, n_targets)
50 |         Coefficients of the linear model. Always on CPU.
51 |
52 |     best_l21_reg_ : array of shape (n_targets, )
53 |         Best hyperparameter per target.
54 |
55 |     best_l1_reg_ : array of shape (n_targets, )
56 |         Best hyperparameter per target.
57 |
58 |     cv_scores_ : array of shape (n_l21_regs * n_l1_regs, n_targets)
59 |         Cross-validation scores of all tested hyperparameters.
60 |         The scores are computed with r2_score.
61 |
62 |     n_features_in_ : int
63 |         Number of features used during the fit.
64 |
65 |     Examples
66 |     --------
67 |     >>> from himalaya.lasso import SparseGroupLassoCV
68 |     >>> import numpy as np
69 |     >>> n_samples, n_features, n_targets = 10, 5, 3
70 |     >>> X = np.random.randn(n_samples, n_features)
71 |     >>> Y = np.random.randn(n_samples, n_targets)
72 |     >>> clf = SparseGroupLassoCV()
73 |     >>> clf.fit(X, Y)
74 |     SparseGroupLassoCV()
75 |     """
76 |     ALL_SOLVERS = dict(proximal_gradient=solve_sparse_group_lasso_cv)
77 |
78 |     def __init__(self, groups=None, l1_regs=[0], l21_regs=[0],
79 |                  solver="proximal_gradient", solver_params=None, cv=5,
80 |                  force_cpu=False):
81 |         self.groups = groups
82 |         self.l1_regs = l1_regs
83 |         self.l21_regs = l21_regs
84 |         self.solver = solver
85 |         self.solver_params = solver_params
86 |         self.cv = cv
87 |         self.force_cpu = force_cpu
88 |
89 |     @force_cpu_backend
90 |     def fit(self, X, y):
91 |         """Fit the model
92 |
93 |         Parameters
94 |         ----------
95 |         X : array of shape (n_samples, n_features)
96 |             Training data.
97 |
98 |         y : array of shape (n_samples,) or (n_samples, n_targets)
99 |             Target values.
100 |
101 |         Returns
102 |         -------
103 |         self : returns an instance of self.
104 |         """
105 |         X, y = validate_data(self, X, y, reset=True, accept_sparse=False, ndim=2)
106 |         self.dtype_ = _get_string_dtype(X)
107 |         y = check_array(y, dtype=self.dtype_, ndim=[1, 2])
108 |         if X.shape[0] != y.shape[0]:
109 |             raise ValueError("Inconsistent number of samples.")
110 |         cv = check_cv(self.cv, y)
111 |         ravel = False
112 |         if y.ndim == 1:
113 |             y = y[:, None]
114 |             ravel = True
115 |
116 |         results = self._call_solver(X=X, Y=y, groups=self.groups, cv=cv,
117 |                                     l21_regs=self.l21_regs,
118 |                                     l1_regs=self.l1_regs)
119 |         self.coef_, self.best_l21_reg_, self.best_l1_reg_ = results[:3]
120 |         self.cv_scores_ = results[3]
121 |
122 |         if ravel:
123 |             self.coef_ = self.coef_[:, 0]
124 |
125 |         return self
126 |
127 |     def _call_solver(self, **direct_params):
128 |         if self.solver not in self.ALL_SOLVERS:
129 |             raise ValueError("Unknown solver=%r."
% self.solver) 130 | 131 | function = self.ALL_SOLVERS[self.solver] 132 | solver_params = self.solver_params or {} 133 | 134 | # check duplicated parameters 135 | intersection = set(direct_params.keys()).intersection( 136 | set(solver_params.keys())) 137 | if intersection: 138 | raise ValueError( 139 | 'Parameters %s should not be given in solver_params, since ' 140 | 'they are either fixed or have a direct parameter in %s.' % 141 | (intersection, self.__class__.__name__)) 142 | 143 | return function(**direct_params, **solver_params) 144 | 145 | @force_cpu_backend 146 | def predict(self, X): 147 | """Predict using the model. 148 | 149 | Parameters 150 | ---------- 151 | X : array of shape (n_samples_test, n_features) 152 | Samples. 153 | 154 | Returns 155 | ------- 156 | Y_hat : array of shape (n_samples,) or (n_samples, n_targets) 157 | Returns predicted values. 158 | """ 159 | backend = get_backend() 160 | check_is_fitted(self) 161 | X = validate_data(self, X, reset=False, dtype=self.dtype_, accept_sparse=False, ndim=2) 162 | Y_hat = backend.to_numpy(X) @ backend.to_numpy(self.coef_) 163 | return backend.asarray_like(Y_hat, ref=X) 164 | 165 | @force_cpu_backend 166 | def score(self, X, y): 167 | """Return the coefficient of determination R^2 of the prediction. 168 | 169 | Parameters 170 | ---------- 171 | X : array of shape (n_samples_test, n_features) 172 | Samples. 173 | 174 | y : array-like of shape (n_samples,) or (n_samples, n_targets) 175 | True values for X. 176 | 177 | Returns 178 | ------- 179 | score : array of shape (n_targets, ) 180 | R^2 of self.predict(X) versus y. 181 | """ 182 | y_pred = self.predict(X) 183 | y_true = check_array(y, dtype=self.dtype_, ndim=self.coef_.ndim) 184 | 185 | if y_true.ndim == 1: 186 | return r2_score(y_true[:, None], y_pred[:, None])[0] 187 | else: 188 | return r2_score(y_true, y_pred) 189 | 190 | def _more_tags(self): 191 | return {'requires_y': True} 192 | 193 | def __sklearn_tags__(self): 194 | tags = super().__sklearn_tags__() 195 | tags.target_tags.required = True 196 | return tags 197 | -------------------------------------------------------------------------------- /examples/multiple_kernel_ridge/plot_mkr_1_sklearn_api.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multiple-kernel ridge with scikit-learn API 3 | =========================================== 4 | This example demonstrates how to solve multiple kernel ridge regression, using 5 | scikit-learn API. 6 | """ 7 | 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | 11 | from himalaya.backend import set_backend 12 | from himalaya.kernel_ridge import KernelRidgeCV 13 | from himalaya.kernel_ridge import MultipleKernelRidgeCV 14 | from himalaya.kernel_ridge import Kernelizer 15 | from himalaya.kernel_ridge import ColumnKernelizer 16 | from himalaya.utils import generate_multikernel_dataset 17 | 18 | from sklearn.pipeline import make_pipeline 19 | from sklearn import set_config 20 | set_config(display='diagram') 21 | 22 | # sphinx_gallery_thumbnail_number = 2 23 | ############################################################################### 24 | # In this example, we use the ``torch_cuda`` backend. 25 | # 26 | # Torch can perform computations both on CPU and GPU. To use CPU, use the 27 | # "torch" backend, to use GPU, use the "torch_cuda" backend. 
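#
# Below, ``on_error="warn"`` makes ``set_backend`` warn and keep the current
# backend instead of raising an error when the requested GPU backend is not
# available, so this example also runs on machines without a GPU.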
28 | 29 | backend = set_backend("torch_cuda", on_error="warn") 30 | 31 | ############################################################################### 32 | # Generate a random dataset 33 | # ------------------------- 34 | # - X_train : array of shape (n_samples_train, n_features) 35 | # - X_test : array of shape (n_samples_test, n_features) 36 | # - Y_train : array of shape (n_samples_train, n_targets) 37 | # - Y_test : array of shape (n_samples_test, n_targets) 38 | 39 | (X_train, X_test, Y_train, Y_test, kernel_weights, 40 | n_features_list) = generate_multikernel_dataset(n_kernels=3, n_targets=50, 41 | n_samples_train=600, 42 | n_samples_test=300, 43 | random_state=42) 44 | 45 | feature_names = [f"Feature space {ii}" for ii in range(len(n_features_list))] 46 | 47 | ############################################################################### 48 | # We could precompute the kernels by hand on ``Xs_train``, as done in 49 | # ``plot_mkr_random_search.py``. Instead, here we use the ``ColumnKernelizer`` 50 | # to make a ``scikit-learn`` ``Pipeline``. 51 | 52 | # Find the start and end of each feature space X in Xs 53 | start_and_end = np.concatenate([[0], np.cumsum(n_features_list)]) 54 | slices = [ 55 | slice(start, end) 56 | for start, end in zip(start_and_end[:-1], start_and_end[1:]) 57 | ] 58 | 59 | ############################################################################### 60 | # Create a different ``Kernelizer`` for each feature space. Here we use a 61 | # linear kernel for all feature spaces, but ``ColumnKernelizer`` accepts any 62 | # ``Kernelizer``, or ``scikit-learn`` ``Pipeline`` ending with a 63 | # ``Kernelizer``. 64 | kernelizers = [(name, Kernelizer(), slice_) 65 | for name, slice_ in zip(feature_names, slices)] 66 | column_kernelizer = ColumnKernelizer(kernelizers) 67 | 68 | # Note that ``ColumnKernelizer`` has a parameter ``n_jobs`` to parallelize each 69 | # kernelizer, yet such parallelism does not work with GPU arrays. 70 | 71 | ############################################################################### 72 | # Define the model 73 | # ---------------- 74 | # 75 | # The class takes a number of common parameters during initialization, such as 76 | # `kernels` or `solver`. Since the solver parameters might be different 77 | # depending on the solver, they can be passed in the `solver_params` parameter. 78 | 79 | ############################################################################### 80 | # Here we use the "random_search" solver. 81 | # We can check its specific parameters in the function docstring: 82 | solver_function = MultipleKernelRidgeCV.ALL_SOLVERS["random_search"] 83 | print("Docstring of the function %s:" % solver_function.__name__) 84 | print(solver_function.__doc__) 85 | 86 | ############################################################################### 87 | # We use 100 iterations to have a reasonably fast example (~40 sec). 88 | # To have a better convergence, we probably need more iterations. 89 | # Note that there is currently no stopping criterion in this method. 90 | n_iter = 100 91 | 92 | ############################################################################### 93 | # Grid of regularization parameters. 94 | alphas = np.logspace(-10, 10, 41) 95 | 96 | ############################################################################### 97 | # Batch parameters are used to reduce the necessary GPU memory. A larger value 98 | # will be a bit faster, but the solver might crash if it runs out of memory. 
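# If the solver runs out of GPU memory, a sensible first fix is to reduce
# ``n_targets_batch`` and ``n_alphas_batch`` (e.g. halve them), at the cost
# of a slightly slower fit.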
99 | # Optimal values depend on the size of your dataset. 100 | n_targets_batch = 1000 101 | n_alphas_batch = 20 102 | n_targets_batch_refit = 200 103 | 104 | solver_params = dict(n_iter=n_iter, alphas=alphas, 105 | n_targets_batch=n_targets_batch, 106 | n_alphas_batch=n_alphas_batch, 107 | n_targets_batch_refit=n_targets_batch_refit, 108 | jitter_alphas=True) 109 | 110 | model = MultipleKernelRidgeCV(kernels="precomputed", solver="random_search", 111 | solver_params=solver_params) 112 | 113 | ############################################################################### 114 | # Define and fit the pipeline 115 | pipe = make_pipeline(column_kernelizer, model) 116 | pipe.fit(X_train, Y_train) 117 | 118 | ############################################################################### 119 | # Plot the convergence curve 120 | # -------------------------- 121 | 122 | # ``cv_scores`` gives the scores for each sampled kernel weights. 123 | # The convergence curve is thus the current maximum for each target. 124 | cv_scores = backend.to_numpy(pipe[1].cv_scores_) 125 | current_max = np.maximum.accumulate(cv_scores, axis=0) 126 | mean_current_max = np.mean(current_max, axis=1) 127 | 128 | x_array = np.arange(1, len(mean_current_max) + 1) 129 | plt.plot(x_array, mean_current_max, '-o') 130 | plt.grid("on") 131 | plt.xlabel("Number of kernel weights sampled") 132 | plt.ylabel("L2 negative loss (higher is better)") 133 | plt.title("Convergence curve, averaged over targets") 134 | plt.tight_layout() 135 | plt.show() 136 | 137 | ############################################################################### 138 | # Compare to ``KernelRidgeCV`` 139 | # ---------------------------- 140 | # Compare to a baseline ``KernelRidgeCV`` model with all the concatenated 141 | # features. Comparison is performed using the prediction scores on the test 142 | # set. 143 | 144 | ############################################################################### 145 | # Fit the baseline model ``KernelRidgeCV`` 146 | baseline = KernelRidgeCV(kernel="linear", alphas=alphas) 147 | baseline.fit(X_train, Y_train) 148 | 149 | ############################################################################### 150 | # Compute scores of both models 151 | scores = pipe.score(X_test, Y_test) 152 | scores = backend.to_numpy(scores) 153 | 154 | scores_baseline = baseline.score(X_test, Y_test) 155 | scores_baseline = backend.to_numpy(scores_baseline) 156 | 157 | ############################################################################### 158 | # Plot histograms 159 | bins = np.linspace(0, max(scores_baseline.max(), scores.max()), 50) 160 | plt.hist(scores_baseline, bins, alpha=0.7, label="KernelRidgeCV") 161 | plt.hist(scores, bins, alpha=0.7, label="MultipleKernelRidgeCV") 162 | plt.xlabel(r"$R^2$ generalization score") 163 | plt.title("Histogram over targets") 164 | plt.legend() 165 | plt.show() 166 | -------------------------------------------------------------------------------- /doc/models.rst: -------------------------------------------------------------------------------- 1 | Model descriptions 2 | ================== 3 | 4 | This package implements a number of models. 5 | 6 | Ridge 7 | ----- 8 | 9 | Let :math:`X\in \mathbb{R}^{n\times p}` be a feature matrix with :math:`n` 10 | samples and :math:`p` features, :math:`y\in \mathbb{R}^n` a target vector, and 11 | :math:`\alpha > 0` a fixed regularization hyperparameter. Ridge regression 12 | [1]_ defines the weight vector :math:`b^*\in \mathbb{R}^p` as: 13 | 14 | .. 
math:: 15 | b^* = \arg\min_b \|Xb - y\|_2^2 + \alpha \|b\|_2^2. 16 | 17 | The equation has a closed-form solution :math:`b^* = M y`, where :math:`M = 18 | (X^\top X + \alpha I_p)^{-1}X^\top \in \mathbb{R}^{p \times n}`. 19 | 20 | .. admonition:: This model is implemented in 21 | 22 | - :class:`~himalaya.ridge.Ridge` (scikit-learn-compatible estimator) 23 | - :func:`~himalaya.ridge.solve_ridge_svd` (function) 24 | 25 | KernelRidge 26 | ----------- 27 | 28 | By the Woodbury matrix identity, :math:`b^*` can be written as :math:`b^* = 29 | X^\top(XX^\top + \alpha I_n)^{-1}y`, or :math:`b^* = X^\top w^*` for some 30 | :math:`w^*\in \mathbb{R}^n`. Noting the linear kernel :math:`K = X X^\top \in 31 | \mathbb{R}^{n\times n}`, this leads to the *equivalent* formulation: 32 | 33 | .. math:: 34 | w^* = \arg\min_w \|Kw - y\|_2^2 + \alpha w^\top Kw. 35 | 36 | This model can be extended to arbitrary positive semidefinite kernels 37 | :math:`K`, leading to the more general kernel ridge regression [2]_. 38 | 39 | .. admonition:: This model is implemented in 40 | 41 | - :class:`~himalaya.kernel_ridge.KernelRidge` (scikit-learn-compatible estimator) 42 | - :func:`~himalaya.kernel_ridge.solve_kernel_ridge_eigenvalues` (function) 43 | - :func:`~himalaya.kernel_ridge.solve_kernel_ridge_gradient_descent` (function) 44 | - :func:`~himalaya.kernel_ridge.solve_kernel_ridge_conjugate_gradient` (function) 45 | 46 | 47 | RidgeCV and KernelRidgeCV 48 | ------------------------- 49 | 50 | In practice, because the ridge regression and kernel ridge regression 51 | hyperparameter :math:`\alpha` is unknown, it is typically selected through a 52 | grid-search with cross-validation. In cross-validation, we split the data set 53 | into a training set :math:`(X_{train}, y_{train})` and a validation set 54 | :math:`(X_{val}, y_{val})`. Then, we train the model on the training set, and 55 | evaluate the generalization performance on the validation set. We perform this 56 | process for multiple hyperparameter candidates :math:`\alpha`, typically 57 | defined over a grid of log-spaced values. Finally, we keep the candidate 58 | leading to the best generalization performance, as measured by the validation 59 | loss, averaged over all cross-validation splits. 60 | 61 | .. admonition:: These models are implemented in 62 | 63 | - :class:`~himalaya.ridge.RidgeCV` (scikit-learn-compatible estimator) 64 | - :func:`~himalaya.ridge.solve_ridge_cv_svd` (function) 65 | - :class:`~himalaya.kernel_ridge.KernelRidgeCV` (scikit-learn-compatible estimator) 66 | - :func:`~himalaya.kernel_ridge.solve_kernel_ridge_cv_eigenvalues` (function) 67 | 68 | 69 | GroupRidgeCV / BandedRidgeCV 70 | ---------------------------- 71 | 72 | In some applications, features are naturally grouped into groups (or feature 73 | spaces). To adapt the regularization level to each feature space, ridge 74 | regression can be extended to group-regularized ridge regression (also known 75 | as banded ridge regression [3]_). In this model, a separate hyperparameter is 76 | optimized for each feature space: 77 | 78 | .. math:: 79 | b^* = \arg\min_b \|\sum_{i=1}^m X_i b_i - y\|_2^2 + \sum_{i=1}^m \alpha_i \|b_i\|_2^2. 80 | 81 | This is equivalent to solving a ridge regression: 82 | 83 | .. math:: 84 | b^* = \arg\min_b \|Z b - Y\|_2^2 + \|b\|_2^2 85 | 86 | where the feature space :math:`X_i` is scaled by a group scaling :math:`Z_i = 87 | e^{\delta_i} X_i`. The hyperparameters :math:`\delta_i = - \log(\alpha_i)` are 88 | then learned over cross-validation [4]_. 89 | 90 | .. 
90 | .. admonition:: This model is implemented in
91 | 
92 |     - :class:`~himalaya.ridge.GroupRidgeCV` (scikit-learn-compatible estimator)
93 |     - :func:`~himalaya.ridge.solve_group_ridge_random_search` (function)
94 | 
95 | See also multiple-kernel ridge regression, which is equivalent to
96 | group-regularized ridge regression when using one linear kernel per group
97 | of features:
98 | 
99 | - :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV` (scikit-learn-compatible estimator)
100 | - :func:`~himalaya.kernel_ridge.solve_multiple_kernel_ridge_random_search` (function)
101 | - :func:`~himalaya.kernel_ridge.solve_multiple_kernel_ridge_hyper_gradient` (function)
102 | 
103 | .. note:: "Group ridge regression" is also sometimes called "Banded ridge regression".
104 | 
105 | WeightedKernelRidge
106 | -------------------
107 | 
108 | To extend kernel ridge regression to group regularization, we can compute the
109 | kernel as a weighted sum of multiple kernels, :math:`K = \sum_{i=1}^m e^{\delta_i} K_i`.
110 | Then, we can use :math:`K_i = X_i X_i^\top` for different groups of features
111 | :math:`X_i`. The model becomes:
112 | 
113 | .. math::
114 |     w^* = \arg\min_w \left\|\sum_{i=1}^m e^{\delta_i} K_{i} w - y\right\|_2^2
115 |     + \alpha \sum_{i=1}^m e^{\delta_i} w^\top K_{i} w.
116 | 
117 | This model is called weighted kernel ridge regression. Here, the
118 | log-kernel-weights :math:`\delta_i` are fixed. When all the targets use the same
119 | log-kernel-weights, a single weighted kernel can be precomputed and used in a
120 | kernel ridge regression. However, when the log-kernel-weights are different for
121 | each target, the kernel sum cannot be precomputed, and the model requires
122 | specific algorithms to be fit.
123 | 
124 | .. admonition:: This model is implemented in
125 | 
126 |     - :class:`~himalaya.kernel_ridge.WeightedKernelRidge` (scikit-learn-compatible estimator)
127 |     - :func:`~himalaya.kernel_ridge.solve_weighted_kernel_ridge_gradient_descent` (function)
128 |     - :func:`~himalaya.kernel_ridge.solve_weighted_kernel_ridge_conjugate_gradient` (function)
129 |     - :func:`~himalaya.kernel_ridge.solve_weighted_kernel_ridge_neumann_series` (function)
130 | 
131 | 
132 | MultipleKernelRidgeCV
133 | ---------------------
134 | 
135 | In weighted kernel ridge regression, when the log-kernel-weights
136 | :math:`\delta_i` are unknown, we can learn them by cross-validation. This
137 | model is called multiple-kernel ridge regression. When the kernels are defined
138 | by :math:`K_i = X_i X_i^\top` for different groups of features :math:`X_i`,
139 | multiple-kernel ridge regression is equivalent to group ridge regression
140 | (aka banded ridge regression).
141 | 
142 | .. admonition:: This model is implemented in
143 | 
144 |     - :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV` (scikit-learn-compatible estimator)
145 |     - :func:`~himalaya.kernel_ridge.solve_multiple_kernel_ridge_hyper_gradient` (function)
146 |     - :func:`~himalaya.kernel_ridge.solve_multiple_kernel_ridge_random_search` (function)
147 | 
148 | 
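To make the weighted kernel sum used by the two models above concrete, here is
a minimal NumPy sketch (made-up shapes, independent of the package API):

.. code-block:: python

    import numpy as np

    rng = np.random.default_rng(0)
    X1, X2 = rng.standard_normal((50, 10)), rng.standard_normal((50, 20))
    deltas = np.array([0.0, -2.0])  # log-kernel-weights

    # weighted kernel sum: K = sum_i exp(delta_i) * X_i @ X_i.T
    K = sum(np.exp(d) * X @ X.T for d, X in zip(deltas, [X1, X2]))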
149 | .. include:: flowchart.rst
150 | 
151 | References
152 | ~~~~~~~~~~
153 | 
154 | .. [1] Hoerl, A. E., & Kennard, R. W. (1970). Ridge regression: Biased
155 |    estimation for nonorthogonal problems. Technometrics, 12(1), 55-67.
156 | 
157 | .. [2] Saunders, C., Gammerman, A., & Vovk, V. (1998). Ridge regression
158 |    learning algorithm in dual variables.
159 | 
160 | .. [3] Nunez-Elizalde, A. O., Huth, A. G., & Gallant, J. L. (2019). Voxelwise
161 |    encoding models with non-spherical multivariate normal priors. NeuroImage,
162 |    197, 482-492.
163 | 
164 | .. [4] Dupré La Tour, T., Eickenberg, M., Nunez-Elizalde, A. O., & Gallant, J. L. (2022).
165 |    Feature-space selection with banded ridge regression. NeuroImage.
166 | 
--------------------------------------------------------------------------------
/himalaya/backend/tests/test_backends.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | 
3 | from himalaya.backend import set_backend
4 | from himalaya.backend import ALL_BACKENDS
5 | from himalaya.backend._utils import _dtype_to_str
6 | from himalaya.utils import assert_array_almost_equal
7 | 
8 | 
9 | @pytest.mark.parametrize('backend', ALL_BACKENDS)
10 | def test_apply_argmax(backend):
11 |     backend = set_backend(backend)
12 |     for array in [
13 |             backend.randn(1),
14 |             backend.randn(10),
15 |             backend.randn(10, 1),
16 |             backend.randn(10, 4),
17 |             backend.randn(10, 1, 8),
18 |             backend.randn(10, 4, 8),
19 |     ]:
20 |         for axis in range(array.ndim):
21 |             argmax = backend.argmax(array, axis=axis)
22 |             assert_array_almost_equal(
23 |                 backend.max(array, axis=axis),
24 |                 backend.apply_argmax(array, argmax, axis=axis),
25 |             )
26 | 
27 | 
28 | @pytest.mark.parametrize('dtype_str', ["float32", "float64"])
29 | @pytest.mark.parametrize('backend', ALL_BACKENDS)
30 | def test_std_float64(backend, dtype_str):
31 |     backend = set_backend(backend)
32 |     for array in [
33 |             backend.randn(1),
34 |             backend.randn(10),
35 |             backend.randn(10, 1),
36 |             backend.randn(10, 4),
37 |             backend.randn(10, 1, 8),
38 |             backend.randn(10, 4, 8),
39 |     ]:
40 |         array = backend.asarray(array, dtype=dtype_str)
41 |         array_64 = backend.asarray(array, dtype="float64")
42 |         for axis in range(array.ndim):
43 |             result = backend.std_float64(array, axis=axis)
44 |             reference = backend.to_numpy(array_64).std(axis=axis,
45 |                                                        dtype="float64")
46 |             reference = backend.asarray(reference, dtype=dtype_str)
47 |             assert_array_almost_equal(result, reference)
48 | 
49 | 
50 | @pytest.mark.parametrize('backend', ALL_BACKENDS)
51 | def test_diagonal_view(backend):
52 |     backend = set_backend(backend)
53 |     try:
54 |         import torch
55 |     except ImportError:
56 |         # pytest.skip() raises immediately, so no re-raise is needed.
57 |         pytest.skip("PyTorch not installed.")
58 |     import numpy as np
59 | 
60 |     for array in [
61 |             backend.randn(10, 4),
62 |             backend.randn(10, 4).T,
63 |             backend.randn(10, 4, 8),
64 |             backend.randn(10, 4, 8).T,
65 |             backend.randn(3, 4, 8, 5),
66 |     ]:
67 |         for axis1 in range(array.ndim):
68 |             for axis2 in range(array.ndim):
69 |                 if axis1 != axis2:
70 |                     result = backend.diagonal_view(array, axis1=axis1,
71 |                                                    axis2=axis2)
72 |                     # compare with torch diagonal
73 |                     reference = torch.diagonal(
74 |                         torch.from_numpy(backend.to_numpy(array)), dim1=axis1,
75 |                         dim2=axis2)
76 |                     assert_array_almost_equal(result, reference)
77 |                     # compare with numpy diagonal
78 |                     reference = np.diagonal(backend.to_numpy(array),
79 |                                             axis1=axis1, axis2=axis2)
80 |                     assert_array_almost_equal(result, reference)
81 |                     # test that this is a modifiable view
82 |                     result += 1
83 |                     reference = np.diagonal(backend.to_numpy(array),
84 |                                             axis1=axis1, axis2=axis2)
85 |                     assert_array_almost_equal(result, reference)
86 | 
87 | 
88 | @pytest.mark.parametrize('backend', ALL_BACKENDS)
89 | def test_eigh(backend):
90 |     import scipy.linalg
91 |     backend = set_backend(backend)
92 | 
93 |     array = backend.randn(10, 20)
94 |     array = backend.asarray(array, dtype='float64')
95 |     kernel = 
array @ array.T 96 | 97 | values, vectors = backend.eigh(kernel) 98 | values_ref, vectors_ref = scipy.linalg.eigh(backend.to_numpy(kernel)) 99 | 100 | assert_array_almost_equal(values, values_ref) 101 | 102 | # vectors can be flipped in sign 103 | assert vectors.shape == vectors_ref.shape 104 | for ii in range(vectors.shape[1]): 105 | try: 106 | assert_array_almost_equal(vectors[:, ii], vectors_ref[:, ii]) 107 | except AssertionError: 108 | assert_array_almost_equal(vectors[:, ii], -vectors_ref[:, ii]) 109 | 110 | 111 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 112 | @pytest.mark.parametrize('full_matrices', [True, False]) 113 | @pytest.mark.parametrize('three_dim', [True, False]) 114 | def test_svd(backend, full_matrices, three_dim): 115 | import numpy.linalg 116 | backend = set_backend(backend) 117 | 118 | if three_dim: 119 | array = backend.randn(3, 5, 7) 120 | else: 121 | array = backend.randn(5, 7) 122 | 123 | array = backend.asarray(array, dtype='float64') 124 | 125 | U, s, V = backend.svd(array, full_matrices=full_matrices) 126 | U_ref, s_ref, V_ref = numpy.linalg.svd(backend.to_numpy(array), 127 | full_matrices=full_matrices) 128 | 129 | assert_array_almost_equal(s, s_ref) 130 | 131 | if not three_dim: 132 | U_ref = U_ref[None] 133 | U = U[None] 134 | V_ref = V_ref[None] 135 | V = V[None] 136 | 137 | # vectors can be flipped in sign 138 | assert U.shape == U_ref.shape 139 | assert V.shape == V_ref.shape 140 | for kk in range(U.shape[0]): 141 | for ii in range(U.shape[2]): 142 | try: 143 | assert_array_almost_equal(U[kk, :, ii], U_ref[kk, :, ii]) 144 | assert_array_almost_equal(V[kk, ii, :], V_ref[kk, ii, :]) 145 | except AssertionError: 146 | assert_array_almost_equal(U[kk, :, ii], -U_ref[kk, :, ii]) 147 | assert_array_almost_equal(V[kk, ii, :], -V_ref[kk, ii, :]) 148 | 149 | 150 | @pytest.mark.parametrize('backend_out', ALL_BACKENDS) 151 | @pytest.mark.parametrize('backend_in', ALL_BACKENDS) 152 | def test_changed_backend_asarray(backend_in, backend_out): 153 | backend = set_backend(backend_in) 154 | array_in = backend.asarray([1.2, 2.4, 4.8]) 155 | assert array_in is not None 156 | 157 | # change the backend, and cast to the correct class 158 | backend = set_backend(backend_out) 159 | array_out = backend.asarray(array_in) 160 | assert array_out is not None 161 | 162 | if backend_in == backend_out or backend_in[:5] == backend_out[:5]: 163 | # assert the class did not change 164 | assert array_in.__class__ == array_out.__class__ 165 | else: 166 | # assert the class did change 167 | assert array_in.__class__ != array_out.__class__ 168 | 169 | # assert the new class is correct 170 | array_out2 = backend.randn(3) 171 | assert array_out.__class__ == array_out2.__class__ 172 | 173 | # test check_arrays 174 | array_out3, array_out4, array_out5 = backend.check_arrays( 175 | array_in, array_in, [array_in]) 176 | assert array_out.__class__ == array_out3.__class__ 177 | assert array_out.__class__ == array_out4.__class__ 178 | assert array_out.__class__ == array_out5[0].__class__ 179 | 180 | 181 | @pytest.mark.parametrize('dtype_out', ["float32", "float64"]) 182 | @pytest.mark.parametrize('dtype_in', ["float32", "float64"]) 183 | @pytest.mark.parametrize('backend_out', ALL_BACKENDS) 184 | @pytest.mark.parametrize('backend_in', ALL_BACKENDS) 185 | def test_asarray_dtype(backend_in, backend_out, dtype_in, dtype_out): 186 | backend = set_backend(backend_in) 187 | array_in = backend.asarray([1.2, 2.4, 4.8], dtype=dtype_in) 188 | assert _dtype_to_str(array_in.dtype) == dtype_in 189 | 190 | 
    backend = set_backend(backend_out)
191 |     array_out = backend.asarray(array_in, dtype=dtype_out)
192 |     assert _dtype_to_str(array_out.dtype) == dtype_out
193 | 
194 | 
195 | def test_dtype_to_str_wrong_input():
196 |     assert _dtype_to_str(None) is None
197 | 
198 |     with pytest.raises(NotImplementedError):
199 |         _dtype_to_str(42)
200 | 
--------------------------------------------------------------------------------
/himalaya/kernel_ridge/_predictions.py:
--------------------------------------------------------------------------------
1 | from ..backend import get_backend
2 | from ..progress_bar import bar
3 | from ..utils import _batch_or_skip
4 | 
5 | 
6 | def predict_weighted_kernel_ridge(Ks, dual_weights, deltas, split=False,
7 |                                   n_targets_batch=None, progress_bar=False,
8 |                                   intercept=None):
9 |     """
10 |     Compute predictions, typically on a test set.
11 | 
12 |     Parameters
13 |     ----------
14 |     Ks : array of shape (n_kernels, n_samples_test, n_samples_train)
15 |         Test kernels.
16 |     dual_weights : array of shape (n_samples_train, n_targets)
17 |         Dual weights of the kernel ridge model.
18 |     deltas : array of shape (n_kernels, n_targets) or (n_kernels, )
19 |         Log kernel weights for each target.
20 |     split : bool
21 |         If True, the predictions are split across kernels.
22 |     n_targets_batch : int or None
23 |         Size of the batch for computing predictions. Used for memory reasons.
24 |         If None, uses all n_targets at once.
25 |     progress_bar : bool
26 |         If True, display a progress bar over batches and iterations.
27 |     intercept : None, or array of shape (n_targets,)
28 |         Intercept added to the predictions. Must be None if split=True.
29 | 
30 |     Returns
31 |     -------
32 |     Y_hat : array of shape (n_samples_test, n_targets) or \
33 |             (n_kernels, n_samples_test, n_targets) (if split is True)
34 |         Predicted values.
35 |     """
36 |     backend = get_backend()
37 | 
38 |     Ks, dual_weights, deltas, intercept = backend.check_arrays(
39 |         Ks, dual_weights, deltas, intercept)
40 |     n_samples = Ks.shape[1]
41 |     n_targets = dual_weights.shape[1]
42 |     n_kernels = deltas.shape[0]
43 | 
44 |     if deltas.ndim == 1:
45 |         deltas = deltas[:, None]
46 | 
47 |     if split:
48 |         Y_hat_full = backend.zeros_like(
49 |             deltas, shape=(n_kernels, n_samples, n_targets))
50 |     else:
51 |         Y_hat_full = backend.zeros_like(deltas, shape=(n_samples, n_targets))
52 | 
53 |     if not n_targets_batch:
54 |         n_targets_batch = n_targets
55 | 
56 |     for start in bar(list(range(0, n_targets, n_targets_batch)),
57 |                      title='predict', use_it=progress_bar):
58 |         batch = slice(start, start + n_targets_batch)
59 |         dual_weights_batch = dual_weights[:, batch]
60 |         deltas_batch = _batch_or_skip(deltas, batch, axis=1)
61 |         chi = backend.matmul(Ks, dual_weights_batch)
62 |         split_predictions = backend.exp(deltas_batch[:, None, :]) * chi
63 |         if split:
64 |             if intercept is not None:
65 |                 raise ValueError(
66 |                     "Cannot split the predictions with an intercept.")
67 |             Y_hat_full[:, :, batch] = split_predictions
68 |         else:
69 |             Y_hat_full[:, batch] = split_predictions.sum(0)
70 |             if intercept is not None:
71 |                 Y_hat_full[:, batch] += intercept[None, batch]
72 | 
73 |     return Y_hat_full
74 | 
75 | 
76 | def predict_and_score_weighted_kernel_ridge(Ks, dual_weights, deltas, Y,
77 |                                             score_func, split=False,
78 |                                             n_targets_batch=None,
79 |                                             progress_bar=False,
80 |                                             intercept=None):
81 |     """
82 |     Compute predictions, typically on a test set, and compute the score.
83 | 
84 |     Parameters
85 |     ----------
86 |     Ks : array of shape (n_kernels, n_samples_test, n_samples_train)
87 |         Input kernels.
88 |     dual_weights : array of shape (n_samples_train, n_targets)
89 |         Dual weights of the kernel ridge model.
90 |     deltas : array of shape (n_kernels, n_targets) or (n_kernels, )
91 |         Log kernel weights for each target.
92 |     Y : array of shape (n_samples_test, n_targets)
93 |         Target data.
94 |     score_func : callable
95 |         Function used to compute the score of predictions.
96 |     split : bool
97 |         If True, the predictions are split across kernels.
98 |     n_targets_batch : int or None
99 |         Size of the batch for computing predictions. Used for memory reasons.
100 |         If None, uses all n_targets at once.
101 |     progress_bar : bool
102 |         If True, display a progress bar over batches and iterations.
103 |     intercept : None, or array of shape (n_targets,)
104 |         Intercept added to the predictions. To allow split=True, the intercept
105 |         is not added to the predictions but subtracted from the target Y.
106 | 
107 |     Returns
108 |     -------
109 |     scores : array of shape (n_targets, ) or (n_kernels, n_targets) (if split)
110 |         Prediction score per target.
111 |     """
112 |     backend = get_backend()
113 |     Ks, dual_weights, deltas, Y, intercept = backend.check_arrays(
114 |         Ks, dual_weights, deltas, Y, intercept)
115 | 
116 |     if deltas.ndim == 1:
117 |         deltas = deltas[:, None]
118 | 
119 |     n_kernels, _ = deltas.shape
120 |     _, n_targets = Y.shape
121 |     if split:
122 |         scores = backend.zeros_like(Y, shape=(n_kernels, n_targets))
123 |     else:
124 |         scores = backend.zeros_like(Y, shape=(n_targets, ))
125 | 
126 |     if n_targets_batch is None:
127 |         n_targets_batch = n_targets
128 |     for start in bar(list(range(0, n_targets, n_targets_batch)),
129 |                      title='predict_and_score', use_it=progress_bar):
130 |         batch = slice(start, start + n_targets_batch)
131 |         predictions = predict_weighted_kernel_ridge(
132 |             Ks, dual_weights[:, batch], _batch_or_skip(deltas, batch, axis=1),
133 |             split=split)
134 |         if intercept is not None:
135 |             score_batch = score_func(Y[:, batch] - intercept[None, batch],
136 |                                      predictions)
137 |         else:
138 |             score_batch = score_func(Y[:, batch], predictions)
139 | 
140 |         if split:
141 |             scores[:, batch] = score_batch
142 |         else:
143 |             scores[batch] = score_batch
144 | 
145 |     return scores
146 | 
147 | 
148 | def primal_weights_kernel_ridge(dual_weights, X_fit):
149 |     """Compute the primal weights for kernel ridge regression.
150 | 
151 |     Parameters
152 |     ----------
153 |     dual_weights : array of shape (n_samples_fit, n_targets)
154 |         Dual coefficients of the kernel ridge regression.
155 |     X_fit : array of shape (n_samples_fit, n_features)
156 |         Training features.
157 | 
158 |     Returns
159 |     -------
160 |     primal_weights : array of shape (n_features, n_targets)
161 |         Primal coefficients of the equivalent ridge regression. The
162 |         coefficients are computed on CPU memory, since they can be large.
163 |     """
164 |     backend = get_backend()
165 |     X_fit = backend.to_cpu(X_fit)
166 |     dual_weights = backend.to_cpu(dual_weights)
167 | 
168 |     return X_fit.T @ dual_weights
169 | 
170 | 
171 | def primal_weights_weighted_kernel_ridge(dual_weights, deltas, Xs_fit):
172 |     """Compute the primal weights for weighted kernel ridge regression.
173 | 
174 |     Parameters
175 |     ----------
176 |     dual_weights : array of shape (n_samples_fit, n_targets)
177 |         Dual coefficients of the kernel ridge regression.
178 |     deltas : array of shape (n_kernels, n_targets)
179 |         Log of kernel weights.
180 |     Xs_fit : list of arrays of shape (n_samples_fit, n_features)
181 |         Training features. The list should have `n_kernels` elements.
182 | 183 | Returns 184 | ------- 185 | primal_weights : list of arrays of shape (n_features, n_targets) 186 | Primal coefficients of the equivalent ridge regression. The 187 | coefficients are computed on CPU memory, since they can be large. 188 | """ 189 | backend = get_backend() 190 | dual_weights = backend.to_cpu(dual_weights) 191 | 192 | primal_weights = [] 193 | for X_fit, deltas_i in zip(Xs_fit, deltas): 194 | X_fit = backend.to_cpu(X_fit) 195 | exp_deltas_i = backend.to_cpu(backend.exp(deltas_i)) 196 | primal_weights_i = X_fit.T @ dual_weights * exp_deltas_i[None] 197 | primal_weights.append(primal_weights_i) 198 | 199 | return primal_weights 200 | -------------------------------------------------------------------------------- /examples/multiple_kernel_ridge/plot_mkr_0_random_search.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multiple-kernel ridge 3 | ===================== 4 | This example demonstrates how to solve multiple kernel ridge regression. 5 | It uses random search and cross validation to select optimal hyperparameters. 6 | """ 7 | 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | 11 | from himalaya.backend import set_backend 12 | from himalaya.kernel_ridge import solve_multiple_kernel_ridge_random_search 13 | from himalaya.kernel_ridge import predict_and_score_weighted_kernel_ridge 14 | from himalaya.utils import generate_multikernel_dataset 15 | from himalaya.scoring import r2_score_split 16 | from himalaya.viz import plot_alphas_diagnostic 17 | 18 | # sphinx_gallery_thumbnail_number = 4 19 | ############################################################################### 20 | # In this example, we use the ``cupy`` backend, and fit the model on GPU. 21 | 22 | backend = set_backend("cupy", on_error="warn") 23 | 24 | ############################################################################### 25 | # Generate a random dataset 26 | # ------------------------- 27 | # 28 | # - X_train : array of shape (n_samples_train, n_features) 29 | # - X_test : array of shape (n_samples_test, n_features) 30 | # - Y_train : array of shape (n_samples_train, n_targets) 31 | # - Y_test : array of shape (n_samples_test, n_targets) 32 | 33 | n_kernels = 3 34 | n_targets = 50 35 | kernel_weights = np.tile(np.array([0.5, 0.3, 0.2])[None], (n_targets, 1)) 36 | 37 | (X_train, X_test, Y_train, Y_test, 38 | kernel_weights, n_features_list) = generate_multikernel_dataset( 39 | n_kernels=n_kernels, n_targets=n_targets, n_samples_train=600, 40 | n_samples_test=300, kernel_weights=kernel_weights, random_state=42) 41 | 42 | feature_names = [f"Feature space {ii}" for ii in range(len(n_features_list))] 43 | 44 | # Find the start and end of each feature space X in Xs 45 | start_and_end = np.concatenate([[0], np.cumsum(n_features_list)]) 46 | slices = [ 47 | slice(start, end) 48 | for start, end in zip(start_and_end[:-1], start_and_end[1:]) 49 | ] 50 | Xs_train = [X_train[:, slic] for slic in slices] 51 | Xs_test = [X_test[:, slic] for slic in slices] 52 | 53 | ############################################################################### 54 | # Precompute the linear kernels 55 | # ----------------------------- 56 | # We also cast them to float32. 
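# Note: each linear kernel is just the Gram matrix of one feature space,
# ``K = X @ X.T``, of shape (n_samples, n_samples). As a rough sanity check
# (a sketch assuming the shapes generated above, 600 train and 300 test
# samples), one could add after the next cell:
#
#     assert Ks_train.shape == (n_kernels, 600, 600)
#     assert Ks_test.shape == (n_kernels, 300, 600)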
57 | 
58 | Ks_train = backend.stack([X_train @ X_train.T for X_train in Xs_train])
59 | Ks_train = backend.asarray(Ks_train, dtype=backend.float32)
60 | Y_train = backend.asarray(Y_train, dtype=backend.float32)
61 | 
62 | Ks_test = backend.stack(
63 |     [X_test @ X_train.T for X_train, X_test in zip(Xs_train, Xs_test)])
64 | Ks_test = backend.asarray(Ks_test, dtype=backend.float32)
65 | Y_test = backend.asarray(Y_test, dtype=backend.float32)
66 | 
67 | ###############################################################################
68 | # Run the solver, using random search
69 | # -----------------------------------
70 | # This method should work fine for a small number of kernels (< 20). The
71 | # larger the number of kernels, the more densely we need to sample the
72 | # hyperparameter space (i.e., the larger ``n_iter`` should be).
73 | 
74 | ###############################################################################
75 | # Here we use 100 iterations to have a reasonably fast example (~40 sec).
76 | # For better convergence, we would probably need more iterations.
77 | # Note that there is currently no stopping criterion in this method.
78 | n_iter = 100
79 | 
80 | ###############################################################################
81 | # Grid of regularization parameters.
82 | alphas = np.logspace(-10, 10, 21)
83 | 
84 | ###############################################################################
85 | # Batch parameters are used to reduce the necessary GPU memory. A larger value
86 | # will be a bit faster, but the solver might crash if it runs out of memory.
87 | # Optimal values depend on the size of your dataset.
88 | n_targets_batch = 1000
89 | n_alphas_batch = 20
90 | 
91 | ###############################################################################
92 | # If ``return_weights == "dual"``, the solver will use more memory.
93 | # To mitigate this, you can reduce ``n_targets_batch`` in the refit
94 | # using ``n_targets_batch_refit``.
95 | # If you don't need the dual weights, use ``return_weights = None``.
96 | return_weights = 'dual'
97 | n_targets_batch_refit = 200
98 | 
99 | ###############################################################################
100 | # Run the solver. For each iteration, it will:
101 | #
102 | # - sample kernel weights from a Dirichlet distribution
103 | # - fit (n_splits * n_alphas * n_targets) ridge models
104 | # - compute the scores on the validation set of each split
105 | # - average the scores over splits
106 | # - take the maximum over alphas
107 | # - (only if you ask for the ridge weights) refit using the best alphas per
108 | #   target and the entire dataset
109 | # - return for each target the log kernel weights leading to the best CV score
110 | #   (and the best weights if necessary)
111 | results = solve_multiple_kernel_ridge_random_search(
112 |     Ks=Ks_train,
113 |     Y=Y_train,
114 |     n_iter=n_iter,
115 |     alphas=alphas,
116 |     n_targets_batch=n_targets_batch,
117 |     return_weights=return_weights,
118 |     n_alphas_batch=n_alphas_batch,
119 |     n_targets_batch_refit=n_targets_batch_refit,
120 |     jitter_alphas=True,
121 | )
122 | 
123 | ###############################################################################
124 | # As we used the ``cupy`` backend, the results are ``cupy`` arrays, which are
125 | # on GPU. Here, we cast the results back to CPU, and to ``numpy`` arrays.
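# With ``return_weights='dual'``, ``results`` is a tuple containing, in this
# order, the best log-kernel-weights per target (``deltas``), the refit dual
# weights, and the cross-validation scores of each sampled candidate, as
# unpacked below.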
126 | deltas = backend.to_numpy(results[0])
127 | dual_weights = backend.to_numpy(results[1])
128 | cv_scores = backend.to_numpy(results[2])
129 | 
130 | ###############################################################################
131 | # Plot the convergence curve
132 | # --------------------------
133 | #
134 | # ``cv_scores`` gives the scores for each sampled set of kernel weights.
135 | # The convergence curve is thus the current maximum for each target.
136 | 
137 | current_max = np.maximum.accumulate(cv_scores, axis=0)
138 | mean_current_max = np.mean(current_max, axis=1)
139 | x_array = np.arange(1, len(mean_current_max) + 1)
140 | plt.plot(x_array, mean_current_max, '-o')
141 | plt.grid("on")
142 | plt.xlabel("Number of kernel weights sampled")
143 | plt.ylabel("L2 negative loss (higher is better)")
144 | plt.title("Convergence curve, averaged over targets")
145 | plt.tight_layout()
146 | plt.show()
147 | 
148 | ###############################################################################
149 | # Plot the optimal alphas selected by the solver
150 | # ----------------------------------------------
151 | #
152 | # This plot is helpful to refine the alpha grid if the range is too small or
153 | # too large.
154 | 
155 | best_alphas = 1. / np.sum(np.exp(deltas), axis=0)
156 | plot_alphas_diagnostic(best_alphas, alphas)
157 | plt.title("Best alphas selected by cross-validation")
158 | plt.show()
159 | 
160 | ###############################################################################
161 | # Compute the predictions on the test set
162 | # ---------------------------------------
163 | # (requires the dual weights)
164 | 
165 | split = False
166 | scores = predict_and_score_weighted_kernel_ridge(
167 |     Ks_test, dual_weights, deltas, Y_test, split=split,
168 |     n_targets_batch=n_targets_batch, score_func=r2_score_split)
169 | scores = backend.to_numpy(scores)
170 | 
171 | plt.hist(scores, np.linspace(0, 1, 50))
172 | plt.xlabel(r"$R^2$ generalization score")
173 | plt.title("Histogram over targets")
174 | plt.show()
175 | 
176 | ###############################################################################
177 | # Compute the split predictions on the test set
178 | # ---------------------------------------------
179 | # (requires the dual weights)
180 | #
181 | # Here we apply the dual weights on each kernel separately
182 | # (``exp(deltas[i]) * kernel[i]``), and we compute the R\ :sup:`2` scores
183 | # (corrected for correlations) of each prediction.
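# Conceptually, the split prediction of kernel ``i`` alone is
# ``Y_hat_i = exp(deltas[i]) * (Ks_test[i] @ dual_weights)`` (a sketch
# following ``predict_weighted_kernel_ridge``), and summing ``Y_hat_i`` over
# kernels recovers the full prediction.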
184 | 185 | split = True 186 | scores_split = predict_and_score_weighted_kernel_ridge( 187 | Ks_test, dual_weights, deltas, Y_test, split=split, 188 | n_targets_batch=n_targets_batch, score_func=r2_score_split) 189 | scores_split = backend.to_numpy(scores_split) 190 | 191 | for kk, score in enumerate(scores_split): 192 | plt.hist(score, np.linspace(0, np.max(scores_split), 50), alpha=0.7, 193 | label="kernel %d" % kk) 194 | plt.title(r"Histogram of $R^2$ generalization score split between kernels") 195 | plt.legend() 196 | plt.show() 197 | -------------------------------------------------------------------------------- /himalaya/kernel_ridge/tests/test_random_search_kernel.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | import sklearn.linear_model 5 | import sklearn.model_selection 6 | import scipy.linalg 7 | 8 | from himalaya.backend import set_backend 9 | from himalaya.backend import ALL_BACKENDS 10 | from himalaya.utils import assert_array_almost_equal 11 | from himalaya.scoring import r2_score 12 | 13 | from himalaya.kernel_ridge import solve_multiple_kernel_ridge_random_search 14 | 15 | 16 | def _create_dataset(backend, n_targets=4): 17 | n_featuress = (100, 200) 18 | n_samples = 80 19 | n_gammas = 3 20 | 21 | Xs = [ 22 | backend.asarray(backend.randn(n_samples, n_features), backend.float64) 23 | for n_features in n_featuress 24 | ] 25 | Ks = backend.stack([X @ X.T for X in Xs]) 26 | 27 | ws = [ 28 | backend.asarray(backend.randn(n_features, n_targets), backend.float64) 29 | for n_features in n_featuress 30 | ] 31 | Ys = backend.stack([X @ w for X, w in zip(Xs, ws)]) 32 | Y = Ys.sum(0) 33 | 34 | gammas = backend.asarray(backend.rand(n_gammas, Ks.shape[0]), 35 | backend.float64) 36 | gammas /= gammas.sum(1)[:, None] 37 | 38 | return Ks, Y, gammas, Xs 39 | 40 | 41 | @pytest.mark.parametrize('local_alpha', [True, False]) 42 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 43 | def test_solve_multiple_kernel_ridge_random_search_local_alpha( 44 | backend, local_alpha): 45 | _test_solve_multiple_kernel_ridge_random_search(backend=backend, 46 | local_alpha=local_alpha) 47 | 48 | 49 | @pytest.mark.parametrize('n_targets_batch', [None, 3]) 50 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 51 | def test_solve_multiple_kernel_ridge_random_search_n_targets_batch( 52 | backend, n_targets_batch): 53 | _test_solve_multiple_kernel_ridge_random_search( 54 | backend=backend, n_targets_batch=n_targets_batch) 55 | 56 | 57 | @pytest.mark.parametrize('n_alphas_batch', [None, 2]) 58 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 59 | def test_solve_multiple_kernel_ridge_random_search_n_alphas_batch( 60 | backend, n_alphas_batch): 61 | _test_solve_multiple_kernel_ridge_random_search( 62 | backend=backend, n_alphas_batch=n_alphas_batch) 63 | 64 | 65 | @pytest.mark.parametrize('return_weights', ['primal', 'dual']) 66 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 67 | def test_solve_multiple_kernel_ridge_random_search_return_weights( 68 | backend, return_weights): 69 | _test_solve_multiple_kernel_ridge_random_search( 70 | backend=backend, return_weights=return_weights) 71 | 72 | 73 | @pytest.mark.parametrize('diagonalize_method', ['eigh', 'svd']) 74 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 75 | def test_solve_multiple_kernel_ridge_random_search_diagonalize_method( 76 | backend, diagonalize_method): 77 | _test_solve_multiple_kernel_ridge_random_search( 78 | backend=backend, 
        diagonalize_method=diagonalize_method)
79 | 
80 | 
81 | def _test_solve_multiple_kernel_ridge_random_search(
82 |         backend, n_targets_batch=None, n_alphas_batch=None,
83 |         return_weights="dual", diagonalize_method="eigh", local_alpha=True):
84 |     backend = set_backend(backend)
85 | 
86 |     Ks, Y, gammas, Xs = _create_dataset(backend)
87 |     alphas = backend.asarray_like(backend.logspace(-3, 5, 9), Ks)
88 |     n_targets = Y.shape[1]
89 |     cv = sklearn.model_selection.check_cv(10)
90 | 
91 |     ############
92 |     # run solver
93 |     results = solve_multiple_kernel_ridge_random_search(
94 |         Ks, Y, n_iter=gammas, alphas=alphas, score_func=r2_score, cv=cv,
95 |         n_targets_batch=n_targets_batch, Xs=Xs, progress_bar=False,
96 |         return_weights=return_weights, n_alphas_batch=n_alphas_batch,
97 |         diagonalize_method=diagonalize_method, local_alpha=local_alpha)
98 |     best_deltas, refit_weights, cv_scores = results
99 | 
100 |     #########################################
101 |     # compare with sklearn.linear_model.Ridge
102 |     if local_alpha:  # only compare when each target optimizes alpha
103 |         test_scores = []
104 |         for gamma in backend.sqrt(gammas):
105 |             X = backend.concatenate([x * g for x, g in zip(Xs, gamma)], 1)
106 |             for train, test in cv.split(X):
107 |                 for alpha in alphas:
108 |                     model = sklearn.linear_model.Ridge(
109 |                         alpha=backend.to_numpy(alpha), fit_intercept=False)
110 |                     model = model.fit(backend.to_numpy(X[train]),
111 |                                       backend.to_numpy(Y[train]))
112 |                     predictions = backend.asarray_like(
113 |                         model.predict(backend.to_numpy(X[test])), Y)
114 |                     test_scores.append(r2_score(Y[test], predictions))
115 | 
116 |         test_scores = backend.stack(test_scores)
117 |         test_scores = test_scores.reshape(len(gammas), cv.get_n_splits(),
118 |                                           len(alphas), n_targets)
119 |         test_scores_mean = backend.max(test_scores.mean(1), 1)
120 |         assert_array_almost_equal(cv_scores, test_scores_mean, decimal=5)
121 | 
122 |     ######################
123 |     # test refit_weights
124 |     for tt in range(n_targets):
125 |         gamma = backend.exp(best_deltas[:, tt])
126 |         alpha = 1.0
127 | 
128 |         if return_weights == 'primal':
129 |             # compare primal weights with sklearn.linear_model.Ridge
130 |             X = backend.concatenate(
131 |                 [X * backend.sqrt(g) for X, g in zip(Xs, gamma)], 1)
132 |             model = sklearn.linear_model.Ridge(fit_intercept=False,
133 |                                                alpha=backend.to_numpy(alpha))
134 |             w1 = model.fit(backend.to_numpy(X),
135 |                            backend.to_numpy(Y[:, tt])).coef_
136 |             w1 = np.split(w1, np.cumsum([X.shape[1] for X in Xs][:-1]), axis=0)
137 |             w1 = [backend.asarray(w) for w in w1]
138 |             w1_scaled = backend.concatenate(
139 |                 [w * backend.sqrt(g) for w, g in zip(w1, gamma)])
140 |             assert_array_almost_equal(w1_scaled, refit_weights[:, tt],
141 |                                       decimal=5)
142 | 
143 |         elif return_weights == 'dual':
144 |             # compare dual weights with scipy.linalg.solve
145 |             Ks_64 = backend.asarray(Ks, dtype=backend.float64)
146 |             gamma_64 = backend.asarray(gamma, dtype=backend.float64)
147 |             K = backend.matmul(Ks_64.T, gamma_64).T
148 |             reg = backend.asarray_like(np.eye(K.shape[0]), K) * alpha
149 |             Y_64 = backend.asarray(Y, dtype=backend.float64)
150 |             c1 = scipy.linalg.solve(backend.to_numpy(K + reg),
151 |                                     backend.to_numpy(Y_64[:, tt]))
152 |             c1 = backend.asarray_like(c1, K)
153 |             assert_array_almost_equal(c1, refit_weights[:, tt], decimal=5)
154 | 
155 | 
156 | @pytest.mark.parametrize('backend', ALL_BACKENDS)
157 | def test_solve_multiple_kernel_ridge_random_search_single_alpha_numpy(backend):
158 |     backend = set_backend(backend)
159 |     # just a smoke test, so make it minimal
160 |     Ks, Y, 
gammas, Xs = _create_dataset(backend) 161 | alphas = 1.0 162 | # make Y a numpy array 163 | Y = backend.to_numpy(Y) 164 | _ = solve_multiple_kernel_ridge_random_search( 165 | Ks, Y, n_iter=gammas, alphas=alphas 166 | ) 167 | 168 | 169 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 170 | @pytest.mark.parametrize('n_kernels', [1, 2]) 171 | def test_solve_multiple_kernel_ridge_random_search_global_alpha( 172 | backend, n_kernels): 173 | backend = set_backend(backend) 174 | # add more targets to make sure we get some variability 175 | Ks, Y, gammas, Xs = _create_dataset(backend, n_targets=20) 176 | alphas = backend.asarray_like(backend.logspace(-3, 5, 9), Ks) 177 | cv = sklearn.model_selection.check_cv(5) 178 | 179 | deltas, *_, best_alphas = solve_multiple_kernel_ridge_random_search( 180 | Ks[:n_kernels], 181 | Y, 182 | n_iter=50, 183 | progress_bar=False, 184 | alphas=alphas, 185 | cv=cv, 186 | local_alpha=False, 187 | return_alphas=True 188 | ) 189 | # test that we return a single combination of deltas 190 | deltas = backend.to_numpy(deltas) 191 | if deltas.ndim == 1: 192 | assert np.allclose(deltas[0], deltas) 193 | else: 194 | for dd in deltas: 195 | assert np.allclose(dd[0], dd) 196 | 197 | # test that we return a single alpha 198 | best_alphas = backend.to_numpy(best_alphas) 199 | assert np.allclose(best_alphas[0], best_alphas) 200 | --------------------------------------------------------------------------------