├── .gitattributes ├── MANIFEST.in ├── examples ├── README.rst ├── kernel_ridge │ ├── README.rst │ ├── plot_kernel_ridge.py │ ├── plot_kernel_ridge_cv.py │ └── plot_model_on_gpu.py.py └── multiple_kernel_ridge │ ├── README.rst │ ├── plot_mkr_3_path.py │ ├── plot_mkr_5_refine_results.py │ ├── plot_mkr_4_refit_from_deltas.py │ ├── plot_mkr_1_sklearn_api.py │ └── plot_mkr_0_random_search.py ├── setup.cfg ├── doc ├── getting_started.rst ├── README.md ├── index.rst ├── static │ ├── custom.css │ ├── logo.py │ └── logo.svg ├── Makefile ├── flowchart.rst ├── troubleshooting.rst ├── api.rst ├── changelog.rst ├── conf.py └── models.rst ├── .codespellrc ├── .gitignore ├── himalaya ├── __init__.py ├── lasso │ ├── __init__.py │ ├── tests │ │ ├── test_sklearn_api_lasso.py │ │ └── test_group_lasso.py │ └── _sklearn_api.py ├── backend │ ├── __init__.py │ ├── tests │ │ ├── test_backend_utils.py │ │ └── test_backends.py │ ├── torch_cuda.py │ ├── _utils.py │ ├── numpy.py │ └── cupy.py ├── tests │ ├── test_viz.py │ ├── test_progress_bar.py │ ├── test_utils.py │ └── test_validation.py ├── ridge │ ├── __init__.py │ ├── tests │ │ ├── test_column.py │ │ ├── test_random_search_ridge.py │ │ └── test_solvers_ridge.py │ └── _solvers.py ├── viz.py ├── kernel_ridge │ ├── tests │ │ ├── test_predictions.py │ │ ├── test_input_arrays.py │ │ ├── test_force_cpu.py │ │ └── test_random_search_kernel.py │ ├── __init__.py │ └── _predictions.py ├── progress_bar.py └── utils.py ├── .github ├── workflows │ ├── codespell.yml │ ├── build_docs.yml │ ├── deploy_pypi.yml │ └── run_tests.yml └── dependabot.yml ├── .codecov.yml ├── LICENSE.md ├── setup.py └── README.rst /.gitattributes: -------------------------------------------------------------------------------- 1 | *.svg -diff 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include LICENSE.md 3 | -------------------------------------------------------------------------------- /examples/README.rst: -------------------------------------------------------------------------------- 1 | Gallery of examples 2 | =================== 3 | -------------------------------------------------------------------------------- /examples/kernel_ridge/README.rst: -------------------------------------------------------------------------------- 1 | Kernel ridge 2 | ------------ 3 | -------------------------------------------------------------------------------- /examples/multiple_kernel_ridge/README.rst: -------------------------------------------------------------------------------- 1 | Multiple-kernel ridge 2 | --------------------- 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description_file = README.rst 3 | license_files = LICENSE.md 4 | -------------------------------------------------------------------------------- /doc/getting_started.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Getting started 3 | =============== 4 | 5 | .. 
include:: ../README.rst 6 | -------------------------------------------------------------------------------- /.codespellrc: -------------------------------------------------------------------------------- 1 | [codespell] 2 | skip = .git,*.pdf,*.svg,*.css,.codespellrc 3 | check-hidden = true 4 | # ignore-regex = 5 | ignore-words-list = fro 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info/ 3 | *.so 4 | 5 | .vscode 6 | .pytest_cache 7 | __pycache__ 8 | .coverage 9 | .idea 10 | htmlcov/ 11 | build/ 12 | dist/ 13 | 14 | # Documentation build 15 | doc/_build/ 16 | doc/_auto_examples/ 17 | doc/_generated/ 18 | -------------------------------------------------------------------------------- /himalaya/__init__.py: -------------------------------------------------------------------------------- 1 | from . import backend, kernel_ridge, lasso, progress_bar, ridge, scoring, utils, viz 2 | 3 | __version__ = '0.4.8' 4 | 5 | __all__ = [ 6 | "backend", 7 | "kernel_ridge", 8 | "lasso", 9 | "ridge", 10 | "progress_bar", 11 | "scoring", 12 | "utils", 13 | "viz", 14 | ] 15 | -------------------------------------------------------------------------------- /himalaya/lasso/__init__.py: -------------------------------------------------------------------------------- 1 | from ._group_lasso import solve_sparse_group_lasso 2 | from ._group_lasso import solve_sparse_group_lasso_cv 3 | from ._sklearn_api import SparseGroupLassoCV 4 | 5 | __all__ = [ 6 | "solve_sparse_group_lasso", 7 | "solve_sparse_group_lasso_cv", 8 | "SparseGroupLassoCV", 9 | ] 10 | -------------------------------------------------------------------------------- /himalaya/backend/__init__.py: -------------------------------------------------------------------------------- 1 | from ._utils import ALL_BACKENDS 2 | from ._utils import CURRENT_BACKEND 3 | from ._utils import set_backend 4 | from ._utils import get_backend 5 | from ._utils import force_cpu_backend 6 | 7 | __all__ = [ 8 | "ALL_BACKENDS", 9 | "CURRENT_BACKEND", 10 | "set_backend", 11 | "get_backend", 12 | "force_cpu_backend", 13 | ] 14 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | # Himalaya website 2 | 3 | ## Requirements 4 | 5 | ``` 6 | numpydoc 7 | sphinx 8 | sphinx_gallery 9 | sphinxcontrib-mermaid 10 | ``` 11 | 12 | ## Build the website 13 | 14 | ```bash 15 | make html 16 | # ignore "WARNING: autosummary: stub file not found ..." 
17 | firefox _build/html/index.html 18 | ``` 19 | 20 | ## Push the website 21 | 22 | ```bash 23 | make push-pages 24 | ``` 25 | -------------------------------------------------------------------------------- /himalaya/tests/test_viz.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from himalaya.viz import plot_alphas_diagnostic 4 | 5 | 6 | def test_smoke_viz(): 7 | alphas = np.logspace(0, 5, 6) 8 | best_alphas = np.random.choice(np.logspace(0, 5, 6), 10) 9 | plot_alphas_diagnostic(best_alphas, alphas, ax=None) 10 | plot_alphas_diagnostic(best_alphas, alphas, ax=plt.gca()) 11 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | Himalaya 2 | ======== 3 | 4 | Welcome to ``himalaya``'s documentation website. 5 | 6 | Documentation 7 | ------------- 8 | .. toctree:: 9 | :maxdepth: 1 10 | 11 | getting_started 12 | models 13 | flowchart 14 | troubleshooting 15 | _auto_examples/index 16 | 17 | 18 | Package details 19 | --------------- 20 | .. toctree:: 21 | :maxdepth: 1 22 | 23 | api 24 | changelog 25 | -------------------------------------------------------------------------------- /.github/workflows/codespell.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Codespell 3 | 4 | on: 5 | push: 6 | branches: [main] 7 | pull_request: 8 | branches: [main] 9 | 10 | permissions: 11 | contents: read 12 | 13 | jobs: 14 | codespell: 15 | name: Check for spelling errors 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@v6 21 | - name: Codespell 22 | uses: codespell-project/actions-codespell@v2 23 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "github-actions" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /himalaya/tests/test_progress_bar.py: -------------------------------------------------------------------------------- 1 | from himalaya.progress_bar import bar 2 | from himalaya.progress_bar import ProgressBar 3 | 4 | 5 | def test_progress_bar(): 6 | # simple smoke test 7 | for ii in bar(range(10), title="La barre"): 8 | pass 9 | 10 | bar_ = ProgressBar(title="La barre", max_value=10, initial_value=0, 11 | max_chars=40, progress_character='.', spinner=False, 12 | verbose_bool=True) 13 | for ii in bar_(range(10)): 14 | pass 15 | 16 | bar_ = ProgressBar(max_value=10, title="La barre") 17 | for ii in range(10): 18 | bar_.update_with_increment_value(1) 19 | bar_.close() 20 | -------------------------------------------------------------------------------- /doc/static/custom.css: -------------------------------------------------------------------------------- 1 | .sphx-glr-thumbcontainer { 2 | min-height: 230px !important; /*default = 230 */ 3 | margin: 5px !important; /*default = 0 ? */ 4 | } 5 | .sphx-glr-thumbcontainer .figure { 6 | width: 210px !important; /*default = 160 */ 7 | } 8 | .sphx-glr-thumbcontainer img { 9 | max-height: 112px !important; /*default = 112 */ 10 | max-width: 210px !important; /*default = 160 */ 11 | } 12 | .sphx-glr-thumbcontainer a.internal { 13 | padding: 150px 10px 0 !important; /*default = 150px 10px 0 */ 14 | } 15 | div.sphinxsidebar { 16 | max-height: 100%; 17 | overflow-y: auto; 18 | } 19 | div.sphx-glr-download a{ 20 | background-image: none; 21 | background-color: rgb(238, 238, 238); 22 | border-color: rgb(204, 204, 204); 23 | } 24 | -------------------------------------------------------------------------------- /himalaya/ridge/__init__.py: -------------------------------------------------------------------------------- 1 | from ._column import ColumnTransformerNoStack 2 | from ._column import make_column_transformer_no_stack 3 | from ._random_search import solve_group_ridge_random_search 4 | from ._random_search import solve_ridge_cv_svd 5 | from ._random_search import GROUP_RIDGE_SOLVERS 6 | from ._solvers import solve_ridge_svd 7 | from ._solvers import RIDGE_SOLVERS 8 | from ._sklearn_api import Ridge 9 | from ._sklearn_api import RidgeCV 10 | from ._sklearn_api import GroupRidgeCV 11 | 12 | # alternative names 13 | BandedRidgeCV = GroupRidgeCV 14 | solve_banded_ridge_random_search = solve_group_ridge_random_search 15 | BANDED_RIDGE_SOLVERS = GROUP_RIDGE_SOLVERS 16 | 17 | __all__ = [ 18 | # column transformers 19 | "ColumnTransformerNoStack", 20 | "make_column_transformer_no_stack", 21 | # group ridge solvers 22 | "solve_group_ridge_random_search", 23 | "GROUP_RIDGE_SOLVERS", 24 | # ridge solvers 25 | "solve_ridge_svd", 26 | "solve_ridge_cv_svd", 27 | "RIDGE_SOLVERS", 28 | # sklearn API 29 | "Ridge", 30 | "RidgeCV", 31 | "GroupRidgeCV", 32 | ] 33 | -------------------------------------------------------------------------------- /.github/workflows/build_docs.yml: -------------------------------------------------------------------------------- 1 | name: Build docs 2 | 3 | on: 4 | push: 5 | 
branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | build-docs: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v6 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@v6 17 | with: 18 | python-version: 3.9 19 | 20 | - uses: actions/cache@v5 21 | with: 22 | path: ~/.cache/pip 23 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 24 | restore-keys: | 25 | ${{ runner.os }}-pip- 26 | 27 | - name: Install dependencies 28 | run: | 29 | pip install -e ."[github]" 30 | pip install numpydoc sphinx sphinx_gallery sphinxcontrib-mermaid 31 | 32 | - name: Build documents 33 | run: | 34 | cd doc && make html && cd .. 35 | touch doc/_build/html/.nojekyll 36 | 37 | - name: Publish to gh-pages if tagged 38 | if: startsWith(github.ref, 'refs/tags') 39 | uses: JamesIves/github-pages-deploy-action@v4.7.6 40 | with: 41 | branch: gh-pages 42 | folder: doc/_build/html 43 | -------------------------------------------------------------------------------- /himalaya/viz.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def plot_alphas_diagnostic(best_alphas, alphas, ax=None): 5 | """Plot a diagnostic plot for the selected alphas during cross-validation. 6 | 7 | Use it to decide whether to increase the range of alphas. 8 | 9 | Parameters 10 | ---------- 11 | best_alphas : array of shape (n_targets, ) 12 | Alphas selected during cross-validation for each target. 13 | alphas : array of shape (n_alphas, ) 14 | Alphas used while fitting the model. 15 | ax : None or figure axis 16 | 17 | Returns 18 | ------- 19 | ax : figure axis 20 | """ 21 | import matplotlib.pyplot as plt 22 | alphas = np.sort(alphas) 23 | n_alphas = len(alphas) 24 | indices = np.argmin(np.abs(best_alphas[None] - alphas[:, None]), 0) 25 | hist = np.bincount(indices, minlength=n_alphas) 26 | 27 | if ax is None: 28 | fig, ax = plt.subplots(1, 1) 29 | 30 | log10alphas = np.log10(alphas) 31 | ax.plot(log10alphas, hist, '.-', markersize=12) 32 | ax.set_ylabel('Number of targets') 33 | ax.set_xlabel('log10(alpha)') 34 | ax.grid(True) 35 | return ax 36 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | # For more configuration details: 2 | # https://docs.codecov.io/docs/codecov-yaml 3 | 4 | # Check if this file is valid by running in bash: 5 | # curl -X POST --data-binary @.codecov.yml https://codecov.io/validate 6 | 7 | # Coverage configuration 8 | # ---------------------- 9 | coverage: 10 | status: 11 | project: 12 | default: 13 | threshold: 1% # complain if change in code coverage is greater than 1% 14 | patch: false 15 | range: 70..90 # First number represents red, and second represents green 16 | # (default is 70..100) 17 | round: down # up, down, or nearest 18 | precision: 2 # Number of decimal places, between 0 and 5 19 | 20 | 21 | # Ignoring Paths 22 | # -------------- 23 | # which folders/files to ignore 24 | ignore: 25 | - setup.py 26 | # GPU not available on github-actions 27 | - himalaya/backend/cupy.py 28 | - himalaya/backend/torch_cuda.py 29 | 30 | 31 | # Pull request comments: 32 | # ---------------------- 33 | # Diff is the Coverage Diff of the pull request. 
34 | # Files are the files impacted by the pull request 35 | comment: false 36 | # layout: diff, files # accepted in any order: reach, diff, flags, and/or files -------------------------------------------------------------------------------- /doc/static/logo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.decomposition import PCA 3 | 4 | import matplotlib.pyplot as plt 5 | from matplotlib.patches import Polygon 6 | 7 | from himalaya.kernel_ridge import generate_dirichlet_samples 8 | kernel_weights = generate_dirichlet_samples(10000, n_kernels=3, 9 | concentration=[1.], random_state=0) 10 | pca = PCA(2).fit(kernel_weights) 11 | 12 | darkgreen = "#446455" 13 | white = "white" 14 | 15 | 16 | def plot_simplex(bias=(0, 0), ax=None): 17 | corners = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) 18 | corners = pca.transform(corners).T 19 | 20 | if ax is None: 21 | plt.figure(figsize=(2, 2)) 22 | ax = plt.gca() 23 | 24 | # Faces 25 | ax.add_patch( 26 | Polygon(corners[:2].T + bias, closed=True, edgecolor=None, fill=True, 27 | facecolor=white, alpha=0.6)) 28 | # Edges 29 | ax.add_patch( 30 | Polygon(corners[:2].T + bias, closed=True, edgecolor=darkgreen, 31 | fill=False, alpha=1, linewidth=2)) 32 | 33 | ax.axis('equal') 34 | ax.axis('off') 35 | return ax 36 | 37 | 38 | fig, ax = plt.subplots(figsize=(2, 2)) 39 | 40 | bias = [0.4, 0.04] 41 | for factor in np.linspace(2, 0, 3): 42 | bias_ = np.array(bias) * factor 43 | plot_simplex(bias_, ax=ax) 44 | 45 | ax.text(-0.4, -1, "himalaya", fontsize=20, color=darkgreen) 46 | fig.savefig("logo.svg", bbox_inches='tight', pad_inches=0) 47 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = Tutorials 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | clean: 23 | rm -rf $(BUILDDIR)/* 24 | rm -rf _auto_examples/ 25 | rm -rf _generated/ 26 | 27 | html-noplot: 28 | $(SPHINXBUILD) -D plot_gallery=0 -b html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 29 | @echo 30 | @echo "Build finished (noplot). The HTML pages are in $(BUILDDIR)/html." 
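# Example (a sketch): "make html-noplot" builds the documentation without
# executing the gallery examples (plot_gallery=0), which is faster when only
# the prose changed.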
31 | 32 | # -b gh_pages --single-branch (to clone only one branch) 33 | # --no-checkout (just fetches the root folder without content) 34 | # --depth 1 (since we don't need the history prior to the last commit) 35 | push-pages: 36 | rm -rf _build/gh_pages 37 | git clone -b gh-pages --single-branch --no-checkout --depth 1 \ 38 | https://github.com/gallantlab/himalaya _build/gh_pages 39 | 40 | cd _build/ && \ 41 | cp -r html/* gh_pages && \ 42 | cd gh_pages && \ 43 | touch .nojekyll && \ 44 | git add * && \ 45 | git add .nojekyll && \ 46 | git commit -a -m 'Make push-pages' && \ 47 | git push 48 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, the himalaya developers 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /.github/workflows/deploy_pypi.yml: -------------------------------------------------------------------------------- 1 | name: Deploy to PyPI 2 | # Deploy to PyPI if the __version__ variable in himalaya/__init__.py 3 | # is larger than the latest version on PyPI. 
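# (More precisely, the workflow deploys whenever the two version strings
# differ: the "Get versions" step below only tests them for equality.)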
4 | 5 | on: 6 | push: 7 | branches: 8 | - main 9 | paths: 10 | # trigger workflow only on commits that change __init__.py 11 | - 'himalaya/__init__.py' 12 | 13 | jobs: 14 | deploy-pypi: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v6 18 | - uses: actions/setup-python@v6 19 | 20 | - name: Get versions 21 | # Compare the latest version on PyPI, and the current version 22 | run: | 23 | python -m pip install --upgrade -q pip 24 | pip index versions himalaya 25 | LATEST=$(pip index versions himalaya | grep 'himalaya' |awk '{print $2}' | tr -d '(' | tr -d ')') 26 | CURRENT=$(cat himalaya/__init__.py | grep "__version__" | awk '{print $3}' | tr -d "'" | tr -d '"') 27 | EQUAL=$([ "$CURRENT" = "$LATEST" ] && echo 1 || echo 0) 28 | echo "LATEST=$LATEST" >> $GITHUB_ENV 29 | echo "CURRENT=$CURRENT" >> $GITHUB_ENV 30 | echo "EQUAL=$EQUAL" >> $GITHUB_ENV 31 | 32 | - name: Print versions 33 | run: | 34 | echo ${{ env.LATEST }} 35 | echo ${{ env.CURRENT }} 36 | echo ${{ env.EQUAL }} 37 | 38 | - name: Build and publish 39 | if: ${{ env.EQUAL == 0 }} 40 | env: 41 | TWINE_USERNAME: __token__ 42 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 43 | run: | 44 | python -m pip install --upgrade pip 45 | python -m pip install setuptools wheel "twine<6.0" 46 | python setup.py sdist bdist_wheel 47 | python -m twine upload dist/* 48 | -------------------------------------------------------------------------------- /.github/workflows/run_tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | run-tests: 11 | strategy: 12 | matrix: 13 | os: [ubuntu-latest, macos-latest] 14 | python-version: [3.8, 3.9, "3.10", "3.11", "3.12"] 15 | max-parallel: 5 16 | fail-fast: false 17 | runs-on: ${{ matrix.os }} 18 | 19 | steps: 20 | - uses: actions/checkout@v6 21 | - name: Set up Python 22 | uses: actions/setup-python@v6 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | 26 | - uses: actions/cache@v5 27 | with: 28 | path: ~/.cache/pip 29 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 30 | restore-keys: | 31 | ${{ runner.os }}-pip- 32 | 33 | - name: Install dependencies 34 | run: | 35 | pip install -e ."[github]" 36 | 37 | - name: Lint with flake8 38 | run: | 39 | pip install -q flake8 40 | # stop the build if there are Python syntax errors or undefined names 41 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 42 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 43 | flake8 . --count --exit-zero --ignore=E402,C901 --max-line-length=127 --statistics 44 | 45 | - name: Test with pytest 46 | run: | 47 | pip install -q pytest pytest-cov 48 | pytest --cov=./ --reruns 2 49 | 50 | - name: Upload coverage to Codecov 51 | uses: codecov/codecov-action@v5 52 | with: 53 | env_vars: OS,PYTHON 54 | fail_ci_if_error: true 55 | token: ${{ secrets.CODECOV_TOKEN }} 56 | verbose: false 57 | -------------------------------------------------------------------------------- /examples/kernel_ridge/plot_kernel_ridge.py: -------------------------------------------------------------------------------- 1 | """ 2 | Kernel ridge 3 | ============ 4 | 5 | This example demonstrates how to solve kernel ridge regression, using 6 | himalaya's estimator ``KernelRidge`` compatible with scikit-learn's API. 
7 | """ 8 | 9 | ############################################################################### 10 | # Create a random dataset 11 | # ----------------------- 12 | import numpy as np 13 | n_samples, n_features, n_targets = 10, 20, 4 14 | X = np.random.randn(n_samples, n_features) 15 | Y = np.random.randn(n_samples, n_targets) 16 | 17 | ############################################################################### 18 | # Scikit-learn API 19 | # ---------------- 20 | # Himalaya implements a ``KernelRidge`` estimator, similar to the corresponding 21 | # scikit-learn estimator, with similar parameters and methods. 22 | import sklearn.kernel_ridge 23 | import himalaya.kernel_ridge 24 | 25 | # Fit a scikit-learn model 26 | model_skl = sklearn.kernel_ridge.KernelRidge(kernel="linear", alpha=0.1) 27 | model_skl.fit(X, Y) 28 | 29 | # Fit a himalaya model 30 | model_him = himalaya.kernel_ridge.KernelRidge(kernel="linear", alpha=0.1) 31 | model_him.fit(X, Y) 32 | 33 | Y_pred_skl = model_skl.predict(X) 34 | Y_pred_him = model_him.predict(X) 35 | 36 | # The predictions are virtually identical. 37 | print(np.max(np.abs(Y_pred_skl - Y_pred_him))) 38 | 39 | ############################################################################### 40 | # Small API difference 41 | # -------------------- 42 | # Since himalaya focuses on fitting multiple targets, the ``score`` method 43 | # returns the score on each target separately, while scikit-learn returns the 44 | # average score over targets. 45 | 46 | print(model_skl.score(X, Y)) 47 | print(model_him.score(X, Y)) 48 | print(model_him.score(X, Y).mean()) 49 | -------------------------------------------------------------------------------- /doc/flowchart.rst: -------------------------------------------------------------------------------- 1 | Model flowchart 2 | --------------- 3 | 4 | The following flowchart can be used as a guide to select the right estimator. 5 | 6 | .. mermaid:: 7 | 8 | graph TD; 9 | A(How many feature space ?) 10 | O(Data size ?) 11 | M(Data size ?) 12 | OR(Hyperparameters ?) 13 | OK(Hyperparameters ?) 14 | MR(Hyperparameters ?) 15 | MK(Hyperparameters ?) 
16 | 17 | A-- one-->O; 18 | A--multiple-->M; 19 | O--more samples-->OR; 20 | O--more features-->OK; 21 | M--more samples-->MR; 22 | M--more features-->MK; 23 | 24 | OK--known-->OKH[KernelRidge]; 25 | OK--unknown-->OKCV[KernelRidgeCV]; 26 | OR--known-->ORH[Ridge]; 27 | OR--unknown-->ORCV[RidgeCV]; 28 | MK--known-->MKH[WeightedKernelRidge]; 29 | MK--unknown-->MKCV[MultipleKernelRidgeCV]; 30 | 31 | MR--unknown-->MRCV[BandedRidgeCV]; 32 | MR--known-->MKH; 33 | 34 | classDef fork fill:#FFDC97 35 | class A,O,M,OR,OK,MR,MK fork; 36 | 37 | classDef leaf fill:#ABBBE1 38 | class ORH,OKH,MRH,MKH leaf; 39 | class ORCV,OKCV,MRCV,MKCV leaf; 40 | 41 | click ORH "https://gallantlab.github.io/himalaya/_generated/himalaya.ridge.Ridge.html" 42 | click ORCV "https://gallantlab.github.io/himalaya/_generated/himalaya.ridge.RidgeCV.html" 43 | click MRCV "https://gallantlab.github.io/himalaya/_generated/himalaya.ridge.BandedRidgeCV.html" 44 | click OKH "https://gallantlab.github.io/himalaya/_generated/himalaya.kernel_ridge.KernelRidge.html" 45 | click OKCV "https://gallantlab.github.io/himalaya/_generated/himalaya.kernel_ridge.KernelRidgeCV.html" 46 | click MKH "https://gallantlab.github.io/himalaya/_generated/himalaya.kernel_ridge.WeightedKernelRidge.html" 47 | click MKCV "https://gallantlab.github.io/himalaya/_generated/himalaya.kernel_ridge.MultipleKernelRidgeCV.html" 48 | -------------------------------------------------------------------------------- /himalaya/lasso/tests/test_sklearn_api_lasso.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sklearn.utils.estimator_checks 3 | 4 | from himalaya.backend import set_backend 5 | from himalaya.backend import get_backend 6 | from himalaya.backend import ALL_BACKENDS 7 | 8 | from himalaya.lasso import SparseGroupLassoCV 9 | 10 | ############################################################################### 11 | # scikit-learn.utils.estimator_checks 12 | 13 | 14 | class SparseGroupLassoCV_(SparseGroupLassoCV): 15 | """Cast predictions to numpy arrays, to be used in scikit-learn tests. 16 | 17 | Used for testing only. 
18 | """ 19 | 20 | def __init__(self, groups=None, l1_regs=(0, 0.1), l21_regs=(0, 0.1), 21 | solver="proximal_gradient", solver_params=None, cv=2): 22 | super().__init__(groups=groups, l1_regs=l1_regs, l21_regs=l21_regs, 23 | solver=solver, solver_params=solver_params, cv=cv) 24 | 25 | def predict(self, X): 26 | backend = get_backend() 27 | return backend.to_numpy(super().predict(X)) 28 | 29 | def score(self, X, y): 30 | from himalaya.validation import check_array 31 | from himalaya.scoring import r2_score 32 | backend = get_backend() 33 | 34 | y_pred = super().predict(X) 35 | y_true = check_array(y, dtype=self.dtype_, ndim=self.coef_.ndim) 36 | 37 | if y_true.ndim == 1: 38 | return backend.to_numpy( 39 | r2_score(y_true[:, None], y_pred[:, None])[0]) 40 | else: 41 | return backend.to_numpy(r2_score(y_true, y_pred)) 42 | 43 | 44 | @sklearn.utils.estimator_checks.parametrize_with_checks([ 45 | SparseGroupLassoCV_(), 46 | ]) 47 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 48 | def test_check_estimator(estimator, check, backend): 49 | backend = set_backend(backend) 50 | check(estimator) 51 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pathlib import Path 3 | from setuptools import find_packages, setup 4 | 5 | # get version from himalaya/__init__.py 6 | __version__ = 0.0 7 | with open('himalaya/__init__.py') as f: 8 | infos = f.readlines() 9 | for line in infos: 10 | if "__version__" in line: 11 | match = re.search(r"__version__ = '([^']*)'", line) 12 | __version__ = match.groups()[0] 13 | 14 | # read the contents of the README file 15 | this_directory = Path(__file__).parent 16 | long_description = (this_directory / "README.rst").read_text() 17 | 18 | requirements = [ 19 | "numpy", 20 | "scikit-learn", 21 | # "cupy", # optional backend 22 | # "torch", # optional backend, 1.9+ preferred 23 | # "matplotlib", # for visualization only 24 | # "pytest", # for testing only 25 | ] 26 | 27 | extras_require = { 28 | "all_backends": ["cupy", "torch"], 29 | "viz": ["matplotlib"], 30 | "test": ["pytest", "matplotlib", "cupy", "torch"], 31 | "github": ["pytest", "matplotlib", "torch", "pytest-rerunfailures"], 32 | } 33 | 34 | extras_require["all"] = sum(list(extras_require.values()), []) 35 | extras_require["doc"] = ["numpydoc", "sphinx", "sphinx_gallery", 36 | "sphinxcontrib-mermaid"] 37 | extras_require["doc"] += extras_require["viz"] + extras_require["all_backends"] 38 | 39 | if __name__ == "__main__": 40 | setup( 41 | name='himalaya', 42 | maintainer="Tom Dupre la Tour", 43 | maintainer_email="tomdlt@berkeley.edu", 44 | description="Multiple-target machine learning", 45 | license='BSD (3-clause)', 46 | version=__version__, 47 | packages=find_packages(), 48 | url="https://github.com/gallantlab/himalaya", 49 | install_requires=requirements, 50 | extras_require=extras_require, 51 | long_description=long_description, 52 | long_description_content_type='text/x-rst', 53 | ) 54 | -------------------------------------------------------------------------------- /himalaya/backend/tests/test_backend_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from himalaya.backend import set_backend 4 | from himalaya.backend import get_backend 5 | from himalaya.backend import ALL_BACKENDS 6 | from himalaya.backend import force_cpu_backend 7 | from himalaya.backend._utils import MATCHING_CPU_BACKEND 8 | 
9 | 10 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 11 | def test_set_backend_correct(backend): 12 | # test the change of backend 13 | module = set_backend(backend) 14 | assert module.__name__.split('.')[-1] == backend 15 | 16 | # test idempotence 17 | module = set_backend(set_backend(backend)) 18 | assert module.__name__.split('.')[-1] == backend 19 | 20 | # test set and get 21 | module = set_backend(get_backend()) 22 | assert module.__name__.split('.')[-1] == backend 23 | 24 | assert set_backend(backend) 25 | 26 | 27 | def test_set_backend_incorrect(): 28 | for backend in ["wrong", ["numpy"], True, None, 10]: 29 | with pytest.raises(ValueError): 30 | set_backend(backend) 31 | with pytest.raises(ValueError): 32 | set_backend(backend, on_error="raise") 33 | with pytest.warns(Warning): 34 | set_backend(backend, on_error="warn") 35 | with pytest.raises(ValueError): 36 | set_backend(backend, on_error="foo") 37 | 38 | 39 | class ToyEstimator(): 40 | def __init__(self, force_cpu): 41 | self.force_cpu = force_cpu 42 | 43 | @force_cpu_backend 44 | def get_backend_wrapped(self): 45 | return get_backend() 46 | 47 | 48 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 49 | def test_force_cpu_backend(backend): 50 | backend = set_backend(backend) 51 | 52 | est = ToyEstimator(force_cpu=True) 53 | assert est.get_backend_wrapped().name == MATCHING_CPU_BACKEND[backend.name] 54 | 55 | est = ToyEstimator(force_cpu=False) 56 | assert est.get_backend_wrapped().name == backend.name 57 | -------------------------------------------------------------------------------- /examples/kernel_ridge/plot_kernel_ridge_cv.py: -------------------------------------------------------------------------------- 1 | """ 2 | Kernel ridge with cross-validation 3 | ================================== 4 | 5 | This example demonstrates how to solve kernel ridge regression with a 6 | cross-validation of the regularization parameter, using himalaya's estimator 7 | ``KernelRidgeCV``. 8 | """ 9 | 10 | ############################################################################### 11 | # Create a random dataset 12 | # ----------------------- 13 | import numpy as np 14 | np.random.seed(0) 15 | n_samples, n_features, n_targets = 10, 20, 4 16 | X = np.random.randn(n_samples, n_features) 17 | Y = np.random.randn(n_samples, n_targets) 18 | 19 | ############################################################################### 20 | # Limit of GridSearchCV 21 | # --------------------- 22 | # In scikit-learn, one can use ``GridSearchCV`` to optimize hyperparameters 23 | # over cross-validation. 24 | 25 | import sklearn.model_selection 26 | import sklearn.kernel_ridge 27 | 28 | estimator = sklearn.kernel_ridge.KernelRidge(kernel="linear") 29 | gscv = sklearn.model_selection.GridSearchCV( 30 | estimator=estimator, 31 | param_grid=dict(alpha=np.logspace(-2, 2, 5)), 32 | ) 33 | gscv.fit(X, Y) 34 | 35 | ############################################################################### 36 | # However, since ``GridSearchCV`` optimizes the average score over all targets, 37 | # it returns a single value for alpha. 38 | gscv.best_params_ 39 | 40 | ############################################################################### 41 | # KernelRidgeCV 42 | # ------------- 43 | # To optimize each target independently, himalaya implements ``KernelRidgeCV``, 44 | # which supports any cross-validation scheme compatible with scikit-learn. 
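# For example, an explicit splitter could be passed through the ``cv``
# parameter (a sketch): ``KernelRidgeCV(alphas=alphas,
# cv=sklearn.model_selection.KFold(5))``. Below, the default scheme is used.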
45 | import himalaya.kernel_ridge 46 | 47 | model = himalaya.kernel_ridge.KernelRidgeCV(kernel="linear", 48 | alphas=np.logspace(-2, 2, 5)) 49 | model.fit(X, Y) 50 | 51 | ############################################################################### 52 | # KernelRidgeCV returns a separate best alpha per target. 53 | model.best_alphas_ 54 | -------------------------------------------------------------------------------- /doc/troubleshooting.rst: -------------------------------------------------------------------------------- 1 | Troubleshooting 2 | =============== 3 | We detail here common issues encountered with ``himalaya``, and how to fix 4 | them. 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | 10 | CUDA out of memory 11 | ------------------ 12 | 13 | The GPU memory is often smaller than the CPU memory, so it requires more 14 | attention to avoid running out of memory. Himalaya implements a series of 15 | options to limit the GPU memory, often at the cost of computational speed: 16 | 17 | - Some solvers implement computations over batches, to limit the size of 18 | intermediate arrays. See for instance ``n_targets_batch``, or 19 | ``n_alphas_batch`` in :class:`~himalaya.kernel_ridge.KernelRidgeCV`. 20 | - Some solvers implement an option to keep the input kernels or the targets in 21 | CPU memory. See for instance ``Y_in_cpu`` in 22 | :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV`. 23 | - Some estimators can also be forced to use CPU, ignoring the current backend, 24 | using the parameter ``force_cpu=True``. To limit GPU memory, some estimators 25 | in the same pipeline can use ``force_cpu=True`` and others 26 | ``force_cpu=False``. In particular, it is possible to precompute kernels on 27 | CPU, using :class:`~himalaya.kernel_ridge.Kernelizer` or 28 | :class:`~himalaya.kernel_ridge.ColumnKernelizer` with the parameter 29 | ``force_cpu=True`` before fitting a 30 | :class:`~himalaya.kernel_ridge.KernelRidgeCV` or a 31 | :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV` on GPU. 32 | 33 | A CUDA out of memory issue can also arise with ``pytorch < 1.9``, for example 34 | with :class:`~himalaya.kernel_ridge.KernelRidge`, where a solver requires 35 | ridiculously high peak memory during a broadcasting matmul operation. This 36 | `issue `_ can be fixed by 37 | updating to ``pytorch = 1.9`` or newer versions. 38 | 39 | 40 | Slow check_array 41 | ---------------- 42 | 43 | In himalaya, the scikit-learn compatible estimators validate the input data, 44 | checking the absence of NaN or infinite values. For large datasets, this check 45 | can take significant computational time. To skip this check, simply call 46 | ``sklearn.set_config(assume_finite=True)`` before fitting your models. 
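
For example, a minimal sketch (assuming ``X_train`` and ``Y_train`` have
already been validated upstream)::

    import sklearn
    from himalaya.kernel_ridge import KernelRidgeCV

    sklearn.set_config(assume_finite=True)  # skip the NaN/infinity checks
    model = KernelRidgeCV().fit(X_train, Y_train)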
47 | -------------------------------------------------------------------------------- /himalaya/kernel_ridge/tests/test_predictions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from himalaya.backend import set_backend 4 | from himalaya.backend import ALL_BACKENDS 5 | from himalaya.utils import assert_array_almost_equal 6 | 7 | from himalaya.kernel_ridge import primal_weights_weighted_kernel_ridge 8 | from himalaya.kernel_ridge import predict_weighted_kernel_ridge 9 | 10 | 11 | def _create_dataset(backend): 12 | n_samples, n_targets = 30, 3 13 | 14 | Xs = [ 15 | backend.asarray(backend.randn(n_samples, n_features), backend.float64) 16 | for n_features in [100, 200] 17 | ] 18 | Ks = backend.stack([backend.matmul(X, X.T) for X in Xs]) 19 | Y = backend.asarray(backend.randn(n_samples, n_targets), backend.float64) 20 | dual_weights = backend.asarray(backend.randn(n_samples, n_targets), 21 | backend.float64) 22 | exp_deltas = backend.asarray(backend.rand(Ks.shape[0], n_targets), 23 | backend.float64) 24 | deltas = backend.log(exp_deltas) 25 | 26 | return Xs, Ks, Y, deltas, dual_weights 27 | 28 | 29 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 30 | def test_predict_weighted_kernel_ridge(backend): 31 | backend = set_backend(backend) 32 | Xs, Ks, _, deltas, dual_weights = _create_dataset(backend) 33 | 34 | primal_weights = primal_weights_weighted_kernel_ridge( 35 | dual_weights, deltas, Xs) 36 | predictions_primal = backend.stack( 37 | [X @ backend.asarray(w) for X, w in zip(Xs, primal_weights)]).sum(0) 38 | 39 | predictions_dual = predict_weighted_kernel_ridge(Ks, dual_weights, deltas) 40 | 41 | assert_array_almost_equal(predictions_primal, predictions_dual) 42 | 43 | 44 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 45 | def test_predict_weighted_kernel_ridge_n_targets_batch(backend): 46 | backend = set_backend(backend) 47 | Xs, Ks, _, deltas, dual_weights = _create_dataset(backend) 48 | 49 | predictions_dual = predict_weighted_kernel_ridge(Ks, dual_weights, deltas) 50 | 51 | predictions_dual_n_targets_batch = predict_weighted_kernel_ridge( 52 | Ks, dual_weights, deltas, n_targets_batch=10) 53 | 54 | assert_array_almost_equal(predictions_dual, predictions_dual_n_targets_batch) 55 | -------------------------------------------------------------------------------- /examples/kernel_ridge/plot_model_on_gpu.py.py: -------------------------------------------------------------------------------- 1 | """ 2 | Fitting a model on GPU 3 | ====================== 4 | 5 | This example demonstrates how to fit a model using GPU computations. 6 | 7 | Himalaya implements different computational backends to fit the models: 8 | 9 | - "numpy" (CPU) (default) 10 | - "torch" (CPU) 11 | - "torch_cuda" (GPU) 12 | - "cupy" (GPU) 13 | 14 | Each backend is only available if you installed the corresponding package with 15 | CUDA enabled. Check the ``pytorch``/``cupy`` documentation for installation 16 | instructions. 
17 | """ 18 | 19 | ############################################################################### 20 | # Create a random dataset 21 | # ----------------------- 22 | import numpy as np 23 | n_samples, n_features, n_targets = 10, 20, 4 24 | X = np.random.randn(n_samples, n_features) 25 | Y = np.random.randn(n_samples, n_targets) 26 | 27 | ############################################################################### 28 | # Change backend 29 | # -------------- 30 | # To change the backend, you need to call the function 31 | # ``himalaya.backend.set_backend``. With the option ``on_error="warn"``, the 32 | # function does not raise an error if the new backend fails to be imported, and 33 | # the backend is kept unchanged. 34 | 35 | from himalaya.backend import set_backend 36 | backend = set_backend("cupy", on_error="warn") 37 | 38 | ############################################################################### 39 | # GPU backend 40 | # ----------- 41 | # To fit a himalaya model on GPU, you don't need to move the input arrays to 42 | # GPU, the method ``fit`` will do it for you. However, the float precision will 43 | # not be changed. 44 | # 45 | # To make the most of GPU memory and computational speed, you might want to 46 | # change the float precision to float32. 47 | X = X.astype("float32") 48 | 49 | from himalaya.kernel_ridge import KernelRidge 50 | model_him = KernelRidge(kernel="linear", alpha=0.1) 51 | model_him.fit(X, Y) 52 | 53 | ############################################################################### 54 | # The results are stored in GPU memory, using an array object specific to the 55 | # backend used. To use the results in other libraries (for example matplotlib), 56 | # you can create a numpy array using the function ``backend.to_numpy``. 
57 | scores = model_him.score(X, Y) 58 | print(scores.__class__) 59 | scores = backend.to_numpy(scores) 60 | print(scores.__class__) 61 | -------------------------------------------------------------------------------- /himalaya/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from himalaya.backend import set_backend 5 | from himalaya.backend import ALL_BACKENDS 6 | 7 | from himalaya.utils import compute_lipschitz_constants 8 | from himalaya.utils import generate_multikernel_dataset 9 | from himalaya.utils import assert_array_almost_equal 10 | 11 | 12 | @pytest.mark.parametrize('kernelize', ["XXT", "XTX", "X"]) 13 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 14 | def test_compute_lipschitz_constants(backend, kernelize): 15 | backend = set_backend(backend) 16 | 17 | Xs = backend.randn(3, 5, 6) 18 | if kernelize == "X": 19 | XTs = backend.transpose(Xs, (0, 2, 1)) 20 | Xs = backend.matmul(XTs, Xs) 21 | 22 | L = compute_lipschitz_constants(Xs) 23 | assert L.ndim == 1 24 | assert L.shape[0] == Xs.shape[0] 25 | 26 | 27 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 28 | def test_compute_lipschitz_constants_error(backend): 29 | backend = set_backend(backend) 30 | 31 | Xs = backend.randn(3, 5, 6) 32 | with pytest.raises(ValueError): 33 | compute_lipschitz_constants(Xs, "wrong") 34 | 35 | 36 | # A small number of sets of parameters 37 | _parameters = { 38 | "params_1": 39 | dict(n_kernels=4, n_targets=50, n_samples_train=100, n_samples_test=40, 40 | kernel_weights=None, n_features_list=[10, 10, 20, 5]), 41 | "params_2": 42 | dict(n_kernels=3, n_targets=40, n_samples_train=90, n_samples_test=40, 43 | kernel_weights=np.random.rand(40, 3), n_features_list=None), 44 | } 45 | 46 | 47 | @pytest.mark.parametrize("name", ["params_1", "params_2"]) 48 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 49 | def test_generate_multikernel_dataset(backend, name): 50 | backend = set_backend(backend) 51 | 52 | kwargs = _parameters[name] 53 | 54 | (X_train, X_test, Y_train, Y_test, kernel_weights, 55 | n_features_list) = generate_multikernel_dataset(**kwargs) 56 | 57 | assert X_train.shape[0] == kwargs["n_samples_train"] 58 | assert X_test.shape[0] == kwargs["n_samples_test"] 59 | assert Y_train.shape[0] == kwargs["n_samples_train"] 60 | assert Y_test.shape[0] == kwargs["n_samples_test"] 61 | assert Y_train.shape[1] == kwargs["n_targets"] 62 | assert Y_test.shape[1] == kwargs["n_targets"] 63 | assert len(n_features_list) == kwargs["n_kernels"] 64 | assert kernel_weights.shape[1] == kwargs["n_kernels"] 65 | assert kernel_weights.shape[0] == kwargs["n_targets"] 66 | 67 | if kwargs["kernel_weights"] is not None: 68 | assert_array_almost_equal(kwargs["kernel_weights"], 69 | kernel_weights) 70 | if kwargs["n_features_list"] is not None: 71 | assert np.sum(kwargs["n_features_list"]) == X_train.shape[1] 72 | assert np.sum(kwargs["n_features_list"]) == X_test.shape[1] 73 | -------------------------------------------------------------------------------- /himalaya/backend/torch_cuda.py: -------------------------------------------------------------------------------- 1 | """The "torch_cuda" GPU backend, based on PyTorch. 2 | 3 | To use this backend, call ``himalaya.backend.set_backend("torch_cuda")``. 
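
A minimal usage sketch (assuming a CUDA-enabled PyTorch installation)::

    from himalaya.backend import set_backend
    backend = set_backend("torch_cuda", on_error="warn")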
4 | """ 5 | from .torch import * # noqa 6 | import torch 7 | 8 | if not torch.cuda.is_available(): 9 | import sys 10 | if "pytest" in sys.modules: # if run through pytest 11 | import pytest 12 | pytest.skip("PyTorch with CUDA is not available.") 13 | raise RuntimeError("PyTorch with CUDA is not available.") 14 | 15 | from ._utils import _dtype_to_str 16 | from ._utils import warn_if_not_float32 17 | 18 | ############################################################################### 19 | 20 | name = "torch_cuda" 21 | 22 | 23 | def randn(*args, **kwargs): 24 | return torch.randn(*args, **kwargs).cuda() 25 | 26 | 27 | def rand(*args, **kwargs): 28 | return torch.rand(*args, **kwargs).cuda() 29 | 30 | 31 | def asarray(x, dtype=None, device="cuda"): 32 | if dtype is None: 33 | if isinstance(x, torch.Tensor): 34 | dtype = x.dtype 35 | if hasattr(x, "dtype") and hasattr(x.dtype, "name"): 36 | dtype = x.dtype.name 37 | if dtype is not None: 38 | dtype = _dtype_to_str(dtype) 39 | dtype = getattr(torch, dtype) 40 | if device is None: 41 | if isinstance(x, torch.Tensor): 42 | device = x.device 43 | else: 44 | device = "cuda" 45 | try: 46 | tensor = torch.as_tensor(x, dtype=dtype, device=device) 47 | except Exception: 48 | import numpy as np 49 | array = np.asarray(x, dtype=_dtype_to_str(dtype)) 50 | tensor = torch.as_tensor(array, dtype=dtype, device=device) 51 | return tensor 52 | 53 | 54 | def check_arrays(*all_inputs): 55 | """Change all inputs into Tensors (or list of Tensors) using the same 56 | precision and device as the first one. Some tensors can be None. 57 | """ 58 | all_tensors = [] 59 | all_tensors.append(asarray(all_inputs[0])) 60 | dtype = all_tensors[0].dtype 61 | warn_if_not_float32(dtype) 62 | device = all_tensors[0].device 63 | for tensor in all_inputs[1:]: 64 | if tensor is None: 65 | pass 66 | elif isinstance(tensor, list): 67 | tensor = [asarray(tt, dtype=dtype, device=device) for tt in tensor] 68 | else: 69 | tensor = asarray(tensor, dtype=dtype, device=device) 70 | all_tensors.append(tensor) 71 | return all_tensors 72 | 73 | 74 | def zeros(shape, dtype="float32", device="cuda"): 75 | if isinstance(shape, int): 76 | shape = (shape, ) 77 | if isinstance(dtype, str): 78 | dtype = getattr(torch, dtype) 79 | return torch.zeros(shape, dtype=dtype, device=device) 80 | 81 | 82 | def to_cpu(array): 83 | return array.cpu() 84 | 85 | 86 | def to_gpu(array, device="cuda"): 87 | return asarray(array, device=device) 88 | -------------------------------------------------------------------------------- /himalaya/kernel_ridge/tests/test_input_arrays.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import pytest 3 | 4 | import sklearn.linear_model 5 | import sklearn.model_selection 6 | 7 | from himalaya.backend import set_backend 8 | from himalaya.backend import ALL_BACKENDS 9 | 10 | from himalaya.kernel_ridge import solve_multiple_kernel_ridge_random_search 11 | from himalaya.kernel_ridge import solve_multiple_kernel_ridge_hyper_gradient 12 | 13 | 14 | def _create_dataset(backend): 15 | n_featuress = (50, 80) 16 | n_samples = 30 17 | n_targets = 2 18 | n_gammas = 3 19 | 20 | Xs = [ 21 | backend.asarray(backend.randn(n_samples, n_features), backend.float64) 22 | for n_features in n_featuress 23 | ] 24 | Ks = backend.stack([X @ X.T for X in Xs]) 25 | 26 | ws = [ 27 | backend.asarray(backend.randn(n_features, n_targets), backend.float64) 28 | for n_features in n_featuress 29 | ] 30 | Ys = backend.stack([X @ w for X, w in zip(Xs, ws)]) 
31 | Y = Ys.sum(0) 32 | 33 | gammas = backend.asarray(backend.rand(n_gammas, Ks.shape[0]), 34 | backend.float64) 35 | gammas /= gammas.sum(1)[:, None] 36 | 37 | return Ks, Y, gammas 38 | 39 | 40 | @pytest.mark.parametrize('Ks_in_cpu', [True, False]) 41 | @pytest.mark.parametrize('Y_in_cpu', [True, False]) 42 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 43 | def test_random_search(backend, Ks_in_cpu, Y_in_cpu): 44 | backend = set_backend(backend) 45 | 46 | Ks, Y, gammas = _create_dataset(backend) 47 | gammas = gammas[:1] 48 | alphas = backend.asarray_like(backend.logspace(-3, 5, 3), Ks) 49 | cv = sklearn.model_selection.check_cv(2) 50 | 51 | for Ks_, Y_, gammas_, alphas_ in itertools.product( 52 | [Ks, backend.to_numpy(Ks), 53 | backend.to_cpu(Ks)], 54 | [Y, backend.to_numpy(Y), backend.to_cpu(Y)], 55 | [gammas, backend.to_numpy(gammas), 56 | backend.to_cpu(gammas), 2], 57 | [alphas, backend.to_numpy(alphas), 58 | backend.to_cpu(alphas)], 59 | ): 60 | 61 | deltas, _, _ = solve_multiple_kernel_ridge_random_search( 62 | Ks_, Y_, n_iter=gammas_, alphas=alphas_, cv=cv, progress_bar=False, 63 | Ks_in_cpu=Ks_in_cpu, Y_in_cpu=Y_in_cpu) 64 | 65 | assert deltas.dtype == Ks.dtype 66 | assert getattr(deltas, "device", None) == getattr(Ks, "device", None) 67 | 68 | 69 | @pytest.mark.parametrize('Y_in_cpu', [True, False]) 70 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 71 | def test_hyper_gradient(backend, Y_in_cpu): 72 | backend = set_backend(backend) 73 | 74 | Ks, Y, _ = _create_dataset(backend) 75 | cv = sklearn.model_selection.check_cv(2) 76 | 77 | for Ks_, Y_ in itertools.product( 78 | [Ks, backend.to_numpy(Ks), 79 | backend.to_cpu(Ks)], 80 | [Y, backend.to_numpy(Y), backend.to_cpu(Y)], 81 | ): 82 | 83 | deltas, _, _ = solve_multiple_kernel_ridge_hyper_gradient( 84 | Ks_, Y_, max_iter=1, cv=cv, progress_bar=False, Y_in_cpu=Y_in_cpu) 85 | 86 | assert deltas.dtype == Ks.dtype 87 | assert getattr(deltas, "device", None) == getattr(Ks, "device", None) 88 | -------------------------------------------------------------------------------- /himalaya/kernel_ridge/__init__.py: -------------------------------------------------------------------------------- 1 | from ._solvers import solve_weighted_kernel_ridge_gradient_descent 2 | from ._solvers import solve_weighted_kernel_ridge_conjugate_gradient 3 | from ._solvers import solve_weighted_kernel_ridge_neumann_series 4 | from ._solvers import solve_kernel_ridge_eigenvalues 5 | from ._solvers import solve_kernel_ridge_gradient_descent 6 | from ._solvers import solve_kernel_ridge_conjugate_gradient 7 | from ._solvers import KERNEL_RIDGE_SOLVERS 8 | from ._solvers import WEIGHTED_KERNEL_RIDGE_SOLVERS 9 | from ._hyper_gradient import solve_multiple_kernel_ridge_hyper_gradient 10 | from ._hyper_gradient import MULTIPLE_KERNEL_RIDGE_SOLVERS 11 | from ._random_search import solve_multiple_kernel_ridge_random_search 12 | from ._random_search import generate_dirichlet_samples 13 | from ._random_search import solve_kernel_ridge_cv_eigenvalues 14 | from ._random_search import solve_kernel_ridge_cv_svd 15 | from ._random_search import KERNEL_RIDGE_CV_SOLVERS 16 | from ._predictions import predict_weighted_kernel_ridge 17 | from ._predictions import predict_and_score_weighted_kernel_ridge 18 | from ._predictions import primal_weights_kernel_ridge 19 | from ._predictions import primal_weights_weighted_kernel_ridge 20 | from ._sklearn_api import KernelRidge 21 | from ._sklearn_api import KernelRidgeCV 22 | from ._sklearn_api import MultipleKernelRidgeCV 23 | 
from ._sklearn_api import WeightedKernelRidge 24 | from ._kernels import PAIRWISE_KERNEL_FUNCTIONS 25 | from ._kernels import linear_kernel 26 | from ._kernels import polynomial_kernel 27 | from ._kernels import rbf_kernel 28 | from ._kernels import sigmoid_kernel 29 | from ._kernels import cosine_similarity_kernel 30 | from ._kernels import KernelCenterer 31 | from ._kernelizer import Kernelizer 32 | from ._kernelizer import ColumnKernelizer 33 | from ._kernelizer import make_column_kernelizer 34 | 35 | __all__ = [ 36 | # kernel ridge solvers 37 | "solve_weighted_kernel_ridge_gradient_descent", 38 | "solve_weighted_kernel_ridge_conjugate_gradient", 39 | "solve_weighted_kernel_ridge_neumann_series", 40 | "solve_kernel_ridge_cv_eigenvalues", 41 | "solve_kernel_ridge_cv_svd", 42 | "solve_kernel_ridge_eigenvalues", 43 | "solve_kernel_ridge_gradient_descent", 44 | "solve_kernel_ridge_conjugate_gradient", 45 | "KERNEL_RIDGE_SOLVERS", 46 | "KERNEL_RIDGE_CV_SOLVERS", 47 | "WEIGHTED_KERNEL_RIDGE_SOLVERS", 48 | # multiple kernel ridge solvers 49 | "MULTIPLE_KERNEL_RIDGE_SOLVERS", 50 | "solve_multiple_kernel_ridge_hyper_gradient", 51 | "solve_multiple_kernel_ridge_random_search", 52 | # helpers 53 | "generate_dirichlet_samples", 54 | "predict_weighted_kernel_ridge", 55 | "predict_and_score_weighted_kernel_ridge", 56 | "primal_weights_kernel_ridge", 57 | "primal_weights_weighted_kernel_ridge", 58 | # scikit-learn API 59 | "KernelRidge", 60 | "KernelRidgeCV", 61 | "MultipleKernelRidgeCV", 62 | "WeightedKernelRidge", 63 | # kernels 64 | "PAIRWISE_KERNEL_FUNCTIONS", 65 | "linear_kernel", 66 | "polynomial_kernel", 67 | "rbf_kernel", 68 | "sigmoid_kernel", 69 | "cosine_similarity_kernel", 70 | "KernelCenterer", 71 | # kernelizers 72 | "Kernelizer", 73 | "ColumnKernelizer", 74 | "make_column_kernelizer", 75 | ] 76 | -------------------------------------------------------------------------------- /himalaya/ridge/tests/test_column.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from sklearn.pipeline import make_pipeline 4 | from sklearn.preprocessing import StandardScaler 5 | 6 | from himalaya.backend import set_backend 7 | from himalaya.backend import ALL_BACKENDS 8 | from himalaya.utils import assert_array_almost_equal 9 | 10 | from himalaya.ridge import ColumnTransformerNoStack 11 | from himalaya.ridge import make_column_transformer_no_stack 12 | from himalaya.ridge import GroupRidgeCV 13 | 14 | 15 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 16 | def test_column_transformer_all_columns(backend): 17 | backend = set_backend(backend) 18 | X = np.random.randn(10, 5) 19 | 20 | ct = ColumnTransformerNoStack([("name", StandardScaler(), slice(0, 5))]) 21 | Xt = ct.fit_transform(X) 22 | assert len(Xt) == 1 23 | assert Xt[0].shape == (10, 5) 24 | 25 | 26 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 27 | def test_column_transformer_passthrough(backend): 28 | backend = set_backend(backend) 29 | X = np.random.randn(10, 5) 30 | 31 | ct = ColumnTransformerNoStack([("name", "passthrough", slice(0, 5))]) 32 | Xt = ct.fit_transform(X) 33 | assert len(Xt) == 1 34 | assert Xt[0].shape == (10, 5) 35 | assert_array_almost_equal(X, Xt[0]) 36 | 37 | 38 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 39 | def test_column_transformer_remainder(backend): 40 | backend = set_backend(backend) 41 | X = np.random.randn(10, 5) 42 | 43 | ct = ColumnTransformerNoStack([("name", "passthrough", slice(0, 0))], 44 | remainder="passthrough") 45 
| Xt = ct.fit_transform(X) 46 | assert len(Xt) == 2 47 | assert Xt[0].shape == (10, 0) 48 | assert Xt[1].shape == (10, 5) 49 | assert_array_almost_equal(X, Xt[1]) 50 | 51 | 52 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 53 | def test_column_transformer_multiple(backend): 54 | backend = set_backend(backend) 55 | X = np.random.randn(10, 5) 56 | 57 | ct = ColumnTransformerNoStack([ 58 | ("name0", StandardScaler(), [0, 1]), 59 | ("name1", StandardScaler(with_mean=False), [2, 3]), 60 | ], remainder="passthrough") 61 | Xt = ct.fit_transform(X) 62 | assert len(Xt) == 3 63 | assert Xt[0].shape == (10, 2) 64 | assert Xt[1].shape == (10, 2) 65 | assert Xt[2].shape == (10, 1) 66 | 67 | 68 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 69 | def test_make_column_transformer(backend): 70 | backend = set_backend(backend) 71 | 72 | trans = StandardScaler() 73 | ct = make_column_transformer_no_stack((trans, slice(0, 3))) 74 | 75 | assert isinstance(ct, ColumnTransformerNoStack) 76 | assert len(ct.transformers) == 1 77 | assert len(ct.transformers[0]) == 3 78 | assert ct.transformers[0][0] == "standardscaler" 79 | assert ct.transformers[0][1] == trans 80 | assert ct.transformers[0][2] == slice(0, 3) 81 | 82 | trans = StandardScaler() 83 | ct = make_column_transformer_no_stack((trans, slice(0, 3)), 84 | ("passthrough", [3, 4])) 85 | 86 | assert isinstance(ct, ColumnTransformerNoStack) 87 | assert len(ct.transformers) == 2 88 | assert len(ct.transformers[0]) == 3 89 | assert len(ct.transformers[1]) == 3 90 | assert ct.transformers[0][0] == "standardscaler" 91 | assert ct.transformers[0][1] == trans 92 | assert ct.transformers[0][2] == slice(0, 3) 93 | assert ct.transformers[1][0] == "passthrough" 94 | assert ct.transformers[1][1] == "passthrough" 95 | assert ct.transformers[1][2] == [3, 4] 96 | 97 | 98 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 99 | def test_column_transformer_in_pipeline(backend): 100 | backend = set_backend(backend) 101 | 102 | X = np.random.randn(10, 5) 103 | Y = np.random.randn(10, 3) 104 | 105 | ct = make_column_transformer_no_stack( 106 | (StandardScaler(), slice(0, 4)), 107 | (StandardScaler(), slice(4, 6)), 108 | ) 109 | pipe = make_pipeline( 110 | ct, 111 | GroupRidgeCV( 112 | groups="input", solver_params=dict(n_iter=np.ones((1, 2)), 113 | progress_bar=False))) 114 | pipe.fit(X, Y) 115 | -------------------------------------------------------------------------------- /himalaya/backend/_utils.py: -------------------------------------------------------------------------------- 1 | import types 2 | import importlib 3 | import warnings 4 | from functools import wraps 5 | 6 | ALL_BACKENDS = [ 7 | "numpy", 8 | "cupy", 9 | "torch", 10 | "torch_cuda", 11 | ] 12 | 13 | CURRENT_BACKEND = "numpy" 14 | 15 | MATCHING_CPU_BACKEND = { 16 | "numpy": "numpy", 17 | "cupy": "numpy", 18 | "torch": "torch", 19 | "torch_cuda": "torch", 20 | } 21 | 22 | 23 | def set_backend(backend, on_error="raise"): 24 | """Set the backend using a global variable, and return the backend module. 25 | 26 | Parameters 27 | ---------- 28 | backend : str or module 29 | Name or module of the backend. 30 | on_error : str in {"raise", "warn"} 31 | Define what is done if the backend fails to be loaded. 32 | If "warn", this function only warns, and keeps the previous backend. 33 | If "raise", this function raises on errors. 34 | 35 | Returns 36 | ------- 37 | module : python module 38 | Module of the backend. 
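
Examples
--------
A sketch of typical use (the name must be one of ``ALL_BACKENDS``):

>>> from himalaya.backend import set_backend
>>> backend = set_backend("numpy")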
39 | """ 40 | global CURRENT_BACKEND 41 | 42 | try: 43 | if isinstance(backend, types.ModuleType): # get name from module 44 | backend = backend.name 45 | 46 | if backend not in ALL_BACKENDS: 47 | raise ValueError("Unknown backend=%r" % (backend, )) 48 | 49 | module = importlib.import_module(__package__ + "." + backend) 50 | CURRENT_BACKEND = backend 51 | except Exception as error: 52 | if on_error == "raise": 53 | raise error 54 | elif on_error == "warn": 55 | warnings.warn(f"Setting backend to {backend} failed: {str(error)}." 56 | f"Falling back to {CURRENT_BACKEND} backend.") 57 | module = get_backend() 58 | else: 59 | raise ValueError('Unknown value on_error=%r' % (on_error, )) 60 | 61 | return module 62 | 63 | 64 | def get_backend(): 65 | """Get the current backend module. 66 | 67 | Returns 68 | ------- 69 | module : python module 70 | Module of the backend. 71 | """ 72 | module = importlib.import_module(__package__ + "." + CURRENT_BACKEND) 73 | return module 74 | 75 | 76 | def _dtype_to_str(dtype): 77 | """Cast dtype to string, such as "float32", or "float64".""" 78 | if isinstance(dtype, str): 79 | return dtype 80 | elif hasattr(dtype, "name"): # works for numpy and cupy 81 | return dtype.name 82 | elif "torch." in str(dtype): # works for torch 83 | return str(dtype)[6:] 84 | elif dtype is None: 85 | return None 86 | else: 87 | raise NotImplementedError() 88 | 89 | 90 | def force_cpu_backend(func): 91 | """Decorator to force the use of a CPU backend.""" 92 | 93 | @wraps(func) 94 | def wrapper(*args, **kwargs): 95 | # skip if the object does not force cpu use 96 | if not hasattr(args[0], "force_cpu") or not args[0].force_cpu: 97 | return func(*args, **kwargs) 98 | 99 | # set corresponding cpu backend 100 | original_backend = get_backend().name 101 | temp_backend = MATCHING_CPU_BACKEND[original_backend] 102 | set_backend(temp_backend) 103 | 104 | # run function 105 | result = func(*args, **kwargs) 106 | 107 | # set back original backend 108 | set_backend(original_backend) 109 | return result 110 | 111 | return wrapper 112 | 113 | 114 | def _add_error_message(func, msg=""): 115 | """Decorator to add a custom error message to a function.""" 116 | 117 | @wraps(func) 118 | def with_error_message(*args, **kwargs): 119 | try: 120 | return func(*args, **kwargs) 121 | except Exception as e: 122 | raise RuntimeError( 123 | f"{msg}\nOriginal error:\n{type(e).__name__}: {e}") 124 | 125 | return with_error_message 126 | 127 | 128 | _already_warned = [False] 129 | 130 | 131 | def warn_if_not_float32(dtype): 132 | """Warn if X is not float32.""" 133 | if _already_warned[0]: # avoid warning multiple times 134 | return None 135 | 136 | if _dtype_to_str(dtype) != "float32": 137 | backend = get_backend() 138 | warnings.warn( 139 | f"GPU backend {backend.name} is much faster with single " 140 | f"precision floats (float32), got input in {dtype}. 
" 141 | "Consider casting your data to float32.", UserWarning) 142 | _already_warned[0] = True 143 | -------------------------------------------------------------------------------- /himalaya/kernel_ridge/tests/test_force_cpu.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from himalaya.backend import set_backend 4 | from himalaya.kernel_ridge import KernelCenterer 5 | from himalaya.kernel_ridge import Kernelizer 6 | from himalaya.kernel_ridge import ColumnKernelizer 7 | from himalaya.kernel_ridge import make_column_kernelizer 8 | from himalaya.kernel_ridge import KernelRidgeCV 9 | from himalaya.kernel_ridge import MultipleKernelRidgeCV 10 | from himalaya.ridge import RidgeCV 11 | from himalaya.ridge import GroupRidgeCV 12 | from himalaya.lasso import SparseGroupLassoCV 13 | 14 | GPU_BACKENDS = [ 15 | "cupy", 16 | "torch_cuda", 17 | ] 18 | 19 | 20 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 21 | @pytest.mark.parametrize('force_cpu', [True, False]) 22 | def test_kernel_centerer(backend, force_cpu): 23 | backend = set_backend(backend) 24 | X = backend.randn(5, 5) 25 | K = X @ X.T 26 | 27 | Kc = KernelCenterer(force_cpu=force_cpu).fit_transform(K) 28 | assert backend.is_in_gpu(Kc) != force_cpu 29 | 30 | 31 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 32 | @pytest.mark.parametrize('force_cpu', [True, False]) 33 | def test_kernelizer(backend, force_cpu): 34 | backend = set_backend(backend) 35 | X = backend.randn(10, 5) 36 | 37 | K = Kernelizer(force_cpu=force_cpu).fit_transform(X) 38 | assert backend.is_in_gpu(K) != force_cpu 39 | 40 | 41 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 42 | @pytest.mark.parametrize('force_cpu', [True, False]) 43 | def test_column_kernelizer(backend, force_cpu): 44 | backend = set_backend(backend) 45 | X = backend.randn(10, 5) 46 | 47 | Ks = ColumnKernelizer([ 48 | ("name", Kernelizer(), slice(0, 5)), 49 | ], force_cpu=force_cpu).fit_transform(X) 50 | 51 | assert backend.is_in_gpu(Ks) != force_cpu 52 | 53 | 54 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 55 | @pytest.mark.parametrize('force_cpu', [True, False]) 56 | def test_make_column_kernelizer(backend, force_cpu): 57 | backend = set_backend(backend) 58 | X = backend.randn(10, 5) 59 | 60 | Ks = make_column_kernelizer((Kernelizer(), slice(0, 5)), 61 | force_cpu=force_cpu).fit_transform(X) 62 | assert backend.is_in_gpu(Ks) != force_cpu 63 | 64 | 65 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 66 | @pytest.mark.parametrize('force_cpu', [True, False]) 67 | def test_kernel_ridge_cv(backend, force_cpu): 68 | backend = set_backend(backend) 69 | X = backend.randn(10, 5) 70 | Y = backend.randn(10, 2) 71 | 72 | best_alphas_ = KernelRidgeCV(force_cpu=force_cpu).fit(X, Y).best_alphas_ 73 | assert backend.is_in_gpu(best_alphas_) != force_cpu 74 | 75 | 76 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 77 | @pytest.mark.parametrize('force_cpu', [True, False]) 78 | def test_multiple_kernel_ridge_cv(backend, force_cpu): 79 | backend = set_backend(backend) 80 | X = backend.randn(10, 5) 81 | Y = backend.randn(10, 2) 82 | 83 | deltas_ = MultipleKernelRidgeCV( 84 | kernels=["linear"], force_cpu=force_cpu, 85 | solver_params=dict(n_iter=2, progress_bar=False)).fit(X, Y).deltas_ 86 | assert backend.is_in_gpu(deltas_) != force_cpu 87 | 88 | 89 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 90 | @pytest.mark.parametrize('force_cpu', [True, False]) 91 | def test_ridge_cv(backend, force_cpu): 92 | backend = set_backend(backend) 93 
| X = backend.randn(10, 5) 94 | Y = backend.randn(10, 2) 95 | 96 | best_alphas_ = RidgeCV(force_cpu=force_cpu).fit(X, Y).best_alphas_ 97 | assert backend.is_in_gpu(best_alphas_) != force_cpu 98 | 99 | 100 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 101 | @pytest.mark.parametrize('force_cpu', [True, False]) 102 | def test_group_ridge_cv(backend, force_cpu): 103 | backend = set_backend(backend) 104 | X = backend.randn(10, 5) 105 | Y = backend.randn(10, 2) 106 | 107 | deltas_ = GroupRidgeCV(groups=[0, 1, 0, 1, 1], 108 | force_cpu=force_cpu, solver_params=dict( 109 | n_iter=2, progress_bar=False)).fit(X, Y).deltas_ 110 | assert backend.is_in_gpu(deltas_) != force_cpu 111 | 112 | 113 | @pytest.mark.parametrize('backend', GPU_BACKENDS) 114 | @pytest.mark.parametrize('force_cpu', [True, False]) 115 | def test_sparse_group_lasso_cv(backend, force_cpu): 116 | backend = set_backend(backend) 117 | X = backend.randn(10, 5) 118 | Y = backend.randn(10, 2) 119 | 120 | best_l21_reg_ = SparseGroupLassoCV( 121 | groups=[0, 1, 0, 1, 1], force_cpu=force_cpu, 122 | solver_params=dict(progress_bar=False)).fit(X, Y).best_l21_reg_ 123 | assert backend.is_in_gpu(best_l21_reg_) != force_cpu 124 | -------------------------------------------------------------------------------- /himalaya/ridge/tests/test_random_search_ridge.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | import sklearn.linear_model 5 | import sklearn.model_selection 6 | 7 | from himalaya.backend import set_backend 8 | from himalaya.backend import ALL_BACKENDS 9 | from himalaya.utils import assert_array_almost_equal 10 | from himalaya.scoring import r2_score 11 | 12 | from himalaya.ridge import solve_group_ridge_random_search 13 | 14 | 15 | def _create_dataset(backend): 16 | n_featuress = (10, 20) 17 | n_samples = 80 18 | n_targets = 4 19 | n_gammas = 3 20 | 21 | Xs = [ 22 | backend.asarray(backend.randn(n_samples, n_features), backend.float64) 23 | for n_features in n_featuress 24 | ] 25 | 26 | ws = [ 27 | backend.asarray(backend.randn(n_features, n_targets), backend.float64) 28 | for n_features in n_featuress 29 | ] 30 | Ys = backend.stack([X @ w for X, w in zip(Xs, ws)]) 31 | Y = Ys.sum(0) 32 | Y += backend.asarray(backend.randn(*Y.shape), backend.float64) 33 | 34 | gammas = backend.asarray(backend.rand(n_gammas, len(Xs)), backend.float64) 35 | gammas /= gammas.sum(1)[:, None] 36 | 37 | return Xs, Y, gammas 38 | 39 | 40 | @pytest.mark.parametrize('n_targets_batch', [None, 3]) 41 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 42 | def test_solve_group_ridge_random_search_n_targets_batch( 43 | backend, n_targets_batch): 44 | _test_solve_group_ridge_random_search(backend=backend, 45 | n_targets_batch=n_targets_batch) 46 | 47 | 48 | @pytest.mark.parametrize('n_alphas_batch', [None, 2]) 49 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 50 | def test_solve_group_ridge_random_search_n_alphas_batch( 51 | backend, n_alphas_batch): 52 | _test_solve_group_ridge_random_search(backend=backend, 53 | n_alphas_batch=n_alphas_batch) 54 | 55 | 56 | def _test_solve_group_ridge_random_search(backend, n_targets_batch=None, 57 | n_alphas_batch=None): 58 | backend = set_backend(backend) 59 | 60 | Xs, Y, gammas = _create_dataset(backend) 61 | alphas = backend.asarray_like(backend.logspace(-3, 5, 9), Xs[0]) 62 | n_targets = Y.shape[1] 63 | cv = sklearn.model_selection.check_cv(10) 64 | 65 | ############ 66 | # run solver 67 | results = solve_group_ridge_random_search( 68 
| Xs, Y, n_iter=gammas, alphas=alphas, score_func=r2_score, cv=cv, 69 | n_targets_batch=n_targets_batch, progress_bar=False, 70 | return_weights=True, n_alphas_batch=n_alphas_batch, 71 | diagonalize_method="svd") 72 | best_deltas, refit_weights, cv_scores = results 73 | 74 | ######################################### 75 | # compare with sklearn.linear_model.Ridge 76 | test_scores = [] 77 | for gamma in gammas: 78 | X = backend.concatenate( [x * backend.sqrt(g) for x, g in zip(Xs, gamma)], 1) 80 | for train, test in cv.split(X): 81 | for alpha in alphas: 82 | model = sklearn.linear_model.Ridge( alpha=backend.to_numpy(alpha), fit_intercept=False) 84 | model = model.fit(backend.to_numpy(X[train]), backend.to_numpy(Y[train])) 86 | predictions = backend.asarray_like( model.predict(backend.to_numpy(X[test])), ref=Y) 88 | test_scores.append(r2_score(Y[test], predictions)) 89 | 90 | test_scores = backend.stack(test_scores) 91 | test_scores = test_scores.reshape(len(gammas), cv.get_n_splits(), len(alphas), n_targets) 93 | test_scores_mean = backend.max(test_scores.mean(1), 1) 94 | assert_array_almost_equal(cv_scores, test_scores_mean, decimal=5) 95 | 96 | ###################### 97 | # test refit_weights 98 | for tt in range(n_targets): 99 | gamma = backend.exp(best_deltas[:, tt]) 100 | alpha = 1.0 101 | 102 | # compare primal weights with sklearn.linear_model.Ridge 103 | X = backend.concatenate( [X * backend.sqrt(g) for X, g in zip(Xs, gamma)], 1) 105 | model = sklearn.linear_model.Ridge(fit_intercept=False, alpha=backend.to_numpy(alpha)) 107 | w1 = model.fit(backend.to_numpy(X), backend.to_numpy(Y[:, tt])).coef_ 108 | w1 = np.split(w1, np.cumsum([X.shape[1] for X in Xs][:-1]), axis=0) 109 | w1 = [backend.asarray(w) for w in w1] 110 | w1_scaled = backend.concatenate( [w * backend.sqrt(g) for w, g in zip(w1, gamma)]) 112 | assert_array_almost_equal(w1_scaled, refit_weights[:, tt], decimal=5) 113 | 114 | 115 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 116 | def test_different_number_of_samples(backend): 117 | backend = set_backend(backend) 118 | Xs, Y, gammas = _create_dataset(backend) 119 | 120 | with pytest.raises(ValueError, match="same number of samples"): 121 | solve_group_ridge_random_search(Xs[:4], Y[:3]) 122 | -------------------------------------------------------------------------------- /examples/multiple_kernel_ridge/plot_mkr_3_path.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multiple-kernel ridge path between two kernels 3 | ============================================== 4 | This example demonstrates the path of all possible ratios of kernel weights 5 | between two kernels, in a multiple kernel ridge regression model. Over the path 6 | of ratios, the kernels are weighted by the kernel weights, then summed, and a 7 | joint model is fit on the obtained kernel. The explained variance on a test set 8 | is then computed, and decomposed over both kernels. 
9 | """ 10 | from functools import partial 11 | 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | 15 | from himalaya.backend import set_backend 16 | from himalaya.kernel_ridge import MultipleKernelRidgeCV 17 | from himalaya.kernel_ridge import Kernelizer 18 | from himalaya.kernel_ridge import ColumnKernelizer 19 | from himalaya.progress_bar import bar 20 | from himalaya.utils import generate_multikernel_dataset 21 | 22 | from sklearn.pipeline import make_pipeline 23 | from sklearn import set_config 24 | set_config(display='diagram') 25 | 26 | ############################################################################### 27 | # In this example, we use the ``cupy`` backend. 28 | 29 | backend = set_backend("cupy", on_error="warn") 30 | 31 | ############################################################################### 32 | # Generate a random dataset 33 | # ------------------------- 34 | # - X_train : array of shape (n_samples_train, n_features) 35 | # - X_test : array of shape (n_samples_test, n_features) 36 | # - Y_train : array of shape (n_samples_train, n_targets) 37 | # - Y_test : array of shape (n_samples_test, n_targets) 38 | 39 | n_targets = 50 40 | kernel_weights = np.tile(np.array([0.6, 0.4])[None], (n_targets, 1)) 41 | 42 | (X_train, X_test, Y_train, Y_test, 43 | kernel_weights, n_features_list) = generate_multikernel_dataset( 44 | n_kernels=2, n_targets=n_targets, n_samples_train=600, 45 | n_samples_test=300, random_state=42, noise=0.31, 46 | kernel_weights=kernel_weights) 47 | 48 | feature_names = [f"Feature space {ii}" for ii in range(len(n_features_list))] 49 | 50 | ############################################################################### 51 | # Create a MultipleKernelRidgeCV model, see plot_mkr_sklearn_api.py for more 52 | # details. 53 | 54 | # Find the start and end of each feature space X in Xs. 55 | start_and_end = np.concatenate([[0], np.cumsum(n_features_list)]) 56 | slices = [ 57 | slice(start, end) 58 | for start, end in zip(start_and_end[:-1], start_and_end[1:]) 59 | ] 60 | 61 | # Create a different ``Kernelizer`` for each feature space. 62 | kernelizers = [(name, Kernelizer(), slice_) 63 | for name, slice_ in zip(feature_names, slices)] 64 | column_kernelizer = ColumnKernelizer(kernelizers) 65 | 66 | # Create a MultipleKernelRidgeCV model. 67 | solver_params = dict(alphas=np.logspace(-5, 5, 41), progress_bar=False) 68 | model = MultipleKernelRidgeCV(kernels="precomputed", solver="random_search", 69 | solver_params=solver_params, 70 | random_state=42) 71 | pipe = make_pipeline(column_kernelizer, model) 72 | pipe 73 | 74 | ############################################################################### 75 | # Then, we manually perform a hyperparameter grid search for the kernel weights. 76 | 77 | # Make the score method use `split=True` by default. 78 | model.score = partial(model.score, split=True) 79 | 80 | # Define the hyperparameter grid search. 
81 | ratios = np.logspace(-4, 4, 41) 82 | candidates = np.array([1 - ratios / (1 + ratios), ratios / (1 + ratios)]).T 83 | 84 | # Loop over hyperparameter candidates 85 | split_r2_scores = [] 86 | for candidate in bar(candidates, "Hyperparameter candidates"): 87 | # test one hyperparameter candidate at a time 88 | pipe[-1].solver_params["n_iter"] = candidate[None] 89 | pipe.fit(X_train, Y_train) 90 | 91 | # split the R2 score between both kernels 92 | scores = pipe.score(X_test, Y_test) 93 | split_r2_scores.append(backend.to_numpy(scores)) 94 | 95 | # average scores over targets for plotting 96 | split_r2_scores_avg = np.array(split_r2_scores).mean(axis=2) 97 | 98 | ############################################################################### 99 | # Plot the variance decomposition for all the hyperparameter ratios. 100 | # 101 | # For a ratio of 1e-3, feature space 0 is almost not used. For a ratio of 1e3, 102 | # feature space 1 is almost not used. The best ratio is here around 1, because 103 | # the feature spaces are used with similar scales in the simulated dataset. 104 | 105 | fig, ax = plt.subplots(figsize=(5, 4)) 106 | accumulator = np.zeros_like(ratios) 107 | for split in split_r2_scores_avg.T: 108 | ax.fill_between(ratios, accumulator, accumulator + split, alpha=0.7) 109 | accumulator += split 110 | 111 | ax.set(xscale='log') 112 | ax.set(xlabel=r"Ratio of kernel weight ($\gamma_A / \gamma_B$)") 113 | ax.set(ylabel=r"$R^2$ score (test set)") 114 | ax.set(title=r"$R^2$ score decomposition") 115 | ax.legend(feature_names, loc="upper left") 116 | ax.grid() 117 | fig.tight_layout() 118 | plt.show() 119 | -------------------------------------------------------------------------------- /examples/multiple_kernel_ridge/plot_mkr_5_refine_results.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multiple-kernel ridge refining 3 | ============================== 4 | This example demonstrates how to solve multiple-kernel ridge regression with 5 | hyperparameter random search, then refine the results with hyperparameter 6 | gradient descent. 7 | """ 8 | import numpy as np 9 | 10 | from himalaya.backend import set_backend 11 | from himalaya.kernel_ridge import MultipleKernelRidgeCV 12 | from himalaya.kernel_ridge import Kernelizer 13 | from himalaya.kernel_ridge import ColumnKernelizer 14 | from himalaya.utils import generate_multikernel_dataset 15 | 16 | from sklearn.pipeline import make_pipeline 17 | from sklearn import set_config 18 | set_config(display='diagram') 19 | 20 | ############################################################################### 21 | # In this example, we use the ``cupy`` backend (GPU). 
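# If cupy is not available, ``on_error="warn"`` makes ``set_backend`` only
# warn and keep the previous backend, so the example still runs on CPU.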
22 | 23 | backend = set_backend("cupy", on_error="warn") 24 | 25 | ############################################################################### 26 | # Generate a random dataset 27 | # ------------------------- 28 | # - X_train : array of shape (n_samples_train, n_features) 29 | # - X_test : array of shape (n_samples_test, n_features) 30 | # - Y_train : array of shape (n_samples_train, n_targets) 31 | # - Y_test : array of shape (n_samples_test, n_targets) 32 | 33 | (X_train, X_test, Y_train, Y_test, kernel_weights, 34 | n_features_list) = generate_multikernel_dataset(n_kernels=4, n_targets=50, 35 | n_samples_train=600, 36 | n_samples_test=300, 37 | random_state=42) 38 | 39 | feature_names = [f"Feature space {ii}" for ii in range(len(n_features_list))] 40 | 41 | ############################################################################### 42 | # Prepare the pipeline 43 | # -------------------- 44 | 45 | # Find the start and end of each feature space X in Xs 46 | start_and_end = np.concatenate([[0], np.cumsum(n_features_list)]) 47 | slices = [ 48 | slice(start, end) 49 | for start, end in zip(start_and_end[:-1], start_and_end[1:]) 50 | ] 51 | 52 | # Create a different ``Kernelizer`` for each feature space. 53 | kernelizers = [("space %d" % ii, Kernelizer(), slice_) 54 | for ii, slice_ in enumerate(slices)] 55 | column_kernelizer = ColumnKernelizer(kernelizers) 56 | 57 | ############################################################################### 58 | # Define the random-search model 59 | # ------------------------------ 60 | # We use very few iterations on purpose, to make the random search suboptimal, 61 | # and refine it with hyperparameter gradient descent. 62 | 63 | solver_params = dict(n_iter=5, alphas=np.logspace(-10, 10, 41)) 64 | 65 | model_1 = MultipleKernelRidgeCV(kernels="precomputed", solver="random_search", 66 | solver_params=solver_params, random_state=42) 67 | pipe_1 = make_pipeline(column_kernelizer, model_1) 68 | 69 | # Fit the model on all targets 70 | pipe_1.fit(X_train, Y_train) 71 | 72 | ############################################################################### 73 | # Define the gradient-descent model 74 | # --------------------------------- 75 | 76 | solver_params = dict(max_iter=10, hyper_gradient_method="direct", 77 | max_iter_inner_hyper=10, 78 | initial_deltas="here_will_go_the_previous_deltas") 79 | 80 | model_2 = MultipleKernelRidgeCV(kernels="precomputed", solver="hyper_gradient", 81 | solver_params=solver_params) 82 | pipe_2 = make_pipeline(column_kernelizer, model_2) 83 | 84 | ############################################################################### 85 | # Use the random-search to initialize the gradient-descent 86 | # -------------------------------------------------------- 87 | 88 | # We might want to refine only the best-predicting targets, since the 89 | # hyperparameter gradient descent is less efficient over many targets. 
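# Keep the targets whose best cross-validation score is above the 40th
# percentile, i.e. the top 60% of targets.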
90 | top = 60 # top 60% 91 | best_cv_scores = backend.to_numpy(pipe_1[-1].cv_scores_.max(0)) 92 | mask = best_cv_scores > np.percentile(best_cv_scores, 100 - top) 93 | 94 | pipe_2[-1].solver_params['initial_deltas'] = pipe_1[-1].deltas_[:, mask] 95 | pipe_2.fit(X_train, Y_train[:, mask]) 96 | 97 | ############################################################################### 98 | # Compute predictions on a test set 99 | # --------------------------------- 100 | import matplotlib.pyplot as plt 101 | 102 | # use the first model for all targets 103 | test_scores_1 = pipe_1.score(X_test, Y_test) 104 | 105 | # use the second model for the refined targets 106 | test_scores_2 = backend.copy(test_scores_1) 107 | test_scores_2[mask] = pipe_2.score(X_test, Y_test[:, mask]) 108 | 109 | test_scores_1 = backend.to_numpy(test_scores_1) 110 | test_scores_2 = backend.to_numpy(test_scores_2) 111 | plt.figure(figsize=(4, 4)) 112 | plt.scatter(test_scores_1, test_scores_2, alpha=0.3) 113 | plt.xlim(0, 1) 114 | plt.plot(plt.xlim(), plt.xlim(), color='k', lw=1) 115 | plt.xlabel(r"Base model") 116 | plt.ylabel(r"Refined model") 117 | plt.title("$R^2$ generalization score") 118 | plt.grid() 119 | plt.tight_layout() 120 | plt.show() 121 | -------------------------------------------------------------------------------- /himalaya/ridge/tests/test_solvers_ridge.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | import sklearn.linear_model 5 | import scipy.linalg 6 | 7 | from himalaya.backend import set_backend 8 | from himalaya.backend import ALL_BACKENDS 9 | from himalaya.utils import assert_array_almost_equal 10 | 11 | from himalaya.ridge import RIDGE_SOLVERS 12 | 13 | 14 | def _create_dataset(backend, many_targets=False): 15 | if many_targets: 16 | n_samples, n_features, n_targets = 10, 5, 20 17 | else: 18 | n_samples, n_features, n_targets = 30, 10, 3 19 | 20 | X = backend.asarray(backend.randn(n_samples, n_features), backend.float64) 21 | Y = backend.asarray(backend.randn(n_samples, n_targets), backend.float64) 22 | weights = backend.asarray(backend.randn(n_features, n_targets), 23 | backend.float64) 24 | 25 | return X, Y, weights 26 | 27 | 28 | @pytest.mark.parametrize('many_targets', [False, True]) 29 | @pytest.mark.parametrize('solver_name', RIDGE_SOLVERS) 30 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 31 | def test_solve_kernel_ridge(solver_name, backend, many_targets): 32 | backend = set_backend(backend) 33 | 34 | X, Y, weights = _create_dataset(backend, many_targets=many_targets) 35 | alphas = backend.asarray_like(backend.logspace(-2, 5, 7), Y) 36 | 37 | solver = RIDGE_SOLVERS[solver_name] 38 | XTX = X.T @ X 39 | XTY = X.T @ Y 40 | 41 | for alpha in alphas: 42 | alpha = backend.full_like(Y, fill_value=alpha, shape=Y.shape[1]) 43 | b2 = solver(X, Y, alpha=alpha, fit_intercept=False) 44 | b2 = backend.to_gpu(b2) 45 | assert b2.shape == (X.shape[1], Y.shape[1]) 46 | 47 | n_features, n_targets = weights.shape 48 | for ii in range(n_targets): 49 | # compare primal coefficients with scipy.linalg.solve 50 | reg = backend.asarray_like(np.eye(n_features), Y) * alpha[ii] 51 | b1 = scipy.linalg.solve(backend.to_numpy(XTX + reg), 52 | backend.to_numpy(XTY[:, ii])) 53 | assert_array_almost_equal(b1, b2[:, ii], decimal=6) 54 | 55 | # compare predictions with sklearn.linear_model.Ridge 56 | prediction = backend.matmul(X, b2[:, ii]) 57 | model = sklearn.linear_model.Ridge( 58 | alpha=backend.to_numpy(alpha[ii]), max_iter=1000, 
tol=1e-6, 59 | fit_intercept=False) 60 | model.fit(backend.to_numpy(X), backend.to_numpy(Y[:, ii])) 61 | prediction_sklearn = model.predict(backend.to_numpy(X)) 62 | assert_array_almost_equal(prediction, prediction_sklearn, 63 | decimal=6) 64 | 65 | assert_array_almost_equal(model.coef_, b2[:, ii], decimal=5) 66 | 67 | 68 | @pytest.mark.parametrize('solver_name', RIDGE_SOLVERS) 69 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 70 | def test_solve_kernel_ridge_intercept(solver_name, backend): 71 | backend = set_backend(backend) 72 | 73 | X, Y, weights = _create_dataset(backend) 74 | Y += 100 75 | X += 10 76 | alphas = backend.asarray_like(backend.logspace(-2, 5, 7), Y) 77 | 78 | solver = RIDGE_SOLVERS[solver_name] 79 | 80 | for alpha in alphas: 81 | alpha = backend.full_like(Y, fill_value=alpha, shape=Y.shape[1]) 82 | b2, i2 = solver(X, Y, alpha=alpha, fit_intercept=True) 83 | assert b2.shape == (X.shape[1], Y.shape[1]) 84 | assert i2.shape == (Y.shape[1], ) 85 | b2 = backend.to_gpu(b2) 86 | i2 = backend.to_gpu(i2) 87 | 88 | n_features, n_targets = weights.shape 89 | for ii in range(n_targets): 90 | 91 | # compare predictions with sklearn.linear_model.Ridge 92 | prediction = backend.matmul(X, b2[:, ii]) + i2[ii] 93 | model = sklearn.linear_model.Ridge( 94 | alpha=backend.to_numpy(alpha[ii]), max_iter=1000, tol=1e-6, 95 | fit_intercept=True) 96 | model.fit(backend.to_numpy(X), backend.to_numpy(Y[:, ii])) 97 | prediction_sklearn = model.predict(backend.to_numpy(X)) 98 | assert_array_almost_equal(prediction, prediction_sklearn, 99 | decimal=5) 100 | 101 | assert_array_almost_equal(model.coef_, b2[:, ii], decimal=5) 102 | 103 | 104 | @pytest.mark.parametrize('solver_name', RIDGE_SOLVERS) 105 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 106 | def test_warning_kernel_ridge_ridge(solver_name, backend): 107 | backend = set_backend(backend) 108 | X, Y, weights = _create_dataset(backend) 109 | solver = RIDGE_SOLVERS[solver_name] 110 | 111 | with pytest.warns(UserWarning, 112 | match="ridge is slower than solving kernel"): 113 | solver(X[:4], Y[:4]) 114 | 115 | 116 | @pytest.mark.parametrize('solver_name', RIDGE_SOLVERS) 117 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 118 | def test_different_number_of_samples(solver_name, backend): 119 | backend = set_backend(backend) 120 | X, Y, weights = _create_dataset(backend) 121 | solver = RIDGE_SOLVERS[solver_name] 122 | 123 | with pytest.raises(ValueError, match="same number of samples"): 124 | solver(X[:4], Y[:3]) 125 | -------------------------------------------------------------------------------- /doc/api.rst: -------------------------------------------------------------------------------- 1 | .. _api_documentation: 2 | 3 | === 4 | API 5 | === 6 | 7 | List of functions and classes in Himalaya. 8 | 9 | Backend 10 | ======= 11 | 12 | Public functions in ``himalaya.backend``. 13 | 14 | .. currentmodule:: himalaya.backend 15 | 16 | .. autosummary:: 17 | :toctree: _generated/ 18 | :nosignatures: 19 | :template: function.rst 20 | 21 | set_backend 22 | get_backend 23 | ALL_BACKENDS 24 | 25 | | 26 | 27 | _____ 28 | 29 | Kernel ridge 30 | ============ 31 | 32 | Public functions and classes in ``himalaya.kernel_ridge``. 33 | 34 | .. currentmodule:: himalaya.kernel_ridge 35 | 36 | Estimators 37 | ---------- 38 | Estimators compatible with the ``scikit-learn`` API. 39 | 40 | .. 
autosummary:: 41 | :toctree: _generated/ 42 | :nosignatures: 43 | 44 | :template: class.rst 45 | KernelRidge 46 | KernelRidgeCV 47 | WeightedKernelRidge 48 | MultipleKernelRidgeCV 49 | 50 | Kernelizer 51 | ColumnKernelizer 52 | :template: function.rst 53 | make_column_kernelizer 54 | 55 | 56 | 57 | Solver functions 58 | ---------------- 59 | .. autosummary:: 60 | :toctree: _generated/ 61 | :nosignatures: 62 | :template: function.rst 63 | 64 | KERNEL_RIDGE_SOLVERS 65 | solve_kernel_ridge_cv_eigenvalues 66 | solve_kernel_ridge_eigenvalues 67 | solve_kernel_ridge_gradient_descent 68 | solve_kernel_ridge_conjugate_gradient 69 | 70 | WEIGHTED_KERNEL_RIDGE_SOLVERS 71 | solve_weighted_kernel_ridge_gradient_descent 72 | solve_weighted_kernel_ridge_conjugate_gradient 73 | solve_weighted_kernel_ridge_neumann_series 74 | 75 | MULTIPLE_KERNEL_RIDGE_SOLVERS 76 | solve_multiple_kernel_ridge_hyper_gradient 77 | solve_multiple_kernel_ridge_random_search 78 | 79 | 80 | 81 | Helpers 82 | ------- 83 | .. autosummary:: 84 | :toctree: _generated/ 85 | :nosignatures: 86 | :template: function.rst 87 | 88 | generate_dirichlet_samples 89 | predict_weighted_kernel_ridge 90 | predict_and_score_weighted_kernel_ridge 91 | primal_weights_kernel_ridge 92 | primal_weights_weighted_kernel_ridge 93 | 94 | 95 | 96 | Kernels 97 | ------- 98 | .. autosummary:: 99 | :toctree: _generated/ 100 | :nosignatures: 101 | :template: function.rst 102 | 103 | PAIRWISE_KERNEL_FUNCTIONS 104 | linear_kernel 105 | polynomial_kernel 106 | rbf_kernel 107 | sigmoid_kernel 108 | cosine_similarity_kernel 109 | 110 | :template: class.rst 111 | KernelCenterer 112 | 113 | | 114 | 115 | _____ 116 | 117 | 118 | Lasso 119 | ===== 120 | 121 | Public functions and classes in ``himalaya.lasso``. 122 | 123 | .. currentmodule:: himalaya.lasso 124 | 125 | Estimators 126 | ---------- 127 | Estimators compatible with the ``scikit-learn`` API. 128 | 129 | .. autosummary:: 130 | :toctree: _generated/ 131 | :nosignatures: 132 | 133 | :template: class.rst 134 | SparseGroupLassoCV 135 | 136 | Solver functions 137 | ---------------- 138 | .. autosummary:: 139 | :toctree: _generated/ 140 | :nosignatures: 141 | :template: function.rst 142 | 143 | solve_sparse_group_lasso 144 | solve_sparse_group_lasso_cv 145 | 146 | 147 | | 148 | 149 | _____ 150 | 151 | Ridge 152 | ===== 153 | 154 | Public functions and classes in ``himalaya.ridge``. 155 | 156 | .. currentmodule:: himalaya.ridge 157 | 158 | Estimators 159 | ---------- 160 | Estimators compatible with the ``scikit-learn`` API. 161 | 162 | .. autosummary:: 163 | :toctree: _generated/ 164 | :nosignatures: 165 | 166 | :template: class.rst 167 | Ridge 168 | RidgeCV 169 | GroupRidgeCV 170 | BandedRidgeCV 171 | 172 | ColumnTransformerNoStack 173 | :template: function.rst 174 | make_column_transformer_no_stack 175 | 176 | Solver functions 177 | ---------------- 178 | .. autosummary:: 179 | :toctree: _generated/ 180 | :nosignatures: 181 | :template: function.rst 182 | 183 | RIDGE_SOLVERS 184 | solve_ridge_svd 185 | solve_ridge_cv_svd 186 | GROUP_RIDGE_SOLVERS 187 | BANDED_RIDGE_SOLVERS 188 | solve_group_ridge_random_search 189 | solve_banded_ridge_random_search 190 | 191 | 192 | | 193 | 194 | _____ 195 | 196 | 197 | Other modules 198 | ============= 199 | 200 | Public functions and classes in other minor modules. 201 | 202 | .. currentmodule:: himalaya 203 | 204 | Progress bar 205 | ------------ 206 | .. 
autosummary:: 207 | :toctree: _generated/ 208 | :nosignatures: 209 | 210 | :template: class.rst 211 | progress_bar.ProgressBar 212 | :template: function.rst 213 | progress_bar.bar 214 | 215 | 216 | Scoring functions 217 | ----------------- 218 | .. autosummary:: 219 | :toctree: _generated/ 220 | :nosignatures: 221 | :template: function.rst 222 | 223 | scoring.l2_neg_loss 224 | scoring.r2_score 225 | scoring.correlation_score 226 | scoring.r2_score_split 227 | scoring.r2_score_split_svd 228 | scoring.correlation_score_split 229 | 230 | 231 | Utils 232 | ----- 233 | .. autosummary:: 234 | :toctree: _generated/ 235 | :nosignatures: 236 | :template: function.rst 237 | 238 | utils.compute_lipschitz_constants 239 | utils.generate_multikernel_dataset 240 | 241 | 242 | Visualization 243 | ------------- 244 | .. autosummary:: 245 | :toctree: _generated/ 246 | :nosignatures: 247 | :template: function.rst 248 | 249 | viz.plot_alphas_diagnostic 250 | -------------------------------------------------------------------------------- /examples/multiple_kernel_ridge/plot_mkr_4_refit_from_deltas.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multiple-kernel ridge fit from fixed hyper-parameters 3 | ===================================================== 4 | This example demonstrates how to fit a multiple-kernel ridge model with fixed 5 | hyper-parameters. Here are three different use cases: 6 | 7 | - If the kernel weights hyper-parameters are known and identical across 8 | targets, the kernels can be scaled and summed, and a simple KernelRidgeCV can 9 | be used to fit the model. 10 | - If the kernel weights hyper-parameters are unknown and different across 11 | targets, a MultipleKernelRidgeCV can be used to search the best 12 | hyper-parameters per target. 13 | - If the kernel weights hyper-parameters are known and different across 14 | targets, a WeightedKernelRidge model can be used to fit the ridge models on 15 | each target independently. 16 | 17 | This method can be used for example in the following workflow: 18 | 19 | - fit a MultipleKernelRidgeCV to learn the kernel weights hyper-parameter, 20 | - save the hyper-parameters, but not the ridge weights, to save disk space, 21 | - fit a WeightedKernelRidge from the saved hyper-parameters, for further use of 22 | the model (prediction, interpretation, etc.). 23 | """ 24 | import numpy as np 25 | 26 | from himalaya.backend import set_backend 27 | from himalaya.kernel_ridge import WeightedKernelRidge 28 | from himalaya.kernel_ridge import Kernelizer 29 | from himalaya.kernel_ridge import ColumnKernelizer 30 | from himalaya.utils import generate_multikernel_dataset 31 | 32 | from sklearn.pipeline import make_pipeline 33 | from sklearn import set_config 34 | set_config(display='diagram') 35 | 36 | ############################################################################### 37 | # In this example, we use the ``torch_cuda`` backend (GPU). 
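# If no CUDA-capable GPU is available, ``on_error="warn"`` makes
# ``set_backend`` warn and keep the current backend instead of raising.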
38 | 39 | backend = set_backend("torch_cuda", on_error="warn") 40 | 41 | ############################################################################### 42 | # Generate a random dataset 43 | # ------------------------- 44 | # - X_train : array of shape (n_samples_train, n_features) 45 | # - X_test : array of shape (n_samples_test, n_features) 46 | # - Y_train : array of shape (n_samples_train, n_targets) 47 | # - Y_test : array of shape (n_samples_test, n_targets) 48 | 49 | (X_train, X_test, Y_train, Y_test, kernel_weights, 50 | n_features_list) = generate_multikernel_dataset(n_kernels=4, n_targets=500, 51 | n_samples_train=1000, 52 | n_samples_test=400, 53 | random_state=42) 54 | 55 | ############################################################################### 56 | # Prepare the pipeline 57 | # -------------------- 58 | 59 | # Find the start and end of each feature space X in Xs 60 | start_and_end = np.concatenate([[0], np.cumsum(n_features_list)]) 61 | slices = [ 62 | slice(start, end) 63 | for start, end in zip(start_and_end[:-1], start_and_end[1:]) 64 | ] 65 | 66 | # Create a different ``Kernelizer`` for each feature space. 67 | kernelizers = [("space %d" % ii, Kernelizer(), slice_) 68 | for ii, slice_ in enumerate(slices)] 69 | column_kernelizer = ColumnKernelizer(kernelizers) 70 | 71 | ############################################################################### 72 | # Define the weighted kernel ridge model 73 | # -------------------------------------- 74 | # Here we use the ground truth kernel weights for each target (deltas), but it 75 | # can be typically used with deltas obtained from a MultipleKernelRidgeCV fit. 76 | 77 | deltas = backend.log(backend.asarray(kernel_weights.T)) 78 | 79 | model_1 = WeightedKernelRidge(alpha=1, deltas=deltas, kernels="precomputed") 80 | pipe_1 = make_pipeline(column_kernelizer, model_1) 81 | 82 | # Fit the model on all targets 83 | pipe_1.fit(X_train, Y_train) 84 | 85 | ############################################################################### 86 | # compute test score 87 | test_scores_1 = pipe_1.score(X_test, Y_test) 88 | test_scores_1 = backend.to_numpy(test_scores_1) 89 | 90 | ############################################################################### 91 | # We can compare this model to a baseline model where the kernel weights are 92 | # all equal and not learnt. 
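# ``deltas="zeros"`` sets all log kernel weights to zero, i.e. all kernel
# weights equal to one.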
93 | 94 | model_2 = WeightedKernelRidge(alpha=1, deltas="zeros", kernels="precomputed") 95 | pipe_2 = make_pipeline(column_kernelizer, model_2) 96 | 97 | # Fit the model on all targets 98 | pipe_2.fit(X_train, Y_train) 99 | 100 | ############################################################################### 101 | # compute test score 102 | test_scores_2 = pipe_2.score(X_test, Y_test) 103 | test_scores_2 = backend.to_numpy(test_scores_2) 104 | 105 | ############################################################################### 106 | # Compare the predictions on a test set 107 | # ------------------------------------- 108 | import matplotlib.pyplot as plt 109 | 110 | plt.figure(figsize=(4, 3)) 111 | plt.hist(test_scores_2, np.linspace(0, 1, 30), alpha=0.7, 112 | label="Default deltas") 113 | plt.hist(test_scores_1, np.linspace(0, 1, 30), alpha=0.7, 114 | label="Ground truth deltas") 115 | plt.xlabel("$R^2$ generalization score") 116 | plt.ylabel("Number of voxels") 117 | plt.legend() 118 | plt.tight_layout() 119 | plt.show() 120 | -------------------------------------------------------------------------------- /doc/changelog.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | Development Version 5 | ------------------- 6 | 7 | - 8 | 9 | Version 0.4.5 10 | ------------- 11 | (*June 2024*) 12 | 13 | - FIX update `~himalaya.kernel_ridge.ColumnKernelizer` for scikit-learn versions >= 1.5 14 | 15 | Version 0.4.4 16 | ------------- 17 | (*March 2024*) 18 | 19 | - FIX cupy boolean dtype 20 | 21 | Version 0.4.3 22 | ------------- 23 | (*March 2024*) 24 | 25 | - FIX update `~himalaya.kernel_ridge.ColumnKernelizer` for scikit-learn versions > 1.4 26 | 27 | Version 0.4.2 28 | ------------- 29 | (*February 2023*) 30 | 31 | - ENH add better error message when ``torch.linalg.eigh`` fails. 32 | - ENH add :func:`~himalaya.kernel_ridge.solve_kernel_ridge_cv_svd` solver. It 33 | can be used with :class:`~himalaya.kernel_ridge.KernelRidgeCV` 34 | ``(solver="svd")``. 35 | 36 | Version 0.4.1 37 | ------------- 38 | (*February 2023*) 39 | 40 | - FIX avoid error in :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV` 41 | with ``solver_params(return_alphas=True)``. 42 | - ENH add ``fit_intercept`` in 43 | :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV`. 44 | - FIX torch 1.13.1 requires tensor masks to be on the same device as tensors. 45 | 46 | Version 0.4.0 47 | ------------- 48 | (*June 2022*) 49 | 50 | - DOC explain how to implement a winner-take-all model. 51 | - FIX comply with most recent scikit-learn's check_estimator. 52 | - FIX avoid an indexing error in the hypergradient solver, when early stopping 53 | after different numbers of iterations for different batches. 54 | 55 | Version 0.3.6 56 | ------------- 57 | (*April 2022*) 58 | 59 | - DOC improve documentation website, add estimator flowchart. 60 | - TST improve test robustness. 61 | - ENH add batching over targets in 62 | :func:`~himalaya.kernel_ridge.predict_weighted_kernel_ridge`. 63 | - ENH add ``solver="auto"`` in :class:`~himalaya.kernel_ridge.KernelRidge`, 64 | which switches solver based on the presence of a separate alpha per target. 65 | 66 | Version 0.3.5 67 | ------------- 68 | (*February 2022*) 69 | 70 | - MNT speed up examples on CPU, to build the doc faster on github actions. 71 | - ENH add batching over targets in :class:`~himalaya.ridge.Ridge`, 72 | :class:`~himalaya.kernel_ridge.KernelRidge`, and 73 | :class:`~himalaya.kernel_ridge.WeightedKernelRidge`. 
74 | - ENH add warnings to guide the user between using 75 | :class:`~himalaya.ridge.Ridge` or 76 | :class:`~himalaya.kernel_ridge.KernelRidge`. 77 | - ENH add user-friendly errors when the number of samples is inconsistent. 78 | - ENH raise ValueError if the indices in cross-validation exceed number of 79 | samples. 80 | 81 | Version 0.3.4 82 | ------------- 83 | (*November 2021*) 84 | 85 | - FIX :class:`~himalaya.ridge.Ridge` with ``n_samples < n_targets``. 86 | - FIX update of alphas when ``local_alpha=False`` in 87 | :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV`. 88 | - EXA refactor examples with new 89 | :func:`~himalaya.utils.generate_multikernel_dataset` function. 90 | - MNT add github actions for running tests, building and publishing the doc, 91 | and publishing to PyPI. 92 | 93 | Version 0.3.3 94 | ------------- 95 | (*November 2021*) 96 | 97 | - FIX :class:`~himalaya.kernel_ridge.KernelRidge` with 98 | ``n_samples < n_targets``. 99 | - FIX random search with single alpha in 100 | :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV`. 101 | 102 | Version 0.3.2 103 | ------------- 104 | (*November 2021*) 105 | 106 | - ENH add :func:`~himalaya.scoring.r2_score_split_svd` scoring function. 107 | - ENH add :func:`~himalaya.scoring.correlation_score_split` scoring function. 108 | - ENH add ``split`` parameter to the ``score`` method in 109 | :class:`~himalaya.kernel_ridge.WeightedKernelRidge`, 110 | :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV`, and 111 | :class:`~himalaya.ridge.GroupRidgeCV`. 112 | - ENH add ``force_cpu`` parameter in all estimators. 113 | - FIX remove deprecation warnings for cupy v9. 114 | - DOC mention that pytorch 1.9+ is preferred. 115 | 116 | Version 0.3.1 117 | ------------- 118 | (*September 2021*) 119 | 120 | - MNT Rename :class:`~himalaya.ridge.BandedRidgeCV` into 121 | :class:`~himalaya.ridge.GroupRidgeCV` (both names are available). 122 | - ENH improve robustness to noise in the cross-validation scores. 123 | - ENH start the random search with equal weights in 124 | :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV` 125 | and :class:`~himalaya.ridge.GroupRidgeCV`. 126 | - FIX remove deprecation warnings with pytorch 1.8. 127 | - TST improve test coverage. 128 | 129 | Version 0.3.0 130 | ------------- 131 | (*April 2021*) 132 | 133 | - ENH add ``fit_intercept`` parameter in :class:`~himalaya.ridge.Ridge`, 134 | :class:`~himalaya.ridge.RidgeCV`, and :class:`~himalaya.ridge.BandedRidgeCV`. 135 | - ENH add ``fit_intercept`` parameter in 136 | :class:`~himalaya.kernel_ridge.KernelRidge`, 137 | :class:`~himalaya.kernel_ridge.KernelRidgeCV`, 138 | :func:`~himalaya.kernel_ridge.solve_multiple_kernel_ridge_gradient_descent`, 139 | and :func:`~himalaya.kernel_ridge.solve_multiple_kernel_ridge_random_search`. 140 | - ENH add :class:`~himalaya.kernel_ridge.KernelCenterer`. 141 | - ENH allow change of backend midscript. 142 | - ENH Add option to return selected alpha values in 143 | :func:`~himalaya.kernel_ridge.solve_multiple_kernel_ridge_random_search`. 
144 | 145 | Version 0.2.0 146 | ------------- 147 | (*December 2020*) 148 | 149 | Version 0.1.0 150 | ------------- 151 | (*March 2020*) 152 | -------------------------------------------------------------------------------- /himalaya/ridge/_solvers.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | import warnings 3 | 4 | from ..backend import get_backend 5 | from ..utils import _batch_or_skip 6 | 7 | 8 | def solve_ridge_svd(X, Y, alpha=1., method="svd", fit_intercept=False, 9 | negative_eigenvalues="zeros", n_targets_batch=None, 10 | warn=True): 11 | """Solve ridge regression using SVD decomposition. 12 | 13 | Solve the ridge regression:: 14 | 15 | b* = argmin_b ||X @ b - Y||^2 + alpha ||b||^2 16 | 17 | Parameters 18 | ---------- 19 | X : array of shape (n_samples, n_features) 20 | Input features. 21 | Y : array of shape (n_samples, n_targets) 22 | Target data. 23 | alpha : float, or array of shape (n_targets, ) 24 | Regularization parameter. 25 | method : str in {"svd"} 26 | Method used to diagonalize the input feature matrix. 27 | fit_intercept : boolean 28 | Whether to fit an intercept. 29 | If False, X and Y must be zero-mean over samples. 30 | negative_eigenvalues : str in {"nan", "error", "zeros"} 31 | If the decomposition leads to negative eigenvalues (wrongly emerging 32 | from float32 errors): 33 | - "error" raises an error. 34 | - "zeros" replaces them with zeros. 35 | - "nan" returns nans if the regularization does not compensate 36 | twice the smallest negative value, else it ignores the problem. 37 | n_targets_batch : int or None 38 | Size of the batch over targets during cross-validation. 39 | Used for memory reasons. If None, uses all n_targets at once. 40 | warn : bool 41 | If True, warn if the number of samples is smaller than the number of 42 | features. 43 | 44 | Returns 45 | ------- 46 | weights : array of shape (n_features, n_targets) 47 | Ridge coefficients. 48 | intercept : array of shape (n_targets,) 49 | Intercept. Only returned when fit_intercept is True. 50 | """ 51 | backend = get_backend() 52 | if isinstance(alpha, numbers.Number) or alpha.ndim == 0: 53 | alpha = backend.ones_like(Y, shape=(1, )) * alpha 54 | 55 | X, Y, alpha = backend.check_arrays(X, Y, alpha) 56 | 57 | n_samples, n_features = X.shape 58 | if n_samples < n_features and warn: 59 | warnings.warn( 60 | "Solving ridge is slower than solving kernel ridge when n_samples " 61 | f"< n_features (here {n_samples} < {n_features}). " 62 | "Using a linear kernel in himalaya.kernel_ridge.KernelRidge or " 63 | "himalaya.kernel_ridge.solve_kernel_ridge_eigenvalues would be " 64 | "faster. Use warn=False to silence this warning.", UserWarning) 65 | if X.shape[0] != Y.shape[0]: 66 | raise ValueError("X and Y must have the same number of samples.") 67 | 68 | X_offset, Y_offset = None, None 69 | if fit_intercept: 70 | X_offset = X.mean(0) 71 | Y_offset = Y.mean(0) 72 | X = X - X_offset 73 | Y = Y - Y_offset 74 | 75 | if method == "svd": 76 | # SVD: X = U @ np.diag(eigenvalues) @ Vt 77 | U, eigenvalues, Vt = backend.svd(X, full_matrices=False) 78 | else: 79 | raise ValueError("Unknown method=%r." 
% (method, )) 80 | 81 | inverse = eigenvalues[:, None] / (alpha[None] + eigenvalues[:, None] ** 2) 82 | 83 | # negative eigenvalues can emerge from incorrect kernels, or from float32 84 | if eigenvalues[0] < 0: 85 | if negative_eigenvalues == "nan": 86 | if alpha < -eigenvalues[0] * 2: 87 | return backend.ones_like(Y) * backend.asarray( backend.nan, dtype=Y.dtype) 89 | else: 90 | pass 91 | 92 | elif negative_eigenvalues == "zeros": 93 | eigenvalues[eigenvalues < 0] = 0 94 | 95 | elif negative_eigenvalues == "error": 96 | raise RuntimeError( 97 | "Negative eigenvalues. Make sure the kernel is positive " 98 | "semi-definite, increase the regularization alpha, or use " 99 | "another solver.") 100 | else: 101 | raise ValueError("Unknown negative_eigenvalues=%r." % 102 | (negative_eigenvalues, )) 103 | 104 | n_samples, n_features = X.shape 105 | n_samples, n_targets = Y.shape 106 | weights = backend.zeros_like(X, shape=(n_features, n_targets), 107 | device="cpu") 108 | if n_targets_batch is None: 109 | n_targets_batch = n_targets 110 | 111 | for start in range(0, n_targets, n_targets_batch): 112 | batch = slice(start, start + n_targets_batch) 113 | 114 | iUT = _batch_or_skip(inverse, batch, 1)[:, None, :] * U.T[:, :, None] 115 | iUT = backend.transpose(iUT, (2, 0, 1)) 116 | # iUT.shape = (1 or n_targets_batch, n_samples, n_samples) 117 | 118 | if Y.shape[0] < Y.shape[1]: 119 | weights_batch = ((Vt.T @ iUT) @ Y.T[batch, :, None])[:, :, 0].T 120 | else: 121 | weights_batch = Vt.T @ (iUT @ Y.T[batch, :, None])[:, :, 0].T 122 | weights[:, batch] = backend.to_cpu(weights_batch) 123 | 124 | if fit_intercept: 125 | intercept = backend.to_cpu( Y_offset) - backend.to_cpu(X_offset) @ weights 127 | return weights, intercept 128 | else: 129 | return weights 130 | 131 | 132 | #: Dictionary with all ridge solvers 133 | RIDGE_SOLVERS = {"svd": solve_ridge_svd} 134 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Himalaya: Multiple-target linear models 2 | ======================================= 3 | 4 | |Github| |Python| |License| |Build| |Codecov| |Downloads| 5 | 6 | ``Himalaya`` [1]_ implements machine learning linear models in Python, focusing 7 | on computational efficiency for large numbers of targets. 8 | 9 | Use ``himalaya`` if you need a library that: 10 | 11 | - estimates linear models on large numbers of targets, 12 | - runs on CPU and GPU hardware, 13 | - provides estimators compatible with ``scikit-learn``'s API. 14 | 15 | ``Himalaya`` is stable (with particular care for backward compatibility) and 16 | open for public use (give it a star!). 17 | 18 | Example 19 | ======= 20 | 21 | .. code-block:: python 22 | 23 | import numpy as np 24 | n_samples, n_features, n_targets = 10, 5, 4 25 | np.random.seed(0) 26 | X = np.random.randn(n_samples, n_features) 27 | Y = np.random.randn(n_samples, n_targets) 28 | 29 | from himalaya.ridge import RidgeCV 30 | model = RidgeCV(alphas=[1, 10, 100]) 31 | model.fit(X, Y) 32 | print(model.best_alphas_) # [ 10. 100. 10. 100.] 33 | 34 | 35 | - The model ``RidgeCV`` uses the same API as ``scikit-learn`` 36 | estimators, with methods such as ``fit``, ``predict``, ``score``, etc. 37 | - The model is able to efficiently fit a large number of targets (routinely 38 | used with 100k targets). 39 | - The model selects the best hyperparameter ``alpha`` for each target 40 | independently, as shown below. 
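The fitted model can then be used like any ``scikit-learn`` estimator. As a
minimal sketch (in ``himalaya``, ``score`` returns one R^2 score per target):

.. code-block:: python

    Y_pred = model.predict(X)
    scores = model.score(X, Y)  # array of shape (n_targets, )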
41 | 42 | More examples 43 | ------------- 44 | 45 | Check more examples of use of ``himalaya`` in the `gallery of examples 46 | <https://gallantlab.github.io/himalaya/_auto_examples/index.html>`_. 47 | 48 | Tutorials using ``himalaya`` for fMRI 49 | ------------------------------------- 50 | 51 | ``Himalaya`` was designed primarily for functional magnetic resonance imaging 52 | (fMRI) encoding models. In-depth tutorials about using ``himalaya`` for fMRI 53 | encoding models can be found at `gallantlab/voxelwise_tutorials 54 | <https://github.com/gallantlab/voxelwise_tutorials>`_. 55 | 56 | Models 57 | ====== 58 | 59 | ``Himalaya`` implements the following models: 60 | 61 | - Ridge, RidgeCV 62 | - KernelRidge, KernelRidgeCV 63 | - GroupRidgeCV, MultipleKernelRidgeCV, WeightedKernelRidge 64 | - SparseGroupLassoCV 65 | 66 | 67 | See the `model descriptions 68 | <https://gallantlab.github.io/himalaya/models.html>`_ in the documentation 69 | website. 70 | 71 | Himalaya backends 72 | ================= 73 | 74 | ``Himalaya`` can be used seamlessly with different backends. 75 | The available backends are ``numpy`` (default), ``cupy``, ``torch``, and 76 | ``torch_cuda``. 77 | To change the backend, call: 78 | 79 | .. code-block:: python 80 | 81 | from himalaya.backend import set_backend 82 | backend = set_backend("torch") 83 | 84 | 85 | and give ``torch`` arrays as inputs to the ``himalaya`` solvers. For convenience, 86 | estimators implementing ``scikit-learn``'s API can cast arrays to the correct 87 | input type. 88 | 89 | GPU acceleration 90 | ---------------- 91 | 92 | To run ``himalaya`` on a graphics processing unit (GPU), you can use either 93 | the ``cupy`` or the ``torch_cuda`` backend: 94 | 95 | .. code-block:: python 96 | 97 | from himalaya.backend import set_backend 98 | backend = set_backend("cupy") # or "torch_cuda" 99 | 100 | data = backend.asarray(data) 101 | 102 | 103 | Installation 104 | ============ 105 | 106 | Dependencies 107 | ------------ 108 | 109 | - Python 3 110 | - Numpy 111 | - Scikit-learn 112 | 113 | Optional (GPU backends): 114 | 115 | - PyTorch (1.9+ preferred) 116 | - Cupy 117 | 118 | 119 | Standard installation 120 | --------------------- 121 | You may install the latest version of ``himalaya`` using the package manager 122 | ``pip``, which will automatically download ``himalaya`` from the Python Package 123 | Index (PyPI): 124 | 125 | .. code-block:: bash 126 | 127 | pip install himalaya 128 | 129 | 130 | Installation from source 131 | ------------------------ 132 | 133 | To install ``himalaya`` from the latest source (``main`` branch), you may 134 | call: 135 | 136 | .. code-block:: bash 137 | 138 | pip install git+https://github.com/gallantlab/himalaya.git 139 | 140 | 141 | Developers can also install ``himalaya`` in editable mode via: 142 | 143 | .. code-block:: bash 144 | 145 | git clone https://github.com/gallantlab/himalaya 146 | cd himalaya 147 | pip install --editable . 148 | 149 | 150 | .. |Github| image:: https://img.shields.io/badge/github-himalaya-blue 151 | :target: https://github.com/gallantlab/himalaya 152 | 153 | .. |Python| image:: https://img.shields.io/badge/python-3.7%2B-blue 154 | :target: https://www.python.org/downloads/release/python-370 155 | 156 | .. |License| image:: https://img.shields.io/badge/License-BSD%203--Clause-blue.svg 157 | :target: https://opensource.org/licenses/BSD-3-Clause 158 | 159 | .. |Build| image:: https://github.com/gallantlab/himalaya/actions/workflows/run_tests.yml/badge.svg 160 | :target: https://github.com/gallantlab/himalaya/actions/workflows/run_tests.yml 161 | 162 | .. 
|Codecov| image:: https://codecov.io/gh/gallantlab/himalaya/branch/main/graph/badge.svg?token=ECzjd9gvrw 163 | :target: https://codecov.io/gh/gallantlab/himalaya 164 | 165 | .. |Downloads| image:: https://pepy.tech/badge/himalaya 166 | :target: https://pepy.tech/project/himalaya 167 | 168 | 169 | Cite this package 170 | ================= 171 | 172 | If you use ``himalaya`` in your work, please give it a star, and cite our 173 | publication: 174 | 175 | .. [1] Dupré La Tour, T., Eickenberg, M., Nunez-Elizalde, A.O., & Gallant, J. L. (2022). 176 | Feature-space selection with banded ridge regression. `NeuroImage <https://doi.org/10.1016/j.neuroimage.2022.119728>`_. 177 | 178 | -------------------------------------------------------------------------------- /himalaya/progress_bar.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | 4 | 5 | def bar(iterable, title='', use_it=True): 6 | """Simple API for progress_bar. 7 | 8 | Parameters 9 | ---------- 10 | iterable : iterable 11 | Iterable that will make the progress bar progress. 12 | title : str 13 | Message to include at end of progress bar. 14 | use_it : bool 15 | If False, return the iterable unchanged and do not show a progress 16 | bar. 17 | 18 | Examples 19 | -------- 20 | >>> import time 21 | >>> from himalaya.progress_bar import bar 22 | >>> for ii in bar(range(10)): 23 | ...     time.sleep(0.5) 24 | """ 25 | if use_it: 26 | return ProgressBar(title=title, max_value=len(iterable))(iterable) 27 | else: 28 | return iterable 29 | 30 | 31 | class ProgressBar(): 32 | """Generate a command-line progress bar. 33 | 34 | Parameters 35 | ---------- 36 | max_value : int 37 | Maximum value of process (e.g. number of samples to process, bytes to 38 | download, etc.). 39 | initial_value : int 40 | Initial value of process, useful when resuming process from a specific 41 | value, defaults to 0. 42 | title : str 43 | Message to include at end of progress bar. 44 | max_chars : int 45 | Number of characters to use for progress bar (be sure to save some room 46 | for the message and % complete as well). 47 | progress_character : char 48 | Character in the progress bar that indicates the portion completed. 49 | spinner : bool 50 | Show a spinner. Useful for long-running processes that may not 51 | increment the progress bar very often. This provides the user with 52 | feedback that the progress has not stalled. 53 | 54 | Examples 55 | -------- 56 | >>> import time 57 | >>> from himalaya.progress_bar import ProgressBar 58 | >>> for ii in ProgressBar(title="La barre", max_value=10)(range(10)): 59 | ...     time.sleep(0.5) 60 | """ 61 | 62 | spinner_symbols = ['|', '/', '-', '\\'] 63 | template = '\r[{0}{1}] {2:0.0f}% {3} {4:.02f} sec | {5} | ' 64 | 65 | def __init__(self, title='', max_value=None, initial_value=0, max_chars=40, 66 | progress_character='.', spinner=False, verbose_bool=True): 67 | self.cur_value = initial_value 68 | self.max_value = max_value 69 | self.title = title 70 | self.max_chars = max_chars 71 | self.progress_character = progress_character 72 | self.spinner = spinner 73 | self.spinner_index = 0 74 | self.n_spinner = len(self.spinner_symbols) 75 | self._do_print = verbose_bool 76 | self.start = time.time() 77 | 78 | self.closed = False 79 | self.update(initial_value) 80 | 81 | def update(self, cur_value, title=None): 82 | """Update progressbar with current value of process. 83 | 84 | Parameters 85 | ---------- 86 | cur_value : number 87 | Current value of process. Should be <= max_value (but this is not 88 | enforced). 
89 |             (cur_value / max_value) * 100
90 |         title : str
91 |             Message to display to the right of the progressbar. If None, the
92 |             last message provided will be used. To clear the current message,
93 |             pass a null string, ''.
94 |         """
95 |         # Ensure floating-point division so we can get fractions of a percent
96 |         # for the progressbar.
97 |         self.cur_value = cur_value
98 |         max_value = self.max_value or 1
99 |         progress = min(float(self.cur_value) / max_value, 1.)
100 |         num_chars = int(progress * self.max_chars)
101 |         num_left = self.max_chars - num_chars
102 |
103 |         # Update the message
104 |         if title is not None:
105 |             self.title = title
106 |
107 |         # time from start
108 |         duration = time.time() - self.start
109 |
110 |         # The \r tells the cursor to return to the beginning of the line rather
111 |         # than starting a new line. This allows us to have a progressbar-style
112 |         # display in the console window.
113 |         bar = self.template.format(self.progress_character * num_chars,
114 |                                    ' ' * num_left, progress * 100,
115 |                                    self.spinner_symbols[self.spinner_index],
116 |                                    duration, self.title)
117 |         # Force a flush because sometimes when using bash scripts and pipes,
118 |         # the output is not printed until after the program exits.
119 |         if self._do_print:
120 |             sys.stdout.write(bar)
121 |             sys.stdout.flush()
122 |         # Increment the spinner
123 |         if self.spinner:
124 |             self.spinner_index = (self.spinner_index + 1) % self.n_spinner
125 |
126 |         if progress == 1:
127 |             self.close()
128 |
129 |     def update_with_increment_value(self, increment_value, title=None):
130 |         """Update progressbar with the value of the increment instead of the
131 |         current value of process as in update().
132 |
133 |         Parameters
134 |         ----------
135 |         increment_value : int
136 |             Value of the increment of process. The percent of the progressbar
137 |             will be computed as
138 |             ((self.cur_value + increment_value) / max_value) * 100
139 |         title : str
140 |             Message to display to the right of the progressbar. If None, the
141 |             last message provided will be used. To clear the current message,
142 |             pass a null string, ''.
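
        Examples
        --------
        A minimal sketch (hypothetical values):

        >>> pb = ProgressBar(title="steps", max_value=4)
        >>> for _ in range(4):
        ...     pb.update_with_increment_value(1)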
143 | """ 144 | self.cur_value += increment_value 145 | self.update(self.cur_value, title) 146 | 147 | def close(self): 148 | """Close the progress bar.""" 149 | if not self.closed: 150 | sys.stdout.write('\n') 151 | sys.stdout.flush() 152 | self.closed = True 153 | 154 | def __call__(self, sequence): 155 | sequence = iter(sequence) 156 | while True: 157 | try: 158 | yield next(sequence) 159 | self.update_with_increment_value(1) 160 | except StopIteration: 161 | return 162 | -------------------------------------------------------------------------------- /himalaya/lasso/tests/test_group_lasso.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import sklearn.linear_model 4 | 5 | from himalaya.backend import set_backend 6 | from himalaya.backend import ALL_BACKENDS 7 | from himalaya.utils import assert_array_almost_equal 8 | 9 | from himalaya.lasso import solve_sparse_group_lasso 10 | from himalaya.lasso import solve_sparse_group_lasso_cv 11 | 12 | 13 | def _create_dataset(backend): 14 | n_samples, n_features, n_targets = 10, 5, 3 15 | 16 | X = backend.asarray(backend.randn(n_samples, n_features), backend.float64) 17 | Y = backend.asarray(backend.randn(n_samples, n_targets), backend.float64) 18 | 19 | return X, Y 20 | 21 | 22 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 23 | def test_group_lasso_vs_ols(backend): 24 | backend = set_backend(backend) 25 | X, Y = _create_dataset(backend) 26 | 27 | coef = solve_sparse_group_lasso(X, Y, groups=None, l21_reg=0.0, l1_reg=0.0, 28 | max_iter=2000, tol=1e-8, 29 | progress_bar=False) 30 | 31 | ols = sklearn.linear_model.LinearRegression(fit_intercept=False).fit( 32 | backend.to_numpy(X), backend.to_numpy(Y)) 33 | assert_array_almost_equal(coef, ols.coef_.T, decimal=4) 34 | 35 | 36 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 37 | def test_group_lasso_decreasing(backend): 38 | backend = set_backend(backend) 39 | X, Y = _create_dataset(backend) 40 | 41 | coef, losses = solve_sparse_group_lasso(X, Y, max_iter=500, tol=1e-8, 42 | progress_bar=False, debug=True, 43 | momentum=False) 44 | 45 | assert backend.all(losses[1:] - losses[:-1] < 1e-14) 46 | 47 | 48 | @pytest.mark.parametrize('n_targets_batch', [None, 2]) 49 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 50 | def test_group_lasso_vs_lasso(backend, n_targets_batch): 51 | backend = set_backend(backend) 52 | X, Y = _create_dataset(backend) 53 | 54 | for l1_reg in backend.logspace(-5, 5, 5): 55 | 56 | coef = solve_sparse_group_lasso(X, Y, groups=None, l21_reg=0.0, 57 | l1_reg=l1_reg, max_iter=1000, tol=1e-8, 58 | progress_bar=False, debug=False, 59 | momentum=False, 60 | n_targets_batch=n_targets_batch) 61 | 62 | ols = sklearn.linear_model.Lasso(fit_intercept=False, 63 | alpha=float(l1_reg), 64 | max_iter=1000, 65 | tol=1e-8).fit(backend.to_numpy(X), 66 | backend.to_numpy(Y)) 67 | assert_array_almost_equal(coef, ols.coef_.T, decimal=5) 68 | 69 | 70 | @pytest.mark.parametrize('n_targets_batch', [None, 2]) 71 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 72 | def test_group_lasso_regularization_per_target(backend, n_targets_batch): 73 | backend = set_backend(backend) 74 | X, Y = _create_dataset(backend) 75 | 76 | n_targets = Y.shape[1] 77 | l21_reg = backend.rand(n_targets) 78 | l1_reg = backend.rand(n_targets) 79 | 80 | coef = solve_sparse_group_lasso(X, Y, groups=None, l21_reg=l21_reg, 81 | l1_reg=l1_reg, max_iter=1000, tol=1e-8, 82 | progress_bar=False, debug=False, 83 | momentum=False, 84 | 
n_targets_batch=n_targets_batch) 85 | 86 | for tt in range(n_targets): 87 | 88 | coef_tt = solve_sparse_group_lasso(X, Y[:, tt:tt + 1], groups=None, 89 | l21_reg=l21_reg[tt], 90 | l1_reg=l1_reg[tt], max_iter=1000, 91 | tol=1e-8, progress_bar=False, 92 | debug=False, momentum=False, 93 | n_targets_batch=n_targets_batch) 94 | assert_array_almost_equal(coef[:, tt:tt + 1], coef_tt) 95 | 96 | 97 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 98 | def test_group_lasso_cv(backend): 99 | backend = set_backend(backend) 100 | X, Y = _create_dataset(backend) 101 | 102 | n_targets = Y.shape[1] 103 | l21_regs = backend.rand(2) / 10 104 | l1_regs = backend.rand(3) / 10 105 | 106 | coef, best_l21_reg, best_l1_reg, all_cv_scores = \ 107 | solve_sparse_group_lasso_cv( 108 | X, Y, cv=2, groups=None, l21_regs=l21_regs, l1_regs=l1_regs, 109 | progress_bar=False, tol=1e-2, max_iter=100) 110 | 111 | assert best_l1_reg.shape == (n_targets, ) 112 | assert best_l21_reg.shape == (n_targets, ) 113 | assert coef.shape == (X.shape[1], n_targets) 114 | 115 | 116 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 117 | def test_group_lasso_group_sparsity(backend): 118 | backend = set_backend(backend) 119 | 120 | # Set dataset parameters 121 | import numpy as np 122 | group_sizes = [np.random.randint(10, 20) for i in range(5)] 123 | active_groups = [np.random.randint(2) for _ in group_sizes] 124 | active_groups[1] = 1 # make sure we have at least one active... 125 | active_groups[0] = 0 # ...and one inactive group 126 | groups = np.concatenate([size * [i] for i, size in enumerate(group_sizes)]) 127 | n_features = sum(group_sizes) 128 | n_samples = 10000 129 | noise_std = 10 130 | 131 | # Generate data matrix 132 | X = backend.randn(n_samples, n_features) 133 | w = backend.concatenate([ 134 | backend.randn(group_size) * is_active 135 | for group_size, is_active in zip(group_sizes, active_groups) 136 | ]).reshape(-1, 1) 137 | y = X @ w 138 | y = y + backend.randn(*y.shape) * noise_std 139 | 140 | # Generate estimator and train it 141 | coef = solve_sparse_group_lasso(X=X, Y=y, groups=groups, l21_reg=0.6, 142 | l1_reg=0, max_iter=100, tol=1e-4, 143 | progress_bar=False) 144 | 145 | # check the group sparsity of the result 146 | for group, active in enumerate(active_groups): 147 | if active: 148 | assert backend.all(coef[groups == group] != 0) 149 | else: 150 | assert backend.all(coef[groups == group] == 0) 151 | -------------------------------------------------------------------------------- /himalaya/backend/numpy.py: -------------------------------------------------------------------------------- 1 | """The "numpy" CPU backend, based on NumPy. 2 | 3 | To use this backend, call ``himalaya.backend.set_backend("numpy")``. 4 | """ 5 | import numpy as np 6 | try: 7 | import scipy.linalg as linalg 8 | use_scipy = True 9 | except ImportError: 10 | import numpy.linalg as linalg 11 | use_scipy = False 12 | 13 | ############################################################################### 14 | 15 | 16 | def apply_argmax(array, argmax, axis): 17 | """Apply precomputed argmax indices in multi dimension arrays 18 | 19 | array[np.argmax(array)] works fine in dimension 1, but not in higher ones. 20 | This function extends it to higher dimensions. 
21 | 22 | Examples 23 | -------- 24 | >>> import numpy as np 25 | >>> array = np.random.randn(10, 4, 8) 26 | >>> argmax = np.argmax(array, axis=1) 27 | >>> max_ = apply_argmax(array, argmax, axis=1) 28 | >>> assert np.all(max_ == np.max(array, axis=1)) 29 | """ 30 | argmax = np.expand_dims(argmax, axis=axis) 31 | max_ = np.take_along_axis(array, argmax, axis=axis) 32 | return np.take(max_, 0, axis=axis) 33 | 34 | 35 | def std_float64(array, axis=None, demean=True, keepdims=False): 36 | """Compute the standard deviation of X with double precision, 37 | and cast back the result to original dtype. 38 | """ 39 | return array.std(axis, dtype=np.float64, 40 | keepdims=keepdims).astype(array.dtype, copy=False) 41 | 42 | 43 | def mean_float64(array, axis=None, keepdims=False): 44 | """Compute the mean of X with double precision, 45 | and cast back the result to original dtype. 46 | """ 47 | return array.mean(axis, dtype=np.float64, 48 | keepdims=keepdims).astype(array.dtype, copy=False) 49 | 50 | 51 | ############################################################################### 52 | 53 | name = "numpy" 54 | argmax = np.argmax 55 | max = np.max 56 | min = np.min 57 | abs = np.abs 58 | randn = np.random.randn 59 | rand = np.random.rand 60 | matmul = np.matmul 61 | transpose = np.transpose 62 | stack = np.stack 63 | concatenate = np.concatenate 64 | sum = np.sum 65 | sqrt = np.sqrt 66 | any = np.any 67 | all = np.all 68 | nan = np.nan 69 | inf = np.inf 70 | isnan = np.isnan 71 | isinf = np.isinf 72 | logspace = np.logspace 73 | copy = np.copy 74 | bool = np.bool_ 75 | float32 = np.float32 76 | float64 = np.float64 77 | int32 = np.int32 78 | eigh = linalg.eigh 79 | norm = linalg.norm 80 | log = np.log 81 | exp = np.exp 82 | arange = np.arange 83 | flatnonzero = np.flatnonzero 84 | isin = np.isin 85 | searchsorted = np.searchsorted 86 | unique = np.unique 87 | einsum = np.einsum 88 | tanh = np.tanh 89 | power = np.power 90 | prod = np.prod 91 | zeros = np.zeros 92 | clip = np.clip 93 | sign = np.sign 94 | sort = np.sort 95 | flip = np.flip 96 | atleast_1d = np.atleast_1d 97 | finfo = np.finfo 98 | eye = np.eye 99 | 100 | 101 | def diagonal_view(array, axis1=0, axis2=1): 102 | """Return a view of the array diagonal""" 103 | assert array.ndim >= 2 104 | axis1, axis2 = min([axis1, axis2]), max([axis1, axis2]) 105 | shape = list(array.shape) 106 | new = min([shape[axis1], shape[axis2]]) 107 | shape.pop(axis1) 108 | shape.pop(axis2 - 1) 109 | shape.append(new) 110 | strides = list(array.strides) 111 | new = strides[axis1] + strides[axis2] 112 | strides.pop(axis1) 113 | strides.pop(axis2 - 1) 114 | strides.append(new) 115 | diag = np.lib.stride_tricks.as_strided(array, shape=shape, strides=strides) 116 | return diag 117 | 118 | 119 | def to_numpy(array): 120 | return array 121 | 122 | 123 | def zeros_like(array, shape=None, dtype=None, device=None): 124 | """Add a shape parameter in zeros_like.""" 125 | if shape is None: 126 | shape = array.shape 127 | if dtype is None: 128 | dtype = array.dtype 129 | return np.zeros(shape, dtype=dtype) 130 | 131 | 132 | def ones_like(array, shape=None, dtype=None, device=None): 133 | """Add a shape parameter in ones_like.""" 134 | if shape is None: 135 | shape = array.shape 136 | if dtype is None: 137 | dtype = array.dtype 138 | return np.ones(shape, dtype=dtype) 139 | 140 | 141 | def full_like(array, fill_value, shape=None, dtype=None, device=None): 142 | """Add a shape parameter in full_like.""" 143 | if shape is None: 144 | shape = array.shape 145 | if dtype is None: 
146 | dtype = array.dtype 147 | return np.full(shape, fill_value, dtype=dtype) 148 | 149 | 150 | def to_cpu(array): 151 | return array 152 | 153 | 154 | def to_gpu(array, device=None): 155 | return array 156 | 157 | 158 | def is_in_gpu(array): 159 | return False 160 | 161 | 162 | def asarray_like(x, ref): 163 | return np.asarray(x, dtype=ref.dtype) 164 | 165 | 166 | def check_arrays(*all_inputs): 167 | """Change all inputs into arrays (or list of arrays) using the same 168 | precision as the first one. Some arrays can be None. 169 | """ 170 | all_arrays = [] 171 | all_arrays.append(asarray(all_inputs[0])) 172 | dtype = all_arrays[0].dtype 173 | for tensor in all_inputs[1:]: 174 | if tensor is None: 175 | pass 176 | elif isinstance(tensor, list): 177 | tensor = [asarray(tt, dtype=dtype) for tt in tensor] 178 | else: 179 | tensor = asarray(tensor, dtype=dtype) 180 | all_arrays.append(tensor) 181 | return all_arrays 182 | 183 | 184 | def asarray(a, dtype=None, order=None, device=None): 185 | # works from numpy, lists, torch, and others 186 | try: 187 | return np.asarray(a, dtype=dtype, order=order) 188 | except Exception: 189 | pass 190 | # works from cupy 191 | try: 192 | import cupy 193 | return np.asarray(cupy.asnumpy(a), dtype=dtype, order=order) 194 | except Exception: 195 | pass 196 | # works from torch_cuda 197 | try: 198 | return np.asarray(a.cpu(), dtype=dtype, order=order) 199 | except Exception: 200 | pass 201 | 202 | return np.asarray(a, dtype=dtype, order=order) 203 | 204 | 205 | def svd(X, full_matrices=True): 206 | if X.ndim == 2 or not use_scipy: 207 | return linalg.svd(X, full_matrices=full_matrices) 208 | 209 | elif X.ndim == 3: 210 | UsV_list = [linalg.svd(Xi, full_matrices=full_matrices) for Xi in X] 211 | return map(np.stack, zip(*UsV_list)) 212 | else: 213 | raise NotImplementedError() 214 | -------------------------------------------------------------------------------- /himalaya/tests/test_validation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sklearn 3 | import pytest 4 | 5 | from himalaya.backend import set_backend 6 | from himalaya.backend import ALL_BACKENDS 7 | from himalaya.validation import _assert_all_finite 8 | from himalaya.validation import check_cv 9 | from himalaya.validation import validate_data 10 | 11 | 12 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 13 | def test_suppress_validation(backend): 14 | backend = set_backend(backend) 15 | X = backend.asarray([0, np.inf]) 16 | with pytest.raises(ValueError): 17 | _assert_all_finite(X, True) 18 | sklearn.set_config(assume_finite=True) 19 | _assert_all_finite(X, True) 20 | sklearn.set_config(assume_finite=False) 21 | with pytest.raises(ValueError): 22 | _assert_all_finite(X, True) 23 | 24 | 25 | def test_check_cv(): 26 | cv = [([0, 1], [2]), ([0, 2], [1]), ([1, 2], [0])] 27 | 28 | # works because cv does not exceed y.shape[0] 29 | y = np.zeros(4) 30 | check_cv(cv, y) 31 | # fails because cv does exceed y.shape[0] 32 | with pytest.raises(ValueError, match="exceed number of samples"): 33 | y = np.zeros(2) 34 | check_cv(cv, y) 35 | 36 | 37 | class DummyEstimator: 38 | """Dummy estimator for testing validate_data""" 39 | def __init__(self): 40 | pass 41 | 42 | 43 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 44 | def test_validate_data_X_only(backend): 45 | backend = set_backend(backend) 46 | X = backend.asarray([[1, 2], [3, 4]]) 47 | estimator = DummyEstimator() 48 | 49 | # Test reset=True (fit behavior) 50 | X_val = 
validate_data(estimator, X, reset=True, ndim=2) 51 | assert hasattr(estimator, 'n_features_in_') 52 | assert estimator.n_features_in_ == 2 53 | assert X_val.shape == (2, 2) 54 | 55 | # Test reset=False (predict behavior) - should work 56 | X_val2 = validate_data(estimator, X, reset=False, ndim=2) 57 | assert X_val2.shape == (2, 2) 58 | 59 | # Test reset=False with wrong number of features - should fail 60 | X_wrong = backend.asarray([[1, 2, 3], [4, 5, 6]]) 61 | with pytest.raises(ValueError, match="X has 3 features.*expecting 2 features"): 62 | validate_data(estimator, X_wrong, reset=False, ndim=2) 63 | 64 | 65 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 66 | def test_validate_data_X_and_y(backend): 67 | backend = set_backend(backend) 68 | X = backend.asarray([[1, 2], [3, 4]]) 69 | y = backend.asarray([1, 0]) 70 | estimator = DummyEstimator() 71 | 72 | # Test with both X and y - X gets ndim=2, y gets default [1,2] 73 | X_val, y_val = validate_data(estimator, X, y, reset=True, ndim=2) 74 | assert hasattr(estimator, 'n_features_in_') 75 | assert estimator.n_features_in_ == 2 76 | assert X_val.shape == (2, 2) 77 | assert y_val.shape == (2,) 78 | 79 | 80 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 81 | def test_validate_data_no_validation(backend): 82 | backend = set_backend(backend) 83 | estimator = DummyEstimator() 84 | 85 | # Test X='no_validation' only 86 | result = validate_data(estimator, X='no_validation', reset=True) 87 | assert result == 'no_validation' 88 | 89 | # Test y='no_validation' 90 | X = backend.asarray([[1, 2], [3, 4]]) 91 | X_val = validate_data(estimator, X, y='no_validation', reset=True, ndim=2) 92 | assert X_val.shape == (2, 2) 93 | assert hasattr(estimator, 'n_features_in_') 94 | 95 | # Test both 'no_validation' 96 | result = validate_data(estimator, X='no_validation', y='no_validation', reset=True) 97 | assert result == 'no_validation' 98 | 99 | 100 | def test_validate_data_error_without_n_features_in(): 101 | # Test that predict without prior fit doesn't crash 102 | estimator = DummyEstimator() 103 | X = np.array([[1, 2], [3, 4]]) 104 | 105 | # Should work fine if estimator doesn't have n_features_in_ yet 106 | X_val = validate_data(estimator, X, reset=False, ndim=2) 107 | assert X_val.shape == (2, 2) 108 | 109 | 110 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 111 | def test_validate_data_3d_feature_axis(backend): 112 | """Test feature axis handling for 3D arrays (precomputed kernels). 113 | 114 | This test ensures that validate_data correctly handles the feature dimension 115 | for 3D precomputed kernel arrays where: 116 | - During fit: shape (n_kernels, n_samples_train, n_samples_train) 117 | - During predict: shape (n_kernels, n_samples_test, n_samples_train) 118 | 119 | The "feature" dimension (last axis) should be consistent between fit and predict. 
120 | """ 121 | backend = set_backend(backend) 122 | estimator = DummyEstimator() 123 | 124 | # Simulate fit with 3D precomputed kernels: (n_kernels=2, n_train=600, n_train=600) 125 | X_fit = backend.asarray(np.random.randn(2, 600, 600)) 126 | X_val_fit = validate_data(estimator, X_fit, reset=True, ndim=3) 127 | 128 | # Should store n_features_in_ as last axis (600) 129 | assert hasattr(estimator, 'n_features_in_') 130 | assert estimator.n_features_in_ == 600 131 | assert X_val_fit.shape == (2, 600, 600) 132 | 133 | # Simulate predict with 3D kernels: (n_kernels=2, n_test=300, n_train=600) 134 | # The middle axis changes (test samples) but last axis stays same (training samples) 135 | X_predict = backend.asarray(np.random.randn(2, 300, 600)) 136 | X_val_predict = validate_data(estimator, X_predict, reset=False, ndim=3) 137 | 138 | # Should validate successfully - last axis (600) matches stored n_features_in_ 139 | assert X_val_predict.shape == (2, 300, 600) 140 | 141 | # Test failure case: wrong last dimension 142 | X_wrong = backend.asarray(np.random.randn(2, 300, 500)) # wrong last dim 143 | with pytest.raises(ValueError, match="X has 500 features.*expecting 600 features"): 144 | validate_data(estimator, X_wrong, reset=False, ndim=3) 145 | 146 | # Test explicit feature_axis parameter 147 | estimator2 = DummyEstimator() 148 | 149 | # Using feature_axis=1 (middle axis) for 3D - this was the old buggy behavior 150 | validate_data(estimator2, X_fit, reset=True, ndim=3, feature_axis=1) 151 | assert estimator2.n_features_in_ == 600 # middle axis 152 | 153 | # This should fail with the test data because middle axis is different (300 vs 600) 154 | with pytest.raises(ValueError, match="X has 300 features.*expecting 600 features"): 155 | validate_data(estimator2, X_predict, reset=False, ndim=3, feature_axis=1) 156 | -------------------------------------------------------------------------------- /himalaya/backend/cupy.py: -------------------------------------------------------------------------------- 1 | """The "cupy" GPU backend, based on CuPy. 2 | 3 | To use this backend, call ``himalaya.backend.set_backend("cupy")``. 4 | """ 5 | try: 6 | import cupy 7 | except ImportError as error: 8 | import sys 9 | if "pytest" in sys.modules: # if run through pytest 10 | import pytest 11 | pytest.skip("Cupy not installed.") 12 | raise ImportError("Cupy not installed.") from error 13 | 14 | from ._utils import warn_if_not_float32 15 | 16 | ############################################################################### 17 | 18 | 19 | def apply_argmax(array, argmax, axis): 20 | """Apply precomputed argmax indices in multi dimension arrays 21 | 22 | array[np.argmax(array)] works fine in dimension 1, but not in higher ones. 23 | This function extends it to higher dimensions. 24 | 25 | Examples 26 | -------- 27 | >>> import cupy 28 | >>> array = cupy.random.randn(10, 4, 8) 29 | >>> argmax = cupy.argmax(array, axis=1) 30 | >>> max_ = apply_argmax(array, argmax, axis=1) 31 | >>> assert cupy.all(max_ == cupy.max(array, axis=1)) 32 | """ 33 | argmax = cupy.expand_dims(argmax, axis=axis) 34 | max_ = cupy.take_along_axis(array, argmax, axis=axis) 35 | return cupy.take(max_, 0, axis=axis) 36 | 37 | 38 | def std_float64(array, axis=None, demean=True, keepdims=False): 39 | """Compute the standard deviation of X with double precision, 40 | and cast back the result to original dtype. 
41 | """ 42 | return array.std(axis, dtype=cupy.float64, 43 | keepdims=keepdims).astype(array.dtype, copy=False) 44 | 45 | 46 | def mean_float64(array, axis=None, keepdims=False): 47 | """Compute the mean of X with double precision, 48 | and cast back the result to original dtype. 49 | """ 50 | return array.mean(axis, dtype=cupy.float64, 51 | keepdims=keepdims).astype(array.dtype, copy=False) 52 | 53 | 54 | ############################################################################### 55 | 56 | name = "cupy" 57 | argmax = cupy.argmax 58 | max = cupy.max 59 | min = cupy.min 60 | abs = cupy.abs 61 | randn = cupy.random.randn 62 | rand = cupy.random.rand 63 | matmul = cupy.matmul 64 | transpose = cupy.transpose 65 | stack = cupy.stack 66 | concatenate = cupy.concatenate 67 | sum = cupy.sum 68 | sqrt = cupy.sqrt 69 | any = cupy.any 70 | all = cupy.all 71 | nan = cupy.nan 72 | inf = cupy.inf 73 | isnan = cupy.isnan 74 | isinf = cupy.isinf 75 | logspace = cupy.logspace 76 | copy = cupy.copy 77 | bool = cupy.bool_ 78 | float32 = cupy.float32 79 | float64 = cupy.float64 80 | int32 = cupy.int32 81 | eigh = cupy.linalg.eigh 82 | norm = cupy.linalg.norm 83 | log = cupy.log 84 | exp = cupy.exp 85 | arange = cupy.arange 86 | flatnonzero = cupy.flatnonzero 87 | unique = cupy.unique 88 | einsum = cupy.einsum 89 | tanh = cupy.tanh 90 | power = cupy.power 91 | prod = cupy.prod 92 | zeros = cupy.zeros 93 | sign = cupy.sign 94 | clip = cupy.clip 95 | sort = cupy.sort 96 | flip = cupy.flip 97 | atleast_1d = cupy.atleast_1d 98 | finfo = cupy.finfo 99 | eye = cupy.eye 100 | 101 | 102 | def diagonal_view(array, axis1=0, axis2=1): 103 | """Return a view of the array diagonal.""" 104 | return cupy.diagonal(array, 0, axis1=axis1, axis2=axis2) 105 | 106 | 107 | def to_numpy(array): 108 | return cupy.asnumpy(array) 109 | 110 | 111 | def isin(x, y): 112 | import numpy as np # XXX 113 | np_result = np.isin(to_numpy(x), to_numpy(y)) 114 | return asarray(np_result, dtype=bool) 115 | 116 | 117 | def searchsorted(x, y): 118 | import numpy as np # XXX 119 | np_result = np.searchsorted(to_numpy(x), to_numpy(y)) 120 | return asarray(np_result, dtype=cupy.int64) 121 | 122 | 123 | def zeros_like(array, shape=None, dtype=None, device=None): 124 | """Add a shape parameter in zeros_like.""" 125 | xp = cupy.get_array_module(array) 126 | if shape is None: 127 | shape = array.shape 128 | if dtype is None: 129 | dtype = array.dtype 130 | if device == "cpu": 131 | import numpy as xp 132 | return xp.zeros(shape, dtype=dtype) 133 | 134 | 135 | def ones_like(array, shape=None, dtype=None, device=None): 136 | """Add a shape parameter in ones_like.""" 137 | xp = cupy.get_array_module(array) 138 | if shape is None: 139 | shape = array.shape 140 | if dtype is None: 141 | dtype = array.dtype 142 | if device == "cpu": 143 | import numpy as xp 144 | return xp.ones(shape, dtype=dtype) 145 | 146 | 147 | def full_like(array, fill_value, shape=None, dtype=None, device=None): 148 | """Add a shape parameter in full_like.""" 149 | xp = cupy.get_array_module(array) 150 | if shape is None: 151 | shape = array.shape 152 | if dtype is None: 153 | dtype = array.dtype 154 | if device == "cpu": 155 | import numpy as xp 156 | return xp.full(shape, fill_value, dtype=dtype) 157 | 158 | 159 | def to_cpu(array): 160 | return cupy.asnumpy(array) 161 | 162 | 163 | def to_gpu(array, device=None): 164 | return cupy.asarray(array) 165 | 166 | 167 | def is_in_gpu(array): 168 | return getattr(array, "device", None) is not None 169 | 170 | 171 | def asarray(a, 
dtype=None, order=None, device=None):
172 |     if device == "cpu":
173 |         import numpy as np
174 |         return np.asarray(cupy.asnumpy(a), dtype, order)
175 |     else:
176 |         return cupy.asarray(a, dtype, order)
177 |
178 |
179 | def asarray_like(x, ref):
180 |     xp = cupy.get_array_module(ref)
181 |     return xp.asarray(x, dtype=ref.dtype)
182 |
183 |
184 | def check_arrays(*all_inputs):
185 |     """Change all inputs into arrays (or list of arrays) using the same
186 |     precision as the first one. Some arrays can be None.
187 |     """
188 |     all_arrays = []
189 |     all_arrays.append(asarray(all_inputs[0]))
190 |     dtype = all_arrays[0].dtype
191 |     warn_if_not_float32(dtype)
192 |     for tensor in all_inputs[1:]:
193 |         if tensor is None:
194 |             pass
195 |         elif isinstance(tensor, list):
196 |             tensor = [asarray(tt, dtype=dtype) for tt in tensor]
197 |         else:
198 |             tensor = asarray(tensor, dtype=dtype)
199 |         all_arrays.append(tensor)
200 |     return all_arrays
201 |
202 |
203 | def svd(X, full_matrices=True):
204 |     if X.ndim == 2:
205 |         return cupy.linalg.svd(X, full_matrices=full_matrices)
206 |     elif X.ndim == 3:
207 |         UsV_list = [
208 |             cupy.linalg.svd(Xi, full_matrices=full_matrices) for Xi in X
209 |         ]
210 |         return map(cupy.stack, zip(*UsV_list))
211 |     else:
212 |         raise NotImplementedError()
--------------------------------------------------------------------------------
/doc/static/logo.svg:
--------------------------------------------------------------------------------
1 | [SVG image: himalaya logo, generated with Matplotlib v3.3.3 (see doc/static/logo.py); vector data omitted.]
--------------------------------------------------------------------------------
/himalaya/utils.py:
--------------------------------------------------------------------------------
1 | import numbers
2 |
3 | import numpy as np
4 |
5 | from .backend import get_backend
6 | from .validation import check_random_state
7 |
8 |
9 | def compute_lipschitz_constants(Xs, kernelize="XTX", random_state=None):
10 |     """Compute Lipschitz constants of gradients of linear regression problems.
11 |
12 |     Find the largest eigenvalue of X^TX for several X, using power iteration.
13 |
14 |     Parameters
15 |     ----------
16 |     Xs : array of shape (n_kernels, n_samples, n_features) or \
17 |         (n_kernels, n_samples, n_samples)
18 |         Multiple linear features or kernels.
19 |     kernelize : str in {"XTX", "XXT", "X"}
20 |         Whether to consider X^TX, XX^T, or directly X.
21 |     random_state : int, or None
22 |         Random generator seed. Use an int for deterministic search.
23 |
24 |     Returns
25 |     -------
26 |     lipschitz : array of shape (n_kernels)
27 |         Lipschitz constants.
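
    Examples
    --------
    A minimal sketch with random inputs (illustrative shapes only):

    >>> import numpy as np
    >>> from himalaya.utils import compute_lipschitz_constants
    >>> Xs = np.random.randn(4, 10, 5)
    >>> compute_lipschitz_constants(Xs, kernelize="XTX").shape
    (4,)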
28 | """ 29 | backend = get_backend() 30 | 31 | if kernelize == "XXT": 32 | XTs = backend.transpose(Xs, (0, 2, 1)) 33 | kernels = backend.matmul(Xs, XTs) 34 | del XTs 35 | elif kernelize == "XTX": 36 | XTs = backend.transpose(Xs, (0, 2, 1)) 37 | kernels = backend.matmul(XTs, Xs) 38 | del XTs 39 | elif kernelize == "X": 40 | kernels = Xs 41 | else: 42 | raise ValueError("Unknown parameter kernelize=%r" % (kernelize, )) 43 | 44 | # check the random state 45 | random_generator = check_random_state(random_state) 46 | ys = random_generator.randn(*(kernels.shape[:2] + (1, ))) 47 | 48 | ys = backend.asarray_like(ys, Xs) 49 | for i in range(10): 50 | ys /= backend.norm(ys, axis=1, keepdims=True) + 1e-16 51 | ys = backend.matmul(kernels, ys) 52 | evs = backend.norm(ys, axis=1)[:, 0] 53 | return evs 54 | 55 | 56 | def assert_array_almost_equal(x, y, decimal=6, err_msg='', verbose=True): 57 | """Test array equality, casting all arrays to numpy.""" 58 | backend = get_backend() 59 | x = backend.to_numpy(x) 60 | y = backend.to_numpy(y) 61 | return np.testing.assert_array_almost_equal(x, y, decimal=decimal, 62 | err_msg=err_msg, 63 | verbose=verbose) 64 | 65 | 66 | def generate_multikernel_dataset(n_kernels=4, n_targets=500, 67 | n_samples_train=1000, n_samples_test=400, 68 | noise=0.1, kernel_weights=None, 69 | n_features_list=None, random_state=None): 70 | """Utility to generate datasets for the gallery of examples. 71 | 72 | Parameters 73 | ---------- 74 | n_kernels : int 75 | Number of kernels. 76 | n_targets : int 77 | Number of targets. 78 | n_samples_train : int 79 | Number of samples in the training set. 80 | n_samples_test : int 81 | Number of sample in the testing set. 82 | noise : float > 0 83 | Scale of the Gaussian white noise added to the targets. 84 | kernel_weights : array of shape (n_targets, n_kernels) or None 85 | Kernel weights used in the prediction of the targets. 86 | If None, generate random kernel weights from a Dirichlet distribution. 87 | n_features_list : list of int of length (n_kernels, ) or None 88 | Number of features in each kernel. If None, use 1000 features for each. 89 | random_state : int, or None 90 | Random generator seed use to generate the true kernel weights. 91 | 92 | Returns 93 | ------- 94 | X_train : array of shape (n_samples_train, n_features) 95 | Training features. 96 | X_test : array of shape (n_samples_test, n_features) 97 | Testing features. 98 | Y_train : array of shape (n_samples_train, n_targets) 99 | Training targets. 100 | Y_test : array of shape (n_samples_test, n_targets) 101 | Testing targets. 102 | kernel_weights : array of shape (n_targets, n_kernels) 103 | Kernel weights in the prediction of the targets. 104 | n_features_list : list of int of length (n_kernels, ) 105 | Number of features in each kernel. 106 | """ 107 | from .kernel_ridge import generate_dirichlet_samples 108 | backend = get_backend() 109 | 110 | # Create a few kernel weights if not given. 111 | if kernel_weights is None: 112 | kernel_weights = generate_dirichlet_samples(n_targets, n_kernels, 113 | concentration=[.3], 114 | random_state=random_state) 115 | kernel_weights = backend.to_numpy(kernel_weights) 116 | 117 | if n_features_list is None: 118 | n_features_list = np.full(n_kernels, fill_value=1000) 119 | 120 | rng = check_random_state(random_state) 121 | 122 | # Then, generate a random dataset, using the arbitrary scalings. 
123 | Xs_train, Xs_test = [], [] 124 | Y_train, Y_test = None, None 125 | for ii in range(n_kernels): 126 | n_features = n_features_list[ii] 127 | 128 | X_train = rng.randn(n_samples_train, n_features) 129 | X_test = rng.randn(n_samples_test, n_features) 130 | X_train -= X_train.mean(0) 131 | X_test -= X_test.mean(0) 132 | Xs_train.append(X_train) 133 | Xs_test.append(X_test) 134 | 135 | weights = rng.randn(n_features, n_targets) / n_features 136 | weights *= kernel_weights[:, ii] ** 0.5 137 | 138 | if ii == 0: 139 | Y_train = X_train @ weights 140 | Y_test = X_test @ weights 141 | else: 142 | Y_train += X_train @ weights 143 | Y_test += X_test @ weights 144 | 145 | std = Y_train.std(0)[None] 146 | Y_train /= std 147 | Y_test /= std 148 | 149 | Y_train += rng.randn(n_samples_train, n_targets) * noise 150 | Y_test += rng.randn(n_samples_test, n_targets) * noise 151 | Y_train -= Y_train.mean(0) 152 | Y_test -= Y_test.mean(0) 153 | 154 | # Concatenate the feature spaces. 155 | X_train = backend.asarray(np.concatenate(Xs_train, 1), dtype="float32") 156 | X_test = backend.asarray(np.concatenate(Xs_test, 1), dtype="float32") 157 | Y_train = backend.asarray(Y_train, dtype="float32") 158 | Y_test = backend.asarray(Y_test, dtype="float32") 159 | kernel_weights = backend.asarray(kernel_weights, dtype="float32") 160 | 161 | return X_train, X_test, Y_train, Y_test, kernel_weights, n_features_list 162 | 163 | 164 | def _batch_or_skip(array, batch, axis): 165 | """Apply a batch on given axis, or skip if the dimension is equal to 1.""" 166 | skip = (array is None or isinstance(array, numbers.Number) 167 | or array.ndim == 0 or array.shape[axis] == 1) # noqa 168 | if skip: 169 | return array 170 | else: 171 | # Not general but works with slices in `batch`. 172 | if axis == 0: 173 | return array[batch] 174 | elif axis == 1: 175 | return array[:, batch] 176 | else: 177 | raise NotImplementedError() 178 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Himalaya documentation build configuration file. 4 | # 5 | # This file is execfile()d with the current directory set to its 6 | # containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | # If extensions (or modules to document with autodoc) are in another directory, 15 | # add these directories to sys.path here. If the directory is relative to the 16 | # documentation root, use os.path.abspath to make it absolute, like shown here. 17 | # 18 | # import os 19 | # import sys 20 | # sys.path.insert(0, os.path.abspath('.')) 21 | 22 | from sphinx_gallery.sorting import FileNameSortKey 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | # 28 | # needs_sphinx = '1.0' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 
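# Each extension listed here must be importable in the build environment,
# otherwise `make html` will typically fail at startup (numpydoc,
# sphinx_gallery and sphinxcontrib-mermaid are third-party packages
# installed separately).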
33 | extensions = [ 34 | 'sphinx.ext.autodoc', 35 | 'sphinx.ext.autosummary', 36 | 'numpydoc', 37 | 'sphinx.ext.intersphinx', 38 | 'sphinx.ext.mathjax', 39 | 'sphinx.ext.viewcode', 40 | 'sphinx.ext.githubpages', 41 | 'sphinx_gallery.gen_gallery', 42 | 'sphinxcontrib.mermaid', 43 | ] 44 | 45 | # generate autosummary even if no references 46 | autosummary_generate = True 47 | 48 | # Sphinx-gallery 49 | sphinx_gallery_conf = { 50 | # path to your examples scripts 51 | 'examples_dirs': '../examples', 52 | # path where to save gallery generated examples 53 | 'gallery_dirs': '_auto_examples', 54 | # which files to execute? only those with "plot_" 55 | 'filename_pattern': 'plot_', 56 | # 'ignore_pattern': 'download', 57 | 'within_subsection_order': FileNameSortKey, 58 | 'remove_config_comments': 'True', 59 | 'plot_gallery': 'True', 60 | 'thumbnail_size': (480, 250), 61 | 'download_all_examples': False, 62 | } 63 | 64 | # Add any paths that contain templates here, relative to this directory. 65 | templates_path = ['static'] 66 | 67 | # The suffix(es) of source filenames. 68 | # You can specify multiple suffix as a list of string: 69 | # 70 | # source_suffix = ['.rst', '.md'] 71 | source_suffix = '.rst' 72 | 73 | # The main toctree document. 74 | main_doc = 'index' 75 | 76 | # General information about the project. 77 | project = u'Himalaya' 78 | copyright = u'2023, Gallant lab' 79 | author = u'Tom Dupre la Tour' 80 | 81 | # The version info for the project you're documenting, acts as replacement for 82 | # |version| and |release|, also used in various other places throughout the 83 | # built documents. 84 | # 85 | # The short X.Y version. 86 | import himalaya 87 | version = himalaya.__version__ 88 | # The full version, including alpha/beta/rc tags. 89 | release = himalaya.__version__ 90 | 91 | # The language for content autogenerated by Sphinx. Refer to documentation 92 | # for a list of supported languages. 93 | # 94 | # This is also used if you do content translation via gettext catalogs. 95 | # Usually you set "language" from the command line for these cases. 96 | language = "en" 97 | 98 | # List of patterns, relative to source directory, that match files and 99 | # directories to ignore when looking for source files. 100 | # This patterns also effect to html_static_path and html_extra_path 101 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 102 | 103 | # The name of the Pygments (syntax highlighting) style to use. 104 | pygments_style = 'sphinx' 105 | 106 | # If true, `todo` and `todoList` produce output, else they produce nothing. 107 | todo_include_todos = False 108 | 109 | # -- Options for HTML output ---------------------------------------------- 110 | 111 | # The theme to use for HTML and HTML Help pages. See the documentation for 112 | # a list of builtin themes. 113 | # 114 | html_theme = 'alabaster' 115 | 116 | # Theme options are theme-specific and customize the look and feel of a theme 117 | # further. For a list of options available for each theme, see the 118 | # documentation. 119 | html_theme_options = { 120 | 'github_user': 'gallantlab', 121 | 'github_repo': 'himalaya', 122 | 'github_type': 'star', 123 | 'page_width': '1200px', 124 | 'sidebar_width': '235px', 125 | 'logo': 'logo.svg', 126 | 'logo_name': 'himalaya', 127 | # 'description': 'Himalaya', 128 | 'fixed_sidebar': 'True', 129 | } 130 | 131 | # Add any paths that contain custom static files (such as style sheets) here, 132 | # relative to this directory. 
They are copied after the builtin static files, 133 | # so a file named "default.css" will overwrite the builtin "default.css". 134 | html_static_path = ['static'] 135 | 136 | # Custom sidebar templates, must be a dictionary that maps document names 137 | # to template names. 138 | # 139 | # This is required for the alabaster theme 140 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 141 | html_sidebars = { 142 | '**': [ 143 | 'about.html', 144 | 'relations.html', # needs 'show_related': True theme option to display 145 | 'navigation.html', 146 | 'searchbox.html', 147 | ] 148 | } 149 | 150 | # -- Options for HTMLHelp output ------------------------------------------ 151 | 152 | # Output file base name for HTML help builder. 153 | htmlhelp_basename = 'Himalayadoc' 154 | 155 | # -- Options for LaTeX output --------------------------------------------- 156 | 157 | latex_elements = { 158 | # The paper size ('letterpaper' or 'a4paper'). 159 | # 160 | # 'papersize': 'letterpaper', 161 | 162 | # The font size ('10pt', '11pt' or '12pt'). 163 | # 164 | # 'pointsize': '10pt', 165 | 166 | # Additional stuff for the LaTeX preamble. 167 | # 168 | # 'preamble': '', 169 | 170 | # Latex figure (float) alignment 171 | # 172 | # 'figure_align': 'htbp', 173 | } 174 | 175 | # Grouping the document tree into LaTeX files. List of tuples 176 | # (source start file, target name, title, 177 | # author, documentclass [howto, manual, or own class]). 178 | latex_documents = [ 179 | (main_doc, 'Himalaya.tex', u'Himalaya Documentation', u'Gallant lab', 180 | 'manual'), 181 | ] 182 | 183 | # -- Options for manual page output --------------------------------------- 184 | 185 | # One entry per manual page. List of tuples 186 | # (source start file, name, description, authors, manual section). 187 | man_pages = [(main_doc, 'himalaya', u'Himalaya Documentation', [author], 1)] 188 | 189 | # -- Options for Texinfo output ------------------------------------------- 190 | 191 | # Grouping the document tree into Texinfo files. List of tuples 192 | # (source start file, target name, title, author, 193 | # dir menu entry, description, category) 194 | texinfo_documents = [ 195 | (main_doc, 'Himalaya', u'Himalaya Documentation', author, 'Himalaya', 196 | 'One line description of project.', 'Miscellaneous'), 197 | ] 198 | 199 | # Example configuration for intersphinx: refer to the Python standard library. 200 | intersphinx_mapping = {'https://docs.python.org/': None} 201 | -------------------------------------------------------------------------------- /himalaya/lasso/_sklearn_api.py: -------------------------------------------------------------------------------- 1 | from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin 2 | from sklearn.utils.validation import check_is_fitted 3 | 4 | from ._group_lasso import solve_sparse_group_lasso_cv 5 | 6 | from ..validation import check_array 7 | from ..validation import check_cv 8 | from ..validation import validate_data 9 | from ..validation import _get_string_dtype 10 | from ..backend import get_backend 11 | from ..backend import force_cpu_backend 12 | from ..scoring import r2_score 13 | 14 | 15 | class SparseGroupLassoCV(MultiOutputMixin, RegressorMixin, BaseEstimator): 16 | """Sparse group Lasso 17 | 18 | Solved with hyperparameter grid-search over cross-validation. 19 | 20 | Parameters 21 | ---------- 22 | groups : array of shape (n_features, ) or None 23 | Encoding of the group of each feature. 
If None, all features are
24 |         gathered in one group, and the problem is equivalent to the Lasso.
25 |
26 |     l21_regs : array of shape (n_l21_regs, )
27 |         All the group Lasso regularization parameters tested.
28 |
29 |     l1_regs : array of shape (n_l1_regs, )
30 |         All the Lasso regularization parameters tested.
31 |
32 |     solver : str
33 |         Algorithm used during the fit, "proximal_gradient" only for now.
34 |
35 |     solver_params : dict or None
36 |         Additional parameters for the solver.
37 |         See more details in the docstring of the function:
38 |         ``SparseGroupLassoCV.ALL_SOLVERS[solver]``
39 |
40 |     cv : int or scikit-learn splitter
41 |         Cross-validation splitter. If an int, KFold is used.
42 |
43 |     force_cpu : bool
44 |         If True, computations will be performed on CPU, ignoring the
45 |         current backend. If False, use the current backend.
46 |
47 |     Attributes
48 |     ----------
49 |     coef_ : array of shape (n_features, ) or (n_features, n_targets)
50 |         Coefficients of the linear model. Always on CPU.
51 |
52 |     best_l21_reg_ : array of shape (n_targets, )
53 |         Best hyperparameter per target.
54 |
55 |     best_l1_reg_ : array of shape (n_targets, )
56 |         Best hyperparameter per target.
57 |
58 |     cv_scores_ : array of shape (n_l21_regs * n_l1_regs, n_targets)
59 |         Cross-validation scores of all tested hyperparameters.
60 |         The scores are computed with r2_score.
61 |
62 |     n_features_in_ : int
63 |         Number of features used during the fit.
64 |
65 |     Examples
66 |     --------
67 |     >>> from himalaya.lasso import SparseGroupLassoCV
68 |     >>> import numpy as np
69 |     >>> n_samples, n_features, n_targets = 10, 5, 3
70 |     >>> X = np.random.randn(n_samples, n_features)
71 |     >>> Y = np.random.randn(n_samples, n_targets)
72 |     >>> clf = SparseGroupLassoCV()
73 |     >>> clf.fit(X, Y)
74 |     SparseGroupLassoCV()
75 |     """
76 |     ALL_SOLVERS = dict(proximal_gradient=solve_sparse_group_lasso_cv)
77 |
78 |     def __init__(self, groups=None, l1_regs=[0], l21_regs=[0],
79 |                  solver="proximal_gradient", solver_params=None, cv=5,
80 |                  force_cpu=False):
81 |         self.groups = groups
82 |         self.l1_regs = l1_regs
83 |         self.l21_regs = l21_regs
84 |         self.solver = solver
85 |         self.solver_params = solver_params
86 |         self.cv = cv
87 |         self.force_cpu = force_cpu
88 |
89 |     @force_cpu_backend
90 |     def fit(self, X, y):
91 |         """Fit the model
92 |
93 |         Parameters
94 |         ----------
95 |         X : array of shape (n_samples, n_features)
96 |             Training data.
97 |
98 |         y : array of shape (n_samples,) or (n_samples, n_targets)
99 |             Target values.
100 |
101 |         Returns
102 |         -------
103 |         self : returns an instance of self.
104 |         """
105 |         X, y = validate_data(self, X, y, reset=True, accept_sparse=False, ndim=2)
106 |         self.dtype_ = _get_string_dtype(X)
107 |         y = check_array(y, dtype=self.dtype_, ndim=[1, 2])
108 |         if X.shape[0] != y.shape[0]:
109 |             raise ValueError("Inconsistent number of samples.")
110 |         cv = check_cv(self.cv, y)
111 |         ravel = False
112 |         if y.ndim == 1:
113 |             y = y[:, None]
114 |             ravel = True
115 |
116 |         results = self._call_solver(X=X, Y=y, groups=self.groups, cv=cv,
117 |                                     l21_regs=self.l21_regs,
118 |                                     l1_regs=self.l1_regs)
119 |         self.coef_, self.best_l21_reg_, self.best_l1_reg_ = results[:3]
120 |         self.cv_scores_ = results[3]
121 |
122 |         if ravel:
123 |             self.coef_ = self.coef_[:, 0]
124 |
125 |         return self
126 |
127 |     def _call_solver(self, **direct_params):
128 |         if self.solver not in self.ALL_SOLVERS:
129 |             raise ValueError("Unknown solver=%r."
% self.solver) 130 | 131 | function = self.ALL_SOLVERS[self.solver] 132 | solver_params = self.solver_params or {} 133 | 134 | # check duplicated parameters 135 | intersection = set(direct_params.keys()).intersection( 136 | set(solver_params.keys())) 137 | if intersection: 138 | raise ValueError( 139 | 'Parameters %s should not be given in solver_params, since ' 140 | 'they are either fixed or have a direct parameter in %s.' % 141 | (intersection, self.__class__.__name__)) 142 | 143 | return function(**direct_params, **solver_params) 144 | 145 | @force_cpu_backend 146 | def predict(self, X): 147 | """Predict using the model. 148 | 149 | Parameters 150 | ---------- 151 | X : array of shape (n_samples_test, n_features) 152 | Samples. 153 | 154 | Returns 155 | ------- 156 | Y_hat : array of shape (n_samples,) or (n_samples, n_targets) 157 | Returns predicted values. 158 | """ 159 | backend = get_backend() 160 | check_is_fitted(self) 161 | X = validate_data(self, X, reset=False, dtype=self.dtype_, accept_sparse=False, ndim=2) 162 | Y_hat = backend.to_numpy(X) @ backend.to_numpy(self.coef_) 163 | return backend.asarray_like(Y_hat, ref=X) 164 | 165 | @force_cpu_backend 166 | def score(self, X, y): 167 | """Return the coefficient of determination R^2 of the prediction. 168 | 169 | Parameters 170 | ---------- 171 | X : array of shape (n_samples_test, n_features) 172 | Samples. 173 | 174 | y : array-like of shape (n_samples,) or (n_samples, n_targets) 175 | True values for X. 176 | 177 | Returns 178 | ------- 179 | score : array of shape (n_targets, ) 180 | R^2 of self.predict(X) versus y. 181 | """ 182 | y_pred = self.predict(X) 183 | y_true = check_array(y, dtype=self.dtype_, ndim=self.coef_.ndim) 184 | 185 | if y_true.ndim == 1: 186 | return r2_score(y_true[:, None], y_pred[:, None])[0] 187 | else: 188 | return r2_score(y_true, y_pred) 189 | 190 | def _more_tags(self): 191 | return {'requires_y': True} 192 | 193 | def __sklearn_tags__(self): 194 | tags = super().__sklearn_tags__() 195 | tags.target_tags.required = True 196 | return tags 197 | -------------------------------------------------------------------------------- /examples/multiple_kernel_ridge/plot_mkr_1_sklearn_api.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multiple-kernel ridge with scikit-learn API 3 | =========================================== 4 | This example demonstrates how to solve multiple kernel ridge regression, using 5 | scikit-learn API. 6 | """ 7 | 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | 11 | from himalaya.backend import set_backend 12 | from himalaya.kernel_ridge import KernelRidgeCV 13 | from himalaya.kernel_ridge import MultipleKernelRidgeCV 14 | from himalaya.kernel_ridge import Kernelizer 15 | from himalaya.kernel_ridge import ColumnKernelizer 16 | from himalaya.utils import generate_multikernel_dataset 17 | 18 | from sklearn.pipeline import make_pipeline 19 | from sklearn import set_config 20 | set_config(display='diagram') 21 | 22 | # sphinx_gallery_thumbnail_number = 2 23 | ############################################################################### 24 | # In this example, we use the ``torch_cuda`` backend. 25 | # 26 | # Torch can perform computations both on CPU and GPU. To use CPU, use the 27 | # "torch" backend, to use GPU, use the "torch_cuda" backend. 
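#
# Below, ``on_error="warn"`` makes ``set_backend`` warn and keep the current
# backend instead of raising an error when the requested GPU backend is not
# available, so this example also runs on machines without a GPU.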
28 | 29 | backend = set_backend("torch_cuda", on_error="warn") 30 | 31 | ############################################################################### 32 | # Generate a random dataset 33 | # ------------------------- 34 | # - X_train : array of shape (n_samples_train, n_features) 35 | # - X_test : array of shape (n_samples_test, n_features) 36 | # - Y_train : array of shape (n_samples_train, n_targets) 37 | # - Y_test : array of shape (n_samples_test, n_targets) 38 | 39 | (X_train, X_test, Y_train, Y_test, kernel_weights, 40 | n_features_list) = generate_multikernel_dataset(n_kernels=3, n_targets=50, 41 | n_samples_train=600, 42 | n_samples_test=300, 43 | random_state=42) 44 | 45 | feature_names = [f"Feature space {ii}" for ii in range(len(n_features_list))] 46 | 47 | ############################################################################### 48 | # We could precompute the kernels by hand on ``Xs_train``, as done in 49 | # ``plot_mkr_random_search.py``. Instead, here we use the ``ColumnKernelizer`` 50 | # to make a ``scikit-learn`` ``Pipeline``. 51 | 52 | # Find the start and end of each feature space X in Xs 53 | start_and_end = np.concatenate([[0], np.cumsum(n_features_list)]) 54 | slices = [ 55 | slice(start, end) 56 | for start, end in zip(start_and_end[:-1], start_and_end[1:]) 57 | ] 58 | 59 | ############################################################################### 60 | # Create a different ``Kernelizer`` for each feature space. Here we use a 61 | # linear kernel for all feature spaces, but ``ColumnKernelizer`` accepts any 62 | # ``Kernelizer``, or ``scikit-learn`` ``Pipeline`` ending with a 63 | # ``Kernelizer``. 64 | kernelizers = [(name, Kernelizer(), slice_) 65 | for name, slice_ in zip(feature_names, slices)] 66 | column_kernelizer = ColumnKernelizer(kernelizers) 67 | 68 | # Note that ``ColumnKernelizer`` has a parameter ``n_jobs`` to parallelize each 69 | # kernelizer, yet such parallelism does not work with GPU arrays. 70 | 71 | ############################################################################### 72 | # Define the model 73 | # ---------------- 74 | # 75 | # The class takes a number of common parameters during initialization, such as 76 | # `kernels` or `solver`. Since the solver parameters might be different 77 | # depending on the solver, they can be passed in the `solver_params` parameter. 78 | 79 | ############################################################################### 80 | # Here we use the "random_search" solver. 81 | # We can check its specific parameters in the function docstring: 82 | solver_function = MultipleKernelRidgeCV.ALL_SOLVERS["random_search"] 83 | print("Docstring of the function %s:" % solver_function.__name__) 84 | print(solver_function.__doc__) 85 | 86 | ############################################################################### 87 | # We use 100 iterations to have a reasonably fast example (~40 sec). 88 | # To have a better convergence, we probably need more iterations. 89 | # Note that there is currently no stopping criterion in this method. 90 | n_iter = 100 91 | 92 | ############################################################################### 93 | # Grid of regularization parameters. 94 | alphas = np.logspace(-10, 10, 41) 95 | 96 | ############################################################################### 97 | # Batch parameters are used to reduce the necessary GPU memory. A larger value 98 | # will be a bit faster, but the solver might crash if it runs out of memory. 
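# If the solver runs out of GPU memory, a sensible first fix is to reduce
# ``n_targets_batch`` and ``n_alphas_batch`` (e.g. halve them), at the cost
# of a slightly slower fit.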
99 | # Optimal values depend on the size of your dataset. 100 | n_targets_batch = 1000 101 | n_alphas_batch = 20 102 | n_targets_batch_refit = 200 103 | 104 | solver_params = dict(n_iter=n_iter, alphas=alphas, 105 | n_targets_batch=n_targets_batch, 106 | n_alphas_batch=n_alphas_batch, 107 | n_targets_batch_refit=n_targets_batch_refit, 108 | jitter_alphas=True) 109 | 110 | model = MultipleKernelRidgeCV(kernels="precomputed", solver="random_search", 111 | solver_params=solver_params) 112 | 113 | ############################################################################### 114 | # Define and fit the pipeline 115 | pipe = make_pipeline(column_kernelizer, model) 116 | pipe.fit(X_train, Y_train) 117 | 118 | ############################################################################### 119 | # Plot the convergence curve 120 | # -------------------------- 121 | 122 | # ``cv_scores`` gives the scores for each sampled kernel weights. 123 | # The convergence curve is thus the current maximum for each target. 124 | cv_scores = backend.to_numpy(pipe[1].cv_scores_) 125 | current_max = np.maximum.accumulate(cv_scores, axis=0) 126 | mean_current_max = np.mean(current_max, axis=1) 127 | 128 | x_array = np.arange(1, len(mean_current_max) + 1) 129 | plt.plot(x_array, mean_current_max, '-o') 130 | plt.grid("on") 131 | plt.xlabel("Number of kernel weights sampled") 132 | plt.ylabel("L2 negative loss (higher is better)") 133 | plt.title("Convergence curve, averaged over targets") 134 | plt.tight_layout() 135 | plt.show() 136 | 137 | ############################################################################### 138 | # Compare to ``KernelRidgeCV`` 139 | # ---------------------------- 140 | # Compare to a baseline ``KernelRidgeCV`` model with all the concatenated 141 | # features. Comparison is performed using the prediction scores on the test 142 | # set. 143 | 144 | ############################################################################### 145 | # Fit the baseline model ``KernelRidgeCV`` 146 | baseline = KernelRidgeCV(kernel="linear", alphas=alphas) 147 | baseline.fit(X_train, Y_train) 148 | 149 | ############################################################################### 150 | # Compute scores of both models 151 | scores = pipe.score(X_test, Y_test) 152 | scores = backend.to_numpy(scores) 153 | 154 | scores_baseline = baseline.score(X_test, Y_test) 155 | scores_baseline = backend.to_numpy(scores_baseline) 156 | 157 | ############################################################################### 158 | # Plot histograms 159 | bins = np.linspace(0, max(scores_baseline.max(), scores.max()), 50) 160 | plt.hist(scores_baseline, bins, alpha=0.7, label="KernelRidgeCV") 161 | plt.hist(scores, bins, alpha=0.7, label="MultipleKernelRidgeCV") 162 | plt.xlabel(r"$R^2$ generalization score") 163 | plt.title("Histogram over targets") 164 | plt.legend() 165 | plt.show() 166 | -------------------------------------------------------------------------------- /doc/models.rst: -------------------------------------------------------------------------------- 1 | Model descriptions 2 | ================== 3 | 4 | This package implements a number of models. 5 | 6 | Ridge 7 | ----- 8 | 9 | Let :math:`X\in \mathbb{R}^{n\times p}` be a feature matrix with :math:`n` 10 | samples and :math:`p` features, :math:`y\in \mathbb{R}^n` a target vector, and 11 | :math:`\alpha > 0` a fixed regularization hyperparameter. Ridge regression 12 | [1]_ defines the weight vector :math:`b^*\in \mathbb{R}^p` as: 13 | 14 | .. 
math:: 15 | b^* = \arg\min_b \|Xb - y\|_2^2 + \alpha \|b\|_2^2. 16 | 17 | The equation has a closed-form solution :math:`b^* = M y`, where :math:`M = 18 | (X^\top X + \alpha I_p)^{-1}X^\top \in \mathbb{R}^{p \times n}`. 19 | 20 | .. admonition:: This model is implemented in 21 | 22 | - :class:`~himalaya.ridge.Ridge` (scikit-learn-compatible estimator) 23 | - :func:`~himalaya.ridge.solve_ridge_svd` (function) 24 | 25 | KernelRidge 26 | ----------- 27 | 28 | By the Woodbury matrix identity, :math:`b^*` can be written as :math:`b^* = 29 | X^\top(XX^\top + \alpha I_n)^{-1}y`, or :math:`b^* = X^\top w^*` for some 30 | :math:`w^*\in \mathbb{R}^n`. Noting the linear kernel :math:`K = X X^\top \in 31 | \mathbb{R}^{n\times n}`, this leads to the *equivalent* formulation: 32 | 33 | .. math:: 34 | w^* = \arg\min_w \|Kw - y\|_2^2 + \alpha w^\top Kw. 35 | 36 | This model can be extended to arbitrary positive semidefinite kernels 37 | :math:`K`, leading to the more general kernel ridge regression [2]_. 38 | 39 | .. admonition:: This model is implemented in 40 | 41 | - :class:`~himalaya.kernel_ridge.KernelRidge` (scikit-learn-compatible estimator) 42 | - :func:`~himalaya.kernel_ridge.solve_kernel_ridge_eigenvalues` (function) 43 | - :func:`~himalaya.kernel_ridge.solve_kernel_ridge_gradient_descent` (function) 44 | - :func:`~himalaya.kernel_ridge.solve_kernel_ridge_conjugate_gradient` (function) 45 | 46 | 47 | RidgeCV and KernelRidgeCV 48 | ------------------------- 49 | 50 | In practice, because the ridge regression and kernel ridge regression 51 | hyperparameter :math:`\alpha` is unknown, it is typically selected through a 52 | grid-search with cross-validation. In cross-validation, we split the data set 53 | into a training set :math:`(X_{train}, y_{train})` and a validation set 54 | :math:`(X_{val}, y_{val})`. Then, we train the model on the training set, and 55 | evaluate the generalization performance on the validation set. We perform this 56 | process for multiple hyperparameter candidates :math:`\alpha`, typically 57 | defined over a grid of log-spaced values. Finally, we keep the candidate 58 | leading to the best generalization performance, as measured by the validation 59 | loss, averaged over all cross-validation splits. 60 | 61 | .. admonition:: These models are implemented in 62 | 63 | - :class:`~himalaya.ridge.RidgeCV` (scikit-learn-compatible estimator) 64 | - :func:`~himalaya.ridge.solve_ridge_cv_svd` (function) 65 | - :class:`~himalaya.kernel_ridge.KernelRidgeCV` (scikit-learn-compatible estimator) 66 | - :func:`~himalaya.kernel_ridge.solve_kernel_ridge_cv_eigenvalues` (function) 67 | 68 | 69 | GroupRidgeCV / BandedRidgeCV 70 | ---------------------------- 71 | 72 | In some applications, features are naturally grouped into groups (or feature 73 | spaces). To adapt the regularization level to each feature space, ridge 74 | regression can be extended to group-regularized ridge regression (also known 75 | as banded ridge regression [3]_). In this model, a separate hyperparameter is 76 | optimized for each feature space: 77 | 78 | .. math:: 79 | b^* = \arg\min_b \|\sum_{i=1}^m X_i b_i - y\|_2^2 + \sum_{i=1}^m \alpha_i \|b_i\|_2^2. 80 | 81 | This is equivalent to solving a ridge regression: 82 | 83 | .. math:: 84 | b^* = \arg\min_b \|Z b - Y\|_2^2 + \|b\|_2^2 85 | 86 | where the feature space :math:`X_i` is scaled by a group scaling :math:`Z_i = 87 | e^{\delta_i} X_i`. The hyperparameters :math:`\delta_i = - \log(\alpha_i)` are 88 | then learned over cross-validation [4]_. 89 | 90 | .. 
90 | .. admonition:: This model is implemented in
91 | 
92 |     - :class:`~himalaya.ridge.GroupRidgeCV` (scikit-learn-compatible estimator)
93 |     - :func:`~himalaya.ridge.solve_group_ridge_random_search` (function)
94 | 
95 | See also multiple-kernel ridge regression, which is equivalent to
96 | group-regularized ridge regression when using one linear kernel per group
97 | of features:
98 | 
99 | - :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV` (scikit-learn-compatible estimator)
100 | - :func:`~himalaya.kernel_ridge.solve_multiple_kernel_ridge_random_search` (function)
101 | - :func:`~himalaya.kernel_ridge.solve_multiple_kernel_ridge_hyper_gradient` (function)
102 | 
103 | .. note:: "Group ridge regression" is also sometimes called "Banded ridge regression".
104 | 
105 | WeightedKernelRidge
106 | -------------------
107 | 
108 | To extend kernel ridge regression to group regularization, we can compute the
109 | kernel as a weighted sum of multiple kernels, :math:`K = \sum_{i=1}^m e^{\delta_i} K_i`.
110 | Then, we can use :math:`K_i = X_i X_i^\top` for different groups of features
111 | :math:`X_i`. The model becomes:
112 | 
113 | .. math::
114 |     w^* = \arg\min_w \left\|\sum_{i=1}^m e^{\delta_i} K_{i} w - y\right\|_2^2
115 |     + \alpha \sum_{i=1}^m e^{\delta_i} w^\top K_{i} w.
116 | 
117 | This model is called weighted kernel ridge regression. Here, the
118 | log-kernel-weights :math:`\delta_i` are fixed. When all the targets use the same
119 | log-kernel-weights, a single weighted kernel can be precomputed and used in a
120 | kernel ridge regression. However, when the log-kernel-weights are different for
121 | each target, the kernel sum cannot be precomputed, and the model requires
122 | specific algorithms to be fit.
123 | 
124 | .. admonition:: This model is implemented in
125 | 
126 |     - :class:`~himalaya.kernel_ridge.WeightedKernelRidge` (scikit-learn-compatible estimator)
127 |     - :func:`~himalaya.kernel_ridge.solve_weighted_kernel_ridge_gradient_descent` (function)
128 |     - :func:`~himalaya.kernel_ridge.solve_weighted_kernel_ridge_conjugate_gradient` (function)
129 |     - :func:`~himalaya.kernel_ridge.solve_weighted_kernel_ridge_neumann_series` (function)
130 | 
131 | 
132 | MultipleKernelRidgeCV
133 | ---------------------
134 | 
135 | In weighted kernel ridge regression, when the log-kernel-weights
136 | :math:`\delta_i` are unknown, we can learn them by cross-validation. This
137 | model is called multiple-kernel ridge regression. When the kernels are defined
138 | by :math:`K_i = X_i X_i^\top` for different groups of features :math:`X_i`,
139 | multiple-kernel ridge regression is equivalent to group ridge regression
140 | (aka banded ridge regression).
141 | 
142 | .. admonition:: This model is implemented in
143 | 
144 |     - :class:`~himalaya.kernel_ridge.MultipleKernelRidgeCV` (scikit-learn-compatible estimator)
145 |     - :func:`~himalaya.kernel_ridge.solve_multiple_kernel_ridge_hyper_gradient` (function)
146 |     - :func:`~himalaya.kernel_ridge.solve_multiple_kernel_ridge_random_search` (function)
147 | 
148 | 
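To make the weighted kernel sum used by the two models above concrete, here is
a minimal NumPy sketch (made-up shapes, independent of the package API):

.. code-block:: python

    import numpy as np

    rng = np.random.default_rng(0)
    X1, X2 = rng.standard_normal((50, 10)), rng.standard_normal((50, 20))
    deltas = np.array([0.0, -2.0])  # log-kernel-weights

    # weighted kernel sum: K = sum_i exp(delta_i) * X_i @ X_i.T
    K = sum(np.exp(d) * X @ X.T for d, X in zip(deltas, [X1, X2]))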
149 | .. include:: flowchart.rst
150 | 
151 | References
152 | ~~~~~~~~~~
153 | 
154 | .. [1] Hoerl, A. E., & Kennard, R. W. (1970). Ridge regression: Biased
155 |    estimation for nonorthogonal problems. Technometrics, 12(1), 55-67.
156 | 
157 | .. [2] Saunders, C., Gammerman, A., & Vovk, V. (1998). Ridge regression
158 |    learning algorithm in dual variables.
159 | 
160 | .. [3] Nunez-Elizalde, A. O., Huth, A. G., & Gallant, J. L. (2019). Voxelwise
161 |    encoding models with non-spherical multivariate normal priors. NeuroImage,
162 |    197, 482-492.
163 | 
164 | .. [4] Dupré La Tour, T., Eickenberg, M., Nunez-Elizalde, A. O., & Gallant, J. L. (2022).
165 |    Feature-space selection with banded ridge regression. NeuroImage.
166 | 
--------------------------------------------------------------------------------
/himalaya/backend/tests/test_backends.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | 
3 | from himalaya.backend import set_backend
4 | from himalaya.backend import ALL_BACKENDS
5 | from himalaya.backend._utils import _dtype_to_str
6 | from himalaya.utils import assert_array_almost_equal
7 | 
8 | 
9 | @pytest.mark.parametrize('backend', ALL_BACKENDS)
10 | def test_apply_argmax(backend):
11 |     backend = set_backend(backend)
12 |     for array in [
13 |             backend.randn(1),
14 |             backend.randn(10),
15 |             backend.randn(10, 1),
16 |             backend.randn(10, 4),
17 |             backend.randn(10, 1, 8),
18 |             backend.randn(10, 4, 8),
19 |     ]:
20 |         for axis in range(array.ndim):
21 |             argmax = backend.argmax(array, axis=axis)
22 |             assert_array_almost_equal(
23 |                 backend.max(array, axis=axis),
24 |                 backend.apply_argmax(array, argmax, axis=axis),
25 |             )
26 | 
27 | 
28 | @pytest.mark.parametrize('dtype_str', ["float32", "float64"])
29 | @pytest.mark.parametrize('backend', ALL_BACKENDS)
30 | def test_std_float64(backend, dtype_str):
31 |     backend = set_backend(backend)
32 |     for array in [
33 |             backend.randn(1),
34 |             backend.randn(10),
35 |             backend.randn(10, 1),
36 |             backend.randn(10, 4),
37 |             backend.randn(10, 1, 8),
38 |             backend.randn(10, 4, 8),
39 |     ]:
40 |         array = backend.asarray(array, dtype=dtype_str)
41 |         array_64 = backend.asarray(array, dtype="float64")
42 |         for axis in range(array.ndim):
43 |             result = backend.std_float64(array, axis=axis)
44 |             reference = backend.to_numpy(array_64).std(axis=axis,
45 |                                                        dtype="float64")
46 |             reference = backend.asarray(reference, dtype=dtype_str)
47 |             assert_array_almost_equal(result, reference)
48 | 
49 | 
50 | @pytest.mark.parametrize('backend', ALL_BACKENDS)
51 | def test_diagonal_view(backend):
52 |     backend = set_backend(backend)
53 |     try:
54 |         import torch
55 |     except ImportError:
56 |         # pytest.skip() raises immediately, so no re-raise is needed.
57 |         pytest.skip("PyTorch not installed.")
58 |     import numpy as np
59 | 
60 |     for array in [
61 |             backend.randn(10, 4),
62 |             backend.randn(10, 4).T,
63 |             backend.randn(10, 4, 8),
64 |             backend.randn(10, 4, 8).T,
65 |             backend.randn(3, 4, 8, 5),
66 |     ]:
67 |         for axis1 in range(array.ndim):
68 |             for axis2 in range(array.ndim):
69 |                 if axis1 != axis2:
70 |                     result = backend.diagonal_view(array, axis1=axis1,
71 |                                                    axis2=axis2)
72 |                     # compare with torch diagonal
73 |                     reference = torch.diagonal(
74 |                         torch.from_numpy(backend.to_numpy(array)), dim1=axis1,
75 |                         dim2=axis2)
76 |                     assert_array_almost_equal(result, reference)
77 |                     # compare with numpy diagonal
78 |                     reference = np.diagonal(backend.to_numpy(array),
79 |                                             axis1=axis1, axis2=axis2)
80 |                     assert_array_almost_equal(result, reference)
81 |                     # test that this is a modifiable view
82 |                     result += 1
83 |                     reference = np.diagonal(backend.to_numpy(array),
84 |                                             axis1=axis1, axis2=axis2)
85 |                     assert_array_almost_equal(result, reference)
86 | 
87 | 
88 | @pytest.mark.parametrize('backend', ALL_BACKENDS)
89 | def test_eigh(backend):
90 |     import scipy.linalg
91 |     backend = set_backend(backend)
92 | 
93 |     array = backend.randn(10, 20)
94 |     array = backend.asarray(array, dtype='float64')
95 |     kernel = 
array @ array.T 96 | 97 | values, vectors = backend.eigh(kernel) 98 | values_ref, vectors_ref = scipy.linalg.eigh(backend.to_numpy(kernel)) 99 | 100 | assert_array_almost_equal(values, values_ref) 101 | 102 | # vectors can be flipped in sign 103 | assert vectors.shape == vectors_ref.shape 104 | for ii in range(vectors.shape[1]): 105 | try: 106 | assert_array_almost_equal(vectors[:, ii], vectors_ref[:, ii]) 107 | except AssertionError: 108 | assert_array_almost_equal(vectors[:, ii], -vectors_ref[:, ii]) 109 | 110 | 111 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 112 | @pytest.mark.parametrize('full_matrices', [True, False]) 113 | @pytest.mark.parametrize('three_dim', [True, False]) 114 | def test_svd(backend, full_matrices, three_dim): 115 | import numpy.linalg 116 | backend = set_backend(backend) 117 | 118 | if three_dim: 119 | array = backend.randn(3, 5, 7) 120 | else: 121 | array = backend.randn(5, 7) 122 | 123 | array = backend.asarray(array, dtype='float64') 124 | 125 | U, s, V = backend.svd(array, full_matrices=full_matrices) 126 | U_ref, s_ref, V_ref = numpy.linalg.svd(backend.to_numpy(array), 127 | full_matrices=full_matrices) 128 | 129 | assert_array_almost_equal(s, s_ref) 130 | 131 | if not three_dim: 132 | U_ref = U_ref[None] 133 | U = U[None] 134 | V_ref = V_ref[None] 135 | V = V[None] 136 | 137 | # vectors can be flipped in sign 138 | assert U.shape == U_ref.shape 139 | assert V.shape == V_ref.shape 140 | for kk in range(U.shape[0]): 141 | for ii in range(U.shape[2]): 142 | try: 143 | assert_array_almost_equal(U[kk, :, ii], U_ref[kk, :, ii]) 144 | assert_array_almost_equal(V[kk, ii, :], V_ref[kk, ii, :]) 145 | except AssertionError: 146 | assert_array_almost_equal(U[kk, :, ii], -U_ref[kk, :, ii]) 147 | assert_array_almost_equal(V[kk, ii, :], -V_ref[kk, ii, :]) 148 | 149 | 150 | @pytest.mark.parametrize('backend_out', ALL_BACKENDS) 151 | @pytest.mark.parametrize('backend_in', ALL_BACKENDS) 152 | def test_changed_backend_asarray(backend_in, backend_out): 153 | backend = set_backend(backend_in) 154 | array_in = backend.asarray([1.2, 2.4, 4.8]) 155 | assert array_in is not None 156 | 157 | # change the backend, and cast to the correct class 158 | backend = set_backend(backend_out) 159 | array_out = backend.asarray(array_in) 160 | assert array_out is not None 161 | 162 | if backend_in == backend_out or backend_in[:5] == backend_out[:5]: 163 | # assert the class did not change 164 | assert array_in.__class__ == array_out.__class__ 165 | else: 166 | # assert the class did change 167 | assert array_in.__class__ != array_out.__class__ 168 | 169 | # assert the new class is correct 170 | array_out2 = backend.randn(3) 171 | assert array_out.__class__ == array_out2.__class__ 172 | 173 | # test check_arrays 174 | array_out3, array_out4, array_out5 = backend.check_arrays( 175 | array_in, array_in, [array_in]) 176 | assert array_out.__class__ == array_out3.__class__ 177 | assert array_out.__class__ == array_out4.__class__ 178 | assert array_out.__class__ == array_out5[0].__class__ 179 | 180 | 181 | @pytest.mark.parametrize('dtype_out', ["float32", "float64"]) 182 | @pytest.mark.parametrize('dtype_in', ["float32", "float64"]) 183 | @pytest.mark.parametrize('backend_out', ALL_BACKENDS) 184 | @pytest.mark.parametrize('backend_in', ALL_BACKENDS) 185 | def test_asarray_dtype(backend_in, backend_out, dtype_in, dtype_out): 186 | backend = set_backend(backend_in) 187 | array_in = backend.asarray([1.2, 2.4, 4.8], dtype=dtype_in) 188 | assert _dtype_to_str(array_in.dtype) == dtype_in 189 | 190 | 
    backend = set_backend(backend_out)
191 |     array_out = backend.asarray(array_in, dtype=dtype_out)
192 |     assert _dtype_to_str(array_out.dtype) == dtype_out
193 | 
194 | 
195 | def test_dtype_to_str_wrong_input():
196 |     assert _dtype_to_str(None) is None
197 | 
198 |     with pytest.raises(NotImplementedError):
199 |         _dtype_to_str(42)
200 | 
--------------------------------------------------------------------------------
/himalaya/kernel_ridge/_predictions.py:
--------------------------------------------------------------------------------
1 | from ..backend import get_backend
2 | from ..progress_bar import bar
3 | from ..utils import _batch_or_skip
4 | 
5 | 
6 | def predict_weighted_kernel_ridge(Ks, dual_weights, deltas, split=False,
7 |                                   n_targets_batch=None, progress_bar=False,
8 |                                   intercept=None):
9 |     """
10 |     Compute predictions, typically on a test set.
11 | 
12 |     Parameters
13 |     ----------
14 |     Ks : array of shape (n_kernels, n_samples_test, n_samples_train)
15 |         Test kernels.
16 |     dual_weights : array of shape (n_samples_train, n_targets)
17 |         Dual weights of the kernel ridge model.
18 |     deltas : array of shape (n_kernels, n_targets) or (n_kernels, )
19 |         Log kernel weights for each target.
20 |     split : bool
21 |         If True, the predictions are split across kernels.
22 |     n_targets_batch : int or None
23 |         Size of the batch for computing predictions. Used for memory reasons.
24 |         If None, uses all n_targets at once.
25 |     progress_bar : bool
26 |         If True, display a progress bar over batches and iterations.
27 |     intercept : None, or array of shape (n_targets,)
28 |         Intercept added to the predictions. Must be None if split=True.
29 | 
30 |     Returns
31 |     -------
32 |     Y_hat : array of shape (n_samples_test, n_targets) or \
33 |             (n_kernels, n_samples_test, n_targets) (if split is True)
34 |         Predicted values.
35 |     """
36 |     backend = get_backend()
37 | 
38 |     Ks, dual_weights, deltas, intercept = backend.check_arrays(
39 |         Ks, dual_weights, deltas, intercept)
40 |     n_samples = Ks.shape[1]
41 |     n_targets = dual_weights.shape[1]
42 |     n_kernels = deltas.shape[0]
43 | 
44 |     if deltas.ndim == 1:
45 |         deltas = deltas[:, None]
46 | 
47 |     if split:
48 |         Y_hat_full = backend.zeros_like(
49 |             deltas, shape=(n_kernels, n_samples, n_targets))
50 |     else:
51 |         Y_hat_full = backend.zeros_like(deltas, shape=(n_samples, n_targets))
52 | 
53 |     if not n_targets_batch:
54 |         n_targets_batch = n_targets
55 | 
56 |     for start in bar(list(range(0, n_targets, n_targets_batch)),
57 |                      title='predict', use_it=progress_bar):
58 |         batch = slice(start, start + n_targets_batch)
59 |         dual_weights_batch = dual_weights[:, batch]
60 |         deltas_batch = _batch_or_skip(deltas, batch, axis=1)
61 |         chi = backend.matmul(Ks, dual_weights_batch)
62 |         split_predictions = backend.exp(deltas_batch[:, None, :]) * chi
63 |         if split:
64 |             if intercept is not None:
65 |                 raise ValueError(
66 |                     "Cannot split the predictions with an intercept.")
67 |             Y_hat_full[:, :, batch] = split_predictions
68 |         else:
69 |             Y_hat_full[:, batch] = split_predictions.sum(0)
70 |             if intercept is not None:
71 |                 Y_hat_full[:, batch] += intercept[None, batch]
72 | 
73 |     return Y_hat_full
74 | 
75 | 
76 | def predict_and_score_weighted_kernel_ridge(Ks, dual_weights, deltas, Y,
77 |                                             score_func, split=False,
78 |                                             n_targets_batch=None,
79 |                                             progress_bar=False,
80 |                                             intercept=None):
81 |     """
82 |     Compute predictions, typically on a test set, and compute the score.
83 | 
84 |     Parameters
85 |     ----------
86 |     Ks : array of shape (n_kernels, n_samples_test, n_samples_train)
87 |         Input kernels.
88 |     dual_weights : array of shape (n_samples_train, n_targets)
89 |         Dual weights of the kernel ridge model.
90 |     deltas : array of shape (n_kernels, n_targets) or (n_kernels, )
91 |         Log kernel weights for each target.
92 |     Y : array of shape (n_samples_test, n_targets)
93 |         Target data.
94 |     score_func : callable
95 |         Function used to compute the score of predictions.
96 |     split : bool
97 |         If True, the predictions are split across kernels.
98 |     n_targets_batch : int or None
99 |         Size of the batch for computing predictions. Used for memory reasons.
100 |         If None, uses all n_targets at once.
101 |     progress_bar : bool
102 |         If True, display a progress bar over batches and iterations.
103 |     intercept : None, or array of shape (n_targets,)
104 |         Intercept added to the predictions. To allow split=True, the intercept
105 |         is not added to the predictions but subtracted from the target Y.
106 | 
107 |     Returns
108 |     -------
109 |     scores : array of shape (n_targets, ) or (n_kernels, n_targets) (if split)
110 |         Prediction score per target.
111 |     """
112 |     backend = get_backend()
113 |     Ks, dual_weights, deltas, Y, intercept = backend.check_arrays(
114 |         Ks, dual_weights, deltas, Y, intercept)
115 | 
116 |     if deltas.ndim == 1:
117 |         deltas = deltas[:, None]
118 | 
119 |     n_kernels, _ = deltas.shape
120 |     _, n_targets = Y.shape
121 |     if split:
122 |         scores = backend.zeros_like(Y, shape=(n_kernels, n_targets))
123 |     else:
124 |         scores = backend.zeros_like(Y, shape=(n_targets, ))
125 | 
126 |     if n_targets_batch is None:
127 |         n_targets_batch = n_targets
128 |     for start in bar(list(range(0, n_targets, n_targets_batch)),
129 |                      title='predict_and_score', use_it=progress_bar):
130 |         batch = slice(start, start + n_targets_batch)
131 |         predictions = predict_weighted_kernel_ridge(
132 |             Ks, dual_weights[:, batch], _batch_or_skip(deltas, batch, axis=1),
133 |             split=split)
134 |         if intercept is not None:
135 |             score_batch = score_func(Y[:, batch] - intercept[None, batch],
136 |                                      predictions)
137 |         else:
138 |             score_batch = score_func(Y[:, batch], predictions)
139 | 
140 |         if split:
141 |             scores[:, batch] = score_batch
142 |         else:
143 |             scores[batch] = score_batch
144 | 
145 |     return scores
146 | 
147 | 
148 | def primal_weights_kernel_ridge(dual_weights, X_fit):
149 |     """Compute the primal weights for kernel ridge regression.
150 | 
151 |     Parameters
152 |     ----------
153 |     dual_weights : array of shape (n_samples_fit, n_targets)
154 |         Dual coefficients of the kernel ridge regression.
155 |     X_fit : array of shape (n_samples_fit, n_features)
156 |         Training features.
157 | 
158 |     Returns
159 |     -------
160 |     primal_weights : array of shape (n_features, n_targets)
161 |         Primal coefficients of the equivalent ridge regression. The
162 |         coefficients are computed on CPU memory, since they can be large.
163 |     """
164 |     backend = get_backend()
165 |     X_fit = backend.to_cpu(X_fit)
166 |     dual_weights = backend.to_cpu(dual_weights)
167 | 
168 |     return X_fit.T @ dual_weights
169 | 
170 | 
171 | def primal_weights_weighted_kernel_ridge(dual_weights, deltas, Xs_fit):
172 |     """Compute the primal weights for weighted kernel ridge regression.
173 | 
174 |     Parameters
175 |     ----------
176 |     dual_weights : array of shape (n_samples_fit, n_targets)
177 |         Dual coefficients of the kernel ridge regression.
178 |     deltas : array of shape (n_kernels, n_targets)
179 |         Log of kernel weights.
180 |     Xs_fit : list of arrays of shape (n_samples_fit, n_features)
181 |         Training features. The list should have `n_kernels` elements.
182 | 183 | Returns 184 | ------- 185 | primal_weights : list of arrays of shape (n_features, n_targets) 186 | Primal coefficients of the equivalent ridge regression. The 187 | coefficients are computed on CPU memory, since they can be large. 188 | """ 189 | backend = get_backend() 190 | dual_weights = backend.to_cpu(dual_weights) 191 | 192 | primal_weights = [] 193 | for X_fit, deltas_i in zip(Xs_fit, deltas): 194 | X_fit = backend.to_cpu(X_fit) 195 | exp_deltas_i = backend.to_cpu(backend.exp(deltas_i)) 196 | primal_weights_i = X_fit.T @ dual_weights * exp_deltas_i[None] 197 | primal_weights.append(primal_weights_i) 198 | 199 | return primal_weights 200 | -------------------------------------------------------------------------------- /examples/multiple_kernel_ridge/plot_mkr_0_random_search.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multiple-kernel ridge 3 | ===================== 4 | This example demonstrates how to solve multiple kernel ridge regression. 5 | It uses random search and cross validation to select optimal hyperparameters. 6 | """ 7 | 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | 11 | from himalaya.backend import set_backend 12 | from himalaya.kernel_ridge import solve_multiple_kernel_ridge_random_search 13 | from himalaya.kernel_ridge import predict_and_score_weighted_kernel_ridge 14 | from himalaya.utils import generate_multikernel_dataset 15 | from himalaya.scoring import r2_score_split 16 | from himalaya.viz import plot_alphas_diagnostic 17 | 18 | # sphinx_gallery_thumbnail_number = 4 19 | ############################################################################### 20 | # In this example, we use the ``cupy`` backend, and fit the model on GPU. 21 | 22 | backend = set_backend("cupy", on_error="warn") 23 | 24 | ############################################################################### 25 | # Generate a random dataset 26 | # ------------------------- 27 | # 28 | # - X_train : array of shape (n_samples_train, n_features) 29 | # - X_test : array of shape (n_samples_test, n_features) 30 | # - Y_train : array of shape (n_samples_train, n_targets) 31 | # - Y_test : array of shape (n_samples_test, n_targets) 32 | 33 | n_kernels = 3 34 | n_targets = 50 35 | kernel_weights = np.tile(np.array([0.5, 0.3, 0.2])[None], (n_targets, 1)) 36 | 37 | (X_train, X_test, Y_train, Y_test, 38 | kernel_weights, n_features_list) = generate_multikernel_dataset( 39 | n_kernels=n_kernels, n_targets=n_targets, n_samples_train=600, 40 | n_samples_test=300, kernel_weights=kernel_weights, random_state=42) 41 | 42 | feature_names = [f"Feature space {ii}" for ii in range(len(n_features_list))] 43 | 44 | # Find the start and end of each feature space X in Xs 45 | start_and_end = np.concatenate([[0], np.cumsum(n_features_list)]) 46 | slices = [ 47 | slice(start, end) 48 | for start, end in zip(start_and_end[:-1], start_and_end[1:]) 49 | ] 50 | Xs_train = [X_train[:, slic] for slic in slices] 51 | Xs_test = [X_test[:, slic] for slic in slices] 52 | 53 | ############################################################################### 54 | # Precompute the linear kernels 55 | # ----------------------------- 56 | # We also cast them to float32. 
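# Note: each linear kernel is just the Gram matrix of one feature space,
# ``K = X @ X.T``, of shape (n_samples, n_samples). As a rough sanity check
# (a sketch assuming the shapes generated above, 600 train and 300 test
# samples), one could add after the next cell:
#
#     assert Ks_train.shape == (n_kernels, 600, 600)
#     assert Ks_test.shape == (n_kernels, 300, 600)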
57 | 
58 | Ks_train = backend.stack([X_train @ X_train.T for X_train in Xs_train])
59 | Ks_train = backend.asarray(Ks_train, dtype=backend.float32)
60 | Y_train = backend.asarray(Y_train, dtype=backend.float32)
61 | 
62 | Ks_test = backend.stack(
63 |     [X_test @ X_train.T for X_train, X_test in zip(Xs_train, Xs_test)])
64 | Ks_test = backend.asarray(Ks_test, dtype=backend.float32)
65 | Y_test = backend.asarray(Y_test, dtype=backend.float32)
66 | 
67 | ###############################################################################
68 | # Run the solver, using random search
69 | # -----------------------------------
70 | # This method should work fine for a small number of kernels (< 20). The
71 | # larger the number of kernels, the more densely we need to sample the
72 | # hyperparameter space (i.e., the larger ``n_iter`` should be).
73 | 
74 | ###############################################################################
75 | # Here we use 100 iterations to have a reasonably fast example (~40 sec).
76 | # For better convergence, we would probably need more iterations.
77 | # Note that there is currently no stopping criterion in this method.
78 | n_iter = 100
79 | 
80 | ###############################################################################
81 | # Grid of regularization parameters.
82 | alphas = np.logspace(-10, 10, 21)
83 | 
84 | ###############################################################################
85 | # Batch parameters are used to reduce the necessary GPU memory. A larger value
86 | # will be a bit faster, but the solver might crash if it runs out of memory.
87 | # Optimal values depend on the size of your dataset.
88 | n_targets_batch = 1000
89 | n_alphas_batch = 20
90 | 
91 | ###############################################################################
92 | # If ``return_weights == "dual"``, the solver will use more memory.
93 | # To mitigate this, you can reduce ``n_targets_batch`` in the refit
94 | # using ``n_targets_batch_refit``.
95 | # If you don't need the dual weights, use ``return_weights = None``.
96 | return_weights = 'dual'
97 | n_targets_batch_refit = 200
98 | 
99 | ###############################################################################
100 | # Run the solver. For each iteration, it will:
101 | #
102 | # - sample kernel weights from a Dirichlet distribution
103 | # - fit (n_splits * n_alphas * n_targets) ridge models
104 | # - compute the scores on the validation set of each split
105 | # - average the scores over splits
106 | # - take the maximum over alphas
107 | # - (only if you ask for the ridge weights) refit using the best alphas per
108 | #   target and the entire dataset
109 | # - return for each target the log kernel weights leading to the best CV score
110 | #   (and the best weights if necessary)
111 | results = solve_multiple_kernel_ridge_random_search(
112 |     Ks=Ks_train,
113 |     Y=Y_train,
114 |     n_iter=n_iter,
115 |     alphas=alphas,
116 |     n_targets_batch=n_targets_batch,
117 |     return_weights=return_weights,
118 |     n_alphas_batch=n_alphas_batch,
119 |     n_targets_batch_refit=n_targets_batch_refit,
120 |     jitter_alphas=True,
121 | )
122 | 
123 | ###############################################################################
124 | # As we used the ``cupy`` backend, the results are ``cupy`` arrays, which are
125 | # on GPU. Here, we cast the results back to CPU, and to ``numpy`` arrays.
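# With ``return_weights='dual'``, ``results`` is a tuple containing, in this
# order, the best log-kernel-weights per target (``deltas``), the refit dual
# weights, and the cross-validation scores of each sampled candidate, as
# unpacked below.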
126 | deltas = backend.to_numpy(results[0])
127 | dual_weights = backend.to_numpy(results[1])
128 | cv_scores = backend.to_numpy(results[2])
129 | 
130 | ###############################################################################
131 | # Plot the convergence curve
132 | # --------------------------
133 | #
134 | # ``cv_scores`` gives the scores for each sampled set of kernel weights.
135 | # The convergence curve is thus the current maximum for each target.
136 | 
137 | current_max = np.maximum.accumulate(cv_scores, axis=0)
138 | mean_current_max = np.mean(current_max, axis=1)
139 | x_array = np.arange(1, len(mean_current_max) + 1)
140 | plt.plot(x_array, mean_current_max, '-o')
141 | plt.grid("on")
142 | plt.xlabel("Number of kernel weights sampled")
143 | plt.ylabel("L2 negative loss (higher is better)")
144 | plt.title("Convergence curve, averaged over targets")
145 | plt.tight_layout()
146 | plt.show()
147 | 
148 | ###############################################################################
149 | # Plot the optimal alphas selected by the solver
150 | # ----------------------------------------------
151 | #
152 | # This plot is helpful to refine the alpha grid if the range is too small or
153 | # too large.
154 | 
155 | best_alphas = 1. / np.sum(np.exp(deltas), axis=0)
156 | plot_alphas_diagnostic(best_alphas, alphas)
157 | plt.title("Best alphas selected by cross-validation")
158 | plt.show()
159 | 
160 | ###############################################################################
161 | # Compute the predictions on the test set
162 | # ---------------------------------------
163 | # (requires the dual weights)
164 | 
165 | split = False
166 | scores = predict_and_score_weighted_kernel_ridge(
167 |     Ks_test, dual_weights, deltas, Y_test, split=split,
168 |     n_targets_batch=n_targets_batch, score_func=r2_score_split)
169 | scores = backend.to_numpy(scores)
170 | 
171 | plt.hist(scores, np.linspace(0, 1, 50))
172 | plt.xlabel(r"$R^2$ generalization score")
173 | plt.title("Histogram over targets")
174 | plt.show()
175 | 
176 | ###############################################################################
177 | # Compute the split predictions on the test set
178 | # ---------------------------------------------
179 | # (requires the dual weights)
180 | #
181 | # Here we apply the dual weights on each kernel separately
182 | # (``exp(deltas[i]) * kernel[i]``), and we compute the R\ :sup:`2` scores
183 | # (corrected for correlations) of each prediction.
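# Conceptually, the split prediction of kernel ``i`` alone is
# ``Y_hat_i = exp(deltas[i]) * (Ks_test[i] @ dual_weights)`` (a sketch
# following ``predict_weighted_kernel_ridge``), and summing ``Y_hat_i`` over
# kernels recovers the full prediction.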
184 | 185 | split = True 186 | scores_split = predict_and_score_weighted_kernel_ridge( 187 | Ks_test, dual_weights, deltas, Y_test, split=split, 188 | n_targets_batch=n_targets_batch, score_func=r2_score_split) 189 | scores_split = backend.to_numpy(scores_split) 190 | 191 | for kk, score in enumerate(scores_split): 192 | plt.hist(score, np.linspace(0, np.max(scores_split), 50), alpha=0.7, 193 | label="kernel %d" % kk) 194 | plt.title(r"Histogram of $R^2$ generalization score split between kernels") 195 | plt.legend() 196 | plt.show() 197 | -------------------------------------------------------------------------------- /himalaya/kernel_ridge/tests/test_random_search_kernel.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | import sklearn.linear_model 5 | import sklearn.model_selection 6 | import scipy.linalg 7 | 8 | from himalaya.backend import set_backend 9 | from himalaya.backend import ALL_BACKENDS 10 | from himalaya.utils import assert_array_almost_equal 11 | from himalaya.scoring import r2_score 12 | 13 | from himalaya.kernel_ridge import solve_multiple_kernel_ridge_random_search 14 | 15 | 16 | def _create_dataset(backend, n_targets=4): 17 | n_featuress = (100, 200) 18 | n_samples = 80 19 | n_gammas = 3 20 | 21 | Xs = [ 22 | backend.asarray(backend.randn(n_samples, n_features), backend.float64) 23 | for n_features in n_featuress 24 | ] 25 | Ks = backend.stack([X @ X.T for X in Xs]) 26 | 27 | ws = [ 28 | backend.asarray(backend.randn(n_features, n_targets), backend.float64) 29 | for n_features in n_featuress 30 | ] 31 | Ys = backend.stack([X @ w for X, w in zip(Xs, ws)]) 32 | Y = Ys.sum(0) 33 | 34 | gammas = backend.asarray(backend.rand(n_gammas, Ks.shape[0]), 35 | backend.float64) 36 | gammas /= gammas.sum(1)[:, None] 37 | 38 | return Ks, Y, gammas, Xs 39 | 40 | 41 | @pytest.mark.parametrize('local_alpha', [True, False]) 42 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 43 | def test_solve_multiple_kernel_ridge_random_search_local_alpha( 44 | backend, local_alpha): 45 | _test_solve_multiple_kernel_ridge_random_search(backend=backend, 46 | local_alpha=local_alpha) 47 | 48 | 49 | @pytest.mark.parametrize('n_targets_batch', [None, 3]) 50 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 51 | def test_solve_multiple_kernel_ridge_random_search_n_targets_batch( 52 | backend, n_targets_batch): 53 | _test_solve_multiple_kernel_ridge_random_search( 54 | backend=backend, n_targets_batch=n_targets_batch) 55 | 56 | 57 | @pytest.mark.parametrize('n_alphas_batch', [None, 2]) 58 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 59 | def test_solve_multiple_kernel_ridge_random_search_n_alphas_batch( 60 | backend, n_alphas_batch): 61 | _test_solve_multiple_kernel_ridge_random_search( 62 | backend=backend, n_alphas_batch=n_alphas_batch) 63 | 64 | 65 | @pytest.mark.parametrize('return_weights', ['primal', 'dual']) 66 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 67 | def test_solve_multiple_kernel_ridge_random_search_return_weights( 68 | backend, return_weights): 69 | _test_solve_multiple_kernel_ridge_random_search( 70 | backend=backend, return_weights=return_weights) 71 | 72 | 73 | @pytest.mark.parametrize('diagonalize_method', ['eigh', 'svd']) 74 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 75 | def test_solve_multiple_kernel_ridge_random_search_diagonalize_method( 76 | backend, diagonalize_method): 77 | _test_solve_multiple_kernel_ridge_random_search( 78 | backend=backend, 
        diagonalize_method=diagonalize_method)
79 | 
80 | 
81 | def _test_solve_multiple_kernel_ridge_random_search(
82 |         backend, n_targets_batch=None, n_alphas_batch=None,
83 |         return_weights="dual", diagonalize_method="eigh", local_alpha=True):
84 |     backend = set_backend(backend)
85 | 
86 |     Ks, Y, gammas, Xs = _create_dataset(backend)
87 |     alphas = backend.asarray_like(backend.logspace(-3, 5, 9), Ks)
88 |     n_targets = Y.shape[1]
89 |     cv = sklearn.model_selection.check_cv(10)
90 | 
91 |     ############
92 |     # run solver
93 |     results = solve_multiple_kernel_ridge_random_search(
94 |         Ks, Y, n_iter=gammas, alphas=alphas, score_func=r2_score, cv=cv,
95 |         n_targets_batch=n_targets_batch, Xs=Xs, progress_bar=False,
96 |         return_weights=return_weights, n_alphas_batch=n_alphas_batch,
97 |         diagonalize_method=diagonalize_method, local_alpha=local_alpha)
98 |     best_deltas, refit_weights, cv_scores = results
99 | 
100 |     #########################################
101 |     # compare with sklearn.linear_model.Ridge
102 |     if local_alpha:  # only compare when each target optimizes alpha
103 |         test_scores = []
104 |         for gamma in backend.sqrt(gammas):
105 |             X = backend.concatenate([x * g for x, g in zip(Xs, gamma)], 1)
106 |             for train, test in cv.split(X):
107 |                 for alpha in alphas:
108 |                     model = sklearn.linear_model.Ridge(
109 |                         alpha=backend.to_numpy(alpha), fit_intercept=False)
110 |                     model = model.fit(backend.to_numpy(X[train]),
111 |                                       backend.to_numpy(Y[train]))
112 |                     predictions = backend.asarray_like(
113 |                         model.predict(backend.to_numpy(X[test])), Y)
114 |                     test_scores.append(r2_score(Y[test], predictions))
115 | 
116 |         test_scores = backend.stack(test_scores)
117 |         test_scores = test_scores.reshape(len(gammas), cv.get_n_splits(),
118 |                                           len(alphas), n_targets)
119 |         test_scores_mean = backend.max(test_scores.mean(1), 1)
120 |         assert_array_almost_equal(cv_scores, test_scores_mean, decimal=5)
121 | 
122 |     ######################
123 |     # test refit_weights
124 |     for tt in range(n_targets):
125 |         gamma = backend.exp(best_deltas[:, tt])
126 |         alpha = 1.0
127 | 
128 |         if return_weights == 'primal':
129 |             # compare primal weights with sklearn.linear_model.Ridge
130 |             X = backend.concatenate(
131 |                 [X * backend.sqrt(g) for X, g in zip(Xs, gamma)], 1)
132 |             model = sklearn.linear_model.Ridge(fit_intercept=False,
133 |                                                alpha=backend.to_numpy(alpha))
134 |             w1 = model.fit(backend.to_numpy(X),
135 |                            backend.to_numpy(Y[:, tt])).coef_
136 |             w1 = np.split(w1, np.cumsum([X.shape[1] for X in Xs][:-1]), axis=0)
137 |             w1 = [backend.asarray(w) for w in w1]
138 |             w1_scaled = backend.concatenate(
139 |                 [w * backend.sqrt(g) for w, g in zip(w1, gamma)])
140 |             assert_array_almost_equal(w1_scaled, refit_weights[:, tt],
141 |                                       decimal=5)
142 | 
143 |         elif return_weights == 'dual':
144 |             # compare dual weights with scipy.linalg.solve
145 |             Ks_64 = backend.asarray(Ks, dtype=backend.float64)
146 |             gamma_64 = backend.asarray(gamma, dtype=backend.float64)
147 |             K = backend.matmul(Ks_64.T, gamma_64).T
148 |             reg = backend.asarray_like(np.eye(K.shape[0]), K) * alpha
149 |             Y_64 = backend.asarray(Y, dtype=backend.float64)
150 |             c1 = scipy.linalg.solve(backend.to_numpy(K + reg),
151 |                                     backend.to_numpy(Y_64[:, tt]))
152 |             c1 = backend.asarray_like(c1, K)
153 |             assert_array_almost_equal(c1, refit_weights[:, tt], decimal=5)
154 | 
155 | 
156 | @pytest.mark.parametrize('backend', ALL_BACKENDS)
157 | def test_solve_multiple_kernel_ridge_random_search_single_alpha_numpy(backend):
158 |     backend = set_backend(backend)
159 |     # just a smoke test, so make it minimal
160 |     Ks, Y, 
gammas, Xs = _create_dataset(backend) 161 | alphas = 1.0 162 | # make Y a numpy array 163 | Y = backend.to_numpy(Y) 164 | _ = solve_multiple_kernel_ridge_random_search( 165 | Ks, Y, n_iter=gammas, alphas=alphas 166 | ) 167 | 168 | 169 | @pytest.mark.parametrize('backend', ALL_BACKENDS) 170 | @pytest.mark.parametrize('n_kernels', [1, 2]) 171 | def test_solve_multiple_kernel_ridge_random_search_global_alpha( 172 | backend, n_kernels): 173 | backend = set_backend(backend) 174 | # add more targets to make sure we get some variability 175 | Ks, Y, gammas, Xs = _create_dataset(backend, n_targets=20) 176 | alphas = backend.asarray_like(backend.logspace(-3, 5, 9), Ks) 177 | cv = sklearn.model_selection.check_cv(5) 178 | 179 | deltas, *_, best_alphas = solve_multiple_kernel_ridge_random_search( 180 | Ks[:n_kernels], 181 | Y, 182 | n_iter=50, 183 | progress_bar=False, 184 | alphas=alphas, 185 | cv=cv, 186 | local_alpha=False, 187 | return_alphas=True 188 | ) 189 | # test that we return a single combination of deltas 190 | deltas = backend.to_numpy(deltas) 191 | if deltas.ndim == 1: 192 | assert np.allclose(deltas[0], deltas) 193 | else: 194 | for dd in deltas: 195 | assert np.allclose(dd[0], dd) 196 | 197 | # test that we return a single alpha 198 | best_alphas = backend.to_numpy(best_alphas) 199 | assert np.allclose(best_alphas[0], best_alphas) 200 | --------------------------------------------------------------------------------