├── tests
    ├── __init__.py
    ├── requirements.txt
    ├── test_utilities.py
    └── test_transformer.py
├── releasenotes
    └── notes
    │   ├── initial-40d81acd45b0cca8.yaml
    │   ├── problem-submission-label-68c3670bd04b5c25.yaml
    │   ├── SelectFromQuadraticModel-b051274f784d6fbf.yaml
    │   ├── fixed-column-f05e10ff94adfaef.yaml
    │   ├── Update-the-transformers.py-file-to-work-with-the-NL-solver-0cf03a2e1b7a33d6.yaml
    │   ├── Updated-README-to-NL-e02d2ae2ae9d6705.yaml
    │   ├── test_transformer.py-7793a6957af9735a.yaml
    │   └── correlation-8eaf77b85eeed9ba.yaml
├── requirements.txt
├── setup.py
├── dwave
    ├── __init__.py
    └── plugins
    │   ├── __init__.py
    │   └── sklearn
    │       ├── __init__.py
    │       ├── utilities.py
    │       └── transformers.py
├── pyproject.toml
├── .gitignore
├── .circleci
    └── config.yml
├── README.md
└── LICENSE


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/requirements.txt:
--------------------------------------------------------------------------------
1 | coverage
2 | codecov
3 | 
4 | parameterized==0.9.0


--------------------------------------------------------------------------------
/releasenotes/notes/initial-40d81acd45b0cca8.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | prelude: >
3 |     Initial release of ``dwave-scikit-learn-plugin``.
4 | 


--------------------------------------------------------------------------------
/releasenotes/notes/problem-submission-label-68c3670bd04b5c25.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | features:
3 |   - Apply a label to the problems submitted to Leap.
4 | 


--------------------------------------------------------------------------------
/releasenotes/notes/SelectFromQuadraticModel-b051274f784d6fbf.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | features:
3 |   - Add ``SelectFromQuadraticModel`` class for feature selection.
4 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | dimod==0.12.20
2 | dwave-optimization==0.6.2
3 | dwave-system==1.32.0
4 | 
5 | numpy==2.0.2
6 | scikit-learn==1.6.1
7 | 
8 | reno==3.5.0
9 | 


--------------------------------------------------------------------------------
/releasenotes/notes/fixed-column-f05e10ff94adfaef.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | fixes:
3 |   - Fix ``SelectFromQuadraticModel`` to handle the case when ``X`` has a column with all equal values.
4 | 


--------------------------------------------------------------------------------
/releasenotes/notes/Update-the-transformers.py-file-to-work-with-the-NL-solver-0cf03a2e1b7a33d6.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | upgrade:
3 |   - |
4 |     Update ``transformers.py`` to work with the Nonlinear (NL) solver.
5 | 


--------------------------------------------------------------------------------
/releasenotes/notes/Updated-README-to-NL-e02d2ae2ae9d6705.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | other:
3 |   - Updated README.md to include usage instructions and explanations for the new nonlinear solver backend.
4 | 


--------------------------------------------------------------------------------
/releasenotes/notes/test_transformer.py-7793a6957af9735a.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | issues:
 3 |   - |
 4 |     ResourceWarnings occur on some tests. Some tests run several times, but ResourceWarnings only occur on some of the runs.
 5 |     "/usr/lib/python3.11/concurrent/futures/thread.py:85: ResourceWarning: unclosed file <_io.BufferedRandom name=8>
 6 |     del work_item
 7 |     ResourceWarning: Enable tracemalloc to get the object allocation traceback"
 8 | upgrade:
 9 |   - |
10 |     Updated to test the nonlinear version of transformers.py
11 | 
12 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 D-Wave Systems Inc.
 2 | #
 3 | #    Licensed under the Apache License, Version 2.0 (the "License");
 4 | #    you may not use this file except in compliance with the License.
 5 | #    You may obtain a copy of the License at
 6 | #
 7 | #        http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #    Unless required by applicable law or agreed to in writing, software
10 | #    distributed under the License is distributed on an "AS IS" BASIS,
11 | #    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #    See the License for the specific language governing permissions and
13 | #    limitations under the License.
14 | 
15 | from setuptools import setup
16 | 
17 | setup()
18 | 


--------------------------------------------------------------------------------
/dwave/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 D-Wave Systems Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #   http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import pkgutil
16 | __path__ = pkgutil.extend_path(__path__, __name__)
17 | 


--------------------------------------------------------------------------------
/dwave/plugins/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 D-Wave Systems Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #   http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import pkgutil
16 | __path__ = pkgutil.extend_path(__path__, __name__)
17 | 


--------------------------------------------------------------------------------
/dwave/plugins/sklearn/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 D-Wave Systems Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #   http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | __version__ = '0.2.0'
16 | 
17 | from dwave.plugins.sklearn.transformers import *
18 | 


--------------------------------------------------------------------------------
/releasenotes/notes/correlation-8eaf77b85eeed9ba.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | features:
 3 |   - |
 4 |     Add ``dwave.plugins.sklearn.utilities.cov`` function. A drop-in replacement
 5 |     for ``numpy.cov`` that is modified to avoid unnecessary memory usage when
 6 |     working with ``numpy.memmap`` arrays.
 7 |   - |
 8 |     Add ``dwave.plugins.sklearn.utilities.corrcoef`` function. A drop-in replacement
 9 |     for ``numpy.corrcoef`` that is modified to avoid unnecessary memory usage when
10 |     working with ``numpy.memmap`` arrays.
11 |   - |
12 |     Add ``dwave.plugins.sklearn.utilities.dot_2d`` function. A drop-in replacement
13 |     for ``numpy.dot`` for 2d arrays that is modified to avoid unnecessary memory usage when
14 |     working with ``numpy.memmap`` arrays.
15 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = [
 3 |     "setuptools>=77.0.3",   # PEP 639
 4 | ]
 5 | build-backend = "setuptools.build_meta"
 6 | 
 7 | [project]
 8 | name = "dwave-scikit-learn-plugin"
 9 | dynamic = ["version"]
10 | authors = [
11 |     {name = "D-Wave Inc.", email = "tools@dwavesys.com"},
12 | ]
13 | description = "A plugin to scikit-learn for quantum-classical hybrid solving."
14 | license = "Apache-2.0"
15 | license-files = ["LICENSE"]
16 | classifiers = [
17 |     "Operating System :: OS Independent",
18 |     "Programming Language :: Python",
19 |     "Programming Language :: Python :: 3",
20 | ]
21 | requires-python = ">= 3.9"
22 | dependencies = [
23 |     "dimod>=0.12.20",
24 |     "dwave-optimization>=0.6.2",
25 |     "dwave-system>=1.32.0",
26 |     "numpy>=2.0.2",
27 |     "scikit-learn>=1.6.1",
28 | ]
29 | 
30 | [project.readme]
31 | file = "README.md"
32 | content-type = "text/markdown"
33 | 
34 | [project.urls]
35 | Homepage = "https://github.com/dwavesystems/dwave-scikit-learn-plugin"
36 | Download = "https://github.com/dwavesystems/dwave-scikit-learn-plugin/releases"
37 | 
38 | [tool.setuptools.packages.find]
39 | include = ["dwave.*"]
40 | 
41 | [tool.setuptools.dynamic]
42 | version = {attr = "dwave.plugins.sklearn.__version__"}
43 | 
44 | [tool.coverage.run]
45 | omit = ["tests/*"]
46 | source = ["dwave/plugins/sklearn"]
47 | 
48 | [tool.coverage.report]
49 | include_namespace_packages = true
50 | exclude_lines = [
51 |     "pragma: no cover",
52 |     "raise NotImplementedError",
53 |     "if __name__ == .__main__.:",
54 | ]
55 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # vscode
  2 | .vscode/
  3 | 
  4 | # Byte-compiled / optimized / DLL files
  5 | __pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | 
  9 | # C extensions
 10 | *.so
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | downloads/
 18 | eggs/
 19 | .eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | pip-wheel-metadata/
 27 | share/python-wheels/
 28 | *.egg-info/
 29 | .installed.cfg
 30 | *.egg
 31 | MANIFEST
 32 | 
 33 | # PyInstaller
 34 | #  Usually these files are written by a python script from a template
 35 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 36 | *.manifest
 37 | *.spec
 38 | 
 39 | # Installer logs
 40 | pip-log.txt
 41 | pip-delete-this-directory.txt
 42 | 
 43 | # Unit test / coverage reports
 44 | htmlcov/
 45 | .tox/
 46 | .nox/
 47 | .coverage
 48 | .coverage.*
 49 | .cache
 50 | nosetests.xml
 51 | coverage.xml
 52 | *.cover
 53 | *.py,cover
 54 | .hypothesis/
 55 | .pytest_cache/
 56 | 
 57 | # Translations
 58 | *.mo
 59 | *.pot
 60 | 
 61 | # Django stuff:
 62 | *.log
 63 | local_settings.py
 64 | db.sqlite3
 65 | db.sqlite3-journal
 66 | 
 67 | # Flask stuff:
 68 | instance/
 69 | .webassets-cache
 70 | 
 71 | # Scrapy stuff:
 72 | .scrapy
 73 | 
 74 | # Sphinx documentation
 75 | docs/_build/
 76 | 
 77 | # PyBuilder
 78 | target/
 79 | 
 80 | # Jupyter Notebook
 81 | .ipynb_checkpoints
 82 | 
 83 | # IPython
 84 | profile_default/
 85 | ipython_config.py
 86 | 
 87 | # pyenv
 88 | .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 98 | __pypackages__/
 99 | 
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 | 
104 | # SageMath parsed files
105 | *.sage.py
106 | 
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 | 
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 | 
120 | # Rope project settings
121 | .ropeproject
122 | 
123 | # mkdocs documentation
124 | /site
125 | 
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 | 
131 | # Pyre type checker
132 | .pyre/
133 | 


--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
  1 | version: 2.1
  2 | 
  3 | orbs:
  4 |   ocean: dwave/ocean@1
  5 |   windows: circleci/windows@5.0
  6 | 
  7 | environment:
  8 |   PIP_PROGRESS_BAR: 'off'
  9 | 
 10 | jobs:
 11 |   test-linux:
 12 |     parameters:
 13 |       python-version:
 14 |         type: string
 15 | 
 16 |     docker:
 17 |       - image: python:<< parameters.python-version >>
 18 | 
 19 |     steps:
 20 |       - checkout
 21 | 
 22 |       - ocean/pip-install:
 23 |           requirements: tests/requirements.txt
 24 |           cache: true
 25 | 
 26 |       - ocean/pip-install:
 27 |           requirements: requirements.txt
 28 |           packages: .
 29 |           cache: false
 30 | 
 31 |       - ocean/coverage-run-unittest
 32 | 
 33 |   test-macos:
 34 |     parameters:
 35 |       python-version:
 36 |         type: string
 37 | 
 38 |     executor:
 39 |       name: ocean/macos
 40 |       xcode: "16.2.0"
 41 | 
 42 |     steps:
 43 |       - checkout
 44 | 
 45 |       - ocean/brew-install-pyenv:
 46 |           update-homebrew: true
 47 | 
 48 |       - ocean/pyenv-install-python:
 49 |           python-version: << parameters.python-version >>
 50 |           cache: true
 51 | 
 52 |       - ocean/pip-install:
 53 |           requirements: tests/requirements.txt
 54 |           cache: true
 55 | 
 56 |       - ocean/pip-install:
 57 |           requirements: requirements.txt
 58 |           packages: .
 59 |           cache: false
 60 | 
 61 |       - ocean/coverage-run-unittest:
 62 |           upload-coverage: true
 63 | 
 64 |   test-windows:
 65 |     parameters:
 66 |       python-version:
 67 |         type: string
 68 | 
 69 |     executor:
 70 |       name: windows/default
 71 | 
 72 |     steps:
 73 |       - checkout
 74 | 
 75 |       - ocean/nuget-install-python:
 76 |           python-version: << parameters.python-version >>
 77 |           cache: true
 78 | 
 79 |       - ocean/pip-install:
 80 |           requirements: tests/requirements.txt
 81 |           cache: false
 82 | 
 83 |       - ocean/pip-install:
 84 |           requirements: requirements.txt
 85 |           packages: .
 86 |           cache: false
 87 | 
 88 |       - ocean/coverage-run-unittest
 89 | 
 90 |   deploy:
 91 |     docker:
 92 |       - image: python:3.12
 93 | 
 94 |     steps:
 95 |       - checkout
 96 | 
 97 |       - run:
 98 |           name: create virtualenv
 99 |           command: |
100 |             python -m venv env
101 | 
102 |       - run:
103 |           name: build sdist and bdist
104 |           command: |
105 |             . env/bin/activate
106 |             pip install -U pip setuptools wheel
107 |             python setup.py sdist
108 |             python setup.py bdist_wheel
109 | 
110 |       - run:
111 |           name: upload
112 |           command: |
113 |             . env/bin/activate
114 |             pip install twine
115 |             twine check dist/*
116 |             twine upload -u "$PYPI_USERNAME" -p "$PYPI_PASSWORD" --skip-existing ./dist/*
117 | 
118 | workflows:
119 |   version: 2
120 | 
121 |   test:
122 |     jobs:
123 |       - test-linux:
124 |           matrix:
125 |             parameters:
126 |               python-version: &python-versions ["3.9.21", "3.10.16", "3.11.11", "3.12.8", "3.13.1"]
127 |       - test-macos:
128 |           matrix:
129 |             parameters:
130 |               python-version: *python-versions
131 |       - test-windows:
132 |           matrix:
133 |             parameters:
134 |               # note: limit to versions available via nuget
135 |               python-version: &python-versions-windows ["3.9.13", "3.10.11", "3.11.9", "3.12.8", "3.13.1"]
136 | 
137 |   deploy:
138 |     jobs:
139 |       - deploy:
140 |           filters: &on-tag-push
141 |             tags:
142 |               only: /^[0-9]+(\.[0-9]+)*((\.dev|rc)([0-9]+)?)?$/
143 |             branches:
144 |               ignore: /.*/
145 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![PyPI](https://img.shields.io/pypi/v/dwave-scikit-learn-plugin.svg)](https://pypi.python.org/pypi/dwave-scikit-learn-plugin)
  2 | [![CircleCI](https://dl.circleci.com/status-badge/img/gh/dwavesystems/dwave-scikit-learn-plugin/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/dwavesystems/dwave-scikit-learn-plugin)
  3 | 
  4 | # D-Wave `scikit-learn` Plugin
  5 | 
  6 | This package provides a [scikit-learn](https://scikit-learn.org/) transformer for 
  7 | [feature selection](https://en.wikipedia.org/wiki/Feature_selection) using a
  8 | quantum-classical [hybrid solver](https://docs.ocean.dwavesys.com/en/stable/concepts/hybrid.html).
  9 | 
 10 | This plugin makes use of a Leap™ quantum-classical hybrid solver. Developers can get started by
 11 | [signing up](https://cloud.dwavesys.com/leap/signup/) for the Leap quantum cloud service for free.
 12 | Those seeking a more collaborative approach and assistance with building a production application can
 13 | reach out to D-Wave [directly](https://www.dwavesys.com/solutions-and-products/professional-services/) and also explore the feature selection [offering](https://aws.amazon.com/marketplace/pp/prodview-bsrc3yuwgjbo4) in AWS Marketplace.
 14 | 
 15 | The package's main class, `SelectFromQuadraticModel`, can be used in any existing `sklearn` pipeline.
 16 | For an introduction to hybrid methods for feature selection, see the [Feature Selection for CQM](https://github.com/dwave-examples/feature-selection-cqm) example.
 17 | 
 18 | ## Examples
 19 | 
 20 | ### Basic Usage
 21 | 
 22 | A minimal example of using the plugin to select 20 of 30 features of an `sklearn` dataset: 
 23 | 
 24 | ```python
 25 | >>> from sklearn.datasets import load_breast_cancer
 26 | >>> from dwave.plugins.sklearn import SelectFromQuadraticModel
 27 | ... 
 28 | >>> X, y = load_breast_cancer(return_X_y=True)
 29 | >>> X.shape
 30 | (569, 30)
 31 | >>> # solver can also be equal to "cqm"
 32 | >>> X_new = SelectFromQuadraticModel(num_features=20, solver="nl").fit_transform(X, y)
 33 | >>> X_new.shape
 34 | (569, 20)
 35 | ```
 36 | 
 37 | For large problems, the default runtime may be insufficient. See the documentation of the CQM solver's [`time_limit`](https://docs.dwavequantum.com/en/latest/industrial_optimization/solver_cqm_parameters.html#time-limit) or the Nonlinear (NL) solver's
 38 | [`time_limit`](https://docs.dwavequantum.com/en/latest/industrial_optimization/solver_nl_parameters.html#time-limit)
 39 | parameter to find the minimum accepted runtime for your problem; alternatively, simply submit as above
 40 | and check the returned error message for the required runtime.
 41 | 
 42 | The feature selector can be re-instantiated with a longer time limit.
 43 | 
 44 | ```python
 45 | >>> # solver can also be equal to "nl"
 46 | >>> X_new = SelectFromQuadraticModel(num_features=20, time_limit=200, solver="cqm").fit_transform(X, y)
 47 | ```
 48 | 
 49 | ### Tuning
 50 | 
 51 | You can use `SelectFromQuadraticModel` with scikit-learn's
 52 | [hyper-parameter optimizers](https://scikit-learn.org/stable/modules/classes.html#hyper-parameter-optimizers).
 53 | 
 54 | For example, the number of features can be tuned using a grid search. **Please note that this will
 55 | submit many problems to the hybrid solver.**
 56 | 
 57 | ```python
 58 | >>> import numpy as np
 59 | ...
 60 | >>> from sklearn.datasets import load_breast_cancer
 61 | >>> from sklearn.ensemble import RandomForestClassifier
 62 | >>> from sklearn.model_selection import GridSearchCV
 63 | >>> from sklearn.pipeline import Pipeline
 64 | >>> from dwave.plugins.sklearn import SelectFromQuadraticModel
 65 | ...
 66 | >>> X, y = load_breast_cancer(return_X_y=True)
 67 | ...
 68 | >>> num_features = X.shape[1]
 69 | >>> searchspace = np.linspace(1, num_features, num=5, dtype=int, endpoint=True)
 70 | ...
 71 | >>> # solver can also be equal to "cqm"
 72 | >>> pipe = Pipeline([
 73 | ...   ('feature_selection', SelectFromQuadraticModel(solver="nl")),
 74 | ...   ('classification', RandomForestClassifier())
 75 | ... ])
 76 | ...
 77 | >>> clf = GridSearchCV(pipe, param_grid=dict(feature_selection__num_features=searchspace))
 78 | >>> search = clf.fit(X, y)
 79 | >>> print(search.best_params_)
 80 | {'feature_selection__num_features': 22}
 81 | ```
 82 | 
 83 | ## Installation
 84 | 
 85 | To install the core package:
 86 | 
 87 | ```bash
 88 | pip install dwave-scikit-learn-plugin
 89 | ```
 90 | 
 91 | ## License
 92 | 
 93 | Released under the Apache License 2.0
 94 | 
 95 | ## Contributing
 96 | 
 97 | Ocean's [contributing guide](https://docs.ocean.dwavesys.com/en/stable/contributing.html)
 98 | has guidelines for contributing to Ocean packages.
 99 | 
100 | ### Release Notes
101 | 
102 | **dwave-scikit-learn-plugin** makes use of [reno](https://docs.openstack.org/reno/) to manage its
103 | release notes.
104 | 
105 | When making a contribution to **dwave-scikit-learn-plugin** that will affect users, create a new
106 | release note file by running
107 | 
108 | ```bash
109 | reno new your-short-descriptor-here
110 | ```
111 | 
112 | You can then edit the file created under ``releasenotes/notes/``.
113 | Remove any sections not relevant to your changes.
114 | Commit the file along with your changes.
115 | 


--------------------------------------------------------------------------------
/tests/test_utilities.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 D-Wave Systems Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #   http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # Some tests are adapted from NumPy under the following license.
 16 | 
 17 | # Copyright (c) 2005-2022, NumPy Developers.
 18 | # All rights reserved.
 19 | #
 20 | # Redistribution and use in source and binary forms, with or without
 21 | # modification, are permitted provided that the following conditions are
 22 | # met:
 23 | #
 24 | #     * Redistributions of source code must retain the above copyright
 25 | #        notice, this list of conditions and the following disclaimer.
 26 | #
 27 | #     * Redistributions in binary form must reproduce the above
 28 | #        copyright notice, this list of conditions and the following
 29 | #        disclaimer in the documentation and/or other materials provided
 30 | #        with the distribution.
 31 | #
 32 | #     * Neither the name of the NumPy Developers nor the names of any
 33 | #        contributors may be used to endorse or promote products derived
 34 | #        from this software without specific prior written permission.
 35 | #
 36 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 37 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 38 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 39 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 40 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 41 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 42 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 43 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 44 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 45 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 46 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 47 | 
 48 | import os.path
 49 | import tempfile
 50 | import unittest
 51 | 
 52 | import numpy as np
 53 | 
 54 | from dwave.plugins.sklearn.utilities import corrcoef, cov, dot_2d
 55 | 
 56 | 
 57 | class TestCorrCoef(unittest.TestCase):
 58 |     def test_agreement(self):
 59 |         rng = np.random.default_rng(42)
 60 |         X = rng.uniform(size=(100, 100))
 61 |         for rowvar in (True, False):
 62 |             with self.subTest(rowvar=rowvar):
 63 |                 np.testing.assert_array_equal(
 64 |                     corrcoef(X, rowvar=rowvar), np.corrcoef(X, rowvar=rowvar))
 65 | 
 66 |     def test_memmap(self):
 67 |         # Smoketest for memmap.
 68 |         # There isn't really a nice way to test memory usage, but it's useful to
 69 |         # have this test present for manual testing.
 70 |         rng = np.random.default_rng(42)
 71 | 
 72 |         size = (1_000, 100)
 73 |         # size = (25_000, 100_000)  # The max size we want to support
 74 | 
 75 |         with tempfile.TemporaryFile() as fX:
 76 |             with tempfile.NamedTemporaryFile() as fout:
 77 |                 X = np.memmap(fX, "float64", mode="w+", shape=size)
 78 |                 X[:, :10] = rng.uniform(size=(X.shape[0], 10))  # so we don't get stddev = 0
 79 |                 X[:, 10:] = 1
 80 |                 out = np.memmap(fout, "float64", mode="w+", shape=(X.shape[0], X.shape[0]))
 81 | 
 82 |                 corrcoef(X, rowvar=True, out=out, copy=False)
 83 | 
 84 |     # the following tests are adapted from NumPy
 85 | 
 86 |     def test_non_array(self):
 87 |         np.testing.assert_almost_equal(
 88 |             corrcoef([[0, 1, 0], [1, 0, 1]]), [[1., -1.], [-1.,  1.]])
 89 | 
 90 |     def test_simple(self):
 91 |         A = np.array(
 92 |             [[0.15391142, 0.18045767, 0.14197213],
 93 |              [0.70461506, 0.96474128, 0.27906989],
 94 |              [0.9297531, 0.32296769, 0.19267156]])
 95 |         res1 = np.array(
 96 |             [[1., 0.9379533, -0.04931983],
 97 |              [0.9379533, 1., 0.30007991],
 98 |              [-0.04931983, 0.30007991, 1.]])
 99 |         tgt1 = corrcoef(A)
100 |         np.testing.assert_almost_equal(tgt1, res1)
101 |         self.assertTrue(np.all(np.abs(tgt1) <= 1.0))
102 | 
103 |     def test_complex(self):
104 |         x = np.array([[1, 2, 3], [1j, 2j, 3j]])
105 |         res = corrcoef(x)
106 |         tgt = np.array([[1., -1.j], [1.j, 1.]])
107 |         np.testing.assert_allclose(res, tgt)
108 |         self.assertTrue(np.all(np.abs(res) <= 1.0))
109 | 
110 | 
111 | class TestCov(unittest.TestCase):
112 |     def test_agreement(self):
113 |         rng = np.random.default_rng(42)
114 |         X = rng.uniform(size=(10, 20))
115 |         for rowvar in (True, False):
116 |             with self.subTest(rowvar=rowvar):
117 |                 np.testing.assert_array_equal(cov(X, rowvar=rowvar), np.cov(X, rowvar=rowvar))
118 | 
119 |     def test_memmap(self):
120 |         # Smoketest for memmap.
121 |         # There isn't really a nice way to test memory usage, but it's useful to
122 |         # have this test present for manual testing.
123 |         size = (1_000, 100)
124 |         # size = (25_000, 100_000)  # The max size we want to support
125 | 
126 |         with tempfile.TemporaryFile() as fX:
127 |             with tempfile.NamedTemporaryFile() as fout:
128 |                 X = np.memmap(fX, "float64", mode="w+", shape=size)
129 |                 X[:] = 1
130 |                 out = np.memmap(fout, "float64", mode="w+", shape=(X.shape[0], X.shape[0]))
131 | 
132 |                 cov(X, rowvar=True, out=out, copy=False)
133 | 
134 |     # the following tests are adapted from NumPy
135 | 
136 |     def test_basic(self):
137 |         x1 = np.array([[0, 2], [1, 1], [2, 0]]).T
138 |         res1 = np.array([[1., -1.], [-1., 1.]])
139 |         np.testing.assert_allclose(cov(x1), res1)
140 | 
141 |     def test_complex(self):
142 |         x = np.array([[1, 2, 3], [1j, 2j, 3j]])
143 |         res = np.array([[1., -1.j], [1.j, 1.]])
144 |         np.testing.assert_allclose(cov(x), res)
145 | 
146 |     def test_1D_rowvar(self):
147 |         x3 = np.array([0.3942, 0.5969, 0.7730, 0.9918, 0.7964])
148 |         np.testing.assert_allclose(cov(x3), cov(x3, rowvar=False))
149 | 
150 | 
151 | class TestDot2D(unittest.TestCase):
152 |     def test_agreement(self):
153 |         rng = np.random.default_rng(42)
154 |         X = rng.uniform(size=(10, 20))
155 |         Y = rng.uniform(size=(20, 100))
156 |         np.testing.assert_array_equal(dot_2d(X, Y), np.dot(X, Y))
157 | 
158 |     def test_chunksize(self):
159 |         # make sure that chunk sizes that don't align with the total number
160 |         # of rows still work
161 |         rng = np.random.default_rng(42)
162 |         X = rng.uniform(size=(10, 20))
163 |         Y = rng.uniform(size=(20, 15))
164 |         np.testing.assert_array_almost_equal(dot_2d(X, Y, chunksize=86), np.dot(X, Y))
165 |         np.testing.assert_array_almost_equal(dot_2d(X, Y, chunksize=365), np.dot(X, Y))
166 | 
167 |     def test_memmap(self):
168 |         # Smoketest for memmap.
169 |         # There isn't really a nice way to test memory usage, but it's useful to
170 |         # have this test present for manual testing.
171 |         size = (1_000, 100)
172 |         # size = (25_000, 100_000)  # The max size we want to support
173 | 
174 |         with tempfile.TemporaryFile() as fX:
175 |             with tempfile.NamedTemporaryFile() as fout:
176 |                 X = np.memmap(fX, "float64", mode="w+", shape=size)
177 |                 X[:] = 1
178 |                 out = np.memmap(fout, "float64", mode="w+", shape=(X.shape[0], X.shape[0]))
179 | 
180 |                 dot_2d(X, X.T, out=out)
181 | 


--------------------------------------------------------------------------------
/dwave/plugins/sklearn/utilities.py:
--------------------------------------------------------------------------------
  1 | # The following traversal code is adapted from NumPy's implementation.
  2 | 
  3 | # Copyright (c) 2005-2022, NumPy Developers.
  4 | # All rights reserved.
  5 | #
  6 | # Redistribution and use in source and binary forms, with or without
  7 | # modification, are permitted provided that the following conditions are
  8 | # met:
  9 | #
 10 | #     * Redistributions of source code must retain the above copyright
 11 | #        notice, this list of conditions and the following disclaimer.
 12 | #
 13 | #     * Redistributions in binary form must reproduce the above
 14 | #        copyright notice, this list of conditions and the following
 15 | #        disclaimer in the documentation and/or other materials provided
 16 | #        with the distribution.
 17 | #
 18 | #     * Neither the name of the NumPy Developers nor the names of any
 19 | #        contributors may be used to endorse or promote products derived
 20 | #        from this software without specific prior written permission.
 21 | #
 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 25 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 26 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 27 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 28 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 30 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 31 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 32 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 33 | 
 34 | # Modifications are licensed under the Apache 2.0 Software license.
 35 | 
 36 | # Copyright 2023 D-Wave Systems Inc.
 37 | #
 38 | # Licensed under the Apache License, Version 2.0 (the "License");
 39 | # you may not use this file except in compliance with the License.
 40 | # You may obtain a copy of the License at
 41 | #
 42 | #   http://www.apache.org/licenses/LICENSE-2.0
 43 | #
 44 | # Unless required by applicable law or agreed to in writing, software
 45 | # distributed under the License is distributed on an "AS IS" BASIS,
 46 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 47 | # See the License for the specific language governing permissions and
 48 | # limitations under the License.
 49 | 
 50 | import typing
 51 | 
 52 | import numpy as np
 53 | import numpy.typing as npt
 54 | 
 55 | __all__ = ["corrcoef", "cov", "dot_2d"]
 56 | 
 57 | 
 58 | def corrcoef(x: npt.ArrayLike, *,
 59 |              out: typing.Optional[np.ndarray] = None,
 60 |              rowvar: bool = True,
 61 |              copy: bool = True,
 62 |              ) -> np.ndarray:
 63 |     """A drop-in replacement for :func:`numpy.corrcoef`.
 64 | 
 65 |     This method is modified to avoid unnecessary memory usage when working with
 66 |     :class:`numpy.memmap` arrays.
 67 |     It does not support the full range of arguments accepted by
 68 |     :func:`numpy.corrcoef`.
 69 | 
 70 |     Additionally, in the case that a row of ``x`` is fixed, this method
 71 |     will return a correlation value of 0 rather than :class:`numpy.nan`.
 72 | 
 73 |     Args:
 74 |         x: See :func:`numpy.corrcoef`.
 75 | 
 76 |         out: Output argument. This must be the exact kind that would be returned
 77 |             if it was not used.
 78 | 
 79 |         rowvar: See :func:`numpy.corrcoef`.
 80 | 
 81 |         copy: If ``True``, ``x`` is not modified by this function.
 82 | 
 83 |     Returns:
 84 |         See :func:`numpy.corrcoef`.
 85 | 
 86 |     """
 87 |     c = cov(x, out=out, rowvar=rowvar, copy=copy)
 88 |     try:
 89 |         d = np.diag(c)
 90 |     except ValueError:
 91 |         # scalar covariance
 92 |         # nan if incorrect value (nan, inf, 0), 1 otherwise
 93 |         return c / c
 94 |     stddev = np.sqrt(d.real)
 95 | 
 96 |     # the places that stddev == 0 are exactly the places that the columns
 97 |     # are fixed. We can safely ignore those when dividing
 98 |     np.divide(c, stddev[:, None], out=c, where=stddev[:, None] != 0)
 99 |     np.divide(c, stddev[None, :], out=c, where=stddev[None, :] != 0)
100 | 
101 |     # Clip real and imaginary parts to [-1, 1].  This does not guarantee
102 |     # abs(a[i,j]) <= 1 for complex arrays, but is the best we can do without
103 |     # excessive work.
104 |     np.clip(c.real, -1, 1, out=c.real)
105 |     if np.iscomplexobj(c):
106 |         np.clip(c.imag, -1, 1, out=c.imag)
107 | 
108 |     return c
109 | 
110 | 
def cov(m: npt.ArrayLike, *,
        out: typing.Optional[np.ndarray] = None,
        rowvar: bool = True,
        copy: bool = True,
        ) -> np.ndarray:
    """A drop-in replacement for :func:`numpy.cov`.

    This method is modified to avoid unnecessary memory usage when working with
    :class:`numpy.memmap` arrays.
    It does not support the full range of arguments accepted by
    :func:`numpy.cov`.

    Args:
        m: See :func:`numpy.cov`.

        out: Output argument. This must be the exact kind that would be returned
            if it was not used.

        rowvar: See :func:`numpy.cov`.

        copy: If ``True``, ``m`` is not modified by this function.
            If ``False``, ``m`` may be centered in place (this is always
            the case for a :class:`numpy.memmap`).

    Returns:
        See :func:`numpy.cov`.

    Raises:
        ValueError: If ``copy`` is ``True`` and ``m`` exposes a ``flush``
            attribute (e.g. a :class:`numpy.memmap`), or if the input
            does not have 2 dimensions after conversion.

    """
    # we want to modify X, so if copy=True we make a copy and re-call
    if copy:
        if hasattr(m, "flush"):
            # we could do a lot of fiddling here, but it's easier to just
            # disallow this case and rely on the user making a modifiable
            # X
            raise ValueError("memmap arrays cannot be copied easily")

        return cov(np.array(m), rowvar=rowvar, copy=False, out=out)

    # handle array-like. A memmap is used as-is so that it is never pulled
    # into memory as a whole
    if isinstance(m, np.memmap):
        X = m
    else:
        X = np.atleast_2d(np.asarray(m, dtype=np.result_type(m, np.float64)))

    if X.ndim != 2:
        raise ValueError("X must have 2 dimensions")

    # a single row is always treated as one variable
    if not rowvar and X.shape[0] != 1:
        X = X.T

    # mean of each variable (row)
    avg = np.average(X, axis=1)

    # Determine the normalization (number of observations minus one).
    # With a single observation this is 0 and the scaling below divides by
    # zero.
    fact = max(X.shape[1] - 1, 0)

    # center in place
    X -= avg[:, None]

    # Check the array we actually hold: an array-like that merely exposes a
    # ``flush`` attribute has been converted to a plain ndarray above, which
    # has no ``flush`` method.
    if hasattr(X, "flush"):
        X.flush()

    X_T = X.T

    out = dot_2d(X, X_T.conj(), out=out)
    out *= np.true_divide(1, fact)

    if hasattr(out, "flush"):
        out.flush()

    return out
179 | 
180 | 
def dot_2d(a: npt.ArrayLike, b: npt.ArrayLike, *,
           out: typing.Optional[np.ndarray] = None,
           chunksize: int = int(1e+9),
           ) -> np.ndarray:
    """A drop-in replacement for :func:`numpy.dot` for 2d arrays.

    This method is modified to avoid unnecessary memory usage when working with
    :class:`numpy.memmap` arrays. The product is computed a block of rows
    at a time, and a memmap ``out`` is flushed after each block.

    Args:
        a: See :func:`numpy.dot`. ``a.ndim`` must be 2.
        b: See :func:`numpy.dot`. ``b.ndim`` must be 2.
        out: See :func:`numpy.dot`. Must have shape
            ``(a.shape[0], b.shape[1])``.
        chunksize: The number of bytes that should be created by each step
            of the multiplication. This is used to keep the total memory
            usage low when multiplying :class:`numpy.memmap` arrays.
            At least one row is always computed per step.

    Returns:
        See :func:`numpy.dot`. When ``out`` is given it is the object
        returned.

    Raises:
        ValueError: If ``a`` or ``b`` is not 2d, or if ``out`` does not have
            the shape of the product.

    """
    if not isinstance(a, np.memmap):
        a = np.asarray(a)
    if not isinstance(b, np.memmap):
        b = np.asarray(b)

    if a.ndim != 2:
        raise ValueError("a must be a 2d array")
    if b.ndim != 2:
        raise ValueError("b must be a 2d array")

    expected_shape = (a.shape[0], b.shape[1])

    if out is None:
        out = np.empty(expected_shape, dtype=np.result_type(a, b))
    elif out.shape != expected_shape:
        # comparing the whole shape also rejects an ``out`` of the wrong rank
        raise ValueError(f"out must be a ({a.shape[0]}, {b.shape[1]}) array")

    is_memmap = hasattr(out, "flush")

    row_nbytes = out.dtype.itemsize * out.shape[1]
    if row_nbytes:
        num_rows = max(chunksize // row_nbytes, 1)
    else:
        # the result has no columns, so there is nothing to chunk; do
        # everything in a single step rather than dividing by zero
        num_rows = max(out.shape[0], 1)

    for start in range(0, out.shape[0], num_rows):
        stop = start + num_rows
        np.dot(a[start:stop, :], b, out=out[start:stop, :])

        if is_memmap:
            out.flush()

    return out
227 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/tests/test_transformer.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 D-Wave Systems Inc.
  2 | #
  3 | #    Licensed under the Apache License, Version 2.0 (the "License");
  4 | #    you may not use this file except in compliance with the License.
  5 | #    You may obtain a copy of the License at
  6 | #
  7 | #        http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | #    Unless required by applicable law or agreed to in writing, software
 10 | #    distributed under the License is distributed on an "AS IS" BASIS,
 11 | #    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | #    See the License for the specific language governing permissions and
 13 | #    limitations under the License.
 14 | 
 15 | import concurrent.futures
 16 | import unittest
 17 | import unittest.mock
 18 | import warnings
 19 | import tempfile
 20 | from parameterized import parameterized
 21 | 
 22 | import dimod
 23 | import numpy as np
 24 | 
 25 | from dwave.optimization import Model
 26 | from dwave.cloud.exceptions import ConfigFileError, SolverAuthenticationError
 27 | from dwave.system import LeapHybridNLSampler
 28 | from dwave.system import LeapHybridCQMSampler
 29 | from sklearn.datasets import load_iris
 30 | from sklearn.ensemble import RandomForestClassifier
 31 | from sklearn.model_selection import GridSearchCV
 32 | from sklearn.pipeline import Pipeline
 33 | 
 34 | from dwave.plugins.sklearn.transformers import SelectFromQuadraticModel
 35 | from dwave.plugins.sklearn.utilities import corrcoef
 36 | 
# State that ``MockNL.sample`` assigns to every decision of the model it is
# given. Tests rebind this (via ``global``) before fitting with the "nl"
# solver; presumably the value must be a selection that is feasible for the
# problem being fitted.
FEASIBLE_NL_SOLUTION = []
 38 | 
class MockCQM(dimod.ExactCQMSolver):
    """Test double with the keyword interface used for CQM sampling.

    Solving is delegated to :class:`dimod.ExactCQMSolver`.
    """

    def sample_cqm(self, cqm: dimod.CQM, *, time_limit: float, label: str) -> dimod.SampleSet:
        # ``time_limit`` and ``label`` are required keywords but are not
        # forwarded to the exact solver
        return super().sample_cqm(cqm)

    def min_time_limit(self, cqm):
        # constant value, independent of ``cqm``
        return 1
 45 | 
 46 | 
 47 | class MockNL():
 48 |     def sample(self, nl: Model, *, time_limit: float, label: str):
 49 |         nl.states.resize(1)
 50 | 
 51 |         for decision in nl.iter_decisions():
 52 |             decision.set_state(0, FEASIBLE_NL_SOLUTION)
 53 | 
 54 |         return concurrent.futures.Future()
 55 | 
 56 | 
 57 | @unittest.mock.patch("dwave.plugins.sklearn.transformers.LeapHybridNLSampler", MockNL)
 58 | @unittest.mock.patch("dwave.plugins.sklearn.transformers.LeapHybridCQMSampler", MockCQM)
 59 | class TestSelectFromQuadraticModel(unittest.TestCase):
 60 |     @classmethod
 61 |     def setUpClass(cls):
 62 |         rng = np.random.default_rng(138984)
 63 |         cls.X = rng.uniform(-10, 10, size=(100, 9))
 64 |         cls.y = np.asarray(rng.uniform(0, 1, size=100) > 0.5, dtype=int)
 65 |     
 66 |     @parameterized.expand([
 67 |         (0.1, 30, "cqm"), 
 68 |         (0.1, 15, "cqm"), 
 69 |         (0.1, 30, "nl"), 
 70 |         (0.1, 15, "nl"), 
 71 |     ])
 72 |     def test_init_good(self, alpha, time_limit, solver):
 73 |         a = SelectFromQuadraticModel(solver=solver)
 74 | 
 75 |         b = SelectFromQuadraticModel(alpha=alpha, solver=solver)
 76 | 
 77 |         c = SelectFromQuadraticModel(alpha=alpha, time_limit=time_limit, solver=solver)
 78 | 
 79 |         d = SelectFromQuadraticModel(time_limit=time_limit, solver=solver)
 80 | 
 81 |         self.assertIsInstance(a, SelectFromQuadraticModel)
 82 |         self.assertIsInstance(b, SelectFromQuadraticModel)
 83 |         self.assertIsInstance(c, SelectFromQuadraticModel)
 84 |         self.assertIsInstance(d, SelectFromQuadraticModel)
 85 | 
 86 |         self.assertEqual(a.alpha, 0.5)
 87 |         self.assertEqual(b.alpha, 0.1)
 88 |         self.assertEqual(c.alpha, 0.1)
 89 |         self.assertEqual(d.alpha, 0.5)
 90 | 
 91 |         self.assertEqual(a.time_limit, None)
 92 |         self.assertEqual(b.time_limit, None)
 93 |         self.assertEqual(c.time_limit, time_limit)
 94 |         self.assertEqual(d.time_limit, time_limit)
 95 | 
 96 |         self.assertIsInstance(
 97 |             SelectFromQuadraticModel(alpha=0), SelectFromQuadraticModel
 98 |         )
 99 | 
100 |     @parameterized.expand([
101 |         (-10, "cqm"), 
102 |         (10, "cqm"), 
103 |         (-10, "nl"), 
104 |         (10, "nl"), 
105 |     ])
106 |     def test_init_bad(self, alpha, solver):
107 |         self.assertRaises(ValueError, SelectFromQuadraticModel, alpha=alpha, solver=solver)
108 |         self.assertRaises(ValueError, SelectFromQuadraticModel, alpha=alpha, solver=solver)
109 |     
110 |     @parameterized.expand([
111 |         (7, "cqm"), 
112 |         (5, "cqm"), 
113 |         (7, "nl"), 
114 |         (5, "nl"), 
115 |     ])
116 |     def test_fit(self, num_features, solver):
117 |         global FEASIBLE_NL_SOLUTION
118 |         if num_features==7:
119 |             FEASIBLE_NL_SOLUTION = [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0]
120 |         elif num_features==5:
121 |             FEASIBLE_NL_SOLUTION = [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]
122 | 
123 |         selector = SelectFromQuadraticModel(num_features=num_features, solver=solver)
124 | 
125 |         # test default numpy
126 | 
127 |         selector.fit(self.X, self.y)
128 |         self.assertEqual(sum(selector._mask), num_features)
129 | 
130 |         try:
131 |             self.X[:, selector._mask]
132 |         except Exception as e:
133 |             self.fail(e)
134 | 
135 |         # test non-default numpy
136 | 
137 |         selector.fit(self.X, self.y, num_features=num_features, solver=solver)
138 |         self.assertEqual(sum(selector._mask), num_features)
139 | 
140 |         try:
141 |             self.X[:, selector._mask]
142 |         except Exception as e:
143 |             self.fail(e)
144 |     
145 |     @parameterized.expand([
146 |         (7, "cqm"), 
147 |         (7, "nl"), 
148 |     ])
149 |     def test_fit_transform(self, num_features, solver):
150 |         global FEASIBLE_NL_SOLUTION 
151 |         FEASIBLE_NL_SOLUTION = [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]
152 | 
153 |         selector = SelectFromQuadraticModel(num_features=num_features, solver=solver)
154 | 
155 |         # test numpy without fit
156 |         x = selector.fit_transform(self.X, self.y, num_features=num_features-2, solver=solver)
157 | 
158 |         self.assertEqual(x.shape[1], num_features-2)
159 | 
160 |         x_from_fit = self.X[:, selector._mask]
161 |         np.testing.assert_array_equal(x, x_from_fit)
162 |     
163 |     @parameterized.expand([
164 |         (2, "cqm"), 
165 |         (2, "nl"), 
166 |     ])
167 |     def test_pipeline(self, num_features, solver):
168 |         global FEASIBLE_NL_SOLUTION 
169 |         FEASIBLE_NL_SOLUTION = [1.0, 1.0, 0.0, 0.0]
170 | 
171 |         X, y = load_iris(return_X_y=True)
172 | 
173 |         clf = Pipeline([
174 |           ('feature_selection', SelectFromQuadraticModel(num_features=num_features, solver=solver)),
175 |           ('classification', RandomForestClassifier())
176 |         ])
177 |         clf.fit(X, y)
178 | 
179 |         clf.predict(X)
180 |     
181 |     def test_alpha_0(self):
182 |         cqm = SelectFromQuadraticModel.correlation(self.X, self.y, num_features=3, alpha=0, solver="cqm")
183 |         self.assertTrue(not any(cqm.objective.linear.values()))
184 | 
185 |         X = np.atleast_2d(np.asarray(self.X))
186 |         y = np.asarray(self.y)
187 | 
188 |         with tempfile.TemporaryFile() as fX, tempfile.TemporaryFile() as fout:
189 |             # we make a copy of X because we'll be modifying it in-place within
190 |             # some of the functions
191 |             X_copy = np.memmap(fX, X.dtype, mode="w+", shape=(X.shape[0], X.shape[1] + 1))
192 |             X_copy[:, :-1] = X
193 |             X_copy[:, -1] = y
194 | 
195 |             # make the matrix that will hold the correlations
196 |             correlations = np.memmap(
197 |                 fout,
198 |                 dtype=np.result_type(X, y),
199 |                 mode="w+",
200 |                 shape=(X_copy.shape[1], X_copy.shape[1]),
201 |                 )
202 | 
203 |             # main calculation. It modifies X_copy in-place
204 |             corrcoef(X_copy, out=correlations, rowvar=False, copy=False)
205 | 
206 |             # we don't care about the direction of correlation in terms of
207 |             # the penalty/quality
208 |             np.absolute(correlations, out=correlations)
209 | 
210 |         label_corr = np.array(correlations[:-1,-1])
211 |         expected_linear = np.zeros(X.shape[1])
212 |         expected_linear += (-1.0 * label_corr * 0 * 3)
213 |         self.assertTrue(np.allclose(expected_linear.all(), 0))
214 |     
215 |     @parameterized.expand([
216 |         (3, 1, "cqm"), 
217 |         (3, 1, "nl"), 
218 |     ])
219 |     def test_alpha_1(self, num_features, alpha, solver):
220 |         global FEASIBLE_NL_SOLUTION
221 |         FEASIBLE_NL_SOLUTION = [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
222 | 
223 |         rng = np.random.default_rng(42)
224 | 
225 |         y = rng.uniform(size=1000)
226 | 
227 |         # make the first three columns exactly match the test data
228 |         X = rng.uniform(size=(1000, 10))
229 |         X[:, 0] = X[:, 1] = X[:, 2] = y
230 | 
231 |         selector = SelectFromQuadraticModel(num_features=num_features, alpha=alpha, solver=solver).fit(X, y)
232 | 
233 |         # with alpha=1, we should see that only the quality matters, so the
234 |         # first three should be selected despite being perfectly correlated
235 |         self.assertTrue(selector._get_support_mask()[0:3].all())
236 |         self.assertFalse(selector._get_support_mask()[3:].any())
237 | 
238 |     @parameterized.expand([
239 |         (1, "cqm"), 
240 |         (1, "nl"), 
241 |     ])
242 |     def test_xy_shape(self, num_features, solver):
243 |         with self.assertRaises(ValueError):
244 |             SelectFromQuadraticModel(num_features=num_features, solver=solver).fit([[0, 1]], [1, 2])
245 | 
246 |     def test_repr(self):
247 |         repr(SelectFromQuadraticModel(solver="cqm"))
248 |         repr(SelectFromQuadraticModel(solver="nl"))
249 | 
250 |     @parameterized.expand([
251 |         (2, "cqm"), 
252 |         (2, "nl"), 
253 |     ])
254 |     def test_gridsearch(self, num_features, solver):
255 |         global FEASIBLE_NL_SOLUTION
256 |         FEASIBLE_NL_SOLUTION = [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0]
257 |         rng = np.random.default_rng(42)
258 |         X = rng.uniform(-10, 10, size=(100, 9))
259 |         y = np.asarray(rng.uniform(0, 1, size=100) > 0.5, dtype=int)
260 | 
261 |         pipe = Pipeline([
262 |           ('feature_selection', SelectFromQuadraticModel(num_features=num_features, solver=solver)),
263 |           ('classification', RandomForestClassifier())
264 |         ])
265 | 
266 |         clf = GridSearchCV(pipe,
267 |                            param_grid=dict(
268 |                             feature_selection__num_features=[num_features+1],
269 |                             feature_selection__alpha=[0, .5]))
270 |         clf.fit(X, y)
271 | 
272 |     @parameterized.expand([
273 |         (2, "cqm"), 
274 |         (2, "nl"), 
275 |     ])
276 |     def test_one_row(self, num_features, solver):
277 |         X = [[-7.85717866, 1.93442648, 8.85760003]]
278 |         y = [1]
279 | 
280 |         with self.assertRaises(ValueError):
281 |             SelectFromQuadraticModel(num_features=num_features, solver=solver).fit(X, y)
282 | 
283 |     def test_fixed_column(self):
284 |         global FEASIBLE_NL_SOLUTION
285 |         FEASIBLE_NL_SOLUTION = [0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0]
286 |         X = np.copy(self.X)
287 | 
288 |         # fix two of the columns
289 |         X[:, 1] = 0
290 |         X[:, 5] = 1
291 | 
292 |         cqm = SelectFromQuadraticModel.correlation(X, self.y, alpha=0.5, num_features=5, solver="cqm")
293 |         fitted = SelectFromQuadraticModel(alpha=0.5, num_features=5, solver="cqm").fit(X, self.y)
294 | 
295 |         # in this case the linear bias for those two columns should be 0
296 |         self.assertEqual(cqm.objective.linear[1], 0)
297 |         self.assertEqual(cqm.objective.linear[5], 0)
298 | 
299 |         # as should the quadratic biases
300 |         self.assertEqual(cqm.objective.degree(1), 0)
301 |         self.assertEqual(cqm.objective.degree(5), 0)
302 | 
303 |         selected = SelectFromQuadraticModel(alpha=0.5, num_features=5, solver="nl").fit(X, self.y)
304 | 
305 |         # Check that the variables corresponding to constant columns are not present
306 |         self.assertEqual(selected._mask.all(), fitted._mask.all())
307 | 
class TestIntegration(unittest.TestCase):
    """End-to-end tests that need real Leap hybrid samplers to be constructible."""

    @classmethod
    def setUpClass(cls):
        # Skip the whole class when either hybrid sampler cannot be created
        # (the NL sampler is tried first, then the CQM sampler).
        try:
            for sampler_class in (LeapHybridNLSampler, LeapHybridCQMSampler):
                sampler_class()
        except (ConfigFileError, SolverAuthenticationError, ValueError):
            raise unittest.SkipTest("no hybrid solver available")

    def test_pipeline(self):
        """Fit and predict with the selector as the first step of a pipeline."""
        X, y = load_iris(return_X_y=True)

        steps = [
            ('feature_selection', SelectFromQuadraticModel(num_features=2)),
            ('classification', RandomForestClassifier()),
        ]
        clf = Pipeline(steps)
        clf.fit(X, y)

        clf.predict(X)
327 | 


--------------------------------------------------------------------------------
/dwave/plugins/sklearn/transformers.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 D-Wave Systems Inc.
  2 | #
  3 | #    Licensed under the Apache License, Version 2.0 (the "License");
  4 | #    you may not use this file except in compliance with the License.
  5 | #    You may obtain a copy of the License at
  6 | #
  7 | #        http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | #    Unless required by applicable law or agreed to in writing, software
 10 | #    distributed under the License is distributed on an "AS IS" BASIS,
 11 | #    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | #    See the License for the specific language governing permissions and
 13 | #    limitations under the License.
 14 | 
 15 | from __future__ import annotations
 16 | 
 17 | import itertools
 18 | import logging
 19 | import tempfile
 20 | import typing
 21 | import warnings
 22 | 
 23 | import dimod
 24 | import numpy as np
 25 | import numpy.typing as npt
 26 | 
 27 | from dwave.cloud.exceptions import ConfigFileError, SolverAuthenticationError
 28 | from dwave.system import LeapHybridCQMSampler, LeapHybridNLSampler
 29 | from dwave.optimization import Model
 30 | 
 31 | from sklearn.base import BaseEstimator
 32 | from sklearn.feature_selection import SelectorMixin
 33 | from sklearn.utils.validation import check_is_fitted
 34 | 
 35 | from dwave.plugins.sklearn.utilities import corrcoef
 36 | 
 37 | __all__ = ["SelectFromQuadraticModel"]
 38 | 
 39 | 
 40 | class SelectFromQuadraticModel(SelectorMixin, BaseEstimator):
 41 |     """Select features using a quadratic optimization problem solved on a hybrid solver.
 42 | 
 43 |     Args:
 44 |         alpha:
 45 |             Hyperparameter between 0 and 1 that controls the relative weight of
 46 |             the relevance and redundancy terms.
 47 |             ``alpha=0`` places no weight on the quality of the features,
 48 |             therefore the features will be selected as to minimize the
 49 |             redundancy without any consideration to quality.
 50 |             ``alpha=1`` places the maximum weight on the quality of the features,
 51 |             and therefore will be equivalent to using
 52 |             :class:`sklearn.feature_selection.SelectKBest`.
 53 |         num_features:
 54 |             The number of features to select.
 55 |         time_limit:
 56 |             The time limit for the run on the hybrid solver.
 57 |     """
 58 | 
 59 |     ACCEPTED_METHODS = [
 60 |         "correlation",
 61 |         # "mutual information",  # todo
 62 |         ]
 63 | 
 64 |     def __init__(
 65 |         self,
 66 |         *,
 67 |         alpha: float = .5,
 68 |         method: str = "correlation",  # undocumented until there is another supported
 69 |         num_features: int = 10,
 70 |         time_limit: typing.Optional[float] = None,
 71 |         solver: str = "cqm",
 72 |     ):
 73 |         if not 0 <= alpha <= 1:
 74 |             raise ValueError(f"alpha must be between 0 and 1, given {alpha}")
 75 | 
 76 |         if method not in self.ACCEPTED_METHODS:
 77 |             raise ValueError(
 78 |                 f"method must be one of {self.ACCEPTED_METHODS}, given {method}"
 79 |             )
 80 | 
 81 |         if num_features <= 0:
 82 |             raise ValueError(f"num_features must be a positive integer, given {num_features}")
 83 | 
 84 |         self.alpha = alpha
 85 |         self.method = method
 86 |         self.num_features = num_features
 87 |         self.time_limit = time_limit  # check this lazily
 88 |         self.solver = solver
 89 | 
 90 |     def __sklearn_is_fitted__(self) -> bool:
 91 |         # used by `check_is_fitted()`
 92 |         try:
 93 |             self._mask
 94 |         except AttributeError:
 95 |             return False
 96 | 
 97 |         return True
 98 | 
 99 |     def _get_support_mask(self) -> np.ndarray[typing.Any, np.dtype[np.bool_]]:
100 |         """Get the boolean mask indicating which features are selected
101 | 
102 |         Returns:
103 |           boolean array of shape [# input features]. An element is True iff its
104 |           corresponding feature is selected for retention.
105 | 
106 |         Raises:
107 |             RuntimeError: This method will raise an error if it is run before `fit`
108 |         """
109 |         check_is_fitted(self)
110 | 
111 |         try:
112 |             return self._mask
113 |         except AttributeError:
114 |             raise RuntimeError("fit hasn't been run yet")
115 | 
116 |     @staticmethod
117 |     def _create_cqm_model(
118 |         correlations: np.memmap, 
119 |         X: npt.ArrayLike, 
120 |         alpha: float, 
121 |         num_features: int, 
122 |         strict: bool, 
123 |     ) -> dimod.ConstrainedQuadraticModel:
124 |         """Build a constrained quadratic model (CQM) for feature selection.
125 | 
126 |         This method is based on maximizing influence and feature independence as
127 |         measured by correlation [Milne et al.]_.
128 | 
129 |         Args:
130 |             correlations:
131 |                 Correlation matrix of features
132 |             X:
133 |                 Feature vectors formatted as a numerical 2D array-like.
134 |             alpha:
135 |                 Hyperparameter between 0 and 1 that controls the relative weight of
136 |                 the relevance and redundancy terms.
137 |                 ``alpha=0`` places no weight on the quality of the features,
138 |                 therefore the features will be selected as to minimize the
139 |                 redundancy without any consideration to quality.
140 |                 ``alpha=1`` places the maximum weight on the quality of the features,
141 |                 and therefore will be equivalent to using
142 |                 :class:`sklearn.feature_selection.SelectKBest`.
143 |             num_features:
144 |                 The number of features to select.
145 |             strict:
146 |                 If ``False`` the constraint on the number of selected features
147 |                 is ``<=`` rather than ``==``.
148 | 
149 |         Returns:
150 |             A constrained quadratic model (CQM)
151 |         """
152 | 
153 |         # our objective
154 |         # we multiply by 2 because the matrix is symmetric
155 |         np.fill_diagonal(correlations, correlations[:, -1] * (-2 * alpha * num_features))
156 | 
157 |         cqm = dimod.ConstrainedQuadraticModel()
158 |         cqm.add_variables(dimod.BINARY, X.shape[1])
159 | 
160 |         # add the k-hot constraint
161 |         cqm.add_constraint(
162 |             ((v, 1) for v in cqm.variables),
163 |             '==' if strict else '<=',
164 |             num_features,
165 |             label=f"{num_features}-hot",
166 |             )
167 | 
168 |         # Note: the full symmetric matrix (with both upper- and lower-diagonal
169 |         # entries for each correlation coefficient) is retained for consistency with
170 |         # the original formulation from Milne et al.
171 |         it = np.nditer(correlations[:-1, :-1], flags=['multi_index'], op_flags=[['readonly']])
172 |         cqm.set_objective((*it.multi_index, x) for x in it if x)
173 | 
174 |         return cqm
175 | 
176 |     @staticmethod
177 |     def _create_nl_model(
178 |         correlations: np.memmap, 
179 |         X: npt.ArrayLike, 
180 |         alpha: float, 
181 |         num_features: int,
182 |         strict: bool, 
183 |     ) -> Model:
184 |         """Build a nonlinear (NL) model for feature selection.
185 | 
186 |         This method is based on maximizing influence and feature independence as
187 |         measured by correlation [Milne et al.]_.
188 | 
189 |         Args:
190 |             correlations:
191 |                 Correlation matrix of features
192 |             X:
193 |                 Feature vectors formatted as a numerical 2D array-like.
194 |             alpha:
195 |                 Hyperparameter between 0 and 1 that controls the relative weight of
196 |                 the relevance and redundancy terms.
197 |                 ``alpha=0`` places no weight on the quality of the features,
198 |                 therefore the features will be selected as to minimize the
199 |                 redundancy without any consideration to quality.
200 |                 ``alpha=1`` places the maximum weight on the quality of the features,
201 |                 and therefore will be equivalent to using
202 |                 :class:`sklearn.feature_selection.SelectKBest`.
203 |             num_features:
204 |                 The number of features to select.
205 |             strict:
206 |                 If ``False`` the constraint on the number of selected features
207 |                 is ``<=`` rather than ``==``.
208 | 
209 |         Returns:
210 |             A nonlinear model, the binary list, and a ndarray
211 |         """
212 | 
213 |         # initialize model, create binary list, make constant
214 |         nl = Model()
215 |         total_num_features=X.shape[1]
216 | 
217 |         x_binary = nl.binary(total_num_features)
218 |         var_features = nl.constant(num_features)
219 |         feat_corr = correlations[:-1,:-1]
220 | 
221 |         # take last element in every row
222 |         label_corr = np.array(correlations[:-1,-1])
223 | 
224 |         # Make a constant node in order to splice and use in objective
225 |         nl_corr = nl.constant(feat_corr)
226 | 
227 |         # extract upper triangle, excluding diagonal. Flatten into 1D array
228 |         C = np.triu(nl_corr, k=1).flatten()
229 | 
230 |         # generate all column and row indices
231 |         quad_col = np.tile(np.arange(total_num_features), total_num_features)
232 |         quad_row = np.tile(np.arange(total_num_features), 
233 |                     (total_num_features,1)).flatten('F')
234 | 
235 |         # extract indices where correlation value not equal to zero
236 | 
237 |         # j index
238 |         q2 = quad_col[C != 0]
239 |         # i index
240 |         q1 = quad_row[C != 0]
241 | 
242 |         # extract values at position (i,j) where not equal to zero
243 |         q3 = C[C != 0]
244 | 
245 |         # 1D numpy array initialized to size of num_rows with 0 in every position
246 |         linear = np.zeros(len(feat_corr[0]))
247 | 
248 |         # numpy will automatically go element-by-element in the arrays
249 |         linear += nl.constant(-1.0 * label_corr * alpha * num_features)
250 | 
251 |         # if must choose exact number of desired features
252 |         if strict:
253 |             nl.add_constraint(x_binary.sum() == var_features)
254 |         else:
255 |             nl.add_constraint(x_binary.sum() <= var_features)
256 | 
257 |         nl.minimize(nl.constant(2.0) * nl.quadratic_model(x_binary, quadratic=(q3, [q1, q2]), linear=linear))
258 |         return nl
259 | 
260 |     @typing.overload
261 |     def correlation(X: npt.ArrayLike, y: npt.ArrayLike, *, solver: Literal["cqm"], **kwargs) -> dimod.ConstrainedQuadraticModel: ...
262 | 
263 |     @typing.overload
264 |     def correlation(X: npt.ArrayLike, y: npt.ArrayLike, *, solver: Literal["nl"], **kwargs) -> Model: ...
265 | 
266 |     @staticmethod
267 |     def correlation(
268 |         X: npt.ArrayLike,
269 |         y: npt.ArrayLike,
270 |         *,
271 |         alpha: float,
272 |         num_features: int,
273 |         strict: bool = True,
274 |         solver: str,
275 |     ) -> Union[dimod.ConstrainedQuadraticModel, Model]:
276 |         """Build a model for feature selection.
277 | 
278 |         This method is based on maximizing influence and feature independence as
279 |         measured by correlation [Milne et al.]_.
280 | 
281 |         Args:
282 |             X:
283 |                 Feature vectors formatted as a numerical 2D array-like.
284 |             y:
285 |                 Class labels formatted as a numerical 1D array-like.
286 |             alpha:
287 |                 Hyperparameter between 0 and 1 that controls the relative weight of
288 |                 the relevance and redundancy terms.
289 |                 ``alpha=0`` places no weight on the quality of the features,
290 |                 therefore the features will be selected as to minimize the
291 |                 redundancy without any consideration to quality.
292 |                 ``alpha=1`` places the maximum weight on the quality of the features,
293 |                 and therefore will be equivalent to using
294 |                 :class:`sklearn.feature_selection.SelectKBest`.
295 |             num_features:
296 |                 The number of features to select.
297 |             strict:
298 |                 If ``False`` the constraint on the number of selected features
299 |                 is ``<=`` rather than ``==``.
300 |             solver:
301 |                 String containing either "cqm" or "nl" to decide which solver creation method to use. Defaults to "cqm"
302 | 
303 |         Returns:
304 |             A constrained quadratic model or a nonlinear model.
305 | 
306 |         .. [Milne et al.] Milne, Andrew, Maxwell Rounds, and Phil Goddard. 2017. "Optimal Feature
307 |             Selection in Credit Scoring and Classification Using a Quantum Annealer."
308 |             1QBit; White Paper.
309 |             https://1qbit.com/whitepaper/optimal-feature-selection-in-credit-scoring-classification-using-quantum-annealer
310 |         """
311 | 
312 |         X = np.atleast_2d(np.asarray(X))
313 |         y = np.asarray(y)
314 | 
315 |         if X.ndim != 2:
316 |             raise ValueError("X must be a 2-dimensional array-like")
317 | 
318 |         if y.ndim != 1:
319 |             raise ValueError("y must be a 1-dimensional array-like")
320 | 
321 |         if y.shape[0] != X.shape[0]:
322 |             raise ValueError(f"requires: X.shape[0] == y.shape[0] but {X.shape[0]} != {y.shape[0]}")
323 | 
324 |         if not 0 <= alpha <= 1:
325 |             raise ValueError(f"alpha must be between 0 and 1, given {alpha}")
326 | 
327 |         if num_features <= 0:
328 |             raise ValueError(f"num_features must be a positive integer, given {num_features}")
329 | 
330 |         if X.shape[0] <= 1:
331 |             raise ValueError("X must have at least two rows")
332 | 
333 |         with tempfile.TemporaryFile() as fX, tempfile.TemporaryFile() as fout:
334 |             # we make a copy of X because we'll be modifying it in-place within
335 |             # some of the functions
336 |             X_copy = np.memmap(fX, X.dtype, mode="w+", shape=(X.shape[0], X.shape[1] + 1))
337 |             X_copy[:, :-1] = X
338 |             X_copy[:, -1] = y
339 | 
340 |             # make the matrix that will hold the correlations
341 |             correlations = np.memmap(
342 |                 fout,
343 |                 dtype=np.result_type(X, y),
344 |                 mode="w+",
345 |                 shape=(X_copy.shape[1], X_copy.shape[1]),
346 |                 )
347 | 
348 |             # main calculation. It modifies X_copy in-place
349 |             corrcoef(X_copy, out=correlations, rowvar=False, copy=False)
350 | 
351 |             # we don't care about the direction of correlation in terms of
352 |             # the penalty/quality
353 |             np.absolute(correlations, out=correlations)
354 | 
355 |             if (solver == "cqm"):
356 |                 return SelectFromQuadraticModel._create_cqm_model(correlations=correlations, X=X, alpha=alpha, num_features=num_features, strict=strict)
357 |             elif (solver == "nl"):
358 |                 return SelectFromQuadraticModel._create_nl_model(correlations=correlations, X=X, alpha=alpha, num_features=num_features, strict=strict)
359 |             raise ValueError(f"Solver parameter must be equal to 'nl' or 'cqm'. Received solver parameter: {solver}")
360 | 
361 |     @staticmethod
362 |     def correlation_cqm(X: npt.ArrayLike,
363 |         y: npt.ArrayLike,
364 |         *,
365 |         alpha: float,
366 |         num_features: int,
367 |         strict: bool = True,
368 |     ) -> dimod.ConstrainedQuadraticModel:
369 |         return SelectFromQuadraticModel.correlation(X=X, y=y, num_features=num_features, alpha=alpha, solver="cqm")
370 |     
371 |     @staticmethod
372 |     def correlation_nl(X: npt.ArrayLike,
373 |         y: npt.ArrayLike,
374 |         *,
375 |         alpha: float,
376 |         num_features: int,
377 |         strict: bool = True,
378 |     ) -> dimod.ConstrainedQuadraticModel:
379 |         return SelectFromQuadraticModel.correlation(X=X, y=y, num_features=num_features, alpha=alpha, solver="nl")
380 | 
381 |     def fit(
382 |         self,
383 |         X: npt.ArrayLike,
384 |         y: npt.ArrayLike,
385 |         *,
386 |         alpha: typing.Optional[float] = None,
387 |         num_features: typing.Optional[int] = None,
388 |         time_limit: typing.Optional[float] = None,
389 |         solver: typing.Optional[str] = None,
390 |     ) -> SelectFromQuadraticModel:
391 |         """Select the features to keep.
392 | 
393 |         Args:
394 |             X:
395 |                 Feature vectors formatted as a numerical 2D array-like.
396 |             y:
397 |                 Class labels formatted as a numerical 1D array-like.
398 |             alpha:
399 |                 Hyperparameter between 0 and 1 that controls the relative weight of
400 |                 the relevance and redundancy terms.
401 |                 ``alpha=0`` places no weight on the quality of the features,
402 |                 therefore the features will be selected as to minimize the
403 |                 redundancy without any consideration to quality.
404 |                 ``alpha=1`` places the maximum weight on the quality of the features,
405 |                 and therefore will be equivalent to using
406 |                 :class:`sklearn.feature_selection.SelectKBest`.
407 |             num_features:
408 |                 The number of features to select.
409 |                 Defaults to the value provided to the constructor.
410 |             time_limit:
411 |                 The time limit for the run on the hybrid solver.
412 |                 Defaults to the value provided to the constructor.
413 | 
414 |         Returns:
415 |             This instance of `SelectFromQuadraticModel`.
416 |         """
417 |         X = np.atleast_2d(np.asarray(X))
418 |         if X.ndim != 2:
419 |             raise ValueError("X must be a 2-dimensional array-like")
420 | 
421 |         # y is checked by the correlation method function
422 | 
423 |         if alpha is None:
424 |             alpha = self.alpha
425 |         # alpha is checked by the correlation method function
426 | 
427 |         if num_features is None:
428 |             num_features = self.num_features
429 |         # num_features is checked by the correlation method function
430 | 
431 |         if solver is None:
432 |             solver = self.solver
433 | 
434 |         # if we already have fewer features than requested, just return
435 |         if num_features >= X.shape[1]:
436 |             self._mask = np.ones(X.shape[1], dtype=bool)
437 |             return self
438 | 
439 |         if self.method == "correlation":
440 |             model = self.correlation(X, y, num_features=num_features, alpha=alpha, solver=solver)
441 | 
442 |         else:
443 |             raise ValueError(f"only methods {self.acceptable_methods} are implemented")
444 | 
445 |         try:
446 |             if solver == "cqm":
447 |                 sampler = LeapHybridCQMSampler()
448 |                 sampleset = sampler.sample_cqm(model, time_limit=self.time_limit,
449 |                                            label=f"{self.__module__}.{type(self).__qualname__}")
450 | 
451 |                 filtered = sampleset.filter(lambda d: d.is_feasible)
452 | 
453 |                 if len(filtered) == 0:
454 |                     raise RuntimeError("no feasible solutions found by the hybrid solver")
455 | 
456 |                 lowest = filtered.first.sample
457 | 
458 |                 self._mask = np.fromiter((lowest[v] for v in model.variables),
459 |                                          count=model.num_variables(), dtype=bool)
460 | 
461 |             elif solver == "nl":
462 |                 sampler = LeapHybridNLSampler()
463 | 
464 |                 # time_limit is checked by the LeapHybridNLSampler
465 |                 _ = sampler.sample(model, time_limit=self.time_limit, label='scikit-learn Plug-In: NL')
466 | 
467 |                 # Get the index position of chosen features
468 |                 # Example Given (e.g.) of 6 features to choose 3
469 |                 with model.lock():
470 |                     selected = next(model.iter_decisions()).state(0)
471 | 
472 |                 self._mask = np.asarray(selected, dtype=bool) # e.g. [False, True, False, False, True, True, False]
473 | 
474 |         except (ConfigFileError, SolverAuthenticationError) as e:
475 |             raise RuntimeError(
476 |                 f"""Instantiation of a Leap hybrid solver failed with an {e} error.
477 | 
478 |                 See https://docs.ocean.dwavesys.com/en/stable/overview/sapi.html for configuring
479 |                 access to Leap’s solvers.
480 |                 """
481 |             )
482 | 
483 |         return self
484 | 
485 |     def unfit(self):
486 |         """Undo a previously executed ``fit`` method."""
487 |         del self._mask
488 | 


--------------------------------------------------------------------------------