├── .coveragerc
├── .coveralls.yml
├── .editorconfig
├── .gitattributes
├── .github
    └── ISSUE_TEMPLATE.md
├── .gitignore
├── .travis.yml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.rst
├── HISTORY.rst
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.rst
├── cmd2pkg
├── datasets
    ├── libsvm
    │   ├── README.txt
    │   ├── breast-cancer_scale
    │   ├── german.numer_scale
    │   ├── heart_scale
    │   ├── ionosphere.arff
    │   └── ionosphere_scale
    └── simple_comparison_JKMs_Weka.py
├── demo_tutorials
    └── demo_kernelmethods.ipynb
├── docs
    ├── API.rst
    ├── Makefile
    ├── categorical_kernels.rst
    ├── conf.py
    ├── contributing.rst
    ├── flyer.png
    ├── graph_kernels.rst
    ├── history.rst
    ├── index.rst
    ├── installation.rst
    ├── kernel_functions.rst
    ├── kernel_matrix.rst
    ├── km_collections.rst
    ├── logo_kernelmethods.png
    ├── make.bat
    ├── numeric_kernels.rst
    ├── operations.rst
    ├── readme.rst
    ├── string_kernels.rst
    ├── usage.rst
    └── utilities.rst
├── kernelmethods
    ├── __init__.py
    ├── _version.py
    ├── algorithms.py
    ├── base.py
    ├── categorical.py
    ├── config.py
    ├── numeric_kernels.py
    ├── operations.py
    ├── ranking.py
    ├── sampling.py
    ├── tests
    │   ├── test_algorithms.py
    │   ├── test_base_classes.py
    │   ├── test_categorical.py
    │   ├── test_kernel_matrix.py
    │   ├── test_kernel_set.py
    │   ├── test_numeric_kernels.py
    │   ├── test_operations.py
    │   ├── test_ranking.py
    │   ├── test_sampling.py
    │   └── test_utils.py
    └── utils.py
├── pytest.ini
├── requirements.txt
├── requirements_dev.txt
├── setup.cfg
├── setup.py
├── tox.ini
└── versioneer.py


/.coveragerc:
--------------------------------------------------------------------------------
 1 | [report]
 2 | exclude_lines =
 3 |     pragma: no cover
 4 |     def __repr__
 5 |     def __str__
 6 |     def __format__
 7 |     __format__
 8 |     __repr__
 9 |     __str__
10 |     if contains_nan_inf*
11 |     if self.debug:
12 |     if settings.DEBUG
13 |     raise RuntimeError
14 |     raise AssertionError
15 |     raise NotImplementedError
16 |     if 0:
17 |     if __name__ == .__main__.:
18 | 
19 | ignore_errors = True
20 | 
21 | [run]
22 | omit =
23 |     # omit anything in a .local directory anywhere
24 |     */.local/*
25 | 
26 |     # omit everything in /usr
27 |     /usr/*
28 |     */tests/*.py
29 | 
30 |     # irrelevant files
31 |     kernelmethods/__*__.py
32 |     kernelmethods/_version.py
33 | 
34 | 


--------------------------------------------------------------------------------
/.coveralls.yml:
--------------------------------------------------------------------------------
1 | service_name: travis-pro
2 | repo_token: mnWg3PvTHwoOPt7HFxzVqM5gFXwI095KB
3 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # http://editorconfig.org
 2 | 
 3 | root = true
 4 | 
 5 | [*]
 6 | indent_style = space
 7 | indent_size = 4
 8 | trim_trailing_whitespace = true
 9 | insert_final_newline = true
10 | charset = utf-8
11 | end_of_line = lf
12 | 
13 | [*.bat]
14 | indent_style = tab
15 | end_of_line = crlf
16 | 
17 | [LICENSE]
18 | insert_final_newline = false
19 | 
20 | [Makefile]
21 | indent_style = tab
22 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | kernelmethods/_version.py export-subst
2 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | * kernelmethods version:
 2 | * Python version:
 3 | * Operating System:
 4 | 
 5 | ### Description
 6 | 
 7 | Describe what you were trying to get done.
 8 | Tell us what happened, what went wrong, and what you expected to happen.
 9 | 
10 | ### What I Did
11 | 
12 | ```
13 | Paste the command(s) you ran and the output.
14 | If there was a crash, please include the traceback here.
15 | ```
16 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | .idea
  2 | .vscode
  3 | 
  4 | # Byte-compiled / optimized / DLL files
  5 | __pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | 
  9 | # C extensions
 10 | *.so
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | env/
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | 
 31 | # PyInstaller
 32 | #  Usually these files are written by a python script from a template
 33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | 
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | 
 53 | # Translations
 54 | *.mo
 55 | *.pot
 56 | 
 57 | # Django stuff:
 58 | *.log
 59 | local_settings.py
 60 | 
 61 | # Flask stuff:
 62 | instance/
 63 | .webassets-cache
 64 | 
 65 | # Scrapy stuff:
 66 | .scrapy
 67 | 
 68 | # Sphinx documentation
 69 | docs/_build/
 70 | 
 71 | # PyBuilder
 72 | target/
 73 | 
 74 | # Jupyter Notebook
 75 | .ipynb_checkpoints
 76 | 
 77 | # pyenv
 78 | .python-version
 79 | 
 80 | # celery beat schedule file
 81 | celerybeat-schedule
 82 | 
 83 | # SageMath parsed files
 84 | *.sage.py
 85 | 
 86 | # dotenv
 87 | .env
 88 | 
 89 | # virtualenv
 90 | .venv
 91 | venv/
 92 | ENV/
 93 | 
 94 | # Spyder project settings
 95 | .spyderproject
 96 | .spyproject
 97 | 
 98 | # Rope project settings
 99 | .ropeproject
100 | 
101 | # mkdocs documentation
102 | /site
103 | 
104 | # mypy
105 | .mypy_cache/
106 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | # Config file for automatic testing at travis-ci.org
 3 | install:
 4 |   - pip install pytest-cov hypothesis
 5 |   - pip install -r requirements_dev.txt
 6 |   - pip install -e .
 7 | 
 8 | language: python
 9 | cache: pip
10 | python:
11 |   - 3.6
12 | 
13 | script:
14 |   - pytest --cov kernelmethods --cov-config=.coveragerc
15 | 
16 | after_success:
17 |     - coveralls
18 | 
19 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to making participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
 9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 |  advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 |  address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 |  professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at raamana@gmail.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 | 
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 | 
68 | ## Attribution
69 | 
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 | 
73 | [homepage]: https://www.contributor-covenant.org
74 | 
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.rst:
--------------------------------------------------------------------------------
  1 | .. highlight:: shell
  2 | 
  3 | ============
  4 | Contributing
  5 | ============
  6 | 
  7 | Contributions are welcome, and they are greatly appreciated! Every little bit
  8 | helps, and credit will always be given.
  9 | 
 10 | You can contribute in many ways:
 11 | 
 12 | Types of Contributions
 13 | ----------------------
 14 | 
 15 | Report Bugs
 16 | ~~~~~~~~~~~
 17 | 
 18 | Report bugs at https://github.com/raamana/kernelmethods/issues.
 19 | 
 20 | If you are reporting a bug, please include:
 21 | 
 22 | * Your operating system name and version.
 23 | * Any details about your local setup that might be helpful in troubleshooting.
 24 | * Detailed steps to reproduce the bug.
 25 | 
 26 | Fix Bugs
 27 | ~~~~~~~~
 28 | 
 29 | Look through the GitHub issues for bugs. Anything tagged with "bug" and "help
 30 | wanted" is open to whoever wants to implement it.
 31 | 
 32 | Implement Features
 33 | ~~~~~~~~~~~~~~~~~~
 34 | 
 35 | Look through the GitHub issues for features. Anything tagged with "enhancement"
 36 | and "help wanted" is open to whoever wants to implement it.
 37 | 
 38 | Write Documentation
 39 | ~~~~~~~~~~~~~~~~~~~
 40 | 
 41 | kernelmethods could always use more documentation, whether as part of the
 42 | official kernelmethods docs, in docstrings, or even on the web in blog posts,
 43 | articles, and such.
 44 | 
 45 | Submit Feedback
 46 | ~~~~~~~~~~~~~~~
 47 | 
 48 | The best way to send feedback is to file an issue at https://github.com/raamana/kernelmethods/issues.
 49 | 
 50 | If you are proposing a feature:
 51 | 
 52 | * Explain in detail how it would work.
 53 | * Keep the scope as narrow as possible, to make it easier to implement.
 54 | * Remember that this is a volunteer-driven project, and that contributions
 55 |   are welcome :)
 56 | 
 57 | Get Started!
 58 | ------------
 59 | 
 60 | Ready to contribute? Here's how to set up `kernelmethods` for local development.
 61 | 
 62 | 1. Fork the `kernelmethods` repo on GitHub.
 63 | 2. Clone your fork locally::
 64 | 
 65 |     $ git clone git@github.com:your_name_here/kernelmethods.git
 66 | 
 67 | 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development::
 68 | 
 69 |     $ mkvirtualenv kernelmethods
 70 |     $ cd kernelmethods/
 71 |     $ python setup.py develop
 72 | 
 73 | 4. Create a branch for local development::
 74 | 
 75 |     $ git checkout -b name-of-your-bugfix-or-feature
 76 | 
 77 |    Now you can make your changes locally.
 78 | 
 79 | 5. When you're done making changes, check that your changes pass flake8 and the
 80 |    tests, including testing other Python versions with tox::
 81 | 
 82 |     $ flake8 kernelmethods
 83 |     $ python setup.py test or py.test
 84 |     $ tox
 85 | 
 86 |    To get flake8 and tox, just pip install them into your virtualenv.
 87 | 
 88 | 6. Commit your changes and push your branch to GitHub::
 89 | 
 90 |     $ git add .
 91 |     $ git commit -m "Your detailed description of your changes."
 92 |     $ git push origin name-of-your-bugfix-or-feature
 93 | 
 94 | 7. Submit a pull request through the GitHub website.
 95 | 
 96 | Pull Request Guidelines
 97 | -----------------------
 98 | 
 99 | Before you submit a pull request, check that it meets these guidelines:
100 | 
101 | 1. The pull request should include tests.
102 | 2. If the pull request adds functionality, the docs should be updated. Put
103 |    your new functionality into a function with a docstring, and add the
104 |    feature to the list in README.rst.
105 | 3. The pull request should work for Python 2.7, 3.4, 3.5 and 3.6, and for PyPy. Check
106 |    https://travis-ci.org/raamana/kernelmethods/pull_requests
107 |    and make sure that the tests pass for all supported Python versions.
108 | 
109 | Tips
110 | ----
111 | 
112 | To run a subset of tests::
113 | 
114 |     $ py.test tests.test_kernelmethods
115 | 
116 | 
117 | Deploying
118 | ---------
119 | 
120 | A reminder for the maintainers on how to deploy.
121 | Make sure all your changes are committed (including an entry in HISTORY.rst).
122 | Then run::
123 | 
124 |     $ bumpversion patch # possible: major / minor / patch
125 |     $ git push
126 |     $ git push --tags
127 | 
128 | Travis will then deploy to PyPI if tests pass.
129 | 


--------------------------------------------------------------------------------
/HISTORY.rst:
--------------------------------------------------------------------------------
1 | =======
2 | History
3 | =======
4 | 
5 | 0.2 (2019-08-08)
6 | ------------------
7 | 
8 | * First full release
9 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Apache Software License 2.0
 2 | 
 3 | Copyright (c) 2018, Pradeep Reddy Raamana
 4 | 
 5 | Licensed under the Apache License, Version 2.0 (the "License");
 6 | you may not use this file except in compliance with the License.
 7 | You may obtain a copy of the License at
 8 | 
 9 | http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | Unless required by applicable law or agreed to in writing, software
12 | distributed under the License is distributed on an "AS IS" BASIS,
13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | See the License for the specific language governing permissions and
15 | limitations under the License.
16 | 
17 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include CONTRIBUTING.rst
 2 | include HISTORY.rst
 3 | include LICENSE
 4 | include README.rst
 5 | 
 6 | recursive-include tests *
 7 | recursive-exclude * __pycache__
 8 | recursive-exclude * *.py[co]
 9 | 
10 | recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif
11 | include versioneer.py
12 | include kernelmethods/_version.py
13 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: clean clean-test clean-pyc clean-build docs help
 2 | .DEFAULT_GOAL := help
 3 | 
 4 | define BROWSER_PYSCRIPT
 5 | import os, webbrowser, sys
 6 | 
 7 | try:
 8 | 	from urllib import pathname2url  # Python 2 location
 9 | except ImportError:  # Python 3: pathname2url lives in urllib.request
10 | 	from urllib.request import pathname2url
11 | 
12 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
13 | endef
14 | export BROWSER_PYSCRIPT
15 | 
16 | define PRINT_HELP_PYSCRIPT
17 | import re, sys
18 | 
19 | for line in sys.stdin:
20 | 	match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
21 | 	if match:
22 | 		target, help = match.groups()
23 | 		print("%-20s %s" % (target, help))
24 | endef
25 | export PRINT_HELP_PYSCRIPT
26 | 
27 | BROWSER := python -c "$$BROWSER_PYSCRIPT"
28 | 
29 | help:
30 | 	@python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
31 | 
32 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
33 | 
34 | clean-build: ## remove build artifacts
35 | 	rm -fr build/
36 | 	rm -fr dist/
37 | 	rm -fr .eggs/
38 | 	find . -name '*.egg-info' -exec rm -fr {} +
39 | 	find . -name '*.egg' -exec rm -f {} +
40 | 
41 | clean-pyc: ## remove Python file artifacts
42 | 	find . -name '*.pyc' -exec rm -f {} +
43 | 	find . -name '*.pyo' -exec rm -f {} +
44 | 	find . -name '*~' -exec rm -f {} +
45 | 	find . -name '__pycache__' -exec rm -fr {} +
46 | 
47 | clean-test: ## remove test and coverage artifacts
48 | 	rm -fr .tox/
49 | 	rm -f .coverage
50 | 	rm -fr htmlcov/
51 | 	rm -fr .pytest_cache
52 | 
53 | lint: ## check style with flake8 (tests live inside the kernelmethods package)
54 | 	flake8 kernelmethods
55 | 
56 | test: ## run tests quickly with the default Python
57 | 	py.test
58 | 
59 | test-all: ## run tests on every Python version with tox
60 | 	tox
61 | 
62 | coverage: ## check code coverage quickly with the default Python
63 | 	coverage run --source kernelmethods -m pytest
64 | 	coverage report -m
65 | 	coverage html
66 | 	$(BROWSER) htmlcov/index.html
67 | 
68 | docs: ## generate Sphinx HTML documentation, including API docs
69 | 	rm -f docs/kernelmethods.rst
70 | 	rm -f docs/modules.rst
71 | 	sphinx-apidoc -o docs/ kernelmethods
72 | 	$(MAKE) -C docs clean
73 | 	$(MAKE) -C docs html
74 | 	$(BROWSER) docs/_build/html/index.html
75 | 
76 | servedocs: docs ## compile the docs watching for changes
77 | 	watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
78 | 
79 | release: dist ## package and upload a release
80 | 	twine upload dist/*
81 | 
82 | dist: clean ## builds source and wheel package
83 | 	python setup.py sdist
84 | 	python setup.py bdist_wheel
85 | 	ls -l dist
86 | 
87 | install: clean ## install the package to the active Python's site-packages
88 | 	python setup.py install
89 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | ==========================
 2 | Kernel methods and classes
 3 | ==========================
 4 | 
 5 | .. image:: docs/logo_kernelmethods.png
 6 |     :height: 150
 7 |     
 8 | .. image:: https://img.shields.io/pypi/v/kernelmethods.svg
 9 |         :target: https://pypi.python.org/pypi/kernelmethods
10 | 
11 | .. image:: https://img.shields.io/travis/raamana/kernelmethods.svg
12 |         :target: https://travis-ci.org/raamana/kernelmethods
13 | 
14 | .. image:: https://coveralls.io/repos/github/raamana/kernelmethods/badge.svg?branch=master
15 |     :target: https://coveralls.io/github/raamana/kernelmethods?branch=master
16 | 
17 | 
18 | Documentation
19 | ---------------
20 | 
21 | API and usage: https://raamana.github.io/kernelmethods/
22 | 
23 | 
24 | Demo notebooks (no setup required, click on the binder logo) :
25 | 
26 | .. image:: https://mybinder.org/badge_logo.svg
27 |  :target: https://mybinder.org/v2/gh/raamana/kernelmethods/master?filepath=demo_tutorials%2Fdemo_kernelmethods.ipynb
28 | 
29 | Arxiv preprint
30 | ---------------
31 | 
32 | A paper presenting the design and some validation is available at https://arxiv.org/abs/2005.13483
33 | 
34 | 
35 | News
36 | ------
37 | 
38 | - Hadamard kernel is now available (which showed potential in some bioinformatics applications like breast cancer detection).
39 | 
40 | 
41 | Description
42 | -------------
43 | 
44 | 
45 | ``kernelmethods`` is a pure Python library defining modular classes that provide basic kernel methods as well as an intuitive interface for advanced functionality such as composite and hyper kernels. This library fills an important void in the ever-growing Python-based machine learning ecosystem, where users can only use predefined kernels and are not able to customize or extend them for their own applications, which require great flexibility owing to their diversity and the need for better-performing kernels.
46 | 
47 | *schematic*:
48 | 
49 | .. image:: docs/flyer.png
50 | 
51 | This library defines the ``KernelMatrix`` class that is central to all the kernel methods and machines. As the ``KernelMatrix`` class is a key bridge between input data and the various kernel learning algorithms, it is designed to be highly usable and extensible to different applications and data types. Besides being able to apply basic kernels on a given sample (to produce a ``KernelMatrix``), this library provides various kernel operations, such as normalization, centering, product, alignment evaluation, linear combination and ranking (by various performance metrics) of kernel matrices.
52 | 
53 | In addition, we provide several convenient classes, such as ``KernelSet`` and ``KernelBucket`` for easy management of a large collection of kernels.  Dealing with a diverse configuration of kernels is necessary for automatic kernel selection and optimization in applications such as Multiple Kernel Learning (MKL) and the like.
54 | 
55 | In addition to the common numerical kernels such as the Gaussian and Polynomial kernels, we designed this library to make it easy to develop categorical, string and graph kernels, with the same attractive properties of intuitive and highly-testable API. In addition to providing native implementation of non-numerical kernels, we aim to provide a deeply and easily extensible framework for arbitrary input data types, such as sequences, trees and graphs etc, via data structures such as ``pyradigm``.
56 | 
57 | Moreover, drop-in ``Estimator`` classes are provided, called ``KernelMachine``, offering the power of ``SVM`` for seamless usage in the ``scikit-learn`` ecosystem. Another useful class is ``OptimalKernelSVR``, which finds the optimal kernel function for a given sample and trains the SVM using that kernel.
58 | 
59 | 
60 | Docs
61 | ----
62 | 
63 | API and Usage: https://raamana.github.io/kernelmethods/
64 | 
65 | Demo notebook: `on binder <https://mybinder.org/v2/gh/raamana/kernelmethods/master?filepath=demo_tutorials%2Fdemo_kernelmethods.ipynb>`_.
66 | 
67 | A paper presenting the design and some validation is available `here <https://arxiv.org/abs/2005.13483>`_
68 | 
69 | Note
70 | ----
71 | 
72 | The software is beta. All types of contributions are greatly welcome.
73 | 
74 | 
75 | Dedication
76 | -----------
77 | 
78 | This library is dedicated to `The Concert for Bangladesh <https://en.wikipedia.org/wiki/The_Concert_for_Bangladesh>`_, George Harrison and Pandit Ravi Shankar, who moved me immensely with their empathy and kindness, by organizing the first benefit concert ever to raise international awareness and funds for Bangladesh's liberation war in 1971.
79 | 
80 | 
81 | 
82 | 
83 | 
84 | 


--------------------------------------------------------------------------------
/cmd2pkg:
--------------------------------------------------------------------------------
 1 | 
 2 | rm -rf dist build kernelmethods.egg-info 
 3 | 
 4 | python setup.py sdist bdist_wheel
 5 | 
 6 | twine upload dist/*
 7 | 
 8 | rm -rf dist build kernelmethods.egg-info
 9 | 
10 | 


--------------------------------------------------------------------------------
/datasets/libsvm/README.txt:
--------------------------------------------------------------------------------
1 | 
2 | Datasets are copied from JKernelMachine's repo at
3 | 
4 | https://github.com/davidpicard/jkernelmachines/tree/master/resources
5 | 
6 | for the purpose of comparison to experiments in their paper: JKernelMachines and Weka.
7 | 
8 | 


--------------------------------------------------------------------------------
/datasets/simple_comparison_JKMs_Weka.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 
 3 | This is a simple comparison of kernelmethods to JKernelMachines and Weka.
 4 | 
 5 | Repeated holdout (80% train, 20% test) with 20 repetitions, on four UCI datasets
 6 | 
 7 | """
 8 | 
 9 | from os.path import abspath, dirname, join as pjoin
10 | from time import gmtime, strftime
11 | from warnings import simplefilter
12 | 
13 | import numpy as np
14 | from sklearn.datasets.svmlight_format import load_svmlight_file
15 | from sklearn.model_selection import ShuffleSplit, cross_val_score
16 | from sklearn.svm import SVC
17 | 
18 | from kernelmethods.algorithms import KernelMachine
19 | from kernelmethods.numeric_kernels import GaussianKernel
20 | from kernelmethods.utils import _ensure_min_eps
21 | 
22 | simplefilter('ignore')
23 | 
24 | ds_dir = dirname(abspath(__file__))
25 | ds_names = (
26 |     "ionosphere_scale",
27 |     "heart_scale",
28 |     "breast-cancer_scale",
29 |     "german.numer_scale",)
30 | 
31 | ds_paths = [pjoin(ds_dir, 'libsvm', name) for name in ds_names]
32 | 
33 | 
34 | def sigma_from_gamma(gamma=0.1):
35 |     return _ensure_min_eps(np.sqrt(1.0 / (2 * gamma)))
36 | 
37 | 
38 | def gamma_from_sigma(sigma=0.1):
39 |     return _ensure_min_eps(1.0 / (2 * sigma ** 2))
40 | 
41 | 
42 | for name, ds_path in zip(ds_names, ds_paths):
43 |     time_stamp = strftime("%H:%M:%S", gmtime())
44 | 
45 |     X, y = load_svmlight_file(ds_path)
46 |     X = X.toarray()
47 | 
48 |     print('\n{:10}  {:20} {}'.format(time_stamp, name, X.shape))
49 | 
50 |     gamma = 0.1
51 |     skl_svm = SVC(C=1.0, kernel='rbf', gamma=gamma)
52 |     ss_cv1 = ShuffleSplit(n_splits=20, train_size=0.8, test_size=0.2)
53 |     scores_skl = cross_val_score(skl_svm, X, y, cv=ss_cv1)
54 | 
55 |     ker_func = GaussianKernel(sigma=sigma_from_gamma(gamma))
56 |     km_svm = KernelMachine(k_func=ker_func, learner_id='SVM', normalized=False)
57 |     ss_cv2 = ShuffleSplit(n_splits=20, train_size=0.8, test_size=0.2)
58 |     scores_km = cross_val_score(km_svm, X, y, cv=ss_cv2)
59 | 
60 |     print('\tSKLearn    Accuracy: {:.4f} +/- {:.4f}'
61 |           ''.format(np.mean(scores_skl), np.std(scores_skl)))
62 | 
63 |     print('\tKM    SVM  Accuracy: {:.4f} +/- {:.4f}'
64 |           ''.format(np.mean(scores_km), np.std(scores_km)))
65 | 


--------------------------------------------------------------------------------
/docs/API.rst:
--------------------------------------------------------------------------------
 1 | API Reference
 2 | --------------
 3 | 
 4 | A tutorial-like presentation is available at :doc:`usage`.
 5 | 
 6 | This library consists of a set of key classes such as ``KernelMatrix`` documented in :doc:`kernel_matrix`, diverse library of :doc:`kernel_functions`, ``KernelSet`` and ``KernelBucket`` described in :doc:`km_collections`, along with a library of :doc:`operations` and related :doc:`utilities`.
 7 | 
 8 | 
 9 | Exceptions
10 | ==========
11 | 
12 | .. autoclass:: kernelmethods.KernelMethodsException
13 |    :undoc-members:
14 |    :inherited-members:
15 |    :show-inheritance:
16 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = python -msphinx
 7 | SPHINXPROJ    = kernelmethods
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/categorical_kernels.rst:
--------------------------------------------------------------------------------
1 | Categorical kernels
2 | -------------------
3 | 
4 | Implemented:
5 | 
6 |  - ``MatchCountKernel``
7 | 
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # kernelmethods documentation build configuration file, created by
  5 | # sphinx-quickstart on Fri Jun  9 13:47:02 2017.
  6 | #
  7 | # This file is execfile()d with the current directory set to its
  8 | # containing dir.
  9 | #
 10 | # Note that not all possible configuration values are present in this
 11 | # autogenerated file.
 12 | #
 13 | # All configuration values have a default; values that are commented out
 14 | # serve to show the default.
 15 | 
 16 | # If extensions (or modules to document with autodoc) are in another
 17 | # directory, add these directories to sys.path here. If the directory is
 18 | # relative to the documentation root, use os.path.abspath to make it
 19 | # absolute, like shown here.
 20 | #
 21 | import os
 22 | import sys
 23 | 
 24 | # Get the project root dir, which is the parent of the current working directory
 25 | cwd = os.getcwd()
 26 | project_root = os.path.dirname(cwd)
 27 | 
 28 | sys.path.insert(0, os.path.abspath('..'))
 29 | sys.path.insert(0, project_root)
 30 | 
 31 | sys.path.insert(0, os.path.abspath('../visualqc'))
 32 | sys.path.insert(0, os.path.abspath('../../visualqc'))
 33 | sys.path.insert(0, os.path.abspath('visualqc'))
 34 | 
 35 | import kernelmethods
 36 | import sphinx_rtd_theme
 37 | 
 38 | # -- General configuration ---------------------------------------------
 39 | 
 40 | # If your documentation needs a minimal Sphinx version, state it here.
 41 | #
 42 | # needs_sphinx = '1.0'
 43 | 
 44 | # Add any Sphinx extension module names here, as strings. They can be
 45 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 46 | extensions = ['sphinx.ext.autodoc',
 47 |               'sphinx.ext.intersphinx',
 48 |               'sphinx.ext.mathjax',
 49 |               'sphinx.ext.viewcode',
 50 |               'sphinx.ext.githubpages',
 51 |               'numpydoc',
 52 |               'sphinxarg.ext',
 53 |               'sphinx.ext.intersphinx']
 54 | 
 55 | # Add any paths that contain templates here, relative to this directory.
 56 | templates_path = ['_templates']
 57 | 
 58 | # The suffix(es) of source filenames.
 59 | # You can specify multiple suffix as a list of string:
 60 | #
 61 | # source_suffix = ['.rst', '.md']
 62 | source_suffix = '.rst'
 63 | 
 64 | # The master toctree document.
 65 | master_doc = 'index'
 66 | 
 67 | # General information about the project.
 68 | project = u'kernelmethods'
 69 | copyright = u"2018, Pradeep Reddy Raamana"
 70 | author = u"Pradeep Reddy Raamana"
 71 | 
 72 | # The version info for the project you're documenting, acts as replacement
 73 | # for |version| and |release|, also used in various other places throughout
 74 | # the built documents.
 75 | #
 76 | # The short X.Y version.
 77 | version = kernelmethods.__version__
 78 | # The full version, including alpha/beta/rc tags.
 79 | release = kernelmethods.__version__
 80 | 
 81 | # The language for content autogenerated by Sphinx. Refer to documentation
 82 | # for a list of supported languages.
 83 | #
 84 | # This is also used if you do content translation via gettext catalogs.
 85 | # Usually you set "language" from the command line for these cases.
 86 | language = None
 87 | 
 88 | # List of patterns, relative to source directory, that match files and
 89 | # directories to ignore when looking for source files.
 90 | # This patterns also effect to html_static_path and html_extra_path
 91 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 92 | 
 93 | # The name of the Pygments (syntax highlighting) style to use.
 94 | pygments_style = 'sphinx'
 95 | 
 96 | # If true, `todo` and `todoList` produce output, else they produce nothing.
 97 | todo_include_todos = False
 98 | 
 99 | 
100 | # -- Options for HTML output -------------------------------------------
101 | 
# Theme used for HTML and HTML Help pages; see the Sphinx documentation for
# the list of builtin themes. The theme directory is obtained from the
# imported sphinx_rtd_theme package.
#
# html_theme = 'alabaster'
html_theme = 'sphinx_rtd_theme'
html_theme_path = [
    sphinx_rtd_theme.get_html_theme_path(),
]

# Theme options are theme-specific and customize the look and feel of a
# theme further; the available options are listed in each theme's
# documentation.
#
# html_theme_options = {}

# Paths, relative to this directory, that contain custom static files (such
# as style sheets). They are copied after the builtin static files, so a
# file named "default.css" will overwrite the builtin "default.css".
html_static_path = [
    '_static',
]
119 | 
120 | 
121 | # -- Options for HTMLHelp output ---------------------------------------
122 | 
# Base name of the output file produced by the HTML help builder.
htmlhelp_basename = 'kernelmethodsdoc'


# -- Options for LaTeX output ------------------------------------------

# No LaTeX option is overridden; the commented entries show what can be set.
latex_elements = {
    # Paper size ('letterpaper' or 'a4paper'):
    # 'papersize': 'letterpaper',

    # Font size ('10pt', '11pt' or '12pt'):
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble:
    # 'preamble': '',

    # Latex figure (float) alignment:
    # 'figure_align': 'htbp',
}

# How the document tree is grouped into LaTeX files. One tuple per file:
# (source start file, target name, title, author, documentclass
# [howto, manual, or own class]).
latex_documents = [
    (
        master_doc,
        'kernelmethods.tex',
        'kernelmethods Documentation',
        'Pradeep Reddy Raamana',
        'manual',
    ),
]


# -- Options for manual page output ------------------------------------

# One tuple per manual page:
# (source start file, name, description, authors, manual section).
man_pages = [
    (
        master_doc,
        'kernelmethods',
        'kernelmethods Documentation',
        [author],
        1,
    ),
]


# -- Options for Texinfo output ----------------------------------------

# How the document tree is grouped into Texinfo files. One tuple per file:
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (
        master_doc,
        'kernelmethods',
        'kernelmethods Documentation',
        author,
        'kernelmethods',
        'One line description of project.',
        'Miscellaneous',
    ),
]
181 | 
182 | 
183 | 
184 | 


--------------------------------------------------------------------------------
/docs/contributing.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../CONTRIBUTING.rst
2 | 


--------------------------------------------------------------------------------
/docs/flyer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raamana/kernelmethods/5497b572edc588027f9498d873afca0763d8e8e7/docs/flyer.png


--------------------------------------------------------------------------------
/docs/graph_kernels.rst:
--------------------------------------------------------------------------------
1 | Graph kernels (coming soon)
2 | ----------------------------
3 | 
4 | 


--------------------------------------------------------------------------------
/docs/history.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../HISTORY.rst
2 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | Welcome to kernelmethods's documentation!
 2 | =========================================
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 3
 6 |    :caption: Contents:
 7 | 
 8 |    readme
 9 |    installation
10 |    usage
11 |    API
12 |    kernel_matrix
13 |    kernel_functions
14 |    km_collections
15 |    operations
16 |    utilities
17 |    numeric_kernels
18 |    categorical_kernels
19 |    string_kernels
20 |    graph_kernels
21 |    contributing
22 |    history
23 | 
24 | Indices and tables
25 | ==================
26 | * :ref:`genindex`
27 | * :ref:`modindex`
28 | * :ref:`search`
29 | 


--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
 1 | .. highlight:: shell
 2 | 
 3 | ============
 4 | Installation
 5 | ============
 6 | 
 7 | 
 8 | Stable release
 9 | --------------
10 | 
11 | To install kernelmethods, run this command in your terminal:
12 | 
13 | .. code-block:: console
14 | 
15 |     $ pip install kernelmethods
16 | 
17 | This is the preferred method to install kernelmethods, as it will always install the most recent stable release.
18 | 
19 | If you don't have `pip`_ installed, this `Python installation guide`_ can guide
20 | you through the process.
21 | 
22 | .. _pip: https://pip.pypa.io
23 | .. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/
24 | 
25 | 
26 | From sources
27 | ------------
28 | 
29 | The sources for kernelmethods can be downloaded from the `Github repo`_.
30 | 
31 | You can either clone the public repository:
32 | 
33 | .. code-block:: console
34 | 
35 |     $ git clone https://github.com/raamana/kernelmethods
36 | 
37 | Or download the `tarball`_:
38 | 
39 | .. code-block:: console
40 | 
41 |     $ curl  -OL https://github.com/raamana/kernelmethods/tarball/master
42 | 
43 | Once you have a copy of the source, you can install it with:
44 | 
45 | .. code-block:: console
46 | 
47 |     $ python setup.py install
48 | 
49 | 
50 | .. _Github repo: https://github.com/raamana/kernelmethods
51 | .. _tarball: https://github.com/raamana/kernelmethods/tarball/master
52 | 


--------------------------------------------------------------------------------
/docs/kernel_functions.rst:
--------------------------------------------------------------------------------
 1 | Kernel functions
 2 | ----------------
 3 | 
 4 | Kernel functions are the key to producing kernel matrices and hence are the backbone of kernel methods and machines. These are represented by a fundamental [abstract base] class called ``BaseKernelFunction``, which defines several desirable properties, such as being callable, an easy way to check whether it induces a positive semi-definite kernel matrix, and a readable representation of the underlying function.
 5 | 
 6 | We also provide a ``KernelFromCallable`` class which makes it even easier to define a kernel function just by specifying the underlying function, without having to define a fully separate class.
 7 | 
 8 | In addition, the following classes are provided to enable compositional representation of multiple kernel functions for advanced applications: ``CompositeKernel``, ``ProductKernel``, ``SumKernel``, ``AverageKernel``, and ``WeightedAverageKernel``.
 9 | 
10 | 
11 | ``kernelmethods`` offers kernel functions that can operate on the following data types:
12 | 
13 |  - :doc:`numeric_kernels`
14 |  - :doc:`categorical_kernels`
15 |  - :doc:`string_kernels`
16 |  - :doc:`graph_kernels`
17 |  - and others such as trees and sequences (TBA).
18 | 
19 | .. automodule:: kernelmethods
20 |    :members: BaseKernelFunction, KernelFromCallable
21 |    :undoc-members:
22 |    :inherited-members:
23 |    :show-inheritance:
24 | 
25 | 
26 | Composite kernel functions
27 | ---------------------------
28 | 
29 | 
30 | .. automodule:: kernelmethods
31 |    :members: CompositeKernel, ProductKernel, SumKernel, AverageKernel, WeightedAverageKernel
32 |    :undoc-members:
33 |    :inherited-members:
34 |    :show-inheritance:
35 | 
36 | 


--------------------------------------------------------------------------------
/docs/kernel_matrix.rst:
--------------------------------------------------------------------------------
 1 | KernelMatrix class
 2 | ------------------
 3 | 
 4 | ``KernelMatrix`` is a self-contained class for the Gram matrix induced by a kernel function on a given sample. This class defines the central data structure for all kernel methods, as it acts as a key bridge between the input data space and the learning algorithms.
 5 | 
 6 | The class is designed in such a way that
 7 | 
 8 |  - it only computes elements of the kernel matrix (KM) as needed, and nothing more, which can save a lot of computation and storage
 9 |  - it supports both callable as well as attribute access, allowing easy access to partial or random portions of the KM. Indexing is aimed to be compliant with numpy as much as possible.
10 |  - allows parallel computation of different parts of the KM to speed up computation when ``N`` is large
11 |  - allows setting of user-defined attributes to allow easy identification and differentiation among a collection of KMs when working in applications such as Multiple Kernel Learning (MKL)
12 |  - implements basic operations such as centering and normalization (whose implementation differs from that of manipulating regular matrices)
13 |  - exposes several convenience attributes to make advanced development a breeze
14 | 
15 | This library also provides convenience wrappers:
16 | 
17 |  - ``KernelMatrixPrecomputed`` turns a precomputed kernel matrix into a ``KernelMatrix`` class with all its attractive properties
18 |  - ``ConstantKernelMatrix`` that defines a ``KernelMatrix`` with a constant everywhere
19 | 
20 | 
21 | .. autoclass:: kernelmethods.KernelMatrix
22 |    :members:
23 |    :undoc-members:
24 | 
25 | 
26 | Exceptions
27 | ==========
28 | 
29 | .. autoclass:: kernelmethods.KMAccessError
30 |    :undoc-members:
31 |    :inherited-members:
32 |    :show-inheritance:
33 | 
34 | 


--------------------------------------------------------------------------------
/docs/km_collections.rst:
--------------------------------------------------------------------------------
 1 | Collection of kernel matrices
 2 | -----------------------------
 3 | 
 4 | 
 5 | Kernel Set
 6 | ==============
 7 | 
 8 | .. autoclass:: kernelmethods.KernelSet
 9 |    :undoc-members:
10 |    :inherited-members:
11 |    :show-inheritance:
12 | 
13 | 
14 | Kernel Bucket
15 | ==============
16 | 
17 | .. autoclass:: kernelmethods.KernelBucket
18 |    :undoc-members:
19 |    :inherited-members:
20 |    :show-inheritance:
21 | 
22 | 
23 | Exceptions
24 | ==========
25 | 
26 | .. autoclass:: kernelmethods.KMSetAdditionError
27 |    :undoc-members:
28 |    :inherited-members:
29 |    :show-inheritance:
30 | 
31 | 


--------------------------------------------------------------------------------
/docs/logo_kernelmethods.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raamana/kernelmethods/5497b572edc588027f9498d873afca0763d8e8e7/docs/logo_kernelmethods.png


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

REM Switch to the directory that contains this script; the popd under the
REM :end label switches back.
pushd %~dp0

REM Command file for Sphinx documentation

REM Run Sphinx as a Python module unless the caller already set SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=python -msphinx
)
set SOURCEDIR=.
set BUILDDIR=_build
set SPHINXPROJ=kernelmethods

REM Without a target on the command line, show the Sphinx help instead.
if "%1" == "" goto help

REM Probe run with all output discarded. An errorlevel of 9009 or higher is
REM treated as "the command could not be started" and reported to the user.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The Sphinx module was not found. Make sure you have Sphinx installed,
	echo.then set the SPHINXBUILD environment variable to point to the full
	echo.path of the 'sphinx-build' executable. Alternatively you may add the
	echo.Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Pass the requested target (first argument) to Sphinx via its -M option,
REM together with the source dir, build dir and any extra SPHINXOPTS.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

:end
popd
37 | 


--------------------------------------------------------------------------------
/docs/numeric_kernels.rst:
--------------------------------------------------------------------------------
 1 | Numeric kernels
 2 | -----------------
 3 | 
 4 | 
 5 | .. automodule:: kernelmethods.numeric_kernels
 6 |    :members:
 7 |    :undoc-members:
 8 |    :show-inheritance:
 9 | 
10 | 


--------------------------------------------------------------------------------
/docs/operations.rst:
--------------------------------------------------------------------------------
 1 | Kernel Operations
 2 | ------------------------
 3 | 
 4 | 
 5 | 
 6 | .. automodule:: kernelmethods.operations
 7 |    :members:
 8 |    :undoc-members:
 9 | 
10 | 


--------------------------------------------------------------------------------
/docs/readme.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../README.rst
2 | 


--------------------------------------------------------------------------------
/docs/string_kernels.rst:
--------------------------------------------------------------------------------
1 | String kernels (coming soon)
2 | -----------------------------
3 | 
4 | 


--------------------------------------------------------------------------------
/docs/usage.rst:
--------------------------------------------------------------------------------
 1 | =====
 2 | Usage
 3 | =====
 4 | 
 5 | The demo notebooks can be run interactively on Binder; click the badge below to launch them.
 6 | 
 7 | Demo notebooks:
 8 | 
 9 | .. image:: https://mybinder.org/badge_logo.svg
10 |  :target: https://mybinder.org/v2/gh/raamana/kernelmethods/master?filepath=demo_tutorials%2Fdemo_kernelmethods.ipynb
11 | 
12 | 
13 | You can also get them directly from the `code repo <https://github.com/raamana/kernelmethods/tree/master/demo_tutorials>`_.
14 | 


--------------------------------------------------------------------------------
/docs/utilities.rst:
--------------------------------------------------------------------------------
 1 | Utilities
 2 | ----------
 3 | 
 4 | Here, we document several important utilities related to this library.
 5 | 
 6 | .. automodule:: kernelmethods.ranking
 7 |    :members:
 8 |    :undoc-members:
 9 |    :show-inheritance:
10 | 
11 | 


--------------------------------------------------------------------------------
/kernelmethods/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """Top-level package for kernelmethods."""
 4 | 
 5 | __all__ = ['KernelMatrix',
 6 |            'BaseKernelFunction',
 7 |            'KernelMethodsException', 'KMAccessError', 'KMNormError',
 8 |            'KMSetAdditionError',
 9 |            'PolyKernel', 'GaussianKernel', 'LaplacianKernel', 'LinearKernel',
10 |            'Chi2Kernel', 'SigmoidKernel', 'HadamardKernel',
11 |            'KernelBucket', 'KernelSet',
12 |            'KernelMachine', 'OptimalKernelSVC', 'OptimalKernelSVR', ]
13 | 
14 | from kernelmethods.algorithms import (KernelMachine, KernelMachineRegressor,
15 |                                       OptimalKernelSVC, OptimalKernelSVR)
16 | from kernelmethods.base import BaseKernelFunction, KernelMatrix, KernelSet
17 | from kernelmethods.config import (KMAccessError, KMNormError, KMSetAdditionError,
18 |                                   KernelMethodsException)
19 | from kernelmethods.numeric_kernels import (Chi2Kernel, GaussianKernel,
20 |                                            LaplacianKernel, LinearKernel, PolyKernel,
21 |                                            SigmoidKernel, HadamardKernel)
22 | from kernelmethods.sampling import KernelBucket
23 | from ._version import get_versions
24 | 
# The version string comes from the versioneer helper imported above; the
# helper is deleted right after use so that it does not remain an attribute
# of the package.
__version__ = get_versions()["version"]
del get_versions

__author__ = "Pradeep Reddy Raamana"
__email__ = "raamana@gmail.com"
30 | 


--------------------------------------------------------------------------------
/kernelmethods/_version.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # This file helps to compute a version number in source trees obtained from
  3 | # git-archive tarball (such as those provided by githubs download-from-tag
  4 | # feature). Distribution tarballs (built by setup.py sdist) and build
  5 | # directories (produced by setup.py build) will contain a much shorter file
  6 | # that just contains the computed version number.
  7 | 
  8 | # This file is released into the public domain. Generated by
  9 | # versioneer-0.18 (https://github.com/warner/python-versioneer)
 10 | 
 11 | """Git implementation of _version.py."""
 12 | 
 13 | import errno
 14 | import os
 15 | import re
 16 | import subprocess
 17 | import sys
 18 | 
 19 | 
 20 | def get_keywords():
 21 |     """Get the keywords needed to look up the version information."""
 22 |     # these strings will be replaced by git during git-archive.
 23 |     # setup.py/versioneer.py will grep for the variable names, so they must
 24 |     # each be defined on a line of their own. _version.py will just call
 25 |     # get_keywords().
 26 |     git_refnames = " (HEAD -> master)"
 27 |     git_full = "5497b572edc588027f9498d873afca0763d8e8e7"
 28 |     git_date = "2023-02-07 09:05:32 -0500"
 29 |     keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
 30 |     return keywords
 31 | 
 32 | 
 33 | class VersioneerConfig:
 34 |     """Container for Versioneer configuration parameters."""
 35 | 
 36 | 
 37 | def get_config():
 38 |     """Create, populate and return the VersioneerConfig() object."""
 39 |     # these strings are filled in when 'setup.py versioneer' creates
 40 |     # _version.py
 41 |     cfg = VersioneerConfig()
 42 |     cfg.VCS = "git"
 43 |     cfg.style = "pep440"
 44 |     cfg.tag_prefix = ""
 45 |     cfg.parentdir_prefix = "kernelmethods-"
 46 |     cfg.versionfile_source = "kernelmethods/_version.py"
 47 |     cfg.verbose = False
 48 |     return cfg
 49 | 
 50 | 
 51 | class NotThisMethod(Exception):
 52 |     """Exception raised if a method is not valid for the current scenario."""
 53 | 
 54 | 
 55 | LONG_VERSION_PY = {}
 56 | HANDLERS = {}
 57 | 
 58 | 
 59 | def register_vcs_handler(vcs, method):  # decorator
 60 |     """Decorator to mark a method as the handler for a particular VCS."""
 61 |     def decorate(f):
 62 |         """Store f in HANDLERS[vcs][method]."""
 63 |         if vcs not in HANDLERS:
 64 |             HANDLERS[vcs] = {}
 65 |         HANDLERS[vcs][method] = f
 66 |         return f
 67 |     return decorate
 68 | 
 69 | 
 70 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
 71 |                 env=None):
 72 |     """Call the given command(s)."""
 73 |     assert isinstance(commands, list)
 74 |     p = None
 75 |     for c in commands:
 76 |         try:
 77 |             dispcmd = str([c] + args)
 78 |             # remember shell=False, so use git.cmd on windows, not just git
 79 |             p = subprocess.Popen([c] + args, cwd=cwd, env=env,
 80 |                                  stdout=subprocess.PIPE,
 81 |                                  stderr=(subprocess.PIPE if hide_stderr
 82 |                                          else None))
 83 |             break
 84 |         except EnvironmentError:
 85 |             e = sys.exc_info()[1]
 86 |             if e.errno == errno.ENOENT:
 87 |                 continue
 88 |             if verbose:
 89 |                 print("unable to run %s" % dispcmd)
 90 |                 print(e)
 91 |             return None, None
 92 |     else:
 93 |         if verbose:
 94 |             print("unable to find command, tried %s" % (commands,))
 95 |         return None, None
 96 |     stdout = p.communicate()[0].strip()
 97 |     if sys.version_info[0] >= 3:
 98 |         stdout = stdout.decode()
 99 |     if p.returncode != 0:
100 |         if verbose:
101 |             print("unable to run %s (error)" % dispcmd)
102 |             print("stdout was %s" % stdout)
103 |         return None, p.returncode
104 |     return stdout, p.returncode
105 | 
106 | 
def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Derive the version from the name of a parent directory.

    Source tarballs conventionally unpack into a directory whose name holds
    both the project name and a version string. `root` itself and the two
    directory levels above it are examined; the first one whose base name
    starts with `parentdir_prefix` supplies the version (the remainder of the
    name). NotThisMethod is raised when none of them matches.
    """
    tried = []
    candidate = root
    for _ in range(3):
        name = os.path.basename(candidate)
        if name.startswith(parentdir_prefix):
            return {"version": name[len(parentdir_prefix):],
                    "full-revisionid": None,
                    "dirty": False, "error": None, "date": None}
        tried.append(candidate)
        candidate = os.path.dirname(candidate)  # up a level

    if verbose:
        print("Tried directories %s but none started with prefix %s" %
              (str(tried), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
130 | 
131 | 
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract version information from the given file.

    The file at `versionfile_abs` is scanned for the lines assigning
    ``git_refnames``, ``git_full`` and ``git_date``; the double-quoted value
    of each is stored under the key "refnames", "full" or "date". Keys whose
    line is not found are simply absent from the returned dict, and a file
    that cannot be opened or read yields whatever was collected so far.
    """
    # the code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
    variables = (("git_refnames =", "refnames"),
                 ("git_full =", "full"),
                 ("git_date =", "date"))
    keywords = {}
    try:
        # the context manager closes the file even when reading fails
        with open(versionfile_abs, "r") as f:
            for line in f:
                stripped = line.strip()
                for prefix, key in variables:
                    if stripped.startswith(prefix):
                        mo = re.search(r'=\s*"(.*)"', line)
                        if mo:
                            keywords[key] = mo.group(1)
    except EnvironmentError:
        # deliberate best effort: an unreadable file just means fewer keywords
        pass
    return keywords
159 | 
160 | 
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords.

    `keywords` is a dict with the keys "refnames" and "full" and optionally
    "date". The result is a dict with the keys "version",
    "full-revisionid", "dirty", "error" and "date". NotThisMethod is raised
    when the keywords are empty, incomplete or still unexpanded.
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    if "refnames" not in keywords or "full" not in keywords:
        # a partial dict cannot be used; report it the same way as the other
        # unusable-keyword cases instead of failing with a KeyError below
        raise NotThisMethod("incomplete keywords, refnames or full missing")
    date = keywords.get("date")
    if date is not None:
        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = {r.strip() for r in refnames.strip("()").split(",")}
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = {r for r in refs if re.search(r'\d', r)}
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix):]
            if verbose:
                print("picking %s" % r)
            return {"version": r,
                    "full-revisionid": keywords["full"].strip(),
                    "dirty": False, "error": None,
                    "date": date}
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": keywords["full"].strip(),
            "dirty": False, "error": "no suitable tags", "date": None}
214 | 
215 | 
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.

    Returns a dict of "pieces" with the keys "long", "short", "error" and
    "dirty" and, unless the describe output could not be used, also
    "closest-tag", "distance" and "date". NotThisMethod is raised when
    `root` is not under git control or when one of the git commands yields
    no output.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
                          hide_stderr=True)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
                                          "--always", "--long",
                                          "--match", "%s*" % tag_prefix],
                                   cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%s'"
                               % describe_out)
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
                               % (full_tag, tag_prefix))
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
                                    cwd=root)
        if count_out is None:
            # same treatment as the failed commands above, instead of
            # letting int(None) raise a TypeError
            raise NotThisMethod("'git rev-list' failed")
        pieces["distance"] = int(count_out)  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date_out, rc = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
                               cwd=root)
    if date_out is None:
        # no output to parse; report it like the other failed git commands
        raise NotThisMethod("'git show' failed")
    pieces["date"] = date_out.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces
306 | 
307 | 
def plus_or_dot(pieces):
    """Return the separator that opens or extends a local version segment.

    Parameters
    ----------
    pieces : dict
        Version pieces; only the ``"closest-tag"`` entry is inspected. It may
        be missing or ``None`` (no tag found).

    Returns
    -------
    str
        ``"."`` if the closest tag already contains a ``"+"``, else ``"+"``.
    """
    # "closest-tag" is stored as None (rather than left out) when no tag was
    # found, so the default of dict.get() alone does not protect the `in`
    # test below from a TypeError; coerce None to an empty string first.
    closest_tag = pieces.get("closest-tag") or ""
    if "+" in closest_tag:
        return "."
    return "+"
313 | 
314 | 
def render_pep440(pieces):
    """Render a PEP 440 version carrying a "local version identifier".

    Target format: TAG[+DISTANCE.gHEX[.dirty]] . A build made exactly on a
    tag and then dirtied renders as TAG+0.gHEX.dirty

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1: no tag to anchor on, start from a synthetic "0"
        version = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
        if pieces["dirty"]:
            version += ".dirty"
        return version

    version = tag
    if pieces["distance"] or pieces["dirty"]:
        # the separator depends on whether the tag already contains a "+"
        version += plus_or_dot(pieces)
        version += "%d.g%s" % (pieces["distance"], pieces["short"])
        if pieces["dirty"]:
            version += ".dirty"
    return version
338 | 
339 | 
def render_pep440_pre(pieces):
    """Render as TAG[.post.devDISTANCE]; the dirty flag is ignored.

    Exceptions:
    1: no tags. 0.post.devDISTANCE
    """
    tag = pieces["closest-tag"]
    distance = pieces["distance"]
    if not tag:
        # exception #1
        return "0.post.dev%d" % distance
    if not distance:
        # sitting exactly on the tag: nothing to append
        return tag
    return tag + (".post.dev%d" % distance)
354 | 
355 | 
def render_pep440_post(pieces):
    """Render as TAG[.postDISTANCE[.dev0]+gHEX] .

    A dirty tree is marked with ".dev0". Be aware that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but software should not be released from a dirty tree anyway.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag:
        if not (pieces["distance"] or pieces["dirty"]):
            # clean build exactly on the tag
            return tag
        parts = [tag, ".post%d" % pieces["distance"]]
        if pieces["dirty"]:
            parts.append(".dev0")
        parts.append(plus_or_dot(pieces))
        parts.append("g%s" % pieces["short"])
        return "".join(parts)

    # exception #1
    parts = ["0.post%d" % pieces["distance"]]
    if pieces["dirty"]:
        parts.append(".dev0")
    parts.append("+g%s" % pieces["short"])
    return "".join(parts)
381 | 
382 | 
def render_pep440_old(pieces):
    """Render as TAG[.postDISTANCE[.dev0]] .

    A dirty tree is marked with ".dev0".

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag and not (pieces["distance"] or pieces["dirty"]):
        # clean build exactly on the tag
        return tag

    # exception #1 (no tag) falls back to the literal base "0"
    base = tag if tag else "0"
    version = base + (".post%d" % pieces["distance"])
    if pieces["dirty"]:
        version += ".dev0"
    return version
403 | 
404 | 
def render_git_describe(pieces):
    """Render as TAG[-DISTANCE-gHEX][-dirty].

    Mirrors the output of 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        described = tag
        if pieces["distance"]:
            described += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        described = pieces["short"]
    return (described + "-dirty") if pieces["dirty"] else described
423 | 
424 | 
def render_git_describe_long(pieces):
    """Render as TAG-DISTANCE-gHEX[-dirty].

    Mirrors the output of 'git describe --tags --dirty --always --long'.
    Distance and hash are always included when a tag exists.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        described = tag + ("-%d-g%s" % (pieces["distance"], pieces["short"]))
    else:
        # exception #1
        described = pieces["short"]
    return (described + "-dirty") if pieces["dirty"] else described
443 | 
444 | 
def render(pieces, style):
    """Turn the version pieces into a version dict using the given style.

    An error recorded in ``pieces["error"]`` short-circuits rendering and is
    reported with version "unknown". A falsy style or "default" selects
    "pep440"; an unrecognised style raises ValueError.
    """
    failure = pieces["error"]
    if failure:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": failure,
                "date": None}

    # style name -> renderer; built only once an actual rendering is needed
    renderers = {
        "pep440": render_pep440,
        "pep440-pre": render_pep440_pre,
        "pep440-post": render_pep440_post,
        "pep440-old": render_pep440_old,
        "git-describe": render_git_describe,
        "git-describe-long": render_git_describe_long,
    }

    if not style or style == "default":
        style = "pep440"  # the default

    renderer = renderers.get(style)
    if renderer is None:
        raise ValueError("unknown style '%s'" % style)
    rendered = renderer(pieces)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None,
            "date": pieces.get("date")}
475 | 
476 | 
def get_versions():
    """Get version information or return default if unable to do so.

    Strategies are attempted in order, each one falling through to the next
    when it raises NotThisMethod: expanded keywords, then the VCS, then the
    parent directory name (only when a parentdir prefix is configured).
    """
    # This code lives in _version.py at ROOT/VERSIONFILE_SOURCE. When
    # __file__ is available the root can be derived from it. Some
    # py2exe/bbfreeze/non-CPython implementations don't provide __file__, in
    # which case only expanded keywords can be used.

    def _unknown(reason):
        # common shape of the "could not determine a version" answer
        return {"version": "0+unknown", "full-revisionid": None,
                "dirty": None,
                "error": reason, "date": None}

    cfg = get_config()
    verbose = cfg.verbose

    try:
        keywords = get_keywords()
        return git_versions_from_keywords(keywords, cfg.tag_prefix, verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file; strip one
        # path component per segment to climb from __file__ back to the root.
        for _ in cfg.versionfile_source.split('/'):
            root = os.path.dirname(root)
    except NameError:
        return _unknown("unable to find root of source tree")

    try:
        vcs_pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
        return render(vcs_pieces, cfg.style)
    except NotThisMethod:
        pass

    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
    except NotThisMethod:
        pass

    return _unknown("unable to compute version")
521 | 


--------------------------------------------------------------------------------
/kernelmethods/algorithms.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 
  3 | Module to gather various high-level algorithms based on the kernel methods,
  4 |     such as kernel-based predictive models for classification and regression.
  5 | 
  6 | """
  7 | 
  8 | from abc import abstractmethod
  9 | from copy import deepcopy
 10 | 
 11 | import numpy as np
 12 | from sklearn.base import (BaseEstimator, ClassifierMixin, RegressorMixin,
 13 |                           is_classifier, is_regressor)
 14 | from sklearn.exceptions import NotFittedError
 15 | from sklearn.svm import SVC, SVR
 16 | from sklearn.utils.validation import check_X_y, check_array
 17 | 
 18 | from kernelmethods import config as cfg
 19 | from kernelmethods.base import KernelMatrix
 20 | from kernelmethods.numeric_kernels import GaussianKernel
 21 | from kernelmethods.ranking import find_optimal_kernel, get_estimator
 22 | from kernelmethods.sampling import KernelBucket, make_kernel_bucket
 23 | 
 24 | 
 25 | class BaseKernelMachine(BaseEstimator):
 26 |     """Generic class to return a drop-in sklearn estimator.
 27 | 
 28 |     Parameters
 29 |     ----------
 30 |     k_func : KernelFunction
 31 |         The kernel function the kernel machine bases itself on
 32 | 
 33 |     learner_id : str
 34 |         Identifier for the estimator to be built based on the kernel function.
 35 |         Options: ``SVC`` and ``SVR``.
 36 |         Default: ``SVC`` (classifier version of SVM)
 37 | 
 38 |     normalized : flag
 39 |         Flag to indicate whether to keep the kernel matrix normalized
 40 |         Default: False
 41 | 
 42 |     """
 43 | 
 44 | 
 45 |     def __init__(self,
 46 |                  k_func=GaussianKernel(),
 47 |                  learner_id='SVC',
 48 |                  normalized=False):
 49 |         """
 50 |         Constructor for the KernelMachine class.
 51 | 
 52 |         Parameters
 53 |         ----------
 54 |         k_func : KernelFunction
 55 |             The kernel function the kernel machine bases itself on
 56 | 
 57 |         learner_id : str
 58 |             Identifier for the estimator to be built based on the kernel function.
 59 |             Options: ``SVC`` and ``SVR``.
 60 |             Default: ``SVC`` (classifier version of SVM)
 61 | 
 62 |         normalized : flag
 63 |             Flag to indicate whether to keep the kernel matrix normalized.
 64 |             Default: False
 65 |         """
 66 | 
 67 |         self.k_func = k_func
 68 |         self.learner_id = learner_id
 69 |         self.normalized = normalized
 70 | 
 71 | 
 72 |     def fit(self, X, y, sample_weight=None):
 73 |         """Fit the chosen Estimator based on the user-defined kernel.
 74 | 
 75 |         Parameters
 76 |         ----------
 77 |         X : {array-like, sparse matrix}, shape (n_samples, n_features)
 78 |             Training vectors, where n_samples is the number of samples
 79 |             and n_features is the number of features.
 80 | 
 81 |         y : array-like, shape (n_samples,)
 82 |             Target values (class labels in classification, real numbers in
 83 |             regression)
 84 | 
 85 |         sample_weight : array-like, shape (n_samples,)
 86 |             Per-sample weights. Rescale C per sample. Higher weights
 87 |             force the classifier to put more emphasis on these points.
 88 | 
 89 |         Returns
 90 |         -------
 91 |         self : object
 92 | 
 93 |         Notes
 94 |         ------
 95 |         If X and y are not C-ordered and contiguous arrays of np.float64 and
 96 |         X is not a scipy.sparse.csr_matrix, X and/or y may be copied.
 97 | 
 98 |         If X is a dense array, then the other methods will not support sparse
 99 |         matrices as input.
100 | 
101 |         """
102 | 
103 |         if is_regressor(self):
104 |             self._train_X, self._train_y = check_X_y(X, y, y_numeric=True)
105 |             self._train_y = self._train_y.astype(np.float_)
106 |         else:
107 |             self._train_X, self._train_y = check_X_y(X, y)
108 | 
109 |         self._km = KernelMatrix(self.k_func, name='train_km',
110 |                                 normalized=self.normalized)
111 |         self._km.attach_to(self._train_X)
112 | 
113 |         self._estimator, self.param_grid = get_estimator(self.learner_id)
114 |         self._estimator.fit(X=self._km.full, y=self._train_y,
115 |                             sample_weight=sample_weight)
116 | 
117 |         if is_classifier(self):
118 |             self.classes_ = self._estimator.classes_
119 | 
120 |         return self
121 | 
122 | 
123 |     def predict(self, X):
124 |         """
125 |         Make predictions on the new samplets in X.
126 | 
127 |         For an one-class model, +1 or -1 is returned.
128 | 
129 |         Parameters
130 |         ----------
131 |         X : {array-like, sparse matrix}, shape (n_samples, n_features)
132 | 
133 |         Returns
134 |         -------
135 |         y_pred : array, shape (n_samples,)
136 |             Class labels for samples in X.
137 |         """
138 | 
139 |         if not hasattr(self, '_km'):
140 |             raise NotFittedError("Can't predict. Not fitted yet. Run .fit() first!")
141 | 
142 |         test_X = check_array(X)
143 | 
144 |         # this is a fresh new KM
145 |         self._km = KernelMatrix(self.k_func, name='test_km',
146 |                                 normalized=self.normalized)
147 | 
148 |         # sample_one must be test data to get the right shape for sklearn X
149 |         self._km.attach_to(sample_one=test_X, sample_two=self._train_X)
150 | 
151 |         predicted_y = self._estimator.predict(self._km.full)
152 | 
153 |         return np.asarray(predicted_y, dtype=self._train_y.dtype)
154 | 
155 | 
156 |     def get_params(self, deep=True):
157 |         """returns all the relevant parameters for this estimator!"""
158 | 
159 |         return {'k_func'    : self.k_func,
160 |                 'normalized': self.normalized,
161 |                 'learner_id': self.learner_id}
162 | 
163 | 
164 |     def set_params(self, **parameters):
165 |         """Param setter"""
166 | 
167 |         for parameter, value in parameters.items():
168 |             if parameter in ('k_func', 'learner_id', 'normalized'):
169 |                 setattr(self, parameter, value)
170 | 
171 |         return self
172 | 
173 | 
174 |     def _more_tags(self):
175 |         """Handling specific cases with tags"""
176 | 
177 |         from kernelmethods.numeric_kernels import Chi2Kernel, SigmoidKernel, \
178 |             HadamardKernel
179 |         if isinstance(self.k_func, Chi2Kernel):
180 |             return {'requires_positive_X': True}
181 |         elif isinstance(self.k_func, (SigmoidKernel, HadamardKernel)):
182 |             return {'poor_score': True}
183 |         else:
184 |             return dict()
185 | 
186 | 
class KernelMachine(BaseKernelMachine, ClassifierMixin):
    """Classifier version of the KernelMachine

    Adds no code of its own: it only combines ``BaseKernelMachine`` with
    sklearn's ``ClassifierMixin``, inheriting the constructor (whose default
    ``learner_id`` is ``SVC``), ``fit`` and ``predict`` unchanged.
    """
189 | 
190 | 
class KernelMachineRegressor(BaseKernelMachine, RegressorMixin):
    """Kernel machine for regression tasks.

    Parameters
    ----------
    k_func : KernelFunction
        Kernel function that the kernel machine is built on

    learner_id : str
        Identifier of the estimator to build on top of the kernel function.
        Options: ``SVR``.
        Default: ``SVR`` (regressor version of SVM)

    normalized : flag
        Whether to keep the kernel matrix normalized
        Default: False

    """


    def __init__(self,
                 k_func=GaussianKernel(),
                 learner_id='SVR',
                 normalized=False):
        """
        Build the regressor flavour of the KernelMachine

        Parameters
        ----------
        k_func : KernelFunction
            Kernel function that the kernel machine is built on

        learner_id : str
            Identifier of the estimator to build on top of the kernel function.
            Options: ``SVR``
            Default: ``SVR`` (regressor version of SVM)

        normalized : flag
            Whether to keep the kernel matrix normalized.
            Default: False
        """

        # the base constructor stores exactly these three parameters; only
        # the default learner differs here
        super().__init__(k_func=k_func,
                         learner_id=learner_id,
                         normalized=normalized)
236 | 
237 | 
class BaseOptimalKernelMachine(BaseEstimator):
    """
    Base class for estimators that learn the optimal kernel for a given
    sample and build a support vector machine based on this custom kernel.

    It is meant to be combined with an sklearn SVM estimator (SVR or SVC, as
    done by the subclasses in this module), which supplies the actual ``fit``
    and ``predict`` reached through ``super()``. Subclasses define
    ``_find_optimal_kernel``, ``get_params`` and ``set_params``.

    Parameters
    ----------

    k_bucket : KernelBucket or str
        An instance of KernelBucket that contains all the kernels to be compared,
        or a string identifying the sampling_strategy which populates a KernelBucket.

    method : str
        Scoring method to rank different kernels

    C : float, optional (default=1.0)
        Penalty parameter C of the error term.

    epsilon : float, optional (default=0.1)
         Epsilon in the epsilon-SVR model. It specifies the epsilon-tube
         within which no penalty is associated in the training loss function
         with points predicted within a distance epsilon from the actual
         value.

    tol : float, optional (default=1e-3)
        Tolerance for stopping criterion.

    shrinking : boolean, optional (default=True)
        Whether to use the shrinking heuristic.


    Attributes
    ----------
    support_ : array-like, shape = [n_SV]
        Indices of support vectors.

    support_vectors_ : array-like, shape = [nSV, n_features]
        Support vectors.

    dual_coef_ : array, shape = [1, n_SV]
        Coefficients of the support vector in the decision function.

    coef_ : array, shape = [1, n_features]
        Weights assigned to the features (coefficients in the primal
        problem). This is only available in the case of a linear kernel.

        `coef_` is readonly property derived from `dual_coef_` and
        `support_vectors_`.

    intercept_ : array, shape = [1]
        Constants in decision function.

    """


    @abstractmethod
    def _find_optimal_kernel(self):
        """Method to find the optimal kernel

        Given a kernel bucket, a training sample and a ranking method. To be
        defined by the child class, appropriate for their task i.e. classification
        or regression
        """


    def fit(self, X, y, sample_weight=None):
        """Estimate the optimal kernel, and fit a SVM based on the custom kernel.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape (n_samples,)
            Target values (class labels in classification, real numbers in
            regression)

        sample_weight : array-like, shape (n_samples,)
            Per-sample weights. Rescale C per sample. Higher weights
            force the classifier to put more emphasis on these points.

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
            If ``k_bucket`` is neither a KernelBucket nor a string that
            ``make_kernel_bucket`` accepts.

        Notes
        ------
        If X and y are not C-ordered and contiguous arrays of np.float64 and
        X is not a scipy.sparse.csr_matrix, X and/or y may be copied.

        If X is a dense array, then the other methods will not support sparse
        matrices as input.

        """

        if isinstance(self.k_bucket, str):
            try:
                # using a new internal variable to retain user supplied param
                self._k_bucket = make_kernel_bucket(self.k_bucket)
            except Exception as exc:
                # catch Exception only, so KeyboardInterrupt/SystemExit still
                # propagate, and chain the cause to keep the original failure
                raise ValueError('Input for k_bucket can only be an instance of '
                                 'KernelBucket or a sampling strategy to generate '
                                 'one with make_kernel_bucket. '
                                 'Sampling strategy must be one of {}'
                                 ''.format(cfg.kernel_bucket_strategies)) from exc
        elif isinstance(self.k_bucket, KernelBucket):
            # work on a copy so the user-supplied bucket is left untouched
            self._k_bucket = deepcopy(self.k_bucket)
        else:
            raise ValueError('Input for k_bucket can only be an instance of '
                             'KernelBucket or a sampling strategy to generate '
                             'one with make_kernel_bucket')

        self._train_X, self._train_y = check_X_y(X, y, y_numeric=True)

        self.opt_kernel_ = self._find_optimal_kernel()

        # next class in the MRO (the sklearn SVM in the subclasses) is fitted
        # on the full kernel matrix of the optimal kernel
        super().fit(X=self.opt_kernel_.full, y=self._train_y,
                    sample_weight=sample_weight)

        # temporary hack to pass sklearn estimator checks till a bug is fixed
        # for more see: https://github.com/scikit-learn/scikit-learn/issues/14712
        self.n_iter_ = 1

        return self


    def predict(self, X):
        """
        Make predictions for the samples in X using the optimal kernel.

        For an one-class model, +1 or -1 is returned.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)

        Returns
        -------
        y_pred : array, shape (n_samples,)
            Predictions for samples in X, as returned by the underlying
            estimator (no dtype conversion is done here).

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called yet.
        """

        if not hasattr(self, 'opt_kernel_'):
            raise NotFittedError("Can't predict. Not fitted yet. Run .fit() first!")

        X = check_array(X)

        # sample_one must be test data to get the right shape for sklearn X
        # (this re-attaches the stored optimal kernel to the test/train pair)
        self.opt_kernel_.attach_to(sample_one=X, sample_two=self._train_X)
        test_train_KM = self.opt_kernel_.full
        predicted_y = super().predict(test_train_KM)

        # data type conversion is done in child class, esp. for classification
        # return np.asarray(predicted_y, dtype=np.intp)
        return predicted_y


    @abstractmethod
    def get_params(self, deep=True):
        """returns all the relevant parameters for this estimator!"""

        # example code, for future reference
        return {'k_bucket' : self.k_bucket,
                'method'   : self.method,
                'C'        : self.C,
                'epsilon'  : self.epsilon,
                'shrinking': self.shrinking,
                'tol'      : self.tol}


    @abstractmethod
    def set_params(self, **parameters):
        """Param setter"""

        # example code, for future reference
        for parameter, value in parameters.items():
            if parameter in ('k_bucket', 'method',
                             'C', 'epsilon', 'shrinking', 'tol'):
                setattr(self, parameter, value)

        return self
423 | 
424 | 
class OptimalKernelSVR(BaseOptimalKernelMachine, SVR):
    """
    Support vector regressor trained on the kernel found to be optimal for
    the given sample.

    This class wraps the sklearn SVR estimator so it can be used as a drop-in
    replacement for it; sklearn's implementation is in turn based on LIBSVM.

    Parameters
    ----------

    k_bucket : KernelBucket or str
        Either a KernelBucket holding all the kernels to be compared, or a
        string naming the sampling_strategy used to populate a KernelBucket.

    method : str
        Scoring method to rank different kernels

    C : float, optional (default=1.0)
        Penalty parameter C of the error term.

    epsilon : float, optional (default=0.1)
         Epsilon in the epsilon-SVR model. It specifies the epsilon-tube
         within which no penalty is associated in the training loss function
         with points predicted within a distance epsilon from the actual
         value.

    tol : float, optional (default=1e-3)
        Tolerance for stopping criterion.

    shrinking : boolean, optional (default=True)
        Whether to use the shrinking heuristic.


    Attributes
    ----------
    support_ : array-like, shape = [n_SV]
        Indices of support vectors.

    support_vectors_ : array-like, shape = [nSV, n_features]
        Support vectors.

    dual_coef_ : array, shape = [1, n_SV]
        Coefficients of the support vector in the decision function.

    coef_ : array, shape = [1, n_features]
        Weights assigned to the features (coefficients in the primal
        problem). This is only available in the case of a linear kernel.

        `coef_` is readonly property derived from `dual_coef_` and
        `support_vectors_`.

    intercept_ : array, shape = [1]
        Constants in decision function.

    """


    def __init__(self,
                 k_bucket='exhaustive',
                 method='cv_risk',
                 C=1.0,
                 epsilon=0.1,
                 shrinking=True,
                 tol=1e-3):
        """
        SVR regressor trained with the sample-wise optimal kernel

        Parameters
        ----------
        k_bucket : KernelBucket or str
            Either a KernelBucket holding all the kernels to be compared, or
            a string naming the sampling strategy to populate a KernelBucket.

        method : str
            Scoring method to rank different kernels

        C : float, optional (default=1.0)
            Penalty parameter C of the error term.

        epsilon : float, optional (default=0.1)
             Epsilon in the epsilon-SVR model. It specifies the epsilon-tube
             within which no penalty is associated in the training loss function
             with points predicted within a distance epsilon from the actual
             value.

        shrinking : boolean, optional (default=True)
            Whether to use the shrinking heuristic.

        tol : float, optional (default=1e-3)
            Tolerance for stopping criterion.

        """

        # kernel='precomputed' is deliberately not passed to SVC/SVR here, to
        # avoid issues with cross_val_score and safe_split
        super().__init__(C=C, epsilon=epsilon, shrinking=shrinking, tol=tol)

        # kernel-search settings
        self.k_bucket, self.method = k_bucket, method

        # SVR hyper-parameters
        self.C, self.epsilon = C, epsilon
        self.shrinking, self.tol = shrinking, tol


    def _find_optimal_kernel(self):
        """Search the kernel bucket for the best kernel, ranking with SVR"""

        self._opt_ker_search_est_name = 'SVR'

        best_kernel = find_optimal_kernel(
            self._k_bucket,
            self._train_X, self._train_y,
            method=self.method,
            estimator_name=self._opt_ker_search_est_name)
        return best_kernel


    def get_params(self, deep=True):
        """Return the constructor parameters of this estimator as a dict"""

        names = ('k_bucket', 'method', 'C', 'epsilon', 'shrinking', 'tol')
        return {name: getattr(self, name) for name in names}


    def set_params(self, **parameters):
        """Set the recognised parameters; any other keyword is ignored"""

        settable = ('k_bucket', 'method', 'C', 'epsilon', 'shrinking', 'tol')
        for name, new_value in parameters.items():
            if name not in settable:
                continue
            setattr(self, name, new_value)

        return self
561 | 
562 | 
563 | class OptimalKernelSVC(BaseOptimalKernelMachine, SVC):
564 |     """
565 |     An estimator to learn the optimal kernel for a given sample and
566 |     build a support vector classifier based on this custom kernel.
567 | 
568 |     This class is wrapped around the sklearn SVC estimator to function as its
569 |     drop-in replacement, whose implementation is in turn based on LIBSVM.
570 | 
571 |     Parameters
572 |     ----------
573 | 
574 |     k_bucket : KernelBucket or str
575 |         An instance of KernelBucket that contains all the kernels to be compared,
576 |         or a string identifying the sampling_strategy which populates a KernelBucket.
577 | 
578 |     method : str
579 |         Scoring method to rank different kernels
580 | 
581 |     C : float, optional (default=1.0)
582 |         Penalty parameter C of the error term.
583 | 
584 |     tol : float, optional (default=1e-3)
585 |         Tolerance for stopping criterion.
586 | 
587 |     shrinking : boolean, optional (default=True)
588 |         Whether to use the shrinking heuristic.
589 | 
590 | 
591 |     Attributes
592 |     ----------
593 |     support_ : array-like, shape = [n_SV]
594 |         Indices of support vectors.
595 | 
596 |     support_vectors_ : array-like, shape = [nSV, n_features]
597 |         Support vectors.
598 | 
599 |     dual_coef_ : array, shape = [1, n_SV]
600 |         Coefficients of the support vector in the decision function.
601 | 
602 |     coef_ : array, shape = [1, n_features]
603 |         Weights assigned to the features (coefficients in the primal
604 |         problem). This is only available in the case of a linear kernel.
605 | 
606 |         `coef_` is readonly property derived from `dual_coef_` and
607 |         `support_vectors_`.
608 | 
609 |     intercept_ : array, shape = [1]
610 |         Constants in decision function.
611 | 
612 |     """
613 | 
614 | 
615 |     def __init__(self, k_bucket='exhaustive',
616 |                  method='cv_risk',
617 |                  C=1.0,
618 |                  shrinking=True,
619 |                  tol=1e-3):
620 |         """
621 |         SVC classifier trained with the sample-wise optimal kernel
622 | 
623 |         Parameters
624 |         ----------
625 |         k_bucket : KernelBucket or str
626 |             An instance of KernelBucket that contains all the kernels to be compared,
627 |             or a string identifying sampling strategy to populate a KernelBucket.
628 | 
629 |         method : str
630 |             Scoring method to rank different kernels
631 | 
632 |         C : float, optional (default=1.0)
633 |             Penalty parameter C of the error term.
634 | 
635 |         shrinking : boolean, optional (default=True)
636 |             Whether to use the shrinking heuristic.
637 | 
638 |         tol : float, optional (default=1e-3)
639 |             Tolerance for stopping criterion.
640 | 
641 |         """
642 | 
643 |         # not init'ing SVC/SVR with kernel='precomputed' to avoid issues with
644 |         # cross_val_score and safe_split
645 |         super().__init__(C=C, shrinking=shrinking, tol=tol)
646 | 
647 |         self.k_bucket = k_bucket
648 |         self.method = method
649 |         self.C = C
650 |         self.shrinking = shrinking
651 |         self.tol = tol
652 | 
653 | 
654 |     def _find_optimal_kernel(self):
655 |         """Method to find the optimal kernel"""
656 | 
657 |         self._opt_ker_search_est_name = 'SVC'
658 | 
659 |         return find_optimal_kernel(self._k_bucket,
660 |                                    self._train_X, self._train_y,
661 |                                    method=self.method,
662 |                                    estimator_name=self._opt_ker_search_est_name)
663 | 
664 | 
665 |     def predict(self, X):
666 |         """
667 |         Perform classification on samples in X.
668 | 
669 |         For an one-class model, +1 or -1 is returned.
670 | 
671 |         Parameters
672 |         ----------
673 |         X : {array-like, sparse matrix}, shape (n_samples, n_features)
674 | 
675 |         Returns
676 |         -------
677 |         y_pred : array, shape (n_samples,)
678 |             Class labels for samples in X.
679 |         """
680 | 
681 |         predicted_y = super().predict(X)
682 |         # casting output type to integers
683 |         return np.asarray(predicted_y, dtype=np.intp)
684 | 
685 | 
686 |     def get_params(self, deep=True):
687 |         """returns all the relevant parameters for this estimator!"""
688 | 
689 |         return {'k_bucket' : self.k_bucket,
690 |                 'method'   : self.method,
691 |                 'C'        : self.C,
692 |                 'shrinking': self.shrinking,
693 |                 'tol'      : self.tol}
694 | 
695 | 
696 |     def set_params(self, **parameters):
697 |         """Param setter"""
698 | 
699 |         for parameter, value in parameters.items():
700 |             if parameter in ('k_bucket', 'method',
701 |                              'C', 'shrinking', 'tol'):
702 |                 setattr(self, parameter, value)
703 | 
704 |         return self
705 | 


--------------------------------------------------------------------------------
/kernelmethods/categorical.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 
  3 | Module for categorical kernels
  4 | 
  5 | Please refer to the following papers and theses for more details:
  6 | 
  7 |  - Villegas García, Marco Antonio. "An investigation into new kernels for
  8 |    categorical variables." Master's thesis, Universitat Politècnica de Catalunya,
  9 |    2013.
 10 | 
 11 | 
 12 | """
 13 | 
 14 | import numpy as np
 15 | 
 16 | from kernelmethods.base import BaseKernelFunction
 17 | from kernelmethods.utils import check_input_arrays
 18 | from kernelmethods import config as cfg
 19 | 
 20 | 
 21 | class MatchCountKernel(BaseKernelFunction):
 22 |     """
 23 |     Categorical kernel measuring similarity via the number of matching categorical
 24 |     dimensions.
 25 | 
 26 |     Parameters
 27 |     ----------
 28 | 
 29 |     return_perc : bool
 30 |         If True, the return value would be normalized by the number of dimensions.
 31 | 
 32 |     References
 33 |     ----------
 34 | 
 35 |     Villegas García, Marco A., "An investigation into new kernels for categorical
 36 |     variables." Master's thesis, Universitat Politècnica de Catalunya, 2013.
 37 | 
 38 |     """
 39 | 
 40 | 
 41 |     def __init__(self,
 42 |                  return_perc=True,
 43 |                  skip_input_checks=False):
 44 |         """Constructor."""
 45 | 
 46 |         self.return_perc = return_perc
 47 |         if self.return_perc:
 48 |             super().__init__('MatchPerc')
 49 |         else:
 50 |             super().__init__('MatchCount')
 51 | 
 52 |         self.skip_input_checks = skip_input_checks
 53 | 
 54 | 
 55 |     def __call__(self, vec_c, vec_d):
 56 |         """
 57 |         Actual implementation of the kernel func.
 58 | 
 59 |         Parameters
 60 |         ----------
 61 | 
 62 |         vec_c, vec_d : array of equal-sized categorical variables
 63 | 
 64 |         """
 65 | 
 66 |         vec_c, vec_d = _check_categorical_arrays(vec_c, vec_d)
 67 | 
 68 |         if not np.issubdtype(vec_c.dtype, cfg.dtype_categorical) or \
 69 |             not np.issubdtype(vec_d.dtype, cfg.dtype_categorical):
 70 |             raise TypeError('Categorical kernels require str or unicode dtype')
 71 | 
 72 |         match_count = np.sum(vec_c==vec_d)
 73 | 
 74 |         if self.return_perc:
 75 |             return match_count / len(vec_d)
 76 |         else:
 77 |             return match_count
 78 | 
 79 | 
 80 |     def __str__(self):
 81 |         """human readable repr"""
 82 | 
 83 |         return self.name
 84 | 
 85 | 
 86 | def _check_categorical_arrays(x, y):
 87 |     """
 88 |     Ensures the inputs are
 89 |     1) 1D arrays (not matrices)
 90 |     2) with compatible size
 91 |     3) of categorical data type
 92 |     and hence are safe to operate on.
 93 | 
 94 |     This is a variation of utils.check_input_arrays() to accommodate the special
 95 |     needs for categorical dtype, where we do not have lists of
 96 |     originally numbers/bool data to be converted to strings, and assume they are
 97 |     categorical.
 98 | 
 99 |     Parameters
100 |     ----------
101 |     x : iterable
102 |     y : iterable
103 | 
104 |     Returns
105 |     -------
106 |     x : ndarray
107 |     y : ndarray
108 |     """
109 | 
110 |     x = _ensure_type_size(x, ensure_num_dim=1)
111 |     y = _ensure_type_size(y, ensure_num_dim=1)
112 | 
113 |     if x.size != y.size:
114 |         raise ValueError('x (n={}) and y (n={}) differ in size! '
115 |                          'They must be of same length'.format(x.size, y.size))
116 | 
117 |     return x, y
118 | 
119 | 
120 | def _ensure_type_size(array, ensure_num_dim=1):
121 |     """Checking type and size of arrays"""
122 | 
123 |     if not isinstance(array, np.ndarray):
124 |         array = np.squeeze(np.asarray(array))
125 | 
126 |     if array.ndim != ensure_num_dim:
127 |         raise ValueError('array must be {}-dimensional! '
128 |                          'It has {} dims with shape {} '
129 |                          ''.format(ensure_num_dim, array.ndim, array.shape))
130 | 
131 |     return array
132 | 


--------------------------------------------------------------------------------
/kernelmethods/config.py:
--------------------------------------------------------------------------------
 1 | from operator import add, mul
 2 | import numpy as np
 3 | 
 4 | class KernelMethodsException(Exception):
 5 |     """
 6 |     Generic exception to indicate invalid use of the ``kernelmethods`` library.
 7 | 
 8 |     Allows to distinguish improper use of KernelMatrix from other code exceptions
 9 |     """
10 |     pass
11 | 
12 | 
class KMAccessError(KernelMethodsException):
    """Raised to signal invalid access to elements of the kernel matrix."""
16 | 
17 | 
class KMNormError(KernelMethodsException):
    """Raised to signal an error while normalizing a kernel matrix."""
21 | 
22 | 
class KMSetAdditionError(KernelMethodsException):
    """Raised to signal an invalid addition of a kernel matrix to a KernelSet."""
26 | 
27 | 
class KernelMethodsWarning(Warning):
    """Warning category for kernelmethods-specific warnings."""
31 | 
32 | 
class Chi2NegativeValuesException(KernelMethodsException):
    """Raised to signal that the Chi^2 kernel requires non-negative values."""
36 | 
37 | 
# names of the operations recognised for combining kernel matrices
VALID_KERNEL_MATRIX_OPS = ('sum', 'product', 'average')

# binary operator implementing an operation; note 'average' has no entry here
OPER_KM_OPS = {'sum'    : add,
               'product': mul}


# default values and ranges

kernel_bucket_strategies = ('exhaustive', 'light', 'linear_only')
# strategy: exhaustive
# the power-of-two grids below all use the exponents -5, -3, -1, 1, 3, 5
default_degree_values_poly_kernel = (2, 3, 4)
default_sigma_values_gaussian_kernel = tuple([2**exp for exp in range(-5, 6, 2)])
default_gamma_values_laplacian_kernel = tuple([2**exp for exp in range(-5, 7, 2)])
default_gamma_values_sigmoid_kernel = tuple([2**exp for exp in range(-5, 7, 2)])
default_offset_values_sigmoid_kernel = tuple([-2.0, 1.0, 2.0])

# light
# exponents -3, -1, 1 for gaussian/laplacian; -3, -1, 1, 3, 5 for sigmoid
light_degree_values_poly_kernel = (2, 3, )
light_sigma_values_gaussian_kernel = tuple([2**exp for exp in range(-3, 3, 2)])
light_gamma_values_laplacian_kernel = tuple([2**exp for exp in range(-3, 3, 2)])
light_gamma_values_sigmoid_kernel = tuple([2**exp for exp in range(-3, 7, 2)])
light_offset_values_sigmoid_kernel = tuple([1.0, ])

# ranking

VALID_RANKING_METHODS = ("align/corr", "cv_risk")

# controls the precision for kernel_matrix elements
km_dtype = np.dtype('f8')

# categorical variables
# np.str_ is the unicode string scalar type; the np.unicode_ alias for it was
# removed in NumPy 2.0, so the canonical name is used to work on both 1.x and 2.x
dtype_categorical = np.str_
70 | 
71 | 


--------------------------------------------------------------------------------
/kernelmethods/numeric_kernels.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from kernelmethods.base import BaseKernelFunction
  3 | from kernelmethods.config import Chi2NegativeValuesException
  4 | from kernelmethods.utils import _ensure_min_eps, check_input_arrays
  5 | 
  6 | 
  7 | # TODO special handling for sparse arrays
  8 | #   (e.g. custom dot product during kernel evaluation might be more efficient
  9 | 
 10 | 
 11 | class HadamardKernel(BaseKernelFunction):
 12 |     """Hadamard kernel function
 13 | 
 14 |     Formula::
 15 |         K_a(x, y) = \Sum_k {|x_k|^a * |y_k|^a} / {2*(|x_k|^a + |y_k|^a)}
 16 | 
 17 |     Alpha (a) must be non-zero.
 18 |     Hadamard kernel is not always PSD.
 19 | 
 20 |     Parameters
 21 |     ----------
 22 |     alpha : int
 23 |         degree to raise the inner product
 24 | 
 25 |     skip_input_checks : bool
 26 |         Flag to skip input validation to save time.
 27 |         Skipping validation is strongly discouraged for normal use,
 28 |         unless you know exactly what you are doing (expert users).
 29 | 
 30 |     Raises
 31 |     ------
 32 |     ValueError
 33 |         If Alpha is zero.
 34 | 
 35 |     """
 36 | 
 37 | 
 38 |     def __init__(self, alpha=3, skip_input_checks=False):
 39 |         """
 40 |         Constructor
 41 | 
 42 |         Parameters
 43 |         ----------
 44 |         alpha : int
 45 |             degree to raise the inner product
 46 | 
 47 |         skip_input_checks : bool
 48 |             Flag to skip input validation to save time.
 49 |             Skipping validation is strongly discouraged for normal use,
 50 |             unless you know exactly what you are doing (expert users).
 51 | 
 52 |         """
 53 | 
 54 |         super().__init__(name='Hadamard')
 55 | 
 56 |         if not np.isclose(alpha, 0.0):
 57 |             self.alpha = alpha
 58 |         else:
 59 |             raise ValueError('Alpha for Hadamard kernel must be non-zero')
 60 | 
 61 |         self.skip_input_checks = skip_input_checks
 62 | 
 63 | 
 64 |     def __call__(self, x, y):
 65 |         """Actual implementation of kernel func"""
 66 | 
 67 |         if not self.skip_input_checks:
 68 |             x, y = check_input_arrays(x, y, ensure_dtype=np.number)
 69 | 
 70 |         abs_x_a = np.power(np.abs(x), self.alpha)
 71 |         abs_y_a = np.power(np.abs(y), self.alpha)
 72 | 
 73 |         return np.dot((abs_x_a * abs_y_a), 2 * (abs_x_a + abs_y_a))
 74 | 
 75 | 
 76 |     def __str__(self):
 77 |         """human readable repr"""
 78 | 
 79 |         return "{}(alpha={})".format(self.name, self.alpha)
 80 | 
 81 | 
 82 | class PolyKernel(BaseKernelFunction):
 83 |     """Polynomial kernel function
 84 | 
 85 |     Formula::
 86 |         K(x, y) = ( b + gamma*<x, y> )^degree
 87 | 
 88 |     Parameters
 89 |     ----------
 90 |     degree : int
 91 |         degree to raise the inner product
 92 | 
 93 |     gamma : float
 94 |         scaling factor
 95 | 
 96 |     b : float
 97 |         intercept
 98 | 
 99 |     skip_input_checks : bool
100 |         Flag to skip input validation to save time.
101 |         Skipping validation is strongly discouraged for normal use,
102 |         unless you know exactly what you are doing (expert users).
103 |     """
104 | 
105 | 
106 |     def __init__(self, degree=3, gamma=1.0, b=1.0, skip_input_checks=False):
107 |         """
108 |         Constructor
109 | 
110 |         Parameters
111 |         ----------
112 |         degree : int
113 |             degree to raise the inner product
114 | 
115 |         b : float
116 |             intercept
117 | 
118 |         skip_input_checks : bool
119 |             Flag to skip input validation to save time.
120 |             Skipping validation is strongly discouraged for normal use,
121 |             unless you know exactly what you are doing (expert users).
122 | 
123 |         """
124 | 
125 |         super().__init__(name='polynomial')
126 | 
127 |         # TODO implement param check
128 |         self.degree = degree
129 |         self.gamma = gamma
130 |         self.b = b
131 | 
132 |         self.skip_input_checks = skip_input_checks
133 | 
134 | 
135 |     def __call__(self, x, y):
136 |         """Actual implementation of kernel func"""
137 | 
138 |         if not self.skip_input_checks:
139 |             x, y = check_input_arrays(x, y, ensure_dtype=np.number)
140 | 
141 |         return (self.b + self.gamma * np.dot(x, y)) ** self.degree
142 | 
143 | 
144 |     def __str__(self):
145 |         """human readable repr"""
146 | 
147 |         return "{}(degree={},gamma={},b={})".format(self.name, self.degree,
148 |                                                     self.gamma, self.b)
149 | 
150 | 
class GaussianKernel(BaseKernelFunction):
    """Gaussian kernel function

    Computed as ``exp(-gamma * ||x - y||^2)`` with ``gamma = 1 / (2 * sigma^2)``.

    Parameters
    ----------
    sigma : float
        bandwidth

    skip_input_checks : bool
        Flag to skip input validation to save time.
        Skipping validation is strongly discouraged for normal use,
        unless you know exactly what you are doing (expert users).

    """


    def __init__(self, sigma=2.0, skip_input_checks=False):
        """
        Constructor

        Parameters
        ----------
        sigma : float
            bandwidth

        skip_input_checks : bool
            Flag to skip input validation to save time.
            Skipping validation is strongly discouraged for normal use,
            unless you know exactly what you are doing (expert users).

        """

        super().__init__(name='gaussian')

        # TODO implement param check
        # both sigma and the derived gamma go through _ensure_min_eps, which is
        # meant to keep them at eps or larger and so avoid zero division
        bandwidth = _ensure_min_eps(sigma)
        self.sigma = bandwidth
        self.gamma = _ensure_min_eps(1.0 / (2 * bandwidth ** 2))

        self.skip_input_checks = skip_input_checks


    def __call__(self, x, y):
        """Evaluate the kernel on a pair of vectors."""

        if not self.skip_input_checks:
            x, y = check_input_arrays(x, y, ensure_dtype=np.number)

        squared_distance = np.linalg.norm(x - y, ord=2) ** 2

        return np.exp(-self.gamma * squared_distance)


    def __str__(self):
        """Name of the kernel followed by its bandwidth."""

        return "{name}(sigma={sigma})".format(name=self.name, sigma=self.sigma)
206 | 
207 | 
class LaplacianKernel(BaseKernelFunction):
    """Laplacian kernel function

    Computed as ``exp(-gamma * sum(|x - y|))``.

    Parameters
    ----------
    gamma : float
        scale factor

    skip_input_checks : bool
        Flag to skip input validation to save time.
        Skipping validation is strongly discouraged for normal use,
        unless you know exactly what you are doing (expert users).

    """


    def __init__(self, gamma=1.0, skip_input_checks=False):
        """
        Constructor

        Parameters
        ----------
        gamma : float
            scale factor

        skip_input_checks : bool
            Flag to skip input validation to save time.
            Skipping validation is strongly discouraged for normal use,
            unless you know exactly what you are doing (expert users).

        """

        super().__init__(name='laplacian')

        self.gamma, self.skip_input_checks = gamma, skip_input_checks


    def __call__(self, x, y):
        """Evaluate the kernel on a pair of vectors."""

        if not self.skip_input_checks:
            x, y = check_input_arrays(x, y, ensure_dtype=np.number)

        sum_abs_diff = np.sum(np.abs(x - y))

        return np.exp(-self.gamma * sum_abs_diff)


    def __str__(self):
        """Name of the kernel followed by its scale factor."""

        return "{name}(gamma={gamma})".format(name=self.name, gamma=self.gamma)
260 | 
261 | 
class Chi2Kernel(BaseKernelFunction):
    """Chi-squared kernel function

    This kernel is implemented as::

        k(x, y) = exp(-gamma Sum [(x - y)^2 / (x + y)])

    x and y must have non-negative values (>=0).

    As a division is involved, when x+y is 0 or when x+y and x-y are both 0 for a
    particular dimension, the division results in a NaN, which is currently
    being ignored, by summing only non-NaN values. If your feature sets have many
    zeros, you may want investigate the effect of this kernel on your dataset
    carefully to ensure you understand this kernel meets your needs and
    expectations.

    Parameters
    ----------
    gamma : float
        scale factor

    skip_input_checks : bool
        Flag to skip input validation to save time.
        Skipping validation is strongly discouraged for normal use,
        unless you know exactly what you are doing (expert users).

    Raises
    ------
    Chi2NegativeValuesException
        If x or y contains a negative value. This check is performed even when
        ``skip_input_checks`` is True.

    """


    def __init__(self, gamma=1.0, skip_input_checks=False):
        """
        Constructor

        Parameters
        ----------
        gamma : float
            scale factor

        skip_input_checks : bool
            Flag to skip input validation to save time.
            Skipping validation is strongly discouraged for normal use,
            unless you know exactly what you are doing (expert users).

        """

        super().__init__(name='chi2')

        self.gamma = gamma

        self.skip_input_checks = skip_input_checks


    def __call__(self, x, y):
        """Actual implementation of kernel func"""

        if not self.skip_input_checks:
            x, y = check_input_arrays(x, y, ensure_dtype=np.float64)

        if (x < 0).any() or (y < 0).any():
            raise Chi2NegativeValuesException(
                'Chi^2 kernel requires non-negative values!'
                ' x or y contains negative values')

        # Note: NaNs due to Zero division are being ignored via np.nansum!
        # The 0/0 divisions are deliberate, so the "invalid value" floating
        # point warning they would trigger is silenced for this expression only.
        with np.errstate(invalid='ignore'):
            chi2_terms = np.power(x - y, 2) / (x + y)

        value = np.exp(-self.gamma * np.nansum(chi2_terms))

        return value


    def __str__(self):
        """human readable repr"""

        return "{}(gamma={})".format(self.name, self.gamma)
335 | 
336 | 
class SigmoidKernel(BaseKernelFunction):
    """
    Sigmoid kernel function (also known as hyperbolic tangent kernel)

    Computed as ``tanh(offset + gamma * <x, y>)``.

    NOTE: This kernel is not always PSD, and normalizing its kernel matrix can
    result in numerical issues or errors.

    Parameters
    ----------
    gamma : float
        scale factor

    offset : float
        value of offset/bias

    skip_input_checks : bool
        Flag to skip input validation to save time.
        Skipping validation is strongly discouraged for normal use,
        unless you know exactly what you are doing (expert users).

    """


    def __init__(self, gamma=1.0, offset=1.0, skip_input_checks=False):
        """
        Constructor

        Parameters
        ----------
        gamma : float
            scale factor

        offset : float
            value of offset/bias

        skip_input_checks : bool
            Flag to skip input validation to save time.
            Skipping validation is strongly discouraged for normal use,
            unless you know exactly what you are doing (expert users).

        """

        super().__init__(name='sigmoid')

        self.gamma, self.offset = gamma, offset
        self.skip_input_checks = skip_input_checks


    def __call__(self, x, y):
        """Evaluate the kernel on a pair of vectors."""

        if not self.skip_input_checks:
            x, y = check_input_arrays(x, y, ensure_dtype=np.number)

        scaled_inner_product = self.gamma * np.dot(x, y)

        return np.tanh(self.offset + scaled_inner_product)


    def __str__(self):
        """Name of the kernel followed by its parameter values."""

        template = "{name}(gamma={gamma},offset={offset})"
        return template.format(name=self.name, gamma=self.gamma,
                               offset=self.offset)
400 | 
401 | 
class LinearKernel(BaseKernelFunction):
    """Linear kernel function

    Computed as ``x.dot(y.T)``.

    Parameters
    ----------
    skip_input_checks : bool
        Flag to skip input validation to save time.
        Skipping validation is strongly discouraged for normal use,
        unless you know exactly what you are doing (expert users).
    """


    def __init__(self, skip_input_checks=False):
        """
        Constructor

        Parameters
        ----------
        skip_input_checks : bool
            Flag to skip input validation to save time.
            Skipping validation is strongly discouraged for normal use,
            unless you know exactly what you are doing (expert users).

        """

        super().__init__(name='linear')
        self.skip_input_checks = skip_input_checks


    def __call__(self, x, y):
        """Evaluate the kernel on a pair of vectors."""

        if self.skip_input_checks:
            left, right = x, y
        else:
            left, right = check_input_arrays(x, y, ensure_dtype=np.number)

        return left.dot(right.T)


    def __str__(self):
        """The kernel has no parameters, so its name alone identifies it."""

        return self.name
444 | 
445 | 
# one instance, built with default parameters, of each kernel function defined
# in this module
DEFINED_KERNEL_FUNCS = (
    Chi2Kernel(),
    HadamardKernel(),
    PolyKernel(),
    GaussianKernel(),
    LaplacianKernel(),
    LinearKernel(),
    SigmoidKernel(),
)
454 | 


--------------------------------------------------------------------------------
/kernelmethods/operations.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """
  4 | This module implements the common kernel operations such as
  5 | 
  6 |  - normalization of a kernel matrix (KM),
  7 |  - centering (one- and two-sample cases),
  8 |  - evaluating similarity, computing alignment,
  9 |  - frobenius norms,
 10 |  - linear combinations and
 11 |  - checking whether a KM is PSD.
 12 | 
 13 | API
 14 | ----
 15 | 
 16 | """
 17 | 
 18 | import traceback
 19 | from warnings import warn
 20 | 
 21 | import numpy as np
 22 | from kernelmethods.config import KMNormError, KernelMethodsException
 23 | from kernelmethods.utils import contains_nan_inf, ensure_ndarray_1D
 24 | from numpy import multiply as elem_wise_multiply
 25 | from scipy.linalg import LinAlgError, eigh
 26 | 
 27 | 
 28 | def is_positive_semidefinite(sym_matrix,
 29 |                              tolerance=1e-6,
 30 |                              verbose=False):
 31 |     """
 32 |     Tests whether a given matrix is positive-semidefinite (PSD).
 33 | 
 34 |     A symmetric matrix is PSD if ALL its eigen values >= 0 (non-negative).
 35 |     If any of its eigen values are negative, it is not PSD.
 36 | 
 37 |     This functions accounts for numerical instabilities with a tolerance parameter.
 38 | 
 39 |     This function can also be called with a shorthand ``is_PSD()``
 40 | 
 41 |     Parameters
 42 |     ----------
 43 |     sym_matrix : ndarray
 44 |         Matrix to be evaluted for PSDness
 45 | 
 46 |     tolerance : float
 47 |         Tolerance parameter to account for numerical instabilities in the eigen
 48 |         value computations (which can result in negative eigen values very slightly
 49 |         below 0)
 50 | 
 51 |     verbose : bool
 52 |         Flag to indicate whether to print traceback in case of errors
 53 |         during the computation of the eigen values
 54 | 
 55 |     Returns
 56 |     -------
 57 |     psd : bool
 58 |         Flag indicating whether the matrix is PSD.
 59 | 
 60 |     """
 61 | 
 62 |     if not isinstance(sym_matrix, np.ndarray):
 63 |         raise TypeError('Input matrix must be in numpy array format!')
 64 | 
 65 |     if sym_matrix.shape[0] != sym_matrix.shape[1]:
 66 |         warn('Input matrix is not square, and hence not PSD')
 67 |         return False
 68 | 
 69 |     if not np.isclose(sym_matrix, sym_matrix.T).all():
 70 |         warn('Input matrix is not symmetric, and hence not PSD')
 71 |         return False
 72 | 
 73 |     try:
 74 |         eig_values = eigh(sym_matrix, eigvals_only=True)
 75 |     except LinAlgError:
 76 |         if verbose:
 77 |             traceback.print_exc()
 78 |         # we are not actually raising LinAlgError, just using it to categorize as
 79 |         # not PSD. So, can't use test cases to try raise LinAlgError, so not
 80 |         # testable!
 81 |         print('LinAlgError raised - eigen value computation failed --> not PSD')
 82 |         psd = False
 83 |     except:
 84 |         if verbose:
 85 |             traceback.print_exc()
 86 |         warn('Unknown exception during eigen value computation --> not PSD')
 87 |         psd = False
 88 |     else:
 89 |         if verbose:
 90 |             print('Smallest eigen values are:\n'
 91 |                   '{}'.format(eig_values[:min(10, len(eig_values))]))
 92 |         if any(eig_values < -tolerance):  # notice the negative sign before tolerance
 93 |             psd = False
 94 |         else:
 95 |             psd = True
 96 | 
 97 |     return psd
 98 | 
 99 | 
100 | # shorter alias
101 | is_PSD = is_positive_semidefinite
102 | 
103 | 
def center_km(KM):
    """
    Centers a given kernel matrix.

    Implements the definition according to Lemma 1 in Section 2.2 in
    Cortes, Corinna, Mehryar Mohri, and Afshin Rostamizadeh, 2012, "Algorithms for
    Learning Kernels Based on Centered Alignment", Journal of Machine Learning
    Research 13(Mar): 795–828.

    Parameters
    ----------
    KM : ndarray
        Symmetric matrix to be centered.

    Returns
    -------
    centered_km : ndarray
        Centered kernel matrix, computed as ``Ic . KM . Ic`` where
        ``Ic = I - (1 1^T) / n``.

    Raises
    ------
    ValueError
        If the input is not a numpy ndarray, or is not a square 2D matrix.

    """

    if not isinstance(KM, np.ndarray):
        raise ValueError('Unknown format for input matrix -'
                         'must be a square numpy ndarray')

    # ndim is checked first so that 0-D/1-D arrays get the same ValueError as
    # other non-square inputs, rather than an IndexError from shape[1]
    if KM.ndim != 2 or KM.shape[0] != KM.shape[1]:
        raise ValueError('Input matrix is not square!')

    n_rows = KM.shape[0]

    # directly initializing one_oneT without going through unnecessary matrix
    # products
    #   vec_1s = np.ones((n_rows, 1)) # row vector of 1s
    #   one_oneT = vec_1s.dot(vec_1s.T) # 1 dot 1T
    one_oneT = np.ones((n_rows, n_rows))
    Ic = np.eye(n_rows) - (one_oneT / n_rows)

    return Ic.dot(KM).dot(Ic)
142 | 
143 | 
def normalize_km(KM, method='cosine'):
    """
    Normalize a kernel matrix to have unit diagonal.

    Cosine normalization, i.e. ``D . KM . D`` with ``D = diag(1/sqrt(diag(KM)))``,
    following Section 5.1 (Page 113) of
    Shawe-Taylor and Cristianini, "Kernels Methods for Pattern Analysis", 2004

    Matrix must be square (and coming from a single sample: K(X,X), not K(X,Y))

    Parameters
    ----------
    KM : ndarray
        Symmetric matrix to be normalized

    method : str
        Method of normalization (case-insensitive). Options: ``cosine`` only.

    Returns
    -------
    normed_km : ndarray
        Normalized kernel matrix

    Raises
    ------
    ValueError
        If the matrix is not square.
    KMNormError
        If any diagonal entry is [close to] zero.
    NotImplementedError
        If ``method`` is anything other than ``cosine``.

    """

    n_rows, n_cols = KM.shape[0], KM.shape[1]
    if n_rows != n_cols:
        raise ValueError('Input kernel matrix must be square! '
                         'i.e. K(X,X) must be generated from '
                         'inner products on a single sample X, '
                         'not an inner-product on two separate samples X and Y')

    try:
        method = method.lower()
        if method != 'cosine':
            raise NotImplementedError('normalization method {} is not implemented'
                                      'yet!'.format(method))

        km_diag = KM.diagonal()
        has_near_zero_diag = np.isclose(km_diag, 0.0).any()
        if has_near_zero_diag:
            raise KMNormError(
                'Some diagnoal entries in KM are [close to] zero - '
                ' this results in infinite or Nan values '
                'during Cosine normalization of KM!')

        # D = diag(1./sqrt(diag(K))), then normed_K = D * K * D
        # (@ is the matrix multiplication operator)
        inv_sqrt_diag = np.diagflat(1 / np.sqrt(km_diag))
        normed_km = (inv_sqrt_diag @ KM) @ inv_sqrt_diag
        # in case of two samples K(X, Y), the left- and right-most factors
        #  must come from K(X,X) & K(Y,Y) respectively: see normalize_km_2sample
    except (KMNormError, KernelMethodsException):
        # library-specific errors pass through without the extra warning
        raise
    except:
        # anything else is announced with a warning and then re-raised as-is
        warn('Unable to normalize kernel matrix using method {}'.format(method))
        raise

    # only reached when the normalization itself succeeded
    if contains_nan_inf(normed_km):
        warn('normalization of kernel matrix resulted in Inf / NaN '
             'values - check your parameters and data!')

    return normed_km
205 | 
206 | 
def normalize_km_2sample(cross_K_XY, diag_K_XX, diag_K_YY, method='cosine'):
    """
    Normalize a two-sample kernel matrix K(X,Y) using the self-similarities
    of the two samples.

    Cosine normalization divides each entry k(x_i, y_j) by
    sqrt(k(x_i, x_i)) * sqrt(k(y_j, y_j)).
    Implements definition _similar_ to Section 5.1 in book (Page 113)
    Shawe-Taylor and Cristianini, "Kernels Methods for Pattern Analysis", 2004


    Parameters
    ----------
    cross_K_XY : ndarray, 2D
        Matrix of inner-products for samples from X onto Y i.e. K(X,Y)

    diag_K_XX : array
        Diagonal from matrix of inner-products for samples from X onto itself i.e.
        K(X,X)
        K(X,X) must NOT be normalized (otherwise they will all be 1s)

    diag_K_YY : array
        Diagonal from matrix of inner-products for samples from Y onto itself i.e.
        K(Y,Y)

    method : str
        Method of normalization. Options: ``cosine`` only.

    Returns
    -------
    normed_km : ndarray
        Normalized version of K(X,Y)

        NOTE: K_XY may NOT have unit diagonal, as k(x,y) != sqrt(k(x,x))*sqrt(k(y,y))

    Raises
    ------
    ValueError
        If the lengths of the diagonals do not match the shape of K(X,Y)

    KMNormError
        If any diagonal entry is [close to] zero

    NotImplementedError
        If ``method`` is anything other than ``cosine``
    """

    if diag_K_XX.size != cross_K_XY.shape[0] or \
        cross_K_XY.shape[1] != diag_K_YY.size:
        raise ValueError('Shape mismatch for multiplication across the 3 kernel '
                         'matrices! Length of diag_K_XX must match '
                         'number of rows in K_XY, and number of columns in K_XY '
                         'must match length of diag_K_YY.')

    method = method.lower()
    if method == 'cosine':
        if np.isclose(diag_K_XX, 0.0).any() or \
            np.isclose(diag_K_YY, 0.0).any():
            raise KMNormError(
                'Some diagnoal entries in one of the KMs are [close to] zero - '
                ' this results in infinite or Nan values '
                'during Cosine normalization of KM!')

        # Scaling row i by 1/sqrt(k(x_i,x_i)) and column j by 1/sqrt(k(y_j,y_j))
        # is what D_xx @ K @ D_yy computes for diagonal D; broadcasting does it
        # without building the two dense (mostly zero) diagonal matrices.
        row_scale = (1 / np.sqrt(np.ravel(diag_K_XX)))[:, np.newaxis]
        col_scale = (1 / np.sqrt(np.ravel(diag_K_YY)))[np.newaxis, :]
        # np.multiply is always element-wise, regardless of the array subclass
        normed_km = np.multiply(np.multiply(row_scale, cross_K_XY), col_scale)
    else:
        raise NotImplementedError('Two-sample normalization method {} is not '
                                  'implemented yet!'.format(method))

    return normed_km
264 | 
265 | 
def frobenius_product(A, B):
    r"""
    Computes the Frobenius product between two matrices of equal dimensions.

    <A, B>_F is the sum of the element-wise products between A and B.

    .. math::
        <\mathbf{A}, \mathbf{B}>_F = \sum_{i, j} \mathbf{A}_{ij} \mathbf{B}_{ij}

    Parameters
    ----------
    A, B : ndarray
        Two matrices of equal dimensions to compute the product.

    Returns
    -------
    product : float
        Frobenius product

    Raises
    ------
    ValueError
        If the shapes of A and B differ

    """

    shape_a, shape_b = A.shape, B.shape
    if shape_a != shape_b:
        raise ValueError('Dimensions of the two matrices must be the same '
                         'to compute Frobenious product! They differ: {}, {}'
                         ''.format(shape_a, shape_b))

    # sum over every entry of the element-wise product
    elementwise = elem_wise_multiply(A, B)
    return np.sum(elementwise)
293 | 
294 | 
def frobenius_norm(A):
    """Computes the Frobenius norm of a matrix A, i.e. the square root of the
    Frobenius product of A with itself.

    Parameters
    ----------
    A : ndarray
        Matrix to compute the norm of

    Returns
    -------
    norm : float
        Frobenius norm

    """

    self_product = frobenius_product(A, A)
    return np.sqrt(self_product)
312 | 
313 | 
def alignment_centered(km_one, km_two,
                       value_if_zero_division='raise',
                       centered_already=False):
    """
    Computes the centered alignment between two kernel matrices

    (Alignment is computed on centered kernel matrices)

    Implements Definition 4 (Kernel matrix alignment) from Section 2.3 in Cortes,
    Corinna, Mehryar Mohri, and Afshin Rostamizadeh, 2012, "Algorithms for
    Learning Kernels Based on Centered Alignment", Journal of Machine Learning
    Research 13(Mar): 795–828.

    Parameters
    ----------

    km_one, km_two : ndarray
        Kernel matrices of identical shape, as numpy arrays

    value_if_zero_division : str or float
        determines the value of alignment, in case the norm of one of the two
        kernel matrices is close to zero and we are unable to compute it.

        Default is 'raise', requesting to raise an exception.

        One could also choose 0.0, which assigns lowest alignment,  effectively
        discarding it for ranking purposes.

    centered_already : bool
        Flag to indicate whether the input kernel matrices are centered already
        or not. If False, input KMs will be centered.

    Returns
    -------
    centered_alignment : float
        Value of centered_alignment between the two kernel matrices

    Raises
    ------
    TypeError
        If either input is not a numpy array

    ValueError
        If the shapes differ, or if one of the Frobenius norms is [close to]
        zero and ``value_if_zero_division`` requests raising

    """

    # validate the type first: the shape comparison below needs the .shape
    # attribute, which arbitrary inputs (e.g. lists) do not have
    if not isinstance(km_one, np.ndarray) or not isinstance(km_two, np.ndarray):
        raise TypeError('Input KMs must be numpy arrays')

    if km_one.shape != km_two.shape:
        raise ValueError('Dimensions of the two matrices must be the same '
                         'to compute their alignment! They differ: {}, {}'
                         ''.format(km_one.shape, km_two.shape))

    if not centered_already:
        kC_one = center_km(km_one)
        kC_two = center_km(km_two)
    else:
        kC_one = km_one
        kC_two = km_two

    fnorm_one = frobenius_norm(kC_one)
    fnorm_two = frobenius_norm(kC_two)

    # the alignment divides by the product of the norms, so a vanishing norm
    # has to be handled explicitly
    if np.isclose(fnorm_one, 0.0) or np.isclose(fnorm_two, 0.0):
        if value_if_zero_division in ('raise', Exception):
            raise ValueError('The Frobenius norm of KM1 or KM2 is 0. '
                             'Can not compute alignment!')
        else:
            warn('The Frobenius norm of KM1 or KM2 is 0. Setting value of '
                 'alignment as {} as requested'.format(
                value_if_zero_division))
            return value_if_zero_division

    return frobenius_product(kC_one, kC_two) / (fnorm_one * fnorm_two)
381 | 
382 | 
def eval_similarity(km_one, km_two):
    """Evaluate similarity between two kernel matrices.

    Placeholder only: no similarity measure is implemented here, and every
    call raises ``NotImplementedError``.
    """

    raise NotImplementedError
387 | 
388 | 
def linear_combination(km_set, weights, norm_weights=False):
    """
    Weighted linear combinations of a set of given kernel matrices

    Parameters
    ----------
    km_set : KernelSet
        Collection of compatible kernel matrices

    weights : Iterable
        Set of weights for the kernel matrices in km_set.
        Must support ``len()``, with one weight per kernel matrix.
        Weights are not checked to sum to 1.0. Use norm_weights=True if needed.

    norm_weights : bool
        Flag to request normalizing weights to ensure they sum to 1.0

    Returns
    -------
    lin_comb_KM : ndarray
        Final result of weighted linear combination of the kernel matrix set

    Raises
    ------
    ValueError
        If the number of weights differs from the size of the kernel set

    RuntimeError
        If normalization is requested but the weights sum to [nearly] zero

    """

    num_weights = len(weights)
    if km_set.size != num_weights:
        # placeholders are filled in the order the message reads:
        # weight count first, kernel set size second
        raise ValueError('Number of weights ({}) supplied differ '
                         'from the kernel set size ({})'
                         ''.format(num_weights, km_set.size))

    weights = ensure_ndarray_1D(weights)

    if norm_weights:
        denom = weights.sum()
        if np.isclose(denom, 0.0):
            raise RuntimeError('sum of weights == 0.0, unable to normalize!')
        weights = weights / denom

    # Computes the weighted average kernel
    # km_set.num_samples is a tuple (N, M) when operating on two samples
    #   e.g. train x test
    KM = np.zeros(km_set.num_samples)
    for weight, km in zip(weights, km_set):
        KM = KM + weight * km.full

    return KM
433 | 


--------------------------------------------------------------------------------
/kernelmethods/ranking.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 
  3 | Module gathering techniques and helpers to rank kernels using various methods and
  4 | metrics, such as
  5 | 
  6 |  - their target alignment,
  7 |  - performance in cross-validation
  8 | 
  9 | """
 10 | 
 11 | import numpy as np
 12 | from kernelmethods import config as cfg
 13 | from kernelmethods.sampling import KernelBucket
 14 | from kernelmethods.utils import min_max_scale
 15 | 
 16 | 
 17 | def find_optimal_kernel(kernel_bucket, sample, targets, method='align/corr',
 18 |                         **method_params):
 19 |     """
 20 |     Finds the optimal kernel for the current sample given their labels.
 21 | 
 22 |     Parameters
 23 |     ----------
 24 |     kernel_bucket : KernelBucket
 25 |         The collection of kernels to evaluate and rank
 26 | 
 27 |     sample : ndarray
 28 |         The dataset given kernel bucket to be evaluated on
 29 | 
 30 |     targets : ndarray
 31 |         Target labels for each point in the sample dataset
 32 | 
 33 |     method : str
 34 |         identifier for the metric to choose to rank the kernels
 35 | 
 36 |     Returns
 37 |     -------
 38 |     km : KernelMatrix
 39 |         Instance of KernelMatrix with the optimal kernel function
 40 | 
 41 |     """
 42 | 
 43 |     if not isinstance(kernel_bucket, KernelBucket):
 44 |         raise TypeError('Input is not of required type: KernelBucket')
 45 | 
 46 |     method = method.lower()
 47 |     if method not in cfg.VALID_RANKING_METHODS:
 48 |         raise NotImplementedError('Ranking method not recognized. Choose one of {}'
 49 |                                   ''.format(cfg.VALID_RANKING_METHODS))
 50 | 
 51 |     kernel_bucket.attach_to(sample=sample)
 52 |     metric = rank_kernels(kernel_bucket, targets, method=method, **method_params)
 53 | 
 54 |     return kernel_bucket[np.argmax(metric)]
 55 | 
 56 | 
 57 | def rank_kernels(kernel_bucket, targets, method='align/corr', **method_params):
 58 |     """
 59 |     Computes a given ranking metric for all the kernel matrices in the bucket.
 60 | 
 61 |     Choices for the method include: "align/corr", "cv_risk"
 62 | 
 63 |     Parameters
 64 |     ----------
 65 |     kernel_bucket : KernelBucket
 66 | 
 67 |     targets : Iterable
 68 |         target values of the sample attached to the bucket
 69 | 
 70 |     method : str
 71 |         Identifies one of the metrics: ``align/corr``, ``cv_risk``
 72 | 
 73 |     method_params : dict
 74 |         Additional parameters to be passed on to the method chosen above.
 75 | 
 76 |     Returns
 77 |     -------
 78 |     scores : ndarray
 79 |         Values of the ranking metrics computed for the kernel matrices in the bucket
 80 | 
 81 |     """
 82 | 
 83 |     method = method.lower()
 84 |     if method not in cfg.VALID_RANKING_METHODS:
 85 |         raise NotImplementedError('Ranking method not recognized. Choose one of {}'
 86 |                                   ''.format(cfg.VALID_RANKING_METHODS))
 87 | 
 88 |     if method in ("align/corr",):
 89 |         return alignment_ranking(kernel_bucket, targets, **method_params)
 90 |     elif method in ('cv_risk', 'cv'):
 91 |         return CV_ranking(kernel_bucket, targets, **method_params)
 92 | 
 93 | 
 94 | def CV_ranking(kernel_bucket, targets, num_folds=3, estimator_name='SVM'):
 95 |     """
 96 |     Ranks kernels by their performance measured via cross-validation (CV).
 97 | 
 98 |     Parameters
 99 |     ----------
100 |     kernel_bucket : KernelBucket
101 | 
102 |     targets : Iterable
103 |         target values of the sample attached to the bucket
104 | 
105 |     num_folds : int
106 |         Number of folds for the CV to be employed
107 | 
108 |     estimator_name : str
109 |         Name of a valid Scikit-Learn estimator. Default: ``SVM``
110 | 
111 |     Returns
112 |     -------
113 |     scores : ndarray
114 |         CV performance computed for the kernel matrices in the bucket
115 | 
116 |     """
117 | 
118 |     from sklearn.model_selection import GridSearchCV
119 | 
120 |     cv_scores = list()
121 |     for km in kernel_bucket:
122 |         estimator, param_grid = get_estimator(estimator_name)
123 |         gs = GridSearchCV(estimator=estimator,
124 |                           param_grid=param_grid,
125 |                           cv=num_folds)
126 |         gs.fit(km.full, targets)
127 |         cv_scores.append(gs.best_score_)
128 | 
129 |     # scaling helps compare across multiple metrics
130 |     return 100 * min_max_scale(cv_scores)
131 | 
132 | 
def alignment_ranking(kernel_bucket, targets, **method_params):
    """Method to rank kernels that depend on target alignment.

    .. note::

        To be implemented. Every call currently raises ``NotImplementedError``.

    """

    raise NotImplementedError
143 | 
144 | 
def get_estimator(learner_id='svm'):
    """
    Returns a valid kernel machine to become the base learner of the MKL methods.

    Base learner must be able to accept a precomputed kernel for fit/predict methods!

    Parameters
    ----------
    learner_id : str
        Identifier for the estimator to be chosen (case-insensitive).
        Options: ``SVM`` (alias ``SVC``) and ``SVR``.
        Default: ``SVM``

    Returns
    -------
    base_learner : Estimator
        An sklearn estimator configured with ``kernel='precomputed'``

    param_grid : dict
        Parameter grid (sklearn format) for the chosen estimator:
        values of ``C`` from 1e-6 to 1e5 in powers of ten.

    Raises
    ------
    NotImplementedError
        If ``learner_id`` is not one of the supported identifiers

    """

    # TODO hyper-param optimization needs to be incorporated somewhere!!
    #   Perhaps by returning a GridSearchCV(base_learner) object or similar?

    learner_key = learner_id.lower()
    if learner_key not in ('svm', 'svc', 'svr'):
        raise NotImplementedError('Requested base learner {} is not implemented yet!'
                                  ''.format(learner_key))

    # both learners share the same search grid over C
    param_grid = dict(C=np.power(10.0, range(-6, 6)))

    if learner_key == 'svr':
        from sklearn.svm import SVR
        base_learner = SVR(kernel='precomputed', C=10)
    else:
        from sklearn.svm import SVC
        base_learner = SVC(kernel='precomputed', probability=True, C=10)

    return base_learner, param_grid
187 | 


--------------------------------------------------------------------------------
/kernelmethods/sampling.py:
--------------------------------------------------------------------------------
  1 | from functools import partial
  2 | from warnings import warn
  3 | 
  4 | import numpy as np
  5 | from kernelmethods import config as cfg
  6 | from kernelmethods.base import BaseKernelFunction, KernelMatrix, KernelSet
  7 | from kernelmethods.config import KernelMethodsException, KernelMethodsWarning
  8 | from kernelmethods.numeric_kernels import (GaussianKernel, LaplacianKernel,
  9 |                                            LinearKernel, PolyKernel, SigmoidKernel)
 10 | from kernelmethods.operations import alignment_centered
 11 | from kernelmethods.utils import is_iterable_but_not_str
 12 | from scipy.stats.stats import pearsonr
 13 | 
 14 | 
 15 | class KernelBucket(KernelSet):
 16 |     """
 17 |     Class to generate and/or maintain a "bucket" of candidate kernels.
 18 | 
 19 |     Applications:
 20 | 
 21 |         1. to rank/filter/select kernels based on a given sample via many metrics
 22 |         2. to be defined.
 23 | 
 24 |     **Note**:
 25 |     1. Linear kernel is always added during init without your choosing.
 26 |     2. This is in contrast to Chi^2 kernel, which is not added to the bucket by
 27 |     default, as it requires positive feature values and may break default use for
 28 |     common applications. You can easily add Chi^2 or any other kernels via the
 29 |     ``add_parametrized_kernels`` method.
 30 | 
 31 | 
 32 |     Parameters
 33 |     ----------
 34 |     poly_degree_values : Iterable
 35 |         List of values for the degree parameter of the PolyKernel. One
 36 |         KernelMatrix will be added to the bucket for each value.
 37 | 
 38 |     rbf_sigma_values : Iterable
 39 |         List of values for the sigma parameter of the GaussianKernel. One
 40 |         KernelMatrix will be added to the bucket for each value.
 41 | 
 42 |     laplace_gamma_values : Iterable
 43 |         List of values for the gamma parameter of the LaplacianKernel. One
 44 |         KernelMatrix will be added to the bucket for each value.
 45 | 
 46 |     sigmoid_gamma_values : Iterable
 47 |         List of values for the gamma parameter of the SigmoidKernel. One
 48 |         KernelMatrix will be added to the bucket for each value.
 49 | 
 50 |     sigmoid_offset_values : Iterable
 51 |         List of values for the offset parameter of the SigmoidKernel. One
 52 |         KernelMatrix will be added to the bucket for each value.
 53 | 
 54 |     name : str
 55 |         String to identify the purpose or type of the bucket of kernels.
 56 |         Also helps easily distinguishing it from other buckets.
 57 | 
 58 |     normalize_kernels : bool
 59 |         Flag to indicate whether the kernel matrices need to be normalized
 60 | 
 61 |     skip_input_checks : bool
 62 |         Flag to indicate whether checks on input data (type, format etc) can
 63 |         be skipped. This helps save a tiny bit of runtime for expert uses when
 64 |         data types and formats are managed thoroughly in numpy. Default:
 65 |         False. Disable this only when you know exactly what you're doing!
 66 | 
 67 |     """
 68 | 
 69 | 
 70 |     def __init__(self,
 71 |                  poly_degree_values=cfg.default_degree_values_poly_kernel,
 72 |                  rbf_sigma_values=cfg.default_sigma_values_gaussian_kernel,
 73 |                  laplace_gamma_values=cfg.default_gamma_values_laplacian_kernel,
 74 |                  sigmoid_gamma_values=cfg.default_gamma_values_sigmoid_kernel,
 75 |                  sigmoid_offset_values=cfg.default_offset_values_sigmoid_kernel,
 76 |                  name='KernelBucket',
 77 |                  normalize_kernels=True,
 78 |                  skip_input_checks=False,
 79 |                  ):
 80 |         """
 81 |         Constructor.
 82 | 
 83 |         Parameters
 84 |         ----------
 85 |         poly_degree_values : Iterable
 86 |             List of values for the degree parameter of the PolyKernel. One
 87 |             KernelMatrix will be added to the bucket for each value.
 88 | 
 89 |         rbf_sigma_values : Iterable
 90 |             List of values for the sigma parameter of the GaussianKernel. One
 91 |             KernelMatrix will be added to the bucket for each value.
 92 | 
 93 |         laplace_gamma_values : Iterable
 94 |             List of values for the gamma parameter of the LaplacianKernel. One
 95 |             KernelMatrix will be added to the bucket for each value.
 96 | 
 97 |         sigmoid_gamma_values : Iterable
 98 |             List of values for the gamma parameter of the SigmoidKernel. One
 99 |             KernelMatrix will be added to the bucket for each value.
100 | 
101 |         sigmoid_offset_values : Iterable
102 |             List of values for the offset parameter of the SigmoidKernel. One
103 |             KernelMatrix will be added to the bucket for each value.
104 | 
105 |         name : str
106 |             String to identify the purpose or type of the bucket of kernels.
107 |             Also helps easily distinguishing it from other buckets.
108 | 
109 |         normalize_kernels : bool
110 |             Flag to indicate whether the kernel matrices need to be normalized
111 | 
112 |         skip_input_checks : bool
113 |             Flag to indicate whether checks on input data (type, format etc) can
114 |             be skipped. This helps save a tiny bit of runtime for expert uses when
115 |             data types and formats are managed thoroughly in numpy. Default:
116 |             False. Disable this only when you know exactly what you're doing!
117 | 
118 |         """
119 | 
120 |         if isinstance(normalize_kernels, bool):
121 |             self._norm_kernels = normalize_kernels
122 |         else:
123 |             raise TypeError('normalize_kernels must be bool')
124 | 
125 |         if isinstance(skip_input_checks, bool):
126 |             self._skip_input_checks = skip_input_checks
127 |         else:
128 |             raise TypeError('skip_input_checks must be bool')
129 | 
130 |         # start with the addition of kernel matrix for linear kernel
131 |         init_kset = [KernelMatrix(LinearKernel(), normalized=self._norm_kernels), ]
132 |         super().__init__(km_list=init_kset, name=name)
133 |         # not attached to a sample yet
134 |         self._num_samples = None
135 | 
136 |         self.add_parametrized_kernels(PolyKernel, 'degree', poly_degree_values)
137 |         self.add_parametrized_kernels(GaussianKernel, 'sigma', rbf_sigma_values)
138 |         self.add_parametrized_kernels(LaplacianKernel, 'gamma', laplace_gamma_values)
139 |         self.add_parametrized_kernels(SigmoidKernel, 'gamma', sigmoid_gamma_values)
140 |         self.add_parametrized_kernels(SigmoidKernel, 'offset', sigmoid_offset_values)
141 | 
142 | 
143 |     def add_parametrized_kernels(self, kernel_func, param, values):
144 |         """
145 |         Adds a list of kernels parametrized by various values for a given param
146 | 
147 |         Parameters
148 |         ----------
149 |         kernel_func : BaseKernelFunction
150 |             Kernel function to be added (not an instance, but callable class)
151 | 
152 |         param : str
153 |             Name of the parameter to the above kernel function
154 | 
155 |         values : Iterable
156 |             List of parameter values. One kernel will be added for each value
157 | 
158 |         """
159 | 
160 |         if (not isinstance(kernel_func, type)) or \
161 |             (not issubclass(kernel_func, BaseKernelFunction)):
162 |             raise KernelMethodsException('Input {} is not a valid kernel func!'
163 |                                          ' Must be derived from BaseKernelFunction'
164 |                                          ''.format(kernel_func))
165 | 
166 |         if values is None:
167 |             # warn('No values provided for {}. Doing nothing!'.format(param))
168 |             return
169 | 
170 |         if not is_iterable_but_not_str(values, min_length=1):
171 |             raise ValueError('values must be an iterable set of param values (n>=1)')
172 | 
173 |         for val in values:
174 |             try:
175 |                 param_dict = {param              : val,
176 |                               'skip_input_checks': self._skip_input_checks}
177 |                 self.append(KernelMatrix(kernel_func(**param_dict),
178 |                                          normalized=self._norm_kernels))
179 |             except:
180 |                 warn('Unable to add {} to the bucket for {}={}. Skipping it.'
181 |                      ''.format(kernel_func, param, val), KernelMethodsWarning)
182 | 
183 | 
def make_kernel_bucket(strategy='exhaustive',
                       normalize_kernels=True,
                       skip_input_checks=False):
    """
    Generates a bucket of candidate kernels based on user preferences.

    Parameters
    ----------
    strategy : str
        Name of the strategy for populating the kernel bucket (case-insensitive).
        Options: 'exhaustive', 'light' and 'linear_only'. Default: 'exhaustive'

        If an existing KernelBucket or KernelSet is passed instead, it is
        returned unchanged (with a warning).

    normalize_kernels : bool
        Flag to indicate whether to normalize the kernel matrices

    skip_input_checks : bool
        Flag to indicate whether checks on input data (type, format etc) can
        be skipped. This helps save a tiny bit of runtime for expert uses when
        data types and formats are managed thoroughly in numpy. Default:
        False. Disable this only when you know exactly what you're doing!

    Returns
    -------
    kb : KernelBucket
        Kernel bucket populated according to the requested strategy

    Raises
    ------
    ValueError
        If ``strategy`` is not one of the recognized names

    """

    if isinstance(strategy, (KernelBucket, KernelSet)):
        warn('Input is already a kernel bucket/set - simply returning it!')
        return strategy

    strategy = strategy.lower()
    if strategy == 'exhaustive':
        return KernelBucket(name='KBucketExhaustive',
                            normalize_kernels=normalize_kernels,
                            skip_input_checks=skip_input_checks,
                            poly_degree_values=cfg.default_degree_values_poly_kernel,
                            rbf_sigma_values=cfg.default_sigma_values_gaussian_kernel,
                            laplace_gamma_values=cfg.default_gamma_values_laplacian_kernel,
                            sigmoid_gamma_values=cfg.default_gamma_values_sigmoid_kernel,
                            sigmoid_offset_values=cfg.default_offset_values_sigmoid_kernel)
    elif strategy == 'light':
        return KernelBucket(name='KBucketLight',
                            normalize_kernels=normalize_kernels,
                            skip_input_checks=skip_input_checks,
                            poly_degree_values=cfg.light_degree_values_poly_kernel,
                            rbf_sigma_values=cfg.light_sigma_values_gaussian_kernel,
                            laplace_gamma_values=cfg.light_gamma_values_laplacian_kernel,
                            sigmoid_gamma_values=cfg.light_gamma_values_sigmoid_kernel,
                            sigmoid_offset_values=cfg.light_offset_values_sigmoid_kernel)
    elif strategy == 'linear_only':
        # None for every parametrized family leaves only the linear kernel
        # that KernelBucket always adds; named distinctly from the 'light' bucket
        return KernelBucket(name='KBucketLinearOnly',
                            normalize_kernels=normalize_kernels,
                            skip_input_checks=skip_input_checks,
                            poly_degree_values=None,
                            rbf_sigma_values=None,
                            laplace_gamma_values=None,
                            sigmoid_gamma_values=None,
                            sigmoid_offset_values=None)
    else:
        raise ValueError('Invalid choice of strategy '
                         '- must be one of {}'.format(cfg.kernel_bucket_strategies))
248 | 
249 | 
def ideal_kernel(targets):
    """
    Computes the kernel matrix from the given target labels.

    Parameters
    ----------
    targets : Iterable
        Target values (``y``) to compute the ideal kernel from.

    Returns
    -------
    ideal_kernel : ndarray
        The ideal kernel: the outer product of ``y`` with itself
        (``y`` times ``y`` transposed), of shape (n, n)

    """

    # column vector of shape (n, 1), whatever the input layout was
    column = np.array(targets).reshape((-1, 1))

    return np.dot(column, column.T)
269 | 
270 | 
def correlation_km(k1, k2):
    """
    Computes [pearson] correlation coefficient between two kernel matrices

    Both matrices are flattened first, so the correlation is taken over all
    of their entries.

    Parameters
    ----------
    k1, k2 : ndarray
        Two kernel matrices of the same size

    Returns
    -------
    corr_coef : float
        Correlation coefficient between the vectorized kernel matrices

    """

    flat_one, flat_two = k1.ravel(), k2.ravel()

    # pearsonr yields (coefficient, p-value); only the coefficient is needed
    return pearsonr(flat_one, flat_two)[0]
290 | 
291 | 
def pairwise_similarity(k_bucket, metric='corr'):
    """
    Computes the similarity between all pairs of kernel matrices in a given bucket.

    Parameters
    ----------
    k_bucket : KernelBucket
        Container of length num_km, with each an instance ``KernelMatrix``

    metric : str
        Identifies the metric to be used. Options: ``corr`` (correlation
        coefficient) and ``align`` (centered alignment).

    Returns
    -------
    pairwise_metric : ndarray of shape (num_km, num_km)
        A symmetric matrix computing the pairwise similarity between the various
        kernel matrices, diagonal entries included

    """

    # mutual info?
    similarity_funcs = dict(
        corr=correlation_km,
        align=partial(alignment_centered, value_if_zero_division=0.0))

    num_kernels = k_bucket.size
    estimator = similarity_funcs[metric]
    pairwise_metric = np.full((num_kernels, num_kernels), fill_value=np.nan)

    for row in range(num_kernels):
        km_row = k_bucket[row].full
        # the metric is symmetric: compute the upper triangle (diagonal included,
        # to be consistent) and mirror each value into the lower triangle
        for col in range(row, num_kernels):
            value = estimator(km_row, k_bucket[col].full)
            pairwise_metric[row, col] = value
            pairwise_metric[col, row] = value

    return pairwise_metric
333 | 


--------------------------------------------------------------------------------
/kernelmethods/tests/test_algorithms.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | 
  3 | import numpy as np
  4 | from pytest import raises
  5 | from sklearn.datasets import make_classification
  6 | from sklearn.utils.estimator_checks import check_estimator
  7 | 
  8 | from kernelmethods.algorithms import (KernelMachine, KernelMachineRegressor,
  9 |                                       OptimalKernelSVC, OptimalKernelSVR)
 10 | from kernelmethods.config import (Chi2NegativeValuesException, KMNormError,
 11 |                                   KernelMethodsException, KernelMethodsWarning)
 12 | from kernelmethods.numeric_kernels import DEFINED_KERNEL_FUNCS
 13 | from kernelmethods.sampling import make_kernel_bucket
 14 | 
# Install a global filter that ignores all warnings from here on
warnings.simplefilter('ignore')

# Seeded random generator.
# NOTE(review): gen_random_sample below draws from the global np.random state,
# not from this generator, so this seed does not affect it - confirm intent
rnd = np.random.RandomState(0)
# Compact, wide printing for any arrays shown in test output
np.set_printoptions(precision=3, linewidth=120)

# Sizes of the synthetic data: number of features, training and test points
sample_dim = 5
n_training = 100
n_testing = 30

# Messages already printed by warn_dev, so that each is shown only once
all_warns = set()
# Banner printed above, and rule printed below, each message shown by warn_dev
warn_line = '{dashes} IGNORED WARNING {dashes}'.format(dashes='-' * 15)
dash_line = '-' * 50
 27 | 
 28 | 
 29 | def gen_random_sample(num_samples, sample_dim):
 30 |     """To better control precision and type of floats"""
 31 | 
 32 |     # TODO input sparse arrays for test
 33 |     return np.random.rand(num_samples, sample_dim)
 34 | 
 35 | 
 36 | def warn_dev(msg):
 37 |     if msg not in all_warns:
 38 |         print('\n\n{}\n  {}\n{}\n'.format(warn_line, msg, dash_line))
 39 |         all_warns.add(msg)
 40 | 
 41 | 
 42 | def _test_estimator_can_fit_predict(estimator, est_name=None):
 43 |     # fresh data for each call
 44 |     train_data, labels = make_classification(n_features=sample_dim,
 45 |                                              n_samples=n_training)
 46 |     test_data = gen_random_sample(n_testing, sample_dim)
 47 | 
 48 |     if hasattr(estimator, 'k_func') and 'chi2' in estimator.k_func.name:
 49 |         train_data = np.abs(train_data)
 50 |         test_data = np.abs(test_data)
 51 | 
 52 |     if est_name is None:
 53 |         est_name = str(estimator.__class__)
 54 | 
 55 |     try:
 56 |         check_estimator(estimator)
 57 |     except (KMNormError, Chi2NegativeValuesException,
 58 |             KernelMethodsException, KernelMethodsWarning,
 59 |             RuntimeError) as kme:
 60 |         warn_dev('KernelMethodsException encountered during estimator checks - '
 61 |                  'ignoring it!\n Estimator: {}'.format(est_name))
 62 |         # traceback.print_exc()
 63 |         # pass
 64 |     except Exception as exc:
 65 |         exc_msg = str(exc)
 66 |         # Given unresolved issues with sklearn estimator checks, not enforcing them!
 67 |         if '__dict__' in exc_msg:
 68 |             warn_dev('Ignoring the sklearn __dict__ check')
 69 |             pass
 70 |         elif 'not greater than' in exc_msg:
 71 |             warn_dev('Ignoring accuracy check from sklearn')
 72 |         elif "the number of features at training time" in exc_msg:
 73 |             if 'OptimalKernel' in est_name:
 74 |                 warn_dev('Ignoring shape mismatch between train and test for '
 75 |                          'OptimalKernel estimators (need for two-sample KM product)')
 76 |         else:
 77 |             raise exc
 78 |             # raise TypeError('atypical failed check for {}\nMessage: {}\n'
 79 |             #                 ''.format(est_name, exc_msg))
 80 | 
 81 |     # try:
 82 |     #     with warnings.catch_warnings():
 83 |     #         warnings.simplefilter("ignore")
 84 |     #         estimator.fit(train_data, labels)
 85 |     # except:
 86 |     #     raise RuntimeError('{} is unable to fit to training data!'.format(
 87 |     #     est_name))
 88 |     #
 89 |     # try:
 90 |     #     estimator.predict(test_data)
 91 |     # except:
 92 |     #     raise RuntimeError('{} is unable to make predictions'.format(est_name))
 93 | 
 94 | 
 95 | def test_optimal_kernel_estimators():
 96 |     train_data, labels = make_classification(n_features=sample_dim, n_classes=2,
 97 |                                              n_samples=n_training)
 98 |     test_data = gen_random_sample(n_testing, sample_dim)
 99 | 
100 |     # creating the smallest bucket, just with linear kernel, to speed up tests
101 |     kb = make_kernel_bucket(strategy='linear_only')
102 | 
103 |     for OKEstimator in (OptimalKernelSVC, OptimalKernelSVR,):
104 | 
105 |         try:
106 |             ok_est = OKEstimator(k_bucket=kb)
107 |         except:
108 |             raise RuntimeError('Unable to instantiate OptimalKernelSVR!')
109 | 
110 |         # disabling sklearn checks to avoid headaches with their internal checks
111 |         _test_estimator_can_fit_predict(ok_est)
112 | 
113 |         for invalid_value in (np.random.randint(10), 10.1, ('tuple')):
114 |             with raises(ValueError):
115 |                 ok_est = OKEstimator(k_bucket=invalid_value)
116 |                 ok_est.fit(train_data, labels)
117 | 
118 |         ok_est = OKEstimator(k_bucket=kb)
119 |         ok_est.set_params(k_bucket=kb)
120 | 
121 | 
def test_kernel_machine():
    """Instantiate both kernel machines with every defined kernel function and
    run the estimator checks on each combination."""
    for ker_func in DEFINED_KERNEL_FUNCS:
        for ker_machine in (KernelMachine, KernelMachineRegressor):
            try:
                k_machine = ker_machine(ker_func)
            except Exception as exc:
                # chain the original error so the real cause stays visible
                raise RuntimeError('Unable to instantiate {} '
                                   'with this ker func {}!'
                                   ''.format(ker_machine.__name__,
                                             ker_func)) from exc

            # failures propagate unchanged; no wrapper needed
            _test_estimator_can_fit_predict(
                k_machine, 'kernel machine with ' + str(ker_func))
138 | 
139 | 


--------------------------------------------------------------------------------
/kernelmethods/tests/test_base_classes.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import numpy as np
  3 | from pytest import raises
  4 | 
  5 | from kernelmethods.base import (AverageKernel, BaseKernelFunction, CompositeKernel,
  6 |                                 KernelFromCallable, KernelMatrix,
  7 |                                 KernelMatrixPrecomputed, ProductKernel,
  8 |                                 SumKernel, WeightedAverageKernel)
  9 | from kernelmethods.config import KMAccessError
 10 | from kernelmethods.numeric_kernels import (GaussianKernel, LaplacianKernel,
 11 |                                            LinearKernel, PolyKernel)
 12 | from kernelmethods.sampling import make_kernel_bucket
 13 | from kernelmethods.tests.test_numeric_kernels import _test_for_all_kernels
 14 | 
 15 | default_feature_dim = 10
 16 | range_feature_dim = [10, 500]
 17 | range_num_samples = [50, 500]
 18 | num_samples = np.random.randint(20)
 19 | sample_dim = np.random.randint(10)
 20 | range_polynomial_degree = [2, 10] # degree=1 is tested in LinearKernel()
 21 | 
 22 | np.random.seed(42)
 23 | 
# NOTE(review): presumably it is skip_input_checks=True that speeds up test
# runs - confirm against the kernel classes
# kernels below are built with default values for all their parameters
SupportedKernels = (GaussianKernel(), PolyKernel(), LinearKernel(),
                    LaplacianKernel())
# presumably the number of examples to run for PSD-ness checks - TODO confirm
num_tests_psd_kernel = 3
 29 | 
 30 | def gen_random_array(dim):
 31 |     """To better control precision and type of floats"""
 32 | 
 33 |     # TODO input sparse arrays for test
 34 |     return np.random.rand(dim)
 35 | 
 36 | def gen_random_sample(num_samples, sample_dim):
 37 |     """To better control precision and type of floats"""
 38 | 
 39 |     # TODO input sparse arrays for test
 40 |     return np.random.rand(num_samples, sample_dim)
 41 | 
# shared linear-kernel matrix, attached once to a random sample and reused by
# the tests below
km_lin = KernelMatrix(kernel=LinearKernel())
km_lin.attach_to(gen_random_sample(num_samples, sample_dim))
 44 | 
 45 | def simple_callable(x, y):
 46 |     return np.dot(x, y)
 47 | 
 48 | def test_kernel_from_callable():
 49 | 
 50 |     kf = KernelFromCallable(simple_callable)
 51 |     if not isinstance(kf, BaseKernelFunction):
 52 |         raise TypeError('Error in implementation of KernelFromCallable')
 53 | 
 54 |     _test_for_all_kernels(kf, 5)
 55 | 
 56 | 
 57 | def test_KernelMatrix_design():
 58 | 
 59 |     with raises(TypeError):
 60 |         km = KernelMatrix(kernel=simple_callable)
 61 | 
 62 |     with raises(TypeError):
 63 |         km = KernelMatrix(kernel=LinearKernel, normalized='True')
 64 | 
 65 |     assert len(km_lin) == num_samples**2
 66 | 
 67 |     colon_access = km_lin[:,:]
 68 |     if colon_access.size != km_lin.size:
 69 |         raise ValueError('error in getitem implementation when using [:, :]')
 70 | 
 71 |     _ = km_lin[1, :]
 72 |     _ = km_lin[:, 1]
 73 |     for invalid_index in (-1, np.Inf, np.NaN):
 74 |         with raises(KMAccessError):
 75 |             _ = km_lin[:, invalid_index]
 76 | 
 77 | 
 78 | def test_centering():
 79 | 
 80 |     km = KernelMatrix(kernel=LinearKernel())
 81 |     km.attach_to(gen_random_sample(num_samples, sample_dim))
 82 |     km.center()
 83 | 
 84 | 
 85 | def test_normalize():
 86 | 
 87 |     km = KernelMatrix(kernel=LinearKernel())
 88 |     km.attach_to(gen_random_sample(num_samples, sample_dim))
 89 |     km.normalize()
 90 | 
 91 | 
 92 | def test_KM_results_in_NaN_Inf():
 93 |     """"""
 94 |     pass
 95 | 
 96 | 
 97 | def test_km_precomputed():
 98 | 
 99 |     rand_size = np.random.randint(5, 50)
100 |     rand_matrix = np.random.rand(rand_size, rand_size)
101 |     # making symmetric
102 |     rand_matrix = rand_matrix + rand_matrix.T
103 |     pre = KernelMatrixPrecomputed(rand_matrix, name='rand')
104 | 
105 |     assert pre.size == rand_size == len(pre)
106 |     assert np.isclose(pre.full, rand_matrix).all()
107 |     assert np.isclose(pre.diag, rand_matrix.diagonal()).all()
108 |     # __getitem__
109 |     for _ in range(min(5, rand_size)):
110 |         indices = np.random.randint(0, rand_size, 2)
111 |         assert pre[indices[0], indices[1]] == rand_matrix[indices[0], indices[1]]
112 | 
113 |     with raises(ValueError): # not symmtric
114 |         pre = KernelMatrixPrecomputed(np.random.rand(rand_size, rand_size+1))
115 | 
116 |     with raises(ValueError):
117 |         pre = KernelMatrixPrecomputed([[1, 2], [2, 3, 4, 9]])
118 | 
119 |     # 3D or 1D
120 |     with raises(ValueError):
121 |         pre = KernelMatrixPrecomputed(np.random.rand(rand_size, rand_size, 2))
122 | 
123 |     with raises(ValueError):
124 |         pre = KernelMatrixPrecomputed(np.random.rand(rand_size))
125 | 
126 |     # must have real values
127 |     with raises(ValueError):
128 |         pre = KernelMatrixPrecomputed([[1, 2+4j], [9+2j, 3]])
129 | 
130 |     with raises(KMAccessError):
131 |         _= pre[np.Inf, 0]
132 | 
133 | 
def test_composite_kernels():
    """Every composite kernel must be a CompositeKernel, be fittable, and expose
    the ``composite_KM`` and ``full`` attributes after fitting."""

    kset = make_kernel_bucket()
    kset.attach_to(gen_random_sample(num_samples, sample_dim))

    required_attributes = ('composite_KM', 'full')
    composite_classes = (AverageKernel, SumKernel, WeightedAverageKernel,
                         ProductKernel)

    for composite_cls in composite_classes:

        # the weighted variant additionally needs one weight per kernel matrix
        if issubclass(composite_cls, WeightedAverageKernel):
            weights = np.random.rand(kset.size)
            composite = composite_cls(kset, weights)
        else:
            composite = composite_cls(kset)

        if not isinstance(composite, CompositeKernel):
            raise TypeError(' Composite kernel {} not defined properly: '
                            'it must be a child of {}'
                            ''.format(composite, CompositeKernel))

        composite.fit()

        for attr_name in required_attributes:
            if hasattr(composite, attr_name):
                continue
            raise TypeError('{} does not have attr {}'.format(composite, attr_name))
157 | 


--------------------------------------------------------------------------------
/kernelmethods/tests/test_categorical.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import string
  3 | import traceback
  4 | from numbers import Number
  5 | 
  6 | import numpy as np
  7 | from hypothesis import (HealthCheck, given, settings as hyp_settings, strategies)
  8 | from pytest import raises
  9 | 
 10 | from kernelmethods.base import KernelMatrix
 11 | from kernelmethods.categorical import MatchCountKernel
 12 | from kernelmethods.config import dtype_categorical
 13 | from kernelmethods.operations import is_positive_semidefinite
 14 | from kernelmethods.utils import check_callable
 15 | 
# inclusive bounds handed to the hypothesis integer strategies further below
default_feature_dim = 10
range_feature_dim = [10, 500]
range_num_samples = [50, 500]
range_string_length = [3, 25]

# fixes numpy's global RNG; note random_string() draws from the stdlib `random`
# module, which is not seeded here
np.random.seed(42)

# NOTE(review): presumably it is skip_input_checks=True that speeds up test
# runs - confirm against the kernel classes
# kernels below are built with default values for all their parameters
SupportedKernels = (MatchCountKernel(),)
# number of hypothesis examples for the kernel test below
num_tests_psd_kernel = 3
 27 | 
 28 | 
 29 | def random_string(length=5):
 30 |     return ''.join(random.choices(string.ascii_letters, k=length))
 31 | 
 32 | 
 33 | def gen_random_categorical_array(dim, length):
 34 |     """To better control precision and type of floats"""
 35 | 
 36 |     return np.array([random_string(length) for _ in range(dim)],
 37 |                     dtype=dtype_categorical)
 38 | 
 39 | 
 40 | def gen_random_sample(num_samples, sample_dim, string_length):
 41 |     """To better control precision and type of floats"""
 42 | 
 43 |     return np.array([gen_random_categorical_array(sample_dim, string_length) for
 44 |                      _ in range(num_samples)])
 45 | 
 46 | 
 47 | def _test_for_all_kernels(kernel, sample_dim, string_length):
 48 |     """Common tests that all kernels must pass."""
 49 | 
 50 |     x = gen_random_categorical_array(sample_dim, string_length)
 51 |     y = gen_random_categorical_array(sample_dim, string_length)
 52 | 
 53 |     try:
 54 |         result = kernel(x, y)
 55 |     except Exception:
 56 |         traceback.print_exc()
 57 |         raise RuntimeError('{} unable to calculate!\n'
 58 |                            ' on x {}\n y{}'.format(kernel, x, y))
 59 | 
 60 |     if not isinstance(result, Number):
 61 |         raise ValueError('result {} of type {} is not a number!\n'
 62 |                          'x={}\ny={}\nkernel={}\n'
 63 |                          ''.format(result, type(result), x, y, kernel))
 64 | 
 65 |     if kernel(y, x) != result:
 66 |         raise ValueError('{} is not symmetric!'
 67 |                          'x={}\n y={}\n kernel={}\n'.format(kernel.name, x, y,
 68 |                                                             kernel))
 69 | 
 70 | 
 71 | def test_kernel_design():
 72 |     """
 73 |     Every kernel must be
 74 |     1. must have a name defined
 75 |     2. must be callable with two samples
 76 |     3. returns a number
 77 | 
 78 |     """
 79 | 
 80 |     for kernel in SupportedKernels:
 81 | 
 82 |         # must be callable with 2 args
 83 |         check_callable(kernel, min_num_args=2)
 84 | 
 85 |         if not hasattr(kernel, 'name'):
 86 |             raise TypeError('{} does not have name attribute!'.format(kernel))
 87 | 
 88 |         # only numeric data is accepted and other dtypes must raise an error
 89 |         for non_catg in [(True, False, True),
 90 |                          [1.0, 2.4],
 91 |                          [object, object]]:
 92 |             with raises(TypeError):
 93 |                 _ = kernel(non_catg, non_catg)
 94 | 
 95 | 
 96 | def _test_func_is_valid_kernel(kernel, sample_dim, num_samples, string_length):
 97 |     """A func is a valid kernel if the kernel matrix generated by it is PSD.
 98 | 
 99 |     Not including this in tests for all kernels to allow for non-PSD kernels in
100 |     the future
101 | 
102 |     """
103 | 
104 |     KM = KernelMatrix(kernel, name='TestKM')
105 |     KM.attach_to(gen_random_sample(num_samples, sample_dim, string_length))
106 |     is_psd = is_positive_semidefinite(KM.full, verbose=True)
107 |     if not is_psd:
108 |         raise ValueError('{} is not PSD'.format(str(KM)))
109 | 
110 | 
@hyp_settings(max_examples=num_tests_psd_kernel, deadline=None,
              suppress_health_check=HealthCheck.all())
@given(strategies.integers(range_feature_dim[0], range_feature_dim[1]),
       strategies.integers(range_num_samples[0], range_num_samples[1]),
       strategies.integers(range_string_length[0], range_string_length[1]),
       strategies.booleans())
def test_match_count_kernel(sample_dim, num_samples, string_length, perc_flag):
    """Tests specific to the match-count kernel, with and without ``return_perc``."""

    match_kernel = MatchCountKernel(return_perc=perc_flag,
                                    skip_input_checks=False)
    # common checks first, then PSD-ness of the resulting kernel matrix
    _test_for_all_kernels(match_kernel, sample_dim, string_length)
    _test_func_is_valid_kernel(match_kernel, sample_dim, num_samples,
                               string_length)
123 | 


--------------------------------------------------------------------------------
/kernelmethods/tests/test_kernel_matrix.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | import numpy as np
  4 | np.set_printoptions(linewidth=120, precision=4)
  5 | from scipy.sparse import issparse
  6 | from scipy.linalg import eigh
  7 | from pytest import raises
  8 | from kernelmethods.numeric_kernels import PolyKernel, GaussianKernel, LinearKernel, \
  9 |     DEFINED_KERNEL_FUNCS
 10 | from kernelmethods import KernelMatrix, KMAccessError, KernelMethodsException
 11 | from kernelmethods.base import ConstantKernelMatrix
 12 | from kernelmethods.operations import is_PSD
 13 | 
# size and dimensionality of the first sample, drawn at random on import
num_samples = np.random.randint(30, 100)
sample_dim = np.random.randint(3, 10) # 2
target_label_set = [1, 2]

# second sample: its own random size, same dimensionality as the first
num_samples_two = np.random.randint(30, 100)
sample_two_dim = sample_dim

sample_data = np.random.rand(num_samples, sample_dim)
target_labels = np.random.choice(target_label_set, num_samples)

poly = PolyKernel(degree=2, skip_input_checks=True)
# suffix 1 to indicate one sample case
# km1 is shared (and mutated) by the tests below
km1 = KernelMatrix(poly)
km1.attach_to(sample_data)

# N*(N+1)/2 for N=num_samples; true division makes this a float
max_num_elements = max_num_ker_eval = num_samples * (num_samples + 1) / 2
 30 | 
 31 | def test_symmetry():
 32 | 
 33 |     if not np.isclose(km1.full, km1.full.T).all():
 34 |         print('KM not symmetric')
 35 | 
 36 | def test_PSD():
 37 | 
 38 |     if not is_PSD(km1.full):
 39 |         raise ValueError('this kernel matrix is not PSD!')
 40 | 
 41 | def test_normalization():
 42 | 
 43 |     km1.normalize(method='cosine')
 44 |     if not hasattr(km1, 'normed_km'):
 45 |         raise ValueError('Attribute exposing normalized km does not exist!')
 46 | 
 47 |     if not np.isclose(km1.normed_km.diagonal(), 1.0).all():
 48 |         raise ValueError('One or more diagonal elements of normalized KM != 1.0:\n\t'
 49 |                          '{}'.format(km1.normed_km.diagonal()))
 50 | 
 51 |     km2 = KernelMatrix(poly)
 52 |     km2.attach_to(sample_data)
 53 |     normed_km = km2.normed_km
 54 |     assert normed_km.shape == km2.shape
 55 | 
 56 |     frob = km1.frob_norm
 57 |     assert np.isreal(frob)
 58 | 
 59 |     # during init
 60 |     with raises(TypeError):
 61 |         _ = KernelMatrix(poly, normalized='True')
 62 | 
 63 | def test_centering():
 64 | 
 65 |     km2 = KernelMatrix(poly)
 66 |     km2.attach_to(sample_data)
 67 |     assert km2.centered.shape == km2.shape
 68 | 
 69 | def test_get_item():
 70 | 
 71 |     for invalid_index in [-1, num_samples+1]:
 72 |         # out of range indices must raise an error on any dim
 73 |         with raises(KMAccessError):
 74 |             print(km1[invalid_index, :])
 75 |         with raises(KMAccessError):
 76 |             print(km1[:, invalid_index])
 77 | 
 78 |     # max 2 dims allowed for access
 79 |     # TODO no restriction on float: float indices will be rounded down towards 0
 80 |     # (1.0, 2), (1, 3.5) are valid at the moment
 81 |     for invalid_access in [(2  , 4, 5), (5,),
 82 |                            ('1', 1), (2, 'efd'),
 83 |                            ( ((0, 1), 2), (3, 4)), # no tuple of tuples for a single dim
 84 |                            ]:
 85 |         with raises((KMAccessError, TypeError)):
 86 |             print(km1[invalid_access])
 87 | 
 88 |     with raises(KMAccessError):
 89 |         km1[1, 2, 3] # no 3-dim access
 90 | 
 91 |     with raises(KMAccessError):
 92 |         km1[1, 2, 3, 4] # no 4-dim access either
 93 | 
 94 |     # selection must result in valid indices
 95 |     with raises(KMAccessError):
 96 |         km1[0,km1.size+5]
 97 | 
 98 |     with raises(KMAccessError):
 99 |         km1[km1.size + 5, 0]
100 | 
101 |     # linear indexing is now allowed
102 |     for valid_index in np.random.randint(0, km1.size, 5):
103 |         _ = km1[valid_index]
104 | 
105 |     # as well as vectorized/colon
106 |     _ = km1[:,0]
107 |     _ = km1[0, :]
108 | 
109 | 
def test_random_submatrix_access():
    """Slicing a random row range and column range must return a block whose
    shape matches the lengths of the two slices."""

    # candidate bounds are 1 .. num_samples-1
    candidates = np.arange(1, num_samples)
    row_bounds = np.random.choice(candidates, 2)
    col_bounds = np.random.choice(candidates, 2)
    row_bounds.sort()
    col_bounds.sort()

    # make sure neither slice is empty
    if row_bounds[0] == row_bounds[1]:
        row_bounds[1] = row_bounds[0] + 1

    if col_bounds[0] == col_bounds[1]:
        col_bounds[1] = col_bounds[0] + 1

    row_start, row_stop = row_bounds[0], row_bounds[1]
    col_start, col_stop = col_bounds[0], col_bounds[1]

    sub_matrix = km1[row_start:row_stop, col_start:col_stop]
    expected_shape = (row_stop - row_start, col_stop - col_start)
    if not sub_matrix.shape == expected_shape:
        raise ValueError('error in KM access implementation')
129 | 
def test_size_properties():
    """Diagonal length must be N and size must be N^2, for N=num_samples."""

    # each entry: (callable returning True on failure, error message)
    checks = (
        (lambda: len(km1.diagonal()) != num_samples,
         'KM diagonal does not have N elements!'),
        (lambda: km1.size != num_samples**2,
         'KM size does not match N^2, N=num_samples'),
        (lambda: km1.size != km1.num_samples**2,
         'KM size does not match N^2, invalid internal representation!'),
    )
    # evaluated lazily and in order, so the first violated check is reported
    for violated, message in checks:
        if violated():
            raise ValueError(message)
140 | 
def test_sparsity():
    """``full_sparse`` must be a scipy sparse matrix (unless normed values are
    kept), while ``full`` must be dense."""

    unnormed_km = KernelMatrix(poly, normalized=False)
    unnormed_km.attach_to(sample_data)
    # when normalized=True, full KM won't be sparse!
    if not unnormed_km._keep_normed:
        if not issparse(unnormed_km.full_sparse):
            raise TypeError('error in sparse format access of KM : it is not sparse')

    dense_is_sparse = issparse(km1.full)
    if dense_is_sparse:
        raise TypeError('error in dense format access of KM : it is sparse!')
151 | 
def test_reset_flags_on_new_attach():
    """Re-attaching to a sample must clear the flags, the evaluation counter
    and the cached values from the previous computation."""

    km1.attach_to(sample_data)

    # each entry: (callable returning True on failure, error message)
    checks = (
        (lambda: km1._populated_fully,
         'flag _populated_fully not set to False upon reset'),
        (lambda: km1._lower_tri_km_filled,
         'flag _lower_tri_km_filled not set to False upon reset'),
        (lambda: km1._num_ker_eval > 0,
         'counter _num_ker_eval > 0 upon reset!'),
        (lambda: hasattr(km1, '_full_km'),
         '_full_km from previous run is not cleared!'),
        (lambda: len(km1._KM) > 0,
         'internal dict not empty upon reset!'),
    )
    # evaluated lazily and in order, so the first violated check is reported
    for violated, message in checks:
        if violated():
            raise ValueError(message)
165 | 
def test_internal_flags_on_recompute():
    """Accessing ``full`` after a reset must set the flags, the evaluation
    counter and the internal storage to their fully-computed state."""

    km1.attach_to(sample_data) # reset first
    _ = km1.full # recompute

    # each entry: (callable returning True on failure, error message)
    checks = (
        (lambda: not km1._populated_fully,
         'flag _populated_fully not set to True upon recompute'),
        (lambda: km1._num_ker_eval != max_num_ker_eval,
         'unexpected value for counter _num_ker_eval upon recompute!'),
        (lambda: not hasattr(km1, '_full_km'),
         '_full_km is not populated yet!'),
        (lambda: len(km1._KM) != max_num_elements,
         'internal dict not empty upon recompute!'),
        (lambda: not km1._lower_tri_km_filled,
         'flag _lower_tri_km_filled not set to True '
         'upon recompute with fill_lower_tri=True'),
    )
    # evaluated lazily and in order, so the first violated check is reported
    for violated, message in checks:
        if violated():
            raise ValueError(message)
181 | 
def test_attach_to_two_samples():
    """
    Behaviour of KM when attached to two samples.

    - entries must match a direct evaluation of the kernel on the two rows
    - size must be N1*N2
    - centering and diagonal access are not available
    - the two samples must have the same dimensionality

    """

    sample_two = np.random.rand(num_samples_two, sample_two_dim)
    # drawn but not used by the checks below
    targets_two = np.random.choice(target_label_set, num_samples_two)

    for kernel in DEFINED_KERNEL_FUNCS:
        km2 = KernelMatrix(kernel=kernel, normalized=False)
        km2.attach_to(sample_data, name_one='S1', sample_two=sample_two, name_two='S2')
        _ = km2.full  # this will force computation of full KM

        # spot-check five random (row, column) pairs
        rows = np.random.choice(range(num_samples), 5)
        cols = np.random.choice(range(num_samples_two), 5)
        for row, col in zip(rows, cols):
            expected = kernel(sample_data[row, :], sample_two[col, :])
            if np.isclose(km2[row, col], expected):
                continue
            raise ValueError('Invalid implementation in two sample case:'
                             '\n\tcomputed values do not match external evaluation!'
                             '\n\t for {}'.format(kernel))

    # the remaining checks run on the matrix of the last kernel in the loop
    expected_size = num_samples*num_samples_two
    if km2.size != expected_size:
        raise ValueError('KM size does not match N1*N2, N=num_samples for dataset i')

    if km2.size != np.prod(km2.num_samples):
        raise ValueError('KM size does not match N1*N2, invalid internal representation!')

    with raises(NotImplementedError):
        km2.center()

    with raises(KMAccessError):
        km2.centered

    with raises((KMAccessError, NotImplementedError)):
        km2.diagonal()

    with raises(ValueError):
        # dimensionalities can not differ!
        one_extra_column = sample_data[:, :1]
        more_dims = np.hstack((sample_data, one_extra_column))
        km2.attach_to(sample_data, sample_two=more_dims)
226 | 
227 | 
def test_attributes():
    """Attributes set on a KernelMatrix must be retrievable, missing ones must
    fall back to the supplied default, and all must be listed by
    ``attributes()``."""

    linear_km = KernelMatrix(LinearKernel())
    linear_km.set_attr('name', 'linear')
    assert linear_km.get_attr('name') == 'linear'
    # missing attribute -> default value is returned
    assert linear_km.get_attr('noname', '404') == '404'
    linear_km.set_attr('weight', 42)

    all_attrs = linear_km.attributes()
    for expected_attr in ('name', 'weight'):
        assert expected_attr in all_attrs
239 | 
240 | 
def test_constant_km():
    """Check size properties, element access, invalid indexing and contents of
    ConstantKernelMatrix."""

    rand_val = np.random.random()
    # at least one sample, so the checks below never run on an empty matrix
    rand_size = np.random.randint(1, 50)

    const = ConstantKernelMatrix(num_samples=rand_size,
                                 value=rand_val)
    # trying name param also
    const = ConstantKernelMatrix(num_samples=rand_size,
                                 value=rand_val, name=None)

    assert const.num_samples == rand_size == const.size
    assert len(const) == rand_size
    assert const.shape == (rand_size, rand_size)

    for _ in range(min(5, rand_size)):
        indices = np.random.randint(0, rand_size, 2)
        assert all(const[indices[0], indices[1]] == rand_val)

    # np.inf: the capitalised alias np.Inf was removed in NumPy 2.0
    for invalid_index in ('index', ':',
                          [np.inf, ], [ 1,-rand_size-2],
                          [], [None, 2]):
        with raises(KMAccessError):
            const[invalid_index]

    # there must be a single unique value in the matrix or diagonal
    assert np.isclose(np.unique(const.full), rand_val).all()
    assert np.isclose(np.unique(const.diag), rand_val).all()

    expected = np.full((rand_size, rand_size), fill_value=rand_val)
    assert np.isclose(const.full, expected).all()
272 | 
273 | 
# Manual entry point for debugging a single test. Guarded so that nothing runs
# when the module is merely imported or collected by pytest.
if __name__ == '__main__':
    # test_attributes()
    # test_constant_km()
    test_get_item()
277 | 


--------------------------------------------------------------------------------
/kernelmethods/tests/test_kernel_set.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import numpy as np
  3 | from pytest import raises
  4 | 
  5 | from kernelmethods.base import KMSetAdditionError, KernelMatrix, KernelSet, \
  6 |     BaseKernelFunction
  7 | from kernelmethods.numeric_kernels import GaussianKernel, LinearKernel, PolyKernel
  8 | from kernelmethods.sampling import make_kernel_bucket
  9 | 
# fixed sample size and dimensionality; labels form a column vector
num_samples = 50 # 9
sample_dim = 3 # 2
target_label_set = [1, 2]

sample_data = np.random.rand(num_samples, sample_dim)
target_labels = np.random.choice(target_label_set, (num_samples, 1))

# outer product of the labels with themselves: (num_samples, num_samples)
IdealKM = target_labels.dot(target_labels.T)

# three kernel matrices with input checks skipped
rbf = KernelMatrix(GaussianKernel(sigma=10, skip_input_checks=True))
lin = KernelMatrix(LinearKernel(skip_input_checks=True))
poly = KernelMatrix(PolyKernel(degree=2, skip_input_checks=True))

# the matrices are deliberately left unattached to any sample here
# lin.attach_to(sample_data)
# rbf.attach_to(sample_data)
# poly.attach_to(sample_data)

# shared set used by the tests below; printed once on import
kset = KernelSet([lin, poly, rbf])
print(kset)
 29 | 
 30 | def test_creation():
 31 | 
 32 |     try:
 33 |         ks = KernelSet()
 34 |     except:
 35 |         raise SyntaxError('empty set creation failed.')
 36 | 
 37 |     with raises(TypeError):
 38 |         ks = KernelSet(km_list='blah')
 39 | 
 40 | def test_size_property_mismatch():
 41 | 
 42 |     ks = KernelSet(num_samples=sample_data.shape[0]+1)
 43 |     lin = KernelMatrix(LinearKernel(skip_input_checks=True))
 44 |     lin.attach_to(sample_data)
 45 |     with raises(KMSetAdditionError):
 46 |         ks.append(lin)
 47 | 
 48 | 
 49 | def test_size():
 50 | 
 51 |     assert kset.size == 3
 52 |     assert len(kset) == 3
 53 | 
 54 | def test_get_item():
 55 |     """access by index"""
 56 | 
 57 |     for invalid_index in [-1, kset.size]:
 58 |         with raises(IndexError):
 59 |             print(kset[invalid_index])
 60 | 
 61 |     for invalid_index in [-1.0, '1']:
 62 |         with raises(ValueError):
 63 |             print(kset[invalid_index])
 64 | 
 65 | 
 66 | def test_get_ker_funcs():
 67 | 
 68 |     for index in (0, 1):
 69 |         kf_list = kset.get_kernel_funcs([index, ])
 70 |         for kf in kf_list:
 71 |             if not isinstance(kf, BaseKernelFunction):
 72 |                 raise TypeError('get_kernel_funcs not returning proper output type')
 73 | 
 74 | def test_take():
 75 |     """access by index"""
 76 | 
 77 |     for invalid_index in [-1, kset.size]:
 78 |         with raises(IndexError):
 79 |             print(kset.take([invalid_index]))
 80 | 
 81 |     for valid_index in np.random.randint(0, min(kset.size, 3), 3):
 82 |         _ks = kset.take(valid_index)
 83 |         if not isinstance(_ks, KernelSet):
 84 |             raise TypeError('.take not returning KernelSet')
 85 |         for _km in _ks:
 86 |             if not isinstance(_km, KernelMatrix):
 87 |                 raise TypeError('Elements of KernelSet are not KernelMatrix!')
 88 | 
 89 |     k2 = kset.take([0, 1])
 90 |     assert isinstance(k2, KernelSet)
 91 |     assert k2.size == 2
 92 | 
 93 | def test_extend():
 94 | 
 95 |     kset1 = KernelSet([poly, rbf, lin])
 96 |     kset2 = KernelSet([poly, rbf])
 97 |     kset1.extend(kset2)
 98 | 
 99 |     if kset1.size != 5:
100 |         raise ValueError('KernelSet.extend() failed')
101 | 
102 |     with raises(KMSetAdditionError):
103 |         kset1.extend(['blah', ])
104 | 
105 |     with raises(KMSetAdditionError):
106 |         k4_diff_size = KernelSet(num_samples=kset.size+1)
107 |         kset1.extend(k4_diff_size)
108 | 
109 | 
def test_attributes():
    """Attributes set on a KernelSet must reach each member: a single value is
    shared by all, a sequence is distributed element-wise, and a sequence of the
    wrong length is rejected."""

    # one value for every member
    kset.set_attr('name', 'linear')
    for member in kset:
        assert member.get_attr('name') == 'linear'
        assert member.get_attr('noname', '404') == '404'

    # one value per member
    weights = np.random.rand(kset.size)
    kset.set_attr('weight', weights)
    for position, member in enumerate(kset):
        assert member.get_attr('weight') == weights[position]

    bucket = make_kernel_bucket()
    bucket.attach_to(sample_data, attr_name='a', attr_value='b')
    # differing length
    too_short = ['value']*(bucket.size-1)
    with raises(ValueError):
        bucket.set_attr('a', too_short)

    bucket.get_attr('a')
129 | 
130 | #
131 | # print('Alignment to Ideal Kernel:')
132 | # ag = np.zeros(kb.size)
133 | # for ix, km in enumerate(kb):
134 | #     ag[ix] = alignment_centered(km.full, IdealKM)
135 | #     print('{:4} {:>60} : {:10.5f}'.format(ix, str(km),ag[ix]))
136 | 
137 | test_take()
138 | 


--------------------------------------------------------------------------------
/kernelmethods/tests/test_numeric_kernels.py:
--------------------------------------------------------------------------------
  1 | 
  2 | from numbers import Number
  3 | 
  4 | import numpy as np
  5 | from hypothesis import (HealthCheck, given, settings as hyp_settings, strategies)
  6 | from pytest import raises
  7 | 
  8 | from kernelmethods.base import KernelMatrix
  9 | from kernelmethods.numeric_kernels import (Chi2Kernel, DEFINED_KERNEL_FUNCS,
 10 |                                            GaussianKernel, LaplacianKernel,
 11 |                                            LinearKernel, PolyKernel, SigmoidKernel,
 12 |                                            HadamardKernel)
 13 | from kernelmethods.operations import is_positive_semidefinite
 14 | from kernelmethods.utils import check_callable
 15 | 
 16 | default_feature_dim = 10
 17 | range_feature_dim = [10, 50]
 18 | range_num_samples = [50, 100]
 19 | 
 20 | range_polynomial_degree = [2, 10] # degree=1 is tested in LinearKernel()
 21 | 
 22 | np.random.seed(42)
 23 | 
 24 | # choosing skip_input_checks=True (skipping validation) would speed up test runs; False keeps the checks on
 25 | # default values for parameters
 26 | 
 27 | num_tests_psd_kernel = 3
 28 | 
 29 | def gen_random_array(dim):
 30 |     """To better control precision and type of floats"""
 31 | 
 32 |     # TODO input sparse arrays for test
 33 |     return np.random.rand(dim)
 34 | 
 35 | def gen_random_sample(num_samples, sample_dim):
 36 |     """To better control precision and type of floats"""
 37 | 
 38 |     # TODO input sparse arrays for test
 39 |     return np.random.rand(num_samples, sample_dim)
 40 | 
 41 | 
 42 | def _test_for_all_kernels(kernel, sample_dim, check_PSDness=True):
 43 |     """Common tests that all kernels must pass."""
 44 | 
 45 |     x = gen_random_array(sample_dim)
 46 |     y = gen_random_array(sample_dim)
 47 | 
 48 |     try:
 49 |         result = kernel(x, y)
 50 |     except Exception:
 51 |         raise RuntimeError('{} unable to calculate!\n'
 52 |                            ' on x {}\n y{}'.format(kernel, x, y))
 53 | 
 54 |     if not isinstance(result, Number):
 55 |         raise ValueError('result {} of type {} is not a number!\n'
 56 |                          'x={}\ny={}\nkernel={}\n'
 57 |                          ''.format(result, type(result), x, y, kernel))
 58 | 
 59 |     if kernel(y, x) != result:
 60 |         raise ValueError('{} is not symmetric!'
 61 |                          'x={}\n y={}\n kernel={}\n'
 62 |                          ''.format(kernel.name, x, y, kernel))
 63 | 
 64 |     if check_PSDness:
 65 |         # ensuring it produces a PSD KM
 66 |         kernel.is_psd()
 67 | 
 68 | 
 69 | def test_kernel_design():
 70 |     """
 71 |     Every kernel must be
 72 |     1. must have a name defined
 73 |     2. must be callable with two samples
 74 |     3. returns a number
 75 | 
 76 |     """
 77 | 
 78 |     for kernel in DEFINED_KERNEL_FUNCS:
 79 | 
 80 |         # must be callable with 2 args
 81 |         check_callable(kernel, min_num_args=2)
 82 | 
 83 |         if not hasattr(kernel, 'name'):
 84 |             raise TypeError('{} does not have name attribute!'.format(kernel))
 85 | 
 86 |         # only numeric data is accepted and other dtypes must raise an error
 87 |         for non_num in ['string',
 88 |                         [object, object] ]:
 89 |             with raises(ValueError):
 90 |                 _ = kernel(non_num, non_num)
 91 | 
 92 | 
 93 | def _test_func_is_valid_kernel(kernel, sample_dim, num_samples):
 94 |     """A func is a valid kernel if the kernel matrix generated by it is PSD.
 95 | 
 96 |     Not including this in tests for all kernels to allow for non-PSD kernels in the future
 97 | 
 98 |     """
 99 | 
100 |     KM = KernelMatrix(kernel, name='TestKM')
101 |     KM.attach_to(gen_random_sample(num_samples, sample_dim))
102 |     is_psd = is_positive_semidefinite(KM.full, verbose=True)
103 |     if not is_psd:
104 |         raise ValueError('{} is not PSD'.format(str(KM)))
105 | 
106 | 
107 | @hyp_settings(max_examples=num_tests_psd_kernel, deadline=None,
108 |               suppress_health_check=HealthCheck.all())
109 | @given(strategies.integers(range_feature_dim[0], range_feature_dim[1]),
110 |        strategies.integers(range_num_samples[0], range_num_samples[1]),
111 |        strategies.integers(range_polynomial_degree[0], range_polynomial_degree[1]),
112 |        strategies.floats(min_value=0, max_value=1e3,
113 |                          allow_nan=False, allow_infinity=False))
114 | def test_polynomial_kernel(sample_dim, num_samples,
115 |                            poly_degree, poly_intercept):
116 |     """Tests specific for Polynomial kernel."""
117 | 
118 |     poly = PolyKernel(degree=poly_degree, b=poly_intercept, skip_input_checks=False)
119 |     _test_for_all_kernels(poly, sample_dim)
120 |     _test_func_is_valid_kernel(poly, sample_dim, num_samples)
121 | 
122 | 
123 | @hyp_settings(max_examples=num_tests_psd_kernel, deadline=None,
124 |               suppress_health_check=HealthCheck.all())
125 | @given(strategies.integers(range_feature_dim[0], range_feature_dim[1]),
126 |        strategies.integers(range_num_samples[0], range_num_samples[1]),
127 |        strategies.floats(min_value=0, max_value=1e6,
128 |                          allow_nan=False, allow_infinity=False))
129 | def test_gaussian_kernel(sample_dim, num_samples, sigma):
130 |     """Tests specific for Gaussian kernel."""
131 | 
132 |     gaussian = GaussianKernel(sigma=sigma, skip_input_checks=False)
133 |     _test_for_all_kernels(gaussian, sample_dim)
134 |     _test_func_is_valid_kernel(gaussian, sample_dim, num_samples)
135 | 
136 | @hyp_settings(max_examples=num_tests_psd_kernel, deadline=None,
137 |               suppress_health_check=HealthCheck.all())
138 | @given(strategies.integers(range_feature_dim[0], range_feature_dim[1]),
139 |        strategies.integers(range_num_samples[0], range_num_samples[1]))
140 | def test_linear_kernel(sample_dim, num_samples):
141 |     """Tests specific for Linear kernel."""
142 | 
143 |     linear = LinearKernel(skip_input_checks=False)
144 |     _test_for_all_kernels(linear, sample_dim)
145 |     _test_func_is_valid_kernel(linear, sample_dim, num_samples)
146 | 
147 | 
148 | @hyp_settings(max_examples=num_tests_psd_kernel, deadline=None,
149 |               suppress_health_check=HealthCheck.all())
150 | @given(strategies.integers(range_feature_dim[0], range_feature_dim[1]),
151 |        strategies.integers(range_num_samples[0], range_num_samples[1]),
152 |        strategies.floats(min_value=0, max_value=1e6,
153 |                          allow_nan=False, allow_infinity=False))
154 | def test_laplacian_kernel(sample_dim, num_samples, gamma):
155 |     """Tests specific for Laplacian kernel."""
156 | 
157 |     laplacian = LaplacianKernel(gamma=gamma, skip_input_checks=False)
158 |     _test_for_all_kernels(laplacian, sample_dim)
159 |     _test_func_is_valid_kernel(laplacian, sample_dim, num_samples)
160 | 
161 | 
162 | @hyp_settings(max_examples=num_tests_psd_kernel, deadline=None,
163 |               suppress_health_check=HealthCheck.all())
164 | @given(strategies.integers(range_feature_dim[0], range_feature_dim[1]),
165 |        strategies.integers(range_num_samples[0], range_num_samples[1]),
166 |        strategies.floats(min_value=0, max_value=1e6,
167 |                          allow_nan=False, allow_infinity=False),
168 |        strategies.floats(min_value=0, max_value=1e6,
169 |                          allow_nan=False, allow_infinity=False)
170 |        )
171 | def test_sigmoid_kernel(sample_dim, num_samples, gamma, offset):
172 |     """Tests specific for sigmoid kernel."""
173 | 
174 |     sigmoid = SigmoidKernel(gamma=gamma, offset=offset, skip_input_checks=False)
175 |     # sigmoid is not always PSD
176 |     _test_for_all_kernels(sigmoid, sample_dim, check_PSDness=False)
177 | 
178 | 
179 | @hyp_settings(max_examples=num_tests_psd_kernel, deadline=None,
180 |               suppress_health_check=HealthCheck.all())
181 | @given(strategies.integers(range_feature_dim[0], range_feature_dim[1]),
182 |        strategies.integers(range_num_samples[0], range_num_samples[1]),
183 |        strategies.floats(min_value=0, max_value=1e6,
184 |                          allow_nan=False, allow_infinity=False))
185 | def test_chi2_kernel(sample_dim, num_samples, gamma):
186 |     """Tests specific for Laplacian kernel."""
187 | 
188 |     chi2 = Chi2Kernel(gamma=gamma, skip_input_checks=False)
189 |     _test_for_all_kernels(chi2, sample_dim)
190 |     _test_func_is_valid_kernel(chi2, sample_dim, num_samples)
191 | 
192 | 
193 | def test_chi2_kernel_misc():
194 |     """Tests specific for Laplacian kernel."""
195 | 
196 |     chi2 = Chi2Kernel()
197 |     x = gen_random_array(10)
198 |     y = gen_random_array(10)
199 | 
200 |     neg_x = x - x.mean() # some values would be negative
201 |     pos_y = np.abs(y)
202 | 
203 |     from kernelmethods.config import Chi2NegativeValuesException
204 |     with raises(Chi2NegativeValuesException):
205 |         chi2(neg_x, pos_y)
206 |     with raises(Chi2NegativeValuesException):
207 |         chi2(pos_y, neg_x)
208 | 
209 | @hyp_settings(max_examples=num_tests_psd_kernel, deadline=None,
210 |               suppress_health_check=HealthCheck.all())
211 | @given(strategies.integers(range_feature_dim[0], range_feature_dim[1]),
212 |        strategies.floats(min_value=1, max_value=1e6,
213 |                          allow_nan=False, allow_infinity=False))
214 | def test_Hadamard_kernel(sample_dim, alpha):
215 |     """Tests specific for Hadamard kernel."""
216 | 
217 |     had = HadamardKernel(alpha=alpha, skip_input_checks=False)
218 |     _test_for_all_kernels(had, sample_dim, check_PSDness=False)
219 | 
220 | 
221 | def test_Hadamard_kernel_misc():
222 |     """Tests specific for Hadamard kernel."""
223 | 
224 |     with raises(ValueError):
225 |         had = HadamardKernel(alpha=0)
226 | 
227 | 


--------------------------------------------------------------------------------
/kernelmethods/tests/test_operations.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from kernelmethods.base import KernelMatrix
  3 | from kernelmethods.config import KMNormError
  4 | from kernelmethods.numeric_kernels import LinearKernel
  5 | from kernelmethods.operations import (alignment_centered, center_km, frobenius_norm,
  6 |                                       frobenius_product, is_PSD, linear_combination,
  7 |                                       normalize_km, normalize_km_2sample)
  8 | from kernelmethods.sampling import make_kernel_bucket
  9 | from numpy.random import randn
 10 | from pytest import raises, warns
 11 | 
 12 | num_samples = np.random.randint(20, 50)
 13 | sample_dim = 3  # 2
 14 | target_label_set = [1, 2]
 15 | 
 16 | sample_data = np.random.rand(num_samples, sample_dim)
 17 | target_labels = np.random.choice(target_label_set, (num_samples, 1))
 18 | 
 19 | A = np.random.rand(4, 4)
 20 | B = np.random.rand(4, 4)
 21 | 
 22 | 
 23 | def gen_random_array(dim):
 24 |     """To better control precision and type of floats"""
 25 | 
 26 |     # TODO input sparse arrays for test
 27 |     return np.random.rand(dim)
 28 | 
 29 | 
 30 | def gen_random_sample(num_samples, sample_dim):
 31 |     """To better control precision and type of floats"""
 32 | 
 33 |     # TODO input sparse arrays for test
 34 |     return np.random.rand(num_samples, sample_dim)
 35 | 
 36 | 
 37 | def test_psd():
 38 |     with raises(TypeError):
 39 |         is_PSD([2, 34, 23])
 40 | 
 41 |     if is_PSD(np.random.rand(2, 4)):
 42 |         raise ValueError('Non-square matrix is being deemed PSD!!! Big error!')
 43 | 
 44 |     if is_PSD(np.random.rand(5, 5)):
 45 |         raise ValueError('Non-symmetric matrix is being deemed PSD!!! Big error!')
 46 | 
 47 |     negative_semi_def_matrix = np.array([[-1, 0], [0, -1]])
 48 |     if is_PSD(negative_semi_def_matrix):
 49 |         raise ValueError('Implementation for PSD check failed. '
 50 |                          'negative_semi_def_matrix is approved as PSD.')
 51 | 
 52 |     not_psd_matrices = (np.array([[1, 1.00001, 1],
 53 |                                   [1.00001, 1, 1.00001],
 54 |                                   [1, 1.00001, 1]]),
 55 |                         np.array([[3, 4],
 56 |                                   [4, 3]]))
 57 |     for not_psd_matrix in not_psd_matrices:
 58 |         assert is_PSD(not_psd_matrix) is False
 59 | 
 60 | 
 61 | def test_frobenius_product():
 62 |     A = np.array([[1, 2], [3, 4]])
 63 |     B = np.array([[4, 1], [2, 5]])
 64 |     C = np.array([[10, 2, 5], [6, 8, 6]])
 65 | 
 66 |     fprod = frobenius_product(A, B)
 67 |     if not np.isclose(fprod, 32):
 68 |         raise ValueError('Frobenius product implementation is wrong!')
 69 | 
 70 |     with raises(ValueError):
 71 |         frobenius_product(B, C)
 72 | 
 73 |     fnorm = frobenius_norm(A)
 74 |     assert np.isreal(fnorm)
 75 |     if not np.isclose(fnorm, np.sqrt(frobenius_product(A, A))):
 76 |         raise ValueError('Frobenius norm implementation is wrong!')
 77 | 
 78 | 
 79 | def test_centering():
 80 |     with raises(ValueError):
 81 |         center_km(np.full((3, 4), 1))
 82 | 
 83 |     with raises(ValueError):
 84 |         center_km([])
 85 | 
 86 |     mat_size = 10
 87 |     kmc = center_km(np.random.rand(mat_size, mat_size))
 88 |     assert kmc.shape == (mat_size, mat_size)
 89 | 
 90 | 
 91 | def test_normalize():
 92 |     with raises(ValueError):
 93 |         normalize_km(np.full((3, 4), 1))
 94 | 
 95 |     with raises(KMNormError):
 96 |         normalize_km(np.zeros((5, 5)))
 97 | 
 98 |     kmc = normalize_km(randn(10, 10))
 99 | 
100 | 
101 | def test_normalize_two_sample():
102 |     num_samples_one = 3
103 |     num_samples_two = 4
104 |     with raises(ValueError):
105 |         normalize_km_2sample(randn(num_samples_one, num_samples_two),
106 |                              randn(num_samples_two + 1, 1), [])
107 | 
108 |     with raises(ValueError):
109 |         normalize_km_2sample(randn(num_samples_one, num_samples_two),
110 |                              randn(num_samples_one, 1),
111 |                              randn(num_samples_two - 1, 1), )
112 | 
113 |     with raises((KMNormError, ValueError, RuntimeError)):
114 |         normalize_km_2sample(np.zeros((5, 5)), np.zeros((5, 1)), np.zeros((5, 1)))
115 | 
116 |     with raises(NotImplementedError):
117 |         normalize_km_2sample(randn(num_samples_one, num_samples_two),
118 |                              randn(num_samples_one, 1),
119 |                              randn(num_samples_two, 1),
120 |                              method='notcosine')
121 | 
122 |     with raises(NotImplementedError):
123 |         normalize_km(randn(10, 10), method='notcosine')
124 | 
125 |     # the following should work
126 |     _ = normalize_km(randn(10, 10))
127 |     # adding 0.1 to diagonals to avoid norm errors with denom close to 0
128 |     diag_one = np.abs(randn(num_samples_one, 1)) + 0.1
129 |     diag_two = np.abs(randn(num_samples_two, 1)) + 0.1
130 |     _ = normalize_km_2sample(np.abs(randn(num_samples_one, num_samples_two)),
131 |                              diag_one, diag_two, method='cosine')
132 | 
133 | 
134 | def test_alignment_centered():
135 |     km1 = KernelMatrix(kernel=LinearKernel())
136 |     km1.attach_to(gen_random_sample(num_samples, sample_dim))
137 | 
138 |     km2 = KernelMatrix(kernel=LinearKernel())
139 |     km2.attach_to(gen_random_sample(num_samples, sample_dim))
140 | 
141 |     km3_bad_size = KernelMatrix(kernel=LinearKernel())
142 |     km3_bad_size.attach_to(gen_random_sample(num_samples + 2, sample_dim))
143 | 
144 |     with raises(ValueError):
145 |         alignment_centered(km1.full, km3_bad_size.full)
146 | 
147 |     # bad type : must be ndarray
148 |     with raises(TypeError):
149 |         alignment_centered(km1, km2.full)
150 | 
151 |     # bad type : must be ndarray
152 |     with raises(TypeError):
153 |         alignment_centered(km1.full, km2)
154 | 
155 |     for flag in (True, False):
156 |         _ = alignment_centered(km1.full, km2.full, centered_already=flag)
157 | 
158 |     with raises(ValueError):
159 |         _ = alignment_centered(np.zeros((10, 10)), randn(10, 10),
160 |                                value_if_zero_division='raise')
161 | 
162 |     return_val_requested = 'random_set_value'
163 |     with warns(UserWarning):
164 |         ret_value = alignment_centered(randn(10, 10),
165 |                                        np.zeros((10, 10)),
166 |                                        value_if_zero_division=return_val_requested)
167 |     if ret_value != return_val_requested:
168 |         raise ValueError('Not returning the value requested in case of error!')
169 | 
170 | 
171 | def test_linear_comb():
172 |     kset = make_kernel_bucket('light')
173 |     weights = randn(kset.size)
174 |     kset.attach_to(sample_data)
175 |     lc = linear_combination(kset, weights)
176 | 
177 |     with raises(ValueError):
178 |         lc = linear_combination(kset, randn(kset.size + 1))
179 | 
180 |     zero_weights = np.zeros((kset.size,1))
181 |     lc0 = linear_combination(kset, zero_weights)
182 |     if not np.isclose(lc0.max(), 0.0):
183 |         raise ValueError('zero weights do not lead to zero KM!')
184 | 
185 |     with raises(RuntimeError):
186 |         lc0 = linear_combination(kset, zero_weights, norm_weights=True)
187 | 


--------------------------------------------------------------------------------
/kernelmethods/tests/test_ranking.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from kernelmethods.sampling import make_kernel_bucket
 3 | from kernelmethods.ranking import find_optimal_kernel, rank_kernels, \
 4 |     alignment_ranking, min_max_scale, CV_ranking, get_estimator
 5 | import numpy as np
 6 | from pytest import raises, warns
 7 | 
 8 | kb = make_kernel_bucket()
 9 | 
10 | def test_misc():
11 | 
12 |     raises(TypeError, find_optimal_kernel, 'bucket', None, None)
13 | 
14 |     with raises(NotImplementedError):
15 |         rank_kernels(kb, None, method='align/corr')
16 | 
17 | 


--------------------------------------------------------------------------------
/kernelmethods/tests/test_sampling.py:
--------------------------------------------------------------------------------
 1 | from kernelmethods.numeric_kernels import (PolyKernel, GaussianKernel,
 2 |                                            SigmoidKernel,
 3 |                                            LaplacianKernel)
 4 | import numpy as np
 5 | from kernelmethods.config import KernelMethodsException, kernel_bucket_strategies
 6 | from kernelmethods.numeric_kernels import (GaussianKernel, LaplacianKernel,
 7 |                                            PolyKernel, SigmoidKernel)
 8 | from kernelmethods.sampling import (KernelBucket, correlation_km, ideal_kernel,
 9 |                                     make_kernel_bucket, pairwise_similarity)
10 | from pytest import raises, warns
11 | 
12 | num_samples = 50  # 9
13 | sample_dim = 3  # 2
14 | target_label_set = [1, 2]
15 | 
16 | sample_data = np.random.rand(num_samples, sample_dim)
17 | target_labels = np.random.choice(target_label_set, (num_samples, 1))
18 | 
19 | A = np.random.rand(4, 4)
20 | B = np.random.rand(4, 4)
21 | 
22 | 
23 | def gen_random_array(dim):
24 |     """To better control precision and type of floats"""
25 | 
26 |     # TODO input sparse arrays for test
27 |     return np.random.rand(dim)
28 | 
29 | 
30 | def gen_random_sample(num_samples, sample_dim):
31 |     """To better control precision and type of floats"""
32 | 
33 |     # TODO input sparse arrays for test
34 |     return np.random.rand(num_samples, sample_dim)
35 | 
36 | 
37 | kset = make_kernel_bucket('light')
38 | kset.attach_to(sample_data)
39 | 
40 | 
41 | def test_make_bucket():
42 |     with warns(UserWarning):
43 |         _ = make_kernel_bucket(kset)
44 | 
45 |     with raises(ValueError):
46 |         _ = make_kernel_bucket('blah_invalid_strategy')
47 | 
48 |     # ensure correct values work
49 |     for strategy in kernel_bucket_strategies:
50 |         _ = make_kernel_bucket(strategy=strategy)
51 | 
52 | 
53 | def test_KB_class():
54 |     for param in ['normalize_kernels', 'skip_input_checks']:
55 |         for invalid_value in (1, 'str', 34., 2 + 4j):
56 |             with raises(TypeError):
57 |                 _ = KernelBucket(**{param: invalid_value})
58 | 
59 | 
60 | def test_add_parametrized_kernels():
61 |     kb = KernelBucket()
62 |     for invalid_kfunc in ('kfunc', gen_random_sample, KernelBucket,):
63 |         with raises(KernelMethodsException):
64 |             kb.add_parametrized_kernels(invalid_kfunc, 'param', (1,))
65 | 
66 |     for invalid_values in ('string', gen_random_sample, [], KernelBucket):
67 |         with raises(ValueError):
68 |             kb.add_parametrized_kernels(PolyKernel, 'param', invalid_values)
69 | 
70 |     for invalid_param in ('__param__', (), 'blahblah', 5):
71 |         for ker_func in (PolyKernel, LaplacianKernel, GaussianKernel, SigmoidKernel):
72 |             with raises(ValueError):
73 |                 kb.add_parametrized_kernels(ker_func, invalid_param, 2)
74 | 
75 | 
76 | def test_ideal_kernel():
77 |     ik = ideal_kernel(np.random.randint(1, 5, num_samples))
78 |     if ik.size != num_samples ** 2:
79 |         raise ValueError('ideal kernel size unexpected')
80 | 
81 | 
82 | def test_correlation_km():
83 |     corr_coef = correlation_km(np.random.rand(10, 10), np.random.rand(10, 10))
84 |     if corr_coef > 1 or corr_coef < -1:
85 |         raise ValueError('correlation out of bounds [-1, 1]')
86 | 
87 | 
88 | def test_pairwise_similarity():
89 |     ps = pairwise_similarity(kset)
90 |     if ps.shape != (kset.size, kset.size):
91 |         raise ValueError('invalid shape for pairwise_similarity computation')
92 | 


--------------------------------------------------------------------------------
/kernelmethods/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy as np
 3 | from pytest import raises
 4 | 
 5 | from kernelmethods.numeric_kernels import (GaussianKernel, LaplacianKernel,
 6 |                                            LinearKernel, PolyKernel)
 7 | from kernelmethods.utils import (check_callable, check_input_arrays,
 8 |                                  check_operation_kernel_matrix, ensure_ndarray_1D,
 9 |                                  ensure_ndarray_2D,
10 |                                  get_callable_name, not_symmetric)
11 | 
12 | default_feature_dim = 10
13 | range_feature_dim = [10, 500]
14 | range_num_samples = [50, 500]
15 | num_samples = np.random.randint(20)
16 | sample_dim = np.random.randint(10)
17 | 
18 | range_polynomial_degree = [2, 10] # degree=1 is tested in LinearKernel()
19 | 
20 | np.random.seed(42)
21 | 
22 | # choosing skip_input_checks=True (skipping input validation) is what would speed up test runs
23 | # default values for parameters
24 | SupportedKernels = (GaussianKernel(), PolyKernel(), LinearKernel(),
25 |                     LaplacianKernel())
26 | num_tests_psd_kernel = 3
27 | 
28 | 
29 | def test_check_input_arrays():
30 | 
31 |     with raises(ValueError):
32 |         check_input_arrays(np.random.rand(10, 5), np.random.rand(5, 4))
33 | 
34 |     with raises(ValueError):
35 |         check_input_arrays(np.random.rand(10), np.random.rand(5))
36 | 
37 |     # from scipy.sparse import csr_matrix
38 |     # s1 = csr_matrix((3,4))
39 |     # s2 = csr_matrix((3, 4))
40 |     # _, _ = check_input_arrays(s1, s2)
41 | 
42 | def test_valid_op():
43 | 
44 |     for invalid_op in ('foo', 'bar', 'adition', 'some'):
45 |         with raises(ValueError):
46 |             check_operation_kernel_matrix(invalid_op)
47 | 
48 |     from kernelmethods.config import VALID_KERNEL_MATRIX_OPS
49 |     for valid_op in VALID_KERNEL_MATRIX_OPS:
50 |         _ = check_operation_kernel_matrix(valid_op)
51 | 
52 | def test_ensure_array_dim():
53 | 
54 |     with raises(ValueError):
55 |         ensure_ndarray_2D(np.random.rand(10, 5), ensure_num_cols=3)
56 | 
57 |     with raises(ValueError):
58 |         ensure_ndarray_2D(np.random.rand(10), ensure_num_cols=3)
59 | 
60 |     with raises(ValueError):
61 |         ensure_ndarray_1D(np.random.rand(10, 5))
62 | 
63 |     with raises(ValueError):
64 |         ensure_ndarray_1D(np.random.rand(10, 5, 10))
65 | 
66 | def test_misc():
67 | 
68 |     _ = get_callable_name(test_ensure_array_dim, 'test')
69 |     _ = get_callable_name('test_ensure_array_dim', None)
70 | 
71 |     with raises(TypeError):
72 |         check_callable('kdjkj')
73 | 
74 |     def func_with_less_than_min_args(): return None
75 | 
76 |     with raises(TypeError):
77 |         check_callable(func_with_less_than_min_args)
78 | 
79 |     with raises(TypeError):
80 |         check_callable(func_with_less_than_min_args, 1)
81 | 
82 |     with raises(TypeError):
83 |         check_callable(func_with_less_than_min_args, 3)
84 | 
85 |     assert not_symmetric(np.array([[1, 2], [1, 2]])) is True
86 | 


--------------------------------------------------------------------------------
/kernelmethods/utils.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import numpy as np
  3 | from scipy.sparse import issparse
  4 | from kernelmethods import config
  5 | from collections.abc import Iterable
  6 | 
  7 | def check_input_arrays(x, y, ensure_dtype=np.number):
  8 |     """
  9 |     Ensures the inputs are
 10 |     1) 1D arrays (not matrices)
 11 |     2) with compatible size
 12 |     3) of a particular data type
 13 |     and hence are safe to operate on.
 14 | 
 15 |     Parameters
 16 |     ----------
 17 |     x : iterable
 18 | 
 19 |     y : iterable
 20 | 
 21 |     ensure_dtype : dtype
 22 | 
 23 |     Returns
 24 |     -------
 25 |     x : ndarray
 26 | 
 27 |     y : ndarray
 28 | 
 29 |     """
 30 | 
 31 |     x = ensure_ndarray_1D(x, ensure_dtype)
 32 |     y = ensure_ndarray_1D(y, ensure_dtype)
 33 | 
 34 |     if x.size != y.size:
 35 |         raise ValueError('x (n={}) and y (n={}) differ in size! '
 36 |                          'They must be of same length'.format(x.size, y.size))
 37 | 
 38 |     return x, y
 39 | 
 40 | 
 41 | def ensure_ndarray_2D(array, ensure_dtype=np.number, ensure_num_cols=None):
 42 |     """Converts the input to a numpy array and ensure it is 1D."""
 43 | 
 44 |     if not isinstance(array, np.ndarray):
 45 |         array = np.asarray(array)
 46 | 
 47 |     # squeezing only 3rd dim if they are singleton, leaving 1st & 2nd dim alone
 48 |     axes_to_sqz = tuple(ax for ax, sz in enumerate(array.shape) if sz==1 and ax>1)
 49 |     array = np.squeeze(array, axis=axes_to_sqz)
 50 | 
 51 |     array = ensure_ndarray_size(array, ensure_dtype=ensure_dtype, ensure_num_dim=2)
 52 | 
 53 |     if ensure_num_cols is not None and array.shape[1] != ensure_num_cols:
 54 |         raise ValueError('The number of columns differ from expected {}'
 55 |                          ''.format(ensure_num_cols))
 56 | 
 57 |     return array
 58 | 
 59 | 
 60 | def ensure_ndarray_1D(array, ensure_dtype=np.number):
 61 |     """Converts the input to a numpy array and ensure it is 1D."""
 62 | 
 63 |     if not isinstance(array, np.ndarray):
 64 |         array = np.asarray(array)
 65 | 
 66 |     # squeezing only 2nd, 3rd dim if they are singleton, leaving 1st dim alone
 67 |     axes_to_sqz = tuple(ax for ax, sz in enumerate(array.shape) if sz==1 and ax>0)
 68 |     array = np.squeeze(array, axis=axes_to_sqz)
 69 | 
 70 |     return ensure_ndarray_size(array, ensure_dtype=ensure_dtype, ensure_num_dim=1)
 71 | 
 72 | 
 73 | def ensure_ndarray_size(array, ensure_dtype=np.number, ensure_num_dim=1):
 74 |     """Converts the input to a numpy array and ensure it is of specified dim."""
 75 | 
 76 |     if array.ndim != ensure_num_dim:
 77 |         raise ValueError('array must be {}-dimensional! '
 78 |                          'It has {} dims with shape {} '
 79 |                          ''.format(ensure_num_dim, array.ndim, array.shape))
 80 | 
 81 |     if not np.issubdtype(ensure_dtype, array.dtype):
 82 |         prev_dtype = array.dtype
 83 |         try:
 84 |             array = array.astype(ensure_dtype)
 85 |         except:
 86 |             raise ValueError('Unable to recast input dtype from {} to required {}!'
 87 |                              ''.format(prev_dtype, ensure_dtype))
 88 | 
 89 |     return array
 90 | 
 91 | 
 92 | def check_callable(input_func, min_num_args=2):
 93 |     """Ensures the input func 1) is callable, and 2) can accept a min # of args"""
 94 | 
 95 |     if not callable(input_func):
 96 |         raise TypeError('Input function must be callable!')
 97 | 
 98 |     from inspect import signature
 99 |     # would not work for C/builtin functions such as numpy.dot
100 |     func_signature = signature(input_func)
101 | 
102 |     if len(func_signature.parameters) < min_num_args:
103 |         raise TypeError('Input func must accept atleast {} inputs'.format(min_num_args))
104 | 
105 |     return input_func
106 | 
107 | 
108 | def get_callable_name(input_func, name=None):
109 |     """Returns the callable name"""
110 | 
111 |     if name is None:
112 |         if hasattr(input_func, '__name__'):
113 |             return input_func.__name__
114 |         else:
115 |             return ''
116 |     else:
117 |         return str(name)
118 | 
119 | _float_eps = np.finfo('float').eps  # machine epsilon reported by np.finfo for 'float'
120 | 
121 | def _ensure_min_eps(x):  # element-wise maximum of x and _float_eps
122 |     return  np.maximum(_float_eps, x)
123 | 
124 | def not_symmetric(matrix):
125 |     """Returns true if the input matrix is not symmetric."""
126 | 
127 |     if not np.isclose(matrix, matrix.T).all():
128 |         return True
129 |     else:
130 |         return False
131 | 
132 | def check_operation_kernel_matrix(operation):
133 |     """Validates whether input is a valid operation on KernelMatrices"""
134 | 
135 |     opr = operation.lower()
136 |     if opr not in config.VALID_KERNEL_MATRIX_OPS:
137 |         raise ValueError('Invalid kernel matrix operation - must be one of:\n{}'
138 |                          ''.format(config.VALID_KERNEL_MATRIX_OPS))
139 | 
140 |     return opr
141 | 
142 | 
143 | def min_max_scale(array):
144 |     """Rescale the array values from 0 to 1 via min-max normalization."""
145 | 
146 |     array = np.array(array)
147 |     min_val = array.min()
148 |     return (array - min_val) / (np.max(array) - min_val)
149 | 
150 | 
151 | def contains_nan_inf(matrix):
152 |     """
153 |     Helper func to check for the presence of NaN or Inf.
154 | 
155 |     Returns True if any element is not finite (Inf) or NaN. Returns False otherwise.
156 | 
157 |     This is designed to works for both dense and sparse matrices!
158 |     """
159 | 
160 |     if issparse(matrix):
161 |         matrix = matrix.todense()
162 | 
163 |     if (not np.isfinite(matrix).all()) \
164 |         or (np.isnan(matrix).any()):
165 |         return True
166 |     else:
167 |         return False
168 | 
169 | 
170 | def is_iterable_but_not_str(input_obj, min_length=1):
171 |     """Boolean check for iterables that are not strings and of a minimum length"""
172 | 
173 |     if not (not isinstance(input_obj, str) and isinstance(input_obj, Iterable)):
174 |         return False
175 | 
176 |     if len(input_obj) < min_length:
177 |         return False
178 |     else:
179 |         return True
180 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
# Default command-line options added to every pytest invocation:
#   -s  disable output capturing, so print() output is shown as tests run
#   -v  verbose reporting, one line per test
[pytest]
addopts = -s -v
3 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy
2 | numpy
3 | scikit-learn
4 | 


--------------------------------------------------------------------------------
/requirements_dev.txt:
--------------------------------------------------------------------------------
 1 | scipy
 2 | numpy
 3 | scikit-learn
 4 | pytest
 5 | pytest-runner
 6 | pyyaml
 7 | python-coveralls
 8 | hypothesis
 9 | bumpversion
10 | wheel
11 | watchdog
12 | flake8
13 | coverage
14 | Sphinx
15 | twine
16 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | 
# Settings consumed by versioneer, which setup.py uses to obtain the package
# version (versioneer.get_version()) and its build commands.
[versioneer]
VCS = git
style = pep440
versionfile_source = kernelmethods/_version.py
versionfile_build = kernelmethods/_version.py
# Left empty: presumably release tags carry no prefix such as "v" - confirm
# against the repository's tag naming.
tag_prefix =
parentdir_prefix = kernelmethods-
 9 | 
# bumpversion settings: each bump is committed and tagged automatically.
# NOTE(review): setup.py takes its version from versioneer.get_version() and
# contains no literal version='...' string, so the setup.py search pattern
# below presumably no longer matches - confirm whether bumpversion is still
# used alongside versioneer.
[bumpversion]
current_version = 0.0.1
commit = True
tag = True

[bumpversion:file:setup.py]
search = version='{current_version}'
replace = version='{new_version}'

[bumpversion:file:kernelmethods/__init__.py]
search = __version__ = '{current_version}'
replace = __version__ = '{new_version}'
22 | 
# Build a universal wheel, i.e. one tagged as usable on both Python 2 and 3.
# NOTE(review): the classifiers in setup.py list Python 3 only - confirm that
# a universal wheel is really intended.
[bdist_wheel]
universal = 1
25 | 
# flake8 linting options: the docs directory is not linted.
[flake8]
exclude = docs
28 | 
[aliases]
# Define setup.py command aliases here
# `python setup.py test` is forwarded to the `pytest` command; setup.py lists
# pytest-runner in setup_requires, which presumably provides that command.
test = pytest
32 | 
[tool:pytest]
# `collect_ignore` is a conftest.py variable, not an ini option, so it had no
# effect here; `--ignore` passed through addopts is the ini-level equivalent.
# NOTE(review): a pytest.ini in the same directory takes precedence over
# setup.cfg, in which case this section is not read at all.
addopts = --ignore=setup.py
35 | 
36 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | """The setup script."""
 5 | 
 6 | from setuptools import setup, find_packages
 7 | import versioneer
 8 | 
 9 | with open('README.rst') as readme_file:
10 |     readme = readme_file.read()
11 | 
12 | with open('HISTORY.rst') as history_file:
13 |     history = history_file.read()
14 | 
15 | requirements = ['scipy',
16 |                 'numpy']
17 | 
18 | setup_requirements = ['pytest-runner', ]
19 | 
20 | test_requirements = ['pytest', ] + requirements
21 | 
22 | setup(name='kernelmethods',
23 |     version=versioneer.get_version(),
24 |     cmdclass=versioneer.get_cmdclass(),
25 |     author="Pradeep Reddy Raamana",
26 |     author_email='raamana@gmail.com',
27 |     classifiers=[
28 |         'Development Status :: 2 - Pre-Alpha',
29 |         'Intended Audience :: Developers',
30 |         'License :: OSI Approved :: Apache Software License',
31 |         'Natural Language :: English',
32 |         'Programming Language :: Python :: 3',
33 |         'Programming Language :: Python :: 3.4',
34 |         'Programming Language :: Python :: 3.5',
35 |         'Programming Language :: Python :: 3.6',
36 |         'Programming Language :: Python :: 3.7',
37 |     ],
38 |     description="kernel methods and classes",
39 |     install_requires=requirements,
40 |     license="Apache Software License 2.0",
41 |     long_description=readme + '\n\n' + history,
42 |     include_package_data=True,
43 |     keywords='kernelmethods',
44 |     packages=find_packages(include=['kernelmethods']),
45 |     setup_requires=setup_requirements,
46 |     test_suite='tests',
47 |     tests_require=test_requirements,
48 |     url='https://github.com/raamana/kernelmethods',
49 |     zip_safe=False,
50 | )
51 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist = py27, py34, py35, py36, flake8
 3 | 
 4 | [travis]
 5 | python =
 6 |     3.6: py36
 7 |     3.5: py35
 8 |     3.4: py34
 9 |     2.7: py27
10 | 
; Lint-only environment: installs flake8 and runs it over the kernelmethods
; package.
[testenv:flake8]
basepython = python
deps = flake8
commands = flake8 kernelmethods
15 | 
; Default test environment: installs the development requirements and runs
; the test suite with pytest, using a per-environment temporary directory.
[testenv]
setenv =
    PYTHONPATH = {toxinidir}
deps =
    -r{toxinidir}/requirements_dev.txt
; If you want to make tox run the tests with the same versions, create a
; requirements.txt with the pinned versions and uncomment the following line:
;     -r{toxinidir}/requirements.txt
commands =
    pip install -U pip
    pytest --basetemp={envtmpdir}
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------