├── examples ├── __init__.py ├── kernels │ ├── sphere │ │ ├── sphere_gaussian_kernel_parameters.py │ │ └── sphere_kernels.py │ └── spd │ │ └── spd_gaussian_kernel_parameters.py └── bo_sphere │ └── benchmark_examples │ └── bo_euclidean_sphere.py ├── BoManifolds ├── kernel_utils │ ├── __init__.py │ ├── kernels_sphere.py │ ├── kernels_nested_sphere.py │ └── kernels_nested_spd.py ├── plot_utils │ ├── __init__.py │ ├── shapes_plots.py │ └── manifolds_plots.py ├── Riemannian_utils │ ├── __init__.py │ ├── sphere_constraints_utils_torch.py │ ├── sphere_constraint_utils.py │ ├── utils.py │ ├── sphere_utils_torch.py │ ├── spd_constraints_utils.py │ ├── spd_constraints_utils_torch.py │ ├── sphere_utils.py │ ├── spd_utils_torch.py │ └── spd_utils.py ├── pymanopt_addons │ ├── __init__.py │ ├── README.txt │ ├── tools │ │ ├── autodiff │ │ │ ├── __init__.py │ │ │ ├── _backend.py │ │ │ ├── _autograd.py │ │ │ ├── _tensorflow.py │ │ │ ├── _theano.py │ │ │ └── _pytorch.py │ │ ├── __init__.py │ │ ├── testing.py │ │ └── multi.py │ └── problem.py ├── BO_test_functions │ ├── __init__.py │ ├── nested_test_functions_sphere.py │ └── nested_test_functions_spd.py ├── euclidean_optimization │ ├── __init__.py │ └── euclidean_constrained_optimize.py ├── manifold_optimization │ ├── __init__.py │ ├── approximate_hessian.py │ ├── numpy_list_converter.py │ └── manifold_gp_fit.py └── nested_mappings │ ├── nested_spheres_optimization.py │ ├── nested_spd_constraints_utils.py │ ├── nested_spd_utils.py │ ├── nested_spheres_utils.py │ └── nested_spd_optimization.py ├── data └── 2Dletters │ └── C.mat ├── setup.py ├── LICENSE └── README.md /examples/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /BoManifolds/kernel_utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /BoManifolds/plot_utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /BoManifolds/Riemannian_utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /BoManifolds/pymanopt_addons/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /BoManifolds/BO_test_functions/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /BoManifolds/euclidean_optimization/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /BoManifolds/manifold_optimization/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /data/2Dletters/C.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NoemieJaquier/GaBOtorch/HEAD/data/2Dletters/C.mat 
--------------------------------------------------------------------------------
/BoManifolds/pymanopt_addons/README.txt:
--------------------------------------------------------------------------------
1 | The functions of this folder come from the modified pymanopt version found in:
2 | https://github.com/leonbottou/pymanopt
3 | 
4 | The changes are in a pull request in the main pymanopt repository. Therefore, the whole folder can be removed once the pull request is accepted.
5 | 
6 | 
--------------------------------------------------------------------------------
/BoManifolds/pymanopt_addons/tools/autodiff/__init__.py:
--------------------------------------------------------------------------------
1 | from ._theano import TheanoBackend
2 | 
3 | from ._autograd import AutogradBackend
4 | 
5 | from ._tensorflow import TensorflowBackend
6 | 
7 | from ._pytorch import PytorchBackend
8 | 
9 | __all__ = ["AutogradBackend", "PytorchBackend",
10 |            "TensorflowBackend", "TheanoBackend"]
--------------------------------------------------------------------------------
/BoManifolds/pymanopt_addons/tools/__init__.py:
--------------------------------------------------------------------------------
1 | import collections
2 | 
3 | 
4 | def make_enum(name, fields):
5 |     return collections.namedtuple(name, fields)(*range(len(fields)))
6 | 
7 | 
8 | class ndarraySequenceMixin:
9 |     # The following attributes ensure that operations on sequences of
10 |     # np.ndarrays with scalar numpy data types such as np.float64 don't attempt
11 |     # to vectorize the scalar variable. Refer to
12 |     #
13 |     #   https://docs.scipy.org/doc/numpy/reference/arrays.classes.html
14 |     #   https://github.com/pymanopt/pymanopt/issues/49
15 |     #
16 |     # for details.
17 |     __array_priority__ = 1000
18 |     __array_ufunc__ = None  # Available since numpy 1.13
--------------------------------------------------------------------------------
/BoManifolds/pymanopt_addons/tools/autodiff/_backend.py:
--------------------------------------------------------------------------------
1 | from functools import wraps
2 | 
3 | 
4 | def assert_backend_available(f):
5 |     @wraps(f)
6 |     def inner(backend, *args, **kwargs):
7 |         if not backend.is_available():
8 |             raise RuntimeError(
9 |                 "Backend `{:s}` is not available".format(str(backend)))
10 |         return f(backend, *args, **kwargs)
11 |     return inner
12 | 
13 | 
14 | class Backend(object):
15 |     def __str__(self):
16 |         return ""
17 | 
18 |     def __id(self, objective, argument):
19 |         return objective
20 | 
21 |     compile_function = compute_gradient = compute_hessian = __id
22 | 
23 |     def __false(self):
24 |         return False
25 | 
26 |     is_available = is_compatible = __false
--------------------------------------------------------------------------------
/BoManifolds/Riemannian_utils/sphere_constraints_utils_torch.py:
--------------------------------------------------------------------------------
1 | import torch
2 | '''
3 | This file is part of the GaBOtorch library.
4 | Authors: Noemie Jaquier and Leonel Rozo, 2020
5 | License: MIT
6 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com
7 | 
8 | The functions of this file are based on functions of botorch (in botorch.optim).
9 | '''
10 | 
11 | 
12 | def post_processing_init_sphere_torch(x):
13 |     """
14 |     This function post-processes vectors so that their norms are equal to 1.
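    (Each vector is divided by its Euclidean norm; inputs are assumed to be nonzero vectors.)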
15 | 
16 |     Parameters
17 |     ----------
18 |     :param x: d-dimensional vectors [N x d]
19 | 
20 |     Returns
21 |     -------
22 |     :return: unit-norm vectors [N x d]
23 | 
24 |     """
25 |     return x / torch.cat(x.shape[-1] * [torch.norm(x, dim=[-1]).unsqueeze(-1)], dim=-1)
--------------------------------------------------------------------------------
/BoManifolds/Riemannian_utils/sphere_constraint_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | '''
3 | This file is part of the GaBOtorch library.
4 | Authors: Noemie Jaquier and Leonel Rozo, 2020
5 | License: MIT
6 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com
7 | 
8 | The functions of this file are based on functions of botorch (in botorch.optim).
9 | '''
10 | 
11 | 
12 | def norm_one_constraint(x):
13 |     """
14 |     This function defines a unit-norm equality constraint on a vector.
15 |     The value returned by the function is 0 if the equality constraint is satisfied.
16 | 
17 |     Parameters
18 |     ----------
19 |     :param x: vector
20 | 
21 |     Returns
22 |     -------
23 |     :return: difference between the norm of x and 1
24 |     """
25 |     return np.linalg.norm(x) - 1.
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | 
3 | # get description from readme file
4 | with open('README.md', 'r') as f:
5 |     long_description = f.read()
6 | 
7 | # setup
8 | setup(
9 |     name='BoManifolds_torch',
10 |     version='',
11 |     description='',
12 |     long_description=long_description,
13 |     long_description_content_type="text/markdown",
14 |     author='Noémie Jaquier, Leonel Rozo',
15 |     author_email='noemie.jaquier@de.bosch.com, leonel.rozo@de.bosch.com',
16 |     maintainer=' ',
17 |     maintainer_email='',
18 |     license=' ',
19 |     url=' ',
20 |     platforms=['Linux Ubuntu'],
21 |     packages=find_packages(),
22 |     classifiers=[
23 |         "Programming Language :: Python :: 2.7",
24 |         "Programming Language :: Python :: 3.5",
25 |         "License :: OSI Approved :: MIT License",
26 |         "Operating System :: POSIX :: Linux",
27 |     ],
28 | )
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2020 Noémie Jaquier
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/BoManifolds/Riemannian_utils/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | '''
3 | This file is part of the GaBOtorch library.
4 | Authors: Noemie Jaquier and Leonel Rozo, 2020
5 | License: MIT
6 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com
7 | 
8 | The functions of this file are based on functions of botorch (in botorch.optim).
9 | '''
10 | 
11 | 
12 | def rotation_matrix_from_axis_angle(ax, angle):
13 |     """
14 |     Gets the rotation matrix from the axis-angle representation using Rodrigues' formula.
15 |     Based on the function of riepybdlib (https://gitlab.martijnzeestraten.nl/martijn/riepybdlib)
16 | 
17 |     Parameters
18 |     ----------
19 |     :param ax: unit axis defining the axis of rotation
20 |     :param angle: angle of rotation
21 | 
22 |     Returns
23 |     -------
24 |     :return: R(ax, angle) = I + sin(angle)*S(ax) + (1 - cos(angle))*S(ax)^2, where S(ax) is the skew-symmetric (cross-product) matrix of ax.
25 |     """
26 |     utilde = vector_to_skew_matrix(ax)
27 |     return np.eye(3) + np.sin(angle)*utilde + (1 - np.cos(angle))*utilde.dot(utilde)
28 | 
29 | 
30 | def vector_to_skew_matrix(q):
31 |     """
32 |     Transform a vector into a skew-symmetric matrix
33 | 
34 |     Parameters
35 |     ----------
36 |     :param q: vector
37 | 
38 |     Returns
39 |     -------
40 |     :return: corresponding skew-symmetric matrix
41 |     """
42 |     return np.array([[0, -q[2], q[1]], [q[2], 0, -q[0]], [-q[1], q[0], 0]])
--------------------------------------------------------------------------------
/BoManifolds/manifold_optimization/approximate_hessian.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | '''
4 | This file is part of the GaBOtorch library.
5 | Authors: Noemie Jaquier and Leonel Rozo, 2020
6 | License: MIT
7 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com
8 | '''
9 | 
10 | 
11 | def get_hessianfd(self, x, a):
12 |     """
13 |     Compute an approximation of the Hessian with finite differences of the gradient.
14 |     This function is based on the manopt function getHessianFD
15 |     (https://www.manopt.org/reference/manopt/core/getHessianFD.html)
16 |     and on the following paper:
17 |     "Riemannian Trust Regions with Finite-Difference Hessian Approximations are Globally Convergent", N. Boumal, GSI'15.
18 | 
19 |     Parameters
20 |     ----------
21 |     :param self: problem of pymanopt
22 |     :param x: base point to compute the Hessian
23 |     :param a: direction where to compute the Hessian
24 | 
25 |     Returns
26 |     -------
27 |     :return: approximate Hessian
28 |     """
29 |     # Step size
30 |     norm_a = self.manifold.norm(x, a)
31 | 
32 |     # Compute the gradient at the current point
33 |     grad = self.grad(x)
34 | 
35 |     # Check that the step is not too small
36 |     if norm_a < 1e-15:
37 |         return np.zeros(grad.shape)
38 | 
39 |     # Parameter: how far do we look?
40 |     epsilon = 2**(-14)
41 | 
42 |     c = epsilon/norm_a
43 | 
44 |     # Compute a point a little further along a and the gradient there.
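    # (Informally, the approximation computed below is
    #  Hess f(x)[a] ~ (transport of grad f(x1) back to x - grad f(x)) / c,
    #  with x1 = Retr_x(c * a) the retraction of the scaled direction a.)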
45 |     x1 = self.manifold.retr(x, c*a)
46 | 
47 |     grad1 = self.grad(x1)
48 | 
49 |     # Transport grad1 from x1 to x
50 |     grad1 = self.manifold.transp(x1, x, grad1)
51 | 
52 |     # Return the finite difference of them
53 |     if type(x) in (list, tuple) or issubclass(type(x), (list, tuple)):
54 |         # Handle the case where x is a list or a tuple (typically for products of manifolds)
55 |         for k in range(len(x)):
56 |             grad1[k] /= c
57 |             grad[k] /= c
58 |         finite_difference_grad = grad1 - grad
59 |     else:
60 |         finite_difference_grad = grad1/c - grad/c
61 | 
62 |     return finite_difference_grad
--------------------------------------------------------------------------------
/BoManifolds/pymanopt_addons/tools/testing.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing tools for testing correctness in Pymanopt. Note, these
3 | currently require autograd.
4 | 
5 | Note: the methods for generating rgrad, egrad2rgrad, ehess and ehess2rhess
6 | will only be correct if the manifold is a submanifold of Euclidean space,
7 | that is if the projection is an orthogonal projection onto the tangent space.
8 | """
9 | import numpy as np
10 | 
11 | from autograd import grad, jacobian
12 | 
13 | 
14 | def rgrad(cost, proj):
15 |     """
16 |     Generates the Riemannian gradient of cost. Cost must be defined using
17 |     autograd.numpy.
18 |     """
19 |     return lambda x: proj(x, grad(cost)(x))
20 | 
21 | 
22 | def egrad2rgrad(proj):
23 |     """
24 |     Generates an egrad2rgrad function.
25 |     """
26 |     return lambda x, g: proj(x, g)
27 | 
28 | 
29 | def rhess(cost, proj):
30 |     """
31 |     Generates the Riemannian hessian of the cost. Specifically, rhess(cost,
32 |     proj)(x, u) is the directional derivative of cost at point X on the
33 |     manifold, in direction u.
34 |     cost and proj must be defined using autograd.numpy.
35 |     See http://sites.uclouvain.be/absil/2013-01/Weingarten_07PA_techrep.pdf
36 |     for some discussion.
37 |     proj and cost must be defined using autograd.
38 |     Currently this is correct but not efficient, because of the jacobian-
39 |     vector product. Hopefully this can be fixed in future.
40 |     """
41 |     return lambda x, u: proj(x, np.tensordot(jacobian(rgrad(cost, proj))(x), u,
42 |                                              axes=u.ndim))
43 | 
44 | 
45 | def ehess2rhess(proj):
46 |     """
47 |     Generates an ehess2rhess function for a manifold which is a sub-manifold
48 |     of Euclidean space.
49 |     ehess2rhess(proj)(x, egrad, ehess, u) converts the Euclidean hessian ehess
50 |     at the point x to a Riemannian hessian. That is the directional
51 |     derivative of the gradient in the direction u.
52 |     proj must be defined using autograd.numpy.
53 |     This will not be an efficient implementation because of missing support
54 |     for efficient jacobian-vector products in autograd.
55 |     """
56 |     # Differentiate proj w.r.t. the first argument
57 |     d_proj = jacobian(proj)
58 |     return lambda x, egrad, ehess, u: proj(x, ehess +
59 |                                            np.tensordot(d_proj(x, egrad), u,
60 |                                                         axes=u.ndim))
--------------------------------------------------------------------------------
/BoManifolds/pymanopt_addons/tools/multi.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def multiprod(A, B):
5 |     """
6 |     Inspired by MATLAB multiprod function by Paolo de Leva. A and B are
7 |     assumed to be arrays containing M matrices, that is, A and B have
8 |     dimensions A: (M, N, P), B: (M, P, Q). multiprod multiplies each matrix
9 |     in A with the corresponding matrix in B, using matrix multiplication,
10 | so multiprod(A, B) has dimensions (M, N, Q). 11 | """ 12 | 13 | # First check if we have been given just one matrix 14 | if len(np.shape(A)) == 2: 15 | return np.dot(A, B) 16 | 17 | # Old (slower) implementation: 18 | # a = A.reshape(np.hstack([np.shape(A), [1]])) 19 | # b = B.reshape(np.hstack([[np.shape(B)[0]], [1], np.shape(B)[1:]])) 20 | # return np.sum(a * b, axis=2) 21 | 22 | # Approx 5x faster, only supported by numpy version >= 1.6: 23 | return np.einsum('ijk,ikl->ijl', A, B) 24 | 25 | 26 | def multitransp(A): 27 | """ 28 | Inspired by MATLAB multitransp function by Paolo de Leva. A is assumed to 29 | be an array containing M matrices, each of which has dimension N x P. 30 | That is, A is an M x N x P array. Multitransp then returns an array 31 | containing the M matrix transposes of the matrices in A, each of which 32 | will be P x N. 33 | """ 34 | # First check if we have been given just one matrix 35 | if A.ndim == 2: 36 | return A.T 37 | return np.transpose(A, (0, 2, 1)) 38 | 39 | 40 | def multisym(A): 41 | # Inspired by MATLAB multisym function by Nicholas Boumal. 42 | return 0.5 * (A + multitransp(A)) 43 | 44 | 45 | def multiskew(A): 46 | # Inspired by MATLAB multiskew function by Nicholas Boumal. 47 | return 0.5 * (A - multitransp(A)) 48 | 49 | 50 | def multieye(k, n): 51 | # Creates a k x n x n array containing k (n x n) identity matrices. 52 | return np.tile(np.eye(n), (k, 1, 1)) 53 | 54 | 55 | def multilog(A, pos_def=False): 56 | if not pos_def: 57 | raise NotImplementedError 58 | 59 | # Computes the logm of each matrix in an array containing k positive 60 | # definite matrices. This is much faster than scipy.linalg.logm even 61 | # for a single matrix. Could potentially be improved further. 62 | w, v = np.linalg.eigh(A) 63 | w = np.expand_dims(np.log(w), axis=-1) 64 | return multiprod(v, w * multitransp(v)) 65 | 66 | 67 | def multiexp(A, sym=False): 68 | if not sym: 69 | raise NotImplementedError 70 | 71 | # Compute the expm of each matrix in an array of k symmetric matrices. 72 | # Sometimes faster than scipy.linalg.expm even for a single matrix. 73 | w, v = np.linalg.eigh(A) 74 | w = np.expand_dims(np.exp(w), axis=-1) 75 | return multiprod(v, w * multitransp(v)) 76 | -------------------------------------------------------------------------------- /BoManifolds/pymanopt_addons/tools/autodiff/_autograd.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module containing functions to differentiate functions using autograd. 3 | """ 4 | try: 5 | import autograd.numpy as np 6 | from autograd import grad 7 | except ImportError: 8 | np = None 9 | grad = None 10 | 11 | from ._backend import Backend, assert_backend_available 12 | 13 | 14 | class AutogradBackend(Backend): 15 | def __str__(self): 16 | return "autograd" 17 | 18 | def is_available(self): 19 | return np is not None and grad is not None 20 | 21 | @assert_backend_available 22 | def is_compatible(self, objective, argument): 23 | return callable(objective) 24 | 25 | @assert_backend_available 26 | def compile_function(self, objective, argument): 27 | def func(x): 28 | if type(x) in (list, tuple): 29 | return objective([np.array(xi) for xi in x]) 30 | else: 31 | return objective(np.array(x)) 32 | 33 | return func 34 | 35 | @assert_backend_available 36 | def compute_gradient(self, objective, argument): 37 | """ 38 | Compute the gradient of 'objective' with respect to the first 39 | argument and return as a function. 
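        A hypothetical usage sketch (`cost` stands for any callable written with
        autograd.numpy; the 'argument' parameter is unused by this backend):

            backend = AutogradBackend()
            gradient = backend.compute_gradient(cost, None)
            g = gradient(x)  # numpy array with the same shape as x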
40 |         """
41 |         g = grad(objective)
42 | 
43 |         # Sometimes x will be some custom type, e.g. with the FixedRankEmbedded
44 |         # manifold. Therefore cast it to a numpy.array.
45 |         def gradient(x):
46 |             if type(x) in (list, tuple):
47 |                 return g([np.array(xi) for xi in x])
48 |             else:
49 |                 return g(np.array(x))
50 |         return gradient
51 | 
52 |     @assert_backend_available
53 |     def compute_hessian(self, objective, argument):
54 |         h = _hessian_vector_product(objective)
55 | 
56 |         def hess_vec_prod(x, a):
57 |             return h(x, a)
58 |         return hess_vec_prod
59 | 
60 | 
61 | def _hessian_vector_product(fun, argnum=0):
62 |     """Builds a function that returns the exact Hessian-vector product.
63 |     The returned function has arguments (*args, vector, **kwargs). Note,
64 |     this function will be incorporated into autograd, with name
65 |     hessian_vector_product. Once it has been, this function can be
66 |     deleted."""
67 |     fun_grad = grad(fun, argnum)
68 | 
69 |     def vector_dot_grad(*args, **kwargs):
70 |         args, vector = args[:-1], args[-1]
71 |         try:
72 |             return np.tensordot(fun_grad(*args, **kwargs), vector,
73 |                                 axes=vector.ndim)
74 |         except AttributeError:
75 |             # Assume we are on the product manifold.
76 |             return np.sum([np.tensordot(fun_grad(*args, **kwargs)[k],
77 |                                         vector[k], axes=vector[k].ndim)
78 |                            for k in range(len(vector))])
79 |     # Grad wrt original input.
80 |     return grad(vector_dot_grad, argnum)
--------------------------------------------------------------------------------
/BoManifolds/pymanopt_addons/tools/autodiff/_tensorflow.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing functions to differentiate functions using tensorflow.
3 | """
4 | try:
5 |     import tensorflow as tf
6 |     try:
7 |         from tensorflow.python.ops.gradients import _hessian_vector_product
8 |     except ImportError:
9 |         from tensorflow.python.ops.gradients_impl import \
10 |             _hessian_vector_product
11 | except ImportError:
12 |     tf = None
13 | 
14 | from ._backend import Backend, assert_backend_available
15 | 
16 | 
17 | class TensorflowBackend(Backend):
18 |     def __init__(self):
19 |         if tf is not None:
20 |             self._session = tf.Session()
21 | 
22 |     def __str__(self):
23 |         return "tensorflow"
24 | 
25 |     def is_available(self):
26 |         return tf is not None
27 | 
28 |     @assert_backend_available
29 |     def is_compatible(self, objective, argument):
30 |         if isinstance(objective, tf.Tensor):
31 |             if (argument is None or not
32 |                     isinstance(argument, tf.Variable) and not
33 |                     all([isinstance(arg, tf.Variable)
34 |                          for arg in argument])):
35 |                 raise ValueError(
36 |                     "Tensorflow backend requires an argument (or sequence of "
37 |                     "arguments) with respect to which compilation is to be "
38 |                     "carried out")
39 |             return True
40 |         return False
41 | 
42 |     @assert_backend_available
43 |     def compile_function(self, objective, argument):
44 |         if not isinstance(argument, list):
45 | 
46 |             def func(x):
47 |                 feed_dict = {argument: x}
48 |                 return self._session.run(objective, feed_dict)
49 |         else:
50 | 
51 |             def func(x):
52 |                 feed_dict = {i: d for i, d in zip(argument, x)}
53 |                 return self._session.run(objective, feed_dict)
54 | 
55 |         return func
56 | 
57 |     @assert_backend_available
58 |     def compute_gradient(self, objective, argument):
59 |         """
60 |         Compute the gradient of 'objective' and return as a function.
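        Note: this backend assumes TensorFlow 1.x graph mode; the returned
        function evaluates the gradient through the tf.Session created in
        __init__.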
61 |         """
62 |         tfgrad = tf.gradients(objective, argument)
63 | 
64 |         if not isinstance(argument, list):
65 | 
66 |             def grad(x):
67 |                 feed_dict = {argument: x}
68 |                 return self._session.run(tfgrad[0], feed_dict)
69 | 
70 |         else:
71 | 
72 |             def grad(x):
73 |                 feed_dict = {i: d for i, d in zip(argument, x)}
74 |                 return self._session.run(tfgrad, feed_dict)
75 | 
76 |         return grad
77 | 
78 |     @assert_backend_available
79 |     def compute_hessian(self, objective, argument):
80 |         if not isinstance(argument, list):
81 |             argA = tf.zeros_like(argument)
82 |             tfhess = _hessian_vector_product(objective, [argument], [argA])
83 | 
84 |             def hess(x, a):
85 |                 feed_dict = {argument: x, argA: a}
86 |                 return self._session.run(tfhess[0], feed_dict)
87 | 
88 |         else:
89 |             argA = [tf.zeros_like(arg) for arg in argument]
90 |             tfhess = _hessian_vector_product(objective, argument, argA)
91 | 
92 |             def hess(x, a):
93 |                 feed_dict = {i: d for i, d in zip(argument+argA, x+a)}
94 |                 return self._session.run(tfhess, feed_dict)
95 | 
96 |         return hess
--------------------------------------------------------------------------------
/BoManifolds/Riemannian_utils/sphere_utils_torch.py:
--------------------------------------------------------------------------------
1 | import torch
2 | '''
3 | This file is part of the GaBOtorch library.
4 | Authors: Noemie Jaquier and Leonel Rozo, 2020
5 | License: MIT
6 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com
7 | 
8 | The functions of this file are based on functions of botorch (in botorch.optim).
9 | '''
10 | 
11 | 
12 | def sphere_distance_torch(x1, x2, diag=False):
13 |     """
14 |     This function computes the Riemannian distance between points on a sphere manifold.
15 | 
16 |     Parameters
17 |     ----------
18 |     :param x1: points on the sphere N1 x dim or b1 x ... x bk x N1 x dim
19 |     :param x2: points on the sphere N2 x dim or b1 x ... x bk x N2 x dim
20 | 
21 |     Optional parameters
22 |     -------------------
23 |     :param diag: Should we return the whole distance matrix, or just the diagonal? If True, we must have `x1 == x2`.
24 | 
25 |     Returns
26 |     -------
27 |     :return: matrix of manifold distance between the points in x1 and x2 N1 x N2 or b1 x ... x bk x N1 x N2
28 |     """
29 |     if diag is False:
30 |         # Expand dimensions to compute all vector-vector distances
31 |         x1 = x1.unsqueeze(-2)
32 |         x2 = x2.unsqueeze(-3)
33 | 
34 |         # Repeat x and y data along -2 and -3 dimensions to have b1 x ... x ndata_x x ndata_y x dim arrays
35 |         x1 = torch.cat(x2.shape[-2] * [x1], dim=-2)
36 |         x2 = torch.cat(x1.shape[-3] * [x2], dim=-3)
37 | 
38 |         # Expand dimension to perform inner product
39 |         x1 = x1.unsqueeze(-2)
40 |         x2 = x2.unsqueeze(-1)
41 | 
42 |         # Compute the inner product (should be [-1,1])
43 |         inner_product = torch.bmm(x1.view(-1, 1, x1.shape[-1]), x2.view(-1, x2.shape[-2], 1)).view(x1.shape[:-2])
44 | 
45 |     else:
46 |         # Expand dimensions to compute all vector-vector distances
47 |         x1 = x1.unsqueeze(-1).transpose(-1, -2)
48 |         x2 = x2.unsqueeze(-1)
49 |         inner_product = torch.bmm(x1, x2).squeeze(-1)
50 | 
51 |     # Clamp in case any value is not in the interval [-1,1]
52 |     # A small number is added/subtracted to the bounds to avoid NaNs during backward computation.
53 |     inner_product = inner_product.clamp(-1.+1e-15, 1.-1e-15)
54 | 
55 |     return torch.acos(inner_product)
56 | 
57 | 
58 | def rotation_from_sphere_points_torch(x, y):
59 |     """
60 |     Gets the rotation matrix that moves x to y in the geodesic path on the sphere.
61 |     Based on the equations of "Analysis of principal nested spheres", Sung et al.
2012 (appendix)
62 | 
63 |     Parameters
64 |     ----------
65 |     :param x: point on a sphere
66 |     :param y: point on a sphere
67 | 
68 |     Returns
69 |     -------
70 |     :return: rotation matrix
71 |     """
72 |     if x.dim() == 1:
73 |         x = x.unsqueeze(-2)
74 |     if y.dim() == 1:
75 |         y = y.unsqueeze(-2)
76 | 
77 |     dim = x.shape[1]
78 | 
79 |     # Compute the inner product
80 |     inner_product = torch.mm(x, y.T)
81 |     # Clamp in case any value is not in the interval [-1,1]
82 |     # A small number is added/subtracted to the bounds to avoid NaNs during backward computation.
83 |     inner_product = inner_product.clamp(-1. + 1e-15, 1. - 1e-15)
84 | 
85 |     # Compute intermediate vector
86 |     c_vec = x - y * inner_product
87 |     c_vec = c_vec / torch.norm(c_vec)
88 | 
89 |     R = torch.eye(dim, dim, dtype=inner_product.dtype) + \
90 |         torch.sin(torch.acos(inner_product)) * (torch.mm(y.T, c_vec) - torch.mm(c_vec.T, y)) + \
91 |         (inner_product - 1.) * (torch.mm(y.T, y) + torch.mm(c_vec.T, c_vec))
92 | 
93 |     return R
--------------------------------------------------------------------------------
/BoManifolds/BO_test_functions/nested_test_functions_sphere.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from BoManifolds.nested_mappings.nested_spheres_utils import projection_from_sphere_to_subsphere, \
4 |     projection_from_subsphere_to_sphere
5 | 
6 | '''
7 | This file is part of the GaBOtorch library.
8 | Authors: Noemie Jaquier and Leonel Rozo, 2020
9 | License: MIT
10 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com
11 | '''
12 | 
13 | 
14 | def nested_function_sphere(x, subsphere_manifold, test_function, sphere_axes, sphere_distances_to_axes):
15 |     """
16 |     This function computes the value of a test function defined on a subsphere S^m of the original sphere manifold
17 |     S^d.
18 | 
19 |     Note: test functions and their global minima are defined in test_function_sphere.py.
20 | 
21 |     Parameters
22 |     ----------
23 |     :param x: point on the sphere (torch tensor)
24 |     :param test_function: test function defined on the subsphere S^m
25 |     :param subsphere_manifold: m-dimensional sphere manifold (pymanopt manifold)
26 |     :param sphere_axes: axes of the nested spheres belonging to [Sd, Sd-1, ..., Sm+1]
27 |     :param sphere_distances_to_axes: distances from the axes w.r.t each point of the nested spheres of
28 |         [Sd, Sd-1, ..., Sm+1]
29 | 
30 |     Returns
31 |     -------
32 |     :return: value of the test function at x (numpy [1,1] array)
33 | 
34 |     """
35 |     # Two-dimensional input
36 |     if x.dim() < 2:
37 |         x = x[None]
38 |     # Projection into subsphere
39 |     x_subsphere = projection_from_sphere_to_subsphere(x, sphere_axes, sphere_distances_to_axes)[-1]
40 | 
41 |     # Compute the test function value
42 |     return test_function(x_subsphere, subsphere_manifold)
43 | 
44 | 
45 | def optimum_nested_function_sphere(optimum_function, subsphere_manifold, sphere_axes, sphere_distances_to_axes):
46 |     """
47 |     This function returns the global minimum (x, f(x)) of a test function defined on a subsphere S^m of the original
48 |     sphere manifold S^d.
49 |     Note that the location of the global minimum is unique on the subsphere but not on the original sphere. All the
50 |     locations on the sphere that are projected onto the minimum point on the nested sphere are minima of the function.
51 |     We return here the minimum point of the sphere belonging directly to the nested sphere.
52 |     Note: test functions and their global minima are defined in test_function_sphere.py.
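    Example (hypothetical sketch; `optimum_ackley_sphere` stands for any optimum function from test_function_sphere.py):
        opt_x, opt_y = optimum_nested_function_sphere(optimum_ackley_sphere, subsphere_manifold,
                                                      sphere_axes, sphere_distances_to_axes)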
53 | 
54 |     Parameters
55 |     ----------
56 |     :param optimum_function: function returning the global minimum (x, f(x)) of the test function on the subsphere
57 |     :param subsphere_manifold: m-dimensional sphere manifold (pymanopt manifold)
58 |     :param sphere_axes: axes of the nested spheres belonging to [Sd, Sd-1, ..., Sd-r+1]
59 |     :param sphere_distances_to_axes: distances from the axes w.r.t each point of the nested spheres of
60 |         [Sd, Sd-1, ..., Sd-r+1]
61 | 
62 |     Returns
63 |     -------
64 |     :return opt_x: location of the global minimum of the test function on the sphere
65 |     :return opt_y: value of the global minimum of the test function on the sphere
66 |     """
67 |     # Global minimum on subsphere
68 |     nested_opt_x, opt_y = optimum_function(subsphere_manifold)
69 |     # To torch
70 |     nested_opt_x_torch = torch.tensor(nested_opt_x, dtype=sphere_axes[0].dtype)
71 | 
72 |     # Projection onto the original sphere space
73 |     opt_x_torch = projection_from_subsphere_to_sphere(nested_opt_x_torch, sphere_axes, sphere_distances_to_axes)[-1]
74 | 
75 |     # To numpy
76 |     opt_x = opt_x_torch.numpy()
77 | 
78 |     return opt_x, opt_y
79 | 
80 | 
81 | 
--------------------------------------------------------------------------------
/BoManifolds/Riemannian_utils/spd_constraints_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from BoManifolds.Riemannian_utils.spd_utils import vector_to_symmetric_matrix_mandel
4 | 
5 | '''
6 | This file is part of the GaBOtorch library.
7 | Authors: Noemie Jaquier and Leonel Rozo, 2020
8 | License: MIT
9 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com
10 | 
11 | The functions of this file are based on functions of botorch (in botorch.optim).
12 | '''
13 | 
14 | 
15 | def min_eigenvalue_constraint(x_vec, min_eigenvalue):
16 |     """
17 |     This function defines an inequality constraint on the minimum eigenvalue of a SPD matrix.
18 |     The value returned by the function is positive if the inequality constraint is satisfied.
19 | 
20 |     Parameters
21 |     ----------
22 |     :param x_vec: SPD matrix in Mandel vector form
23 |     :param min_eigenvalue: minimum eigenvalue to satisfy the constraint
24 | 
25 |     Returns
26 |     -------
27 |     :return: difference between minimum eigenvalue of x and minimum tolerated eigenvalue
28 |     """
29 |     x = vector_to_symmetric_matrix_mandel(x_vec)
30 |     eigenvalues = np.linalg.eigvals(x)
31 |     return np.min(eigenvalues) - min_eigenvalue
32 | 
33 | 
34 | def max_eigenvalue_constraint(x_vec, max_eigenvalue):
35 |     """
36 |     This function defines an inequality constraint on the maximum eigenvalue of a SPD matrix.
37 |     The value returned by the function is positive if the inequality constraint is satisfied.
38 | 
39 |     Parameters
40 |     ----------
41 |     :param x_vec: SPD matrix in Mandel vector form
42 |     :param max_eigenvalue: maximum eigenvalue to satisfy the constraint
43 | 
44 |     Returns
45 |     -------
46 |     :return: difference between maximum tolerated eigenvalue and maximum eigenvalue of x
47 |     """
48 |     x = vector_to_symmetric_matrix_mandel(x_vec)
49 |     eigenvalues = np.linalg.eigvals(x)
50 |     return max_eigenvalue - np.max(eigenvalues)
51 | 
52 | 
53 | def min_eigenvalue_constraint_cholesky(x_chol, min_eigenvalue):
54 |     """
55 |     This function defines an inequality constraint on the minimum eigenvalue of a SPD matrix.
56 |     The value returned by the function is positive if the inequality constraint is satisfied.
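    (The vector x_chol is assumed to hold the lower-triangular entries of the Cholesky factor in the
    row-wise order produced by np.tril_indices, as used below.)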
57 | 
58 |     Parameters
59 |     ----------
60 |     :param x_chol: cholesky decomposition of a SPD matrix in vector form
61 |     :param min_eigenvalue: minimum eigenvalue to satisfy the constraint
62 | 
63 |     Returns
64 |     -------
65 |     :return: difference between minimum eigenvalue of x and minimum tolerated eigenvalue
66 |     """
67 |     dim_vec = x_chol.shape[0]
68 |     dim = int((-1.0 + (1.0 + 8.0 * dim_vec) ** 0.5) / 2.0)
69 |     indices = np.tril_indices(dim)
70 |     xL = np.zeros((dim, dim))
71 |     xL[indices] = x_chol
72 | 
73 |     x_mat = np.dot(xL, xL.T)
74 |     eigenvalues = np.linalg.eigvals(x_mat)
75 |     return np.min(eigenvalues) - min_eigenvalue
76 | 
77 | 
78 | def max_eigenvalue_constraint_cholesky(x_chol, max_eigenvalue):
79 |     """
80 |     This function defines an inequality constraint on the maximum eigenvalue of a SPD matrix.
81 |     The value returned by the function is positive if the inequality constraint is satisfied.
82 | 
83 |     Parameters
84 |     ----------
85 |     :param x_chol: cholesky decomposition of a SPD matrix in vector form
86 |     :param max_eigenvalue: maximum eigenvalue to satisfy the constraint
87 | 
88 |     Returns
89 |     -------
90 |     :return: difference between maximum tolerated eigenvalue and maximum eigenvalue of x
91 |     """
92 |     dim_vec = x_chol.shape[0]
93 |     dim = int((-1.0 + (1.0 + 8.0 * dim_vec) ** 0.5) / 2.0)
94 |     indices = np.tril_indices(dim)
95 |     xL = np.zeros((dim, dim))
96 |     xL[indices] = x_chol
97 | 
98 |     x_mat = np.dot(xL, xL.T)
99 |     eigenvalues = np.linalg.eigvals(x_mat)
100 |     return max_eigenvalue - np.max(eigenvalues)
101 | 
102 | 
103 | 
104 | 
--------------------------------------------------------------------------------
/BoManifolds/plot_utils/shapes_plots.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from BoManifolds.Riemannian_utils.utils import rotation_matrix_from_axis_angle
4 | from BoManifolds.Riemannian_utils.sphere_utils import get_axisangle
5 | 
6 | '''
7 | This file is part of the GaBOtorch library.
8 | Authors: Noemie Jaquier and Leonel Rozo, 2020
9 | License: MIT
10 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com
11 | 
12 | The functions of this file are based on functions of botorch (in botorch.optim).
13 | ''' 14 | 15 | 16 | def plot_ellipse3d(ax, ellipse_cov, center=None, color=None, alpha=0.2, linewidth=0, n_elems=50, **kwargs): 17 | """ 18 | Plot a 3D ellipsoid 19 | Based on the function of riepybdlib (https://gitlab.martijnzeestraten.nl/martijn/riepybdlib) 20 | 21 | Parameters 22 | ---------- 23 | :param ax: figure axes 24 | :param ellipse_cov: covariance matrix of the ellipsoid 25 | 26 | Optional parameters 27 | ------------------- 28 | :param center: center of the ellipsoid 29 | :param color: color of the surface 30 | :param alpha: transparency index 31 | :param linewidth: linewidth of the surface 32 | :param n_elems: number of points in the surface 33 | :param kwargs: 34 | 35 | Returns 36 | ------- 37 | :return: - 38 | """ 39 | center = center or [0, 0, 0] 40 | color = color or [0.8, 0.8, 0.8] 41 | 42 | u = np.linspace(0, 2 * np.pi, n_elems) 43 | v = np.linspace(0, np.pi, n_elems) 44 | 45 | D, V = np.linalg.eig(ellipse_cov) 46 | D = np.real(D) 47 | V = np.real(V) 48 | 49 | x0 = D[0] * np.outer(np.cos(u), np.sin(v)) 50 | y0 = D[1] * np.outer(np.sin(u), np.sin(v)) 51 | z0 = D[2] * np.outer(np.ones(np.size(u)), np.cos(v)) 52 | 53 | xyz0 = np.stack((x0, y0, z0), axis=2) 54 | xyz0 = np.reshape(xyz0, (n_elems*n_elems, 3)).T 55 | xyz = np.dot(V, xyz0) 56 | xyz = np.reshape(xyz.T, (n_elems, n_elems, 3)) 57 | 58 | x = xyz[:, :, 0] + center[0] 59 | y = xyz[:, :, 1] + center[1] 60 | z = xyz[:, :, 2] + center[2] 61 | 62 | ax.plot_surface(x, y, z, rstride=4, cstride=4, color=color, linewidth=linewidth, alpha=alpha, **kwargs) 63 | 64 | 65 | def plot_plane(ax, normal_vector, point_on_plane, l_vert=1, color='w', line_color='black', alpha=0.15, linewidth=0.5, 66 | **kwargs): 67 | """ 68 | Plot a plane in R3. 69 | Based on the function of riepybdlib (https://gitlab.martijnzeestraten.nl/martijn/riepybdlib) 70 | 71 | Parameters 72 | ---------- 73 | :param ax: figure axes 74 | :param normal_vector: normal vector of the plane 75 | :param point_on_plane: point lying on the plane 76 | 77 | Optional parameters 78 | ------------------- 79 | :param l_vert: length/width of the displayed plane 80 | :param color: color of the plane 81 | :param line_color: color of the contour of the plane 82 | :param alpha: transparency index 83 | :param linewidth: linewidth of the border of the plane 84 | :param kwargs: 85 | 86 | Returns 87 | ------- 88 | :return: - 89 | """ 90 | # Tangent axis at 0 rotation: 91 | T0 = np.array([[1, 0], [0, 1], [0, 0]]) 92 | 93 | # Rotation matrix with respect to zero: 94 | (axis, ang) = get_axisangle(normal_vector) 95 | R = rotation_matrix_from_axis_angle(axis, -ang) 96 | 97 | # Tangent axis in new plane: 98 | T = R.T.dot(T0) 99 | 100 | # Compute vertices of tangent plane at g 101 | hl = 0.5 * l_vert 102 | X = [[hl, hl], # p0 103 | [hl, -hl], # p1 104 | [-hl, hl], # p2 105 | [-hl, -hl]] # p3 106 | X = np.array(X).T 107 | points = (T.dot(X).T + point_on_plane).T 108 | psurf = points.reshape((-1, 2, 2)) 109 | 110 | ax.plot_surface(psurf[0, :], psurf[1, :], psurf[2, :], color=color, alpha=alpha, linewidth=0, **kwargs) 111 | 112 | # Plot contours of the tangent space 113 | points_lines = points[:, [0, 1, 3, 2, 0]] 114 | ax.plot(points_lines[0], points_lines[1], points_lines[2], color=line_color, linewidth=linewidth) 115 | -------------------------------------------------------------------------------- /BoManifolds/nested_mappings/nested_spheres_optimization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import 
gpytorch 4 | 5 | import pymanopt.manifolds as pyman_man 6 | 7 | from BoManifolds.pymanopt_addons.problem import Problem 8 | 9 | from BoManifolds.Riemannian_utils.sphere_utils_torch import sphere_distance_torch 10 | from BoManifolds.nested_mappings.nested_spheres_utils import projection_from_subsphere_to_sphere 11 | 12 | ''' 13 | This file is part of the GaBOtorch library. 14 | Authors: Noemie Jaquier and Leonel Rozo, 2020 15 | License: MIT 16 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com 17 | ''' 18 | 19 | 20 | def min_error_reconstruction_cost(x_data, x_subsphere, sphere_axes, sphere_distances): 21 | """ 22 | This function computes the squared error between a set of data x on the sphere and their reconstruction from the 23 | corresponding projection from the nested subsphere. 24 | 25 | Parameters 26 | ---------- 27 | :param x_data: original data on the sphere S^d 28 | :param x_subsphere: data on the subsphere S^d-r 29 | :param sphere_axes: list of nested sphere axes [S^d, S^d-1, ... S^d-r+1] 30 | :param sphere_distances: list of nested sphere distances to axes [S^d, S^d-1, ... S^d-r+1] 31 | 32 | Returns 33 | ------- 34 | :return: sum of squared distances between reconstructed and original data on the sphere 35 | """ 36 | x_reconstructed = projection_from_subsphere_to_sphere(x_subsphere, sphere_axes, sphere_distances)[-1] 37 | cost = sphere_distance_torch(x_data, x_reconstructed, diag=True) 38 | return torch.sum(cost * cost) 39 | 40 | 41 | def optimize_reconstruction_parameters_nested_sphere(x_data, x_subsphere, sphere_axes, solver, 42 | nb_init_candidates=100): 43 | """ 44 | This function computes the distances-to-axis parameters of the "projection_from_subsphere_to_sphere", so that the 45 | distance between the original and reconstructed data on the sphere S^d is minimized. 46 | The problem is treated as an unconstrained optimization problem on a product of Euclidean manifolds 47 | by transforming the interval [0,pi] for the distances with a sigmoid function. 48 | 49 | Parameters 50 | ---------- 51 | :param x_data: original data on the sphere S^d 52 | :param x_subsphere: data on the subsphere S^d-r 53 | :param sphere_axes: list of nested sphere axes [S^d, S^d-1, ... S^d-r+1] 54 | :param solver: optimization solver 55 | 56 | Optional parameters 57 | ------------------- 58 | :param nb_init_candidates: number of initial candidates for the optimization 59 | 60 | Returns 61 | ------- 62 | :return: list of nested sphere distances to axes [S^d, S^d-1, ... 
S^d-r+1]
63 |     """
64 |     # Dimensions
65 |     dim = x_data.shape[1]
66 |     latent_dim = x_subsphere.shape[1]
67 | 
68 |     # Product of Euclidean manifold
69 |     manifolds_list = [pyman_man.Euclidean(1) for dim in range(dim, latent_dim, -1)]
70 |     product_manifold = pyman_man.Product(manifolds_list)
71 | 
72 |     # Interval constraint [0,pi] for the distances to axis
73 |     radius_constraint = gpytorch.constraints.Interval(0., np.pi)
74 | 
75 |     # Define the reconstruction cost
76 |     def reconstruction_cost(parameters):
77 |         sphere_distances = [radius_constraint.transform(p) for p in parameters]
78 |         return min_error_reconstruction_cost(x_data, x_subsphere, sphere_axes, sphere_distances)
79 | 
80 |     # Generate candidates for initial data
81 |     x0_candidates = [product_manifold.rand() for i in range(nb_init_candidates)]
82 |     x0_candidates_torch = []
83 |     for x0 in x0_candidates:
84 |         x0_candidates_torch.append([torch.from_numpy(x) for x in x0])
85 |     y0_candidates = [reconstruction_cost(x0_candidates_torch[i]) for i in range(nb_init_candidates)]
86 | 
87 |     # Initialize with the best of the candidates
88 |     y0, x_init_idx = torch.Tensor(y0_candidates).min(0)
89 |     x0 = x0_candidates[x_init_idx]
90 | 
91 |     # Define the problem
92 |     reconstruction_problem = Problem(manifold=product_manifold, cost=reconstruction_cost, arg=torch.Tensor(),
93 |                                      verbosity=0)
94 |     # Solve
95 |     sphere_parameters_np = solver.solve(reconstruction_problem, x=x0)
96 | 
97 |     # Return torch data
98 |     return [radius_constraint.transform(torch.Tensor(distance)) for distance in sphere_parameters_np]
--------------------------------------------------------------------------------
/BoManifolds/pymanopt_addons/tools/autodiff/_theano.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing functions to compile and differentiate Theano graphs. Part of
3 | the pymanopt package.
4 | 
5 | Jamie Townsend December 2014
6 | """
7 | try:
8 |     import theano
9 |     import theano.tensor as T
10 |     from theano.gradient import disconnected_grad
11 | except ImportError:
12 |     theano = None
13 |     T = None
14 | 
15 | from ._backend import Backend, assert_backend_available
16 | 
17 | 
18 | class TheanoBackend(Backend):
19 |     def __str__(self):
20 |         return "theano"
21 | 
22 |     def is_available(self):
23 |         return theano is not None and T is not None
24 | 
25 |     @assert_backend_available
26 |     def is_compatible(self, objective, argument):
27 |         if isinstance(objective, T.TensorVariable):
28 |             if (argument is None or not
29 |                     isinstance(argument, T.TensorVariable) and not
30 |                     all([isinstance(arg, T.TensorVariable)
31 |                          for arg in argument])):
32 |                 raise ValueError(
33 |                     "Theano backend requires an argument (or sequence of "
34 |                     "arguments) with respect to which compilation is to be "
35 |                     "carried out")
36 |             return True
37 |         return False
38 | 
39 |     @assert_backend_available
40 |     def compile_function(self, objective, argument):
41 |         """
42 |         Wrapper for theano.function(). Compiles a theano graph into a
43 |         python function.
44 |         """
45 |         try:
46 |             return theano.function([argument], objective)
47 |         except TypeError:
48 |             # Assume we are on a product manifold
49 |             compiled = theano.function([arg for arg in argument], objective)
50 |             return lambda x: compiled(*x)
51 | 
52 |     @assert_backend_available
53 |     def compute_gradient(self, objective, argument):
54 |         """
55 |         Wrapper for theano.tensor.grad(). Computes the gradient of 'objective'
56 |         with respect to 'argument' and returns a compiled version.
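        Note: 'objective' must be a Theano TensorVariable built from
        'argument' (see is_compatible above).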
57 |         """
58 |         g = T.grad(objective, argument)
59 |         return self.compile_function(g, argument)
60 | 
61 |     @assert_backend_available
62 |     def compute_hessian(self, objective, argument):
63 |         """
64 |         Computes the directional derivative of the gradient (which is equal to
65 |         the Hessian multiplied by direction).
66 |         """
67 |         g = T.grad(objective, argument)
68 | 
69 |         # Create a new tensor A, which has the same type (i.e. same
70 |         # dimensionality) as argument.
71 |         is_product_manifold = isinstance(argument, (list, tuple))
72 |         if not is_product_manifold:
73 |             A = argument.type()
74 |         else:
75 |             A = [arg.type() for arg in argument]
76 | 
77 |         # First attempt efficient 'R-op', this directly calculates the
78 |         # directional derivative of the gradient.
79 |         try:
80 |             R = T.Rop(g, argument, A)
81 |         except NotImplementedError:
82 |             # Implementation based on
83 |             # tensorflow.python.ops.gradients_impl._hessian_vector_product
84 |             if not is_product_manifold:
85 |                 proj = T.sum(g * disconnected_grad(A))
86 |                 R = T.grad(proj, argument)
87 |             else:
88 |                 proj = [T.sum(g_elem * disconnected_grad(a_elem))
89 |                         for g_elem, a_elem in zip(g, A)]
90 |                 proj_grad = [T.grad(proj_elem, argument,
91 |                                     disconnected_inputs="ignore",
92 |                                     return_disconnected="None")
93 |                              for proj_elem in proj]
94 |                 proj_grad_transpose = map(list, zip(*proj_grad))
95 |                 proj_grad_stack = [
96 |                     T.stacklists([c for c in row if c is not None])
97 |                     for row in proj_grad_transpose]
98 |                 R = [T.sum(stack, axis=0) for stack in proj_grad_stack]
99 | 
100 |         if not is_product_manifold:
101 |             hess = theano.function([argument, A], R, on_unused_input="warn")
102 |         else:
103 |             hess_prod = theano.function(argument + A, R,
104 |                                         on_unused_input="warn")
105 | 
106 |             def hess(x, a):
107 |                 return hess_prod(*(x + a))
108 | 
109 |         return hess
--------------------------------------------------------------------------------
/BoManifolds/nested_mappings/nested_spd_constraints_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from BoManifolds.nested_mappings.nested_spd_utils import projection_from_nested_spd_to_spd, \
4 |     projection_from_spd_to_nested_spd
5 | 
6 | '''
7 | This file is part of the GaBOtorch library.
8 | Authors: Noemie Jaquier and Leonel Rozo, 2020
9 | License: MIT
10 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com
11 | '''
12 | 
13 | 
14 | def max_eigenvalue_nested_spd_constraint(x_nested_spd, maximum_eigenvalue, projection_matrix,
15 |                                          projection_complement_matrix, bottom_spd_matrix, contraction_matrix):
16 |     """
17 |     This function defines an inequality constraint on the maximum eigenvalue of the nested SPD matrix as a function
18 |     of the maximum eigenvalue allowed in the original SPD space.
19 |     To do so, the nested SPD is projected back to the original SPD space and the eigenvalues of the resulting matrix
20 |     are computed.
21 | 
22 |     Parameters
23 |     ----------
24 |     :param x_nested_spd: low-dimensional nested SPD matrix
25 |     :param maximum_eigenvalue: maximum eigenvalue in the original SPD space
26 |     :param projection_matrix: element of the Grassmann manifold (D x d)
27 |     :param projection_complement_matrix: element of the Grassmann manifold (D x D-d)
28 |         Note that we must have torch.mm(projection_complement_matrix.T, projection_matrix) = 0.
29 |     :param bottom_spd_matrix: bottom part of the rotated SPD matrix (D-d, D-d)
30 |     :param contraction_matrix: matrix whose norm is <=1 (d x D-d)
31 | 
32 |     Returns
33 |     -------
34 |     :return: difference between maximum tolerated eigenvalue and maximum eigenvalue of the SPD matrix in the original
35 |         space
36 |     """
37 |     # Project to original SPD space
38 |     x_spd = projection_from_nested_spd_to_spd(x_nested_spd, projection_matrix, projection_complement_matrix,
39 |                                               bottom_spd_matrix, contraction_matrix)
40 |     # Eigenvalue decomposition
41 |     eigenvalues = torch.symeig(x_spd, eigenvectors=True).eigenvalues
42 |     return maximum_eigenvalue - eigenvalues.max()
43 | 
44 | 
45 | def min_eigenvalue_nested_spd_constraint(x_nested_spd, minimum_eigenvalue, projection_matrix,
46 |                                          projection_complement_matrix, bottom_spd_matrix, contraction_matrix):
47 |     """
48 |     This function defines an inequality constraint on the minimum eigenvalue of the nested SPD matrix as a function
49 |     of the minimum eigenvalue allowed in the original SPD space.
50 |     To do so, the nested SPD is projected back to the original SPD space and the eigenvalues of the resulting matrix
51 |     are computed.
52 | 
53 |     Parameters
54 |     ----------
55 |     :param x_nested_spd: low-dimensional nested SPD matrix
56 |     :param minimum_eigenvalue: minimum eigenvalue in the original SPD space
57 |     :param projection_matrix: element of the Grassmann manifold (D x d)
58 |     :param projection_complement_matrix: element of the Grassmann manifold (D x D-d)
59 |         Note that we must have torch.mm(projection_complement_matrix.T, projection_matrix) = 0.
60 |     :param bottom_spd_matrix: bottom part of the rotated SPD matrix (D-d, D-d)
61 |     :param contraction_matrix: matrix whose norm is <=1 (d x D-d)
62 | 
63 |     Returns
64 |     -------
65 |     :return: difference between minimum eigenvalue of the SPD matrix in the original space and the minimum tolerated
66 |         eigenvalue
67 |     """
68 |     # Project to original SPD space
69 |     x_spd = projection_from_nested_spd_to_spd(x_nested_spd, projection_matrix, projection_complement_matrix,
70 |                                               bottom_spd_matrix, contraction_matrix)
71 |     # Eigenvalue decomposition
72 |     eigenvalues = torch.symeig(x_spd, eigenvectors=True).eigenvalues
73 |     return eigenvalues.min() - minimum_eigenvalue
74 | 
75 | 
76 | def random_nested_spd_with_spd_eigenvalue_constraints(self, random_spd_fct, projection_matrix):
77 |     """
78 |     This function computes a nested SPD sample by first computing a sample in the original SPD space and projecting it
79 |     into the nested SPD space.
80 | 
81 |     Parameters
82 |     ----------
83 |     :param self: self parameter of the SPD pymanopt class
84 |     :param random_spd_fct: function to generate SPD samples in the original space
85 |     :param projection_matrix: element of the Grassmann manifold (D x d)
86 | 
87 |     Returns
88 |     -------
89 |     :return: nested SPD sample
90 |     """
91 |     # Draw an SPD sample respecting the constraint in the original space
92 |     x_spd = torch.tensor(random_spd_fct(), dtype=projection_matrix.dtype)
93 | 
94 |     # Project it to nested SPD
95 |     nested_spd = projection_from_spd_to_nested_spd(x_spd, projection_matrix)
96 |     # To numpy to stay within pymanopt format
97 |     return nested_spd.numpy()
--------------------------------------------------------------------------------
/BoManifolds/Riemannian_utils/spd_constraints_utils_torch.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | 
4 | from BoManifolds.Riemannian_utils.spd_utils_torch import vector_to_symmetric_matrix_mandel_torch, \
5 |     symmetric_matrix_to_vector_mandel_torch
6 | 
7 | '''
8 | This file is part of the GaBOtorch library.
9 | Authors: Noemie Jaquier and Leonel Rozo, 2020
10 | License: MIT
11 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com
12 | 
13 | The functions of this file are based on functions of botorch (in botorch.optim).
14 | '''
15 | 
16 | 
17 | def max_eigenvalue_constraint_torch(x, maximum_eigenvalue):
18 |     """
19 |     This function defines an inequality constraint on the maximum eigenvalue of a SPD matrix.
20 |     The value returned by the function is positive if the inequality constraint is satisfied.
21 | 
22 |     Parameters
23 |     ----------
24 |     :param x: SPD matrix
25 |     :param maximum_eigenvalue: maximum eigenvalue to satisfy the constraint
26 | 
27 |     Returns
28 |     -------
29 |     :return: difference between maximum tolerated eigenvalue and maximum eigenvalue of x
30 |     """
31 |     eigenvalues = torch.symeig(x, eigenvectors=True).eigenvalues  # eigenvectors=True is needed for gradient computation
32 |     return maximum_eigenvalue - eigenvalues.max()
33 | 
34 | 
35 | def min_eigenvalue_constraint_torch(x, minimum_eigenvalue):
36 |     """
37 |     This function defines an inequality constraint on the minimum eigenvalue of a SPD matrix.
38 |     The value returned by the function is positive if the inequality constraint is satisfied.
39 | 
40 |     Parameters
41 |     ----------
42 |     :param x: SPD matrix
43 |     :param minimum_eigenvalue: minimum eigenvalue to satisfy the constraint
44 | 
45 |     Returns
46 |     -------
47 |     :return: difference between minimum eigenvalue of x and minimum tolerated eigenvalue
48 |     """
49 |     eigenvalues = torch.symeig(x, eigenvectors=True).eigenvalues  # eigenvectors=True is needed for gradient computation
50 |     return eigenvalues.min() - minimum_eigenvalue
51 | 
52 | 
53 | def post_processing_init_spd_torch(x_vec, min_eigenvalue, max_eigenvalue):
54 |     """
55 |     This function post-processes a symmetric matrix, so that its eigenvalues lie in the defined bounds.
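    (Eigenvalues outside [min_eigenvalue, max_eigenvalue] are clipped to the corresponding bound and each
    matrix is rebuilt from its eigendecomposition.)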
56 | 
57 |     Parameters
58 |     ----------
59 |     :param x_vec: symmetric matrix in Mandel vector form
60 |     :param min_eigenvalue: minimum eigenvalue to satisfy the constraint
61 |     :param max_eigenvalue: maximum eigenvalue to satisfy the constraint
62 | 
63 |     Returns
64 |     -------
65 |     :return: symmetric matrix in Mandel vector form
66 |     """
67 |     x = vector_to_symmetric_matrix_mandel_torch(x_vec)
68 |     init_shape = x.shape
69 |     x = x.view(-1, x.shape[-2], x.shape[-1])
70 | 
71 |     for n in range(x.shape[0]):
72 |         eigdec = torch.symeig(x[n], eigenvectors=True)
73 |         eigvals = eigdec.eigenvalues
74 |         eigvecs = eigdec.eigenvectors
75 | 
76 |         eigvals[eigvals <= min_eigenvalue] = min_eigenvalue  # Minimum eigenvalue constraint
77 |         eigvals[eigvals > max_eigenvalue] = max_eigenvalue  # Max eigenvalue constraint
78 | 
79 |         x[n] = torch.mm(torch.mm(eigvecs, torch.diag(eigvals)), torch.inverse(eigvecs))
80 | 
81 |     x = x.view(init_shape)
82 |     return symmetric_matrix_to_vector_mandel_torch(x)
83 | 
84 | 
85 | def post_processing_spd_cholesky_torch(x_chol, min_eigenvalue, max_eigenvalue):
86 |     """
87 |     This function post-processes the Cholesky decomposition of a symmetric matrix, so that the eigenvalues of the corresponding matrix lie in the defined bounds.
88 | 
89 |     Parameters
90 |     ----------
91 |     :param x_chol: Cholesky decomposition of a symmetric matrix in vector form
92 |     :param min_eigenvalue: minimum eigenvalue to satisfy the constraint
93 |     :param max_eigenvalue: maximum eigenvalue to satisfy the constraint
94 | 
95 |     Returns
96 |     -------
97 |     :return: Cholesky decomposition of the post-processed matrix in vector form
98 |     """
99 |     # Initial shape
100 |     init_shape = list(x_chol.shape)
101 |     x_chol = x_chol.view(-1, init_shape[-1])
102 | 
103 |     # Dimension of SPD matrix
104 |     dim_vec = x_chol.shape[-1]
105 |     dim = int((-1.0 + (1.0 + 8.0 * dim_vec) ** 0.5) / 2.0)
106 |     # Indices for Cholesky decomposition
107 |     indices = np.tril_indices(dim)
108 | 
109 |     for n in range(x_chol.shape[0]):
110 |         # SPD matrix
111 |         xL = torch.zeros((dim, dim), dtype=x_chol.dtype)
112 |         xL[indices] = x_chol[n]
113 |         x_mat = torch.mm(xL, xL.T)
114 | 
115 |         # Check constraints
116 |         eigdec = torch.eig(x_mat, eigenvectors=True)
117 |         eigvals = eigdec.eigenvalues[:, 0]
118 |         eigvecs = eigdec.eigenvectors
119 | 
120 |         eigvals[eigvals <= min_eigenvalue] = min_eigenvalue  # PD constraint
121 |         eigvals[eigvals > max_eigenvalue] = max_eigenvalue  # Max eigenvalue constraint
122 | 
123 |         x_mat = torch.mm(torch.mm(eigvecs, torch.diag(eigvals)), torch.inverse(eigvecs))
124 | 
125 |         # Cholesky decomposition
126 |         x_chol[n] = torch.cholesky(x_mat)[indices]
127 | 
128 |     return x_chol.view(init_shape)
129 | 
130 | 
--------------------------------------------------------------------------------
/BoManifolds/kernel_utils/kernels_sphere.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import gpytorch
3 | from gpytorch.constraints import GreaterThan
4 | 
5 | from BoManifolds.Riemannian_utils.sphere_utils_torch import sphere_distance_torch
6 | 
7 | '''
8 | This file is part of the GaBOtorch library.
9 | Authors: Noemie Jaquier and Leonel Rozo, 2020
10 | License: MIT
11 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com
12 | '''
13 | 
14 | 
15 | class SphereGaussianKernel(gpytorch.kernels.Kernel):
16 |     """
17 |     Instances of this class represent a Gaussian (RBF) covariance matrix between input points on the sphere manifold.
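    Example (hypothetical usage sketch; the beta_min value is illustrative):
        kernel = gpytorch.kernels.ScaleKernel(SphereGaussianKernel(beta_min=6.5))
        K = kernel(x1, x2).evaluate()  # x1, x2: unit-norm inputs of shape [N x d]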
18 | 19 | Attributes 20 | ---------- 21 | self.beta_min, minimum value of the inverse square lengthscale parameter beta 22 | 23 | Methods 24 | ------- 25 | forward(point1_in_the_sphere, point2_in_the_sphere, diagonal_matrix_flag=False, **params) 26 | 27 | Static methods 28 | -------------- 29 | """ 30 | def __init__(self, beta_min, beta_prior=None, **kwargs): 31 | """ 32 | Initialisation. 33 | 34 | Parameters 35 | ---------- 36 | :param beta_min: minimum value of the inverse square lengthscale parameter beta 37 | 38 | Optional parameters 39 | ------------------- 40 | :param beta_prior: prior on the parameter beta 41 | :param kwargs: additional arguments 42 | """ 43 | super(SphereGaussianKernel, self).__init__(has_lengthscale=False, **kwargs) 44 | self.beta_min = beta_min 45 | 46 | # Add beta parameter, corresponding to the inverse of the lengthscale parameter. 47 | beta_num_dims = 1 48 | self.register_parameter(name="raw_beta", 49 | parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1, beta_num_dims))) 50 | 51 | if beta_prior is not None: 52 | self.register_prior("beta_prior", beta_prior, lambda: self.beta, lambda v: self._set_beta(v)) 53 | 54 | # A GreaterThan constraint is defined on the lengthscale parameter to guarantee positive-definiteness. 55 | # The value of beta_min can be determined e.g. experimentally. 56 | self.register_constraint("raw_beta", GreaterThan(self.beta_min)) 57 | 58 | @property 59 | def beta(self): 60 | return self.raw_beta_constraint.transform(self.raw_beta) 61 | 62 | @beta.setter 63 | def beta(self, value): 64 | self._set_beta(value) 65 | 66 | def _set_beta(self, value): 67 | if not torch.is_tensor(value): 68 | value = torch.as_tensor(value).to(self.raw_beta) 69 | self.initialize(raw_beta=self.raw_beta_constraint.inverse_transform(value)) 70 | 71 | def forward(self, x1, x2, diag=False, **params): 72 | """ 73 | Computes the Gaussian kernel matrix between inputs x1 and x2 belonging to a sphere manifold. 74 | 75 | Parameters 76 | ---------- 77 | :param x1: input points on the sphere 78 | :param x2: input points on the sphere 79 | 80 | Optional parameters 81 | ------------------- 82 | :param diag: Should we return the whole distance matrix, or just the diagonal? If True, we must have `x1 == x2` 83 | :param params: additional parameters 84 | 85 | Returns 86 | ------- 87 | :return: kernel matrix between x1 and x2 88 | """ 89 | # Compute distance 90 | distance = sphere_distance_torch(x1, x2, diag=diag) 91 | distance2 = torch.mul(distance, distance) 92 | # Kernel 93 | exp_component = torch.exp(- distance2.mul(self.beta.double())) 94 | return exp_component 95 | 96 | 97 | class SphereLaplaceKernel(gpytorch.kernels.Kernel): 98 | """ 99 | Instances of this class represent a Laplace covariance matrix between input points on the sphere manifold. 100 | """ 101 | def __init__(self, **kwargs): 102 | """ 103 | Initialisation. 104 | 105 | Optional parameters 106 | ------------------- 107 | :param kwargs: additional arguments 108 | """ 109 | self.has_lengthscale = True 110 | super(SphereLaplaceKernel, self).__init__(has_lengthscale=True, ard_num_dims=None, **kwargs) 111 | 112 | def forward(self, x1, x2, diag=False, **params): 113 | """ 114 | Computes the Laplace kernel matrix between inputs x1 and x2 belonging to a sphere manifold. 
115 | 116 | Parameters 117 | ---------- 118 | :param x1: input points on the sphere 119 | :param x2: input points on the sphere 120 | 121 | Optional parameters 122 | ------------------- 123 | :param diag: Should we return the whole distance matrix, or just the diagonal? If True, we must have `x1 == x2` 124 | :param params: additional parameters 125 | 126 | Returns 127 | ------- 128 | :return: kernel matrix between x1 and x2 129 | """ 130 | # Compute distance 131 | distance = sphere_distance_torch(x1, x2, diag=diag) 132 | # Kernel 133 | exp_component = torch.exp(- distance.div(torch.mul(self.lengthscale.double(), self.lengthscale.double()))) 134 | return exp_component 135 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GaBOtorch 2 | This repository contains the source code to perform Geometry-aware Bayesian Optimization (GaBO) and High-Dimensional Geometry-aware Bayesian Optimization (HD-GaBO) on Riemannian manifolds. 3 | 4 | # Dependencies 5 | This code runs with Python>=3.6. It requires the following packages: 6 | - numpy 7 | - scipy 8 | - matplotlib 9 | - pymanopt 10 | - torch 11 | - gpytorch 12 | - botorch 13 | 14 | # Installation 15 | To install GaBOtorch, first clone the repository and install the required packages, as explained below. 16 | 17 | ``` 18 | pip install numpy scipy matplotlib pymanopt torch gpytorch botorch 19 | ``` 20 | Finally, from the GaBOtorch folder, run 21 | ``` 22 | pip install -e . 23 | ``` 24 | 25 | 26 | # Examples 27 | The following examples are available in GaBOtorch: 28 | ### Kernels [1] 29 | | Sphere manifold | | 30 | |:------------- |:-------------| 31 | | sphere_kernels | This example shows the use of different kernels for the hypersphere manifold S^n, used for Gaussian process regression. | 32 | | sphere_gaussian_kernel_parameters | This example shows the experimental selection of parameters for the Sphere Gaussian kernel. | 33 | 34 | | SPD manifold | | 35 | |:------------- |:-------------| 36 | | spd_kernels | This example shows the use of different kernels for the SPD manifold, used for Gaussian process regression. | 37 | | spd_gaussian_kernel_parameters | This example shows the experimental selection of parameters for the SPD Affine-Invariant Gaussian kernel. | 38 | 39 | 40 | ### BO on the sphere [1] 41 | | Benchmark examples | | 42 | |:------------- |:-------------| 43 | | gabo_sphere | This example shows the use of Geometry-aware Bayesian optimization (GaBO) on the sphere S2 to optimize the Ackley function. | 44 | | bo_euclidean_sphere | This example shows the use of Euclidean Bayesian optimization on the sphere S2 to optimize the Ackley function. | 45 | 46 | | Constrained benchmark examples | | 47 | |:------------- |:-------------| 48 | | gabo_sphere_bound_constraints | This example shows the use of Geometry-aware Bayesian optimization (GaBO) on the sphere S2 to optimize the Ackley function. In this example, the search domain is bounded and represents a subspace of the manifold. | 49 | | gabo_sphere_equality_constraints | This example shows the use of Geometry-aware Bayesian optimization (GaBO) on the sphere S2 to optimize the Ackley function. In this example, the parameters must satisfy equality constraints and the search space represents a subspace of the manifold. | 50 | | gabo_sphere_inequality_constraints | This example shows the use of Geometry-aware Bayesian optimization (GaBO) on the sphere S2 to optimize the Ackley function.
In this example, the parameters must satisfy inequality constraints and the search space represents a subspace of the manifold. | 51 | 52 | ### BO on the SPD manifold [1] 53 | | Benchmark examples | | 54 | |:------------- |:-------------| 55 | | gabo_spd | This example shows the use of Geometry-aware Bayesian optimization (GaBO) on the SPD manifold S2_++ to optimize the Ackley function. | 56 | | bo_cholesky_spd | This example shows the use of Cholesky Bayesian optimization on the SPD manifold S2_++ to optimize the Ackley function. A Euclidean BO is applied on the Cholesky decomposition of the SPD matrices. | 57 | | bo_euclidean_spd | This example shows the use of Euclidean Bayesian optimization on the SPD manifold S2_++ to optimize the Ackley function. | 58 | 59 | ### High-dimensional BO on the sphere [2] 60 | | Benchmark examples | | 61 | |:------------- |:-------------| 62 | | hd_gabo_sphere | This example shows the use of High-Dimensional Geometry-aware Bayesian optimization (HD-GaBO) to optimize the Ackley function on a latent sphere S2 embedded in the sphere S5. | 63 | 64 | ### High-dimensional BO on the SPD manifold [2] 65 | | Benchmark examples | | 66 | |:------------- |:-------------| 67 | | hd_gabo_spd | This example shows the use of High-Dimensional Geometry-aware Bayesian optimization (HD-GaBO) to optimize the Rosenbrock function on a latent SPD manifold S2_++ embedded in the SPD manifold S5_++. | 68 | 69 | # References 70 | If you find GaBOtorch useful, we would be grateful if you cite the following references: 71 | 72 | [[1](http://njaquier.ch/files/CoRL19_Jaquier_GaBO.pdf)] N. Jaquier, L. Rozo, S. Calinon, and M. Bürger (2019). Bayesian Optimization meets Riemannian Manifolds in Robot Learning. In Conference on Robot Learning (CoRL). 73 | 74 | [[2](http://njaquier.ch/files/HDGaBO.pdf)] N. Jaquier and L. Rozo (2020). High-dimensional Bayesian Optimization via Nested Riemannian Manifolds. In Neural Information Processing Systems (NeurIPS). 75 | 76 | ``` 77 | @inproceedings{Jaquier19GaBO, 78 | author="Jaquier, N. and Rozo, L. and Calinon, S. and B\"urger, M.", 79 | title="Bayesian Optimization meets Riemannian Manifolds in Robot Learning", 80 | booktitle="Conference on Robot Learning (CoRL)", 81 | year="2019", 82 | pages="" 83 | } 84 | @inproceedings{Jaquier20HDGaBO, 85 | author="Jaquier, N. and Rozo, L.", 86 | title="High-dimensional Bayesian Optimization via Nested Riemannian Manifolds", 87 | booktitle="Neural Information Processing Systems (NeurIPS)", 88 | year="2020", 89 | pages="" 90 | } 91 | ``` 92 | -------------------------------------------------------------------------------- /BoManifolds/BO_test_functions/nested_test_functions_spd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from BoManifolds.Riemannian_utils.spd_utils import symmetric_matrix_to_vector_mandel 5 | from BoManifolds.Riemannian_utils.spd_utils_torch import vector_to_symmetric_matrix_mandel_torch, \ 6 | symmetric_matrix_to_vector_mandel_torch 7 | from BoManifolds.nested_mappings.nested_spd_utils import projection_from_spd_to_nested_spd 8 | 9 | ''' 10 | This file is part of the GaBOtorch library.
11 | Authors: Noemie Jaquier and Leonel Rozo, 2020 12 | License: MIT 13 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com 14 | ''' 15 | 16 | 17 | def projected_function_spd(x, low_dimensional_spd_manifold, test_function, projection_matrix): 18 | """ 19 | This function computes the value of a test function defined on a projection of the original SPD manifold S^D_++ 20 | on a lower-dimensional SPD manifold S^d_++. 21 | 22 | Note: test functions and their global minima are defined in test_function_spd.py. 23 | 24 | Parameters 25 | ---------- 26 | :param x: point on the SPD manifold (in Mandel notation) (torch tensor) 27 | :param low_dimensional_spd_manifold: d-dimensional SPD manifold (pymanopt manifold) 28 | :param test_function: test function defined on the low-dimensional SPD manifold 29 | :param projection_matrix: element of the Grassmann manifold (D x d) 30 | 31 | 32 | Returns 33 | ------- 34 | :return: value of the test function at x (numpy [1,1] array) 35 | """ 36 | # Mandel to matrix 37 | x_spd = vector_to_symmetric_matrix_mandel_torch(x) 38 | 39 | # Projection to lower dimensional SPD manifold 40 | x_spd_low_dimension = projection_from_spd_to_nested_spd(x_spd, projection_matrix) 41 | 42 | # From matrix to Mandel 43 | x_low_dimension = symmetric_matrix_to_vector_mandel_torch(x_spd_low_dimension) 44 | 45 | # Compute the test function value 46 | return test_function(x_low_dimension, low_dimensional_spd_manifold) 47 | 48 | 49 | def optimum_projected_function_spd(optimum_function, low_dimensional_spd_manifold, projection_matrix): 50 | """ 51 | This function returns the global minimum (x, f(x)) of a test function defined on a projection of an SPD manifold 52 | S^D_++ onto a lower-dimensional SPD manifold S^d_++. 53 | Note that, as the inverse of the projection function is not available, the location of the optimum x on the 54 | low-dimensional manifold is returned. 55 | 56 | Note: test functions and their global minima are defined in test_function_spd.py. 57 | 58 | Parameters 59 | ---------- 60 | :param optimum_function: function returning the global minimum (x, f(x)) of the test function on S^d_++ 61 | :param low_dimensional_spd_manifold: d-dimensional SPD manifold (pymanopt manifold) 62 | :param projection_matrix: element of the Grassmann manifold (D x d) 63 | 64 | Returns 65 | ------- 66 | :return opt_x: location of the global minimum of the test function on the low-dimensional SPD manifold 67 | :return opt_y: value of the global minimum of the test function on the SPD manifold 68 | """ 69 | # Global minimum on nested SPD 70 | nested_opt_x, opt_y = optimum_function(low_dimensional_spd_manifold) 71 | 72 | return nested_opt_x, opt_y 73 | 74 | 75 | def cholesky_embedded_function_wrapped(x_cholesky, low_dimensional_spd_manifold, spd_manifold, test_function): 76 | """ 77 | This function is a wrapper for test functions on the SPD manifold with inputs in the form of a Cholesky 78 | decomposition. The Cholesky decomposition input is transformed into the corresponding SPD matrix, which is then 79 | given as input to the test function.
80 | 81 | Parameters 82 | ---------- 83 | :param x_cholesky: Cholesky decomposition of an SPD matrix 84 | :param low_dimensional_spd_manifold: d-dimensional SPD manifold (pymanopt manifold) 85 | :param spd_manifold: D-dimensional SPD manifold (pymanopt manifold) 86 | :param test_function: embedded function on the low-dimensional SPD manifold to be tested 87 | 88 | Returns 89 | ------- 90 | :return: value of the test function at x (numpy [1,1] array) 91 | """ 92 | # Dimension 93 | dimension = spd_manifold._n 94 | 95 | # Data to numpy 96 | torch_type = x_cholesky.dtype 97 | x_cholesky = x_cholesky.detach().numpy() 98 | 99 | if np.ndim(x_cholesky) == 2: 100 | x_cholesky = x_cholesky[0] 101 | 102 | # Verify that the Cholesky decomposition does not contain zeros 103 | if x_cholesky.size - np.count_nonzero(x_cholesky): 104 | x_cholesky += 1e-6 105 | 106 | # Also add a small value to Cholesky decomposition elements that are too close to zero 107 | x_cholesky[np.abs(x_cholesky) < 1e-10] += 1e-10 108 | 109 | # Reshape matrix 110 | indices = np.tril_indices(dimension) 111 | xL = np.zeros((dimension, dimension)) 112 | xL[indices] = x_cholesky 113 | 114 | # Compute SPD from Cholesky 115 | x = np.dot(xL, xL.T) 116 | # Mandel notation 117 | x = symmetric_matrix_to_vector_mandel(x) 118 | # To torch 119 | x = torch.from_numpy(x).to(dtype=torch_type) 120 | 121 | # Test function 122 | return test_function(x, low_dimensional_spd_manifold) 123 | -------------------------------------------------------------------------------- /BoManifolds/pymanopt_addons/tools/autodiff/_pytorch.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module containing functions to differentiate functions using pytorch. 3 | """ 4 | try: 5 | import torch 6 | import numpy as np 7 | except ImportError: 8 | torch = None 9 | np = None 10 | 11 | from ._backend import Backend, assert_backend_available 12 | 13 | 14 | # The pytorch tape-based automatic differentiation means 15 | # that one needs to compute the function to compute its gradient. 16 | # Alas, pymanopt uses different functions to compute the cost, 17 | # its gradient, or its Hessian. Therefore it is important to 18 | # cache previous computations with the same x.
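# For illustration (hypothetical usage): for a fixed point x, a solver typically calls # cost(x), which runs the forward pass once and caches the scalar result, and then # egrad(x), which reuses the cached scalar so that only torch.autograd.grad is run; # the objective is thus evaluated a single time per distinct x.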
19 | 20 | 21 | class PytorchBackend(Backend): 22 | def __str__(self): 23 | return "pytorch" 24 | 25 | def is_available(self): 26 | return torch is not None and np is not None 27 | 28 | @assert_backend_available 29 | def is_compatible(self, objective, arg): 30 | """ 31 | To select the pytorch backend, use 32 | 'Problem(manifold=..,cost=...,arg=torch.Tensor())' 33 | The tensor passed as argument is used as a python object 34 | to cache recent calls to the cost, egrad, or ehess functions 35 | """ 36 | return callable(objective) and \ 37 | isinstance(arg, torch.Tensor) and arg.nelement() == 0 38 | 39 | @assert_backend_available 40 | def _compile(self, objective, cache): 41 | assert isinstance(cache, torch.Tensor) and cache.nelement() == 0 42 | if hasattr(cache, 'cost'): 43 | return cache 44 | cache.x = None  # list with torch copies of the input np.arrays 45 | cache.ids = None  # list with ids of the input np.arrays 46 | cache.f = None  # scalar tensor with cost function 47 | cache.df = None  # list of gradient tensors 48 | 49 | def _astensor(x): 50 | return x.detach() if isinstance(x, torch.Tensor) \ 51 | else torch.from_numpy(np.array(x)) 52 | 53 | def _asiterable(x): 54 | return (x, True) if type(x) in (list, tuple) or issubclass(type(x), (list, tuple)) \ 55 | else ([x], False)  # NJ: updated to also return True if x is a list/tuple subclass 56 | 57 | def _notcached(x, cache): 58 | if not cache.ids: 59 | return True 60 | if len(x) != len(cache.ids): 61 | return True 62 | for (xi, cachex, cacheid) in zip(x, cache.x, cache.ids): 63 | if (id(xi) != cacheid): 64 | return True 65 | if not (_astensor(xi) == _astensor(cachex)).all().item(): 66 | return True 67 | return False 68 | 69 | def _updatex(x, cache): 70 | if _notcached(x, cache): 71 | cache.x = [_astensor(xi).clone().requires_grad_() for xi in x] 72 | cache.ids = [id(xi) for xi in x] 73 | cache.f = None 74 | cache.df = None 75 | 76 | def _updatef(seqp, cache): 77 | if not cache.f: 78 | cache.f = objective(cache.x) if seqp else objective(cache.x[0]) 79 | if not torch.is_tensor(cache.f) or len(cache.f.size()) > 0: 80 | raise ValueError("Pytorch backend wants a function " 81 | "that returns a zero-dim tensor (scalar)") 82 | 83 | def cost(x): 84 | xx, seqp = _asiterable(x) 85 | _updatex(xx, cache) 86 | _updatef(seqp, cache) 87 | return cache.f.item() 88 | 89 | def _updatedf(seqp, cache): 90 | if not cache.df: 91 | _updatef(seqp, cache) 92 | cache.df = torch.autograd.grad(cache.f, cache.x, 93 | create_graph=True, 94 | allow_unused=True) 95 | 96 | def egrad(x): 97 | xx, seqp = _asiterable(x) 98 | _updatex(xx, cache) 99 | _updatedf(seqp, cache) 100 | return [di.detach().numpy() for di in cache.df] if seqp \ 101 | else cache.df[0].detach().numpy() 102 | 103 | def ehess(x, u): 104 | xx, seqp = _asiterable(x) 105 | uu, sequ = _asiterable(u) 106 | if seqp != sequ or len(xx) != len(uu): 107 | raise ValueError("Incompatible lists in ehess") 108 | _updatex(xx, cache) 109 | _updatedf(seqp, cache) 110 | r = 0 111 | for (di, ui) in zip(cache.df, uu): 112 | n = di.nelement() 113 | r = r + torch.dot(di.view(n), _astensor(ui).view(n)) 114 | h = torch.autograd.grad([r], cache.x, 115 | retain_graph=True, allow_unused=True) 116 | return [hi.numpy() for hi in h] if seqp else h[0].numpy() 117 | 118 | cache.cost = cost 119 | cache.egrad = egrad 120 | cache.ehess = ehess 121 | return cache 122 | 123 | @assert_backend_available 124 | def compile_function(self, objective, argument): 125 | return self._compile(objective, argument).cost 126 | 127 | @assert_backend_available 128
| def compute_gradient(self, objective, argument): 129 | return self._compile(objective, argument).egrad 130 | 131 | @assert_backend_available 132 | def compute_hessian(self, objective, argument): 133 | return self._compile(objective, argument).ehess 134 | -------------------------------------------------------------------------------- /BoManifolds/manifold_optimization/numpy_list_converter.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from math import inf 3 | from typing import Dict, List, NamedTuple, Optional, Set, Tuple 4 | 5 | import numpy as np 6 | import torch 7 | from torch.nn import Module 8 | 9 | from botorch.optim.numpy_converter import ParameterBounds, TorchAttr 10 | 11 | ''' 12 | This file is part of the GaBOtorch library. 13 | Authors: Noemie Jaquier and Leonel Rozo, 2020 14 | License: MIT 15 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com 16 | 17 | The functions of this file are based on the functions of botorch (in botorch.optim.numpy_converter.py). 18 | ''' 19 | 20 | 21 | def module_to_list_of_array( 22 | module: Module, 23 | bounds: Optional[ParameterBounds] = None, 24 | exclude: Optional[Set[str]] = None, 25 | ) -> Tuple[list, Dict[str, TorchAttr], Optional[list]]: 26 | """ 27 | This function extracts named parameters from a module into a list of numpy arrays. It only extracts parameters with 28 | requires_grad, since it is meant for optimizing. 29 | 30 | Parameters 31 | ---------- 32 | :param module: A module with parameters. May specify parameter constraints in a `named_parameters_and_constraints` 33 | method. 34 | 35 | Optional parameters 36 | ------------------- 37 | :param bounds: A ParameterBounds dictionary mapping parameter names to tuples of lower and upper bounds. 38 | Bounds specified here take precedence over bounds on the same parameters specified in the constraints 39 | registered with the module. 40 | :param exclude: A list of parameter names that are to be excluded from extraction. 41 | 42 | Returns 43 | ------- 44 | :return: 3-element tuple containing 45 | - The parameter values as a list of numpy arrays. 46 | - An ordered dictionary with the name and tensor attributes of each parameter. 47 | - A list of `2 x n_params` numpy arrays with lower and upper bounds if at least one constraint is finite, and 48 | None otherwise.
49 | 50 | Example: 51 | mll = ExactMarginalLogLikelihood(model.likelihood, model) 52 | parameter_array, property_dict, bounds_out = module_to_list_of_array(mll) 53 | """ 54 | x: List[np.ndarray] = [] 55 | lower: List[np.ndarray] = [] 56 | upper: List[np.ndarray] = [] 57 | property_dict = OrderedDict() 58 | exclude = set() if exclude is None else exclude 59 | 60 | # get bounds specified in model (if any) 61 | bounds_: ParameterBounds = {} 62 | if hasattr(module, "named_parameters_and_constraints"): 63 | for param_name, _, constraint in module.named_parameters_and_constraints(): 64 | if constraint is not None and not constraint.enforced: 65 | bounds_[param_name] = constraint.lower_bound, constraint.upper_bound 66 | 67 | # update with user-supplied bounds (overwrites if already exists) 68 | if bounds is not None: 69 | bounds_.update(bounds) 70 | 71 | for p_name, t in module.named_parameters(): 72 | if p_name not in exclude and t.requires_grad: 73 | property_dict[p_name] = TorchAttr( 74 | shape=t.shape, dtype=t.dtype, device=t.device 75 | ) 76 | if t.ndim > 1 and t.shape[0] > 1:  # if the variable is a matrix, keep its shape 77 | x.append(t.detach().cpu().double().clone().numpy()) 78 | else:  # Vector case 79 | x.append(t.detach().view(-1).cpu().double().clone().numpy()) 80 | # construct bounds 81 | if bounds_: 82 | l_, u_ = bounds_.get(p_name, (-inf, inf)) 83 | if torch.is_tensor(l_): 84 | l_ = l_.cpu().detach() 85 | if torch.is_tensor(u_): 86 | u_ = u_.cpu().detach() 87 | # check for Nones here b/c it may be passed in manually in bounds 88 | lower.append(np.full(t.nelement(), l_ if l_ is not None else -inf)) 89 | upper.append(np.full(t.nelement(), u_ if u_ is not None else inf)) 90 | 91 | return x, property_dict, [np.stack([l, u]) for l, u in zip(lower, upper)] if bounds_ else None  # collected bounds as a list of 2 x n arrays, None if no bound was specified 92 | 93 | 94 | def set_params_with_list_of_array( 95 | module: Module, x: list, property_dict: Dict[str, TorchAttr] 96 | ) -> Module: 97 | """ 98 | This function sets module parameters with values from a list of numpy arrays. 99 | 100 | Parameters 101 | ---------- 102 | :param module: a module with parameters to be set 103 | :param x: the list of numpy arrays containing the parameter values 104 | :param property_dict: a dictionary of parameter names and torch attributes as returned by module_to_list_of_array. 105 | 106 | Returns 107 | ------- 108 | :return: a module with parameters updated in-place. 109 | 110 | Example: 111 | mll = ExactMarginalLogLikelihood(model.likelihood, model) 112 | parameter_array, property_dict, bounds_out = module_to_list_of_array(mll) 113 | parameter_array = [p + 0.1 for p in parameter_array]  # perturb parameters (for example only) 114 | mll = set_params_with_list_of_array(mll, parameter_array, property_dict) 115 | """ 116 | param_dict = OrderedDict(module.named_parameters()) 117 | idx = 0 118 | for p_name, attrs in property_dict.items(): 119 | # Construct the new tensor 120 | if len(attrs.shape) == 0:  # deal with scalar tensors 121 | new_data = torch.tensor(x[idx][0], dtype=attrs.dtype, device=attrs.device) 122 | else: 123 | new_data = torch.tensor(x[idx], dtype=attrs.dtype, device=attrs.device).view(*attrs.shape) 124 | idx += 1 125 | # Update corresponding parameter in-place. Disable autograd to update.
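# Note: copy_() is an in-place operation; calling it on a leaf tensor with # requires_grad=True would raise a RuntimeError, hence requires_grad is toggled # off before the copy and restored afterwards.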
126 | param_dict[p_name].requires_grad_(False) 127 | param_dict[p_name].copy_(new_data) 128 | param_dict[p_name].requires_grad_(True) 129 | return module 130 | -------------------------------------------------------------------------------- /BoManifolds/nested_mappings/nested_spd_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from BoManifolds.Riemannian_utils.spd_utils_torch import sqrtm_torch 4 | 5 | ''' 6 | This file is part of the GaBOtorch library. 7 | Authors: Noemie Jaquier and Leonel Rozo, 2020 8 | License: MIT 9 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com 10 | ''' 11 | 12 | 13 | def projection_from_spd_to_nested_spd(x_spd, projection_matrix): 14 | """ 15 | This function projects data from the SPD manifold S^D_++ to a lower dimensional SPD manifold S^d_++. 16 | This is done by multiplying each D x D SPD matrix on the left and on the right by an orthogonal matrix belonging to the 17 | Grassmann manifold G(D,d). The resulting SPD data of dimension d x d with d < D are given by Y = W'XW, 18 | with X \in S^D_++ and W \in G(D,d). 19 | This projection was proposed in ["Dimensionality Reduction on SPD Manifolds: The Emergence of Geometry-Aware 20 | Methods", M. Harandi, M. Salzmann and R. Hartley. PAMI 2018]. 21 | 22 | Parameters 23 | ---------- 24 | :param x_spd: SPD matrix or set of SPD matrices (D x D or b1 x ... x b_k x D x D) 25 | :param projection_matrix: element of the Grassmann manifold (D x d) 26 | 27 | Returns 28 | ------- 29 | :return: low dimensional SPD matrix or set of low dimensional SPD matrices (d x d or b1 x ... x b_k x d x d) 30 | """ 31 | low_dimension = projection_matrix.shape[1] 32 | 33 | # Reshape x_spd to N x D x D format 34 | init_shape = list(x_spd.shape) 35 | dimension = x_spd.shape[-1] 36 | x_spd = x_spd.view(-1, dimension, dimension) 37 | nb_data = x_spd.shape[0] 38 | 39 | # Augment projection matrix 40 | projection_matrix = projection_matrix.unsqueeze(0) 41 | projection_matrix = projection_matrix.repeat([nb_data, 1, 1]) 42 | 43 | # Project data to SPD matrix of lower dimension 44 | x_spd_low_dimension = torch.bmm(torch.bmm(projection_matrix.transpose(-2, -1), x_spd), projection_matrix) 45 | 46 | # Back to initial shape 47 | new_shape = init_shape[:-2] + [low_dimension, low_dimension] 48 | return x_spd_low_dimension.view(new_shape) 49 | 50 | 51 | def projection_from_nested_spd_to_spd(x_spd_low_dimension, projection_matrix, projection_complement_matrix, 52 | bottom_spd_matrix, contraction_matrix): 53 | """ 54 | This function is an approximation of the inverse of the function projection_from_spd_to_nested_spd. 55 | It maps low-dimensional SPD matrices to the original SPD space. 56 | To do so, we consider that the nested SPD matrix Y = W'XW is the d x d upper-left part of the rotated matrix 57 | Xr = R'XR, where R = [W, V] and Xr = [Y B; B' C]. 58 | In order to recover X, we assume a constant SPD matrix C, and B = Y^0.5*K*C^0.5 to ensure the positive-definiteness of Xr, with 59 | K a contraction matrix (norm(K) <= 1). We first reconstruct Xr, and then X as X = RXrR'. 60 | Note that W and V belong to Grassmann manifolds, W \in G(D,d) and V \in G(D,D-d), and must have orthonormal columns, 61 | so that W'V = 0.
62 | 63 | Parameters 64 | ---------- 65 | :param x_spd_low_dimension: low dimensional SPD matrix or set of low dimensional SPD matrices (d x d or N x d x d) 66 | :param projection_matrix: element of the Grassmann manifold (D x d) 67 | :param projection_complement_matrix: element of the Grassmann manifold (D x D-d) 68 | Note that we must have torch.mm(projection_complement_matrix.T, projection_matrix) = 0. 69 | :param bottom_spd_matrix: bottom-right part of the rotated SPD matrix (D-d, D-d) 70 | :param contraction_matrix: matrix whose norm is <=1 (d x D-d) 71 | 72 | Returns 73 | ------- 74 | :return: SPD matrix or set of SPD matrices (D x D or N x D x D) 75 | """ 76 | 77 | # Type 78 | torch_type = x_spd_low_dimension.dtype 79 | 80 | # Number of data 81 | if x_spd_low_dimension.ndim == 2: 82 | nb_data = 1 83 | x_spd_low_dimension = torch.unsqueeze(x_spd_low_dimension, 0) 84 | one_data_output = True # To return a 2D SPD matrix 85 | else: 86 | nb_data = x_spd_low_dimension.shape[0] 87 | one_data_output = False # To return a 3D array of nb_data SPD matrices 88 | 89 | # SPD matrices array initialization 90 | dimension = projection_matrix.shape[0] 91 | x_spd = torch.zeros((nb_data, dimension, dimension), dtype=torch_type) 92 | 93 | # Compute rotation matrix 94 | rotation_matrix = torch.cat((projection_matrix, projection_complement_matrix), dim=1) 95 | # inverse_rotation_matrix = torch.inverse(rotation_matrix) 96 | 97 | # Compute sqrtm of the bottom block 98 | sqrt_bottom_spd_matrix = sqrtm_torch(bottom_spd_matrix) 99 | 100 | # Solve the equation for each data 101 | for n in range(nb_data): 102 | # Compute sqrtm of the top block 103 | sqrt_top_spd_matrix = sqrtm_torch(x_spd_low_dimension[n]) 104 | 105 | # Side block 106 | side_block = torch.mm(torch.mm(sqrt_top_spd_matrix, contraction_matrix), sqrt_bottom_spd_matrix) 107 | 108 | # Reconstruct full SPD matrix 109 | x_spd_reconstructed = torch.cat((torch.cat((x_spd_low_dimension[n], side_block), dim=1), 110 | torch.cat((side_block.T, bottom_spd_matrix), dim=1)), dim=0) 111 | 112 | # Rotate the matrix back to finalize the reconstruction 113 | x_spd[n] = torch.mm(rotation_matrix, torch.mm(x_spd_reconstructed, rotation_matrix.T)) 114 | 115 | if one_data_output: 116 | x_spd = x_spd[0] 117 | 118 | return x_spd 119 | -------------------------------------------------------------------------------- /BoManifolds/Riemannian_utils/sphere_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sc 3 | 4 | ''' 5 | This file is part of the GaBOtorch library. 6 | Authors: Noemie Jaquier and Leonel Rozo, 2020 7 | License: MIT 8 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com 9 | 10 | The functions of this file are based on the function of botorch (in botorch.optim). 11 | ''' 12 | 13 | 14 | def expmap(u, x0): 15 | """ 16 | This function maps a vector u lying on the tangent space of x0 into the manifold. 
17 | 18 | Parameters 19 | ---------- 20 | :param u: vector in the tangent space 21 | :param x0: basis point of the tangent space 22 | 23 | Returns 24 | ------- 25 | :return: x: point on the manifold 26 | """ 27 | if np.ndim(x0) < 2: 28 | x0 = x0[:, None] 29 | 30 | if np.ndim(u) < 2: 31 | u = u[:, None] 32 | 33 | norm_u = np.sqrt(np.sum(u*u, axis=0)) 34 | x = x0 * np.cos(norm_u) + u * np.sin(norm_u)/norm_u 35 | 36 | x[:, norm_u < 1e-16] = x0 37 | 38 | return x 39 | 40 | 41 | def logmap(x, x0): 42 | """ 43 | This function maps a point lying on the manifold into the tangent space of a second point of the manifold. 44 | 45 | Parameters 46 | ---------- 47 | :param x: point on the manifold 48 | :param x0: basis point of the tangent space where x will be mapped 49 | 50 | Returns 51 | ------- 52 | :return: u: vector in the tangent space of x0 53 | """ 54 | if np.ndim(x0) < 2: 55 | x0 = x0[:, None] 56 | 57 | if np.ndim(x) < 2: 58 | x = x[:, None] 59 | 60 | theta = np.arccos(np.maximum(np.minimum(np.dot(x0.T, x), 1.), -1.)) 61 | u = (x - x0 * np.cos(theta)) * theta/np.sin(theta) 62 | 63 | u[:, theta[0] < 1e-16] = np.zeros((u.shape[0], 1)) 64 | 65 | return u 66 | 67 | 68 | def sphere_distance(x, y): 69 | """ 70 | This function computes the Riemannian distance between two points on the manifold. 71 | 72 | Parameters 73 | ---------- 74 | :param x: point on the manifold 75 | :param y: point on the manifold 76 | 77 | Returns 78 | ------- 79 | :return: distance: manifold distance between x and y 80 | """ 81 | if np.ndim(x) < 2: 82 | x = x[:, None] 83 | 84 | if np.ndim(y) < 2: 85 | y = y[:, None] 86 | 87 | # Compute the inner product (should be [-1,1]) 88 | inner_product = np.dot(x.T, y) 89 | inner_product = np.maximum(np.minimum(inner_product, 1.), -1.)  # clip to [-1, 1], as in logmap 90 | return np.arccos(inner_product) 91 | 92 | 93 | def parallel_transport_operator(x1, x2): 94 | """ 95 | This function computes the parallel transport operator from x1 to x2. 96 | Transported vectors can be computed as u.dot(v). 97 | 98 | Parameters 99 | ---------- 100 | :param x1: point on the manifold 101 | :param x2: point on the manifold 102 | 103 | Returns 104 | ------- 105 | :return: operator: parallel transport operator 106 | """ 107 | if np.sum(x1-x2) == 0.: 108 | return np.eye(x1.shape[0]) 109 | else: 110 | if np.ndim(x1) < 2: 111 | x1 = x1[:, None] 112 | 113 | if np.ndim(x2) < 2: 114 | x2 = x2[:, None] 115 | 116 | x_dir = logmap(x2, x1) 117 | norm_x_dir = np.sqrt(np.sum(x_dir*x_dir, axis=0)) 118 | normalized_x_dir = x_dir / norm_x_dir 119 | u = np.dot(-x1 * np.sin(norm_x_dir), normalized_x_dir.T) + \ 120 | np.dot(normalized_x_dir * np.cos(norm_x_dir), normalized_x_dir.T) + np.eye(x_dir.shape[0]) - \ 121 | np.dot(normalized_x_dir, normalized_x_dir.T) 122 | 123 | return u 124 | 125 | 126 | def karcher_mean_sphere(data, nb_iter=10): 127 | """ 128 | This function computes the mean of points lying on the manifold (Fréchet/Karcher mean).
129 | 130 | Parameters 131 | ---------- 132 | :param data: data points lying on the manifold 133 | 134 | Optional parameters 135 | ------------------- 136 | :param nb_iter: number of iterations 137 | 138 | Returns 139 | ------- 140 | :return: m: mean of the datapoints 141 | """ 142 | # Initialize the mean as equal to the first datapoint 143 | m = data[:, 0] 144 | for i in range(nb_iter): 145 | data_tgt = logmap(data, m) 146 | m_tgt = np.mean(data_tgt, axis=1) 147 | m = expmap(m_tgt, m) 148 | 149 | return m 150 | 151 | 152 | def get_axisangle(d): 153 | """ 154 | Gets the axis-angle representation of a point lying on a unit sphere. 155 | Based on the function of riepybdlib (https://gitlab.martijnzeestraten.nl/martijn/riepybdlib) 156 | 157 | Parameters 158 | ---------- 159 | :param d: point on the sphere 160 | 161 | Returns 162 | ------- 163 | :return: axis, angle: corresponding axis and angle representation 164 | """ 165 | norm = np.sqrt(d[0]**2 + d[1]**2) 166 | if norm < 1e-6: 167 | return np.array([0, 0, 1]), 0 168 | else: 169 | vec = np.array([-d[1], d[0], 0]) 170 | return vec/norm, np.arccos(d[2]) 171 | 172 | 173 | def rotation_from_sphere_points(x, y): 174 | """ 175 | Gets the rotation matrix that moves x to y along the geodesic path on the sphere. 176 | Based on the equations of "Analysis of principal nested spheres", Jung et al. 2012 (appendix) 177 | 178 | Parameters 179 | ---------- 180 | :param x: point on a sphere 181 | :param y: point on a sphere 182 | 183 | Returns 184 | ------- 185 | :return: rotation matrix 186 | """ 187 | if np.ndim(x) < 2: 188 | x = x[:, None] 189 | if np.ndim(y) < 2: 190 | y = y[:, None] 191 | 192 | dim = x.shape[0] 193 | 194 | in_prod = np.dot(x.T, y) 195 | in_prod = np.maximum(np.minimum(in_prod, 1.), -1.)  # clip to [-1, 1] 196 | c_vec = x - y * in_prod 197 | c_vec = c_vec / np.linalg.norm(c_vec) 198 | 199 | R = np.eye(dim) + np.sin(np.arccos(in_prod)) * (np.dot(y, c_vec.T) - np.dot(c_vec, y.T)) + (in_prod - 1.) * (np.dot(y, y.T) + np.dot(c_vec, c_vec.T)) 200 | 201 | return R 202 | 203 | -------------------------------------------------------------------------------- /BoManifolds/kernel_utils/kernels_nested_sphere.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import gpytorch 4 | from gpytorch.constraints import GreaterThan, Interval 5 | 6 | import pymanopt.manifolds as pyman_man 7 | 8 | from BoManifolds.Riemannian_utils.sphere_utils_torch import sphere_distance_torch 9 | from BoManifolds.nested_mappings.nested_spheres_utils import projection_from_sphere_to_subsphere 10 | 11 | ''' 12 | This file is part of the GaBOtorch library. 13 | Authors: Noemie Jaquier and Leonel Rozo, 2020 14 | License: MIT 15 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com 16 | ''' 17 | 18 | 19 | class NestedSphereGaussianKernel(gpytorch.kernels.Kernel): 20 | """ 21 | Instances of this class represent a Gaussian (RBF) covariance matrix between projected input points from a 22 | high-dimensional sphere manifold to a low-dimensional sphere manifold via nested-spheres projections.
23 | 24 | Attributes 25 | ---------- 26 | self.beta_min, minimum value of the inverse square lengthscale parameter beta 27 | self.dim, dimension of the ambient high-dimensional sphere manifold 28 | self.latent_dim, dimension of the latent low-dimensional sphere manifold 29 | 30 | Properties 31 | ---------- 32 | self.beta, inverse square lengthscale parameter beta 33 | self.sphere_axes, axes of the nested spheres belonging to [Sd, Sd-1, ..., Sd-r+1] 34 | self.sphere_distances_to_axes, distances from the axes w.r.t each point of the nested spheres of [Sd, Sd-1, ..., 35 | Sd-r+1] 36 | 37 | Methods 38 | ------- 39 | forward(point1_in_the_sphere, point2_in_the_sphere, diagonal_matrix_flag=False, **params) 40 | 41 | Static methods 42 | -------------- 43 | """ 44 | def __init__(self, dim, latent_dim, beta_min, beta_prior=None, **kwargs): 45 | """ 46 | Initialisation. 47 | 48 | Parameters 49 | ---------- 50 | :param dim: dimension of the ambient high-dimensional sphere manifold 51 | :param latent_dim: dimension of the latent low-dimensional sphere manifold 52 | :param beta_min: minimum value of the inverse square lengthscale parameter beta 53 | 54 | Optional parameters 55 | ------------------- 56 | :param beta_prior: prior on the parameter beta 57 | :param kwargs: additional arguments 58 | """ 59 | super(NestedSphereGaussianKernel, self).__init__(has_lengthscale=False, **kwargs) 60 | self.beta_min = beta_min 61 | self.dim = dim 62 | self.latent_dim = latent_dim 63 | 64 | # Add beta parameter, corresponding to the inverse of the lengthscale parameter. 65 | beta_num_dims = 1 66 | self.register_parameter(name="raw_beta", 67 | parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1, beta_num_dims))) 68 | 69 | if beta_prior is not None: 70 | self.register_prior("beta_prior", beta_prior, lambda: self.beta, lambda v: self._set_beta(v)) 71 | 72 | # A GreaterThan constraint is defined on the lengthscale parameter to guarantee positive-definiteness. 73 | # The value of beta_min can be determined e.g. experimentally. 
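# A suitable value can, for instance, be selected as in examples/kernels/sphere/sphere_gaussian_kernel_parameters.py, # by taking the smallest beta for which sampled kernel matrices remain positive definite.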
74 | self.register_constraint("raw_beta", GreaterThan(self.beta_min)) 75 | 76 | # Add projection parameters 77 | for d in range(self.dim, self.latent_dim, -1): 78 | # Axes parameters 79 | # Register 80 | axis_name = "raw_axis_S" + str(d) 81 | # axis = torch.zeros(1, d) 82 | # axis[:, 0] = 1 83 | axis = torch.randn(1, d) 84 | axis = axis / torch.norm(axis) 85 | axis = axis.repeat(*self.batch_shape, 1, 1) 86 | self.register_parameter(name=axis_name, 87 | parameter=torch.nn.Parameter(axis)) 88 | # Corresponding manifold 89 | axis_manifold_name = "raw_axis_S" + str(d) + "_manifold" 90 | setattr(self, axis_manifold_name, pyman_man.Sphere(d)) 91 | 92 | # Distance to axis (constant), fixed at pi/2 93 | self.distances_to_axis = [np.pi/2 *torch.ones(1, 1) for d in range(self.dim, self.latent_dim, -1)] 94 | 95 | @property 96 | def beta(self): 97 | return self.raw_beta_constraint.transform(self.raw_beta) 98 | 99 | @beta.setter 100 | def beta(self, value): 101 | self._set_beta(value) 102 | 103 | def _set_beta(self, value): 104 | if not torch.is_tensor(value): 105 | value = torch.as_tensor(value).to(self.raw_beta) 106 | self.initialize(raw_beta=self.raw_beta_constraint.inverse_transform(value)) 107 | # self.initialize(**{'raw_beta': self.raw_beta_constraint.inverse_transform(value)}) 108 | 109 | @property 110 | def axes(self): 111 | return [self._parameters["raw_axis_S" + str(d)] for d in range(self.dim, self.latent_dim, -1)] 112 | 113 | @axes.setter 114 | def axes(self, values_list): 115 | self._set_axes(values_list) 116 | 117 | def _set_axes(self, values_list): 118 | for d in range(self.dim, self.latent_dim, -1): 119 | value = values_list[self.dim-d] 120 | axis_name = "raw_axis_S" + str(d) 121 | if not torch.is_tensor(value): 122 | value = torch.as_tensor(value).to(self._parameters[axis_name]) 123 | self.initialize(**{axis_name: value}) 124 | 125 | def forward(self, x1, x2, diag=False, **params): 126 | """ 127 | Computes the Gaussian kernel matrix between inputs x1 and x2 belonging to the ambient high-dim. sphere manifold. 128 | 129 | Parameters 130 | ---------- 131 | :param x1: input points on the sphere 132 | :param x2: input points on the sphere 133 | 134 | Optional parameters 135 | ------------------- 136 | :param diag: Should we return the whole distance matrix, or just the diagonal? If True, we must have `x1 == x2` 137 | :param params: additional parameters 138 | 139 | Returns 140 | ------- 141 | :return: kernel matrix between p(x1) and p(x2) 142 | """ 143 | # Projection from the sphere to the latent low-dimensional sphere 144 | px1 = projection_from_sphere_to_subsphere(x1, self.axes, self.distances_to_axis)[-1] 145 | px2 = projection_from_sphere_to_subsphere(x2, self.axes, self.distances_to_axis)[-1] 146 | 147 | # Compute distance 148 | distance = sphere_distance_torch(px1, px2, diag=diag) 149 | distance2 = torch.mul(distance, distance) 150 | # Kernel 151 | exp_component = torch.exp(- distance2.mul(self.beta)) 152 | return exp_component 153 | -------------------------------------------------------------------------------- /BoManifolds/pymanopt_addons/problem.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module containing pymanopt problem class. Use this to build a problem 3 | object to feed to one of the solvers. 
4 | """ 5 | from __future__ import print_function 6 | 7 | # from pymanopt.tools.autodiff import (AutogradBackend, TensorflowBackend, 8 | # TheanoBackend, PytorchBackend) 9 | 10 | from BoManifolds.pymanopt_addons.tools.autodiff import (AutogradBackend, TensorflowBackend, 11 | TheanoBackend, PytorchBackend) 12 | 13 | 14 | class Problem(object): 15 | """ 16 | Problem class for setting up a problem to feed to one of the 17 | pymanopt solvers. 18 | 19 | Attributes: 20 | - manifold 21 | Manifold to optimize over. 22 | - cost 23 | A callable which takes an element of manifold and returns a 24 | real number, or a symbolic Theano or TensorFlow expression. 25 | In case of a symbolic expression, the gradient (and if 26 | necessary the Hessian) are computed automatically if they are 27 | not explicitly given. We recommend you take this approach 28 | rather than calculating gradients and Hessians by hand. 29 | - grad 30 | grad(x) is the gradient of cost at x. This must take an 31 | element X of manifold and return an element of the tangent space 32 | to manifold at X. This is usually computed automatically and 33 | doesn't need to be set by the user. 34 | - hess 35 | hess(x, a) is the directional derivative of grad at x, in 36 | direction a. It should return an element of the tangent 37 | space to manifold at x. 38 | - egrad 39 | The 'Euclidean gradient', egrad(x) should return the grad of 40 | cost in the usual sense, i.e. egrad(x) need not lie in the 41 | tangent space. 42 | - ehess 43 | The 'Euclidean Hessian', ehess(x, a) should return the 44 | directional derivative of egrad at x in direction a. This 45 | need not lie in the tangent space. 46 | - arg 47 | A symbolic (tensor) variable with respect to which you would like 48 | to optimize. Its type (together with the type of the cost argument) 49 | defines the autodiff backend used. 50 | - verbosity (2) 51 | Level of information printed by the solver while it operates, 0 52 | is silent, 2 is most information. 53 | """ 54 | def __init__(self, manifold, cost, egrad=None, ehess=None, grad=None, 55 | hess=None, arg=None, precon=None, verbosity=2): 56 | self.manifold = manifold 57 | # We keep a reference to the original cost function in case we want to 58 | # call the `prepare` method twice (for instance, after switching from 59 | # a first- to second-order method). 60 | self._cost = None 61 | self._original_cost = cost 62 | self._egrad = egrad 63 | self._ehess = ehess 64 | self._grad = grad 65 | self._hess = hess 66 | self._arg = arg 67 | self._backend = None 68 | 69 | if precon is None: 70 | def precon(x, d): 71 | return d 72 | self.precon = precon 73 | 74 | self.verbosity = verbosity 75 | 76 | self._backends = list( 77 | filter(lambda b: b.is_available(), [ 78 | TheanoBackend(), 79 | PytorchBackend(), 80 | AutogradBackend(), 81 | TensorflowBackend() 82 | ])) 83 | self._backend = None 84 | 85 | @property 86 | def backend(self): 87 | if self._backend is None: 88 | for backend in self._backends: 89 | if backend.is_compatible(self._original_cost, self._arg): 90 | self._backend = backend 91 | break 92 | else: 93 | backend_names = [str(backend) for backend in self._backends] 94 | if self.verbosity >= 1: 95 | print(backend_names) 96 | raise ValueError( 97 | "Cannot determine autodiff backend from cost function of " 98 | "type `{:s}`. 
Available backends are: {:s}".format( 99 | self._original_cost.__class__.__name__, 100 | ", ".join(backend_names))) 101 | return self._backend 102 | 103 | @property 104 | def cost(self): 105 | if (self._cost is None and callable(self._original_cost) and 106 | not AutogradBackend().is_available() and 107 | not PytorchBackend().is_available()): 108 | self._cost = self._original_cost 109 | 110 | elif self._cost is None: 111 | if self.verbosity >= 1: 112 | print("Compiling cost function...") 113 | self._cost = self.backend.compile_function(self._original_cost, 114 | self._arg) 115 | 116 | return self._cost 117 | 118 | @property 119 | def egrad(self): 120 | if self._egrad is None: 121 | if self.verbosity >= 1: 122 | print("Computing gradient of cost function...") 123 | egrad = self.backend.compute_gradient(self._original_cost, 124 | self._arg) 125 | self._egrad = egrad 126 | return self._egrad 127 | 128 | @property 129 | def grad(self): 130 | if self._grad is None: 131 | # Explicit access forces computation/compilation if necessary. 132 | egrad = self.egrad 133 | 134 | def grad(x): 135 | return self.manifold.egrad2rgrad(x, egrad(x)) 136 | self._grad = grad 137 | return self._grad 138 | 139 | @property 140 | def ehess(self): 141 | if self._ehess is None: 142 | if self.verbosity >= 1: 143 | print("Computing Hessian of cost function...") 144 | ehess = self.backend.compute_hessian(self._original_cost, 145 | self._arg) 146 | self._ehess = ehess 147 | return self._ehess 148 | 149 | @property 150 | def hess(self): 151 | if self._hess is None: 152 | # Explicit access forces computation if necessary. 153 | ehess = self.ehess 154 | 155 | def hess(x, a): 156 | return self.manifold.ehess2rhess( 157 | x, self.egrad(x), ehess(x, a), a) 158 | self._hess = hess 159 | return self._hess 160 | -------------------------------------------------------------------------------- /examples/kernels/sphere/sphere_gaussian_kernel_parameters.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | import matplotlib.pyplot as plt 5 | from mpl_toolkits.mplot3d import Axes3D 6 | 7 | from BoManifolds.kernel_utils.kernels_sphere import SphereGaussianKernel 8 | 9 | from BoManifolds.plot_utils.manifolds_plots import plot_sphere 10 | 11 | plt.rcParams['text.usetex'] = True # use Latex font for plots 12 | plt.rcParams['text.latex.preamble'] = r'\usepackage{bm}' 13 | """ 14 | This example shows the experimental selection of parameters for the Sphere Gaussian kernel. To do so, a random sampling 15 | is carried out from different Gaussian distributions on the manifold (random mean and identity covariance 16 | matrix). 17 | After, the corresponding kernel matrix is computed for a range of values for $beta$, with $theta = 1$. This process is 18 | repeated several times (in this case, 10) for each value of $beta$. 19 | A minimum value of $beta$ is set to the lowest $beta$ value leading to all the kernel matrices to be positive-definite. 20 | 21 | This file is part of the GaBOtorch library. 
22 | Authors: Noemie Jaquier and Leonel Rozo, 2020 23 | License: MIT 24 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com 25 | """ 26 | 27 | 28 | # Align y axis for double y axis plot 29 | def align_y_axis(axis1, v1, axis2, v2): 30 | _, y1 = axis1.transData.transform((0, v1)) 31 | _, y2 = axis2.transData.transform((0, v2)) 32 | inv = axis2.transData.inverted() 33 | _, dy = inv.transform((0, 0)) - inv.transform((0, y1-y2)) 34 | miny, maxy = axis2.get_ylim() 35 | axis2.set_ylim(miny+dy, maxy+dy) 36 | 37 | 38 | if __name__ == "__main__": 39 | # Generate random data in the manifold 40 | dim = 3 # Dimension of the manifold 41 | nb_samples = 500 # Total number of samples 42 | nb_sources = 10 # Number of Gaussians to sample from 43 | nb_trials = 20 # Number of set of random points to test 44 | fact_cov = 1. # Do not put it too big, otherwise the projected data on the manifold can be really far 45 | 46 | # Origin in the manifold 47 | origin_man = np.array([0, 0, 1]) 48 | 49 | # Define the range of parameter for the kernel 50 | nb_params = 30 51 | if dim == 3: 52 | betas = np.logspace(0, 5, nb_params) 53 | elif dim == 4: 54 | betas = np.logspace(0, 2, nb_params) 55 | elif 5 <= dim <= 10: 56 | betas = np.logspace(-0.2, 1.5, nb_params) 57 | elif dim > 10 : 58 | betas = np.logspace(-1.5, 0.5, nb_params) 59 | 60 | min_eigval_trials = [] 61 | 62 | for trial in range(nb_trials): 63 | print('Trial ', trial) 64 | 65 | # Means and covariances to generate random data 66 | mean = [np.random.randn(dim) for i in range(nb_sources)] 67 | mean = np.array(mean) 68 | mean = mean / np.linalg.norm(mean, axis=1)[:, None] 69 | cov = fact_cov * np.eye(dim) 70 | 71 | # This way of sampling is not extremely rigorous. 72 | # The sampling should be done on the tangent space and projected on the manifold. 73 | # However, it should be sufficient for the current purpose. 74 | # Sample data 75 | data = [np.random.multivariate_normal(mean[i], cov, int(nb_samples / nb_sources)).T for i in range(nb_sources)] 76 | 77 | # Project samples on the manifold 78 | data_man_tmp = [] 79 | for i in range(nb_sources): 80 | for n in range(int(nb_samples / nb_sources)): 81 | data_man_tmp.append(data[i][:, n] / np.linalg.norm(data[i][:, n])[None]) 82 | data_man = np.array(data_man_tmp) 83 | nb_data = data_man.shape[0] 84 | 85 | # Define and compute the kernel for the parameters 86 | K = [] 87 | min_eigval = [] 88 | 89 | for i in range(nb_params): 90 | # Create kernel instance and set beta 91 | k = SphereGaussianKernel(beta_min=0.0) 92 | k.beta = betas[i] 93 | 94 | # Compute the kernel 95 | Ktmp = k.forward(torch.tensor(data_man), torch.tensor(data_man)).detach().numpy() 96 | K.append(Ktmp) 97 | 98 | # Compute the eigenvalues 99 | eigvals, _ = np.linalg.eig(Ktmp) 100 | eigvals = np.real(eigvals) 101 | min_eigval.append(np.min(eigvals)) 102 | 103 | # Minimum eigenvalue of the kernel 104 | min_eigval = np.array(min_eigval) 105 | 106 | min_eigval_trials.append(min_eigval) 107 | 108 | # Compute percentage of PD kernels 109 | pd_kernels = np.array(min_eigval_trials) 110 | pd_kernels[pd_kernels > 0] = 1. 111 | pd_kernels[pd_kernels <= 0] = 0. 
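# Each row of pd_kernels now corresponds to one trial, with entry 1 where the kernel matrix was positive definite # for the corresponding beta; averaging over the trials below gives the fraction of PD kernel matrices per beta value.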
112 | percentage_pd_kernels = np.sum(pd_kernels, axis=0) / nb_trials 113 | 114 | print(betas) 115 | print(percentage_pd_kernels) 116 | 117 | # Plot input data if dim is 3 118 | if dim == 3: 119 | # 3D figure 120 | fig = plt.figure(figsize=(5, 5)) 121 | ax = Axes3D(fig) 122 | 123 | # Make the panes transparent 124 | ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) 125 | ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) 126 | ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) 127 | 128 | # Make the grid lines transparent 129 | ax.xaxis._axinfo["grid"]['color'] = (1, 1, 1, 0) 130 | ax.yaxis._axinfo["grid"]['color'] = (1, 1, 1, 0) 131 | ax.zaxis._axinfo["grid"]['color'] = (1, 1, 1, 0) 132 | 133 | # Remove axis 134 | ax._axis3don = False 135 | 136 | # Initial view 137 | ax.view_init(elev=10, azim=-20.) # (default: elev=30, azim=-60) 138 | # ax.view_init(elev=10, azim=50.) # (default: elev=30, azim=-60) 139 | 140 | # Plot sphere 141 | plot_sphere(ax, alpha=0.4) 142 | 143 | # Plot training data on the manifold 144 | plt.plot(data_man[:, 0], data_man[:, 1], data_man[:, 2], color='k', marker='.', linewidth=0., markersize=3.) 145 | 146 | # Plot mean of generated data 147 | plt.plot(mean[:, 0], mean[:, 1], mean[:, 2], color='r', marker='.', linewidth=0., markersize=6.) 148 | 149 | plt.title(r'Training data', size=20) 150 | 151 | # Plot minimum eigenvalue in function of the kernel parameter 152 | min_eigval_trials = np.array(min_eigval_trials) 153 | min_eigval_mean = np.mean(min_eigval_trials, axis=0) 154 | min_eigval_std = np.std(min_eigval_trials, axis=0) 155 | 156 | fig = plt.figure(figsize=(5, 5)) 157 | ax = plt.gca() 158 | plt.fill_between(np.log10(betas), min_eigval_mean - min_eigval_std, min_eigval_mean + min_eigval_std, alpha=0.2, 159 | color='orchid') 160 | plt.plot(np.log10(betas), min_eigval_mean, marker='o', color='orchid') 161 | plt.plot(np.log10(betas), np.zeros(nb_params), color='k') 162 | ax.set_xlabel(r'$\log_{10}(\beta)$') 163 | ax.set_ylabel(r'$\lambda_{\min}(\bm{K})$') 164 | 165 | # Plot percentage of positive kernel in function of the kernel parameter 166 | fig = plt.figure(figsize=(5, 5)) 167 | ax = plt.gca() 168 | plt.plot(np.log10(betas), percentage_pd_kernels, marker='o', color='darkblue') 169 | plt.plot(np.log10(betas), np.zeros(nb_params), color='k') 170 | ax.set_xlabel(r'$\log_{10}(\beta)$') 171 | ax.set_ylabel(r'PD percentage of $\bm{K}$') 172 | plt.show() 173 | 174 | # Plot min eigenvalue and percentage of PD kernel in function of the kernel parameter (one graph) 175 | fig = plt.figure(figsize=(10, 5)) 176 | ax1 = plt.gca() 177 | ax2 = ax1.twinx() 178 | ax1.plot(np.log10(betas), min_eigval_mean, color='orchid', marker='o') 179 | ax1.fill_between(np.log10(betas), min_eigval_mean - min_eigval_std, min_eigval_mean + min_eigval_std, color='orchid', alpha=0.2) 180 | ax2.plot(np.log10(betas), percentage_pd_kernels*100, color='darkblue', marker='o') 181 | ax2.plot(np.log10(betas), np.zeros(nb_params), color='k') 182 | 183 | ax1.tick_params(labelsize=30) 184 | ax2.tick_params(labelsize=30) 185 | ax1.locator_params(axis='y', nbins=4) 186 | ax2.locator_params(axis='y', nbins=4) 187 | 188 | ax1.set_xlabel(r'$\log_{10}(\beta)$', fontsize=44) 189 | ax1.set_ylabel(r'$\lambda_{\min}(\bm{K})$', fontsize=44) 190 | ax2.set_ylabel(r'PD percentage of $\bm{K}$', fontsize=44) 191 | 192 | align_y_axis(ax1, 0, ax2, 0) 193 | 194 | filename = '../../../Figures/sphere' + str(dim-1) + '_kernel_params.png' 195 | plt.savefig(filename, bbox_inches='tight') 196 | 
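# A minimal usage sketch (the value 6.5 is hypothetical and should be read off the plots above, as the smallest # beta for which all kernel matrices stay positive definite): # k = SphereGaussianKernel(beta_min=6.5) # K = k.forward(torch.tensor(data_man), torch.tensor(data_man)) # The GreaterThan constraint registered in the kernel then keeps the optimized beta above this threshold during # GP training.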
-------------------------------------------------------------------------------- /BoManifolds/plot_utils/manifolds_plots.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sc 3 | 4 | from BoManifolds.Riemannian_utils.utils import rotation_matrix_from_axis_angle 5 | from BoManifolds.Riemannian_utils.sphere_utils import get_axisangle 6 | import BoManifolds.Riemannian_utils.sphere_utils as sphere 7 | 8 | ''' 9 | This file is part of the GaBOtorch library. 10 | Authors: Noemie Jaquier and Leonel Rozo, 2020 11 | License: MIT 12 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com 13 | 14 | The functions of this file are based on the function of botorch (in botorch.optim). 15 | ''' 16 | 17 | 18 | def plot_sphere(ax, base=None, color=None, alpha=0.8, r=0.99, linewidth=0, lim=1.1, n_elems=100, **kwargs): 19 | """ 20 | Plots a sphere 21 | Based on the function of riepybdlib (https://gitlab.martijnzeestraten.nl/martijn/riepybdlib) 22 | 23 | Parameters 24 | ---------- 25 | :param ax: figure axes 26 | 27 | Optional parameters 28 | ------------------- 29 | :param color: color of the surface 30 | :param alpha: transparency index 31 | :param r: radius 32 | :param linewidth: linewidth of sphere lines 33 | :param lim: axes limits 34 | :param n_elems: number of points in the surface 35 | :param kwargs: 36 | 37 | Returns 38 | ------- 39 | :return: - 40 | """ 41 | if base is None: 42 | base = [0, 0, 1] 43 | else: 44 | if len(base) != 3: 45 | base = [0, 0, 1] 46 | print('Base was set to its default value as a wrong argument was given!') 47 | 48 | if color is None: 49 | color = [0.8, 0.8, 0.8] 50 | else: 51 | if len(color) != 3: 52 | color = [0.8, 0.8, 0.8] 53 | print('Sphere color was set to its default value as a wrong color argument was given!') 54 | 55 | u = np.linspace(0, 2 * np.pi, n_elems) 56 | v = np.linspace(0, np.pi, n_elems) 57 | 58 | x = r * np.outer(np.cos(u), np.sin(v)) 59 | y = r * np.outer(np.sin(u), np.sin(v)) 60 | z = r * np.outer(np.ones(np.size(u)), np.cos(v)) 61 | 62 | ax.plot_surface(x, y, z, rstride=4, cstride=4, color=color, linewidth=linewidth, alpha=alpha, **kwargs) 63 | # ax.plot(xs=[base[0]], ys=[base[1]], zs=[base[2]], marker='*', color=color) 64 | 65 | ax.set_xlim([-lim, lim]) 66 | ax.set_ylim([-lim, lim]) 67 | ax.set_zlim([-lim, lim]) 68 | 69 | 70 | def plot_sphere_tangent_plane(ax, base, l_vert=1, color='w', alpha=0.15, linewidth=0.5, **kwargs): 71 | """ 72 | Plots tangent plane of a point lying on the sphere manifold 73 | Based on the function of riepybdlib (https://gitlab.martijnzeestraten.nl/martijn/riepybdlib) 74 | 75 | Parameters 76 | ---------- 77 | :param ax: figure axes 78 | :param base: base point of the tangent space 79 | 80 | Optional parameters 81 | ------------------- 82 | :param l_vert: length/width of the displayed plane 83 | :param color: color of the plane 84 | :param alpha: transparency index 85 | :param linewidth: linewidth of the border of the plane 86 | :param kwargs: 87 | 88 | Returns 89 | ------- 90 | :return: - 91 | """ 92 | # Tangent axis at 0 rotation: 93 | T0 = np.array([[1, 0], [0, 1], [0, 0]]) 94 | 95 | # Rotation matrix with respect to zero: 96 | (axis, ang) = get_axisangle(base) 97 | R = rotation_matrix_from_axis_angle(axis, -ang) 98 | 99 | # Tangent axis in new plane: 100 | T = R.T.dot(T0) 101 | 102 | # Compute vertices of tangent plane at g 103 | hl = 0.5 * l_vert 104 | X = [[hl, hl], # p0 105 | [hl, -hl], # p1 106 | [-hl, hl], # p2 107 | [-hl, -hl]] # p3 108 | X = 
np.array(X).T 109 | points = (T.dot(X).T + base).T 110 | psurf = points.reshape((-1, 2, 2)) 111 | 112 | ax.plot_surface(psurf[0, :], psurf[1, :], psurf[2, :], color=color, alpha=alpha, linewidth=0, **kwargs) 113 | 114 | # Plot contours of the tangent space 115 | points_lines = points[:, [0, 1, 3, 2, 0]] 116 | ax.plot(points_lines[0], points_lines[1], points_lines[2], color='black', linewidth=linewidth) 117 | 118 | 119 | def plot_gaussian_on_sphere(ax, mu, sigma, color='red', linewidth=2, linealpha=1, **kwargs): 120 | """ 121 | Plots (mean and) covariance in the sphere manifold. 122 | Based on the function of riepybdlib (https://gitlab.martijnzeestraten.nl/martijn/riepybdlib) 123 | 124 | Parameters 125 | ---------- 126 | :param ax: figure axes 127 | :param mu: mean (point on the manifold) 128 | :param sigma: covariance belonging to the tangent space of the mean 129 | 130 | Optional parameters 131 | ------------------- 132 | :param color: color of the Gaussian 133 | :param linewidth: linewidth of the covariance 134 | :param linealpha: transparency index for the lines 135 | :param planealpha: transparency index for planes 136 | :param label: 137 | :param showtangent: 138 | :param kwargs: 139 | 140 | Returns 141 | ------- 142 | :return: - 143 | """ 144 | 145 | # Plot Gaussian 146 | # - Generate Points @ Identity: 147 | nbDrawingSeg = 35 148 | t = np.linspace(-np.pi, np.pi, nbDrawingSeg) 149 | R = np.real(sc.linalg.sqrtm(1.0 * sigma)) 150 | # Rotation for covariance 151 | # (axis, angle) = get_axisangle(mu) 152 | # R = R_from_axis_angle(axis, angle).dot(R) # Rotation for manifold location 153 | 154 | points = np.vstack((np.cos(t), np.sin(t), np.ones(nbDrawingSeg))) 155 | 156 | if np.ndim(mu) < 2: 157 | mu = mu[:, None] 158 | # points = R.dot(points) + mu 159 | points2 = R.dot(points) + mu 160 | points = sphere.expmap(R.dot(points), mu) 161 | 162 | # l, = ax.plot(xs=mu[0, None], ys=mu[1, None], zs=mu[2, None], marker='.', color=color, alpha=linealpha, 163 | # label=label, **kwargs) # Mean 164 | 165 | ax.plot(xs=points[0, :], ys=points[1, :], zs=points[2, :], 166 | color=color, 167 | linewidth=linewidth, 168 | markersize=2, alpha=linealpha, **kwargs) # Contour 169 | 170 | 171 | def plot_spd_cone(ax, r=1., color=[0.8, 0.8, 0.8], n_elems=50, linewidth=2., linewidth_axes=1., alpha=0.3, lim_fact=0.6, 172 | l1=47, l2=30): 173 | """ 174 | Plot the 2x2 SPD cone 175 | 176 | Parameters 177 | ---------- 178 | :param ax: figure acis 179 | :param r: radius of the cone 180 | :param color: color of the surface of the cone 181 | :param n_elems: number of elements used to plot the cone 182 | :param linewidth: linewidth of the borders of the cone 183 | :param linewidth_axes: linewidth of the axis of the symmetric space (plotted at the origin) 184 | :param alpha: transparency factor 185 | :param lim_fact: factor for the axis length 186 | :param l1: index of the first line plotted to represent the border of the cone 187 | :param l2: index of the second line plotted to represent the border of the cone 188 | 189 | Returns 190 | ------- 191 | :return: - 192 | """ 193 | 194 | phi = np.linspace(0, 2 * np.pi, n_elems) 195 | 196 | # Rotation of 45° of the cone 197 | dir = np.cross(np.array([1, 0, 0]), np.array([1., 1., 0.])) 198 | R = rotation_matrix_from_axis_angle(dir, np.pi / 4.) 
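# (Geometric note, added for clarity: a symmetric matrix [[a, c], [c, b]] is SPD iff
# a > 0, b > 0 and a*b > c^2. In the plot coordinates (a, b, c), the boundary
# a*b = c^2 is a cone with elliptic cross-section around the diagonal direction
# (1, 1, 0), which is why an ellipse with semi-axes r and r/sqrt(2) is built
# around the x-axis below and then rotated by 45 degrees.)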
199 | 200 | # Points of the cone 201 | xyz = np.vstack((r * np.ones(n_elems), r * np.sin(phi), r / np.sqrt(2) * np.cos(phi))) 202 | 203 | xyz = R.dot(xyz) 204 | 205 | x = np.vstack((np.zeros(n_elems), xyz[0])) 206 | y = np.vstack((np.zeros(n_elems), xyz[1])) 207 | z = np.vstack((np.zeros(n_elems), xyz[2])) 208 | 209 | # Draw cone 210 | ax.plot_surface(x, y, z, rstride=4, cstride=4, color=color, linewidth=linewidth, alpha=alpha) 211 | 212 | ax.plot(xyz[0], xyz[1], xyz[2], color='k', linewidth=linewidth) 213 | ax.plot(x[:, l1], y[:, l1], z[:, l1], color='k', linewidth=linewidth) 214 | ax.plot(x[:, l2], y[:, l2], z[:, l2], color='k', linewidth=linewidth) 215 | 216 | # Draw axis 217 | lim = lim_fact * r 218 | x_axis = np.array([[0, lim / 2], [0, 0], [0, 0]]) 219 | y_axis = np.array([[0, 0], [0, lim / 2], [0, 0]]) 220 | z_axis = np.array([[0, 0], [0, 0], [0, lim / 2]]) 221 | 222 | ax.plot(x_axis[0], x_axis[1], x_axis[2], color='k', linewidth=linewidth_axes) 223 | ax.plot(y_axis[0], y_axis[1], y_axis[2], color='k', linewidth=linewidth_axes) 224 | ax.plot(z_axis[0], z_axis[1], z_axis[2], color='k', linewidth=linewidth_axes) 225 | 226 | # Set limits 227 | ax.set_xlim([-lim/2, 3.*lim/2]) 228 | ax.set_ylim([-lim/2, 3*lim/2]) 229 | ax.set_zlim([-lim, lim]) 230 | 231 | 232 | -------------------------------------------------------------------------------- /BoManifolds/Riemannian_utils/spd_utils_torch.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | ''' 4 | This file is part of the GaBOtorch library. 5 | Authors: Noemie Jaquier and Leonel Rozo, 2020 6 | License: MIT 7 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com 8 | 9 | The functions of this file are based on the function of botorch (in botorch.optim). 10 | ''' 11 | 12 | 13 | def logm_torch(x): 14 | """ 15 | This function computes the logarithm of a matrix. 16 | 17 | Parameters 18 | ---------- 19 | :param x: positive definite matrix 20 | 21 | Returns 22 | ------- 23 | :return: logm(x) 24 | """ 25 | eigendecomposition = torch.symeig(x, eigenvectors=True) 26 | 27 | eigenvectors = eigendecomposition.eigenvectors 28 | log_eigenvalues = torch.log(eigendecomposition.eigenvalues) # Assume real eigenvalues (first column only) 29 | 30 | return torch.mm(eigenvectors, torch.mm(torch.diag(log_eigenvalues), torch.inverse(eigenvectors))) 31 | 32 | 33 | def sqrtm_torch(x): 34 | """ 35 | This function computes the square root of a matrix. 36 | 37 | Parameters 38 | ---------- 39 | :param x: positive definite matrix 40 | 41 | Returns 42 | ------- 43 | :return: sqrtm(x) 44 | """ 45 | eigendecomposition = torch.symeig(x, eigenvectors=True) 46 | 47 | eigenvectors = eigendecomposition.eigenvectors 48 | sqrt_eigenvalues = torch.sqrt(eigendecomposition.eigenvalues) # Assume real eigenvalues (first column only) 49 | 50 | return torch.mm(eigenvectors, torch.mm(torch.diag(sqrt_eigenvalues), torch.inverse(eigenvectors))) 51 | 52 | 53 | def affine_invariant_distance_torch(x1, x2, diagonal_distance=False): 54 | """ 55 | Compute the affine invariant distance between points on the SPD manifold 56 | 57 | Parameters 58 | ---------- 59 | :param x1: set of SPD matrices (N1 x d x d or b1 x ... x bk x N1 x d x d) 60 | :param x2: set of SPD matrices (N2 x d x d or b1 x ... x bk x N1 x d x d) 61 | 62 | Optional parameters 63 | ------------------- 64 | :param diagonal_distance: Whole distance matrix, or just the diagonal? If True, we must have `x1 == x2`. 
65 | 66 | Returns 67 | ------- 68 | :return: matrix of manifold affine-invariante distance between points in x1 and x2 69 | (N1 x N2 or b1 x ... x bk x N1 x N2) 70 | """ 71 | # If diag, x1 must be equal to x2 and we can return zeros. 72 | if diagonal_distance is True: 73 | shape = list(x2.shape)[:-2] 74 | shape.append(1) 75 | return torch.zeros(shape, dtype=x1.dtype) 76 | 77 | dim = x1.shape[-1] 78 | 79 | # Expand dimensions to compute all matrix-matrix distances 80 | x1 = x1.unsqueeze(-3) 81 | x2 = x2.unsqueeze(-4) 82 | 83 | # Method 1: compute x1^(-1) 84 | # x1_inv = torch.inverse(x1) # method 1: uses inv(x1)x2 85 | 86 | # Method 2: uses the Cholesky decomposition to compute x1^(-0.5) 87 | x1_chol = torch.cholesky(x1) 88 | x1_chol_inv = torch.inverse(x1_chol) 89 | 90 | # Repeat x and y data along 1- and 0- dimensions to have ndata_x x ndata_y x dim x dim arrays 91 | # x1_inv = torch.cat(x2.shape[-3] * [x1_inv], dim=-3) # method 1 92 | x1_chol_inv = torch.cat(x2.shape[-3] * [x1_chol_inv], dim=-3) 93 | x2 = torch.cat(x1.shape[-4] * [x2], dim=-4) 94 | 95 | # Compute the distance between each pair of matrices 96 | # Method 1: compute x1\x2 97 | # x1_inv_x2 = torch.bmm(x1_inv.view(-1, dim, dim), x2.view(-1, dim, dim)) # method 1 98 | # x1_inv_x2 = torch.solve(x1, x2).solution.view(-1, dim, dim) 99 | 100 | # Method 2: compute x1^(-0.5)*x2*x1^(-0.5) 101 | # The advantage of this method is that the resulting matrix is symmetric => we can use symeig for the eigenvalues. 102 | x1_inv_x2_x1_inv = torch.bmm(torch.bmm(x1_chol_inv.view(-1, dim, dim), x2.view(-1, dim, dim)), 103 | x1_chol_inv.view(-1, dim, dim).transpose(-2, -1)) 104 | 105 | # x1_inv_x2_x1_inv += 1e-10 * torch.rand(x1_inv_x2_x1_inv.shape) 106 | 107 | # Compute norm(logm(x1\x2)) 108 | eig_values = torch.zeros(x1_inv_x2_x1_inv.shape[0], dim) 109 | for i in range(x1_inv_x2_x1_inv.shape[0]): 110 | eig_values[i] = torch.symeig(x1_inv_x2_x1_inv[i], eigenvectors=True).eigenvalues # Eigenvalue True necessary 111 | # for derivation 112 | # Reshape 113 | shape = list(x2.shape)[:-2] 114 | shape.append(eig_values.shape[-1]) 115 | eigv = eig_values.view(shape) 116 | 117 | logeigv = torch.log(eigv) 118 | logeigv2 = logeigv * logeigv 119 | sumlogeigv2 = torch.sum(logeigv2, dim=-1) 120 | return torch.sqrt(sumlogeigv2 + 1e-15).double() 121 | # return torch.sqrt(torch.sum(logeigv * logeigv, dim=-1) + 1e-15).double() 122 | 123 | 124 | def frobenius_distance_torch(x1, x2, diagonal_distance=False): 125 | """ 126 | Compute the Frobenius distance between matrix points 127 | 128 | Parameters 129 | ---------- 130 | :param x1: set of matrices (N1 x d1 x d2 or b1 x ... x bk x N1 x d1 x d2) 131 | :param x2: set of matrices (N2 x d1 x d2 or b1 x ... x bk x N2 x d1 x d2) 132 | 133 | Optional parameters 134 | ------------------- 135 | :param diagonal_distance: Whole distance matrix, or just the diagonal? If True, we must have `x1 == x2`. 136 | 137 | Returns 138 | ------- 139 | :return: matrix of Frobenius distance between points in x1 and x2 (N1 x N2 or b1 x ... x bk x N1 x N2) 140 | """ 141 | # If diag, x1 must be equal to x2 and we can return zeros. 
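# (Indeed, d_F(X, X) = ||X - X||_F = 0, so the diagonal of the distance matrix
# is identically zero and only a tensor of zeros with the right batch shape has
# to be returned.)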
142 | if diagonal_distance is True: 143 | shape = list(x2.shape)[:-2] 144 | shape.append(1) 145 | return torch.zeros(shape, dtype=x1.dtype) 146 | 147 | # Expand dimensions to compute all matrix-matrix distances 148 | x1 = x1.unsqueeze(-3) 149 | x2 = x2.unsqueeze(-4) 150 | 151 | # Repeat x and y data along 1- and 0- dimensions to have ndata_x x ndata_y x dim x dim arrays 152 | x1 = torch.cat(x2.shape[-3] * [x1], dim=-3) 153 | x2 = torch.cat(x1.shape[-4] * [x2], dim=-4) 154 | 155 | # Compute the distance between each pair of matrices 156 | return torch.norm(torch.add(x1, -x2) + 1e-15, dim=[-2, -1]).double() 157 | 158 | 159 | def vector_to_symmetric_matrix_mandel_torch(vectors): 160 | """ 161 | This function transforms vectors to symmetric matrices using Mandel notation 162 | 163 | Parameters 164 | ---------- 165 | :param vectors: set of vectors N x d_vec or b1 x ... x bk x N x d_vec 166 | 167 | Returns 168 | ------- 169 | :return: set of symmetric matrices N x d_mat x d_mat b1 x ... x bk x N x d_mat x d_mat 170 | """ 171 | init_shape = list(vectors.shape) 172 | vectors = vectors.view(-1, init_shape[-1]) 173 | n_mat = vectors.shape[0] 174 | d_vec = vectors.shape[1] 175 | d_mat = int((-1.0 + (1.0 + 8.0 * d_vec) ** 0.5) / 2.0) 176 | 177 | matrices = torch.zeros(n_mat, d_mat, d_mat, dtype=vectors.dtype) 178 | 179 | for n in range(n_mat): 180 | vector = vectors[n] 181 | matrix = torch.diag(vector[0:d_mat]) 182 | 183 | id = np.cumsum(range(d_mat, 0, -1)) 184 | 185 | for i in range(0, d_mat - 1): 186 | matrix += torch.diag(vector[range(id[i], id[i + 1])], i + 1) / 2.0 ** 0.5 187 | matrix += torch.diag(vector[range(id[i], id[i + 1])], -i - 1) / 2.0 ** 0.5 188 | 189 | matrices[n] = matrix 190 | 191 | new_shape = init_shape[:-1] 192 | new_shape.append(d_mat) 193 | new_shape.append(d_mat) 194 | return matrices.view(new_shape) 195 | 196 | 197 | def symmetric_matrix_to_vector_mandel_torch(matrices): 198 | """ 199 | This function transforms symmetric matrices to vectors using Mandel notation 200 | 201 | Parameters 202 | ---------- 203 | :param matrices: set of symmetric matrices N x d_mat x d_mat b1 x ... x bk x N x d_mat x d_mat 204 | 205 | Returns 206 | ------- 207 | :return: set of vectors N x d_vec or b1 x ... 
x bk x N x d_vec 208 | """ 209 | init_shape = list(matrices.shape) 210 | d_mat = matrices.shape[-1] 211 | matrices = matrices.view(-1, d_mat, d_mat) 212 | n_mat = matrices.shape[0] 213 | 214 | vectors = [] 215 | for n in range(n_mat): 216 | vector = matrices[n].diag() 217 | for d in range(1, d_mat): 218 | # Consider both diagonals for gradient computation 219 | vector = torch.cat((vector, 0.5 * (2.0**0.5*matrices[n].diag(d) + 2.0**0.5*matrices[n].diag(-d)))) 220 | vectors.append(vector[None]) 221 | 222 | vectors = torch.cat(vectors) 223 | new_shape = init_shape[:-2] 224 | new_shape.append(vectors.shape[-1]) 225 | 226 | return vectors.view(new_shape).type(matrices.dtype) 227 | -------------------------------------------------------------------------------- /examples/kernels/spd/spd_gaussian_kernel_parameters.py: -------------------------------------------------------------------------------- 1 | import types 2 | import numpy as np 3 | import torch 4 | 5 | import pymanopt.manifolds as pyman_man 6 | 7 | import matplotlib.pyplot as plt 8 | from mpl_toolkits.mplot3d import Axes3D 9 | 10 | from BoManifolds.Riemannian_utils.spd_utils import symmetric_matrix_to_vector_mandel, \ 11 | vector_to_symmetric_matrix_mandel, spd_sample 12 | from BoManifolds.kernel_utils.kernels_spd import SpdAffineInvariantGaussianKernel 13 | 14 | from BoManifolds.plot_utils.manifolds_plots import plot_spd_cone 15 | 16 | plt.rcParams['text.usetex'] = True # use Latex font for plots 17 | plt.rcParams['text.latex.preamble'] = r'\usepackage{bm}' 18 | """ 19 | This example shows the experimental selection of parameters for the SPD Affine-Invariant kernel. To do so, a random 20 | sampling is carried out on the SPD manifold for minimum and maximum eigenvalues included in a given interval. 21 | After, the corresponding kernel matrix is computed for a range of values for $beta$, with $theta = 1$. This process is 22 | repeated several times (in this case, 10) for each value of $beta$. 23 | A minimum value of $beta$ is set to the lowest $beta$ value leading to all the kernel matrices to be positive-definite. 24 | 25 | This file is part of the GaBOtorch library. 26 | Authors: Noemie Jaquier and Leonel Rozo, 2020 27 | License: MIT 28 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com 29 | """ 30 | 31 | 32 | # Align yaxis for double y axis plot 33 | def align_y_axis(axis1, v1, axis2, v2): 34 | _, y1 = axis1.transData.transform((0, v1)) 35 | _, y2 = axis2.transData.transform((0, v2)) 36 | inv = axis2.transData.inverted() 37 | _, dy = inv.transform((0, 0)) - inv.transform((0, y1 - y2)) 38 | miny, maxy = axis2.get_ylim() 39 | axis2.set_ylim(miny + dy, maxy + dy) 40 | 41 | 42 | if __name__ == "__main__": 43 | # Generate random data in the manifold 44 | dim = 3 # Dimension of the manifold 45 | nb_samples = 500 # Total number of samples 46 | nb_sources = 10 # Number of Gaussians to sample from 47 | nb_trials = 10 # Number of set of random points to test 48 | fact_cov = 1. 
# Do not make it too big, otherwise the projected data on the manifold can end up very far away
49 |
50 | # Minimum tolerated eigenvalue
51 | # We add a small tolerance to account for numerical imprecisions during the computation of the kernel, which are
52 | # then handled by gpytorch during GP computations
53 | min_tolerated_eigenvalue = -5e-7
54 |
55 | # Vector dimension
56 | dim_vec = int((dim*dim + dim) / 2)
57 |
58 | # Instantiate the manifold
59 | spd_manifold = pyman_man.PositiveDefinite(dim)
60 |
61 | # Update the random function of the manifold (the original one samples only eigenvalues between 1 and 2).
62 | # We need to specify the minimum and maximum eigenvalues of the random matrices.
63 | spd_manifold.rand = types.MethodType(spd_sample, spd_manifold)
64 | # Specify the domain
65 | min_eig = 0.001
66 | max_eig = 5.
67 | spd_manifold.min_eig = min_eig
68 | spd_manifold.max_eig = max_eig
69 |
70 | # Origin in the manifold
71 | origin_man = symmetric_matrix_to_vector_mandel(np.eye(dim))
72 |
73 | # Define the range of parameters for the kernel
74 | nb_params = 30
75 | if dim == 2:
76 | betas = np.logspace(-1, 2, nb_params)
77 | elif dim == 3:
78 | betas = np.logspace(-1.1, 1, nb_params)
79 | elif dim >= 5:
80 | betas = np.logspace(-1.5, 0.8, nb_params)
81 |
82 | min_eigval_trials = []
83 |
84 | for trial in range(nb_trials):
85 | print('Trial ', trial)
86 |
87 | # Sample data
88 | data_man = np.array([symmetric_matrix_to_vector_mandel(spd_manifold.rand()) for i in range(nb_samples)])
89 | nb_data = data_man.shape[0]
90 |
91 | # Remove too ill-conditioned matrices
92 | id_to_remove = []
93 | for n in range(nb_data):
94 | if np.linalg.cond(vector_to_symmetric_matrix_mandel(data_man[n])) > 100:
95 | id_to_remove.append(n)
96 | data_man = np.delete(data_man, id_to_remove, axis=0)
97 | nb_data = data_man.shape[0]
98 |
99 | # Define and compute the kernel for the parameters
100 | K = []
101 | min_eigval = []
102 |
103 | for i in range(nb_params):
104 | # Create kernel instance and set beta
105 | k = SpdAffineInvariantGaussianKernel(beta_min=0.0)
106 | k.beta = betas[i]
107 |
108 | # Compute the kernel
109 | Ktmp = k.forward(torch.tensor(data_man), torch.tensor(data_man)).detach().numpy()
110 | K.append(Ktmp)
111 |
112 | # Compute the eigenvalues
113 | eigvals, _ = np.linalg.eig(Ktmp)
114 | eigvals = np.real(eigvals)
115 | min_eigval.append(np.min(eigvals))
116 |
117 | # Minimum eigenvalue of the kernel
118 | min_eigval = np.array(min_eigval)
119 | min_eigval_trials.append(min_eigval)
120 |
121 | # Compute percentage of PD kernels
122 | pd_kernels = np.array(min_eigval_trials)
123 | pd_kernels[pd_kernels > min_tolerated_eigenvalue] = 1.
124 | pd_kernels[pd_kernels <= min_tolerated_eigenvalue] = 0.
125 | percentage_pd_kernels = np.sum(pd_kernels, axis=0) / nb_trials
126 |
127 | print(betas)
128 | print(percentage_pd_kernels)
129 |
130 | # Plot input data if dim is 2
131 | if dim == 2:
132 | # 3D figure
133 | fig = plt.figure(figsize=(5, 5))
134 | ax = Axes3D(fig)
135 |
136 | # Make the panes transparent
137 | ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
138 | ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
139 | ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
140 |
141 | # Make the grid lines transparent
142 | ax.xaxis._axinfo["grid"]['color'] = (1, 1, 1, 0)
143 | ax.yaxis._axinfo["grid"]['color'] = (1, 1, 1, 0)
144 | ax.zaxis._axinfo["grid"]['color'] = (1, 1, 1, 0)
145 |
146 | # Remove axis
147 | ax._axis3don = False
148 |
149 | # Initial view
150 | ax.view_init(elev=10, azim=-20.)
# (default: elev=30, azim=-60)
151 | # ax.view_init(elev=10, azim=50.) # (default: elev=30, azim=-60)
152 |
153 | # Plot SPD cone
154 | plot_spd_cone(ax, r=10., lim_fact=0.8)
155 |
156 | # Plot training data on the manifold
157 | plt.plot(data_man[:, 0], data_man[:, 1], data_man[:, 2] / np.sqrt(2), color='k', marker='.', linewidth=0.,
158 | markersize=3.)
159 |
160 | # Plot minimum eigenvalue as a function of the kernel parameter
161 | min_eigval_trials = np.array(min_eigval_trials)
162 | min_eigval_mean = np.mean(min_eigval_trials, axis=0)
163 | min_eigval_std = np.std(min_eigval_trials, axis=0)
164 |
165 | fig = plt.figure(figsize=(5, 5))
166 | ax = plt.gca()
167 | plt.fill_between(np.log10(betas), min_eigval_mean - min_eigval_std, min_eigval_mean + min_eigval_std, alpha=0.2,
168 | color='orchid')
169 | plt.plot(np.log10(betas), min_eigval_mean, marker='o', color='orchid')
170 | plt.plot(np.log10(betas), np.zeros(nb_params), color='k')
171 | ax.set_xlabel(r'$\log_{10}(\beta)$')
172 | ax.set_ylabel(r'$\lambda_{\min}(\bm{K})$')
173 |
174 | # Plot percentage of positive-definite kernels as a function of the kernel parameter
175 | fig = plt.figure(figsize=(5, 5))
176 | ax = plt.gca()
177 | plt.plot(np.log10(betas), percentage_pd_kernels, marker='o', color='darkblue')
178 | plt.plot(np.log10(betas), np.zeros(nb_params), color='k')
179 | ax.set_xlabel(r'$\log_{10}(\beta)$')
180 | ax.set_ylabel(r'PD percentage of $\bm{K}$')
181 | plt.show()
182 |
183 | # Plot min eigenvalue and percentage of PD kernels as a function of the kernel parameter (one graph)
184 | fig = plt.figure(figsize=(10, 5))
185 | ax1 = plt.gca()
186 | ax2 = ax1.twinx()
187 | ax1.plot(np.log10(betas), min_eigval_mean, color='orchid', marker='o')
188 | ax1.fill_between(np.log10(betas), min_eigval_mean - min_eigval_std, min_eigval_mean + min_eigval_std,
189 | color='orchid', alpha=0.2)
190 | ax2.plot(np.log10(betas), percentage_pd_kernels * 100, color='darkblue', marker='o')
191 | ax2.plot(np.log10(betas), np.zeros(nb_params), color='k')
192 |
193 | ax1.tick_params(labelsize=30)
194 | ax2.tick_params(labelsize=30)
195 | ax1.locator_params(axis='y', nbins=4)
196 | ax2.locator_params(axis='y', nbins=4)
197 |
198 | ax1.set_xlabel(r'$\log_{10}(\beta)$', fontsize=44)
199 | ax1.set_ylabel(r'$\lambda_{\min}(\bm{K})$', fontsize=44)
200 | ax2.set_ylabel(r'PD percentage of $\bm{K}$', fontsize=44)
201 |
202 | ax2.set_ylim(-10., 110)
203 | align_y_axis(ax1, 0, ax2, 0)
204 |
205 | filename = '../../../Figures/spd' + str(dim) + '_kernel_params.png'
206 | plt.savefig(filename, bbox_inches='tight')
207 |
-------------------------------------------------------------------------------- /BoManifolds/nested_mappings/nested_spheres_utils.py: -------------------------------------------------------------------------------- 1 | import torch
2 |
3 | from BoManifolds.Riemannian_utils.sphere_utils_torch import sphere_distance_torch, rotation_from_sphere_points_torch
4 |
5 | '''
6 | This file is part of the GaBOtorch library.
7 | Authors: Noemie Jaquier and Leonel Rozo, 2020
8 | License: MIT
9 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com
10 | '''
11 |
12 |
13 | def projection_from_sphere_to_nested_sphere(x, sphere_axis, sphere_distance_to_axis):
14 | """
15 | This function computes the projection of data on a sphere Sd to a small circle in Sd (nested sphere).
16 |
17 | Parameters
18 | ----------
19 | :param x: data on the sphere Sd [N x d] or [b1 x ...
x bk x N x d] 20 | :param sphere_axis: axis of the nested sphere belonging to Sd 21 | :param sphere_distance_to_axis: distance from the axis w.r.t each point of the nested sphere 22 | 23 | Returns 24 | ------- 25 | :return: data projected on the nested sphere [N x d] or [b1 x ... x bk x N x d] 26 | """ 27 | dim = x.shape[-1] 28 | 29 | # If x has more than two dimensions, fuse the first dimensions so that x = N x d 30 | init_shape = list(x.shape) 31 | x = x.view(-1, init_shape[-1]) 32 | 33 | # Ensure that sphere axis is 2-dimensional 34 | if sphere_axis.dim() == 1: 35 | sphere_axis = sphere_axis.unsqueeze(-2) 36 | 37 | # Here, we first rotate the data, so that the axis is aligned with the north pole. For some reason, 38 | # this seems to limit the numerical errors, even more when the distance between the axis and the data is very small. 39 | # The data are finally rotated back once projected into the nested sphere. 40 | # Rotation matrix to rotate the axis to the north pole 41 | north_pole = torch.zeros_like(sphere_axis) 42 | north_pole[:, -1] = 1. 43 | rotation_matrix = rotation_from_sphere_points_torch(sphere_axis, north_pole) 44 | 45 | # Rotate data 46 | x_rotated = torch.mm(rotation_matrix, x.T).T 47 | 48 | # Compute the distance between the data and the subsphere axis 49 | distance_to_axis = sphere_distance_torch(x_rotated, north_pole) 50 | distance_to_axis = distance_to_axis.repeat((1, dim)) 51 | 52 | # Project data to the nested sphere 53 | x_nested_sphere_rotated = torch.sin(sphere_distance_to_axis) * x_rotated \ 54 | + torch.sin(distance_to_axis - sphere_distance_to_axis) * north_pole 55 | x_nested_sphere_rotated_rescaled = x_nested_sphere_rotated / (torch.sin(distance_to_axis) 56 | + 1e-6*torch.ones_like(distance_to_axis)) 57 | 58 | # Rotate the data back 59 | x_nested_sphere = torch.mm(rotation_matrix.T, x_nested_sphere_rotated_rescaled.T).T 60 | 61 | # Back to initial data structure 62 | new_shape = init_shape[:-1] 63 | new_shape.append(x_nested_sphere.shape[-1]) 64 | 65 | return x_nested_sphere.view(new_shape) 66 | 67 | 68 | def projection_from_sphere_to_next_subsphere(x, sphere_axis, sphere_distance_to_axis): 69 | """ 70 | This function computes the projection of data on a sphere Sd to a subsphere Sd-1. 71 | The data are first projected to a nested sphere, which is then identified with Sd-1. 72 | 73 | Parameters 74 | ---------- 75 | :param x: data on the sphere Sd [N x d] or [b1 x ... x bk x N x d] 76 | :param sphere_axis: axis of the nested sphere belonging to Sd 77 | :param sphere_distance_to_axis: distance from the axis w.r.t each point of the nested sphere 78 | 79 | Returns 80 | ------- 81 | :return: data projected on the subsphere [N x d-1] or [b1 x ... x bk x N x d] 82 | """ 83 | # If x has more than two dimensions, fuse the first dimensions so that x = N x d 84 | init_shape = list(x.shape) 85 | x = x.view(-1, init_shape[-1]) 86 | 87 | # Ensure that sphere axis is 2-dimensional 88 | if sphere_axis.dim() == 1: 89 | sphere_axis = sphere_axis.unsqueeze(-2) 90 | 91 | # Define the north pole 92 | north_pole = torch.zeros_like(sphere_axis) 93 | north_pole[:, -1] = 1. 
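# (Overview of the steps below, following the nested-spheres construction used in
# this file: the nested sphere {x in Sd : d(sphere_axis, x) = r} is a small
# circle of radius sin(r); it is identified with the unit subsphere Sd-1 by
# rotating sphere_axis to the north pole, dropping the last coordinate, and
# rescaling by 1/sin(r).)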
94 | 95 | # Projection onto the nested sphere defined by the axis and distance 96 | x_nested_sphere = projection_from_sphere_to_nested_sphere(x, sphere_axis, sphere_distance_to_axis) 97 | 98 | # Rotation matrix to rotate the axis to the north pole 99 | rotation_matrix = rotation_from_sphere_points_torch(sphere_axis, north_pole) 100 | 101 | # Identification of the nested sphere with the subsphere of radius 1 102 | x_subsphere = torch.mm(rotation_matrix[:-1, :], x_nested_sphere.T).T / (torch.sin(sphere_distance_to_axis) + 103 | 1e-6 * 104 | torch.ones_like(sphere_distance_to_axis)) 105 | 106 | # For numerical reason, we ensure here that the norm is exactly 1. 107 | norm_x_subsphere = torch.norm(x_subsphere, dim=[-1]).unsqueeze(-1).repeat(1, x_subsphere.shape[-1]) 108 | x_subsphere = x_subsphere / (norm_x_subsphere + 1e-6 * torch.ones_like(norm_x_subsphere)) 109 | 110 | # Back to initial data structure 111 | new_shape = init_shape[:-1] 112 | new_shape.append(x_subsphere.shape[-1]) 113 | 114 | return x_subsphere.view(new_shape) 115 | 116 | 117 | def projection_from_sphere_to_subsphere(x, sphere_axes, sphere_distances_to_axes): 118 | """ 119 | This function computes the projection of data on a sphere Sd to a subsphere Sd-r. 120 | For each dimension, the data are first projected to a nested sphere Si, which is then identified with Si-1. 121 | 122 | Parameters 123 | ---------- 124 | :param x: data on the sphere Sd [N x d] 125 | :param sphere_axes: axes of the nested spheres belonging to [Sd, Sd-1, ..., Sd-r+1] 126 | :param sphere_distances_to_axes: distances from the axes w.r.t each point of the nested spheres of 127 | [Sd, Sd-1, ..., Sd-r+1] 128 | 129 | Returns 130 | ------- 131 | :return: data projected on the subspheres of dimension d-1 to d-r list([N x d-1], ..., [N x d-r]) 132 | """ 133 | x_subsphere = [x] 134 | 135 | # Transform parameters into lists 136 | if not isinstance(sphere_axes, list): 137 | sphere_axes = [sphere_axes] 138 | if not isinstance(sphere_distances_to_axes, list): 139 | sphere_distances_to_axes = [sphere_distances_to_axes] 140 | 141 | # Compute the subsphere for each dimension from the precedent subsphere 142 | for s in range(len(sphere_axes)): 143 | x_subsphere.append(projection_from_sphere_to_next_subsphere(x_subsphere[-1], sphere_axes[s], 144 | sphere_distances_to_axes[s])) 145 | 146 | return x_subsphere 147 | 148 | 149 | def projection_from_subsphere_to_next_sphere(x_subsphere, sphere_axis, sphere_distance_to_axis): 150 | """ 151 | This function computes the projection of data from a subsphere Sd-1 to a sphere Sd. 152 | The data are first identified on a nested sphere with the axis at the north pole, and then rotated according to the 153 | nested sphere axis. 154 | 155 | Parameters 156 | ---------- 157 | :param x_subsphere: data on the subsphere Sd-1 [N x d-1] 158 | :param sphere_axis: axis of the nested sphere belonging to Sd 159 | :param sphere_distance_to_axis: distance from the axis w.r.t each point of the nested sphere 160 | 161 | Returns 162 | ------- 163 | :return: data projected on the sphere of dimension d [N x d] 164 | """ 165 | if sphere_axis.dim() == 1: 166 | sphere_axis = sphere_axis.unsqueeze(-2) 167 | 168 | # Define the north pole 169 | north_pole = torch.zeros_like(sphere_axis) 170 | north_pole[:, -1] = 1. 
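# (This mapping inverts the subsphere projection above: a point x_sub in Sd-1 is
# lifted to [sin(r) * x_sub, cos(r)], which is a unit vector of Sd at distance r
# from the north pole since ||x_sub|| = 1, and is then rotated so that the north
# pole is mapped to sphere_axis.)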
171 | 172 | # Rotation matrix to rotate the north pole to the axis 173 | rotation_matrix = rotation_from_sphere_points_torch(north_pole, sphere_axis) 174 | 175 | # Projection of the data from the subsphere to the sphere 176 | cos_vector = torch.cos(sphere_distance_to_axis) * torch.ones(x_subsphere.shape[0], 1, dtype=x_subsphere.dtype) 177 | x = torch.mm(rotation_matrix, torch.cat((torch.sin(sphere_distance_to_axis) * x_subsphere, cos_vector), 1).T).T 178 | 179 | return x 180 | 181 | 182 | def projection_from_subsphere_to_sphere(x_subsphere, sphere_axes, sphere_distances_to_axes): 183 | """ 184 | This function computes the projection of data from a subsphere Sd-r to a sphere Sd. 185 | For each dimension, the data are first identified on a nested sphere with the axis at the north pole, and then 186 | rotated according to the nested sphere axis. 187 | 188 | Parameters 189 | ---------- 190 | :param x_subsphere: data on the subsphere Sd-1 [N x d-1] 191 | :param sphere_axes: axes of the nested spheres belonging to [Sd, Sd-1, ..., Sd-r+1] 192 | :param sphere_distances_to_axes: distances from the axes w.r.t each point of the nested spheres of 193 | [Sd, Sd-1, ..., Sd-r+1] 194 | 195 | Returns 196 | ------- 197 | :return: data projected on the spheres of dimension d-r+1 to d list([N x d-r+1], ..., [N x d]) 198 | """ 199 | x = [x_subsphere] 200 | 201 | # Transform parameters into lists 202 | if not isinstance(sphere_axes, list): 203 | sphere_axes = [sphere_axes] 204 | if not isinstance(sphere_distances_to_axes, list): 205 | sphere_distances_to_axes = [sphere_distances_to_axes] 206 | 207 | # Compute the sphere for each dimension from the precedent sphere 208 | nb_spheres = len(sphere_axes) 209 | for s in range(nb_spheres): 210 | x.append(projection_from_subsphere_to_next_sphere(x[-1], sphere_axes[nb_spheres-s-1], 211 | sphere_distances_to_axes[nb_spheres-s-1])) 212 | 213 | return x 214 | 215 | 216 | 217 | -------------------------------------------------------------------------------- /BoManifolds/manifold_optimization/manifold_gp_fit.py: -------------------------------------------------------------------------------- 1 | import time 2 | import types 3 | import warnings 4 | from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set, Tuple, Union 5 | from operator import attrgetter 6 | from collections import OrderedDict 7 | 8 | import numpy as np 9 | import torch 10 | from gpytorch import settings as gpt_settings 11 | from gpytorch.mlls.marginal_log_likelihood import MarginalLogLikelihood 12 | from torch.nn import Module 13 | 14 | from botorch.optim.utils import ( 15 | _get_extra_mll_args, 16 | ) 17 | 18 | from pymanopt.manifolds import Euclidean, Product 19 | from pymanopt.solvers.solver import Solver 20 | import pymanopt.solvers as pyman_solvers 21 | 22 | from BoManifolds.manifold_optimization.numpy_list_converter import TorchAttr, module_to_list_of_array, \ 23 | set_params_with_list_of_array 24 | from BoManifolds.manifold_optimization.approximate_hessian import get_hessianfd 25 | 26 | from BoManifolds.pymanopt_addons.problem import Problem 27 | 28 | ParameterBounds = Dict[str, Tuple[Optional[float], Optional[float]]] 29 | TScipyObjective = Callable[ 30 | [np.ndarray, MarginalLogLikelihood, Dict[str, TorchAttr]], Tuple[float, np.ndarray] 31 | ] 32 | TModToArray = Callable[ 33 | [Module, Optional[ParameterBounds], Optional[Set[str]]], 34 | Tuple[np.ndarray, Dict[str, TorchAttr], Optional[np.ndarray]], 35 | ] 36 | TArrayToMod = Callable[[Module, np.ndarray, Dict[str, TorchAttr]], Module] 
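# (For reference: a TModToArray function converts the parameters of a module into a
# list of numpy arrays, together with a property dictionary and optional bounds,
# while a TArrayToMod function writes such a list back into the module;
# module_to_list_of_array and set_params_with_list_of_array, used as defaults
# below, follow these signatures.)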
37 |
38 | '''
39 | This file is part of the GaBOtorch library.
40 | Authors: Noemie Jaquier and Leonel Rozo, 2020
41 | License: MIT
42 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com
43 |
44 | The functions of this file are based on the functions of botorch (in botorch.fit).
45 | '''
46 |
47 |
48 | class OptimizationIteration(NamedTuple):
49 | itr: int
50 | fun: float
51 | time: float
52 |
53 |
54 | def fit_gpytorch_manifold(
55 | mll: MarginalLogLikelihood,
56 | bounds: Optional[ParameterBounds] = None,
57 | solver: Solver = pyman_solvers.ConjugateGradient(maxiter=500),
58 | nb_init_candidates: int = 200,
59 | last_x_as_candidate_prob: float = 0.9,
60 | options: Optional[Dict[str, Any]] = None,
61 | track_iterations: bool = True,
62 | approx_mll: bool = False,
63 | module_to_array_func: TModToArray = module_to_list_of_array,
64 | module_from_array_func: TArrayToMod = set_params_with_list_of_array,
65 | ) -> Tuple[MarginalLogLikelihood, Dict[str, Union[float, List[OptimizationIteration]]]]:
66 | """
67 | This function fits a gpytorch model by maximizing MLL with a pymanopt optimizer.
68 |
69 | The model and likelihood in mll must already be in train mode.
70 | This method requires that the model has `train_inputs` and `train_targets`.
71 |
72 | Parameters
73 | ----------
74 | :param mll: MarginalLogLikelihood to be maximized.
75 |
76 | Optional parameters
77 | -------------------
78 | :param nb_init_candidates: number of random initial candidates for the GP parameters
79 | :param last_x_as_candidate_prob: probability that the last set of parameters is among the initial candidates
80 | :param bounds: A dictionary mapping parameter names to tuples of lower and upper bounds.
81 | :param solver: Pymanopt solver.
82 | :param options: Dictionary of options; the 'exclude' entry is used to exclude parameters from the optimization.
83 | :param track_iterations: Track the function values and wall time for each iteration.
84 | :param approx_mll: If True, use gpytorch's approximate MLL computation. This is disabled by default since the
85 | stochasticity is an issue for deterministic optimizers. Enabling this is only recommended when working with
86 | large training data sets (n>2000).
87 |
88 | Returns
89 | -------
90 | :return: 2-element tuple containing
91 | - MarginalLogLikelihood with parameters optimized in-place.
92 | - Dictionary with the following key/values:
93 | "fopt": Best mll value.
94 | "wall_time": Wall time of fitting.
95 | "iterations": List of OptimizationIteration objects with information on each iteration.
96 | If track_iterations is False, will be empty.
97 |
98 | Example:
99 | gp = SingleTaskGP(train_X, train_Y)
100 | mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
101 | mll.train()
102 | fit_gpytorch_manifold(mll)
103 | mll.eval()
104 | """
105 | options = options or {}
106 | # Current parameters
107 | x0, property_dict, bounds = module_to_array_func(module=mll, bounds=bounds, exclude=options.pop("exclude", None))
108 | x0 = [x0i.astype(np.float64) for x0i in x0]
109 | if bounds is not None:
110 | warnings.warn('Bounds handling not supported yet in fit_gpytorch_manifold')
111 | # bounds = Bounds(lb=bounds[0], ub=bounds[1], keep_feasible=True)
112 |
113 | t1 = time.time()
114 |
115 | # Define cost function
116 | def cost(x):
117 | param_dict = OrderedDict(mll.named_parameters())
118 | idx = 0
119 | for p_name, attrs in property_dict.items():
120 | # Construct the new tensor
121 | if len(attrs.shape) == 0: # deal with scalar tensors
122 | # new_data = torch.tensor(x[0], dtype=attrs.dtype, device=attrs.device)
123 | new_data = torch.tensor(x[idx][0], dtype=attrs.dtype, device=attrs.device)
124 | else:
125 | # new_data = torch.tensor(x, dtype=attrs.dtype, device=attrs.device).view(*attrs.shape)
126 | new_data = torch.tensor(x[idx], dtype=attrs.dtype, device=attrs.device).view(*attrs.shape)
127 | param_dict[p_name].data = new_data
128 | idx += 1
129 | # mllx = set_params_with_array(mll, x, property_dict)
130 | train_inputs, train_targets = mll.model.train_inputs, mll.model.train_targets
131 | mll.zero_grad()
132 | output = mll.model(*train_inputs)
133 | args = [output, train_targets] + _get_extra_mll_args(mll)
134 | loss = -mll(*args).sum()
135 | return loss
136 |
137 | def egrad(x):
138 | loss = cost(x)
139 | loss.backward()
140 | param_dict = OrderedDict(mll.named_parameters())
141 | grad = []
142 | for p_name in property_dict:
143 | t = param_dict[p_name].grad
144 | if t is None:
145 | # this deals with parameters that do not affect the loss
146 | if len(property_dict[p_name].shape) > 1 and property_dict[p_name].shape[0] > 1:
147 | # if the variable is a matrix, keep its shape
148 | grad.append(np.zeros(property_dict[p_name].shape))
149 | else:
150 | grad.append(np.zeros(property_dict[p_name].shape))
151 | else:
152 | if t.ndim > 1 and t.shape[0] > 1: # if the variable is a matrix, keep its shape
153 | grad.append(t.detach().cpu().double().clone().numpy())
154 | else: # Vector case
155 | grad.append(t.detach().view(-1).cpu().double().clone().numpy())
156 | return grad
157 |
158 | # Define the manifold (product of manifolds)
159 | manifolds_list = []
160 | for p_name, t in mll.named_parameters():
161 | try:
162 | # If a manifold is given add it
163 | manifolds_list.append(attrgetter(p_name + "_manifold")(mll))
164 | except AttributeError:
165 | # Otherwise, default: Euclidean
166 | manifolds_list.append(Euclidean(int(np.prod(property_dict[p_name].shape))))
167 | # Product of manifolds
168 | manifold = Product(manifolds_list)
169 |
170 | # Instantiate the problem on the manifold
171 | if track_iterations:
172 | verbosity = 2
173 | else:
174 | verbosity = 0
175 |
176 | problem = Problem(manifold=manifold, cost=cost, egrad=egrad, verbosity=verbosity, arg=torch.Tensor()) #, precon=precon)
177 |
178 | # For cases where the Hessian is hard/long to compute, we approximate it with finite differences of the gradient.
179 | # Typical cases: the Hessian can be hard to compute due to the 2nd derivative of the eigenvalue decomposition,
180 | # e.g. in the SPD affine-invariant distance.
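# (Sketch of the idea, assuming get_hessianfd implements the usual finite-difference
# scheme on manifolds: Hess f(x)[u] is approximated by
# (Transp(grad f(R_x(t*u))) - grad f(x)) / t
# for a small step t, a retraction R_x and a vector transport Transp back to the
# tangent space at x, so that only first-order derivatives are ever evaluated.)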
181 | problem._hess = types.MethodType(get_hessianfd, problem) 182 | 183 | # Choose initial parameters 184 | # Do not always consider x0, to encourage variations of the parameters. 185 | if np.random.rand() < last_x_as_candidate_prob: 186 | x0_candidates = [x0] 187 | x0_candidates += [manifold.rand() for i in range(nb_init_candidates - 1)] 188 | else: 189 | x0_candidates = [] 190 | x0_candidates += [manifold.rand() for i in range(nb_init_candidates)] 191 | for i in range(int(3*nb_init_candidates/4)): 192 | x0_candidates[i][0:4] = x0[0:4] #TODO remove hard-coding 193 | y0_candidates = [cost(x0_candidates[i]) for i in range(nb_init_candidates)] 194 | 195 | y_init, x_init_idx = torch.Tensor(y0_candidates).min(0) 196 | x_init = x0_candidates[x_init_idx] 197 | 198 | with gpt_settings.fast_computations(log_prob=approx_mll): 199 | # Logverbosity of the solver to 1 200 | solver._logverbosity = 1 201 | # Solve 202 | opt_x, opt_log = solver.solve(problem, x=x_init) 203 | 204 | # Construct info dict 205 | info_dict = { 206 | "fopt": float(cost(opt_x).detach().numpy()), 207 | "wall_time": time.time() - t1, 208 | "opt_log": opt_log, 209 | } 210 | # if not res.success: # TODO update 211 | # try: 212 | # # Some res.message are bytes 213 | # msg = res.message.decode("ascii") 214 | # except AttributeError: 215 | # # Others are str 216 | # msg = res.message 217 | # warnings.warn( 218 | # f"Fitting failed with the optimizer reporting '{msg}'", OptimizationWarning 219 | # ) 220 | # Set to optimum 221 | mll = module_from_array_func(mll, opt_x, property_dict) 222 | return mll, info_dict 223 | -------------------------------------------------------------------------------- /BoManifolds/kernel_utils/kernels_nested_spd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gpytorch 3 | from gpytorch.constraints import GreaterThan, Interval 4 | 5 | import pymanopt.manifolds as pyman_man 6 | 7 | from BoManifolds.Riemannian_utils.spd_utils_torch import vector_to_symmetric_matrix_mandel_torch, \ 8 | affine_invariant_distance_torch, frobenius_distance_torch, logm_torch 9 | from BoManifolds.nested_mappings.nested_spd_utils import projection_from_spd_to_nested_spd 10 | 11 | ''' 12 | This file is part of the GaBOtorch library. 13 | Authors: Noemie Jaquier and Leonel Rozo, 2020 14 | License: MIT 15 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com 16 | ''' 17 | 18 | 19 | class NestedSpdAffineInvariantGaussianKernel(gpytorch.kernels.Kernel): 20 | """ 21 | Instances of this class represent a Gaussian (RBF) covariance matrix between projected input points from a 22 | high-dimensional SPD manifold to a low-dimensional SPD manifold via nested-spheres projections using the 23 | affine-invariant distance. 24 | 25 | Attributes 26 | ---------- 27 | self.beta_min: minimum value of the inverse square lengthscale parameter beta 28 | self.dim, dimension of the ambient high-dimensional sphere manifold 29 | self.latent_dim, dimension of the latent low-dimensional sphere manifold 30 | 31 | Properties 32 | ---------- 33 | self.beta, inverse square lengthscale parameter beta 34 | self.projection_matrix, projection matrix of the nested SPD projection 35 | 36 | 37 | Methods 38 | ------- 39 | forward(point1_in_SPD, point2_in_SPD, diagonal_matrix_flag=False, **params): 40 | 41 | Static methods 42 | -------------- 43 | """ 44 | def __init__(self, dim, latent_dim, beta_min, beta_prior=None, **kwargs): 45 | """ 46 | Initialisation. 
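In short, the kernel computed by this class is k(X1, X2) = exp(-beta * d_AI(W'X1W, W'X2W)^2), where W is the projection matrix (an element of the Grassmann manifold) and d_AI denotes the affine-invariant distance; see forward().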
47 | 48 | Parameters 49 | ---------- 50 | :param dim: dimension of the ambient high-dimensional sphere manifold 51 | :param latent_dim: dimension of the latent low-dimensional sphere manifold 52 | :param beta_min: minimum value of the inverse square lengthscale parameter beta 53 | :param beta_prior: prior on the parameter beta 54 | :param kwargs: additional arguments 55 | """ 56 | super(NestedSpdAffineInvariantGaussianKernel, self).__init__(has_lengthscale=False, **kwargs) 57 | self.beta_min = beta_min 58 | self.dim = dim 59 | self.latent_dim = latent_dim 60 | 61 | # Add beta parameter, corresponding to the inverse of the lengthscale parameter. 62 | beta_num_dims = 1 63 | self.register_parameter(name="raw_beta", parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1, 64 | beta_num_dims))) 65 | 66 | if beta_prior is not None: 67 | self.register_prior("beta_prior", beta_prior, lambda: self.beta, lambda v: self._set_beta(v)) 68 | 69 | # A GreaterThan constraint is defined on the lengthscale parameter to guarantee the positive-definiteness of the 70 | # kernel. 71 | # The value of beta_min can be determined e.g. experimentally. 72 | self.register_constraint("raw_beta", GreaterThan(self.beta_min)) 73 | 74 | # Add projection parameters 75 | self.raw_projection_matrix_manifold = pyman_man.Grassmann(self.dim, self.latent_dim) 76 | self.register_parameter(name="raw_projection_matrix", 77 | parameter=torch.nn.Parameter(torch.Tensor(self.raw_projection_matrix_manifold.rand()). 78 | repeat(*self.batch_shape, 1, 1))) 79 | 80 | @property 81 | def beta(self): 82 | return self.raw_beta_constraint.transform(self.raw_beta) 83 | 84 | @beta.setter 85 | def beta(self, value): 86 | self._set_beta(value) 87 | 88 | def _set_beta(self, value): 89 | if not torch.is_tensor(value): 90 | value = torch.as_tensor(value).to(self.raw_beta) 91 | self.initialize(raw_beta=self.raw_beta_constraint.inverse_transform(value)) 92 | 93 | @property 94 | def projection_matrix(self): 95 | return self.raw_projection_matrix 96 | 97 | @projection_matrix.setter 98 | def projection_matrix(self, value): 99 | self._set_projection_matrix(value) 100 | 101 | def _set_projection_matrix(self, value): 102 | self.initialize(raw_projection_matrix=value) 103 | 104 | def forward(self, x1, x2, diagonal_distance=False, **params): 105 | """ 106 | Compute the Gaussian kernel matrix between inputs x1 and x2 belonging to the ambient high-dim. SPD manifold. 107 | 108 | Parameters 109 | ---------- 110 | :param x1: input points on the SPD manifold 111 | :param x2: input points on the SPD manifold 112 | 113 | Optional parameters 114 | ------------------- 115 | :param diagonal_distance: Whole distance matrix, or just the diagonal? If True, we must have `x1 == x2`. 
116 | :param params: additional parameters 117 | 118 | Returns 119 | ------- 120 | :return: kernel matrix between x1 and x2 121 | """ 122 | # Transform input vector to matrix 123 | x1 = vector_to_symmetric_matrix_mandel_torch(x1) 124 | x2 = vector_to_symmetric_matrix_mandel_torch(x2) 125 | 126 | # Projection from the SPD manifold to the latent low-dimensional SPD manifold 127 | px1 = projection_from_spd_to_nested_spd(x1, self.projection_matrix) 128 | px2 = projection_from_spd_to_nested_spd(x2, self.projection_matrix) 129 | 130 | # Compute distance 131 | distance = affine_invariant_distance_torch(px1, px2, diagonal_distance=diagonal_distance) 132 | distance2 = torch.mul(distance, distance) 133 | 134 | exp_component = torch.exp(- distance2.mul(self.beta.double())) 135 | 136 | return exp_component 137 | 138 | 139 | class NestedSpdLogEuclideanGaussianKernel(gpytorch.kernels.Kernel): 140 | """ 141 | Instances of this class represent a Gaussian (RBF) covariance matrix between projected input points from a 142 | high-dimensional SPD manifold to a low-dimensional SPD manifold via nested-spheres projections using the 143 | log-Euclidean distance. 144 | 145 | Attributes 146 | ---------- 147 | self.dim, dimension of the ambient high-dimensional sphere manifold 148 | self.latent_dim, dimension of the latent low-dimensional sphere manifold 149 | 150 | Properties 151 | ---------- 152 | self.projection_matrix, projection matrix of the nested SPD projection 153 | 154 | Methods 155 | ------- 156 | forward(point1_in_SPD, point2_in_SPD, diagonal_matrix_flag=False, **params): 157 | 158 | Static methods 159 | -------------- 160 | """ 161 | def __init__(self, dim, latent_dim, **kwargs): 162 | """ 163 | Initialisation. 164 | 165 | Optional parameters 166 | ------------------- 167 | :param kwargs: additional arguments 168 | """ 169 | self.has_lengthscale = True 170 | super(NestedSpdLogEuclideanGaussianKernel, self).__init__(ard_num_dims=None, **kwargs) 171 | self.dim = dim 172 | self.latent_dim = latent_dim 173 | 174 | # Add projection parameters 175 | self.raw_projection_matrix_manifold = pyman_man.Grassmann(self.dim, self.latent_dim) 176 | self.register_parameter(name="raw_projection_matrix", 177 | parameter=torch.nn.Parameter(torch.Tensor(self.raw_projection_matrix_manifold.rand()). 178 | repeat(*self.batch_shape, 1, 1))) 179 | 180 | @property 181 | def projection_matrix(self): 182 | return self.raw_projection_matrix 183 | 184 | @projection_matrix.setter 185 | def projection_matrix(self, value): 186 | self._set_projection_matrix(value) 187 | 188 | def _set_projection_matrix(self, value): 189 | self.initialize(raw_projection_matrix=value) 190 | 191 | def forward(self, x1, x2, diagonal_distance=False, **params): 192 | """ 193 | Compute the Gaussian kernel matrix between inputs x1 and x2 belonging to a SPD manifold. 194 | 195 | Parameters 196 | ------------------- 197 | :param x1: input points on the SPD manifold 198 | :param x2: input points on the SPD manifold 199 | 200 | Optional parameters 201 | ------------------- 202 | :param diag: Whole distance matrix, or just the diagonal? 
If True, we must have `x1 == x2` 203 | :param params: additional parameters 204 | 205 | Returns 206 | ------------------- 207 | :return: kernel matrix between x1 and x2 208 | """ 209 | # Transform input vector to matrix 210 | x1 = vector_to_symmetric_matrix_mandel_torch(x1) 211 | x2 = vector_to_symmetric_matrix_mandel_torch(x2) 212 | 213 | # Projection from the SPD manifold to the latent low-dimensional SPD manifold 214 | px1 = projection_from_spd_to_nested_spd(x1, self.projection_matrix) 215 | px2 = projection_from_spd_to_nested_spd(x2, self.projection_matrix) 216 | 217 | # Compute the log of the matrices 218 | # Reshape px1 to N x d x d format 219 | init_shape = list(px1.shape) 220 | px1 = px1.view(-1, self.latent_dim, self.latent_dim) 221 | nb_data = px1.shape[0] 222 | # Log 223 | log_px1 = torch.zeros_like(px1) 224 | for n in range(nb_data): 225 | log_px1[n] = logm_torch(px1[n]) 226 | # Reshape to initial format 227 | log_px1 = log_px1.view(init_shape) 228 | 229 | # Reshape px2 to N x d x d format 230 | init_shape = list(px2.shape) 231 | px2 = px2.view(-1, self.latent_dim, self.latent_dim) 232 | nb_data = px2.shape[0] 233 | # Log 234 | log_px2 = torch.zeros_like(px2) 235 | for n in range(nb_data): 236 | log_px2[n] = logm_torch(px2[n]) 237 | # Reshape to initial format 238 | log_px2 = log_px2.view(init_shape) 239 | 240 | # Compute distance 241 | distance = frobenius_distance_torch(log_px1, log_px2, diagonal_distance=diagonal_distance) 242 | distance2 = torch.mul(distance, distance) 243 | 244 | exp_component = torch.exp(- distance2.div(torch.mul(self.lengthscale.double(), self.lengthscale.double()))) 245 | 246 | return exp_component 247 | -------------------------------------------------------------------------------- /BoManifolds/Riemannian_utils/spd_utils.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | import numpy as np 4 | import scipy.linalg as sc_la 5 | ''' 6 | This file is part of the GaBOtorch library. 7 | Authors: Noemie Jaquier and Leonel Rozo, 2020 8 | License: MIT 9 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com 10 | 11 | The functions of this file are based on the function of botorch (in botorch.optim). 
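Mandel notation, used throughout this file, vectorizes a symmetric matrix with its off-diagonal entries scaled by sqrt(2); e.g. for d = 2, M = [[a, c], [c, b]] <-> v = (a, b, sqrt(2)*c), so that the Frobenius inner product is preserved: <M1, M2>_F = <v1, v2>.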
12 | '''
13 |
14 |
15 | def tensor_matrix_product(T, U, mode):
16 | """
17 | This function computes a tensor-matrix product
18 |
19 | Parameters
20 | ----------
21 | :param T: tensor
22 | :param U: matrix
23 | :param mode: mode of the product
24 |
25 | Returns
26 | -------
27 | :return: tensor T x_mode U
28 | """
29 | # Mode-n tensor-matrix product
30 | N = len(T.shape)
31 |
32 | # Compute the complement of the set of modes (as a list, since range objects cannot be modified)
33 | modec = list(range(0, N))
34 | modec.remove(mode)
35 |
36 | # Permutation of the tensor
37 | perm = [mode] + modec
38 | S = np.transpose(T, perm)
39 | sizeS = S.shape
40 | S = S.reshape((sizeS[0], -1), order='F')
41 |
42 | # n-mode product
43 | S = np.dot(U, S)
44 | sizeS = U.shape[0:1] + sizeS[1:]
45 | S = S.reshape(sizeS, order='F')
46 |
47 | # Inverse permutation
48 | inv_perm = [0]*N
49 | for i in range(0, N):
50 | inv_perm[perm[i]] = i
51 |
52 | S = np.transpose(S, inv_perm)
53 |
54 | return S
55 |
56 |
57 | def symmetric_matrix_to_vector_mandel(M):
58 | """
59 | Transforms a symmetric matrix to vector using Mandel notation
60 |
61 | Parameters
62 | ----------
63 | :param M: symmetric matrix
64 |
65 | Returns
66 | -------
67 | :return: vector
68 | """
69 | N = M.shape[0]
70 |
71 | v = np.copy(M.diagonal())
72 |
73 | for i in range(1, N):
74 | v = np.concatenate((v, 2.0**0.5*M.diagonal(i)))
75 |
76 | return v
77 |
78 |
79 | def vector_to_symmetric_matrix_mandel(v):
80 | """
81 | Transforms a vector to symmetric matrix using Mandel notation
82 |
83 | Parameters
84 | ----------
85 | :param v: vector
86 |
87 | Returns
88 | -------
89 | :return: symmetric matrix M
90 | """
91 | n = v.shape[0]
92 | N = int((-1.0 + (1.0+8.0*n)**0.5)/2.0)
93 |
94 | M = np.copy(np.diag(v[0:N]))
95 |
96 | id = np.cumsum(range(N,0,-1))
97 |
98 | for i in range(0, N-1):
99 | M += np.diag(v[range(id[i], id[i+1])], i+1) / 2.0**0.5 + np.diag(v[range(id[i], id[i+1])], -i-1) / 2.0**0.5
100 |
101 | return M
102 |
103 |
104 | def expmap(U, S):
105 | """
106 | Computes exponential map
107 |
108 | Parameters
109 | ----------
110 | :param U: symmetric matrix
111 | :param S: SPD matrix
112 |
113 | Returns
114 | -------
115 | :return: SPD matrix computed as Expmap_S(U)
116 | """
117 | D, V = np.linalg.eig(np.linalg.solve(S, U))
118 | X = S.dot(V.dot(np.diag(np.exp(D))).dot(np.linalg.inv(V)))
119 |
120 | return X
121 |
122 |
123 | def logmap(X, S):
124 | """
125 | Computes the logarithmic map
126 |
127 | Parameters
128 | ----------
129 | :param X: SPD matrix
130 | :param S: SPD matrix
131 |
132 | Returns
133 | -------
134 | :return: symmetric matrix computed as Logmap_S(X)
135 | """
136 | D, V = np.linalg.eig(np.linalg.solve(S, X))
137 | U = S.dot(V.dot(np.diag(np.log(D))).dot(np.linalg.inv(V)))
138 |
139 | return U
140 |
141 |
142 | def expmap_mandel_vector(u, s):
143 | """
144 | Computes the exponential map using Mandel notation
145 |
146 | Parameters
147 | ----------
148 | :param u: symmetric matrix in Mandel notation form
149 | :param s: SPD matrix in Mandel notation form
150 |
151 | Returns
152 | -------
153 | :return: SPD matrix computed as Expmap_S(U) in Mandel notation form
154 | """
155 | U = vector_to_symmetric_matrix_mandel(u)
156 | S = vector_to_symmetric_matrix_mandel(s)
157 |
158 | return symmetric_matrix_to_vector_mandel(expmap(U, S))
159 |
160 |
161 | def logmap_mandel_vector(x, s):
162 | """
163 | Computes the logarithmic map using Mandel notation
164 |
165 | Parameters
166 | ----------
167 | :param x: SPD matrix in Mandel notation form
168 | :param s: SPD matrix in Mandel notation form
169 |
170 | Returns 171 | ------- 172 | :return: symmetric matrix computed as Logmap_S(X) in Mandel notation form 173 | """ 174 | X = vector_to_symmetric_matrix_mandel(x) 175 | S = vector_to_symmetric_matrix_mandel(s) 176 | 177 | return symmetric_matrix_to_vector_mandel(logmap(X, S)) 178 | 179 | 180 | def affine_invariant_distance(S1, S2): 181 | """ 182 | Computes the SPD affine invariant distance 183 | 184 | Parameters 185 | ---------- 186 | :param S1: SPD matrix 187 | :param S2: SPD matrix 188 | 189 | Returns 190 | ------- 191 | :return: affine invariant distance between S1 and S2 192 | """ 193 | # S1_pow = sc_la.fractional_matrix_power(S1, -0.5) 194 | # return np.linalg.norm(sc_la.logm(np.dot(np.dot(S1_pow, S2), S1_pow)), 'fro') 195 | 196 | eigv, _ = np.linalg.eig(np.dot(np.linalg.inv(S1), S2)) 197 | return np.sqrt(np.sum(np.log(eigv)*np.log(eigv))) 198 | 199 | 200 | def parallel_transport_operator(S1, S2): 201 | """ 202 | Computes the Parallel transport operation 203 | 204 | Parameters 205 | ---------- 206 | :param S1: SPD matrix 207 | :param S2: SPD matrix 208 | 209 | Returns 210 | ------- 211 | :return: parallel transport operator 212 | """ 213 | return sc_la.fractional_matrix_power(np.dot(S2, np.linalg.inv(S1)), 0.5) 214 | 215 | 216 | def parallel_transport_operator_mandel_vector(s1, s2): 217 | """ 218 | Computes the parallel transport operation for SPD matrices given in Mandel vector notation 219 | 220 | Parameters 221 | ---------- 222 | :param S1: SPD matrix 223 | :param S2: SPD matrix 224 | 225 | Returns 226 | ------- 227 | :return: parallel transport operator 228 | """ 229 | S1 = vector_to_symmetric_matrix_mandel(s1) 230 | S2 = vector_to_symmetric_matrix_mandel(s2) 231 | 232 | return parallel_transport_operator(S1, S2) 233 | 234 | 235 | def mean(data, nb_iter=10): 236 | """ 237 | This function computes the mean of points lying on the manifold (Fréchet/Karcher mean). 238 | 239 | Parameters 240 | ---------- 241 | :param data: data points lying on the manifold N x nb_dim x nb_dim 242 | :param nb_iter: number of iterations 243 | 244 | Returns 245 | ------- 246 | :return: mean of the datapoints 247 | """ 248 | nb_data = data.shape[0] 249 | 250 | # Initialize the mean as equal to the first datapoint 251 | m = data[0] 252 | for i in range(nb_iter): 253 | data_tgt = logmap(data[0], m) 254 | for n in range(1, nb_data): 255 | data_tgt += logmap(data[n], m) 256 | m_tgt = data_tgt / nb_data 257 | m = expmap(m_tgt, m) 258 | 259 | return m 260 | 261 | 262 | def mean_mandel_vector(data, nb_iter=10): 263 | """ 264 | This function computes the mean of points lying on the manifold (Fréchet/Karcher mean) for SPD matrices given in 265 | Mandel vector notation. 266 | 267 | Parameters 268 | ---------- 269 | :param data: data points lying on the manifold nb_dim_vec x N 270 | :param nb_iter: number of iterations 271 | 272 | Returns 273 | ------- 274 | :return: mean of the datapoints 275 | """ 276 | nb_data = data.shape[1] 277 | 278 | # Initialize the mean as equal to the first datapoint 279 | m = data[:, 0] 280 | for i in range(nb_iter): 281 | data_tgt = logmap_mandel_vector(data[:, 0], m) 282 | for n in range(1, nb_data): 283 | data_tgt += logmap_mandel_vector(data[:, n], m) 284 | m_tgt = data_tgt / nb_data 285 | m = expmap_mandel_vector(m_tgt, m) 286 | 287 | return m 288 | 289 | 290 | def spd_sample(self): 291 | """ 292 | This function computes a random SPD matrix sample. 
293 |
294 | Returns
295 | -------
296 | :return: random SPD matrix with eigenvalues between self.min_eig and self.max_eig
297 | """
298 | # Generate eigenvalues between min_eig and max_eig
299 | d = self.min_eig * np.ones(1) + (self.max_eig - self.min_eig) * np.random.rand(self._n)
300 |
301 | # Generate a random orthogonal matrix u via the QR decomposition of a
302 | # Gaussian random matrix, and build the sample as u.dot(diag(d)).dot(u.T),
303 | # which is SPD with eigenvalues d.
304 | u, _ = np.linalg.qr(np.random.randn(self._n, self._n))
305 | point_mat = np.dot(u, np.dot(np.diag(d), u.T))
306 | return point_mat
307 |
308 |
309 | def in_domain(domain, x):
310 | """
311 | This function checks if a symmetric matrix is in a domain defined by upper/lower bounds with Mandel notation
312 |
313 | Parameters
314 | ----------
315 | :param domain: domain.upper and domain.lower contain the bounds of the domain
316 | :param x: symmetric matrix
317 |
318 | Returns
319 | -------
320 | :return: True if the matrix is in the domain, False otherwise
321 | """
322 | if symmetric_matrix_to_vector_mandel(x) in domain:
323 | return True
324 | else:
325 | return False
326 |
327 |
328 | def in_domain_eig(domain, x):
329 | """
330 | This function checks if a symmetric matrix is in a domain defined by upper/lower eigenvalues
331 |
332 | Parameters
333 | ----------
334 | :param domain: domain.upper and domain.lower contain the bounds of the domain
335 | :param x: symmetric matrix
336 |
337 | Returns
338 | -------
339 | :return: True if the matrix is in the domain, False otherwise
340 | """
341 | max_eig = domain.upper[0]
342 | min_eig = domain.lower[0]
343 |
344 | D = np.linalg.eigvals(x)
345 |
346 | if np.min(D) < min_eig:
347 | return False
348 | elif np.max(D) > max_eig:
349 | return False
350 | else:
351 | return True
352 |
353 |
354 | def project_to_eigenvalue_domain(domain, x):
355 | """
356 | This function scales the eigenvalues of a matrix so that they are constrained to a specific domain
357 |
358 | Parameters
359 | ----------
360 | :param domain: domain.upper[0] and domain.lower[0] contain the maximum and minimum eigenvalue, respectively.
361 | :param x: matrix to project in the domain
362 |
363 | Returns
364 | -------
365 | :return: matrix with rescaled eigenvalues
366 | """
367 | max_eig = domain.upper[0]
368 | min_eig = domain.lower[0]
369 |
370 | D, V = np.linalg.eig(x)
371 | D[D < min_eig] = min_eig
372 | D[D > max_eig] = max_eig
373 | return np.dot(V, np.dot(np.diag(D), V.T))
374 |
375 |
-------------------------------------------------------------------------------- /BoManifolds/nested_mappings/nested_spd_optimization.py: -------------------------------------------------------------------------------- 1 | import torch
2 | import gpytorch
3 |
4 | import pymanopt.manifolds as pyman_man
5 |
6 | from BoManifolds.pymanopt_addons.problem import Problem
7 |
8 | from BoManifolds.manifold_optimization.augmented_Lagrange_method import AugmentedLagrangeMethod
9 |
10 | from BoManifolds.Riemannian_utils.spd_utils_torch import affine_invariant_distance_torch, frobenius_distance_torch, \
11 | logm_torch
12 |
13 | from BoManifolds.nested_mappings.nested_spd_utils import projection_from_nested_spd_to_spd
14 |
15 | '''
16 | This file is part of the GaBOtorch library.
17 | Authors: Noemie Jaquier and Leonel Rozo, 2020
18 | License: MIT
19 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com
20 | '''
21 | 
22 | 
23 | def min_affine_invariant_distance_reconstruction_cost(x_data, x_data_projected, projection_matrix,
24 |                                                       projection_complement_matrix,
25 |                                                       bottom_spd_matrix, contraction_matrix):
26 |     """
27 |     This function computes the squared error between a set of SPD data X and their reconstruction from the
28 |     corresponding projections Y = W'XW with W \in G(D,d). The error is measured with the affine-invariant distance.
29 | 
30 |     Parameters
31 |     ----------
32 |     :param x_data: set of high-dimensional SPD matrices (N x D x D)
33 |     :param x_data_projected: set of low-dimensional SPD matrices (projected from x_data) (N x d x d)
34 |     :param projection_matrix: element of the Grassmann manifold (D x d)
35 |     :param projection_complement_matrix: element of the Grassmann manifold (D x D-d)
36 |         Note that we must have torch.mm(projection_complement_matrix.T, projection_matrix) = 0.
37 |     :param bottom_spd_matrix: bottom-right part of the rotated SPD matrix (D-d, D-d)
38 |     :param contraction_matrix: matrix whose norm is <=1 (d x D-d)
39 | 
40 |     Returns
41 |     -------
42 |     :return: sum of squared distances between reconstructed and original SPD data
43 |     """
44 |     n_data = x_data_projected.shape[0]
45 |     # Projection from low-dimensional to high-dimensional SPD data
46 |     x_reconstructed = projection_from_nested_spd_to_spd(x_data_projected, projection_matrix,
47 |                                                         projection_complement_matrix, bottom_spd_matrix,
48 |                                                         contraction_matrix)
49 | 
50 |     # Compute distances between original and reconstructed data
51 |     cost = torch.zeros(n_data)
52 |     for n in range(n_data):
53 |         cost[n] = affine_invariant_distance_torch(x_data[n].unsqueeze(0), x_reconstructed[n].unsqueeze(0))
54 | 
55 |     # Sum of squared distances
56 |     return torch.sum(cost * cost)
57 | 
58 | 
59 | def min_log_euclidean_distance_reconstruction_cost(x_data, x_data_projected, projection_matrix,
60 |                                                    projection_complement_matrix, bottom_spd_matrix, contraction_matrix):
61 |     """
62 |     This function computes the squared error between a set of SPD data X and their reconstruction from the
63 |     corresponding projections Y = W'XW with W \in G(D,d). The error is measured with the log-Euclidean distance (i.e., the Frobenius distance between the matrix logarithms).
64 | 
65 |     Parameters
66 |     ----------
67 |     :param x_data: set of high-dimensional SPD matrices (N x D x D)
68 |     :param x_data_projected: set of low-dimensional SPD matrices (projected from x_data) (N x d x d)
69 |     :param projection_matrix: element of the Grassmann manifold (D x d)
70 |     :param projection_complement_matrix: element of the Grassmann manifold (D x D-d)
71 |         Note that we must have torch.mm(projection_complement_matrix.T, projection_matrix) = 0.
72 |     :param bottom_spd_matrix: bottom-right part of the rotated SPD matrix (D-d, D-d)
73 |     :param contraction_matrix: matrix whose norm is <=1 (d x D-d)
74 | 
75 |     Returns
76 |     -------
77 |     :return: sum of squared distances between reconstructed and original SPD data
78 |     """
79 |     n_data = x_data_projected.shape[0]
80 |     # Projection from low-dimensional to high-dimensional SPD data
81 |     x_reconstructed = projection_from_nested_spd_to_spd(x_data_projected, projection_matrix,
82 |                                                         projection_complement_matrix, bottom_spd_matrix,
83 |                                                         contraction_matrix)
84 | 
85 |     # Compute distances between original and reconstructed data
86 |     cost = torch.zeros(n_data)
87 |     for n in range(n_data):
88 |         cost[n] = frobenius_distance_torch(logm_torch(x_data[n]).unsqueeze(0),
89 |                                            logm_torch(x_reconstructed[n]).unsqueeze(0))
90 | 
91 |     # Sum of squared distances
92 |     return torch.sum(cost * cost)
93 | 
94 | 
95 | def optimize_reconstruction_parameters_nested_spd(x_data, x_data_projected, projection_matrix, inner_solver,
96 |                                                   cost_function=min_affine_invariant_distance_reconstruction_cost,
97 |                                                   nb_init_candidates=100, maxiter=50):
98 |     """
99 |     This function computes the parameters of the mapping "projection_from_nested_spd_to_spd" from nested SPD matrices
100 |     Y = W'XW to SPD matrices Xrec, so that the distance between the original data X and the reconstructed data Xrec
101 |     is minimized.
102 |     To do so, we consider that the nested SPD matrix Y = W'XW is the d x d upper-left part of the rotated matrix
103 |     Xr = R'XR, where R = [W, V] and Xr = [Y B; B' C].
104 |     In order to recover X, we assume a constant SPD matrix C, and B = Y^0.5*K*C^0.5 to ensure the positive
105 |     definiteness of Xr, with K a contraction matrix (norm(K) <= 1). We first reconstruct Xr, and then Xrec = R Xr R'.
106 |     We minimize the squared distance between X and Xrec by optimizing the complement to the projection matrix V,
107 |     the bottom SPD matrix C, and the contraction matrix K. The contraction matrix K is described here as a norm-1
108 |     matrix multiplied by a factor in [0,1] (optimized without constraints by transforming it with a sigmoid function).
109 |     The augmented Lagrange optimization method on Riemannian manifolds is used to optimize the parameters on the
110 |     product of manifolds G(D,D-d), SPD(D-d), S(d*(D-d)) and Eucl(1), while respecting the constraint W'V = 0.
111 | 
112 |     Parameters
113 |     ----------
114 |     :param x_data: set of high-dimensional SPD matrices (N x D x D)
115 |     :param x_data_projected: set of low-dimensional SPD matrices (projected from x_data) (N x d x d)
116 |     :param projection_matrix: element of the Grassmann manifold (D x d)
117 |     :param inner_solver: inner solver for the ALM on Riemannian manifolds
118 | 
119 |     Optional parameters
120 |     -------------------
121 |     :param nb_init_candidates: number of initial candidates for the optimization
122 |     :param maxiter: maximum number of iterations of the ALM solver
123 | 
124 |     Returns
125 |     -------
126 |     :return: projection_complement_matrix: element of the Grassmann manifold (D x D-d)
127 |         so that torch.mm(projection_complement_matrix.T, projection_matrix) = 0.
128 |     :return: bottom_spd_matrix: bottom-right part of the rotated SPD matrix (D-d, D-d)
129 |     :return: contraction_matrix: matrix whose norm is <=1 (d x D-d)
130 |     """
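    # Illustrative usage (a hedged sketch, not taken from this repository's examples): given SPD data x_data, its
    # projections x_data_projected = W'XW for a Grassmann element W, and a Riemannian inner solver for the ALM
    # (e.g. a conjugate-gradient solver on manifolds), the parameters could be recovered as
    #   V, C, K = optimize_reconstruction_parameters_nested_spd(x_data, x_data_projected, W, inner_solver)
    # where V is the projection complement, C the bottom SPD block, and K the contraction matrix.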
131 |     # Dimensions
132 |     dim = x_data.shape[1]
133 |     latent_dim = projection_matrix.shape[1]
134 | 
135 |     # Product of manifolds for the optimization
136 |     manifolds_list = [pyman_man.Grassmann(dim, dim - latent_dim), pyman_man.PositiveDefinite(dim - latent_dim),
137 |                       pyman_man.Sphere(latent_dim * (dim - latent_dim)), pyman_man.Euclidean(1)]
138 |     product_manifold = pyman_man.Product(manifolds_list)
139 | 
140 |     # Constraint on the norm of the contraction matrix
141 |     contraction_norm_constraint = gpytorch.constraints.Interval(0., 1.)
142 | 
143 |     # Constraint W'V = 0
144 |     def constraint_fct(parameters):
145 |         cost = torch.norm(torch.mm(parameters[0].T, projection_matrix))
146 |         zero_element_needed_for_correct_grad = 0. * torch.norm(parameters[1]) + 0. * torch.norm(parameters[2]) + \
147 |                                                0. * torch.norm(parameters[3])
148 |         return cost + zero_element_needed_for_correct_grad
149 | 
150 |     # Reconstruction cost
151 |     def reconstruction_cost(parameters):
152 |         projection_complement_matrix = parameters[0]
153 |         bottom_spd_matrix = parameters[1]
154 |         contraction_norm = contraction_norm_constraint.transform(parameters[3])
155 |         contraction_matrix = contraction_norm * parameters[2].view(latent_dim, dim-latent_dim)
156 | 
157 |         return cost_function(x_data, x_data_projected, projection_matrix, projection_complement_matrix,
158 |                              bottom_spd_matrix, contraction_matrix)
159 | 
160 |     # Generate candidates for the initial data
161 |     x0_candidates = [product_manifold.rand() for i in range(nb_init_candidates)]
162 |     x0_candidates_torch = []
163 |     for x0 in x0_candidates:
164 |         x0_candidates_torch.append([torch.from_numpy(x) for x in x0])
165 |     y0_candidates = [reconstruction_cost(x0_candidates_torch[i]) for i in range(nb_init_candidates)]
166 | 
167 |     # Initialize with the best of the candidates
168 |     y0, x_init_idx = torch.Tensor(y0_candidates).min(0)
169 |     x0 = x0_candidates[x_init_idx]
170 | 
171 |     # Define the optimization problem
172 |     reconstruction_problem = Problem(manifold=product_manifold, cost=reconstruction_cost, arg=torch.Tensor(),
173 |                                      verbosity=0)
174 |     # Define ALM solver
175 |     solver = AugmentedLagrangeMethod(maxiter=maxiter, inner_solver=inner_solver, lambdas_fact=0.05)
176 | 
177 |     # Solve
178 |     spd_parameters_np = solver.solve(reconstruction_problem, x=x0, eq_constraints=constraint_fct)
179 | 
180 |     # Convert the optimized parameters to torch data
181 |     projection_complement_matrix = torch.from_numpy(spd_parameters_np[0])
182 |     bottom_spd_matrix = torch.from_numpy(spd_parameters_np[1])
183 |     contraction_norm = contraction_norm_constraint.transform(torch.from_numpy(spd_parameters_np[3]))
184 |     contraction_matrix = contraction_norm * torch.from_numpy(spd_parameters_np[2]).view(latent_dim, dim-latent_dim)
185 | 
186 |     return projection_complement_matrix, bottom_spd_matrix, contraction_matrix
187 | 

--------------------------------------------------------------------------------
/BoManifolds/euclidean_optimization/euclidean_constrained_optimize.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from typing import Any, Callable, Dict, List, Optional, Tuple, Union
3 | 
4 | from scipy.optimize import minimize, Bounds
5 | 
6 | import torch
7 | from torch import Tensor
8 | from torch.nn import Module
9 | 
10 | from botorch.acquisition import AcquisitionFunction
11 | from botorch.acquisition.analytic import
AnalyticAcquisitionFunction 12 | from botorch.acquisition.utils import is_nonnegative 13 | from botorch.exceptions import BadInitialCandidatesWarning 14 | from botorch.gen import get_best_candidates 15 | from botorch.utils.sampling import draw_sobol_samples 16 | from botorch.optim.initializers import initialize_q_batch, initialize_q_batch_nonneg 17 | from botorch.optim.utils import columnwise_clamp, fix_features 18 | 19 | from botorch.optim.parameter_constraints import ( 20 | _arrayify, 21 | make_scipy_bounds, 22 | ) 23 | ''' 24 | This file is part of the GaBOtorch library. 25 | Authors: Noemie Jaquier and Leonel Rozo, 2020 26 | License: MIT 27 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com 28 | 29 | The functions of this file are based on the function of botorch (in botorch.optim). 30 | ''' 31 | 32 | 33 | # This function is based on (and very similar to) the botorch.optim.joint_optimize function of botorch. 34 | def joint_optimize( 35 | acq_function: AcquisitionFunction, 36 | bounds: Tensor, 37 | q: int, 38 | num_restarts: int, 39 | raw_samples: int, 40 | options: Optional[Dict[str, Union[bool, float, int]]] = None, 41 | constraints = (), 42 | fixed_features: Optional[Dict[int, float]] = None, 43 | post_processing_init: Optional[Callable[[Tensor], Tensor]] = None, 44 | ) -> Tensor: 45 | """ 46 | This function generates a set of candidates via joint multi-start optimization 47 | 48 | Parameters 49 | ---------- 50 | :param acq_function: the acquisition function 51 | :param bounds: a `2 x d` tensor of lower and upper bounds for each column of `X` 52 | :param q: number of candidates 53 | :param num_restarts: number of starting points for multistart acquisition function optimization 54 | :param raw_samples: number of samples for initialization 55 | 56 | 57 | Optional parameters 58 | ------------------- 59 | :param options: options for candidate generation 60 | :param constraints: constraints in scipy format 61 | :param fixed_features: A map {feature_index: value} for features that should be fixed to a particular value 62 | during generation. 63 | :param post_processing_init: A function that post processes the generated initial samples 64 | (e.g. so that they fulfill some constraints). 65 | 66 | Returns 67 | ------- 68 | :return: a `q x d` tensor of generated candidates. 
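
    Example
    -------
    An illustrative call, mirroring the usage in examples/bo_sphere/benchmark_examples/bo_euclidean_sphere.py
    (acq_fct, bounds, norm_one_constraint and post_processing_init are placeholders defined there); it optimizes
    the acquisition function while constraining the candidates to the unit sphere:

        new_x = joint_optimize(acq_fct, bounds=bounds, q=1, num_restarts=5, raw_samples=100,
                               constraints=[{'type': 'eq', 'fun': norm_one_constraint}],
                               post_processing_init=post_processing_init)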
69 | """ 70 | 71 | options = options or {} 72 | batch_initial_conditions = \ 73 | gen_batch_initial_conditions(acq_function=acq_function, bounds=bounds, 74 | q=None if isinstance(acq_function, AnalyticAcquisitionFunction) else q, 75 | num_restarts=num_restarts, raw_samples=raw_samples, 76 | options=options, post_processing_init=post_processing_init) 77 | batch_limit = options.get("batch_limit", num_restarts) 78 | batch_candidates_list = [] 79 | batch_acq_values_list = [] 80 | start_idx = 0 81 | while start_idx < num_restarts: 82 | end_idx = min(start_idx + batch_limit, num_restarts) 83 | # optimize using random restart optimization 84 | batch_candidates_curr, batch_acq_values_curr = \ 85 | gen_candidates_scipy(initial_conditions=batch_initial_conditions[start_idx:end_idx], 86 | acquisition_function=acq_function, lower_bounds=bounds[0], upper_bounds=bounds[1], 87 | options={k: v for k, v in options.items() if k not in ("batch_limit", "nonnegative")}, 88 | constraints=constraints, fixed_features=fixed_features) 89 | batch_candidates_list.append(batch_candidates_curr) 90 | batch_acq_values_list.append(batch_acq_values_curr) 91 | start_idx += batch_limit 92 | 93 | batch_candidates = torch.cat(batch_candidates_list) 94 | batch_acq_values = torch.cat(batch_acq_values_list) 95 | return get_best_candidates(batch_candidates=batch_candidates, batch_values=batch_acq_values) 96 | 97 | 98 | # This function is based on (and very similar to) the botorch.gen.gen_candidates_scipy function of botorch. 99 | def gen_candidates_scipy( 100 | initial_conditions: Tensor, 101 | acquisition_function: Module, 102 | lower_bounds: Optional[Union[float, Tensor]] = None, 103 | upper_bounds: Optional[Union[float, Tensor]] = None, 104 | constraints=(), 105 | options: Optional[Dict[str, Any]] = None, 106 | fixed_features: Optional[Dict[int, Optional[float]]] = None, 107 | ) -> Tuple[Tensor, Tensor]: 108 | """ 109 | This function generates a set of candidates using `scipy.optimize.minimize` 110 | 111 | Parameters 112 | ---------- 113 | :param initial_conditions: starting points for optimization 114 | :param acquisition_function: acquisition function to be optimized 115 | 116 | Optional parameters 117 | ------------------- 118 | :param lower_bounds: minimum values for each column of initial_conditions 119 | :param upper_bounds: maximum values for each column of initial_conditions 120 | :param constraints: constraints in scipy format 121 | :param options: options for candidate generation 122 | :param fixed_features: A map {feature_index: value} for features that should be fixed to a particular value 123 | during generation. 124 | 125 | Returns 126 | ------- 127 | :return: 2-element tuple containing the set of generated candidates and the acquisition value for each t-batch. 128 | """ 129 | 130 | options = options or {} 131 | x0 = columnwise_clamp(initial_conditions, lower_bounds, upper_bounds).requires_grad_(True) 132 | 133 | bounds = Bounds(lb=lower_bounds, ub=upper_bounds, keep_feasible=True) 134 | 135 | def f(x): 136 | X = (torch.from_numpy(x).to(initial_conditions).contiguous().requires_grad_(True)) 137 | X_fix = fix_features(X=X, fixed_features=fixed_features) 138 | loss = -acquisition_function(X_fix[None]).sum() 139 | # compute gradient w.r.t. 
the inputs (does not accumulate in leaves) 140 | gradf = _arrayify(torch.autograd.grad(loss, X)[0].contiguous().view(-1)) 141 | fval = loss.item() 142 | return fval, gradf 143 | 144 | candidates = torch.zeros(x0.shape, dtype=torch.float64) 145 | # TODO this does not handle the case where q!=1 146 | for i in range(x0.shape[0]): 147 | res = minimize(f, x0[i, 0].detach().numpy(), method="SLSQP", jac=True, bounds=bounds, constraints=constraints, 148 | options={k: v for k, v in options.items() if k != "method"},) 149 | candidates[i] = fix_features(X=torch.from_numpy(res.x).to(initial_conditions).contiguous(), 150 | fixed_features=fixed_features,) 151 | 152 | batch_acquisition = acquisition_function(candidates) 153 | 154 | return candidates, batch_acquisition 155 | 156 | 157 | # This function is based on (and very similar to) the botorch.optim.gen_batch_initial_conditions function of botorch. 158 | def gen_batch_initial_conditions( 159 | acq_function: AcquisitionFunction, 160 | bounds: Tensor, 161 | q: int, 162 | num_restarts: int, 163 | raw_samples: int, 164 | options: Optional[Dict[str, Union[bool, float, int]]] = None, 165 | post_processing_init: Optional[Callable[[Tensor], Tensor]] = None, 166 | ) -> Tensor: 167 | """ 168 | This function generates a batch of initial conditions for random-restart optimization 169 | 170 | Parameters 171 | ---------- 172 | :param acq_function: the acquisition function to be optimized. 173 | :param bounds: a `2 x d` tensor of lower and upper bounds for each column of `X` 174 | :param q: number of candidates 175 | :param num_restarts: number of starting points for multistart acquisition function optimization 176 | :param raw_samples: number of samples for initialization 177 | 178 | Optional parameters 179 | ------------------- 180 | :param options: options for candidate generation 181 | :param post_processing_init: A function that post processes the generated initial samples 182 | (e.g. so that they fulfill some constraints). 
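        For example, to keep Sobol samples on the unit sphere, a minimal sketch of such a function renormalizes
        the samples (the sphere BO examples of this repository use an equivalent normalization):
        post_processing_init = lambda x: x / torch.norm(x, dim=-1, keepdim=True)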
183 | 
184 |     Returns
185 |     -------
186 |     :return: a `num_restarts x q x d` tensor of initial conditions
187 |     """
188 |     options = options or {}
189 |     seed: Optional[int] = options.get("seed")  # pyre-ignore
190 |     batch_limit: Optional[int] = options.get("batch_limit")  # pyre-ignore
191 |     batch_initial_arms: Tensor
192 |     factor, max_factor = 1, 5
193 |     init_kwargs = {}
194 |     if "eta" in options:
195 |         init_kwargs["eta"] = options.get("eta")
196 |     if options.get("nonnegative") or is_nonnegative(acq_function):
197 |         init_func = initialize_q_batch_nonneg
198 |         if "alpha" in options:
199 |             init_kwargs["alpha"] = options.get("alpha")
200 |     else:
201 |         init_func = initialize_q_batch
202 | 
203 |     while factor < max_factor:
204 |         with warnings.catch_warnings(record=True) as ws:
205 |             X_rnd = draw_sobol_samples(bounds=bounds, n=raw_samples * factor, q=1 if q is None else q, seed=seed,)
206 | 
207 |             # Constrain the samples (e.g. project them onto the manifold)
208 |             if post_processing_init is not None:
209 |                 X_rnd = post_processing_init(X_rnd)
210 | 
211 |             with torch.no_grad():
212 |                 if batch_limit is None:
213 |                     batch_limit = X_rnd.shape[0]
214 | 
215 |                 Y_rnd_list = []
216 |                 start_idx = 0
217 |                 while start_idx < X_rnd.shape[0]:
218 |                     end_idx = min(start_idx + batch_limit, X_rnd.shape[0])
219 |                     Y_rnd_curr = acq_function(X_rnd[start_idx:end_idx])
220 |                     Y_rnd_list.append(Y_rnd_curr)
221 |                     start_idx += batch_limit
222 |                 Y_rnd = torch.cat(Y_rnd_list).to(X_rnd)
223 | 
224 |             batch_initial_conditions = init_func(X=X_rnd, Y=Y_rnd, n=num_restarts, **init_kwargs)
225 | 
226 |             if not any(issubclass(w.category, BadInitialCandidatesWarning) for w in ws):
227 |                 return batch_initial_conditions
228 |             if factor < max_factor:
229 |                 factor += 1
230 |     warnings.warn(
231 |         "Unable to find non-zero acquisition function values - initial conditions "
232 |         "are being selected randomly.",
233 |         BadInitialCandidatesWarning,
234 |     )
235 |     return batch_initial_conditions
236 | 

--------------------------------------------------------------------------------
/examples/kernels/sphere/sphere_kernels.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import gpytorch
4 | import botorch
5 | 
6 | import matplotlib.pyplot as plt
7 | import matplotlib.pylab as pl
8 | from mpl_toolkits.mplot3d import Axes3D
9 | 
10 | from BoManifolds.Riemannian_utils.sphere_utils import logmap
11 | from BoManifolds.kernel_utils.kernels_sphere import SphereGaussianKernel, SphereLaplaceKernel
12 | 
13 | from BoManifolds.plot_utils.manifolds_plots import plot_sphere
14 | 
15 | plt.rcParams['text.usetex'] = True  # use LaTeX font for plots
16 | plt.rcParams['text.latex.preamble'] = r'\usepackage{bm}'
17 | """
18 | This example shows the use of different kernels for the hypersphere manifold S^n, used for Gaussian process regression.
19 | The tested function corresponds to a Gaussian distribution with a mean defined on the sphere and a covariance defined on
20 | the tangent space of the mean. Training data are generated "far" from the mean. The trained Gaussian process is then
21 | used to predict the value of the function at test data sampled around the mean of the test function.
22 | The kernels used are:
23 |     - Manifold-RBF kernel (geometry-aware)
24 |     - Laplace kernel (geometry-aware)
25 |     - Euclidean kernel (classical geometry-unaware)
26 | 
27 | This file is part of the GaBOtorch library.
28 | Authors: Noemie Jaquier and Leonel Rozo, 2020
29 | License: MIT
30 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com
31 | """
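# The test function below evaluates a Gaussian density on the tangent space of the given mean mu_test_function,
# i.e., f(x) = exp(-0.5 * u' inv(Sigma) u) / sqrt((2*pi)^dim * det(Sigma)) with u = Log_mu(x), so that the
# function is maximal at mu_test_function and decays away from it.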
32 | 
33 | 
34 | def test_function(x, mu_test_function):
35 |     x_proj = logmap(x, mu_test_function)
36 | 
37 |     sigma_test_fct = np.array([[0.6, 0.2, 0], [0.2, 0.3, -0.01], [0, -0.01, 0.2]])
38 |     inv_sigma_test_fct = np.linalg.inv(sigma_test_fct)
39 |     det_sigma_test_fct = np.linalg.det(sigma_test_fct)
40 | 
41 |     return np.exp(- 0.5 * np.dot(x_proj.T, np.dot(inv_sigma_test_fct, x_proj))) / np.sqrt(
42 |         (2 * np.pi) ** dim * det_sigma_test_fct)  # dim is a global defined in the __main__ block below
43 | 
44 | 
45 | def plot_gaussian_process_prediction(figure_handle, mu, test_data, mean_est, mu_test_fct, title):
46 |     ax = Axes3D(figure_handle)
47 | 
48 |     # Make the panes transparent
49 |     ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
50 |     ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
51 |     ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
52 |     # Make the grid lines transparent
53 |     ax.xaxis._axinfo["grid"]['color'] = (1, 1, 1, 0)
54 |     ax.yaxis._axinfo["grid"]['color'] = (1, 1, 1, 0)
55 |     ax.zaxis._axinfo["grid"]['color'] = (1, 1, 1, 0)
56 |     # Remove axis
57 |     ax._axis3don = False
58 | 
59 |     # Initial view
60 |     # ax.view_init(elev=10, azim=-20.)  # (default: elev=30, azim=-60)
61 |     ax.view_init(elev=10, azim=30.)  # (default: elev=30, azim=-60)
62 |     # Plot sphere
63 |     plot_sphere(ax, alpha=0.4)
64 |     # Plot training data on the manifold
65 |     plt_scale_fact = test_function(mu_test_fct, mu_test_fct)[0, 0]
66 |     nb_data_test = test_data.shape[0]
67 |     for n in range(nb_data_test):
68 |         ax.scatter(test_data[n, 0], test_data[n, 1], test_data[n, 2],
69 |                    c=[pl.cm.inferno(mean_est[n] / plt_scale_fact)])
70 | 
71 |     # Plot mean of Gaussian test function
72 |     ax.scatter(mu[0], mu[1], mu[2], c='g', marker='D')
73 |     plt.title(title, size=25)
74 | 
75 | 
76 | if __name__ == "__main__":
77 |     np.random.seed(1234)
78 | 
79 |     # Define the test function
80 |     mu_test_fct = np.array([1 / np.sqrt(2), 1 / np.sqrt(2), 0])
81 | 
82 |     # Generate random data on the sphere
83 |     nb_data = 20
84 |     dim = 3
85 | 
86 |     mean = np.array([1, 0, 0])
87 |     mean = mean / np.linalg.norm(mean)
88 |     fact_cov = 0.1
89 |     cov = fact_cov * np.eye(dim)
90 | 
91 |     data = np.random.multivariate_normal(mean, cov, nb_data)
92 |     x_man = data / np.linalg.norm(data, axis=1)[:, None]
93 | 
94 |     y_train = np.zeros((nb_data, 1))
95 |     for n in range(nb_data):
96 |         y_train[n] = test_function(x_man[n], mu_test_fct)
97 | 
98 |     # Generate test data on the sphere
99 |     nb_data_test = 10
100 | 
101 |     # mean_test = np.array([-2, 1, 0])
102 |     # mean_test = np.array([2, 1, 0])
103 |     mean_test = mu_test_fct
104 |     mean_test = mean_test / np.linalg.norm(mean_test)
105 |     fact_cov = 0.1
106 |     cov_test = fact_cov * np.eye(dim)
107 | 
108 |     data = np.random.multivariate_normal(mean_test, cov_test, nb_data_test)
109 |     x_man_test = data / np.linalg.norm(data, axis=1)[:, None]
110 | 
111 |     y_test = np.zeros((nb_data_test, 1))
112 |     for n in range(nb_data_test):
113 |         y_test[n] = test_function(x_man_test[n], mu_test_fct)
114 | 
115 |     # Plot training data - 3D figure
116 |     fig = plt.figure(figsize=(5, 5))
117 |     y_train_for_plot = y_train.reshape((len(y_train),))
118 |     plot_gaussian_process_prediction(fig, mu_test_fct, x_man, y_train_for_plot, mu_test_fct, r'Training data')
119 | 
120 |     # Plot true test data
121 |     # 3D figure
122 |     fig = plt.figure(figsize=(5, 5))
123 |     y_test_for_plot = y_test.reshape((len(y_test),))
124 |     plot_gaussian_process_prediction(fig, mu_test_fct, x_man_test, y_test_for_plot, mu_test_fct, r'Test data
(ground truth)') 125 | 126 | # ### Gaussian kernel 127 | # Define the kernel 128 | k_gauss = gpytorch.kernels.ScaleKernel(SphereGaussianKernel(beta_min=6.5), 129 | outputscale_prior=gpytorch.priors.torch_priors.GammaPrior(2.0, 0.15)) 130 | # GPR model 131 | noise_prior = gpytorch.priors.torch_priors.GammaPrior(1.1, 0.05) 132 | noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate 133 | lik_gauss = gpytorch.likelihoods.gaussian_likelihood.GaussianLikelihood(noise_prior=noise_prior, 134 | noise_constraint=gpytorch.constraints.GreaterThan(1e-8), 135 | initial_value=noise_prior_mode) 136 | m_gauss = botorch.models.SingleTaskGP(torch.tensor(x_man), torch.tensor(y_train), 137 | covar_module=k_gauss, likelihood=lik_gauss) 138 | # Define the marginal log-likelihood 139 | mll_gauss = gpytorch.mlls.ExactMarginalLogLikelihood(m_gauss.likelihood, m_gauss) 140 | # Optimization of the model parameters 141 | botorch.fit_gpytorch_model(mll=mll_gauss) 142 | # Kernel computation 143 | K1 = k_gauss.forward(torch.tensor(x_man), torch.tensor(x_man)) 144 | K12 = k_gauss.forward(torch.tensor(x_man), torch.tensor(x_man_test)) 145 | K2 = k_gauss.forward(torch.tensor(x_man_test), torch.tensor(x_man_test)) 146 | # Prediction 147 | preds_gauss = m_gauss(torch.tensor(x_man_test)) 148 | mean_est_gauss = preds_gauss.mean.detach().numpy() 149 | var_est_gauss = preds_gauss.variance.detach().numpy() 150 | covar_est_gauss = preds_gauss.covariance_matrix.detach().numpy() 151 | # Compute posterior samples 152 | # posterior_samples = preds_gauss.sample(sample_shape=torch.Size(1000,)) 153 | error_gauss = np.sqrt(np.sum((y_test - mean_est_gauss) ** 2) / nb_data_test) 154 | print('Estimation error (Manifold-RBF kernel) = ', error_gauss) 155 | # Plot test data 156 | fig = plt.figure(figsize=(5, 5)) 157 | plot_gaussian_process_prediction(fig, mu_test_fct, x_man_test, mean_est_gauss, mu_test_fct, r'Manifold-RBF kernel') 158 | 159 | # ### Laplace kernel 160 | # Define the kernel 161 | k_laplace = gpytorch.kernels.ScaleKernel(SphereLaplaceKernel(), 162 | outputscale_prior=gpytorch.priors.torch_priors.GammaPrior(2.0, 0.15)) 163 | 164 | # GPR model 165 | noise_prior = gpytorch.priors.torch_priors.GammaPrior(1.1, 0.05) 166 | noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate 167 | lik_laplace = gpytorch.likelihoods.gaussian_likelihood.GaussianLikelihood(noise_prior=noise_prior, 168 | noise_constraint=gpytorch.constraints.GreaterThan(1e-8), 169 | initial_value=noise_prior_mode) 170 | m_laplace = botorch.models.SingleTaskGP(torch.tensor(x_man), torch.tensor(y_train), 171 | covar_module=k_laplace, likelihood=lik_laplace) 172 | # Define the marginal log-likelihood 173 | mll_laplace = gpytorch.mlls.ExactMarginalLogLikelihood(m_laplace.likelihood, m_laplace) 174 | # Optimization of the model parameters 175 | botorch.fit_gpytorch_model(mll=mll_laplace) 176 | # Kernel computation 177 | K1 = k_laplace.forward(torch.tensor(x_man), torch.tensor(x_man)) 178 | K12 = k_laplace.forward(torch.tensor(x_man), torch.tensor(x_man_test)) 179 | K2 = k_laplace.forward(torch.tensor(x_man_test), torch.tensor(x_man_test)) 180 | # Prediction 181 | preds_laplace = m_laplace(torch.tensor(x_man_test)) 182 | mean_est_laplace = preds_laplace.mean.detach().numpy() 183 | var_est_laplace = preds_laplace.variance.detach().numpy() 184 | covar_est_laplace = preds_laplace.covariance_matrix.detach().numpy() 185 | error_laplace = np.sqrt(np.sum((y_test - mean_est_laplace) ** 2) / nb_data_test) 186 | print('Estimation error (Laplace kernel) = 
', error_laplace) 187 | # Plot test data 188 | fig = plt.figure(figsize=(5, 5)) 189 | plot_gaussian_process_prediction(fig, mu_test_fct, x_man_test, mean_est_laplace, mu_test_fct, r'Laplace kernel') 190 | 191 | # ### Euclidean RBF 192 | # Define the kernel 193 | k_eucl = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=None), 194 | outputscale_prior=gpytorch.priors.torch_priors.GammaPrior(2.0, 0.15)) 195 | # GPR model 196 | noise_prior = gpytorch.priors.torch_priors.GammaPrior(1.1, 0.05) 197 | noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate 198 | lik_eucl = gpytorch.likelihoods.gaussian_likelihood.GaussianLikelihood(noise_prior=noise_prior, 199 | noise_constraint=gpytorch.constraints.GreaterThan(1e-8), 200 | initial_value=noise_prior_mode) 201 | m_eucl = botorch.models.SingleTaskGP(torch.tensor(x_man), torch.tensor(y_train), 202 | covar_module=k_eucl, likelihood=lik_eucl) 203 | # Define the marginal log-likelihood 204 | mll_eucl = gpytorch.mlls.ExactMarginalLogLikelihood(m_eucl.likelihood, m_eucl) 205 | # Optimization of the model parameters 206 | botorch.fit_gpytorch_model(mll=mll_eucl) 207 | # Kernel computation 208 | K1 = k_eucl.forward(torch.tensor(x_man), torch.tensor(x_man)) 209 | K12 = k_eucl.forward(torch.tensor(x_man), torch.tensor(x_man_test)) 210 | K2 = k_eucl.forward(torch.tensor(x_man_test), torch.tensor(x_man_test)) 211 | # Prediction 212 | preds_eucl = m_eucl(torch.tensor(x_man_test)) 213 | mean_est_eucl = preds_eucl.mean.detach().numpy() 214 | var_est_eucl = preds_eucl.variance.detach().numpy() 215 | covar_est_eucl = preds_eucl.covariance_matrix.detach().numpy() 216 | error_eucl = np.sqrt(np.sum((y_test - mean_est_eucl) ** 2) / nb_data_test) 217 | print('Estimation error (Euclidean-RBF kernel) = ', error_eucl) 218 | # Plot test data 219 | fig = plt.figure(figsize=(5, 5)) 220 | plot_gaussian_process_prediction(fig, mu_test_fct, x_man_test, mean_est_eucl, mu_test_fct, r'Euclidean-RBF kernel') 221 | 222 | plt.show() 223 | 224 | -------------------------------------------------------------------------------- /examples/bo_sphere/benchmark_examples/bo_euclidean_sphere.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import functools 4 | 5 | import torch 6 | import gpytorch 7 | import botorch 8 | 9 | import pymanopt.manifolds as pyman_man 10 | 11 | import matplotlib.pyplot as plt 12 | import matplotlib.pylab as pl 13 | from mpl_toolkits.mplot3d import Axes3D 14 | 15 | from BoManifolds.euclidean_optimization.euclidean_constrained_optimize import joint_optimize 16 | from BoManifolds.Riemannian_utils.sphere_constraint_utils import norm_one_constraint 17 | 18 | from BoManifolds.plot_utils.manifolds_plots import plot_sphere 19 | from BoManifolds.plot_utils.bo_plots import bo_plot_function_sphere, bo_plot_acquisition_sphere, bo_plot_gp_sphere, \ 20 | bo_plot_gp_sphere_planar 21 | 22 | from BoManifolds.BO_test_functions.test_functions_sphere import ackley_function_sphere, optimum_ackley_sphere 23 | 24 | plt.rcParams['text.usetex'] = True # use Latex font for plots 25 | plt.rcParams['text.latex.preamble'] = r'\usepackage{bm}' 26 | 27 | ''' 28 | This example shows the use of Euclidean Bayesian optimization on the sphere S2 to optimize the Ackley function. 29 | 30 | The test function, defined on the tangent space of the north pole, is projected on the sphere with the exponential 31 | map (i.e. the logarithm map is used to determine the function value). 
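Schematically, the evaluated function is f(x) = ackley(logmap(x, north_pole)) for a point x on the sphere; this is
a sketch of the behavior, the implementation being ackley_function_sphere in
BoManifolds.BO_test_functions.test_functions_sphere.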
32 | The Euclidean BO uses a Gaussian kernel for comparisons with GaBO.
33 | The acquisition function is optimized with a constrained optimization to obtain points lying on the sphere.
34 | The dimension of the manifold is set by the variable 'dim'. Note that the following element is adapted when the
35 | dimension is modified:
36 | - if the dimension is not 3, 'disp_fig' is set to 'False', so that no figures are displayed.
37 | The number of BO iterations is set by the user by changing the variable 'n_iters'.
38 | The test function is the Ackley function on the sphere, but can be changed by the user. Other test functions are
39 | available in BoManifolds.BO_test_functions.test_functions_sphere.
40 | 
41 | The current optimum value of the function is printed at each BO iteration and the best estimate of the optimizer
42 | (on the sphere) is printed at the end of the queries.
43 | The following graph is produced by this example:
44 | - the convergence graph shows the distance between two consecutive iterations and the best function value found by the
45 | BO at each iteration. Note that the randomly generated initial data are not displayed, so that the iteration count
46 | starts at the number of initial data + 1.
47 | The following graphs are produced by this example if 'disp_fig' is 'True':
48 | - the true function graph is displayed on S2;
49 | - the acquisition function at the end of the optimization is displayed on S2;
50 | - the GP mean at the end of the optimization is displayed on S2;
51 | - the GP mean and variances are displayed on 2D projections of S2;
52 | - the BO observations are displayed on S2.
53 | For all the graphs, the optimum parameter is displayed with a star, the current best estimate with a diamond, and all
54 | the BO observations with dots.
55 | 
56 | This file is part of the GaBOtorch library.
57 | Authors: Noemie Jaquier and Leonel Rozo, 2020 58 | License: MIT 59 | Contact: noemie.jaquier@kit.edu, leonel.rozo@de.bosch.com 60 | ''' 61 | 62 | if __name__ == "__main__": 63 | seed = 1234 64 | # Set numpy and pytorch seeds 65 | random.seed(seed) 66 | np.random.seed(seed) 67 | torch.manual_seed(seed) 68 | torch.backends.cudnn.deterministic = True 69 | torch.backends.cudnn.benchmark = False 70 | 71 | # Define the dimension 72 | dim = 3 73 | 74 | if dim == 3: 75 | disp_fig = True 76 | else: 77 | disp_fig = False 78 | 79 | # Instantiate the manifold (used for the test function) 80 | sphere_manifold = pyman_man.Sphere(dim) 81 | 82 | # Function to optimize 83 | test_function = functools.partial(ackley_function_sphere, sphere_manifold=sphere_manifold) 84 | # Optimum 85 | true_min, true_opt_val = optimum_ackley_sphere(sphere_manifold) 86 | 87 | # Plot test function with inputs on the sphere 88 | # 3D figure 89 | if disp_fig: 90 | fig = plt.figure(figsize=(5, 5)) 91 | ax = Axes3D(fig) 92 | 93 | max_colors = bo_plot_function_sphere(ax, test_function, true_opt_x=true_min, true_opt_y=true_opt_val, 94 | elev=10, azim=30, n_elems=300) 95 | ax.set_title('True function', fontsize=20) 96 | plt.show() 97 | else: 98 | max_colors = None 99 | 100 | # Generate random data on the sphere 101 | nb_data_init = 5 102 | x_data = torch.tensor(np.array([sphere_manifold.rand() for n in range(nb_data_init)])) 103 | y_data = torch.zeros(nb_data_init, dtype=torch.float64) 104 | for n in range(nb_data_init): 105 | y_data[n] = test_function(x_data[n]) 106 | 107 | # Define the kernel function 108 | k_fct = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(), 109 | outputscale_prior=gpytorch.priors.torch_priors.GammaPrior(2.0, 0.15)) 110 | 111 | # Define the GPR model 112 | # A constant mean function is already included in the model 113 | noise_prior = gpytorch.priors.torch_priors.GammaPrior(1.1, 0.05) 114 | noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate 115 | lik_fct = gpytorch.likelihoods.gaussian_likelihood.GaussianLikelihood(noise_prior=noise_prior, 116 | noise_constraint= 117 | gpytorch.constraints.GreaterThan(1e-8), 118 | initial_value=noise_prior_mode) 119 | model = botorch.models.SingleTaskGP(x_data, y_data[:, None], covar_module=k_fct, likelihood=lik_fct) 120 | 121 | # Define the marginal log-likelihood 122 | mll_fct = gpytorch.mlls.ExactMarginalLogLikelihood(model.likelihood, model) 123 | 124 | # Specify the optimization domain 125 | bounds = torch.stack([-torch.ones(dim, dtype=torch.float64), torch.ones(dim, dtype=torch.float64)]) 126 | 127 | # Initialize best observation and function value list 128 | new_best_f, index = y_data.min(0) 129 | best_x = [x_data[index]] 130 | best_f = [new_best_f] 131 | 132 | # Define constraints 133 | constraints = [{'type': 'eq', 'fun': norm_one_constraint}] 134 | 135 | # Define sampling post processing function 136 | def post_processing_init(x): 137 | return x / torch.cat(x.shape[-1] * [torch.norm(x, dim=[-1]).unsqueeze(-1)], dim=-1) 138 | 139 | # BO loop 140 | n_iters = 25 141 | for iteration in range(n_iters): 142 | # Fit GP model 143 | botorch.fit_gpytorch_model(mll=mll_fct) 144 | 145 | # Define the acquisition function 146 | acq_fct = botorch.acquisition.ExpectedImprovement(model=model, best_f=best_f[-1], maximize=False) 147 | 148 | # Get new candidate 149 | new_x = joint_optimize(acq_fct, bounds=bounds, q=1, num_restarts=5, raw_samples=100, constraints=constraints, 150 | post_processing_init=post_processing_init) 151 | 152 | # Get new 
observation 153 | new_y = test_function(new_x)[0] 154 | 155 | # Update training points 156 | x_data = torch.cat((x_data, new_x)) 157 | y_data = torch.cat((y_data, new_y)) 158 | 159 | # Update best observation 160 | new_best_f, index = y_data.min(0) 161 | best_x.append(x_data[index]) 162 | best_f.append(new_best_f) 163 | 164 | # Update the model 165 | model.set_train_data(x_data, y_data, strict=False) # strict False necessary to add datapoints 166 | 167 | print("Iteration " + str(iteration) + "\t Best f " + str(new_best_f.item())) 168 | 169 | # To numpy 170 | x_eval = x_data.numpy() 171 | y_eval = y_data.numpy()[:, None] 172 | 173 | if disp_fig: 174 | # Plot acquisition function 175 | fig = plt.figure(figsize=(5, 5)) 176 | ax = Axes3D(fig) 177 | bo_plot_acquisition_sphere(ax, acq_fct, xs=x_eval, opt_x=best_x[-1][None], true_opt_x=true_min, 178 | elev=10, azim=30, n_elems=100) 179 | ax.set_title('Acquisition function', fontsize=20) 180 | plt.show() 181 | 182 | # Plot GP 183 | fig = plt.figure(figsize=(5, 5)) 184 | ax = Axes3D(fig) 185 | bo_plot_gp_sphere(ax, model, xs=x_eval, opt_x=best_x[-1][None], true_opt_x=true_min, true_opt_y=true_opt_val, 186 | max_colors=max_colors, elev=10, azim=30, n_elems=100) 187 | ax.set_title('GP mean', fontsize=20) 188 | plt.show() 189 | 190 | # Plot GP projected on planes 191 | fig = plt.figure(figsize=(10, 5)) 192 | bo_plot_gp_sphere_planar(fig, model, var_fact=2., xs=x_eval, ys=y_eval, opt_x=best_x[-1][None], 193 | opt_y=best_f[-1], true_opt_x=true_min, true_opt_y=true_opt_val, max_colors=max_colors, 194 | n_elems=100) 195 | plt.title('GP mean and variance', fontsize=20) 196 | plt.show() 197 | 198 | # Plot convergence on the sphere 199 | # 3D figure 200 | fig = plt.figure(figsize=(5, 5)) 201 | ax = Axes3D(fig) 202 | 203 | # Make the panes transparent 204 | ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) 205 | ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) 206 | ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) 207 | 208 | # Make the grid lines transparent 209 | ax.xaxis._axinfo["grid"]['color'] = (1, 1, 1, 0) 210 | ax.yaxis._axinfo["grid"]['color'] = (1, 1, 1, 0) 211 | ax.zaxis._axinfo["grid"]['color'] = (1, 1, 1, 0) 212 | 213 | # Remove axis 214 | ax._axis3don = False 215 | 216 | # Initial view 217 | # ax.view_init(elev=10, azim=-20.) # (default: elev=30, azim=-60) 218 | ax.view_init(elev=10, azim=30.) # (default: elev=30, azim=-60) 219 | 220 | # Plot sphere 221 | plot_sphere(ax, alpha=0.4) 222 | 223 | # Plot evaluated points 224 | if max_colors is None: 225 | max_colors = np.max(y_eval - true_opt_val[0]) 226 | for n in range(x_eval.shape[0]): 227 | ax.scatter(x_eval[n, 0], x_eval[n, 1], x_eval[n, 2], 228 | c=pl.cm.inferno(1. 
- (y_eval[n] - true_opt_val[0]) / max_colors)) 229 | 230 | # Plot true minimum 231 | ax.scatter(true_min[0, 0], true_min[0, 1], true_min[0, 2], s=40, c='g', marker='P') 232 | 233 | # Plot BO minimum 234 | ax.scatter(best_x[-1][0], best_x[-1][1], best_x[-1][2], s=20, c='r', marker='D') 235 | ax.set_title('BO observations', fontsize=20) 236 | plt.show() 237 | 238 | # Compute distances between consecutive x's and best evaluation for each iteration 239 | neval = x_eval.shape[0] 240 | distances = np.zeros(neval-1) 241 | for n in range(neval-1): 242 | distances[n] = np.linalg.norm(x_eval[n + 1, :] - x_eval[n, :]) 243 | 244 | Y_best = np.ones(neval) 245 | for i in range(neval): 246 | Y_best[i] = y_eval[:(i + 1)].min() 247 | 248 | # Plot distances between consecutive x's 249 | plt.figure(figsize=(10, 5)) 250 | plt.subplot(1, 2, 1) 251 | plt.plot(np.array(range(neval - 1)), distances, '-ro') 252 | plt.xlabel('Iteration') 253 | plt.ylabel('d(x[n], x[n-1])') 254 | plt.title('Distance between consecutive observations') 255 | plt.grid(True) 256 | 257 | # Estimated m(x) at the proposed sampling points 258 | plt.subplot(1, 2, 2) 259 | plt.plot(np.array(range(neval)), Y_best, '-o') 260 | plt.title('Value of the best selected sample') 261 | plt.xlabel('Iteration') 262 | plt.ylabel('Best y') 263 | plt.grid(True) 264 | 265 | plt.show() 266 | --------------------------------------------------------------------------------