├── DATENSCHUTZHINWEIS ├── LICENSE ├── README.md ├── argmaxlogo.png ├── cpmp-exp-output └── .gitignore ├── docker.build ├── docker.run ├── docker ├── Dockerfile └── tf_prob_patch_0.10.1 ├── opt_pmp_utils ├── __init__.py ├── basis_functions.py ├── constraints.py ├── cpmp.py ├── plot_2d_normal.py ├── promp.py ├── unscented_transform.py └── utils.py ├── quant_experiment ├── 2d_env_vipmp_obsAv.py ├── 2d_env_vipmp_vWall.py ├── 2d_env_vipmp_viaP.py ├── analyse_obsAv.py ├── analyse_vWall.py ├── analyse_viaP.py └── trajectory_env.py ├── requirements.txt └── setup.py /DATENSCHUTZHINWEIS: -------------------------------------------------------------------------------- 1 | Transparenz und offene Kommunikation sind innerhalb und außerhalb der Volkswagen AG für uns selbstverständlich. Wichtige Informationen über die Volkswagen AG, die Möglichkeiten der Kontaktaufnahme sowie zum Datenschutz finden Sie hier. 2 | 3 | I. Informationen über die Volkswagen AG: siehe 4 | 5 | https://www.volkswagenag.com/de/meta/provider-identification.html 6 | 7 | II. Information über die Erhebung personenbezogener Daten 8 | 9 | (1) Wir stellen Ihnen auf der Plattform der Firma GitHub, Inc., 88 Colin P Kelly Jr Street, San Francisco, CA 94107, USA, ein Repository zur Verfügung, über das u.a. Softwarebibliotheken der VOLKSWAGEN AG dokumentiert und zum Download angeboten werden. Im Folgenden informieren wir über die Erhebung personenbezogener Daten bei Nutzung unseres Repository. 10 | 11 | (2) Personenbezogene Daten sind alle Daten, die auf Sie persönlich beziehbar sind, z.B. E-Mail-Adresse, Nutzerprofil u.ä.. Die Erhebung personenbezogener Daten erfolgt bei Nutzung unseres Repository allein durch die Firma GitHub. 12 | 13 | (3) Wir erhalten von der Firma GitHub lediglich anonymisierte Informationen, also Daten ohne Personenbezug. Diese Informationen betreffen die Aktivitäten der Nutzer von GitHub bzw. unseres Repository. So übermittelt uns GitHub beispielsweise Statistiken über die Nutzung unserer Open-Source-Projekte, etwa über die Anzahl der „Stars“ (Lesezeichen) in Bezug auf unser Repository. 14 | 15 | (4) Verantwortlicher für die Datenerhebung und damit Ansprechpartner für Datenschutzanfragen und Betroffenenrechte ist allein GitHub. Wir verweisen insoweit auf die GitHub Privacy Notice unter https://help.github.com/articles/github-privacy-statement/#how-we-share-the-information-we-collect 16 | 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | =============== 3 | 4 | Copyright (C) 2022 Volkswagen Aktiengesellschaft, 5 | Berliner Ring 2, 38440 Wolfsburg, Germany 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![argmax.ai](argmaxlogo.png) 2 | 3 | *This repository is published by the Volkswagen Group Machine Learning Research Lab.* 4 | 5 | *Learn more at https://argmax.ai.* 6 | 7 | 8 | # [Constrained Probabilistic Movement Primitives for Robot Trajectory Adaptation](https://arxiv.org/abs/2101.12561) 9 | 10 | This repository contains the source code of some of the experiments conducted for our paper "Constrained Probabilistic Movement Primitives for Robot Trajectory Adaptation", published in [TRO](https://ieeexplore.ieee.org/document/9655714) in 2021. 11 | The relevant experiments can be found in Section III.B of our paper and their results are shown in the Tables II, III and IV. 12 | 13 | [Video explaining the paper](https://youtu.be/7UI6QX-eZ3I) 14 | 15 | 16 | ## Folder Structure 17 | 18 | ```bash 19 | ├── docker 20 | │   ├── Dockerfile # Dockerfile for reducing the experiments 21 | │   └── tf_prob_patch_0.10.1 # patchfile for tf-probability 22 | ├── docker.build # convenience docker build script 23 | ├── docker.run # convenience docker run script 24 | ├── opt_pmp_utils # core implementation of the algorithm 25 | │   ├── __init__.py 26 | │   ├── basis_functions.py # ProMP basis functions 27 | │   ├── constraints.py # implementation of the presented constraints 28 | │   ├── cpmp.py # implementation of Constrained ProMPs and algorithm 1 29 | │   ├── plot_2d_normal.py # custom plot function 30 | │   ├── promp.py # ProMP implementation 31 | │   ├── unscented_transform.py # generic TF unscented transform 32 | │   └── utils.py # utilities 33 | ├── quant_experiment 34 | │   ├── 2d_env_vipmp_obsAv.py # obstacle avoidance experiment script 35 | │   ├── 2d_env_vipmp_viaP.py # via-point experiment script 36 | │   ├── 2d_env_vipmp_vWall.py # virtual wall experiment script 37 | │   ├── analyse_obsAv.py # obstacle avoidance analysis script 38 | │   ├── analyse_viaP.py # via-point analysis script 39 | │   ├── analyse_vWall.py # virtual wall analysis script 40 | │   └── trajectory_env.py # 2D environment used in all experiments 41 | ├── README.md # this file 42 | └── setup.py # Python setup file 43 | ``` 44 | 45 | 46 | ## Reproducing the Results of the Paper 47 | 48 | This repository contains 3 experiment scripts in the `quant_experiment` folder. 49 | These can be used to run an obstacle avoidance, a via-point and a virtual wall experiment. 50 | They produce Figures similar to the Figures 6, 7 and 8 of our paper and they also store all data necessary to produce tables similar to the Tables II, III and IV. 51 | We provide a docker environment and step-by-step instructions on how to run the respective experiments. 52 | 53 | ### Setup docker 54 | 55 | Install `docker>=20.10.10` according to their [published manuals](https://docs.docker.com/get-docker/). 56 | It helps if you have basic familiarity with docker, otherwise you can go through their [getting started manuals](https://docs.docker.com/get-started/). 57 | 58 | ### Build and Run the Docker Container 59 | 60 | There are two convenience scripts, `docker.build` and `docker.run`, at the top level of the repository that you can execute to build and run the docker container, respectively. 61 | 62 | ```bash 63 | cd 64 | ./docker.build 65 | ./docker.run 66 | ``` 67 | 68 | The `docker.run` command accepts an argument determining the output path for experiments which are run from the docker container. 69 | This path has to be absolute. 70 | If not specified, the output folder defaults to the `cpmp-exp-output` at the root of the repository. 71 | 72 | 73 | ### Run the Experiments 74 | 75 | Inside the docker container, simply execute an experiment by running the corresponding script in the `quant_experiment` folder: 76 | 77 | - Obstacle avoidance experiment: `2d_env_vipmp_obsAv.py` 78 | - Via-point experiment: `2d_env_vipmp_viaP.py` 79 | - Virtual wall experiment: `2d_env_vipmp_vWall.py` 80 | 81 | These scripts have a range of parameters that you can adjust, such as the number of experiments, the number of obstacles, scale of the smoothness penalty or the number of iterations you allow. 82 | Note that you have to rebuild the docker container (by running `./docker.build`) after changing any of the parameters or the code in general for the changes to take effect. 83 | 84 | 85 | ### Evaluate the Experiments 86 | 87 | If you want to look at the generated figures after running the experiments you can check out the experiment output folder you've specified in the `docker.run` command (or the `cpmp-exp-output` folder by default). 88 | Every type of experiment (obstacle avoidance, via-points and virtual wall) will be in a separate folder and every individual experiment will be in a folder with the timestamp as its name. 89 | The experiment output folder contains some images as well as checkpoints and a pickle file with the final state of the learned ProMP. 90 | The final state images of every experiment are called `vipmp_final.png` 91 | 92 | For producing tables similar to the ones presented in the paper, run the corresponding analysis script inside the docker container after running the experiment: 93 | 94 | - Obstacle avoidance experiment: `analyse_obsAv.py` 95 | - Via-point experiment: `analyse_viaP.py` 96 | - Virtual wall experiment: `analyse_vWall.py` 97 | 98 | 99 | ### Run & Evaluate All Experiments 100 | 101 | ```bash 102 | cd 103 | ./docker.build 104 | ./docker.run 105 | # Now inside a shell that opens after executing docker.run 106 | python 2d_env_vipmp_obsAv.py && python 2d_env_vipmp_viaP.py && python 2d_env_vipmp_vWall.py && python analyse_obsAv.py && python analyse_viaP.py && python analyse_vWall.py 107 | ``` 108 | 109 | 110 | ## Related Publications 111 | 112 | If you find the code useful for your research, please consider citing our work. 113 | 114 | ```BibTeX 115 | @article{frank2021constrained, 116 | title={Constrained Probabilistic Movement Primitives for Robot Trajectory Adaptation}, 117 | author={Frank, Felix and Paraschos, Alexandros and van der Smagt, Patrick and Cseke, Botond}, 118 | journal={IEEE Transactions on Robotics}, 119 | year={2021}, 120 | publisher={IEEE} 121 | } 122 | ``` 123 | 124 | 125 | ## Disclaimer 126 | 127 | The purpose of this source code is limited to bare demonstration of the experimental section of the related papers. 128 | -------------------------------------------------------------------------------- /argmaxlogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argmax-ai/constrained-promps/410a9158270838fe577c4d2f8864ffb446c7a780/argmaxlogo.png -------------------------------------------------------------------------------- /cpmp-exp-output/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /docker.build: -------------------------------------------------------------------------------- 1 | docker build . -t constrained-promps -f docker/Dockerfile -------------------------------------------------------------------------------- /docker.run: -------------------------------------------------------------------------------- 1 | docker run \ 2 | -u $(id -u ${USER}):$(id -g ${USER}) \ 3 | --mount type=bind,source=${1:-`pwd`/cpmp-exp-output},target=/src/cpmp/quant_experiment/output \ 4 | -w /src/cpmp/quant_experiment \ 5 | -it constrained-promps:latest /bin/zsh 6 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.2.0 2 | RUN apt update && apt upgrade -y && DEBIAN_FRONTEND=noninteractive TZ=Europe/Berlin apt install -y --no-install-recommends git curl zsh vim less python3-tk feh 3 | RUN pip install matplotlib python-box tqdm scipy tensorflow_probability==0.10.1 4 | ADD docker/tf_prob_patch_0.10.1 /src/tf_prob_patch_0.10.1 5 | RUN patch -p2 -d /usr/local/lib/python3.6/dist-packages/tensorflow_probability < /src/tf_prob_patch_0.10.1 6 | RUN mkdir -p /root/.config/matplotlib && echo "backend: TKAgg" > /root/.config/matplotlib/matplotlibrc 7 | RUN mkdir /src/cpmp 8 | ADD setup.py /src/cpmp 9 | ADD requirements.txt /src/cpmp 10 | ADD opt_pmp_utils /src/cpmp/opt_pmp_utils 11 | RUN pip install -e /src/cpmp 12 | ADD quant_experiment /src/cpmp/quant_experiment -------------------------------------------------------------------------------- /docker/tf_prob_patch_0.10.1: -------------------------------------------------------------------------------- 1 | diff --git a/tensorflow_probability/python/optimizer/lbfgs.py b/tensorflow_probability/python/optimizer/lbfgs.py 2 | index 77a17bae3..719e26eaa 100644 3 | --- a/tensorflow_probability/python/optimizer/lbfgs.py 4 | +++ b/tensorflow_probability/python/optimizer/lbfgs.py 5 | @@ -36,6 +36,7 @@ from tensorflow_probability.python.internal import distribution_util 6 | from tensorflow_probability.python.internal import prefer_static 7 | from tensorflow_probability.python.optimizer import bfgs_utils 8 | 9 | +from tqdm import tqdm 10 | 11 | LBfgsOptimizerResults = collections.namedtuple( 12 | 'LBfgsOptimizerResults', [ 13 | @@ -87,6 +88,7 @@ def minimize(value_and_gradients_function, 14 | max_iterations=50, 15 | parallel_iterations=1, 16 | stopping_condition=None, 17 | + one_step_callback=None, 18 | name=None): 19 | """Applies the L-BFGS algorithm to minimize a differentiable function. 20 | 21 | @@ -217,6 +219,8 @@ def minimize(value_and_gradients_function, 22 | x_tolerance, dtype=dtype, name='x_tolerance') 23 | max_iterations = tf.convert_to_tensor(max_iterations, name='max_iterations') 24 | 25 | + if not (one_step_callback is None): 26 | + pbar = tqdm(total=max_iterations.numpy(), desc="L-BFGS") 27 | # The `state` here is a `LBfgsOptimizerResults` tuple with values for the 28 | # current state of the algorithm computation. 29 | def _cond(state): 30 | @@ -247,17 +251,23 @@ def minimize(value_and_gradients_function, 31 | gradient_deltas=_queue_push( 32 | current_state.gradient_deltas, should_update, 33 | next_state.objective_gradient - current_state.objective_gradient)) 34 | + if not (one_step_callback is None): 35 | + state_after_inv_hessian_update = one_step_callback(state_after_inv_hessian_update, pbar) 36 | + pbar.update(n=1) # may trigger a refresh 37 | return [state_after_inv_hessian_update] 38 | 39 | initial_state = _get_initial_state(value_and_gradients_function, 40 | initial_position, 41 | num_correction_pairs, 42 | tolerance) 43 | - return tf.while_loop( 44 | + res = tf.while_loop( 45 | cond=_cond, 46 | body=_body, 47 | loop_vars=[initial_state], 48 | parallel_iterations=parallel_iterations)[0] 49 | + if not (one_step_callback is None): 50 | + pbar.close() 51 | + return res 52 | 53 | 54 | def _get_initial_state(value_and_gradients_function, 55 | -------------------------------------------------------------------------------- /opt_pmp_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .constraints import ( 2 | Border1DConstraint, 3 | ConvexConstraint, 4 | KLPenalty, 5 | Repeller, 6 | SmoothnessPenalty, 7 | Waypoint, 8 | ) 9 | from .cpmp import CProMP 10 | from .plot_2d_normal import plot2dNormal 11 | from .unscented_transform import uTransform, uTransform_cholesky, uTransform_mViP 12 | -------------------------------------------------------------------------------- /opt_pmp_utils/basis_functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class GaussianRBF(object): 5 | """GaussianRBF basis functions for ProMP research 6 | 7 | Contains only __init__ function which computes feature matrix and 8 | corresponding derivatives. 9 | 10 | Parameters: 11 | n_basis: Number of basis functions 12 | time_vec: one-dim vector of timepoints 13 | std_distance: modifies the standard deviation of the kernels, 14 | 1.0 = sigma is exactly the distance between centers 15 | 2.0 = sigma is twice the distance between centers, and so on 16 | normalize_features: True = sum of basis functions at each timepoint is 1 17 | c_t_delta: allows basis function centers outside of the given time range 18 | """ 19 | 20 | def __init__( 21 | self, 22 | n_basis: int, 23 | time_vec: np.ndarray = np.linspace(0, 1, num=101), 24 | std_distance: float = 1.0, 25 | normalize_features: bool = True, 26 | c_t_delta: float = 0.0, 27 | ): 28 | super(GaussianRBF, self).__init__() 29 | self.n_basis = n_basis 30 | self.time_vec = time_vec 31 | self.std_distance = std_distance 32 | self.normalize_features = normalize_features 33 | self.c_t_delta = c_t_delta 34 | 35 | self.t = self.time_vec.reshape(-1, 1) 36 | self.T = self.t.size 37 | self.dt = np.gradient(self.time_vec) 38 | 39 | self.center = np.linspace( 40 | self.t[0] - self.c_t_delta, self.t[-1] + self.c_t_delta, num=self.n_basis 41 | ).reshape((-1, 1)) 42 | self.sig_b = np.abs(self.center[1] - self.center[0]) / self.std_distance 43 | self.dX2 = ( 44 | np.matmul(self.t**2, np.ones((1, self.n_basis))) 45 | - 2 * np.matmul(self.t, self.center.transpose()) 46 | + np.matmul(np.ones((self.T, 1)), self.center.transpose() ** 2) 47 | ) 48 | 49 | self.X = np.exp(-0.5 * self.dX2 / (self.sig_b**2)) 50 | if self.normalize_features: 51 | self.X = self.X / np.sum(self.X, axis=1)[..., np.newaxis] 52 | 53 | self.dX = ( 54 | self.X 55 | * ( 56 | np.ones([self.T, 1]) @ self.center.transpose() 57 | - self.t @ np.ones([1, self.n_basis]) 58 | ) 59 | / (self.sig_b**2), 60 | ) 61 | self.ddX = self.X * (self.dX2 - (self.sig_b**2)) / (self.sig_b**4) 62 | 63 | 64 | def main(): 65 | n_basis = 20 66 | bf = GaussianRBF(n_basis) 67 | import matplotlib.pyplot as plt 68 | 69 | fig, ax = plt.subplots(1, 1) 70 | for i in range(n_basis): 71 | ax.plot(bf.time_vec, bf.X[:, i]) 72 | plt.show() 73 | 74 | 75 | if __name__ == "__main__": 76 | main() 77 | -------------------------------------------------------------------------------- /opt_pmp_utils/constraints.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import opt_einsum 3 | import scipy as sp 4 | import tensorflow.compat.v1 as tf 5 | import tensorflow_probability as tfp 6 | from matplotlib.patches import FancyArrowPatch 7 | 8 | from opt_pmp_utils.unscented_transform import uTransform 9 | 10 | 11 | def FK(q): 12 | x = tf.cos(q[..., 0]) + tf.cos(q[..., 0] + q[..., 1]) 13 | y = tf.sin(q[..., 0]) + tf.sin(q[..., 0] + q[..., 1]) 14 | return tf.stack([x, y], axis=1) 15 | 16 | 17 | class Arrow3d(FancyArrowPatch): 18 | def __init__(self, x_dx, y_dy, z_dz, *args, **kwargs): 19 | FancyArrowPatch.__init__(self, (0, 0), (0, 0), *args, **kwargs) 20 | self.x_dx = x_dx 21 | self.y_dy = y_dy 22 | self.z_dz = z_dz 23 | 24 | def draw(self, renderer): 25 | from mpl_toolkits.mplot3d import proj3d 26 | 27 | x, y, z = proj3d.proj_transform(self.x_dx, self.y_dy, self.z_dz, renderer.M) 28 | self.set_positions((x[0], y[0]), (x[1], y[1])) 29 | FancyArrowPatch.draw(self, renderer) 30 | 31 | 32 | class Constraint(object): 33 | """Base class for different constraints in the CPMP framework 34 | 35 | Parameters: 36 | lagrange_learning_rate: learning rate for lagrange multiplier 37 | alpha: desired strictness of prob. constraint 38 | transform_fcn: nonlinear transformation into constraint space 39 | floatT: tensorflow type used for variable initialisation 40 | """ 41 | 42 | def __init__( 43 | self, 44 | lagrange_learning_rate: float, 45 | alpha: float, 46 | transform_fcn, 47 | floatT=tf.float64, 48 | ): 49 | super(Constraint, self).__init__() 50 | self.lagrange_learning_rate = lagrange_learning_rate 51 | self.alpha = alpha 52 | self.transform_fcn = transform_fcn 53 | self.floatT = floatT 54 | 55 | def evaluate(self): 56 | """Returns the constraint cost value (lag. mult. * const.)""" 57 | raise NotImplementedError() 58 | 59 | def update(self): 60 | """Updates the lag. mult. given the current opt. state""" 61 | raise NotImplementedError() 62 | 63 | def plot2D(self, ax): 64 | """Plots the constraint in a 2D-plot""" 65 | raise NotImplementedError() 66 | 67 | def plot3D(self, ax): 68 | """Plots the constraint in a 3D-plot""" 69 | raise NotImplementedError() 70 | 71 | def get_violations(self, paths): 72 | """Returns boolean mask of which paths violate the constraint""" 73 | return tf.zeros(paths.shape[0], dtype=tf.bool) 74 | raise NotImplementedError() 75 | 76 | def info(self): 77 | """Prints information about the constraint""" 78 | print("\tAlpha:\t\t{}\n".format(self.alpha)) 79 | print("\tTransformation:\t\t{}\n".format(self.transform_fcn)) 80 | print("\tTime Mask:\t\t{}\n".format(self.time_mask)) 81 | 82 | 83 | class SquareDistance2Point(Constraint): 84 | """Base class for distance based constraints 85 | 86 | Parameters: 87 | lagrange_learning_rate: learning rate for lagrange multiplier 88 | alpha: desired strictness of prob. constraint 89 | transform_fcn: nonlinear transformation into constraint space 90 | point: obstacle/waypoint location 91 | margin: spatial margin around obstacle/waypoint 92 | n_timesteps: number points in the timegrid 93 | time_mask: indicated at which timepoints the constraint is active (=1) 94 | floatT: tensorflow type used for variable initialisation 95 | lagrange_initial: initial value for lagrange multiplier 96 | """ 97 | 98 | def __init__( 99 | self, 100 | lagrange_learning_rate, 101 | alpha, 102 | transform_fcn, 103 | point, 104 | margin, 105 | n_timesteps, 106 | time_mask=1, 107 | floatT=tf.float64, 108 | lagrange_initial=1.0, 109 | ): 110 | with tf.variable_scope("SquareDistance2Point"): 111 | super(SquareDistance2Point, self).__init__( 112 | lagrange_learning_rate, 113 | alpha, 114 | transform_fcn, 115 | floatT=floatT, 116 | ) 117 | self.lagrange_var = tf.squeeze( 118 | tf.Variable( 119 | initial_value=lagrange_initial * np.ones(n_timesteps), 120 | dtype=self.floatT, 121 | ) 122 | ) 123 | self.lagrange_initial = lagrange_initial 124 | self.point = point 125 | self.margin = margin 126 | self.time_mask = time_mask 127 | self.n_timesteps = n_timesteps 128 | self._color = "k" 129 | self._init() 130 | 131 | def _init(self): 132 | pass 133 | 134 | def transform(self, q): 135 | d = self.transform_fcn(q) - self.point 136 | sqd = tf.einsum("...n,...n->...", d, d) 137 | return tf.expand_dims(sqd, -1) 138 | 139 | def evaluate(self, dist, lagrange_var, alpha=1e-1, kappa=0, beta=2.0): 140 | self.mean, self.covar = uTransform( 141 | dist, self.transform, alpha=alpha, kappa=kappa, beta=beta 142 | ) 143 | self.mean = tf.squeeze(self.mean) 144 | self.covar = tf.squeeze(self.covar) 145 | self.distd = self._target_dist() 146 | self.mass = self.distd.cdf((self.margin) ** 2) 147 | self.constraint = tf.squeeze(self._compute_constraint() * self.time_mask) 148 | return self.constraint * lagrange_var 149 | 150 | def evaluate_internal(self, dist): 151 | return tf.reduce_sum(self.evaluate(dist, self.lagrange_var)) 152 | 153 | def update(self, x): 154 | # print("Lagrange update") 155 | self.lagrange_var = self.lagrange_var * tf.exp( 156 | self.lagrange_learning_rate * self.constraint 157 | ) 158 | 159 | def plot2D(self, ax): 160 | from matplotlib.patches import Circle 161 | 162 | ax.scatter(self.point[0], self.point[1], c=self._color, marker="+", s=20) 163 | ax.add_artist( 164 | Circle(self.point, radius=self.margin, ec=self._color, fc="None", zorder=4) 165 | ) 166 | 167 | def plot3D(self, ax): 168 | u = np.linspace(0, 2 * np.pi, 100) 169 | v = np.linspace(0, np.pi, 100) 170 | x = self.point[0] + self.margin * np.outer(np.cos(u), np.sin(v)) 171 | y = self.point[1] + self.margin * np.outer(np.sin(u), np.sin(v)) 172 | z = self.point[2] + self.margin * np.outer(np.ones_like(u), np.cos(v)) 173 | 174 | ax.plot_surface( 175 | x, 176 | y, 177 | z, 178 | color=self._color, 179 | ec="k", 180 | linewidth=0.3, 181 | alpha=0.4, 182 | rstride=10, 183 | cstride=10, 184 | ) 185 | ax.scatter( 186 | self.point[0], self.point[1], self.point[2], c=self._color, marker="X", s=50 187 | ) 188 | 189 | def _compute_constraint(self): 190 | raise NotImplementedError() 191 | 192 | def _target_dist(self): 193 | raise NotImplementedError() 194 | 195 | def info(self): 196 | print("\tPoint:\t\t{}\n".format(self.point)) 197 | print("\tMargin:\t\t{}\n".format(self.margin)) 198 | if hasattr(self, "lagrange_initial"): 199 | print("\tLagrange initial:\t\t{}\n".format(self.lagrange_initial)) 200 | print("\tLagrange final:\t\t{}\n".format(self.lagrange_var.numpy)) 201 | super(SquareDistance2Point, self).info() 202 | 203 | 204 | class Repeller(SquareDistance2Point): 205 | """Repeller constraint 206 | 207 | Parameters: 208 | lagrange_learning_rate: learning rate for lagrange multiplier 209 | alpha: desired strictness of prob. constraint 210 | transform_fcn: nonlinear transformation into constraint space 211 | x_avoid: obstacle location 212 | margin: spatial margin around obstacle 213 | n_timesteps: number points in the timegrid 214 | time_mask: indicated at which timepoints the constraint is active (=1) 215 | floatT: tensorflow type used for variable initialisation 216 | lagrange_initial: initial value for lagrange multiplier 217 | """ 218 | 219 | def __init__( 220 | self, 221 | lagrange_learning_rate, 222 | alpha, 223 | transform_fcn, 224 | x_avoid, 225 | margin, 226 | n_timesteps, 227 | time_mask=1, 228 | floatT=tf.float64, 229 | lagrange_initial=1.0, 230 | ): 231 | with tf.variable_scope("Repeller"): 232 | super(Repeller, self).__init__( 233 | lagrange_learning_rate, 234 | alpha, 235 | transform_fcn, 236 | x_avoid, 237 | margin, 238 | n_timesteps, 239 | time_mask=time_mask, 240 | floatT=floatT, 241 | lagrange_initial=lagrange_initial, 242 | ) 243 | self._color = "k" 244 | 245 | def _compute_constraint(self): 246 | return self.mass - self.alpha 247 | 248 | def _target_dist(self): 249 | self.rate = self.mean / tf.squeeze(self.covar) 250 | self.concentration = self.mean**2 / tf.squeeze(self.covar) 251 | return tfp.distributions.Gamma(self.concentration, self.rate) 252 | 253 | def get_violations(self, paths): 254 | # paths: sxtxd 255 | distances = tf.norm(self.point - paths, axis=-1) 256 | min_distance = tf.reduce_min(distances, axis=-1) 257 | violations = min_distance < self.margin 258 | return violations 259 | 260 | def info(self): 261 | print("Repeller constraint:\n") 262 | super(Repeller, self).info() 263 | print("\n") 264 | 265 | 266 | class Waypoint(SquareDistance2Point): 267 | """Waypoint constraint 268 | 269 | Parameters: 270 | lagrange_learning_rate: learning rate for lagrange multiplier 271 | alpha: desired strictness of prob. constraint 272 | transform_fcn: nonlinear transformation into constraint space 273 | waypoint: waypoint location 274 | margin: allowed spatial margin around waypoint 275 | n_timesteps: number points in the timegrid 276 | time_mask: indicated at which timepoints the constraint is active (=1) 277 | floatT: tensorflow type used for variable initialisation 278 | lagrange_initial: initial value for lagrange multiplier 279 | """ 280 | 281 | def __init__( 282 | self, 283 | lagrange_learning_rate, 284 | alpha, 285 | transform_fcn, 286 | waypoint, 287 | margin, 288 | n_timesteps, 289 | time_mask=1, 290 | floatT=tf.float64, 291 | lagrange_initial=1.0, 292 | ): 293 | with tf.variable_scope("Waypoint"): 294 | super(Waypoint, self).__init__( 295 | lagrange_learning_rate, 296 | alpha, 297 | transform_fcn, 298 | waypoint, 299 | margin, 300 | n_timesteps, 301 | time_mask=time_mask, 302 | floatT=tf.float64, 303 | lagrange_initial=lagrange_initial, 304 | ) 305 | 306 | def _compute_constraint(self): 307 | return 1 - self.mass - self.alpha 308 | 309 | def _target_dist(self): 310 | self.rate = self.mean / tf.squeeze(self.covar) 311 | self.concentration = self.mean**2 / tf.squeeze(self.covar) 312 | return tfp.distributions.Gamma(self.concentration, self.rate) 313 | 314 | def get_violations(self, paths): 315 | # paths: sxtxd 316 | distances = tf.norm(self.point - paths, axis=-1) 317 | min_distance = tf.reduce_min(distances, axis=-1) 318 | violations = min_distance > self.margin 319 | return violations 320 | 321 | def info(self): 322 | print("Waypoint constraint:\n") 323 | super(Waypoint, self).info() 324 | print("\n") 325 | 326 | 327 | class OneTimeWaypoint(Waypoint): 328 | """Temporally unbound waypoint constraint 329 | 330 | Parameters: 331 | lagrange_learning_rate: learning rate for lagrange multiplier 332 | alpha: desired strictness of prob. constraint 333 | transform_fcn: nonlinear transformation into constraint space 334 | waypoint: waypoint location 335 | margin: allowed spatial margin around waypoint 336 | n_timesteps: number points in the timegrid 337 | time_mask: indicated at which timepoints the constraint is active (=1) 338 | floatT: tensorflow type used for variable initialisation 339 | lagrange_initial: initial value for lagrange multiplier 340 | window_margin: constraint enforces being at waypoint for 341 | 2 * window_margin + 1 timesteps 342 | """ 343 | 344 | def __init__( 345 | self, 346 | lagrange_learning_rate, 347 | alpha, 348 | transform_fcn, 349 | waypoint, 350 | margin, 351 | n_timesteps, 352 | floatT=tf.float64, 353 | lagrange_initial=1.0, 354 | window_margin=1, 355 | ): 356 | with tf.variable_scope("Waypoint"): 357 | super(Waypoint, self).__init__( 358 | lagrange_learning_rate, 359 | alpha, 360 | transform_fcn, 361 | waypoint, 362 | margin, 363 | 1, 364 | time_mask=1.0, 365 | floatT=tf.float64, 366 | lagrange_initial=lagrange_initial, 367 | ) 368 | self.window_margin = window_margin 369 | self.window_size = 2 * window_margin + 1 370 | self.n_ts = n_timesteps 371 | 372 | def evaluate(self, dist, lagrange_var): 373 | self.mean, self.covar = uTransform(dist, self.transform) 374 | self.mean = tf.squeeze(self.mean) 375 | self.covar = tf.squeeze(self.covar) 376 | self.distd = self._target_dist() 377 | self.mass = self.distd.cdf((self.margin) ** 2) 378 | con_vals = self._compute_constraint() * self.time_mask 379 | self.min_idx = tf.math.argmin(con_vals) 380 | gather_start = tf.math.maximum( 381 | tf.constant(0, dtype=tf.dtypes.int64), self.min_idx - self.window_margin 382 | ) 383 | gather_start = tf.math.minimum( 384 | tf.constant(self.n_ts - self.window_size, dtype=tf.dtypes.int64), 385 | gather_start, 386 | ) 387 | gather_range = tf.range(self.window_size, dtype=tf.dtypes.int64) + gather_start 388 | self.constraint = tf.reduce_mean(tf.gather(con_vals, gather_range)) 389 | return self.constraint * lagrange_var 390 | 391 | def info(self): 392 | print("OneTimeWaypoint constraint:\n") 393 | super(OneTimeWaypoint, self).info() 394 | print("\n") 395 | 396 | 397 | class DualRobotAvoidance(SquareDistance2Point): 398 | """Dual robot avoidance constraint 399 | 400 | Parameters: 401 | lagrange_learning_rate: learning rate for lagrange multiplier 402 | alpha: desired strictness of prob. constraint 403 | transform_fcn: nonlinear transformation into constraint space, usually 404 | robot forward kinematics to the two corresponding robot links 405 | margin: desired distance between the robots 406 | n_timesteps: number points in the timegrid 407 | time_mask: indicated at which timepoints the constraint is active (=1) 408 | floatT: tensorflow type used for variable initialisation 409 | lagrange_initial: initial value for lagrange multiplier 410 | """ 411 | 412 | def __init__( 413 | self, 414 | lagrange_learning_rate, 415 | alpha, 416 | transform_fcn, 417 | margin, 418 | n_timesteps, 419 | time_mask=1, 420 | floatT=tf.float64, 421 | lagrange_initial=1.0, 422 | ): 423 | with tf.variable_scope("DualRobotAvoidance"): 424 | super(DualRobotAvoidance, self).__init__( 425 | lagrange_learning_rate, 426 | alpha, 427 | transform_fcn, 428 | 0.0, 429 | margin, 430 | n_timesteps, 431 | time_mask=time_mask, 432 | floatT=floatT, 433 | lagrange_initial=lagrange_initial, 434 | ) 435 | self._color = "k" 436 | 437 | def _init(self): 438 | pass 439 | 440 | def transform(self, q): 441 | xA, xB = self.transform_fcn(q) 442 | d = xA - xB 443 | sqd = tf.einsum("...n,...n->...", d, d) 444 | return tf.expand_dims(sqd, -1) 445 | 446 | def _compute_constraint(self): 447 | return self.mass - self.alpha 448 | 449 | def _target_dist(self): 450 | self.rate = self.mean / tf.squeeze(self.covar) 451 | self.concentration = self.mean**2 / tf.squeeze(self.covar) 452 | return tfp.distributions.Gamma(self.concentration, self.rate) 453 | 454 | def info(self): 455 | print("DualRobotAvoidance constraint:\n") 456 | super(DualRobotAvoidance, self).info() 457 | print("\n") 458 | 459 | 460 | class ConvexConstraint(Constraint): 461 | """Convex constraint, can be used for virtual walls 462 | 463 | Parameters: 464 | lagrange_learning_rate: learning rate for lagrange multiplier 465 | alpha: desired strictness of prob. constraint 466 | transform_fcn: nonlinear transformation into constraint space 467 | normal_vectors: normal vector of the virtual wall 468 | intersection: one point on the virtual wall 469 | n_timesteps: number points in the timegrid 470 | time_mask: indicated at which timepoints the constraint is active (=1) 471 | floatT: tensorflow type used for variable initialisation 472 | lagrange_initial: initial value for lagrange multiplier 473 | vel_decay: unused? 474 | """ 475 | 476 | def __init__( 477 | self, 478 | lagrange_learning_rate, 479 | alpha, 480 | transform_fcn, 481 | normal_vectors, 482 | intersection, 483 | n_timesteps, 484 | time_mask=1, 485 | floatT=tf.float64, 486 | lagrange_initial=1.0, 487 | vel_decay=0.1, 488 | ): 489 | self.normal_vectors_np = np.array(normal_vectors) 490 | self.intersection_np = np.array(intersection) 491 | assert self.intersection_np.shape == self.normal_vectors_np.shape 492 | if len(self.normal_vectors_np.shape) == 1: 493 | self.normal_vectors_np = self.normal_vectors_np[np.newaxis, :] 494 | self.intersection_np = self.intersection_np[np.newaxis, :] 495 | with tf.variable_scope("ConvexConstraint"): 496 | super(ConvexConstraint, self).__init__( 497 | lagrange_learning_rate, 498 | alpha, 499 | transform_fcn, 500 | floatT=floatT, 501 | ) 502 | self.normal_vectors = tf.constant( 503 | value=self.normal_vectors_np, dtype=floatT 504 | ) # n_con x cart. dim 505 | self.intersection = tf.constant( 506 | value=self.intersection_np, dtype=floatT 507 | ) # n_con x cart. dim 508 | self.lagrange_var = tf.squeeze( 509 | tf.Variable( 510 | initial_value=lagrange_initial 511 | * np.ones([n_timesteps, self.normal_vectors_np.shape[0]]), 512 | dtype=floatT, 513 | ) 514 | ) 515 | self.lagrange_var_vel = tf.squeeze( 516 | tf.zeros( 517 | [n_timesteps, self.normal_vectors_np.shape[0]], 518 | dtype=floatT, 519 | ) 520 | ) 521 | self.std_normal = tfp.distributions.Normal( 522 | tf.constant(0.0, dtype=floatT), tf.constant(1.0, dtype=floatT) 523 | ) 524 | self.n_timesteps = n_timesteps 525 | self.time_mask = time_mask 526 | self.vel_decay = vel_decay 527 | self.lagrange_initial = lagrange_initial 528 | 529 | def get_violations(self, paths): 530 | # Paths = sxtxd 531 | # inter/normal = nxd 532 | distance = paths[:, :, tf.newaxis, :] - self.intersection # sxtxnxd 533 | direction = tf.einsum("...nd,nd->...n", distance, self.normal_vectors) # sxtxn 534 | max_direction = tf.math.reduce_max(direction, [1, 2]) 535 | violations = max_direction > 0 536 | return violations 537 | 538 | def evaluate(self, dist, lagrange_var): 539 | self.mean, self.covar = uTransform(dist, self.transform_fcn) 540 | self.V_proj_diag = opt_einsum.contract( 541 | "cd,...di,ci->...c", 542 | self.normal_vectors, 543 | self.covar, 544 | self.normal_vectors, 545 | backend="tensorflow", 546 | ) 547 | self.mass = self.std_normal.cdf( 548 | tf.einsum( 549 | "cd,...cd->...c", 550 | self.normal_vectors, 551 | self.mean[..., tf.newaxis, :] - self.intersection, 552 | ) 553 | / tf.sqrt(self.V_proj_diag) 554 | ) 555 | self.constraint = tf.squeeze((self.mass - self.alpha) * self.time_mask) 556 | 557 | return self.constraint * lagrange_var 558 | 559 | def evaluate_internal(self, dist): 560 | return tf.reduce_sum(self.evaluate(dist, self.lagrange_var)) 561 | 562 | def update(self, x): 563 | self.lagrange_var = self.lagrange_var * tf.exp( 564 | self.lagrange_learning_rate * self.constraint 565 | ) 566 | 567 | def _rotate2dVector(self, vec, rotation): 568 | rotMat = np.array( 569 | [ 570 | [np.cos(rotation), -np.sin(rotation)], 571 | [np.sin(rotation), np.cos(rotation)], 572 | ] 573 | ) 574 | return np.einsum("ij,...j->...i", rotMat, vec) 575 | 576 | def info(self): 577 | print("ConvexConstraint constraint:\n") 578 | print("\tNormal vec:\t\t{}\n".format(self.normal_vectors_np)) 579 | print("\tIntersection:\t\t{}\n".format(self.intersection_np)) 580 | if hasattr(self, "lagrange_initial"): 581 | print("\tLagrange initial:\t\t{}\n".format(self.lagrange_initial)) 582 | print("\tLagrange final:\t\t{}\n".format(self.lagrange_var.numpy)) 583 | super(ConvexConstraint, self).info() 584 | print("\n") 585 | 586 | def plot2D(self, ax): 587 | lvecs = self._rotate2dVector(self.normal_vectors_np, np.pi / 2) 588 | for nvec, inter, lvec in zip( 589 | self.normal_vectors_np, self.intersection_np, lvecs 590 | ): 591 | linep1 = inter + 10 * lvec 592 | linep2 = inter - 10 * lvec 593 | linepx = [linep1[0], linep2[0]] 594 | linepy = [linep1[1], linep2[1]] 595 | ax.plot(linepx, linepy, c="k") 596 | ax.arrow(inter[0], inter[1], nvec[0], nvec[1], color="k", width=0.02) 597 | 598 | def plot3D( 599 | self, ax, n_mesh=100, color="r", ec="None", alpha=0.4, rstride=10, cstride=10 600 | ): 601 | limits = np.array([[*ax.get_xlim()], [*ax.get_ylim()], [*ax.get_zlim()]]) 602 | pos = np.linspace(limits[:, 0], limits[:, 1], num=n_mesh) 603 | for nvec, inter in zip(self.normal_vectors_np, self.intersection_np): 604 | cond = nvec != 0 605 | idx = cond.nonzero()[0][0] 606 | cond[...] = False 607 | cond[idx] = True 608 | mesh = np.zeros([3, n_mesh, n_mesh]) 609 | a, b = np.meshgrid(pos[:, ~cond][:, 0], pos[:, ~cond][:, 1]) 610 | mesh[~cond, ...] = np.stack((a, b)) 611 | mesh[cond, ...] = ( 612 | -1 613 | / nvec[cond] 614 | * np.einsum("n,n...->...", nvec[~cond], mesh[~cond, ...]) 615 | ) 616 | x = inter[0] + mesh[0, ...] 617 | y = inter[1] + mesh[1, ...] 618 | z = inter[2] + mesh[2, ...] 619 | ax.plot_surface( 620 | x, 621 | y, 622 | z, 623 | color=color, 624 | ec=ec, 625 | alpha=alpha, 626 | rstride=rstride, 627 | cstride=cstride, 628 | ) 629 | ax.add_artist( 630 | Arrow3d( 631 | [inter[0], nvec[0]], 632 | [inter[1], nvec[1]], 633 | [inter[2], nvec[2]], 634 | arrowstyle="-|>", 635 | color=color, 636 | mutation_scale=10, 637 | lw=1, 638 | ) 639 | ) 640 | 641 | 642 | class Border1DConstraint(Constraint): 643 | """1 dimensional constraint, can be used for joint limits 644 | 645 | Parameters: 646 | lagrange_learning_rate: learning rate for lagrange multiplier 647 | alpha: desired strictness of prob. constraint 648 | transform_fcn: nonlinear transformation into constraint space 649 | dir_vector: +1 = upper limit, -1 = lower limit 650 | border: location of the 1-D limit 651 | n_timesteps: number points in the timegrid 652 | time_mask: indicated at which timepoints the constraint is active (=1) 653 | floatT: tensorflow type used for variable initialisation 654 | lagrange_initial: initial value for lagrange multiplier 655 | """ 656 | 657 | def __init__( 658 | self, 659 | lagrange_learning_rate, 660 | alpha, 661 | transform_fcn, 662 | dir_vector, 663 | border, 664 | n_timesteps, 665 | time_mask=1, 666 | floatT=tf.float64, 667 | lagrange_initial=1.0, 668 | ): 669 | self.lagrange_initial = lagrange_initial 670 | self.dir_vector_np = np.array(dir_vector) 671 | self.border_np = np.array(border) 672 | self.proj_dim = self.dir_vector_np.size 673 | if len(self.border_np.shape) == 0: 674 | self.border_np = self.border_np[np.newaxis, np.newaxis] 675 | self.border_np = np.tile(self.border_np, [n_timesteps, self.proj_dim]) 676 | elif len(self.border_np.shape) == 1: 677 | if self.border_np.size == n_timesteps: 678 | self.border_np = self.border_np[..., np.newaxis] 679 | self.border_np = np.tile(self.border_np, [1, self.proj_dim]) 680 | elif self.border_np.size == self.proj_dim: 681 | self.border_np = self.border_np[np.newaxis, ...] 682 | self.border_np = np.tile(self.border_np, [n_timesteps, 1]) 683 | else: 684 | raise RuntimeError( 685 | "Border vector shapes cannot be broadcasted to [n_timesteps x proj_dim]" 686 | ) 687 | assert self.border_np.shape[0] == n_timesteps 688 | assert self.border_np.shape[1] == self.proj_dim 689 | with tf.variable_scope("Border1DConstraint"): 690 | super(Border1DConstraint, self).__init__( 691 | lagrange_learning_rate, 692 | alpha, 693 | transform_fcn, 694 | floatT=floatT, 695 | ) 696 | self.dir_vector = tf.constant( 697 | value=self.dir_vector_np, dtype=floatT 698 | ) # proj. dim 699 | self.border = tf.constant( 700 | value=self.border_np, dtype=floatT 701 | ) # n_t x proj. dim 702 | self.lagrange_var = tf.constant( 703 | lagrange_initial * np.ones([n_timesteps, self.proj_dim]), 704 | dtype=floatT, 705 | ) # n_t x proj. dim 706 | self.std_normal = tfp.distributions.Normal( 707 | tf.constant(0.0, dtype=floatT), tf.constant(1.0, dtype=floatT) 708 | ) 709 | self.n_timesteps = n_timesteps 710 | self.time_mask = np.array(time_mask) 711 | if len(self.time_mask.shape) == 0: 712 | self.time_mask = self.time_mask[np.newaxis, np.newaxis] 713 | self.time_mask = np.tile(self.time_mask, [self.n_timesteps, self.proj_dim]) 714 | elif len(self.time_mask.shape) == 1: 715 | if self.time_mask.size == self.n_timesteps: 716 | self.time_mask = self.time_mask[..., np.newaxis] 717 | self.time_mask = np.tile(self.time_mask, [1, self.proj_dim]) 718 | elif self.time_mask.size == self.proj_dim: 719 | self.time_mask = self.time_mask[np.newaxis, ...] 720 | self.time_mask = np.tile(self.time_mask, [self.n_timesteps, 1]) 721 | else: 722 | raise RuntimeError( 723 | "Time mask shapes cannot be broadcasted to [n_timesteps x proj_dim]" 724 | ) 725 | assert self.time_mask.shape[0] == n_timesteps 726 | assert self.time_mask.shape[1] == self.proj_dim 727 | 728 | def evaluate(self, dist, lagrange_var): 729 | self.mean, self.covar = uTransform(dist, self.transform_fcn) 730 | self.V_diag = tf.linalg.diag_part(self.covar) 731 | # mean: n_t x proj. dim 732 | # border: n_t x proj. dim 733 | # V_diag: n_t x proj. dim 734 | self.mass = self.std_normal.cdf( 735 | self.dir_vector * (self.mean - self.border) / tf.sqrt(self.V_diag) 736 | ) 737 | # mass: n_t x proj. dim 738 | self.constraint = (self.mass - self.alpha) * self.time_mask 739 | 740 | return self.constraint * lagrange_var 741 | 742 | def evaluate_internal(self, dist): 743 | return tf.reduce_sum(self.evaluate(dist, self.lagrange_var)) 744 | 745 | def update(self, x): 746 | self.lagrange_var = self.lagrange_var * tf.exp( 747 | self.lagrange_learning_rate * self.constraint 748 | ) 749 | 750 | def info(self): 751 | print("Border1DConstraint constraint:\n") 752 | print("\tBorder :\t\t{}\n".format(self.border_np)) 753 | print("\tDirection:\t\t{}\n".format(self.dir_vector_np)) 754 | if hasattr(self, "lagrange_initial"): 755 | print("\tLagrange initial:\t\t{}\n".format(self.lagrange_initial)) 756 | print("\tLagrange final:\t\t{}\n".format(self.lagrange_var.numpy)) 757 | super(Border1DConstraint, self).info() 758 | print("\n") 759 | 760 | def plot(self, axs, tvec, color="k", linestyle="--"): 761 | if self.proj_dim == 1: 762 | axs = [axs] 763 | assert len(axs) == self.proj_dim 764 | for i in range(self.proj_dim): 765 | if self.time_mask.size == 1: 766 | axs[i].plot( 767 | tvec, self.border_np[:, i], color=color, linestyle=linestyle 768 | ) 769 | else: 770 | idx = self.time_mask[:, i] != 0 771 | axs[i].plot( 772 | tvec[idx], self.border_np[idx, i], color=color, linestyle=linestyle 773 | ) 774 | 775 | 776 | class SmoothnessPenalty(Constraint): 777 | """Smoothness Penalty, can be used as an additional cost-function 778 | 779 | Parameters: 780 | dn_phi: n-th derivative to be considered 781 | n_outputs: number of output channels of the ProMP (typically number of dims) 782 | dt: system delta t 783 | priority: Can be used to prioritize the smoothness of specific dimensions 784 | scale: multiplier for the cost function (balances wrt. KL) 785 | floatT: tensorflow type used for variable initialisation 786 | """ 787 | 788 | def __init__( 789 | self, 790 | dn_phi, 791 | n_outputs, 792 | dt, 793 | priority=[1.0], 794 | scale=1.0, 795 | floatT=tf.float64, 796 | ): 797 | with tf.variable_scope("SmoothnessPenalty"): 798 | super(SmoothnessPenalty, self).__init__( 799 | 1.0, 800 | 1.0, 801 | tf.identity, 802 | floatT=floatT, 803 | ) 804 | self.lagrange_var = tf.constant( 805 | 1.0, 806 | dtype=floatT, 807 | ) # Just a dummy, will not be used 808 | 809 | self.n_obs = n_outputs 810 | self.dt = dt 811 | self.scale = scale 812 | self.dn_phi = tf.constant(dn_phi, dtype=self.floatT) 813 | # Phi: n_basis x n_basis 814 | self.Phi = tf.transpose(dn_phi) @ dn_phi * self.dt 815 | self.priority = np.array(priority) 816 | if self.priority.size == 1: 817 | self.priority = self.priority * np.ones(self.n_obs) 818 | # Phi_stacked: (n_obs * n_basis) x (n_obs * n_basis) 819 | self.Phi_stacked = np.kron(np.diag(priority), self.Phi.numpy()) 820 | self.Phi_stacked = tf.constant(self.Phi_stacked, dtype=self.floatT) 821 | 822 | def evaluate(self, dist): 823 | self.mean = dist.mean() 824 | self.covar = dist.covariance() 825 | # mean: n_t x proj. dim 826 | # border: n_t x proj. dim 827 | # V_diag: n_t x proj. dim 828 | self.E_smooth = tf.reduce_sum( 829 | tf.einsum("b...,bc,c...->...", self.mean, self.Phi_stacked, self.mean) 830 | ) + tf.linalg.trace(self.Phi_stacked @ self.covar) 831 | self.penalty = self.E_smooth * self.scale 832 | return self.penalty 833 | 834 | def update(self, x): 835 | pass 836 | 837 | def info(self): 838 | print("SmoothnessPenalty:\n") 839 | print("\tScale :\t\t{}\n".format(self.scale)) 840 | print("\tPriority:\t\t{}\n".format(self.priority)) 841 | print("\n") 842 | 843 | 844 | class KLPenalty(Constraint): 845 | """KLPenalty adds another KL term and allows a transform fcn 846 | 847 | Parameters: 848 | prior: prior ProMP to compute KL against 849 | transform_fcn: (possibly nonlinear) transformation into penalty space 850 | scale: multiplier for the cost function (balances wrt. KL) 851 | use_uTransform: whether to use the unscented transformation with the given transform fcn. 852 | floatT: tensorflow type used for variable initialisation 853 | """ 854 | 855 | def __init__( 856 | self, 857 | prior, 858 | transform_fcn=tf.identity, 859 | use_uTransform=False, 860 | scale=1.0, 861 | floatT=tf.float64, 862 | ): 863 | with tf.variable_scope("KLPenalty"): 864 | super(KLPenalty, self).__init__( 865 | 1.0, 866 | 1.0, 867 | tf.identity, 868 | floatT=floatT, 869 | ) 870 | self.lagrange_var = tf.constant( 871 | 1.0, 872 | dtype=floatT, 873 | ) # Just a dummy, will not be used 874 | self.scale = scale 875 | self.prior = prior 876 | self.transform_fcn = transform_fcn 877 | self.use_uTransform = use_uTransform 878 | 879 | def evaluate(self, dist): 880 | if self.use_uTransform: 881 | self.mean, self.covar = uTransform(dist, self.transform_fcn) 882 | kl_dist = tfp.distributions.MultivariateNormalFullCovariance( 883 | self.mean, self.covar 884 | ) 885 | else: 886 | kl_dist = self.transform_fcn(dist) 887 | self.kl = tfp.distributions.kl_divergence(kl_dist, self.prior) 888 | self.penalty = self.kl * self.scale 889 | return self.penalty 890 | 891 | def update(self, x): 892 | pass 893 | 894 | def info(self): 895 | print("KLPenalty:\n") 896 | print("\tScale :\t\t{}\n".format(self.scale)) 897 | print("\tPrior:\t\t{}\n".format(self.prior)) 898 | print("\tTransform_fcn:\t\t{}\n".format(self.self.transform_fcn)) 899 | print("\tuse_uTransform:\t\t{}\n".format(self.self.use_uTransform)) 900 | print("\n") 901 | 902 | 903 | class NonConvexConstraint(Constraint): 904 | """Non convex constraint, can be used to describe corner constraints 905 | 906 | Parameters: 907 | lagrange_learning_rate: learning rate for lagrange multiplier 908 | alpha: desired strictness of prob. constraint 909 | transform_fcn: nonlinear transformation into constraint space 910 | normal_vectors: normal vectors of the two walls 911 | intersection: corner point 912 | n_timesteps: number points in the timegrid 913 | time_mask: indicated at which timepoints the constraint is active (=1) 914 | floatT: tensorflow type used for variable initialisation 915 | lagrange_initial: initial value for lagrange multiplier 916 | vel_decay: unused? 917 | """ 918 | 919 | def __init__( 920 | self, 921 | lagrange_learning_rate, 922 | alpha, 923 | transform_fcn, 924 | normal_vectors, 925 | intersection, 926 | n_timesteps, 927 | time_mask=1, 928 | floatT=tf.float64, 929 | lagrange_initial=1.0, 930 | vel_decay=0.1, 931 | ): 932 | self.lagrange_initial = lagrange_initial 933 | self.normal_vectors_np = np.array(normal_vectors) 934 | self.intersection_np = np.array(intersection) 935 | assert self.intersection_np.shape[-1] == self.normal_vectors_np.shape[-1] 936 | if ( 937 | len(self.normal_vectors_np.shape) == 2 938 | and len(self.intersection_np.shape) == 1 939 | ): 940 | self.normal_vectors_np = self.normal_vectors_np[np.newaxis, ...] 941 | self.intersection_np = self.intersection_np[np.newaxis, :] 942 | elif ( 943 | len(self.normal_vectors_np.shape) == 3 944 | and len(self.normal_vectors_np.shape) == 2 945 | ): 946 | # We have multiple constraints 947 | pass 948 | else: 949 | # Odd shapes 950 | raise RuntimeError( 951 | "Please check the shapes of the handed intersection and normal vector pairs" 952 | ) 953 | with tf.variable_scope("NonConvexConstraint"): 954 | super(NonConvexConstraint, self).__init__( 955 | lagrange_learning_rate, 956 | alpha, 957 | transform_fcn, 958 | floatT=floatT, 959 | ) 960 | self.normal_vectors = tf.constant( 961 | value=self.normal_vectors_np, dtype=floatT 962 | ) # n_con x 2 x cart. dim 963 | self.intersection = tf.constant( 964 | value=self.intersection_np, dtype=floatT 965 | ) # n_con x cart. dim 966 | self.lagrange_var = tf.squeeze( 967 | tf.Variable( 968 | initial_value=lagrange_initial 969 | * np.ones([n_timesteps, self.normal_vectors_np.shape[0]]), 970 | dtype=floatT, 971 | ) 972 | ) # n_ts x n_con or n_ts if n_con==1 973 | self.lagrange_var_vel = tf.squeeze( 974 | tf.zeros( 975 | [n_timesteps, self.normal_vectors_np.shape[0]], 976 | dtype=floatT, 977 | ) 978 | ) # n_ts x n_con or n_ts if n_con==1 979 | self.std_normal = tfp.distributions.Normal( 980 | tf.constant(0.0, dtype=floatT), tf.constant(1.0, dtype=floatT) 981 | ) 982 | self.n_timesteps = n_timesteps 983 | self.time_mask = np.array(time_mask) 984 | self.vel_decay = vel_decay 985 | if len(self.time_mask.shape) == 0: 986 | self.time_mask = self.time_mask[np.newaxis, np.newaxis] 987 | self.time_mask = np.tile(self.time_mask, [self.n_timesteps, 1]) 988 | elif self.time_mask.size == self.n_timesteps: 989 | self.time_mask = self.time_mask[:, np.newaxis] 990 | else: 991 | assert self.time_mask.shape[0] == self.n_timesteps 992 | assert self.time_mask.shape[1] == self.normal_vectors_np.shape[0] 993 | 994 | def evaluate(self, dist, lagrange_var): 995 | self.mean, self.covar = uTransform(dist, self.transform_fcn) 996 | self.V_proj_diag = opt_einsum.contract( 997 | "cnd,...di,cni->...cn", 998 | self.normal_vectors, 999 | self.covar, 1000 | self.normal_vectors, 1001 | backend="tensorflow", 1002 | ) # B x n_t x n_con x 2 1003 | self.mass = self.std_normal.cdf( 1004 | tf.einsum( 1005 | "cnd,...cd->...cn", 1006 | self.normal_vectors, 1007 | self.mean[..., tf.newaxis, :] - self.intersection, 1008 | ) 1009 | / tf.sqrt(self.V_proj_diag) 1010 | ) # B x n_t x n_con x 2 1011 | self.mass = tf.reduce_prod(self.mass, axis=-1) # B x n_t x n_con 1012 | self.constraint = tf.squeeze((self.mass - self.alpha) * self.time_mask) 1013 | 1014 | return self.constraint * lagrange_var 1015 | 1016 | def evaluate_internal(self, dist): 1017 | return tf.reduce_sum(self.evaluate(dist, self.lagrange_var)) 1018 | 1019 | def update(self, x): 1020 | self.lagrange_var = self.lagrange_var * tf.exp( 1021 | self.lagrange_learning_rate * self.constraint 1022 | ) 1023 | 1024 | def _rotate2dVector(self, vec, rotation): 1025 | rotMat = np.array( 1026 | [ 1027 | [np.cos(rotation), -np.sin(rotation)], 1028 | [np.sin(rotation), np.cos(rotation)], 1029 | ] 1030 | ) 1031 | return np.einsum("ij,...j->...i", rotMat, vec) 1032 | 1033 | def info(self): 1034 | print("NonConvexConstraint:\n") 1035 | print("\tNormal vectors:\t\t{}\n".format(self.normal_vectors_np)) 1036 | print("\tIntersection:\t\t{}\n".format(self.intersection)) 1037 | super(NonConvexConstraint, self).info() 1038 | print("\n") 1039 | 1040 | def plot2D(self, ax): 1041 | lvecs = self._rotate2dVector(self.normal_vectors_np, np.pi / 2) 1042 | # n_con x 2 x n_cart 1043 | for nvec, inter, lvec in zip( 1044 | self.normal_vectors_np, self.intersection_np, lvecs 1045 | ): 1046 | for bidx in (np.array([True, False]), np.array([False, True])): 1047 | if ( 1048 | np.tensordot( 1049 | np.squeeze(lvec[bidx]), np.squeeze(nvec[~bidx]), axes=1 1050 | ) 1051 | > 0 1052 | ): 1053 | lv = lvec[bidx] 1054 | else: 1055 | lv = -lvec[bidx] 1056 | lv = np.squeeze(lv) 1057 | nv = np.squeeze(nvec[bidx]) 1058 | linep1 = inter + 10 * lv 1059 | linep2 = inter 1060 | linepx = [linep1[0], linep2[0]] 1061 | linepy = [linep1[1], linep2[1]] 1062 | ax.plot(linepx, linepy, c="k") 1063 | ax.arrow( 1064 | inter[0] + 1.5 * lv[0], 1065 | inter[1] + 1.5 * lv[1], 1066 | nv[0], 1067 | nv[1], 1068 | color="k", 1069 | width=0.02, 1070 | ) 1071 | 1072 | def plot3D( 1073 | self, ax, n_mesh=100, color="r", ec="None", alpha=0.4, rstride=10, cstride=10 1074 | ): 1075 | raise NotImplementedError("plot3D for nonconvex missing") 1076 | limits = np.array([[*ax.get_xlim()], [*ax.get_ylim()], [*ax.get_zlim()]]) 1077 | pos = np.linspace(limits[:, 0], limits[:, 1], num=n_mesh) 1078 | for nvec, inter in zip(self.normal_vectors_np, self.intersection_np): 1079 | cond = nvec != 0 1080 | idx = cond.nonzero()[0][0] 1081 | cond[...] = False 1082 | cond[idx] = True 1083 | mesh = np.zeros([3, n_mesh, n_mesh]) 1084 | a, b = np.meshgrid(pos[:, ~cond][:, 0], pos[:, ~cond][:, 1]) 1085 | mesh[~cond, ...] = np.stack((a, b)) 1086 | mesh[cond, ...] = ( 1087 | -1 1088 | / nvec[cond] 1089 | * np.einsum("n,n...->...", nvec[~cond], mesh[~cond, ...]) 1090 | ) 1091 | x = inter[0] + mesh[0, ...] 1092 | y = inter[1] + mesh[1, ...] 1093 | z = inter[2] + mesh[2, ...] 1094 | ax.plot_surface( 1095 | x, 1096 | y, 1097 | z, 1098 | color=color, 1099 | ec=ec, 1100 | alpha=alpha, 1101 | rstride=rstride, 1102 | cstride=cstride, 1103 | ) 1104 | ax.add_artist( 1105 | Arrow3d( 1106 | [inter[0], nvec[0]], 1107 | [inter[1], nvec[1]], 1108 | [inter[2], nvec[2]], 1109 | arrowstyle="-|>", 1110 | color=color, 1111 | mutation_scale=10, 1112 | lw=1, 1113 | ) 1114 | ) 1115 | -------------------------------------------------------------------------------- /opt_pmp_utils/cpmp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | import tensorflow as tf 4 | import tensorflow_probability as tfp 5 | from tqdm import tqdm 6 | 7 | from opt_pmp_utils.basis_functions import GaussianRBF 8 | from opt_pmp_utils.utils import sp_spd_inv, tf_vectorize 9 | 10 | 11 | class CProMP(object): 12 | """Constrained ProMPs class 13 | 14 | Create object from class, use `add_constraint` and `add_penalty` to create your problem. 15 | Use the `fit` function to run the cpmp algorithm. 16 | 17 | Parameters: 18 | number_of_outputs: output dimensionality 19 | basis_fun: desired basis function class (typically GaussianRBF) 20 | const: list of constraint objects 21 | penalty: list of penalty objects 22 | """ 23 | 24 | def __init__( 25 | self, 26 | number_of_outputs, 27 | basis_fun=GaussianRBF(20, np.linspace(0, 1, 101)), 28 | const=[], # Constraints 29 | penalty=[], # Penalties 30 | floatT=tf.float64, 31 | ): 32 | self.n_obs = number_of_outputs 33 | self.basis_fun = basis_fun 34 | self.n_basis = basis_fun.n_basis 35 | self.n_weights = self.n_basis * self.n_obs 36 | self.const = const 37 | self.penalty = penalty 38 | self.floatT = floatT 39 | self.lagrange_updates = 0 40 | self.kl_scale = 1.0 41 | self.kl_no_corl = False 42 | self.kl_n_blocks = 1 43 | self._generate_features() 44 | 45 | def add_constraint(self, constraint): 46 | self.const.append(constraint) 47 | 48 | self.n_t_const = 0 49 | self.c_scatter_idx = [] 50 | for c in self.const: 51 | c_size = tf.size(c.lagrange_var) 52 | self.c_scatter_idx.append(tf.range(c_size)[:, tf.newaxis] + self.n_t_const) 53 | self.n_t_const += c_size.numpy() 54 | self.c_list = tf.zeros(self.n_t_const, dtype=self.floatT) 55 | 56 | def add_penalty(self, new_penalty): 57 | self.penalty.append(new_penalty) 58 | 59 | def _generate_features(self): 60 | # Extract features 61 | self.X = tf.constant(self.basis_fun.X, dtype=self.floatT) 62 | self.dX = tf.constant(self.basis_fun.dX, dtype=self.floatT) 63 | self.ddX = tf.constant(self.basis_fun.ddX, dtype=self.floatT) 64 | 65 | def _get_lagrange_list(self): 66 | # Return a list which includes all the lagrange variables 67 | lagrange_list = [] 68 | for c in self.const: 69 | lagrange_list.append(c.lagrange_var) 70 | return lagrange_list 71 | 72 | def tf_fun_graph(self, M, log_diag_L_V, off_diag_L_V): 73 | lagrange_list = self._get_lagrange_list() 74 | loss, c_list, kl, p_sum = self._tf_fun_graph( 75 | M, log_diag_L_V, off_diag_L_V, lagrange_list 76 | ) 77 | self.c_list = c_list 78 | self.kl_tmp = kl 79 | self.p_sum_tmp = p_sum 80 | return loss 81 | 82 | def graph(self, M, log_diag_L_V, off_diag_L_V): 83 | lagrange_list = self._get_lagrange_list() 84 | loss, c_list, kl, p_sum = self._graph( 85 | M, log_diag_L_V, off_diag_L_V, lagrange_list 86 | ) 87 | self.c_list = c_list 88 | self.kl_tmp = kl 89 | self.p_sum_tmp = p_sum 90 | return loss 91 | 92 | @tf.function 93 | def _tf_fun_graph(self, M, log_diag_L_V, off_diag_L_V, lagrange_list): 94 | return self._graph(M, log_diag_L_V, off_diag_L_V, lagrange_list) 95 | 96 | def _graph(self, M, log_diag_L_V, off_diag_L_V, lagrange_list): 97 | # The actual computation graph 98 | self.M = M 99 | self.M_vec = tf_vectorize(M, name="M_vec") 100 | self.L_V_tmp = tfp.math.fill_triangular(off_diag_L_V) 101 | self.L_V = self.L_V_tmp + tf.linalg.tensor_diag( 102 | -tf.linalg.diag_part(self.L_V_tmp) + tf.exp(log_diag_L_V) 103 | ) 104 | self.V = self.L_V @ tf.transpose(self.L_V) 105 | self.w_dist = tfp.distributions.MultivariateNormalTriL( 106 | loc=self.M_vec, scale_tril=self.L_V 107 | ) 108 | 109 | self.p_sum = tf.constant(0.0, dtype=self.floatT) 110 | for p in self.penalty: 111 | self.p_sum += tf.reduce_sum(p.evaluate(self.w_dist)) 112 | 113 | self.my_pp = self.X @ M 114 | self.X_lop = tf.linalg.LinearOperatorFullMatrix( 115 | self.X[:, tf.newaxis, :], name="LinearOperatorFullMatrix_X" 116 | ) 117 | self.X_stacked = tf.linalg.LinearOperatorKronecker( 118 | [ 119 | tf.linalg.LinearOperatorIdentity( 120 | num_rows=self.n_obs, dtype=self.floatT 121 | ), 122 | self.X_lop, 123 | ] 124 | ).to_dense() 125 | self.Vy_pp = tf.einsum( 126 | "tob,bj,tij->toi", self.X_stacked, self.V, self.X_stacked 127 | ) 128 | 129 | self.marginals = tfp.distributions.MultivariateNormalFullCovariance( 130 | loc=self.my_pp, 131 | covariance_matrix=self.Vy_pp, 132 | name="marginals", 133 | ) 134 | 135 | # KL between two multivariate gaussians 136 | self.mean_diff = tf.subtract(self.M_vec, self.m0, name="mean_diff") 137 | self.kl_var = ( 138 | -0.5 * 2.0 * self.sum_log_diag_L_Q0 139 | + 0.5 * tf.linalg.trace(self.Q0 @ self.V) 140 | - 0.5 * self.n_weights 141 | ) 142 | self.kl_mean = +0.5 * tf.einsum( 143 | "n,nm,m", self.mean_diff, self.Q0, self.mean_diff 144 | ) 145 | if self.kl_no_corl: 146 | for i in range(self.kl_n_blocks): 147 | idx_l = (self.n_weights // self.kl_n_blocks) * i 148 | idx_u = (self.n_weights // self.kl_n_blocks) * (i + 1) 149 | V_block = self.V[idx_l:idx_u, idx_l:idx_u] 150 | self.kl_var += -0.5 * tf.linalg.logdet(V_block) 151 | else: 152 | self.kl_var += -0.5 * 2.0 * tf.reduce_sum(log_diag_L_V) 153 | 154 | self.kl = tfp.distributions.kl_divergence(self.w_dist, self.prior) 155 | self.kl_old = self.kl_mean + self.kl_var 156 | 157 | # Sum up constraints 158 | self.c_sum = tf.constant(0.0, dtype=self.floatT) 159 | c_list = tf.zeros(self.n_t_const, dtype=self.floatT) 160 | for c, lagrange_var, idx in zip(self.const, lagrange_list, self.c_scatter_idx): 161 | c_val = c.evaluate(self.marginals, tf.ones_like(lagrange_var)) 162 | c_list = tf.tensor_scatter_nd_update(c_list, idx, tf.reshape(c_val, [-1])) 163 | self.c_sum += tf.reduce_sum(c_val * lagrange_var) 164 | 165 | # Lagrange loss 166 | self.loss = self.kl * self.kl_scale + self.c_sum + self.p_sum 167 | 168 | return self.loss, c_list, self.kl, self.p_sum 169 | 170 | def set_kl_scale(self, scale): 171 | self.kl_scale = tf.constant(scale, dtype=self.floatT) 172 | 173 | def set_prior(self, m0, V0, Q0=None): 174 | self.m0 = m0 175 | self.V0 = V0 176 | self.L_V0 = sp.linalg.cholesky(V0, lower=True) 177 | if Q0 is None: 178 | self.Q0 = sp_spd_inv(V0) 179 | else: 180 | self.Q0 = Q0 181 | self.L_Q0 = sp.linalg.cholesky(self.Q0, lower=True) 182 | self.sum_log_diag_L_Q0 = np.sum(np.log(np.diagonal(self.L_Q0))) 183 | self.prior = tfp.distributions.MultivariateNormalFullCovariance(m0, V0) 184 | 185 | def get_initial(self, assign=True): 186 | if assign: 187 | # Mean 188 | M0 = np.reshape(self.m0, [self.n_obs, self.n_basis]).transpose() 189 | 190 | # V diagonal 191 | L_V0_log_diag = np.log(np.diagonal(self.L_V0)) 192 | 193 | # V off-diagonal 194 | L_V0_off_diag = tf.cast( 195 | tfp.math.fill_triangular_inverse(self.L_V0), self.floatT 196 | ) 197 | return np.concatenate( 198 | [M0.flatten(), L_V0_log_diag.flatten(), L_V0_off_diag.numpy().flatten()] 199 | ) 200 | else: 201 | return np.zeros( 202 | int( 203 | self.n_weights 204 | + self.n_weights 205 | + (self.n_weights) * (self.n_weights + 1) / 2 206 | ) 207 | ) 208 | 209 | def update_lagrange(self): 210 | self.lagrange_updates += 1 211 | 212 | def _unroll_var(self): 213 | def f(x): 214 | start = 0 215 | end = self.n_basis * self.n_obs 216 | m = x[start:end] 217 | M = tf.reshape(m, [self.n_basis, self.n_obs]) 218 | start = end 219 | end += self.n_basis * self.n_obs 220 | log_diag_L_V = x[start:end] 221 | start = end 222 | off_diag_L_V = x[start:] 223 | return M, log_diag_L_V, off_diag_L_V 224 | 225 | return f 226 | 227 | def _closure(self, x): 228 | return tfp.math.value_and_gradient( 229 | lambda x: self.tf_fun_graph(*self._unroll_var()(x)), x 230 | ) 231 | 232 | def _callback(self, solution): 233 | paths = self.sample(self.n_paths_violations, x_opt=solution.position) 234 | # self.var_hist.append(solution.position.numpy()) 235 | self.loss_hist.append(solution.objective_value.numpy()) 236 | self.kl_hist.append(self.kl.numpy()) 237 | lam_vec = np.array([]) 238 | cval_vec = np.array([]) 239 | violations = tf.zeros([self.n_paths_violations], dtype=tf.bool) 240 | for c in self.const: 241 | violations = tf.math.logical_or(violations, c.get_violations(paths)) 242 | lam_vec = np.concatenate( 243 | [lam_vec, np.reshape(c.lagrange_var.numpy(), [-1])], axis=0 244 | ) 245 | cval_vec = np.concatenate( 246 | [cval_vec, np.reshape(c.constraint.numpy(), [-1])], axis=0 247 | ) 248 | perc_violations = np.sum(violations) / self.n_paths_violations 249 | self.violation_hist.append(perc_violations) 250 | pval_vec = [] 251 | for p in self.penalty: 252 | pval_vec.append(p.penalty) 253 | self.lam_hist.append(np.squeeze(np.array(lam_vec))) 254 | self.const_hist.append(np.squeeze(np.array(cval_vec))) 255 | self.penalty_hist.append(np.squeeze(np.array(pval_vec))) 256 | 257 | def _lbfgs_callback(self, state, pbar): 258 | c_max = tf.reduce_max(self.c_list).numpy() 259 | c_sum = tf.reduce_sum(self.c_list).numpy() 260 | pbar.set_postfix( 261 | { 262 | "fval": state.objective_value.numpy(), 263 | "kl": self.kl.numpy(), 264 | "c_sum": c_sum, 265 | "p_sum": self.p_sum_tmp.numpy(), 266 | "c_max": c_max, 267 | "grad_norm": tf.norm(state.objective_gradient).numpy(), 268 | }, 269 | refresh=False, 270 | ) 271 | 272 | c_step = c_max - self.c_max 273 | self.c_steps_sum += c_step - self.c_steps[0] 274 | # if self.c_steps_sum > 0 and c_max > 0: # and c_sum > 0: 275 | # state = state._replace(failed=tf.constant(True)) 276 | self._push_c_steps_queue(c_step) 277 | self.c_max = c_max 278 | return state 279 | 280 | def _push_c_steps_queue(self, c_step): 281 | self.c_steps = np.concatenate((self.c_steps[1:], [c_step]), axis=0) 282 | 283 | def fit( 284 | self, 285 | x0, 286 | max_iters=int(1e4), 287 | sub_iters=int(1e2), 288 | tolerance=1e-6, 289 | x_tolerance=0, 290 | f_relative_tolerance=0, 291 | const_tolerance=1e-6, 292 | f_improved_tolerance=1e-6, 293 | parallel_iterations=1, 294 | num_correction_pairs=10, 295 | first_iter=0, 296 | n_c_steps=40, 297 | callback=None, 298 | callback_freq=5, 299 | n_paths_violations=int(1e5), 300 | ): 301 | """ 302 | Parameters: 303 | x0: optimization starting point, use internal `get_initial` function 304 | max_iters: Maximum iterations of the cpmp algorithm (EMM steps) 305 | sub_iters: Maximum iterations of each inner-loop L-BFGS algorithm 306 | tolerance: L-BFGS tolerance 307 | x_tolerance: L-BFGS x_tolerance 308 | f_relative_tolerance: L-BFGS f_relative_tolerance 309 | const_tolerance: allowed constraint sum for the algorithm to terminate prematurely 310 | f_improved_tolerance: minimum improvement per lagrange update, otherwise algorithm can be terminated early 311 | parallel_iterations: L-BFGS parallel_iterations 312 | num_correction_pairs: L-BFGS num_correction_pairs 313 | first_iter: Maximum iterations for the first L-BFGS call 314 | n_c_steps: number of constraint steps to be considered for early abort criterion of the inner loop (currently disabled) 315 | callback: custom callback, gets called after every `callback_freq` inner loop iteration and gets the L-BFGS output 316 | callback_freq: callback frequency 317 | n_paths_violations: Paths sampled for evaluating constraint violations 318 | """ 319 | self.var_hist = [] 320 | self.loss_hist = [] 321 | self.kl_hist = [] 322 | self.lam_hist = [] 323 | self.const_hist = [] 324 | self.penalty_hist = [] 325 | self.violation_hist = [] 326 | 327 | self.n_paths_violations = n_paths_violations 328 | self.opt_iters = 0 329 | self.opt_evals = 0 330 | self.lam_iters = 0 331 | unroll = self._unroll_var() 332 | 333 | self.n_t_const = 0 334 | self.c_scatter_idx = [] 335 | for c in self.const: 336 | c_size = tf.size(c.lagrange_var) 337 | self.c_scatter_idx.append(tf.range(c_size)[:, tf.newaxis] + self.n_t_const) 338 | self.n_t_const += c_size.numpy() 339 | self.c_list = tf.zeros(self.n_t_const, dtype=self.floatT) 340 | 341 | opt_res = tfp.optimizer.lbfgs_minimize( 342 | self._closure, x0, max_iterations=first_iter 343 | ) 344 | _ = self.graph(*unroll(opt_res.position)) 345 | self._callback(opt_res) 346 | if not callback is None: 347 | callback(opt_res) 348 | pbar = tqdm(total=max_iters, desc="CProMP") 349 | while True: 350 | self.c_max = 1e6 351 | self.c_steps = -np.ones(n_c_steps) * 1e6 352 | self.c_steps_sum = np.sum(self.c_steps) 353 | 354 | opt_res = tfp.optimizer.lbfgs_minimize( 355 | self._closure, 356 | opt_res.position, 357 | max_iterations=sub_iters, 358 | tolerance=tolerance, 359 | x_tolerance=x_tolerance, 360 | f_relative_tolerance=f_relative_tolerance, 361 | parallel_iterations=parallel_iterations, 362 | num_correction_pairs=num_correction_pairs, 363 | one_step_callback=self._lbfgs_callback, 364 | # initial_inverse_hessian_estimate=opt_res.inverse_hessian_estimate, 365 | ) 366 | 367 | _ = self.graph(*unroll(opt_res.position)) 368 | self._callback(opt_res) 369 | 370 | max_lam = 0.0 371 | for c in self.const: 372 | c.update(opt_res.position) 373 | max_lam = np.maximum(max_lam, c.lagrange_var.numpy().max()) 374 | self.lam_iters += 1 375 | new_loss, new_grad = self._closure(opt_res.position) 376 | 377 | pbar.set_postfix( 378 | { 379 | "fval": opt_res.objective_value.numpy(), 380 | "kl": self.kl.numpy(), 381 | "c_sum": self.c_sum.numpy(), 382 | "p_sum": self.p_sum.numpy(), 383 | "max_lam": max_lam, 384 | "violations": self.violation_hist[-1], 385 | }, 386 | refresh=False, 387 | ) 388 | pbar.update(n=1) # may trigger a refresh 389 | 390 | self.opt_iters += opt_res.num_iterations.numpy() 391 | self.opt_evals += opt_res.num_objective_evaluations.numpy() 392 | 393 | # Callback 394 | if self.lam_iters % callback_freq == 0: 395 | if not (callback is None): 396 | callback(opt_res) 397 | 398 | if self.lam_iters > max_iters: 399 | break 400 | print(f"f_improved: {(new_loss - opt_res.objective_value).numpy()}") 401 | if ( 402 | opt_res.converged 403 | & ((new_loss - opt_res.objective_value) < f_improved_tolerance) 404 | & (self.c_sum < const_tolerance) 405 | ): 406 | break 407 | pbar.close() 408 | return opt_res 409 | 410 | def sample(self, number_of_samples, x_opt=None): 411 | if not x_opt is None: 412 | unroll = self._unroll_var() 413 | _ = self.graph(*unroll(x_opt)) 414 | w_samples = self.w_dist.sample(number_of_samples).numpy() 415 | w_samples = np.transpose( 416 | w_samples.reshape([number_of_samples, self.n_obs, self.n_basis]), [0, 2, 1] 417 | ) 418 | y_samples = np.einsum("tb,sbo->sto", self.X, w_samples) 419 | return y_samples 420 | 421 | def set_kl_computation(self, no_corl, n_blocks): 422 | self.kl_no_corl = no_corl 423 | self.kl_n_blocks = n_blocks 424 | 425 | 426 | def main(): 427 | import pickle 428 | 429 | import matplotlib.pyplot as plt 430 | 431 | from opt_pmp_utils import tf_allow_growth 432 | 433 | fprimitive = "real_robot/weights_grasp_lowVar_2020-01-29_16:00:06_processed" 434 | assign = False 435 | 436 | with open(fprimitive, mode="rb") as f: 437 | primitive = pickle.load(f) 438 | 439 | const = [] 440 | np.random.seed(7) 441 | n_b = 20 442 | n_b = primitive.n_basis 443 | n_o = 7 444 | n_o = primitive.n_outputs 445 | 446 | n_w = n_b * n_o 447 | m0 = np.random.randn(n_w) 448 | V0 = np.random.randn(n_w, n_w) 449 | V0 = 0.5 * (V0 + V0.T) 450 | V0 = V0 + n_w * np.eye(n_w) 451 | m0 = primitive.m_w 452 | V0 = primitive.V_w 453 | 454 | bf = GaussianRBF( 455 | n_b, 456 | np.linspace(0, 1, primitive.n_ts), 457 | std_distance=1.0, 458 | normalize_features=True, 459 | c_t_delta=0.1, 460 | ) 461 | 462 | model = CProMP(n_o, basis_fun=bf, const=const, floatT=tf.float64) 463 | model.set_prior(m0, V0, Q0=None) 464 | 465 | x0 = tf.constant(model.get_initial(assign)) 466 | 467 | opt_res = model.fit(x0, max_iters=1000) 468 | 469 | print("Converged: {}".format(opt_res.converged)) 470 | print("iterations: {}".format(model.opt_iters)) 471 | print("lagrange_updates: {}".format(model.lam_iters)) 472 | print("evaluations: {}".format(model.opt_evals)) 473 | x_opt = opt_res.position 474 | M, log_diag_L_V, off_diag_L_V = model._unroll_var()(x_opt) 475 | model.graph(M, log_diag_L_V, off_diag_L_V) 476 | m_opt = model.M_vec 477 | V_opt = model.V 478 | 479 | fig, axs = plt.subplots(4, 1) 480 | ax1 = plt.subplot2grid((4, 1), (0, 0)) 481 | ax2 = plt.subplot2grid((4, 1), (1, 0), rowspan=3) 482 | 483 | ax1.plot(m0, label="prior") 484 | ax1.plot(m_opt, label="posterior") 485 | ax1.set_ylabel("Mean") 486 | ax1.legend() 487 | image = ax2.imshow(np.abs(V_opt - V0)) 488 | ax2.set_title("V - V0") 489 | plt.colorbar(image, ax=ax2) 490 | plt.show() 491 | 492 | 493 | if __name__ == "__main__": 494 | main() 495 | -------------------------------------------------------------------------------- /opt_pmp_utils/plot_2d_normal.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib.patches import Ellipse 3 | from scipy.stats import chi2 4 | 5 | 6 | def plot2dNormal(m, V, ax, conf=0.95, color="k", fc="None", linestyle="-", alpha=1.0): 7 | eigvals, eigvecs = np.linalg.eig(V) 8 | eigidx = np.argsort(eigvals) 9 | 10 | confVal = chi2.ppf(conf, 2) 11 | ax.add_artist( 12 | Ellipse( 13 | m, 14 | 2 * np.sqrt(confVal * eigvals[eigidx[0]]), 15 | 2 * np.sqrt(confVal * eigvals[eigidx[1]]), 16 | angle=180 17 | / np.pi 18 | * np.arctan(eigvecs[1, eigidx[0]] / eigvecs[0, eigidx[0]]), 19 | fc=fc, 20 | ec=color, 21 | linestyle=linestyle, 22 | linewidth=1.5, 23 | alpha=alpha, 24 | ) 25 | ) 26 | ax.scatter(m[0], m[1], c=color, marker="X", s=50) 27 | -------------------------------------------------------------------------------- /opt_pmp_utils/promp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | import scipy.linalg 4 | 5 | from opt_pmp_utils.basis_functions import GaussianRBF 6 | 7 | 8 | class ProMP(object): 9 | """Basic ProMP class 10 | 11 | Can fit to data with EM method. 12 | 13 | Parameters: 14 | number_of_outputs: output dimensionality 15 | basis_fun: desired basis function class (typically GaussianRBF) 16 | std_y: measurement noise standard deviation 17 | std_prior: Prior standard deviation 18 | robust: enables robust learning of the ProMP 19 | fit_noise: enables fitting measurement noise 20 | """ 21 | 22 | def __init__( 23 | self, 24 | number_of_outputs, 25 | basis_fun=GaussianRBF( 26 | 20, 27 | np.linspace(0, 1, 101), 28 | std_distance=1.0, 29 | normalize_features=True, 30 | c_t_delta=0.1, 31 | ), 32 | std_y=0.001, 33 | std_prior=100.0, 34 | robust=False, 35 | fit_noise=False, 36 | ): 37 | self.basis_fun = basis_fun 38 | self.n_basis = self.basis_fun.n_basis 39 | self.n_obs = number_of_outputs 40 | self.std_y = std_y 41 | self.std_prior = std_prior 42 | self.robust = robust 43 | self.fit_noise = fit_noise 44 | self._generate_features() 45 | 46 | def _generate_features(self): 47 | # Construct feature matrix 48 | self.X = self.basis_fun.X 49 | self.X_stacked = np.kron(np.eye(self.n_obs), self.X[:, np.newaxis, :]) 50 | 51 | self.m0 = np.zeros(self.n_basis * self.n_obs, np.float) 52 | self.V0 = np.eye(self.n_basis * self.n_obs, dtype=np.float) * ( 53 | self.std_prior**2 54 | ) 55 | self.Q0 = np.eye(self.n_basis * self.n_obs, dtype=np.float) / ( 56 | self.std_prior**2 57 | ) 58 | self.Vy = np.eye(self.n_obs, dtype=np.float) * (self.std_y**2) 59 | self.Qy = np.eye(self.n_obs, dtype=np.float) / (self.std_y**2) 60 | 61 | def _extract_block_diag(self, A, blength): 62 | multiples = A.shape[0] / blength 63 | if np.floor(multiples) != np.ceil(multiples): 64 | raise RuntimeError( 65 | "Matrix cannot be split evenly into blocks of the given blocklength" 66 | ) 67 | A_bd_ones = sp.linalg.block_diag( 68 | *[np.ones([blength, blength]) for _ in range(int(multiples))] 69 | ) 70 | return A * A_bd_ones 71 | 72 | def _sp_spd_inv(self, A): 73 | L_A = sp.linalg.cholesky(A, lower=True) 74 | rhs = sp.linalg.solve_triangular(L_A, np.eye(A.shape[0]), lower=True) 75 | A_inv = sp.linalg.solve_triangular(L_A.T, rhs) 76 | return 0.5 * (A_inv + A_inv.T) 77 | 78 | def e_step(self, y): 79 | Q = np.kron(self.Qy, self.X.transpose() @ self.X) + self.Q0 80 | Q = 0.5 * (Q + Q.T) 81 | V = self._sp_spd_inv(Q) 82 | rhs = self.Q0 @ self.m0 + np.ravel(self.X.transpose() @ y @ self.Qy, order="F") 83 | m = V @ rhs 84 | if self.fit_noise: 85 | M = np.transpose(np.reshape(m, [self.n_obs, self.n_basis])) 86 | error = y - self.X @ M 87 | sig_y_sq = np.einsum("to,to->o", error, error) + np.einsum( 88 | "tob,bj,toj->o", self.X_stacked, self.V0, self.X_stacked 89 | ) 90 | self.sig_y_sq += sig_y_sq 91 | return m, V 92 | 93 | def m_step(self, m_list, V_list): 94 | n_i = len(m_list) 95 | assert n_i == len(V_list) 96 | m = np.average(m_list, axis=0) 97 | m0 = np.reshape(m, [-1, 1]) 98 | V = [] 99 | for m_i, V_i in zip(m_list, V_list): 100 | m_i = m_i[..., np.newaxis] 101 | V.append(((m_i - m0) @ (m_i - m0).transpose() + V_i)) 102 | V = np.average(V, axis=0) 103 | if self.robust: 104 | V_block_diag = self._extract_block_diag(V, self.n_basis) 105 | N0 = 2 * (self.n_basis * self.n_obs + 1) 106 | V = 1 / (n_i + N0) * (n_i * V + N0 * V_block_diag) 107 | if self.fit_noise: 108 | self.sig_y_sq = self.sig_y_sq / n_i / self.basis_fun.T 109 | return m, V 110 | 111 | def EM(self, Y): 112 | m_list = [] 113 | V_list = [] 114 | if self.fit_noise: 115 | self.sig_y_sq = 0 116 | for y in Y: 117 | m_i, V_i = self.e_step(y) 118 | m_list.append(m_i) 119 | V_list.append(V_i) 120 | m0, V0 = self.m_step(m_list, V_list) 121 | return m0, V0 122 | 123 | def fit(self, Y, em_iter): 124 | self.n_iter_em = em_iter 125 | for i in range(self.n_iter_em): 126 | m, V = self.EM(Y) 127 | self.m0 = m 128 | self.V0 = V 129 | self.Q0 = self._sp_spd_inv(self.V0) 130 | if self.fit_noise: 131 | self.Vy = np.diag(self.sig_y_sq) 132 | self.Qy = np.diag(1 / self.sig_y_sq) 133 | 134 | self.condition = np.linalg.cond(self.V0) 135 | 136 | def project(self): 137 | # Final values 138 | self.M = np.transpose(np.reshape(self.m0, [self.n_obs, self.n_basis])) 139 | # Posterior predictive distribution 140 | self.X_s = np.kron(np.eye(self.n_obs), self.X) 141 | self.my_pp = self.X @ self.M 142 | self.myt = np.einsum("tw,w->t", self.X_s, self.m0) 143 | # Final y variance 144 | self.Vyt = np.einsum("mb,bj,nj->mn", self.X_s, self.V0, self.X_s) 145 | # Compute Vy_pp via einsum 146 | self.Vy_pp = np.einsum( 147 | "tob,bj,tij->toi", self.X_stacked, self.V0, self.X_stacked 148 | ) 149 | 150 | def reset(self): 151 | self._generate_features() 152 | 153 | def add_via_point(self, point, cov, xt): 154 | xt_stacked = np.kron(np.eye(self.n_obs), xt) 155 | K = ( 156 | self.V0 157 | @ xt_stacked.T 158 | @ self._sp_spd_inv(cov + xt_stacked @ self.V0 @ xt_stacked.T) 159 | ) 160 | self.m0 = self.m0 + K @ (point - xt_stacked @ self.m0) 161 | self.V0 = self.V0 - K @ xt_stacked @ self.V0 162 | 163 | def plot_covariance_at_timeidx(self, timeidx): 164 | import matplotlib.pyplot as plt 165 | 166 | fig, axs = plt.subplots(1, 1) 167 | Vy_diag = np.sqrt(np.diag(self.Vy_pp[timeidx, :, :])[:, np.newaxis]) 168 | pos = axs.imshow(self.Vy_pp[timeidx, :, :] / (Vy_diag @ Vy_diag.T)) 169 | fig.colorbar(pos, ax=axs) 170 | axs.set_title("Covariance at timeidx {}".format(timeidx)) 171 | 172 | def sample(self, number_of_samples): 173 | w_samples = np.random.multivariate_normal( 174 | self.m0, self.V0, size=number_of_samples 175 | ) 176 | w_samples = np.transpose( 177 | w_samples.reshape([number_of_samples, self.n_obs, self.n_basis]), [0, 2, 1] 178 | ) 179 | y_samples = np.einsum("tb,sbo->sto", self.X, w_samples) 180 | return y_samples 181 | 182 | 183 | def test_promp(): 184 | import matplotlib.pyplot as plt 185 | 186 | from opt_pmp_utils.planarRobot import ControlledDualPlanarRobot 187 | 188 | np.random.seed(77) 189 | robust = False 190 | n_basis = 30 191 | n_iter_em = 10 192 | nLinks = 2 193 | nLinks_total = 2 * nLinks 194 | nSamples = 100 195 | pGain = 20 196 | originA = [-2, 0] 197 | originB = [2, 0] 198 | linkLength = 2 199 | robot = ControlledDualPlanarRobot( 200 | pGain=pGain, 201 | dGain=0.8 * 2 * np.sqrt(pGain), 202 | noise=2.0, 203 | nLinks=nLinks, 204 | linkLengthA=np.ones(2) * linkLength, 205 | linkLengthB=np.ones(2) * linkLength, 206 | originA=originA, 207 | originB=originB, 208 | ) 209 | 210 | q0 = np.zeros(nLinks * 4) # [q0A, dq0A, q0B, dq0B] 211 | q0[0] = np.pi / 2 212 | q0[nLinks_total] = np.pi / 2 213 | tEnd = 1.0 214 | dt = 0.01 215 | tVec = np.array([0.0, tEnd]) 216 | rVec = np.zeros([tVec.size, 2 * nLinks_total]) 217 | final_target_distance = 0.4 218 | target_angle = np.arcsin( 219 | (np.abs(originA[0] - originB[0]) - final_target_distance) / 4 / linkLength 220 | ) 221 | rVec[:, :nLinks] = np.ones([tVec.size, nLinks]) * [ 222 | np.pi / 2 - target_angle, 223 | -np.pi + (2 * target_angle), 224 | ] 225 | rVec[:, nLinks_total : nLinks_total + nLinks] = np.ones([tVec.size, nLinks]) * [ 226 | np.pi / 2 + target_angle, 227 | np.pi - (2 * target_angle), 228 | ] 229 | tEval = np.linspace(0, tEnd, num=int(tEnd // dt + 1)) 230 | mean, cov = robot.evolveMeanCov(q0, tVec, rVec, tEval) 231 | tSamples, samples = robot.sample(nSamples, q0, tVec, rVec, tEnd, dt) 232 | 233 | samplesA, samplesB = robot._split_q(samples) 234 | samplesA = samplesA[:, :, :nLinks] 235 | samplesB = samplesB[:, :, :nLinks] 236 | bf = GaussianRBF( 237 | n_basis, 238 | time_vec=tSamples, 239 | std_distance=1.0, 240 | normalize_features=True, 241 | c_t_delta=0.1, 242 | ) 243 | pmpA = ProMP(nLinks, bf) 244 | pmpB = ProMP(nLinks, bf) 245 | pmpA.fit(samplesA, n_iter_em) 246 | pmpB.fit(samplesB, n_iter_em) 247 | pmpA.project() 248 | pmpB.project() 249 | 250 | fig, axs = plt.subplots(nLinks, 2, figsize=(10, 10)) 251 | for j in range(2): 252 | for i in range(nLinks): 253 | idx = i + j * nLinks_total 254 | didx = idx + nLinks 255 | 256 | # Plot position 257 | axs[i, j].plot(tEval, mean[:, idx], "b", label="mean") 258 | axs[i, j].fill_between( 259 | tEval, 260 | mean[:, idx] - 3 * np.sqrt(cov[:, idx, idx]), 261 | mean[:, idx] + 3 * np.sqrt(cov[:, idx, idx]), 262 | color="b", 263 | alpha=0.2, 264 | # label="variance", 265 | ) 266 | art_s = axs[i, j].plot( 267 | tSamples, samples[:, :, idx].transpose(), "k", alpha=0.3 268 | ) 269 | art_s[0].set_label("samples") 270 | if j == 0: 271 | axs[i, j].set_title("qA{}".format(i)) 272 | mean_pmp = pmpA.my_pp[:, i] 273 | cov_pmp = pmpA.Vy_pp[:, i, i] 274 | else: 275 | axs[i, j].set_title("qB{}".format(i)) 276 | mean_pmp = pmpB.my_pp[:, i] 277 | cov_pmp = pmpB.Vy_pp[:, i, i] 278 | 279 | # Plot promp 280 | axs[i, j].plot(tEval, mean_pmp, "r", label="promp") 281 | axs[i, j].fill_between( 282 | tEval, 283 | mean_pmp - 3 * np.sqrt(cov_pmp), 284 | mean_pmp + 3 * np.sqrt(cov_pmp), 285 | color="r", 286 | alpha=0.2, 287 | # label="variance", 288 | ) 289 | axs[i, j].plot( 290 | tSamples, 291 | np.mean(samples[:, :, idx], axis=0), 292 | "g", 293 | linestyle="--", 294 | alpha=1.0, 295 | label="sample mean", 296 | ) 297 | leg = axs[i, j].legend() 298 | for l in leg.get_lines(): 299 | l.set_alpha(1) 300 | plt.show() 301 | 302 | 303 | if __name__ == "__main__": 304 | test_promp() 305 | -------------------------------------------------------------------------------- /opt_pmp_utils/unscented_transform.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def _uTransform( 5 | normalDist, transform, tf_sqrt=tf.linalg.sqrtm, alpha=1e-1, beta=2.0, kappa=-1 6 | ): 7 | Ndim = tf.squeeze(normalDist.event_shape_tensor()) # Event shape 8 | NdimF = tf.cast(Ndim, normalDist.dtype) 9 | lam = (alpha**2 + kappa) * NdimF # some scaling 10 | # Compute weighting vectors for computing mean and covariance from samples 11 | # See 12 | weigthsMean0 = tf.ones([1], dtype=normalDist.dtype) * lam / (NdimF + lam) 13 | weigthsMean1 = tf.ones([2 * Ndim], dtype=normalDist.dtype) / 2 / (NdimF + lam) 14 | weigthsMean = tf.concat((weigthsMean0, weigthsMean1), axis=0) 15 | weightsCovariance0 = tf.ones([1], dtype=normalDist.dtype) * lam / (NdimF + lam) + ( 16 | 1 - alpha**2 + beta 17 | ) 18 | weightsCovariance1 = tf.ones(2 * Ndim, dtype=normalDist.dtype) / 2 / (NdimF + lam) 19 | weightsCovariance = tf.concat((weightsCovariance0, weightsCovariance1), axis=0) 20 | 21 | # Distance from mean based on square root of the covariance 22 | # sigM: n x n x B 23 | # Is already transposed so we have col vectors in first dimension 24 | n_batch_dim = len(normalDist.batch_shape) 25 | sigM = tf.transpose( 26 | tf_sqrt((NdimF + lam) * normalDist.covariance()), 27 | perm=[n_batch_dim + 1, *range(n_batch_dim), n_batch_dim], 28 | ) 29 | 30 | # All samples are either mean or mean +/- sigM -> repeat mean 31 | tile_arg = tf.concat( 32 | [ 33 | tf.ones_like(normalDist.event_shape_tensor()) * (2 * Ndim + 1), 34 | tf.ones_like(normalDist.batch_shape_tensor()), 35 | tf.ones_like(normalDist.event_shape_tensor()), 36 | ], 37 | axis=0, 38 | ) 39 | # Mean: (2n+1) x B x n 40 | Mean = tf.tile(normalDist.mean()[tf.newaxis, ...], tile_arg) 41 | # samplePoints: (2n+1) x B x n 42 | samplePoints = Mean + tf.concat( 43 | (tf.zeros_like(normalDist.mean())[tf.newaxis, ...], sigM, -sigM), 0 44 | ) 45 | transformedPoints = transform(samplePoints) # transformedPoints: (2n+1) x B x o 46 | # Batch matmul [transformedMean: B x o] 47 | transformedMean = tf.einsum("m,m...->...", weigthsMean, transformedPoints) 48 | meanDiff = transformedPoints - transformedMean # meanDiff: (2n+1) x B x o 49 | # Weighted outer product over sample points 50 | Vy = tf.einsum( 51 | "m...o,mn,n...l->...ol", 52 | meanDiff, 53 | tf.linalg.tensor_diag(weightsCovariance), 54 | meanDiff, 55 | ) # Vy: B x o x o 56 | return transformedMean, Vy, transformedPoints 57 | 58 | 59 | def _uTransform_mV( 60 | normalDist, transform, tf_sqrt=tf.linalg.sqrtm, alpha=1e-1, beta=2.0, kappa=-1 61 | ): 62 | m, V, _ = _uTransform( 63 | normalDist, 64 | transform, 65 | tf_sqrt=tf_sqrt, 66 | alpha=alpha, 67 | beta=beta, 68 | kappa=kappa, 69 | ) 70 | return m, V 71 | 72 | 73 | def uTransform(normalDist, transform, alpha=1e-1, beta=2.0, kappa=-1): 74 | return _uTransform_mV( 75 | normalDist, 76 | transform, 77 | tf_sqrt=tf.linalg.sqrtm, 78 | alpha=alpha, 79 | beta=beta, 80 | kappa=kappa, 81 | ) 82 | 83 | 84 | def uTransform_cholesky(normalDist, transform, alpha=1e-1, beta=2.0, kappa=-1): 85 | return _uTransform_mV( 86 | normalDist, 87 | transform, 88 | tf_sqrt=tf.linalg.cholesky, 89 | alpha=alpha, 90 | beta=beta, 91 | kappa=kappa, 92 | ) 93 | 94 | 95 | def uTransform_mViP(normalDist, transform, alpha=1e-1, beta=2.0, kappa=-1): 96 | return _uTransform( 97 | normalDist, 98 | transform, 99 | tf_sqrt=tf.linalg.sqrtm, 100 | alpha=alpha, 101 | beta=beta, 102 | kappa=kappa, 103 | ) 104 | -------------------------------------------------------------------------------- /opt_pmp_utils/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import scipy as sp 5 | import tensorflow as tf 6 | 7 | 8 | def tf_vectorize(A, name=None): 9 | # Vectorize Matrix A 10 | return tf.reshape(tf.transpose(A), [-1], name=name) 11 | 12 | 13 | def sp_spd_inv(A): 14 | """Efficient inverse computation for symmetric positive definite matrices""" 15 | L_A = sp.linalg.cholesky(A, lower=True) 16 | rhs = sp.linalg.solve_triangular(L_A, np.eye(A.shape[0]), lower=True) 17 | return sp.linalg.solve_triangular(L_A.T, rhs) 18 | 19 | 20 | def rotate2dVector(vec, rotation): 21 | """Rotate a given 2d-vector vec by rotation given in rad""" 22 | rotMat = np.array( 23 | [[np.cos(rotation), -np.sin(rotation)], [np.sin(rotation), np.cos(rotation)]] 24 | ) 25 | return np.einsum("ij,...j->...i", rotMat, vec) 26 | 27 | 28 | def safe_makedir(path): 29 | dir = os.path.dirname(path) 30 | if not os.path.exists(dir): 31 | os.makedirs(dir) 32 | 33 | 34 | def tf_power_iteration(A, n_steps): 35 | # Tensorflow implementation of power iteration 36 | x_k = tf.ones(tf.shape(A)[:-1]) 37 | 38 | for _ in range(n_steps): 39 | x_k1 = tf.einsum("...ij,...j->...i", A, x_k) 40 | x_k = x_k1 / tf.norm(x_k1, axis=-1)[..., tf.newaxis] 41 | 42 | return x_k 43 | -------------------------------------------------------------------------------- /quant_experiment/2d_env_vipmp_obsAv.py: -------------------------------------------------------------------------------- 1 | # 2-D environment with randomly placed obstacles to avoid 2 | 3 | import datetime 4 | import os 5 | import pickle 6 | import time 7 | from collections.abc import Container 8 | 9 | import h5py 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import tensorflow as tf 13 | import tensorflow_probability as tfp 14 | from box import Box 15 | from trajectory_env import TrajectoryEnv 16 | 17 | from opt_pmp_utils.constraints import Repeller 18 | from opt_pmp_utils.cpmp import CProMP 19 | from opt_pmp_utils.utils import safe_makedir 20 | 21 | 22 | def generate_vipmp_callback(model, env, path): 23 | safe_makedir(path) 24 | 25 | def callback(opt_res): 26 | cwd = os.getcwd() 27 | os.chdir(path) 28 | fig, axs = env.compare_show(10, model, model.marginals) 29 | axs[3].set_title("Adapted (CPMP)") 30 | axs[3].text( 31 | -3.0, 3.5, f"iter={model.lam_iters}, viol.={model.violation_hist[-1]}" 32 | ) 33 | fig.savefig(f"vipmp_iter_{model.lam_iters}.png") 34 | plt.close(fig) 35 | with h5py.File(f"vipmp_iter_{model.lam_iters}.hdf5", mode="w") as f: 36 | f.create_dataset("opt_res.position", data=opt_res.position) 37 | os.chdir(cwd) 38 | 39 | return callback 40 | 41 | 42 | def main(): 43 | # tf.random.set_seed(777) 44 | # np.random.seed(777) 45 | n_experiments = 3 # Number of experiments to run 46 | nt = 101 # Number of points in the timegrid 47 | nbasis_pmp = 10 # Number of basis functions for the ProMP 48 | n_dim = 2 # only 2 is supported 49 | n_paths_violations = int( 50 | 1e5 51 | ) # Number of paths sampled to test for violations of the constraints 52 | n_obstacles = [1, 2, 3] # Number of obstacles, can be list of ints or int 53 | n_via_points = 2 # Number of via-points for the initial ProMP (only 2 is supported) 54 | via_point_var = ( 55 | 1e-3 # Variance for conditioning the original ProMP on the via-points 56 | ) 57 | pmp_prior_var = 0.8 # Prior ProMP variance 58 | rep_margin_min = 0.3 # Minimum radius of the obstacle 59 | rep_margin_max = 0.6 # Maximum radius of the obstacle 60 | # VIPMP Parameter 61 | n_iter_cpmp = 10 # Maximum iterations of the CPMP algorithm 62 | n_sub_iter_cpmp = 500 # Maximum iterations of the sub L-BFGS algorithm 63 | callback_freq_vipmp = 5 # Frequency of saving an intermediate checkpoint 64 | lagrange_learning_rate = 1.0 # Learning rate of the lagrange multipliers 65 | lagrange_initial = 20.0 # Initial value of the lagrange multipliers 66 | alpha = 1e-3 # Strictness of the constraints 67 | ################## 68 | experiment_folder = f"output/experiments_obsAv/" 69 | 70 | for i in range(len(n_obstacles) if isinstance(n_obstacles, Container) else 1): 71 | for ii in range(n_experiments): 72 | exp_name = datetime.datetime.now().strftime("%Y_%m_%d-%H:%M:%S") 73 | env = TrajectoryEnv( 74 | n_dim, 75 | nt, 76 | nbasis_pmp, 77 | pmp_prior_var=pmp_prior_var, 78 | n_obstacles=n_obstacles[i] 79 | if isinstance(n_obstacles, Container) 80 | else n_obstacles, 81 | n_via_points=n_via_points, 82 | rep_margin_min=rep_margin_min, 83 | rep_margin_max=rep_margin_max, 84 | via_point_var=via_point_var, 85 | ) 86 | 87 | # CPMP CODE 88 | with tf.device("CPU"): 89 | model = CProMP(n_dim, basis_fun=env.bfun, const=[]) 90 | # Constraints 91 | for obs in env.obstacles: 92 | model.add_constraint( 93 | Repeller( 94 | lagrange_learning_rate, 95 | alpha, 96 | tf.identity, 97 | obs["point"], 98 | obs["margin"], 99 | nt, 100 | time_mask=1.0, 101 | lagrange_initial=lagrange_initial, 102 | ) 103 | ) 104 | cb = generate_vipmp_callback( 105 | model, env, experiment_folder + exp_name + "/" 106 | ) 107 | model.set_prior(env.pmp.m0, env.pmp.V0, env.pmp.Q0) 108 | x0 = tf.constant(model.get_initial(True)) 109 | try: 110 | start = time.time() 111 | opt_res = model.fit( 112 | x0, 113 | max_iters=int(n_iter_cpmp), 114 | sub_iters=int(n_sub_iter_cpmp), 115 | callback=cb, 116 | callback_freq=callback_freq_vipmp, 117 | n_paths_violations=n_paths_violations, 118 | f_improved_tolerance=1e-2, 119 | ) 120 | runtime = time.time() - start 121 | except Exception as e: 122 | print(e) 123 | continue 124 | print("Converged: {}".format(opt_res.converged)) 125 | print("iterations: {}".format(model.opt_iters)) 126 | print("lagrange_updates: {}".format(model.lam_iters)) 127 | print("evaluations: {}".format(model.opt_evals)) 128 | x_opt = opt_res.position 129 | M, log_diag_L_V, off_diag_L_V = model._unroll_var()(x_opt) 130 | model.graph(M, log_diag_L_V, off_diag_L_V) 131 | 132 | paths = model.sample(n_paths_violations) 133 | violations = tf.zeros([n_paths_violations], dtype=tf.bool) 134 | for const in model.const: 135 | violations = tf.math.logical_or(violations, const.get_violations(paths)) 136 | perc_violations = np.sum(violations) / n_paths_violations 137 | kl_2_prior = tfp.distributions.kl_divergence(model.w_dist, env.w_dist) 138 | 139 | fig, axs = env.compare_show(10, model, model.marginals) 140 | axs[3].set_title("Adapted (CPMP)") 141 | axs[3].text( 142 | -3.0, 3.5, f"iter={model.lam_iters}, viol.={model.violation_hist[-1]}" 143 | ) 144 | axs[3].text(-3.0, 3.0, f"kl={kl_2_prior}") 145 | fig.savefig(experiment_folder + exp_name + "/" + f"vipmp_final.png") 146 | plt.close(fig) 147 | 148 | save = Box( 149 | nt=nt, 150 | nbasis_pmp=nbasis_pmp, 151 | n_dim=n_dim, 152 | n_paths_violations=n_paths_violations, 153 | env=env, 154 | loss_hist=model.loss_hist, 155 | violation_hist=model.violation_hist, 156 | kl_hist=model.kl_hist, 157 | lam_hist=model.lam_hist, 158 | const_hist=model.const_hist, 159 | penalty_hist=model.penalty_hist, 160 | opt_res=opt_res, 161 | constraints=model.const, 162 | penalties=model.penalty, 163 | lam_iters=model.lam_iters, 164 | opt_iters=model.opt_iters, 165 | runtime=runtime, 166 | violations=perc_violations, 167 | kl_2_prior=kl_2_prior, 168 | n_iter_cpmp=n_iter_cpmp, 169 | n_sub_iter_cpmp=n_sub_iter_cpmp, 170 | callback_freq_vipmp=callback_freq_vipmp, 171 | lagrange_learning_rate=lagrange_learning_rate, 172 | alpha=alpha, 173 | ) 174 | 175 | with open( 176 | experiment_folder + exp_name + "/vipmp_final_save", mode="wb" 177 | ) as f: 178 | pickle.dump(save, f) 179 | del save 180 | # CPMP CODE 181 | 182 | 183 | if __name__ == "__main__": 184 | main() 185 | -------------------------------------------------------------------------------- /quant_experiment/2d_env_vipmp_vWall.py: -------------------------------------------------------------------------------- 1 | # 2-D environment with randomly placed virtual walls to avoid 2 | 3 | import copy 4 | import datetime 5 | import os 6 | import pickle 7 | import time 8 | from collections.abc import Container 9 | 10 | import h5py 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | import tensorflow as tf 14 | import tensorflow_probability as tfp 15 | from box import Box 16 | from trajectory_env import TrajectoryEnv 17 | 18 | from opt_pmp_utils.constraints import ConvexConstraint 19 | from opt_pmp_utils.cpmp import CProMP 20 | from opt_pmp_utils.utils import safe_makedir 21 | 22 | 23 | def generate_vipmp_callback(model, env, path): 24 | safe_makedir(path) 25 | 26 | def callback(opt_res): 27 | cwd = os.getcwd() 28 | os.chdir(path) 29 | # fig, axs = env.compare_show(10, model, model.marginals) 30 | # axs[3].set_title("Adapted (CPMP)") 31 | # axs[3].text( 32 | # -3.0, 3.5, f"iter={model.lam_iters}, viol.={model.violation_hist[-1]}" 33 | # ) 34 | # fig.savefig(f"vipmp_iter_{model.lam_iters}.png") 35 | # plt.close(fig) 36 | with h5py.File(f"vipmp_iter_{model.lam_iters}.hdf5", mode="w") as f: 37 | f.create_dataset("opt_res.position", data=opt_res.position) 38 | os.chdir(cwd) 39 | 40 | return callback 41 | 42 | 43 | def main(): 44 | # tf.random.set_seed(777) 45 | # np.random.seed(777) 46 | n_experiments = 3 # Number of experiments to run 47 | nt = 20 # Number of points in the timegrid 48 | nbasis_pmp = 10 # Number of basis functions for the ProMP 49 | n_dim = 2 # only 2 is supported 50 | n_paths_violations = int( 51 | 1e5 52 | ) # Number of paths sampled to test for violations of the constraints 53 | n_vWall = [1, 2, 3] # Number of sampled virtual walls, can be list of ints or int 54 | n_via_points = 2 # Number of via-points for the initial ProMP (only 2 is supported) 55 | via_point_var = ( 56 | 1e-3 # Variance for conditioning the original ProMP on the via-points 57 | ) 58 | pmp_prior_var = 0.8 # Prior ProMP variance 59 | # VIPMP Parameter 60 | n_iter_cpmp = 10 # Maximum iterations of the CPMP algorithm 61 | n_sub_iter_cpmp = 500 # Maximum iterations of the sub L-BFGS algorithm 62 | callback_freq_vipmp = 5 # Frequency of saving an intermediate checkpoint 63 | lagrange_learning_rate = 1.0 # Learning rate of the lagrange multipliers 64 | lagrange_initial = 40.0 # Initial value of the lagrange multipliers 65 | alpha = 1e-3 # Strictness of the constraints 66 | ################## 67 | experiment_folder = f"output/experiments_vWall/" 68 | 69 | for i in range(len(n_vWall) if isinstance(n_vWall, Container) else 1): 70 | for ii in range(n_experiments): 71 | exp_name = datetime.datetime.now().strftime("%Y_%m_%d-%H:%M:%S") 72 | env = TrajectoryEnv( 73 | n_dim, 74 | nt, 75 | nbasis_pmp, 76 | pmp_prior_var=pmp_prior_var, 77 | n_v_walls=n_vWall[i] if isinstance(n_vWall, Container) else n_vWall, 78 | n_via_points=n_via_points, 79 | via_point_var=via_point_var, 80 | ) 81 | 82 | # CPMP CODE 83 | with tf.device("CPU"): 84 | model = CProMP(n_dim, basis_fun=env.bfun, const=[]) 85 | # Constraints 86 | intersections = np.array([v["b"] for v in env.v_walls]) 87 | normal_vectors = np.array([v["n_vec"] for v in env.v_walls]) 88 | 89 | model.add_constraint( 90 | ConvexConstraint( 91 | lagrange_learning_rate, 92 | alpha, 93 | tf.identity, 94 | normal_vectors, 95 | intersections, 96 | nt, 97 | lagrange_initial=lagrange_initial, 98 | ) 99 | ) 100 | 101 | cb = generate_vipmp_callback( 102 | model, env, experiment_folder + exp_name + "/" 103 | ) 104 | model.set_prior(env.pmp.m0, env.pmp.V0, env.pmp.Q0) 105 | x0 = tf.constant(model.get_initial(True)) 106 | try: 107 | start = time.time() 108 | opt_res = model.fit( 109 | x0, 110 | max_iters=int(n_iter_cpmp), 111 | sub_iters=int(n_sub_iter_cpmp), 112 | callback=cb, 113 | callback_freq=callback_freq_vipmp, 114 | n_paths_violations=n_paths_violations, 115 | f_improved_tolerance=1e-2, 116 | ) 117 | runtime = time.time() - start 118 | except Exception as e: 119 | print(e) 120 | continue 121 | print("Converged: {}".format(opt_res.converged)) 122 | print("iterations: {}".format(model.opt_iters)) 123 | print("lagrange_updates: {}".format(model.lam_iters)) 124 | print("evaluations: {}".format(model.opt_evals)) 125 | x_opt = opt_res.position 126 | M, log_diag_L_V, off_diag_L_V = model._unroll_var()(x_opt) 127 | model.graph(M, log_diag_L_V, off_diag_L_V) 128 | 129 | paths = model.sample(n_paths_violations) 130 | violations = tf.zeros([n_paths_violations], dtype=tf.bool) 131 | for const in model.const: 132 | violations = tf.math.logical_or(violations, const.get_violations(paths)) 133 | perc_violations = np.sum(violations) / n_paths_violations 134 | kl_2_prior = tfp.distributions.kl_divergence(model.w_dist, env.w_dist) 135 | 136 | fig, axs = env.compare_show(10, model, model.marginals, show_obs=False) 137 | for c in model.const: 138 | c.plot2D(axs[2]) 139 | c.plot2D(axs[3]) 140 | axs[3].set_title("Adapted (CPMP)") 141 | axs[3].text( 142 | -3.0, 3.5, f"iter={model.lam_iters}, viol.={model.violation_hist[-1]}" 143 | ) 144 | axs[3].text(-3.0, 3.0, f"kl={kl_2_prior}") 145 | fig.savefig(experiment_folder + exp_name + "/" + f"vipmp_final.png") 146 | plt.close(fig) 147 | 148 | save = Box( 149 | nt=nt, 150 | nbasis_pmp=nbasis_pmp, 151 | n_dim=n_dim, 152 | n_paths_violations=n_paths_violations, 153 | env=env, 154 | loss_hist=model.loss_hist, 155 | violation_hist=model.violation_hist, 156 | kl_hist=model.kl_hist, 157 | lam_hist=model.lam_hist, 158 | const_hist=model.const_hist, 159 | penalty_hist=model.penalty_hist, 160 | opt_res=opt_res, 161 | constraints=model.const, 162 | penalties=model.penalty, 163 | lam_iters=model.lam_iters, 164 | opt_iters=model.opt_iters, 165 | runtime=runtime, 166 | violations=perc_violations, 167 | kl_2_prior=kl_2_prior, 168 | n_iter_cpmp=n_iter_cpmp, 169 | n_sub_iter_cpmp=n_sub_iter_cpmp, 170 | callback_freq_vipmp=callback_freq_vipmp, 171 | lagrange_learning_rate=lagrange_learning_rate, 172 | alpha=alpha, 173 | ) 174 | 175 | with open( 176 | experiment_folder + exp_name + "/vipmp_final_save", mode="wb" 177 | ) as f: 178 | pickle.dump(save, f) 179 | del save 180 | # CPMP CODE 181 | 182 | 183 | if __name__ == "__main__": 184 | main() 185 | -------------------------------------------------------------------------------- /quant_experiment/2d_env_vipmp_viaP.py: -------------------------------------------------------------------------------- 1 | # 2-D environment with randomly placed temporally unbound waypoints 2 | 3 | import copy 4 | import datetime 5 | import os 6 | import pickle 7 | import time 8 | from collections.abc import Container 9 | 10 | import h5py 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | import tensorflow as tf 14 | import tensorflow_probability as tfp 15 | from box import Box 16 | from trajectory_env import TrajectoryEnv 17 | 18 | from opt_pmp_utils.constraints import OneTimeWaypoint, SmoothnessPenalty 19 | from opt_pmp_utils.cpmp import CProMP 20 | from opt_pmp_utils.utils import safe_makedir 21 | 22 | 23 | def generate_vipmp_callback(model, env, path): 24 | safe_makedir(path) 25 | 26 | def callback(opt_res): 27 | cwd = os.getcwd() 28 | os.chdir(path) 29 | fig, axs = env.compare_show(10, model, model.marginals) 30 | axs[3].set_title("Adapted (CPMP)") 31 | axs[3].text( 32 | -3.0, 3.5, f"iter={model.lam_iters}, viol.={model.violation_hist[-1]}" 33 | ) 34 | fig.savefig(f"vipmp_iter_{model.lam_iters}.png") 35 | plt.close(fig) 36 | with h5py.File(f"vipmp_iter_{model.lam_iters}.hdf5", mode="w") as f: 37 | f.create_dataset("opt_res.position", data=opt_res.position) 38 | os.chdir(cwd) 39 | 40 | return callback 41 | 42 | 43 | def main(): 44 | # tf.random.set_seed(777) 45 | # np.random.seed(777) 46 | n_experiments = 3 # Number of experiments to run 47 | nt = 40 # Number of points in the timegrid 48 | nbasis_pmp = 20 # Number of basis functions for the ProMP 49 | n_dim = 2 # only 2 is supported 50 | n_paths_violations = int( 51 | 1e5 52 | ) # Number of paths sampled to test for violations of the constraints 53 | n_t_via_points = [ 54 | 1, 55 | 2, 56 | 3, 57 | ] # Number of sampled temporally unbound via-points, can be list of ints or int 58 | n_via_points = 2 # Number of via-points for the initial ProMP (only 2 is supported) 59 | via_point_var = ( 60 | 1e-3 # Variance for conditioning the original ProMP on the via-points 61 | ) 62 | pmp_prior_var = 0.8 # Prior ProMP variance 63 | tVia_margin_min = 0.05 # Minimum radius of the via-points 64 | tVia_margin_max = 0.2 # Maximum radius of the via-points 65 | smooth_scale = 0.0 # Scale of the smoothness penalty (0 = off) 66 | std_distance = 1.1 # unused (?) 67 | window_margin = 0 # Window margin of the temporally unbound via-point 68 | t_via_y_min = -0.7 # Minimum y-coordinate of the sampled via-points 69 | t_via_y_max = 0.7 # Minimum y-coordinate of the sampled via-points 70 | # VIPMP Parameter 71 | n_iter_cpmp = 10 # Maximum iterations of the CPMP algorithm 72 | n_sub_iter_cpmp = 500 # Maximum iterations of the sub L-BFGS algorithm 73 | callback_freq_vipmp = 5 # Frequency of saving an intermediate checkpoint 74 | lagrange_learning_rate = 1.0 # Learning rate of the lagrange multipliers 75 | lagrange_initial = 20.0 # Initial value of the lagrange multipliers 76 | alpha = 1e-3 # Strictness of the constraints 77 | ################## 78 | experiment_folder = f"output/experiments_viaP/" 79 | 80 | for i in range(len(n_t_via_points) if isinstance(n_t_via_points, Container) else 1): 81 | for ii in range(n_experiments): 82 | exp_name = datetime.datetime.now().strftime("%Y_%m_%d-%H:%M:%S") 83 | env = TrajectoryEnv( 84 | n_dim, 85 | nt, 86 | nbasis_pmp, 87 | pmp_prior_var=pmp_prior_var, 88 | n_obstacles=0, 89 | n_t_via_points=n_t_via_points[i] 90 | if isinstance(n_t_via_points, Container) 91 | else n_t_via_points, 92 | n_via_points=n_via_points, 93 | tVia_margin_min=tVia_margin_min, 94 | tVia_margin_max=tVia_margin_max, 95 | via_point_var=via_point_var, 96 | std_distance=std_distance, 97 | t_via_y_min=t_via_y_min, 98 | t_via_y_max=t_via_y_max, 99 | ) 100 | 101 | # CPMP CODE 102 | with tf.device("CPU"): 103 | model = CProMP(n_dim, basis_fun=env.bfun, const=[]) 104 | # Constraints 105 | for t_via in env.t_via_points: 106 | model.add_constraint( 107 | OneTimeWaypoint( 108 | lagrange_learning_rate, 109 | alpha, 110 | tf.identity, 111 | t_via["point"], 112 | t_via["margin"], 113 | nt, 114 | lagrange_initial=lagrange_initial, 115 | window_margin=1, 116 | ) 117 | ) 118 | model.add_penalty( 119 | SmoothnessPenalty( 120 | model.basis_fun.ddX, 121 | n_dim, 122 | model.basis_fun.dt[0], 123 | priority=np.ones(n_dim), 124 | scale=smooth_scale, 125 | ) 126 | ) 127 | cb = generate_vipmp_callback( 128 | model, env, experiment_folder + exp_name + "/" 129 | ) 130 | model.set_prior(env.pmp.m0, env.pmp.V0, env.pmp.Q0) 131 | x0 = tf.constant(model.get_initial(True)) 132 | try: 133 | start = time.time() 134 | opt_res = model.fit( 135 | x0, 136 | max_iters=int(n_iter_cpmp), 137 | sub_iters=int(n_sub_iter_cpmp), 138 | callback=cb, 139 | callback_freq=callback_freq_vipmp, 140 | n_paths_violations=n_paths_violations, 141 | f_improved_tolerance=1e-2, 142 | ) 143 | runtime = time.time() - start 144 | except Exception as e: 145 | print(e) 146 | continue 147 | print("Converged: {}".format(opt_res.converged)) 148 | print("iterations: {}".format(model.opt_iters)) 149 | print("lagrange_updates: {}".format(model.lam_iters)) 150 | print("evaluations: {}".format(model.opt_evals)) 151 | x_opt = opt_res.position 152 | M, log_diag_L_V, off_diag_L_V = model._unroll_var()(x_opt) 153 | model.graph(M, log_diag_L_V, off_diag_L_V) 154 | 155 | paths = model.sample(n_paths_violations) 156 | violations = tf.zeros([n_paths_violations], dtype=tf.bool) 157 | for const in model.const: 158 | violations = tf.math.logical_or(violations, const.get_violations(paths)) 159 | perc_violations = np.sum(violations) / n_paths_violations 160 | kl_2_prior = tfp.distributions.kl_divergence(model.w_dist, env.w_dist) 161 | 162 | fig, axs = env.compare_show(10, model, model.marginals) 163 | axs[3].set_title("Adapted (CPMP)") 164 | axs[3].text( 165 | -3.0, 3.5, f"iter={model.lam_iters}, viol.={model.violation_hist[-1]}" 166 | ) 167 | axs[3].text(-3.0, 3.0, f"kl={kl_2_prior}") 168 | fig.savefig(experiment_folder + exp_name + "/" + f"vipmp_final.png") 169 | plt.close(fig) 170 | 171 | save = Box( 172 | nt=nt, 173 | nbasis_pmp=nbasis_pmp, 174 | n_dim=n_dim, 175 | n_paths_violations=n_paths_violations, 176 | env=env, 177 | loss_hist=model.loss_hist, 178 | violation_hist=model.violation_hist, 179 | kl_hist=model.kl_hist, 180 | lam_hist=model.lam_hist, 181 | const_hist=model.const_hist, 182 | penalty_hist=model.penalty_hist, 183 | opt_res=opt_res, 184 | constraints=model.const, 185 | penalties=model.penalty, 186 | lam_iters=model.lam_iters, 187 | opt_iters=model.opt_iters, 188 | runtime=runtime, 189 | violations=perc_violations, 190 | kl_2_prior=kl_2_prior, 191 | n_iter_cpmp=n_iter_cpmp, 192 | n_sub_iter_cpmp=n_sub_iter_cpmp, 193 | callback_freq_vipmp=callback_freq_vipmp, 194 | lagrange_learning_rate=lagrange_learning_rate, 195 | alpha=alpha, 196 | ) 197 | 198 | with open( 199 | experiment_folder + exp_name + "/vipmp_final_save", mode="wb" 200 | ) as f: 201 | pickle.dump(save, f) 202 | del save 203 | # CPMP CODE 204 | 205 | 206 | if __name__ == "__main__": 207 | main() 208 | -------------------------------------------------------------------------------- /quant_experiment/analyse_obsAv.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import pickle 4 | from glob import glob 5 | 6 | import numpy as np 7 | import pandas as pd 8 | 9 | 10 | def main(): 11 | folder = "output/experiments_obsAv" 12 | df = pd.DataFrame() 13 | cut_failed = True 14 | savefiles = [] 15 | experiments = [x for x in os.walk(folder)] 16 | for exp in experiments[1:]: 17 | if "vipmp_final_save" in exp[-1]: 18 | savefiles.append(exp[0] + "/vipmp_final_save") 19 | 20 | for sf in savefiles: 21 | with open(sf, mode="rb") as f: 22 | save_vipmp = pickle.load(f) 23 | n_obs = len(save_vipmp.env.obstacles) 24 | df = df.append( 25 | { 26 | "n_obstacles": len(save_vipmp.env.obstacles), 27 | "n_via_points": len(save_vipmp.env.via_points), 28 | "nt": save_vipmp.nt, 29 | "nbasis_pmp": save_vipmp.nbasis_pmp, 30 | "n_dim": save_vipmp.n_dim, 31 | "n_iter_cpmp": save_vipmp.n_iter_cpmp, 32 | "n_sub_iter_cpmp": save_vipmp.n_sub_iter_cpmp, 33 | "lagrange_learning_rate": save_vipmp.lagrange_learning_rate, 34 | "alpha": save_vipmp.alpha, 35 | "vipmp_runtime": save_vipmp.runtime, 36 | "vipmp_violations": save_vipmp.violations, 37 | "vipmp_kl": save_vipmp.kl_2_prior.numpy() / save_vipmp.nbasis_pmp, 38 | "vipmp_loss": save_vipmp.loss_hist, 39 | "vipmp_loss_size": len(save_vipmp.loss_hist), 40 | }, 41 | ignore_index=True, 42 | ) 43 | 44 | loss_hist = df 45 | print( 46 | "obs\tn\tfail\tviolations\t\tkl\tviolations(S)\t kl(S)\t\truntime\t\truntime(S)" 47 | ) 48 | for i in range(1, 4): 49 | df_o = df[df.n_obstacles == i] 50 | 51 | failed_vipmp = df_o.vipmp_violations > 0.3 52 | df_cut_vipmp = df_o[np.logical_not(failed_vipmp)] 53 | 54 | n = df_o.shape[0] 55 | print( 56 | f"{i}\t" 57 | f"{n}\t" 58 | f"{failed_vipmp.sum():>2}({failed_vipmp.sum()/n*100:.1f}%) " 59 | f"{df_o.vipmp_violations.mean()*100:.1f}% +/- {df_o.vipmp_violations.std()*100:.1f}%\t" 60 | f"{df_o.vipmp_kl.mean():>5.2f} +/- {df_o.vipmp_kl.std():.2f}\t" 61 | f"{df_cut_vipmp.vipmp_violations.mean()*100:.2f}% +/- {df_cut_vipmp.vipmp_violations.std()*100:.2f}%\t" 62 | f"{df_cut_vipmp.vipmp_kl.mean():>5.2f} +/- {df_cut_vipmp.vipmp_kl.std():.2f}\t" 63 | f"{df_o.vipmp_runtime.mean():.1f} +/- {df_o.vipmp_runtime.std():.1f}\t" 64 | f"{df_cut_vipmp.vipmp_runtime.mean():.1f} +/- {df_cut_vipmp.vipmp_runtime.std():.1f}\t" 65 | ) 66 | 67 | 68 | if __name__ == "__main__": 69 | main() 70 | -------------------------------------------------------------------------------- /quant_experiment/analyse_vWall.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import pickle 4 | from glob import glob 5 | 6 | import numpy as np 7 | import pandas as pd 8 | 9 | 10 | def main(): 11 | folder = "output/experiments_vWall" 12 | df = pd.DataFrame() 13 | cut_failed = True 14 | savefiles = [] 15 | experiments = [x for x in os.walk(folder)] 16 | for exp in experiments[1:]: 17 | if "vipmp_final_save" in exp[-1]: 18 | savefiles.append(exp[0] + "/vipmp_final_save") 19 | 20 | for sf in savefiles: 21 | with open(sf, mode="rb") as f: 22 | save_vipmp = pickle.load(f) 23 | df = df.append( 24 | { 25 | "n_v_walls": len(save_vipmp.env.v_walls), 26 | "n_via_points": len(save_vipmp.env.via_points), 27 | "nt": save_vipmp.nt, 28 | "nbasis_pmp": save_vipmp.nbasis_pmp, 29 | "n_dim": save_vipmp.n_dim, 30 | "n_iter_cpmp": save_vipmp.n_iter_cpmp, 31 | "n_sub_iter_cpmp": save_vipmp.n_sub_iter_cpmp, 32 | "lagrange_learning_rate": save_vipmp.lagrange_learning_rate, 33 | "alpha": save_vipmp.alpha, 34 | "vipmp_runtime": save_vipmp.runtime, 35 | "vipmp_violations": save_vipmp.violations, 36 | "vipmp_kl": save_vipmp.kl_2_prior.numpy() / save_vipmp.nbasis_pmp, 37 | "vipmp_loss": save_vipmp.loss_hist, 38 | "vipmp_loss_size": len(save_vipmp.loss_hist), 39 | }, 40 | ignore_index=True, 41 | ) 42 | 43 | loss_hist = df 44 | print( 45 | "vWalls\tn\tfail\t\tviolations\tkl\t\t\tviolations(S)\tkl(S)\t\truntime\t\truntime(S)" 46 | ) 47 | for i in range(1, 4): 48 | df_o = df[df.n_v_walls == i] 49 | 50 | failed_vipmp = df_o.vipmp_violations > 0.3 51 | df_cut_vipmp = df_o[np.logical_not(failed_vipmp)] 52 | 53 | n = df_o.shape[0] 54 | print( 55 | f"{i}\t" 56 | f"{n}\t" 57 | f"{failed_vipmp.sum():>2}({failed_vipmp.sum()/n*100:>4.1f}%) " 58 | f"{df_o.vipmp_violations.mean()*100:>4.1f}% +/- {df_o.vipmp_violations.std()*100:>4.1f}%\t" 59 | f"{df_o.vipmp_kl.mean():>5.2f} +/- {df_o.vipmp_kl.std():>6.2f}\t" 60 | f"{df_cut_vipmp.vipmp_violations.mean()*100:.2f}% +/- {df_cut_vipmp.vipmp_violations.std()*100:.2f}%\t" 61 | f"{df_cut_vipmp.vipmp_kl.mean():>5.2f} +/- {df_cut_vipmp.vipmp_kl.std():>5.2f}\t" 62 | f"{df_o.vipmp_runtime.mean():>5.1f} +/- {df_o.vipmp_runtime.std():>5.1f}\t" 63 | f"{df_cut_vipmp.vipmp_runtime.mean():>5.1f} +/- {df_cut_vipmp.vipmp_runtime.std():>5.1f}\t" 64 | ) 65 | 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /quant_experiment/analyse_viaP.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import pickle 4 | from glob import glob 5 | 6 | import numpy as np 7 | import pandas as pd 8 | 9 | 10 | def main(): 11 | folder = "output/experiments_viaP" 12 | df = pd.DataFrame() 13 | cut_failed = True 14 | savefiles = [] 15 | experiments = [x for x in os.walk(folder)] 16 | for exp in experiments[1:]: 17 | if "vipmp_final_save" in exp[-1]: 18 | savefiles.append(exp[0] + "/vipmp_final_save") 19 | 20 | for sf in savefiles: 21 | with open(sf, mode="rb") as f: 22 | save_vipmp = pickle.load(f) 23 | if hasattr(save_vipmp.env, "t_via_points"): 24 | n_viaP = len(save_vipmp.env.t_via_points) 25 | else: 26 | n_viaP = len(save_vipmp.env.obstacles) 27 | df = df.append( 28 | { 29 | "n_viaP": n_viaP, 30 | "n_via_points": len(save_vipmp.env.via_points), 31 | "nt": save_vipmp.nt, 32 | "nbasis_pmp": save_vipmp.nbasis_pmp, 33 | "n_dim": save_vipmp.n_dim, 34 | "n_iter_cpmp": save_vipmp.n_iter_cpmp, 35 | "n_sub_iter_cpmp": save_vipmp.n_sub_iter_cpmp, 36 | "lagrange_learning_rate": save_vipmp.lagrange_learning_rate, 37 | "alpha": save_vipmp.alpha, 38 | "vipmp_runtime": save_vipmp.runtime, 39 | "vipmp_violations": save_vipmp.violations, 40 | "vipmp_kl": save_vipmp.kl_2_prior.numpy() / save_vipmp.nbasis_pmp, 41 | "vipmp_loss": save_vipmp.loss_hist, 42 | "vipmp_loss_size": len(save_vipmp.loss_hist), 43 | }, 44 | ignore_index=True, 45 | ) 46 | 47 | loss_hist = df 48 | print( 49 | "viaP\tn\tfail\t\tviolations\tkl\t\t\tviolations(S)\tkl(S)\t\truntime\t\truntime(S)" 50 | ) 51 | for i in range(1, 4): 52 | df_o = df[df.n_viaP == i] 53 | 54 | failed_vipmp = df_o.vipmp_violations > 0.3 55 | df_cut_vipmp = df_o[np.logical_not(failed_vipmp)] 56 | 57 | n = df_o.shape[0] 58 | print( 59 | f"{i}\t" 60 | f"{n}\t" 61 | f"{failed_vipmp.sum():>2}({failed_vipmp.sum()/n*100:>4.1f}%) " 62 | f"{df_o.vipmp_violations.mean()*100:>4.1f}% +/- {df_o.vipmp_violations.std()*100:>4.1f}%\t" 63 | f"{df_o.vipmp_kl.mean():>5.2f} +/- {df_o.vipmp_kl.std():>6.2f}\t" 64 | f"{df_cut_vipmp.vipmp_violations.mean()*100:.2f}% +/- {df_cut_vipmp.vipmp_violations.std()*100:.2f}%\t" 65 | f"{df_cut_vipmp.vipmp_kl.mean():>5.2f} +/- {df_cut_vipmp.vipmp_kl.std():>5.2f}\t" 66 | f"{df_o.vipmp_runtime.mean():>5.1f} +/- {df_o.vipmp_runtime.std():>5.1f}\t" 67 | f"{df_cut_vipmp.vipmp_runtime.mean():>5.1f} +/- {df_cut_vipmp.vipmp_runtime.std():>5.1f}\t" 68 | ) 69 | 70 | 71 | if __name__ == "__main__": 72 | main() 73 | -------------------------------------------------------------------------------- /quant_experiment/trajectory_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow_probability as tfp 3 | 4 | from opt_pmp_utils.basis_functions import GaussianRBF 5 | from opt_pmp_utils.promp import ProMP 6 | 7 | 8 | class TrajectoryEnv(object): 9 | """Creates a 2D-Test environment with a prior ProMP and a range 10 | of potential constraints, specifically: 11 | - obstacles for repeller constraints, 12 | - via-points for waypoint constraints, 13 | - virtual walls 14 | 15 | """ 16 | 17 | def __init__(self, n_dim, nt, nbasis_pmp, **kwargs): 18 | super(TrajectoryEnv, self).__init__() 19 | self.n_dim = n_dim 20 | self.nt = nt 21 | self.nbasis_pmp = nbasis_pmp 22 | self.t = np.linspace(0, 1, self.nt) 23 | 24 | # Gather default arguments 25 | kwargs, bfun_args, pmp_args = self._defaults(**kwargs) 26 | self.kwargs = kwargs 27 | 28 | # Setup basis function and ProMP 29 | self.bfun = GaussianRBF(n_basis=self.nbasis_pmp, time_vec=self.t, **bfun_args) 30 | self.pmp = ProMP(number_of_outputs=n_dim, basis_fun=self.bfun, **pmp_args) 31 | self.pmp.m0[: self.pmp.n_basis] = np.linspace( 32 | kwargs["x_min"], kwargs["x_max"], self.pmp.n_basis 33 | ) 34 | 35 | # Sample via-points 36 | self.via_points = [] 37 | x_points = np.linspace(kwargs["x_min"], kwargs["x_max"], kwargs["n_via_points"]) 38 | delta = kwargs["x_max"] - kwargs["x_min"] 39 | # x_points[1] = np.random.uniform(0.25 * delta, 0.75 * delta) + kwargs["x_min"] 40 | y_points = np.random.uniform( 41 | kwargs["y_min"], kwargs["y_max"], kwargs["n_via_points"] 42 | ) 43 | idx_points = np.linspace(0, self.nt - 1, kwargs["n_via_points"]) 44 | # idx_points[1] = np.random.uniform(0.33 * nt, 0.66 * nt) 45 | idx_points = idx_points.astype(np.int) 46 | for x, y, idx in zip(x_points, y_points, idx_points): 47 | self.via_points.append({"point": [x, y], "idx": idx}) 48 | self.pmp.add_via_point( 49 | [x, y], 50 | np.diag(np.ones(n_dim) * kwargs["via_point_var"]), 51 | self.pmp.X[idx], 52 | ) 53 | 54 | # Project ProMP 55 | self.pmp.project() 56 | self.w_dist = tfp.distributions.MultivariateNormalFullCovariance( 57 | self.pmp.m0, self.pmp.V0 58 | ) 59 | self.pmp_dist = tfp.distributions.MultivariateNormalFullCovariance( 60 | self.pmp.myt, self.pmp.Vyt + np.diag(np.ones(nt * n_dim) * 1e-6) 61 | ) 62 | self.marginals = tfp.distributions.MultivariateNormalFullCovariance( 63 | self.pmp.my_pp, self.pmp.Vy_pp 64 | ) 65 | 66 | # Sample obstacles 67 | self.obstacles = [] 68 | x_points = np.random.uniform( 69 | kwargs["obstacle_delta_x_min"], 70 | kwargs["obstacle_delta_x_max"], 71 | kwargs["n_obstacles"], 72 | ) 73 | y_points = np.random.uniform( 74 | kwargs["obstacle_delta_y_min"], 75 | kwargs["obstacle_delta_y_max"], 76 | kwargs["n_obstacles"], 77 | ) 78 | delta = 1 - kwargs["obstacles_field"] 79 | idx = np.round( 80 | np.random.uniform(delta * nt, (1 - delta) * nt, kwargs["n_obstacles"]) 81 | ) 82 | # margins = np.tile( 83 | # np.random.uniform(kwargs["rep_margin_min"], kwargs["rep_margin_max"]), 84 | # kwargs["n_obstacles"], 85 | # ) 86 | margins = np.random.uniform( 87 | kwargs["rep_margin_min"], 88 | kwargs["rep_margin_max"], 89 | kwargs["n_obstacles"], 90 | ) 91 | for x, y, margin, i in zip(x_points, y_points, margins, idx): 92 | self.obstacles.append( 93 | { 94 | "point": self.marginals[i].mean().numpy() + [x, y], 95 | "margin": margin, 96 | "delta": [x, y], 97 | } 98 | ) 99 | 100 | # Sample temp unbound via-points 101 | self.t_via_points = [] 102 | x_points = ( 103 | np.random.uniform(0.4, 0.6, kwargs["n_t_via_points"]) 104 | * (kwargs["x_max"] - kwargs["x_min"]) 105 | / (kwargs["n_t_via_points"] + 2) 106 | + np.linspace( 107 | kwargs["x_min"], kwargs["x_max"], kwargs["n_t_via_points"] + 3 108 | )[1:-2] 109 | ) 110 | y_mean = [] 111 | for x in x_points: 112 | idx = np.argmin(np.abs(self.marginals.mean()[:, 0] - x)) 113 | y_mean.append(self.marginals.mean()[idx, 1].numpy()) 114 | y_points = ( 115 | np.random.uniform( 116 | kwargs["t_via_y_min"], kwargs["t_via_y_max"], kwargs["n_t_via_points"] 117 | ) 118 | + y_mean 119 | ) 120 | margins = np.random.uniform( 121 | kwargs["tVia_margin_min"], 122 | kwargs["tVia_margin_max"], 123 | kwargs["n_t_via_points"], 124 | ) 125 | for x, y, margin in zip(x_points, y_points, margins): 126 | self.t_via_points.append( 127 | { 128 | "point": [x, y], 129 | "margin": margin, 130 | } 131 | ) 132 | 133 | # Sample virtual walls 134 | self.v_walls = [] 135 | start = np.array(self.via_points[0]["point"]) 136 | end = np.array(self.via_points[-1]["point"]) 137 | mid = (start + end) / 2 138 | delta_vec = end - start 139 | while len(self.v_walls) < kwargs["n_v_walls"]: 140 | if np.random.uniform() < 0.5: 141 | d_vec = self._rotate2dVector(delta_vec, np.pi / 2) 142 | else: 143 | d_vec = self._rotate2dVector(delta_vec, -np.pi / 2) 144 | d_vec = ( 145 | d_vec 146 | / np.linalg.norm(d_vec) 147 | * np.random.uniform(kwargs["v_wall_d_min"], kwargs["v_wall_d_max"]) 148 | ) 149 | b = mid + d_vec 150 | rot = np.random.uniform(-1.0, 1.0) * 2 * np.pi - np.pi 151 | n_vec = self._rotate2dVector(d_vec, rot) 152 | if np.dot(end - b, n_vec) > 0 or np.dot(start - b, n_vec) > 0: 153 | continue 154 | self.v_walls.append({"b": b, "n_vec": n_vec}) 155 | 156 | def _rotate2dVector(cls, vec, rotation): 157 | rotMat = np.array( 158 | [ 159 | [np.cos(rotation), -np.sin(rotation)], 160 | [np.sin(rotation), np.cos(rotation)], 161 | ] 162 | ) 163 | return np.einsum("ij,...j->...i", rotMat, vec) 164 | 165 | def sample(self, n_samples): 166 | return self.pmp.sample(n_samples) 167 | 168 | def _defaults(self, **kwargs): 169 | """ 170 | Default arguments, split in three categories: 171 | - Basis function arguments (bfun_args), concerning the 172 | ProMP basis functions 173 | - ProMP arguments (pmp_args), used during the creation 174 | of the ProMP 175 | - Environment arguments (kwargs), concerning the trajectory 176 | environment itself. Examples are the number of 177 | obstacles/ via-points, or the margins they require 178 | 179 | All arguments can be overwritten through kwargs given to the 180 | Environment creation 181 | """ 182 | bfun_args = {} 183 | pmp_args = {} 184 | 185 | if not "c_t_delta" in kwargs: 186 | bfun_args["c_t_delta"] = 0.1 187 | else: 188 | bfun_args["c_t_delta"] = kwargs["c_t_delta"] 189 | 190 | if not "normalize_features" in kwargs: 191 | bfun_args["normalize_features"] = True 192 | else: 193 | bfun_args["normalize_features"] = kwargs["normalize_features"] 194 | 195 | if not "std_distance" in kwargs: 196 | bfun_args["std_distance"] = 1.0 197 | else: 198 | bfun_args["std_distance"] = kwargs["std_distance"] 199 | 200 | if not "pmp_prior_var" in kwargs: 201 | pmp_args["std_prior"] = 1.0 202 | else: 203 | pmp_args["std_prior"] = kwargs["pmp_prior_var"] 204 | if not "x_min" in kwargs: 205 | kwargs["x_min"] = -3.0 206 | if not "x_max" in kwargs: 207 | kwargs["x_max"] = 3.0 208 | if not "y_min" in kwargs: 209 | kwargs["y_min"] = -3.0 210 | if not "y_max" in kwargs: 211 | kwargs["y_max"] = 3.0 212 | 213 | # Obstacle arguments 214 | if not "obstacle_delta_x_min" in kwargs: 215 | kwargs["obstacle_delta_x_min"] = -0.0 216 | if not "obstacle_delta_x_max" in kwargs: 217 | kwargs["obstacle_delta_x_max"] = 0.0 218 | if not "obstacle_delta_y_min" in kwargs: 219 | kwargs["obstacle_delta_y_min"] = -1.5 220 | if not "obstacle_delta_y_max" in kwargs: 221 | kwargs["obstacle_delta_y_max"] = 1.5 222 | 223 | # Initial ProMP via-point arguments 224 | if not "n_via_points" in kwargs: 225 | kwargs["n_via_points"] = 3 226 | if not "n_obstacles" in kwargs: 227 | kwargs["n_obstacles"] = 0 228 | if not "via_point_var" in kwargs: 229 | kwargs["via_point_var"] = 1e-1 230 | if not "rep_margin_min" in kwargs: 231 | kwargs["rep_margin_min"] = 0.5 232 | if not "rep_margin_max" in kwargs: 233 | kwargs["rep_margin_max"] = 1.6 234 | if not "obstacles_field" in kwargs: 235 | kwargs["obstacles_field"] = 0.8 236 | 237 | # Arguments for the constraint via-points 238 | if not "n_t_via_points" in kwargs: 239 | kwargs["n_t_via_points"] = 0 240 | if not "t_via_y_min" in kwargs: 241 | kwargs["t_via_y_min"] = -1.5 242 | if not "t_via_y_max" in kwargs: 243 | kwargs["t_via_y_max"] = 1.5 244 | if not "tVia_margin_min" in kwargs: 245 | kwargs["tVia_margin_min"] = 0.05 246 | if not "tVia_margin_max" in kwargs: 247 | kwargs["tVia_margin_max"] = 0.2 248 | 249 | # Virtual wall arguments 250 | if not "n_v_walls" in kwargs: 251 | kwargs["n_v_walls"] = 0 252 | if not "v_wall_d_min" in kwargs: 253 | kwargs["v_wall_d_min"] = 0.2 254 | if not "v_wall_d_max" in kwargs: 255 | kwargs["v_wall_d_max"] = 1.5 256 | 257 | return kwargs, bfun_args, pmp_args 258 | 259 | def _show_marginals(self, ax, dist, idx, color="b", alpha=0.2): 260 | ax.plot(self.t, dist.mean()[:, idx], c=color) 261 | ax.fill_between( 262 | self.t, 263 | dist.mean()[:, idx] - 3 * dist.stddev()[:, 0], 264 | dist.mean()[:, idx] + 3 * dist.stddev()[:, 0], 265 | color=color, 266 | alpha=alpha, 267 | ) 268 | ax.set_xlabel("Time") 269 | 270 | def _show_2d(self, ax, show_obs): 271 | from matplotlib.patches import Circle 272 | 273 | if show_obs: 274 | for ob in self.obstacles: 275 | ax.scatter( 276 | ob["point"][0], ob["point"][1], c="k", marker="+", s=20, zorder=4 277 | ) 278 | ax.add_artist( 279 | Circle( 280 | ob["point"], radius=ob["margin"], ec="k", fc="None", zorder=4 281 | ) 282 | ) 283 | 284 | for viaP in self.t_via_points: 285 | ax.scatter( 286 | viaP["point"][0], 287 | viaP["point"][1], 288 | c="k", 289 | marker="+", 290 | s=20, 291 | zorder=4, 292 | ) 293 | ax.add_artist( 294 | Circle( 295 | viaP["point"], 296 | radius=viaP["margin"], 297 | ec="k", 298 | fc="None", 299 | zorder=4, 300 | ) 301 | ) 302 | ax.set_xlabel("x") 303 | ax.set_ylabel("y") 304 | ax.set_xlim([self.kwargs["x_min"] - 1.0, self.kwargs["x_max"] + 1.0]) 305 | ax.set_ylim([self.kwargs["y_min"] - 1.0, self.kwargs["y_max"] + 1.0]) 306 | ax.set_aspect("equal", "box") 307 | 308 | def _show_2d_dist( 309 | self, ax, dist, marginals, n_samples, color="b", cov_alpha=0.3, path_alpha=0.3 310 | ): 311 | from opt_pmp_utils.plot_2d_normal import plot2dNormal 312 | 313 | paths = dist.sample(n_samples) 314 | ax.scatter( 315 | marginals.mean()[:, 0], 316 | marginals.mean()[:, 1], 317 | c=color, 318 | marker=".", 319 | s=1, 320 | zorder=3, 321 | ) 322 | for path in paths: 323 | if path.shape[-1] == self.n_dim: 324 | ax.plot(path[:, 0], path[:, 1], "k", alpha=path_alpha, zorder=5) 325 | else: 326 | ax.plot( 327 | path[: self.nt], path[self.nt :], "k", alpha=path_alpha, zorder=5 328 | ) 329 | 330 | for i in range(marginals.batch_shape_tensor()[0]): 331 | plot2dNormal( 332 | marginals.mean()[i], 333 | marginals.covariance()[i], 334 | ax, 335 | color="None", 336 | fc=color, 337 | alpha=cov_alpha, 338 | ) 339 | 340 | def show(self, n_samples, show_obs=True): 341 | import matplotlib.pyplot as plt 342 | 343 | fig, axs = plt.subplots(4, 2) 344 | ax = [] 345 | ax.append(plt.subplot2grid((4, 2), (0, 0), colspan=1)) 346 | ax.append(plt.subplot2grid((4, 2), (0, 1), colspan=1)) 347 | ax.append(plt.subplot2grid((4, 2), (1, 0), colspan=2, rowspan=3)) 348 | self._show_marginals(ax[0], self.marginals, 0, color="b", alpha=0.2) 349 | ax[0].set_ylabel("x") 350 | self._show_marginals(ax[1], self.marginals, 1, color="b", alpha=0.2) 351 | ax[1].set_ylabel("y") 352 | self._show_2d(ax[2], show_obs) 353 | self._show_2d_dist( 354 | ax[2], 355 | self.pmp_dist, 356 | self.marginals, 357 | n_samples, 358 | color="b", 359 | cov_alpha=0.3, 360 | path_alpha=0.3, 361 | ) 362 | fig.tight_layout() 363 | return fig, ax 364 | 365 | def compare_show(self, n_samples, opt_dist, marginals, show_obs=True): 366 | import matplotlib.pyplot as plt 367 | 368 | fig, axs = plt.subplots(4, 2) 369 | ax = [] 370 | ax.append(plt.subplot2grid((4, 2), (0, 0), colspan=1)) 371 | ax.append(plt.subplot2grid((4, 2), (0, 1), colspan=1)) 372 | ax.append(plt.subplot2grid((4, 2), (1, 0), colspan=1, rowspan=3)) 373 | ax.append(plt.subplot2grid((4, 2), (1, 1), colspan=1, rowspan=3)) 374 | self._show_marginals(ax[0], self.marginals, 0, color="b", alpha=0.2) 375 | self._show_marginals(ax[0], marginals, 0, color="r", alpha=0.2) 376 | ax[0].set_ylabel("x") 377 | self._show_marginals(ax[1], self.marginals, 1, color="b", alpha=0.2) 378 | self._show_marginals(ax[1], marginals, 1, color="r", alpha=0.2) 379 | ax[1].set_ylabel("y") 380 | self._show_2d(ax[2], show_obs) 381 | self._show_2d_dist( 382 | ax[2], 383 | self.pmp_dist, 384 | self.marginals, 385 | n_samples, 386 | color="b", 387 | cov_alpha=0.3, 388 | path_alpha=0.3, 389 | ) 390 | self._show_2d(ax[3], show_obs) 391 | self._show_2d_dist( 392 | ax[3], 393 | opt_dist, 394 | marginals, 395 | n_samples, 396 | color="r", 397 | cov_alpha=0.3, 398 | path_alpha=0.3, 399 | ) 400 | ax[0].set_title("x-Marginal") 401 | ax[1].set_title("y-Marginal") 402 | ax[2].set_title("Original") 403 | ax[3].set_title("Adapted") 404 | fig.tight_layout() 405 | return fig, ax 406 | 407 | 408 | def main(): 409 | import matplotlib.pyplot as plt 410 | 411 | env = TrajectoryEnv( 412 | 2, 21, 8, c_t_delta=0.1, pmp_prior_var=2.0, n_obstacles=0, n_t_via_points=3 413 | ) 414 | env.show(n_samples=10, show_obs=True) 415 | plt.show() 416 | 417 | 418 | if __name__ == "__main__": 419 | main() 420 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.3.4 2 | pandas==1.1.5 3 | python-box==6.0.2 4 | scipy==1.4.1 5 | tensorflow==2.2.0 6 | tensorflow-probability==0.10.1 7 | tqdm==4.64.0 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("requirements.txt") as f: 4 | REQUIRED_PACKAGES = f.read().splitlines() 5 | 6 | setuptools.setup( 7 | name="opt_pmp_utils", 8 | version="0.0.1", 9 | author="Felix Frank", 10 | description="Some utilities for the constrained ProMPs experiments", 11 | long_description="", 12 | packages=["opt_pmp_utils"], 13 | install_requires=REQUIRED_PACKAGES, 14 | python_requires=">=3.6", 15 | ) 16 | --------------------------------------------------------------------------------