├── DL ├── __init__.py ├── dynamics_learner_interface │ ├── __init__.py │ └── dynamics_learner_interface.py ├── evaluation │ ├── __init__.py │ └── evaluation.py ├── methods │ ├── BNN.py │ ├── LWPR.py │ ├── SKI.py │ ├── SVGPR.py │ ├── __init__.py │ ├── eql_dynamics_learner.py │ ├── eureqa_dynamics_learner.py │ ├── linear_regression_ls.py │ ├── linear_regression_sgd.py │ ├── nn_dynamics_learner.py │ ├── pilco_dynamics_learner.py │ ├── system_id.lyx │ └── system_id.py ├── plotting │ ├── __init__.py │ └── plots.py └── utils │ ├── __init__.py │ ├── data_extractor.py │ ├── data_loading.py │ ├── data_splitting.py │ ├── plot_utils.py │ └── standardizer.py ├── LICENSE ├── Pipfile ├── Pipfile.lock ├── README.md ├── Settings ├── eql_prediction_horizon_1_history_length_1.json └── nn_prediction_horizon_1_history_length_1.json ├── img ├── 1.gif ├── 16.png ├── 2.gif ├── 3.gif ├── 4.gif ├── 5.png └── datasets_closed_loop.png ├── setup.py └── tests ├── __init__.py ├── fake_data_test_case.py ├── test_data_extractor.py ├── test_data_loading.py ├── test_data_splitting.py ├── test_recursive_prediction.py └── test_standardizer.py /DL/__init__.py: -------------------------------------------------------------------------------- 1 | from DL.dynamics_learner_interface.dynamics_learner_interface import DynamicsLearnerInterface 2 | -------------------------------------------------------------------------------- /DL/dynamics_learner_interface/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rr-learning/transferable_dynamics_dataset/3660a4bd7c6b011c8d02853bad3cce540708512c/DL/dynamics_learner_interface/__init__.py -------------------------------------------------------------------------------- /DL/dynamics_learner_interface/dynamics_learner_interface.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | from collections import defaultdict 4 | from DL.utils import unrollTrainingData, concatenateActionsStates, \ 5 | Standardizer, concatenateActionsStatesAverages, \ 6 | unrollTrainingDataStream, computeNumberOfTrainingPairs 7 | 8 | 9 | class DynamicsLearnerInterface(object): 10 | 11 | def __init__(self, history_length, prediction_horizon, 12 | difference_learning=True, averaging=False, streaming=False): 13 | self.history_length = history_length 14 | self.prediction_horizon = prediction_horizon 15 | self.observation_dimension = 9 16 | self.action_dimension = 3 17 | self.difference_learning = difference_learning 18 | self.averaging = averaging 19 | self.streaming = streaming 20 | 21 | # do not override this function! 
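    # learn() computes the normalization statistics, unrolls the training
    # sequences into (target, input) pairs and then dispatches either to
    # _learn (whole training set in memory) or, when streaming=True, to
    # _learn_from_stream (standardized pairs yielded one at a time).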
22 | def learn(self, observation_sequences, action_sequences): 23 | self._check_learning_inputs(observation_sequences, action_sequences) 24 | 25 | self.load_normalization_stats(observation_sequences, action_sequences) 26 | 27 | if self.streaming: 28 | training_data_stream = unrollTrainingDataStream( 29 | observation_sequences, action_sequences, 30 | self.history_length, self.prediction_horizon, 31 | self.difference_learning, average=self.averaging, 32 | infinite=True) 33 | ntraining_pairs = computeNumberOfTrainingPairs( 34 | observation_sequences, self.history_length, 35 | self.prediction_horizon) 36 | normalized_data_stream = self._standardize_data_stream( 37 | training_data_stream) 38 | self._learn_from_stream(normalized_data_stream, ntraining_pairs) 39 | else: 40 | targets, inputs = unrollTrainingData(observation_sequences, 41 | action_sequences, 42 | self.history_length, 43 | self.prediction_horizon, 44 | self.difference_learning, 45 | self.averaging) 46 | 47 | # Whitening the inputs. 48 | std_targets = self.targets_standardizer.standardize(targets) 49 | std_inputs = self.inputs_standardizer.standardize(inputs) 50 | self._learn(std_inputs, std_targets) 51 | 52 | def _standardize_data_stream(self, data_stream): 53 | for training_target, training_input in data_stream: 54 | yield (self.targets_standardizer.standardize(training_target), 55 | self.inputs_standardizer.standardize(training_input)) 56 | 57 | def _training_inputs_data_stream(self, data_stream): 58 | for _, training_input in data_stream: 59 | yield training_input 60 | 61 | def _training_targets_data_stream(self, data_stream): 62 | for training_target, _ in data_stream: 63 | yield training_target 64 | 65 | # do not override this function! 66 | def _preprocess_and_predict(self, observation_history, action_history, 67 | action_future=None): 68 | if action_future is None: 69 | assert self.prediction_horizon == 1 70 | action_future = np.empty((observation_history.shape[0], 71 | 0, 72 | self.action_dimension)) 73 | 74 | self._check_prediction_inputs(observation_history, action_history, 75 | action_future) 76 | 77 | # Making a single input from all the input parameters. 78 | if self.averaging: 79 | dynamics_inputs = concatenateActionsStatesAverages(action_history, 80 | observation_history, 81 | action_future) 82 | else: 83 | dynamics_inputs = concatenateActionsStates(action_history, 84 | observation_history, 85 | action_future) 86 | 87 | # Whitening the input. 88 | whitened_input = self.inputs_standardizer.standardize(dynamics_inputs) 89 | 90 | whitened_predictions = self._predict(whitened_input) 91 | 92 | # Dewhitening the output 93 | dewhitened_predictions = self.targets_standardizer.unstandardize( 94 | whitened_predictions) 95 | 96 | if self.difference_learning: 97 | dewhitened_predictions += observation_history[:, -1, :] 98 | 99 | self._check_prediction_outputs(observation_history, 100 | dewhitened_predictions) 101 | return dewhitened_predictions 102 | 103 | # do not override this function! 
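    # predict() returns the predicted observation prediction_horizon steps
    # after the last entry of observation_history; action_future must hold the
    # prediction_horizon - 1 intermediate actions and may only be omitted when
    # prediction_horizon == 1.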
104 | def predict(self, observation_history, action_history, action_future=None): 105 | if action_future is None: 106 | assert self.prediction_horizon == 1 107 | action_future = np.empty((observation_history.shape[0], 108 | 0, 109 | self.action_dimension)) 110 | 111 | assert self.prediction_horizon == action_future.shape[1] + 1 112 | 113 | return self._preprocess_and_predict(observation_history, 114 | action_history, action_future) 115 | 116 | # assert self.prediction_horizon == 1 117 | # 118 | # observation_history_t = observation_history 119 | # action_history_t = action_history 120 | # predicted_observation = self._preprocess_and_predict( 121 | # observation_history_t, action_history_t) 122 | # 123 | # for t in range(action_future.shape[1]): 124 | # predicted_observation = np.expand_dims(predicted_observation, 125 | # axis=1) 126 | # observation_history_t = np.append(observation_history_t[:, 1:], 127 | # predicted_observation, axis=1) 128 | # action_history_t = np.append(action_history_t[:, 1:], 129 | # action_future[:, t:t + 1], axis=1) 130 | # predicted_observation = self._preprocess_and_predict( 131 | # observation_history_t, action_history_t) 132 | # 133 | # assert (action_history_t[:, :-(t + 1)] == action_history[:, 134 | # t + 1:]).all() 135 | # assert (observation_history_t[:, :-(t + 1)] == observation_history[ 136 | # :, t + 1:]).all() 137 | # assert (action_history_t[:, -1] == action_future[:, t]).all() 138 | # 139 | # return predicted_observation 140 | 141 | def predict_recursively(self, 142 | observation_history, 143 | action_history, 144 | action_future): 145 | # parse arguments ------------------------------------------------------ 146 | assert (self.history_length == 1) 147 | n_time_steps = action_future.shape[1] + 1 148 | assert (n_time_steps % self.prediction_horizon == 0) 149 | n_prediction_steps = int(n_time_steps / self.prediction_horizon) 150 | 151 | observation = observation_history 152 | action_sequence = np.concatenate([action_history, 153 | action_future], axis=1) 154 | 155 | observations = np.empty([observation.shape[0], 156 | n_prediction_steps, 157 | observation.shape[2]]) 158 | observations[:] = np.nan 159 | 160 | for prediction_step in range(n_prediction_steps): 161 | t = prediction_step * self.prediction_horizon 162 | macro_action = action_sequence[:, t: t + self.prediction_horizon] 163 | 164 | observation = self.predict(observation, 165 | macro_action[:, 0:1], 166 | macro_action[:, 1:])[:, np.newaxis,:] 167 | 168 | observations[:, prediction_step] = observation 169 | 170 | return observations 171 | 172 | # Do not override this function. 173 | def _get_input_dim(self): 174 | if self.averaging: 175 | ret = self.observation_dimension + self.action_dimension 176 | if self.prediction_horizon > 1: 177 | ret += self.action_dimension 178 | return ret 179 | return self.history_length * (self.observation_dimension + 180 | self.action_dimension) + ( 181 | self.prediction_horizon 182 | - 1) * self.action_dimension 183 | 184 | # override this function 185 | def name(self): 186 | raise NotImplementedError 187 | 188 | # override this function 189 | def _learn(self, training_inputs, training_targets): 190 | """ 191 | Learns from the entire batch of training pairs. 192 | 193 | Parameters 194 | ---------- 195 | training_inputs: np-array of shape nTrainingInstances x input dim 196 | that represents the input to the dynamics 197 | (i.e. 
relevant observations and actions within 198 | the history length and prediction horizon) 199 | training_targets: np-array of shape nTrainingInstances x state dim 200 | denoting the targets of the dynamics model. 201 | """ 202 | raise NotImplementedError 203 | 204 | # override this function 205 | def _learn_from_stream(self, training_datastream, datastream_size): 206 | """ 207 | Learns from a data stream which iterates over the training set. This 208 | way there is no need to have the whole data set in memory. 209 | 210 | Parameters 211 | ---------- 212 | training_datastream: Python generator that yields (target, input) pairs 213 | of the training data set. 214 | 215 | datastream_size: Number of training pairs in training_datastream. 216 | """ 217 | raise NotImplementedError 218 | 219 | # override this function 220 | def _predict(self, single_input): 221 | """ 222 | Parameters 223 | ---------- 224 | single_input: one dimensional np-array with all the inputs to 225 | the dynamics model concatenated (size: input dim) 226 | (i.e. relevant observations and actions within 227 | the history length and prediction horizon) 228 | Outputs 229 | ---------- 230 | observation_prediction: np-array of shape n_samples x observation_dimension 231 | corresponding the prediction for the observation 232 | prediction_horizon steps after the last observation 233 | of observation_history 234 | 235 | """ 236 | raise NotImplementedError 237 | 238 | def _check_learning_inputs(self, observation_sequences, action_sequences): 239 | assert observation_sequences.shape[:2] == action_sequences.shape[:2] 240 | assert observation_sequences.shape[2] == self.observation_dimension 241 | assert action_sequences.shape[2] == self.action_dimension 242 | 243 | def _check_prediction_inputs(self, observation_history, action_history, 244 | action_future): 245 | n_samples = observation_history.shape[0] 246 | 247 | assert observation_history.shape == (n_samples, 248 | self.history_length, 249 | self.observation_dimension) 250 | 251 | assert action_history.shape == (n_samples, 252 | self.history_length, 253 | self.action_dimension) 254 | 255 | assert action_future.shape == (n_samples, 256 | self.prediction_horizon - 1, 257 | self.action_dimension) 258 | 259 | def _check_prediction_outputs(self, observation_history, 260 | observation_prediction): 261 | n_samples = observation_history.shape[0] 262 | 263 | assert observation_prediction.shape == (n_samples, 264 | self.observation_dimension) 265 | 266 | def load_normalization_stats(self, observation_sequences, action_sequences): 267 | """ 268 | Loads the normalization statistics from the input data. 269 | """ 270 | self._check_learning_inputs(observation_sequences, action_sequences) 271 | if not self.streaming: 272 | targets, inputs = unrollTrainingData(observation_sequences, 273 | action_sequences, 274 | self.history_length, 275 | self.prediction_horizon, 276 | self.difference_learning, 277 | self.averaging) 278 | else: 279 | targets = self._training_targets_data_stream( 280 | unrollTrainingDataStream( 281 | observation_sequences, action_sequences, 282 | self.history_length, self.prediction_horizon, 283 | self.difference_learning, average=self.averaging, 284 | infinite=False)) 285 | inputs = self._training_inputs_data_stream( 286 | unrollTrainingDataStream( 287 | observation_sequences, action_sequences, 288 | self.history_length, self.prediction_horizon, 289 | self.difference_learning, average=self.averaging, 290 | infinite=False)) 291 | 292 | # Loading the standardizers. 
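        # The Standardizer objects (see DL/utils/standardizer.py) hold the
        # statistics of the unrolled training data; _preprocess_and_predict
        # reuses them to whiten the dynamics inputs and to de-whiten the
        # predicted targets.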
293 | self.targets_standardizer = Standardizer(targets) 294 | self.inputs_standardizer = Standardizer(inputs) 295 | 296 | # Override this function. 297 | def load(self, model_filename): 298 | raise NotImplementedError 299 | 300 | # Override this function. 301 | def save(self, model_filename): 302 | raise NotImplementedError 303 | 304 | 305 | class DynamicsLearnerExample(DynamicsLearnerInterface): 306 | 307 | def __init__(self, *args, **kwargs): 308 | super().__init__(*args, **kwargs) 309 | 310 | def name(self): 311 | return 'dynamics_learner_example' 312 | 313 | def _learn(self, training_inputs, training_targets): 314 | pass 315 | 316 | def _learn_from_stream(self, training_data_stream, datastream_size): 317 | pass 318 | 319 | def _predict(self, single_input): 320 | return np.zeros((single_input.shape[0], self.observation_dimension)) 321 | 322 | 323 | if __name__ == '__main__': 324 | data = np.load('./Dataset/dataset_v01.npz') 325 | observation_sequences = np.concatenate((data['measured_angles'], 326 | data['measured_velocities'], 327 | data['measured_torques']), 2) 328 | action_sequences = data['constrained_torques'] 329 | history_length = 10 330 | prediction_horizon = 100 331 | dynamics_learner = DynamicsLearnerExample(history_length, 332 | prediction_horizon, 333 | streaming=True) 334 | dynamics_learner.learn(observation_sequences, action_sequences) 335 | 336 | hist_obs = observation_sequences[:, :history_length].copy() 337 | hist_act = action_sequences[:, :history_length].copy() 338 | fut_act = action_sequences[:, history_length:history_length + 339 | prediction_horizon - 1].copy() 340 | observation_prediction = dynamics_learner.predict(hist_obs, hist_act, 341 | fut_act) 342 | rms = np.linalg.norm( 343 | observation_sequences[:, history_length + prediction_horizon - 1] - 344 | observation_prediction) 345 | 346 | # Asserting that the inputs to the predict method were left unchanged. 347 | assert np.array_equal(hist_obs, 348 | observation_sequences[:, :history_length]) 349 | assert np.array_equal(hist_act, 350 | action_sequences[:, :history_length]) 351 | assert np.array_equal(fut_act, action_sequences[:, 352 | history_length:history_length + prediction_horizon - 1]) 353 | print('rms: ', rms) 354 | -------------------------------------------------------------------------------- /DL/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rr-learning/transferable_dynamics_dataset/3660a4bd7c6b011c8d02853bad3cce540708512c/DL/evaluation/__init__.py -------------------------------------------------------------------------------- /DL/evaluation/evaluation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluation functionality. Note that it can also be called as a script. 
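Example invocation (hypothetical file names; all flags are defined in run() below):

    python -m DL.evaluation.evaluation --training_data training.npz \
        --iid_test_data test_iid.npz --method example \
        --history_length 1 --prediction_horizon 1 --verbose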
3 | """ 4 | import os 5 | import json 6 | import argparse 7 | import numpy as np 8 | import time 9 | 10 | import sys 11 | 12 | from DL.dynamics_learner_interface.dynamics_learner_interface import DynamicsLearnerExample 13 | from DL.utils import Standardizer 14 | from DL.utils.data_loading import loadRobotData 15 | 16 | 17 | # hack because if have to use python 2.7 and it does not seem to have perf_counter 18 | if sys.version_info[0] < 3: 19 | def return_0(): 20 | return 0 21 | time.perf_counter = return_0 22 | 23 | 24 | def evaluate(dynamics_learner, observation_sequences, action_sequences, 25 | test_dataset_name, verbose=False): 26 | possible_history_lengths = [1, 10] 27 | possible_prediction_horizons = [1, 10, 100, 1000] 28 | assert dynamics_learner.history_length in possible_history_lengths 29 | assert dynamics_learner.prediction_horizon in possible_prediction_horizons 30 | 31 | history_length = dynamics_learner.history_length 32 | 33 | # Only evaluating in the prediction horizon that a model was trained on. 34 | prediction_horizons = [dynamics_learner.prediction_horizon] 35 | 36 | output_errors = {} 37 | for prediction_horizon in prediction_horizons: 38 | T = range(possible_history_lengths[-1] - 1, 39 | observation_sequences.shape[1] - possible_prediction_horizons[-1]) 40 | errors = np.empty((observation_sequences.shape[0], 41 | len(T), 42 | observation_sequences.shape[2])) 43 | times = [] 44 | init_pred_time = time.perf_counter() 45 | for i in range(len(T)): 46 | t = T[i] 47 | observation_history = observation_sequences[:, t + 1 - history_length: t + 1] 48 | action_history = action_sequences[:, t + 1 - history_length: t + 1] 49 | action_future = action_sequences[:, t + 1: t + prediction_horizon] 50 | start_time = time.perf_counter() 51 | observation_prediction = dynamics_learner.predict( 52 | observation_history=observation_history, 53 | action_history=action_history, 54 | action_future=action_future) 55 | times.append(time.perf_counter() - start_time) 56 | true_observation = observation_sequences[:, t + prediction_horizon] 57 | errors[:, i] = true_observation - observation_prediction 58 | end_pred_time = time.perf_counter() 59 | if verbose: 60 | times = np.array(times) 61 | print('Number of predict calls {}'.format(len(times))) 62 | print('Elapsed time for predict call {}: {} +- {}'.format( 63 | test_dataset_name, np.mean(times), np.std(times))) 64 | print('Total prediction time {}'.format( 65 | end_pred_time - init_pred_time)) 66 | 67 | errors_key = test_dataset_name + '__history_' + str(history_length) + \ 68 | '__training_horizon_' + \ 69 | str(dynamics_learner.prediction_horizon) + \ 70 | '__evaluation_horizon_' + str(prediction_horizon) 71 | output_errors[errors_key] = errors 72 | 73 | # Right now we only test on the same setup used for training. 74 | # Therefore, there must be only one entry in the dictionary. 
75 | errors_to_return = list(output_errors.values()) 76 | assert len(errors_to_return) == 1 77 | return errors_to_return[0] 78 | 79 | def get_evaluation_errors(all_errors): 80 | evaluation_errors = {} 81 | evaluation_errors['angle'] = all_errors[:,:, :3] 82 | evaluation_errors['velocity'] = all_errors[:,:, 3:6] 83 | evaluation_errors['torque'] = all_errors[:,:, 6:9] 84 | 85 | for key in evaluation_errors.keys(): 86 | norms = np.linalg.norm(evaluation_errors[key], axis=-1, ord=1) 87 | norms = np.sum(norms, axis=1) 88 | 89 | # norms = norms / evaluation_errors[key].size * norms.size 90 | evaluation_errors[key] = norms.flatten() 91 | 92 | return evaluation_errors 93 | 94 | 95 | 96 | def get_angle_errors(errors): 97 | """ 98 | Takes error vectors computed over full state predictions and picks the 99 | dimensions corresponding to angle predictions. Notice that it is assumed 100 | that the first three dimenions contain angle errors. 101 | """ 102 | return errors[:,:, :3] 103 | 104 | def compute_RMSE_from_errors(errors): 105 | """ 106 | Computes the RMSE from the error vectors. Notice that it weights equally 107 | all dimensions. 108 | """ 109 | nseq, length, state_dim = errors.shape 110 | errors = errors.reshape((-1, state_dim)) 111 | squared_errors = np.sum(errors * errors, axis=1) 112 | return np.sqrt(np.mean(squared_errors)) 113 | 114 | 115 | def run(parser): 116 | parser.add_argument("--training_data", required=True, 117 | help=" filename of the input robot training data") 118 | parser.add_argument("--trained_model", 119 | help="filename of a trained model. If specified the model won't be" 120 | " trained") 121 | parser.add_argument("--settings", 122 | help="filename where the model settings are stored") 123 | parser.add_argument("--validation_data", 124 | help="filename of the input robot validation data") 125 | parser.add_argument("--iid_test_data", 126 | help="filename of the input robot iid testing data") 127 | parser.add_argument("--transfer_test_data", nargs='+', 128 | help="filename of the input robot transfer testing data") 129 | parser.add_argument("--method", required=True, 130 | help=" Name of the method that will be tested") 131 | parser.add_argument("--history_length", type=int, default=1) 132 | parser.add_argument("--prediction_horizon", type=int, default=1) 133 | parser.add_argument("--output_errors", 134 | help=" filename where the computed errors will be saved") 135 | parser.add_argument("--output_model", 136 | help="filename where the trained model will be saved if a trained" 137 | " model was not already provided in the command line.") 138 | parser.add_argument("--averaging", dest='averaging', action='store_true') 139 | parser.add_argument("--no-averaging", dest='averaging', 140 | action='store_false') 141 | parser.add_argument("--streaming", dest='streaming', action='store_true') 142 | parser.add_argument("--no-streaming", dest='streaming', 143 | action='store_false') 144 | parser.add_argument("--verbose", action='store_true') 145 | parser.set_defaults(averaging=False) 146 | parser.set_defaults(streaming=False) 147 | arguments = parser.parse_args() 148 | history_length = arguments.history_length 149 | prediction_horizon = arguments.prediction_horizon 150 | settings = None 151 | if arguments.settings: 152 | with open(arguments.settings, 'r') as f: 153 | settings = json.load(f) 154 | dynamics_learner = None 155 | if arguments.method == 'example': 156 | dynamics_learner = DynamicsLearnerExample(history_length, 157 | prediction_horizon, 158 | averaging=arguments.averaging, 
159 | streaming=arguments.streaming) 160 | elif arguments.method == 'pilco_ninducing_500_ntraining_50000': 161 | from DL.methods.pilco_dynamics_learner import PilcoDynamicsLearner 162 | 163 | ninducing = 500 164 | ntraining = 50000 165 | dynamics_learner = PilcoDynamicsLearner(history_length, 166 | prediction_horizon, ninducing, 167 | ntraining, 168 | averaging=arguments.averaging, 169 | streaming=arguments.streaming) 170 | elif arguments.method == 'SVGPR': 171 | from DL.methods.SVGPR import SVGPR 172 | 173 | ninducing = 1000 174 | minibatch_size = 1000 175 | epochs = 40 176 | dynamics_learner = SVGPR(history_length, 177 | prediction_horizon, ninducing, minibatch_size, 178 | epochs=epochs, averaging=arguments.averaging, 179 | streaming=arguments.streaming) 180 | elif arguments.method == 'linear_model_ls': 181 | from DL.methods.linear_regression_ls import LinearModel 182 | 183 | dynamics_learner = LinearModel(history_length, prediction_horizon, 184 | averaging=arguments.averaging, 185 | streaming=arguments.streaming) 186 | elif arguments.method == 'linear_model_sgd': 187 | from DL.methods.linear_regression_sgd import LinearModelSGD 188 | 189 | dynamics_learner = LinearModelSGD(history_length, prediction_horizon, 190 | difference_learning=True, 191 | averaging=arguments.averaging, 192 | streaming=arguments.streaming, 193 | settings=settings) 194 | elif arguments.method == 'BNN': 195 | from DL.methods.BNN import BNNLearner 196 | 197 | dynamics_learner = BNNLearner(history_length, prediction_horizon, 198 | averaging=arguments.averaging, 199 | streaming=arguments.streaming) 200 | elif arguments.method == 'NN': 201 | from DL.methods.nn_dynamics_learner import NNDynamicsLearner 202 | settings_file = "./Settings/nn_prediction_horizon_{0}_history_length_{1}.json".format( 203 | prediction_horizon, history_length) 204 | exists = os.path.isfile(settings_file) 205 | if exists: 206 | with open(settings_file, 'r') as f: 207 | params = json.load(f) 208 | dynamics_learner = NNDynamicsLearner(history_length=history_length, 209 | prediction_horizon=prediction_horizon, 210 | model_arch_params=params[ 211 | "model_arch_params"], 212 | model_train_params=params[ 213 | "model_train_params"], 214 | mode=params['mode'], 215 | averaging=arguments.averaging, 216 | streaming=arguments.streaming) 217 | elif arguments.method == 'EQL': 218 | from DL.methods.eql_dynamics_learner import EQL 219 | settings_file = "./Settings/eql_prediction_horizon_{0}_history_length_{1}.json".format( 220 | prediction_horizon, history_length) 221 | exists = os.path.isfile(settings_file) 222 | if exists: 223 | with open(settings_file, 'r') as f: 224 | params = json.load(f) 225 | dynamics_learner = EQL(history_length=history_length, 226 | prediction_horizon=prediction_horizon, 227 | model_arch_params=params[ 228 | "model_arch_params"], 229 | model_train_params=params[ 230 | "model_train_params"], 231 | optional_params=params["optional_params"], 232 | averaging=arguments.averaging, 233 | streaming=arguments.streaming) 234 | elif arguments.method == 'Eureqa': 235 | from DL.methods.eureqa_dynamics_learner import Eureqa 236 | dynamics_learner = Eureqa(history_length=history_length, 237 | prediction_horizon=prediction_horizon, 238 | averaging=arguments.averaging, 239 | streaming=arguments.streaming) 240 | elif arguments.method == 'lwpr': 241 | from DL.methods.LWPR import lwpr_dyn_model 242 | dynamics_learner = lwpr_dyn_model(history_length, prediction_horizon, 243 | difference_learning=True, 244 | averaging=arguments.averaging, 245 | 
streaming=arguments.streaming, 246 | settings=settings) 247 | elif arguments.method == 'system_id': 248 | from DL.methods.system_id import SystemId 249 | dynamics_learner = SystemId(history_length=arguments.history_length, 250 | prediction_horizon=arguments.prediction_horizon, 251 | settings=settings) 252 | assert dynamics_learner, "Make sure the method is implemented." 253 | training_observations, training_actions = loadRobotData( 254 | arguments.training_data) 255 | if arguments.trained_model: 256 | dynamics_learner.load_normalization_stats(training_observations, 257 | training_actions) 258 | dynamics_learner.load(arguments.trained_model) 259 | else: 260 | initial_time = time.perf_counter() 261 | dynamics_learner.learn(training_observations, training_actions) 262 | if arguments.verbose: 263 | print('Training time {} s'.format( 264 | time.perf_counter() - initial_time)) 265 | if arguments.output_model: 266 | dynamics_learner.save(arguments.output_model) 267 | 268 | datasets = {} 269 | if arguments.transfer_test_data: 270 | for i, dataset_path in enumerate(arguments.transfer_test_data): 271 | datasets['transfer_test_data_{}'.format(i + 1)] = dataset_path 272 | for dataset in ['iid_test_data']: 273 | dataset_path = getattr(arguments, dataset) 274 | if dataset_path: 275 | datasets[dataset] = dataset_path 276 | 277 | # Maps each data set to its corresponding error file. 278 | set_to_errors = {} 279 | for dataset in sorted(datasets.keys()): 280 | dataset_path = datasets[dataset] 281 | testing_observations, testing_actions = loadRobotData(dataset_path) 282 | print("evaluating on ", dataset_path, testing_observations.shape) 283 | 284 | errors = evaluate(dynamics_learner, testing_observations, 285 | testing_actions, dataset, verbose=arguments.verbose) 286 | set_to_errors[dataset] = errors 287 | print("{} error:".format(dataset)) 288 | angle_errors = get_angle_errors(errors) 289 | print(compute_RMSE_from_errors(angle_errors)) 290 | if arguments.output_errors: 291 | np.savez(arguments.output_errors, **set_to_errors) 292 | 293 | def main(): 294 | run(parser=argparse.ArgumentParser(description=__doc__)) 295 | 296 | # import ipdb 297 | # import traceback 298 | # try: 299 | # run(parser=argparse.ArgumentParser(description=__doc__)) 300 | # except: 301 | # traceback.print_exc(sys.stdout) 302 | # _, _, tb = sys.exc_info() 303 | # ipdb.post_mortem(tb) 304 | 305 | 306 | if __name__ == "__main__": 307 | main() 308 | -------------------------------------------------------------------------------- /DL/methods/BNN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | 6 | import numpy as np 7 | import math 8 | import os 9 | 10 | from itertools import islice 11 | from itertools import chain 12 | 13 | from DL import DynamicsLearnerInterface 14 | 15 | MODE = 'MAP' 16 | 17 | class BNNLearner(DynamicsLearnerInterface): 18 | def __init__(self, history_length, prediction_horizon, 19 | difference_learning = True, learning_rate=0.1, 20 | optim_epochs=1, # TODO: redo 400 21 | hidden_units=[100, 100], 22 | prior_mean=0, prior_std=1, 23 | batch_size=512, 24 | predDim=3, # will only predict the first predDim dimensions 25 | averaging=None, 26 | streaming=None): 27 | super().__init__(history_length, prediction_horizon, 28 | difference_learning, averaging=averaging, streaming=streaming) 29 | print("streaming = {}".format(streaming)) 30 | print("averaging = 
{}".format(averaging)) 31 | # self.history_length = history_length 32 | # self.prediction_horizon = prediction_horizon 33 | # self.observation_dimension = 9 34 | # self.action_dimension = 3 35 | # self.difference_learning = difference_learning 36 | # BNN tuning parameters 37 | self.learning_rate = np.loadtxt("learning_rate.csv") 38 | self.optim_epochs = int(np.loadtxt("optim_epochs.csv")) 39 | layer_width = int(np.loadtxt("width.csv")) 40 | layer_depth = int(np.loadtxt("depth.csv")) 41 | self.hidden_units = [layer_width]*layer_depth 42 | self.prior_mean = prior_mean 43 | self.prior_std = prior_std 44 | self.batch_size = int(np.loadtxt("batch_size.csv")) 45 | # create models 46 | self.predDim = predDim 47 | if averaging: 48 | self.input_dim = self.observation_dimension + self.action_dimension 49 | if prediction_horizon > 1: 50 | self.input_dim += self.action_dimension 51 | else: 52 | self.input_dim = self.history_length*(self.observation_dimension + self.action_dimension) \ 53 | + (self.prediction_horizon-1)*(self.action_dimension) 54 | self.output_dim = self.predDim 55 | self.models_ = [] 56 | self.optims_ = [] 57 | for i in range(self.predDim): 58 | # create model and append to model list 59 | layers = [] 60 | input_layer = BNNLayer(self.input_dim, 61 | self.hidden_units[0], 62 | activation='relu', 63 | prior_mean=self.prior_mean, 64 | prior_rho=self.prior_std) 65 | layers.append(input_layer) 66 | for i in np.arange(len(self.hidden_units)-1): 67 | layers.append(BNNLayer(self.hidden_units[i], 68 | self.hidden_units[i+1], 69 | activation='relu', 70 | prior_mean=self.prior_mean, 71 | prior_rho=self.prior_std)) 72 | print("more layers") 73 | output_layer = BNNLayer(self.hidden_units[-1], 74 | 1, 75 | activation='none', 76 | prior_mean=self.prior_mean, 77 | prior_rho=self.prior_std) 78 | layers.append(output_layer) 79 | self.models_.append(BNN(layers)) 80 | 81 | optim = torch.optim.Adam(self.models_[-1].parameters(), 82 | lr=self.learning_rate) 83 | self.optims_.append(optim) 84 | 85 | def name(self): 86 | return "BNN" 87 | 88 | def _learn(self, training_inputs, training_targets): 89 | print("non-streaming training") 90 | print(training_inputs.shape) 91 | print(training_targets.shape) 92 | Var = lambda x, dtype=torch.FloatTensor: Variable(torch.from_numpy(x).type(dtype)) 93 | # print(training_inputs.size()) 94 | # print(training_targets[:, 1].reshape((-1, 1)).shape) 95 | if self.batch_size is None: 96 | training_inputs = Var(training_inputs) 97 | training_targets = Var(training_targets) 98 | for i in range(self.predDim): 99 | for i_ep in range(self.optim_epochs): 100 | kl, lg_lklh = self.models_[i].Forward( 101 | training_inputs, training_targets[:, i].reshape((-1, 1)), 1, 'Gaussian') 102 | loss = BNN.loss_fn(kl, lg_lklh, 1) 103 | self.optims_[i].zero_grad() 104 | loss.backward() 105 | self.optims_[i].step() 106 | # print("{}.{} / {}.{}".format(i, i_ep, self.observation_dimension, self.optim_epochs)) 107 | else: 108 | dataSize = training_targets[:, 1].size 109 | stepsPerEpoch = dataSize / self.batch_size 110 | nSteps = int(np.ceil(self.optim_epochs * stepsPerEpoch)) 111 | for i_ep in range(nSteps): 112 | # subsample 113 | currInputs, currTargets = self._subsample_training_set( 114 | training_inputs, training_targets) 115 | currInputs = Var(currInputs) 116 | currTargets = Var(currTargets) 117 | for dim in range(self.predDim): 118 | kl, lg_lklh = self.models_[dim].Forward( 119 | currInputs, currTargets[:, dim].reshape((-1, 1)), 1, 'Gaussian') 120 | loss = BNN.loss_fn(kl, lg_lklh, 1) 121 | 
self.optims_[dim].zero_grad() 122 | loss.backward() 123 | self.optims_[dim].step() 124 | # print("{}.{} / {}.{}".format(i_ep, dim, nSteps, self.observation_dimension)) 125 | def _getNewData(self, generator): 126 | """ 127 | returns chunks of training data and corresponding targets 128 | """ 129 | currData = np.asarray(list(islice(generator, self.batch_size))) 130 | newDataTargets = np.fromiter(chain.from_iterable(currData[:, 0]), 131 | dtype=np.float).reshape([512, self.observation_dimension]) 132 | newDataTrain = np.fromiter(chain.from_iterable(currData[:, 1]), 133 | dtype=np.float).reshape([512, self.input_dim]) 134 | return newDataTrain, newDataTargets # 135 | 136 | def _learn_from_stream(self, generator, datastream_size): 137 | """ 138 | Parameters 139 | ---------- 140 | training_inputs: np-array of shape nTrainingInstances x input dim 141 | that represents the input to the dynamics 142 | (i.e. relevant observations and actions within 143 | the history length and prediction horizon) 144 | training_targets: np-array of shape nTrainingInstances x state dim 145 | denoting the targets of the dynamics model. 146 | """ 147 | print("streaming training") 148 | print("data stream size: {}".format(datastream_size)) 149 | print("object size 1: {}".format(next(generator)[0].shape)) 150 | print("object size 2: {}".format(next(generator)[1].shape)) 151 | Var = lambda x, dtype=torch.FloatTensor: Variable(torch.from_numpy(x).type(dtype)) 152 | 153 | dataSize = datastream_size 154 | stepsPerEpoch = dataSize / self.batch_size 155 | nSteps = int(np.ceil(self.optim_epochs * stepsPerEpoch)) 156 | print("steps: {}".format(nSteps)) 157 | for i_ep in range(nSteps): 158 | # subsample 159 | currInputs, currTargets = self._getNewData(generator) 160 | currInputs = Var(currInputs) 161 | currTargets = Var(currTargets) 162 | for dim in range(self.predDim): 163 | kl, lg_lklh = self.models_[dim].Forward( 164 | currInputs, currTargets[:, dim].reshape((-1, 1)), 1, 'Gaussian') 165 | loss = BNN.loss_fn(kl, lg_lklh, 1) 166 | self.optims_[dim].zero_grad() 167 | loss.backward() 168 | self.optims_[dim].step() 169 | 170 | # for count in range(self.epochs_ * datastream_size): 171 | # training_target, training_input = next(generator) 172 | # assert training_input.shape[0] == self._get_input_dim() 173 | # model_input = training_input.reshape(1, -1) 174 | # 175 | # for output_idx in range(self.observation_dimension): 176 | # model_target = training_target[output_idx:output_idx + 1] 177 | # self.models_[output_idx].partial_fit(model_input, model_target) 178 | # 179 | # 180 | # 181 | # 182 | # input_output_shapes = ([self.input_dim], [self.output_dim]) 183 | # input_output_dtypes = (tf.float64, tf.float64) 184 | # def switch_input_target(): 185 | # def gen(): 186 | # for target, input in generator: 187 | # yield input, target 188 | # return gen 189 | # ds = tf.data.Dataset.from_generator(switch_input_target(), 190 | # input_output_dtypes, input_output_shapes) 191 | # ds = ds.repeat() 192 | # ds = ds.batch(self.batch_size) 193 | # self.model.fit(ds.make_one_shot_iterator(), 194 | # steps_per_epoch=datastream_size//self.batch_size, 195 | # epochs=self.epochs, 196 | # callbacks=[self.tensorboard] 197 | 198 | 199 | def _subsample_training_set(self, training_inputs, training_targets): 200 | assert self.batch_size 201 | total_size = training_inputs.shape[0] 202 | permutation = np.random.permutation( 203 | total_size)[:self.batch_size] 204 | return training_inputs[permutation], training_targets[permutation] 205 | 206 | def _predict(self, 
inputs): 207 | # print("Start prediction") 208 | prediction = np.zeros((inputs.shape[0], self.observation_dimension)) 209 | Var = lambda x, dtype=torch.FloatTensor: Variable(torch.from_numpy(x).type(dtype)) 210 | X_ = Var(inputs) 211 | for i, model in enumerate(self.models_): 212 | if MODE == 'MC': 213 | pred_lst = [model.forward(X_, mode='MC').data.numpy() for _ in range(40)] 214 | pred = np.array(pred_lst).T 215 | prediction[:, i] = pred.mean(axis=2) 216 | else: 217 | prediction[:, i] = np.squeeze(model.forward(X_, mode='MAP').data.numpy()) 218 | return prediction 219 | 220 | def load(self, filename): 221 | """ 222 | Parameters 223 | ---------- 224 | filename: string used as filename to load a model. 225 | """ 226 | raise NotImplementedError 227 | 228 | def save(self, filename): 229 | """ 230 | Parameters 231 | ---------- 232 | filename: string used as filename to save a model. 233 | """ 234 | if not os.path.exists(filename): 235 | os.makedirs(filename) 236 | for i, model in enumerate(self.models_): 237 | torch.save(model.state_dict(), os.path.join(filename, "state{}.pt".format(i))) 238 | 239 | class BNN(nn.Module): 240 | def __init__(self, layers): 241 | super(BNN, self).__init__() 242 | 243 | self.layers, self.params = [], nn.ParameterList() 244 | for layer in layers: 245 | self.layers.append(layer) 246 | self.params.extend([*layer.parameters()]) # register module parameters 247 | 248 | def forward(self, x, mode): 249 | if mode == 'forward': 250 | net_kl = 0 251 | for layer in self.layers: 252 | x, layer_kl = layer.forward(x, mode) 253 | net_kl += layer_kl 254 | return x, net_kl 255 | else: 256 | for layer in self.layers: 257 | x = layer.forward(x, mode) 258 | return x 259 | 260 | def Forward(self, x, y, n_samples, type): 261 | 262 | assert type in {'Gaussian', 'Softmax'}, 'Likelihood type not found' 263 | 264 | # Sample N samples and average 265 | total_kl, total_likelh = 0., 0. 
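        # Monte-Carlo estimate of the ELBO terms: every call to forward() draws
        # fresh noise, returning the summed layer-wise KL and the network
        # output; the (unnormalized, unit-variance) Gaussian log-likelihood of
        # the targets is accumulated and both terms are averaged over n_samples.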
266 | for _ in range(n_samples): 267 | out, kl = self.forward(x, mode='forward') 268 | 269 | # Gaussian output (with unit var) 270 | # lklh = torch.log(torch.exp(-(y - out) ** 2 / 2e-2) / math.sqrt(2e-2 * math.pi)).sum() 271 | 272 | if type == 'Gaussian': 273 | lklh = (-.5 * (y - out) ** 2).sum() 274 | else: # softmax 275 | lklh = torch.log(out.gather(1, y)).sum() 276 | 277 | total_kl += kl 278 | total_likelh += lklh 279 | 280 | return total_kl / n_samples, total_likelh / n_samples 281 | 282 | @staticmethod 283 | def loss_fn(kl, lklh, n_batch): 284 | return (kl / n_batch - lklh).mean() 285 | 286 | class BNNLayer(nn.Module): 287 | NegHalfLog2PI = -.5 * math.log(2.0 * math.pi) 288 | softplus = lambda x: math.log(1 + math.exp(x)) 289 | 290 | def __init__(self, n_input, n_output, activation, prior_mean, prior_rho): 291 | assert activation in {'relu', 'softmax', 'none'}, 'Activation Type Not Found' 292 | 293 | super(BNNLayer, self).__init__() 294 | 295 | # Instantiate a large Gaussian block to sample from, much faster than generating random sample every time 296 | self._gaussian_block = np.random.randn(10000) 297 | 298 | self.n_input = n_input 299 | self.n_output = n_output 300 | 301 | self.W_mean = nn.Parameter(torch.ones((n_input, n_output)) * prior_mean) 302 | self.W_rho = nn.Parameter(torch.ones(n_input, n_output) * prior_rho) 303 | 304 | self.b_mean = nn.Parameter(torch.ones(1, n_output) * prior_mean) 305 | self.b_rho = nn.Parameter(torch.ones(1, n_output) * prior_rho) 306 | 307 | self.prior_var = Variable(torch.ones(1, 1) * BNNLayer.softplus(prior_rho) ** 2) 308 | 309 | # Set activation function 310 | self.act = None 311 | if activation == 'relu': 312 | self.act = F.relu 313 | elif activation == 'softmax': 314 | self.act = F.softmax 315 | 316 | self._Var = lambda x: Variable(torch.from_numpy(x).type(torch.FloatTensor)) 317 | 318 | def forward(self, X, mode): 319 | assert mode in {'forward', 'MAP', 'MC'}, 'BNNLayer Mode Not Found' 320 | 321 | _shape = (X.size()[0], self.n_output) 322 | 323 | # Z: pre-activation. Local reparam. trick is used. 
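        # Rather than sampling the weights directly, the Gaussian
        # pre-activation Z ~ N(Z_Mean, Z_Std^2) is sampled, which gives the
        # same output distribution with lower-variance gradients.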
324 | Z_Mean = torch.mm(X, self.W_mean) + self.b_mean.expand(*_shape) 325 | 326 | if mode == 'MAP': return self.act(Z_Mean) if self.act is not None else Z_Mean 327 | 328 | Z_Std = torch.sqrt( 329 | torch.mm(torch.pow(X, 2), 330 | torch.pow(F.softplus(self.W_rho), 2)) + 331 | torch.pow(F.softplus(self.b_rho.expand(*_shape)), 2) 332 | ) 333 | 334 | Z_noise = self._random(_shape) 335 | Z = Z_Mean + Z_Std * Z_noise 336 | 337 | if mode == 'MC': return self.act(Z) if self.act is not None else Z 338 | 339 | # Stddev for the prior 340 | Prior_Z_Std = torch.sqrt( 341 | torch.mm(torch.pow(X, 2), 342 | self.prior_var.expand(self.n_input, self.n_output)) + 343 | self.prior_var.expand(*_shape) 344 | ).detach() 345 | 346 | # KL[posterior(w|D)||prior(w)] 347 | layer_KL = self.sample_KL(Z, 348 | Z_Mean, Z_Std, 349 | Z_Mean.detach(), Prior_Z_Std) 350 | 351 | out = self.act(Z) if self.act is not None else Z 352 | return out, layer_KL 353 | 354 | def _random(self, shape): 355 | Z_noise = np.random.choice(self._gaussian_block, size=shape[0] * shape[1]) 356 | Z_noise = np.expand_dims(Z_noise, axis=1).reshape(*shape) 357 | return self._Var(Z_noise) 358 | 359 | @staticmethod 360 | def log_gaussian(x, mean, std): 361 | return BNNLayer.NegHalfLog2PI - torch.log(std) - .5 * torch.pow(x - mean, 2) / torch.pow(std, 2) 362 | 363 | @staticmethod 364 | def sample_KL(x, mean1, std1, mean2, std2): 365 | log_prob1 = BNNLayer.log_gaussian(x, mean1, std1) 366 | log_prob2 = BNNLayer.log_gaussian(x, mean2, std2) 367 | return (log_prob1 - log_prob2).sum() 368 | -------------------------------------------------------------------------------- /DL/methods/LWPR.py: -------------------------------------------------------------------------------- 1 | """ 2 | Learning a LWPR model. 3 | """ 4 | 5 | import argparse 6 | import numpy as np 7 | import time 8 | from collections import deque 9 | from DL import DynamicsLearnerInterface 10 | from DL.utils import loadRobotData 11 | from lwpr import LWPR 12 | 13 | class lwpr_dyn_model(DynamicsLearnerInterface): 14 | 15 | def __init__(self, history_length, prediction_horizon, difference_learning, 16 | averaging, streaming, settings=None): 17 | super().__init__(history_length, prediction_horizon, 18 | difference_learning, averaging=averaging, streaming=streaming) 19 | self.model_ = LWPR(self._get_input_dim(), self.observation_dimension) 20 | 21 | # Default values. 
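        # init_D (initial distance metric of the receptive fields) and
        # init_alpha (learning rate of the distance-metric adaptation) are
        # forwarded to the LWPR model; time_threshold is used by
        # _learn_from_stream below to stop training once the median
        # per-update time exceeds it.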
22 | init_D = 25 23 | init_alpha = 175 24 | self.time_threshold = np.inf 25 | if settings: 26 | init_D = settings['init_D'] 27 | init_alpha = settings['init_alpha'] 28 | self.time_threshold = settings.get('time_threshold', np.inf) 29 | self.model_.init_D = init_D * np.eye(self._get_input_dim()) 30 | self.model_.init_alpha = init_alpha * np.eye(self._get_input_dim()) 31 | 32 | def _learn(self, training_inputs, training_targets): 33 | 34 | def gen(inputs, targets): 35 | for i in range(inputs.shape[0]): 36 | yield targets[i], inputs[i] 37 | 38 | self._learn_from_stream(gen(training_inputs, training_targets), 39 | training_inputs.shape[0]) 40 | 41 | def _learn_from_stream(self, training_generator, generator_size): 42 | deck = deque(maxlen=100) 43 | for count in range(generator_size): 44 | training_target, training_input = next(training_generator) 45 | assert training_input.shape[0] == self._get_input_dim() 46 | assert training_target.shape[0] == self.observation_dimension 47 | time_before_update = time.perf_counter() 48 | self.model_.update(training_input, training_target) 49 | elapsed_time = time.perf_counter() - time_before_update 50 | deck.append(elapsed_time) 51 | if count and count % 1000 == 0: 52 | median_time = sorted(deck)[deck.maxlen // 2] 53 | print('Update time for iter {} is {}'.format(count, 54 | median_time)) 55 | if median_time > self.time_threshold: 56 | break 57 | 58 | def _predict(self, inputs): 59 | assert self.model_, "a trained model must be available" 60 | prediction = np.zeros((inputs.shape[0], self.observation_dimension)) 61 | for idx in range(inputs.shape[0]): 62 | prediction[idx, :] = self.model_.predict(inputs[idx]) 63 | return prediction 64 | 65 | def name(self): 66 | return "LWPR" 67 | 68 | if __name__ == "__main__": 69 | parser = argparse.ArgumentParser(description=__doc__) 70 | parser.add_argument("--data_filename", required=True, 71 | help=" filename of the input robot data") 72 | parser.add_argument("--history_length", type=int, default=1) 73 | parser.add_argument("--prediction_horizon", type=int, default=1) 74 | parser.add_argument("--averaging", dest='averaging', action='store_true') 75 | parser.add_argument("--no-averaging", dest='averaging', 76 | action='store_false') 77 | parser.set_defaults(averaging=False) 78 | args = parser.parse_args() 79 | observations, actions = loadRobotData(args.data_filename) 80 | 81 | settings = {"init_alpha": 175, "init_D": 25, "time_threshold" : 0.01} 82 | 83 | # Learning in stremaing mode. 
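    # With streaming=True, learn() in the base class feeds the standardized
    # (target, input) pairs to _learn_from_stream one at a time instead of
    # building the full unrolled training set in memory.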
84 | dynamics_model = lwpr_dyn_model(args.history_length, 85 | args.prediction_horizon, True, args.averaging, streaming=True, 86 | settings=settings) 87 | init_train_time = time.perf_counter() 88 | dynamics_model.learn(observations, actions) 89 | end_train_time = time.perf_counter() 90 | print('Training time {}'.format(end_train_time - init_train_time)) 91 | 92 | init_pred_time = time.perf_counter() 93 | dynamics_model.predict(observations[:, :args.history_length], 94 | actions[:, :args.history_length], 95 | actions[:, args.history_length: args.history_length + \ 96 | args.prediction_horizon - 1]) 97 | end_pred_time = time.perf_counter() 98 | print('Prediction time {}'.format(end_pred_time - init_pred_time)) 99 | -------------------------------------------------------------------------------- /DL/methods/SKI.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import math 3 | import torch 4 | import gpytorch 5 | from matplotlib import pyplot as plt 6 | 7 | DTYPE=torch.float 8 | 9 | import argparse 10 | import numpy as np 11 | from DL.dynamics_learner_interface.dynamics_learner_interface import DynamicsLearnerInterface 12 | from DL.utils.data_loading import unrollForDifferenceTraining 13 | 14 | from DL.utils.standardizer import Standardizer 15 | 16 | class SKIDynamicsLearner(DynamicsLearnerInterface): 17 | 18 | def __init__(self, state_dims, action_dims, learningRate=0.1, 19 | trainingIterations=100): 20 | self.learningRate=learningRate 21 | self.state_dims = state_dims 22 | self.action_dims = action_dims 23 | self.models = None 24 | self.trainingIterations = trainingIterations 25 | 26 | def setAdam(self, currModel, learningRate=0.1): 27 | self.optimizer = torch.optim.Adam( 28 | [{'params': currModel.parameters()}], 29 | lr=learningRate) 30 | 31 | def learn(self, observation_sequences, action_sequences): 32 | """ 33 | Parameters 34 | ---------- 35 | observations_sequences: np-array of shape nSequences x nStepsPerRollout x nStates 36 | past state observations 37 | action_sequences: np-array of shape nSequences x nStepsPerRollout x nInputs 38 | actions taken at the corresponding time points. 
39 | """ 40 | targets, inputs = unrollForDifferenceTraining(observation_sequences, action_sequences) 41 | targets = np.asarray(targets, dtype=np.double) 42 | inputs = np.asarray(inputs, dtype=np.double) 43 | # standardize everything 44 | self.targetStandardizer = Standardizer(targets) 45 | self.inputStandardizer = Standardizer(inputs) 46 | targets = self.targetStandardizer.standardize(targets) 47 | inputs = self.inputStandardizer.standardize(inputs) 48 | targets = torch.from_numpy(targets).float() 49 | inputs = torch.from_numpy(inputs).float() 50 | 51 | self.models = [] 52 | for modelIndex in np.arange(targets.shape[1]): 53 | print(modelIndex) 54 | # create model 55 | currModel = GPRegressionModel(inputs, targets[:, modelIndex]) 56 | # set model into training mode 57 | currModel.train() 58 | currModel.likelihood.train() 59 | # initialize optimizer and optimization target 60 | self.setAdam(currModel, self.learningRate) 61 | mll = gpytorch.mlls.ExactMarginalLogLikelihood( 62 | currModel.likelihood, 63 | currModel) 64 | # do training 65 | for iterIndex in range(self.trainingIterations): 66 | self.optimizer.zero_grad() 67 | output = currModel(inputs) 68 | loss = -mll(output, targets[:, modelIndex]) 69 | loss.backward() 70 | print('Iter %d/%d - Loss: %.3f' % (iterIndex + 1, self.trainingIterations, loss.item())) 71 | self.optimizer.step() 72 | self.models.append(currModel) 73 | 74 | def learn2(self, inputs, targets): 75 | """ 76 | Parameters 77 | ---------- 78 | observations_sequences: np-array of shape nSequences x nStepsPerRollout x nStates 79 | past state observations 80 | action_sequences: np-array of shape nSequences x nStepsPerRollout x nInputs 81 | actions taken at the corresponding time points. 82 | """ 83 | # targets, inputs = unrollForDifferenceTraining(observation_sequences, action_sequences) 84 | # targets = np.asarray(targets, dtype=np.double) 85 | # inputs = np.asarray(inputs, dtype=np.double) 86 | # # standardize everything 87 | # self.targetStandardizer = Standardizer(targets) 88 | # self.inputStandardizer = Standardizer(inputs) 89 | # targets = self.targetStandardizer.standardize(targets) 90 | # inputs = self.inputStandardizer.standardize(inputs) 91 | # targets = torch.tensor(targets, dtype=DTYPE) 92 | # inputs = torch.tensor(inputs, dtype=DTYPE) 93 | 94 | self.models = [] 95 | for modelIndex in np.arange(targets.shape[1]): 96 | print(modelIndex) 97 | # create model 98 | currModel = GPRegressionModel(inputs, targets[:, modelIndex]) 99 | # set model into training mode 100 | currModel.train() 101 | currModel.likelihood.train() 102 | # initialize optimizer and optimization target 103 | self.setAdam(currModel, self.learningRate) 104 | mll = gpytorch.mlls.ExactMarginalLogLikelihood( 105 | currModel.likelihood, 106 | currModel) 107 | # do training 108 | for iterIndex in range(self.trainingIterations): 109 | self.optimizer.zero_grad() 110 | output = currModel(inputs) 111 | loss = -mll(output, targets[:, modelIndex]) 112 | loss.backward() 113 | print('Iter %d/%d - Loss: %.3f' % (iterIndex + 1, self.trainingIterations, loss.item())) 114 | self.optimizer.step() 115 | self.models.append(currModel) 116 | 117 | def predict(self, observation_history, action_history, action_future): 118 | """ 119 | Parameters 120 | ---------- 121 | observation_history: np-array of shape nStepsPerRollout x nStates 122 | all states seen by the system in the current 123 | rollout 124 | action_history: np-array of shape nStepsPerRollout x nInputs 125 | all actions seen by the system in the current 126 | rollout. 
The last action corresponds to the action 127 | that was applied at the final time step. 128 | action_future: np-array of shape nPredict x nInputs 129 | actions to be applied to the system. First 130 | dimension determins prediction horizon. The first 131 | action is the action applied one time step after 132 | the last action of "action_history". 133 | Outputs 134 | ---------- 135 | observation_future: np-array of shape nPredict+1 x nStates 136 | predicted states of the system. The last state 137 | will be one time step after the last action of 138 | action_future 139 | """ 140 | # Set model and likelihood into evaluation mode 141 | self.model.eval() 142 | self.likelihood.eval() 143 | 144 | x0 = observation_history[-1, :] 145 | a0 = np.asarray(action_history[-1, :]).reshape([1, -1]) 146 | allActions = np.concatenate([a0, action_future], axis=0) 147 | 148 | observation_future = np.zeros(allActions.shape[0]+1, x0.size) 149 | observation_future[0, :] = x0 150 | 151 | for i in np.arange(allActions.shape[0]): 152 | with torch.no_grad(), gpytorch.settings.fast_pred_var(): 153 | observed_pred = self.likelihood(self.model(allActions[i, :])) 154 | currentPrediction = observed_pred.mean # .view(n, n) 155 | observation_future[i+1, :] = observation_future[i, :] + currentPrediction 156 | 157 | return observation_future[1:, :] 158 | 159 | 160 | class GPRegressionModel(gpytorch.models.ExactGP): 161 | def __init__(self, train_x, train_y, likelihood=gpytorch.likelihoods.GaussianLikelihood()): 162 | """ 163 | train_x: tensor containing training data. size nObs x nDim 164 | train_y: tensor containing observations. size nObs 165 | likelihood: currently just assumed to be Gaussian (observation model) 166 | """ 167 | super(GPRegressionModel, self).__init__(train_x, train_y, likelihood) 168 | 169 | # SKI requires a grid size hyperparameter. This util can help with that 170 | # grid_size = gpytorch.utils.grid.choose_grid_size(train_x) 171 | # print("grid size {}".format(grid_size)) 172 | grid_size = 40 # TODO: MAGIC NUMBER !!! 173 | 174 | self.mean_module = gpytorch.means.ConstantMean() 175 | self.covar_module = gpytorch.kernels.GridInterpolationKernel( 176 | gpytorch.kernels.ScaleKernel( 177 | gpytorch.kernels.RBFKernel(ard_num_dims=train_x.shape[1]), 178 | ), grid_size=grid_size, num_dims=train_x.shape[1] 179 | ) 180 | self.obsLikelihood = likelihood 181 | 182 | def forward(self, x): 183 | mean_x = self.mean_module(x) 184 | covar_x = self.covar_module(x) 185 | return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) 186 | 187 | 188 | 189 | 190 | -------------------------------------------------------------------------------- /DL/methods/SVGPR.py: -------------------------------------------------------------------------------- 1 | """ 2 | Using the implementation of scalable GPs using SVI available in GPflow which is 3 | based on the paper Gaussian Processes for Big Data (Hensman et al. 2013). 
4 | """ 5 | 6 | import argparse 7 | import tensorflow as tf 8 | import gpflow 9 | import numpy as np 10 | from collections import defaultdict 11 | from DL import DynamicsLearnerInterface 12 | from DL.utils import loadRobotData 13 | 14 | class SVGPR(DynamicsLearnerInterface): 15 | 16 | 17 | class Logger(gpflow.actions.Action): 18 | def __init__(self, model): 19 | self.model = model 20 | self.logf = [] 21 | 22 | def run(self, ctx): 23 | if (ctx.iteration % 10) == 0: 24 | # Extract likelihood tensor from Tensorflow session 25 | likelihood = - ctx.session.run(self.model.likelihood_tensor) 26 | # Append likelihood value to list 27 | print("Iteration {} loglikelihood {}".format(ctx.iteration, 28 | likelihood)) 29 | self.logf.append(likelihood) 30 | 31 | 32 | def __init__(self, history_length, prediction_horizon, 33 | ninducing_points, minibatch_size, epochs, averaging=True, 34 | streaming=False): 35 | super().__init__(history_length, prediction_horizon, 36 | averaging=averaging, streaming=streaming) 37 | self.ninducing_points = ninducing_points 38 | self.minibatch_size = minibatch_size 39 | self.epochs = epochs 40 | 41 | def _learn(self, training_inputs, training_targets): 42 | ntraining, input_dim = training_inputs.shape 43 | assert input_dim == self._get_input_dim() 44 | kern = gpflow.kernels.RBF(input_dim=input_dim, ARD=True) 45 | Z = training_inputs[np.random.permutation( 46 | ntraining)[:self.ninducing_points]].copy() 47 | assert Z.shape == (self.ninducing_points, input_dim) 48 | likelihood = gpflow.likelihoods.Gaussian(np.ones( 49 | self.observation_dimension)) 50 | 51 | # Alternatively we can explicitly have one model per dimension. 52 | self.model_ = gpflow.models.SVGP(training_inputs, 53 | training_targets, kern, likelihood, Z, 54 | minibatch_size=self.minibatch_size) 55 | self.run_adam_(ntraining) 56 | 57 | def _learn_from_stream(self, training_datastream, datastream_size): 58 | 59 | def gen(): 60 | """ Dummy generator since Tensorflow expects a callable object.""" 61 | for y, x in training_datastream: 62 | yield y, x 63 | 64 | input_dim = self._get_input_dim() 65 | input_output_shapes = ([self.observation_dimension], [input_dim]) 66 | input_output_types = (tf.float64, tf.float64) 67 | ds = tf.data.Dataset.from_generator(gen, input_output_types, 68 | input_output_shapes) 69 | ds = ds.batch(self.minibatch_size) 70 | training_targets, training_inputs = ds.make_one_shot_iterator().get_next() 71 | kern = gpflow.kernels.RBF(input_dim=input_dim, ARD=True) 72 | Z = np.random.rand(self.ninducing_points, input_dim) 73 | likelihood = gpflow.likelihoods.Gaussian(np.ones( 74 | self.observation_dimension)) 75 | 76 | # Alternatively we can explicitly have one model per dimension. 77 | self.model_ = gpflow.models.SVGP(training_inputs, 78 | training_targets, kern, likelihood, Z, num_data=datastream_size) 79 | self.run_adam_(datastream_size) 80 | 81 | def _predict(self, inputs): 82 | assert self.model_, "a trained model must be available" 83 | mean, _ = self.model_.predict_f(inputs) 84 | return mean 85 | 86 | def run_adam_(self, ntraining): 87 | """ 88 | Utility function running the Adam Optimiser interleaved with a `Logger` action. 
89 | """ 90 | niterations = ntraining // self.minibatch_size 91 | if (niterations % self.minibatch_size) > 0: 92 | niterations += 1 93 | niterations = niterations * self.epochs 94 | print('Initial loglikelihood: ', self.model_.compute_log_likelihood()) 95 | # Create an Adam Optimiser action 96 | adam = gpflow.train.AdamOptimizer().make_optimize_action(self.model_) 97 | # Create a Logger action 98 | self.logger = self.Logger(self.model_) 99 | actions = [adam, self.logger] 100 | # Create optimisation loop that interleaves Adam with Logger 101 | loop = gpflow.actions.Loop(actions, stop=niterations)() 102 | # Bind current TF session to model 103 | self.model_.anchor(self.model_.enquire_session()) 104 | print('Trained loglikelihood: ', self.model_.compute_log_likelihood()) 105 | 106 | def name(self): 107 | return "SVGPR" 108 | 109 | def save(self, filename): 110 | """ 111 | Stores the trainable hyperparameters of SVGPR including inducing points 112 | """ 113 | params = self.model_.read_trainables() 114 | np.savez(filename, **params) 115 | 116 | def compute_log_likelihood(self, niter): 117 | """ 118 | Computes the ELBO stochastiscally using minibatches. 119 | """ 120 | evals = [self.model_.compute_log_likelihood() for _ in range(niter)] 121 | return evals 122 | 123 | 124 | if __name__ == "__main__": 125 | parser = argparse.ArgumentParser(description=__doc__) 126 | parser.add_argument("--data_filename", required=True, 127 | help=" filename of the input robot data") 128 | parser.add_argument("--plot", action='store_true') 129 | parser.add_argument("--save", help="Filename to save the model") 130 | parser.add_argument("--streaming", action='store_true') 131 | args = parser.parse_args() 132 | observations, actions = loadRobotData(args.data_filename) 133 | dynamics_model = SVGPR(1, 1, epochs = 10, ninducing_points = 10, 134 | minibatch_size=1000, streaming=args.streaming) 135 | dynamics_model.learn(observations, actions) 136 | elbo_evals = dynamics_model.compute_log_likelihood(100) 137 | print("Mean ELBO value over training set: ", np.mean(elbo_evals)) 138 | if args.save: 139 | dynamics_model.save(args.save) 140 | 141 | # Plotting the ELBO during optimzation. 
142 | if args.plot: 143 | import matplotlib.pyplot as plt 144 | plt.plot(-np.array(dynamics_model.logger.logf)) 145 | plt.xlabel('iteration') 146 | plt.ylabel('ELBO') 147 | plt.show() 148 | 149 | -------------------------------------------------------------------------------- /DL/methods/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rr-learning/transferable_dynamics_dataset/3660a4bd7c6b011c8d02853bad3cce540708512c/DL/methods/__init__.py -------------------------------------------------------------------------------- /DL/methods/eql_dynamics_learner.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import numpy as np 3 | import sympy as sym 4 | import tensorflow as tf 5 | import sys 6 | import os 7 | import glob 8 | import pickle 9 | from collections import namedtuple, defaultdict, OrderedDict 10 | from sympy.utilities.lambdify import lambdify 11 | 12 | 13 | from eql.data_utils import get_train_input_fns_from_ndarrays, extract_metadata_from_array, input_from_data 14 | from eql.evaluation import EvaluationHook 15 | from eql.utils import get_run_config, save_results, update_runtime_params, \ 16 | get_div_thresh_fn, \ 17 | tensorboard_summarize, evaluate_learner 18 | from eql.model import ModelFn as EQLModelFn 19 | 20 | 21 | from DL import DynamicsLearnerInterface 22 | from DL.utils import unrollTrainingData 23 | 24 | class EQL(DynamicsLearnerInterface): 25 | def __init__(self, 26 | history_length, 27 | prediction_horizon, 28 | model_arch_params, 29 | model_train_params, 30 | optional_params, 31 | difference_learning = True): 32 | super().__init__(history_length, prediction_horizon) 33 | self.eps = 1e-7 34 | self.new_data = True 35 | self._parse_arch_params(**model_arch_params) 36 | 37 | self._parse_train_params(**model_train_params) 38 | 39 | default_params = { 40 | "output_bound": None, 41 | "penalty_bounds": None, 42 | "generate_symbolic_expr": True, 43 | "id": 1, 44 | "keys": None, 45 | "network_init_seed": None, 46 | "kill_tensorboard_summaries_and_checkpoints": False, 47 | "use_cluster": True, 48 | "val_acc_thresh": 0.98, 49 | "weight_init_param": 1.0 50 | } 51 | for key, val in optional_params.items(): 52 | if key in default_params: 53 | default_params[key] = val 54 | else: 55 | raise AttributeError('There are no parameter with name {}'.format(key)) 56 | self.optional_params = default_params 57 | # If all the params presents we can add them additionaly in self.params 58 | self.params = {**model_arch_params, **model_train_params, **self.optional_params} 59 | run_config = get_run_config(self.params["kill_tensorboard_summaries_and_checkpoints"]) 60 | evaluation_hook = EvaluationHook(store_path=self.model_dir) 61 | self.model_fn = EQLModelFn(config=run_config, evaluation_hook=evaluation_hook) 62 | self.fast_estimator = FastPredict(tf.estimator.Estimator(model_fn=self.model_fn, config=run_config, 63 | model_dir=self.model_dir, 64 | params=self.params)) 65 | 66 | def _parse_arch_params(self, num_h_layers, layer_width): 67 | self.num_h_layers = num_h_layers 68 | self.layer_width = layer_width 69 | 70 | def _parse_train_params(self, batch_size, learning_rate, beta1, 71 | epochs_first_reg, epochs_per_reg, 72 | L0_beta, reg_scales, test_div_threshold, train_val_split, 73 | model_dir, evaluate_every): 74 | self.learning_rate = learning_rate 75 | self.beta1 = beta1 76 | self.batch_size = batch_size 77 | self.epochs_first_reg = epochs_first_reg 78 | 
self.epochs_per_reg = epochs_per_reg 79 | self.evaluate_every = evaluate_every 80 | self.L0_beta = L0_beta 81 | self.reg_scales = reg_scales 82 | self.test_div_threshold = test_div_threshold 83 | self.train_val_split = train_val_split 84 | self.model_dir = model_dir 85 | 86 | def _learn(self, training_inputs, training_targets): 87 | """ 88 | Parameters 89 | ---------- 90 | training_inputs: np-array of shape nTrainingInstances x input dim 91 | that represents the input to the dynamics 92 | (i.e. relevant observations and actions within 93 | the history length and prediction horizon) 94 | training_targets: np-array of shape nTrainingInstances x state dim 95 | denoting the targets of the dynamics model. 96 | """ 97 | # states_and_acts = np.concatenate((states, actions), axis=1) 98 | # inputs, outputs = connected_shuffle([states_and_acts, deltas]) 99 | data = (training_inputs, training_targets) 100 | metadata = extract_metadata_from_array(train_val_data=data, test_data=None, **self.params) 101 | self.model_fn.set_metadata(metadata=metadata) 102 | logging_hook = tf.train.LoggingTensorHook(tensors={'train_accuracy': 'train_accuracy'}, every_n_iter=1000) 103 | train_input, val_input = get_train_input_fns_from_ndarrays(num_epochs=self.evaluate_every, 104 | inputs=training_inputs, outputs=training_targets, **self.params, **metadata) 105 | print('One train episode equals %d epochs.' % self.evaluate_every) 106 | for i, reg_scale in enumerate(self.reg_scales): 107 | print('Regularizing with scale %s' % str(reg_scale)) 108 | self.model_fn.set_reg_scale(reg_scale) 109 | if i == 0: 110 | max_episode = self.epochs_first_reg // self.evaluate_every 111 | else: 112 | max_episode = self.epochs_per_reg // self.evaluate_every 113 | for train_episode in range(1, max_episode + 1): 114 | print('Regularized train episode with scale %s: %d out of %d.' % (str(reg_scale), train_episode, max_episode)) 115 | self.fast_estimator.estimator.train(input_fn=train_input, hooks=[logging_hook]) 116 | val_results = self.fast_estimator.estimator.evaluate(input_fn=val_input, name='validation') 117 | # if (i == 0) and (val_results['eval_accuracy'] > self.params['val_acc_thresh']): 118 | # print('Reached accuracy of %d, starting regularization.' % val_results['eval_accuracy']) 119 | # break 120 | def load_normalization_stats(self, observation_sequences, action_sequences): 121 | targets, inputs = unrollTrainingData(observation_sequences, 122 | action_sequences, self.history_length, self.prediction_horizon, 123 | self.difference_learning, self.averaging) 124 | data = (inputs, targets) 125 | metadata = extract_metadata_from_array(train_val_data=data, test_data=None, **self.params) 126 | self.model_fn.set_metadata(metadata=metadata) 127 | super().load_normalization_stats(observation_sequences, action_sequences) 128 | 129 | def _predict(self, single_input): 130 | """ 131 | Parameters 132 | ---------- 133 | single_input: two dimensional np-array with all the inputs to 134 | the dynamics model concatenated (size: input dim) 135 | (i.e. relevant observations and actions within 136 | the history length and prediction horizon) 137 | Outputs 138 | ---------- 139 | observation_prediction: two dimensional np-array shape: (n_examples, observation_dimension) 140 | corresponding the prediction for the observation 141 | after 1 step. 
142 | 143 | """ 144 | if self.new_data: 145 | self.fast_estimator.first_run = True 146 | self.new_data = False 147 | single_input = single_input.astype(np.float32) 148 | batch_size = single_input.shape[0] 149 | if self.model_fn.evaluation_hook.numba_expr is not None: 150 | predictions = self.model_fn.evaluation_hook.numba_expr(*single_input.T) 151 | predictions = np.asarray(predictions).T 152 | return predictions 153 | else: 154 | predictions = np.asarray([prediction for prediction in self.fast_estimator.predict(single_input)]) 155 | return predictions 156 | 157 | def name(self): 158 | return 'EQL' #TODO: change to attribute 159 | 160 | def save(self, model_filename): 161 | """ 162 | Parameters 163 | ---------- 164 | filename: string used as filename to load a model. 165 | """ 166 | expr = self.model_fn.evaluation_hook.numba_expr 167 | 168 | with open(model_filename, 'wb') as handle: 169 | pickle.dump(expr, handle, protocol=pickle.HIGHEST_PROTOCOL) 170 | 171 | def load(self, model_filename): 172 | """ 173 | Parameters 174 | ---------- 175 | filename: string used as filename to save a model. 176 | """ 177 | with open(model_filename, 'rb') as handle: 178 | expr = pickle.load(handle) 179 | self.model_fn.evaluation_hook.numba_expr = expr 180 | 181 | 182 | 183 | 184 | 185 | 186 | class FastPredict: 187 | """ 188 | Speeds up estimator.predict by preventing it from reloading the graph on each call to predict. 189 | It does this by creating a python generator to keep the predict call open. 190 | 191 | Usage: Just warp your estimator in a FastPredict. i.e. 192 | classifier = FastPredict(learn.Estimator(model_fn=model_params.model_fn, model_dir=model_params.model_dir)) 193 | 194 | Author: Marc Stogaitis 195 | """ 196 | def _createGenerator(self): 197 | def generator(): 198 | while not self.closed: 199 | yield (self.next_data, self.next_data) 200 | self.generator = generator 201 | 202 | def __init__(self, estimator): 203 | self.estimator = estimator 204 | self.first_run = True 205 | self.closed = False 206 | self._createGenerator() 207 | 208 | def predict(self, input_fn): 209 | self.next_data = input_fn 210 | batch_size = self.next_data.shape[0] 211 | if self.first_run: 212 | def input_func(): 213 | ds = tf.data.Dataset.from_generator(self.generator, output_types=(tf.float32, tf.float32), 214 | output_shapes=(tf.TensorShape(self.next_data.shape), 215 | tf.TensorShape(self.next_data.shape))) 216 | 217 | value = ds.make_one_shot_iterator().get_next() 218 | return value 219 | 220 | self.predictions = self.estimator.predict(input_fn=input_func) 221 | self.first_run = False 222 | results = [next(self.predictions) for i in range(batch_size)] 223 | return np.array(results) 224 | 225 | def close(self): 226 | self.closed = True 227 | next(self.predictions) 228 | -------------------------------------------------------------------------------- /DL/methods/eureqa_dynamics_learner.py: -------------------------------------------------------------------------------- 1 | import os 2 | from time import time 3 | import pickle 4 | 5 | 6 | import numpy as np 7 | 8 | 9 | from DL import DynamicsLearnerInterface 10 | 11 | 12 | atan2, exp, cos, sin, asin, acos, sqrt, tan = np.arctan2, np.exp, np.cos, np.sin, np.arcsin, np.arccos, np.sqrt, np.tan 13 | 14 | def logistic(x): 15 | return 1.0/(1.0+exp(-x)) 16 | 17 | class Eureqa(DynamicsLearnerInterface): 18 | def __init__(self, 19 | history_length, 20 | prediction_horizon): 21 | super().__init__(history_length, prediction_horizon, averaging = True) 22 | 
self.load_normalization_stats() 23 | 24 | 25 | def name(self): 26 | return 'Eureqa' 27 | 28 | def load(self, model_filename): 29 | pass 30 | 31 | def save(self, model_filename): 32 | pass 33 | 34 | def learn(self, training_observations, training_actions): 35 | print("There is no training for Eureqa model") 36 | 37 | 38 | def load_normalization_stats(self): 39 | std_file = "Eureqa/dataset_v03_hist{}avg-h{}-standarizers.dat".format(self.history_length, self.prediction_horizon) 40 | self.inputs_standardizer, self.targets_standardizer = pickle.load(open(std_file,"rb")) 41 | 42 | def _predict(self, single_input): 43 | """ 44 | Parameters 45 | ---------- 46 | single_input: one dimensional np-array with all the inputs to 47 | the dynamics model concatenated (size: input dim) 48 | (i.e. relevant observations and actions within 49 | the history length and prediction horizon) 50 | Outputs 51 | ---------- 52 | observation_prediction: two dimensional np-array shape: (n_examples, observation_dimension) 53 | corresponding the prediction for the observation 54 | after 1 step. 55 | 56 | """ 57 | N = single_input.shape[0] 58 | outputs = np.zeros(shape=(9,N)) 59 | if self.prediction_horizon == 1 and self.history_length == 10: 60 | (a1,a2,a3,o1,o2,o3,o4,o5,o6,o7,o8, o9) = single_input.T 61 | outputs[0] = 0.927545926485996*o4 - atan2(o4**2, 0.993772848147297)*atan2(1.51503614021168*o7 - 1.54259547584619*a1, atan2(0.701051028114578*o4**4, 2.18307914600903)) # 38 62 | outputs[1] = 0.915155783046552*o5 + 0.00999039773492605*a2**3 - 0.00223659206272502 - 0.000142902662532832*o8*a2**4 - 0.00666004702617924*o2*a2**2 # 33 63 | outputs[2] = 1.85669778663471*o6*logistic(0.504530431798097*a3/o6) - 0.233125311924516*atan2(2.35262483146102*o6*logistic(0.822784787640318*o9*cos(a3)/o6), logistic(-2.21644425141774*o3*o6)) #100 64 | return outputs.T 65 | elif self.prediction_horizon == 10 and self.history_length == 1: 66 | (a1,a2,a3,o1,o2,o3,o4,o5,o6,o7,o8, o9, fa1, fa2, fa3) = single_input.T 67 | outputs[0] = 0.948119054585915*o4 + 0.386106580966172*fa1 - 0.0685375755464533*o1 - 0.283543691559158*o7 - 0.275025861741297*atan2(atan2(sin(fa1 + 0.744650964153902*o4 - 0.131826320086593*o1 - 0.761602293358418*o7), 0.948119054585915), 0.948119054585915) # 44 68 | outputs[1] = 0.943758696287776*o5 + 0.251024338492492*a2 + 0.0319433419435836*o9 - 0.0615296919755952*o2 - 0.130351480626544*o8 - 0.152436113810505*sin(0.808347498005865*o5 + 0.227133974920654*fa2) # 32 69 | outputs[2] = 0.978015482034113*o6 + 0.187610472853223*a3 + 0.0584523177371514*fa3 - 0.0108895071770413 - 0.0459711720653234*o3 - 0.19277526019455*atan2(o6 + 0.242769987372392*a3 - 0.0122353820290359 - 0.0459711720653234*o3, 0.138588971296687) #36 70 | return outputs.T 71 | elif self.prediction_horizon == 10 and self.history_length == 10: 72 | (a1,a2,a3,o1,o2,o3,o4,o5,o6,o7,o8, o9, fa1, fa2, fa3) = single_input.T 73 | outputs[0] = o4 + 0.292515498948442*fa1 - 0.187669935664476*o1 - 0.37346237816614*atan2(0.309877594109472*fa1 + 3.26039998618911*o4**5 - 0.196222096246709*o1, 0.421130317499481) # 36 74 | outputs[1] = o5 + 0.184870045531283*fa2 + 0.0935843578392409*a3 - 0.0914628882259398*o2 - 0.208995660626396*atan2(0.220597690264509*fa2 + 0.162184310681982*o9 + 2*o5**3, 0.325811410204256) # 36 75 | outputs[2] = 0.932820662609382*o6 + 0.421489182928521*a3 - 0.00192261910908924 - 0.368345591341457*atan2(o6, 0.457379554910025) - 0.389121549145925*o9*cos(atan2(o6*o9 + 0.577581276000956*o6*fa3, 0.284430375989894)) # 82 76 | return outputs.T 77 | else: 78 | print("There is no 
trained model for prediction_horizon={0} and history_length={1}".format(self.history_length, self.prediction_horizon)) 79 | -------------------------------------------------------------------------------- /DL/methods/linear_regression_ls.py: -------------------------------------------------------------------------------- 1 | """ 2 | Learning a linear model with least squares. 3 | """ 4 | 5 | import argparse 6 | import numpy as np 7 | from DL import DynamicsLearnerInterface 8 | from DL.utils import loadRobotData 9 | from sklearn import linear_model 10 | 11 | class LinearModel(DynamicsLearnerInterface): 12 | 13 | def __init__(self, *args, **kwargs): 14 | super().__init__(*args, **kwargs) 15 | self.model_ = linear_model.LinearRegression() 16 | 17 | def _learn(self, training_inputs, training_targets): 18 | self.model_.fit(training_inputs, training_targets) 19 | 20 | def _predict(self, inputs): 21 | assert self.model_, "a trained model must be available" 22 | return self.model_.predict(inputs) 23 | 24 | def name(self): 25 | return "linear-model-ls" 26 | 27 | 28 | if __name__ == "__main__": 29 | parser = argparse.ArgumentParser(description=__doc__) 30 | parser.add_argument("--data_filename", required=True, 31 | help=" filename of the input robot data") 32 | args = parser.parse_args() 33 | observations, actions = loadRobotData(args.data_filename) 34 | dynamics_model = LinearModel(1, 1) 35 | dynamics_model.learn(observations, actions) 36 | print(dynamics_model.name()) 37 | -------------------------------------------------------------------------------- /DL/methods/linear_regression_sgd.py: -------------------------------------------------------------------------------- 1 | """ 2 | Learning a linear model with SGD using scikit-learn. 3 | """ 4 | 5 | import argparse 6 | import numpy as np 7 | from DL import DynamicsLearnerInterface 8 | from DL.utils import loadRobotData 9 | from sklearn import linear_model 10 | 11 | class LinearModelSGD(DynamicsLearnerInterface): 12 | 13 | def __init__(self, history_length, prediction_horizon, difference_learning, 14 | averaging, streaming, settings=None): 15 | super().__init__(history_length, prediction_horizon, 16 | difference_learning, averaging=averaging, streaming=streaming) 17 | eta0 = 0.0001 18 | epochs = 1 19 | if settings: 20 | eta0 = settings['eta0'] 21 | epochs = settings.get('epochs', 1) 22 | self.models_ = [] 23 | for i in range(self.observation_dimension): 24 | self.models_.append(linear_model.SGDRegressor(verbose=False, 25 | learning_rate='constant', eta0=eta0)) 26 | self.epochs_ = epochs 27 | 28 | def _learn(self, training_inputs, training_targets): 29 | for i in range(self.observation_dimension): 30 | self.models_[i].fit(training_inputs, training_targets[:,i]) 31 | 32 | def _learn_from_stream(self, training_generator, generator_size): 33 | for count in range(self.epochs_ * generator_size): 34 | training_target, training_input = next(training_generator) 35 | assert training_input.shape[0] == self._get_input_dim() 36 | model_input = training_input.reshape(1, -1) 37 | for output_idx in range(self.observation_dimension): 38 | model_target = training_target[output_idx:output_idx + 1] 39 | self.models_[output_idx].partial_fit(model_input, model_target) 40 | 41 | def _predict(self, inputs): 42 | assert self.models_, "a trained model must be available" 43 | prediction = np.zeros((inputs.shape[0], self.observation_dimension)) 44 | for i, model in enumerate(self.models_): 45 | prediction[:, i] = model.predict(inputs) 46 | return prediction 47 | 48 | def 
name(self): 49 | return "linear-model-SGD" 50 | 51 | if __name__ == "__main__": 52 | parser = argparse.ArgumentParser(description=__doc__) 53 | parser.add_argument("--data_filename", required=True, 54 | help=" filename of the input robot data") 55 | args = parser.parse_args() 56 | observations, actions = loadRobotData(args.data_filename) 57 | 58 | # Learning in batch mode. 59 | dynamics_model = LinearModelSGD(1, 1, True, False, False) 60 | dynamics_model.learn(observations, actions) 61 | print(dynamics_model.name()) 62 | 63 | # Learning in mini batch mode. 64 | dynamics_model = LinearModelSGD(1, 1, True, False, True) 65 | dynamics_model.learn(observations, actions) 66 | print(dynamics_model.name()) 67 | -------------------------------------------------------------------------------- /DL/methods/nn_dynamics_learner.py: -------------------------------------------------------------------------------- 1 | import os 2 | from time import time 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | from tensorflow.keras.layers import Dense 8 | from tensorflow.keras.models import Sequential 9 | from tensorflow.keras.optimizers import Adam, Adadelta, Adagrad, SGD, RMSprop 10 | import tensorflow.keras.backend as K 11 | from tensorflow.keras.models import load_model 12 | from tensorflow.keras.callbacks import TensorBoard 13 | from tensorflow.keras import regularizers 14 | 15 | from DL import DynamicsLearnerInterface 16 | 17 | 18 | 19 | 20 | class NNDynamicsLearner(DynamicsLearnerInterface): 21 | def __init__(self, 22 | history_length, 23 | prediction_horizon, 24 | averaging, 25 | streaming, 26 | model_arch_params, 27 | model_train_params, 28 | mode): 29 | 30 | 31 | self.input_dim = 3 * (prediction_horizon - 1) + history_length * 12 32 | self.output_dim = 9 33 | super().__init__(history_length, prediction_horizon, 34 | averaging=averaging, 35 | streaming=streaming) 36 | self._parse_arch_params(**model_arch_params) 37 | if mode == "train": 38 | self._parse_train_params(**model_train_params) 39 | self.model = Sequential() 40 | self.build() 41 | 42 | def name(self): 43 | return 'NN' #TODO: change to attribute 44 | 45 | def _parse_arch_params(self, num_layers, num_units, activation): 46 | self.num_layers = num_layers 47 | self.size = num_units 48 | self.activation = activation_from_string(activation) 49 | 50 | def _parse_train_params(self, learning_rate, optimizer, batch_size, validation_split, epochs, loss, l2_reg): # l2_reg, 51 | self.learning_rate = learning_rate 52 | self.optimizer = optimizer_from_string(optimizer)(lr=self.learning_rate) 53 | self.batch_size = batch_size 54 | self.validation_split = validation_split 55 | self.epochs = epochs 56 | self.loss = loss 57 | self.l2_reg = l2_reg 58 | 59 | def build(self): 60 | all_dims = [self.input_dim] + [self.size] * (self.num_layers - 1) 61 | for in_dim, size in zip(all_dims[:-1], all_dims[1:]): 62 | self.model.add(Dense(units=size, input_dim=in_dim, activation=self.activation, kernel_regularizer=regularizers.l2(self.l2_reg))) # 63 | self.model.add(Dense(units=self.output_dim, input_dim=all_dims[-1], activation=None)) 64 | 65 | self.model.compile(optimizer=self.optimizer, loss=self.loss) 66 | self.tensorboard = TensorBoard(log_dir="logs/history_length_{4}_prediction_horizon_{5}_n_{0}_m_{1}_l2_reg_{3}_Adam_lr_{2}_bs_512_epochs_400_{4}".format(self.num_layers, 67 | self.size, 68 | self.learning_rate, 69 | self.l2_reg, 70 | self.history_length, 71 | self.prediction_horizon, 72 | time())) 73 | def _learn_from_stream(self, generator, datastream_size): 
74 | """ 75 | Parameters 76 | ---------- 77 | training_inputs: np-array of shape nTrainingInstances x input dim 78 | that represents the input to the dynamics 79 | (i.e. relevant observations and actions within 80 | the history length and prediction horizon) 81 | training_targets: np-array of shape nTrainingInstances x state dim 82 | denoting the targets of the dynamics model. 83 | """ 84 | 85 | input_output_shapes = ([self.input_dim], [self.output_dim]) 86 | input_output_dtypes = (tf.float64, tf.float64) 87 | def switch_input_target(): 88 | def gen(): 89 | for target, input in generator: 90 | yield input, target 91 | return gen 92 | ds = tf.data.Dataset.from_generator(switch_input_target(), 93 | input_output_dtypes, input_output_shapes) 94 | ds = ds.repeat() 95 | ds = ds.batch(self.batch_size) 96 | self.model.fit(ds.make_one_shot_iterator(), 97 | steps_per_epoch=datastream_size//self.batch_size, 98 | epochs=self.epochs, 99 | callbacks=[self.tensorboard]) 100 | 101 | def _learn(self, training_inputs, training_targets): 102 | """ 103 | Parameters 104 | ---------- 105 | training_inputs: np-array of shape nTrainingInstances x input dim 106 | that represents the input to the dynamics 107 | (i.e. relevant observations and actions within 108 | the history length and prediction horizon) 109 | training_targets: np-array of shape nTrainingInstances x state dim 110 | denoting the targets of the dynamics model. 111 | """ 112 | self.model.fit(x=training_inputs, y=training_targets, batch_size=self.batch_size, epochs=self.epochs, 113 | validation_split=self.validation_split, shuffle=False, callbacks=[self.tensorboard]) 114 | 115 | def _predict(self, single_input): 116 | """ 117 | Parameters 118 | ---------- 119 | single_input: one dimensional np-array with all the inputs to 120 | the dynamics model concatenated (size: input dim) 121 | (i.e. relevant observations and actions within 122 | the history length and prediction horizon) 123 | Outputs 124 | ---------- 125 | observation_prediction: two dimensional np-array shape: (n_examples, observation_dimension) 126 | corresponding the prediction for the observation 127 | after 1 step. 128 | 129 | """ 130 | 131 | 132 | deltas = self.model.predict(single_input) 133 | return deltas 134 | 135 | def save(self, model_filename): 136 | self.model.save(model_filename) 137 | 138 | 139 | def load(self, model_filename): 140 | def position_loss(y_true, y_pred): 141 | return tf.losses.mean_squared_error(y_true[:, :3], y_pred[:, :3]) 142 | custom_objects={'position_loss': position_loss} 143 | self.model = load_model(model_filename, custom_objects=custom_objects) 144 | 145 | 146 | 147 | def optimizer_from_string(opt_str): 148 | opt_dict = {'Adam': Adam, 'Adagrad': Adagrad, 'Adadelta': Adadelta, 'SGD': SGD, 'RMSprop': RMSprop} 149 | if opt_str in opt_dict: 150 | return opt_dict[opt_str] 151 | else: 152 | raise NotImplementedError('Implement optimizer {} and add it to dictionary'.format(opt_str)) 153 | 154 | 155 | def activation_from_string(act_str): 156 | act_dict = {'relu': K.relu, 'tanh': K.tanh} 157 | if act_str in act_dict: 158 | return act_dict[act_str] 159 | else: 160 | raise NotImplementedError('Add activation function {} to dictionary'.format(act_str)) 161 | -------------------------------------------------------------------------------- /DL/methods/pilco_dynamics_learner.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dynamics learning using GPs as in PILCO. 
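One GP is fit per output dimension via PILCO's multi-output GP module (mgpr), and predictions concatenate the per-dimension posterior means.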
3 | """ 4 | 5 | import argparse 6 | import numpy as np 7 | from collections import defaultdict 8 | from DL import DynamicsLearnerInterface 9 | from DL.utils import loadRobotData 10 | from pilco.models import PILCO 11 | 12 | class PilcoDynamicsLearner(DynamicsLearnerInterface): 13 | 14 | def __init__(self, history_length, prediction_horizon, ninducing_points, 15 | nsampled_training_points = None, averaging = True): 16 | super().__init__(history_length, prediction_horizon, 17 | averaging=averaging) 18 | self.ninducing_points = ninducing_points 19 | self.nsampled_training_points = nsampled_training_points 20 | 21 | def _learn(self, training_inputs, training_targets): 22 | assert self.ninducing_points 23 | 24 | # Subsampling the data if required. 25 | if self.nsampled_training_points: 26 | training_inputs, training_targets = self._subsample_training_set( 27 | training_inputs, training_targets) 28 | 29 | # Full GP if no inducing points are provided. 30 | self.pilco_ = PILCO(training_inputs, training_targets, 31 | self.ninducing_points) 32 | self.pilco_.optimize_models(disp=True) 33 | 34 | def _predict(self, inputs): 35 | assert self.pilco_, "a trained model must be available" 36 | prediction = [] 37 | for model in self.pilco_.mgpr.models: 38 | means, _ = model.predict_f(inputs) 39 | prediction.append(means) 40 | return np.hstack(prediction) 41 | 42 | def _subsample_training_set(self, training_inputs, training_targets): 43 | assert self.nsampled_training_points 44 | total_size = training_inputs.shape[0] 45 | permutation = np.random.permutation( 46 | total_size)[:self.nsampled_training_points] 47 | return training_inputs[permutation], training_targets[permutation] 48 | 49 | def name(self): 50 | return "pilco" 51 | 52 | def load(self, filename): 53 | params_dict = np.load(filename) 54 | for k in params_dict.keys(): 55 | print(k, params_dict[k].shape) 56 | raise NotImplementedError # TODO: parse the hyperparameters. 57 | 58 | def save(self, filename): 59 | """ 60 | Stores the hyperparameters of the GPs which includes inducing points 61 | in the case of sparse approximations. 62 | """ 63 | params_dict = defaultdict(list) 64 | for model in self.pilco_.mgpr.models: 65 | params = model.read_trainables() 66 | for key in params.keys(): 67 | params_dict[key].append(params[key]) 68 | np.savez(filename, **params_dict) 69 | 70 | if __name__ == "__main__": 71 | parser = argparse.ArgumentParser(description=__doc__) 72 | parser.add_argument("--data_filename", required=True, 73 | help=" filename of the input robot data") 74 | parser.add_argument("--model_filename", required=True, 75 | help=" filename where the model will be saved") 76 | parser.add_argument("--ninducing", default=10, type=int) 77 | parser.add_argument("--ntraining", default=10, type=int) 78 | args = parser.parse_args() 79 | observations, actions = loadRobotData(args.data_filename) 80 | dynamics_model = PilcoDynamicsLearner(1, 1, args.ninducing, args.ntraining) 81 | dynamics_model.learn(observations, actions) 82 | dynamics_model.save(args.model_filename) 83 | 84 | # TODO: figure out why after the next line tensorflow throws an error. 85 | # This apparently only happens when this is file is executed as a script. 86 | print(dynamics_model.name()) 87 | -------------------------------------------------------------------------------- /DL/methods/system_id.lyx: -------------------------------------------------------------------------------- 1 | #LyX 2.2 created this file. 
For more info see http://www.lyx.org/ 2 | \lyxformat 508 3 | \begin_document 4 | \begin_header 5 | \save_transient_properties true 6 | \origin unavailable 7 | \textclass article 8 | \use_default_options true 9 | \maintain_unincluded_children false 10 | \language english 11 | \language_package default 12 | \inputencoding auto 13 | \fontencoding global 14 | \font_roman "default" "default" 15 | \font_sans "default" "default" 16 | \font_typewriter "default" "default" 17 | \font_math "auto" "auto" 18 | \font_default_family default 19 | \use_non_tex_fonts false 20 | \font_sc false 21 | \font_osf false 22 | \font_sf_scale 100 100 23 | \font_tt_scale 100 100 24 | \graphics default 25 | \default_output_format default 26 | \output_sync 0 27 | \bibtex_command default 28 | \index_command default 29 | \paperfontsize default 30 | \use_hyperref false 31 | \papersize default 32 | \use_geometry false 33 | \use_package amsmath 1 34 | \use_package amssymb 1 35 | \use_package cancel 1 36 | \use_package esint 1 37 | \use_package mathdots 1 38 | \use_package mathtools 1 39 | \use_package mhchem 1 40 | \use_package stackrel 1 41 | \use_package stmaryrd 1 42 | \use_package undertilde 1 43 | \cite_engine basic 44 | \cite_engine_type default 45 | \biblio_style plain 46 | \use_bibtopic false 47 | \use_indices false 48 | \paperorientation portrait 49 | \suppress_date false 50 | \justification true 51 | \use_refstyle 1 52 | \index Index 53 | \shortcut idx 54 | \color #008000 55 | \end_index 56 | \secnumdepth 3 57 | \tocdepth 3 58 | \paragraph_separation indent 59 | \paragraph_indentation default 60 | \quotes_language english 61 | \papercolumns 1 62 | \papersides 1 63 | \paperpagestyle default 64 | \tracking_changes false 65 | \output_changes false 66 | \html_math_output 0 67 | \html_css_as_file 0 68 | \html_be_strict false 69 | \end_header 70 | 71 | \begin_body 72 | 73 | \begin_layout Standard 74 | Suppose we have a manipulator with 75 | \begin_inset Formula $N_{dof}$ 76 | \end_inset 77 | 78 | degrees of freedom. 79 | The dynamics can be written as (without friction for now) 80 | \begin_inset Formula 81 | \begin{equation} 82 | \tau_{t}=Y(q_{t},v_{t},a_{t})\theta 83 | \end{equation} 84 | 85 | \end_inset 86 | 87 | Where 88 | \begin_inset Formula $\theta$ 89 | \end_inset 90 | 91 | is a 92 | \begin_inset Formula $N_{\theta}$ 93 | \end_inset 94 | 95 | dimensional parameter vector. 96 | 97 | \end_layout 98 | 99 | \begin_layout Standard 100 | Suppose we have a dataset 101 | \begin_inset Formula $S=(\tau_{i},q_{i},v_{i},a_{i})_{i=1}^{N_{S}}$ 102 | \end_inset 103 | 104 | . 105 | The goal is to fit this dataset well everywhere, i.e. 106 | ideally we would like 107 | \begin_inset Formula 108 | \begin{equation} 109 | \underbrace{\left(\begin{array}{c} 110 | \tau_{1}\\ 111 | \vdots\\ 112 | \tau_{N} 113 | \end{array}\right)}_{\mathbf{T}}=\underbrace{\left(\begin{array}{c} 114 | Y(q_{1},v_{1},a_{1})\\ 115 | \vdots\\ 116 | Y(q_{N},v_{N},a_{N}) 117 | \end{array}\right)}_{\mathbf{Y}}\theta. 118 | \end{equation} 119 | 120 | \end_inset 121 | 122 | Now it may of course not be possible to fit the data perfectly, hence we 123 | can write instead the optimization problem 124 | \begin_inset Formula 125 | \begin{equation} 126 | \min_{\theta}\left\Vert \mathbf{Y}\theta-\mathbf{T}\right\Vert . 
127 | \end{equation} 128 | 129 | \end_inset 130 | 131 | Any 132 | \begin_inset Formula $\theta$ 133 | \end_inset 134 | 135 | which satisifies 136 | \begin_inset Formula 137 | \begin{equation} 138 | \mathbf{Y}^{\top}\mathbf{Y}\theta=\mathbf{Y}^{\top}\mathbf{T} 139 | \end{equation} 140 | 141 | \end_inset 142 | 143 | is optimal. 144 | If 145 | \begin_inset Formula $\mathbb{\mathbf{Y}}$ 146 | \end_inset 147 | 148 | has rank 149 | \begin_inset Formula $N_{\theta}$ 150 | \end_inset 151 | 152 | , i.e. 153 | it has at least 154 | \begin_inset Formula $N_{\theta}$ 155 | \end_inset 156 | 157 | independent rows, then the solution is unique. 158 | The complexity of computing 159 | \begin_inset Formula $\mathbf{Y}^{\top}\mathbf{Y}$ 160 | \end_inset 161 | 162 | is 163 | \begin_inset Formula $N_{S}N_{dof}N_{\theta}^{2}$ 164 | \end_inset 165 | 166 | , which means that this is feasible even for a large number of datapoints. 167 | 168 | \end_layout 169 | 170 | \begin_layout Standard 171 | If we add some regularizaion 172 | \begin_inset Formula 173 | \begin{equation} 174 | \min_{\theta}\left(\left\Vert \mathbf{Y}\theta-\mathbf{T}\right\Vert ^{2}+\epsilon\left\Vert \theta-\mu\right\Vert ^{2}\right) 175 | \end{equation} 176 | 177 | \end_inset 178 | 179 | 180 | \end_layout 181 | 182 | \begin_layout Standard 183 | we obtain the normal equation 184 | \begin_inset Formula 185 | \begin{equation} 186 | \left(\mathbf{Y}^{\top}\mathbf{Y}+\epsilon\mathbf{I}\right)\theta=\mathbf{Y}^{\top}\mathbf{T}+\epsilon\mu 187 | \end{equation} 188 | 189 | \end_inset 190 | 191 | 192 | \end_layout 193 | 194 | \begin_layout Section 195 | Adding friction 196 | \end_layout 197 | 198 | \begin_layout Standard 199 | \begin_inset Formula 200 | \begin{align} 201 | \tau_{t}-diag(v_{t})\theta_{vf}-diag(sign(v_{t}))\theta_{sf} & =Y(q_{t},v_{t},a_{t})\theta_{i}\\ 202 | \tau_{t} & =Y(q_{t},v_{t},a_{t})\theta_{i}+diag(v_{t})\theta_{vf}+diag(sign(v_{t}))\theta_{sf}\\ 203 | \tau_{t} & =\left(\begin{array}{ccc} 204 | Y(q_{t},v_{t},a_{t}) & diag(v_{t}) & diag(sign(v_{t}))\end{array}\right)\left(\begin{array}{c} 205 | \theta_{i}\\ 206 | \theta_{vf}\\ 207 | \theta_{sf} 208 | \end{array}\right) 209 | \end{align} 210 | 211 | \end_inset 212 | 213 | 214 | \end_layout 215 | 216 | \end_body 217 | \end_document 218 | -------------------------------------------------------------------------------- /DL/methods/system_id.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | import sys 5 | 6 | 7 | from collections import OrderedDict 8 | 9 | 10 | import numpy as np 11 | import numpy.matlib 12 | import time 13 | import os 14 | 15 | import math 16 | 17 | import matplotlib.pylab as plt 18 | 19 | import pinocchio 20 | 21 | from pinocchio.robot_wrapper import RobotWrapper 22 | from pinocchio.visualize import * 23 | from os.path import join, dirname 24 | 25 | from scipy.ndimage import gaussian_filter1d 26 | 27 | import rospkg 28 | 29 | import time 30 | 31 | import cvxpy 32 | 33 | # dynamics learning stuff 34 | from DL import DynamicsLearnerInterface 35 | 36 | 37 | class SystemId(DynamicsLearnerInterface): 38 | def __init__(self, 39 | history_length, 40 | prediction_horizon, 41 | settings=None): 42 | DynamicsLearnerInterface.__init__(self, 43 | history_length=history_length, 44 | prediction_horizon=prediction_horizon, 45 | difference_learning=False, 46 | averaging=False, 47 | streaming=False) 48 | if settings is None: 49 | settings = {} 50 | # TODO: should we have a default 
value for this attribute. 51 | self.identification_method = settings.pop( 52 | 'identification_method', None) 53 | self.robot = Robot(**settings) 54 | 55 | self.dt = 0.001 56 | 57 | def learn(self, observation_sequences, action_sequences): 58 | # preprocess data ------------------------------------------------------ 59 | data = dict() 60 | data['angle'] = observation_sequences[:, :, :3] 61 | data['velocity'] = observation_sequences[:, :, 3:6] 62 | data['torque'] = action_sequences 63 | compute_accelerations(data, self.dt) 64 | data = preprocess_data(data=data, 65 | desired_n_data_points=100000, 66 | smoothing_sigma=1.0) 67 | print('Learning with {} points'.format(data['angle'].shape[0])) 68 | 69 | # identify ------------------------------------------------------------- 70 | if self.identification_method == 'cad': 71 | return 72 | elif self.identification_method == 'ls': 73 | sys_id(robot=self.robot, 74 | angle=data['angle'], 75 | velocity=data['velocity'], 76 | acceleration=data['acceleration'], 77 | torque=data['torque'], 78 | method_name='ls') 79 | elif self.identification_method == 'ls-lmi': 80 | sys_id(robot=self.robot, 81 | angle=data['angle'], 82 | velocity=data['velocity'], 83 | acceleration=data['acceleration'], 84 | torque=data['torque'], 85 | method_name='lmi') 86 | else: 87 | raise NotImplementedError('Choose an identification method') 88 | 89 | def predict(self, observation_history, action_history, action_future=None): 90 | # parse arguments ------------------------------------------------------ 91 | n_samples = observation_history.shape[0] 92 | dim_observation = observation_history.shape[2] 93 | 94 | if action_future is None: 95 | assert self.prediction_horizon == 1 96 | action_future = np.empty((n_samples, 0, self.action_dimension)) 97 | 98 | assert (action_future.shape[1] == self.prediction_horizon - 1) 99 | 100 | # make predictions ----------------------------------------------------- 101 | predictions = np.empty((n_samples, dim_observation)) 102 | predictions[:] = numpy.nan 103 | 104 | for i in range(n_samples): 105 | torques_sequence = np.append(action_history[i, -1:], 106 | action_future[i], 107 | axis=0) 108 | 109 | angle = observation_history[i, -1, :3] 110 | velocity = observation_history[i, -1, 3:6] 111 | for t in range(0, self.prediction_horizon): 112 | angle, velocity = \ 113 | self.robot.predict(angle=angle, 114 | velocity=velocity, 115 | torque=torques_sequence[t], 116 | dt=self.dt) 117 | 118 | predictions[i] = np.concatenate([np.array(angle).flatten(), 119 | np.array(velocity).flatten(), 120 | torques_sequence[-1]], axis=0) 121 | return predictions 122 | 123 | def name(self): 124 | return 'system_id' 125 | 126 | 127 | def to_matrix(array): 128 | matrix = np.matrix(array) 129 | if matrix.shape[0] < matrix.shape[1]: 130 | matrix = matrix.transpose() 131 | 132 | return matrix 133 | 134 | 135 | def to_diagonal_matrix(vector): 136 | return np.matrix(np.diag(np.array(vector).flatten())) 137 | 138 | 139 | class Robot(RobotWrapper): 140 | def __init__(self, symplectic=True, init='cad', 141 | visualizer=None): 142 | self.load_urdf() 143 | self.viscous_friction = to_matrix(np.zeros(3)) + 0.0001 144 | self.static_friction = to_matrix(np.zeros(3)) + 0.00 145 | self.symplectic = symplectic 146 | if visualizer == "meshcat": 147 | self.setVisualizer(MeshcatVisualizer()) 148 | elif visualizer == "gepetto": 149 | self.setVisualizer(GepettoVisualizer()) 150 | elif visualizer: 151 | raise NotImplementedError 152 | 153 | # Initialization of Parameters 154 | if init == 'cad': 155 | 
pass 156 | elif init == 'random': 157 | self.set_random_params() 158 | elif init == 'noisy': 159 | self.set_noisy_params() 160 | elif init == 'identity': 161 | self.set_identity_params() 162 | else: 163 | raise NotImplementedError 164 | 165 | # dynamics ----------------------------------------------------------------- 166 | def simulate(self, 167 | dt, 168 | n_steps=None, 169 | torque=None, 170 | initial_angle=None, 171 | initial_velocity=None, 172 | mask=np.ones(3), 173 | verbose=False): 174 | """ Returns the sequence of angles, velocities and torques resulting 175 | from simulating the given torques.""" 176 | zero = pinocchio.utils.zero(self.model.nv) 177 | 178 | torque = np.array(zero) if torque is None else np.array(torque) 179 | torque = torque.reshape(-1, 3, 1) 180 | if torque.shape[0] == 1: 181 | assert (n_steps) 182 | torque = np.repeat(torque, repeats=n_steps, axis=0) 183 | elif n_steps: 184 | assert (n_steps == torque.shape[0]) 185 | 186 | angle = zero if initial_angle is None else to_matrix(initial_angle) 187 | velocity = \ 188 | zero if initial_velocity is None else to_matrix(initial_velocity) 189 | mask = to_matrix(mask) 190 | 191 | simulated_angles = [] 192 | simulated_vels = [] 193 | simulated_accelerations = [] 194 | applied_torques = [] 195 | for t in range(torque.shape[0]): 196 | acceleration = self.forward_dynamics(angle, velocity, torque[t]) 197 | 198 | simulated_angles.append(np.ravel(angle)) 199 | simulated_vels.append(np.ravel(velocity)) 200 | simulated_accelerations.append(np.ravel(acceleration)) 201 | applied_torques.append(np.ravel(torque[t])) 202 | if self.symplectic: 203 | velocity = velocity + np.multiply(mask, acceleration * dt) 204 | angle = angle + np.multiply(mask, velocity * dt) 205 | else: 206 | angle = angle + np.multiply(mask, velocity * dt) 207 | velocity = velocity + np.multiply(mask, acceleration * dt) 208 | if verbose: 209 | print('angle: ', np.array(angle).flatten(), 210 | '\nvelocity: ', np.array(velocity).flatten()) 211 | return np.array(simulated_angles), np.array(simulated_vels), \ 212 | np.array(simulated_accelerations), np.array(applied_torques) 213 | 214 | def predict(self, angle, velocity, torque, dt): 215 | angle = to_matrix(angle) 216 | velocity = to_matrix(velocity) 217 | torque = to_matrix(torque) 218 | acceleration = self.forward_dynamics(angle, velocity, torque) 219 | if self.symplectic: 220 | velocity = velocity + acceleration * dt 221 | angle = angle + velocity * dt 222 | else: 223 | angle = angle + velocity * dt 224 | velocity = velocity + acceleration * dt 225 | return angle, velocity 226 | 227 | def friction_torque(self, velocity): 228 | velocity = to_matrix(velocity) 229 | return -(np.multiply(velocity, self.viscous_friction) + 230 | np.multiply(np.sign(velocity), self.static_friction)) 231 | 232 | def forward_dynamics(self, angle, velocity, actuator_torque): 233 | joint_torque = actuator_torque + self.friction_torque(velocity) 234 | 235 | return pinocchio.aba(self.model, self.data, angle, velocity, 236 | joint_torque) 237 | 238 | def inverse_dynamics(self, angle, velocity, acceleration): 239 | 240 | joint_torque = pinocchio.rnea(self.model, self.data, 241 | to_matrix(angle), 242 | to_matrix(velocity), 243 | to_matrix(acceleration)) 244 | actuator_torque = joint_torque - self.friction_torque(velocity) 245 | 246 | # TODO: Figure out why this fails some times. 
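# Sanity check below: the dynamics are linear in the parameters, so the
# actuator torque computed above must equal Y(q, v, a) * theta, where Y is the
# regressor matrix (inertial plus friction columns) and theta is the stacked
# parameter vector -- see the derivation in system_id.lyx and
# test_regressor_matrix further down for the same identity on random inputs.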
247 | # just as a sanity check ----------------------------------------------- 248 | Y = self.compute_regressor_matrix(angle, velocity, acceleration) 249 | actuator_torque_1 = Y * self.get_params() 250 | assert ((abs(actuator_torque - actuator_torque_1) <= 1e-6).all()) 251 | # ---------------------------------------------------------------------- 252 | 253 | return actuator_torque 254 | 255 | def compute_regressor_matrix(self, angle, velocity, acceleration): 256 | joint_torque_regressor = \ 257 | pinocchio.computeJointTorqueRegressor(self.model, self.data, 258 | to_matrix(angle), 259 | to_matrix(velocity), 260 | to_matrix(acceleration)) 261 | 262 | viscous_friction_torque_regressor = to_diagonal_matrix(velocity) 263 | static_friction_torque_regressor = to_diagonal_matrix( 264 | np.sign(velocity)) 265 | 266 | regressor_matrix = np.concatenate([ 267 | joint_torque_regressor, 268 | viscous_friction_torque_regressor, 269 | static_friction_torque_regressor], axis=1) 270 | 271 | return regressor_matrix 272 | 273 | def params_to_inertia_about_origin(self, params, link_index): 274 | if isinstance(params, np.ndarray): 275 | params = np.array(params).flatten() 276 | 277 | inertia_about_origin = \ 278 | np.array([[params[10 * link_index + 4], params[10 * link_index + 5], params[10 * link_index + 7]], 279 | [params[10 * link_index + 5], params[10 * link_index + 6], params[10 * link_index + 8]], 280 | [params[10 * link_index + 7], params[10 * link_index + 8], params[10 * link_index + 9]]]) 281 | return inertia_about_origin 282 | 283 | # see Wensing et al 2018 for details 284 | def params_to_second_moment(self, params, link_index): 285 | inertia_about_origin = self.params_to_inertia_about_origin(params, link_index) 286 | 287 | second_moment = np.diag([0.5 * np.trace(inertia_about_origin) 288 | for _ in range(3)]) - inertia_about_origin 289 | 290 | return second_moment 291 | 292 | # see Wensing et al 2018 for details 293 | def params_to_pseudo_inertia(self, params, link_index): 294 | second_moment = self.params_to_second_moment(params, link_index) 295 | mass_times_com = self.params_to_mass_times_com(params, link_index) 296 | mass = self.params_to_mass(params, link_index) 297 | 298 | pseudo_inertia = np.empty(shape=[4, 4], dtype=second_moment.dtype) 299 | 300 | pseudo_inertia[:3, :3] = second_moment 301 | pseudo_inertia[3, :3] = mass_times_com 302 | pseudo_inertia[:3, 3] = mass_times_com 303 | pseudo_inertia[3, 3] = mass 304 | 305 | return pseudo_inertia 306 | 307 | def params_to_mass_times_com(self, params, link_index): 308 | if isinstance(params, np.ndarray): 309 | params = np.array(params).flatten() 310 | 311 | mass_times_com = np.array([params[10 * link_index + 1], 312 | params[10 * link_index + 2], params[10 * link_index + 3]]) 313 | return mass_times_com 314 | 315 | def params_to_mass(self, params, link_index): 316 | if isinstance(params, np.ndarray): 317 | params = np.array(params).flatten() 318 | 319 | mass = params[10 * link_index] 320 | return mass 321 | 322 | def params_to_viscous_friction(self, params, link_index): 323 | if isinstance(params, np.ndarray): 324 | params = np.array(params).flatten() 325 | 326 | return params[10 * self.count_degrees_of_freedom() + link_index] 327 | 328 | def params_to_static_friction(self, params, link_index): 329 | if isinstance(params, np.ndarray): 330 | params = np.array(params).flatten() 331 | 332 | return params[11 * self.count_degrees_of_freedom() + link_index] 333 | 334 | def count_degrees_of_freedom(self): 335 | return self.nv 336 | 337 | # getters and 
setters ------------------------------------------------------ 338 | 339 | def get_params(self): 340 | theta = [self.model.inertias[i].toDynamicParameters() 341 | for i in range(1, len(self.model.inertias))] 342 | 343 | theta = theta + [self.viscous_friction, self.static_friction] 344 | 345 | theta = np.concatenate(theta, axis=0) 346 | 347 | # some sanity checks 348 | for i in range(len(self.model.inertias) - 1): 349 | A = self.params_to_inertia_about_origin(theta, i) 350 | B = self.get_inertia_about_origin(i) 351 | assert(np.allclose(A, B)) 352 | 353 | A = self.params_to_mass_times_com(theta, i) 354 | B = self.get_mass_times_com(i) 355 | assert(np.allclose(A, B)) 356 | 357 | A = self.params_to_mass(theta, i) 358 | B = self.get_mass(i) 359 | assert(np.allclose(A, B)) 360 | 361 | A = self.params_to_viscous_friction(theta, i) 362 | B = self.get_viscous_friction(i) 363 | assert(np.allclose(A, B)) 364 | 365 | A = self.params_to_static_friction(theta, i) 366 | B = self.get_static_friction(i) 367 | assert(np.allclose(A, B)) 368 | 369 | A = self.params_to_second_moment(theta, i) 370 | B = self.get_second_moment(i) 371 | assert(np.allclose(A, B)) 372 | 373 | return theta 374 | 375 | def get_com(self, link_index): 376 | return np.array(self.model.inertias[link_index + 1].lever).flatten() 377 | 378 | def get_mass(self, link_index): 379 | return self.model.inertias[link_index + 1].mass 380 | 381 | def get_mass_times_com(self, link_index): 382 | return self.get_mass(link_index) * self.get_com(link_index) 383 | 384 | def get_inertia_about_com(self, link_index): 385 | return np.array(self.model.inertias[link_index + 1].inertia) 386 | 387 | def get_inertia_about_origin(self, link_index): 388 | inertia_matrix_com = self.get_inertia_about_com(link_index) 389 | com = self.get_com(link_index) 390 | mass = self.get_mass(link_index) 391 | 392 | # parallel axis theorem 393 | inertia_matrix_origin = inertia_matrix_com + mass * \ 394 | (np.inner(com, com)*np.identity(3) - np.outer(com, com)) 395 | return inertia_matrix_origin 396 | 397 | def get_viscous_friction(self, link_index): 398 | return self.viscous_friction[link_index] 399 | 400 | def get_static_friction(self, link_index): 401 | return self.static_friction[link_index] 402 | 403 | def get_second_moment(self, link_index): 404 | inertia_about_com = self.get_inertia_about_com(link_index) 405 | mass = self.get_mass(link_index) 406 | com = self.get_com(link_index) 407 | 408 | second_moment = 0.5 * np.trace(inertia_about_com) * \ 409 | np.identity(3) - \ 410 | inertia_about_com + mass * np.outer(com, com) 411 | 412 | return second_moment 413 | 414 | def set_params(self, theta): 415 | 416 | for dof in range(self.model.nv): 417 | theta_dof = theta[dof * 10: (dof + 1) * 10] 418 | 419 | self.model.inertias[dof + 1] = pinocchio.libpinocchio_pywrap.Inertia.FromDynamicParameters( 420 | theta_dof) 421 | 422 | n_inertial_params = self.model.nv * 10 423 | self.viscous_friction = theta[n_inertial_params: n_inertial_params + 3] 424 | self.static_friction = theta[ 425 | n_inertial_params + 3: n_inertial_params + 6] 426 | 427 | assert (((self.get_params() - theta) < 1e-9).all()) 428 | 429 | def set_random_params(self): 430 | for dof in range(self.model.nv): 431 | self.model.inertias[dof + 1].setRandom() 432 | 433 | def set_identity_params(self): 434 | for dof in range(self.model.nv): 435 | self.model.inertias[dof + 1].setIdentity() 436 | 437 | def set_noisy_params(self): 438 | sigma = 0.001 439 | for dof in range(self.model.nv): 440 | self.model.inertias[dof + 1].mass += 
sigma * np.random.randn() 441 | self.model.inertias[dof + 1].lever += sigma * np.random.randn(3, 1) 442 | self.model.inertias[dof + 1].inertia += np.abs(np.diag( 443 | sigma * np.random.randn(3))) 444 | 445 | # loading ------------------------------------------------------------------ 446 | def load_urdf(self): 447 | try: 448 | model_path = rospkg.RosPack().get_path( 449 | "robot_properties_manipulator") 450 | except rospkg.ResourceNotFound: 451 | print('Warning: The URDF is not being loaded from a ROS package.') 452 | current_path = str(os.path.dirname(os.path.abspath(__file__))) 453 | model_path = str(os.path.abspath(os.path.join(current_path, 454 | '../../robot_properties_manipulator'))) 455 | urdf_path = join(model_path, "urdf", "manipulator.urdf") 456 | meshes_path = dirname(model_path) 457 | print(urdf_path, meshes_path) 458 | self.initFromURDF(urdf_path, [meshes_path]) 459 | 460 | def test_regressor_matrix(self): 461 | for _ in range(100): 462 | angle = pinocchio.randomConfiguration(self.model) 463 | velocity = pinocchio.utils.rand(self.model.nv) 464 | acceleration = pinocchio.utils.rand(self.model.nv) 465 | 466 | Y = self.compute_regressor_matrix(angle, velocity, acceleration) 467 | theta = self.get_params() 468 | other_tau = Y * theta 469 | 470 | torque = self.inverse_dynamics(angle, velocity, acceleration) 471 | 472 | assert ((abs(torque - other_tau) <= 1e-9).all()) 473 | 474 | 475 | def compute_accelerations(data, dt): 476 | data['acceleration'] = np.diff(data['velocity'], axis=1) / dt 477 | for key in ['angle', 'velocity', 'torque']: 478 | data[key] = data[key][:, :-1] 479 | 480 | # test that everything worked out ----------------------------------------- 481 | integrated_velocity = data['velocity'][:, :-1] + \ 482 | data['acceleration'][:, :-1] * dt 483 | 484 | is_consistent = (np.absolute(integrated_velocity - 485 | data['velocity'][:, 1:]) <= 1e-12).all() 486 | assert(is_consistent) 487 | 488 | 489 | def preprocess_data(data, desired_n_data_points, 490 | smoothing_sigma=None, shuffle_data=True): 491 | # smoothen ----------------------------------------------------------------- 492 | if smoothing_sigma is not None: 493 | for key in data.keys(): 494 | data[key] = gaussian_filter1d(data[key], 495 | sigma=smoothing_sigma, 496 | axis=1) 497 | 498 | # cut off ends ------------------------------------------------------------- 499 | for key in data.keys(): 500 | data[key] = data[key][:, 501 | data[key].shape[1] // 10: -data[key].shape[1] // 10] 502 | 503 | # reshape ------------------------------------------------------------------ 504 | ordered_data = data.copy() 505 | n_trajectories = data['angle'].shape[0] 506 | n_time_steps = data['angle'].shape[1] 507 | n_data_points = n_trajectories * n_time_steps 508 | for key in data.keys(): 509 | data[key] = np.reshape(data[key], [n_data_points, 3]) 510 | 511 | for _ in range(10): 512 | trajectory_idx = np.random.randint(n_trajectories) 513 | time_step = np.random.randint(n_time_steps) 514 | 515 | global_idx = trajectory_idx * n_time_steps + time_step 516 | 517 | for key in data.keys(): 518 | assert ((ordered_data[key][trajectory_idx, time_step] == 519 | data[key][global_idx]).all()) 520 | 521 | # return a random subset of the datapoints --------------------------------- 522 | data_point_indices = np.arange(n_data_points) 523 | if shuffle_data: 524 | data_point_indices = np.random.permutation(data_point_indices) 525 | 526 | if desired_n_data_points < n_data_points: 527 | data_point_indices = data_point_indices[:desired_n_data_points] 528 | 
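# The same (possibly shuffled and truncated) indices are applied to every
# signal below, so angle, velocity, acceleration and torque rows stay aligned.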
529 | for key in data.keys(): 530 | data[key] = data[key][data_point_indices] 531 | 532 | return data 533 | 534 | 535 | def satisfies_normal_equation(theta, Y, T, epsilon=1e-6): 536 | lhs = (Y.transpose() * Y).dot(theta) 537 | rhs = Y.transpose().dot(T) 538 | return (abs(lhs - rhs) < epsilon).all() 539 | 540 | 541 | def rmse_sequential(robot, angle, velocity, acceleration, torque): 542 | sum_squared_error = 0 543 | 544 | T = angle.shape[0] 545 | 546 | for t in range(T): 547 | predicted_torque = robot.inverse_dynamics(angle=angle[t], 548 | velocity=velocity[t], 549 | acceleration=acceleration[t]) 550 | sum_squared_error = sum_squared_error \ 551 | + np.linalg.norm( 552 | predicted_torque - to_matrix(torque[t])) ** 2 553 | 554 | mean_squared_error = sum_squared_error / T / 3 555 | 556 | return np.sqrt(mean_squared_error) 557 | 558 | 559 | def rmse_batch(theta, Y, T): 560 | return np.squeeze(np.array(np.sqrt( 561 | (Y * theta - T).transpose().dot(Y * theta - T) / len(T)))) 562 | 563 | 564 | def check_and_log(log, robot, angle, velocity, acceleration, torque, Y, T, suffix): 565 | log['rank Y ' + suffix] = np.linalg.matrix_rank(Y) 566 | 567 | rmse_a = rmse_sequential(robot=robot, 568 | angle=angle, 569 | velocity=velocity, 570 | acceleration=acceleration, 571 | torque=torque) 572 | rmse_b = rmse_batch(theta=robot.get_params(), 573 | Y=Y, T=T) 574 | assert(abs(rmse_a - rmse_b) <= 1e-6) 575 | log['rmse ' + suffix] = rmse_a 576 | 577 | for i in range(robot.count_degrees_of_freedom()): 578 | inertia = robot.get_inertia_about_com(i) 579 | eigenvalues, eigenvectors = np.linalg.eig(inertia) 580 | reconstruction = eigenvectors.dot(np.diag(eigenvalues)).dot(eigenvectors.transpose()) 581 | assert(np.allclose(reconstruction, inertia, atol=1e-5)) 582 | assert(np.allclose(eigenvectors.dot(eigenvectors.transpose()), np.identity(3), atol=1e-6)) 583 | 584 | log['params ' + suffix] = np.array(robot.get_params()).flatten() 585 | log['eigenvalues of inertia ' + str(i) + ' ' + suffix] = eigenvalues 586 | log['mass ' + str(i) + ' ' + suffix] = robot.get_mass(i) 587 | log['com ' + str(i) + ' ' + suffix] = robot.get_com(i) 588 | log['static friction ' + str(i) + ' ' + suffix] = robot.get_static_friction(i) 589 | log['viscous friction ' + str(i) + ' ' + suffix] = robot.get_viscous_friction(i) 590 | 591 | 592 | def sys_id(robot, angle, velocity, acceleration, torque, method_name): 593 | log = dict() 594 | robot.test_regressor_matrix() 595 | 596 | Y = np.concatenate( 597 | [robot.compute_regressor_matrix(angle[t], velocity[t], acceleration[t]) 598 | for t in 599 | range(angle.shape[0])], axis=0) 600 | 601 | T = np.concatenate( 602 | [to_matrix(torque[t]) for t in range(angle.shape[0])], axis=0) 603 | 604 | check_and_log(log=log, robot=robot, angle=angle, velocity=velocity, 605 | acceleration=acceleration, torque=torque, Y=Y, T=T, suffix='before id') 606 | 607 | if method_name == 'lmi': 608 | theta = sys_id_lmi(robot=robot, Y=Y, T=T) 609 | elif method_name == 'ls': 610 | theta = sys_id_ls(robot=robot, Y=Y, T=T) 611 | else: 612 | raise NotImplementedError 613 | 614 | robot.set_params(theta) 615 | 616 | check_and_log(log=log, robot=robot, angle=angle, velocity=velocity, 617 | acceleration=acceleration, torque=torque, Y=Y, T=T, suffix='after id') 618 | 619 | for key in sorted(log.keys()): 620 | print(key + ': ', log[key], '\n') 621 | 622 | 623 | def sys_id_lmi(robot, Y, T): 624 | theta = cvxpy.Variable((36, 1)) # [m, mc_x, mc_y, mc_z, I_xx, I_xy, I_yy, I_xz, I_yz, I_zz] 625 | theta_cad = np.asarray(robot.get_params()) 626 | 
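# The LMI formulation enforces physical consistency (cf. Wensing et al. 2018):
# each link's 4x4 pseudo-inertia matrix must be positive semidefinite, the
# masses are bounded, and the friction coefficients are non-negative, while the
# regulariser keeps theta close to the CAD values theta_cad.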
# it is not clear whether norm or sum of squares is better for solver 627 | cost = cvxpy.sum_squares(Y * theta - T) + 1e-6 * cvxpy.sum_squares(theta - theta_cad) 628 | 629 | pseudo_inertias = [] 630 | static_frictions = [] 631 | viscous_frictions = [] 632 | masses = [] 633 | constraints = [] 634 | for i in range(robot.count_degrees_of_freedom()): 635 | pseudo_inertias += [cvxpy.bmat(robot.params_to_pseudo_inertia(theta, i))] 636 | static_frictions += [robot.params_to_static_friction(theta, i)] 637 | viscous_frictions += [robot.params_to_viscous_friction(theta, i)] 638 | masses += [robot.params_to_mass(theta, i)] 639 | 640 | constraints += [masses[i] >= 0.01] 641 | constraints += [masses[i] <= 0.5] 642 | constraints += [pseudo_inertias[i] >> 0] 643 | constraints += [static_frictions[i] >= 0] 644 | constraints += [viscous_frictions[i] >= 0] 645 | 646 | problem = cvxpy.Problem(cvxpy.Minimize(cost), constraints) 647 | theta.value = theta_cad 648 | problem.solve(solver='MOSEK', warm_start=True) 649 | assert(all(c.value() for c in constraints)) 650 | assert(theta.value is not None) 651 | 652 | return np.array(theta.value) 653 | 654 | 655 | def sys_id_ls(robot, Y, T): 656 | regularization_epsilon = 1e-10 657 | regularization_mu = np.asarray(robot.get_params()) 658 | theta = np.linalg.solve( 659 | Y.transpose() * Y + regularization_epsilon * np.eye(Y.shape[1], 660 | Y.shape[1]), 661 | Y.transpose() * T + regularization_epsilon * regularization_mu) 662 | 663 | return theta 664 | 665 | 666 | def save_simulated_data(angles, velocities, torques, filename): 667 | """Stores the simulated data in a compatible format with the dynamics 668 | learning code.""" 669 | data_dict = {} 670 | data_dict['measured_angles'] = angles 671 | data_dict['measured_velocities'] = velocities 672 | data_dict['measured_torques'] = torques 673 | data_dict['constrained_torques'] = torques 674 | data_dict['desired_torques'] = torques 675 | np.savez(filename, **data_dict) 676 | 677 | 678 | def test_numeric_differentiation(): 679 | dt = 0.001 680 | robot = Robot() 681 | 682 | data = {} 683 | data['angle'], data['velocity'], data['acceleration'], data['torque'] = robot.simulate( 684 | dt=dt, n_steps=10000) 685 | 686 | for key in data.keys(): 687 | data[key] = np.expand_dims(data[key], axis=0) 688 | 689 | data_copy = data.copy() 690 | compute_accelerations(data_copy, dt=dt) 691 | 692 | for key in data.keys(): 693 | difference = data[key][:, :-1] - data_copy[key] 694 | assert((np.absolute(difference) < 1e-12).all()) 695 | 696 | 697 | if __name__ == '__main__': 698 | parser = argparse.ArgumentParser(description='system id baseline') 699 | parser.add_argument("--input", help="Filename of the input robot data") 700 | parser.add_argument("--output", 701 | help="Filename to save simulated robot data") 702 | parser.add_argument("--visualizer", choices=['meshcat', 'gepetto']) 703 | parser.add_argument("--noise", type=float) 704 | args = parser.parse_args() 705 | robot = Robot() 706 | print(robot.model.inertias[2]) 707 | if args.visualizer: 708 | if args.input: 709 | data = load_data() 710 | 711 | # Playing the first recorded angle trajectory. 712 | q_trajectory = np.matrix(data['angle'][0]).T 713 | else: 714 | 715 | # Playing artificial angle trajectory. Each degree of freedom is 716 | # linearly increased from 0 to PI independently. 
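# Stacking the blocks yields a (3, 3*nsteps) trajectory: joint i ramps from 0
# to pi during the i-th block of nsteps samples while the other joints stay at 0.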
717 | nsteps = 1000 718 | linear = np.linspace(0, np.pi, nsteps) 719 | zeros = np.zeros(nsteps) 720 | q_trajectory = np.block([[linear, zeros, zeros], 721 | [zeros, linear, zeros], 722 | [zeros, zeros, linear]]) 723 | q_trajectory = np.matrix(q_trajectory) 724 | show_angle_trajectory(q_trajectory) 725 | if args.output: 726 | assert args.input 727 | data = load_data() 728 | nseq = data['angle'].shape[0] 729 | qs = [] 730 | qdots = [] 731 | taus = [] 732 | for sample_idx in range(nseq): 733 | print(sample_idx) 734 | q, qdot, _, tau = robot.simulate(dt=0.001, 735 | torque=data['torque'][sample_idx], 736 | initial_angle=data['angle'][sample_idx, 0], 737 | initial_velocity=data['velocity'][sample_idx, 0]) 738 | qs.append(np.expand_dims(q, 0)) 739 | qdots.append(np.expand_dims(qdot, 0)) 740 | taus.append(np.expand_dims(tau, 0)) 741 | 742 | qs = np.vstack(qs) 743 | qdots = np.vstack(qdots) 744 | taus = np.vstack(taus) 745 | if args.noise: 746 | qs = qs + args.noise * np.random.randn(*qs.shape) 747 | qdots = qdots + args.noise * np.random.randn(*qdots.shape) 748 | taus = taus + args.noise * np.random.randn(*taus.shape) 749 | save_simulated_data(qs, qdots, taus, args.output) 750 | 751 | # check_inertias() 752 | # test_sys_id_lmi() 753 | # test_sys_id_visually() 754 | # test_sys_id_simulated_torques() 755 | -------------------------------------------------------------------------------- /DL/plotting/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rr-learning/transferable_dynamics_dataset/3660a4bd7c6b011c8d02853bad3cce540708512c/DL/plotting/__init__.py -------------------------------------------------------------------------------- /DL/plotting/plots.py: -------------------------------------------------------------------------------- 1 | """ 2 | Box plotting of multiple error files. 
3 | """ 4 | import argparse 5 | import itertools 6 | import os 7 | import matplotlib.patches as mpatches 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | from collections import defaultdict 11 | from DL.evaluation.evaluation import get_angle_errors, \ 12 | compute_RMSE_from_errors, get_evaluation_errors 13 | 14 | 15 | def box_violin_plot(error_files, 16 | method_names, 17 | violinplot, 18 | dataset_names): 19 | 20 | evaluation_errors = {} 21 | for method_name, error_file in zip(method_names, error_files): 22 | errors_dict = np.load(error_file) 23 | evaluation_errors[method_name] = {} 24 | for dataset_name, errors in errors_dict.items(): 25 | evaluation_errors[method_name][dataset_name] = \ 26 | get_evaluation_errors(errors) 27 | 28 | # normalize and remove delta 0 from plot ----------------------------------- 29 | for dataset_name in evaluation_errors['delta 0'].keys(): 30 | normalizer = np.mean(evaluation_errors['delta 0'][dataset_name]) 31 | for method_name in evaluation_errors.keys(): 32 | evaluation_errors[method_name][dataset_name] /= normalizer 33 | method_names.remove('delta 0') 34 | 35 | 36 | 37 | fig, axs = plt.subplots(1, len(dataset_names), 38 | sharey=True, figsize=(20,4)) 39 | 40 | for i, dataset_name in enumerate(dataset_names): 41 | ax = axs[i] 42 | ax.set_title(dataset_name) 43 | #ax.set_yscale("log") 44 | # ax.set_ylim(0,0.5) 45 | evaluation_errors_dataset = \ 46 | [evaluation_errors[method_name][dataset_name] 47 | for method_name in method_names] 48 | if violinplot: 49 | ret = ax.violinplot(evaluation_errors_dataset, 50 | showmeans=True, showmedians=True, 51 | showextrema=True) 52 | ret['cmeans'].set_color('r') 53 | ret['cmedians'].set_color('b') 54 | else: 55 | ret = ax.boxplot(evaluation_errors_dataset, 56 | showmeans=True) 57 | 58 | ax.set_xticks([y+1 for y in range(len(error_files))]) 59 | ax.set_xticklabels(method_names, rotation=45, fontsize=8) 60 | 61 | return fig 62 | 63 | def aggregated_plot(RMSEs, 64 | path_to_plots_folder=None, 65 | methods = None, 66 | prediction_horizons=[1, 10, 100, 1000], 67 | history_lengths=[1, 10], 68 | setups=["iid_test_data", "transfer_test_data_1", "validation_data"], 69 | weighted=True 70 | ): 71 | if methods is not None: 72 | RMSEs = RMSEs[RMSEs["method"].isin(methods)] 73 | 74 | color_dict = {"training_data": "m", 75 | "validation_data": "k", 76 | "transfer_test_data_1": "g", 77 | "transfer_test_data_2": "b", 78 | "transfer_test_data_3": "c", 79 | "iid_test_data": "r"} 80 | 81 | label_dict = {"training_data": "train", 82 | "validation_data": "validation", 83 | "transfer_test_data_1": "transfer 1", 84 | "transfer_test_data_2": "transfer 2", 85 | "transfer_test_data_3": "transfer 3", 86 | "iid_test_data": "iid"} 87 | 88 | marker_dict = {1: ".", 10: "s"} 89 | 90 | fig = plt.figure(figsize=(10*len(prediction_horizons), 5)) 91 | for i, prediction_horizon in enumerate(prediction_horizons): 92 | ax = fig.add_subplot(1, len(prediction_horizons), i+1) 93 | filtered = RMSEs[(RMSEs["prediction_horizon"] == prediction_horizon)] 94 | for history_length in history_lengths: 95 | for setup in setups: 96 | filtered_hist_setup = filtered[(filtered["history_length"]==history_length)&(filtered["setup"]==setup)] 97 | if weighted: 98 | delta_0 = float(filtered_hist_setup[filtered_hist_setup["method"]=="delta_0"]["RMSE"]) 99 | filtered_hist_setup["weighted"] = filtered_hist_setup["RMSE"]/delta_0 100 | ax.scatter(filtered_hist_setup["method"], filtered_hist_setup["weighted"], 101 | c=color_dict[setup], 102 | 
marker=marker_dict[history_length], 103 | label='Hist: {0}, Test: {1}'.format(history_length, 104 | label_dict[setup])) 105 | else: 106 | ax.scatter(filtered_hist_setup["method"], filtered_hist_setup["RMSE"], 107 | c=color_dict[setup], 108 | marker=marker_dict[history_length], 109 | label='Hist: {0}, Test: {1}'.format(history_length, 110 | label_dict[setup])) 111 | ax.set_title("Prediction_horizon: {}".format(prediction_horizon)) 112 | plt.legend() 113 | if path_to_plots_folder is not None: 114 | if weighted: 115 | fig.savefig(os.path.join(path_to_plots_folder, "weighted_RMSEs.pdf")) 116 | fig.savefig(os.path.join(path_to_plots_folder, "weighted_RMSEs.png")) 117 | else: 118 | fig.savefig(os.path.join(path_to_plots_folder, "RMSEs.pdf")) 119 | fig.savefig(os.path.join(path_to_plots_folder, "RMSEs.png")) 120 | 121 | def main(): 122 | parser = argparse.ArgumentParser(description=__doc__) 123 | parser.add_argument("--error_files", required=True, nargs='+', 124 | help="Filename of the error files to plot") 125 | parser.add_argument("--names", nargs='+', help="Names of the methods to" 126 | "display") 127 | parser.add_argument("--violinplot", action='store_true') 128 | args = parser.parse_args() 129 | box_violin_plot(args.error_files, args.names, args.violinplot) 130 | 131 | 132 | if __name__ == "__main__": 133 | main() 134 | -------------------------------------------------------------------------------- /DL/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from DL.utils.data_loading import computeNumberOfTrainingPairs 3 | from DL.utils.data_loading import unrollTrainingData 4 | from DL.utils.data_loading import unrollTrainingDataStream 5 | from DL.utils.data_loading import loadRobotData 6 | from DL.utils.data_loading import concatenateActionsStates 7 | from DL.utils.data_loading import concatenateActionsStatesAverages 8 | from DL.utils.standardizer import Standardizer 9 | -------------------------------------------------------------------------------- /DL/utils/data_extractor.py: -------------------------------------------------------------------------------- 1 | """ Script to extract data splits (i.e., training, testing and validation) 2 | from a raw npz data file.""" 3 | 4 | import argparse 5 | import numpy as np 6 | from collections import defaultdict 7 | 8 | 9 | def split_into_train_val_test(data, nvalidation=3, ntesting=9, ntraining=38): 10 | training = {} 11 | validation = {} 12 | testing = {} 13 | for key in data.keys(): 14 | testing[key] = data[key][:ntesting, :, :] 15 | for key in data.keys(): 16 | validation[key] = data[key][ntesting:ntesting + nvalidation, :, :] 17 | for key in data.keys(): 18 | training[key] = data[key][ntesting + nvalidation:, :, :] 19 | assert training[key].shape[0] == ntraining 20 | return training, validation, testing 21 | 22 | def from_npz_to_dict(npz): 23 | ret = {} 24 | for k in npz.keys(): 25 | if k == "index": 26 | continue 27 | ret[k] = npz[k] 28 | return ret 29 | 30 | def discard_prefix(data, discard_prefix): 31 | """ Removes the prefix of each rollout.""" 32 | for key in data.keys(): 33 | data[key] = data[key][:, discard_prefix:, :] 34 | return data 35 | 36 | def extract_rollouts_from(source_dict, start, end): 37 | target_dict = {} 38 | for key in source_dict.keys(): 39 | target_dict[key] = source_dict[key][start:end, :, :] 40 | return target_dict 41 | 42 | def extract(args, full_data): 43 | """ Extracts dataset splits (training, validation, test, etc.) 
according 44 | to the way sines_full.npz and GPs_full.npz were recorded.""" 45 | full_data = discard_prefix(full_data, args.discard_prefix) 46 | training, validation, testiid = split_into_train_val_test( 47 | extract_rollouts_from(full_data, 0, 50)) 48 | testtransfer_datasets = [] 49 | for i in range(1, 4): 50 | _, _, transfertest = split_into_train_val_test( 51 | extract_rollouts_from(full_data, i * 50, (i + 1) * 50)) 52 | testtransfer_datasets.append(transfertest) 53 | return training, validation, testiid, testtransfer_datasets 54 | 55 | 56 | if __name__ == "__main__": 57 | parser = argparse.ArgumentParser(description=__doc__) 58 | parser.add_argument("--raw_data", help="filename of the raw data npz", 59 | default="sines_full.npz", required=True) 60 | parser.add_argument("--discard_prefix", type=int, 61 | default=1000, help="discard this many number of observations at" 62 | "the beginning of each rollout") 63 | 64 | # Output splits 65 | parser.add_argument("--training", help="Filename training set", 66 | default="sines_training.npz") 67 | parser.add_argument("--testiid", help="Filename iid test set", 68 | default="sines_test_iid.npz") 69 | parser.add_argument("--validation", help="Filename validation set", 70 | default="sines_validation.npz") 71 | parser.add_argument("--testtransfer", help="Filename transfer test sets", 72 | default="sines_test_transfer_{}.npz") 73 | 74 | args = parser.parse_args() 75 | 76 | raw_data = from_npz_to_dict(np.load(args.raw_data)) 77 | training, validation, testiid, testtransfer_sets = extract(args, raw_data) 78 | 79 | np.savez(args.training, **training) 80 | np.savez(args.validation, **validation) 81 | np.savez(args.testiid, **testiid) 82 | for i, dataset in enumerate(testtransfer_sets): 83 | np.savez(args.testtransfer.format(i + 1), **dataset) 84 | 85 | -------------------------------------------------------------------------------- /DL/utils/data_loading.py: -------------------------------------------------------------------------------- 1 | """ 2 | basic helper functions to load the data set and get it in the right format 3 | for different learning algorithms 4 | """ 5 | import os 6 | import pickle 7 | import numpy as np 8 | 9 | 10 | def loadRobotData(filename): 11 | """ 12 | Loads Robot data at the given filename and returns it as a tuple of 13 | observations and actions. 14 | 15 | Returns 16 | ---------- 17 | obs: array of shape nRollouts x nStepsPerRollout x nStates 18 | containing the state trajectories of all rollouts 19 | actions: array of shape nRollouts x nStepsPerRollout x nInputs 20 | containing the state trajectories of all rollouts 21 | """ 22 | data = np.load(filename) 23 | observations = np.concatenate((data['measured_angles'], data['measured_velocities'], 24 | data['measured_torques']), 2) 25 | actions = data['constrained_torques'] 26 | return observations, actions 27 | 28 | 29 | def concatenateActionsStates(history_actions, history_obs, future_actions): 30 | assert len(history_actions.shape) == 3 31 | assert len(history_obs.shape) == 3 32 | assert len(future_actions.shape) == 3 33 | assert history_actions.shape[:2] == history_obs.shape[:2] 34 | assert (history_actions.shape[0], history_actions.shape[2]) ==\ 35 | (future_actions.shape[0], future_actions.shape[2]) 36 | """ 37 | concatenates observations and actions to form a single (row) vector. This 38 | function is intended to standardize the order observations and actions are 39 | merged to form the dynamics input. Note that it can handle multiple 40 | sequences at the same time. 
41 | 42 | Parameters 43 | ---------- 44 | 45 | history_actions: np array with shape nsequences x history len x action dim 46 | history_obs: np array with shape nsequences x history len x state dim 47 | future_actions: np array with shape nsequences x prediction horizon - 1 48 | x action dim. 49 | Returns 50 | ------- 51 | 52 | joint_states_actions: np array with shape nsequences x (history len x action dim 53 | + history len x state dim + (prediction horizon - 1) 54 | * action dim). 55 | 56 | """ 57 | joint_states_actions = [history_actions.reshape( 58 | (history_actions.shape[0], -1)), history_obs.reshape( 59 | (history_obs.shape[0], -1)), future_actions.reshape( 60 | (future_actions.shape[0], -1))] 61 | joint_states_actions = np.hstack(joint_states_actions) 62 | assert joint_states_actions.shape[0] == history_obs.shape[0] 63 | return joint_states_actions 64 | 65 | 66 | def concatenateActionsStatesAverages(history_actions, history_obs, future_actions): 67 | assert len(history_actions.shape) == 3 68 | assert len(history_obs.shape) == 3 69 | assert len(future_actions.shape) == 3 70 | assert history_actions.shape[:2] == history_obs.shape[:2] 71 | assert (history_actions.shape[0], history_actions.shape[2]) ==\ 72 | (future_actions.shape[0], future_actions.shape[2]) 73 | """ 74 | averages and concatenates observations and actions to form a single (row) vector. 75 | See also concatenateActionsStates() 76 | 77 | Returns 78 | ------- 79 | 80 | joint_states_actions: np array with shape 81 | nsequences x (input dim + state dim + (prediction horizon - 1) * input_dim) 82 | """ 83 | if future_actions.shape[1]>0: 84 | joint_states_actions = [np.mean(history_actions, axis=1), 85 | np.mean(history_obs, axis=1), 86 | np.mean(future_actions, axis=1)] 87 | else: 88 | joint_states_actions = [np.mean(history_actions, axis=1), 89 | np.mean(history_obs, axis=1)] 90 | joint_states_actions = np.hstack(joint_states_actions) 91 | assert joint_states_actions.shape[0] == history_obs.shape[0] 92 | return joint_states_actions 93 | 94 | 95 | def unrollTrainingData(obs_seqs, actions_seqs, history_len, prediction_horizon, 96 | difference_learning, average=False): 97 | """ 98 | Receives sequences of observations and actions and returns training targets 99 | and training inputs that will be used to learn the dynamics model. 100 | If average is True then the mean of the history and the mean of the future actions are used. 
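For example, with the 9-dimensional observations and 3-dimensional actions used in this repository, history_len=1 and prediction_horizon=1 give 12-dimensional inputs and history_len=2, prediction_horizon=2 give 27-dimensional inputs; with average=True the history is collapsed to a single mean action and mean observation (plus one mean future action when prediction_horizon > 1).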
101 | 102 | Outputs 103 | ------- 104 | targets: np.array of shape training_instances x state dim 105 | 106 | inputs: np-array of shape traininig_instances x input_dimension 107 | Note that input_dimension = (action dim+state dim)*history_len + 108 | (prediction_horizon - 1) x action dim 109 | """ 110 | assert obs_seqs.shape[:2] == actions_seqs.shape[:2] 111 | inputs = [] 112 | targets = [] 113 | nrollouts, length, nstates = obs_seqs.shape 114 | for offset in range(history_len, length - prediction_horizon + 1): 115 | hist_obs = obs_seqs[:, offset - history_len:offset, :] 116 | hist_act = actions_seqs[:, offset - history_len:offset, :] 117 | future_act = actions_seqs[:,offset: offset + prediction_horizon - 1, :] 118 | output_obs = obs_seqs[:,offset + prediction_horizon - 1, :] 119 | if average: 120 | current_input = concatenateActionsStatesAverages(hist_act, hist_obs, future_act) 121 | else: 122 | current_input = concatenateActionsStates(hist_act, hist_obs, future_act) 123 | current_target = output_obs 124 | if difference_learning: 125 | current_target = current_target.copy() - hist_obs[:, -1, :] 126 | inputs.append(current_input) 127 | targets.append(current_target) 128 | return np.vstack(targets), np.vstack(inputs) 129 | 130 | 131 | def unrollTrainingDataStream(obs_seqs, actions_seqs, history_len, 132 | prediction_horizon, difference_learning, average=False, shuffle=True, 133 | infinite=False): 134 | """ 135 | Generator function that receives sequences of observations and actions and 136 | yields training pairs (target, input). Notice that the order of the pairs 137 | is shuffled by default. Moreover, the data iteration restarts from the 138 | beginning once the training pairs are exhausted if infinite=True (default). 139 | 140 | Outputs 141 | ------- 142 | 143 | target: np array of size state dim. 144 | 145 | inputs: np array of size history_len * (action dim + state dim) + 146 | (prediction_horizon - 1) * action dim. 147 | """ 148 | assert obs_seqs.shape[:2] == actions_seqs.shape[:2] 149 | nrollouts = obs_seqs.shape[0] 150 | ninstances = computeNumberOfTrainingPairs(obs_seqs, history_len, 151 | prediction_horizon) 152 | order = range(ninstances) 153 | while True: 154 | if shuffle: 155 | order = np.random.permutation(ninstances) 156 | for index in order: 157 | seq_id = index % nrollouts 158 | offset = index // nrollouts + history_len 159 | hist_obs = obs_seqs[seq_id, offset - history_len:offset, :] 160 | hist_act = actions_seqs[seq_id, offset - history_len:offset, :] 161 | future_act = actions_seqs[seq_id, 162 | offset: offset + prediction_horizon - 1, :] 163 | output_obs = obs_seqs[seq_id, offset + prediction_horizon - 1, :] 164 | if average: 165 | current_input = concatenateActionsStatesAverages( 166 | hist_act[np.newaxis, :, :], hist_obs[np.newaxis, :, :], 167 | future_act[np.newaxis, :, :]) 168 | else: 169 | current_input = concatenateActionsStates( 170 | hist_act[np.newaxis, :, :], hist_obs[np.newaxis, :, :], 171 | future_act[np.newaxis, :, :]) 172 | current_target = output_obs 173 | if difference_learning: 174 | current_target = current_target.copy() - hist_obs[-1, :] 175 | yield (current_target.flatten(), current_input.flatten()) 176 | if not infinite: 177 | break 178 | 179 | def computeNumberOfTrainingPairs(obs_seqs, history_len, prediction_horizon): 180 | """ 181 | Computes the number of different training pairs (target, input) for given 182 | sequences of observations and actions. Note that it also depends on the 183 | history length and prediction horizon. 
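For example, 5 rollouts of length 1000 with history_len=1 and prediction_horizon=10 yield len(range(1, 991)) * 5 = 990 * 5 = 4950 pairs.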
184 | """ 185 | nrollouts, length, _ = obs_seqs.shape 186 | valid_range_len = len(range(history_len, length - prediction_horizon + 1)) 187 | ninstances = valid_range_len * nrollouts 188 | return ninstances 189 | -------------------------------------------------------------------------------- /DL/utils/data_splitting.py: -------------------------------------------------------------------------------- 1 | """ 2 | Training/validation data splitters. 3 | """ 4 | import numpy as np 5 | from DL.utils.data_loading import loadRobotData 6 | 7 | 8 | class DataSplitterInterface(object): 9 | 10 | def get_training_data(self): 11 | """ 12 | Returns two sets of sequences as training data. 13 | 14 | Outputs 15 | ------- 16 | training_observation_sequences: np-array of shape: 17 | nSequences x nStepsPerRollout x nStates 18 | 19 | training_action_sequences: np-array of shape: 20 | nSequences x nStepsPerRollout x nInputs 21 | """ 22 | raise NotImplementedError 23 | 24 | def get_test_data(self): 25 | """ 26 | Returns two sets of sequences as testing data. 27 | 28 | Outputs 29 | ------- 30 | testing_observation_sequences: np-array of shape: 31 | nSequences x nStepsPerRollout x nStates 32 | 33 | testing_action_sequences: np-array of shape: 34 | nSequences x nStepsPerRollout x nInputs 35 | """ 36 | raise NotImplementedError 37 | 38 | 39 | class CompleteRolloutsDataSplitter(DataSplitterInterface): 40 | 41 | def __init__(self, data_filename, test_rollout_indexes): 42 | self.observations, self.actions = loadRobotData(data_filename) 43 | self.test_rollouts = np.unique(test_rollout_indexes) 44 | nrollouts = self.observations.shape[0] 45 | self.train_rollouts = np.setdiff1d(np.arange(nrollouts), 46 | self.test_rollouts) 47 | assert self.test_rollouts.shape == test_rollout_indexes.shape,\ 48 | "There are repeated numbers in the provided array." 
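# Sanity check: together, the train and test rollout indices must account
# for every recorded rollout exactly once.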
49 | assert self.train_rollouts.size + self.test_rollouts.size == nrollouts 50 | 51 | def get_training_data(self): 52 | return self.observations[self.train_rollouts],\ 53 | self.actions[self.train_rollouts] 54 | 55 | def get_test_data(self): 56 | return self.observations[self.test_rollouts],\ 57 | self.actions[self.test_rollouts] 58 | 59 | -------------------------------------------------------------------------------- /DL/utils/plot_utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import re 4 | import os 5 | from DL.evaluation.evaluation import get_angle_errors, compute_RMSE_from_errors 6 | import re 7 | 8 | 9 | def get_number_of_parameters(l, w, prediction_horizon=100, history_length=1): 10 | action_dimension = 3 11 | observation_dimension = 9 12 | input = history_length * (observation_dimension + action_dimension) + (prediction_horizon - 1) * action_dimension 13 | output = observation_dimension 14 | return input*w + w*w*(l-1) + w*output + l*w + output 15 | 16 | 17 | def get_path_to_run(num_layers, num_units, lr, reg, path_to_ho="/agbs/dynlearning/Errors from HO/prediction_horizon_100_history_length_1_epochs_40/"): 18 | jobs_info = pd.read_csv(os.path.join(path_to_ho, "job_info.csv")) 19 | jobs_info["model_train_params"] 20 | run_id = None 21 | for arch, train, id in zip(jobs_info["model_arch_params"], jobs_info["model_train_params"], jobs_info["id"]): 22 | arch = eval(arch) 23 | train = eval(train) 24 | if arch["num_layers"] == num_layers and arch["num_units"] == num_units and train["learning_rate"] == lr and train["l2_reg"] == reg: 25 | run_id = id 26 | if run_id is not None: 27 | return os.path.join(path_to_ho, "{}_".format(run_id), "errors.npz") 28 | else: 29 | print("No such run in given folder") 30 | 31 | 32 | def get_diego_index(prediction_horizon, 33 | history_length, 34 | averaging): 35 | prediction_horizons = [1, 10, 100, 1000] 36 | history_lengths = [1, 10] 37 | 38 | count = 0 39 | for current_prediction_horizon in prediction_horizons: 40 | for current_history_length in history_lengths: 41 | for current_averaging in [True, False]: 42 | if prediction_horizon == current_prediction_horizon and \ 43 | history_length == current_history_length and \ 44 | averaging == current_averaging: 45 | return count 46 | 47 | count += 1 48 | return np.nan 49 | 50 | def path_to_error_file(method_name, 51 | experiment_name, 52 | prediction_horizon, 53 | history_length): 54 | if experiment_name == 'sine_pd': 55 | path_to_results = "/agbs/dynlearning/Errors/new_datasets/SinePD/" 56 | elif experiment_name == 'sim': 57 | path_to_results = "/agbs/dynlearning/Errors/simulated_data" 58 | elif experiment_name == 'sim_noisy': 59 | path_to_results = "/agbs/dynlearning/Errors/simulated_noisy_data" 60 | else: 61 | raise NotImplementedError 62 | 63 | if method_name == 'avg-NN': 64 | error_file_name = "NN/averaging_prediction_horizon_{}_history_length" \ 65 | "_{}_epochs_40/errors.npz".format(prediction_horizon, 66 | history_length) 67 | elif method_name == 'NN': 68 | error_file_name = "NN/prediction_horizon_{}_history_length" \ 69 | "_{}_epochs_40/errors.npz".format(prediction_horizon, 70 | history_length) 71 | elif method_name == 'avg-EQL': 72 | error_file_name = "EQL/averaging_prediction_horizon_{}_history_length" \ 73 | "_{}_epochs_20/errors.npz".format(prediction_horizon, 74 | history_length) 75 | elif method_name == 'delta 0': 76 | error_file_name = 'delta_0/errors_{}_delta_0_{:03d}.npz'.format( 77 | 
experiment_name, 78 | get_diego_index(prediction_horizon=prediction_horizon, 79 | history_length=history_length, 80 | averaging=False)) 81 | elif method_name == 'svgpr': 82 | error_file_name = 'svgpr/errors_{}_svgpr_{:03d}.npz'.format( 83 | experiment_name, 84 | get_diego_index(prediction_horizon=prediction_horizon, 85 | history_length=history_length, 86 | averaging=False)) 87 | elif method_name == 'avg-svgpr': 88 | error_file_name = 'svgpr/errors_{}_svgpr_{:03d}.npz'.format( 89 | experiment_name, 90 | get_diego_index(prediction_horizon=prediction_horizon, 91 | history_length=history_length, 92 | averaging=True)) 93 | elif method_name == 'linear': 94 | error_file_name = 'linear_model_learning_rate_0.0001/errors_{}' \ 95 | '_linear_model_{:03d}.npz'.format(experiment_name, 96 | get_diego_index(prediction_horizon=prediction_horizon, 97 | history_length=history_length, 98 | averaging=False)) 99 | elif method_name == 'avg-linear': 100 | error_file_name = 'linear_model_learning_rate_0.0001/errors_{}' \ 101 | '_linear_model_{:03d}.npz'.format(experiment_name, 102 | get_diego_index(prediction_horizon=prediction_horizon, 103 | history_length=history_length, 104 | averaging=True)) 105 | elif method_name in ['system_id_cad', 'system_id_ls', 'system_id_ls_lmi']: 106 | error_file_name = '{0}/errors_{2}_{0}_{1:03d}.npz'.format( 107 | method_name, int(np.log10(prediction_horizon)), experiment_name) 108 | elif bool(re.compile("NN_lr_0.0001_reg_0.0001_l_[0-9]_w_[0-9]+").match(method_name)): 109 | pattern2 = re.compile("[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?") 110 | (lr, reg, num_layers, num_units) = [float(name) for name in pattern2.findall(method_name)] 111 | ho_path = "/agbs/dynlearning/Errors from HO/prediction_horizon_{0}_history_length_{1}_epochs_40/".format(prediction_horizon, history_length) 112 | return get_path_to_run(num_layers, num_units, lr, reg, path_to_ho=ho_path) 113 | else: 114 | print(method_name) 115 | assert (False) 116 | return os.path.join(path_to_results, error_file_name) 117 | 118 | 119 | def aggregate_RMSE(experiment_name, 120 | methods, 121 | prediction_horizons=[1, 10, 100, 1000], 122 | history_lengths=[1, 10]): 123 | error_means = pd.DataFrame(columns=["method", "prediction_horizon", "history_length", "setup", "RMSE"]) 124 | for prediction_horizon in prediction_horizons: 125 | for history_length in history_lengths: 126 | for method in methods: 127 | address = path_to_error_file(method, 128 | experiment_name, 129 | prediction_horizon, 130 | history_length) 131 | errors_dict = np.load(address) 132 | for setup, errors in errors_dict.items(): 133 | np_errors = get_angle_errors(errors) 134 | mean_error = compute_RMSE_from_errors(np_errors) 135 | mean = pd.DataFrame({"method": [method], "prediction_horizon":prediction_horizon, "history_length":[history_length], "setup":[setup], "RMSE": [mean_error]}) 136 | error_means = error_means.append(mean, ignore_index = True) 137 | print("Prediction Horizon: {}, History length: {}".format(prediction_horizon, history_length)) 138 | return error_means 139 | -------------------------------------------------------------------------------- /DL/utils/standardizer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | class Standardizer: 6 | 7 | def __init__(self, data=None): 8 | """ 9 | initializes the standardizer. 
The standardizer will standardize 10 | vectors of dimension "dataDimension" component-wise 11 | 12 | Parameters 13 | ---------- 14 | data: nPoints x dataDimension 15 | training data to obtain empirical variance and mean 16 | """ 17 | if data is not None: 18 | self.updateParameters(data) 19 | 20 | def updateParameters(self, data): 21 | n = 0 22 | accum = None 23 | squared = None 24 | for vector in data: 25 | if accum is None: 26 | accum = vector.copy() 27 | squared = vector * vector 28 | else: 29 | accum += vector 30 | squared += vector * vector 31 | n += 1 32 | self.means = accum / n 33 | self.stds = np.sqrt(squared / n - self.means * self.means) 34 | 35 | def standardize(self, dataVector): 36 | """ 37 | standardizes a vector of dimension "dataDimension" component wise using 38 | the empirical mean and std of this Standardizer instance. 39 | 40 | dataVector can be either a vector or an array of shape 41 | nDataPoints x dataDimension 42 | """ 43 | if dataVector.ndim == 1: 44 | return (dataVector - self.means) / self.stds 45 | elif dataVector.ndim == 2: 46 | return (dataVector - self.means) / self.stds 47 | raise Exception("Wrong input format") 48 | 49 | def unstandardize(self, dataVector): 50 | """ 51 | retransforms a standardized vector of dimension "dataDimension" component 52 | wise using the empirical mean and std of this Standardizer instance. 53 | 54 | dataVector can be either a vector or an array of shape 55 | nDataPoints x dataDimension 56 | """ 57 | if dataVector.ndim == 1: 58 | return (dataVector*self.stds) + self.means 59 | elif dataVector.ndim == 2: 60 | return (dataVector*self.stds) + self.means 61 | raise Exception("Wrong input format") 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 rr-learning 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | name = "pypi"
3 | url = "https://pypi.org/simple"
4 | verify_ssl = true
5 | 
6 | [dev-packages]
7 | 
8 | [packages]
9 | numpy = "*"
10 | Keras = "*"
11 | tensorflow = "*"
12 | eql = {editable = true,git = "https://github.com/martius-lab/EQL_Tensorflow.git", ref = "EQL0"}
13 | l0-regularization = {git = "https://github.com/martius-lab/L0-regularization.git", editable = true}
14 | ipdb = "*"
15 | matplotlib = "*"
16 | pandas = "*"
17 | gitpython = "*"
18 | mujoco-py = "==2.0.2.0"
19 | gym = "==0.12.0"
20 | pipenv = "*"
21 | graphviz = "==0.10.1"
22 | gpflow = "*"
23 | pilco = {git = "https://github.com/DiegoAE/PILCO.git",editable = true}
24 | sklearn = "*"
25 | jupyter = "*"
26 | torch = "*"
27 | lwpr = {git = "https://github.com/DiegoAE/lwpr.git",editable = true}
28 | 
29 | [requires]
30 | python_version = "3.6"
31 | 
32 | [scripts]
33 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Transferable Dynamics Learning
2 | 
3 | Benchmarking of different dynamics learning methods under changes in the distribution over controls. In this repository we provide the evaluation code and links to the corresponding evaluation data sets.
4 | 
5 | ## Robotic setup
6 | 
7 | The data was recorded on a 3-DOF torque-controlled real robotic system. Please check the [Open Dynamic Robot Initiative](https://open-dynamic-robot-initiative.github.io/) for details about the robotic platform.
8 | 
9 | 
10 | 
11 | ## Robotic dataset
12 | 
13 | All the dataset files can be downloaded from [here](https://edmond.mpdl.mpg.de/dataset.xhtml?persistentId=doi:10.17617/3.ZT6K7P) or alternatively from [here](https://owncloud.tuebingen.mpg.de/index.php/s/3THSfyBgFrYykPc?path=%2F).
14 | 
15 | We use the aforementioned robotic platform to record different datasets under substantially different conditions in order to assess the predictive performance of dynamics learning algorithms beyond the i.i.d. setting. In this study such conditions are in turn governed by the particular controllers used to generate the control inputs fed into the system. We consider two types of controllers depending on whether there is a feedback control loop or not.
16 | 
17 | ### Closed-loop dataset - Sine waves PD control
18 | 
19 | This dataset was generated using a superposition of sine waves to define reference trajectories that were subsequently tracked by the robot using PD position control (please refer to the paper for details). Check out some sampled robot movements under the 4 different families of controllers considered in the closed-loop dataset:
20 | 
21 | 
22 | 
23 | 
24 | The shown movements are arranged according to the following diagram, where each of the datasets *D* accounts for a particular configuration of sine angular frequencies (low, high) and reachable task space (left, full). The arrows denote the different transfer settings that are discussed in our paper.
25 | 
26 |

27 | 28 |
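To get a quick look at one of these recordings, the repository's own loader can be used. The following is a minimal sketch (assuming, for illustration, that `Sines_training.npz` from the naming convention described below has already been downloaded into the working directory):

```
from DL.utils import loadRobotData

# observations stack the measured angles, velocities and torques (9 dimensions);
# actions are the constrained torques (3 dimensions).
observations, actions = loadRobotData("Sines_training.npz")
print(observations.shape)  # (rollouts, time steps, 9), e.g. (38, 14000, 9)
print(actions.shape)       # (rollouts, time steps, 3), e.g. (38, 14000, 3)
```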

29 | 
30 | ### Open-loop dataset - GP torque controls
31 | 
32 | We used trajectories sampled from a Gaussian process (GP) directly as torque inputs (controls) to record this dataset in an open-loop fashion. Please refer to the paper for details.
33 | 
34 | ### Dataset structure and naming convention
35 | 
36 | The released files of the closed-loop dataset are named according to the following convention:
37 | * `Sines_full.npz`
38 | * `Sines_training.npz`
39 | * `Sines_validation.npz`
40 | * `Sines_test_iid.npz`
41 | * `Sines_test_transfer_%d.npz`
42 | 
43 | with the different test transfer files being indexed starting from 1 (e.g., `Sines_test_transfer_1.npz`). Note that each `_full.npz` file includes all recorded rollouts of its corresponding category. The rest of the files can be obtained from it as follows:
44 | 
45 | ```
46 | python -m DL.utils.data_extractor --raw_data Sines_full.npz --training Sines_training.npz --testiid Sines_test_iid.npz --validation Sines_validation.npz --testtransfer Sines_test_transfer_{}.npz
47 | ```
48 | 
49 | The open-loop dataset files are named analogously, except that the prefix `GPs` is used instead of `Sines`.
50 | 
51 | Each of the released `npz` files can be indexed by the following keywords, which correspond to the different recorded variables:
52 | * `measured_angles`
53 | * `measured_velocities`
54 | * `measured_torques`
55 | * `constrained_torques`
56 | * `desired_torques`
57 | 
58 | Each of these keys is associated with a numpy array of shape `(S, T, D)`, where `S` denotes the number of sequences/rollouts, and `T`, `D` are the sequence length and number of degrees of freedom, respectively. We recorded at a frequency of 1 kHz, meaning that the time elapsed between consecutive observations is 0.001 seconds. We recorded using a 3-DOF finger robot (`D=3`), and all rollouts have a total duration of 14s (`T=14000`).
59 | 
60 | ### Simulated closed-loop dataset
61 | 
62 | We also provide a simulated version of the closed-loop dataset. We keep the naming convention consistent with the real datasets but use the prefix `Sim_Sines` instead.
63 | 
64 | ## Evaluation
65 | 
66 | Our evaluation protocol consists of two stages. First, we compute the error vectors of a particular method over the different datasets and save them in a single `.npz` file. After we compute the error files corresponding to all benchmarked methods, we aggregate the results in different plots as shown in the paper, which we refer to for further details.
67 | 
68 | ### Computing error vectors
69 | 
70 | As an example, the following command computes the aforementioned error file for a linear model trained with SGD:
71 | 
72 | ```
73 | python -m DL.evaluation.evaluation \
74 | --method linear_model_sgd \
75 | --training_data Sines_training.npz \
76 | --validation_data Sines_validation.npz \
77 | --iid_test_data Sines_test_iid.npz \
78 | --transfer_test_data Sines_test_transfer_1.npz Sines_test_transfer_2.npz Sines_test_transfer_3.npz \
79 | --prediction_horizon 1 \
80 | --history_length 1 \
81 | --no-averaging \
82 | --streaming \
83 | --verbose \
84 | --output_errors errors.npz
85 | ```
86 | 
87 | The implemented methods include Gaussian processes (`SVGPR`), system identification baselines (`system_id`), and neural networks (`NN`), among others.
88 | 
89 | ### Dependencies
90 | 
91 | All dependencies are tracked using [Pipenv](https://github.com/pypa/pipenv).
In order to reproduce our Python environment with all dependencies type the following command in this project's directory (transferable_dynamics_dataset): 92 | 93 | ``` 94 | pipenv install 95 | ``` 96 | 97 | After this the virtual environment can be activated by typing: 98 | 99 | ``` 100 | pipenv shell 101 | ``` 102 | 103 | ## Paper & Reference 104 | 105 | [Preprint.](https://www.is.mpg.de/uploads_file/attachment/attachment/589/ICRA20_1157_FI.pdf) 106 | 107 | 108 | In order to cite us please do so according to: 109 | ``` 110 | @conference{AgudeloEspanaetal20, 111 | title = {A Real-Robot Dataset for Assessing Transferability of Learned Dynamics Models }, 112 | author = {Agudelo-España, D. and Zadaianchuk, A. and Wenk, P. and Garg, A. and Akpo, J. and Grimminger, F. and Viereck, J. and Naveau, M. and Righetti, L. and Martius, G. and Krause, A. and Sch{\"o}lkopf, B. and Bauer, S. and W{\"u}thrich, M.}, 113 | booktitle = {IEEE International Conference on Robotics and Automation (ICRA)}, 114 | year = {2020} 115 | } 116 | ``` 117 | -------------------------------------------------------------------------------- /Settings/eql_prediction_horizon_1_history_length_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "averaging": true, 3 | "history_length": 1, 4 | "id": 88, 5 | "iid_test_data": "./Dataset/dataset_v06_sines_test_iid.npz", 6 | "mode": "train", 7 | "model_arch_params": { 8 | "layer_width": 10, 9 | "num_h_layers": 2 10 | }, 11 | "model_dir": "./Results/EQL", 12 | "model_train_params": { 13 | "L0_beta": 0.66, 14 | "batch_size": 20, 15 | "beta1": 0.4, 16 | "epochs_first_reg": 20, 17 | "epochs_per_reg": 20, 18 | "evaluate_every": 20, 19 | "learning_rate": 0.001, 20 | "reg_scales": [ 21 | 1e-10, 22 | 0.000207 23 | ], 24 | "test_div_threshold": 0.0001, 25 | "train_val_split": 1.0 26 | }, 27 | "optional_params": { 28 | "kill_tensorboard_summaries_and_checkpoints": true 29 | }, 30 | "prediction_horizon": 1, 31 | "streaming": false, 32 | "train_data": "./Dataset/dataset_v06_sines_training.npz", 33 | "training_data": "./Dataset/dataset_v01.npz", 34 | "transfer_test_data_1": "./Dataset/dataset_v06_sines_test_transfer_1.npz", 35 | "transfer_test_data_2": "./Dataset/dataset_v06_sines_test_transfer_2.npz", 36 | "transfer_test_data_3": "./Dataset/dataset_v06_sines_test_transfer_3.npz", 37 | "validation_data": "./Dataset/dataset_v06_sines_validation.npz" 38 | } -------------------------------------------------------------------------------- /Settings/nn_prediction_horizon_1_history_length_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "averaging": true, 3 | "history_length": 1, 4 | "id": 26, 5 | "iid_test_data": "./Dataset/dataset_v06_sines_test_iid.npz", 6 | "mode": "train", 7 | "model_arch_params": { 8 | "activation": "relu", 9 | "num_layers": 2, 10 | "num_units": 256 11 | }, 12 | "model_dir": "./Results/NN", 13 | "model_train_params": { 14 | "batch_size": 512, 15 | "epochs": 40, 16 | "l2_reg": 0.01, 17 | "learning_rate": 0.0001, 18 | "loss": "mse", 19 | "optimizer": "Adam", 20 | "validation_split": 0.0 21 | }, 22 | "prediction_horizon": 1, 23 | "streaming": false, 24 | "training_data": "./Dataset/dataset_v06_sines_training.npz", 25 | "transfer_test_data_1": "./Dataset/dataset_v06_sines_test_transfer_1.npz", 26 | "transfer_test_data_2": "./Dataset/dataset_v06_sines_test_transfer_2.npz", 27 | "transfer_test_data_3": "./Dataset/dataset_v06_sines_test_transfer_3.npz", 28 | "validation_data": 
"./Dataset/dataset_v06_sines_validation.npz" 29 | } -------------------------------------------------------------------------------- /img/1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rr-learning/transferable_dynamics_dataset/3660a4bd7c6b011c8d02853bad3cce540708512c/img/1.gif -------------------------------------------------------------------------------- /img/16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rr-learning/transferable_dynamics_dataset/3660a4bd7c6b011c8d02853bad3cce540708512c/img/16.png -------------------------------------------------------------------------------- /img/2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rr-learning/transferable_dynamics_dataset/3660a4bd7c6b011c8d02853bad3cce540708512c/img/2.gif -------------------------------------------------------------------------------- /img/3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rr-learning/transferable_dynamics_dataset/3660a4bd7c6b011c8d02853bad3cce540708512c/img/3.gif -------------------------------------------------------------------------------- /img/4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rr-learning/transferable_dynamics_dataset/3660a4bd7c6b011c8d02853bad3cce540708512c/img/4.gif -------------------------------------------------------------------------------- /img/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rr-learning/transferable_dynamics_dataset/3660a4bd7c6b011c8d02853bad3cce540708512c/img/5.png -------------------------------------------------------------------------------- /img/datasets_closed_loop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rr-learning/transferable_dynamics_dataset/3660a4bd7c6b011c8d02853bad3cce540708512c/img/datasets_closed_loop.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="transferable-dynamics-learning", 8 | version="1.0.0", 9 | author="Diego Agudelo", 10 | author_email="dagudelo@tuebingen.mpg.de", 11 | description="A benchmark for assessing transferability of " 12 | "dynamics learning algorithms", 13 | long_description=long_description, 14 | long_description_content_type="text/markdown", 15 | url="https://github.com/rr-learning/transferable_dynamics_dataset", 16 | packages=setuptools.find_packages(include=['DL', 'DL.*']), 17 | entry_points={ 18 | 'console_scripts': [ 19 | 'compute_errors=DL.evaluation.evaluation:main', 20 | 'plot_errors=DL.plotting.plots:main', 21 | ], 22 | }, 23 | install_requires=[ 24 | 'numpy', 25 | 'matplotlib', 26 | 'scikit-learn', 27 | ], 28 | python_requires='>=3.6', 29 | ) 30 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 
-------------------------------------------------------------------------------- /tests/fake_data_test_case.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import unittest 3 | from tempfile import TemporaryFile 4 | 5 | from DL.utils.data_loading import loadRobotData 6 | 7 | 8 | # TODO: close the fake data file at the end. 9 | class FakeDataTestCase(unittest.TestCase): 10 | 11 | def setUp(self): 12 | self.fake_data_npzfile = TemporaryFile() 13 | data_keys = ['measured_angles', 'measured_velocities', 'measured_torques', 14 | 'constrained_torques'] 15 | fake_data_dict = {} 16 | nseq = 5 17 | seq_length = 1000 18 | for k in data_keys: 19 | fake_data_dict[k] = np.random.rand(nseq, seq_length, 3) 20 | np.savez(self.fake_data_npzfile, **fake_data_dict) 21 | 22 | # File resets needed for subsequent reading. 23 | _ = self.fake_data_npzfile.seek(0) 24 | self.observations, self.actions = loadRobotData(self.fake_data_npzfile) 25 | _ = self.fake_data_npzfile.seek(0) 26 | -------------------------------------------------------------------------------- /tests/test_data_extractor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | Testing the extraction of data splits from a npz raw data file. 5 | """ 6 | 7 | import numpy as np 8 | import os 9 | import unittest 10 | 11 | from DL.utils.data_extractor import extract 12 | 13 | 14 | class Bunch(object): 15 | 16 | def __init__(self, **kwargs): 17 | self.__dict__.update(kwargs) 18 | 19 | 20 | class TestDataLoading(unittest.TestCase): 21 | 22 | def get_fake_data(self): 23 | keys = ['measured_velocities', 'constrained_torques', 24 | 'measured_torques', 'measured_angles', 'desired_torques'] 25 | nrollouts = 200 26 | nlen = 15000 27 | ndof = 3 28 | data = {} 29 | for k in keys: 30 | data[k] = np.arange(nrollouts * nlen * ndof).reshape( 31 | (nrollouts, nlen, ndof)) 32 | return data 33 | 34 | def setUp(self): 35 | self.full_data = self.get_fake_data() 36 | self.arguments = Bunch(discard_prefix=1000, takeoutrollouts_iid=9, 37 | takeoutrollouts_validation=3) 38 | 39 | def helper_shape_assert(self, data, shape): 40 | for k in data.keys(): 41 | self.assertEqual(data[k].shape, shape) 42 | 43 | def helper_flatten_data(self, data): 44 | array = [] 45 | for k in data.keys(): 46 | array.append(data[k].flatten()) 47 | return np.concatenate(array) 48 | 49 | def test_extract(self): 50 | training, validation, testiid, testtransfer_sets = extract( 51 | self.arguments, self.full_data) 52 | self.assertEqual(training.keys(), self.full_data.keys()) 53 | self.assertEqual(validation.keys(), self.full_data.keys()) 54 | self.assertEqual(testiid.keys(), self.full_data.keys()) 55 | self.helper_shape_assert(training, (38, 14000, 3)) 56 | self.helper_shape_assert(testiid, (9, 14000, 3)) 57 | self.helper_shape_assert(validation, (3, 14000, 3)) 58 | for dataset in testtransfer_sets: 59 | self.assertEqual(dataset.keys(), self.full_data.keys()) 60 | self.helper_shape_assert(dataset, (9, 14000, 3)) 61 | a = self.helper_flatten_data(training) 62 | b = self.helper_flatten_data(testiid) 63 | c = self.helper_flatten_data(validation) 64 | intersections = np.concatenate([np.intersect1d(a, b), 65 | np.intersect1d(b, c), np.intersect1d(a, c)]) 66 | self.assertEqual(np.size(intersections), 0) 67 | intersections = [] 68 | for dataset in testtransfer_sets: 69 | intersections.append(np.intersect1d(a, 70 | self.helper_flatten_data(dataset))) 71 | intersections = 
np.concatenate(intersections) 72 | self.assertEqual(np.size(intersections), 0) 73 | 74 | 75 | if __name__ == '__main__': 76 | unittest.main() 77 | -------------------------------------------------------------------------------- /tests/test_data_loading.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | Testing basic functionalities of DL.utils.dataLoading.py 5 | """ 6 | 7 | import unittest 8 | 9 | import numpy as np 10 | 11 | from DL.utils.data_loading import loadRobotData, unrollTrainingData 12 | from DL.utils.data_loading import unrollTrainingDataStream, computeNumberOfTrainingPairs 13 | from tests.fake_data_test_case import FakeDataTestCase 14 | 15 | 16 | class TestDataLoading(FakeDataTestCase): 17 | 18 | def test_unrolling0(self): 19 | hist_len = 1 20 | pred_horizon = 1 21 | targets, inputs = unrollTrainingData(self.observations, self.actions, 22 | hist_len, pred_horizon, False) 23 | ntargets, dimtargets = targets.shape 24 | ninputs, diminputs = inputs.shape 25 | self.assertEqual(ntargets, ninputs) 26 | self.assertEqual(dimtargets, 9) 27 | self.assertEqual(diminputs, 12) 28 | 29 | def test_unrolling1(self): 30 | hist_len = 2 31 | pred_horizon = 2 32 | targets, inputs = unrollTrainingData(self.observations, self.actions, 33 | hist_len, pred_horizon, False) 34 | ntargets, dimtargets = targets.shape 35 | ninputs, diminputs = inputs.shape 36 | self.assertEqual(ntargets, ninputs) 37 | self.assertEqual(dimtargets, 9) 38 | self.assertEqual(diminputs, 27) 39 | 40 | def print_unrolling(self, difference_learning): 41 | print("Unrolling test with difference learning {}".format( 42 | difference_learning)) 43 | testObs = np.arange(2*3*4).reshape([2,3,4]) 44 | testActions = np.arange(2*3*4).reshape([2,3,4])+100 45 | hist_len, pred_horizon = (1, 1) 46 | old_testObs, old_testActions = testObs.copy(), testActions.copy() 47 | targets, inputs = unrollTrainingData(testObs, testActions, hist_len, 48 | pred_horizon, difference_learning) 49 | self.assertTrue(np.array_equal((old_testObs, old_testActions), 50 | (testObs, testActions))) 51 | print("test observations") 52 | print(testObs) 53 | print("test actions") 54 | print(testActions) 55 | print("first observation sequence") 56 | print(testObs[0, :, :]) 57 | print("first action sequence") 58 | print(testActions[0, :, :]) 59 | print("inputs") 60 | print(inputs) 61 | print("targets") 62 | print(targets) 63 | 64 | def test_unrolling2(self): 65 | self.print_unrolling(False) 66 | self.print_unrolling(True) 67 | 68 | def test_unroll_minibatching(self): 69 | hist_len = 1 70 | pred_horizon = 1 71 | for average in (False, True): 72 | targets, inputs = unrollTrainingData(self.observations, 73 | self.actions, hist_len, pred_horizon, False, 74 | average=average) 75 | minibatch_generator = unrollTrainingDataStream(self.observations, 76 | self.actions, hist_len, pred_horizon, False, 77 | average=average, shuffle=False, infinite=False) 78 | joint_inputs = [] 79 | joint_targets = [] 80 | for minibatch_targets, minibatch_inputs in minibatch_generator: 81 | joint_inputs.append(minibatch_inputs) 82 | joint_targets.append(minibatch_targets) 83 | self.assertTrue(np.array_equal(targets, np.vstack(joint_targets))) 84 | self.assertTrue(np.array_equal(inputs, np.vstack(joint_inputs))) 85 | 86 | minibatch_generator = unrollTrainingDataStream(self.observations, 87 | self.actions, hist_len, pred_horizon, False, 88 | average=average, shuffle=True, infinite=False) 89 | joint_inputs = [] 90 | joint_targets = [] 
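# With shuffle=True the stream must yield exactly the same training pairs,
# only in a different order; the assertions below check both properties.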
91 | for minibatch_targets, minibatch_inputs in minibatch_generator: 92 | joint_inputs.append(minibatch_inputs) 93 | joint_targets.append(minibatch_targets) 94 | joint_targets = np.vstack(joint_targets) 95 | joint_inputs = np.vstack(joint_inputs) 96 | self.assertEqual(joint_targets.shape, targets.shape) 97 | self.assertEqual(joint_inputs.shape, inputs.shape) 98 | self.assertFalse(np.array_equal(targets, joint_targets)) 99 | self.assertFalse(np.array_equal(inputs, joint_inputs)) 100 | self.assertTrue(np.array_equal( 101 | np.sort(joint_targets.flatten()), np.sort(targets.flatten()))) 102 | self.assertTrue(np.array_equal( 103 | np.sort(joint_inputs.flatten()), np.sort(inputs.flatten()))) 104 | 105 | def test_dataset_size(self): 106 | history_len, prediction_horizon = (1, 10) 107 | dataset_size = computeNumberOfTrainingPairs(self.observations, 108 | history_len, 109 | prediction_horizon) 110 | stream_size = sum(1 for _ in unrollTrainingDataStream(self.observations, 111 | self.actions, 112 | history_len, 113 | prediction_horizon, 114 | True, 115 | infinite=False)) 116 | self.assertEqual(dataset_size, stream_size) 117 | 118 | def test_infinite_data_stream(self): 119 | hist_len = 1 120 | pred_horizon = 1 121 | finite_data_stream = unrollTrainingDataStream(self.observations, 122 | self.actions, hist_len, pred_horizon, False, shuffle=False, 123 | infinite=False) 124 | finite_data_stream_2 = unrollTrainingDataStream(self.observations, 125 | self.actions, hist_len, pred_horizon, False, shuffle=False, 126 | infinite=False) 127 | infinite_data_stream = unrollTrainingDataStream(self.observations, 128 | self.actions, hist_len, pred_horizon, False, shuffle=False, 129 | infinite=True) 130 | for y, x in finite_data_stream: 131 | yy , xx = next(infinite_data_stream) 132 | self.assertTrue(np.array_equal(y, yy)) 133 | self.assertTrue(np.array_equal(x, xx)) 134 | 135 | # The infinite stream starts again from the beginning. 
136 | for y, x in finite_data_stream_2: 137 | yy , xx = next(infinite_data_stream) 138 | self.assertTrue(np.array_equal(y, yy)) 139 | self.assertTrue(np.array_equal(x, xx)) 140 | 141 | 142 | if __name__ == '__main__': 143 | unittest.main() 144 | -------------------------------------------------------------------------------- /tests/test_data_splitting.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | Testing basic functionalities of DL.utils.data_splitting.py 5 | """ 6 | 7 | import unittest 8 | 9 | import numpy as np 10 | 11 | from DL.utils.data_loading import loadRobotData 12 | from DL.utils.data_splitting import CompleteRolloutsDataSplitter 13 | from tests.fake_data_test_case import FakeDataTestCase 14 | 15 | 16 | class TestDataSplitting(FakeDataTestCase): 17 | 18 | def test_default_load(self): 19 | data_splitter = CompleteRolloutsDataSplitter(self.fake_data_npzfile, 20 | np.arange(5)) 21 | training_obs, training_act = data_splitter.get_training_data() 22 | testing_obs, testing_act = data_splitter.get_test_data() 23 | self.assertEqual(training_obs.shape[0], training_act.shape[0]) 24 | self.assertEqual(training_obs.shape[1], testing_obs.shape[1]) 25 | self.assertEqual(training_act.shape[1], testing_act.shape[1]) 26 | self.assertEqual(training_obs.shape[0], training_act.shape[0]) 27 | self.assertEqual(testing_obs.shape[0], testing_act.shape[0]) 28 | flag = np.array_equal(np.sort(np.concatenate(( 29 | data_splitter.train_rollouts, 30 | data_splitter.test_rollouts))), 31 | np.arange(self.observations.shape[0])) 32 | self.assertTrue(flag) 33 | 34 | -------------------------------------------------------------------------------- /tests/test_recursive_prediction.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from DL.dynamics_learner_interface.dynamics_learner_interface import DynamicsLearnerInterface 6 | from DL.dynamics_learner_interface.dynamics_learner_interface import DynamicsLearnerExample 7 | from tests.fake_data_test_case import FakeDataTestCase 8 | 9 | 10 | class TestRecursivePrediction(object): 11 | 12 | def test_recursive_prediction(self): 13 | data = np.load(self.fake_data_npzfile) 14 | 15 | observation_sequences = np.concatenate((data['measured_angles'], 16 | data['measured_velocities'], 17 | data['measured_torques']), 2) 18 | 19 | action_sequences = data['constrained_torques'] 20 | 21 | history_length = 10 22 | prediction_horizon = 3 23 | dynamics_learner = DynamicsLearnerExample(history_length, 1) 24 | dynamics_learner.learn(observation_sequences, action_sequences) 25 | 26 | observation_prediction = dynamics_learner.predict_recursively( 27 | observation_sequences[:, :history_length], 28 | action_sequences[:, :history_length], 29 | action_sequences[:, history_length:history_length + 30 | prediction_horizon - 1]) 31 | 32 | rms = np.linalg.norm(observation_sequences[:, 33 | history_length + prediction_horizon - 1] - 34 | observation_prediction) 35 | print('rms: ', rms) 36 | 37 | 38 | if __name__ == '__main__': 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /tests/test_standardizer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from DL.utils.standardizer import Standardizer 4 | import numpy as np 5 | 6 | 7 | testData = np.arange(10).reshape([-1, 2]) 8 | testData[:, 1] = 2*testData[:, 
1] 9 | 10 | stateStandardizer = Standardizer(testData) 11 | 12 | print(stateStandardizer.means) 13 | print(stateStandardizer.stds) 14 | 15 | # test vector 16 | print("standardizing vector") 17 | testVector = testData[3, :] 18 | print(testVector) 19 | standVector = stateStandardizer.standardize(testVector) 20 | print(standVector) 21 | print(stateStandardizer.unstandardize(standVector)) 22 | 23 | print("standardizing matrix") 24 | print(testData) 25 | standData = stateStandardizer.standardize(testData) 26 | print(standData) 27 | print(stateStandardizer.unstandardize(standData)) 28 | print(np.mean(stateStandardizer.standardize(testData))) 29 | print(np.std(stateStandardizer.standardize(testData))) 30 | --------------------------------------------------------------------------------