├── Door ├── BC_training.py ├── Data │ ├── Demonstration.pickle │ └── Testing.pickle ├── Koopman_training.py ├── LSTM_training.py ├── NDP_training.py ├── NGF_training.py ├── Results │ ├── Controller │ │ └── NN_controller_best.pt │ ├── Expert_policy │ │ └── best_policy.pickle │ ├── KODex │ │ └── koopmanMatrix.npy │ ├── LSTM │ │ └── LSTM_agent.pt │ ├── NDP │ │ └── NDP_agent.pt │ ├── NGF │ │ └── ngf_door_200_1.pt │ └── NN │ │ └── BC_agent.pt └── utils │ ├── Controller.py │ ├── Koopman_evaluation.py │ ├── Observables.py │ ├── coord_trans.py │ ├── dmp_layer.py │ ├── fc_network.py │ ├── gaussian_rnn.py │ ├── gnn_networks.py │ ├── gym_env.py │ ├── quatmath.py │ └── rnn_network.py ├── README.md ├── Relocation ├── BC_training.py ├── Data │ └── Relocate_task.pickle ├── Koopman_training.py ├── LSTM_training.py ├── NDP_training.py ├── NGF_training.py ├── Results │ ├── Controller │ │ └── NN_controller_best.pt │ ├── Expert_policy │ │ └── best_policy.pickle │ └── KODex │ │ └── koopmanMatrix.npy └── utils │ ├── Controller.py │ ├── Koopman_evaluation.py │ ├── Observables.py │ ├── coord_trans.py │ ├── dmp_layer.py │ ├── fc_network.py │ ├── gaussian_rnn.py │ ├── gnn_networks.py │ ├── gym_env.py │ ├── quatmath.py │ └── rnn_network.py ├── Reorientation ├── BC_training.py ├── Data │ └── Pen_task.pickle ├── Koopman_training.py ├── LSTM_training.py ├── NDP_training.py ├── NGF_training.py ├── Results │ ├── Controller │ │ └── NN_controller_best.pt │ ├── Expert_policy │ │ └── best_policy.pickle │ └── KODex │ │ └── koopmanMatrix.npy └── utils │ ├── Controller.py │ ├── Koopman_evaluation.py │ ├── Observables.py │ ├── coord_trans.py │ ├── dmp_layer.py │ ├── fc_network.py │ ├── gaussian_rnn.py │ ├── gnn_networks.py │ ├── gym_env.py │ ├── quatmath.py │ └── rnn_network.py ├── Tool ├── BC_training.py ├── Data │ └── Hammer_task.pickle ├── Koopman_training.py ├── LSTM_training.py ├── NDP_training.py ├── NGF_training.py ├── Results │ ├── Controller │ │ └── NN_controller_best.pt │ ├── Expert_policy │ │ └── best_policy.pickle │ └── KODex │ │ └── koopmanMatrix.npy └── utils │ ├── Controller.py │ ├── Koopman_evaluation.py │ ├── Observables.py │ ├── coord_trans.py │ ├── dmp_layer.py │ ├── fc_network.py │ ├── gaussian_rnn.py │ ├── gnn_networks.py │ ├── gym_env.py │ ├── quatmath.py │ └── rnn_network.py └── geometric_fabrics_torch-0.0.0-py2.py3-none-any.whl /Door/Data/Demonstration.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Door/Data/Demonstration.pickle -------------------------------------------------------------------------------- /Door/Data/Testing.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Door/Data/Testing.pickle -------------------------------------------------------------------------------- /Door/Results/Controller/NN_controller_best.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Door/Results/Controller/NN_controller_best.pt -------------------------------------------------------------------------------- /Door/Results/Expert_policy/best_policy.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Door/Results/Expert_policy/best_policy.pickle -------------------------------------------------------------------------------- /Door/Results/KODex/koopmanMatrix.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Door/Results/KODex/koopmanMatrix.npy -------------------------------------------------------------------------------- /Door/Results/LSTM/LSTM_agent.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Door/Results/LSTM/LSTM_agent.pt -------------------------------------------------------------------------------- /Door/Results/NDP/NDP_agent.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Door/Results/NDP/NDP_agent.pt -------------------------------------------------------------------------------- /Door/Results/NGF/ngf_door_200_1.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Door/Results/NGF/ngf_door_200_1.pt -------------------------------------------------------------------------------- /Door/Results/NN/BC_agent.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Door/Results/NN/BC_agent.pt -------------------------------------------------------------------------------- /Door/utils/Controller.py: -------------------------------------------------------------------------------- 1 | """ 2 | Define the controllers 3 | """ 4 | import numpy as np 5 | 6 | class PID(): 7 | def __init__(self, Kp, Ki, Kd, setpoint = None): 8 | self.Kp, self.Ki, self.Kd = Kp, Ki, Kd 9 | self.setpoint = setpoint 10 | self._last_input = None 11 | self._last_error = None 12 | 13 | def __call__(self, input): # input: current hand joints 14 | ''' 15 | Call the PID controller with *input* and calculate and return a control output 16 | ''' 17 | error = self.setpoint - input 18 | d_input = input - (self._last_input if (self._last_input is not None) else input) 19 | self._proportional = self.Kp * error # P term 20 | # self._integral += self.Ki * error * dt 21 | # self._integral = _clamp(self._integral, self.output_limits) # Avoid integral windup 22 | self._derivative = -self.Kd * d_input #/ dt 23 | output = self._proportional + self._derivative 24 | self._last_output = output 25 | self._last_input = input 26 | self._last_error = error 27 | return output 28 | 29 | def set_goal(self, setpoint): # set target joint state 30 | self.setpoint = setpoint 31 | -------------------------------------------------------------------------------- /Door/utils/Observables.py: -------------------------------------------------------------------------------- 1 | """ 2 | Define the functions that are used for koopman lifting 3 | """ 4 | import numpy as np 5 | from utils.fc_network import FCNetwork 6 | from utils.gnn_networks import InteractionNetwork 7 | import copy 8 | import torch 9 | 10 | class DraftedObservable(object): 11 | def __init__(self, num_handStates, num_objStates): 12 | self.num_ori_handStates = 
num_handStates 13 | self.num_ori_objStates = num_objStates 14 | self.num_ori_states = num_handStates + num_objStates 15 | 16 | def z(self, handState, objStates): 17 | """ 18 | Inputs: hand states(pos, vel) & object states(pos, vel) 19 | Outputs: state in lifted space 20 | Note: velocity is optional 21 | """ 22 | obs = np.zeros(self.compute_observable(self.num_ori_handStates, self.num_ori_objStates)) 23 | index = 0 24 | for i in range(self.num_ori_handStates): 25 | obs[index] = handState[i] 26 | index += 1 27 | for i in range(self.num_ori_handStates): 28 | obs[index] = handState[i] ** 2 29 | index += 1 30 | for i in range(self.num_ori_objStates): 31 | obs[index] = objStates[i] 32 | index += 1 33 | for i in range(self.num_ori_objStates): 34 | obs[index] = objStates[i] ** 2 35 | index += 1 36 | for i in range(self.num_ori_objStates): 37 | for j in range(i + 1, self.num_ori_objStates): 38 | obs[index] = objStates[i] * objStates[j] 39 | index += 1 40 | for i in range(self.num_ori_handStates): 41 | for j in range(i + 1, self.num_ori_handStates): 42 | obs[index] = handState[i] * handState[j] 43 | index += 1 44 | # can add handState[i] ** 3 45 | for i in range(self.num_ori_handStates): 46 | obs[index] = handState[i] ** 3 47 | index += 1 48 | # for i in range(self.num_ori_objStates): 49 | # obs[index] = objStates[i] ** 3 50 | # index += 1 51 | # for i in range(self.num_ori_handStates): 52 | # for j in range(self.num_ori_handStates): 53 | # obs[index] = handState[i] ** 2 * handState[j] 54 | # index += 1 55 | for i in range(self.num_ori_objStates): 56 | for j in range(self.num_ori_objStates): 57 | obs[index] = objStates[i] ** 2 * objStates[j] 58 | index += 1 59 | return obs 60 | 61 | def compute_observable(self, num_hand, num_obj): 62 | return int(2 * (num_hand + num_obj) + (num_obj - 1) * num_obj / 2 + num_hand + num_obj ** 2 + (num_hand - 1) * num_hand / 2) 63 | 64 | class MLPObservable(object): 65 | def __init__(self, num_handStates, num_objStates, param): 66 | self.param = param 67 | self.num_ori_states = num_handStates + num_objStates 68 | self.encoder = [i for i in param['encoder']] 69 | self.encoder.insert(0, self.num_ori_states) 70 | self.decoder = [i for i in param['decoder']] 71 | self.decoder.append(self.num_ori_states) 72 | self.encoder_NN = FCNetwork(self.encoder, param['nonlinearity'], param['encoder_seed']) 73 | self.decoder_NN = FCNetwork(self.decoder, param['nonlinearity'], param['decoder_seed']) 74 | self.trainable_params = list(self.encoder_NN.parameters()) + list(self.decoder_NN.parameters()) 75 | 76 | def count_parameters(self): 77 | return sum(p.numel() for p in self.trainable_params if p.requires_grad) 78 | 79 | def z(self, input_state): 80 | """ 81 | Inputs: original states: hand states(pos, vel) & object states(pos, vel) 82 | Outputs: state in lifted space 83 | Note: velocity is optional 84 | """ 85 | z = self.encoder_NN(input_state) 86 | return z 87 | 88 | def z_inverse(self, liftedStates): 89 | """ 90 | Inputs: state in lifted space 91 | Outputs: original states: hand states(pos, vel) & object states(pos, vel) 92 | Note: velocity is optional 93 | """ 94 | x = self.decoder_NN(liftedStates) 95 | return x 96 | 97 | def train(self): # set the models in training mode 98 | self.encoder_NN.train() 99 | self.decoder_NN.train() 100 | 101 | def eval(self): # set the models in evaluation mode 102 | self.encoder_NN.eval() 103 | self.decoder_NN.eval() 104 | 105 | def loss(self, batch_data, encoder_lambda, pred_lambda): # the loss consists of three part: 106 | # 1). 
Intrinsic coordinates transformation loss, 2). Linear dynamics loss, 3). some others 107 | out = {} 108 | lifted_data = self.z(batch_data) 109 | batch_data_inverse = self.z_inverse(lifted_data) 110 | self.encoder_loss_criterion = torch.nn.MSELoss() 111 | self.encoder_loss = encoder_lambda * self.encoder_loss_criterion(batch_data_inverse, batch_data.detach().to(torch.float32)) # the auto-encoder loss 112 | out['Encoder_loss_torch'] = self.encoder_loss.item() 113 | batch_data_inverse_numpy = batch_data_inverse.detach().numpy() 114 | batch_data_numpy = batch_data.detach().numpy() 115 | Encoder_loss = np.abs(batch_data_numpy - batch_data_inverse_numpy) 116 | out['Encoder_loss_numpy'] = Encoder_loss 117 | self.pred_loss_criterion = torch.nn.MSELoss() 118 | for i in range(lifted_data.shape[0]): 119 | if i == 0: 120 | A = torch.outer(lifted_data[i,1,:], lifted_data[i,0,:]) / lifted_data.shape[0] 121 | G = torch.outer(lifted_data[i,0,:], lifted_data[i,0,:]) / lifted_data.shape[0] 122 | else: 123 | A += torch.outer(lifted_data[i,1,:], lifted_data[i,0,:]) / lifted_data.shape[0] 124 | G += torch.outer(lifted_data[i,0,:], lifted_data[i,0,:]) / lifted_data.shape[0] 125 | koopman_operator = torch.matmul(A, torch.linalg.pinv(G)) # using the batch data, compute the koopman matrix 126 | Linear_evol = torch.matmul(lifted_data[:,0,:], koopman_operator) 127 | self.pred_loss = pred_lambda * self.pred_loss_criterion(Linear_evol, lifted_data[:,1,:]) # the prediction loss 128 | out['Pred_loss_torch'] = self.pred_loss.item() 129 | Linear_evol_numpy = Linear_evol.detach().numpy() 130 | lifted_data_numpy = lifted_data[:,1,:].detach().numpy() 131 | pred_loss = np.abs(lifted_data_numpy - Linear_evol_numpy) 132 | out['Pred_loss_numpy'] = pred_loss 133 | loss = self.encoder_loss + self.pred_loss # loss is the sum of each component -> this one is used for pytorch training 134 | return loss, out, koopman_operator 135 | 136 | def load(self, encoder_path, decoder_path): 137 | self.encoder_NN.load_state_dict(torch.load(encoder_path)) 138 | self.decoder_NN.load_state_dict(torch.load(decoder_path)) 139 | 140 | class GNNObservable(object): 141 | def __init__(self, num_objStates, param): 142 | self.gnn_encoder = InteractionNetwork(num_objStates, param['relation_domain'], param['relation_encoder'], param['object_encoder'], param['gnn_nonlinearity'], param['gnn_encoder_seed']) 143 | tmp = param['object_decoder'] 144 | tmp.append(num_objStates) 145 | self.gnn_decoder = InteractionNetwork(param['object_encoder'][-1], param['relation_domain'], param['relation_decoder'], tmp, param['gnn_nonlinearity'], param['gnn_decoder_seed']) 146 | self.trainable_params = list(self.gnn_encoder.parameters()) + list(self.gnn_decoder.parameters()) 147 | self._rollout = False 148 | 149 | def count_parameters(self): 150 | return sum(p.numel() for p in self.trainable_params if p.requires_grad) 151 | 152 | def z(self, input_state, relations, flag): 153 | """ 154 | Inputs: original states: hand states(pos, vel) & object states(pos, vel); defined relations & relation types 155 | Outputs: state in lifted space 156 | Note: velocity is optional 157 | """ 158 | z = self.gnn_encoder(input_state, relations['sender_relations'], relations['receiver_relations'], relations['relation_info'], flag) 159 | return z 160 | 161 | def z_inverse(self, liftedStates, relations, flag): 162 | """ 163 | Inputs: state in lifted space; defined relations & relation types 164 | Outputs: original states: hand states(pos, vel) & object states(pos, vel) 165 | Note: velocity is optional 
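(For reference: the encoder/decoder pair is trained in loss() below, where the Koopman matrix is fit from the lifted batch as koopman_operator = A @ pinv(G), with A and G accumulated from outer products of consecutive lifted states.)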
166 | """ 167 | x = self.gnn_decoder(liftedStates, relations['sender_relations'], relations['receiver_relations'], relations['relation_info'], flag) 168 | return x 169 | 170 | def train(self): # set the models in training mode 171 | self.gnn_encoder.train() 172 | self.gnn_decoder.train() 173 | 174 | def eval(self): # set the models in evaluation mode 175 | self.gnn_encoder.eval() 176 | self.gnn_decoder.eval() 177 | 178 | def set_status(self, flag): 179 | if flag == 0: 180 | self._rollout = True 181 | 182 | def loss(self, batch_data, relations, encoder_lambda, pred_lambda): # the loss consists of three part: 183 | batch_size = batch_data.shape[0] 184 | out = {} 185 | lifted_data = self.z(batch_data, relations, self._rollout) 186 | # .view(effect_receivers_t_1.shape[0], -1) 187 | batch_data_inverse = self.z_inverse(lifted_data, relations, self._rollout) 188 | self.encoder_loss_criterion = torch.nn.MSELoss() 189 | self.encoder_loss = encoder_lambda * self.encoder_loss_criterion(batch_data_inverse, batch_data.detach()) # the auto-encoder loss 190 | out['Encoder_loss_torch'] = self.encoder_loss.item() 191 | batch_data_inverse_numpy = batch_data_inverse.detach().numpy() 192 | batch_data_numpy = batch_data.detach().numpy() 193 | Encoder_loss = np.abs(batch_data_numpy - batch_data_inverse_numpy) 194 | out['Encoder_loss_numpy'] = Encoder_loss 195 | self.pred_loss_criterion = torch.nn.MSELoss() 196 | lifted_data = lifted_data.view(batch_size, 2, -1) # Squeeze along the last 2 axis 197 | for i in range(lifted_data.shape[0]): 198 | if i == 0: 199 | A = torch.outer(lifted_data[i,1,:], lifted_data[i,0,:]) / lifted_data.shape[0] 200 | G = torch.outer(lifted_data[i,0,:], lifted_data[i,0,:]) / lifted_data.shape[0] 201 | else: 202 | A += torch.outer(lifted_data[i,1,:], lifted_data[i,0,:]) / lifted_data.shape[0] 203 | G += torch.outer(lifted_data[i,0,:], lifted_data[i,0,:]) / lifted_data.shape[0] 204 | koopman_operator = torch.matmul(A, torch.linalg.pinv(G)) # using the batch data, compute the koopman matrix 205 | Linear_evol = torch.matmul(lifted_data[:,0,:], koopman_operator) 206 | self.pred_loss = pred_lambda * self.pred_loss_criterion(Linear_evol, lifted_data[:,1,:]) # the prediction loss 207 | out['Pred_loss_torch'] = self.pred_loss.item() 208 | Linear_evol_numpy = Linear_evol.detach().numpy() 209 | lifted_data_numpy = lifted_data[:,1,:].detach().numpy() 210 | pred_loss = np.abs(lifted_data_numpy - Linear_evol_numpy) 211 | out['Pred_loss_numpy'] = pred_loss 212 | loss = self.encoder_loss + self.pred_loss # loss is the sum of each component -> this one is used for pytorch training 213 | return loss, out, koopman_operator 214 | 215 | def load(self, encoder_path, decoder_path): 216 | self.gnn_encoder.load_state_dict(torch.load(encoder_path)) 217 | self.gnn_decoder.load_state_dict(torch.load(decoder_path)) 218 | 219 | def Create_relations(self, batch_size, gnn_num_obj, gnn_num_relation, param, eval_length): 220 | receiver_relations = np.zeros((batch_size, gnn_num_obj, gnn_num_relation)) 221 | sender_relations = np.zeros((batch_size, gnn_num_obj, gnn_num_relation)) 222 | relation_info = np.zeros((batch_size, gnn_num_relation, param['relation_domain'])) 223 | receiver_relations[:, 0, 0:5] = 1 224 | cnt = 0 225 | for i in [1,2,3,4,5]: 226 | sender_relations[:, 0, (cnt * gnn_num_obj) + i] = 1 227 | cnt += 1 228 | receiver_relations[:, 1, 6] = 1 229 | receiver_relations[:, 1, 11] = 1 230 | sender_relations[:, 1, 0] = 1 231 | sender_relations[:, 1, 41] = 1 232 | receiver_relations[:, 2, 12] = 1 233 | 
receiver_relations[:, 2, 17] = 1 234 | sender_relations[:, 2, 0] = 1 235 | sender_relations[:, 2, 41] = 1 236 | receiver_relations[:, 3, 18] = 1 237 | receiver_relations[:, 3, 23] = 1 238 | sender_relations[:, 3, 0] = 1 239 | sender_relations[:, 3, 41] = 1 240 | receiver_relations[:, 4, 24] = 1 241 | receiver_relations[:, 4, 29] = 1 242 | sender_relations[:, 4, 0] = 1 243 | sender_relations[:, 4, 41] = 1 244 | receiver_relations[:, 5, 30] = 1 245 | receiver_relations[:, 5, 35] = 1 246 | sender_relations[:, 5, 0] = 1 247 | sender_relations[:, 5, 41] = 1 248 | receiver_relations[:, 6, 37:42] = 1 249 | cnt = 0 250 | for i in [1,2,3,4,5]: 251 | sender_relations[:, 6, (cnt * gnn_num_obj) + i] = 1 252 | cnt += 1 253 | relation_info[:, 0:5, 0] = 1 254 | relation_info[:, 6, 0] = 1 255 | relation_info[:, 11, 1] = 1 256 | relation_info[:, 12, 0] = 1 257 | relation_info[:, 17, 1] = 1 258 | relation_info[:, 18, 0] = 1 259 | relation_info[:, 23, 1] = 1 260 | relation_info[:, 24, 0] = 1 261 | relation_info[:, 29, 1] = 1 262 | relation_info[:, 30, 0] = 1 263 | relation_info[:, 35, 1] = 1 264 | relation_info[:, -5:-1, 1] = 1 265 | relation_info[:, -1, 1] = 1 266 | # Todo: check if this part is implemented correctly 267 | relations = {} 268 | relations['receiver_relations'] = torch.from_numpy(receiver_relations).to(torch.float32) 269 | relations['sender_relations'] = torch.from_numpy(sender_relations).to(torch.float32) 270 | relations['relation_info'] = torch.from_numpy(relation_info).to(torch.float32) 271 | relations_eval = {} 272 | receiver_relations_eval = relations['receiver_relations'][0][None,:].repeat(eval_length, 1, 1) 273 | sender_relations_eval = relations['sender_relations'][0][None,:].repeat(eval_length, 1, 1) 274 | relation_info_eval = relations['relation_info'][0][None,:].repeat(eval_length, 1, 1) 275 | relations_eval['receiver_relations'] = receiver_relations_eval 276 | relations_eval['sender_relations'] = sender_relations_eval 277 | relations_eval['relation_info'] = relation_info_eval 278 | return relations, relations_eval 279 | 280 | def Create_states(self): 281 | hand_dict = {} 282 | hand_dict['palm'] = [0, 3] # or [0, 2] 283 | hand_dict['forfinger'] = [3, 7] 284 | hand_dict['middlefinger'] = [7, 11] 285 | hand_dict['ringfinger'] = [11, 15] 286 | hand_dict['littlefinger'] = [15, 19] 287 | hand_dict['thumb'] = [19, 24] 288 | gnn_num_obj = 7 # palm, five fingers, manipulated object 289 | gnn_num_relation = gnn_num_obj * (gnn_num_obj - 1) 290 | return hand_dict, gnn_num_obj, gnn_num_relation -------------------------------------------------------------------------------- /Door/utils/coord_trans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import inv 3 | 4 | def ori_transform(current_ori, desired_ori): 5 | # build a new coordinate frame that define the desired_ori as [1, 0, 0] - use desired_ori to build a new coordinate basis 6 | # and then convert current_ori in that new coordinate frame 7 | x = desired_ori 8 | y, z = compute_orthonormal_vector(x) 9 | origin_x = np.array([1, 0, 0]) 10 | origin_y = np.array([0, 1, 0]) 11 | origin_z = np.array([0, 0, 1]) 12 | Q = np.zeros([3,3]) 13 | Q[0,0] = np.dot(x, origin_x) 14 | Q[0,1] = np.dot(x, origin_y) 15 | Q[0,2] = np.dot(x, origin_z) 16 | Q[1,0] = np.dot(y, origin_x) 17 | Q[1,1] = np.dot(y, origin_y) 18 | Q[1,2] = np.dot(y, origin_z) 19 | Q[2,0] = np.dot(z, origin_x) 20 | Q[2,1] = np.dot(z, origin_y) 21 | Q[2,2] = np.dot(z, origin_z) 22 | return np.dot(Q, 
current_ori) 23 | 24 | def ori_transform_inverse(current_ori, desired_ori): 25 | x = desired_ori 26 | y, z = compute_orthonormal_vector(x) 27 | origin_x = np.array([1, 0, 0]) 28 | origin_y = np.array([0, 1, 0]) 29 | origin_z = np.array([0, 0, 1]) 30 | Q = np.zeros([3,3]) 31 | Q[0,0] = np.dot(x, origin_x) 32 | Q[0,1] = np.dot(x, origin_y) 33 | Q[0,2] = np.dot(x, origin_z) 34 | Q[1,0] = np.dot(y, origin_x) 35 | Q[1,1] = np.dot(y, origin_y) 36 | Q[1,2] = np.dot(y, origin_z) 37 | Q[2,0] = np.dot(z, origin_x) 38 | Q[2,1] = np.dot(z, origin_y) 39 | Q[2,2] = np.dot(z, origin_z) 40 | Q = Q.T 41 | return np.dot(Q, current_ori) 42 | 43 | def compute_orthonormal_vector(desired_ori): 44 | # compute two orthonormal vectors w.r.t. desired_ori (the new x axis) to serve as the new y and z axes 45 | # for the y axis, we impose three constraints: 1). x1*v1 + x2*v2 + x3*v3 = 0, 2). v1^2 + v2^2 + v3^2 = 1, 3). v1 = v2 (manually specified) 46 | # the z axis is then uniquely defined by the x and y axes 47 | x1 = desired_ori[0] 48 | x2 = desired_ori[1] 49 | x3 = desired_ori[2] 50 | v1 = v2 = np.sqrt(1 / (2 + (x1 + x2) ** 2 / x3 ** 2)) 51 | v3 = -(x1+x2)/x3*v2 52 | y = np.array([v1, v2, v3]) 53 | z1 = x2*v3 - x3*v2 54 | z2 = x3*v1 - x1*v3 55 | z3 = x1*v2 - x2*v1 56 | z = np.array([z1, z2, z3]) / (z1**2+z2**2+z3**2) 57 | return y, z 58 | 59 | -------------------------------------------------------------------------------- /Door/utils/dmp_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | import numpy as np 4 | 5 | 6 | class DMPIntegrator(): 7 | def __init__(self, rbf='gaussian', only_g=False, az=False): 8 | a = 1 9 | self.rbf = rbf 10 | self.only_g = only_g 11 | self.az = az 12 | 13 | def forward(self, inputs, parameters, param_gradients, scaling, y0, dy0, goal=None, w=None, vel=False): 14 | # param_gradients is not explicitly used here.
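# Note on the layout below: the NN output `inputs` is parsed as [goal | weights]: the first `dim` entries give the DMP goal g, and the next dim*N entries give the basis-function weights w. `integrate` then rolls out the canonical system dx = -a_x*x/tau and the transformed system dz = a_z*(b_z*(g - y) - z) + f(x), dy = z, returning the whole trajectory (Y, dY, ddY).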
15 | # goal is None 16 | dim = int(parameters[0].item()) # 2 17 | k = dim 18 | N = int(parameters[1].item()) 19 | division = k*(N + 2) 20 | inputs_np = inputs # NN output: Size(64) 21 | if goal is not None: 22 | goal = goal 23 | w = w 24 | else: 25 | w = inputs_np[:, dim:dim*(N + 1)] # dim:dim*(N + 1) = 2:62 / w.shape = (batch_size, 60) 26 | goal = inputs_np[:, :dim] 27 | 28 | if self.az: #False 29 | alpha_z = inputs[:, -1] 30 | t = y0.shape[0] // inputs.shape[0] 31 | alpha_z = alpha_z.repeat(t, 1).transpose(0, 1).reshape(inputs.shape[0], -1) 32 | alpha_z = alpha_z.contiguous().view(alpha_z.shape[0] * alpha_z.shape[1], ) 33 | 34 | w = w.reshape(-1, N) # (2*batch_size, N) y0.shape -> [2*batch_size] 35 | 36 | if self.only_g: #false, if true, NN only outputs goal 37 | w = torch.zeros_like(w) 38 | if vel: # false 39 | dy0 = torch.zeros_like(y0) 40 | # dy0 = torch.zeros_like(y0) + 0.01 # set to be small values 0.01 41 | goal = goal.contiguous().view(goal.shape[0]*goal.shape[1], ) # [2*batch_size] y0.shape -> [2*batch_size] 42 | if self.az: 43 | X, dX, ddX = integrate(parameters, w, y0, dy0, goal, 1, rbf=self.rbf, az=True, alpha_z=alpha_z) 44 | else: 45 | X, dX, ddX = integrate(parameters, w, y0, dy0, goal, 1, rbf=self.rbf) # X -> whole trajectory 46 | # X and inputs.new(X) have the same values 47 | return inputs.new(X), inputs.new(dX), inputs.new(ddX) 48 | 49 | 50 | def forward_not_int(self, inputs, parameters, param_gradients, scaling, y0, dy0, goal=None, w=None, vel=False): 51 | dim = int(parameters[0].item()) 52 | k = dim 53 | N = int(parameters[1].item()) 54 | division = k*(N + 2) 55 | inputs_np = inputs 56 | if goal is not None: 57 | goal = goal 58 | w = w 59 | else: 60 | w = inputs_np[:, dim:dim*(N + 1)] 61 | goal = inputs_np[:, :dim] 62 | w = w.reshape(-1, N) 63 | if vel: 64 | dy0 = torch.zeros_like(y0) 65 | goal = goal.contiguous().view(goal.shape[0]*goal.shape[1], ) 66 | return parameters, w, y0, dy0, goal, 1 67 | 68 | def first_step(self, w, parameters, scaling, y0, dy0, l, tau=1): 69 | data = parameters 70 | y = y0 71 | self.y0 = y0 72 | z = dy0 * tau 73 | self.x = 1 74 | self.N = int(data[1].item()) 75 | self.dt = data[3].item() 76 | self.a_x = data[4].item() 77 | self.a_z = data[5].item() 78 | self.b_z = self.a_z / 4 79 | self.h = data[(6+self.N):(6+self.N*2)] 80 | self.c = data[6:(6+self.N)] 81 | self.num_steps = int(data[2].item())-1 82 | self.i = 0 83 | self.w = w.reshape(-1, self.N) 84 | self.tau = tau 85 | self.l = l 86 | 87 | def step(self, g, y, dy): 88 | g = g.reshape(-1, 1)[:, 0] 89 | z = dy*self.tau 90 | dt = self.dt 91 | for _ in range(self.l): 92 | dx = (-self.a_x * self.x) / self.tau 93 | self.x = self.x + dx * dt 94 | psi = torch.exp(-self.h * torch.pow((self.x - self.c), 2)) 95 | fx = torch.mv(self.w, psi)*self.x*(g - self.y0) / torch.sum(psi) 96 | dz = self.a_z * (self.b_z * (g - y) - z) + fx 97 | dy = z 98 | dz = dz / self.tau 99 | dy = dy / self.tau 100 | y = y + dy * dt 101 | z = z + dz * dt 102 | self.i += 1 103 | return y, dy, dz 104 | 105 | 106 | def integrate(data, w, y0, dy0, goal, tau, rbf='gaussian', az=False, alpha_z=None): 107 | y = y0 # y0:input 108 | z = dy0 * tau # tau = 1, z = dy 109 | x = 1 # x_0 110 | # data[2] -> simu horizon 111 | if w.is_cuda: 112 | Y = torch.cuda.FloatTensor(w.shape[0], int(data[2].item())).fill_(0) 113 | dY = torch.cuda.FloatTensor(w.shape[0], int(data[2].item())).fill_(0) 114 | ddY = torch.cuda.FloatTensor(w.shape[0], int(data[2].item())).fill_(0) 115 | else: 116 | Y = torch.zeros((w.shape[0],int(data[2].item()))) 117 | dY = 
torch.zeros((w.shape[0],int(data[2].item()))) 118 | ddY = torch.zeros((w.shape[0],int(data[2].item()))) 119 | Y[:, 0] = y 120 | dY[:, 0] = dy0 121 | ddY[:, 0] = z 122 | N = int(data[1].item()) 123 | dt = data[3].item() 124 | a_x = data[4].item() # a_x in paper (eqn. (2)) 125 | a_z = data[5].item() # \alpha in paper (eqn. (1)) 126 | if az: 127 | a_z = alpha_z 128 | a_z = torch.clamp(a_z, 0.5, 30) 129 | b_z = a_z / 4 # \beta in paper (eqn. (1)) 130 | h = data[(6+N):(6+N*2)] # h size N(30) 131 | c = data[6:(6+N)] # c size N(30) 132 | # the first 6 params are for other functions 133 | for i in range(0, int(data[2].item())-1): # data[2] -> simu horizon 134 | dx = (-a_x * x) / tau 135 | x = x + dx * dt 136 | eps = torch.pow((x - c), 2) 137 | # rbf are for basis functions (radial basis functions) 138 | if rbf == 'gaussian': 139 | psi = torch.exp(-h * eps) # (eqn. (3)) 140 | if rbf == 'multiquadric': 141 | psi = torch.sqrt(1 + h * eps) 142 | if rbf == 'inverse_quadric': 143 | psi = 1/(1 + h*eps) 144 | if rbf == 'inverse_multiquadric': 145 | psi = 1/torch.sqrt(1 + h * eps) 146 | if rbf == 'linear': 147 | psi = h * eps 148 | # print(w.shape) torch.Size([200, 30]) 149 | # print(psi.shape) torch.Size([30]) 150 | # print(x) int 151 | # print(goal.shape) torch.Size([200]) 152 | # print(y0.shape) torch.Size([200]) 153 | fx = torch.mv(w, psi)*x*(goal-y0) / torch.sum(psi) # mv - matrix-vector product 154 | dz = a_z * (b_z * (goal - y) - z) + fx 155 | dy = z 156 | dz = dz / tau # tau = 1 157 | dy = dy / tau 158 | y = y + dy * dt 159 | z = z + dz * dt 160 | Y[:, i+1] = y 161 | dY[:, i+1] = dy 162 | ddY[:, i+1] = dz 163 | return Y, dY, ddY 164 | 165 | 166 | class DMPParameters(): 167 | def __init__(self, N, tau, dt, Dof, scale, T, a_z=25): 168 | # N = number of radial basis functions 169 | # tau = 1 170 | # dt = time duration for each simulation time 171 | # Dof = num of states 172 | 173 | self.a_z = a_z 174 | self.a_x = 1 175 | self.N = N 176 | c = np.exp(-self.a_x * np.linspace(0, 1, self.N)) # a_x(data[4]) in paper (eqn. 
(2)) / c gives the horizontal shift of each basis function 177 | sigma2 = np.ones(self.N) * self.N**1.5 / c / self.a_x # h is the width of each basis function 178 | self.c = torch.from_numpy(c).float() 179 | self.sigma2 = torch.from_numpy(sigma2).float() 180 | self.tau = tau 181 | self.dt = dt 182 | self.time_steps = T 183 | self.y0 = [0] 184 | self.dy0 = np.zeros(Dof) 185 | self.Dof = Dof 186 | self.Y = torch.zeros((self.time_steps)) 187 | grad = torch.zeros((self.time_steps, self.N + 2)) 188 | 189 | self.data = {'time_steps':self.time_steps,'c':self.c,'sigma2':self.sigma2,'a_z':self.a_z,'a_x':self.a_x,'dt':self.dt,'Y':self.Y} 190 | dmp_data = torch.tensor([self.Dof,self.N,self.time_steps,self.dt,self.a_x,self.a_z]) 191 | data_tensor = torch.cat((dmp_data,self.c,self.sigma2),0) 192 | # len(data_tensor) = 6 + self.N + self.N 193 | 194 | data_tensor.dy0 = self.dy0 195 | data_tensor.tau = self.tau 196 | 197 | 198 | for i in range(0, self.N): 199 | weights = torch.zeros((1,self.N)) 200 | weights[0,i] = 1 201 | grad[:, i + 2 ], _, _= integrate(data_tensor, weights, 0, 0, 0, self.tau) 202 | weights = torch.zeros((1,self.N)) 203 | grad[:, 0], _, _ = integrate(data_tensor, weights, 1, 0, 0, self.tau) 204 | weights = torch.zeros((1,self.N)) 205 | grad[:, 1], _, _ = integrate(data_tensor, weights, 0, 0, 1, self.tau) 206 | 207 | ''' 208 | self.c = self.c.cuda() 209 | self.sigma2 = self.sigma2.cuda() 210 | self.grad = grad.cuda() 211 | self.point_grads = torch.zeros(54).cuda() 212 | ''' 213 | self.data_tensor = data_tensor 214 | self.grad_tensor = grad 215 | 216 | self.point_grads = torch.zeros(self.N*2 + 4) 217 | self.X = np.zeros((self.time_steps, self.Dof)) -------------------------------------------------------------------------------- /Door/utils/fc_network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class FCNetwork(nn.Module): 7 | def __init__(self, model_size = None, 8 | nonlinearity='tanh', # either 'tanh' or 'relu' 9 | seed = 100 10 | ): 11 | super(FCNetwork, self).__init__() 12 | 13 | self.obs_dim = model_size[0] 14 | self.act_dim = model_size[-1] 15 | self.layer_sizes = model_size 16 | 17 | # hidden layers 18 | torch.manual_seed(seed) 19 | self.fc_layers = nn.ModuleList([nn.Linear(self.layer_sizes[i], self.layer_sizes[i+1]) \ 20 | for i in range(len(self.layer_sizes) -1)]) # stack several layers together. 21 | # The weights are initialized by default as: 22 | # stdv = 1.
/ math.sqrt(self.weight.size(1)) 23 | # self.weight.data.uniform_(-stdv, stdv) 24 | # if self.bias is not None: 25 | # self.bias.data.uniform_(-stdv, stdv) 26 | self.nonlinearity = torch.relu if nonlinearity == 'relu' else torch.tanh 27 | 28 | def forward(self, x): 29 | if x.is_cuda: 30 | out = x.to('cpu') 31 | else: 32 | out = x 33 | out = out.to(torch.float32) 34 | for i in range(len(self.fc_layers)-1): 35 | out = self.fc_layers[i](out) 36 | out = self.nonlinearity(out) 37 | out = self.fc_layers[-1](out) 38 | return out -------------------------------------------------------------------------------- /Door/utils/gaussian_rnn.py: -------------------------------------------------------------------------------- 1 | from utils.rnn_network import RNNNetwork 2 | import numpy as np 3 | import torch 4 | from torch.autograd import Variable 5 | # import kerasncp as kncp 6 | # from kerasncp.torch import LTCCell 7 | 8 | class RNN: 9 | def __init__(self, 10 | m, 11 | n, 12 | input_sizes=64, 13 | hidden_state = 64, 14 | LSTM_layer = 1, 15 | seed=None): 16 | """ 17 | :param m: NN_input size 18 | :param n: NN_output size 19 | :param hidden_sizes: network hidden layer sizes (currently 2 layers only) 20 | :param seed: random seed 21 | """ 22 | # Set seed 23 | # ------------------------ 24 | if seed is not None: 25 | torch.manual_seed(seed) 26 | np.random.seed(seed) 27 | 28 | # Policy network 29 | # ------------------------ 30 | # If batch_first is True, then the input and output tensors are provided as (batch, seq, feature) instead of (seq, batch, feature). 31 | rnn_cell = torch.nn.LSTM(input_sizes, hidden_state, batch_first=True, num_layers = LSTM_layer) 32 | # self.model = NCPNetwork(ltc_cells, env) 33 | self.model = RNNNetwork(rnn_cell, m, n) 34 | # make weights smaller 35 | # for param in list(self.model.parameters())[-2:]: # only last layer 36 | # param.data = 1e1 * param.data 37 | self.trainable_params = list(self.model.parameters()) 38 | 39 | # Placeholders 40 | # ------------------------ 41 | self.obs_var = Variable(torch.randn(m), requires_grad=False) 42 | self.hidden_state_var = Variable(torch.randn( # generate an variable to store values 43 | (LSTM_layer, 1, self.model.rnn_cell.hidden_size)), requires_grad=False) 44 | self.cell_state_var = Variable(torch.randn( 45 | (LSTM_layer, 1, self.model.rnn_cell.hidden_size)), requires_grad=False) 46 | 47 | # get_action is used for executions 48 | def get_action(self, observation, hidden_state): 49 | o = np.float32(observation.reshape(1, 1, -1)) 50 | self.obs_var.data = torch.from_numpy(o) 51 | # print(hidden_state[0].shape) 52 | self.hidden_state_var.data = torch.from_numpy(np.float32(hidden_state[0])) 53 | self.cell_state_var.data = torch.from_numpy(np.float32(hidden_state[1])) 54 | # print(self.hidden_state_var.shape) 55 | mean, hidden_state = self.model.predict(self.obs_var, (self.hidden_state_var, self.cell_state_var)) 56 | mean = mean.data.numpy().ravel() 57 | new_hidden_state = hidden_state[0].data.numpy() 58 | new_cell_state = hidden_state[1].data.numpy() 59 | hidden_state = (new_hidden_state, new_cell_state) # these are needed for the next time step. 
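# A minimal rollout sketch (variable names hypothetical): start with hs = (np.zeros((1, 1, 64)), np.zeros((1, 1, 64))) for the default sizes, then repeat action, hs = policy.get_action(obs, hs), feeding the returned tuple back in at every step.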
60 | #Since the input is the obs at each time step, we have to manually pass the hidden state and the cell state 61 | return mean, hidden_state 62 | -------------------------------------------------------------------------------- /Door/utils/gnn_networks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | class RelationalModel(nn.Module): 6 | def __init__(self, input_size, model_size, nonlinearity='tanh', seed = 100): 7 | super(RelationalModel, self).__init__() 8 | 9 | self.layer_sizes = model_size 10 | self.layer_sizes.insert(0, input_size) 11 | torch.manual_seed(seed) 12 | 13 | self.fc_layers = nn.ModuleList([nn.Linear(self.layer_sizes[i], self.layer_sizes[i+1]) \ 14 | for i in range(len(self.layer_sizes) -1)]) # stack severeal layers together. 15 | self.nonlinearity = torch.relu if nonlinearity == 'relu' else torch.tanh 16 | 17 | def forward(self, x): 18 | ''' 19 | Args: 20 | x: [batch_size, n_relations, input_size] 21 | Returns: 22 | [batch_size, n_relations, output_size] 23 | ''' 24 | if x.is_cuda: 25 | out = x.to('cpu') 26 | else: 27 | out = x 28 | for i in range(len(self.fc_layers)): 29 | out = self.fc_layers[i](out) 30 | out = self.nonlinearity(out) 31 | return out 32 | 33 | class ObjectModel(nn.Module): 34 | def __init__(self, input_size, model_size, nonlinearity='tanh', seed = 100): 35 | super(ObjectModel, self).__init__() 36 | 37 | self.layer_sizes = model_size 38 | self.layer_sizes.insert(0, input_size) 39 | torch.manual_seed(seed) 40 | 41 | self.fc_layers = nn.ModuleList([nn.Linear(self.layer_sizes[i], self.layer_sizes[i+1]) \ 42 | for i in range(len(self.layer_sizes) -1)]) # stack severeal layers together. 43 | self.nonlinearity = torch.relu if nonlinearity == 'relu' else torch.tanh 44 | 45 | def forward(self, x): 46 | ''' 47 | Args: 48 | x: [batch_size, n_objects, input_size] 49 | Returns: 50 | [batch_size * n_objects, 2] speedX and speedY 51 | ''' 52 | if x.is_cuda: 53 | out = x.to('cpu') 54 | else: 55 | out = x 56 | for i in range(len(self.fc_layers) - 1): 57 | out = self.fc_layers[i](out) 58 | out = self.nonlinearity(out) 59 | out = self.fc_layers[-1](out) 60 | return out 61 | 62 | class InteractionNetwork(nn.Module): 63 | def __init__(self, object_dim, relation_dim, relation_model, object_model, nonlinearity, seed): 64 | super(InteractionNetwork, self).__init__() 65 | self.relational_model = RelationalModel(2 * object_dim + relation_dim, relation_model, nonlinearity, seed) 66 | # object_model defines the output dimension of the encoder(dim of lifted space)/decoder(dim of original space) 67 | self.object_model = ObjectModel(object_dim + relation_model[-1], object_model, nonlinearity, seed) 68 | 69 | def forward(self, objects, sender_relations, receiver_relations, relation_info, flag): 70 | # in our case, the variable objects consists of palm, each finger and the manipulated object 71 | if not flag: 72 | z_t = objects[:,0,:] 73 | z_t_1 = objects[:,1,:] 74 | senders_t = sender_relations.permute(0, 2, 1).bmm(z_t) 75 | senders_t_1 = sender_relations.permute(0, 2, 1).bmm(z_t_1) 76 | receivers_t = receiver_relations.permute(0, 2, 1).bmm(z_t) 77 | receivers_t_1 = receiver_relations.permute(0, 2, 1).bmm(z_t_1) 78 | effects_t = self.relational_model(torch.cat([senders_t, receivers_t, relation_info], 2)) # cat: 2 -> Concatenate along the second axis 79 | effects_t_1 = self.relational_model(torch.cat([senders_t_1, receivers_t_1, relation_info], 2)) # cat: 2 -> Concatenate along 
the second axis 80 | effect_receivers_t = receiver_relations.bmm(effects_t) 81 | effect_receivers_t_1 = receiver_relations.bmm(effects_t_1) 82 | predicted_t = self.object_model(torch.cat([z_t, effect_receivers_t], 2))[:,None,:] 83 | predicted_t_1 = self.object_model(torch.cat([z_t_1, effect_receivers_t_1], 2))[:,None,:] 84 | return torch.cat([predicted_t, predicted_t_1], 1) 85 | else: 86 | z_t = objects 87 | senders_t = sender_relations.permute(0, 2, 1).bmm(z_t) 88 | receivers_t = receiver_relations.permute(0, 2, 1).bmm(z_t) 89 | effects_t = self.relational_model(torch.cat([senders_t, receivers_t, relation_info], 2)) # cat: 2 -> Concatenate along the second axis 90 | effect_receivers_t = receiver_relations.bmm(effects_t) 91 | predicted_t = self.object_model(torch.cat([z_t, effect_receivers_t], 2)) 92 | return predicted_t 93 | 94 | -------------------------------------------------------------------------------- /Door/utils/gym_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wrapper around a gym env that provides convenience functions 3 | """ 4 | 5 | import gym 6 | import numpy as np 7 | from utils.rnn_network import RNNNetwork 8 | import pickle 9 | from tqdm import tqdm 10 | import copy 11 | from utils.coord_trans import ori_transform, ori_transform_inverse 12 | from utils.quatmath import euler2quat 13 | 14 | class EnvSpec(object): 15 | def __init__(self, obs_dim, act_dim, horizon): 16 | self.observation_dim = obs_dim 17 | self.action_dim = act_dim 18 | self.horizon = horizon 19 | 20 | 21 | class GymEnv(object): 22 | def __init__(self, env, env_kwargs=None, 23 | obs_mask=None, act_repeat=1, 24 | *args, **kwargs): 25 | 26 | # get the correct env behavior 27 | if type(env) == str: 28 | env = gym.make(env) # generare the mojuco env 29 | elif isinstance(env, gym.Env): 30 | env = env 31 | elif callable(env): 32 | env = env(**env_kwargs) 33 | else: 34 | print("Unsupported environment format") 35 | raise AttributeError 36 | 37 | self.env = env 38 | self.env_id = env.spec.id 39 | self.act_repeat = act_repeat 40 | try: 41 | self._horizon = env.spec.max_episode_steps # max_episode_steps is defnied in the __init__.py file (under ) 42 | except AttributeError: 43 | self._horizon = env.spec._horizon 44 | assert self._horizon % act_repeat == 0 45 | self._horizon = self._horizon // self.act_repeat 46 | 47 | try: 48 | self._action_dim = self.env.env.action_dim 49 | except AttributeError: 50 | self._action_dim = self.env.action_space.shape[0] 51 | 52 | try: 53 | self._observation_dim = self.env.env.obs_dim 54 | except AttributeError: 55 | self._observation_dim = self.env.observation_space.shape[0] 56 | 57 | # Specs 58 | self.spec = EnvSpec(self._observation_dim, self._action_dim, self._horizon) 59 | 60 | # obs mask 61 | self.obs_mask = np.ones(self._observation_dim) if obs_mask is None else obs_mask 62 | 63 | @property 64 | def action_dim(self): 65 | return self._action_dim 66 | 67 | @property 68 | def observation_dim(self): 69 | return self._observation_dim 70 | 71 | @property 72 | def observation_space(self): 73 | return self.env.observation_space 74 | 75 | @property 76 | def action_space(self): # each env has defined a action space 77 | return self.env.action_space 78 | 79 | @property 80 | def horizon(self): 81 | return self._horizon 82 | 83 | def reset(self, seed=None): 84 | try: 85 | self.env._elapsed_steps = 0 86 | return self.env.env.reset_model(seed=seed) 87 | except: 88 | if seed is not None: 89 | self.set_seed(seed) 90 | return self.env.reset() 
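# Note: reset() first tries the mj_envs-style interface (env.env.reset_model(seed=seed)); a plain gym env raises on that path, so the except branch falls back to set_seed() followed by a standard env.reset().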
91 | 92 | def reset4Koopman(self, seed=None, ori=None, init_pos=None, init_vel=None): 93 | try: 94 | self.env._elapsed_steps = 0 95 | return self.env.env.reset_model4Koopman(seed=seed, ori = ori, init_pos = init_pos, init_vel = init_vel) 96 | except: 97 | if seed is not None: 98 | self.set_seed(seed) 99 | return self.env.reset_model4Koopman(ori = ori, init_pos = init_pos, init_vel = init_vel) 100 | 101 | def KoopmanVisualize(self, seed=None, state_dict=None): 102 | try: 103 | self.env._elapsed_steps = 0 104 | return self.env.env.KoopmanVisualize(seed=seed, state_dict=state_dict) 105 | except: 106 | if seed is not None: 107 | self.set_seed(seed) 108 | return self.env.KoopmanVisualize(state_dict=state_dict) 109 | 110 | def reset_model(self, seed=None): 111 | # overloading for legacy code 112 | return self.reset(seed) 113 | 114 | def step(self, action): 115 | action = action.clip(self.action_space.low, self.action_space.high) 116 | # type(action_space) -> 117 | # self.action_space.low -> numpy.ndarray(lowest boundary) 118 | # self.action_space.high -> numpy.ndarray(highest boundary) 119 | if self.act_repeat == 1: 120 | obs, cum_reward, done, ifo = self.env.step(action) # the system dynamics is defined in each separate env python file 121 | # if(ifo['goal_achieved']): 122 | # print("done: ", ifo) 123 | # Run one timestep of the environment’s dynamics. 124 | else: 125 | cum_reward = 0.0 126 | for i in range(self.act_repeat): 127 | obs, reward, done, ifo = self.env.step(action) # the actual operations can be found in the env files 128 | # seems done is always set to be False 129 | cum_reward += reward 130 | if done: break 131 | return self.obs_mask * obs, cum_reward, done, ifo 132 | 133 | def render(self): 134 | try: 135 | self.env.env.mujoco_render_frames = True 136 | self.env.env.mj_render() 137 | except: 138 | self.env.render() 139 | 140 | def set_seed(self, seed=123): 141 | try: 142 | self.env.seed(seed) 143 | except AttributeError: 144 | self.env._seed(seed) 145 | 146 | def get_obs(self): 147 | try: 148 | return self.obs_mask * self.env.env.get_obs() 149 | except: 150 | return self.obs_mask * self.env.env._get_obs() 151 | 152 | def get_env_infos(self): 153 | try: 154 | return self.env.env.get_env_infos() 155 | except: 156 | return {} 157 | 158 | # =========================================== 159 | # Trajectory optimization related 160 | # Envs should support these functions in case of trajopt 161 | 162 | def get_env_state(self): 163 | try: 164 | return self.env.env.get_env_state() 165 | except: 166 | raise NotImplementedError 167 | 168 | def set_env_state(self, state_dict): 169 | try: 170 | self.env.env.set_env_state(state_dict) 171 | except: 172 | raise NotImplementedError 173 | 174 | def real_env_step(self, bool_val): 175 | try: 176 | self.env.env.real_step = bool_val 177 | except: 178 | raise NotImplementedError 179 | 180 | def visualize_policy_on_demos(self, policy, demos, Visualize, horizon=1000): 181 | print("Testing the RL agent!") 182 | self.reset() 183 | init_state_dict = dict() 184 | demo_ori_error = np.zeros([horizon - 1, len(demos)]) 185 | success_list_RL = [] 186 | for k in tqdm(range(len(demos))): 187 | init_state_dict['qpos'] = demos[k]['init']['qpos'] 188 | init_state_dict['qvel'] = demos[k]['init']['qvel'] 189 | init_state_dict['door_body_pos'] = demos[k]['init']['door_body_pos'] # fixed for each piece of demo data 190 | self.set_env_state(init_state_dict) 191 | o = demos[k]['o'] 192 | if True: # RL agent is trained using RNN 193 | # generate the hidden states at time 0 194 
| hidden_state = (np.zeros((1, 1, policy.model.rnn_cell.hidden_size)), np.zeros((1, 1, policy.model.rnn_cell.hidden_size))) 195 | for t in range(horizon - 1): # this loop is for system evolution, open loop control, no feedback 196 | if True: 197 | a = policy.get_action(o, hidden_state) 198 | hidden_state = a[1]['hidden_state'] 199 | else: 200 | a = policy.get_action(o) 201 | a =a[1]['evaluation'] 202 | o, *_ = self.step(a) 203 | if Visualize: 204 | self.render() 205 | # calculated state values using Koopman rollouts (in simulator) 206 | obj_obs = self.get_obs() 207 | # compute the errors 208 | demo_ori_error[t, k] = np.mean(np.abs(obj_obs[35:38])) 209 | current_hinge_pos = obj_obs[28:29] # door opening angle 210 | if current_hinge_pos > 1.35: 211 | success_list_RL.append(1) 212 | print("Success rate (RL) = %f" % (len(success_list_RL) / len(demos))) 213 | return demo_ori_error 214 | 215 | def generate_unseen_data(self, number_sample): 216 | samples = [] 217 | for ep in range(number_sample): 218 | o, desired_orien = self.reset(seed = None) 219 | episode_data = {} 220 | episode_data['init_state_dict'] = copy.deepcopy(self.get_env_state()) 221 | episode_data['pen_desired_orien'] = desired_orien # record the goal orientation angle 222 | episode_data['o'] = o 223 | handpos = o[:24] 224 | episode_data['handpos'] = handpos 225 | hand_vel = self.env.get_hand_vel() 226 | episode_data['handvel'] = hand_vel 227 | objpos = o[24:27] 228 | episode_data['objpos'] = objpos 229 | objvel = o[27:33] 230 | episode_data['objvel'] = objvel 231 | episode_data['desired_ori'] = o[36:39] 232 | objorient = o[33:36] 233 | episode_data['objorient'] = ori_transform(objorient, episode_data['desired_ori']) 234 | samples.append(episode_data) 235 | return samples 236 | 237 | def generate_unseen_data_relocate(self, number_sample): 238 | samples = [] 239 | for ep in range(number_sample): 240 | o, desired_pos = self.reset(seed = None) 241 | o_visual = self.env.get_full_obs_visualization() 242 | episode_data = {} 243 | episode_data['init'] = copy.deepcopy(self.get_env_state()) 244 | episode_data['desired_pos'] = desired_pos 245 | episode_data['o'] = o 246 | handpos = o[:30] 247 | episode_data['handpos'] = handpos 248 | hand_vel = self.env.get_hand_vel()[:30] 249 | episode_data['handvel'] = hand_vel 250 | objpos = o[39:42] 251 | episode_data['objpos'] = objpos - episode_data['desired_pos'] 252 | episode_data['objorient'] = o_visual[33:36] 253 | episode_data['objvel'] = self.env.get_hand_vel()[30:] 254 | samples.append(episode_data) 255 | return samples 256 | 257 | def generate_unseen_data_hammer(self, number_sample): 258 | samples = [] 259 | for ep in range(number_sample): 260 | o, _ = self.reset(seed = None) 261 | hand_vel = self.env.get_hand_vel() 262 | episode_data = {} 263 | episode_data['init'] = copy.deepcopy(self.get_env_state()) 264 | episode_data['o'] = o 265 | handpos = o[:26] 266 | episode_data['handpos'] = handpos 267 | episode_data['handvel'] = hand_vel[:26] 268 | objpos = o[49:52] + o[42:45] 269 | episode_data['objpos'] = objpos # current tool position 270 | episode_data['objorient'] = o[39:42] 271 | episode_data['objvel'] = o[27:33] 272 | episode_data['nail_goal'] = o[46:49] 273 | samples.append(episode_data) 274 | return samples 275 | 276 | def generate_unseen_data_door(self, number_sample): 277 | samples = [] 278 | for ep in range(number_sample): 279 | o, desired_pos = self.reset(seed = ep) 280 | obs_visual = self.env.get_full_obs_visualization() 281 | hand_vel = self.env.get_hand_vel() 282 | episode_data = {} 
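# Note: the entries below store the initial simulator state together with hand/object slices of the raw observation; seeding each reset with the episode index (seed = ep) keeps the generated evaluation set reproducible.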
283 | episode_data['init'] = copy.deepcopy(self.get_env_state()) 284 | episode_data['o'] = o 285 | handpos = obs_visual[:28] 286 | episode_data['handpos'] = handpos 287 | episode_data['handvel'] = hand_vel[:28] 288 | objpos = o[32:35] 289 | episode_data['objpos'] = objpos # current tool position 290 | episode_data['objvel'] = obs_visual[58:59] 291 | episode_data['handle_init'] = episode_data['init']['door_body_pos'] 292 | samples.append(episode_data) 293 | return samples -------------------------------------------------------------------------------- /Door/utils/quatmath.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # For testing whether a number is close to zero 3 | _FLOAT_EPS = np.finfo(np.float64).eps 4 | _EPS4 = _FLOAT_EPS * 4.0 5 | 6 | 7 | def mulQuat(qa, qb): 8 | res = np.zeros(4) 9 | res[0] = qa[0]*qb[0] - qa[1]*qb[1] - qa[2]*qb[2] - qa[3]*qb[3] 10 | res[1] = qa[0]*qb[1] + qa[1]*qb[0] + qa[2]*qb[3] - qa[3]*qb[2] 11 | res[2] = qa[0]*qb[2] - qa[1]*qb[3] + qa[2]*qb[0] + qa[3]*qb[1] 12 | res[3] = qa[0]*qb[3] + qa[1]*qb[2] - qa[2]*qb[1] + qa[3]*qb[0] 13 | return res 14 | 15 | def negQuat(quat): 16 | return np.array([quat[0], -quat[1], -quat[2], -quat[3]]) 17 | 18 | def quat2Vel(quat, dt=1): 19 | axis = quat[1:].copy() 20 | sin_a_2 = np.sqrt(np.sum(axis**2)) 21 | axis = axis/(sin_a_2+1e-8) 22 | speed = 2*np.arctan2(sin_a_2, quat[0])/dt 23 | return speed, axis 24 | 25 | def quatDiff2Vel(quat1, quat2, dt): 26 | neg = negQuat(quat1) 27 | diff = mulQuat(quat2, neg) 28 | return quat2Vel(diff, dt) 29 | 30 | 31 | def axis_angle2quat(axis, angle): 32 | c = np.cos(angle/2) 33 | s = np.sin(angle/2) 34 | return np.array([c, s*axis[0], s*axis[1], s*axis[2]]) 35 | 36 | def euler2mat(euler): 37 | """ Convert Euler Angles to Rotation Matrix. See rotation.py for notes """ 38 | euler = np.asarray(euler, dtype=np.float64) 39 | assert euler.shape[-1] == 3, "Invalid shaped euler {}".format(euler) 40 | 41 | ai, aj, ak = -euler[..., 2], -euler[..., 1], -euler[..., 0] 42 | si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak) 43 | ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak) 44 | cc, cs = ci * ck, ci * sk 45 | sc, ss = si * ck, si * sk 46 | 47 | mat = np.empty(euler.shape[:-1] + (3, 3), dtype=np.float64) 48 | mat[..., 2, 2] = cj * ck 49 | mat[..., 2, 1] = sj * sc - cs 50 | mat[..., 2, 0] = sj * cc + ss 51 | mat[..., 1, 2] = cj * sk 52 | mat[..., 1, 1] = sj * ss + cc 53 | mat[..., 1, 0] = sj * cs - sc 54 | mat[..., 0, 2] = -sj 55 | mat[..., 0, 1] = cj * si 56 | mat[..., 0, 0] = cj * ci 57 | return mat 58 | 59 | 60 | def euler2quat(euler): 61 | """ Convert Euler Angles to Quaternions. See rotation.py for notes """ 62 | euler = np.asarray(euler, dtype=np.float64) 63 | assert euler.shape[-1] == 3, "Invalid shape euler {}".format(euler) 64 | 65 | ai, aj, ak = euler[..., 2] / 2, -euler[..., 1] / 2, euler[..., 0] / 2 66 | si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak) 67 | ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak) 68 | cc, cs = ci * ck, ci * sk 69 | sc, ss = si * ck, si * sk 70 | 71 | quat = np.empty(euler.shape[:-1] + (4,), dtype=np.float64) 72 | quat[..., 0] = cj * cc + sj * ss 73 | quat[..., 3] = cj * sc - sj * cs 74 | quat[..., 2] = -(cj * ss + sj * cc) 75 | quat[..., 1] = cj * cs - sj * sc 76 | return quat 77 | 78 | 79 | def mat2euler(mat): 80 | """ Convert Rotation Matrix to Euler Angles. 
See rotation.py for notes """ 81 | mat = np.asarray(mat, dtype=np.float64) 82 | assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat) 83 | 84 | cy = np.sqrt(mat[..., 2, 2] * mat[..., 2, 2] + mat[..., 1, 2] * mat[..., 1, 2]) 85 | condition = cy > _EPS4 86 | euler = np.empty(mat.shape[:-1], dtype=np.float64) 87 | euler[..., 2] = np.where(condition, 88 | -np.arctan2(mat[..., 0, 1], mat[..., 0, 0]), 89 | -np.arctan2(-mat[..., 1, 0], mat[..., 1, 1])) 90 | euler[..., 1] = np.where(condition, 91 | -np.arctan2(-mat[..., 0, 2], cy), 92 | -np.arctan2(-mat[..., 0, 2], cy)) 93 | euler[..., 0] = np.where(condition, 94 | -np.arctan2(mat[..., 1, 2], mat[..., 2, 2]), 95 | 0.0) 96 | return euler 97 | 98 | 99 | def mat2quat(mat): 100 | """ Convert Rotation Matrix to Quaternion. See rotation.py for notes """ 101 | mat = np.asarray(mat, dtype=np.float64) 102 | assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat) 103 | 104 | Qxx, Qyx, Qzx = mat[..., 0, 0], mat[..., 0, 1], mat[..., 0, 2] 105 | Qxy, Qyy, Qzy = mat[..., 1, 0], mat[..., 1, 1], mat[..., 1, 2] 106 | Qxz, Qyz, Qzz = mat[..., 2, 0], mat[..., 2, 1], mat[..., 2, 2] 107 | # Fill only lower half of symmetric matrix 108 | K = np.zeros(mat.shape[:-2] + (4, 4), dtype=np.float64) 109 | K[..., 0, 0] = Qxx - Qyy - Qzz 110 | K[..., 1, 0] = Qyx + Qxy 111 | K[..., 1, 1] = Qyy - Qxx - Qzz 112 | K[..., 2, 0] = Qzx + Qxz 113 | K[..., 2, 1] = Qzy + Qyz 114 | K[..., 2, 2] = Qzz - Qxx - Qyy 115 | K[..., 3, 0] = Qyz - Qzy 116 | K[..., 3, 1] = Qzx - Qxz 117 | K[..., 3, 2] = Qxy - Qyx 118 | K[..., 3, 3] = Qxx + Qyy + Qzz 119 | K /= 3.0 120 | # TODO: vectorize this -- probably could be made faster 121 | q = np.empty(K.shape[:-2] + (4,)) 122 | it = np.nditer(q[..., 0], flags=['multi_index']) 123 | while not it.finished: 124 | # Use Hermitian eigenvectors, values for speed 125 | vals, vecs = np.linalg.eigh(K[it.multi_index]) 126 | # Select largest eigenvector, reorder to w,x,y,z quaternion 127 | q[it.multi_index] = vecs[[3, 0, 1, 2], np.argmax(vals)] 128 | # Prefer quaternion with positive w 129 | # (q * -1 corresponds to same rotation as q) 130 | if q[it.multi_index][0] < 0: 131 | q[it.multi_index] *= -1 132 | it.iternext() 133 | return q 134 | 135 | 136 | def quat2euler(quat): 137 | """ Convert Quaternion to Euler Angles. See rotation.py for notes """ 138 | return mat2euler(quat2mat(quat)) 139 | 140 | 141 | def quat2mat(quat): 142 | """ Convert Quaternion to Rotation Matrix.
See rotation.py for notes """ 143 | quat = np.asarray(quat, dtype=np.float64) 144 | assert quat.shape[-1] == 4, "Invalid shape quat {}".format(quat) 145 | 146 | w, x, y, z = quat[..., 0], quat[..., 1], quat[..., 2], quat[..., 3] 147 | Nq = np.sum(quat * quat, axis=-1) 148 | s = 2.0 / Nq 149 | X, Y, Z = x * s, y * s, z * s 150 | wX, wY, wZ = w * X, w * Y, w * Z 151 | xX, xY, xZ = x * X, x * Y, x * Z 152 | yY, yZ, zZ = y * Y, y * Z, z * Z 153 | 154 | mat = np.empty(quat.shape[:-1] + (3, 3), dtype=np.float64) 155 | mat[..., 0, 0] = 1.0 - (yY + zZ) 156 | mat[..., 0, 1] = xY - wZ 157 | mat[..., 0, 2] = xZ + wY 158 | mat[..., 1, 0] = xY + wZ 159 | mat[..., 1, 1] = 1.0 - (xX + zZ) 160 | mat[..., 1, 2] = yZ - wX 161 | mat[..., 2, 0] = xZ - wY 162 | mat[..., 2, 1] = yZ + wX 163 | mat[..., 2, 2] = 1.0 - (xX + yY) 164 | return np.where((Nq > _FLOAT_EPS)[..., np.newaxis, np.newaxis], mat, np.eye(3)) -------------------------------------------------------------------------------- /Door/utils/rnn_network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.autograd import Variable 5 | 6 | class RNNNetwork(nn.Module): 7 | def __init__(self, rnn_cell, 8 | m, 9 | n): 10 | """ 11 | :param rnn_cell: RNN (NCP) module 12 | """ 13 | # nn.Module that unfolds a RNN cell into a sequence 14 | super(RNNNetwork, self).__init__() 15 | self.rnn_cell = rnn_cell 16 | self.obs_dim = m 17 | self.act_dim = n 18 | # follow by a fc_layer 19 | self.fc_input_layer = nn.Linear(in_features=self.obs_dim, out_features=rnn_cell.input_size) # map observation_dim to RNN_input dim 20 | self.fc_output_layer = nn.Linear(in_features=rnn_cell.hidden_size, out_features=self.act_dim) 21 | 22 | def forward(self, x): # this is used for training 23 | device = x.device 24 | batch_size = x.size(0) 25 | # hidden states were set to be zeros at time 0 26 | hidden_state = ( # h_{0} and c_{0} 27 | torch.zeros((self.rnn_cell.num_layers, batch_size, self.rnn_cell.hidden_size), device=device), 28 | torch.zeros((self.rnn_cell.num_layers, batch_size, self.rnn_cell.hidden_size), device=device) 29 | ) 30 | x = self.fc_input_layer(x.float()) # (batch_size, seq_len, self.rnn_cell.input_size) 31 | outputs, hidden_state = self.rnn_cell(x, hidden_state) 32 | # output.shape = (batch_size, seq_len, self.rnn_cell.hidden_size) 33 | # output -> hidden_state at each time step (the top layer if multiple layers), and we use this as the output data 34 | # hidden_state = (hidden_state[0] = h_{T} and hidden_state[1] = c_{T}) at last time step T, if there are multiple layers, output are the h_{t} for the top layers 35 | outputs = self.fc_output_layer(outputs) #(batch_size, seq_len, act_dim) 36 | return outputs 37 | 38 | def predict(self, observation, hidden_state): 39 | # for execution, so batch_size is always 1. 
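Not for training. # Note: predict() feeds a single observation through fc_input_layer, reshapes it to (batch=1, seq=1, feature) for the LSTM cell, and returns the output together with the updated (h, c) pair, matching the rollout loop in gaussian_rnn.RNN.get_action.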
Not for training 40 | observation = self.fc_input_layer(observation) 41 | output, hidden_state = self.rnn_cell(observation.view(1, 1, -1), hidden_state) 42 | output = self.fc_output_layer(output) 43 | return output, hidden_state -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # On the Utility of Koopman Operator Theory in Learning Dexterous Manipulation Skills 2 | This is a repository containing the code for the paper "On the Utility of Koopman Operator Theory in Learning Dexterous Manipulation Skills", which was accepted for oral presentation at CoRL 2023. 3 | 4 | Project webpage: [KODex](https://sites.google.com/view/kodex-corl) 5 | 6 | Paper link: [On the Utility of Koopman Operator Theory in Learning Dexterous Manipulation Skills](https://arxiv.org/abs/2303.13446) 7 | ## Environment Setup 8 | 9 | Please refer to the DAPG project to set up the MuJoCo environment: [Learning Complex Dexterous Manipulation with Deep Reinforcement Learning and Demonstrations](https://github.com/aravindr93/hand_dapg). Note that we have not verified whether our code supports the latest mj_envs environment, so we recommend installing mj_envs v1.0.0, which is the version we used. We are also aware that installing mj_envs can be troublesome, so we have documented guidance on the installation process. You can download it here: [install.txt](https://drive.google.com/file/d/1ryYK2K3GFxgVfyzPdN1UeGP-92GzHvQV/view?usp=sharing). 10 | 11 | 12 | ## KODex 13 | 14 | The training and testing code for each task is provided separately; the differences mainly lie in the state design and the task success criteria. The first step is to install the NGF dependency: 15 | ``` 16 | $ conda activate mjrl-env 17 | $ pip install geometric-fabrics-torch/dist/geometric_fabrics_torch-0.0.0-py2.py3-none-any.whl --force 18 | ``` 19 | Please make sure that you switch to the conda environment where you installed the DAPG dependencies. If you followed the instructions from the DAPG project, it should be *mjrl-env*. In addition, for the Object Relocation and Tool Use tasks, the return values of the *get_obs()* functions in the corresponding env python files need to be replaced with the following: 20 | 21 | 1. In relocate_v0.py: `return np.concatenate([qp[:-6], palm_pos-obj_pos, palm_pos-target_pos, obj_pos-target_pos])` is replaced with `return np.concatenate([qp[:-6], palm_pos-obj_pos, palm_pos-target_pos, obj_pos-target_pos, obj_pos, palm_pos, target_pos])` 22 | 2. In hammer_v0.py: `return np.concatenate([qp[:-6], qv[-6:], palm_pos, obj_pos, obj_rot, target_pos, np.array([nail_impact])])` is replaced with `return np.concatenate([qp[:-6], qv[-6:], palm_pos, obj_pos, obj_rot, target_pos, np.array([nail_impact]), goal_pos, tool_pos - target_pos])` 23 | 24 | Additionally, in KODex we assume the initial object position is fixed. So, in relocate_v0.py, set the initial xy position of the ball to 0: 25 | 26 | 1. line85: `self.model.body_pos[self.obj_bid,0] = 0` 27 | 28 | 2. line86: `self.model.body_pos[self.obj_bid,1] = 0` 29 | 30 | Lastly, you have to add the following functions to each env python file: 31 | 32 | 1. 
In hammer_v0.py: 33 | ```python 34 | def KoopmanVisualize(self, state_dict): 35 | qp = state_dict['qpos'] 36 | qv = state_dict['qvel'] 37 | board_pos = state_dict['board_pos'] 38 | self.set_state(qp, qv) 39 | self.model.body_pos[self.model.body_name2id('nail_board')] = board_pos 40 | self.sim.forward() 41 | 42 | def get_full_obs_visualization(self): 43 | qp = self.data.qpos.ravel() 44 | qv = self.data.qvel.ravel() 45 | return np.concatenate([qp, qv]) 46 | ``` 47 | 48 | 2. In door_v0.py: 49 | ```python 50 | def KoopmanVisualize(self, state_dict): 51 | # visualize the koopman trajectory 52 | qp = state_dict['qpos'] 53 | qv = state_dict['qvel'] 54 | self.set_state(qp, qv) 55 | self.model.body_pos[self.door_bid] = state_dict['door_body_pos'] 56 | self.sim.forward() 57 | 58 | def get_full_obs_visualization(self): 59 | qp = self.data.qpos.ravel() 60 | qv = self.data.qvel.ravel() 61 | return np.concatenate([qp, qv]) 62 | ``` 63 | 64 | 3. In relocate_v0.py: 65 | ```python 66 | def KoopmanVisualize(self, state_dict): 67 | qp = state_dict['qpos'] 68 | qv = state_dict['qvel'] 69 | self.set_state(qp, qv) 70 | obj_pos = state_dict['obj_pos'] 71 | target_pos = state_dict['target_pos'] 72 | self.model.body_pos[self.obj_bid] = obj_pos 73 | self.model.site_pos[self.target_obj_sid] = target_pos 74 | self.sim.forward() 75 | 76 | def get_full_obs_visualization(self): 77 | qp = self.data.qpos.ravel() 78 | qv = self.data.qvel.ravel() 79 | return np.concatenate([qp, qv]) 80 | ``` 81 | 82 | 4. In pen_v0.py: 83 | ```python 84 | def KoopmanVisualize(self, state_dict): 85 | qp = self.init_qpos.copy() 86 | qp = state_dict['qpos'] 87 | qv = self.init_qvel.copy() 88 | qv = state_dict['qvel'] 89 | self.set_state(qp, qv) 90 | desired_orien = state_dict['desired_orien'] 91 | self.model.body_quat[self.target_obj_bid] = euler2quat(desired_orien) 92 | self.sim.forward() 93 | 94 | def get_full_obs_visualization(self): 95 | qp = self.data.qpos.ravel() 96 | qv = self.data.qvel.ravel() 97 | return np.concatenate([qp, qv]) 98 | ``` 99 | As far as we know, after you make these changes, you should be able to run the following experiments. If you still get some unexpected errors, feel free to leave an issue or contact us via email! 
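Before the per-task commands, the following sketch illustrates how the pieces above fit together at inference time: a saved Koopman matrix advances the lifted state linearly, and the PID controller tracks the hand reference recovered from it. This is a minimal illustrative sketch, not a script from this repo — the state dimensions, PID gains, rollout length, the `K @ z_t` product convention, and the commented `env.step` call are all placeholder assumptions; the actual end-to-end logic lives in each task's `Koopman_training.py`.

```python
import numpy as np
from utils.Observables import DraftedObservable  # drafted (hand-designed) lifting
from utils.Controller import PID                 # PD tracking controller from utils/

num_hand, num_obj = 24, 6                        # placeholder dimensions; task-dependent
lifting = DraftedObservable(num_hand, num_obj)
K = np.load('./Results/KODex/koopmanMatrix.npy') # learned Koopman operator

controller = PID(Kp=10.0, Ki=0.0, Kd=1.0)        # hypothetical gains
hand_t = np.zeros(num_hand)                      # in practice: read from the env
obj_t = np.zeros(num_obj)
z_t = lifting.z(hand_t, obj_t)                   # lift into the observable space
for t in range(100):
    z_t = K @ z_t                                # linear evolution in the lifted space
    hand_ref = z_t[:num_hand]                    # leading entries recover the hand joints
    controller.set_goal(hand_ref)                # track the reference joint state
    action = controller(hand_t)
    # hand_t, obj_t = env.step(action) ...       # apply in the simulator
```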
100 | 101 | ### Door 102 | To visualize each trained policy on the test set: 103 | 104 | **KODex**: 105 | ``` 106 | $ conda activate mjrl-env 107 | $ cd Door/ 108 | $ MJPL python3 Koopman_training.py --env_name door-v0 --demo_file ./Data/Testing.pickle --num_demo 0 --koopmanoption Drafted --velocity False --save_matrix True --matrix_file ./Results/KODex/koopmanMatrix.npy --control True --error_type demo --visualize True --unseen_test False --rl_policy ./Results/Expert_policy/best_policy.pickle 109 | ``` 110 | **NN**: 111 | ``` 112 | $ conda activate mjrl-env 113 | $ cd Door/ 114 | $ MJPL python3 BC_training.py --env_name door-v0 --demo_file ./Data/Testing.pickle --num_demo 0 --koopmanoption Drafted --velocity False --save_matrix True --matrix_file ./Results/NN/BC_agent.pt --control True --error_type demo --visualize True --unseen_test False --rl_policy ./Results/Expert_policy/best_policy.pickle 115 | ``` 116 | **LSTM**: 117 | ``` 118 | $ conda activate mjrl-env 119 | $ cd Door/ 120 | $ MJPL python3 LSTM_training.py --env_name door-v0 --demo_file ./Data/Testing.pickle --num_demo 0 --koopmanoption Drafted --velocity False --save_matrix True --matrix_file ./Results/LSTM/LSTM_agent.pt --control True --error_type demo --visualize True --unseen_test False --rl_policy ./Results/Expert_policy/best_policy.pickle 121 | ``` 122 | **NDP**: 123 | ``` 124 | $ conda activate mjrl-env 125 | $ cd Door/ 126 | $ MJPL python3 NDP_training.py --env_name door-v0 --demo_file ./Data/Testing.pickle --num_demo 0 --koopmanoption Drafted --velocity False --save_matrix True --matrix_file ./Results/NDP/NDP_agent.pt --control True --error_type demo --visualize True --unseen_test False --rl_policy ./Results/Expert_policy/best_policy.pickle 127 | ``` 128 | **NGF**: 129 | ``` 130 | $ conda activate mjrl-env 131 | $ cd Door/ 132 | $ MJPL python3 NGF_training.py --env_name door-v0 --demo_file Data/Testing.pickle --num_demo 0 --koopmanoption Drafted --velocity False --save_matrix True --matrix_file Results/NGF/ --control True --error_type demo --visualize True --unseen_test False --rl_policy Results/Expert_policy/best_policy.pickle --seed 1 --first_demo 200 133 | ``` 134 | 135 | To train a new policy using 200 demonstrations: 136 | 137 | **KODex**: 138 | ``` 139 | $ conda activate mjrl-env 140 | $ cd Door/ 141 | $ MJPL python3 Koopman_training.py --env_name door-v0 --demo_file ./Data/Demonstration.pickle --num_demo 200 --koopmanoption Drafted --velocity False --save_matrix True --matrix_file None --control True --error_type demo --visualize True --unseen_test False --rl_policy ./Results/Expert_policy/best_policy.pickle --folder_name Results/New_policy/ 142 | ``` 143 | 144 | For other baselines, the way to train new policies is similar to KODex. Note that NGF is currently not available for training. 145 | 146 | ### Tool Use, Object Relocation, In-hand Reorientation 147 | For these tasks, we also provide the demonstration data under the `/Data` folder of each task module. Therefore, you can use similar commands to those above to visualize the trained KODex policy or train a new KODex policy; a small sketch for sanity-checking a saved Koopman matrix is shown below. 
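Before launching the full pipeline on a new task, it can be useful to check that a saved Koopman matrix is consistent with the drafted observable dimension. The sketch below is illustrative (the hand/object dimensions are placeholders; each task defines its own) and also prints the largest eigenvalue magnitude, since eigenvalues on or inside the unit circle suggest the linear rollout will not blow up:

```python
import numpy as np
from utils.Observables import DraftedObservable

num_hand, num_obj = 24, 6   # placeholder dimensions; task-dependent
lifting = DraftedObservable(num_hand, num_obj)
dim_z = lifting.compute_observable(num_hand, num_obj)  # size of the lifted state

K = np.load('./Results/KODex/koopmanMatrix.npy')
assert K.shape == (dim_z, dim_z), "matrix does not match the lifted dimension"
print("largest |eigenvalue|:", np.abs(np.linalg.eigvals(K)).max())
```

The per-task commands follow the same pattern as for the Door task.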
For example, to visualize the KODex policy for the Relocation task: 148 | 149 | **Testing**: 150 | ``` 151 | $ conda activate mjrl-env 152 | $ cd Relocation/ 153 | $ MJPL python3 Koopman_training.py --env_name relocate-v0 --demo_file ./Data/Relocate_task.pickle --num_demo 0 --koopmanoption Drafted --velocity False --save_matrix True --matrix_file ./Results/KODex/koopmanMatrix.npy --control True --error_type demo --visualize True --unseen_test False --rl_policy ./Results/Expert_policy/best_policy.pickle 154 | ``` 155 | 156 | And to train a new KODex policy for the Relocation task: 157 | 158 | **Training**: 159 | ``` 160 | $ conda activate mjrl-env 161 | $ cd Relocation/ 162 | $ MJPL python3 Koopman_training.py --env_name relocate-v0 --demo_file ./Data/Relocate_task.pickle --num_demo 200 --koopmanoption Drafted --velocity False --save_matrix True --matrix_file None --control True --error_type demo --visualize True --unseen_test False --rl_policy ./Results/Expert_policy/best_policy.pickle --folder_name Results/New_policy/ 163 | ``` 164 | **More Test instances**: 165 | 166 | For each task, we also provide an extra 20,000 test instances. You can download them via [this link](https://drive.google.com/file/d/12heE7bgf0NvU0TAmhgtRCNCNrpzjLEp6/view?usp=sharing), and then specify the demo_file location when running the commands. 167 | ## Bibtex 168 | ``` 169 | @inproceedings{han2023KODex, 170 | title={On the Utility of Koopman Operator Theory in Learning Dexterous Manipulation Skills}, 171 | author={Han, Yunhai and Xie, Mandy and Zhao, Ye and Ravichandar, Harish}, 172 | booktitle={Proceedings of The 7th Conference on Robot Learning}, 173 | year={2023}, 174 | pages={106--126}, 175 | volume={229}, 176 | organization={PMLR} 177 | } 178 | ``` -------------------------------------------------------------------------------- /Relocation/Data/Relocate_task.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Relocation/Data/Relocate_task.pickle -------------------------------------------------------------------------------- /Relocation/Results/Controller/NN_controller_best.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Relocation/Results/Controller/NN_controller_best.pt -------------------------------------------------------------------------------- /Relocation/Results/Expert_policy/best_policy.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Relocation/Results/Expert_policy/best_policy.pickle -------------------------------------------------------------------------------- /Relocation/Results/KODex/koopmanMatrix.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Relocation/Results/KODex/koopmanMatrix.npy -------------------------------------------------------------------------------- /Relocation/utils/Controller.py: -------------------------------------------------------------------------------- 1 | """ 2 | Define the controllers 3 | """ 4 | import numpy as np 5 | 6 | class PID(): 7 | def __init__(self, Kp, Ki, Kd, setpoint = None): 8 | self.Kp, self.Ki, self.Kd = Kp, Ki, Kd 9 | self.setpoint = 
setpoint 10 | self._last_input = None 11 | self._last_error = None 12 | 13 | def __call__(self, input): # input: current hand joints 14 | ''' 15 | Call the PID controller with *input* and calculate and return a control output 16 | ''' 17 | error = self.setpoint - input 18 | d_input = input - (self._last_input if (self._last_input is not None) else input) 19 | self._proportional = self.Kp * error # P term 20 | # self._integral += self.Ki * error * dt 21 | # self._integral = _clamp(self._integral, self.output_limits) # Avoid integral windup 22 | self._derivative = -self.Kd * d_input #/ dt 23 | output = self._proportional + self._derivative 24 | self._last_output = output 25 | self._last_input = input 26 | self._last_error = error 27 | return output 28 | 29 | def set_goal(self, setpoint): # set target joint state 30 | self.setpoint = setpoint 31 | -------------------------------------------------------------------------------- /Relocation/utils/Observables.py: -------------------------------------------------------------------------------- 1 | """ 2 | Define the functions that are used for koopman lifting 3 | """ 4 | import numpy as np 5 | from utils.fc_network import FCNetwork 6 | from utils.gnn_networks import InteractionNetwork 7 | import copy 8 | import torch 9 | 10 | class DraftedObservable(object): 11 | def __init__(self, num_handStates, num_objStates): 12 | self.num_ori_handStates = num_handStates 13 | self.num_ori_objStates = num_objStates 14 | self.num_ori_states = num_handStates + num_objStates 15 | 16 | def z(self, handState, objStates): 17 | """ 18 | Inputs: hand states(pos, vel) & object states(pos, vel) 19 | Outputs: state in lifted space 20 | Note: velocity is optional 21 | """ 22 | obs = np.zeros(self.compute_observable(self.num_ori_handStates, self.num_ori_objStates)) 23 | index = 0 24 | for i in range(self.num_ori_handStates): 25 | obs[index] = handState[i] 26 | index += 1 27 | for i in range(self.num_ori_handStates): 28 | obs[index] = handState[i] ** 2 29 | index += 1 30 | for i in range(self.num_ori_objStates): 31 | obs[index] = objStates[i] 32 | index += 1 33 | for i in range(self.num_ori_objStates): 34 | obs[index] = objStates[i] ** 2 35 | index += 1 36 | for i in range(self.num_ori_objStates): 37 | for j in range(i + 1, self.num_ori_objStates): 38 | obs[index] = objStates[i] * objStates[j] 39 | index += 1 40 | for i in range(self.num_ori_handStates): 41 | for j in range(i + 1, self.num_ori_handStates): 42 | obs[index] = handState[i] * handState[j] 43 | index += 1 44 | # can add handState[i] ** 3 45 | for i in range(self.num_ori_handStates): 46 | obs[index] = handState[i] ** 3 47 | index += 1 48 | # for i in range(self.num_ori_objStates): 49 | # obs[index] = objStates[i] ** 3 50 | # index += 1 51 | # for i in range(self.num_ori_handStates): 52 | # for j in range(self.num_ori_handStates): 53 | # obs[index] = handState[i] ** 2 * handState[j] 54 | # index += 1 55 | for i in range(self.num_ori_objStates): 56 | for j in range(self.num_ori_objStates): 57 | obs[index] = objStates[i] ** 2 * objStates[j] 58 | index += 1 59 | return obs 60 | 61 | def compute_observable(self, num_hand, num_obj): 62 | return int(2 * (num_hand + num_obj) + (num_obj - 1) * num_obj / 2 + num_hand + num_obj ** 2 + (num_hand - 1) * num_hand / 2) 63 | 64 | class MLPObservable(object): 65 | def __init__(self, num_handStates, num_objStates, param): 66 | self.param = param 67 | self.num_ori_states = num_handStates + num_objStates 68 | self.encoder = [i for i in param['encoder']] 69 | self.encoder.insert(0, 
self.num_ori_states) 70 | self.decoder = [i for i in param['decoder']] 71 | self.decoder.append(self.num_ori_states) 72 | self.encoder_NN = FCNetwork(self.encoder, param['nonlinearity'], param['encoder_seed']) 73 | self.decoder_NN = FCNetwork(self.decoder, param['nonlinearity'], param['decoder_seed']) 74 | self.trainable_params = list(self.encoder_NN.parameters()) + list(self.decoder_NN.parameters()) 75 | 76 | def count_parameters(self): 77 | return sum(p.numel() for p in self.trainable_params if p.requires_grad) 78 | 79 | def z(self, input_state): 80 | """ 81 | Inputs: original states: hand states(pos, vel) & object states(pos, vel) 82 | Outputs: state in lifted space 83 | Note: velocity is optional 84 | """ 85 | z = self.encoder_NN(input_state) 86 | return z 87 | 88 | def z_inverse(self, liftedStates): 89 | """ 90 | Inputs: state in lifted space 91 | Outputs: original states: hand states(pos, vel) & object states(pos, vel) 92 | Note: velocity is optional 93 | """ 94 | x = self.decoder_NN(liftedStates) 95 | return x 96 | 97 | def train(self): # set the models in training mode 98 | self.encoder_NN.train() 99 | self.decoder_NN.train() 100 | 101 | def eval(self): # set the models in evaluation mode 102 | self.encoder_NN.eval() 103 | self.decoder_NN.eval() 104 | 105 | def loss(self, batch_data, encoder_lambda, pred_lambda): # the loss consists of three part: 106 | # 1). Intrinsic coordinates transformation loss, 2). Linear dynamics loss, 3). some others 107 | out = {} 108 | lifted_data = self.z(batch_data) 109 | batch_data_inverse = self.z_inverse(lifted_data) 110 | self.encoder_loss_criterion = torch.nn.MSELoss() 111 | self.encoder_loss = encoder_lambda * self.encoder_loss_criterion(batch_data_inverse, batch_data.detach().to(torch.float32)) # the auto-encoder loss 112 | out['Encoder_loss_torch'] = self.encoder_loss.item() 113 | batch_data_inverse_numpy = batch_data_inverse.detach().numpy() 114 | batch_data_numpy = batch_data.detach().numpy() 115 | Encoder_loss = np.abs(batch_data_numpy - batch_data_inverse_numpy) 116 | out['Encoder_loss_numpy'] = Encoder_loss 117 | self.pred_loss_criterion = torch.nn.MSELoss() 118 | for i in range(lifted_data.shape[0]): 119 | if i == 0: 120 | A = torch.outer(lifted_data[i,1,:], lifted_data[i,0,:]) / lifted_data.shape[0] 121 | G = torch.outer(lifted_data[i,0,:], lifted_data[i,0,:]) / lifted_data.shape[0] 122 | else: 123 | A += torch.outer(lifted_data[i,1,:], lifted_data[i,0,:]) / lifted_data.shape[0] 124 | G += torch.outer(lifted_data[i,0,:], lifted_data[i,0,:]) / lifted_data.shape[0] 125 | koopman_operator = torch.matmul(A, torch.linalg.pinv(G)) # using the batch data, compute the koopman matrix 126 | Linear_evol = torch.matmul(lifted_data[:,0,:], koopman_operator) 127 | self.pred_loss = pred_lambda * self.pred_loss_criterion(Linear_evol, lifted_data[:,1,:]) # the prediction loss 128 | out['Pred_loss_torch'] = self.pred_loss.item() 129 | Linear_evol_numpy = Linear_evol.detach().numpy() 130 | lifted_data_numpy = lifted_data[:,1,:].detach().numpy() 131 | pred_loss = np.abs(lifted_data_numpy - Linear_evol_numpy) 132 | out['Pred_loss_numpy'] = pred_loss 133 | loss = self.encoder_loss + self.pred_loss # loss is the sum of each component -> this one is used for pytorch training 134 | return loss, out, koopman_operator 135 | 136 | def load(self, encoder_path, decoder_path): 137 | self.encoder_NN.load_state_dict(torch.load(encoder_path)) 138 | self.decoder_NN.load_state_dict(torch.load(decoder_path)) 139 | 140 | class GNNObservable(object): 141 | def __init__(self, 
num_objStates, param): 142 | self.gnn_encoder = InteractionNetwork(num_objStates, param['relation_domain'], param['relation_encoder'], param['object_encoder'], param['gnn_nonlinearity'], param['gnn_encoder_seed']) 143 | tmp = param['object_decoder'] 144 | tmp.append(num_objStates) 145 | self.gnn_decoder = InteractionNetwork(param['object_encoder'][-1], param['relation_domain'], param['relation_decoder'], tmp, param['gnn_nonlinearity'], param['gnn_decoder_seed']) 146 | self.trainable_params = list(self.gnn_encoder.parameters()) + list(self.gnn_decoder.parameters()) 147 | self._rollout = False 148 | 149 | def count_parameters(self): 150 | return sum(p.numel() for p in self.trainable_params if p.requires_grad) 151 | 152 | def z(self, input_state, relations, flag): 153 | """ 154 | Inputs: original states: hand states(pos, vel) & object states(pos, vel); defined relations & relation types 155 | Outputs: state in lifted space 156 | Note: velocity is optional 157 | """ 158 | z = self.gnn_encoder(input_state, relations['sender_relations'], relations['receiver_relations'], relations['relation_info'], flag) 159 | return z 160 | 161 | def z_inverse(self, liftedStates, relations, flag): 162 | """ 163 | Inputs: state in lifted space; defined relations & relation types 164 | Outputs: original states: hand states(pos, vel) & object states(pos, vel) 165 | Note: velocity is optional 166 | """ 167 | x = self.gnn_decoder(liftedStates, relations['sender_relations'], relations['receiver_relations'], relations['relation_info'], flag) 168 | return x 169 | 170 | def train(self): # set the models in training mode 171 | self.gnn_encoder.train() 172 | self.gnn_decoder.train() 173 | 174 | def eval(self): # set the models in evaluation mode 175 | self.gnn_encoder.eval() 176 | self.gnn_decoder.eval() 177 | 178 | def set_status(self, flag): 179 | if flag == 0: 180 | self._rollout = True 181 | 182 | def loss(self, batch_data, relations, encoder_lambda, pred_lambda): # the loss consists of three part: 183 | batch_size = batch_data.shape[0] 184 | out = {} 185 | lifted_data = self.z(batch_data, relations, self._rollout) 186 | # .view(effect_receivers_t_1.shape[0], -1) 187 | batch_data_inverse = self.z_inverse(lifted_data, relations, self._rollout) 188 | self.encoder_loss_criterion = torch.nn.MSELoss() 189 | self.encoder_loss = encoder_lambda * self.encoder_loss_criterion(batch_data_inverse, batch_data.detach()) # the auto-encoder loss 190 | out['Encoder_loss_torch'] = self.encoder_loss.item() 191 | batch_data_inverse_numpy = batch_data_inverse.detach().numpy() 192 | batch_data_numpy = batch_data.detach().numpy() 193 | Encoder_loss = np.abs(batch_data_numpy - batch_data_inverse_numpy) 194 | out['Encoder_loss_numpy'] = Encoder_loss 195 | self.pred_loss_criterion = torch.nn.MSELoss() 196 | lifted_data = lifted_data.view(batch_size, 2, -1) # Squeeze along the last 2 axis 197 | for i in range(lifted_data.shape[0]): 198 | if i == 0: 199 | A = torch.outer(lifted_data[i,1,:], lifted_data[i,0,:]) / lifted_data.shape[0] 200 | G = torch.outer(lifted_data[i,0,:], lifted_data[i,0,:]) / lifted_data.shape[0] 201 | else: 202 | A += torch.outer(lifted_data[i,1,:], lifted_data[i,0,:]) / lifted_data.shape[0] 203 | G += torch.outer(lifted_data[i,0,:], lifted_data[i,0,:]) / lifted_data.shape[0] 204 | koopman_operator = torch.matmul(A, torch.linalg.pinv(G)) # using the batch data, compute the koopman matrix 205 | Linear_evol = torch.matmul(lifted_data[:,0,:], koopman_operator) 206 | self.pred_loss = pred_lambda * 
self.pred_loss_criterion(Linear_evol, lifted_data[:,1,:]) # the prediction loss 207 | out['Pred_loss_torch'] = self.pred_loss.item() 208 | Linear_evol_numpy = Linear_evol.detach().numpy() 209 | lifted_data_numpy = lifted_data[:,1,:].detach().numpy() 210 | pred_loss = np.abs(lifted_data_numpy - Linear_evol_numpy) 211 | out['Pred_loss_numpy'] = pred_loss 212 | loss = self.encoder_loss + self.pred_loss # loss is the sum of each component -> this one is used for pytorch training 213 | return loss, out, koopman_operator 214 | 215 | def load(self, encoder_path, decoder_path): 216 | self.gnn_encoder.load_state_dict(torch.load(encoder_path)) 217 | self.gnn_decoder.load_state_dict(torch.load(decoder_path)) 218 | 219 | def Create_relations(self, batch_size, gnn_num_obj, gnn_num_relation, param, eval_length): 220 | receiver_relations = np.zeros((batch_size, gnn_num_obj, gnn_num_relation)) 221 | sender_relations = np.zeros((batch_size, gnn_num_obj, gnn_num_relation)) 222 | relation_info = np.zeros((batch_size, gnn_num_relation, param['relation_domain'])) 223 | receiver_relations[:, 0, 0:5] = 1 224 | cnt = 0 225 | for i in [1,2,3,4,5]: 226 | sender_relations[:, 0, (cnt * gnn_num_obj) + i] = 1 227 | cnt += 1 228 | receiver_relations[:, 1, 6] = 1 229 | receiver_relations[:, 1, 11] = 1 230 | sender_relations[:, 1, 0] = 1 231 | sender_relations[:, 1, 41] = 1 232 | receiver_relations[:, 2, 12] = 1 233 | receiver_relations[:, 2, 17] = 1 234 | sender_relations[:, 2, 0] = 1 235 | sender_relations[:, 2, 41] = 1 236 | receiver_relations[:, 3, 18] = 1 237 | receiver_relations[:, 3, 23] = 1 238 | sender_relations[:, 3, 0] = 1 239 | sender_relations[:, 3, 41] = 1 240 | receiver_relations[:, 4, 24] = 1 241 | receiver_relations[:, 4, 29] = 1 242 | sender_relations[:, 4, 0] = 1 243 | sender_relations[:, 4, 41] = 1 244 | receiver_relations[:, 5, 30] = 1 245 | receiver_relations[:, 5, 35] = 1 246 | sender_relations[:, 5, 0] = 1 247 | sender_relations[:, 5, 41] = 1 248 | receiver_relations[:, 6, 37:42] = 1 249 | cnt = 0 250 | for i in [1,2,3,4,5]: 251 | sender_relations[:, 6, (cnt * gnn_num_obj) + i] = 1 252 | cnt += 1 253 | relation_info[:, 0:5, 0] = 1 254 | relation_info[:, 6, 0] = 1 255 | relation_info[:, 11, 1] = 1 256 | relation_info[:, 12, 0] = 1 257 | relation_info[:, 17, 1] = 1 258 | relation_info[:, 18, 0] = 1 259 | relation_info[:, 23, 1] = 1 260 | relation_info[:, 24, 0] = 1 261 | relation_info[:, 29, 1] = 1 262 | relation_info[:, 30, 0] = 1 263 | relation_info[:, 35, 1] = 1 264 | relation_info[:, -5:-1, 1] = 1 265 | relation_info[:, -1, 1] = 1 266 | # Todo: check if this part is implemented correctly 267 | relations = {} 268 | relations['receiver_relations'] = torch.from_numpy(receiver_relations).to(torch.float32) 269 | relations['sender_relations'] = torch.from_numpy(sender_relations).to(torch.float32) 270 | relations['relation_info'] = torch.from_numpy(relation_info).to(torch.float32) 271 | relations_eval = {} 272 | receiver_relations_eval = relations['receiver_relations'][0][None,:].repeat(eval_length, 1, 1) 273 | sender_relations_eval = relations['sender_relations'][0][None,:].repeat(eval_length, 1, 1) 274 | relation_info_eval = relations['relation_info'][0][None,:].repeat(eval_length, 1, 1) 275 | relations_eval['receiver_relations'] = receiver_relations_eval 276 | relations_eval['sender_relations'] = sender_relations_eval 277 | relations_eval['relation_info'] = relation_info_eval 278 | return relations, relations_eval 279 | 280 | def Create_states(self): 281 | hand_dict = {} 282 | hand_dict['palm'] = [0, 
3] # or [0, 2] 283 | hand_dict['forfinger'] = [3, 7] 284 | hand_dict['middlefinger'] = [7, 11] 285 | hand_dict['ringfinger'] = [11, 15] 286 | hand_dict['littlefinger'] = [15, 19] 287 | hand_dict['thumb'] = [19, 24] 288 | gnn_num_obj = 7 # palm, five fingers, manipulated object 289 | gnn_num_relation = gnn_num_obj * (gnn_num_obj - 1) 290 | return hand_dict, gnn_num_obj, gnn_num_relation -------------------------------------------------------------------------------- /Relocation/utils/coord_trans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import inv 3 | 4 | def ori_transform(current_ori, desired_ori): 5 | # build a new coordinate frame that defines desired_ori as [1, 0, 0] - use desired_ori to build a new coordinate basis 6 | # and then convert current_ori into that new coordinate frame 7 | x = desired_ori 8 | y, z = compute_orthonormal_vector(x) 9 | origin_x = np.array([1, 0, 0]) 10 | origin_y = np.array([0, 1, 0]) 11 | origin_z = np.array([0, 0, 1]) 12 | Q = np.zeros([3,3]) 13 | Q[0,0] = np.dot(x, origin_x) 14 | Q[0,1] = np.dot(x, origin_y) 15 | Q[0,2] = np.dot(x, origin_z) 16 | Q[1,0] = np.dot(y, origin_x) 17 | Q[1,1] = np.dot(y, origin_y) 18 | Q[1,2] = np.dot(y, origin_z) 19 | Q[2,0] = np.dot(z, origin_x) 20 | Q[2,1] = np.dot(z, origin_y) 21 | Q[2,2] = np.dot(z, origin_z) 22 | return np.dot(Q, current_ori) 23 | 24 | def ori_transform_inverse(current_ori, desired_ori): 25 | x = desired_ori 26 | y, z = compute_orthonormal_vector(x) 27 | origin_x = np.array([1, 0, 0]) 28 | origin_y = np.array([0, 1, 0]) 29 | origin_z = np.array([0, 0, 1]) 30 | Q = np.zeros([3,3]) 31 | Q[0,0] = np.dot(x, origin_x) 32 | Q[0,1] = np.dot(x, origin_y) 33 | Q[0,2] = np.dot(x, origin_z) 34 | Q[1,0] = np.dot(y, origin_x) 35 | Q[1,1] = np.dot(y, origin_y) 36 | Q[1,2] = np.dot(y, origin_z) 37 | Q[2,0] = np.dot(z, origin_x) 38 | Q[2,1] = np.dot(z, origin_y) 39 | Q[2,2] = np.dot(z, origin_z) 40 | Q = Q.T 41 | return np.dot(Q, current_ori) 42 | 43 | def compute_orthonormal_vector(desired_ori): 44 | # compute two orthonormal vectors wrt desired_ori (the new x axis) to serve as the new y, z axes 45 | # for the y axis, we have three constraints: 1) x1*v1 + x2*v2 + x3*v3 = 0, 2) v1^2 + v2^2 + v3^2 = 1, 3) v1 = v2 (manually specified) 46 | # the z axis is then uniquely defined by the x and y axes 47 | x1 = desired_ori[0] 48 | x2 = desired_ori[1] 49 | x3 = desired_ori[2] 50 | v1 = v2 = np.sqrt(1 / (2 + (x1 + x2) ** 2 / x3 ** 2)) 51 | v3 = -(x1+x2)/x3*v2 52 | y = np.array([v1, v2, v3]) 53 | z1 = x2*v3 - x3*v2 54 | z2 = x3*v1 - x1*v3 55 | z3 = x1*v2 - x2*v1 56 | z = np.array([z1, z2, z3]) / (z1**2+z2**2+z3**2) 57 | return y, z 58 | 59 | -------------------------------------------------------------------------------- /Relocation/utils/dmp_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | import numpy as np 4 | 5 | 6 | class DMPIntegrator(): 7 | def __init__(self, rbf='gaussian', only_g=False, az=False): 8 | a = 1 9 | self.rbf = rbf 10 | self.only_g = only_g 11 | self.az = az 12 | 13 | def forward(self, inputs, parameters, param_gradients, scaling, y0, dy0, goal=None, w=None, vel=False): 14 | # param_gradients is not explicitly used. 
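# Overview: `inputs` is the NN output; per batch row it packs the goal g in the
# first `dim` entries, followed by the dim*N basis-function weights w.
# integrate() below then rolls out the second-order DMP dynamics
# tau*dz = a_z * (b_z * (g - y) - z) + f(x), tau*dy = z, starting from (y0, dy0).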
15 | # goal is None 16 | dim = int(parameters[0].item()) # 2 17 | k = dim 18 | N = int(parameters[1].item()) 19 | division = k*(N + 2) 20 | inputs_np = inputs # NN output: Size(64) 21 | if goal is not None: 22 | goal = goal 23 | w = w 24 | else: 25 | w = inputs_np[:, dim:dim*(N + 1)] # dim:dim*(N + 1) = 2:62 / w.shape = (batch_size, 60) 26 | goal = inputs_np[:, :dim] 27 | 28 | if self.az: #False 29 | alpha_z = inputs[:, -1] 30 | t = y0.shape[0] // inputs.shape[0] 31 | alpha_z = alpha_z.repeat(t, 1).transpose(0, 1).reshape(inputs.shape[0], -1) 32 | alpha_z = alpha_z.contiguous().view(alpha_z.shape[0] * alpha_z.shape[1], ) 33 | 34 | w = w.reshape(-1, N) # (2*batch_size, N) y0.shape -> [2*batch_size] 35 | 36 | if self.only_g: #false, if true, NN only outputs goal 37 | w = torch.zeros_like(w) 38 | if vel: # false 39 | dy0 = torch.zeros_like(y0) 40 | # dy0 = torch.zeros_like(y0) + 0.01 # set to be small values 0.01 41 | goal = goal.contiguous().view(goal.shape[0]*goal.shape[1], ) # [2*batch_size] y0.shape -> [2*batch_size] 42 | if self.az: 43 | X, dX, ddX = integrate(parameters, w, y0, dy0, goal, 1, rbf=self.rbf, az=True, alpha_z=alpha_z) 44 | else: 45 | X, dX, ddX = integrate(parameters, w, y0, dy0, goal, 1, rbf=self.rbf) # X -> whole trajectory 46 | # X and inputs.new(X) have the same values 47 | return inputs.new(X), inputs.new(dX), inputs.new(ddX) 48 | 49 | 50 | def forward_not_int(self, inputs, parameters, param_gradients, scaling, y0, dy0, goal=None, w=None, vel=False): 51 | dim = int(parameters[0].item()) 52 | k = dim 53 | N = int(parameters[1].item()) 54 | division = k*(N + 2) 55 | inputs_np = inputs 56 | if goal is not None: 57 | goal = goal 58 | w = w 59 | else: 60 | w = inputs_np[:, dim:dim*(N + 1)] 61 | goal = inputs_np[:, :dim] 62 | w = w.reshape(-1, N) 63 | if vel: 64 | dy0 = torch.zeros_like(y0) 65 | goal = goal.contiguous().view(goal.shape[0]*goal.shape[1], ) 66 | return parameters, w, y0, dy0, goal, 1 67 | 68 | def first_step(self, w, parameters, scaling, y0, dy0, l, tau=1): 69 | data = parameters 70 | y = y0 71 | self.y0 = y0 72 | z = dy0 * tau 73 | self.x = 1 74 | self.N = int(data[1].item()) 75 | self.dt = data[3].item() 76 | self.a_x = data[4].item() 77 | self.a_z = data[5].item() 78 | self.b_z = self.a_z / 4 79 | self.h = data[(6+self.N):(6+self.N*2)] 80 | self.c = data[6:(6+self.N)] 81 | self.num_steps = int(data[2].item())-1 82 | self.i = 0 83 | self.w = w.reshape(-1, self.N) 84 | self.tau = tau 85 | self.l = l 86 | 87 | def step(self, g, y, dy): 88 | g = g.reshape(-1, 1)[:, 0] 89 | z = dy*self.tau 90 | dt = self.dt 91 | for _ in range(self.l): 92 | dx = (-self.a_x * self.x) / self.tau 93 | self.x = self.x + dx * dt 94 | psi = torch.exp(-self.h * torch.pow((self.x - self.c), 2)) 95 | fx = torch.mv(self.w, psi)*self.x*(g - self.y0) / torch.sum(psi) 96 | dz = self.a_z * (self.b_z * (g - y) - z) + fx 97 | dy = z 98 | dz = dz / self.tau 99 | dy = dy / self.tau 100 | y = y + dy * dt 101 | z = z + dz * dt 102 | self.i += 1 103 | return y, dy, dz 104 | 105 | 106 | def integrate(data, w, y0, dy0, goal, tau, rbf='gaussian', az=False, alpha_z=None): 107 | y = y0 # y0:input 108 | z = dy0 * tau # tau = 1, z = dy 109 | x = 1 # x_0 110 | # data[2] -> simu horizon 111 | if w.is_cuda: 112 | Y = torch.cuda.FloatTensor(w.shape[0], int(data[2].item())).fill_(0) 113 | dY = torch.cuda.FloatTensor(w.shape[0], int(data[2].item())).fill_(0) 114 | ddY = torch.cuda.FloatTensor(w.shape[0], int(data[2].item())).fill_(0) 115 | else: 116 | Y = torch.zeros((w.shape[0],int(data[2].item()))) 117 | dY = 
torch.zeros((w.shape[0],int(data[2].item()))) 118 | ddY = torch.zeros((w.shape[0],int(data[2].item()))) 119 | Y[:, 0] = y 120 | dY[:, 0] = dy0 121 | ddY[:, 0] = z 122 | N = int(data[1].item()) 123 | dt = data[3].item() 124 | a_x = data[4].item() # a_x in paper (eqn. (2)) 125 | a_z = data[5].item() # \alpha in paper (eqn. (1)) 126 | if az: 127 | a_z = alpha_z 128 | a_z = torch.clamp(a_z, 0.5, 30) 129 | b_z = a_z / 4 # \beta in paper (eqn. (1)) 130 | h = data[(6+N):(6+N*2)] # h size N(30) 131 | c = data[6:(6+N)] # c size N(30) 132 | # the first 6 params are for other functions 133 | for i in range(0, int(data[2].item())-1): # data[2] -> simu horizon 134 | dx = (-a_x * x) / tau 135 | x = x + dx * dt 136 | eps = torch.pow((x - c), 2) 137 | # rbf are for basis functions (radial basis functions) 138 | if rbf == 'gaussian': 139 | psi = torch.exp(-h * eps) # (eqn. (3)) 140 | if rbf == 'multiquadric': 141 | psi = torch.sqrt(1 + h * eps) 142 | if rbf == 'inverse_quadric': 143 | psi = 1/(1 + h*eps) 144 | if rbf == 'inverse_multiquadric': 145 | psi = 1/torch.sqrt(1 + h * eps) 146 | if rbf == 'linear': 147 | psi = h * eps 148 | # print(w.shape) torch.Size([200, 30]) 149 | # print(psi.shape) torch.Size([30]) 150 | # print(x) int 151 | # print(goal.shape) torch.Size([200]) 152 | # print(y0.shape) torch.Size([200]) 153 | fx = torch.mv(w, psi)*x*(goal-y0) / torch.sum(psi) # mv - matrix-vector product 154 | dz = a_z * (b_z * (goal - y) - z) + fx 155 | dy = z 156 | dz = dz / tau # tau = 1 157 | dy = dy / tau 158 | y = y + dy * dt 159 | z = z + dz * dt 160 | Y[:, i+1] = y 161 | dY[:, i+1] = dy 162 | ddY[:, i+1] = dz 163 | return Y, dY, ddY 164 | 165 | 166 | class DMPParameters(): 167 | def __init__(self, N, tau, dt, Dof, scale, T, a_z=25): 168 | # N = number of radial basis functions 169 | # tau = 1 170 | # dt = time duration for each simulation time 171 | # Dof = num of states 172 | 173 | self.a_z = a_z 174 | self.a_x = 1 175 | self.N = N 176 | c = np.exp(-self.a_x * np.linspace(0, 1, self.N)) # a_x(data[4]) in paper (eqn. 
(2)) / c is horizontal shifts of each basis function 177 | sigma2 = np.ones(self.N) * self.N**1.5 / c / self.a_x # h is the width of each basis function 178 | self.c = torch.from_numpy(c).float() 179 | self.sigma2 = torch.from_numpy(sigma2).float() 180 | self.tau = tau 181 | self.dt = dt 182 | self.time_steps = T 183 | self.y0 = [0] 184 | self.dy0 = np.zeros(Dof) 185 | self.Dof = Dof 186 | self.Y = torch.zeros((self.time_steps)) 187 | grad = torch.zeros((self.time_steps, self.N + 2)) 188 | 189 | self.data = {'time_steps':self.time_steps,'c':self.c,'sigma2':self.sigma2,'a_z':self.a_z,'a_x':self.a_x,'dt':self.dt,'Y':self.Y} 190 | dmp_data = torch.tensor([self.Dof,self.N,self.time_steps,self.dt,self.a_x,self.a_z]) 191 | data_tensor = torch.cat((dmp_data,self.c,self.sigma2),0) 192 | # len(data_tensor) = 6 + self.N + self.N 193 | 194 | data_tensor.dy0 = self.dy0 195 | data_tensor.tau = self.tau 196 | 197 | 198 | for i in range(0, self.N): 199 | weights = torch.zeros((1,self.N)) 200 | weights[0,i] = 1 201 | grad[:, i + 2], _, _ = integrate(data_tensor, weights, 0, 0, 0, self.tau) 202 | weights = torch.zeros((1,self.N)) 203 | grad[:, 0], _, _ = integrate(data_tensor, weights, 1, 0, 0, self.tau) 204 | weights = torch.zeros((1,self.N)) 205 | grad[:, 1], _, _ = integrate(data_tensor, weights, 0, 0, 1, self.tau) 206 | 207 | ''' 208 | self.c = self.c.cuda() 209 | self.sigma2 = self.sigma2.cuda() 210 | self.grad = grad.cuda() 211 | self.point_grads = torch.zeros(54).cuda() 212 | ''' 213 | self.data_tensor = data_tensor 214 | self.grad_tensor = grad 215 | 216 | self.point_grads = torch.zeros(self.N*2 + 4) 217 | self.X = np.zeros((self.time_steps, self.Dof)) 218 | -------------------------------------------------------------------------------- /Relocation/utils/fc_network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class FCNetwork(nn.Module): 7 | def __init__(self, model_size = None, 8 | nonlinearity='tanh', # either 'tanh' or 'relu' 9 | seed = 100 10 | ): 11 | super(FCNetwork, self).__init__() 12 | 13 | self.obs_dim = model_size[0] 14 | self.act_dim = model_size[-1] 15 | self.layer_sizes = model_size 16 | 17 | # hidden layers 18 | torch.manual_seed(seed) 19 | np.random.seed(seed) 20 | self.fc_layers = nn.ModuleList([nn.Linear(self.layer_sizes[i], self.layer_sizes[i+1]) \ 21 | for i in range(len(self.layer_sizes) -1)]) # stack several layers together. 22 | # The weights are initialized by default as: 23 | # stdv = 1. 
/ math.sqrt(self.weight.size(1)) 24 | # self.weight.data.uniform_(-stdv, stdv) 25 | # if self.bias is not None: 26 | # self.bias.data.uniform_(-stdv, stdv) 27 | self.nonlinearity = torch.relu if nonlinearity == 'relu' else torch.tanh 28 | 29 | def forward(self, x): 30 | if x.is_cuda: 31 | out = x.to('cpu') 32 | else: 33 | out = x 34 | out = out.to(torch.float32) 35 | for i in range(len(self.fc_layers)-1): 36 | out = self.fc_layers[i](out) 37 | out = self.nonlinearity(out) 38 | out = self.fc_layers[-1](out) 39 | return out 40 | -------------------------------------------------------------------------------- /Relocation/utils/gaussian_rnn.py: -------------------------------------------------------------------------------- 1 | from utils.rnn_network import RNNNetwork 2 | import numpy as np 3 | import torch 4 | from torch.autograd import Variable 5 | # import kerasncp as kncp 6 | # from kerasncp.torch import LTCCell 7 | 8 | class RNN: 9 | def __init__(self, 10 | m, 11 | n, 12 | input_sizes=64, 13 | hidden_state = 64, 14 | LSTM_layer = 1, 15 | seed=None): 16 | """ 17 | :param m: NN_input size 18 | :param n: NN_output size 19 | :param hidden_sizes: network hidden layer sizes (currently 2 layers only) 20 | :param seed: random seed 21 | """ 22 | # Set seed 23 | # ------------------------ 24 | if seed is not None: 25 | torch.manual_seed(seed) 26 | np.random.seed(seed) 27 | 28 | # Policy network 29 | # ------------------------ 30 | # If batch_first is True, then the input and output tensors are provided as (batch, seq, feature) instead of (seq, batch, feature). 31 | rnn_cell = torch.nn.LSTM(input_sizes, hidden_state, batch_first=True, num_layers = LSTM_layer) 32 | # self.model = NCPNetwork(ltc_cells, env) 33 | self.model = RNNNetwork(rnn_cell, m, n) 34 | # make weights smaller 35 | # for param in list(self.model.parameters())[-2:]: # only last layer 36 | # param.data = 1e1 * param.data 37 | self.trainable_params = list(self.model.parameters()) 38 | 39 | # Placeholders 40 | # ------------------------ 41 | self.obs_var = Variable(torch.randn(m), requires_grad=False) 42 | self.hidden_state_var = Variable(torch.randn( # generate an variable to store values 43 | (LSTM_layer, 1, self.model.rnn_cell.hidden_size)), requires_grad=False) 44 | self.cell_state_var = Variable(torch.randn( 45 | (LSTM_layer, 1, self.model.rnn_cell.hidden_size)), requires_grad=False) 46 | 47 | # get_action is used for executions 48 | def get_action(self, observation, hidden_state): 49 | o = np.float32(observation.reshape(1, 1, -1)) 50 | self.obs_var.data = torch.from_numpy(o) 51 | # print(hidden_state[0].shape) 52 | self.hidden_state_var.data = torch.from_numpy(np.float32(hidden_state[0])) 53 | self.cell_state_var.data = torch.from_numpy(np.float32(hidden_state[1])) 54 | # print(self.hidden_state_var.shape) 55 | mean, hidden_state = self.model.predict(self.obs_var, (self.hidden_state_var, self.cell_state_var)) 56 | mean = mean.data.numpy().ravel() 57 | new_hidden_state = hidden_state[0].data.numpy() 58 | new_cell_state = hidden_state[1].data.numpy() 59 | hidden_state = (new_hidden_state, new_cell_state) # these are needed for the next time step. 
60 | #Since the input is the obs at each time step, we have to manually pass the hidden state and the cell state 61 | return mean, hidden_state 62 | -------------------------------------------------------------------------------- /Relocation/utils/gnn_networks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | class RelationalModel(nn.Module): 6 | def __init__(self, input_size, model_size, nonlinearity='tanh', seed = 100): 7 | super(RelationalModel, self).__init__() 8 | 9 | self.layer_sizes = model_size 10 | self.layer_sizes.insert(0, input_size) 11 | torch.manual_seed(seed) 12 | 13 | self.fc_layers = nn.ModuleList([nn.Linear(self.layer_sizes[i], self.layer_sizes[i+1]) \ 14 | for i in range(len(self.layer_sizes) -1)]) # stack severeal layers together. 15 | self.nonlinearity = torch.relu if nonlinearity == 'relu' else torch.tanh 16 | 17 | def forward(self, x): 18 | ''' 19 | Args: 20 | x: [batch_size, n_relations, input_size] 21 | Returns: 22 | [batch_size, n_relations, output_size] 23 | ''' 24 | if x.is_cuda: 25 | out = x.to('cpu') 26 | else: 27 | out = x 28 | for i in range(len(self.fc_layers)): 29 | out = self.fc_layers[i](out) 30 | out = self.nonlinearity(out) 31 | return out 32 | 33 | class ObjectModel(nn.Module): 34 | def __init__(self, input_size, model_size, nonlinearity='tanh', seed = 100): 35 | super(ObjectModel, self).__init__() 36 | 37 | self.layer_sizes = model_size 38 | self.layer_sizes.insert(0, input_size) 39 | torch.manual_seed(seed) 40 | 41 | self.fc_layers = nn.ModuleList([nn.Linear(self.layer_sizes[i], self.layer_sizes[i+1]) \ 42 | for i in range(len(self.layer_sizes) -1)]) # stack severeal layers together. 43 | self.nonlinearity = torch.relu if nonlinearity == 'relu' else torch.tanh 44 | 45 | def forward(self, x): 46 | ''' 47 | Args: 48 | x: [batch_size, n_objects, input_size] 49 | Returns: 50 | [batch_size * n_objects, 2] speedX and speedY 51 | ''' 52 | if x.is_cuda: 53 | out = x.to('cpu') 54 | else: 55 | out = x 56 | for i in range(len(self.fc_layers) - 1): 57 | out = self.fc_layers[i](out) 58 | out = self.nonlinearity(out) 59 | out = self.fc_layers[-1](out) 60 | return out 61 | 62 | class InteractionNetwork(nn.Module): 63 | def __init__(self, object_dim, relation_dim, relation_model, object_model, nonlinearity, seed): 64 | super(InteractionNetwork, self).__init__() 65 | self.relational_model = RelationalModel(2 * object_dim + relation_dim, relation_model, nonlinearity, seed) 66 | # object_model defines the output dimension of the encoder(dim of lifted space)/decoder(dim of original space) 67 | self.object_model = ObjectModel(object_dim + relation_model[-1], object_model, nonlinearity, seed) 68 | 69 | def forward(self, objects, sender_relations, receiver_relations, relation_info, flag): 70 | # in our case, the variable objects consists of palm, each finger and the manipulated object 71 | if not flag: 72 | z_t = objects[:,0,:] 73 | z_t_1 = objects[:,1,:] 74 | senders_t = sender_relations.permute(0, 2, 1).bmm(z_t) 75 | senders_t_1 = sender_relations.permute(0, 2, 1).bmm(z_t_1) 76 | receivers_t = receiver_relations.permute(0, 2, 1).bmm(z_t) 77 | receivers_t_1 = receiver_relations.permute(0, 2, 1).bmm(z_t_1) 78 | effects_t = self.relational_model(torch.cat([senders_t, receivers_t, relation_info], 2)) # cat: 2 -> Concatenate along the second axis 79 | effects_t_1 = self.relational_model(torch.cat([senders_t_1, receivers_t_1, relation_info], 2)) # cat: 2 -> Concatenate 
along the second axis 80 | effect_receivers_t = receiver_relations.bmm(effects_t) 81 | effect_receivers_t_1 = receiver_relations.bmm(effects_t_1) 82 | predicted_t = self.object_model(torch.cat([z_t, effect_receivers_t], 2))[:,None,:] 83 | predicted_t_1 = self.object_model(torch.cat([z_t_1, effect_receivers_t_1], 2))[:,None,:] 84 | return torch.cat([predicted_t, predicted_t_1], 1) 85 | else: 86 | z_t = objects 87 | senders_t = sender_relations.permute(0, 2, 1).bmm(z_t) 88 | receivers_t = receiver_relations.permute(0, 2, 1).bmm(z_t) 89 | effects_t = self.relational_model(torch.cat([senders_t, receivers_t, relation_info], 2)) # cat: 2 -> Concatenate along the second axis 90 | effect_receivers_t = receiver_relations.bmm(effects_t) 91 | predicted_t = self.object_model(torch.cat([z_t, effect_receivers_t], 2)) 92 | return predicted_t 93 | 94 | -------------------------------------------------------------------------------- /Relocation/utils/gym_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wrapper around a gym env that provides convenience functions 3 | """ 4 | 5 | import gym 6 | import numpy as np 7 | from utils.rnn_network import RNNNetwork 8 | import pickle 9 | from tqdm import tqdm 10 | import copy 11 | from utils.coord_trans import ori_transform, ori_transform_inverse 12 | from utils.quatmath import euler2quat 13 | 14 | class EnvSpec(object): 15 | def __init__(self, obs_dim, act_dim, horizon): 16 | self.observation_dim = obs_dim 17 | self.action_dim = act_dim 18 | self.horizon = horizon 19 | 20 | 21 | class GymEnv(object): 22 | def __init__(self, env, env_kwargs=None, 23 | obs_mask=None, act_repeat=1, 24 | *args, **kwargs): 25 | 26 | # get the correct env behavior 27 | if type(env) == str: 28 | env = gym.make(env) # generare the mojuco env 29 | elif isinstance(env, gym.Env): 30 | env = env 31 | elif callable(env): 32 | env = env(**env_kwargs) 33 | else: 34 | print("Unsupported environment format") 35 | raise AttributeError 36 | 37 | self.env = env 38 | self.env_id = env.spec.id 39 | self.act_repeat = act_repeat 40 | try: 41 | self._horizon = env.spec.max_episode_steps # max_episode_steps is defnied in the __init__.py file (under ) 42 | except AttributeError: 43 | self._horizon = env.spec._horizon 44 | assert self._horizon % act_repeat == 0 45 | self._horizon = self._horizon // self.act_repeat 46 | 47 | try: 48 | self._action_dim = self.env.env.action_dim 49 | except AttributeError: 50 | self._action_dim = self.env.action_space.shape[0] 51 | 52 | try: 53 | self._observation_dim = self.env.env.obs_dim 54 | except AttributeError: 55 | self._observation_dim = self.env.observation_space.shape[0] 56 | 57 | # Specs 58 | self.spec = EnvSpec(self._observation_dim, self._action_dim, self._horizon) 59 | 60 | # obs mask 61 | self.obs_mask = np.ones(self._observation_dim) if obs_mask is None else obs_mask 62 | 63 | @property 64 | def action_dim(self): 65 | return self._action_dim 66 | 67 | @property 68 | def observation_dim(self): 69 | return self._observation_dim 70 | 71 | @property 72 | def observation_space(self): 73 | return self.env.observation_space 74 | 75 | @property 76 | def action_space(self): # each env has defined a action space 77 | return self.env.action_space 78 | 79 | @property 80 | def horizon(self): 81 | return self._horizon 82 | 83 | def reset(self, seed=None): 84 | try: 85 | self.env._elapsed_steps = 0 86 | return self.env.env.reset_model(seed=seed) 87 | except: 88 | if seed is not None: 89 | self.set_seed(seed) 90 | return 
self.env.reset() 91 | 92 | def reset4Koopman(self, seed=None, ori=None, init_pos=None, init_vel=None): 93 | try: 94 | self.env._elapsed_steps = 0 95 | return self.env.env.reset_model4Koopman(seed=seed, ori = ori, init_pos = init_pos, init_vel = init_vel) 96 | except: 97 | if seed is not None: 98 | self.set_seed(seed) 99 | return self.env.reset_model4Koopman(ori = ori, init_pos = init_pos, init_vel = init_vel) 100 | 101 | def KoopmanVisualize(self, seed=None, state_dict=None): 102 | try: 103 | self.env._elapsed_steps = 0 104 | return self.env.env.KoopmanVisualize(seed=seed, state_dict=state_dict) 105 | except: 106 | if seed is not None: 107 | self.set_seed(seed) 108 | return self.env.KoopmanVisualize(state_dict=state_dict) 109 | 110 | def reset_model(self, seed=None): 111 | # overloading for legacy code 112 | return self.reset(seed) 113 | 114 | def step(self, action): 115 | action = action.clip(self.action_space.low, self.action_space.high) 116 | # type(action_space) -> 117 | # self.action_space.low -> numpy.ndarray(lowest boundary) 118 | # self.action_space.high -> numpy.ndarray(highest boundary) 119 | if self.act_repeat == 1: 120 | obs, cum_reward, done, ifo = self.env.step(action) # the system dynamics is defined in each separate env python file 121 | # if(ifo['goal_achieved']): 122 | # print("done: ", ifo) 123 | # Run one timestep of the environment’s dynamics. 124 | else: 125 | cum_reward = 0.0 126 | for i in range(self.act_repeat): 127 | obs, reward, done, ifo = self.env.step(action) # the actual operations can be found in the env files 128 | # seems done is always set to be False 129 | cum_reward += reward 130 | if done: break 131 | return self.obs_mask * obs, cum_reward, done, ifo 132 | 133 | def render(self): 134 | try: 135 | self.env.env.mujoco_render_frames = True 136 | self.env.env.mj_render() 137 | except: 138 | self.env.render() 139 | 140 | def set_seed(self, seed=123): 141 | try: 142 | self.env.seed(seed) 143 | except AttributeError: 144 | self.env._seed(seed) 145 | 146 | def get_obs(self): 147 | try: 148 | return self.obs_mask * self.env.env.get_obs() 149 | except: 150 | return self.obs_mask * self.env.env._get_obs() 151 | 152 | def get_env_infos(self): 153 | try: 154 | return self.env.env.get_env_infos() 155 | except: 156 | return {} 157 | 158 | # =========================================== 159 | # Trajectory optimization related 160 | # Envs should support these functions in case of trajopt 161 | 162 | def get_env_state(self): 163 | try: 164 | return self.env.env.get_env_state() 165 | except: 166 | raise NotImplementedError 167 | 168 | def set_env_state(self, state_dict): 169 | try: 170 | self.env.env.set_env_state(state_dict) 171 | except: 172 | raise NotImplementedError 173 | 174 | def real_env_step(self, bool_val): 175 | try: 176 | self.env.env.real_step = bool_val 177 | except: 178 | raise NotImplementedError 179 | 180 | def get_hand_vel(self): 181 | return self.env.env.get_hand_vel() 182 | 183 | def visualize_policy_on_demos(self, policy, demos, Visualize, horizon=1000): 184 | print("Testing the RL agent!") 185 | self.reset() 186 | init_state_dict = dict() 187 | demo_ori_error = np.zeros([horizon - 1, len(demos)]) 188 | success_threshold = 10 189 | success_list_demo = [] 190 | Demo_joint_values = np.zeros([30, horizon, len(demos)]) # 30 -> num of hands 191 | for k in tqdm(range(len(demos))): 192 | success_count_RL = np.zeros(horizon - 1) 193 | init_state_dict['qpos'] = demos[k]['init']['qpos'] 194 | init_state_dict['qvel'] = demos[k]['init']['qvel'] 195 | 
init_state_dict['obj_pos'] = demos[k]['init']['obj_pos'] 196 | init_state_dict['target_pos'] = demos[k]['init']['target_pos'] 197 | self.set_env_state(init_state_dict) 198 | o = demos[k]['o'] 199 | o_control = o[:-9] 200 | Demo_joint_values[:, 0, k] = demos[k]['init']['qpos'][:30] 201 | if True: # RL agent is trained using RNN 202 | # generate the hidden states at time 0 203 | hidden_state = (np.zeros((1, 1, policy.model.rnn_cell.hidden_size)), np.zeros((1, 1, policy.model.rnn_cell.hidden_size))) 204 | for t in range(horizon - 1): # this loop is for system evolution, open loop control, no feedback 205 | if True: 206 | a = policy.get_action(o_control, hidden_state) 207 | hidden_state = a[1]['hidden_state'] 208 | else: 209 | a = policy.get_action(o) 210 | a =a[1]['evaluation'] 211 | o, *_ = self.step(a) 212 | Demo_joint_values[:, t + 1, k] = o[:30] 213 | o_control = o[:-9] 214 | if Visualize: 215 | self.render() 216 | # calculated state values using Koopman rollouts (in simulator) 217 | obj_obs = self.get_obs() 218 | # compute the errors 219 | demo_ori_error[t, k] = np.mean(np.abs(obj_obs[36:39])) # obj_orien-desired_orien (goal error) 220 | if np.linalg.norm(obj_obs[36:39]) < 0.1: 221 | success_count_RL[t] = 1 222 | if sum(success_count_RL) > success_threshold: 223 | success_list_demo.append(1) 224 | print("Success rate (RL) = %f" % (len(success_list_demo) / len(demos))) 225 | return demo_ori_error, Demo_joint_values 226 | 227 | def generate_unseen_data(self, number_sample): 228 | samples = [] 229 | for ep in range(number_sample): 230 | o, desired_orien = self.reset(seed = None) 231 | episode_data = {} 232 | episode_data['init_state_dict'] = copy.deepcopy(self.get_env_state()) 233 | episode_data['pen_desired_orien'] = desired_orien # record the goal orientation angle 234 | episode_data['o'] = o 235 | handpos = o[:24] 236 | episode_data['handpos'] = handpos 237 | hand_vel = self.env.get_hand_vel() 238 | episode_data['handvel'] = hand_vel 239 | objpos = o[24:27] 240 | episode_data['objpos'] = objpos 241 | objvel = o[27:33] 242 | episode_data['objvel'] = objvel 243 | episode_data['desired_ori'] = o[36:39] 244 | objorient = o[33:36] 245 | episode_data['objorient'] = ori_transform(objorient, episode_data['desired_ori']) 246 | samples.append(episode_data) 247 | return samples 248 | 249 | def generate_unseen_data_relocate(self, number_sample): 250 | samples = [] 251 | for ep in range(number_sample): 252 | o, desired_pos = self.reset(seed = ep) 253 | o_visual = self.env.get_full_obs_visualization() 254 | episode_data = {} 255 | episode_data['init'] = copy.deepcopy(self.get_env_state()) 256 | episode_data['desired_pos'] = desired_pos 257 | episode_data['o'] = o 258 | handpos = o[:30] 259 | episode_data['handpos'] = handpos 260 | hand_vel = self.env.get_hand_vel()[:30] 261 | episode_data['handvel'] = hand_vel 262 | objpos = o[39:42] 263 | episode_data['objpos'] = objpos - episode_data['desired_pos'] 264 | episode_data['objorient'] = o_visual[33:36] 265 | episode_data['objvel'] = self.env.get_hand_vel()[30:] 266 | samples.append(episode_data) 267 | return samples 268 | -------------------------------------------------------------------------------- /Relocation/utils/quatmath.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # For testing whether a number is close to zero 3 | _FLOAT_EPS = np.finfo(np.float64).eps 4 | _EPS4 = _FLOAT_EPS * 4.0 5 | 6 | 7 | def mulQuat(qa, qb): 8 | res = np.zeros(4) 9 | res[0] = qa[0]*qb[0] - qa[1]*qb[1] - 
qa[2]*qb[2] - qa[3]*qb[3] 10 | res[1] = qa[0]*qb[1] + qa[1]*qb[0] + qa[2]*qb[3] - qa[3]*qb[2] 11 | res[2] = qa[0]*qb[2] - qa[1]*qb[3] + qa[2]*qb[0] + qa[3]*qb[1] 12 | res[3] = qa[0]*qb[3] + qa[1]*qb[2] - qa[2]*qb[1] + qa[3]*qb[0] 13 | return res 14 | 15 | def negQuat(quat): 16 | return np.array([quat[0], -quat[1], -quat[2], -quat[3]]) 17 | 18 | def quat2Vel(quat, dt=1): 19 | axis = quat[1:].copy() 20 | sin_a_2 = np.sqrt(np.sum(axis**2)) 21 | axis = axis/(sin_a_2+1e-8) 22 | speed = 2*np.arctan2(sin_a_2, quat[0])/dt 23 | return speed, axis 24 | 25 | def quatDiff2Vel(quat1, quat2, dt): 26 | neg = negQuat(quat1) 27 | diff = mulQuat(quat2, neg) 28 | return quat2Vel(diff, dt) 29 | 30 | 31 | def axis_angle2quat(axis, angle): 32 | c = np.cos(angle/2) 33 | s = np.sin(angle/2) 34 | return np.array([c, s*axis[0], s*axis[1], s*axis[2]]) 35 | 36 | def euler2mat(euler): 37 | """ Convert Euler Angles to Rotation Matrix. See rotation.py for notes """ 38 | euler = np.asarray(euler, dtype=np.float64) 39 | assert euler.shape[-1] == 3, "Invalid shaped euler {}".format(euler) 40 | 41 | ai, aj, ak = -euler[..., 2], -euler[..., 1], -euler[..., 0] 42 | si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak) 43 | ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak) 44 | cc, cs = ci * ck, ci * sk 45 | sc, ss = si * ck, si * sk 46 | 47 | mat = np.empty(euler.shape[:-1] + (3, 3), dtype=np.float64) 48 | mat[..., 2, 2] = cj * ck 49 | mat[..., 2, 1] = sj * sc - cs 50 | mat[..., 2, 0] = sj * cc + ss 51 | mat[..., 1, 2] = cj * sk 52 | mat[..., 1, 1] = sj * ss + cc 53 | mat[..., 1, 0] = sj * cs - sc 54 | mat[..., 0, 2] = -sj 55 | mat[..., 0, 1] = cj * si 56 | mat[..., 0, 0] = cj * ci 57 | return mat 58 | 59 | 60 | def euler2quat(euler): 61 | """ Convert Euler Angles to Quaternions. See rotation.py for notes """ 62 | euler = np.asarray(euler, dtype=np.float64) 63 | assert euler.shape[-1] == 3, "Invalid shape euler {}".format(euler) 64 | 65 | ai, aj, ak = euler[..., 2] / 2, -euler[..., 1] / 2, euler[..., 0] / 2 66 | si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak) 67 | ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak) 68 | cc, cs = ci * ck, ci * sk 69 | sc, ss = si * ck, si * sk 70 | 71 | quat = np.empty(euler.shape[:-1] + (4,), dtype=np.float64) 72 | quat[..., 0] = cj * cc + sj * ss 73 | quat[..., 3] = cj * sc - sj * cs 74 | quat[..., 2] = -(cj * ss + sj * cc) 75 | quat[..., 1] = cj * cs - sj * sc 76 | return quat 77 | 78 | 79 | def mat2euler(mat): 80 | """ Convert Rotation Matrix to Euler Angles. See rotation.py for notes """ 81 | mat = np.asarray(mat, dtype=np.float64) 82 | assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat) 83 | 84 | cy = np.sqrt(mat[..., 2, 2] * mat[..., 2, 2] + mat[..., 1, 2] * mat[..., 1, 2]) 85 | condition = cy > _EPS4 86 | euler = np.empty(mat.shape[:-1], dtype=np.float64) 87 | euler[..., 2] = np.where(condition, 88 | -np.arctan2(mat[..., 0, 1], mat[..., 0, 0]), 89 | -np.arctan2(-mat[..., 1, 0], mat[..., 1, 1])) 90 | euler[..., 1] = np.where(condition, 91 | -np.arctan2(-mat[..., 0, 2], cy), 92 | -np.arctan2(-mat[..., 0, 2], cy)) 93 | euler[..., 0] = np.where(condition, 94 | -np.arctan2(mat[..., 1, 2], mat[..., 2, 2]), 95 | 0.0) 96 | return euler 97 | 98 | 99 | def mat2quat(mat): 100 | """ Convert Rotation Matrix to Quaternion. 
See rotation.py for notes """
101 | mat = np.asarray(mat, dtype=np.float64)
102 | assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)
103 | 
104 | Qxx, Qyx, Qzx = mat[..., 0, 0], mat[..., 0, 1], mat[..., 0, 2]
105 | Qxy, Qyy, Qzy = mat[..., 1, 0], mat[..., 1, 1], mat[..., 1, 2]
106 | Qxz, Qyz, Qzz = mat[..., 2, 0], mat[..., 2, 1], mat[..., 2, 2]
107 | # Fill only lower half of symmetric matrix
108 | K = np.zeros(mat.shape[:-2] + (4, 4), dtype=np.float64)
109 | K[..., 0, 0] = Qxx - Qyy - Qzz
110 | K[..., 1, 0] = Qyx + Qxy
111 | K[..., 1, 1] = Qyy - Qxx - Qzz
112 | K[..., 2, 0] = Qzx + Qxz
113 | K[..., 2, 1] = Qzy + Qyz
114 | K[..., 2, 2] = Qzz - Qxx - Qyy
115 | K[..., 3, 0] = Qyz - Qzy
116 | K[..., 3, 1] = Qzx - Qxz
117 | K[..., 3, 2] = Qxy - Qyx
118 | K[..., 3, 3] = Qxx + Qyy + Qzz
119 | K /= 3.0
120 | # TODO: vectorize this -- probably could be made faster
121 | q = np.empty(K.shape[:-2] + (4,))
122 | it = np.nditer(q[..., 0], flags=['multi_index'])
123 | while not it.finished:
124 | # Use Hermitian eigenvectors, values for speed
125 | vals, vecs = np.linalg.eigh(K[it.multi_index])
126 | # Select largest eigenvector, reorder to w,x,y,z quaternion
127 | q[it.multi_index] = vecs[[3, 0, 1, 2], np.argmax(vals)]
128 | # Prefer quaternion with positive w
129 | # (q * -1 corresponds to same rotation as q)
130 | if q[it.multi_index][0] < 0:
131 | q[it.multi_index] *= -1
132 | it.iternext()
133 | return q
134 | 
135 | 
136 | def quat2euler(quat):
137 | """ Convert Quaternion to Euler Angles. See rotation.py for notes """
138 | return mat2euler(quat2mat(quat))
139 | 
140 | 
141 | def quat2mat(quat):
142 | """ Convert Quaternion to Rotation Matrix. See rotation.py for notes """
143 | quat = np.asarray(quat, dtype=np.float64)
144 | assert quat.shape[-1] == 4, "Invalid shape quat {}".format(quat)
145 | 
146 | w, x, y, z = quat[..., 0], quat[..., 1], quat[..., 2], quat[..., 3]
147 | Nq = np.sum(quat * quat, axis=-1)
148 | s = 2.0 / Nq
149 | X, Y, Z = x * s, y * s, z * s
150 | wX, wY, wZ = w * X, w * Y, w * Z
151 | xX, xY, xZ = x * X, x * Y, x * Z
152 | yY, yZ, zZ = y * Y, y * Z, z * Z
153 | 
154 | mat = np.empty(quat.shape[:-1] + (3, 3), dtype=np.float64)
155 | mat[..., 0, 0] = 1.0 - (yY + zZ)
156 | mat[..., 0, 1] = xY - wZ
157 | mat[..., 0, 2] = xZ + wY
158 | mat[..., 1, 0] = xY + wZ
159 | mat[..., 1, 1] = 1.0 - (xX + zZ)
160 | mat[..., 1, 2] = yZ - wX
161 | mat[..., 2, 0] = xZ - wY
162 | mat[..., 2, 1] = yZ + wX
163 | mat[..., 2, 2] = 1.0 - (xX + yY)
164 | return np.where((Nq > _FLOAT_EPS)[..., np.newaxis, np.newaxis], mat, np.eye(3)) -------------------------------------------------------------------------------- /Relocation/utils/rnn_network.py: -------------------------------------------------------------------------------- 1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | from torch.autograd import Variable
5 | 
6 | class RNNNetwork(nn.Module):
7 | def __init__(self, rnn_cell,
8 | m,
9 | n):
10 | """
11 | :param rnn_cell: RNN (NCP) module
12 | """
13 | # nn.Module that unfolds an RNN cell into a sequence
14 | super(RNNNetwork, self).__init__()
15 | self.rnn_cell = rnn_cell
16 | self.obs_dim = m
17 | self.act_dim = n
18 | # followed by an fc_layer
19 | self.fc_input_layer = nn.Linear(in_features=self.obs_dim, out_features=rnn_cell.input_size) # map observation_dim to RNN_input dim
20 | self.fc_output_layer = nn.Linear(in_features=rnn_cell.hidden_size, out_features=self.act_dim)
21 | 
22 | def forward(self, x): # this is used for training
23 | device = x.device
24 | 
batch_size = x.size(0) 25 | # hidden states were set to be zeros at time 0 26 | hidden_state = ( # h_{0} and c_{0} 27 | torch.zeros((self.rnn_cell.num_layers, batch_size, self.rnn_cell.hidden_size), device=device), 28 | torch.zeros((self.rnn_cell.num_layers, batch_size, self.rnn_cell.hidden_size), device=device) 29 | ) 30 | x = self.fc_input_layer(x.float()) # (batch_size, seq_len, self.rnn_cell.input_size) 31 | outputs, hidden_state = self.rnn_cell(x, hidden_state) 32 | # output.shape = (batch_size, seq_len, self.rnn_cell.hidden_size) 33 | # output -> hidden_state at each time step (the top layer if multiple layers), and we use this as the output data 34 | # hidden_state = (hidden_state[0] = h_{T} and hidden_state[1] = c_{T}) at last time step T, if there are multiple layers, output are the h_{t} for the top layers 35 | outputs = self.fc_output_layer(outputs) #(batch_size, seq_len, act_dim) 36 | return outputs 37 | 38 | def predict(self, observation, hidden_state): 39 | # for execution, so batch_size is always 1. Not for training 40 | observation = self.fc_input_layer(observation) 41 | output, hidden_state = self.rnn_cell(observation.view(1, 1, -1), hidden_state) 42 | output = self.fc_output_layer(output) 43 | return output, hidden_state -------------------------------------------------------------------------------- /Reorientation/Data/Pen_task.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Reorientation/Data/Pen_task.pickle -------------------------------------------------------------------------------- /Reorientation/Results/Controller/NN_controller_best.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Reorientation/Results/Controller/NN_controller_best.pt -------------------------------------------------------------------------------- /Reorientation/Results/Expert_policy/best_policy.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Reorientation/Results/Expert_policy/best_policy.pickle -------------------------------------------------------------------------------- /Reorientation/Results/KODex/koopmanMatrix.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Reorientation/Results/KODex/koopmanMatrix.npy -------------------------------------------------------------------------------- /Reorientation/utils/Controller.py: -------------------------------------------------------------------------------- 1 | """ 2 | Define the controllers 3 | """ 4 | import numpy as np 5 | 6 | class PID(): 7 | def __init__(self, Kp, Ki, Kd, setpoint = None): 8 | self.Kp, self.Ki, self.Kd = Kp, Ki, Kd 9 | self.setpoint = setpoint 10 | self._last_input = None 11 | self._last_error = None 12 | 13 | def __call__(self, input): # input: current hand joints 14 | ''' 15 | Call the PID controller with *input* and calculate and return a control output 16 | ''' 17 | error = self.setpoint - input 18 | d_input = input - (self._last_input if (self._last_input is not None) else input) 19 | self._proportional = self.Kp * error # P term 20 | # self._integral += self.Ki * error * dt 21 | # self._integral = 
_clamp(self._integral, self.output_limits) # Avoid integral windup
22 | self._derivative = -self.Kd * d_input #/ dt
23 | output = self._proportional + self._derivative
24 | self._last_output = output
25 | self._last_input = input
26 | self._last_error = error
27 | return output
28 | 
29 | def set_goal(self, setpoint): # set target joint state
30 | self.setpoint = setpoint
31 | -------------------------------------------------------------------------------- /Reorientation/utils/coord_trans.py: -------------------------------------------------------------------------------- 1 | import numpy as np
2 | from numpy.linalg import inv
3 | 
4 | def ori_transform(current_ori, desired_ori):
5 | # build a new coordinate frame that defines the desired_ori as [1, 0, 0] - use desired_ori to build a new coordinate basis
6 | # and then express current_ori in that new coordinate frame
7 | x = desired_ori
8 | y, z = compute_orthonormal_vector(x)
9 | origin_x = np.array([1, 0, 0])
10 | origin_y = np.array([0, 1, 0])
11 | origin_z = np.array([0, 0, 1])
12 | Q = np.zeros([3,3])
13 | Q[0,0] = np.dot(x, origin_x)
14 | Q[0,1] = np.dot(x, origin_y)
15 | Q[0,2] = np.dot(x, origin_z)
16 | Q[1,0] = np.dot(y, origin_x)
17 | Q[1,1] = np.dot(y, origin_y)
18 | Q[1,2] = np.dot(y, origin_z)
19 | Q[2,0] = np.dot(z, origin_x)
20 | Q[2,1] = np.dot(z, origin_y)
21 | Q[2,2] = np.dot(z, origin_z)
22 | return np.dot(Q, current_ori)
23 | 
24 | def ori_transform_inverse(current_ori, desired_ori):
25 | x = desired_ori
26 | y, z = compute_orthonormal_vector(x)
27 | origin_x = np.array([1, 0, 0])
28 | origin_y = np.array([0, 1, 0])
29 | origin_z = np.array([0, 0, 1])
30 | Q = np.zeros([3,3])
31 | Q[0,0] = np.dot(x, origin_x)
32 | Q[0,1] = np.dot(x, origin_y)
33 | Q[0,2] = np.dot(x, origin_z)
34 | Q[1,0] = np.dot(y, origin_x)
35 | Q[1,1] = np.dot(y, origin_y)
36 | Q[1,2] = np.dot(y, origin_z)
37 | Q[2,0] = np.dot(z, origin_x)
38 | Q[2,1] = np.dot(z, origin_y)
39 | Q[2,2] = np.dot(z, origin_z)
40 | Q = Q.T
41 | return np.dot(Q, current_ori)
42 | 
43 | def compute_orthonormal_vector(desired_ori):
44 | # compute two orthonormal vectors wrt desired_ori (the new x axis) to serve as the new y, z axes
45 | # for the y axis, we impose three constraints: 1). x1*v1 + x2*v2 + x3*v3 = 0, 2). v1^2 + v2^2 + v3^2 = 1, 3). v1 = v2 (manually specified)
46 | # the z axis is then uniquely defined by the x and y axes
47 | x1 = desired_ori[0]
48 | x2 = desired_ori[1]
49 | x3 = desired_ori[2]
50 | v1 = v2 = np.sqrt(1 / (2 + (x1 + x2) ** 2 / x3 ** 2))
51 | v3 = -(x1+x2)/x3*v2
52 | y = np.array([v1, v2, v3])
53 | z1 = x2*v3 - x3*v2
54 | z2 = x3*v1 - x1*v3
55 | z3 = x1*v2 - x2*v1
56 | z = np.array([z1, z2, z3]) / np.sqrt(z1**2 + z2**2 + z3**2) # normalize by the norm (not the squared norm)
57 | return y, z
58 | 
59 | -------------------------------------------------------------------------------- /Reorientation/utils/dmp_layer.py: -------------------------------------------------------------------------------- 1 | import torch
2 | from torch.autograd import Function
3 | import numpy as np
4 | 
5 | 
6 | class DMPIntegrator():
7 | def __init__(self, rbf='gaussian', only_g=False, az=False):
8 | a = 1
9 | self.rbf = rbf
10 | self.only_g = only_g
11 | self.az = az
12 | 
13 | def forward(self, inputs, parameters, param_gradients, scaling, y0, dy0, goal=None, w=None, vel=False):
14 | # param_gradients is not explicitly used here.
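# For reference: integrate() at the bottom of this file implements the standard
# DMP transformation system,
#     tau * dz = a_z * (b_z * (goal - y) - z) + f(x),   tau * dy = z,
# driven by the canonical system tau * dx = -a_x * x, with forcing term
#     f(x) = (sum_i w_i * psi_i(x)) / (sum_i psi_i(x)) * x * (goal - y0),
# where the basis functions psi_i are chosen via `rbf` (Gaussian by default).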
15 | # goal is None 16 | dim = int(parameters[0].item()) # 2 17 | k = dim 18 | N = int(parameters[1].item()) 19 | division = k*(N + 2) 20 | inputs_np = inputs # NN output: Size(64) 21 | if goal is not None: 22 | goal = goal 23 | w = w 24 | else: 25 | w = inputs_np[:, dim:dim*(N + 1)] # dim:dim*(N + 1) = 2:62 / w.shape = (batch_size, 60) 26 | goal = inputs_np[:, :dim] 27 | 28 | if self.az: #False 29 | alpha_z = inputs[:, -1] 30 | t = y0.shape[0] // inputs.shape[0] 31 | alpha_z = alpha_z.repeat(t, 1).transpose(0, 1).reshape(inputs.shape[0], -1) 32 | alpha_z = alpha_z.contiguous().view(alpha_z.shape[0] * alpha_z.shape[1], ) 33 | 34 | w = w.reshape(-1, N) # (2*batch_size, N) y0.shape -> [2*batch_size] 35 | 36 | if self.only_g: #false, if true, NN only outputs goal 37 | w = torch.zeros_like(w) 38 | if vel: # false 39 | dy0 = torch.zeros_like(y0) 40 | # dy0 = torch.zeros_like(y0) + 0.01 # set to be small values 0.01 41 | goal = goal.contiguous().view(goal.shape[0]*goal.shape[1], ) # [2*batch_size] y0.shape -> [2*batch_size] 42 | if self.az: 43 | X, dX, ddX = integrate(parameters, w, y0, dy0, goal, 1, rbf=self.rbf, az=True, alpha_z=alpha_z) 44 | else: 45 | X, dX, ddX = integrate(parameters, w, y0, dy0, goal, 1, rbf=self.rbf) # X -> whole trajectory 46 | # X and inputs.new(X) have the same values 47 | return inputs.new(X), inputs.new(dX), inputs.new(ddX) 48 | 49 | 50 | def forward_not_int(self, inputs, parameters, param_gradients, scaling, y0, dy0, goal=None, w=None, vel=False): 51 | dim = int(parameters[0].item()) 52 | k = dim 53 | N = int(parameters[1].item()) 54 | division = k*(N + 2) 55 | inputs_np = inputs 56 | if goal is not None: 57 | goal = goal 58 | w = w 59 | else: 60 | w = inputs_np[:, dim:dim*(N + 1)] 61 | goal = inputs_np[:, :dim] 62 | w = w.reshape(-1, N) 63 | if vel: 64 | dy0 = torch.zeros_like(y0) 65 | goal = goal.contiguous().view(goal.shape[0]*goal.shape[1], ) 66 | return parameters, w, y0, dy0, goal, 1 67 | 68 | def first_step(self, w, parameters, scaling, y0, dy0, l, tau=1): 69 | data = parameters 70 | y = y0 71 | self.y0 = y0 72 | z = dy0 * tau 73 | self.x = 1 74 | self.N = int(data[1].item()) 75 | self.dt = data[3].item() 76 | self.a_x = data[4].item() 77 | self.a_z = data[5].item() 78 | self.b_z = self.a_z / 4 79 | self.h = data[(6+self.N):(6+self.N*2)] 80 | self.c = data[6:(6+self.N)] 81 | self.num_steps = int(data[2].item())-1 82 | self.i = 0 83 | self.w = w.reshape(-1, self.N) 84 | self.tau = tau 85 | self.l = l 86 | 87 | def step(self, g, y, dy): 88 | g = g.reshape(-1, 1)[:, 0] 89 | z = dy*self.tau 90 | dt = self.dt 91 | for _ in range(self.l): 92 | dx = (-self.a_x * self.x) / self.tau 93 | self.x = self.x + dx * dt 94 | psi = torch.exp(-self.h * torch.pow((self.x - self.c), 2)) 95 | fx = torch.mv(self.w, psi)*self.x*(g - self.y0) / torch.sum(psi) 96 | dz = self.a_z * (self.b_z * (g - y) - z) + fx 97 | dy = z 98 | dz = dz / self.tau 99 | dy = dy / self.tau 100 | y = y + dy * dt 101 | z = z + dz * dt 102 | self.i += 1 103 | return y, dy, dz 104 | 105 | 106 | def integrate(data, w, y0, dy0, goal, tau, rbf='gaussian', az=False, alpha_z=None): 107 | y = y0 # y0:input 108 | z = dy0 * tau # tau = 1, z = dy 109 | x = 1 # x_0 110 | # data[2] -> simu horizon 111 | if w.is_cuda: 112 | Y = torch.cuda.FloatTensor(w.shape[0], int(data[2].item())).fill_(0) 113 | dY = torch.cuda.FloatTensor(w.shape[0], int(data[2].item())).fill_(0) 114 | ddY = torch.cuda.FloatTensor(w.shape[0], int(data[2].item())).fill_(0) 115 | else: 116 | Y = torch.zeros((w.shape[0],int(data[2].item()))) 117 | dY = 
torch.zeros((w.shape[0],int(data[2].item()))) 118 | ddY = torch.zeros((w.shape[0],int(data[2].item()))) 119 | Y[:, 0] = y 120 | dY[:, 0] = dy0 121 | ddY[:, 0] = z 122 | N = int(data[1].item()) 123 | dt = data[3].item() 124 | a_x = data[4].item() # a_x in paper (eqn. (2)) 125 | a_z = data[5].item() # \alpha in paper (eqn. (1)) 126 | if az: 127 | a_z = alpha_z 128 | a_z = torch.clamp(a_z, 0.5, 30) 129 | b_z = a_z / 4 # \beta in paper (eqn. (1)) 130 | h = data[(6+N):(6+N*2)] # h size N(30) 131 | c = data[6:(6+N)] # c size N(30) 132 | # the first 6 params are for other functions 133 | for i in range(0, int(data[2].item())-1): # data[2] -> simu horizon 134 | dx = (-a_x * x) / tau 135 | x = x + dx * dt 136 | eps = torch.pow((x - c), 2) 137 | # rbf are for basis functions (radial basis functions) 138 | if rbf == 'gaussian': 139 | psi = torch.exp(-h * eps) # (eqn. (3)) 140 | if rbf == 'multiquadric': 141 | psi = torch.sqrt(1 + h * eps) 142 | if rbf == 'inverse_quadric': 143 | psi = 1/(1 + h*eps) 144 | if rbf == 'inverse_multiquadric': 145 | psi = 1/torch.sqrt(1 + h * eps) 146 | if rbf == 'linear': 147 | psi = h * eps 148 | # print(w.shape) torch.Size([200, 30]) 149 | # print(psi.shape) torch.Size([30]) 150 | # print(x) int 151 | # print(goal.shape) torch.Size([200]) 152 | # print(y0.shape) torch.Size([200]) 153 | fx = torch.mv(w, psi)*x*(goal-y0) / torch.sum(psi) # mv - matrix-vector product 154 | dz = a_z * (b_z * (goal - y) - z) + fx 155 | dy = z 156 | dz = dz / tau # tau = 1 157 | dy = dy / tau 158 | y = y + dy * dt 159 | z = z + dz * dt 160 | Y[:, i+1] = y 161 | dY[:, i+1] = dy 162 | ddY[:, i+1] = dz 163 | return Y, dY, ddY 164 | 165 | 166 | class DMPParameters(): 167 | def __init__(self, N, tau, dt, Dof, scale, T, a_z=25): 168 | # N = number of radial basis functions 169 | # tau = 1 170 | # dt = time duration for each simulation time 171 | # Dof = num of states 172 | 173 | self.a_z = a_z 174 | self.a_x = 1 175 | self.N = N 176 | c = np.exp(-self.a_x * np.linspace(0, 1, self.N)) # a_x(data[4]) in paper (eqn. 
(2)) / c gives the horizontal shift of each basis function
177 | sigma2 = np.ones(self.N) * self.N**1.5 / c / self.a_x # h is the width of each basis function
178 | self.c = torch.from_numpy(c).float()
179 | self.sigma2 = torch.from_numpy(sigma2).float()
180 | self.tau = tau
181 | self.dt = dt
182 | self.time_steps = T
183 | self.y0 = [0]
184 | self.dy0 = np.zeros(Dof)
185 | self.Dof = Dof
186 | self.Y = torch.zeros((self.time_steps))
187 | grad = torch.zeros((self.time_steps, self.N + 2))
188 | 
189 | self.data = {'time_steps':self.time_steps,'c':self.c,'sigma2':self.sigma2,'a_z':self.a_z,'a_x':self.a_x,'dt':self.dt,'Y':self.Y}
190 | dmp_data = torch.tensor([self.Dof,self.N,self.time_steps,self.dt,self.a_x,self.a_z])
191 | data_tensor = torch.cat((dmp_data,self.c,self.sigma2),0)
192 | # len(data_tensor) = 6 + self.N + self.N
193 | 
194 | data_tensor.dy0 = self.dy0
195 | data_tensor.tau = self.tau
196 | 
197 | 
198 | for i in range(0, self.N):
199 | weights = torch.zeros((1,self.N))
200 | weights[0,i] = 1
201 | grad[:, i + 2], _, _ = integrate(data_tensor, weights, 0, 0, 0, self.tau)
202 | weights = torch.zeros((1,self.N))
203 | grad[:, 0], _, _ = integrate(data_tensor, weights, 1, 0, 0, self.tau)
204 | weights = torch.zeros((1,self.N))
205 | grad[:, 1], _, _ = integrate(data_tensor, weights, 0, 0, 1, self.tau)
206 | 
207 | '''
208 | self.c = self.c.cuda()
209 | self.sigma2 = self.sigma2.cuda()
210 | self.grad = grad.cuda()
211 | self.point_grads = torch.zeros(54).cuda()
212 | '''
213 | self.data_tensor = data_tensor
214 | self.grad_tensor = grad
215 | 
216 | self.point_grads = torch.zeros(self.N*2 + 4)
217 | self.X = np.zeros((self.time_steps, self.Dof))
218 | -------------------------------------------------------------------------------- /Reorientation/utils/fc_network.py: -------------------------------------------------------------------------------- 1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | 
5 | 
6 | class FCNetwork(nn.Module):
7 | def __init__(self, model_size = None,
8 | nonlinearity='tanh', # either 'tanh' or 'relu'
9 | seed = 100
10 | ):
11 | super(FCNetwork, self).__init__()
12 | 
13 | self.obs_dim = model_size[0]
14 | self.act_dim = model_size[-1]
15 | self.layer_sizes = model_size
16 | 
17 | # hidden layers
18 | torch.manual_seed(seed)
19 | self.fc_layers = nn.ModuleList([nn.Linear(self.layer_sizes[i], self.layer_sizes[i+1]) \
20 | for i in range(len(self.layer_sizes) -1)]) # stack several layers together.
21 | # The weights are initialized by default as:
22 | # stdv = 1. 
/ math.sqrt(self.weight.size(1)) 23 | # self.weight.data.uniform_(-stdv, stdv) 24 | # if self.bias is not None: 25 | # self.bias.data.uniform_(-stdv, stdv) 26 | self.nonlinearity = torch.relu if nonlinearity == 'relu' else torch.tanh 27 | 28 | def forward(self, x): 29 | if x.is_cuda: 30 | out = x.to('cpu') 31 | else: 32 | out = x 33 | out = out.to(torch.float32) 34 | for i in range(len(self.fc_layers)-1): 35 | out = self.fc_layers[i](out) 36 | out = self.nonlinearity(out) 37 | out = self.fc_layers[-1](out) 38 | return out 39 | -------------------------------------------------------------------------------- /Reorientation/utils/gaussian_rnn.py: -------------------------------------------------------------------------------- 1 | from utils.rnn_network import RNNNetwork 2 | import numpy as np 3 | import torch 4 | from torch.autograd import Variable 5 | # import kerasncp as kncp 6 | # from kerasncp.torch import LTCCell 7 | 8 | class RNN: 9 | def __init__(self, 10 | m, 11 | n, 12 | input_sizes=64, 13 | hidden_state = 64, 14 | LSTM_layer = 1, 15 | seed=None): 16 | """ 17 | :param m: NN_input size 18 | :param n: NN_output size 19 | :param hidden_sizes: network hidden layer sizes (currently 2 layers only) 20 | :param seed: random seed 21 | """ 22 | # Set seed 23 | # ------------------------ 24 | if seed is not None: 25 | torch.manual_seed(seed) 26 | np.random.seed(seed) 27 | 28 | # Policy network 29 | # ------------------------ 30 | # If batch_first is True, then the input and output tensors are provided as (batch, seq, feature) instead of (seq, batch, feature). 31 | rnn_cell = torch.nn.LSTM(input_sizes, hidden_state, batch_first=True, num_layers = LSTM_layer) 32 | # self.model = NCPNetwork(ltc_cells, env) 33 | self.model = RNNNetwork(rnn_cell, m, n) 34 | # make weights smaller 35 | # for param in list(self.model.parameters())[-2:]: # only last layer 36 | # param.data = 1e1 * param.data 37 | self.trainable_params = list(self.model.parameters()) 38 | 39 | # Placeholders 40 | # ------------------------ 41 | self.obs_var = Variable(torch.randn(m), requires_grad=False) 42 | self.hidden_state_var = Variable(torch.randn( # generate an variable to store values 43 | (LSTM_layer, 1, self.model.rnn_cell.hidden_size)), requires_grad=False) 44 | self.cell_state_var = Variable(torch.randn( 45 | (LSTM_layer, 1, self.model.rnn_cell.hidden_size)), requires_grad=False) 46 | 47 | # get_action is used for executions 48 | def get_action(self, observation, hidden_state): 49 | o = np.float32(observation.reshape(1, 1, -1)) 50 | self.obs_var.data = torch.from_numpy(o) 51 | # print(hidden_state[0].shape) 52 | self.hidden_state_var.data = torch.from_numpy(np.float32(hidden_state[0])) 53 | self.cell_state_var.data = torch.from_numpy(np.float32(hidden_state[1])) 54 | # print(self.hidden_state_var.shape) 55 | mean, hidden_state = self.model.predict(self.obs_var, (self.hidden_state_var, self.cell_state_var)) 56 | mean = mean.data.numpy().ravel() 57 | new_hidden_state = hidden_state[0].data.numpy() 58 | new_cell_state = hidden_state[1].data.numpy() 59 | hidden_state = (new_hidden_state, new_cell_state) # these are needed for the next time step. 
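# Each element of the returned (h, c) tuple is a numpy array of shape
# (LSTM_layer, 1, hidden_size), matching the placeholder Variables created in
# __init__; callers seed both with zeros at t = 0 (see
# GymEnv.visualize_policy_on_demos) and pass the returned tuple back in at the
# next call.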
60 | # Since the input is the obs at each time step, we have to manually pass the hidden state and the cell state
61 | return mean, hidden_state
62 | -------------------------------------------------------------------------------- /Reorientation/utils/gnn_networks.py: -------------------------------------------------------------------------------- 1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | 
5 | class RelationalModel(nn.Module):
6 | def __init__(self, input_size, model_size, nonlinearity='tanh', seed = 100):
7 | super(RelationalModel, self).__init__()
8 | 
9 | self.layer_sizes = model_size
10 | self.layer_sizes.insert(0, input_size)
11 | torch.manual_seed(seed)
12 | 
13 | self.fc_layers = nn.ModuleList([nn.Linear(self.layer_sizes[i], self.layer_sizes[i+1]) \
14 | for i in range(len(self.layer_sizes) -1)]) # stack several layers together.
15 | self.nonlinearity = torch.relu if nonlinearity == 'relu' else torch.tanh
16 | 
17 | def forward(self, x):
18 | '''
19 | Args:
20 | x: [batch_size, n_relations, input_size]
21 | Returns:
22 | [batch_size, n_relations, output_size]
23 | '''
24 | if x.is_cuda:
25 | out = x.to('cpu')
26 | else:
27 | out = x
28 | for i in range(len(self.fc_layers)):
29 | out = self.fc_layers[i](out)
30 | out = self.nonlinearity(out)
31 | return out
32 | 
33 | class ObjectModel(nn.Module):
34 | def __init__(self, input_size, model_size, nonlinearity='tanh', seed = 100):
35 | super(ObjectModel, self).__init__()
36 | 
37 | self.layer_sizes = model_size
38 | self.layer_sizes.insert(0, input_size)
39 | torch.manual_seed(seed)
40 | 
41 | self.fc_layers = nn.ModuleList([nn.Linear(self.layer_sizes[i], self.layer_sizes[i+1]) \
42 | for i in range(len(self.layer_sizes) -1)]) # stack several layers together.
43 | self.nonlinearity = torch.relu if nonlinearity == 'relu' else torch.tanh
44 | 
45 | def forward(self, x):
46 | '''
47 | Args:
48 | x: [batch_size, n_objects, input_size]
49 | Returns:
50 | [batch_size * n_objects, 2] speedX and speedY
51 | '''
52 | if x.is_cuda:
53 | out = x.to('cpu')
54 | else:
55 | out = x
56 | for i in range(len(self.fc_layers) - 1):
57 | out = self.fc_layers[i](out)
58 | out = self.nonlinearity(out)
59 | out = self.fc_layers[-1](out)
60 | return out
61 | 
62 | class InteractionNetwork(nn.Module):
63 | def __init__(self, object_dim, relation_dim, relation_model, object_model, nonlinearity, seed):
64 | super(InteractionNetwork, self).__init__()
65 | self.relational_model = RelationalModel(2 * object_dim + relation_dim, relation_model, nonlinearity, seed)
66 | # object_model defines the output dimension of the encoder(dim of lifted space)/decoder(dim of original space)
67 | self.object_model = ObjectModel(object_dim + relation_model[-1], object_model, nonlinearity, seed)
68 | 
69 | def forward(self, objects, sender_relations, receiver_relations, relation_info, flag):
70 | # in our case, the variable objects consists of the palm, each finger and the manipulated object
71 | if not flag:
72 | z_t = objects[:,0,:]
73 | z_t_1 = objects[:,1,:]
74 | senders_t = sender_relations.permute(0, 2, 1).bmm(z_t)
75 | senders_t_1 = sender_relations.permute(0, 2, 1).bmm(z_t_1)
76 | receivers_t = receiver_relations.permute(0, 2, 1).bmm(z_t)
77 | receivers_t_1 = receiver_relations.permute(0, 2, 1).bmm(z_t_1)
78 | effects_t = self.relational_model(torch.cat([senders_t, receivers_t, relation_info], 2)) # cat along dim 2 (the feature axis)
79 | effects_t_1 = self.relational_model(torch.cat([senders_t_1, receivers_t_1, relation_info], 2)) # cat along dim 2 (the feature axis)
80 | effect_receivers_t = receiver_relations.bmm(effects_t)
81 | effect_receivers_t_1 = receiver_relations.bmm(effects_t_1)
82 | predicted_t = self.object_model(torch.cat([z_t, effect_receivers_t], 2))[:,None,:]
83 | predicted_t_1 = self.object_model(torch.cat([z_t_1, effect_receivers_t_1], 2))[:,None,:]
84 | return torch.cat([predicted_t, predicted_t_1], 1)
85 | else:
86 | z_t = objects
87 | senders_t = sender_relations.permute(0, 2, 1).bmm(z_t)
88 | receivers_t = receiver_relations.permute(0, 2, 1).bmm(z_t)
89 | effects_t = self.relational_model(torch.cat([senders_t, receivers_t, relation_info], 2)) # cat along dim 2 (the feature axis)
90 | effect_receivers_t = receiver_relations.bmm(effects_t)
91 | predicted_t = self.object_model(torch.cat([z_t, effect_receivers_t], 2))
92 | return predicted_t
93 | 
94 | -------------------------------------------------------------------------------- /Reorientation/utils/gym_env.py: -------------------------------------------------------------------------------- 1 | """
2 | Wrapper around a gym env that provides convenience functions
3 | """
4 | 
5 | import gym
6 | import numpy as np
7 | from utils.rnn_network import RNNNetwork
8 | import pickle
9 | from tqdm import tqdm
10 | import copy
11 | from utils.coord_trans import ori_transform, ori_transform_inverse
12 | from utils.quatmath import euler2quat
13 | 
14 | class EnvSpec(object):
15 | def __init__(self, obs_dim, act_dim, horizon):
16 | self.observation_dim = obs_dim
17 | self.action_dim = act_dim
18 | self.horizon = horizon
19 | 
20 | 
21 | class GymEnv(object):
22 | def __init__(self, env, env_kwargs=None,
23 | obs_mask=None, act_repeat=1,
24 | *args, **kwargs):
25 | 
26 | # get the correct env behavior
27 | if type(env) == str:
28 | env = gym.make(env) # generate the MuJoCo env
29 | elif isinstance(env, gym.Env):
30 | env = env
31 | elif callable(env):
32 | env = env(**env_kwargs)
33 | else:
34 | print("Unsupported environment format")
35 | raise AttributeError
36 | 
37 | self.env = env
38 | self.env_id = env.spec.id
39 | self.act_repeat = act_repeat
40 | try:
41 | self._horizon = env.spec.max_episode_steps # max_episode_steps is defined in the __init__.py file (under )
42 | except AttributeError:
43 | self._horizon = env.spec._horizon
44 | assert self._horizon % act_repeat == 0
45 | self._horizon = self._horizon // self.act_repeat
46 | 
47 | try:
48 | self._action_dim = self.env.env.action_dim
49 | except AttributeError:
50 | self._action_dim = self.env.action_space.shape[0]
51 | 
52 | try:
53 | self._observation_dim = self.env.env.obs_dim
54 | except AttributeError:
55 | self._observation_dim = self.env.observation_space.shape[0]
56 | 
57 | # Specs
58 | self.spec = EnvSpec(self._observation_dim, self._action_dim, self._horizon)
59 | 
60 | # obs mask
61 | self.obs_mask = np.ones(self._observation_dim) if obs_mask is None else obs_mask
62 | 
63 | @property
64 | def action_dim(self):
65 | return self._action_dim
66 | 
67 | @property
68 | def observation_dim(self):
69 | return self._observation_dim
70 | 
71 | @property
72 | def observation_space(self):
73 | return self.env.observation_space
74 | 
75 | @property
76 | def action_space(self): # each env has defined an action space
77 | return self.env.action_space
78 | 
79 | @property
80 | def horizon(self):
81 | return self._horizon
82 | 
83 | def reset(self, seed=None):
84 | try:
85 | self.env._elapsed_steps = 0
86 | return self.env.env.reset_model(seed=seed)
87 | except:
88 | if seed is not None:
89 | self.set_seed(seed)
90 
| return self.env.reset() 91 | 92 | def reset4Koopman(self, seed=None, ori=None, init_pos=None, init_vel=None): 93 | try: 94 | self.env._elapsed_steps = 0 95 | return self.env.env.reset_model4Koopman(seed=seed, ori = ori, init_pos = init_pos, init_vel = init_vel) 96 | except: 97 | if seed is not None: 98 | self.set_seed(seed) 99 | return self.env.reset_model4Koopman(ori = ori, init_pos = init_pos, init_vel = init_vel) 100 | 101 | def KoopmanVisualize(self, seed=None, state_dict=None): 102 | try: 103 | self.env._elapsed_steps = 0 104 | return self.env.env.KoopmanVisualize(seed=seed, state_dict=state_dict) 105 | except: 106 | if seed is not None: 107 | self.set_seed(seed) 108 | return self.env.KoopmanVisualize(state_dict=state_dict) 109 | 110 | def reset_model(self, seed=None): 111 | # overloading for legacy code 112 | return self.reset(seed) 113 | 114 | def step(self, action): 115 | action = action.clip(self.action_space.low, self.action_space.high) 116 | # type(action_space) -> 117 | # self.action_space.low -> numpy.ndarray(lowest boundary) 118 | # self.action_space.high -> numpy.ndarray(highest boundary) 119 | if self.act_repeat == 1: 120 | obs, cum_reward, done, ifo = self.env.step(action) # the system dynamics is defined in each separate env python file 121 | # if(ifo['goal_achieved']): 122 | # print("done: ", ifo) 123 | # Run one timestep of the environment’s dynamics. 124 | else: 125 | cum_reward = 0.0 126 | for i in range(self.act_repeat): 127 | obs, reward, done, ifo = self.env.step(action) # the actual operations can be found in the env files 128 | # seems done is always set to be False 129 | cum_reward += reward 130 | if done: break 131 | return self.obs_mask * obs, cum_reward, done, ifo 132 | 133 | def render(self): 134 | try: 135 | self.env.env.mujoco_render_frames = True 136 | self.env.env.mj_render() 137 | except: 138 | self.env.render() 139 | 140 | def set_seed(self, seed=123): 141 | try: 142 | self.env.seed(seed) 143 | except AttributeError: 144 | self.env._seed(seed) 145 | 146 | def get_obs(self): 147 | try: 148 | return self.obs_mask * self.env.env.get_obs() 149 | except: 150 | return self.obs_mask * self.env.env._get_obs() 151 | 152 | def get_env_infos(self): 153 | try: 154 | return self.env.env.get_env_infos() 155 | except: 156 | return {} 157 | 158 | # =========================================== 159 | # Trajectory optimization related 160 | # Envs should support these functions in case of trajopt 161 | 162 | def get_env_state(self): 163 | try: 164 | return self.env.env.get_env_state() 165 | except: 166 | raise NotImplementedError 167 | 168 | def set_env_state(self, state_dict): 169 | try: 170 | self.env.env.set_env_state(state_dict) 171 | except: 172 | raise NotImplementedError 173 | 174 | def real_env_step(self, bool_val): 175 | try: 176 | self.env.env.real_step = bool_val 177 | except: 178 | raise NotImplementedError 179 | 180 | def visualize_policy_on_demos(self, policy, demos, Visualize, horizon=1000): 181 | print("Testing the RL agent!") 182 | self.reset() 183 | init_state_dict = dict() 184 | demo_ori_error = np.zeros([horizon - 1, len(demos)]) 185 | success_threshold = 10 186 | success_list_RL = [] 187 | fall_list_RL = [] 188 | success_rate = str() 189 | for k in tqdm(range(len(demos))): 190 | success_count_RL = np.zeros(horizon - 1) 191 | init_state_dict['qpos'] = np.append(demos[k]['handpos'], np.zeros(6)) 192 | # For hand control test, we can set the initial pos of pen to be (0.15, 0, 0, 0, 0, 0), so that there is no contact. 
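# (note) qpos here is 30-dim: the 24 hand joint angles taken from the demo,
# followed by 6 pen DoFs that np.append fills with zeros above; num_handpos
# appears to index the pen's first coordinate, so uncommenting the line below
# would place the pen at x = 0.15, out of reach of the hand.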
193 | # init_state_dict['qpos'][num_handpos] = 0.15 194 | init_state_dict['qvel'] = np.append(demos[k]['handvel'], np.zeros(6)) 195 | init_state_dict['desired_orien'] = euler2quat(demos[k]['pen_desired_orien']) 196 | self.set_env_state(init_state_dict) 197 | o = demos[k]['o'] 198 | if True: 199 | # generate the hidden states at time 0 200 | hidden_state = (np.zeros((1, 1, policy.model.rnn_cell.hidden_size)), np.zeros((1, 1, policy.model.rnn_cell.hidden_size))) 201 | for t in range(horizon - 1): # this loop is for system evolution, open loop control, no feedback 202 | if True: 203 | a = policy.get_action(o, hidden_state) 204 | hidden_state = a[1]['hidden_state'] 205 | else: 206 | a = policy.get_action(o) 207 | a =a[1]['evaluation'] 208 | o, *_ = self.step(a) 209 | if Visualize: 210 | self.render() 211 | # calculated state values using Koopman rollouts (in simulator) 212 | obj_obs = self.get_obs() 213 | obj_vel = self.get_obs()[27:33] 214 | orien_similarity_RL = np.dot(obj_obs[33:36], obj_obs[36:39]) 215 | dist = np.linalg.norm(obj_obs[39:42]) 216 | success_count_RL[t] = 1 if (orien_similarity_RL > 0.90) else 0 217 | # compute the errors 218 | demo_ori_error[t, k] = np.mean(np.abs(obj_obs[42:45])) # obj_orien-desired_orien (goal error) 219 | if np.abs(obj_obs[39:42])[2] > 0.15: 220 | fall_list_RL.append(1) 221 | else: 222 | if sum(success_count_RL) > success_threshold and np.mean(np.abs(obj_vel)) < 1.: 223 | success_list_RL.append(1) 224 | print("Success rate (RL) = %f" % (len(success_list_RL) / len(demos))) 225 | print("Throw out rate (RL) = %f" % (len(fall_list_RL) / len(demos))) 226 | success_rate += "Success rate (RL) = %f" % (len(success_list_RL) / len(demos)) 227 | success_rate += "Throw out rate (RL) = %f" % (len(fall_list_RL) / len(demos)) 228 | return demo_ori_error, success_rate 229 | 230 | def generate_unseen_data(self, number_sample): 231 | samples = [] 232 | for ep in range(number_sample): 233 | o, desired_orien = self.reset(seed = ep) 234 | episode_data = {} 235 | episode_data['init_state_dict'] = copy.deepcopy(self.get_env_state()) 236 | episode_data['pen_desired_orien'] = desired_orien # record the goal orientation angle 237 | episode_data['o'] = o 238 | handpos = o[:24] 239 | episode_data['handpos'] = handpos 240 | hand_vel = self.env.get_hand_vel() 241 | episode_data['handvel'] = hand_vel 242 | objpos = o[24:27] 243 | episode_data['objpos'] = objpos 244 | objvel = o[27:33] 245 | episode_data['objvel'] = objvel 246 | episode_data['desired_ori'] = o[36:39] 247 | objorient = o[33:36] 248 | episode_data['objorient'] = ori_transform(objorient, episode_data['desired_ori']) 249 | samples.append(episode_data) 250 | return samples 251 | -------------------------------------------------------------------------------- /Reorientation/utils/quatmath.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # For testing whether a number is close to zero 3 | _FLOAT_EPS = np.finfo(np.float64).eps 4 | _EPS4 = _FLOAT_EPS * 4.0 5 | 6 | 7 | def mulQuat(qa, qb): 8 | res = np.zeros(4) 9 | res[0] = qa[0]*qb[0] - qa[1]*qb[1] - qa[2]*qb[2] - qa[3]*qb[3] 10 | res[1] = qa[0]*qb[1] + qa[1]*qb[0] + qa[2]*qb[3] - qa[3]*qb[2] 11 | res[2] = qa[0]*qb[2] - qa[1]*qb[3] + qa[2]*qb[0] + qa[3]*qb[1] 12 | res[3] = qa[0]*qb[3] + qa[1]*qb[2] - qa[2]*qb[1] + qa[3]*qb[0] 13 | return res 14 | 15 | def negQuat(quat): 16 | return np.array([quat[0], -quat[1], -quat[2], -quat[3]]) 17 | 18 | def quat2Vel(quat, dt=1): 19 | axis = quat[1:].copy() 20 | sin_a_2 = 
np.sqrt(np.sum(axis**2)) 21 | axis = axis/(sin_a_2+1e-8) 22 | speed = 2*np.arctan2(sin_a_2, quat[0])/dt 23 | return speed, axis 24 | 25 | def quatDiff2Vel(quat1, quat2, dt): 26 | neg = negQuat(quat1) 27 | diff = mulQuat(quat2, neg) 28 | return quat2Vel(diff, dt) 29 | 30 | 31 | def axis_angle2quat(axis, angle): 32 | c = np.cos(angle/2) 33 | s = np.sin(angle/2) 34 | return np.array([c, s*axis[0], s*axis[1], s*axis[2]]) 35 | 36 | def euler2mat(euler): 37 | """ Convert Euler Angles to Rotation Matrix. See rotation.py for notes """ 38 | euler = np.asarray(euler, dtype=np.float64) 39 | assert euler.shape[-1] == 3, "Invalid shaped euler {}".format(euler) 40 | 41 | ai, aj, ak = -euler[..., 2], -euler[..., 1], -euler[..., 0] 42 | si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak) 43 | ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak) 44 | cc, cs = ci * ck, ci * sk 45 | sc, ss = si * ck, si * sk 46 | 47 | mat = np.empty(euler.shape[:-1] + (3, 3), dtype=np.float64) 48 | mat[..., 2, 2] = cj * ck 49 | mat[..., 2, 1] = sj * sc - cs 50 | mat[..., 2, 0] = sj * cc + ss 51 | mat[..., 1, 2] = cj * sk 52 | mat[..., 1, 1] = sj * ss + cc 53 | mat[..., 1, 0] = sj * cs - sc 54 | mat[..., 0, 2] = -sj 55 | mat[..., 0, 1] = cj * si 56 | mat[..., 0, 0] = cj * ci 57 | return mat 58 | 59 | 60 | def euler2quat(euler): 61 | """ Convert Euler Angles to Quaternions. See rotation.py for notes """ 62 | euler = np.asarray(euler, dtype=np.float64) 63 | assert euler.shape[-1] == 3, "Invalid shape euler {}".format(euler) 64 | 65 | ai, aj, ak = euler[..., 2] / 2, -euler[..., 1] / 2, euler[..., 0] / 2 66 | si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak) 67 | ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak) 68 | cc, cs = ci * ck, ci * sk 69 | sc, ss = si * ck, si * sk 70 | 71 | quat = np.empty(euler.shape[:-1] + (4,), dtype=np.float64) 72 | quat[..., 0] = cj * cc + sj * ss 73 | quat[..., 3] = cj * sc - sj * cs 74 | quat[..., 2] = -(cj * ss + sj * cc) 75 | quat[..., 1] = cj * cs - sj * sc 76 | return quat 77 | 78 | 79 | def mat2euler(mat): 80 | """ Convert Rotation Matrix to Euler Angles. See rotation.py for notes """ 81 | mat = np.asarray(mat, dtype=np.float64) 82 | assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat) 83 | 84 | cy = np.sqrt(mat[..., 2, 2] * mat[..., 2, 2] + mat[..., 1, 2] * mat[..., 1, 2]) 85 | condition = cy > _EPS4 86 | euler = np.empty(mat.shape[:-1], dtype=np.float64) 87 | euler[..., 2] = np.where(condition, 88 | -np.arctan2(mat[..., 0, 1], mat[..., 0, 0]), 89 | -np.arctan2(-mat[..., 1, 0], mat[..., 1, 1])) 90 | euler[..., 1] = np.where(condition, 91 | -np.arctan2(-mat[..., 0, 2], cy), 92 | -np.arctan2(-mat[..., 0, 2], cy)) 93 | euler[..., 0] = np.where(condition, 94 | -np.arctan2(mat[..., 1, 2], mat[..., 2, 2]), 95 | 0.0) 96 | return euler 97 | 98 | 99 | def mat2quat(mat): 100 | """ Convert Rotation Matrix to Quaternion. 
See rotation.py for notes """
101 | mat = np.asarray(mat, dtype=np.float64)
102 | assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)
103 | 
104 | Qxx, Qyx, Qzx = mat[..., 0, 0], mat[..., 0, 1], mat[..., 0, 2]
105 | Qxy, Qyy, Qzy = mat[..., 1, 0], mat[..., 1, 1], mat[..., 1, 2]
106 | Qxz, Qyz, Qzz = mat[..., 2, 0], mat[..., 2, 1], mat[..., 2, 2]
107 | # Fill only lower half of symmetric matrix
108 | K = np.zeros(mat.shape[:-2] + (4, 4), dtype=np.float64)
109 | K[..., 0, 0] = Qxx - Qyy - Qzz
110 | K[..., 1, 0] = Qyx + Qxy
111 | K[..., 1, 1] = Qyy - Qxx - Qzz
112 | K[..., 2, 0] = Qzx + Qxz
113 | K[..., 2, 1] = Qzy + Qyz
114 | K[..., 2, 2] = Qzz - Qxx - Qyy
115 | K[..., 3, 0] = Qyz - Qzy
116 | K[..., 3, 1] = Qzx - Qxz
117 | K[..., 3, 2] = Qxy - Qyx
118 | K[..., 3, 3] = Qxx + Qyy + Qzz
119 | K /= 3.0
120 | # TODO: vectorize this -- probably could be made faster
121 | q = np.empty(K.shape[:-2] + (4,))
122 | it = np.nditer(q[..., 0], flags=['multi_index'])
123 | while not it.finished:
124 | # Use Hermitian eigenvectors, values for speed
125 | vals, vecs = np.linalg.eigh(K[it.multi_index])
126 | # Select largest eigenvector, reorder to w,x,y,z quaternion
127 | q[it.multi_index] = vecs[[3, 0, 1, 2], np.argmax(vals)]
128 | # Prefer quaternion with positive w
129 | # (q * -1 corresponds to same rotation as q)
130 | if q[it.multi_index][0] < 0:
131 | q[it.multi_index] *= -1
132 | it.iternext()
133 | return q
134 | 
135 | 
136 | def quat2euler(quat):
137 | """ Convert Quaternion to Euler Angles. See rotation.py for notes """
138 | return mat2euler(quat2mat(quat))
139 | 
140 | 
141 | def quat2mat(quat):
142 | """ Convert Quaternion to Rotation Matrix. See rotation.py for notes """
143 | quat = np.asarray(quat, dtype=np.float64)
144 | assert quat.shape[-1] == 4, "Invalid shape quat {}".format(quat)
145 | 
146 | w, x, y, z = quat[..., 0], quat[..., 1], quat[..., 2], quat[..., 3]
147 | Nq = np.sum(quat * quat, axis=-1)
148 | s = 2.0 / Nq
149 | X, Y, Z = x * s, y * s, z * s
150 | wX, wY, wZ = w * X, w * Y, w * Z
151 | xX, xY, xZ = x * X, x * Y, x * Z
152 | yY, yZ, zZ = y * Y, y * Z, z * Z
153 | 
154 | mat = np.empty(quat.shape[:-1] + (3, 3), dtype=np.float64)
155 | mat[..., 0, 0] = 1.0 - (yY + zZ)
156 | mat[..., 0, 1] = xY - wZ
157 | mat[..., 0, 2] = xZ + wY
158 | mat[..., 1, 0] = xY + wZ
159 | mat[..., 1, 1] = 1.0 - (xX + zZ)
160 | mat[..., 1, 2] = yZ - wX
161 | mat[..., 2, 0] = xZ - wY
162 | mat[..., 2, 1] = yZ + wX
163 | mat[..., 2, 2] = 1.0 - (xX + yY)
164 | return np.where((Nq > _FLOAT_EPS)[..., np.newaxis, np.newaxis], mat, np.eye(3)) -------------------------------------------------------------------------------- /Reorientation/utils/rnn_network.py: -------------------------------------------------------------------------------- 1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | from torch.autograd import Variable
5 | 
6 | class RNNNetwork(nn.Module):
7 | def __init__(self, rnn_cell,
8 | m,
9 | n):
10 | """
11 | :param rnn_cell: RNN (NCP) module
12 | """
13 | # nn.Module that unfolds an RNN cell into a sequence
14 | super(RNNNetwork, self).__init__()
15 | self.rnn_cell = rnn_cell
16 | self.obs_dim = m
17 | self.act_dim = n
18 | # followed by an fc_layer
19 | self.fc_input_layer = nn.Linear(in_features=self.obs_dim, out_features=rnn_cell.input_size) # map observation_dim to RNN_input dim
20 | self.fc_output_layer = nn.Linear(in_features=rnn_cell.hidden_size, out_features=self.act_dim)
21 | 
22 | def forward(self, x): # this is used for training
23 | device = x.device
24 | 
batch_size = x.size(0) 25 | # hidden states were set to be zeros at time 0 26 | hidden_state = ( # h_{0} and c_{0} 27 | torch.zeros((self.rnn_cell.num_layers, batch_size, self.rnn_cell.hidden_size), device=device), 28 | torch.zeros((self.rnn_cell.num_layers, batch_size, self.rnn_cell.hidden_size), device=device) 29 | ) 30 | x = self.fc_input_layer(x.float()) # (batch_size, seq_len, self.rnn_cell.input_size) 31 | outputs, hidden_state = self.rnn_cell(x, hidden_state) 32 | # output.shape = (batch_size, seq_len, self.rnn_cell.hidden_size) 33 | # output -> hidden_state at each time step (the top layer if multiple layers), and we use this as the output data 34 | # hidden_state = (hidden_state[0] = h_{T} and hidden_state[1] = c_{T}) at last time step T, if there are multiple layers, output are the h_{t} for the top layers 35 | outputs = self.fc_output_layer(outputs) #(batch_size, seq_len, act_dim) 36 | return outputs 37 | 38 | def predict(self, observation, hidden_state): 39 | # for execution, so batch_size is always 1. Not for training 40 | observation = self.fc_input_layer(observation) 41 | output, hidden_state = self.rnn_cell(observation.view(1, 1, -1), hidden_state) 42 | output = self.fc_output_layer(output) 43 | return output, hidden_state -------------------------------------------------------------------------------- /Tool/Data/Hammer_task.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Tool/Data/Hammer_task.pickle -------------------------------------------------------------------------------- /Tool/Results/Controller/NN_controller_best.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Tool/Results/Controller/NN_controller_best.pt -------------------------------------------------------------------------------- /Tool/Results/Expert_policy/best_policy.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Tool/Results/Expert_policy/best_policy.pickle -------------------------------------------------------------------------------- /Tool/Results/KODex/koopmanMatrix.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/Tool/Results/KODex/koopmanMatrix.npy -------------------------------------------------------------------------------- /Tool/utils/Controller.py: -------------------------------------------------------------------------------- 1 | """ 2 | Define the controllers 3 | """ 4 | import numpy as np 5 | 6 | class PID(): 7 | def __init__(self, Kp, Ki, Kd, setpoint = None): 8 | self.Kp, self.Ki, self.Kd = Kp, Ki, Kd 9 | self.setpoint = setpoint 10 | self._last_input = None 11 | self._last_error = None 12 | 13 | def __call__(self, input): # input: current hand joints 14 | ''' 15 | Call the PID controller with *input* and calculate and return a control output 16 | ''' 17 | error = self.setpoint - input 18 | d_input = input - (self._last_input if (self._last_input is not None) else input) 19 | self._proportional = self.Kp * error # P term 20 | # self._integral += self.Ki * error * dt 21 | # self._integral = _clamp(self._integral, self.output_limits) # Avoid integral windup 22 | 
self._derivative = -self.Kd * d_input #/ dt 23 | output = self._proportional + self._derivative 24 | self._last_output = output 25 | self._last_input = input 26 | self._last_error = error 27 | return output 28 | 29 | def set_goal(self, setpoint): # set target joint state 30 | self.setpoint = setpoint 31 | -------------------------------------------------------------------------------- /Tool/utils/Observables.py: -------------------------------------------------------------------------------- 1 | """ 2 | Define the functions that are used for koopman lifting 3 | """ 4 | import numpy as np 5 | from utils.fc_network import FCNetwork 6 | from utils.gnn_networks import InteractionNetwork 7 | import copy 8 | import torch 9 | 10 | class DraftedObservable(object): 11 | def __init__(self, num_handStates, num_objStates): 12 | self.num_ori_handStates = num_handStates 13 | self.num_ori_objStates = num_objStates 14 | self.num_ori_states = num_handStates + num_objStates 15 | 16 | def z(self, handState, objStates): 17 | """ 18 | Inputs: hand states(pos, vel) & object states(pos, vel) 19 | Outputs: state in lifted space 20 | Note: velocity is optional 21 | """ 22 | obs = np.zeros(self.compute_observable(self.num_ori_handStates, self.num_ori_objStates)) 23 | index = 0 24 | for i in range(self.num_ori_handStates): 25 | obs[index] = handState[i] 26 | index += 1 27 | for i in range(self.num_ori_handStates): 28 | obs[index] = handState[i] ** 2 29 | index += 1 30 | for i in range(self.num_ori_objStates): 31 | obs[index] = objStates[i] 32 | index += 1 33 | for i in range(self.num_ori_objStates): 34 | obs[index] = objStates[i] ** 2 35 | index += 1 36 | for i in range(self.num_ori_objStates): 37 | for j in range(i + 1, self.num_ori_objStates): 38 | obs[index] = objStates[i] * objStates[j] 39 | index += 1 40 | for i in range(self.num_ori_handStates): 41 | for j in range(i + 1, self.num_ori_handStates): 42 | obs[index] = handState[i] * handState[j] 43 | index += 1 44 | # can add handState[i] ** 3 45 | for i in range(self.num_ori_handStates): 46 | obs[index] = handState[i] ** 3 47 | index += 1 48 | # for i in range(self.num_ori_objStates): 49 | # obs[index] = objStates[i] ** 3 50 | # index += 1 51 | # for i in range(self.num_ori_handStates): 52 | # for j in range(self.num_ori_handStates): 53 | # obs[index] = handState[i] ** 2 * handState[j] 54 | # index += 1 55 | for i in range(self.num_ori_objStates): 56 | for j in range(self.num_ori_objStates): 57 | obs[index] = objStates[i] ** 2 * objStates[j] 58 | index += 1 59 | return obs 60 | 61 | def compute_observable(self, num_hand, num_obj): 62 | return int(2 * (num_hand + num_obj) + (num_obj - 1) * num_obj / 2 + num_hand + num_obj ** 2 + (num_hand - 1) * num_hand / 2) 63 | 64 | class MLPObservable(object): 65 | def __init__(self, num_handStates, num_objStates, param): 66 | self.param = param 67 | self.num_ori_states = num_handStates + num_objStates 68 | self.encoder = [i for i in param['encoder']] 69 | self.encoder.insert(0, self.num_ori_states) 70 | self.decoder = [i for i in param['decoder']] 71 | self.decoder.append(self.num_ori_states) 72 | self.encoder_NN = FCNetwork(self.encoder, param['nonlinearity'], param['encoder_seed']) 73 | self.decoder_NN = FCNetwork(self.decoder, param['nonlinearity'], param['decoder_seed']) 74 | self.trainable_params = list(self.encoder_NN.parameters()) + list(self.decoder_NN.parameters()) 75 | 76 | def count_parameters(self): 77 | return sum(p.numel() for p in self.trainable_params if p.requires_grad) 78 | 79 | def z(self, input_state): 80 
| """ 81 | Inputs: original states: hand states(pos, vel) & object states(pos, vel) 82 | Outputs: state in lifted space 83 | Note: velocity is optional 84 | """ 85 | z = self.encoder_NN(input_state) 86 | return z 87 | 88 | def z_inverse(self, liftedStates): 89 | """ 90 | Inputs: state in lifted space 91 | Outputs: original states: hand states(pos, vel) & object states(pos, vel) 92 | Note: velocity is optional 93 | """ 94 | x = self.decoder_NN(liftedStates) 95 | return x 96 | 97 | def train(self): # set the models in training mode 98 | self.encoder_NN.train() 99 | self.decoder_NN.train() 100 | 101 | def eval(self): # set the models in evaluation mode 102 | self.encoder_NN.eval() 103 | self.decoder_NN.eval() 104 | 105 | def loss(self, batch_data, encoder_lambda, pred_lambda): # the loss consists of three part: 106 | # 1). Intrinsic coordinates transformation loss, 2). Linear dynamics loss, 3). some others 107 | out = {} 108 | lifted_data = self.z(batch_data) 109 | batch_data_inverse = self.z_inverse(lifted_data) 110 | self.encoder_loss_criterion = torch.nn.MSELoss() 111 | self.encoder_loss = encoder_lambda * self.encoder_loss_criterion(batch_data_inverse, batch_data.detach().to(torch.float32)) # the auto-encoder loss 112 | out['Encoder_loss_torch'] = self.encoder_loss.item() 113 | batch_data_inverse_numpy = batch_data_inverse.detach().numpy() 114 | batch_data_numpy = batch_data.detach().numpy() 115 | Encoder_loss = np.abs(batch_data_numpy - batch_data_inverse_numpy) 116 | out['Encoder_loss_numpy'] = Encoder_loss 117 | self.pred_loss_criterion = torch.nn.MSELoss() 118 | for i in range(lifted_data.shape[0]): 119 | if i == 0: 120 | A = torch.outer(lifted_data[i,1,:], lifted_data[i,0,:]) / lifted_data.shape[0] 121 | G = torch.outer(lifted_data[i,0,:], lifted_data[i,0,:]) / lifted_data.shape[0] 122 | else: 123 | A += torch.outer(lifted_data[i,1,:], lifted_data[i,0,:]) / lifted_data.shape[0] 124 | G += torch.outer(lifted_data[i,0,:], lifted_data[i,0,:]) / lifted_data.shape[0] 125 | koopman_operator = torch.matmul(A, torch.linalg.pinv(G)) # using the batch data, compute the koopman matrix 126 | Linear_evol = torch.matmul(lifted_data[:,0,:], koopman_operator) 127 | self.pred_loss = pred_lambda * self.pred_loss_criterion(Linear_evol, lifted_data[:,1,:]) # the prediction loss 128 | out['Pred_loss_torch'] = self.pred_loss.item() 129 | Linear_evol_numpy = Linear_evol.detach().numpy() 130 | lifted_data_numpy = lifted_data[:,1,:].detach().numpy() 131 | pred_loss = np.abs(lifted_data_numpy - Linear_evol_numpy) 132 | out['Pred_loss_numpy'] = pred_loss 133 | loss = self.encoder_loss + self.pred_loss # loss is the sum of each component -> this one is used for pytorch training 134 | return loss, out, koopman_operator 135 | 136 | def load(self, encoder_path, decoder_path): 137 | self.encoder_NN.load_state_dict(torch.load(encoder_path)) 138 | self.decoder_NN.load_state_dict(torch.load(decoder_path)) 139 | 140 | class GNNObservable(object): 141 | def __init__(self, num_objStates, param): 142 | self.gnn_encoder = InteractionNetwork(num_objStates, param['relation_domain'], param['relation_encoder'], param['object_encoder'], param['gnn_nonlinearity'], param['gnn_encoder_seed']) 143 | tmp = param['object_decoder'] 144 | tmp.append(num_objStates) 145 | self.gnn_decoder = InteractionNetwork(param['object_encoder'][-1], param['relation_domain'], param['relation_decoder'], tmp, param['gnn_nonlinearity'], param['gnn_decoder_seed']) 146 | self.trainable_params = list(self.gnn_encoder.parameters()) + 
list(self.gnn_decoder.parameters()) 147 | self._rollout = False 148 | 149 | def count_parameters(self): 150 | return sum(p.numel() for p in self.trainable_params if p.requires_grad) 151 | 152 | def z(self, input_state, relations, flag): 153 | """ 154 | Inputs: original states: hand states(pos, vel) & object states(pos, vel); defined relations & relation types 155 | Outputs: state in lifted space 156 | Note: velocity is optional 157 | """ 158 | z = self.gnn_encoder(input_state, relations['sender_relations'], relations['receiver_relations'], relations['relation_info'], flag) 159 | return z 160 | 161 | def z_inverse(self, liftedStates, relations, flag): 162 | """ 163 | Inputs: state in lifted space; defined relations & relation types 164 | Outputs: original states: hand states(pos, vel) & object states(pos, vel) 165 | Note: velocity is optional 166 | """ 167 | x = self.gnn_decoder(liftedStates, relations['sender_relations'], relations['receiver_relations'], relations['relation_info'], flag) 168 | return x 169 | 170 | def train(self): # set the models in training mode 171 | self.gnn_encoder.train() 172 | self.gnn_decoder.train() 173 | 174 | def eval(self): # set the models in evaluation mode 175 | self.gnn_encoder.eval() 176 | self.gnn_decoder.eval() 177 | 178 | def set_status(self, flag): 179 | if flag == 0: 180 | self._rollout = True 181 | 182 | def loss(self, batch_data, relations, encoder_lambda, pred_lambda): # the loss consists of three part: 183 | batch_size = batch_data.shape[0] 184 | out = {} 185 | lifted_data = self.z(batch_data, relations, self._rollout) 186 | # .view(effect_receivers_t_1.shape[0], -1) 187 | batch_data_inverse = self.z_inverse(lifted_data, relations, self._rollout) 188 | self.encoder_loss_criterion = torch.nn.MSELoss() 189 | self.encoder_loss = encoder_lambda * self.encoder_loss_criterion(batch_data_inverse, batch_data.detach()) # the auto-encoder loss 190 | out['Encoder_loss_torch'] = self.encoder_loss.item() 191 | batch_data_inverse_numpy = batch_data_inverse.detach().numpy() 192 | batch_data_numpy = batch_data.detach().numpy() 193 | Encoder_loss = np.abs(batch_data_numpy - batch_data_inverse_numpy) 194 | out['Encoder_loss_numpy'] = Encoder_loss 195 | self.pred_loss_criterion = torch.nn.MSELoss() 196 | lifted_data = lifted_data.view(batch_size, 2, -1) # Squeeze along the last 2 axis 197 | for i in range(lifted_data.shape[0]): 198 | if i == 0: 199 | A = torch.outer(lifted_data[i,1,:], lifted_data[i,0,:]) / lifted_data.shape[0] 200 | G = torch.outer(lifted_data[i,0,:], lifted_data[i,0,:]) / lifted_data.shape[0] 201 | else: 202 | A += torch.outer(lifted_data[i,1,:], lifted_data[i,0,:]) / lifted_data.shape[0] 203 | G += torch.outer(lifted_data[i,0,:], lifted_data[i,0,:]) / lifted_data.shape[0] 204 | koopman_operator = torch.matmul(A, torch.linalg.pinv(G)) # using the batch data, compute the koopman matrix 205 | Linear_evol = torch.matmul(lifted_data[:,0,:], koopman_operator) 206 | self.pred_loss = pred_lambda * self.pred_loss_criterion(Linear_evol, lifted_data[:,1,:]) # the prediction loss 207 | out['Pred_loss_torch'] = self.pred_loss.item() 208 | Linear_evol_numpy = Linear_evol.detach().numpy() 209 | lifted_data_numpy = lifted_data[:,1,:].detach().numpy() 210 | pred_loss = np.abs(lifted_data_numpy - Linear_evol_numpy) 211 | out['Pred_loss_numpy'] = pred_loss 212 | loss = self.encoder_loss + self.pred_loss # loss is the sum of each component -> this one is used for pytorch training 213 | return loss, out, koopman_operator 214 | 215 | def load(self, encoder_path, 
decoder_path): 216 | self.gnn_encoder.load_state_dict(torch.load(encoder_path)) 217 | self.gnn_decoder.load_state_dict(torch.load(decoder_path)) 218 | 219 | def Create_relations(self, batch_size, gnn_num_obj, gnn_num_relation, param, eval_length): 220 | receiver_relations = np.zeros((batch_size, gnn_num_obj, gnn_num_relation)) 221 | sender_relations = np.zeros((batch_size, gnn_num_obj, gnn_num_relation)) 222 | relation_info = np.zeros((batch_size, gnn_num_relation, param['relation_domain'])) 223 | receiver_relations[:, 0, 0:5] = 1 224 | cnt = 0 225 | for i in [1,2,3,4,5]: 226 | sender_relations[:, 0, (cnt * gnn_num_obj) + i] = 1 227 | cnt += 1 228 | receiver_relations[:, 1, 6] = 1 229 | receiver_relations[:, 1, 11] = 1 230 | sender_relations[:, 1, 0] = 1 231 | sender_relations[:, 1, 41] = 1 232 | receiver_relations[:, 2, 12] = 1 233 | receiver_relations[:, 2, 17] = 1 234 | sender_relations[:, 2, 0] = 1 235 | sender_relations[:, 2, 41] = 1 236 | receiver_relations[:, 3, 18] = 1 237 | receiver_relations[:, 3, 23] = 1 238 | sender_relations[:, 3, 0] = 1 239 | sender_relations[:, 3, 41] = 1 240 | receiver_relations[:, 4, 24] = 1 241 | receiver_relations[:, 4, 29] = 1 242 | sender_relations[:, 4, 0] = 1 243 | sender_relations[:, 4, 41] = 1 244 | receiver_relations[:, 5, 30] = 1 245 | receiver_relations[:, 5, 35] = 1 246 | sender_relations[:, 5, 0] = 1 247 | sender_relations[:, 5, 41] = 1 248 | receiver_relations[:, 6, 37:42] = 1 249 | cnt = 0 250 | for i in [1,2,3,4,5]: 251 | sender_relations[:, 6, (cnt * gnn_num_obj) + i] = 1 252 | cnt += 1 253 | relation_info[:, 0:5, 0] = 1 254 | relation_info[:, 6, 0] = 1 255 | relation_info[:, 11, 1] = 1 256 | relation_info[:, 12, 0] = 1 257 | relation_info[:, 17, 1] = 1 258 | relation_info[:, 18, 0] = 1 259 | relation_info[:, 23, 1] = 1 260 | relation_info[:, 24, 0] = 1 261 | relation_info[:, 29, 1] = 1 262 | relation_info[:, 30, 0] = 1 263 | relation_info[:, 35, 1] = 1 264 | relation_info[:, -5:-1, 1] = 1 265 | relation_info[:, -1, 1] = 1 266 | # Todo: check if this part is implemented correctly 267 | relations = {} 268 | relations['receiver_relations'] = torch.from_numpy(receiver_relations).to(torch.float32) 269 | relations['sender_relations'] = torch.from_numpy(sender_relations).to(torch.float32) 270 | relations['relation_info'] = torch.from_numpy(relation_info).to(torch.float32) 271 | relations_eval = {} 272 | receiver_relations_eval = relations['receiver_relations'][0][None,:].repeat(eval_length, 1, 1) 273 | sender_relations_eval = relations['sender_relations'][0][None,:].repeat(eval_length, 1, 1) 274 | relation_info_eval = relations['relation_info'][0][None,:].repeat(eval_length, 1, 1) 275 | relations_eval['receiver_relations'] = receiver_relations_eval 276 | relations_eval['sender_relations'] = sender_relations_eval 277 | relations_eval['relation_info'] = relation_info_eval 278 | return relations, relations_eval 279 | 280 | def Create_states(self): 281 | hand_dict = {} 282 | hand_dict['palm'] = [0, 3] # or [0, 2] 283 | hand_dict['forfinger'] = [3, 7] 284 | hand_dict['middlefinger'] = [7, 11] 285 | hand_dict['ringfinger'] = [11, 15] 286 | hand_dict['littlefinger'] = [15, 19] 287 | hand_dict['thumb'] = [19, 24] 288 | gnn_num_obj = 7 # palm, five fingers, manipulated object 289 | gnn_num_relation = gnn_num_obj * (gnn_num_obj - 1) 290 | return hand_dict, gnn_num_obj, gnn_num_relation -------------------------------------------------------------------------------- /Tool/utils/coord_trans.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import inv 3 | 4 | def ori_transform(current_ori, desired_ori): 5 | # build a new coordinate frame that defines desired_ori as [1, 0, 0] - use desired_ori to build a new orthonormal basis 6 | # and then express current_ori in that new coordinate frame 7 | x = desired_ori 8 | y, z = compute_orthonormal_vector(x) 9 | origin_x = np.array([1, 0, 0]) 10 | origin_y = np.array([0, 1, 0]) 11 | origin_z = np.array([0, 0, 1]) 12 | Q = np.zeros([3,3]) 13 | Q[0,0] = np.dot(x, origin_x) 14 | Q[0,1] = np.dot(x, origin_y) 15 | Q[0,2] = np.dot(x, origin_z) 16 | Q[1,0] = np.dot(y, origin_x) 17 | Q[1,1] = np.dot(y, origin_y) 18 | Q[1,2] = np.dot(y, origin_z) 19 | Q[2,0] = np.dot(z, origin_x) 20 | Q[2,1] = np.dot(z, origin_y) 21 | Q[2,2] = np.dot(z, origin_z) 22 | return np.dot(Q, current_ori) 23 | 24 | def ori_transform_inverse(current_ori, desired_ori): 25 | x = desired_ori 26 | y, z = compute_orthonormal_vector(x) 27 | origin_x = np.array([1, 0, 0]) 28 | origin_y = np.array([0, 1, 0]) 29 | origin_z = np.array([0, 0, 1]) 30 | Q = np.zeros([3,3]) 31 | Q[0,0] = np.dot(x, origin_x) 32 | Q[0,1] = np.dot(x, origin_y) 33 | Q[0,2] = np.dot(x, origin_z) 34 | Q[1,0] = np.dot(y, origin_x) 35 | Q[1,1] = np.dot(y, origin_y) 36 | Q[1,2] = np.dot(y, origin_z) 37 | Q[2,0] = np.dot(z, origin_x) 38 | Q[2,1] = np.dot(z, origin_y) 39 | Q[2,2] = np.dot(z, origin_z) 40 | Q = Q.T 41 | return np.dot(Q, current_ori) 42 | 43 | def compute_orthonormal_vector(desired_ori): 44 | # compute two orthonormal vectors wrt desired_ori (the new x axis) to serve as the new y, z axes 45 | # for the y axis, we impose three constraints: 1) orthogonality: x1*v1 + x2*v2 + x3*v3 = 0, 2) unit norm: v1^2 + v2^2 + v3^2 = 1, 3) v1 = v2 (manually specified) 46 | # the z axis is then uniquely defined as the cross product of the x and y axes 47 | x1 = desired_ori[0] 48 | x2 = desired_ori[1] 49 | x3 = desired_ori[2] 50 | v1 = v2 = np.sqrt(1 / (2 + (x1 + x2) ** 2 / x3 ** 2)) 51 | v3 = -(x1+x2)/x3*v2 52 | y = np.array([v1, v2, v3]) 53 | z1 = x2*v3 - x3*v2 54 | z2 = x3*v1 - x1*v3 55 | z3 = x1*v2 - x2*v1 56 | z = np.array([z1, z2, z3]) / np.sqrt(z1**2+z2**2+z3**2) # normalize by the norm, not the squared norm (z = x cross y is already unit-length when desired_ori is a unit vector) 57 | return y, z 58 | 59 | -------------------------------------------------------------------------------- /Tool/utils/dmp_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | import numpy as np 4 | 5 | 6 | class DMPIntegrator(): 7 | def __init__(self, rbf='gaussian', only_g=False, az=False): 8 | a = 1 9 | self.rbf = rbf 10 | self.only_g = only_g 11 | self.az = az 12 | 13 | def forward(self, inputs, parameters, param_gradients, scaling, y0, dy0, goal=None, w=None, vel=False): 14 | # param_gradients is not explicitly used.
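(Back to coord_trans.py above: since x = desired_ori, y, and z form an orthonormal basis, the Q assembled row by row is an orthogonal matrix, so ori_transform_inverse, which applies Q.T, exactly undoes ori_transform. A quick self-check, run from a task directory so that utils is importable; the test vectors are arbitrary, and desired_ori must be unit-length with a nonzero third component because compute_orthonormal_vector divides by x3.)

import numpy as np
from utils.coord_trans import ori_transform, ori_transform_inverse

d = np.array([0.3, -0.5, 0.8])
d = d / np.linalg.norm(d)       # unit desired_ori with x3 != 0
v = np.array([0.2, 0.7, -0.1])  # arbitrary orientation vector
w = ori_transform(v, d)
print(np.allclose(ori_transform_inverse(w, d), v))        # should print True (Q.T = Q^-1)
print(np.allclose(ori_transform(d, d), [1.0, 0.0, 0.0]))  # desired_ori maps to the new x axis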
15 | # goal is None 16 | dim = int(parameters[0].item()) # 2 17 | k = dim 18 | N = int(parameters[1].item()) 19 | division = k*(N + 2) 20 | inputs_np = inputs # NN output: Size(64) 21 | if goal is not None: 22 | goal = goal 23 | w = w 24 | else: 25 | w = inputs_np[:, dim:dim*(N + 1)] # dim:dim*(N + 1) = 2:62 / w.shape = (batch_size, 60) 26 | goal = inputs_np[:, :dim] 27 | 28 | if self.az: #False 29 | alpha_z = inputs[:, -1] 30 | t = y0.shape[0] // inputs.shape[0] 31 | alpha_z = alpha_z.repeat(t, 1).transpose(0, 1).reshape(inputs.shape[0], -1) 32 | alpha_z = alpha_z.contiguous().view(alpha_z.shape[0] * alpha_z.shape[1], ) 33 | 34 | w = w.reshape(-1, N) # (2*batch_size, N) y0.shape -> [2*batch_size] 35 | 36 | if self.only_g: #false, if true, NN only outputs goal 37 | w = torch.zeros_like(w) 38 | if vel: # false 39 | dy0 = torch.zeros_like(y0) 40 | # dy0 = torch.zeros_like(y0) + 0.01 # set to be small values 0.01 41 | goal = goal.contiguous().view(goal.shape[0]*goal.shape[1], ) # [2*batch_size] y0.shape -> [2*batch_size] 42 | if self.az: 43 | X, dX, ddX = integrate(parameters, w, y0, dy0, goal, 1, rbf=self.rbf, az=True, alpha_z=alpha_z) 44 | else: 45 | X, dX, ddX = integrate(parameters, w, y0, dy0, goal, 1, rbf=self.rbf) # X -> whole trajectory 46 | # X and inputs.new(X) have the same values 47 | return inputs.new(X), inputs.new(dX), inputs.new(ddX) 48 | 49 | 50 | def forward_not_int(self, inputs, parameters, param_gradients, scaling, y0, dy0, goal=None, w=None, vel=False): 51 | dim = int(parameters[0].item()) 52 | k = dim 53 | N = int(parameters[1].item()) 54 | division = k*(N + 2) 55 | inputs_np = inputs 56 | if goal is not None: 57 | goal = goal 58 | w = w 59 | else: 60 | w = inputs_np[:, dim:dim*(N + 1)] 61 | goal = inputs_np[:, :dim] 62 | w = w.reshape(-1, N) 63 | if vel: 64 | dy0 = torch.zeros_like(y0) 65 | goal = goal.contiguous().view(goal.shape[0]*goal.shape[1], ) 66 | return parameters, w, y0, dy0, goal, 1 67 | 68 | def first_step(self, w, parameters, scaling, y0, dy0, l, tau=1): 69 | data = parameters 70 | y = y0 71 | self.y0 = y0 72 | z = dy0 * tau 73 | self.x = 1 74 | self.N = int(data[1].item()) 75 | self.dt = data[3].item() 76 | self.a_x = data[4].item() 77 | self.a_z = data[5].item() 78 | self.b_z = self.a_z / 4 79 | self.h = data[(6+self.N):(6+self.N*2)] 80 | self.c = data[6:(6+self.N)] 81 | self.num_steps = int(data[2].item())-1 82 | self.i = 0 83 | self.w = w.reshape(-1, self.N) 84 | self.tau = tau 85 | self.l = l 86 | 87 | def step(self, g, y, dy): 88 | g = g.reshape(-1, 1)[:, 0] 89 | z = dy*self.tau 90 | dt = self.dt 91 | for _ in range(self.l): 92 | dx = (-self.a_x * self.x) / self.tau 93 | self.x = self.x + dx * dt 94 | psi = torch.exp(-self.h * torch.pow((self.x - self.c), 2)) 95 | fx = torch.mv(self.w, psi)*self.x*(g - self.y0) / torch.sum(psi) 96 | dz = self.a_z * (self.b_z * (g - y) - z) + fx 97 | dy = z 98 | dz = dz / self.tau 99 | dy = dy / self.tau 100 | y = y + dy * dt 101 | z = z + dz * dt 102 | self.i += 1 103 | return y, dy, dz 104 | 105 | 106 | def integrate(data, w, y0, dy0, goal, tau, rbf='gaussian', az=False, alpha_z=None): 107 | y = y0 # y0:input 108 | z = dy0 * tau # tau = 1, z = dy 109 | x = 1 # x_0 110 | # data[2] -> simu horizon 111 | if w.is_cuda: 112 | Y = torch.cuda.FloatTensor(w.shape[0], int(data[2].item())).fill_(0) 113 | dY = torch.cuda.FloatTensor(w.shape[0], int(data[2].item())).fill_(0) 114 | ddY = torch.cuda.FloatTensor(w.shape[0], int(data[2].item())).fill_(0) 115 | else: 116 | Y = torch.zeros((w.shape[0],int(data[2].item()))) 117 | dY = 
torch.zeros((w.shape[0],int(data[2].item()))) 118 | ddY = torch.zeros((w.shape[0],int(data[2].item()))) 119 | Y[:, 0] = y 120 | dY[:, 0] = dy0 121 | ddY[:, 0] = z 122 | N = int(data[1].item()) 123 | dt = data[3].item() 124 | a_x = data[4].item() # a_x in paper (eqn. (2)) 125 | a_z = data[5].item() # \alpha in paper (eqn. (1)) 126 | if az: 127 | a_z = alpha_z 128 | a_z = torch.clamp(a_z, 0.5, 30) 129 | b_z = a_z / 4 # \beta in paper (eqn. (1)) 130 | h = data[(6+N):(6+N*2)] # h size N(30) 131 | c = data[6:(6+N)] # c size N(30) 132 | # the first 6 params are for other functions 133 | for i in range(0, int(data[2].item())-1): # data[2] -> simu horizon 134 | dx = (-a_x * x) / tau 135 | x = x + dx * dt 136 | eps = torch.pow((x - c), 2) 137 | # rbf are for basis functions (radial basis functions) 138 | if rbf == 'gaussian': 139 | psi = torch.exp(-h * eps) # (eqn. (3)) 140 | if rbf == 'multiquadric': 141 | psi = torch.sqrt(1 + h * eps) 142 | if rbf == 'inverse_quadric': 143 | psi = 1/(1 + h*eps) 144 | if rbf == 'inverse_multiquadric': 145 | psi = 1/torch.sqrt(1 + h * eps) 146 | if rbf == 'linear': 147 | psi = h * eps 148 | # print(w.shape) torch.Size([200, 30]) 149 | # print(psi.shape) torch.Size([30]) 150 | # print(x) int 151 | # print(goal.shape) torch.Size([200]) 152 | # print(y0.shape) torch.Size([200]) 153 | fx = torch.mv(w, psi)*x*(goal-y0) / torch.sum(psi) # mv - matrix-vector product 154 | dz = a_z * (b_z * (goal - y) - z) + fx 155 | dy = z 156 | dz = dz / tau # tau = 1 157 | dy = dy / tau 158 | y = y + dy * dt 159 | z = z + dz * dt 160 | Y[:, i+1] = y 161 | dY[:, i+1] = dy 162 | ddY[:, i+1] = dz 163 | return Y, dY, ddY 164 | 165 | 166 | class DMPParameters(): 167 | def __init__(self, N, tau, dt, Dof, scale, T, a_z=25): 168 | # N = number of radial basis functions 169 | # tau = 1 170 | # dt = time duration for each simulation time 171 | # Dof = num of states 172 | 173 | self.a_z = a_z 174 | self.a_x = 1 175 | self.N = N 176 | c = np.exp(-self.a_x * np.linspace(0, 1, self.N)) # a_x(data[4]) in paper (eqn. 
(2)) / c is horizontal shifts of each basis function 177 | sigma2 = np.ones(self.N) * self.N**1.5 / c / self.a_x # h is the width of each basis function 178 | self.c = torch.from_numpy(c).float() 179 | self.sigma2 = torch.from_numpy(sigma2).float() 180 | self.tau = tau 181 | self.dt = dt 182 | self.time_steps = T 183 | self.y0 = [0] 184 | self.dy0 = np.zeros(Dof) 185 | self.Dof = Dof 186 | self.Y = torch.zeros((self.time_steps)) 187 | grad = torch.zeros((self.time_steps, self.N + 2)) 188 | 189 | self.data = {'time_steps':self.time_steps,'c':self.c,'sigma2':self.sigma2,'a_z':self.a_z,'a_x':self.a_x,'dt':self.dt,'Y':self.Y} 190 | dmp_data = torch.tensor([self.Dof,self.N,self.time_steps,self.dt,self.a_x,self.a_z]) 191 | data_tensor = torch.cat((dmp_data,self.c,self.sigma2),0) 192 | # len(data_tensor) = 6 + self.N + self.N 193 | 194 | data_tensor.dy0 = self.dy0 195 | data_tensor.tau = self.tau 196 | 197 | 198 | for i in range(0, self.N): 199 | weights = torch.zeros((1,self.N)) 200 | weights[0,i] = 1 201 | grad[:, i + 2 ], _, _= integrate(data_tensor, weights, 0, 0, 0, self.tau) 202 | weights = torch.zeros((1,self.N)) 203 | grad[:, 0], _, _ = integrate(data_tensor, weights, 1, 0, 0, self.tau) 204 | weights = torch.zeros((1,self.N)) 205 | grad[:, 1], _, _ = integrate(data_tensor, weights, 0, 0, 1, self.tau) 206 | 207 | ''' 208 | self.c = self.c.cuda() 209 | self.sigma2 = self.sigma2.cuda() 210 | self.grad = grad.cuda() 211 | self.point_grads = torch.zeros(54).cuda() 212 | ''' 213 | self.data_tensor = data_tensor 214 | self.grad_tensor = grad 215 | 216 | self.point_grads = torch.zeros(self.N*2 + 4) 217 | self.X = np.zeros((self.time_steps, self.Dof)) 218 | -------------------------------------------------------------------------------- /Tool/utils/fc_network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class FCNetwork(nn.Module): 7 | def __init__(self, model_size = None, 8 | nonlinearity='tanh', # either 'tanh' or 'relu' 9 | seed = 100 10 | ): 11 | super(FCNetwork, self).__init__() 12 | 13 | self.obs_dim = model_size[0] 14 | self.act_dim = model_size[-1] 15 | self.layer_sizes = model_size 16 | 17 | # hidden layers 18 | torch.manual_seed(seed) 19 | self.fc_layers = nn.ModuleList([nn.Linear(self.layer_sizes[i], self.layer_sizes[i+1]) \ 20 | for i in range(len(self.layer_sizes) -1)]) # stack severeal layers together. 21 | # The weights are initialzied in default by: 22 | # stdv = 1. 
/ math.sqrt(self.weight.size(1)) 23 | # self.weight.data.uniform_(-stdv, stdv) 24 | # if self.bias is not None: 25 | # self.bias.data.uniform_(-stdv, stdv) 26 | self.nonlinearity = torch.relu if nonlinearity == 'relu' else torch.tanh 27 | 28 | def forward(self, x): 29 | if x.is_cuda: 30 | out = x.to('cpu') 31 | else: 32 | out = x 33 | out = out.to(torch.float32) 34 | for i in range(len(self.fc_layers)-1): 35 | out = self.fc_layers[i](out) 36 | out = self.nonlinearity(out) 37 | out = self.fc_layers[-1](out) 38 | return out 39 | -------------------------------------------------------------------------------- /Tool/utils/gaussian_rnn.py: -------------------------------------------------------------------------------- 1 | from utils.rnn_network import RNNNetwork 2 | import numpy as np 3 | import torch 4 | from torch.autograd import Variable 5 | # import kerasncp as kncp 6 | # from kerasncp.torch import LTCCell 7 | 8 | class RNN: 9 | def __init__(self, 10 | m, 11 | n, 12 | input_sizes=64, 13 | hidden_state = 64, 14 | LSTM_layer = 1, 15 | seed=None): 16 | """ 17 | :param m: NN_input size 18 | :param n: NN_output size 19 | :param hidden_sizes: network hidden layer sizes (currently 2 layers only) 20 | :param seed: random seed 21 | """ 22 | # Set seed 23 | # ------------------------ 24 | if seed is not None: 25 | torch.manual_seed(seed) 26 | np.random.seed(seed) 27 | 28 | # Policy network 29 | # ------------------------ 30 | # If batch_first is True, then the input and output tensors are provided as (batch, seq, feature) instead of (seq, batch, feature). 31 | rnn_cell = torch.nn.LSTM(input_sizes, hidden_state, batch_first=True, num_layers = LSTM_layer) 32 | # self.model = NCPNetwork(ltc_cells, env) 33 | self.model = RNNNetwork(rnn_cell, m, n) 34 | # make weights smaller 35 | # for param in list(self.model.parameters())[-2:]: # only last layer 36 | # param.data = 1e1 * param.data 37 | self.trainable_params = list(self.model.parameters()) 38 | 39 | # Placeholders 40 | # ------------------------ 41 | self.obs_var = Variable(torch.randn(m), requires_grad=False) 42 | self.hidden_state_var = Variable(torch.randn( # generate an variable to store values 43 | (LSTM_layer, 1, self.model.rnn_cell.hidden_size)), requires_grad=False) 44 | self.cell_state_var = Variable(torch.randn( 45 | (LSTM_layer, 1, self.model.rnn_cell.hidden_size)), requires_grad=False) 46 | 47 | # get_action is used for executions 48 | def get_action(self, observation, hidden_state): 49 | o = np.float32(observation.reshape(1, 1, -1)) 50 | self.obs_var.data = torch.from_numpy(o) 51 | # print(hidden_state[0].shape) 52 | self.hidden_state_var.data = torch.from_numpy(np.float32(hidden_state[0])) 53 | self.cell_state_var.data = torch.from_numpy(np.float32(hidden_state[1])) 54 | # print(self.hidden_state_var.shape) 55 | mean, hidden_state = self.model.predict(self.obs_var, (self.hidden_state_var, self.cell_state_var)) 56 | mean = mean.data.numpy().ravel() 57 | new_hidden_state = hidden_state[0].data.numpy() 58 | new_cell_state = hidden_state[1].data.numpy() 59 | hidden_state = (new_hidden_state, new_cell_state) # these are needed for the next time step. 
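(As the comments on either side of this point say, an LSTM queried one observation at a time only matches an LSTM run over the whole sequence if the returned (hidden, cell) pair is fed back in at the next step. A small self-contained sketch of that equivalence, with toy sizes rather than the repo's configuration:)

import torch

lstm = torch.nn.LSTM(input_size=8, hidden_size=16, batch_first=True)
x = torch.randn(1, 5, 8)  # (batch, seq_len, feature)
full, _ = lstm(x)         # one call over the full sequence, zero initial state

state = None              # None means zero-initialized (h_0, c_0)
steps = []
for t in range(x.shape[1]):
    out, state = lstm(x[:, t:t+1, :], state)  # one step at a time, threading the state
    steps.append(out)
print(torch.allclose(full, torch.cat(steps, dim=1), atol=1e-6))  # should print True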
60 | #Since the input is the obs at each time step, we have to manually pass the hidden state and the cell state 61 | return mean, hidden_state 62 | -------------------------------------------------------------------------------- /Tool/utils/gnn_networks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | class RelationalModel(nn.Module): 6 | def __init__(self, input_size, model_size, nonlinearity='tanh', seed = 100): 7 | super(RelationalModel, self).__init__() 8 | 9 | self.layer_sizes = model_size 10 | self.layer_sizes.insert(0, input_size) 11 | torch.manual_seed(seed) 12 | 13 | self.fc_layers = nn.ModuleList([nn.Linear(self.layer_sizes[i], self.layer_sizes[i+1]) \ 14 | for i in range(len(self.layer_sizes) -1)]) # stack severeal layers together. 15 | self.nonlinearity = torch.relu if nonlinearity == 'relu' else torch.tanh 16 | 17 | def forward(self, x): 18 | ''' 19 | Args: 20 | x: [batch_size, n_relations, input_size] 21 | Returns: 22 | [batch_size, n_relations, output_size] 23 | ''' 24 | if x.is_cuda: 25 | out = x.to('cpu') 26 | else: 27 | out = x 28 | for i in range(len(self.fc_layers)): 29 | out = self.fc_layers[i](out) 30 | out = self.nonlinearity(out) 31 | return out 32 | 33 | class ObjectModel(nn.Module): 34 | def __init__(self, input_size, model_size, nonlinearity='tanh', seed = 100): 35 | super(ObjectModel, self).__init__() 36 | 37 | self.layer_sizes = model_size 38 | self.layer_sizes.insert(0, input_size) 39 | torch.manual_seed(seed) 40 | 41 | self.fc_layers = nn.ModuleList([nn.Linear(self.layer_sizes[i], self.layer_sizes[i+1]) \ 42 | for i in range(len(self.layer_sizes) -1)]) # stack severeal layers together. 43 | self.nonlinearity = torch.relu if nonlinearity == 'relu' else torch.tanh 44 | 45 | def forward(self, x): 46 | ''' 47 | Args: 48 | x: [batch_size, n_objects, input_size] 49 | Returns: 50 | [batch_size * n_objects, 2] speedX and speedY 51 | ''' 52 | if x.is_cuda: 53 | out = x.to('cpu') 54 | else: 55 | out = x 56 | for i in range(len(self.fc_layers) - 1): 57 | out = self.fc_layers[i](out) 58 | out = self.nonlinearity(out) 59 | out = self.fc_layers[-1](out) 60 | return out 61 | 62 | class InteractionNetwork(nn.Module): 63 | def __init__(self, object_dim, relation_dim, relation_model, object_model, nonlinearity, seed): 64 | super(InteractionNetwork, self).__init__() 65 | self.relational_model = RelationalModel(2 * object_dim + relation_dim, relation_model, nonlinearity, seed) 66 | # object_model defines the output dimension of the encoder(dim of lifted space)/decoder(dim of original space) 67 | self.object_model = ObjectModel(object_dim + relation_model[-1], object_model, nonlinearity, seed) 68 | 69 | def forward(self, objects, sender_relations, receiver_relations, relation_info, flag): 70 | # in our case, the variable objects consists of palm, each finger and the manipulated object 71 | if not flag: 72 | z_t = objects[:,0,:] 73 | z_t_1 = objects[:,1,:] 74 | senders_t = sender_relations.permute(0, 2, 1).bmm(z_t) 75 | senders_t_1 = sender_relations.permute(0, 2, 1).bmm(z_t_1) 76 | receivers_t = receiver_relations.permute(0, 2, 1).bmm(z_t) 77 | receivers_t_1 = receiver_relations.permute(0, 2, 1).bmm(z_t_1) 78 | effects_t = self.relational_model(torch.cat([senders_t, receivers_t, relation_info], 2)) # cat: 2 -> Concatenate along the second axis 79 | effects_t_1 = self.relational_model(torch.cat([senders_t_1, receivers_t_1, relation_info], 2)) # cat: 2 -> Concatenate along 
the second axis 80 | effect_receivers_t = receiver_relations.bmm(effects_t) 81 | effect_receivers_t_1 = receiver_relations.bmm(effects_t_1) 82 | predicted_t = self.object_model(torch.cat([z_t, effect_receivers_t], 2))[:,None,:] 83 | predicted_t_1 = self.object_model(torch.cat([z_t_1, effect_receivers_t_1], 2))[:,None,:] 84 | return torch.cat([predicted_t, predicted_t_1], 1) 85 | else: 86 | z_t = objects 87 | senders_t = sender_relations.permute(0, 2, 1).bmm(z_t) 88 | receivers_t = receiver_relations.permute(0, 2, 1).bmm(z_t) 89 | effects_t = self.relational_model(torch.cat([senders_t, receivers_t, relation_info], 2)) # cat: 2 -> Concatenate along the second axis 90 | effect_receivers_t = receiver_relations.bmm(effects_t) 91 | predicted_t = self.object_model(torch.cat([z_t, effect_receivers_t], 2)) 92 | return predicted_t 93 | 94 | -------------------------------------------------------------------------------- /Tool/utils/gym_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wrapper around a gym env that provides convenience functions 3 | """ 4 | 5 | import gym 6 | import numpy as np 7 | from utils.rnn_network import RNNNetwork 8 | import pickle 9 | from tqdm import tqdm 10 | import copy 11 | from utils.coord_trans import ori_transform, ori_transform_inverse 12 | from utils.quatmath import euler2quat 13 | 14 | class EnvSpec(object): 15 | def __init__(self, obs_dim, act_dim, horizon): 16 | self.observation_dim = obs_dim 17 | self.action_dim = act_dim 18 | self.horizon = horizon 19 | 20 | 21 | class GymEnv(object): 22 | def __init__(self, env, env_kwargs=None, 23 | obs_mask=None, act_repeat=1, 24 | *args, **kwargs): 25 | 26 | # get the correct env behavior 27 | if type(env) == str: 28 | env = gym.make(env) # generate the mujoco env 29 | elif isinstance(env, gym.Env): 30 | env = env 31 | elif callable(env): 32 | env = env(**env_kwargs) 33 | else: 34 | print("Unsupported environment format") 35 | raise AttributeError 36 | 37 | self.env = env 38 | self.env_id = env.spec.id 39 | self.act_repeat = act_repeat 40 | try: 41 | self._horizon = env.spec.max_episode_steps # max_episode_steps is defined in the env package's __init__.py file 42 | except AttributeError: 43 | self._horizon = env.spec._horizon 44 | assert self._horizon % act_repeat == 0 45 | self._horizon = self._horizon // self.act_repeat 46 | 47 | try: 48 | self._action_dim = self.env.env.action_dim 49 | except AttributeError: 50 | self._action_dim = self.env.action_space.shape[0] 51 | 52 | try: 53 | self._observation_dim = self.env.env.obs_dim 54 | except AttributeError: 55 | self._observation_dim = self.env.observation_space.shape[0] 56 | 57 | # Specs 58 | self.spec = EnvSpec(self._observation_dim, self._action_dim, self._horizon) 59 | 60 | # obs mask 61 | self.obs_mask = np.ones(self._observation_dim) if obs_mask is None else obs_mask 62 | 63 | @property 64 | def action_dim(self): 65 | return self._action_dim 66 | 67 | @property 68 | def observation_dim(self): 69 | return self._observation_dim 70 | 71 | @property 72 | def observation_space(self): 73 | return self.env.observation_space 74 | 75 | @property 76 | def action_space(self): # each env defines an action space 77 | return self.env.action_space 78 | 79 | @property 80 | def horizon(self): 81 | return self._horizon 82 | 83 | def reset(self, seed=None): 84 | try: 85 | self.env._elapsed_steps = 0 86 | return self.env.env.reset_model(seed=seed) 87 | except: 88 | if seed is not None: 89 | self.set_seed(seed) 90 | return self.env.reset()
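(For orientation, a typical interaction with this wrapper looks as follows; the env id is a placeholder for whichever registered task env is used, and some of the repo's custom envs return extra goal information from reset.)

from utils.gym_env import GymEnv

env = GymEnv('relocate-v0', act_repeat=1)  # 'relocate-v0' is a stand-in id
obs = env.reset(seed=0)                    # some task envs return (obs, goal) here instead
for _ in range(env.horizon):               # horizon = max_episode_steps // act_repeat
    action = env.action_space.sample()     # stand-in for a trained policy
    obs, reward, done, info = env.step(action)  # step() clips actions to the action space
    if done:
        break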
91 | 92 | def reset4Koopman(self, seed=None, ori=None, init_pos=None, init_vel=None): 93 | try: 94 | self.env._elapsed_steps = 0 95 | return self.env.env.reset_model4Koopman(seed=seed, ori = ori, init_pos = init_pos, init_vel = init_vel) 96 | except: 97 | if seed is not None: 98 | self.set_seed(seed) 99 | return self.env.reset_model4Koopman(ori = ori, init_pos = init_pos, init_vel = init_vel) 100 | 101 | def KoopmanVisualize(self, seed=None, state_dict=None): 102 | try: 103 | self.env._elapsed_steps = 0 104 | return self.env.env.KoopmanVisualize(seed=seed, state_dict=state_dict) 105 | except: 106 | if seed is not None: 107 | self.set_seed(seed) 108 | return self.env.KoopmanVisualize(state_dict=state_dict) 109 | 110 | def reset_model(self, seed=None): 111 | # overloading for legacy code 112 | return self.reset(seed) 113 | 114 | def step(self, action): 115 | action = action.clip(self.action_space.low, self.action_space.high) 116 | # type(action_space) -> 117 | # self.action_space.low -> numpy.ndarray(lowest boundary) 118 | # self.action_space.high -> numpy.ndarray(highest boundary) 119 | if self.act_repeat == 1: 120 | obs, cum_reward, done, ifo = self.env.step(action) # the system dynamics is defined in each separate env python file 121 | # if(ifo['goal_achieved']): 122 | # print("done: ", ifo) 123 | # Run one timestep of the environment’s dynamics. 124 | else: 125 | cum_reward = 0.0 126 | for i in range(self.act_repeat): 127 | obs, reward, done, ifo = self.env.step(action) # the actual operations can be found in the env files 128 | # seems done is always set to be False 129 | cum_reward += reward 130 | if done: break 131 | return self.obs_mask * obs, cum_reward, done, ifo 132 | 133 | def render(self): 134 | try: 135 | self.env.env.mujoco_render_frames = True 136 | self.env.env.mj_render() 137 | except: 138 | self.env.render() 139 | 140 | def set_seed(self, seed=123): 141 | try: 142 | self.env.seed(seed) 143 | except AttributeError: 144 | self.env._seed(seed) 145 | 146 | def get_obs(self): 147 | try: 148 | return self.obs_mask * self.env.env.get_obs() 149 | except: 150 | return self.obs_mask * self.env.env._get_obs() 151 | 152 | def get_env_infos(self): 153 | try: 154 | return self.env.env.get_env_infos() 155 | except: 156 | return {} 157 | 158 | # =========================================== 159 | # Trajectory optimization related 160 | # Envs should support these functions in case of trajopt 161 | 162 | def get_env_state(self): 163 | try: 164 | return self.env.env.get_env_state() 165 | except: 166 | raise NotImplementedError 167 | 168 | def set_env_state(self, state_dict): 169 | try: 170 | self.env.env.set_env_state(state_dict) 171 | except: 172 | raise NotImplementedError 173 | 174 | def real_env_step(self, bool_val): 175 | try: 176 | self.env.env.real_step = bool_val 177 | except: 178 | raise NotImplementedError 179 | 180 | def visualize_policy_on_demos(self, policy, demos, Visualize, horizon=1000): 181 | print("Testing the RL agent!") 182 | self.reset() 183 | init_state_dict = dict() 184 | demo_ori_error = np.zeros([horizon - 1, len(demos)]) 185 | success_list_RL = [] 186 | num_hand = 26 187 | Computed_joint_values = np.zeros([num_hand, horizon, len(demos)]) 188 | for k in tqdm(range(len(demos))): 189 | init_state_dict['qpos'] = demos[k]['init']['qpos'] 190 | Computed_joint_values[:, 0, k] = demos[k]['init']['qpos'][:num_hand] 191 | init_state_dict['qvel'] = demos[k]['init']['qvel'] 192 | init_state_dict['board_pos'] = demos[k]['init']['board_pos'] # fixed for each piece of demo data 193 
| self.set_env_state(init_state_dict) 194 | o = demos[k]['o'] 195 | o_control = o[:-6] 196 | if True: # RL agent is trained using RNN 197 | # generate the hidden states at time 0 198 | hidden_state = (np.zeros((1, 1, policy.model.rnn_cell.hidden_size)), np.zeros((1, 1, policy.model.rnn_cell.hidden_size))) 199 | for t in range(horizon - 1): # this loop is for system evolution, open loop control, no feedback 200 | if True: 201 | a = policy.get_action(o_control, hidden_state) 202 | hidden_state = a[1]['hidden_state'] 203 | else: 204 | a = policy.get_action(o) 205 | a =a[1]['evaluation'] 206 | o, *_ = self.step(a) 207 | o_control = o[:-6] 208 | Computed_joint_values[:, t + 1, k] = a.copy() 209 | if Visualize: 210 | self.render() 211 | # calculated state values using Koopman rollouts (in simulator) 212 | obj_obs = self.get_obs() 213 | # compute the errors 214 | demo_ori_error[t, k] = np.mean(np.abs(obj_obs[49:52])) 215 | current_nail_pos = obj_obs[42:45] 216 | target_nail_pos = obj_obs[46:49] 217 | dist = np.linalg.norm(current_nail_pos - target_nail_pos) 218 | if dist < 0.01: 219 | success_list_RL.append(1) 220 | print("Success rate (RL) = %f" % (len(success_list_RL) / len(demos))) 221 | return demo_ori_error, Computed_joint_values 222 | 223 | def generate_unseen_data(self, number_sample): 224 | samples = [] 225 | for ep in range(number_sample): 226 | o, desired_orien = self.reset(seed = None) 227 | episode_data = {} 228 | episode_data['init_state_dict'] = copy.deepcopy(self.get_env_state()) 229 | episode_data['pen_desired_orien'] = desired_orien # record the goal orientation angle 230 | episode_data['o'] = o 231 | handpos = o[:24] 232 | episode_data['handpos'] = handpos 233 | hand_vel = self.env.get_hand_vel() 234 | episode_data['handvel'] = hand_vel 235 | objpos = o[24:27] 236 | episode_data['objpos'] = objpos 237 | objvel = o[27:33] 238 | episode_data['objvel'] = objvel 239 | episode_data['desired_ori'] = o[36:39] 240 | objorient = o[33:36] 241 | episode_data['objorient'] = ori_transform(objorient, episode_data['desired_ori']) 242 | samples.append(episode_data) 243 | return samples 244 | 245 | def generate_unseen_data_relocate(self, number_sample): 246 | samples = [] 247 | for ep in range(number_sample): 248 | o, desired_pos = self.reset(seed = None) 249 | o_visual = self.env.get_full_obs_visualization() 250 | episode_data = {} 251 | episode_data['init'] = copy.deepcopy(self.get_env_state()) 252 | episode_data['desired_pos'] = desired_pos 253 | episode_data['o'] = o 254 | handpos = o[:30] 255 | episode_data['handpos'] = handpos 256 | hand_vel = self.env.get_hand_vel()[:30] 257 | episode_data['handvel'] = hand_vel 258 | objpos = o[39:42] 259 | episode_data['objpos'] = objpos - episode_data['desired_pos'] 260 | episode_data['objorient'] = o_visual[33:36] 261 | episode_data['objvel'] = self.env.get_hand_vel()[30:] 262 | samples.append(episode_data) 263 | return samples 264 | 265 | def generate_unseen_data_hammer(self, number_sample): 266 | samples = [] 267 | for ep in range(number_sample): 268 | o, height = self.reset(seed = ep) 269 | hand_vel = self.env.get_hand_vel() 270 | episode_data = {} 271 | episode_data['init'] = copy.deepcopy(self.get_env_state()) 272 | episode_data['o'] = o 273 | handpos = o[:26] 274 | episode_data['handpos'] = handpos 275 | episode_data['handvel'] = hand_vel[:26] 276 | objpos = o[49:52] + o[42:45] 277 | episode_data['objpos'] = objpos # current tool position 278 | episode_data['objorient'] = o[39:42] 279 | episode_data['objvel'] = o[27:33] 280 | 
episode_data['nail_goal'] = o[46:49] 281 | samples.append(episode_data) 282 | return samples 283 | -------------------------------------------------------------------------------- /Tool/utils/quatmath.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # For testing whether a number is close to zero 3 | _FLOAT_EPS = np.finfo(np.float64).eps 4 | _EPS4 = _FLOAT_EPS * 4.0 5 | 6 | 7 | def mulQuat(qa, qb): 8 | res = np.zeros(4) 9 | res[0] = qa[0]*qb[0] - qa[1]*qb[1] - qa[2]*qb[2] - qa[3]*qb[3] 10 | res[1] = qa[0]*qb[1] + qa[1]*qb[0] + qa[2]*qb[3] - qa[3]*qb[2] 11 | res[2] = qa[0]*qb[2] - qa[1]*qb[3] + qa[2]*qb[0] + qa[3]*qb[1] 12 | res[3] = qa[0]*qb[3] + qa[1]*qb[2] - qa[2]*qb[1] + qa[3]*qb[0] 13 | return res 14 | 15 | def negQuat(quat): 16 | return np.array([quat[0], -quat[1], -quat[2], -quat[3]]) 17 | 18 | def quat2Vel(quat, dt=1): 19 | axis = quat[1:].copy() 20 | sin_a_2 = np.sqrt(np.sum(axis**2)) 21 | axis = axis/(sin_a_2+1e-8) 22 | speed = 2*np.arctan2(sin_a_2, quat[0])/dt 23 | return speed, axis 24 | 25 | def quatDiff2Vel(quat1, quat2, dt): 26 | neg = negQuat(quat1) 27 | diff = mulQuat(quat2, neg) 28 | return quat2Vel(diff, dt) 29 | 30 | 31 | def axis_angle2quat(axis, angle): 32 | c = np.cos(angle/2) 33 | s = np.sin(angle/2) 34 | return np.array([c, s*axis[0], s*axis[1], s*axis[2]]) 35 | 36 | def euler2mat(euler): 37 | """ Convert Euler Angles to Rotation Matrix. See rotation.py for notes """ 38 | euler = np.asarray(euler, dtype=np.float64) 39 | assert euler.shape[-1] == 3, "Invalid shaped euler {}".format(euler) 40 | 41 | ai, aj, ak = -euler[..., 2], -euler[..., 1], -euler[..., 0] 42 | si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak) 43 | ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak) 44 | cc, cs = ci * ck, ci * sk 45 | sc, ss = si * ck, si * sk 46 | 47 | mat = np.empty(euler.shape[:-1] + (3, 3), dtype=np.float64) 48 | mat[..., 2, 2] = cj * ck 49 | mat[..., 2, 1] = sj * sc - cs 50 | mat[..., 2, 0] = sj * cc + ss 51 | mat[..., 1, 2] = cj * sk 52 | mat[..., 1, 1] = sj * ss + cc 53 | mat[..., 1, 0] = sj * cs - sc 54 | mat[..., 0, 2] = -sj 55 | mat[..., 0, 1] = cj * si 56 | mat[..., 0, 0] = cj * ci 57 | return mat 58 | 59 | 60 | def euler2quat(euler): 61 | """ Convert Euler Angles to Quaternions. See rotation.py for notes """ 62 | euler = np.asarray(euler, dtype=np.float64) 63 | assert euler.shape[-1] == 3, "Invalid shape euler {}".format(euler) 64 | 65 | ai, aj, ak = euler[..., 2] / 2, -euler[..., 1] / 2, euler[..., 0] / 2 66 | si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak) 67 | ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak) 68 | cc, cs = ci * ck, ci * sk 69 | sc, ss = si * ck, si * sk 70 | 71 | quat = np.empty(euler.shape[:-1] + (4,), dtype=np.float64) 72 | quat[..., 0] = cj * cc + sj * ss 73 | quat[..., 3] = cj * sc - sj * cs 74 | quat[..., 2] = -(cj * ss + sj * cc) 75 | quat[..., 1] = cj * cs - sj * sc 76 | return quat 77 | 78 | 79 | def mat2euler(mat): 80 | """ Convert Rotation Matrix to Euler Angles. 
See rotation.py for notes """ 81 | mat = np.asarray(mat, dtype=np.float64) 82 | assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat) 83 | 84 | cy = np.sqrt(mat[..., 2, 2] * mat[..., 2, 2] + mat[..., 1, 2] * mat[..., 1, 2]) 85 | condition = cy > _EPS4 86 | euler = np.empty(mat.shape[:-1], dtype=np.float64) 87 | euler[..., 2] = np.where(condition, 88 | -np.arctan2(mat[..., 0, 1], mat[..., 0, 0]), 89 | -np.arctan2(-mat[..., 1, 0], mat[..., 1, 1])) 90 | euler[..., 1] = np.where(condition, 91 | -np.arctan2(-mat[..., 0, 2], cy), 92 | -np.arctan2(-mat[..., 0, 2], cy)) 93 | euler[..., 0] = np.where(condition, 94 | -np.arctan2(mat[..., 1, 2], mat[..., 2, 2]), 95 | 0.0) 96 | return euler 97 | 98 | 99 | def mat2quat(mat): 100 | """ Convert Rotation Matrix to Quaternion. See rotation.py for notes """ 101 | mat = np.asarray(mat, dtype=np.float64) 102 | assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat) 103 | 104 | Qxx, Qyx, Qzx = mat[..., 0, 0], mat[..., 0, 1], mat[..., 0, 2] 105 | Qxy, Qyy, Qzy = mat[..., 1, 0], mat[..., 1, 1], mat[..., 1, 2] 106 | Qxz, Qyz, Qzz = mat[..., 2, 0], mat[..., 2, 1], mat[..., 2, 2] 107 | # Fill only lower half of symmetric matrix 108 | K = np.zeros(mat.shape[:-2] + (4, 4), dtype=np.float64) 109 | K[..., 0, 0] = Qxx - Qyy - Qzz 110 | K[..., 1, 0] = Qyx + Qxy 111 | K[..., 1, 1] = Qyy - Qxx - Qzz 112 | K[..., 2, 0] = Qzx + Qxz 113 | K[..., 2, 1] = Qzy + Qyz 114 | K[..., 2, 2] = Qzz - Qxx - Qyy 115 | K[..., 3, 0] = Qyz - Qzy 116 | K[..., 3, 1] = Qzx - Qxz 117 | K[..., 3, 2] = Qxy - Qyx 118 | K[..., 3, 3] = Qxx + Qyy + Qzz 119 | K /= 3.0 120 | # TODO: vectorize this -- probably could be made faster 121 | q = np.empty(K.shape[:-2] + (4,)) 122 | it = np.nditer(q[..., 0], flags=['multi_index']) 123 | while not it.finished: 124 | # Use Hermitian eigenvectors, values for speed 125 | vals, vecs = np.linalg.eigh(K[it.multi_index]) 126 | # Select largest eigenvector, reorder to w,x,y,z quaternion 127 | q[it.multi_index] = vecs[[3, 0, 1, 2], np.argmax(vals)] 128 | # Prefer quaternion with positive w 129 | # (q * -1 corresponds to same rotation as q) 130 | if q[it.multi_index][0] < 0: 131 | q[it.multi_index] *= -1 132 | it.iternext() 133 | return q 134 | 135 | 136 | def quat2euler(quat): 137 | """ Convert Quaternion to Euler Angles. See rotation.py for notes """ 138 | return mat2euler(quat2mat(quat)) 139 | 140 | 141 | def quat2mat(quat): 142 | """ Convert Quaternion to Euler Angles. 
See rotation.py for notes """ 143 | quat = np.asarray(quat, dtype=np.float64) 144 | assert quat.shape[-1] == 4, "Invalid shape quat {}".format(quat) 145 | 146 | w, x, y, z = quat[..., 0], quat[..., 1], quat[..., 2], quat[..., 3] 147 | Nq = np.sum(quat * quat, axis=-1) 148 | s = 2.0 / Nq 149 | X, Y, Z = x * s, y * s, z * s 150 | wX, wY, wZ = w * X, w * Y, w * Z 151 | xX, xY, xZ = x * X, x * Y, x * Z 152 | yY, yZ, zZ = y * Y, y * Z, z * Z 153 | 154 | mat = np.empty(quat.shape[:-1] + (3, 3), dtype=np.float64) 155 | mat[..., 0, 0] = 1.0 - (yY + zZ) 156 | mat[..., 0, 1] = xY - wZ 157 | mat[..., 0, 2] = xZ + wY 158 | mat[..., 1, 0] = xY + wZ 159 | mat[..., 1, 1] = 1.0 - (xX + zZ) 160 | mat[..., 1, 2] = yZ - wX 161 | mat[..., 2, 0] = xZ - wY 162 | mat[..., 2, 1] = yZ + wX 163 | mat[..., 2, 2] = 1.0 - (xX + yY) 164 | return np.where((Nq > _FLOAT_EPS)[..., np.newaxis, np.newaxis], mat, np.eye(3)) -------------------------------------------------------------------------------- /Tool/utils/rnn_network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.autograd import Variable 5 | 6 | class RNNNetwork(nn.Module): 7 | def __init__(self, rnn_cell, 8 | m, 9 | n): 10 | """ 11 | :param rnn_cell: RNN (NCP) module 12 | """ 13 | # nn.Module that unfolds a RNN cell into a sequence 14 | super(RNNNetwork, self).__init__() 15 | self.rnn_cell = rnn_cell 16 | self.obs_dim = m 17 | self.act_dim = n 18 | # follow by a fc_layer 19 | self.fc_input_layer = nn.Linear(in_features=self.obs_dim, out_features=rnn_cell.input_size) # map observation_dim to RNN_input dim 20 | self.fc_output_layer = nn.Linear(in_features=rnn_cell.hidden_size, out_features=self.act_dim) 21 | 22 | def forward(self, x): # this is used for training 23 | device = x.device 24 | batch_size = x.size(0) 25 | # hidden states were set to be zeros at time 0 26 | hidden_state = ( # h_{0} and c_{0} 27 | torch.zeros((self.rnn_cell.num_layers, batch_size, self.rnn_cell.hidden_size), device=device), 28 | torch.zeros((self.rnn_cell.num_layers, batch_size, self.rnn_cell.hidden_size), device=device) 29 | ) 30 | x = self.fc_input_layer(x.float()) # (batch_size, seq_len, self.rnn_cell.input_size) 31 | outputs, hidden_state = self.rnn_cell(x, hidden_state) 32 | # output.shape = (batch_size, seq_len, self.rnn_cell.hidden_size) 33 | # output -> hidden_state at each time step (the top layer if multiple layers), and we use this as the output data 34 | # hidden_state = (hidden_state[0] = h_{T} and hidden_state[1] = c_{T}) at last time step T, if there are multiple layers, output are the h_{t} for the top layers 35 | outputs = self.fc_output_layer(outputs) #(batch_size, seq_len, act_dim) 36 | return outputs 37 | 38 | def predict(self, observation, hidden_state): 39 | # for execution, so batch_size is always 1. 
Not used for training. 40 | observation = self.fc_input_layer(observation) 41 | output, hidden_state = self.rnn_cell(observation.view(1, 1, -1), hidden_state) 42 | output = self.fc_output_layer(output) 43 | return output, hidden_state -------------------------------------------------------------------------------- /geometric_fabrics_torch-0.0.0-py2.py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GT-STAR-Lab/KODex/b058159f59baeff37437672e49f5d73406d4a5a6/geometric_fabrics_torch-0.0.0-py2.py3-none-any.whl --------------------------------------------------------------------------------
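A closing consistency note on quatmath.py: the conversions appear to follow Gym/MuJoCo's standard rotation utilities (each docstring points at rotation.py) and round-trip up to quaternion sign (q and -q encode the same rotation; mat2quat prefers w >= 0). A quick check, assuming the module is importable as utils.quatmath and the angles stay away from gimbal lock:

import numpy as np
from utils.quatmath import euler2quat, quat2mat, mat2quat, quat2euler

eul = np.array([0.3, -0.7, 1.2])        # arbitrary Euler angles in radians
q = euler2quat(eul)
R = quat2mat(q)
print(np.allclose(mat2quat(R), q))      # should print True (w is already positive here)
print(np.allclose(quat2euler(q), eul))  # should print True away from singularities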