├── .gitattributes ├── README.md ├── reinforcement learning └── TD3 │ ├── CNN3D │ ├── networkmodels.py │ └── utils_network.py │ ├── TD3_corner.py │ ├── packing_enviroment_corner_v3.py │ ├── slip_detector_both.py │ ├── train_corner.py │ └── utils.py └── supervised_learning ├── cnn_encoder_epoch20.pth ├── crnn_model.py ├── crnn_test.py ├── image_processing ├── 0.jpg ├── marker_detection.py └── marker_flow.py ├── label_converter.py ├── main.py ├── rnn_decoder_epoch20.pth └── utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tactile_insertion_RL 2 | 3 | -------------------------------------------------------------------------------- /reinforcement learning/TD3/CNN3D/networkmodels.py: -------------------------------------------------------------------------------- 1 | ##pytorch cnn+ lstm 2 | 3 | import argparse 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torchvision import datasets, transforms 9 | from torch.autograd import Variable 10 | import numpy as np 11 | from utils_network import * 12 | 13 | ########################## Model for CNN LSTM ########################################################## 14 | 15 | 16 | # 2D CNN encoder train from scratch (no transfer learning) 17 | class EncoderCNN(nn.Module): 18 | def __init__(self, 19 | img_x=84, 20 | img_y=84, 21 | fc_hidden1=512, 22 | fc_hidden2=512, 23 | drop_p=0.3, 24 | CNN_embed_dim=300): 25 | super(EncoderCNN, self).__init__() 26 | 27 | self.img_x = img_x 28 | self.img_y = img_y 29 | self.CNN_embed_dim = CNN_embed_dim 30 | 31 | # CNN architechtures 32 | self.ch1, self.ch2, self.ch3, self.ch4 = 32, 64, 128, 256 33 | self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), ( 34 | 3, 3) # 2d kernal size 35 | self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), ( 36 | 2, 2) # 2d strides 37 | self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), ( 38 | 0, 0) # 2d padding 39 | 40 | # conv2D output shapes 41 | self.conv1_outshape = conv2D_output_size((self.img_x, self.img_y), 42 | self.pd1, self.k1, 43 | self.s1) # Conv1 output shape 44 | self.conv2_outshape = conv2D_output_size(self.conv1_outshape, self.pd2, 45 | self.k2, self.s2) 46 | self.conv3_outshape = conv2D_output_size(self.conv2_outshape, self.pd3, 47 | self.k3, self.s3) 48 | self.conv4_outshape = conv2D_output_size(self.conv3_outshape, self.pd4, 49 | self.k4, self.s4) 50 | 51 | # fully connected layer hidden nodes 52 | self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2 53 | self.drop_p = drop_p 54 | 55 | self.conv1 = nn.Sequential( 56 | nn.Conv2d(in_channels=3, 57 | out_channels=self.ch1, 58 | kernel_size=self.k1, 59 | stride=self.s1, 60 | padding=self.pd1), 61 | # nn.BatchNorm2d(self.ch1, momentum=0.01), 62 | nn.ReLU(inplace=True), 63 | # nn.MaxPool2d(kernel_size=2), 64 | ) 65 | self.conv2 = nn.Sequential( 66 | nn.Conv2d(in_channels=self.ch1, 67 | out_channels=self.ch2, 68 | kernel_size=self.k2, 69 | stride=self.s2, 70 | padding=self.pd2), 71 | # nn.BatchNorm2d(self.ch2, momentum=0.01), 72 | nn.ReLU(inplace=True), 73 | # nn.MaxPool2d(kernel_size=2), 74 | ) 75 | 76 | self.conv3 = nn.Sequential( 77 | nn.Conv2d(in_channels=self.ch2, 78 | out_channels=self.ch3, 79 | 
kernel_size=self.k3, 80 | stride=self.s3, 81 | padding=self.pd3), 82 | # nn.BatchNorm2d(self.ch3, momentum=0.01), 83 | nn.ReLU(inplace=True), 84 | # nn.MaxPool2d(kernel_size=2), 85 | ) 86 | 87 | self.conv4 = nn.Sequential( 88 | nn.Conv2d(in_channels=self.ch3, 89 | out_channels=self.ch4, 90 | kernel_size=self.k4, 91 | stride=self.s4, 92 | padding=self.pd4), 93 | # nn.BatchNorm2d(self.ch4, momentum=0.01), 94 | nn.ReLU(inplace=True), 95 | # nn.MaxPool2d(kernel_size=2), 96 | ) 97 | 98 | self.drop = nn.Dropout2d(self.drop_p) 99 | self.pool = nn.MaxPool2d(2) 100 | self.fc1 = nn.Linear( 101 | self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1], 102 | self.fc_hidden1) # fully connected layer, output k classes 103 | # self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2) 104 | self.fc2 = nn.Linear( 105 | self.fc_hidden1, 106 | self.CNN_embed_dim) # output = CNN embedding latent variables 107 | 108 | def forward(self, x_3d): 109 | cnn_embed_seq = [] 110 | for t in range(x_3d.size(1)): 111 | # CNNs 112 | x = self.conv1(x_3d[:, t, :, :, :]) 113 | x = self.conv2(x) 114 | x = self.conv3(x) 115 | x = self.conv4(x) 116 | x = x.view(x.size(0), -1) # flatten the output of conv 117 | 118 | # FC layers 119 | x = F.relu(self.fc1(x)) 120 | # x = F.dropout(x, p=self.drop_p, training=self.training) 121 | # x = F.relu(self.fc2(x)) 122 | # x = F.dropout(x, p=self.drop_p, training=self.training) 123 | x = self.fc2(x) 124 | cnn_embed_seq.append(x) 125 | 126 | # swap time and sample dim such that (sample dim, time dim, CNN latent dim) 127 | cnn_embed_seq = torch.stack(cnn_embed_seq, dim=0).transpose_(0, 1) 128 | # cnn_embed_seq: shape=(batch, time_step, input_size) 129 | 130 | return cnn_embed_seq 131 | 132 | 133 | class DecoderRNN(nn.Module): 134 | def __init__(self, 135 | CNN_embed_dim=300, 136 | h_RNN_layers=2, 137 | h_RNN=256, 138 | h_FC_dim=128, 139 | drop_p=0.3, 140 | output_dim=3): 141 | super(DecoderRNN, self).__init__() 142 | 143 | self.RNN_input_size = CNN_embed_dim 144 | self.h_RNN_layers = h_RNN_layers # RNN hidden layers 145 | self.h_RNN = h_RNN # RNN hidden nodes 146 | self.h_FC_dim = h_FC_dim 147 | self.drop_p = drop_p 148 | self.output_dim = output_dim 149 | 150 | self.LSTM = nn.LSTM( 151 | input_size=self.RNN_input_size, 152 | hidden_size=self.h_RNN, 153 | num_layers=h_RNN_layers, 154 | batch_first= 155 | True, # input & output will has batch size as 1s dimension. e.g. (batch, time_step, input_size) 156 | ) 157 | 158 | self.fc1 = nn.Linear(self.h_RNN * 2, self.h_FC_dim) 159 | self.fc2 = nn.Linear(self.h_FC_dim, self.output_dim) 160 | 161 | def forward(self, x_RNN_left, x_RNN_right): 162 | 163 | self.LSTM.flatten_parameters() 164 | # RNN_out, (h_n, h_c) = self.LSTM(x_RNN, None) 165 | RNN_out_left, (h_n, h_c) = self.LSTM(x_RNN_left, None) 166 | RNN_out_right, (h_n, h_c) = self.LSTM(x_RNN_right, None) 167 | """ h_n shape (n_layers, batch, hidden_size), h_c shape (n_layers, batch, hidden_size) """ 168 | """ None represents zero initial hidden state. 
RNN_out has shape=(batch, time_step, output_size) """ 169 | RNN_out = torch.cat((RNN_out_left[:, -1, :], RNN_out_right[:, -1, :]), 170 | 1) 171 | # print(RNN_out_left[:, -1, :].size()) 172 | # FC layers 173 | # x = self.fc1(RNN_out[:, -1, :]) # choose RNN_out at the last time step 174 | x = self.fc1(RNN_out) 175 | x = F.relu(x) 176 | # x = F.dropout(x, p=self.drop_p, training=self.training) 177 | x = torch.tanh(self.fc2(x)) 178 | 179 | return x 180 | 181 | 182 | ###### End of CNNLSTM Network ##### 183 | 184 | ######################## ConvolutionLSTM Network Model ######################################## 185 | 186 | 187 | class ConvLSTMCell(nn.Module): 188 | def __init__(self, input_channels, hidden_channels, kernel_size): 189 | super(ConvLSTMCell, self).__init__() 190 | 191 | assert hidden_channels % 2 == 0 192 | 193 | self.input_channels = input_channels 194 | self.hidden_channels = hidden_channels 195 | self.kernel_size = kernel_size 196 | self.num_features = 4 197 | 198 | self.padding = int((kernel_size - 1) / 2) 199 | 200 | self.Wxi = nn.Conv2d(self.input_channels, 201 | self.hidden_channels, 202 | self.kernel_size, 203 | 1, 204 | self.padding, 205 | bias=True) 206 | self.Whi = nn.Conv2d(self.hidden_channels, 207 | self.hidden_channels, 208 | self.kernel_size, 209 | 1, 210 | self.padding, 211 | bias=False) 212 | self.Wxf = nn.Conv2d(self.input_channels, 213 | self.hidden_channels, 214 | self.kernel_size, 215 | 1, 216 | self.padding, 217 | bias=True) 218 | self.Whf = nn.Conv2d(self.hidden_channels, 219 | self.hidden_channels, 220 | self.kernel_size, 221 | 1, 222 | self.padding, 223 | bias=False) 224 | self.Wxc = nn.Conv2d(self.input_channels, 225 | self.hidden_channels, 226 | self.kernel_size, 227 | 1, 228 | self.padding, 229 | bias=True) 230 | self.Whc = nn.Conv2d(self.hidden_channels, 231 | self.hidden_channels, 232 | self.kernel_size, 233 | 1, 234 | self.padding, 235 | bias=False) 236 | self.Wxo = nn.Conv2d(self.input_channels, 237 | self.hidden_channels, 238 | self.kernel_size, 239 | 1, 240 | self.padding, 241 | bias=True) 242 | self.Who = nn.Conv2d(self.hidden_channels, 243 | self.hidden_channels, 244 | self.kernel_size, 245 | 1, 246 | self.padding, 247 | bias=False) 248 | 249 | self.Wci = None 250 | self.Wcf = None 251 | self.Wco = None 252 | 253 | def forward(self, x, h, c): 254 | ci = torch.sigmoid(self.Wxi(x) + self.Whi(h) + c * self.Wci) 255 | cf = torch.sigmoid(self.Wxf(x) + self.Whf(h) + c * self.Wcf) 256 | cc = cf * c + ci * torch.tanh(self.Wxc(x) + self.Whc(h)) 257 | co = torch.sigmoid(self.Wxo(x) + self.Who(h) + cc * self.Wco) 258 | ch = co * torch.tanh(cc) 259 | return ch, cc 260 | 261 | def init_hidden(self, batch_size, hidden, shape): 262 | if self.Wci is None: 263 | self.Wci = Variable(torch.zeros(1, hidden, shape[0], 264 | shape[1])).cuda() 265 | self.Wcf = Variable(torch.zeros(1, hidden, shape[0], 266 | shape[1])).cuda() 267 | self.Wco = Variable(torch.zeros(1, hidden, shape[0], 268 | shape[1])).cuda() 269 | 270 | else: 271 | 272 | assert shape[0] == self.Wci.size()[2], 'Input Height Mismatched!' 273 | assert shape[1] == self.Wci.size()[3], 'Input Width Mismatched!' 274 | 275 | return (Variable(torch.zeros(batch_size, hidden, shape[0], 276 | shape[1])).cuda(), 277 | Variable(torch.zeros(batch_size, hidden, shape[0], 278 | shape[1])).cuda()) 279 | 280 | 281 | class ConvLSTM(nn.Module): 282 | # input_channels corresponds to the first input feature map 283 | # hidden state is a list of succeeding lstm layers. 
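    # Annotation (a sketch of the math the cell above already implements, not new behavior):
    # ConvLSTMCell.forward is a peephole ConvLSTM update, with * denoting convolution and
    # "." elementwise multiplication:
    #   i  = sigmoid(Wxi * x + Whi * h + Wci . c)
    #   f  = sigmoid(Wxf * x + Whf * h + Wcf . c)
    #   c' = f . c + i . tanh(Wxc * x + Whc * h)
    #   o  = sigmoid(Wxo * x + Who * h + Wco . c')
    #   h' = o . tanh(c')
    # The peephole weights Wci/Wcf/Wco and the initial (h, c) are created lazily in init_hidden
    # with .cuda(), so the cell assumes a GPU. Minimal usage sketch for this wrapper class
    # (shapes and hyperparameters here are illustrative only):
    #   model = ConvLSTM(input_channels=3, hidden_channels=[32, 16], kernel_size=3,
    #                    step=5, effective_step=[4]).cuda()
    #   outputs, (h, c) = model(torch.zeros(1, 3, 64, 64).cuda())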
284 | def __init__(self, 285 | input_channels, 286 | hidden_channels, 287 | kernel_size, 288 | step=1, 289 | effective_step=[1]): 290 | super(ConvLSTM, self).__init__() 291 | self.input_channels = [input_channels] + hidden_channels 292 | self.hidden_channels = hidden_channels 293 | self.kernel_size = kernel_size 294 | self.num_layers = len(hidden_channels) 295 | self.step = step 296 | self.effective_step = effective_step 297 | self._all_layers = [] 298 | for i in range(self.num_layers): 299 | name = 'cell{}'.format(i) 300 | cell = ConvLSTMCell(self.input_channels[i], 301 | self.hidden_channels[i], self.kernel_size) 302 | setattr(self, name, cell) 303 | self._all_layers.append(cell) 304 | 305 | def forward(self, input): 306 | internal_state = [] 307 | outputs = [] 308 | for step in range(self.step): 309 | x = input 310 | for i in range(self.num_layers): 311 | name = 'cell{}'.format(i) 312 | if step == 0: 313 | bsize, _, height, width = x.size() 314 | (h, c) = getattr(self, name).init_hidden( 315 | batch_size=bsize, 316 | hidden=self.hidden_channels[i], 317 | shape=(height, width)) 318 | internal_state.append((h, c)) 319 | 320 | # do forward 321 | (h, c) = internal_state[i] 322 | x, new_c = getattr(self, name)(x, h, c) 323 | internal_state[i] = (x, new_c) 324 | # only record effective steps 325 | if step in self.effective_step: 326 | outputs.append(x) 327 | 328 | return outputs, (x, new_c) 329 | 330 | 331 | ######################## End of ConvolutionLSTM Network Model ######################################## 332 | 333 | ######################## CNN3D Model ################################################################ 334 | 335 | 336 | class CNN3D(nn.Module): 337 | def __init__(self, 338 | t_dim=45, 339 | img_x=128, 340 | img_y=128, 341 | drop_p=0.2, 342 | fc_hidden1=256, 343 | fc_hidden2=128, 344 | output_dim=3): 345 | super(CNN3D, self).__init__() 346 | 347 | # set video dimension 348 | self.t_dim = t_dim 349 | self.img_x = img_x 350 | self.img_y = img_y 351 | # fully connected layer hidden nodes 352 | self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2 353 | self.drop_p = drop_p 354 | self.output_dim = output_dim 355 | self.ch1, self.ch2 = 32, 48 356 | self.k1, self.k2 = (5, 5, 5), (3, 3, 3) # 3d kernel size 357 | self.s1, self.s2 = (2, 2, 2), (2, 2, 2) # 3d strides 358 | self.pd1, self.pd2 = (0, 0, 0), (0, 0, 0) # 3d padding 359 | 360 | # compute conv1 & conv2 output shape 361 | self.conv1_outshape = conv3D_output_size( 362 | (self.t_dim, self.img_x, self.img_y), self.pd1, self.k1, self.s1) 363 | self.conv2_outshape = conv3D_output_size(self.conv1_outshape, self.pd2, 364 | self.k2, self.s2) 365 | 366 | self.conv1 = nn.Conv3d(in_channels=1, 367 | out_channels=self.ch1, 368 | kernel_size=self.k1, 369 | stride=self.s1, 370 | padding=self.pd1) 371 | self.bn1 = nn.BatchNorm3d(self.ch1) 372 | self.conv2 = nn.Conv3d(in_channels=self.ch1, 373 | out_channels=self.ch2, 374 | kernel_size=self.k2, 375 | stride=self.s2, 376 | padding=self.pd2) 377 | self.bn2 = nn.BatchNorm3d(self.ch2) 378 | self.relu = nn.ReLU(inplace=True) 379 | self.drop = nn.Dropout3d(self.drop_p) 380 | self.pool = nn.MaxPool3d(2) 381 | self.fc1 = nn.Linear(self.ch2 * self.conv2_outshape[0] * 382 | self.conv2_outshape[1] * self.conv2_outshape[2], 383 | self.fc_hidden1) # fully connected hidden layer 384 | self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2) 385 | self.fc3 = nn.Linear( 386 | self.fc_hidden2, 387 | self.output_dim) # fully connected layer, output = multi-classes 388 | 389 | def forward(self, x_3d): 390 | # 
Conv 1 391 | x = self.conv1(x_3d) 392 | x = self.bn1(x) 393 | x = self.relu(x) 394 | x = self.drop(x) 395 | # Conv 2 396 | x = self.conv2(x) 397 | x = self.bn2(x) 398 | x = self.relu(x) 399 | x = self.drop(x) 400 | # FC 1 and 2 401 | x = x.view(x.size(0), -1) 402 | x = F.relu(self.fc1(x)) 403 | x = F.relu(self.fc2(x)) 404 | x = F.dropout(x, p=self.drop_p, training=self.training) 405 | x = self.fc3(x) 406 | 407 | return x 408 | 409 | 410 | ## --------------------- end of 3D CNN module ---------------- ## 411 | -------------------------------------------------------------------------------- /reinforcement learning/TD3/CNN3D/utils_network.py: -------------------------------------------------------------------------------- 1 | # Some useful functions for computing network sizes 2 | import numpy as np 3 | 4 | def conv2D_output_size(img_size, padding, kernel_size, stride): 5 | 6 | output_shape=(np.floor((img_size[0] + 2 * padding[0] - (kernel_size[0] - 1) - 1) / stride[0] + 1).astype(int), 7 | np.floor((img_size[1] + 2 * padding[1] - (kernel_size[1] - 1) - 1) / stride[1] + 1).astype(int)) 8 | 9 | return output_shape 10 | 11 | 12 | def conv3D_output_size(img_size, padding, kernel_size, stride): 13 | 14 | output_shape=(np.floor((img_size[0] + 2 * padding[0] - (kernel_size[0] - 1) - 1) / stride[0] + 1).astype(int), 15 | np.floor((img_size[1] + 2 * padding[1] - (kernel_size[1] - 1) - 1) / stride[1] + 1).astype(int), 16 | np.floor((img_size[2] + 2 * padding[2] - (kernel_size[2] - 1) - 1) / stride[2] + 1).astype(int)) 17 | 18 | return output_shape -------------------------------------------------------------------------------- /reinforcement learning/TD3/TD3_corner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | import numpy as np 6 | # from i3d.pytorch_i3d import InceptionI3d 7 | # from CNN3D.cnn3d import CNN3D_actor, CNN3D_critic 8 | from CNN3D.cnn import CNN_Actor_new, CNN_Critic 9 | from CNN3D.nn import MLP_Critic_3 10 | from CNN3D.networkmodels import DecoderRNN, EncoderCNN 11 | 12 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 13 | 14 | 15 | class Actor(nn.Module): 16 | def __init__(self, state_dim, action_dim, max_action): 17 | super(Actor, self).__init__() 18 | # self.actor = InceptionI3d(400, in_channels=3) 19 | # self.actor = CNN3D_actor() 20 | self.actor_cnn = EncoderCNN(img_x=218, 21 | img_y=300, 22 | fc_hidden1=1024, 23 | fc_hidden2=768, 24 | drop_p=0, 25 | CNN_embed_dim=512) 26 | 27 | self.actor_rnn = DecoderRNN(CNN_embed_dim=512, 28 | h_RNN_layers=1, 29 | h_RNN=512, 30 | h_FC_dim=256, 31 | drop_p=0, 32 | output_dim=action_dim) 33 | 34 | # self.actor_cnn = nn.DataParallel(self.actor_cnn) 35 | # self.actor_rnn = nn.DataParallel(self.actor_rnn) 36 | # self.actor_cnn.load_state_dict( 37 | # torch.load( 38 | # 'preTrained/supervised_learned_policy/cnn_encoder_epoch21.pth') 39 | # ) 40 | # strict=False) # 41 | # self.actor_rnn.load_state_dict( 42 | # torch.load( 43 | # 'preTrained/supervised_learned_policy/rnn_decoder_epoch21.pth') 44 | # ) 45 | # strict=False) # 46 | # self.actor_cnn.eval() 47 | # self.actor_rnn.eval() 48 | # self.actor = CNN_Actor_new(num_inputs=state_dim, 49 | # num_classes=action_dim) 50 | # self.actor_cnn = nn.DataParallel(self.actor) 51 | # self.actor.load_state_dict( 52 | # torch.load( 53 | # 'preTrained/Tactile_packing_corner_4_directions_new_gelsight/best_model_color_small_decrease.pt' 54 | 
# )) 55 | # self.actor.freeze_cnnlayer() 56 | # self.actor.load_state_dict(torch.load('i3d/models/rgb_imagenet.pt')) 57 | # self.actor.replace_logits(action_dim, nn.Tanh()) 58 | # self.actor.load_state_dict(torch.load('i3d/weights/error_000065.pt')) 59 | self.max_action = max_action 60 | 61 | def forward(self, state): 62 | # action = self.actor(state) * self.max_action 63 | action = self.actor_rnn(self.actor_cnn(state[:, :12, :, :, :]), 64 | self.actor_cnn( 65 | state[:, 12:, :, :, :])) * self.max_action 66 | # print("action",action.size()) 67 | return action 68 | 69 | 70 | class Critic(nn.Module): 71 | def __init__(self, state_dim, action_dim): 72 | super(Critic, self).__init__() 73 | # self.critic = InceptionI3d(400, in_channels=3) 74 | # self.critic = CNN3D_critic() 75 | # self.critic = CNN_Critic() 76 | self.critic = MLP_Critic_3() 77 | # self.critic.load_state_dict(torch.load('i3d/models/rgb_imagenet.pt')) 78 | # self.critic.replace_logits(action_dim, None) 79 | # self.critic.load_state_dict(torch.load('i3d/weights/error_000065.pt')) 80 | # self.critic.replace_logits(1) 81 | # self.last_layer = nn.Linear(402, 1) 82 | 83 | def forward(self, state, action): 84 | # feature = self.critic(state) 85 | value = self.critic(state, torch.squeeze(action)) 86 | # print 'feature and action size', feature.size(), action.size() 87 | # state_action = torch.cat([feature, action.unsqueeze(2)], 1).view(-1,402) 88 | # print 'state action', state_action.size() 89 | 90 | # value = self.last_layer(state_action) 91 | # q = F.relu(self.l1(state_action)) 92 | # q = F.relu(self.l2(q)) 93 | # q = self.l3(q) 94 | return value 95 | 96 | 97 | class TD3: 98 | def __init__(self, lr, state_dim, action_dim, max_action): 99 | 100 | self.actor = Actor(state_dim, action_dim, max_action).to(device) 101 | self.actor.load_state_dict( 102 | torch.load( 103 | 'preTrained/policy_finetune_6/TD3_policy_finetune_6_0_actor.pth' 104 | # 'preTrained/tactile_packing_corner_4object_new/TD3_tactile_packing_corner_4object_new_0_actor.pth' 105 | )) 106 | self.actor_target = Actor(state_dim, action_dim, max_action).to(device) 107 | self.actor_target.load_state_dict(self.actor.state_dict()) 108 | self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=lr) 109 | 110 | self.critic_1 = Critic(state_dim, action_dim).to(device) 111 | self.critic_1.load_state_dict( 112 | torch.load( 113 | 'preTrained/policy_finetune_6/TD3_policy_finetune_6_0_crtic_1.pth' 114 | )) 115 | self.critic_1_target = Critic(state_dim, action_dim).to(device) 116 | self.critic_1_target.load_state_dict(self.critic_1.state_dict()) 117 | self.critic_1_optimizer = optim.Adam(self.critic_1.parameters(), 118 | lr=0.1) 119 | 120 | self.critic_2 = Critic(state_dim, action_dim).to(device) 121 | self.critic_2.load_state_dict( 122 | torch.load( 123 | 'preTrained/policy_finetune_6/TD3_policy_finetune_6_0_crtic_1.pth' 124 | )) 125 | self.critic_2_target = Critic(state_dim, action_dim).to(device) 126 | self.critic_2_target.load_state_dict(self.critic_2.state_dict()) 127 | self.critic_2_optimizer = optim.Adam(self.critic_2.parameters(), 128 | lr=0.1) 129 | 130 | self.max_action = max_action 131 | 132 | def select_action(self, state): 133 | # state = torch.FloatTensor(state.reshape(1, -1)).to(device) 134 | # self.actor.eval() 135 | state = state.to(device) 136 | # self.actor.eval() 137 | return self.actor(state).cpu().data.numpy().flatten() 138 | 139 | def update(self, replay_buffer, n_iter, batch_size, gamma, polyak, 140 | policy_noise, noise_clip, policy_delay, directory): 141 | 
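        # The loop below performs n_iter TD3 updates, with this repo's asymmetric twist that the
        # critics can be fed the low-dimensional ground-truth state_full while the actor always
        # sees the tactile image sequence:
        #   1. sample (s, a, r, s', done, s_full, s'_full) from the replay buffer;
        #   2. form the smoothed target action
        #        a' = clip(actor_target(s') / max_action + clip(eps, -noise_clip, noise_clip), -1, 1),
        #      with eps ~ N(0, policy_noise);
        #   3. compute the clipped double-Q target
        #        target_Q = r + gamma * (1 - done) * min(Q1_target(s', a'), Q2_target(s', a'));
        #   4. regress both critics to target_Q with an MSE loss;
        #   5. every policy_delay iterations, update the actor to maximize critic_1's value of
        #      its own action, actor(s) / max_action;
        #   6. Polyak-average all three target networks.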
142 | if replay_buffer.size > 200: 143 | n_iter = (n_iter + 1) 144 | use_full_state = True 145 | actor_loss_list = np.load(directory + '/actor_loss.npy').tolist() 146 | critic_loss_list = np.load(directory + '/critic_loss.npy').tolist() 147 | for i in range(n_iter): 148 | # Sample a batch of transitions from replay buffer: 149 | state, action_, reward, next_state, done, state_full, next_state_full = replay_buffer.sample( 150 | batch_size) 151 | state = torch.FloatTensor(state).to(device) 152 | action = torch.FloatTensor(action_).to(device) 153 | reward = torch.FloatTensor(reward).reshape( 154 | (batch_size, 1)).to(device) 155 | next_state = torch.FloatTensor(next_state).to(device) 156 | done = torch.FloatTensor(done).reshape((batch_size, 1)).to(device) 157 | state_full = torch.FloatTensor(state_full).to(device) 158 | next_state_full = torch.FloatTensor(next_state_full).to(device) 159 | 160 | noise = torch.FloatTensor(action_).data.normal_( 161 | 0, policy_noise).to(device) 162 | noise = noise.clamp(-noise_clip, noise_clip) 163 | next_action = (self.actor_target(next_state) / self.max_action) 164 | # print(r_matrix_next.size(), next_action.size()) 165 | next_action = next_action + noise 166 | next_action = next_action.clamp(-1, 1) 167 | # next_action[2] = 0. 168 | # print 'action', action.size(), 'next action', next_action.size() 169 | # Compute target Q-value: 170 | if not use_full_state: 171 | target_Q1 = self.critic_1_target(next_state, next_action) 172 | target_Q2 = self.critic_2_target(next_state, next_action) 173 | else: 174 | target_Q1 = self.critic_1_target(next_state_full, next_action) 175 | target_Q2 = self.critic_2_target(next_state_full, next_action) 176 | 177 | target_Q = torch.min(target_Q1, target_Q2) 178 | target_Q = reward + ((1 - done) * gamma * target_Q).detach() 179 | 180 | # Optimize Critic 1: 181 | if not use_full_state: 182 | current_Q1 = self.critic_1(state, action) 183 | else: 184 | current_Q1 = self.critic_1(state_full, action) 185 | loss_Q1 = F.mse_loss(current_Q1, target_Q) 186 | self.critic_1_optimizer.zero_grad() 187 | loss_Q1.backward() 188 | self.critic_1_optimizer.step() 189 | print('critic loss 1', loss_Q1) 190 | critic_loss_list.append(loss_Q1.cpu().detach().numpy()) 191 | # Optimize Critic 2: 192 | if not use_full_state: 193 | current_Q2 = self.critic_2(state, action) 194 | else: 195 | current_Q2 = self.critic_2(state_full, action) 196 | loss_Q2 = F.mse_loss(current_Q2, target_Q) 197 | self.critic_2_optimizer.zero_grad() 198 | loss_Q2.backward() 199 | self.critic_2_optimizer.step() 200 | print('critic loss 2', loss_Q2) 201 | 202 | # Delayed policy updates: 203 | if i % policy_delay == 0: 204 | self.actor.train() 205 | # Compute actor loss: 206 | if not use_full_state: 207 | actor_loss = -self.critic_1( 208 | state, 209 | self.actor(state) / self.max_action).mean() 210 | else: 211 | action_predict = self.actor(state) / self.max_action 212 | torch.clamp(action_predict, min=-1.0, max=1.0) 213 | actor_loss = -self.critic_1(state_full, 214 | action_predict).mean() 215 | # print('actor loss', actor_loss, 'action', self.actor(state)/self.max_action, 'reward', reward, 'target_Q', target_Q) 216 | # Optimize the actor 217 | self.actor_optimizer.zero_grad() 218 | actor_loss.backward() 219 | print('actor loss', actor_loss) 220 | actor_loss_list.append(actor_loss.cpu().detach().numpy()) 221 | self.actor_optimizer.step() 222 | 223 | # Polyak averaging update: 224 | for param, target_param in zip(self.actor.parameters(), 225 | self.actor_target.parameters()): 226 | 
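                    # Soft (Polyak) target update, applied below to the actor and both critics:
                    #   theta_target <- polyak * theta_target + (1 - polyak) * theta
                    # so with polyak close to 1 the target networks track the online networks slowly.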
target_param.data.copy_((polyak * target_param.data) + 227 | ((1 - polyak) * param.data)) 228 | 229 | for param, target_param in zip( 230 | self.critic_1.parameters(), 231 | self.critic_1_target.parameters()): 232 | target_param.data.copy_((polyak * target_param.data) + 233 | ((1 - polyak) * param.data)) 234 | 235 | for param, target_param in zip( 236 | self.critic_2.parameters(), 237 | self.critic_2_target.parameters()): 238 | target_param.data.copy_((polyak * target_param.data) + 239 | ((1 - polyak) * param.data)) 240 | np.save(directory + '/actor_loss.npy', actor_loss_list) 241 | np.save(directory + '/critic_loss.npy', critic_loss_list) 242 | 243 | def save(self, directory, name): 244 | torch.save(self.actor.state_dict(), 245 | '%s/%s_actor.pth' % (directory, name)) 246 | torch.save(self.actor_target.state_dict(), 247 | '%s/%s_actor_target.pth' % (directory, name)) 248 | 249 | torch.save(self.critic_1.state_dict(), 250 | '%s/%s_crtic_1.pth' % (directory, name)) 251 | torch.save(self.critic_1_target.state_dict(), 252 | '%s/%s_critic_1_target.pth' % (directory, name)) 253 | 254 | torch.save(self.critic_2.state_dict(), 255 | '%s/%s_crtic_2.pth' % (directory, name)) 256 | torch.save(self.critic_2_target.state_dict(), 257 | '%s/%s_critic_2_target.pth' % (directory, name)) 258 | 259 | def load(self, directory, name): 260 | self.actor.load_state_dict( 261 | torch.load('%s/%s_actor.pth' % (directory, name), 262 | map_location=lambda storage, loc: storage)) 263 | self.actor_target.load_state_dict( 264 | torch.load('%s/%s_actor_target.pth' % (directory, name), 265 | map_location=lambda storage, loc: storage)) 266 | 267 | self.critic_1.load_state_dict( 268 | torch.load('%s/%s_crtic_1.pth' % (directory, name), 269 | map_location=lambda storage, loc: storage)) 270 | self.critic_1_target.load_state_dict( 271 | torch.load('%s/%s_critic_1_target.pth' % (directory, name), 272 | map_location=lambda storage, loc: storage)) 273 | 274 | self.critic_2.load_state_dict( 275 | torch.load('%s/%s_crtic_2.pth' % (directory, name), 276 | map_location=lambda storage, loc: storage)) 277 | self.critic_2_target.load_state_dict( 278 | torch.load('%s/%s_critic_2_target.pth' % (directory, name), 279 | map_location=lambda storage, loc: storage)) 280 | 281 | def load_actor(self, directory, name): 282 | self.actor.load_state_dict( 283 | torch.load('%s/%s_actor.pth' % (directory, name), 284 | map_location=lambda storage, loc: storage)) 285 | self.actor_target.load_state_dict( 286 | torch.load('%s/%s_actor_target.pth' % (directory, name), 287 | map_location=lambda storage, loc: storage)) 288 | 289 | def freeze_cnnlayer(self): 290 | for i, param in enumerate(self.actor.parameters()): 291 | # if i < 6: 292 | param.requires_grad = False 293 | 294 | for i, param in enumerate(self.actor_target.parameters()): 295 | # if i < 6: 296 | param.requires_grad = False 297 | 298 | def print_param(self): 299 | for i, param in enumerate(self.actor.parameters()): 300 | print param.requires_grad 301 | 302 | for i, param in enumerate(self.actor_target.parameters()): 303 | print param.requires_grad 304 | 305 | def unfreeze_cnnlayer(self): 306 | for i, param in enumerate(self.actor.parameters()): 307 | # if i < 6: 308 | param.requires_grad = True 309 | 310 | for i, param in enumerate(self.actor_target.parameters()): 311 | # if i < 6: 312 | param.requires_grad = True 313 | 314 | def unfreeze_rnnlayer(self): 315 | for i, param in enumerate(self.actor.actor_rnn.parameters()): 316 | # if i < 6: 317 | param.requires_grad = True 318 | 319 | for i, param 
in enumerate(self.actor_target.actor_rnn.parameters()): 320 | # if i < 6: 321 | param.requires_grad = True -------------------------------------------------------------------------------- /reinforcement learning/TD3/packing_enviroment_corner_v3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from sensor_msgs.msg import CompressedImage, JointState, ChannelFloat32 4 | from std_msgs.msg import Bool, Float32MultiArray 5 | import numpy as np 6 | import time 7 | from scipy import ndimage 8 | import matplotlib.pyplot as plt 9 | from visualization_msgs.msg import * 10 | # from gripper import * 11 | # from ik.helper import * 12 | from wsg_32_common.srv import * 13 | from robot_comm.srv import * 14 | import rospy, math, cv2, os, pickle 15 | # from ik.ik import fastik, fastfk, fastfk_python 16 | from geometry_msgs.msg import PoseStamped 17 | import json 18 | from std_srvs.srv import * 19 | import random 20 | # import helpers.services as services 21 | from scipy.spatial.transform import Rotation as R 22 | import os 23 | from slip_detector_both import slip_detector 24 | 25 | 26 | class Robot_motion: 27 | def __init__(self): 28 | self.initialJointPosition = [ 29 | -18.45, 30.26, 35.84, -21.02, -67.59, 7.2 30 | ] #initial joint position 31 | self.cartesianOfCircle = [ 32 | 128.7, -148.3, 41.2, 0.6998, 0.7143, 0.0051, 0.0061 33 | ] 34 | self.cartesianOfEllipse = [ 35 | 128.8, -146.7 - 158, 41.2, 0.6998, 0.7143, 0.0051, 0.0061 36 | ] 37 | self.cartesianOfRectangle = [ 38 | 127.8, -146.7 - 251.2, 41.2, 0.6998, 0.7143, 0.0051, 0.0061 39 | ] 40 | self.cartesianOfHexagon = [ 41 | 128.2, -146.7 - 75.405 - 1.3, 41.2, 0.6998, 0.7143, 0.0051, 0.0061 42 | ] 43 | # self.cartesian_Vitamin = [ 44 | # # 127.8 + 46., -146.7 - 251.2, 41.2, 0.6998, 0.7143, 0.0051, 0.0061 45 | # 303.64, 46 | # -213.24, 47 | # 42.2 + 24, 48 | # 0.6998, 49 | # 0.7143, 50 | # 0.0051, 51 | # 0.0061 52 | # ] 53 | self.cartesian_Vitamin = [ 54 | 127.8, -146.7 - 357.2, 41.2, 0.6998, 0.7143, 0.0051, 0.0061 55 | ] # real one 56 | 57 | self.cartesianOfCircle_top = list(self.cartesianOfCircle) 58 | self.cartesianOfCircle_top[2] += 75 59 | self.jointOfCircle_top = [-19.02, 61.68, 12.58, -20.64, -75.3, 4.31] 60 | self.cartesianOfEllipse_top = list(self.cartesianOfEllipse) 61 | self.cartesianOfEllipse_top[2] += 75 62 | self.jointOfEllipse_top = [-17.84, 39.18, 12.83, -23.29, -54.41, 12.91] 63 | self.cartesianOfRectangle_top = list(self.cartesianOfRectangle) 64 | self.cartesianOfRectangle_top[2] += 75 65 | self.jointOfRectangle_top = [ 66 | -17.94, 28.78, 8.87, -29.15, -41.54, 21.51 67 | ] 68 | self.cartesianOfHexagon_top = list(self.cartesianOfHexagon) 69 | self.cartesianOfHexagon_top[2] += 75 70 | self.jointOfHexagon_top = [-17.84, 51.08, 11.41, -20.93, -64.14, 8.32] 71 | self.cartesian_Vitamin_top = list(self.cartesian_Vitamin) 72 | self.cartesian_Vitamin_top[2] += 150 73 | self.joint_Vitamin_top = [-17.84, 39.18, 12.83, -23.29, -54.41, 12.91] 74 | self.objectCartesianDict = {'circle':[self.cartesianOfCircle,self.cartesianOfCircle_top,self.jointOfCircle_top],\ 75 | 'rectangle':[self.cartesianOfRectangle,self.cartesianOfRectangle_top, self.jointOfRectangle_top],\ 76 | 'hexagon':[self.cartesianOfHexagon,self.cartesianOfHexagon_top, self.jointOfHexagon_top],\ 77 | 'ellipse':[self.cartesianOfEllipse,self.cartesianOfEllipse_top, self.jointOfEllipse_top],\ 78 | 'vitamin':[self.cartesian_Vitamin, self.cartesian_Vitamin_top, self.joint_Vitamin_top] 79 | } 80 | 81 | 
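        # Pose convention used throughout this class (as returned by get_cart and consumed by
        # move_cart_mm below): a 7-element list [x, y, z, qx, qy, qz, q0], positions in mm in the
        # robot base frame; move_cart_mm reorders it to (x, y, z, q0, qx, qy, qz) for the
        # robot_SetCartesian service. A minimal usage sketch:
        #   pose = self.get_cart()      # [x, y, z, qx, qy, qz, q0]
        #   pose[2] += 10.              # raise the tool by 10 mm
        #   self.move_cart_mm(pose)
        # The blocks below are hand-measured fixture poses (corner, parallel wall, U shape, hole,
        # single wall) with small per-object offsets baked into the arithmetic.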
################################corner########################################## 82 | self.cartesianOfGap_Circle = [ 83 | 127.7 + 106.5, -148.3 - 66, 42.2 + 25, 0.6998, 0.7143, 0.0051, 84 | 0.0061 85 | ] 86 | self.cartesianOfGap_Rectangle = [ 87 | 127.7 + 106.5, -148.5 - 71, 42.2 + 24.5, 0.6998, 0.7143, 0.0051, 88 | 0.0061 89 | ] 90 | self.cartesianOfGap_Hexagon = [ 91 | 127.7 + 105 - 1.5, -148.3 - 64.8, 42.2 + 25, 0.6998, 0.7143, 92 | 0.0051, 0.0061 93 | ] 94 | self.cartesianOfGap_Ellipse = [ 95 | 127.7 + 106.5, -148.3 - 72.2, 42.2 + 25, 0.6998, 0.7143, 0.0051, 96 | 0.0061 97 | ] 98 | #############################parallel wall###################################### 99 | self.cartesianOfGap_parallel_Circle = [ 100 | 127.7 + 105.3, -148.3 - 66 + 77.5, 42.2 + 24, 0.6998, 0.7143, 101 | 0.0051, 0.0061 102 | ] 103 | self.cartesianOfGap_parallel_Rectangle = [ 104 | 127.7 + 106.35, -148.3 - 72 + 85, 42.2 + 23.5, 0.6998, 0.7143, 105 | 0.0051, 0.0061 106 | ] 107 | self.cartesianOfGap_parallel_Hexagon = [ 108 | 127.7 + 105 - 1.5, -148.3 - 66 + 77.5, 42.2 + 24, 0.6998, 0.7143, 109 | 0.0051, 0.0061 110 | ] 111 | self.cartesianOfGap_parallel_Ellipse = [ 112 | 127.7 + 105.3, -148.3 - 72.2 + 85, 42.2 + 24, 0.6998, 0.7143, 113 | 0.0051, 0.0061 114 | ] 115 | #############################parallel wall rotate################################# 116 | self.cartesianOfGap_parallel_rotate_Circle = [ 117 | 127.7 + 105.3 + 93., -148.3 - 66 + 77.5 + 2.0, 42.2 + 22, 0.6998, 118 | 0.7143, 0.0051, 0.0061 119 | ] 120 | self.cartesianOfGap_parallel_rotate_Rectangle = [ 121 | 127.7 + 106.5 + 94.5 + 60, -148.3 - 72 + 85 - 64.5 + 2.5, 122 | 42.2 + 23.5, 0.6998, 0.7143, 0.0051, 0.0061 123 | ] 124 | self.cartesianOfGap_parallel_rotate_Hexagon = [ 125 | 127.7 + 105 - 1.5 + 96., -148.3 - 66 + 77.5 + 2.1, 42.2 + 24, 126 | 0.6998, 0.7143, 0.0051, 0.0061 127 | ] 128 | self.cartesianOfGap_parallel_rotate_Ellipse = [ 129 | 127.7 + 105.3 + 94.5 + 60, -148.3 - 72.2 + 85 - 64.5 + 1.5, 130 | 42.2 + 24, 0.6998, 0.7143, 0.0051, 0.0061 131 | ] 132 | ################################U shape########################################## 133 | # self.cartesianOfGap_Ushape_Circle = [ 134 | # 127.7 + 105.3, -148.3 - 66 - 84, 42.2 + 24, 0.6998, 0.7143, 0.0051, 135 | # 0.0061 136 | # ] 137 | # self.cartesianOfGap_Ushape_Rectangle = [ 138 | # 127.7 + 106.2, -148.3 - 72 - 84.4 + 0.5, 42.2 + 23.5, 0.6998, 139 | # 0.7143, 0.0051, 0.0061 140 | # ] 141 | # self.cartesianOfGap_Ushape_Hexagon = [ 142 | # 127.7 + 105 - 1.7, -147.7 - 66 - 84, 42.2 + 24, 0.6998, 0.7143, 143 | # 0.0051, 0.0061 144 | # ] 145 | # self.cartesianOfGap_Ushape_Ellipse = [ 146 | # 127.7 + 105.3, -148.3 - 72.2 - 84, 42.2 + 24, 0.6998, 0.7143, 147 | # 0.0051, 0.0061 148 | # ] 149 | self.cartesianOfGap_Ushape_Circle = [ 150 | 127.7 + 105.3 + 155, -148.3 - 66 - 84 + 67, 42.2 + 25, 0.6998, 151 | 0.7143, 0.0051, 0.0061 152 | ] 153 | self.cartesianOfGap_Ushape_Rectangle = [ 154 | 127.7 + 106.2 + 155, -148.3 - 72 - 84.4 + 0.5 + 67, 42.2 + 24.5, 155 | 0.6998, 0.7143, 0.0051, 0.0061 156 | ] 157 | self.cartesianOfGap_Ushape_Hexagon = [ 158 | 127.7 + 105 - 2.0 + 155, -147.7 - 66 - 84 + 67, 42.2 + 25, 0.6998, 159 | 0.7143, 0.0051, 0.0061 160 | ] 161 | self.cartesianOfGap_Ushape_Ellipse = [ 162 | 127.7 + 105.3 + 155, -148.3 - 72.2 - 84 + 67.6, 42.2 + 25, 0.6998, 163 | 0.7143, 0.0051, 0.0061 164 | ] 165 | ################################U shape rotate########################################## 166 | # self.cartesianOfGap_Ushape_rotate_Circle = [ 167 | # 127.7 + 105.3 + 156, -148.3 - 66 - 84 + 1.5, 42.2 + 
24, 0.6998, 168 | # 0.7143, 0.0051, 0.0061 169 | # ] 170 | # self.cartesianOfGap_Ushape_rotate_Rectangle = [ 171 | # 127.7 + 106.5 + 156 - 0.6, -148.3 - 72 - 84 + 15 + 1.6, 172 | # 42.2 + 23.5, 0.6998, 0.7143, 0.0051, 0.0061 173 | # ] 174 | # self.cartesianOfGap_Ushape_rotate_Hexagon = [ 175 | # 127.7 + 105.7 + 158, -148.3 - 66 - 84 + 2., 42.2 + 24, 0.6998, 176 | # 0.7143, 0.0051, 0.0061 177 | # ] 178 | # self.cartesianOfGap_Ushape_rotate_Ellipse = [ 179 | # 127.7 + 105.3 + 156, -148.3 - 72.2 - 84 + 15 + 1.5, 42.2 + 24, 180 | # 0.6998, 0.7143, 0.0051, 0.0061 181 | # ] 182 | self.cartesianOfGap_Ushape_rotate_Circle = [ 183 | 127.7 + 105.3 + 156 + 3, -148.3 - 66 - 84 + 1.5 - 10, 42.2 + 25, 184 | 0.6998, 0.7143, 0.0051, 0.0061 185 | ] 186 | self.cartesianOfGap_Ushape_rotate_Rectangle = [ 187 | 127.7 + 106.5 + 156 - 1.1 + 3, -148.3 - 71 - 84 + 15 + 1.6 - 10, 188 | 42.2 + 24.5, 0.6998, 0.7143, 0.0051, 0.0061 189 | ] 190 | self.cartesianOfGap_Ushape_rotate_Hexagon = [ 191 | 127.7 + 105.7 + 158 + 2.4, -148.3 - 66 - 84 + 2. - 10, 42.2 + 25, 192 | 0.6998, 0.7143, 0.0051, 0.0061 193 | ] 194 | self.cartesianOfGap_Ushape_rotate_Ellipse = [ 195 | 127.7 + 105.3 + 156 + 3, -148.3 - 72.2 - 83 + 15 + 1.5 - 10, 196 | 42.2 + 25, 0.6998, 0.7143, 0.0051, 0.0061 197 | ] 198 | ################################hole########################################## 199 | # self.cartesianOfGap_hole_Circle = [ 200 | # 127.7 + 105.3 + 136.8, -148.3 - 66 - 84 - 95.5, 42.2 + 26, 0.6998, 201 | # 0.7143, 0.0051, 0.0061 202 | # ] 203 | # self.cartesianOfGap_hole_Rectangle = [ 204 | # 127.7 + 105.3 + 134.5 + 59.8, -148.3 - 66 - 84 - 93.3, 42.2 + 23, 205 | # 0.6998, 0.7143, 0.0051, 0.0061 206 | # ] 207 | # self.cartesianOfGap_hole_Hexagon = [ 208 | # 127.7 + 105.3 + 132.7 + 1.5, -147.7 - 66 - 84 - 95., 42.2 + 26, 209 | # 0.6998, 0.7143, 0.0051, 0.0061 210 | # ] 211 | # self.cartesianOfGap_hole_Ellipse = [ 212 | # 127.7 + 105.3 + 134.5 + 59.8, -148.3 - 66 - 84 - 93.5, 42.2 + 26., 213 | # 0.6998, 0.7143, 0.0051, 0.0061 214 | # ] 215 | self.cartesianOfGap_hole_Circle = [ 216 | 127.7 + 105.3 + 155 + 136.3, -148.3 - 66 - 84 + 67 - 77.3, 217 | 42.2 + 24, 0.6998, 0.7143, 0.0051, 0.0061 218 | ] 219 | self.cartesianOfGap_hole_Rectangle = [ 220 | 127.7 + 106.2 + 155 + 77., -148.3 - 72 - 84.4 + 0.5 + 67 - 42, 221 | 42.2 + 21.5, 0.6998, 0.7143, 0.0051, 0.0061 222 | ] 223 | self.cartesianOfGap_hole_Hexagon = [ 224 | 127.7 + 105 - 2.0 + 155 + 138.5, -147.7 - 66 - 84 + 67 - 22, 225 | 42.2 + 24, 0.6998, 0.7143, 0.0051, 0.0061 226 | ] 227 | self.cartesianOfGap_hole_Ellipse = [ 228 | 127.7 + 105.3 + 155 + 78.2, -148.3 - 72.2 - 84 + 67.6 - 43, 229 | 42.2 + 24, 0.6998, 0.7143, 0.0051, 0.0061 230 | ] 231 | # self.cartesianOfGap_hole_Vitamin = [ 232 | # 40.56, -437.12 + 71, 51.34, 0.6998, 0.7143, 0.0051, 0.0061 233 | # ] 234 | self.cartesianOfGap_hole_Vitamin = [ 235 | 40.56 - 3, -437.12 - 3.5, 53.34 + 16., 0.6998, 0.7143, 0.0051, 236 | 0.0061 237 | ] # the right one 238 | # self.cartesianOfGap_hole_Vitamin = [ 239 | # 127.7 + 105.3 + 170 + 85, -148.3 - 52.2 - 200 - 1, 240 | # 42.2 + 24.5 + 28, 0.6998, 0.7143, 0.0051, 0.0061 241 | # ] 242 | 243 | ################################single wall########################################## 244 | self.cartesianOfGap_onewall_Circle = [ 245 | 127.7 + 105.3 + 170, -148.3 - 66 - 247, 42.2 + 26, 0.6998, 0.7143, 246 | 0.0051, 0.0061 247 | ] 248 | self.cartesianOfGap_onewall_Rectangle = [ 249 | 127.7 + 106.5 + 170, -148.3 - 72 - 246, 42.2 + 24, 0.6998, 0.7143, 250 | 0.0051, 0.0061 251 | ] 252 | 
self.cartesianOfGap_onewall_Hexagon = [ 253 | 127.7 + 105 - 1.5 + 170, -148.3 - 66 - 246, 42.2 + 26, 0.6998, 254 | 0.7143, 0.0051, 0.0061 255 | ] 256 | self.cartesianOfGap_onewall_Ellipse = [ 257 | 127.7 + 105.3 + 170, -148.3 - 72.2 - 246, 42.2 + 24.5, 0.6998, 258 | 0.7143, 0.0051, 0.0061 259 | ] 260 | 261 | 262 | self.cartesianOfGapDict = {'circle': self.cartesianOfGap_Circle,\ 263 | 'rectangle': self.cartesianOfGap_Rectangle,\ 264 | 'hexagon': self.cartesianOfGap_Hexagon,\ 265 | 'ellipse': self.cartesianOfGap_Ellipse} 266 | 267 | self.cartesianOfGap_onewall_Dict = {'circle': self.cartesianOfGap_onewall_Circle,\ 268 | 'rectangle': self.cartesianOfGap_onewall_Rectangle,\ 269 | 'hexagon': self.cartesianOfGap_onewall_Hexagon,\ 270 | 'ellipse': self.cartesianOfGap_onewall_Ellipse} 271 | 272 | self.cartesianOfGap_parallel_Dict = {'circle': self.cartesianOfGap_parallel_Circle,\ 273 | 'rectangle': self.cartesianOfGap_parallel_Rectangle,\ 274 | 'hexagon': self.cartesianOfGap_parallel_Hexagon,\ 275 | 'ellipse': self.cartesianOfGap_parallel_Ellipse} 276 | 277 | self.cartesianOfGap_parallel_rotate_Dict = {'circle': self.cartesianOfGap_parallel_rotate_Circle,\ 278 | 'rectangle': self.cartesianOfGap_parallel_rotate_Rectangle,\ 279 | 'hexagon': self.cartesianOfGap_parallel_rotate_Hexagon,\ 280 | 'ellipse': self.cartesianOfGap_parallel_rotate_Ellipse} 281 | 282 | self.cartesianOfGap_Ushape_Dict = {'circle': self.cartesianOfGap_Ushape_Circle,\ 283 | 'rectangle': self.cartesianOfGap_Ushape_Rectangle,\ 284 | 'hexagon': self.cartesianOfGap_Ushape_Hexagon,\ 285 | 'ellipse': self.cartesianOfGap_Ushape_Ellipse} 286 | 287 | self.cartesianOfGap_Ushape_rotate_Dict = {'circle': self.cartesianOfGap_Ushape_rotate_Circle,\ 288 | 'rectangle': self.cartesianOfGap_Ushape_rotate_Rectangle,\ 289 | 'hexagon': self.cartesianOfGap_Ushape_rotate_Hexagon,\ 290 | 'ellipse': self.cartesianOfGap_Ushape_rotate_Ellipse} 291 | 292 | self.cartesianOfGap_hole_Dict = {'circle': self.cartesianOfGap_hole_Circle,\ 293 | 'rectangle': self.cartesianOfGap_hole_Rectangle,\ 294 | 'hexagon': self.cartesianOfGap_hole_Hexagon,\ 295 | 'ellipse': self.cartesianOfGap_hole_Ellipse,\ 296 | 'vitamin': self.cartesianOfGap_hole_Vitamin} 297 | 298 | self.object_width = {'circle': 40.,\ 299 | 'rectangle': 40.,\ 300 | 'hexagon': 34.,\ 301 | 'ellipse': 40., 302 | 'vitamin': 55.} 303 | self.ob_cmp = {'circle': [-3.3,-1.,-1.2,3.1],\ 304 | 'rectangle': [7,4.0,4.,8.3],\ 305 | 'hexagon': [0.6,0.4,0.8,4.0],\ 306 | 'ellipse': [5.5,5.5,4.8,9.2]} 307 | 308 | self.jointAngleOfGap = [0.51, 39.24, 6.46, 0.0, 44.3, 90.51] 309 | 310 | self.second_corner_dis = { 311 | 'circle': [0., 0.], 312 | 'rectangle': [0., 15.8], 313 | 'hexagon': [5.0, -0.2], 314 | 'ellipse': [0.7, 15.] 
315 | } 316 | self.Start_EGM = rospy.ServiceProxy('/robot2_ActivateEGM', 317 | robot_ActivateEGM) 318 | self.Stop_EGM = rospy.ServiceProxy('/robot2_StopEGM', robot_StopEGM) 319 | self.setSpeed = rospy.ServiceProxy('/robot2_SetSpeed', robot_SetSpeed) 320 | self.command_pose_pub = rospy.Publisher('/robot2_EGM/SetCartesian', 321 | PoseStamped, 322 | queue_size=100, 323 | latch=True) 324 | self.mode = 0 325 | 326 | def move_cart_mm(self, position): 327 | setCartRos = rospy.ServiceProxy('/robot2_SetCartesian', 328 | robot_SetCartesian) 329 | setCartRos(position[0], position[1], position[2], position[6], 330 | position[3], position[4], position[5]) 331 | 332 | def move_cart_add(self, dx=0, dy=0, dz=0): 333 | #Define ros services 334 | getCartRos = rospy.ServiceProxy('/robot2_GetCartesian', 335 | robot_GetCartesian) 336 | setCartRos = rospy.ServiceProxy('/robot2_SetCartesian', 337 | robot_SetCartesian) 338 | #read current robot pose 339 | c = getCartRos() 340 | # print([c.x, c.y, c.z, c.q0, c.qx, c.qy, c.qz]) 341 | #move robot to new pose 342 | setCartRos(c.x + dx, c.y + dy, c.z + dz, c.q0, c.qx, c.qy, c.qz) 343 | 344 | def get_cart(self): 345 | getCartRos = rospy.ServiceProxy('/robot2_GetCartesian', 346 | robot_GetCartesian) 347 | c = getCartRos() 348 | return [c.x, c.y, c.z, c.qx, c.qy, c.qz, c.q0] 349 | 350 | def close_gripper_f(self, grasp_speed=50, grasp_force=10, width=40.): 351 | grasp = rospy.ServiceProxy('/wsg_32_driver/grasp', Move) 352 | self.ack() 353 | self.set_grip_force(grasp_force) 354 | time.sleep(0.1) 355 | error = grasp(width=width, speed=grasp_speed) 356 | time.sleep(0.5) 357 | 358 | def home_gripper(self): 359 | self.ack() 360 | home = rospy.ServiceProxy('/wsg_32_driver/homing', Empty) 361 | try: 362 | error = home() 363 | except: 364 | pass 365 | time.sleep(0.5) 366 | # print('error', error) 367 | 368 | def open_gripper(self): 369 | self.ack() 370 | release = rospy.ServiceProxy('/wsg_32_driver/move', Move) 371 | release(68.0, 100) 372 | time.sleep(0.5) 373 | 374 | def set_grip_force(self, val=5): 375 | set_force = rospy.ServiceProxy('/wsg_32_driver/set_force', Conf) 376 | error = set_force(val) 377 | time.sleep(0.2) 378 | 379 | def ack(self): 380 | srv = rospy.ServiceProxy('/wsg_32_driver/ack', Empty) 381 | error = srv() 382 | time.sleep(0.5) 383 | 384 | def get_jointangle(self): 385 | getJoint = rospy.ServiceProxy('/robot2_GetJoints', robot_GetJoints) 386 | angle = getJoint() 387 | return [angle.j1, angle.j2, angle.j3, angle.j4, angle.j5, angle.j6] 388 | 389 | def set_jointangle(self, angle): 390 | setJoint = rospy.ServiceProxy('/robot2_SetJoints', robot_SetJoints) 391 | setJoint(angle[0], angle[1], angle[2], angle[3], angle[4], angle[5]) 392 | #image processing 393 | 394 | def object_regrasp(self, objectCartesian, objectCartesian_top, graspForce, 395 | target_object, random_pose): 396 | 397 | # print('random_pose regrasp', random_pose) 398 | 399 | # print('regrasp', 'rand y', self.randomy, 'rand z', self.randomz) 400 | objectCartesian = np.array(objectCartesian) 401 | objectCartesian_top = np.array(objectCartesian_top) 402 | 403 | cart_positon = self.get_cart() 404 | 405 | self.setSpeed(600, 200) 406 | self.move_cart_add(0., 0., 100.) 
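        # Remainder of the regrasp routine: move above the object's home pose (with the random y
        # offset), descend onto it (with the random y/z offsets), release, regrasp with the
        # object-specific width and the requested force, lift 75 mm, then return to the original
        # gripper pose via a waypoint 25 mm above it.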
407 | time.sleep(0.5) 408 | # print "step6" 409 | # raw_input("Press Enter to continue...") 410 | #move to the top of the obejct 411 | objectCartesian_top[1] += random_pose[1] 412 | self.move_cart_mm(objectCartesian_top) 413 | time.sleep(0.5) 414 | # print "step3" 415 | 416 | self.setSpeed(100, 50) 417 | 418 | # raw_input("Press Enter to continue...") 419 | objectCartesian[:3] += np.array([0, random_pose[1], random_pose[2]]) 420 | self.move_cart_mm(objectCartesian) 421 | time.sleep(0.5) 422 | # print "step6" 423 | 424 | # raw_input("Press Enter to continue...") 425 | self.open_gripper() 426 | # time.sleep(1) 427 | 428 | self.close_gripper_f(100, graspForce, self.object_width[target_object]) 429 | # time.sleep(1) 430 | self.setSpeed(600, 200) 431 | self.move_cart_add(0., 0., 75.) 432 | time.sleep(0.5) 433 | 434 | # if target_object == 'rectangle': 435 | # raw_input("Press Enter to continue...") 436 | cart_positon_top = list(cart_positon) 437 | cart_positon_top[2] = cart_positon_top[2] + 25. 438 | 439 | # self.setSpeed(400, 100) 440 | 441 | self.move_cart_mm(cart_positon_top) 442 | time.sleep(0.5) 443 | 444 | # self.setSpeed(400, 100) 445 | self.move_cart_mm(cart_positon) 446 | time.sleep(0.5) 447 | 448 | def return_object(self, objectCartesian, objectCartesian_top, 449 | objectJoint_top, random_pose): 450 | 451 | # print('random_pose return', random_pose) 452 | objectCartesian = np.array(objectCartesian) 453 | objectCartesian_top = np.array(objectCartesian_top) 454 | 455 | self.setSpeed(600, 200) 456 | self.move_cart_add(0., 0., 100.) 457 | time.sleep(0.5) 458 | # print('return', 'rand y', self.randomy, 'rand z', self.randomz) 459 | # self.set_jointangle(objectJoint_top) 460 | objectCartesian_top[1] += random_pose[1] 461 | self.move_cart_mm(objectCartesian_top) 462 | time.sleep(0.5) 463 | # current_joint_angle = self.get_jointangle() 464 | # print('current_joint_angle', current_joint_angle) 465 | # raw_input("Press Enter to continue...") 466 | 467 | self.setSpeed(100, 50) 468 | 469 | objectCartesian[:3] += np.array([0, random_pose[1], random_pose[2]]) 470 | self.move_cart_mm(objectCartesian) 471 | time.sleep(0.5) 472 | 473 | self.open_gripper() 474 | time.sleep(0.2) 475 | # self.setSpeed(400, 100) 476 | # self.move_cart_add(0., 0., 50.) 477 | 478 | def movedown_EGM(self, slip_monitor): 479 | index = 1 480 | rate = rospy.Rate(248) 481 | while ((((not slip_monitor.slip_indicator1 482 | and not slip_monitor.slip_indicator2)) or index < 248 * 1.0) 483 | and index < int(248 * 3.5)): 484 | # t = time.time() 485 | pose = PoseStamped() 486 | # pose.header.stamp = now 487 | # pose.header.frame_id = "map" 488 | # Position in mm or velocity in mm/s 489 | pose.pose.position.x = 0. 490 | pose.pose.position.y = 0. 491 | pose.pose.position.z = -2. 492 | # Orientation or angular velocity in xyzw 493 | pose.pose.orientation.x = 0. 494 | pose.pose.orientation.y = 0. 495 | pose.pose.orientation.z = 0. 496 | pose.pose.orientation.w = 0. 497 | self.command_pose_pub.publish(pose) 498 | index += 1 499 | rate.sleep() 500 | # print('index', index / 248.) 
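        # The loop above streams a constant downward z command of -2 (mm or mm/s, per the in-line
        # EGM comment) at 248 Hz. It descends for at least ~1 s, then stops as soon as either
        # GelSight slip detector fires, with a hard cap of ~3.5 s. Completing the full 3.5 s
        # stroke without a slip event is what the caller treats as a successful insertion, which
        # is what the check below reports.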
501 | if index == int(248 * 3.5): 502 | return True 503 | else: 504 | return False 505 | 506 | def get_cartesianOfGap_corner(self, random_num_rotation, cartesianOfGap, 507 | target_object): 508 | 509 | if self.mode == 0: 510 | cartesianOfGap_rotate = list(cartesianOfGap) 511 | self.mode = 0 512 | 513 | elif self.mode == 1: 514 | cartesianOfGap_rotate = self.tran_rotate_robot( 515 | list(cartesianOfGap), self.ob_cmp[target_object][0], 516 | self.ob_cmp[target_object][1], 90.) 517 | 518 | elif self.mode == 2: 519 | cartesianOfGap_rotate = self.tran_rotate_robot( 520 | list(cartesianOfGap), self.ob_cmp[target_object][2], 521 | self.ob_cmp[target_object][3], -90.) 522 | 523 | elif self.mode == 3: 524 | cartesianOfGap_rotate = list(cartesianOfGap) 525 | cartesianOfGap_rotate[ 526 | 0] += 77. + self.second_corner_dis[target_object][0] 527 | cartesianOfGap_rotate[ 528 | 1] += -28. + self.second_corner_dis[target_object][1] 529 | 530 | return cartesianOfGap_rotate 531 | 532 | def get_cartesianOfGap_singlewall(self, random_num_rotation, 533 | cartesianOfGap, target_object): 534 | 535 | self.ob_cmp_singlewall = {'circle': [-1.8,-1.5,0.,3.],\ 536 | 'rectangle': [7,4.,4.,8.3],\ 537 | 'hexagon': [0.5,0.7,1.5,4.0],\ 538 | 'ellipse': [7,4.3,5.5,8.]} 539 | 540 | self.second_corner_dis_singlewall = { 541 | 'circle': 0., 542 | 'rectangle': 14.5, 543 | 'hexagon': 0., 544 | 'ellipse': 14.5 545 | } 546 | 547 | if self.mode == 9: 548 | cartesianOfGap_rotate = list(cartesianOfGap) 549 | self.mode = 0 550 | 551 | elif self.mode == 10: 552 | cartesianOfGap_rotate = self.tran_rotate_robot( 553 | list(cartesianOfGap), self.ob_cmp_singlewall[target_object][0], 554 | self.ob_cmp_singlewall[target_object][1], 90.) 555 | 556 | elif self.mode == 11: 557 | cartesianOfGap_rotate = self.tran_rotate_robot( 558 | list(cartesianOfGap), self.ob_cmp_singlewall[target_object][2], 559 | self.ob_cmp_singlewall[target_object][3], -90.) 
560 | 561 | elif self.mode == 12: 562 | cartesianOfGap_rotate = list(cartesianOfGap) 563 | # cartesianOfGap_rotate[ 564 | # 0] += self.second_corner_dis_singlewall[target_object][0] 565 | cartesianOfGap_rotate[ 566 | 1] += 133.5 + self.second_corner_dis_singlewall[target_object] 567 | 568 | return cartesianOfGap_rotate 569 | 570 | def pick_up_object(self, target_object, graspForce, inposition, mode, 571 | random_pose): 572 | # print('random_pose pick up', random_pose) 573 | self.mode = mode 574 | object_cart_info = list(self.objectCartesianDict[target_object]) 575 | 576 | # self.randomz = random.random() * 15 - 10 577 | # self.randomy = (random.random() - 0.5) * 12 578 | # print('rand y', self.randomy, 'rand z', self.randomz) 579 | objectCartesian = np.array(object_cart_info[0]).copy() 580 | objectCartesian[:3] += np.array([0, random_pose[1], random_pose[2]]) 581 | objectCartesian_top = np.array(object_cart_info[1]).copy() 582 | objectCartesian_top[:3] += np.array([0, random_pose[1], 0]) 583 | 584 | self.setSpeed(600, 200) 585 | if not inposition: 586 | self.move_cart_mm(objectCartesian_top) 587 | time.sleep(0.5) 588 | # print "go to the top of the object" 589 | self.move_cart_mm(objectCartesian) 590 | time.sleep(0.5) 591 | # print "go to the object" 592 | # rand_num = random.random() * -2 593 | # self.move_cart_add(0., 0., rand_num) 594 | # time.sleep(0.2) 595 | # raw_input("Press Enter to continue...") 596 | self.close_gripper_f(100, graspForce, self.object_width[target_object]) 597 | 598 | # time.sleep(1000000000) 599 | # print "grasp the object" 600 | # raw_input("Press Enter to continue...") 601 | self.move_cart_mm(objectCartesian_top) 602 | time.sleep(0.5) 603 | # print "go up" 604 | # raw_input("Press Enter to continue...") 605 | 606 | # self.setSpeed(400, 100) 607 | 608 | random_num_env = np.random.rand() 609 | random_num_rotation = np.random.rand() 610 | # random_num_env = 0.6 611 | # random_num_rotation = 0.3 612 | 613 | if self.mode < 4: 614 | cartesianOfGap = list(self.cartesianOfGapDict[target_object]) 615 | cartesianOfGap_rotate = self.get_cartesianOfGap_corner( 616 | random_num_rotation, cartesianOfGap, target_object) 617 | elif self.mode == 4: 618 | cartesianOfGap = self.cartesianOfGap_parallel_Dict[target_object] 619 | cartesianOfGap_rotate = list(cartesianOfGap) 620 | elif self.mode == 5: 621 | cartesianOfGap = self.cartesianOfGap_parallel_rotate_Dict[ 622 | target_object] 623 | cartesianOfGap_rotate = list(cartesianOfGap) 624 | elif self.mode == 6: 625 | cartesianOfGap = list( 626 | self.cartesianOfGap_Ushape_Dict[target_object]) 627 | cartesianOfGap_rotate = list(cartesianOfGap) 628 | elif self.mode == 7: 629 | cartesianOfGap = list( 630 | self.cartesianOfGap_Ushape_rotate_Dict[target_object]) 631 | cartesianOfGap_rotate = list(cartesianOfGap) 632 | elif self.mode == 8: 633 | cartesianOfGap = list(self.cartesianOfGap_hole_Dict[target_object]) 634 | cartesianOfGap_rotate = list(cartesianOfGap) 635 | elif 9 <= self.mode <= 12: 636 | cartesianOfGap = list( 637 | self.cartesianOfGap_onewall_Dict[target_object]) 638 | cartesianOfGap_rotate = self.get_cartesianOfGap_singlewall( 639 | random_num_rotation, cartesianOfGap, target_object) 640 | 641 | # if target_object == 'rectangle': 642 | cart_positon_top = list(cartesianOfGap_rotate) 643 | cart_positon_top[1] += random_pose[1] 644 | cart_positon_top[2] += random_pose[2] + 25. 645 | # cart_positon_top[2] = cart_positon_top[2] + 25. 
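        # Approach the selected fixture from 25 mm above (plus the randomized y/z offsets), then
        # settle at the gap pose. self.mode picks the fixture: 0-3 corner (with the per-object
        # rotation/offset from get_cartesianOfGap_corner), 4/5 parallel wall (plain/rotated),
        # 6/7 U shape (plain/rotated), 8 hole, 9-12 single wall.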
646 | self.move_cart_mm(cart_positon_top) 647 | time.sleep(0.5) 648 | 649 | cartesianOfGap_rotate[:3] += np.array( 650 | [0, random_pose[1], random_pose[2]]) 651 | self.move_cart_mm(cartesianOfGap_rotate) 652 | 653 | time.sleep(0.5) 654 | # print('robot mode', self.mode) 655 | # raw_input("Press Enter to continue...") 656 | 657 | def robot_reset(self): 658 | print('go to the initial position') 659 | self.set_jointangle(self.initialJointPosition) 660 | 661 | def tran_rotate_robot(self, targetCartesian, x, y, theta): 662 | # targetCartesian = np.array(curren_tart) #current cart 663 | relativeVector = np.array([0., 0., 0.]) # 0 12 380 664 | ratationMatrix1 = (R.from_quat(targetCartesian[3:])).as_dcm() 665 | ratationMatrix2 = (R.from_euler('z', -theta, 666 | degrees=True)).as_dcm() #rotate theta 667 | targetQuaternion = R.from_dcm( 668 | ratationMatrix2.dot(ratationMatrix1)).as_quat() 669 | targetCartesian[:3] = np.array( 670 | targetCartesian[:3]) + ratationMatrix1.dot( 671 | relativeVector) - ratationMatrix2.dot(ratationMatrix1).dot( 672 | relativeVector) 673 | targetCartesian[3:] = targetQuaternion 674 | targetCartesian[0] = targetCartesian[0] + x # add 675 | targetCartesian[1] = targetCartesian[1] + y # add 676 | return targetCartesian 677 | 678 | def error_converter(self, error_x, error_y): 679 | if self.mode == 0: 680 | error_x_new = error_x 681 | error_y_new = error_y 682 | if self.mode == 1: 683 | error_x_new = -error_y 684 | error_y_new = error_x 685 | elif self.mode == 2: 686 | error_x_new = error_y 687 | error_y_new = -error_x 688 | elif self.mode == 3: 689 | error_x_new = -error_x 690 | error_y_new = -error_y 691 | elif self.mode == 4: # parallel wall 692 | error_x_new = error_x 693 | error_y_new = error_y 694 | elif self.mode == 5: # parallel wall rotate 695 | error_x_new = error_x 696 | error_y_new = error_y 697 | elif self.mode == 6: # U shape 698 | error_x_new = error_x 699 | error_y_new = error_y 700 | elif self.mode == 7: # U shape rotate 701 | error_x_new = -error_x 702 | error_y_new = -error_y 703 | elif self.mode == 8: # hole 704 | error_x_new = error_x 705 | error_y_new = error_y 706 | if self.mode == 9: 707 | error_x_new = error_x 708 | error_y_new = error_y 709 | if self.mode == 10: 710 | error_x_new = -error_y 711 | error_y_new = error_x 712 | elif self.mode == 11: 713 | error_x_new = error_y 714 | error_y_new = -error_x 715 | elif self.mode == 12: 716 | error_x_new = -error_x 717 | error_y_new = -error_y 718 | return error_x_new, error_y_new 719 | 720 | 721 | class Packing_env: 722 | def __init__(self, num_frame): 723 | self.slip_monitor = slip_detector() 724 | self.robot = Robot_motion() 725 | self.done = False 726 | self.object_name_list = ['circle', 'hexagon', 'ellipse', 'rectangle'] 727 | # self.object_name_list = ['circle', 'hexagon', 'ellipse'] 728 | # self.object_name_list = ['hexagon', 'circle'] 729 | # self.object_name_list = ['vitamin'] 730 | self.target_object = self.object_name_list[self.select_object()] 731 | self.x_error = 0 732 | self.y_error = 0 733 | self.theta_error = 0 734 | self.error_generator() 735 | self.max_x_error = 14 #mm 736 | self.max_y_error = 14 #mm 737 | self.max_theta_error = 20 #degree 738 | self.reward = 0 739 | self.state = None 740 | self.rgrasp_counter = 0 741 | self.num_frame = num_frame 742 | self.save_data = True 743 | 744 | def select_object(self): 745 | rnum = random.random() 746 | ob_index = 0 747 | if rnum <= 0.2: 748 | ob_index = 0 749 | elif 0.2 < rnum <= 0.4: 750 | ob_index = 1 751 | elif 0.4 < rnum <= 0.6: 752 | ob_index 
= 2 753 | else: 754 | ob_index = 3 755 | return ob_index 756 | 757 | def select_image(self, success): 758 | 759 | image_g1, image_g2, time_g1, time_g2, motion_g1, motion_g2 = [], [], [], [], [], [] 760 | self.data1 = list(self.data1) 761 | self.data2 = list(self.data2) 762 | # np.save('data_gelsight1.npy', self.data1) 763 | # np.save('data_gelsight2.npy', self.data2) 764 | 765 | for i in range(len(self.data1)): 766 | image_g1.append(self.data1[i][0]) 767 | image_g2.append(self.data2[i][0]) 768 | time_g1.append(self.data1[i][1]) 769 | time_g2.append(self.data2[i][1]) 770 | motion_g1.append(self.data1[i][2]) 771 | motion_g2.append(self.data2[i][2]) 772 | 773 | image_g1 = np.array(image_g1) 774 | image_g2 = np.array(image_g2) 775 | time_g1 = np.array(time_g1) 776 | time_g2 = np.array(time_g2) 777 | motion_g1 = np.array(motion_g1) 778 | motion_g2 = np.array(motion_g2) 779 | num_of_frame = 12 780 | motion_thre = 0.7 781 | start_num = -75 782 | kernel = np.ones((4, )) / 4. 783 | motion_g1_smooth = np.convolve(kernel, 784 | motion_g1[start_num:], 785 | mode='same') 786 | motion_g2_smooth = np.convolve(kernel, 787 | motion_g2[start_num:], 788 | mode='same') 789 | motion_diff = np.abs(motion_g1_smooth - motion_thre).tolist() 790 | start_index1 = motion_diff.index(min(motion_diff)) 791 | motion_diff = np.abs(motion_g2_smooth - motion_thre).tolist() 792 | start_index2 = motion_diff.index(min(motion_diff)) 793 | 794 | if time_g1[start_num + start_index1] > time_g2[start_num + 795 | start_index2]: 796 | start_frame2 = max(-75, start_num + start_index2 - 5) 797 | time_diff = np.abs(time_g1[start_num:] - 798 | time_g2[start_frame2]).tolist() 799 | index = time_diff.index(min(time_diff)) 800 | start_frame1 = start_num + index 801 | # print('gelsight 2 is early') 802 | else: 803 | start_frame1 = max(-75, start_num + start_index1 - 5) 804 | time_diff = np.abs(time_g2[start_num:] - 805 | time_g1[start_frame1]).tolist() 806 | index = time_diff.index(min(time_diff)) 807 | start_frame2 = start_num + index 808 | # print('gelsight 1 is early') 809 | 810 | # image2save_g1 = image_g1[start_frame1:start_frame1 + 8] 811 | # image2save_g2 = image_g2[start_frame2:start_frame2 + 8] 812 | imageseq1 = np.array(image_g1[start_frame1:min(start_frame1 + 20, -1)]) 813 | imageseq2 = np.array(image_g2[start_frame2:min(start_frame2 + 20, -1)]) 814 | # flow_g1 = motion_g1[start_frame1:] 815 | # flow_g2 = motion_g2[start_frame2:] 816 | 817 | num_frame = min(imageseq1.shape[0], imageseq2.shape[0]) 818 | sample_index = np.linspace(0, num_frame - 1, 819 | num=num_of_frame).astype(np.uint8) 820 | 821 | image2save_g1 = imageseq1[sample_index, :, :, :] 822 | image2save_g2 = imageseq2[sample_index, :, :, :] 823 | # flow2save_g1 = flow_g1[sample_index] 824 | # flow2save_g2 = flow_g2[sample_index] 825 | 826 | # print('start_index', start_index1, start_index2) 827 | # print('start_frame', start_frame1, start_frame2) 828 | # print('image shape', image2save_g1.shape, image2save_g2.shape) 829 | # np.save('image2save_g1.npy', image2save_g1) 830 | # np.save('image2save_g2.npy', image2save_g2) 831 | return image2save_g1, image2save_g2 832 | 833 | def step(self, action, check_bound): 834 | action_origin = np.array(action) 835 | action, r_matrix = self.action_convertor( 836 | action, check_bound) # action in gripper frame 837 | # print("converted action", action) 838 | if check_bound: 839 | Fail_sign, reward, state_full = self.check_boundary(action) 840 | else: 841 | Fail_sign = False 842 | reward = 0 843 | state_full = np.array(action) 844 | 845 | 
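        # What follows is one environment step: move by the converted (dx, dy, dtheta) action,
        # re-arm both slip monitors, stream the gripper down under EGM until a slip event or the
        # 3.5 s cap, then build the observation by concatenating 12 roughly time-aligned frames
        # from each GelSight (select_image) along axis 0. Reward logic implemented below:
        # insertion with |state_full[2]| < 4 (the angular term) adds +80, any other insertion
        # adds +40, a contact without insertion subtracts 5, and an out-of-bounds action
        # subtracts 40 and ends the episode.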
if not Fail_sign: 846 | # print('data number', folder_num) 847 | current_cart = self.robot.get_cart() 848 | self.robot.setSpeed(100, 50) 849 | if self.mode in [3, 7, 12]: 850 | targetCartesian = self.robot.tran_rotate_robot( 851 | np.array(current_cart), -action[0], -action[1], action[2]) 852 | else: 853 | targetCartesian = self.robot.tran_rotate_robot( 854 | np.array(current_cart), action[0], action[1], action[2]) 855 | 856 | self.robot.move_cart_mm(targetCartesian) 857 | self.slip_monitor.restart1 = True 858 | self.slip_monitor.restart2 = True 859 | time.sleep(0.5) 860 | # joint6 = -(self.robot.get_jointangle()[-1] - 1.0) / 180. * np.pi 861 | if self.mode in [0, 6, 8, 9]: 862 | joint6 = (self.theta_error) / 180. * np.pi 863 | elif self.mode in [1, 10]: 864 | joint6 = (self.theta_error - 90.) / 180. * np.pi 865 | elif self.mode in [2, 11]: 866 | joint6 = (self.theta_error + 90.) / 180. * np.pi 867 | elif self.mode in [3, 7, 12]: 868 | joint6 = (self.theta_error + 180.) / 180. * np.pi 869 | 870 | # if self.mode != 3 and self.mode != 7 and self.mode != 12: 871 | r_matrix_next = np.array([[np.cos(joint6), -np.sin(joint6), 0],\ 872 | [np.sin(joint6), np.cos(joint6), 0], \ 873 | [0, 0, 1]]) 874 | # else: 875 | # r_matrix_next = np.array([[np.cos(joint6+180), -np.sin(joint6+180), 0],\ 876 | # [np.sin(joint6+180), np.cos(joint6+180), 0], \ 877 | # [0, 0, 1]]) 878 | # raw_input("Press Enter to continue...") 879 | ret = self.robot.Start_EGM(True, 86400) 880 | success_sign = self.robot.movedown_EGM(self.slip_monitor) 881 | ret = self.robot.Stop_EGM() 882 | time.sleep(0.3) 883 | self.data1 = self.slip_monitor.data1 884 | self.data2 = self.slip_monitor.data2 885 | self.robot.move_cart_add(0., 0., 3.) 886 | time.sleep(0.2) 887 | # if not os.path.isfile( 888 | # '/home/mcube/siyuan/Packing_RL/utils/gelsight1_data.npy'): 889 | # return [], [], True, [0, 0, 0], [], [], [] 890 | 891 | if success_sign: 892 | print( 893 | 'object inserted!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!' 
894 | ) 895 | if np.abs(state_full[2]) < 4.: 896 | self.reward = reward + 80 897 | else: 898 | self.reward = reward + 40 899 | # self.state = np.array( 900 | # self.slip_monitor.image2save_g1[-self.num_frame * 3:]) 901 | image2save_g1, image2save_g2 = self.select_image(True) 902 | self.state = np.concatenate((np.array(image2save_g1),\ 903 | np.array(image2save_g2)),axis = 0) 904 | self.done = True 905 | else: 906 | # self.state = np.array( 907 | # self.slip_monitor.image2save_g1[-self.num_frame * 3:]) 908 | image2save_g1, image2save_g2 = self.select_image(False) 909 | self.state = np.concatenate((np.array(image2save_g1),\ 910 | np.array(image2save_g2)),axis = 0) 911 | self.reward = reward - 5 912 | self.done = False 913 | # self.robot.setSpeed(200,10) 914 | # self.slip_monitor.restartDetector1() 915 | # self.slip_monitor.restartDetector2() 916 | 917 | else: 918 | self.reward = reward - 40 919 | self.done = True 920 | image2save_g1, image2save_g2 = self.select_image(True) 921 | # self.state = np.array( 922 | # self.slip_monitor.image2save_g1[-self.num_frame * 3:]) 923 | # self.state = np.concatenate((np.array(self.slip_monitor.image2save_g1[-45:]),\ 924 | # np.array(self.slip_monitor.image2save_g2[-45:])),axis = 0) 925 | self.state = np.concatenate((np.array(image2save_g1),\ 926 | np.array(image2save_g2)),axis = 0) 927 | r_matrix_next = np.array(r_matrix) 928 | print('Failed?????????????????????????????????????????????') 929 | 930 | return self.state, self.reward, self.done, state_full, action, r_matrix, r_matrix_next 931 | 932 | def action_convertor(self, action, notinitial): 933 | # print 'action' 934 | time.sleep(0.5) 935 | # joint6 = -(self.robot.get_jointangle()[-1] + 24.36) / 180. * np.pi 936 | if self.mode in [0, 6, 8, 9]: 937 | joint6 = (self.theta_error * notinitial) / 180. * np.pi 938 | elif self.mode in [1, 10]: 939 | joint6 = (self.theta_error * notinitial - 90.) / 180. * np.pi 940 | elif self.mode in [2, 11]: 941 | joint6 = (self.theta_error * notinitial + 90.) / 180. * np.pi 942 | elif self.mode in [3, 7, 12]: 943 | joint6 = (self.theta_error * notinitial + 180.) / 180. * np.pi 944 | 945 | # if notinitial: 946 | # joint6 = (self.theta_error * notinitial) / 180. * np.pi 947 | # else: 948 | # joint6 = 0. 
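
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): the frame conversion
# performed by action_convertor() below amounts to rotating the planar
# (dx, dy) correction by the current joint-6 angle while leaving the rotation
# component untouched.  The helper name `rotate_action_to_world` is made up
# for this example.
import numpy as np

def rotate_action_to_world(action_xyt, joint6_rad):
    """Rotate [dx, dy, dtheta] from the gripper frame into the world frame."""
    c, s = np.cos(joint6_rad), np.sin(joint6_rad)
    r_matrix = np.array([[c, -s, 0.],
                         [s,  c, 0.],
                         [0., 0., 1.]])
    return r_matrix.dot(np.array(action_xyt)), r_matrix

# e.g. a pure +x correction becomes a pure +y correction after a 90 deg turn:
# rotate_action_to_world([1., 0., 0.], np.pi / 2.)[0]  ->  approx [0., 1., 0.]
# ---------------------------------------------------------------------------
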
949 | # print("joint6", joint6) 950 | r_matrix = np.array([[np.cos(joint6), -np.sin(joint6), 0],\ 951 | [np.sin(joint6), np.cos(joint6), 0], \ 952 | [0, 0, 1]]) 953 | # print 'r_matrix', r_matrix 954 | action_world = r_matrix.dot(np.array(action)) 955 | # print 'action_world', action_world 956 | # action_world = action_world.tolist() 957 | # action_world.append(action[2]) 958 | # print 'action_world', action_world 959 | # if self.mode == 3 or self.mode == 7 or self.mode == 12: 960 | # action_world[0] *= -1 961 | # action_world[1] *= -1 962 | # r_matrix = np.array([[np.cos(joint6+180), -np.sin(joint6+180), 0],\ 963 | # [np.sin(joint6+180), np.cos(joint6+180), 0], \ 964 | # [0, 0, 1]]) 965 | return action_world, r_matrix 966 | 967 | def check_rgrasp(self, x_error, theta_error, x_error_previous, 968 | theta_error_previous): 969 | if x_error * x_error_previous > 0 and abs(theta_error) < 8.0 and abs( 970 | theta_error_previous) < 8.0: 971 | self.rgrasp_counter += 1 972 | else: 973 | self.rgrasp_counter = 0 974 | 975 | def reward_function(self, error_before, error_after): 976 | error_before *= -1 977 | error_after *= -1 978 | if error_before > 0: 979 | error_before = 0 980 | if error_after > 0: 981 | error_after = 0 982 | 983 | reward = error_after - error_before 984 | 985 | # if reward > 0: 986 | # reward *= 2 987 | return reward 988 | 989 | def reward_function_2(self, error_before, error_after): 990 | if error_after < 0: 991 | reward = 0 992 | else: 993 | reward = -error_after**2 994 | 995 | return reward 996 | 997 | def reward_function_3(self, error_before, error_after): 998 | margin = 4. 999 | if -margin <= error_before <= 0. and -margin <= error_after <= 0.: # still in the safe zoom ok 1000 | reward = 0 1001 | elif error_before >= 0. and error_after >= 0.: # still in the collision zoom, depends on the action 1002 | reward = (abs(error_before) - abs(error_after)) 1003 | elif error_before < -margin and error_after < -margin: # still in the safe zoom, but the gap too big, depends on the action 1004 | reward = abs(error_before) - abs(error_after) 1005 | elif -margin <= error_before <= 0. and error_after >= 0.: # collied with the enviroment $very bad$ 1006 | reward = -error_after 1007 | elif error_before < -margin and error_after >= 0.: # collied with the enviroment $very bad$ 1008 | reward = -error_after + error_before + margin 1009 | elif -margin <= error_before <= 0. 
and error_after <= -margin: # increased the gap $bad$ 1010 | reward = error_after + margin 1011 | elif error_before >= 0 and -margin <= error_after <= 0.: # correct the error $very good$ 1012 | reward = error_before 1013 | elif error_before >= 0 and error_after <= -margin: # correct the error good but overshoot 1014 | reward = error_before + error_after + margin 1015 | elif error_before <= -margin and -margin <= error_after <= 0.: # make the gap smaller good 1016 | reward = -margin - error_before 1017 | 1018 | return reward 1019 | 1020 | def check_boundary(self, action): 1021 | theta_error_acc = self.theta_error + action[2] 1022 | 1023 | # if self.mode != 3 and self.mode != 7 and self.mode != 12: 1024 | x_error_acc = self.x_error + action[0] 1025 | y_error_acc = self.y_error + action[1] 1026 | 1027 | reward_x = self.reward_function_3(-self.x_error, -x_error_acc) * 3 1028 | reward_y = self.reward_function_3(self.y_error, y_error_acc) * 3 1029 | 1030 | reward_theta = (abs(self.theta_error) - abs(theta_error_acc)) * 1 1031 | # reward_theta = -(theta_error_acc)**2 1032 | # print 'accumulated error: ', x_error_acc, theta_error_acc 1033 | # print('accumulated error', [x_error_acc, y_error_acc, theta_error_acc]) 1034 | x_offset = -2 1035 | y_offset = 2 1036 | # x_offset = 0 1037 | # y_offset = 0 1038 | if abs(x_error_acc + x_offset) > self.max_x_error or abs( 1039 | y_error_acc + y_offset) > self.max_y_error or abs( 1040 | theta_error_acc) > self.max_theta_error: 1041 | # print("cross the max error limitation, please restart") 1042 | self.error_generator() 1043 | Fail_sign = True 1044 | # reward = -200 1045 | else: 1046 | self.x_error = x_error_acc 1047 | self.y_error = y_error_acc 1048 | self.theta_error = theta_error_acc 1049 | Fail_sign = False 1050 | # reward = (reward_x > 0)*reward_x*2 + (reward_x <= 0)*reward_x*3 + \ 1051 | # (reward_y > 0)*reward_y*2 + (reward_y <= 0)*reward_y*3 + \ 1052 | # (reward_theta > 0)*reward_theta*2 + (reward_theta <= 0)*reward_theta*3 1053 | reward = reward_x + reward_y + reward_theta 1054 | state_full = np.array([x_error_acc, y_error_acc, theta_error_acc]) 1055 | # self.check_rgrasp(x_error_acc, theta_error_acc, self.x_error, self.theta_error) 1056 | 1057 | return Fail_sign, reward, state_full 1058 | 1059 | def reset(self, random_pose): 1060 | object_cart_info = list( 1061 | self.robot.objectCartesianDict[self.target_object]) 1062 | self.robot.return_object(object_cart_info[0], object_cart_info[1], 1063 | object_cart_info[2], random_pose) 1064 | self.target_object = self.object_name_list[self.select_object()] 1065 | self.error_generator() 1066 | 1067 | def mode_generator(self): 1068 | # self.mode = random.randint(6, 7) 1069 | self.mode = 8 1070 | # self.mode = random.randint(9, 12) 1071 | # self.mode = 11 1072 | # self.mode = random.randint(0, 3) 1073 | # self.mode = 9 1074 | # self.mode = 2 1075 | # self.mode = 6 1076 | # if self.mode == 6: 1077 | # self.mode = 4 1078 | # elif self.mode == 7: 1079 | # self.mode = 5 1080 | 1081 | def U_shape_error_generator(self): 1082 | rand_error = random.random() 1083 | if self.target_object != 'hexagon': 1084 | add_on = 5 1085 | else: 1086 | add_on = 7 1087 | 1088 | add_on_x = 2 1089 | add_on_y = 2 1090 | 1091 | if rand_error <= 0.1: 1092 | self.x_error = 0 1093 | self.y_error = random.random() * 6. + 1. 1094 | elif 0.1 < rand_error <= 0.2: 1095 | self.x_error = random.random() * 6. + add_on 1096 | self.y_error = 0 1097 | elif 0.2 < rand_error <= 0.3: 1098 | self.x_error = random.random() * -6. - 1. 
1099 | self.y_error = 0 1100 | elif 0.3 < rand_error <= 0.65: 1101 | self.x_error = random.random() * 6. + add_on 1102 | self.y_error = random.random() * 6. + 1. 1103 | else: 1104 | self.x_error = random.random() * -6. - 1. 1105 | self.y_error = random.random() * 6. + 1. 1106 | 1107 | self.x_error += add_on_x 1108 | self.y_error -= add_on_y 1109 | 1110 | def hole_error_generator(self): 1111 | # if self.target_object == 'hexagon': 1112 | # add_on_x = 2.5 1113 | # add_on_y = 1 1114 | # else: 1115 | add_on_x = 2 1116 | add_on_y = 2 1117 | mag = 6. 1118 | 1119 | rand_error_x1 = random.random() * mag + 1 1120 | rand_error_x2 = random.random() * -mag - 1 1121 | 1122 | rand_error_y1 = random.random() * mag + 1 1123 | rand_error_y2 = random.random() * -mag - 1 1124 | 1125 | if random.random() <= 0.5: 1126 | self.x_error = rand_error_x1 + add_on_x 1127 | else: 1128 | self.x_error = rand_error_x2 + add_on_x 1129 | 1130 | if random.random() <= 0.5: 1131 | self.y_error = rand_error_y1 - add_on_y 1132 | else: 1133 | self.y_error = rand_error_y2 - add_on_y 1134 | 1135 | def error_generator(self): 1136 | # self.pose_y_error = 0. #(random.random() - 0.5) * 10 1137 | # self.pose_z_error = 1138 | # self.pose_z_error = 0. 1139 | # self.random_pose = [0., self.pose_y_error, self.pose_z_error] 1140 | self.mode_generator() 1141 | self.x_error = random.random() * -6. - 1. 1142 | self.y_error = random.random() * 6. + 1. 1143 | self.theta_error = (random.random() - 0.5) * 20 1144 | if random.random() < 0.2: 1145 | self.theta_error = 0 1146 | 1147 | if self.mode < 4: 1148 | rand_error = random.random() 1149 | if rand_error <= 0.2: 1150 | self.x_error = 0. 1151 | elif 0.2 < rand_error <= 0.4: 1152 | self.y_error = 0. 1153 | 1154 | elif self.mode == 4: 1155 | self.y_error = 0. 1156 | if self.target_object != 'hexagon': 1157 | self.x_error = (random.random() - 0.5) * 12 + 1 1158 | else: 1159 | self.x_error = (random.random() - 0.5) * 14 + 2 1160 | if 0 < self.x_error <= 4.: 1161 | self.x_error = random.random() * 5 + 4. 1162 | 1163 | elif self.mode == 5: 1164 | self.x_error = 0. 1165 | self.y_error = (random.random() - 0.5) * 12 - 1 1166 | 1167 | elif self.mode == 6 or self.mode == 7: 1168 | self.U_shape_error_generator() 1169 | 1170 | elif self.mode == 8: 1171 | self.hole_error_generator() 1172 | 1173 | elif self.mode in [9, 10, 11, 12]: 1174 | # self.x_error = random.random() * 7. + 1. 1175 | self.x_error = 0 1176 | self.y_error = random.random() * 6. + 1. 1177 | # self.y_error = 3 1178 | 1179 | # elif self.mode == 10: 1180 | # self.x_error = random.random() * -7. - 1. 1181 | # self.y_error = 0 1182 | 1183 | # elif self.mode == 11: 1184 | # self.x_error = random.random() * 7. + 1. 
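
# ---------------------------------------------------------------------------
# Worked example (illustration only): tracing reward_function_3() above with
# margin = 4 and a few concrete (error_before, error_after) pairs.  `env` is
# assumed to be a Packing_env instance; the values follow directly from the
# branches of that function.
#
#   env.reward_function_3( 2., -1.)  ->  2.0   # collision corrected: positive reward
#   env.reward_function_3( 5.,  2.)  ->  3.0   # still colliding, but improved
#   env.reward_function_3(-2.,  3.)  -> -3.0   # safe zone -> collision: penalty
#   env.reward_function_3(-1., -2.)  ->  0.0   # stayed inside the safe zone
#
# In check_boundary() below these per-axis terms are then scaled: the x and y
# rewards are multiplied by 3, while the theta term is simply
# |theta_before| - |theta_after|.
# ---------------------------------------------------------------------------
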
1185 | # self.y_error = 0 1186 | print('initial error', 'x', self.x_error, 'y', self.y_error, 'theta', 1187 | self.theta_error, 'mode', self.mode) 1188 | 1189 | def regrasp(self, graspForce, random_pose): 1190 | object_cart_info = list( 1191 | self.robot.objectCartesianDict[self.target_object]) 1192 | self.robot.object_regrasp(object_cart_info[0], object_cart_info[1], 1193 | graspForce, self.target_object, random_pose) 1194 | -------------------------------------------------------------------------------- /reinforcement learning/TD3/slip_detector_both.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from sensor_msgs.msg import CompressedImage, JointState, ChannelFloat32 4 | from std_msgs.msg import Bool 5 | import numpy as np 6 | import time 7 | from scipy import ndimage 8 | import matplotlib.pyplot as plt 9 | from visualization_msgs.msg import * 10 | from collections import deque 11 | # from gripper import * 12 | # from ik.helper import * 13 | # from robot_comm.srv import * 14 | # from wsg_50_common.msg import Status 15 | import rospy, math, cv2, os, pickle 16 | import std_srvs.srv 17 | 18 | 19 | class slip_detector: 20 | def __init__(self): 21 | self.kernal = self.make_kernal(5, 'circle') 22 | self.kernal1 = self.make_kernal(4, 'rect') 23 | self.kernal2 = self.make_kernal(7, 'circle') 24 | self.kernal3 = self.make_kernal(25, 'circle') 25 | self.kernal4 = self.make_kernal(3, 'circle') 26 | self.kernal5 = self.make_kernal(5, 'rect') 27 | self.kernal6 = self.make_kernal(25, 'circle') 28 | self.kernal_size = 25 29 | self.kernal7 = self.make_kernal(self.kernal_size, 'circle') 30 | self.kernal8 = self.make_kernal(2, 'rect') 31 | self.kernal9 = self.make_kernal(2, 'rect') 32 | self.kernal10 = self.make_kernal(45, 'circle') 33 | self.cols, self.rows, self.cha = 320, 427, 3 34 | self.scale = 1 35 | self.con_flag1 = False 36 | self.con_flag2 = False 37 | self.refresh1 = False 38 | self.refresh2 = False 39 | self.restart1 = False 40 | self.restart2 = False 41 | self.slip_indicator1 = False 42 | self.slip_indicator2 = False 43 | self.image_sub1 = rospy.Subscriber("/raspicam_node1/image/compressed", 44 | CompressedImage, 45 | self.call_back1, 46 | queue_size=1, 47 | buff_size=2**24) 48 | self.image_sub2 = rospy.Subscriber("/raspicam_node2/image/compressed", 49 | CompressedImage, 50 | self.call_back2, 51 | queue_size=1, 52 | buff_size=2**24) 53 | 54 | self.collideThre = 1.5 #1.2 55 | self.collide_rotation = 2. #2. 
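
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): the two thresholds just
# above (collideThre for translation, collide_rotation for rotation) are used
# in the image callbacks further below, and reduce to this decision rule on
# the tracked marker displacements (u, v) and the estimated in-plane rotation
# theta.  The function name `is_slipping` is made up for this example.
import numpy as np

def is_slipping(u, v, theta_deg, collide_thre=1.5, collide_rotation=2.0):
    u, v = np.asarray(u, dtype=float), np.asarray(v, dtype=float)
    uv = np.sqrt(u ** 2 + v ** 2)
    translation = max(abs(u.mean()), abs(v.mean()), abs(uv.mean()))
    return translation > collide_thre or abs(theta_deg) > collide_rotation

# e.g. a uniform 2-pixel shift of all markers trips the detector:
# is_slipping([2.0] * 10, [0.0] * 10, 0.0)  ->  True
# Note: converting the estimateRigidTransform angle from radians to degrees is
# `np.arctan(...) * 180. / np.pi`; the callbacks below multiply by np.pi
# instead, which looks like a typo.
# ---------------------------------------------------------------------------
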
56 | self.marker_thre = 100 57 | self.showimage1 = False 58 | self.showimage2 = False 59 | self.data1 = deque(maxlen=75) 60 | self.data2 = deque(maxlen=75) 61 | # self.timestamp1 = deque(maxlen=75) 62 | # self.markermotion1 = deque(maxlen=75) 63 | # self.image2save2 = deque(maxlen=75) 64 | # self.timestamp2 = deque(maxlen=75) 65 | # self.markermotion2 = deque(maxlen=75) 66 | 67 | def rgb2gray(self, rgb): 68 | return np.dot(rgb[..., :3], [0.33, 0.33, 0.34]) 69 | 70 | def make_kernal(self, n, type): 71 | if type is 'circle': 72 | kernal = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (n, n)) 73 | else: 74 | kernal = cv2.getStructuringElement(cv2.MORPH_RECT, (n, n)) 75 | return kernal 76 | 77 | def defect_mask(self, im_cal): 78 | pad = 60 79 | var0 = 60 #left up 80 | var1 = 60 # right up 81 | var2 = 65 # right down 82 | var3 = 60 # left down 83 | im_mask = np.ones((im_cal.shape)) 84 | triangle0 = np.array([[0, 0], [var0, 0], [0, var0]]) 85 | triangle1 = np.array([[im_mask.shape[1] - var1, 0], 86 | [im_mask.shape[1], 0], [im_mask.shape[1], var1]]) 87 | triangle2 = np.array([[im_mask.shape[1] - var2, im_mask.shape[0]], [im_mask.shape[1], im_mask.shape[0]], \ 88 | [im_mask.shape[1], im_mask.shape[0]-var2]]) 89 | triangle3 = np.array([[0, im_mask.shape[0]], 90 | [0, im_mask.shape[0] - var3], 91 | [var3, im_mask.shape[0]]]) 92 | color = [0] #im_mask 93 | cv2.fillConvexPoly(im_mask, triangle0, color) 94 | cv2.fillConvexPoly(im_mask, triangle1, color) 95 | cv2.fillConvexPoly(im_mask, triangle2, color) 96 | cv2.fillConvexPoly(im_mask, triangle3, color) 97 | im_mask[:pad, :] = 0 98 | im_mask[-pad:, :] = 0 99 | # im_mask[:, :pad] = 0 100 | im_mask[:, -pad:] = 0 101 | return im_mask 102 | 103 | def make_thre_mask(self, im_cal): 104 | thre_image = np.zeros(im_cal.shape, dtype=np.uint8) 105 | previous_mask = np.zeros(im_cal.shape, dtype=np.uint8) 106 | for i in range(10, 80, 30): 107 | _, mask = cv2.threshold(im_cal.astype(np.uint8), i, 255, 108 | cv2.THRESH_BINARY_INV) 109 | mask_expand = cv2.dilate(mask, self.kernal10, iterations=1) 110 | mask_erode = cv2.erode(mask_expand, self.kernal10, iterations=1) 111 | thre_image += (mask_erode - previous_mask) / 255 * i 112 | previous_mask = mask_erode 113 | # cv2.imshow('threshold', thre_image) 114 | # cv2.waitKey(0) 115 | thre_image += (np.ones(im_cal.shape, dtype=np.uint8) - 116 | previous_mask / 255) * 80 + 10 117 | 118 | return thre_image 119 | 120 | def creat_mask_2(self, raw_image, dmask): 121 | # t = time.time() 122 | scale = 2 123 | m, n = raw_image.shape[1], raw_image.shape[0] 124 | raw_image = cv2.pyrDown(raw_image.astype(np.uint8)).astype(np.float32) 125 | blur = cv2.GaussianBlur(raw_image, (25, 25), 0) 126 | blur2 = cv2.GaussianBlur(raw_image, (5, 5), 0) 127 | # print(time.time() - t) 128 | diff = blur - blur2 129 | # diff = cv2.resize(diff, (int(m / scale), int(n / scale))) 130 | # diff = (diff - np.min(diff)) / (np.max(diff) - np.min(diff)) * 255 131 | diff *= 16.0 132 | # cv2.imshow('blur2', blur.astype(np.uint8)) 133 | # cv2.waitKey(1) 134 | 135 | diff[diff < 0.] = 0. 136 | diff[diff > 255.] = 255. 
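
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): creat_mask_2() above
# segments the black markers with a difference-of-Gaussians: a wide blur minus
# a narrow blur leaves the dark dots as strong responses, which are then
# thresholded per channel.  Minimal standalone version, assuming `img` is a
# BGR uint8 GelSight frame; the per-sensor defect mask and the final dilation
# used in the original are omitted here.
import cv2
import numpy as np

def marker_mask(img):
    small = cv2.pyrDown(img.astype(np.uint8)).astype(np.float32)
    diff = cv2.GaussianBlur(small, (25, 25), 0) - cv2.GaussianBlur(small, (5, 5), 0)
    diff = np.clip(diff * 16.0, 0., 255.)
    mask = (diff[:, :, 0] > 25) & (diff[:, :, 2] > 25) & (diff[:, :, 1] > 120)
    # upsample back to the original resolution; 255 = background, 0 = marker
    mask = cv2.resize(mask.astype(np.uint8), (img.shape[1], img.shape[0]))
    return (1 - mask) * 255
# ---------------------------------------------------------------------------
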
137 | 138 | # diff = cv2.GaussianBlur(diff, (5, 5), 0) 139 | # cv2.imshow('diff', diff.astype(np.uint8)) 140 | # cv2.waitKey(1) 141 | mask = (diff[:, :, 0] > 25) & (diff[:, :, 2] > 25) & (diff[:, :, 1] > 142 | 120) 143 | # cv2.imshow('mask', mask.astype(np.uint8) * 255) 144 | # cv2.waitKey(1) 145 | mask = cv2.resize(mask.astype(np.uint8), (m, n)) 146 | mask = mask * dmask 147 | mask = cv2.dilate(mask, self.kernal4, iterations=1) 148 | 149 | # mask = cv2.erode(mask, self.kernal4, iterations=1) 150 | # print(time.time() - t) 151 | return (1 - mask) * 255 152 | 153 | def find_dots(self, binary_image): 154 | # down_image = cv2.resize(binary_image, None, fx=2, fy=2) 155 | params = cv2.SimpleBlobDetector_Params() 156 | # Change thresholds 157 | params.minThreshold = 1 158 | params.maxThreshold = 12 159 | params.minDistBetweenBlobs = 9 160 | params.filterByArea = True 161 | params.minArea = 9 162 | params.filterByCircularity = False 163 | params.filterByConvexity = False 164 | params.filterByInertia = False 165 | params.minInertiaRatio = 0.5 166 | detector = cv2.SimpleBlobDetector_create(params) 167 | keypoints = detector.detect(binary_image.astype(np.uint8)) 168 | return keypoints 169 | 170 | def flow_calculate_in_contact(self, keypoints2, x_initial, y_initial, 171 | u_ref, v_ref): 172 | x2, y2, u, v, x1_paired, y1_paired, x2_paired, y2_paired = [], [], [], [], [], [], [], [] 173 | 174 | for i in range(len(keypoints2)): 175 | x2.append(keypoints2[i].pt[0] / self.scale) 176 | y2.append(keypoints2[i].pt[1] / self.scale) 177 | 178 | x2 = np.array(x2) 179 | y2 = np.array(y2) 180 | 181 | refresh = False 182 | for i in range(x2.shape[0]): 183 | 184 | distance = list(((np.array(x_initial) - x2[i])**2 + 185 | (np.array(y_initial) - y2[i])**2)) 186 | if len(distance) == 0: 187 | break 188 | min_index = distance.index(min(distance)) 189 | u_temp = x2[i] - x_initial[min_index] 190 | v_temp = y2[i] - y_initial[min_index] 191 | shift_length = np.sqrt(u_temp**2 + v_temp**2) 192 | # print 'length',shift_length 193 | 194 | if shift_length < 12: 195 | # print xy2.shape,min_index,len(distance) 196 | x1_paired.append(x_initial[min_index] - u_ref[min_index]) 197 | y1_paired.append(y_initial[min_index] - v_ref[min_index]) 198 | x2_paired.append(x2[i]) 199 | y2_paired.append(y2[i]) 200 | u.append(u_temp + u_ref[min_index]) 201 | v.append(v_temp + v_ref[min_index]) 202 | 203 | del x_initial[min_index], y_initial[min_index], u_ref[ 204 | min_index], v_ref[min_index] 205 | 206 | if shift_length > 7: 207 | refresh = True 208 | else: 209 | refresh = False 210 | 211 | return x1_paired, y1_paired, x2_paired, y2_paired, u, v, refresh 212 | 213 | def flow_calculate_global(self, keypoints2, x_initial, y_initial, u_ref, 214 | v_ref): 215 | x2, y2, u, v, x1_paired, y1_paired, x2_paired, y2_paired = [], [], [], [], [], [], [], [] 216 | x1_return, y1_return, x2_return, y2_return, u_return, v_return = [],[],[],[],[],[] 217 | 218 | for i in range(len(keypoints2)): 219 | x2.append(keypoints2[i].pt[0] / self.scale) 220 | y2.append(keypoints2[i].pt[1] / self.scale) 221 | 222 | x2 = np.array(x2) 223 | y2 = np.array(y2) 224 | 225 | for i in range(x2.shape[0]): 226 | distance = list(((np.array(x_initial) - x2[i])**2 + 227 | (np.array(y_initial) - y2[i])**2)) 228 | if len(distance) == 0: 229 | break 230 | min_index = distance.index(min(distance)) 231 | u_temp = x2[i] - x_initial[min_index] 232 | v_temp = y2[i] - y_initial[min_index] 233 | shift_length = np.sqrt(u_temp**2 + v_temp**2) 234 | # print 'length',shift_length 235 | if 
shift_length < 12: 236 | x1_paired.append(x_initial[min_index] - u_ref[min_index]) 237 | y1_paired.append(y_initial[min_index] - v_ref[min_index]) 238 | x2_paired.append(x2[i]) 239 | y2_paired.append(y2[i]) 240 | u.append(u_temp + u_ref[min_index]) 241 | v.append(v_temp + v_ref[min_index]) 242 | 243 | del x_initial[min_index], y_initial[min_index], u_ref[ 244 | min_index], v_ref[min_index] 245 | 246 | x1_return = np.array(x1_paired) 247 | y1_return = np.array(y1_paired) 248 | x2_return = np.array(x2_paired) 249 | y2_return = np.array(y2_paired) 250 | u_return = np.array(u) 251 | v_return = np.array(v) 252 | 253 | return x1_return, y1_return, x2_return, y2_return, u_return, v_return, \ 254 | list(x2_paired), list(y2_paired), np.array(x2_paired), np.array(y2_paired) 255 | # return x1_paired,y1_paired,x2_paired,y2_paired,u,v 256 | 257 | def dispOpticalFlow(self, im_cal, x, y, u, v, name, slip_indicator): 258 | # mask = np.zeros_like(im_cal) 259 | mask2 = np.zeros_like(im_cal) 260 | amf = 1 261 | x = np.array(x).astype(np.int16) 262 | y = np.array(y).astype(np.int16) 263 | for i in range(u.shape[0]): #self.u_sum 264 | 265 | mask2 = cv2.line(mask2, 266 | (int(x[i] + u[i] * amf), int(y[i] + v[i] * amf)), 267 | (x[i], y[i]), [0, 120, 120], 2) 268 | 269 | img = cv2.add(im_cal / 1.5, mask2) 270 | 271 | if slip_indicator: 272 | img = img + self.im_slipsign / 2 273 | 274 | cv2.imshow(name, img.astype(np.uint8)) 275 | cv2.waitKey(1) 276 | 277 | # raw_input("Press Enter to continue...") 278 | 279 | def call_back1(self, data): 280 | t = time.time() 281 | np_arr = np.fromstring(data.data, np.uint8) 282 | raw_imag = cv2.imdecode(np_arr, cv2.IMREAD_COLOR) 283 | 284 | if not self.con_flag1: 285 | 286 | imgwc = np.array(raw_imag).astype(np.float32) 287 | self.im_slipsign = np.zeros(imgwc.shape) 288 | cv2.putText(self.im_slipsign, 'Slip', (210, 30), 289 | cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, 290 | cv2.LINE_AA) 291 | im_gray = self.rgb2gray(imgwc) #.astype(np.uint8) 292 | self.dmask1 = self.defect_mask(im_gray) 293 | final_image = self.creat_mask_2(imgwc, self.dmask1) 294 | keypoints = self.find_dots(final_image) 295 | self.u_sum1 = np.zeros(len(keypoints)) 296 | self.v_sum1 = np.zeros(len(keypoints)) 297 | self.u_addon1 = list(self.u_sum1) 298 | self.v_addon1 = list(self.v_sum1) 299 | self.x1_last1 = [] 300 | self.y1_last1 = [] 301 | for i in range(len(keypoints)): 302 | self.x1_last1.append(keypoints[i].pt[0] / self.scale) 303 | self.y1_last1.append(keypoints[i].pt[1] / self.scale) 304 | self.x_iniref1 = list(self.x1_last1) 305 | self.y_iniref1 = list(self.y1_last1) 306 | self.con_flag1 = True 307 | self.absmotion1 = 0 308 | # print("sensor 1 finishes pre-calculation") 309 | 310 | else: #start detecting slip 311 | # print('time', time.time()) 312 | imgwc = np.array(raw_imag).astype(np.float32) 313 | im_gray = self.rgb2gray(imgwc) #.astype(np.uint8) 314 | ill_back = cv2.GaussianBlur(im_gray, (31, 31), 31) 315 | im_cal = (im_gray - ill_back + 50) * 2 + 20 316 | im_cal = np.clip(im_cal, 0, 255) 317 | im_cal_show = np.array(imgwc) 318 | if self.restart1: 319 | self.con_flag1 = False 320 | self.restart1 = False 321 | self.absmotion1 = 0 322 | self.slip_indicator1 = False 323 | 324 | else: 325 | final_image = self.creat_mask_2(imgwc, self.dmask1) 326 | 327 | if self.refresh1: 328 | keypoints = self.find_dots(final_image) 329 | x1, y1, x2, y2, u, v, self.x_iniref1, self.y_iniref1, self.u_addon1, self.v_addon1\ 330 | = self.flow_calculate_global(keypoints, list(self.x_iniref1), list(self.y_iniref1), \ 331 | 
list(self.u_addon1), list(self.v_addon1)) 332 | self.refresh1 = False 333 | else: 334 | keypoints = self.find_dots(final_image) 335 | x1, y1, x2, y2, u, v, self.refresh1 = self.flow_calculate_in_contact( 336 | keypoints, list(self.x_iniref1), list(self.y_iniref1), 337 | list(self.u_addon1), list(self.v_addon1)) 338 | 339 | x2_center = np.expand_dims(np.array(x2), axis=1) 340 | y2_center = np.expand_dims(np.array(y2), axis=1) 341 | x1_center = np.expand_dims(np.array(x1), axis=1) 342 | y1_center = np.expand_dims(np.array(y1), axis=1) 343 | # p2_center = np.expand_dims(np.concatenate( 344 | # (x2_center, y2_center), axis=1), 345 | # axis=0) 346 | # p1_center = np.expand_dims(np.concatenate( 347 | # (x1_center, y1_center), axis=1), 348 | # axis=0) 349 | p2_center = np.concatenate((x2_center, y2_center), 350 | axis=1).astype(np.uint16) 351 | p1_center = np.concatenate((x1_center, y1_center), 352 | axis=1).astype(np.uint16) 353 | # tran_matrix = cv2.estimateRigidTransform( 354 | # p1_center, p2_center, False) 355 | theta = 0 356 | try: 357 | tran_matrix = cv2.estimateRigidTransform( 358 | p1_center, p2_center, False) 359 | if tran_matrix is not None: 360 | theta = np.arctan(-tran_matrix[0, 1] / 361 | tran_matrix[0, 0]) * 180. * np.pi 362 | else: 363 | theta = 0 364 | except: 365 | pass 366 | # print('theta1', theta) 367 | # theta = np.arctan( 368 | # -tran_matrix[0, 1] / tran_matrix[0, 0]) * 180. * np.pi 369 | 370 | u_sum, v_sum, uv_sum = np.array(u), np.array(v), np.sqrt( 371 | np.array(u)**2 + np.array(v)**2) 372 | self.absmotion1 = np.mean(uv_sum) 373 | 374 | self.slip_indicator1 = max( 375 | np.abs(np.mean(u_sum)), np.abs(np.mean(v_sum)), 376 | np.abs(np.mean(uv_sum))) > self.collideThre or abs( 377 | theta) > self.collide_rotation 378 | 379 | if self.showimage1: 380 | self.dispOpticalFlow(im_cal_show, x2, y2, u_sum, v_sum, 381 | 'flow1', self.slip_indicator1) 382 | 383 | # if self.slip_indicator1: 384 | # print("sensor 1 slip!!!") 385 | 386 | self.data1.append([raw_imag, time.time(), self.absmotion1]) 387 | 388 | def call_back2(self, data): 389 | t = time.time() 390 | np_arr = np.fromstring(data.data, np.uint8) 391 | raw_imag = cv2.imdecode(np_arr, cv2.IMREAD_COLOR) 392 | 393 | if not self.con_flag2: 394 | 395 | imgwc = np.array(raw_imag).astype(np.float32) 396 | self.im_slipsign = np.zeros(imgwc.shape) 397 | cv2.putText(self.im_slipsign, 'Slip', (210, 30), 398 | cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, 399 | cv2.LINE_AA) 400 | im_gray = self.rgb2gray(imgwc) #.astype(np.uint8) 401 | self.dmask2 = self.defect_mask(im_gray) 402 | final_image = self.creat_mask_2(imgwc, self.dmask2) 403 | keypoints = self.find_dots(final_image) 404 | self.u_sum2 = np.zeros(len(keypoints)) 405 | self.v_sum2 = np.zeros(len(keypoints)) 406 | self.u_addon2 = list(self.u_sum2) 407 | self.v_addon2 = list(self.v_sum2) 408 | self.x1_last2 = [] 409 | self.y1_last2 = [] 410 | for i in range(len(keypoints)): 411 | self.x1_last2.append(keypoints[i].pt[0] / self.scale) 412 | self.y1_last2.append(keypoints[i].pt[1] / self.scale) 413 | self.x_iniref2 = list(self.x1_last2) 414 | self.y_iniref2 = list(self.y1_last2) 415 | self.con_flag2 = True 416 | self.absmotion2 = 0 417 | # print("sensor 2 finishes pre-calculation") 418 | 419 | else: #start detecting slip 420 | # print('time', time.time()) 421 | imgwc = np.array(raw_imag).astype(np.float32) 422 | im_gray = self.rgb2gray(imgwc) #.astype(np.uint8) 423 | ill_back = cv2.GaussianBlur(im_gray, (31, 31), 31) 424 | im_cal = (im_gray - ill_back + 50) * 2 + 20 425 | im_cal = 
np.clip(im_cal, 0, 255) 426 | im_cal_show = np.array(imgwc) 427 | if self.restart2: 428 | self.con_flag2 = False 429 | self.restart2 = False 430 | self.absmotion2 = 0 431 | self.slip_indicator2 = False 432 | 433 | else: 434 | final_image = self.creat_mask_2(imgwc, self.dmask2) 435 | 436 | if self.refresh2: 437 | keypoints = self.find_dots(final_image) 438 | x1, y1, x2, y2, u, v, self.x_iniref2, self.y_iniref2, self.u_addon2, self.v_addon2\ 439 | = self.flow_calculate_global(keypoints, list(self.x_iniref2), list(self.y_iniref2), \ 440 | list(self.u_addon2), list(self.v_addon2)) 441 | self.refresh2 = False 442 | else: 443 | keypoints = self.find_dots(final_image) 444 | x1, y1, x2, y2, u, v, self.refresh2 = self.flow_calculate_in_contact( 445 | keypoints, list(self.x_iniref2), list(self.y_iniref2), 446 | list(self.u_addon2), list(self.v_addon2)) 447 | 448 | x2_center = np.expand_dims(np.array(x2), axis=1) 449 | y2_center = np.expand_dims(np.array(y2), axis=1) 450 | x1_center = np.expand_dims(np.array(x1), axis=1) 451 | y1_center = np.expand_dims(np.array(y1), axis=1) 452 | # p2_center = np.expand_dims(np.concatenate( 453 | # (x2_center, y2_center), axis=1), 454 | # axis=0) 455 | # p1_center = np.expand_dims(np.concatenate( 456 | # (x1_center, y1_center), axis=1), 457 | # axis=0) 458 | p2_center = np.concatenate((x2_center, y2_center), 459 | axis=1).astype(np.uint16) 460 | p1_center = np.concatenate((x1_center, y1_center), 461 | axis=1).astype(np.uint16) 462 | # tran_matrix = cv2.estimateRigidTransform( 463 | # p1_center, p2_center, False) 464 | theta = 0 465 | try: 466 | tran_matrix = cv2.estimateRigidTransform( 467 | p1_center, p2_center, False) 468 | if tran_matrix is not None: 469 | theta = np.arctan(-tran_matrix[0, 1] / 470 | tran_matrix[0, 0]) * 180. 
* np.pi 471 | else: 472 | theta = 0 473 | except: 474 | pass 475 | # print('theta2', theta) 476 | u_sum, v_sum, uv_sum = np.array(u), np.array(v), np.sqrt( 477 | np.array(u)**2 + np.array(v)**2) 478 | self.absmotion2 = np.mean(uv_sum) 479 | 480 | self.slip_indicator2 = max( 481 | np.abs(np.mean(u_sum)), np.abs(np.mean(v_sum)), 482 | np.abs(np.mean(uv_sum))) > self.collideThre or abs( 483 | theta) > self.collide_rotation 484 | 485 | if self.showimage2: 486 | self.dispOpticalFlow(im_cal_show, x2, y2, u_sum, v_sum, 487 | 'flow2', self.slip_indicator2) 488 | 489 | # if self.slip_indicator2: 490 | # print("sensor 2 slip!!!") 491 | 492 | self.data2.append([raw_imag, time.time(), self.absmotion2]) 493 | 494 | 495 | def main(): 496 | print "start" 497 | rospy.init_node('slip_detector', anonymous=True) 498 | while not rospy.is_shutdown(): 499 | # time.sleep(2) 500 | # open_gripper() 501 | # time.sleep(1) 502 | # force_initial = 10 503 | # close_gripper_f(50,force_initial) 504 | # time.sleep(0.1) 505 | slip = slip_detector() 506 | rospy.spin() 507 | 508 | 509 | if __name__ == "__main__": 510 | main() 511 | 512 | #%% 513 | -------------------------------------------------------------------------------- /reinforcement learning/TD3/train_corner.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 5 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 6 | 7 | import torch 8 | import numpy as np 9 | from TD3_corner import TD3 10 | from utils import ReplayBuffer 11 | import os 12 | from packing_enviroment_corner_v3 import Robot_motion, slip_detector, Packing_env 13 | import scipy 14 | import rospy 15 | import cv2 16 | import os.path 17 | import random 18 | from marker_detection import marker_detection 19 | import time 20 | 21 | data_folder = "/media/mcube/SERVER_HD/siyuan/policy_finetune/" 22 | test_mode = True 23 | test_object = 'vitamin' 24 | 25 | 26 | def rgb2gray(rgb): 27 | return np.dot(rgb[..., :3], [0.33, 0.33, 0.34]) 28 | 29 | 30 | def preprocess(state_raw, rows, cols, num_frame, folder_num, state_full, mode, 31 | done, r_matrix, object_name, marker_maker): 32 | 33 | if test_mode: 34 | save_data = False 35 | else: 36 | save_data = True 37 | img2_seq = [] 38 | use_color = True 39 | m, n = 320, 427 40 | pad_m = 145 41 | pad_n = 200 42 | 43 | for i in range(24): 44 | 45 | # if not use_color: 46 | # imgwc_gray = rgb2gray(state_raw[i, :, :, :]) 47 | # else: 48 | imgwc_gray = np.array(state_raw[i, :, :, :]).astype(np.uint8) 49 | # cv2.imshow('imgwc', imgwc.astype(np.uint8)) 50 | # cv2.waitKey(0) 51 | # t0 = time.time() 52 | _, marker_image = marker_maker.marker_detection(imgwc_gray) 53 | # print('max value', np.max(marker_image), marker_image.shape) 54 | # print('time', time.time() - t0) 55 | if not done and save_data: 56 | save_folder = data_folder + object_name + '/' + str(folder_num) 57 | if not os.path.isdir(save_folder): 58 | os.mkdir(save_folder) 59 | cv2.imwrite(save_folder + '/' + str(i) + '.jpg', 60 | state_raw[i, :, :, :]) 61 | cv2.imwrite(save_folder + '/' + 'marker' + str(i) + '.jpg', 62 | marker_image * 255) 63 | 64 | # img2_temp = imgwc_gray[int(m / 2) - pad_m:int(m / 2) + pad_m, 65 | # int(n / 2) - pad_n:int(n / 2) + pad_n, :] 66 | 67 | marker_image = marker_image[int(m / 2) - pad_m:int(m / 2) + pad_m, 68 | int(n / 2) - pad_n:int(n / 2) + pad_n] 69 | 70 | # img2_temp = cv2.resize(img2_temp, (200, 200)).astype(np.float32) 71 | img2_temp = cv2.resize(marker_image, (218, 
300)).astype(np.float32) 72 | 73 | img2_temp = np.expand_dims(img2_temp, 2) 74 | # if i == 0 or i == 12: 75 | # mean_2 = np.mean(img2_temp) 76 | # std_2 = np.std(img2_temp) 77 | 78 | # img2_temp = (img2_temp - mean_2) / std_2 79 | if not use_color: 80 | img2_seq.append(img2_temp) 81 | else: 82 | # if i == 0: #or i == 40: 83 | # img2_seq = img2_temp.copy() 84 | # img2_seq = img2_seq.transpose(2, 0, 1) 85 | # else: 86 | # img2_seq = np.concatenate( 87 | # (img2_seq, img2_temp.transpose(2, 0, 1)), axis=0) 88 | 89 | img2_seq.append(img2_temp.transpose(2, 0, 1)) 90 | 91 | img2_temp = np.array(img2_seq) 92 | img2_temp1 = np.expand_dims(img2_temp, axis=0) 93 | # img = np.concatenate((img1,img2),axis = 0) 94 | if not done and save_data: 95 | np.save(save_folder + '/' + 'label.npy', state_full) 96 | np.save(save_folder + '/' + 'r_matrix.npy', r_matrix) 97 | 98 | # signal_quality = (np.mean(img2_temp[:12, :, :, :] * std_2 + mean_2) < 99 | # 40) or (np.mean(img2_temp[12:, :, :, :] * std_2 + mean_2) 100 | # < 40) 101 | signal_quality = False 102 | 103 | X = torch.from_numpy(img2_temp1).type(torch.FloatTensor) 104 | return X, signal_quality 105 | 106 | 107 | def save_buffer(data1, data2, data3, data4, data5, data6, data7, number): 108 | np.savez('/media/mcube/data/Data_packing_RL/buffer/'+str(number)+'.npz', state = data1, action = data2, \ 109 | reward = data3, next_state = data4, done = data5, state_full = data6, next_state_full = data7) 110 | 111 | 112 | def load_buffer(replay_buffer): 113 | folder = '/media/mcube/data/Data_packing_RL/buffer/' 114 | path, dirs, files = next(os.walk(folder)) 115 | for i in range(min(len(files), 950)): 116 | data = np.load(path + files[i]) 117 | replay_buffer.add((torch.from_numpy(data['state']), data['action'], data['reward'], \ 118 | torch.from_numpy(data['next_state']), data['done'], data['state_full'], \ 119 | data['next_state_full'])) 120 | return replay_buffer 121 | 122 | 123 | def check_regrasp(mode, state_full): 124 | if state_full[0] >= 0 and state_full[1] >= 0 and mode == 0: 125 | need_regrasp = True 126 | elif state_full[0] <= 0 and state_full[1] <= 0 and mode > 0: 127 | need_regrasp = True 128 | else: 129 | need_regrasp = False 130 | return need_regrasp 131 | 132 | 133 | def hole_error(): 134 | add_on_x = 2 135 | add_on_y = 2 136 | error_px = [-5., -3., -1., 5., 7., 9.] 137 | # error_px = [2., -3.] 138 | error_py = [5., 3., 1., -5., -7., -9.] 139 | # error_py = [-10., 3.] 140 | error_theta = [-10., -6., -2., 0., 2., 6., 10.] 141 | # error_theta = [-2., 0., 2.] 142 | x_error_temp, y_error_temp, theta_error_temp = np.meshgrid( 143 | error_px, error_py, error_theta) 144 | x_error = x_error_temp.flatten() 145 | y_error = y_error_temp.flatten() 146 | theta_error = theta_error_temp.flatten() 147 | return x_error, y_error, theta_error 148 | 149 | 150 | def train(): 151 | ######### Hyperparameters ######### 152 | env_name = 'policy_test_vitamin_2.1' 153 | log_interval = 1 # print avg reward after interval 154 | random_seed = 0 155 | gamma = 0.99 # discount for future rewards 156 | batch_size = 30 # num of transitions sampled from replay buffer 157 | lr = 1e-5 158 | 159 | if not test_mode: 160 | exploration_noise = 0.1 161 | # exploration_noise = 1. 162 | exploration_noise_min = 0.1 163 | else: 164 | exploration_noise = 0.001 165 | # exploration_noise = 1. 
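
# ---------------------------------------------------------------------------
# Illustration (not part of the original file): hole_error() above enumerates
# the full Cartesian product of the test offsets via meshgrid + flatten, so in
# test_mode (where max_episodes = len(x_error_list)) every (x, y, theta)
# combination is visited exactly once.  The offset lists are copied verbatim
# from hole_error():
import numpy as np

error_px = [-5., -3., -1., 5., 7., 9.]
error_py = [5., 3., 1., -5., -7., -9.]
error_theta = [-10., -6., -2., 0., 2., 6., 10.]
xs, ys, ts = np.meshgrid(error_px, error_py, error_theta)
print(xs.flatten().shape)   # (252,)  = 6 * 6 * 7 episodes in the test sweep
# ---------------------------------------------------------------------------
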
166 | exploration_noise_min = 0.001 167 | polyak = 0.995 # target policy update parameter (1-tau) 168 | policy_noise = 0.2 # target policy smoothing noise 169 | noise_clip = 0.5 170 | policy_delay = 4 # delayed policy updates parameter 171 | max_episodes = 1500 # max num of episodes 172 | max_timesteps = 15 # max timesteps in one episode 173 | directory = "./preTrained/{}".format(env_name) # save trained models 174 | if not os.path.isdir(directory): 175 | os.mkdir(directory) 176 | filename = "TD3_{}_{}".format(env_name, random_seed) 177 | graspForce = 10 178 | 179 | cols, rows = 320, 427 180 | 181 | if not test_mode: 182 | rgrasp_threshold = { 183 | 'circle': 5, 184 | 'ellipse': 5, 185 | 'rectangle': 5, 186 | 'hexagon': 5, 187 | 'vitamin': 10 188 | } 189 | else: 190 | rgrasp_threshold = { 191 | 'circle': 15, 192 | 'ellipse': 15, 193 | 'rectangle': 15, 194 | 'hexagon': 15, 195 | 'vitamin': 15 196 | } 197 | num_frame = 8 198 | 199 | ################################### 200 | 201 | env = Packing_env(num_frame) 202 | robot = Robot_motion() 203 | 204 | state_dim = 8 * 3 * 2 205 | action_dim = 3 206 | max_action = 5.0 207 | 208 | if not (os.path.isfile(directory + '/actor_loss.npy') 209 | and os.path.isfile(directory + '/critic_loss.npy')): 210 | actor_loss_list = [] 211 | critic_loss_list = [] 212 | np.save(directory + '/actor_loss.npy', actor_loss_list) 213 | np.save(directory + '/critic_loss.npy', critic_loss_list) 214 | 215 | policy = TD3(lr, state_dim, action_dim, max_action) 216 | replay_buffer = ReplayBuffer(max_size=800) 217 | marker_maker = marker_detection() 218 | # replay_buffer = load_buffer(replay_buffer) 219 | print('buffer size', replay_buffer.size) 220 | # policy.load(directory, filename) 221 | policy.freeze_cnnlayer() 222 | # policy.print_param() 223 | 224 | # if random_seed: 225 | # print("Random Seed: {}".format(random_seed)) 226 | # env.seed(random_seed) 227 | # torch.manual_seed(random_seed) 228 | # np.random.seed(random_seed) 229 | 230 | # logging variables: 231 | avg_reward = 0 232 | ep_reward = 0 233 | ep_reward_list = [] 234 | ep_object_list = [] 235 | ep_success_list = [] 236 | ep_trialnum_list = [] 237 | ep_model_list = [] 238 | # ep_reward_list = np.load('reward_log.npy').tolist() 239 | inposition = False 240 | success_sign = False 241 | object_name = '' 242 | bad_data = False 243 | # log_f = open("log.txt", "w+") 244 | 245 | x_error_list, y_error_list, theta_error_list = hole_error() 246 | 247 | if test_mode: 248 | max_episodes = len(x_error_list) 249 | x_error_list_ = [] 250 | y_error_list_ = [] 251 | theta_error_list_ = [] 252 | # ep_reward_list = np.load('reward_log.npy').tolist() 253 | # ep_object_list = np.load('object_log.npy').tolist() 254 | # ep_success_list = np.load('success_log.npy').tolist() 255 | # ep_trialnum_list = np.load('trialnum_log.npy').tolist() 256 | # ep_model_list = np.load('mode_log.npy').tolist() 257 | 258 | robot.setSpeed(600, 200) 259 | robot.robot_reset() 260 | robot.open_gripper() 261 | 262 | # training procedure: 263 | sample_counter = 0 264 | 265 | for episode in range(249, max_episodes): 266 | print( 267 | '###########################################################################' 268 | ) 269 | num_trial = 0 270 | if object_name != env.target_object: 271 | inposition = False 272 | if test_mode: 273 | env.target_object = test_object 274 | object_name = env.target_object 275 | 276 | if not test_mode: 277 | rand_pose = np.array([0., 0., random.random() * 15. - 10.]) 278 | graspForce = random.random() * 20 + 10. 
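
# ---------------------------------------------------------------------------
# Note (illustration only, not part of the original file): the two lines just
# above domain-randomize the grasp during training,
#   rand_pose[2]  ~ Uniform(-10, 5)    (in-hand pose offset passed to pick_up_object)
#   graspForce    ~ Uniform(10, 30)
# whereas the test_mode branch below fixes them to 15 and 10.  The units are
# whatever pick_up_object() expects and are not restated here.
# ---------------------------------------------------------------------------
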
279 | else: 280 | rand_pose = np.array([0., 0., 15.]) 281 | graspForce = 10. 282 | robot.pick_up_object(env.target_object, graspForce, inposition, 283 | env.mode, rand_pose) 284 | inposition = True 285 | if test_mode: 286 | env.x_error, env.y_error, env.theta_error = x_error_list[ 287 | episode], y_error_list[episode], theta_error_list[episode] 288 | # env.x_error, env.y_error, env.theta_error = 9., -9., 10. 289 | env_x_error, env_y_error = robot.error_converter( 290 | env.x_error, env.y_error) 291 | # print('converted error', env_x_error, env_y_error) 292 | 293 | state_full = np.array([env.x_error, env.y_error, env.theta_error]) 294 | state, _, done, _, _, _, r_matrix_next = env.step( 295 | [env_x_error, env_y_error, env.theta_error], False) 296 | # raw_input("Press Enter to continue...") 297 | need_regrasp = check_regrasp(robot.mode, state_full) and (not done) 298 | if done: 299 | env.reset(rand_pose) 300 | else: 301 | state, signal_quality = preprocess(state, cols, rows, num_frame, 302 | sample_counter, state_full, 303 | robot.mode, done, r_matrix_next, 304 | object_name, marker_maker) 305 | 306 | trial_number_rgrasp = 0 307 | if sample_counter > 300: 308 | batch_size = 30 309 | # if sample_counter >= 500 and sample_counter < 1000: 310 | # batch_size = 200 311 | # elif sample_counter >= 1000: 312 | # batch_size = 500 313 | 314 | # exploration_noise += -0.01 315 | for t in range(max_timesteps): 316 | # select action and add exploration noise: 317 | 318 | trial_number_rgrasp += 1 319 | num_trial += 1 320 | action = policy.select_action(state) 321 | # action[2] = 0 322 | original_action = action 323 | 324 | action = action + np.random.normal( 325 | 0, 326 | max(exploration_noise, exploration_noise_min), 327 | size=action_dim) * max_action 328 | action = action.clip(-max_action, 329 | max_action) # in gripper frame 330 | # action[2] = 0 331 | # print action.shape 332 | 333 | # take action in env: 334 | # if need_regrasp: 335 | # trial_number_rgrasp += 2 336 | 337 | if trial_number_rgrasp > rgrasp_threshold[env.target_object]: 338 | env.regrasp(graspForce, rand_pose) 339 | # print('regrasp++++++++++++++++++++++++++++++') 340 | trial_number_rgrasp = 0 341 | env.rgrasp_counter = 0 342 | 343 | next_state, reward, done, next_state_full, action_world, r_matrix, r_matrix_next = env.step( 344 | action, True) 345 | 346 | sample_counter += 1 347 | need_regrasp = check_regrasp(robot.mode, 348 | next_state_full) and (not done) 349 | 350 | print('Ep', episode, 'Num', sample_counter, 351 | 'Name', object_name, 'mode', robot.mode, 'A.', 352 | list(action), 'N. 
A.', list(action_world), 'R.', reward) 353 | print('state_full', state_full) 354 | # raw_input("Press Enter to continue...") 355 | 356 | next_state, signal_quality = preprocess( 357 | next_state, cols, rows, num_frame, sample_counter, 358 | next_state_full, robot.mode, done, r_matrix_next, 359 | object_name, marker_maker) 360 | replay_buffer.add( 361 | (torch.squeeze(state, 362 | 0), np.array(action) / max_action, reward, 363 | torch.squeeze(next_state, 0), float(done), 364 | np.linalg.inv(r_matrix).dot(state_full / max_action), 365 | np.linalg.inv(r_matrix_next).dot(next_state_full / 366 | max_action))) 367 | 368 | # save_buffer(torch.squeeze(state, 0).numpy(), np.array(action) / max_action, reward, torch.squeeze(next_state, 0).numpy(),\ 369 | # float(done), np.linalg.inv(r_matrix).dot(state_full / max_action), np.linalg.inv(r_matrix_next).dot(next_state_full / max_action), sample_counter-1) 370 | 371 | state = next_state 372 | state_full = next_state_full 373 | 374 | # print('reward', reward) 375 | avg_reward += reward 376 | ep_reward += reward 377 | 378 | # if episode is done then update policy: 379 | if done or t == (max_timesteps - 1): 380 | env.reset(rand_pose) 381 | if sample_counter > 100 and not test_mode: 382 | policy.update(replay_buffer, t + 1, batch_size, gamma, 383 | polyak, policy_noise, noise_clip, 384 | policy_delay, directory) 385 | print('NN updated') 386 | trial_number_rgrasp = 0 387 | ep_reward_list.append(500) 388 | ep_object_list.append('gap') 389 | ep_success_list.append(True) 390 | ep_trialnum_list.append(0) 391 | ep_model_list.append(100) 392 | break 393 | 394 | if sample_counter > 100: 395 | policy.unfreeze_cnnlayer() 396 | 397 | if signal_quality: 398 | break 399 | 400 | # logging updates: 401 | # log_f.write('{},{},{},{}\n'.format(episode, ep_reward, t, 402 | # object_name)) 403 | # log_f.flush() 404 | 405 | if reward > 0: 406 | success_sign = True 407 | else: 408 | success_sign = False 409 | 410 | if test_mode: 411 | x_error_list_.append(x_error_list[episode]) 412 | y_error_list_.append(y_error_list[episode]) 413 | theta_error_list_.append(theta_error_list[episode]) 414 | ep_reward_list.append(ep_reward) 415 | ep_object_list.append(object_name) 416 | ep_success_list.append(success_sign) 417 | ep_trialnum_list.append(num_trial) 418 | ep_model_list.append(robot.mode) 419 | np.save(directory + '/reward_log.npy', ep_reward_list) 420 | np.save(directory + '/object_log.npy', ep_object_list) 421 | np.save(directory + '/success_log.npy', ep_success_list) 422 | np.save(directory + '/trialnum_log.npy', ep_trialnum_list) 423 | np.save(directory + '/mode_log.npy', ep_model_list) 424 | if test_mode: 425 | np.save(directory + '/x_error_log.npy', x_error_list_) 426 | np.save(directory + '/y_error_log.npy', y_error_list_) 427 | np.save(directory + '/theta_error_log.npy', theta_error_list_) 428 | 429 | ep_reward = 0 430 | # if avg reward > 300 then save and stop traning: 431 | if (avg_reward / log_interval) >= 800: 432 | print("########## Solved! 
###########") 433 | name = filename + '_solved' 434 | policy.save(directory, name) 435 | # log_f.close() 436 | break 437 | 438 | if episode % 10 == 0 and not test_mode: 439 | policy.save(directory, filename) 440 | 441 | # print avg reward every log interval: 442 | if episode % log_interval == 0: 443 | avg_reward = int(avg_reward / log_interval) 444 | print("Episode: {}\tAverage Reward: {}".format( 445 | episode, avg_reward)) 446 | avg_reward = 0 447 | 448 | if signal_quality: 449 | break 450 | 451 | 452 | if __name__ == '__main__': 453 | rospy.init_node('Dense_packing_with_RL', anonymous=True) 454 | train() -------------------------------------------------------------------------------- /reinforcement learning/TD3/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class ReplayBuffer: 5 | def __init__(self, max_size=1000): 6 | self.buffer = [] 7 | self.max_size = int(max_size) 8 | self.size = 0 9 | 10 | def add(self, transition): 11 | self.size += 1 12 | # transiton is tuple of (state, action, reward, next_state, done) 13 | self.buffer.append(transition) 14 | 15 | def sample(self, batch_size): 16 | # delete 1/5th of the buffer when full 17 | if self.size > self.max_size: 18 | del self.buffer[0:int(self.size / 5)] 19 | self.size = len(self.buffer) 20 | 21 | indexes = np.random.randint(0, len(self.buffer), size=batch_size) 22 | state, action, reward, next_state, done, state_full, next_state_full = [], [], [], [], [], [], [] 23 | 24 | for i in indexes: 25 | s, a, r, s_, d, s_f, s_full_ = self.buffer[i] 26 | state.append(np.array(s, copy=False)) 27 | action.append(np.array(a, copy=False)) 28 | reward.append(np.array(r, copy=False)) 29 | next_state.append(np.array(s_, copy=False)) 30 | done.append(np.array(d, copy=False)) 31 | state_full.append(np.array(s_f, copy=False)) 32 | next_state_full.append(np.array(s_full_, copy=False)) 33 | 34 | return np.array(state), np.array(action), np.array(reward), np.array( 35 | next_state), np.array(done), np.array(state_full), np.array( 36 | next_state_full) 37 | -------------------------------------------------------------------------------- /supervised_learning/cnn_encoder_epoch20.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/siyuandong16/Tactile_insertion_with_RL/775f7e45fcd4f438c8aec25a82ff6b6095bc65c8/supervised_learning/cnn_encoder_epoch20.pth -------------------------------------------------------------------------------- /supervised_learning/crnn_model.py: -------------------------------------------------------------------------------- 1 | ##pytorch cnn+ lstm 2 | 3 | import argparse 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torchvision import datasets, transforms 9 | from torch.autograd import Variable 10 | import numpy as np 11 | from utils_network import * 12 | 13 | ########################## Model for CNN LSTM ########################################################## 14 | 15 | 16 | # 2D CNN encoder train from scratch (no transfer learning) 17 | class EncoderCNN(nn.Module): 18 | def __init__(self, img_x=84, img_y=84, fc_hidden1=512, fc_hidden2=512, drop_p=0.3, CNN_embed_dim=300): 19 | super(EncoderCNN, self).__init__() 20 | 21 | self.img_x = img_x 22 | self.img_y = img_y 23 | self.CNN_embed_dim = CNN_embed_dim 24 | 25 | # CNN architechtures 26 | self.ch1, self.ch2, self.ch3, self.ch4 = 32, 64, 128, 256 27 | self.k1, self.k2, 
self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3) # 2d kernal size 28 | self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2) # 2d strides 29 | self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0) # 2d padding 30 | 31 | # conv2D output shapes 32 | self.conv1_outshape = conv2D_output_size((self.img_x, self.img_y), self.pd1, self.k1, self.s1) # Conv1 output shape 33 | self.conv2_outshape = conv2D_output_size(self.conv1_outshape, self.pd2, self.k2, self.s2) 34 | self.conv3_outshape = conv2D_output_size(self.conv2_outshape, self.pd3, self.k3, self.s3) 35 | self.conv4_outshape = conv2D_output_size(self.conv3_outshape, self.pd4, self.k4, self.s4) 36 | 37 | # fully connected layer hidden nodes 38 | self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2 39 | self.drop_p = drop_p 40 | 41 | self.conv1 = nn.Sequential( 42 | nn.Conv2d(in_channels=3, out_channels=self.ch1, kernel_size=self.k1, stride=self.s1, padding=self.pd1), 43 | nn.BatchNorm2d(self.ch1, momentum=0.01), 44 | nn.ReLU(inplace=True), 45 | # nn.MaxPool2d(kernel_size=2), 46 | ) 47 | self.conv2 = nn.Sequential( 48 | nn.Conv2d(in_channels=self.ch1, out_channels=self.ch2, kernel_size=self.k2, stride=self.s2, padding=self.pd2), 49 | nn.BatchNorm2d(self.ch2, momentum=0.01), 50 | nn.ReLU(inplace=True), 51 | # nn.MaxPool2d(kernel_size=2), 52 | ) 53 | 54 | self.conv3 = nn.Sequential( 55 | nn.Conv2d(in_channels=self.ch2, out_channels=self.ch3, kernel_size=self.k3, stride=self.s3, padding=self.pd3), 56 | nn.BatchNorm2d(self.ch3, momentum=0.01), 57 | nn.ReLU(inplace=True), 58 | # nn.MaxPool2d(kernel_size=2), 59 | ) 60 | 61 | self.conv4 = nn.Sequential( 62 | nn.Conv2d(in_channels=self.ch3, out_channels=self.ch4, kernel_size=self.k4, stride=self.s4, padding=self.pd4), 63 | nn.BatchNorm2d(self.ch4, momentum=0.01), 64 | nn.ReLU(inplace=True), 65 | # nn.MaxPool2d(kernel_size=2), 66 | ) 67 | 68 | self.drop = nn.Dropout2d(self.drop_p) 69 | self.pool = nn.MaxPool2d(2) 70 | self.fc1 = nn.Linear(self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1], self.fc_hidden1) # fully connected layer, output k classes 71 | self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2) 72 | self.fc3 = nn.Linear(self.fc_hidden2, self.CNN_embed_dim) # output = CNN embedding latent variables 73 | 74 | def forward(self, x_3d): 75 | cnn_embed_seq = [] 76 | for t in range(x_3d.size(1)): 77 | # CNNs 78 | x = self.conv1(x_3d[:, t, :, :, :]) 79 | x = self.conv2(x) 80 | x = self.conv3(x) 81 | x = self.conv4(x) 82 | x = x.view(x.size(0), -1) # flatten the output of conv 83 | 84 | # FC layers 85 | x = F.relu(self.fc1(x)) 86 | # x = F.dropout(x, p=self.drop_p, training=self.training) 87 | x = F.relu(self.fc2(x)) 88 | # x = F.dropout(x, p=self.drop_p, training=self.training) 89 | x = self.fc3(x) 90 | cnn_embed_seq.append(x) 91 | 92 | # swap time and sample dim such that (sample dim, time dim, CNN latent dim) 93 | cnn_embed_seq = torch.stack(cnn_embed_seq, dim=0).transpose_(0, 1) 94 | # cnn_embed_seq: shape=(batch, time_step, input_size) 95 | 96 | return cnn_embed_seq 97 | 98 | 99 | class DecoderRNN(nn.Module): 100 | def __init__(self, CNN_embed_dim=300, h_RNN_layers=3, h_RNN=256, h_FC_dim=128, drop_p=0.3, output_dim=3): 101 | super(DecoderRNN, self).__init__() 102 | 103 | self.RNN_input_size = CNN_embed_dim 104 | self.h_RNN_layers = h_RNN_layers # RNN hidden layers 105 | self.h_RNN = h_RNN # RNN hidden nodes 106 | self.h_FC_dim = h_FC_dim 107 | self.drop_p = drop_p 108 | self.output_dim = output_dim 109 | 110 | self.LSTM = nn.LSTM( 111 | 
input_size=self.RNN_input_size, 112 | hidden_size=self.h_RNN, 113 | num_layers=h_RNN_layers, 114 | batch_first=True, # input & output will has batch size as 1s dimension. e.g. (batch, time_step, input_size) 115 | ) 116 | 117 | self.fc1 = nn.Linear(self.h_RNN, self.h_FC_dim) 118 | self.fc2 = nn.Linear(self.h_FC_dim, self.output_dim) 119 | 120 | def forward(self, x_RNN): 121 | 122 | self.LSTM.flatten_parameters() 123 | RNN_out, (h_n, h_c) = self.LSTM(x_RNN, None) 124 | """ h_n shape (n_layers, batch, hidden_size), h_c shape (n_layers, batch, hidden_size) """ 125 | """ None represents zero initial hidden state. RNN_out has shape=(batch, time_step, output_size) """ 126 | 127 | # FC layers 128 | x = self.fc1(RNN_out[:, -1, :]) # choose RNN_out at the last time step 129 | x = F.relu(x) 130 | # x = F.dropout(x, p=self.drop_p, training=self.training) 131 | x = self.fc2(x) 132 | 133 | return x -------------------------------------------------------------------------------- /supervised_learning/crnn_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 3 | os.environ["CUDA_VISIBLE_DEVICES"]='10,11' 4 | import sys 5 | 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torch.optim as optim 11 | from torch.optim import lr_scheduler 12 | from torch.autograd import Variable 13 | import torchvision 14 | from torchvision import datasets, transforms 15 | import cv2 16 | from utils_siyuan import data_selection 17 | from networkmodels import DecoderRNN, EncoderCNN 18 | import matplotlib 19 | gui_env = ['TKAgg','GTKAgg','Qt4Agg','WXAgg'] 20 | for gui in gui_env: 21 | try: 22 | print("testing", gui) 23 | matplotlib.use(gui,warn=False, force=True) 24 | from matplotlib import pyplot as plt 25 | break 26 | except: 27 | continue 28 | print("Using:",matplotlib.get_backend()) 29 | import matplotlib.pyplot as plt 30 | 31 | 32 | 33 | 34 | def rgb2gray(rgb): 35 | return np.dot(rgb[...,:3], [0.33, 0.33, 0.34]) 36 | 37 | def read_img(data_path): 38 | img2_seq = [] 39 | use_color = True 40 | # range_list = list(range(9,24,2)) #+ list(range(33,48,2)) 41 | range_list = list(range(0, 45, 6)) + list(range(45, 90, 6)) 42 | for i in range_list: 43 | img = cv2.imread(data_path+str(i)+'.jpg') 44 | img = img[30:-30, 30:-30, :] 45 | if not use_color: 46 | imgwc_gray = rgb2gray(img) 47 | else: 48 | imgwc_gray = np.array(img).astype(np.float32) 49 | # img2_temp = scipy.misc.imresize(imgwc_gray,(84,84)) 50 | img2_temp = cv2.resize(imgwc_gray, (84, 84)) 51 | # print(img2_temp.shape) 52 | # cv2.imwrite('image_test.jpg', (img2_temp.astype(np.uint8))) 53 | if i == 0 or i == 45: 54 | mean_2 = np.mean(img2_temp) 55 | std_2 = np.std(img2_temp) 56 | 57 | img2_temp = (img2_temp-mean_2)/std_2 58 | if not use_color: 59 | img2_seq.append(img2_temp) 60 | else: 61 | img2_seq.append(img2_temp.transpose(2,0,1)) 62 | # print(img2_seq.shape) 63 | img2_temp = np.array(img2_seq) 64 | img2_temp = np.expand_dims(img2_temp, axis = 0) 65 | # print("shape", img2_temp.shape) 66 | # cv2.imwrite('image_test.jpg', ((img2_temp[:3,:,:].transpose(1,2,0)+1)*255/2.).astype(np.uint8)) 67 | #~ img2_temp1 = np.expand_dims(img2_temp, axis=0) 68 | X = torch.from_numpy(img2_temp).type(torch.FloatTensor) 69 | return X 70 | 71 | 72 | def run(): 73 | CNN_fc_hidden1, CNN_fc_hidden2 = 1024, 768 74 | CNN_embed_dim = 512 # latent dim extracted by 2D CNN 75 | img_x, img_y = 84,84 # 76 | dropout_p = 0.3 # dropout probability 
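
# ---------------------------------------------------------------------------
# Illustration (not part of the original file): a quick shape check for the
# encoder/decoder pair defined in crnn_model.py above, assuming that module
# and the conv2D_output_size helper it imports are on the path.  A clip of 12
# RGB frames at 84x84 maps to a single 3-vector (x, y, theta) prediction.
import torch
from crnn_model import EncoderCNN, DecoderRNN

encoder = EncoderCNN(img_x=84, img_y=84, CNN_embed_dim=300)
decoder = DecoderRNN(CNN_embed_dim=300, output_dim=3)
clip = torch.randn(2, 12, 3, 84, 84)   # (batch, time, channels, H, W)
feats = encoder(clip)                  # -> (2, 12, 300) frame embeddings
pred = decoder(feats)                  # -> (2, 3) pose-error prediction
# ---------------------------------------------------------------------------
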
77 | 78 | # DecoderRNN architecture 79 | RNN_hidden_layers = 3 80 | RNN_hidden_nodes = 512 81 | RNN_FC_dim = 256 82 | k = 3 83 | 84 | use_cuda = torch.cuda.is_available() # check if GPU exists 85 | device = torch.device("cuda:0" if use_cuda else "cpu") # use CPU or GPU 86 | 87 | cnn_encoder = EncoderCNN(img_x=img_x, img_y=img_y, fc_hidden1=CNN_fc_hidden1, fc_hidden2=CNN_fc_hidden2, 88 | drop_p=dropout_p, CNN_embed_dim=CNN_embed_dim).to(device) 89 | 90 | rnn_decoder = DecoderRNN(CNN_embed_dim=CNN_embed_dim, h_RNN_layers=RNN_hidden_layers, h_RNN=RNN_hidden_nodes, 91 | h_FC_dim=RNN_FC_dim, drop_p=dropout_p, output_dim=k).to(device) 92 | cnn_encoder = nn.DataParallel(cnn_encoder) 93 | rnn_decoder = nn.DataParallel(rnn_decoder) 94 | 95 | # model = torch.load('/home/ubuntu/packing/weights/best_model_color_small.pt') 96 | cnn_encoder.load_state_dict(torch.load('weights/cnn_encoder_epoch20.pth')) # 97 | rnn_decoder.load_state_dict(torch.load('weights/rnn_decoder_epoch20.pth')) # 98 | cnn_encoder.eval() 99 | rnn_decoder.eval() 100 | loss_function = nn.MSELoss() 101 | 102 | # model = nn.DataParallel(model) 103 | loss_list = [] 104 | label_list = [] 105 | prediction_list = [] 106 | for i in range(1300,1400): 107 | try: 108 | root = "/home/ubuntu/packing/data/data_newsensor_3/" 109 | index = i 110 | label_true = np.load(root + str(index) + '/label_true.npy') 111 | # r_matrix = np.load(root + str(index) + '/r_matrix.npy') 112 | 113 | # if label_true[0] > 0: 114 | # label_true[0] = 0 115 | # if label_true[1] < 0: 116 | # label_true[1] = 0 117 | 118 | # label_correct = r_matrix.dot(np.array([-label[0], -label[1]])) 119 | # label_true = np.array([label_correct[0], label_correct[1], -label[2]]) 120 | label_true[:2] /= 10. 121 | label_true[2] /= 25. 122 | labels = Variable(torch.from_numpy(label_true).type(torch.FloatTensor).cuda()) 123 | # print("corrected label", label_true) 124 | 125 | 126 | X = read_img(root + str(index) + '/') 127 | inputs = Variable(X.cuda()) 128 | error_predicted = rnn_decoder(cnn_encoder(inputs)) 129 | loss = loss_function(error_predicted.squeeze(),labels) 130 | label_list.append(np.array(label_true)*10) 131 | prediction_list.append(error_predicted.cpu().data[0].numpy()*10) 132 | print(error_predicted.cpu().data[0].numpy()*10) 133 | print(np.array(label_true)*10) 134 | loss_list.append(loss) 135 | except: 136 | pass 137 | # print(loss) 138 | print(sum(loss_list)/len(loss_list)) 139 | label_list = np.array(label_list) 140 | prediction_list = np.array(prediction_list) 141 | plt.figure() 142 | plt.plot(label_list[:,1],'ro') 143 | plt.hold = True 144 | plt.plot(prediction_list[:,1],'go') 145 | plt.show() 146 | 147 | 148 | 149 | if __name__ == '__main__': 150 | # need to add argparse 151 | run() 152 | -------------------------------------------------------------------------------- /supervised_learning/image_processing/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/siyuandong16/Tactile_insertion_with_RL/775f7e45fcd4f438c8aec25a82ff6b6095bc65c8/supervised_learning/image_processing/0.jpg -------------------------------------------------------------------------------- /supervised_learning/image_processing/marker_detection.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import time 4 | import matplotlib.pyplot as plt 5 | import cv2 6 | 7 | class marker_detection: 8 | def __init__(self): 9 | self.kernal = self.make_kernal(3, 'circle') 10 | 11 | def 
make_kernal(self, n, type): 12 | if type == 'circle': 13 | kernal = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (n, n)) 14 | else: 15 | kernal = cv2.getStructuringElement(cv2.MORPH_RECT, (n, n)) 16 | return kernal 17 | 18 | def creat_mask(self, raw_image): 19 | scale = 2 20 | m, n = raw_image.shape[1], raw_image.shape[0] 21 | raw_image = cv2.pyrDown(raw_image).astype(np.float32) 22 | blur = cv2.GaussianBlur(raw_image, (25, 25), 0) 23 | blur2 = cv2.GaussianBlur(raw_image, (5, 5), 0) 24 | diff = blur - blur2 25 | diff *= 16.0 26 | diff[diff < 0.] = 0. 27 | diff[diff > 255.] = 255. 28 | mask = (diff[:, :, 0] > 25) & (diff[:, :, 2] > 25) & (diff[:, :, 1] > 29 | 120) 30 | mask = cv2.resize(mask.astype(np.uint8), (m, n)) 31 | # mask = mask * dmask 32 | mask = cv2.dilate(mask, self.kernal, iterations=1) 33 | return (1 - mask) * 255 34 | 35 | def find_dots(self, binary_image): 36 | params = cv2.SimpleBlobDetector_Params() 37 | # Change thresholds 38 | params.minThreshold = 1 39 | params.maxThreshold = 12 40 | params.minDistBetweenBlobs = 9 41 | params.filterByArea = True 42 | params.minArea = 9 43 | params.filterByCircularity = False 44 | params.filterByConvexity = False 45 | params.filterByInertia = False 46 | params.minInertiaRatio = 0.5 47 | detector = cv2.SimpleBlobDetector_create(params) 48 | keypoints = detector.detect(binary_image.astype(np.uint8)) 49 | return keypoints 50 | 51 | def draw_mask(self, img, keypoints): 52 | img = np.zeros_like(img[:, :, 0]) 53 | for i in range(len(keypoints)): 54 | cv2.ellipse(img,(int(keypoints[i].pt[0]), int(keypoints[i].pt[1])), 55 | (3, 3), 0, 0, 360, (1), -1) 56 | return img 57 | 58 | def marker_detection(self, raw_imag): 59 | 60 | img = np.array(raw_imag).astype(np.float32) 61 | mask1 = self.creat_mask(img) 62 | keypoints = self.find_dots(mask1) 63 | mask2 = self.draw_mask(img, keypoints) 64 | return mask1, mask2 65 | 66 | 67 | 68 | 69 | if __name__ == "__main__": 70 | detector = marker_detection() 71 | im = cv2.imread('/homes/jha/Dropbox/Packing_RL_data/data_newsensor_14/hexagon/166/2.jpg') 72 | mask1, mask2 = detector.marker_detection(im) 73 | 74 | print("Image size..", mask2.shape) 75 | 76 | cv2.imshow('marker mask1', mask1) 77 | cv2.imshow('marker mask2', mask2) 78 | cv2.waitKey(0) 79 | 80 | #%% 81 | -------------------------------------------------------------------------------- /supervised_learning/image_processing/marker_flow.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import matplotlib.pyplot as plt 4 | import math, cv2, os 5 | import copy 6 | from scipy.interpolate import griddata 7 | 8 | 9 | class marker_flow: 10 | def __init__(self): 11 | self.kernel1 = self.make_kernel(3, 'circle') 12 | self.kernel2 = self.make_kernel(45, 'circle') 13 | self.scale = 1 14 | self.refresh1 = False 15 | self.refresh2 = False 16 | self.marker_thre = 100 17 | 18 | def rgb2gray(self, rgb): 19 | return np.dot(rgb[..., :3], [0.33, 0.33, 0.34]) 20 | 21 | def make_kernel(self, n, type): 22 | if type == 'circle': 23 | kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (n, n)) 24 | else: 25 | kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (n, n)) 26 | return kernel 27 | 28 | def defect_mask(self, im_cal): 29 | pad = 60 30 | var0 = 60 #left up 31 | var1 = 60 # right up 32 | var2 = 65 # right down 33 | var3 = 60 # left down 34 | im_mask = np.ones((im_cal.shape)) 35 | triangle0 = np.array([[0, 0], [var0, 0], [0, var0]]) 36 | triangle1 = np.array([[im_mask.shape[1] - var1, 0], 37 |
[im_mask.shape[1], 0], [im_mask.shape[1], var1]]) 38 | triangle2 = np.array([[im_mask.shape[1] - var2, im_mask.shape[0]], [im_mask.shape[1], im_mask.shape[0]], \ 39 | [im_mask.shape[1], im_mask.shape[0]-var2]]) 40 | triangle3 = np.array([[0, im_mask.shape[0]], 41 | [0, im_mask.shape[0] - var3], 42 | [var3, im_mask.shape[0]]]) 43 | color = [0] #im_mask 44 | cv2.fillConvexPoly(im_mask, triangle0, color) 45 | cv2.fillConvexPoly(im_mask, triangle1, color) 46 | cv2.fillConvexPoly(im_mask, triangle2, color) 47 | cv2.fillConvexPoly(im_mask, triangle3, color) 48 | im_mask[:pad, :] = 0 49 | im_mask[-pad:, :] = 0 50 | im_mask[:, :pad] = 0 51 | im_mask[:, -pad:] = 0 52 | return im_mask 53 | 54 | def make_thre_mask(self, im_cal): 55 | thre_image = np.zeros(im_cal.shape, dtype=np.uint8) 56 | previous_mask = np.zeros(im_cal.shape, dtype=np.uint8) 57 | for i in range(10, 80, 30): 58 | _, mask = cv2.threshold(im_cal.astype(np.uint8), i, 255, 59 | cv2.THRESH_BINARY_INV) 60 | mask_expand = cv2.dilate(mask, self.kernel2, iterations=1) 61 | mask_erode = cv2.erode(mask_expand, self.kernel2, iterations=1) 62 | thre_image += (mask_erode - previous_mask) / 255 * i 63 | previous_mask = mask_erode 64 | # cv2.imshow('threshold', thre_image) 65 | # cv2.waitKey(0) 66 | thre_image += (np.ones(im_cal.shape, dtype=np.uint8) - 67 | previous_mask / 255) * 80 + 10 68 | 69 | return thre_image 70 | 71 | def creat_mask_2(self, raw_image, dmask): 72 | scale = 2 73 | m, n = raw_image.shape[1], raw_image.shape[0] 74 | raw_image = cv2.pyrDown(raw_image).astype(np.float32) 75 | blur = cv2.GaussianBlur(raw_image, (25, 25), 0) 76 | blur2 = cv2.GaussianBlur(raw_image, (5, 5), 0) 77 | diff = blur - blur2 78 | diff *= 16.0 79 | diff[diff < 0.] = 0. 80 | diff[diff > 255.] = 255. 81 | mask = (diff[:, :, 0] > 25) & (diff[:, :, 2] > 25) & (diff[:, :, 1] > 82 | 120) 83 | mask = cv2.resize(mask.astype(np.uint8), (m, n)) 84 | mask = mask * dmask 85 | mask = cv2.dilate(mask, self.kernel1, iterations=1) 86 | return (1 - mask) * 255 87 | 88 | def find_dots(self, binary_image): 89 | # down_image = cv2.resize(binary_image, None, fx=2, fy=2) 90 | params = cv2.SimpleBlobDetector_Params() 91 | # Change thresholds 92 | params.minThreshold = 1 93 | params.maxThreshold = 12 94 | params.minDistBetweenBlobs = 9 95 | params.filterByArea = True 96 | params.minArea = 9 97 | params.filterByCircularity = False 98 | params.filterByConvexity = False 99 | params.filterByInertia = False 100 | params.minInertiaRatio = 0.5 101 | detector = cv2.SimpleBlobDetector_create(params) 102 | keypoints = detector.detect(binary_image.astype(np.uint8)) 103 | return keypoints 104 | 105 | def flow_calculate_in_contact(self, keypoints2, x_initial, y_initial, 106 | u_ref, v_ref): 107 | x2, y2, u, v, x1_paired, y1_paired, x2_paired, y2_paired = [], [], [], [], [], [], [], [] 108 | 109 | for i in range(len(keypoints2)): 110 | x2.append(keypoints2[i].pt[0] / self.scale) 111 | y2.append(keypoints2[i].pt[1] / self.scale) 112 | 113 | x2 = np.array(x2) 114 | y2 = np.array(y2) 115 | index_list = [] 116 | 117 | for i in range(x2.shape[0]): 118 | 119 | distance = list(((np.array(x_initial) - x2[i])**2 + 120 | (np.array(y_initial) - y2[i])**2)) 121 | if len(distance) == 0: 122 | break 123 | min_index = distance.index(min(distance)) 124 | u_temp = x2[i] - x_initial[min_index] 125 | v_temp = y2[i] - y_initial[min_index] 126 | shift_length = np.sqrt(u_temp**2 + v_temp**2) 127 | # print 'length',shift_length 128 | 129 | if shift_length < 12: 130 | # print xy2.shape,min_index,len(distance) 131 | 
x1_paired.append(x_initial[min_index] - u_ref[min_index]) 132 | y1_paired.append(y_initial[min_index] - v_ref[min_index]) 133 | x2_paired.append(x2[i]) 134 | y2_paired.append(y2[i]) 135 | u.append(u_temp + u_ref[min_index]) 136 | v.append(v_temp + v_ref[min_index]) 137 | index_list.append(self.index_list[min_index]) 138 | 139 | if shift_length > 7: 140 | refresh = True 141 | else: 142 | refresh = False 143 | 144 | return x1_paired, y1_paired, x2_paired, y2_paired, u, v, refresh, index_list 145 | 146 | def flow_calculate_global(self, keypoints2, x_initial, y_initial, u_ref, 147 | v_ref): 148 | x2, y2, u, v, x1_paired, y1_paired, x2_paired, y2_paired = [], [], [], [], [], [], [], [] 149 | x1_return, y1_return, x2_return, y2_return, u_return, v_return = [],[],[],[],[],[] 150 | 151 | for i in range(len(keypoints2)): 152 | x2.append(keypoints2[i].pt[0] / self.scale) 153 | y2.append(keypoints2[i].pt[1] / self.scale) 154 | 155 | x2 = np.array(x2) 156 | y2 = np.array(y2) 157 | index_list = [] 158 | 159 | for i in range(x2.shape[0]): 160 | distance = list(((np.array(x_initial) - x2[i])**2 + 161 | (np.array(y_initial) - y2[i])**2)) 162 | if len(distance) == 0: 163 | break 164 | min_index = distance.index(min(distance)) 165 | u_temp = x2[i] - x_initial[min_index] 166 | v_temp = y2[i] - y_initial[min_index] 167 | shift_length = np.sqrt(u_temp**2 + v_temp**2) 168 | # print 'length',shift_length 169 | if shift_length < 12: 170 | x1_paired.append(x_initial[min_index] - u_ref[min_index]) 171 | y1_paired.append(y_initial[min_index] - v_ref[min_index]) 172 | x2_paired.append(x2[i]) 173 | y2_paired.append(y2[i]) 174 | u.append(u_temp + u_ref[min_index]) 175 | v.append(v_temp + v_ref[min_index]) 176 | index_list.append(self.index_list[min_index]) 177 | 178 | # del x_initial[min_index], y_initial[min_index], u_ref[ 179 | # min_index], v_ref[min_index] 180 | 181 | x1_return = np.array(x1_paired) 182 | y1_return = np.array(y1_paired) 183 | x2_return = np.array(x2_paired) 184 | y2_return = np.array(y2_paired) 185 | u_return = np.array(u) 186 | v_return = np.array(v) 187 | 188 | return x1_return, y1_return, x2_return, y2_return, u_return, v_return, \ 189 | list(x2_paired), list(y2_paired), np.array(x2_paired), np.array(y2_paired), index_list 190 | # return x1_paired,y1_paired,x2_paired,y2_paired,u,v 191 | 192 | def dispOpticalFlow(self, im_cal, x, y, u, v, name): 193 | # mask = np.zeros_like(im_cal) 194 | mask2 = np.zeros_like(im_cal) 195 | amf = 3 196 | x = np.array(x).astype(np.int16) 197 | y = np.array(y).astype(np.int16) 198 | for i in range(u.shape[0]): #self.u_sum 199 | 200 | mask2 = cv2.line(mask2, 201 | (int(x[i] + u[i] * amf), int(y[i] + v[i] * amf)), 202 | (x[i], y[i]), [0, 120, 120], 2) 203 | 204 | img = cv2.add(im_cal, mask2) 205 | 206 | cv2.imshow(name, img.astype(np.uint8)) 207 | cv2.waitKey(0) 208 | 209 | def main(self, raw_imag, first_image): 210 | 211 | if first_image: 212 | 213 | imgwc = np.array(raw_imag).astype(np.float32) 214 | self.im_slipsign = np.zeros(imgwc.shape) 215 | cv2.putText(self.im_slipsign, 'Slip', (210, 30), 216 | cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, 217 | cv2.LINE_AA) 218 | im_gray = self.rgb2gray(imgwc) #.astype(np.uint8) 219 | self.dmask1 = self.defect_mask(im_gray) 220 | final_image = self.creat_mask_2(imgwc, self.dmask1) 221 | keypoints = self.find_dots(final_image) 222 | self.u_sum1 = np.zeros(len(keypoints)) 223 | self.v_sum1 = np.zeros(len(keypoints)) 224 | self.u_addon1 = list(self.u_sum1) 225 | self.v_addon1 = list(self.v_sum1) 226 | self.x1_last1 = [] 227 | 
self.y1_last1 = [] 228 | for i in range(len(keypoints)): 229 | self.x1_last1.append(keypoints[i].pt[0] / self.scale) 230 | self.y1_last1.append(keypoints[i].pt[1] / self.scale) 231 | self.x_iniref1 = list(self.x1_last1) 232 | self.y_iniref1 = list(self.y1_last1) 233 | self.absmotion1 = 0 234 | self.index_list = range(len(self.x1_last1)) 235 | return np.array(self.x1_last1), np.array( 236 | self.y1_last1), np.zeros_like(self.x1_last1), np.zeros_like( 237 | self.x1_last1), np.array(self.index_list) 238 | 239 | else: 240 | imgwc = np.array(raw_imag).astype(np.float32) 241 | final_image = self.creat_mask_2(imgwc, self.dmask1) 242 | 243 | if self.refresh1: 244 | keypoints = self.find_dots(final_image) 245 | x1, y1, x2, y2, u, v, self.x_iniref1, self.y_iniref1, self.u_addon1, self.v_addon1, index_list\ 246 | = self.flow_calculate_global(keypoints, list(self.x_iniref1), list(self.y_iniref1), \ 247 | list(self.u_addon1), list(self.v_addon1)) 248 | self.refresh1 = False 249 | else: 250 | keypoints = self.find_dots(final_image) 251 | x1, y1, x2, y2, u, v, self.refresh1, index_list = self.flow_calculate_in_contact( 252 | keypoints, list(self.x_iniref1), list(self.y_iniref1), 253 | list(self.u_addon1), list(self.v_addon1)) 254 | 255 | return np.array(x2), np.array(y2), np.array(u), np.array( 256 | v), np.array(index_list) 257 | 258 | 259 | if __name__ == "__main__": 260 | 261 | mf = marker_flow() 262 | path = '/media/mcube/SERVER_HD/siyuan/policy_finetune/' 263 | ob = 'circle' 264 | num = 10 265 | x_min, x_max, y_min, y_max = 62, 366, 63, 256 #marker tracking region 266 | gap = 10 #pixel gap between each measurement 267 | 268 | for i in range(12): #read the 12 images during contact period 269 | img = cv2.imread(path + ob + '/' + str(num) + '/' + str(i) + '.jpg') 270 | x, y, u, v, index_list = mf.main(img, i == 0) 271 | 272 | if i == 0: 273 | x_ref, y_ref = copy.deepcopy(x), copy.deepcopy(y) 274 | x_paired, y_paired = copy.deepcopy(x), copy.deepcopy(y) 275 | # x_min, x_max = int(np.min(x_ref)), int(np.max(x_ref)) + 1 276 | # y_min, y_max = int(np.min(y_ref)), int(np.max(y_ref)) + 1 277 | x_grid, y_grid = np.meshgrid(range(x_min, x_max, 10), 278 | range(y_min, y_max, 10)) 279 | u_grid = np.zeros_like(x_grid) 280 | v_grid = np.zeros_like(x_grid) 281 | else: 282 | x_paired, y_paired = x_ref[index_list], y_ref[index_list] 283 | 284 | points = np.squeeze(np.dstack((x_paired.T, y_paired.T))) 285 | values = np.squeeze(np.dstack((u.T, v.T))) 286 | 287 | uv = griddata(points, 288 | values, (x_grid.flatten(), y_grid.flatten()), 289 | method='linear') 290 | uv[np.isnan(uv)] = 0. 
291 | u_grid = uv[:, 0] 292 | v_grid = uv[:, 1] 293 | 294 | u_grid_image = np.reshape(u_grid, x_grid.shape) 295 | v_grid_image = np.reshape(v_grid, x_grid.shape) 296 | 297 | cv2.imshow('u', ((u_grid_image + 10) * 12).astype(np.uint8)) 298 | cv2.imshow('v', ((v_grid_image + 10) * 12).astype(np.uint8)) 299 | mf.dispOpticalFlow(img, x_grid.flatten(), y_grid.flatten(), 300 | u_grid.flatten(), v_grid.flatten(), 'flow') 301 | -------------------------------------------------------------------------------- /supervised_learning/label_converter.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | # folder = '/media/siyuan/data/Data_packing_RL/data_newsensor_3/' 6 | folder = '/media/siyuan/data/Data_packing_RL/data_newsensor_3/' 7 | folder = '/home/siyuan/Dropbox (MIT)/2020_RSS_RL_packing/data/data_newsensor_3/' 8 | num = 5000 9 | # num = 1 10 | 11 | for i in range(num): 12 | try: 13 | r_matrxi = np.load(folder+str(i)+'/r_matrix.npy') 14 | label = np.load(folder+str(i)+'/label.npy') 15 | if label[0] > 0: 16 | label[0] = 0 17 | if label[1] < 0: 18 | label[1] = 0 19 | label_true = np.linalg.inv(r_matrxi).dot(label[:2]).tolist() 20 | label_true.append(label[2]) 21 | label_true = np.array(label_true) 22 | np.save(folder+str(i)+'/label_true.npy', label_true) 23 | except: 24 | pass -------------------------------------------------------------------------------- /supervised_learning/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 3 | os.environ["CUDA_VISIBLE_DEVICES"]='0' 4 | import sys 5 | 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torch.optim as optim 11 | from torch.optim import lr_scheduler 12 | from torch.autograd import Variable 13 | import torchvision 14 | from torchvision import datasets, transforms 15 | 16 | from utils import CNN_Actor, data_selection 17 | 18 | 19 | def run(init_lr=0.0001, max_epoch=64e3, batch_size=128*5, save_model=''): 20 | 21 | use_color = True 22 | train_set, valid_set, train_data_size, valid_data_size = data_selection(use_color) 23 | print('training data size: ', train_data_size, 'validation data size: ', valid_data_size) 24 | params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 4, 'pin_memory': True} 25 | train_loader = torch.utils.data.DataLoader(train_set, **params) 26 | valid_loader = torch.utils.data.DataLoader(valid_set, **params) 27 | 28 | dataloaders = {'train': train_loader, 'val': valid_loader} 29 | 30 | model = CNN_Actor(num_inputs = 8*2*3) 31 | # print(model) 32 | model.cuda() 33 | model = nn.DataParallel(model) 34 | 35 | lr = init_lr 36 | # optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=0.0000001) 37 | optimizer = optim.Adam(model.parameters(), lr=lr) 38 | lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000]) 39 | loss_function = nn.MSELoss() 40 | 41 | num_steps_per_update = 1 # accum gradient 42 | epoch = 0 43 | gap = 1 44 | train_error_list = [] 45 | valid_error_list = [1.] 46 | smallest_valid_error = 1. 
47 | 48 | # train it 49 | while epoch < max_epoch:#for epoch in range(num_epochs): 50 | print('Epoch {}/{}'.format(epoch, max_epoch)) 51 | print('-' * 10) 52 | steps = 0 53 | # Each epoch has a training and validation phase 54 | for phase in ['train', 'val']: 55 | if phase == 'train': 56 | model.train() 57 | else: 58 | model.eval() # Set model to evaluate mode 59 | 60 | tot_loss = 0.0 61 | tot_loc_loss = 0.0 62 | tot_cls_loss = 0.0 63 | num_iter = 0 64 | acc_old= 0. 65 | sample_number = 0 66 | optimizer.zero_grad() 67 | 68 | # Iterate over data. 69 | for (X, y) in dataloaders[phase]: 70 | num_iter += 1 71 | steps += 1 72 | #print(X.size(),y.size()) 73 | # get the inputs 74 | # inputs, labels = data 75 | # inputs_1_temp,labels = X,y 76 | # inputs_1 = torch.from_numpy(np.array(inputs_1)).type(torch.FloatTensor) 77 | # inputs_2 = torch.from_numpy(np.array(inputs_2)).type(torch.FloatTensor) 78 | # wrap them in Variable 79 | # print inputs.shape 80 | inputs = Variable(X.cuda()) 81 | labels = Variable(y.cuda()) 82 | 83 | error_predicted = model(inputs) 84 | # print 'predict_error',per_frame_logits.squeeze().size() 85 | # print 'gt error',labels.size() 86 | cls_loss = F.smooth_l1_loss(error_predicted.squeeze(),labels) 87 | sample_number += y.size(0) 88 | 89 | 90 | # print cls_loss.size() 91 | tot_cls_loss += cls_loss.data 92 | 93 | loss = (cls_loss)/num_steps_per_update 94 | #loss = (0.5*loc_loss + 0.5*cls_loss)/num_steps_per_update 95 | 96 | tot_loss += loss.data 97 | loss.backward() 98 | 99 | if num_iter == num_steps_per_update and phase == 'train': 100 | num_iter = 0 101 | optimizer.step() 102 | optimizer.zero_grad() 103 | lr_sched.step() 104 | # print steps%gap 105 | if steps % gap == 0: 106 | print('{} {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, \ 107 | sample_number*1.0/train_data_size,tot_cls_loss/(gap*num_steps_per_update), tot_loss/gap)) 108 | # save model 109 | train_error_list.append([epoch,tot_loss/gap]) 110 | np.save(save_model+'train_error_list.npy',train_error_list) 111 | # torch.save(model.module.state_dict(), save_model+str(steps).zfill(6)+'.pt') 112 | tot_loss = tot_cls_loss = 0. 
113 | # label_true_list = [] 114 | # label_predict_list = [] 115 | 116 | 117 | if phase == 'val': 118 | print('{} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_cls_loss/num_iter, \ 119 | (tot_loss*num_steps_per_update)/num_iter)) 120 | if tot_cls_loss/num_iter < smallest_valid_error: 121 | torch.save(model.state_dict(), save_model+'best_model_color_small_decrease_bnorm.pt') 122 | smallest_valid_error = tot_cls_loss/num_iter 123 | valid_error_list.append(tot_cls_loss/num_iter) 124 | np.save(save_model+'valid_error_list.npy',valid_error_list) 125 | epoch += 1 126 | 127 | 128 | 129 | if __name__ == '__main__': 130 | # need to add argparse 131 | run(save_model='/home/siyuan/Documents/2020_RSS_still_packing/tactile_insertion_RL/supervised_learning/') 132 | -------------------------------------------------------------------------------- /supervised_learning/rnn_decoder_epoch20.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/siyuandong16/Tactile_insertion_with_RL/775f7e45fcd4f438c8aec25a82ff6b6095bc65c8/supervised_learning/rnn_decoder_epoch20.pth -------------------------------------------------------------------------------- /supervised_learning/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from random import shuffle 6 | import random 7 | from torch.utils import data 8 | import cv2 9 | import scipy.misc 10 | import os 11 | 12 | def rgb2gray(rgb): 13 | return np.dot(rgb[...,:3], [0.33, 0.33, 0.34]) 14 | 15 | def init(module, weight_init, bias_init, gain=1): 16 | weight_init(module.weight.data, gain=gain) 17 | bias_init(module.bias.data) 18 | return module 19 | 20 | class Flatten(nn.Module): 21 | def forward(self, x): 22 | return x.view(x.size(0), -1) 23 | 24 | class CNN_Actor(nn.Module): 25 | def __init__(self, num_inputs = 8, hidden_size=256, num_classes = 3): 26 | super(CNN_Actor, self).__init__() 27 | 28 | init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init. 29 | constant_(x, 0), nn.init.calculate_gain('relu')) 30 | 31 | self.main = nn.Sequential( 32 | init_(nn.Conv2d(num_inputs, 64, 7, stride=4)), nn.ReLU(), 33 | init_(nn.Conv2d(64, 32, 5, stride=2)), nn.ReLU(), 34 | init_(nn.Conv2d(32, 32, 3, stride=1)), nn.ReLU(), Flatten(), 35 | # init_(nn.Linear(32 * 10 * 10, hidden_size)), nn.ReLU() 36 | init_(nn.Linear(32 * 6 * 6, hidden_size)), nn.ReLU() 37 | ) 38 | 39 | init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init. 40 | constant_(x, 0))#, nn.init.calculate_gain('tanh')) 41 | 42 | self.critic_linear = init_(nn.Linear(hidden_size, num_classes)) 43 | 44 | 45 | def forward(self, inputs): 46 | x = self.main(inputs) 47 | x = self.critic_linear(x) 48 | x = torch.tanh(x) 49 | return x 50 | 51 | 52 | 53 | class Dataset_CRNN(data.Dataset): 54 | "Characterizes a dataset for PyTorch" 55 | def __init__(self, root, file_folder, use_color): 56 | "Initialization" 57 | self.root = root 58 | self.file_folder = file_folder 59 | self.use_color = use_color 60 | 61 | def __len__(self): 62 | "Denotes the total number of samples" 63 | return len(self.file_folder) 64 | 65 | def label_correction(self, label): 66 | 67 | label[:2] /= 10. 68 | label[2] /= 15. 
69 | 70 | return label 71 | 72 | def load_data(self, data_path): 73 | img2_seq = [] 74 | range_list = list(range(0, 45, 6)) + list(range(45, 90, 6)) 75 | for i in range_list: 76 | img = cv2.imread(data_path+str(i)+'.jpg') 77 | img = img[30:-30, 30:-30, :] 78 | if not self.use_color: 79 | imgwc_gray = rgb2gray(img) 80 | else: 81 | imgwc_gray = np.array(img).astype(np.float32) 82 | img2_temp = cv2.resize(imgwc_gray, (84, 84)) # scipy.misc.imresize is removed in current SciPy; cv2.resize matches crnn_test.py 83 | if i == 0 or i == 45: 84 | mean_2 = np.mean(img2_temp) 85 | std_2 = np.std(img2_temp) 86 | 87 | img2_temp = (img2_temp-mean_2)/std_2 88 | if not self.use_color: 89 | img2_seq.append(img2_temp) 90 | else: 91 | if i == 0: 92 | img2_seq = img2_temp.copy() 93 | img2_seq = img2_seq.transpose(2,0,1) 94 | else: 95 | img2_seq = np.concatenate((img2_seq, img2_temp.transpose(2,0,1)), axis=0) 96 | img2_temp = np.array(img2_seq) 97 | X = torch.from_numpy(img2_temp).type(torch.FloatTensor) 98 | 99 | label = np.array(np.load(data_path + 'label_true.npy')) 100 | label = self.label_correction(label) 101 | Y = torch.from_numpy(label).type(torch.FloatTensor) 102 | return X, Y 103 | 104 | def __getitem__(self, index): 105 | "Generates one sample of data" 106 | # Select sample 107 | filename = self.file_folder[index] 108 | # Load data 109 | X, Y = self.load_data(filename) 110 | return X, Y 111 | 112 | 113 | 114 | def data_selection(use_color): 115 | root = [] 116 | root.append("/media/siyuan/data/Data_packing_RL/data_newsensor_3/") 117 | 118 | num_data = 5000 119 | 120 | file_folder = [] 121 | 122 | range_list = list(range(0, 45, 6)) + list(range(45, 90, 6)) 123 | for k in range(len(root)): 124 | root_folder = root[k] 125 | for i in range(num_data): 126 | path = root_folder+str(i)+'/' 127 | break_sign = False 128 | if os.path.isdir(path): 129 | label = np.array(np.load(path + 'label_true.npy')) 130 | if (label[0] < 0 and label[1] < 1.0) or (label[0] > -1 and label[1] > 0): 131 | file_folder.append(path) 132 | 133 | num_of_data = len(file_folder) 134 | num_of_train = int(num_of_data*0.8) 135 | num_of_valid = num_of_data - num_of_train 136 | 137 | random.seed(40) 138 | shuffle(file_folder) 139 | 140 | train_folder = file_folder[:num_of_train] 141 | valid_folder = file_folder[num_of_train:] 142 | 143 | train_set, valid_set = Dataset_CRNN(root, train_folder, use_color), Dataset_CRNN(root, valid_folder, use_color) 144 | 145 | return train_set, valid_set, num_of_train, num_of_valid 146 | 147 | --------------------------------------------------------------------------------
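
A minimal end-to-end inference sketch for the supervised-learning pipeline above; it is not one of the repository files. It assumes crnn_model.py defines the EncoderCNN/DecoderRNN shown earlier (crnn_test.py imports the same names from a module called 'networkmodels'), that the shipped checkpoints cnn_encoder_epoch20.pth and rnn_decoder_epoch20.pth sit in the current directory, and it uses a random tensor in place of the (batch, 16 frames, 3, 84, 84) clip that read_img() in crnn_test.py builds from the tactile images.

import torch
import torch.nn as nn
from crnn_model import EncoderCNN, DecoderRNN  # assumed module name; crnn_test.py uses 'networkmodels'

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Same architecture settings as crnn_test.run().
encoder = EncoderCNN(img_x=84, img_y=84, fc_hidden1=1024, fc_hidden2=768,
                     drop_p=0.3, CNN_embed_dim=512).to(device)
decoder = DecoderRNN(CNN_embed_dim=512, h_RNN_layers=3, h_RNN=512,
                     h_FC_dim=256, drop_p=0.3, output_dim=3).to(device)

# The checkpoints were saved from nn.DataParallel-wrapped modules, so wrap before
# loading to match the 'module.'-prefixed state-dict keys (as crnn_test.py does).
encoder = nn.DataParallel(encoder)
decoder = nn.DataParallel(decoder)
encoder.load_state_dict(torch.load('cnn_encoder_epoch20.pth', map_location=device))
decoder.load_state_dict(torch.load('rnn_decoder_epoch20.pth', map_location=device))
encoder.eval()
decoder.eval()

with torch.no_grad():
    # Stand-in for read_img(): 16 normalized 84x84 RGB frames per sample.
    clip = torch.randn(1, 16, 3, 84, 84, device=device)
    pred = decoder(encoder(clip)).squeeze()

# The three outputs are normalized insertion errors (dx, dy, dtheta); crnn_test.py
# divides the x/y labels by 10 and the rotation label by 25 before training, so
# invert that scaling to recover the original units.
print(pred[0].item() * 10, pred[1].item() * 10, pred[2].item() * 25)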