├── README.md ├── main.py ├── linear_classifier.py └── deltaencoder.py /README.md: -------------------------------------------------------------------------------- 1 | # DeltaEncoder_pytorch 2 | Pytorch implement of deltaencoder 3 | 4 | 5 | # References 6 | https://github.com/EliSchwartz/DeltaEncoder -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # © Copyright IBM Corp. 2019 2 | #!/usr/bin/env python 3 | # -*- coding: UTF-8 -*- 4 | ''' 5 | @author: leven03 6 | @contact: xuwang37@163.com 7 | @file: linear_classifier.py 8 | @time: 2020/1/9 9 | @desc: 10 | ''' 11 | 12 | import pickle 13 | from deltaencoder import DeltaEncoder 14 | 15 | 16 | ########### Load Data ################ 17 | features_train, labels_train, features_test, labels_test, episodes_1shot, episodes_5shot = pickle.load(open('data/mIN.pkl','rb'),encoding='bytes') 18 | 19 | # features_train/features_test are features extracted from some backbone (resnet18); they are np array with size = (N,D), where N is the number of samples and D the features dimensions 20 | # labels_train/labels_test are one hot GT labels with size = (N,C), where C is the number of classes (can be different for train and test sets 21 | # episodes_*shot are supplied for reproduction of the paper results size=(num_episodes, num_classes, num_shots, D) 22 | 23 | 24 | 25 | ######### 1-shot Experiment ######### 26 | args = {'data_set' : 'mIN', 27 | 'num_shots' : 1, 28 | 'num_epoch': 6, 29 | 'nb_val_loop': 10, 30 | 'learning_rate': 1e-5, 31 | 'drop_out_rate': 0.5, 32 | 'drop_out_rate_input': 0.0, 33 | 'batch_size': 128, 34 | 'noise_size' : 16, 35 | 'nb_img' : 1024, 36 | 'num_ways' : 5, 37 | 'encoder_size' : [8192], 38 | 'decoder_size' : [8192], 39 | 'opt_type': 'adam' 40 | } 41 | 42 | model = DeltaEncoder(args, features_train, labels_train, features_test, labels_test, episodes_1shot) 43 | model.train(verbose=True) 44 | 45 | 46 | 47 | ######### 5-shot Experiment ######### 48 | args = {'data_set' : 'mIN', 49 | 'num_shots' : 5, 50 | 'num_epoch': 12, 51 | 'nb_val_loop': 10, 52 | 'learning_rate': 1e-5, 53 | 'drop_out_rate': 0.5, 54 | 'drop_out_rate_input': 0.0, 55 | 'batch_size': 128, 56 | 'noise_size' : 16, 57 | 'nb_img' : 1024, 58 | 'num_ways' : 5, 59 | 'encoder_size' : [8192], 60 | 'decoder_size' : [8192], 61 | 'opt_type': 'adam' 62 | } 63 | 64 | model = DeltaEncoder(args, features_train, labels_train, features_test, labels_test, episodes_5shot) 65 | model.train(verbose=False) 66 | -------------------------------------------------------------------------------- /linear_classifier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | ''' 4 | @author: leven03 5 | @contact: xuwang37@163.com 6 | @file: linear_classifier.py 7 | @time: 2020/1/9 8 | @desc: 9 | ''' 10 | import torch 11 | import torch.nn as nn 12 | import numpy as np 13 | import random 14 | from sklearn.metrics import accuracy_score 15 | import os 16 | 17 | 18 | class LinearClassifier(nn.Module): 19 | def __init__(self, input_dim, out_dim): 20 | super(LinearClassifier, self).__init__() 21 | 22 | self.linear = nn.Linear(input_dim, out_dim) 23 | 24 | def forward(self, x): 25 | return self.linear(x) 26 | 27 | 28 | class linear_classifier(object): 29 | def __init__(self, features_train, labels_train, features_test, labels_test, 30 | learning_rate=0.0005, number_epoch=25, batch_size=100): 31 | self.decay_factor = 0.9 32 | 33 | self.features_test = features_test 34 | self.labels_test = labels_test 35 | self.features_train = features_train 36 | self.labels_train = labels_train 37 | 38 | self.class_idx = np.where(np.sum(self.labels_train, axis=0) != 0)[0] 39 | self.labels_train = self.labels_train[:, self.class_idx] 40 | self.labels_test = self.labels_test[:, self.class_idx] 41 | idx = np.any(self.labels_test, axis=1) 42 | self.labels_test = self.labels_test[idx] 43 | self.features_test = self.features_test[idx] 44 | 45 | self.learning_rate = learning_rate 46 | self.number_epoch = number_epoch 47 | self.batch_size = batch_size 48 | 49 | 50 | # print("label test shape is:", labels_test) 51 | 52 | self.classifier = LinearClassifier(self.features_test.shape[1], self.labels_test.shape[1]).cuda() 53 | 54 | def loss(self, features_batch, labels_batch): 55 | self.classifier.train() 56 | features_batch = torch.Tensor(features_batch).cuda() 57 | labels_batch = torch.LongTensor(labels_batch).cuda() 58 | 59 | cel = nn.CrossEntropyLoss() 60 | logits = self.classifier(features_batch) 61 | 62 | return cel(logits, torch.argmax(labels_batch, 1)) 63 | 64 | def training(self, model, learning_rate): 65 | optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 66 | return optimizer 67 | 68 | def next_batch(self, start, end): 69 | if start == 0: 70 | idx = np.r_[:self.features_train.shape[0]] 71 | random.shuffle(idx) 72 | self.features_train = self.features_train[idx] 73 | self.labels_train = self.labels_train[idx] 74 | if end > self.features_train.shape[0]: 75 | end = self.features_train.shape[0] 76 | return self.features_train[start:end], self.labels_train[start:end] 77 | 78 | def val(self): 79 | self.classifier.eval() 80 | with torch.no_grad(): 81 | logits = self.classifier(torch.from_numpy(self.features_test).cuda()).cpu() 82 | 83 | acc = accuracy_score(np.argmax(logits, axis=1), np.argmax(self.labels_test, axis=1)) 84 | return acc 85 | 86 | def learn(self): 87 | 88 | # self.features_test_temp = self.features_test 89 | self.learning_rate = 0.001 90 | 91 | best_acc = best_acc_seen = best_acc_unseen = 0.0 92 | last_loss_epoch = None 93 | optimizer = self.training(self.classifier, self.learning_rate) 94 | for i in range(self.number_epoch): 95 | mean_loss_d = 0.0 96 | for count in range(0, self.features_train.shape[0], self.batch_size): 97 | features_batch, labels_batch = self.next_batch(count, count + self.batch_size) 98 | with torch.enable_grad(): 99 | optimizer.zero_grad() 100 | loss_value = self.loss(features_batch, labels_batch) 101 | loss_value.backward() 102 | optimizer.step() 103 | 104 | mean_loss_d += loss_value 105 | 106 | mean_loss_d /= (self.features_train.shape[0] / self.batch_size) 107 | 108 | if last_loss_epoch is not None and mean_loss_d > last_loss_epoch: 109 | self.learning_rate *= self.decay_factor 110 | else: 111 | last_loss_epoch = mean_loss_d 112 | 113 | acc = self.val() 114 | if acc > best_acc: 115 | best_acc = acc 116 | return best_acc 117 | 118 | 119 | -------------------------------------------------------------------------------- /deltaencoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | ''' 4 | @author: leven03 5 | @contact: xuwang37@163.com 6 | @file: deltaencoder.py 7 | @time: 2020/1/9 8 | @desc: 9 | ''' 10 | import torch 11 | import torch.nn as nn 12 | import numpy as np 13 | import random 14 | import os 15 | from linear_classifier import linear_classifier 16 | 17 | class Encoder(nn.Module): 18 | def __init__(self, feature_dim=256, encoder_size=[8192], z_dim=16, dropout=0.5, dropout_input=0.0, leak=0.2): 19 | super(Encoder, self).__init__() 20 | self.first_linear = nn.Linear(feature_dim*2, encoder_size[0]) 21 | 22 | linear = [] 23 | for i in range(len(encoder_size) - 1): 24 | linear.append(nn.Linear(encoder_size[i], encoder_size[i+1])) 25 | linear.append(nn.LeakyReLU(leak)) 26 | linear.append(nn.Dropout(dropout)) 27 | 28 | self.linear = nn.Sequential(*linear) 29 | self.final_linear = nn.Linear(encoder_size[-1], z_dim) 30 | self.lrelu = nn.LeakyReLU(leak) 31 | self.dropout_input = nn.Dropout(dropout_input) 32 | self.dropout = nn.Dropout(dropout) 33 | 34 | def forward(self, features, reference_features): 35 | features = self.dropout_input(features) 36 | x = torch.cat([features, reference_features], 1) 37 | 38 | # print("features shape is:", features.shape, reference_features.shape) 39 | # print(x.shape) 40 | 41 | x = self.first_linear(x) 42 | x = self.linear(x) 43 | 44 | x = self.final_linear(x) 45 | 46 | return x 47 | 48 | class Decoder(nn.Module): 49 | def __init__(self, feature_dim=256, decoder_size=[8192], z_dim=16, dropout=0.5, leak=0.2): 50 | super(Decoder, self).__init__() 51 | self.first_linear = nn.Linear(z_dim+feature_dim, decoder_size[0]) 52 | 53 | linear = [] 54 | for i in range(len(decoder_size) - 1): 55 | linear.append(nn.Linear(decoder_size[i], decoder_size[i+1])) 56 | linear.append(nn.LeakyReLU(leak)) 57 | linear.append(nn.Dropout(dropout)) 58 | 59 | self.linear = nn.Sequential(*linear) 60 | 61 | self.final_linear = nn.Linear(decoder_size[-1], feature_dim) 62 | self.lrelu = nn.LeakyReLU(leak) 63 | self.dropout = nn.Dropout(dropout) 64 | 65 | def forward(self, reference_features, code): 66 | x = torch.cat([reference_features, code], 1) 67 | 68 | x = self.first_linear(x) 69 | x = self.linear(x) 70 | 71 | x = self.final_linear(x) 72 | 73 | return x 74 | 75 | # class EncoderDecoder(nn.Module): 76 | # def __init__(self, encoder, decoder): 77 | # super(EncoderDecoder, self).__init__() 78 | # self.encoder = encoder 79 | # self.decoder = decoder 80 | # 81 | # def forward(self, features, reference_features): 82 | # code = self.encoder(features, reference_features) 83 | # x_hat = self.decoder(reference_features, code) 84 | # 85 | # return x_hat 86 | 87 | class DeltaEncoder(object): 88 | def __init__(self, args, features, labels, features_test, labels_test, episodes, resume = ''): 89 | self.count_data = 0 90 | self.num_epoch = args['num_epoch'] 91 | self.noise_size = args['noise_size'] 92 | self.nb_val_loop = args['nb_val_loop'] 93 | self.encoder_size = args['encoder_size'] 94 | self.decoder_size = args['decoder_size'] 95 | self.batch_size = args['batch_size'] 96 | self.drop_out_rate = args['drop_out_rate'] 97 | self.drop_out_rate_input = args['drop_out_rate_input'] 98 | self.best_acc = 0.0 99 | self.name = args['data_set'] 100 | self.last_file_name = "" 101 | self.nb_fake_img = args['nb_img'] 102 | self.learning_rate = args['learning_rate'] 103 | self.decay_factor = 0.9 104 | self.num_shots = args['num_shots'] 105 | self.num_ways = args['num_ways'] 106 | self.resume = resume 107 | self.save_var_dict = {} 108 | 109 | self.features, self.labels = features, labels 110 | self.features_test, self.labels_test = features_test, labels_test 111 | self.episodes = episodes 112 | 113 | self.features_dim = self.features.shape[1] 114 | self.reference_features = self.random_pairs(self.features, self.labels) 115 | 116 | # discriminator input => image features 117 | 118 | self._create_model() 119 | 120 | # assign pairs with the same labels 121 | def random_pairs(self,X, labels): 122 | Y = X.copy() 123 | for l in range(labels.shape[1]): 124 | inds = np.where(labels[:,l])[0] 125 | inds_pairs = np.random.permutation(inds) 126 | Y[inds,:] = X[inds_pairs,:] 127 | return Y 128 | 129 | def _create_model(self): 130 | self.encoder = Encoder(self.features_dim, self.encoder_size, self.noise_size, self.drop_out_rate, self.drop_out_rate_input) 131 | self.decoder = Decoder(self.features_dim, self.decoder_size, self.noise_size, self.drop_out_rate) 132 | 133 | self.encoder = self.encoder.cuda() 134 | self.decoder = self.decoder.cuda() 135 | 136 | def loss(self, features_batch, reference_features_batch): 137 | l1loss = nn.L1Loss().cuda() 138 | 139 | self.pred_noise = self.encoder(features_batch, reference_features_batch) 140 | self.pred_x = self.decoder(reference_features_batch, self.pred_noise) 141 | 142 | # assert self.pred_noise.shape == self.pred_x.shape 143 | abs_diff = l1loss(features_batch, self.pred_x) 144 | 145 | w = torch.pow(abs_diff, 2) 146 | w = w / torch.norm(w) 147 | 148 | loss = w * abs_diff 149 | 150 | return loss 151 | 152 | def optimizer(self, encoder, decoder, lr): 153 | optimizer = torch.optim.Adam([{'params': encoder.parameters()}, 154 | {'params': decoder.parameters()}], lr=lr) 155 | 156 | return optimizer 157 | 158 | def next_batch(self, start, end): 159 | if start == 0: 160 | if self.num_shots: 161 | self.reference_features = self.random_pairs(self.features, self.labels) 162 | idx = np.r_[:self.features.shape[0]] 163 | random.shuffle(idx) 164 | self.features = self.features[idx] 165 | self.reference_features = self.reference_features[idx] 166 | self.labels = self.labels[idx] 167 | if end > self.features.shape[0]: 168 | end = self.features.shape[0] 169 | 170 | return torch.from_numpy(self.features[start:end]), \ 171 | torch.from_numpy(self.reference_features[start:end]), \ 172 | torch.from_numpy(self.labels[start:end]) 173 | 174 | def train(self, verbose=False): 175 | last_loss_epoch = None 176 | acc = self.val() 177 | print('Unseen classes accuracy without training: {}'.format(acc)) 178 | print("-----") 179 | optimizer = self.optimizer(self.encoder, self.decoder, lr=self.learning_rate) 180 | 181 | self.encoder.train() 182 | self.decoder.train() 183 | for epoch in range(self.num_epoch): 184 | mean_loss_e = 0.0 185 | for count in range(0, self.features.shape[0], self.batch_size): 186 | features_batch, reference_features_batch, labels_batch = self.next_batch(count, count + self.batch_size) 187 | 188 | with torch.enable_grad(): 189 | optimizer.zero_grad() 190 | loss_e = self.loss(features_batch.cuda(), reference_features_batch.cuda()) 191 | loss_e.backward() 192 | optimizer.step() 193 | 194 | mean_loss_e += loss_e 195 | 196 | c = (count/self.batch_size) + 1 197 | if verbose: 198 | if np.mod(c, 10) == 1: 199 | print('Batch#{0} Loss {1}'.format(c, mean_loss_e / (c + 1e-7))) 200 | 201 | mean_loss_e /= (self.features.shape[0] / self.batch_size) 202 | if verbose: 203 | print('epoch : {}: E : {}'.format(epoch, mean_loss_e)) 204 | if last_loss_epoch is not None and mean_loss_e > last_loss_epoch: 205 | self.learning_rate *= self.decay_factor 206 | if verbose: 207 | print("AE learning rate decay: ", self.learning_rate) 208 | else: 209 | last_loss_epoch = mean_loss_e 210 | 211 | acc = self.val() 212 | if acc > self.best_acc: 213 | if self.best_acc != 0.0: 214 | os.remove(self.last_file_name) 215 | self.best_acc = acc 216 | self.last_file_name = "model_weights/" + self.name + '_' \ 217 | + str(self.num_shots) + '_shot_' \ 218 | + str(np.around(self.best_acc, decimals=2)) + '_acc.ckpt' 219 | self.save_model(self.encoder, self.decoder, self.last_file_name) 220 | print('epoch {}: Higher unseen classes accuracy reached: {} (Saved in {})'.format(epoch+1, acc, self.last_file_name)) 221 | else: 222 | print('epoch {}: Lower unseen classes accuracy reached: {} (<={})'.format(epoch+1, acc,self.best_acc)) 223 | print("-----") 224 | return self.best_acc 225 | 226 | def generate_samples(self, reference_features_class, labels_class, nb_ex): 227 | self.encoder.eval() 228 | self.decoder.eval() 229 | iterations = 0 230 | nb_ex = int(nb_ex) 231 | features = np.zeros((nb_ex * labels_class.shape[0], self.features.shape[1])) 232 | labels = np.zeros((nb_ex * labels_class.shape[0], labels_class.shape[1])) 233 | reference_features = np.zeros((nb_ex * labels_class.shape[0], self.reference_features.shape[1])) 234 | for c in range(labels_class.shape[0]): 235 | if True: # sample "noise" from training set 236 | inds = np.random.permutation(range(self.features.shape[0]))[:nb_ex] 237 | 238 | noise = self.encoder(torch.Tensor(self.features[inds, ...]).cuda(), 239 | torch.Tensor(self.reference_features[inds, ...]).cuda()) 240 | 241 | else: 242 | noise = torch.from_numpy(np.random.normal(0, 1, (nb_ex, self.noise_size))).cuda() 243 | reference_features_class_tensor = torch.Tensor(np.tile(reference_features_class[c], (nb_ex, 1))).cuda() 244 | features[c * nb_ex:(c * nb_ex) + nb_ex] = self.decoder(reference_features_class_tensor, noise).cpu().detach().numpy() 245 | 246 | labels[c * nb_ex:(c * nb_ex) + nb_ex] = np.tile(labels_class[c], (nb_ex, 1)) 247 | reference_features[c * nb_ex:(c * nb_ex) + nb_ex] = np.tile(reference_features_class[c], (nb_ex, 1)) 248 | return features, reference_features, labels 249 | 250 | 251 | def val(self, verbose=False): 252 | acc = [] 253 | 254 | for episode_data in self.episodes: 255 | unique_labels_episode = episode_data[1][:, 0, :] 256 | 257 | features, reference_features, labels = [], [], [] 258 | for shot in range(max(self.num_shots, 1)): 259 | unique_reference_features_test = episode_data[0][:, shot, :] 260 | features_, reference_features_, labels_ = self.generate_samples(unique_reference_features_test, 261 | unique_labels_episode, 262 | self.nb_fake_img / max(self.num_shots, 1)) 263 | features.append(unique_reference_features_test) 264 | reference_features.append(unique_reference_features_test) 265 | labels.append(unique_labels_episode) 266 | features.append(features_) 267 | reference_features.append(reference_features_) 268 | labels.append(labels_) 269 | if verbose: 270 | print(np.mean([np.linalg.norm(x) for x in unique_reference_features_test])) 271 | print(np.mean([np.linalg.norm(x) for x in features_])) 272 | 273 | features = np.concatenate(features) 274 | reference_features = np.concatenate(reference_features) 275 | labels = np.concatenate(labels) 276 | 277 | lin_model = linear_classifier(features, labels, self.features_test, 278 | self.labels_test) 279 | acc_ = lin_model.learn() 280 | acc.append(acc_) 281 | 282 | acc = 100 * np.mean(acc) 283 | return acc 284 | 285 | 286 | def save_model(self, encoder, decoder, save_dir): 287 | 288 | if not os.path.exists(os.path.dirname(save_dir)): 289 | os.mkdir(os.path.dirname(save_dir)) 290 | torch.save({ 291 | 'encoder_state_dict': encoder.state_dict(), 292 | 'decoder_state_dict': decoder.state_dict(), 293 | }, save_dir) --------------------------------------------------------------------------------