├── .gitignore ├── LICENSE ├── README.md ├── dataset.py ├── main.py ├── models ├── __init__.py ├── gcn.py ├── linear.py ├── lossnet.py ├── mobilenet.py ├── resnet.py ├── resnet64.py ├── shufflenet.py ├── vgg.py ├── wa_model.py └── wideresnet.py ├── query_strategies ├── __init__.py ├── active_learning_by_learning.py ├── adversarial_bim.py ├── adversarial_deepfool.py ├── aug_uda.py ├── augmentations.py ├── badge_sampling.py ├── baseline_sampling.py ├── batch_BALD.py ├── batch_active_learning_at_scale.py ├── bayesian_active_learning_disagreement_dropout.py ├── coreGCN.py ├── core_set.py ├── cpu_dist.py ├── entropy_sampling.py ├── entropy_sampling_dropout.py ├── kcenter_greedy.py ├── kmeans_sampling.py ├── lal.py ├── learning_loss_for_al.py ├── least_confidence.py ├── least_confidence_dropout.py ├── margin_sampling.py ├── margin_sampling_dropout.py ├── mcadl.py ├── random_sampling.py ├── selection_via_proxy.py ├── semi_fixmatch.py ├── semi_flexmatch.py ├── semi_pseudolabel.py ├── semi_strategy.py ├── ssl_consistency.py ├── ssl_diff2augdirect.py ├── ssl_diff2augkmeans.py ├── ssl_least_confidence.py ├── ssl_rand.py ├── strategy.py ├── uncertainGCN.py ├── util.py ├── vaal.py └── wasserstein_adversarial.py ├── requirements.txt ├── run.sh └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | ./save/ 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2023, cure-lab 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Active Learning with Pytorch 2 | An implementation of the state-of-the-art Deep Active Learning algorithm. 3 | This code was built based on [Jordan Ash's repository](https://github.com/JordanAsh/badge). 4 | 5 | # Dependencies 6 | 7 | To run this code fully, you'll need [PyTorch](https://pytorch.org/) (we're using version 1.4.0), [scikit-learn](https://scikit-learn.org/stable/). 
8 | We've been running our code in Python 3.7. 9 | 10 | # Algorithms Implemented 11 | ## Deep active learning Strategies 12 | | Sampling Strategies | Year | Done | 13 | |:-------------------------------------------------:|:----------:|:----:| 14 | | Random Sampling | x | ✅ | 15 | | ClusterMargin [1] | arXiv'21 | ✅ | 16 | | WAAL [2] | AISTATS'20 | ✅ | 17 | | BADGE [3] | ICLR'20 | ✅ | 18 | | Adversarial Sampling for Active Learning [4] | WACV'20 | ✅ | 19 | | Learning Loss for Active Learning [5] | CVPR'19 | ✅ | 20 | | Variational Adversial Active Learning [6] | ICCV'19 | ✅ | 21 | | BatchBALD [7] | NIPS'19 | ✅ | 22 | | K-Means Sampling [8] | ICLR'18 | ✅ | 23 | | K-Centers Greedy [8] | ICLR'18 | ✅ | 24 | | Core-Set [8] | ICLR'18 | ✅ | 25 | | Adversarial - DeepFool [9] | ArXiv'18 | ✅ | 26 | | Uncertainty Ensembles [10] | NIPS'17 | ✅ | 27 | | Uncertainty Sampling with Dropout Estimation [11] | ICML'17 | ✅ | 28 | | Bayesian Active Learning Disagreement [11] | ICML'17 | ✅ | 29 | | Least Confidence [12] | IJCNN'14 | ✅ | 30 | | Margin Sampling [12] | IJCNN'14 | ✅ | 31 | | Entropy Sampling [12] | IJCNN'14 | ✅ | 32 | | UncertainGCN Sampling [13] | CVPR'21 | ✅ | 33 | | CoreGCN Sampling [13] | CVPR'21 | ✅ | 34 | | Ensemble [14] | CVPR'18 | ✅ | 35 | | MCDAL [15] | Knowledge-based Systems'19 | ✅ | 36 | 37 | 38 | ## Deep active learning + Semi-supervised learning 39 | 40 | | Sampling Strategies | Year | Done | 41 | |:-------------------------------------------------:|:----------:|:----:| 42 | | Consistency-SSLAL [16] | ECCV'20 | ✅ | 43 | | MixMatch-SSLAL [17] | arXiv | ✅ | 44 | | UDA [18] | NIPS'20 | In progress | 45 | 46 | 47 | 48 | 49 | # Running an experiment 50 | ## Requirements 51 | 52 | First, please make sure you have installed Conda. Then, our environment can be installed by: 53 | ``` 54 | conda create -n DAL python=3.7 55 | conda activate DAL 56 | pip install -r requirements.txt 57 | ``` 58 | 59 | ## Example 60 | ``` 61 | python main.py --model ResNet18 --dataset cifar10 --strategy LeastConfidence 62 | ``` 63 | It runs an active learning experiment using ResNet18 and CIFAR-10 data, querying according to the LeastConfidence algorithm. The result will be saved in the **./save** directory. 64 | 65 | You can also use `run.sh` to run experiments. 66 | 67 | ## Self-supervised feautres of data 68 | You can download the features/feature_model from [here](https://mycuhk-my.sharepoint.com/:f:/g/personal/1155170454_link_cuhk_edu_hk/EtfELBvgwwlArSvrxtOHbaoBkTmCZgqZ3qOPwaQ601a4SQ?e=GfEj94) 69 | 70 | # Contact 71 | If you have any questions/suggestions, or would like to contribute to this repo, please feel free to contact: 72 | Yu Li `yuli@cse.cuhk.edu.hk`, Muxi Chen `mxchen21@cse.cuhk.edu.hk` or Prof. 
Qiang Xu `qxu@cse.cuhk.edu.hk` 73 | 74 | 75 | 76 | ## References 77 | 78 | [1] (ClusterMargin, 2021) Batch Active Learning at Scale 79 | 80 | [2] (WAAL, AISTATS'20) Deep Active Learning: Unified and Principled Method for Query and Training [paper](https://arxiv.org/pdf/1911.09162.pdf) [code](https://github.com/cjshui/WAAL) 81 | 82 | [3] (BADGE, ICLR'20) Deep Batch Active Learning by Diverse, Uncertain Gradient Lower Bounds [paper](https://openreview.net/forum?id=ryghZJBKPS) [code](https://github.com/JordanAsh/badge) 83 | 84 | 85 | 86 | 87 | [4] (ASAL, WACV'20) Adversarial Sampling for Active Learning [paper](https://arxiv.org/pdf/1808.06671.pdf) 88 | 89 | [5] (CVPR'19) Learning Loss for Active Learning [paper](https://arxiv.org/pdf/1905.03677v1.pdf) [code](https://github.com/Mephisto405/Learning-Loss-for-Active-Learning) 90 | 91 | [6] (VAAL, ICCV'19) Variational Adversial Active Learning [paper](https://arxiv.org/pdf/1904.00370.pdf) [code](https://github.com/sinhasam/vaal) 92 | 93 | [7] (BatchBALD, NIPS'19) BatchBALD: Efficient and Diverse Batch Acquisition for Deep Bayesian Active Learning [paper](https://papers.nips.cc/paper/2019/file/95323660ed2124450caaac2c46b5ed90-Paper.pdf) [code](https://github.com/BlackHC/BatchBALD) 94 | 95 | 96 | 97 | 98 | [8] (CORE-SET, ICLR'18) Active Learning for Convolutional Neural Networks: A Core-Set Approach [paper](https://arxiv.org/pdf/1708.00489.pdf) [code](https://github.com/ozansener/active_learning_coreset) 99 | 100 | [9] (DFAL, 2018) Adversarial Active Learning for Deep Networks: a Margin Based Approach 101 | 102 | [10] (NIPS'17) Simple and Scalable Predictive Uncertainty Estimation using Deep Ensembles [paper](https://arxiv.org/pdf/1612.01474.pdf) [code](https://github.com/vvanirudh/deep-ensembles-uncertainty) 103 | 104 | [11] (DBAL, ICML'17) Deep Bayesian Active Learning with Image Data [paper](https://arxiv.org/pdf/1703.02910.pdf) [code](https://github.com/bnjasim/Deep-Bayesian-Active-Learning) 105 | 106 | [12] (Least Confidence/Margin/Entropy, IJCNN'14) A New Active Labeling Method for Deep Learning, IJCNN, 2014 107 | 108 | 109 | 110 | 111 | 112 | [13] (UncertainGCN, CoreGCN, CVPR'21) Sequential Graph Convolutional Network for Active Learning [paper](https://arxiv.org/pdf/2006.10219.pdf) [code](https://github.com/razvancaramalau/Sequential-GCN-for-Active-Learning) 113 | 114 | [14] (Emsemble, CVPR'18) The power of ensembles for active learning in image classification [paper](https://openaccess.thecvf.com/content_cvpr_2018/papers/Beluch_The_Power_of_CVPR_2018_paper.pdf) 115 | 116 | [15] (Knowledge-based Systems'19) Multi-criteria active deep learning for image classification [paper](https://www.sciencedirect.com/science/article/abs/pii/S0950705119300747?via%3Dihub) [code](https://github.com/houxingxing/Multi-Criteria-Active-Deep-Learning-for-Image-Classification) 117 | 118 | [16] (ECCV'20) Consistency-based semi-supervised active learning: Towards minimizing labeling cost [paper](https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123550511.pdf) 119 | 120 | [17] (Google, arXiv) Combining MixMatch and Active Learning for Better Accuracy with Fewer Labels 121 | 122 | [18] (Google, NIPS’20) Unsupervised Data Augmentation for Consistency Training 123 | 124 | 125 | 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pdb 3 | import torch 4 | from torchvision import 
datasets 5 | from torch.utils.data import Dataset 6 | from PIL import Image 7 | from torchvision import transforms 8 | import os 9 | import torchvision 10 | 11 | 12 | def get_dataset(name, path): 13 | if name.lower() == 'mnist': 14 | return get_MNIST(path) 15 | elif name.lower() == 'fashionmnist': 16 | return get_FashionMNIST(path) 17 | elif name.lower() == 'svhn': 18 | return get_SVHN(path) 19 | elif name.lower() == 'cifar10': 20 | return get_CIFAR10(path) 21 | elif name.lower() == 'cifar100': 22 | return get_CIFAR100(path) 23 | elif name.lower() == 'gtsrb': 24 | return get_GTSRB(path) 25 | elif name.lower() == 'tinyimagenet': 26 | return get_tinyImageNet(path) 27 | 28 | def get_ImageNet(path): 29 | raw_tr = datasets.ImageFolder(path + '/tinyImageNet/tiny-imagenet-200/train') 30 | imagenet_tr_path = path +'imagenet-object-localization-challenge/ILSVRC/Data/CLS-LOC/train/' 31 | from torchvision import transforms 32 | transform = transforms.Compose([transforms.Resize((64, 64))]) 33 | imagenet_folder = datasets.ImageFolder(imagenet_tr_path, transform=transform) 34 | idx_to_class = {} 35 | for (class_num, idx) in imagenet_folder.class_to_idx.items(): 36 | idx_to_class[idx] = class_num 37 | X_tr,Y_tr = [], [] 38 | item_list = imagenet_folder.imgs 39 | for (class_num, idx) in raw_tr.class_to_idx.items(): 40 | new_img_num = 0 41 | for ii, (path, target) in enumerate(item_list): 42 | if idx_to_class[target] == class_num: 43 | X_tr.append(np.array(imagenet_folder[ii][0])) 44 | Y_tr.append(idx) 45 | new_img_num += 1 46 | if new_img_num >= 250: 47 | break 48 | 49 | return np.array(X_tr), np.array(Y_tr) 50 | 51 | 52 | def get_tinyImageNet(path): 53 | # 100000 train 10000 test 54 | raw_tr = datasets.ImageFolder(path + '/tinyImageNet/tiny-imagenet-200/train') 55 | raw_te = datasets.ImageFolder(path + '/tinyImageNet/tiny-imagenet-200/val') 56 | f = open(path + '/tinyImageNet/tiny-imagenet-200/val/val_annotations.txt') 57 | 58 | val_dict = {} 59 | for line in f.readlines(): 60 | val_dict[line.split()[0]] = raw_tr.class_to_idx[line.split()[1]] 61 | X_tr,Y_tr,X_te, Y_te = [],[],[],[] 62 | 63 | div_list = [len(raw_tr)*(x+1)//10 for x in range(10)] # can not load at once, memory limitation 64 | i=0 65 | for count in div_list: 66 | loop = count - i 67 | for j in range(loop): 68 | image,target = raw_tr[i] 69 | X_tr.append(np.array(image)) 70 | Y_tr.append(target) 71 | i += 1 72 | 73 | for i in range(len(raw_te)): 74 | img, label = raw_te[i] 75 | img_pth = raw_te.imgs[i][0].split('/')[-1] 76 | X_te.append(np.array(img)) 77 | Y_te.append(val_dict[img_pth]) 78 | 79 | return X_tr,Y_tr,X_te, Y_te 80 | # torch.tensor(X_tr), torch.tensor(Y_tr), torch.tensor(X_te), torch.tensor(Y_te) 81 | 82 | def get_MNIST(path): 83 | raw_tr = datasets.MNIST(path + '/mnist', train=True, download=True) 84 | raw_te = datasets.MNIST(path + '/mnist', train=False, download=True) 85 | X_tr = raw_tr.data 86 | Y_tr = raw_tr.targets 87 | X_te = raw_te.data 88 | Y_te = raw_te.targets 89 | return X_tr, Y_tr, X_te, Y_te 90 | 91 | def get_FashionMNIST(path): 92 | raw_tr = datasets.FashionMNIST(path + '/fashionmnist', train=True, download=True) 93 | raw_te = datasets.FashionMNIST(path + '/fashionmnist', train=False, download=True) 94 | X_tr = raw_tr.data 95 | Y_tr = raw_tr.targets 96 | X_te = raw_te.data 97 | Y_te = raw_te.targets 98 | return X_tr, Y_tr, X_te, Y_te 99 | 100 | def get_SVHN(path): 101 | data_tr = datasets.SVHN(path, split='train', download=True) 102 | data_te = datasets.SVHN(path, split='test', download=True) 103 | X_tr = 
data_tr.data 104 | Y_tr = torch.from_numpy(data_tr.labels) 105 | X_te = data_te.data 106 | Y_te = torch.from_numpy(data_te.labels) 107 | return X_tr, Y_tr, X_te, Y_te 108 | 109 | def get_CIFAR10(path): 110 | data_tr = datasets.CIFAR10(path + '/cifar10', train=True, download=True) 111 | data_te = datasets.CIFAR10(path + '/cifar10', train=False, download=True) 112 | X_tr = data_tr.data 113 | # print(np.array(X_tr[0]).shape) 114 | Y_tr = torch.from_numpy(np.array(data_tr.targets)) 115 | X_te = data_te.data 116 | Y_te = torch.from_numpy(np.array(data_te.targets)) 117 | return X_tr, Y_tr, X_te, Y_te 118 | 119 | def get_CIFAR100(path): 120 | data_tr = datasets.CIFAR100(path + '/cifar100', train=True, download=True) 121 | data_te = datasets.CIFAR100(path + '/cifar100', train=False, download=True) 122 | X_tr = data_tr.data 123 | Y_tr = torch.from_numpy(np.array(data_tr.targets)) 124 | X_te = data_te.data 125 | Y_te = torch.from_numpy(np.array(data_te.targets)) 126 | return X_tr, Y_tr, X_te, Y_te 127 | 128 | def get_GTSRB(path): 129 | train_dir = os.path.join(path, 'gtsrb/train') 130 | test_dir = os.path.join(path, 'gtsrb/test') 131 | train_data = torchvision.datasets.ImageFolder(train_dir) 132 | test_data = torchvision.datasets.ImageFolder(test_dir) 133 | X_tr = np.array([np.asarray(datasets.folder.default_loader(s[0])) for s in train_data.samples]) 134 | Y_tr = torch.from_numpy(np.array(train_data.targets)) 135 | X_te = np.array([np.asarray(datasets.folder.default_loader(s[0])) for s in test_data.samples]) 136 | Y_te = torch.from_numpy(np.array(test_data.targets)) 137 | 138 | return X_tr, Y_tr, X_te, Y_te 139 | 140 | 141 | def get_handler(name): 142 | if name.lower() == 'mnist': 143 | return DataHandler1 144 | elif name.lower() == 'fashionmnist': 145 | return DataHandler1 146 | elif name.lower() == 'svhn': 147 | return DataHandler2 148 | elif name.lower() == 'cifar10': 149 | return DataHandler3 150 | elif name.lower() == 'cifar100': 151 | return DataHandler3 152 | elif name.lower() == 'gtsrb': 153 | return DataHandler3 154 | elif name.lower() == 'tinyimagenet': 155 | return DataHandler3 156 | else: 157 | return DataHandler4 158 | 159 | 160 | class DataHandler1(Dataset): 161 | def __init__(self, X, Y, transform=None): 162 | self.X = X 163 | self.Y = Y 164 | self.transform = transform 165 | 166 | def __getitem__(self, index): 167 | x, y = self.X[index], self.Y[index] 168 | if self.transform is not None: 169 | x = x.numpy() if not isinstance(x, np.ndarray) else x 170 | x = Image.fromarray(x, mode='L') 171 | x = self.transform(x) 172 | return x, y, index 173 | 174 | def __len__(self): 175 | return len(self.X) 176 | 177 | class DataHandler2(Dataset): 178 | def __init__(self, X, Y, transform=None): 179 | self.X = X 180 | self.Y = Y 181 | self.transform = transform 182 | 183 | def __getitem__(self, index): 184 | x, y = self.X[index], self.Y[index] 185 | if self.transform is not None: 186 | x = Image.fromarray(np.transpose(x, (1, 2, 0))) 187 | x = self.transform(x) 188 | return x, y, index 189 | 190 | def __len__(self): 191 | return len(self.X) 192 | 193 | class DataHandler3(Dataset): 194 | def __init__(self, X, Y, transform=None): 195 | self.X = X 196 | self.Y = Y 197 | self.transform = transform 198 | 199 | def __getitem__(self, index): 200 | x, y = self.X[index], self.Y[index] 201 | if self.transform is not None: 202 | x = Image.fromarray(x) 203 | x = self.transform(x) 204 | return x, y, index 205 | 206 | def __len__(self): 207 | return len(self.X) 208 | 209 | class DataHandler4(Dataset): 210 | def 
__init__(self, X, Y, transform=None): 211 | self.X = X 212 | self.Y = Y 213 | self.transform = transform 214 | 215 | def __getitem__(self, index): 216 | x, y = self.X[index], self.Y[index] 217 | return x, y, index 218 | 219 | def __len__(self): 220 | return len(self.X) 221 | 222 | 223 | # handler for waal 224 | def get_wa_handler(name): 225 | if name.lower() == 'fashionmnist': 226 | return Wa_datahandler1 227 | elif name.lower() == 'svhn': 228 | return Wa_datahandler2 229 | elif name.lower() == 'cifar10': 230 | return Wa_datahandler3 231 | elif name.lower() == 'cifar100': 232 | return Wa_datahandler3 233 | elif name.lower() == 'tinyimagenet': 234 | return Wa_datahandler3 235 | elif name.lower() == 'mnist': 236 | return Wa_datahandler1 237 | elif name.lower() == 'gtsrb': 238 | return Wa_datahandler3 239 | 240 | 241 | class Wa_datahandler1(Dataset): 242 | 243 | def __init__(self,X_1, Y_1, X_2, Y_2, transform = None): 244 | """ 245 | :param X_1: covariate from the first distribution 246 | :param Y_1: label from the first distribution 247 | :param X_2: 248 | :param Y_2: 249 | :param transform: 250 | """ 251 | self.X1 = X_1 252 | self.Y1 = Y_1 253 | self.X2 = X_2 254 | self.Y2 = Y_2 255 | self.transform = transform 256 | 257 | def __len__(self): 258 | 259 | # returning the minimum length of two data-sets 260 | 261 | return max(len(self.X1),len(self.X2)) 262 | 263 | def __getitem__(self, index): 264 | Len1 = len(self.Y1) 265 | Len2 = len(self.Y2) 266 | 267 | # checking the index in the range or not 268 | 269 | if index < Len1: 270 | x_1 = self.X1[index] 271 | y_1 = self.Y1[index] 272 | 273 | else: 274 | 275 | # rescaling the index to the range of Len1 276 | re_index = index % Len1 277 | 278 | x_1 = self.X1[re_index] 279 | y_1 = self.Y1[re_index] 280 | 281 | # checking second datasets 282 | if index < Len2: 283 | 284 | x_2 = self.X2[index] 285 | y_2 = self.Y2[index] 286 | 287 | else: 288 | # rescaling the index to the range of Len2 289 | re_index = index % Len2 290 | 291 | x_2 = self.X2[re_index] 292 | y_2 = self.Y2[re_index] 293 | 294 | if self.transform is not None: 295 | # print (x_1) 296 | x_1 = Image.fromarray(x_1, mode='L') 297 | x_1 = self.transform(x_1) 298 | 299 | x_2 = Image.fromarray(x_2, mode='L') 300 | x_2 = self.transform(x_2) 301 | 302 | return index,x_1,y_1,x_2,y_2 303 | 304 | 305 | 306 | class Wa_datahandler2(Dataset): 307 | 308 | def __init__(self,X_1, Y_1, X_2, Y_2, transform = None): 309 | """ 310 | :param X_1: covariate from the first distribution 311 | :param Y_1: label from the first distribution 312 | :param X_2: 313 | :param Y_2: 314 | :param transform: 315 | """ 316 | self.X1 = X_1 317 | self.Y1 = Y_1 318 | self.X2 = X_2 319 | self.Y2 = Y_2 320 | self.transform = transform 321 | 322 | def __len__(self): 323 | 324 | # returning the minimum length of two data-sets 325 | 326 | return max(len(self.X1),len(self.X2)) 327 | 328 | def __getitem__(self, index): 329 | Len1 = len(self.Y1) 330 | Len2 = len(self.Y2) 331 | 332 | # checking the index in the range or not 333 | 334 | if index < Len1: 335 | x_1 = self.X1[index] 336 | y_1 = self.Y1[index] 337 | 338 | else: 339 | 340 | # rescaling the index to the range of Len1 341 | re_index = index % Len1 342 | 343 | x_1 = self.X1[re_index] 344 | y_1 = self.Y1[re_index] 345 | 346 | # checking second datasets 347 | if index < Len2: 348 | 349 | x_2 = self.X2[index] 350 | y_2 = self.Y2[index] 351 | 352 | else: 353 | # rescaling the index to the range of Len2 354 | re_index = index % Len2 355 | 356 | x_2 = self.X2[re_index] 357 | y_2 = 
self.Y2[re_index] 358 | 359 | if self.transform is not None: 360 | 361 | x_1 = Image.fromarray(np.transpose(x_1, (1, 2, 0))) 362 | x_1 = self.transform(x_1) 363 | 364 | x_2 = Image.fromarray(np.transpose(x_2, (1, 2, 0))) 365 | x_2 = self.transform(x_2) 366 | 367 | return index,x_1,y_1,x_2,y_2 368 | 369 | 370 | class Wa_datahandler3(Dataset): 371 | 372 | def __init__(self,X_1, Y_1, X_2, Y_2, transform = None): 373 | """ 374 | :param X_1: covariate from the first distribution 375 | :param Y_1: label from the first distribution 376 | :param X_2: 377 | :param Y_2: 378 | :param transform: 379 | """ 380 | self.X1 = X_1 381 | self.Y1 = Y_1 382 | self.X2 = X_2 383 | self.Y2 = Y_2 384 | self.transform = transform 385 | 386 | def __len__(self): 387 | 388 | # returning the minimum length of two data-sets 389 | 390 | return max(len(self.X1),len(self.X2)) 391 | 392 | def __getitem__(self, index): 393 | Len1 = len(self.Y1) 394 | Len2 = len(self.Y2) 395 | 396 | # checking the index in the range or not 397 | 398 | if index < Len1: 399 | x_1 = self.X1[index] 400 | y_1 = self.Y1[index] 401 | 402 | else: 403 | 404 | # rescaling the index to the range of Len1 405 | re_index = index % Len1 406 | 407 | x_1 = self.X1[re_index] 408 | y_1 = self.Y1[re_index] 409 | 410 | # checking second datasets 411 | if index < Len2: 412 | 413 | x_2 = self.X2[index] 414 | y_2 = self.Y2[index] 415 | 416 | else: 417 | # rescaling the index to the range of Len2 418 | re_index = index % Len2 419 | 420 | x_2 = self.X2[re_index] 421 | y_2 = self.Y2[re_index] 422 | 423 | if self.transform is not None: 424 | 425 | x_1 = Image.fromarray(x_1) 426 | x_1 = self.transform(x_1) 427 | 428 | x_2 = Image.fromarray(x_2) 429 | x_2 = self.transform(x_2) 430 | 431 | return index,x_1,y_1,x_2,y_2 432 | 433 | # get_CIFAR10('./dataset') 434 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from .vgg import VGG 2 | from .resnet import ResNet18, ResNet34 3 | from .resnet64 import ResNet18_64 4 | from .wa_model import get_wa_net 5 | from .lossnet import LossNet 6 | from .linear import LeNet 7 | from .mobilenet import MobileNet 8 | from .shufflenet import ShuffleNet 9 | from .wideresnet import WideResNet 10 | -------------------------------------------------------------------------------- /models/gcn.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.init as init 5 | import torch.nn.functional as F 6 | from torch.nn.parameter import Parameter 7 | from torch.nn.modules.module import Module 8 | 9 | class GraphConvolution(Module): 10 | """ 11 | Simple GCN layer, similar to https://arxiv.org/abs/1609.02907 12 | """ 13 | 14 | def __init__(self, in_features, out_features, bias=True): 15 | super(GraphConvolution, self).__init__() 16 | self.in_features = in_features 17 | self.out_features = out_features 18 | self.weight = Parameter(torch.FloatTensor(in_features, out_features)) 19 | if bias: 20 | self.bias = Parameter(torch.FloatTensor(out_features)) 21 | else: 22 | self.register_parameter('bias', None) 23 | self.reset_parameters() 24 | 25 | def reset_parameters(self): 26 | stdv = 1. 
/ math.sqrt(self.weight.size(1)) 27 | self.weight.data.uniform_(-stdv, stdv) 28 | if self.bias is not None: 29 | self.bias.data.uniform_(-stdv, stdv) 30 | 31 | def forward(self, input, adj): 32 | support = torch.mm(input, self.weight) 33 | output = torch.spmm(adj, support) 34 | if self.bias is not None: 35 | return output + self.bias 36 | else: 37 | return output 38 | 39 | def __repr__(self): 40 | return self.__class__.__name__ + ' (' \ 41 | + str(self.in_features) + ' -> ' \ 42 | + str(self.out_features) + ')' 43 | 44 | 45 | class GCN(nn.Module): 46 | def __init__(self, nfeat, nhid, nclass, dropout): 47 | """r 48 | nfeat: input feature dimension 49 | nhid: the hidden layer dimension 50 | nclass: the output dimension 51 | """ 52 | super(GCN, self).__init__() 53 | 54 | self.gc1 = GraphConvolution(nfeat, nhid) 55 | self.gc2 = GraphConvolution(nhid, nhid) 56 | self.gc3 = GraphConvolution(nhid, nclass) 57 | self.dropout = dropout 58 | self.linear = nn.Linear(nclass, 1) 59 | 60 | def forward(self, x, adj): 61 | x = F.relu(self.gc1(x, adj)) 62 | feat = F.dropout(x, self.dropout, training=self.training) 63 | x = self.gc3(feat, adj) 64 | #x = self.linear(x) 65 | # x = F.softmax(x, dim=1) 66 | return torch.sigmoid(x), feat, torch.cat((feat,x),1) 67 | -------------------------------------------------------------------------------- /models/linear.py: -------------------------------------------------------------------------------- 1 | # linear model and mlp class 2 | from torch import nn 3 | import numpy as np 4 | import torch.nn.functional as F 5 | import torch 6 | 7 | 8 | class linMod(nn.Module): 9 | def __init__(self, dim, n_class): 10 | super(linMod, self).__init__() 11 | self.dim = dim 12 | self.lm = nn.Linear(int(np.prod(dim)), n_class) 13 | def forward(self, x): 14 | x = x.view(-1, int(np.prod(self.dim))) 15 | out = self.lm(x) 16 | return out, x 17 | def get_embedding_dim(self): 18 | return int(np.prod(self.dim)) 19 | 20 | # mlp model class 21 | class mlpMod(nn.Module): 22 | def __init__(self, dim, n_class, embSize=256): 23 | super(mlpMod, self).__init__() 24 | self.embSize = embSize 25 | self.dim = int(np.prod(dim)) 26 | self.lm1 = nn.Linear(self.dim, embSize) 27 | self.lm2 = nn.Linear(embSize, n_class) 28 | def forward(self, x): 29 | x = x.view(-1, self.dim) 30 | emb = F.relu(self.lm1(x)) 31 | out = self.lm2(emb) 32 | return out, emb 33 | def get_embedding_dim(self): 34 | return self.embSize 35 | 36 | 37 | class Net1_fea(nn.Module): 38 | """ 39 | Feature extractor network 40 | """ 41 | 42 | def __init__(self): 43 | super(Net1_fea, self).__init__() 44 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 45 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 46 | self.conv2_drop = nn.Dropout2d() 47 | 48 | def forward(self,x): 49 | 50 | x1 = F.relu(F.max_pool2d(self.conv1(x), 2)) 51 | x2 = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x1)), 2)) 52 | # print (x.shape) 53 | x = x2.view(x2.shape[0], 320) 54 | 55 | return x, [x1, x2] 56 | 57 | class Net1_clf(nn.Module): 58 | """ 59 | Classifier network, also give the latent space and embedding dimension 60 | """ 61 | def __init__(self, n_class): 62 | super(Net1_clf,self).__init__() 63 | self.fc1 = nn.Linear(320, 50) 64 | self.fc2 = nn.Linear(50, n_class) 65 | 66 | def forward(self,x): 67 | 68 | e1 = F.relu(self.fc1(x)) 69 | x = F.dropout(e1, training=self.training) 70 | x = self.fc2(x) 71 | 72 | return x, e1 73 | 74 | def get_embedding_dim(self): 75 | return 50 76 | 77 | 78 | class Net1_dis(nn.Module): 79 | 80 | """ 81 | Discriminator network, output with 
[0,1] (sigmoid function) 82 | """ 83 | def __init__(self): 84 | super(Net1_dis,self).__init__() 85 | self.fc1 = nn.Linear(320, 50) 86 | self.fc2 = nn.Linear(50, 1) 87 | 88 | def forward(self,x): 89 | e1 = F.relu(self.fc1(x)) 90 | x = F.dropout(e1, training=self.training) 91 | x = self.fc2(x) 92 | x = torch.sigmoid(x) 93 | 94 | return x 95 | 96 | # net 97 | class LeNet(nn.Module): 98 | def __init__(self, n_class=10, bayesian=False): 99 | super(LeNet, self).__init__() 100 | self.feature_extractor = Net1_fea() 101 | self.linear = Net1_clf(n_class) 102 | self.discriminator = Net1_dis() 103 | self.bayesian = bayesian 104 | 105 | def forward(self, x, intermediate=False): 106 | x, in_values = self.feature_extractor(x) 107 | x = F.dropout(x, p=0.2, training=self.bayesian) 108 | x, e1 = self.linear(x) 109 | 110 | if intermediate == True: 111 | return x, e1, in_values 112 | else: 113 | return x, e1 114 | 115 | def get_embedding_dim(self): 116 | return 50 117 | -------------------------------------------------------------------------------- /models/lossnet.py: -------------------------------------------------------------------------------- 1 | '''Loss Prediction Module in PyTorch. 2 | Reference: 3 | [Yoo et al. 2019] Learning Loss for Active Learning (https://arxiv.org/abs/1905.03677) 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | # https://github.com/Mephisto405/Learning-Loss-for-Active-Learning 10 | class LossNet(nn.Module): 11 | def __init__(self, feature_sizes=[32, 16, 8, 4], 12 | num_channels=[64, 128, 256, 512], interm_dim=128): 13 | super(LossNet, self).__init__() 14 | 15 | self.GAP1 = nn.AvgPool2d(feature_sizes[0]) 16 | self.GAP2 = nn.AvgPool2d(feature_sizes[1]) 17 | self.GAP3 = nn.AvgPool2d(feature_sizes[2]) 18 | self.GAP4 = nn.AvgPool2d(feature_sizes[3]) 19 | 20 | self.FC1 = nn.Linear(num_channels[0], interm_dim) 21 | self.FC2 = nn.Linear(num_channels[1], interm_dim) 22 | self.FC3 = nn.Linear(num_channels[2], interm_dim) 23 | self.FC4 = nn.Linear(num_channels[3], interm_dim) 24 | 25 | self.linear = nn.Linear(4 * interm_dim, 1) 26 | 27 | def forward(self, features,intermediate=False): 28 | out1 = self.GAP1(features[0]) 29 | out1 = out1.view(out1.size(0), -1) 30 | out1 = F.relu(self.FC1(out1)) 31 | 32 | out2 = self.GAP2(features[1]) 33 | out2 = out2.view(out2.size(0), -1) 34 | out2 = F.relu(self.FC2(out2)) 35 | 36 | out3 = self.GAP3(features[2]) 37 | out3 = out3.view(out3.size(0), -1) 38 | out3 = F.relu(self.FC3(out3)) 39 | 40 | out4 = self.GAP4(features[3]) 41 | out4 = out4.view(out4.size(0), -1) 42 | out4 = F.relu(self.FC4(out4)) 43 | 44 | out = self.linear(torch.cat((out1, out2, out3, out4), 1)) 45 | if intermediate: 46 | return out,torch.cat((out1, out2, out3, out4),1),[out1, out2, out3, out4] 47 | else: 48 | return out,torch.cat((out1, out2, out3, out4),1) 49 | -------------------------------------------------------------------------------- /models/mobilenet.py: -------------------------------------------------------------------------------- 1 | '''MobileNet in PyTorch. 2 | 3 | See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" 4 | for more details. 
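Each Block below is a depthwise-separable convolution: a 3x3 depthwise convolution
(groups equal to the number of input channels) followed by a 1x1 pointwise convolution
that mixes channels, which is what keeps the parameter and FLOP count low compared to
standard 3x3 convolutions.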
5 | '''
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | 
10 | 
11 | class Block(nn.Module):
12 |     '''Depthwise conv + Pointwise conv'''
13 |     def __init__(self, in_planes, out_planes, stride=1):
14 |         super(Block, self).__init__()
15 |         self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False)
16 |         self.bn1 = nn.BatchNorm2d(in_planes)
17 |         self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
18 |         self.bn2 = nn.BatchNorm2d(out_planes)
19 | 
20 |     def forward(self, x):
21 |         out = F.relu(self.bn1(self.conv1(x)))
22 |         out = F.relu(self.bn2(self.conv2(out)))
23 |         return out
24 | 
25 | 
26 | class MobileNet(nn.Module):
27 |     # (128,2) means conv planes=128, conv stride=2, by default conv stride=1
28 |     cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]
29 | 
30 |     def __init__(self, channels=3, num_classes=10, dropout=False):
31 |         super(MobileNet, self).__init__()
32 |         self.conv1 = nn.Conv2d(channels, 32, kernel_size=3, stride=1, padding=1, bias=False)
33 |         self.bn1 = nn.BatchNorm2d(32)
34 |         self.layers = self._make_layers(in_planes=32)
35 |         self.linear = nn.Linear(1024, num_classes)
36 |         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
37 |         self.dropout = dropout
38 | 
39 |     def _make_layers(self, in_planes):
40 |         layers = []
41 |         for x in self.cfg:
42 |             out_planes = x if isinstance(x, int) else x[0]
43 |             stride = 1 if isinstance(x, int) else x[1]
44 |             layers.append(Block(in_planes, out_planes, stride))
45 |             in_planes = out_planes
46 |         return nn.Sequential(*layers)
47 | 
48 |     def forward(self, x, intermediate=False):
49 |         out1 = F.relu(self.bn1(self.conv1(x)))
50 |         out1 = self.layers(out1)
51 |         out2 = F.avg_pool2d(out1, 2)        # fixed: pool the conv-stack output (previously referenced an undefined name)
52 |         out3 = self.avgpool(out2)
53 |         out = out3.view(out3.size(0), -1)   # flatten to the 1024-d embedding
54 |         # optional dropout on the embedding
55 |         if self.dropout:
56 |             out = F.dropout(out, p=0.2, training=True)
57 |         logits = self.linear(out)
58 |         if intermediate:
59 |             return logits, out, [out1, out2, out3]
60 |         else:
61 |             return logits, out
62 | 
63 | 
64 | def test():
65 |     net = MobileNet()
66 |     x = torch.randn(1,3,32,32)
67 |     y, _ = net(x)   # forward returns (logits, embedding)
68 |     print(y.size())
69 | 
70 | # test()
71 | 
-------------------------------------------------------------------------------- /models/resnet.py: --------------------------------------------------------------------------------
1 | '''ResNet in PyTorch.
2 | 
3 | For Pre-activation ResNet, see 'preact_resnet.py'.
4 | 
5 | Reference:
6 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
7 |     Deep Residual Learning for Image Recognition.
arXiv:1512.03385 8 | ''' 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | import pdb 13 | from torch.autograd import Variable 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, in_planes, planes, stride=1): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 22 | self.bn1 = nn.BatchNorm2d(planes) 23 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 24 | self.bn2 = nn.BatchNorm2d(planes) 25 | 26 | self.shortcut = nn.Sequential() 27 | if stride != 1 or in_planes != self.expansion*planes: 28 | self.shortcut = nn.Sequential( 29 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 30 | nn.BatchNorm2d(self.expansion*planes) 31 | ) 32 | 33 | def forward(self, x): 34 | out = F.relu(self.bn1(self.conv1(x))) 35 | out = self.bn2(self.conv2(out)) 36 | out += self.shortcut(x) 37 | out = F.relu(out) 38 | return out 39 | 40 | class Bottleneck(nn.Module): 41 | expansion = 4 42 | 43 | def __init__(self, in_planes, planes, stride=1): 44 | super(Bottleneck, self).__init__() 45 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 46 | self.bn1 = nn.BatchNorm2d(planes) 47 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 48 | self.bn2 = nn.BatchNorm2d(planes) 49 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 50 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 51 | 52 | self.shortcut = nn.Sequential() 53 | if stride != 1 or in_planes != self.expansion*planes: 54 | self.shortcut = nn.Sequential( 55 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 56 | nn.BatchNorm2d(self.expansion*planes) 57 | ) 58 | 59 | def forward(self, x): 60 | out = F.relu(self.bn1(self.conv1(x))) 61 | out = F.relu(self.bn2(self.conv2(out))) 62 | out = self.bn3(self.conv3(out)) 63 | out += self.shortcut(x) 64 | out = F.relu(out) 65 | return out 66 | 67 | 68 | class resnet_fea(nn.Module): 69 | def __init__(self, block, num_blocks): 70 | super(resnet_fea, self).__init__() 71 | self.in_planes = 16 72 | 73 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False) 74 | self.bn1 = nn.BatchNorm2d(16) 75 | self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1) 76 | self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2) 77 | self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2) 78 | self.layer4 = self._make_layer(block, 128, num_blocks[3], stride=2) 79 | 80 | def _make_layer(self, block, planes, num_blocks, stride): 81 | strides = [stride] + [1]*(num_blocks-1) 82 | layers = [] 83 | for stride in strides: 84 | layers.append(block(self.in_planes, planes, stride)) 85 | self.in_planes = planes * block.expansion 86 | return nn.Sequential(*layers) 87 | 88 | def forward(self, x, img_size, intermediate=False): 89 | out = F.relu(self.bn1(self.conv1(x))) 90 | out1 = self.layer1(out) 91 | out2 = self.layer2(out1) 92 | out3 = self.layer3(out2) 93 | out4 = self.layer4(out3) 94 | avg_pool_size = img_size//8 95 | out = F.avg_pool2d(out4, avg_pool_size) 96 | out = out.view(out.size(0), -1) 97 | return out, [out1, out2, out3, out4] 98 | 99 | class resnet_clf(nn.Module): 100 | def __init__(self, block, n_class=10): 101 | super(resnet_clf, self).__init__() 102 | self.linear = nn.Linear(128 * block.expansion, n_class) 103 | 104 | def 
forward(self, x): 105 | # emb = x.view(x.size(0), -1) 106 | out = self.linear(x) 107 | return out, x 108 | 109 | class resnet_dis(nn.Module): 110 | def __init__(self, embDim): 111 | super(resnet_dis, self).__init__() 112 | self.dis_fc1 = nn.Linear(embDim, 50) 113 | self.dis_fc2 = nn.Linear(50, 1) 114 | 115 | def forward(self, x): 116 | e1 = F.relu(self.dis_fc1(x)) 117 | x = F.dropout(e1, training=self.training) 118 | x = self.dis_fc2(x) 119 | x = torch.sigmoid(x) 120 | return x 121 | 122 | class ResNet(nn.Module): 123 | def __init__(self, block, num_blocks, n_class=10, bayesian=False): 124 | super(ResNet, self).__init__() 125 | # self.in_planes = 16 126 | self.embDim = 128 * block.expansion 127 | # self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False) 128 | # self.bn1 = nn.BatchNorm2d(16) 129 | # self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1) 130 | # self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2) 131 | # self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2) 132 | # self.layer4 = self._make_layer(block, 128, num_blocks[3], stride=2) 133 | # self.linear = nn.Linear(128 * block.expansion, n_class) 134 | 135 | # self.dis_fc1 = nn.Linear(512, 50) 136 | # self.dis_fc2 = nn.Linear(50, 1) 137 | 138 | self.feature_extractor = resnet_fea(block, num_blocks) 139 | self.linear = resnet_clf(block, n_class) 140 | self.discriminator = resnet_dis(self.embDim) 141 | self.bayesian = bayesian 142 | 143 | # def _make_layer(self, block, planes, num_blocks, stride): 144 | # strides = [stride] + [1]*(num_blocks-1) 145 | # layers = [] 146 | # for stride in strides: 147 | # layers.append(block(self.in_planes, planes, stride)) 148 | # self.in_planes = planes * block.expansion 149 | # return nn.Sequential(*layers) 150 | 151 | # def feature_extractor(self, x): # feature extractor 152 | # out = F.relu(self.bn1(self.conv1(x))) 153 | # out = self.layer1(out) 154 | # out = self.layer2(out) 155 | # out = self.layer3(out) 156 | # out = self.layer4(out) 157 | # out = F.avg_pool2d(out, 4) 158 | # emb = out.view(out.size(0), -1) 159 | # return emb 160 | 161 | 162 | def forward(self, x, intermediate=False): 163 | out, in_values = self.feature_extractor(x, x.shape[2]) 164 | # apply dropout to approximate the bayesian networks 165 | out = F.dropout(out, p=0.2, training=self.bayesian) 166 | # emb = emb.view(emb.size(0), -1) 167 | out, emb = self.linear(out) 168 | if intermediate == True: 169 | return out, emb, in_values 170 | else: 171 | return out, emb 172 | 173 | def get_embedding_dim(self): 174 | return self.embDim 175 | 176 | 177 | def ResNet18(n_class, bayesian=False): 178 | return ResNet(BasicBlock, [2,2,2,2], n_class=n_class, bayesian=bayesian) 179 | 180 | def ResNet34(n_class, bayesian=False): 181 | return ResNet(BasicBlock, [3,4,6,3], n_class=n_class, bayesian=bayesian) 182 | 183 | def ResNet50(n_class, bayesian=False): 184 | return ResNet(Bottleneck, [3,4,6,3], n_class=n_class, bayesian=bayesian) 185 | 186 | def ResNet101(n_class, bayesian=False): 187 | return ResNet(Bottleneck, [3,4,23,3], n_class=n_class, bayesian=bayesian) 188 | 189 | def ResNet152(n_class, bayesian=False): 190 | return ResNet(Bottleneck, [3,8,36,3], n_class=n_class, bayesian=bayesian) 191 | 192 | def test(): 193 | net = ResNet18() 194 | y = net(Variable(torch.randn(1,3,32,32))) 195 | print(y.size()) 196 | 197 | # test() 198 | -------------------------------------------------------------------------------- /models/resnet64.py: 
-------------------------------------------------------------------------------- 1 | '''ResNet in PyTorch. 2 | 3 | For Pre-activation ResNet, see 'preact_resnet.py'. 4 | 5 | Reference: 6 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 7 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 8 | ''' 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | import pdb 13 | from torch.autograd import Variable 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, in_planes, planes, stride=1): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 22 | self.bn1 = nn.BatchNorm2d(planes) 23 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 24 | self.bn2 = nn.BatchNorm2d(planes) 25 | 26 | self.shortcut = nn.Sequential() 27 | if stride != 1 or in_planes != self.expansion*planes: 28 | self.shortcut = nn.Sequential( 29 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 30 | nn.BatchNorm2d(self.expansion*planes) 31 | ) 32 | 33 | def forward(self, x): 34 | out = F.relu(self.bn1(self.conv1(x))) 35 | out = self.bn2(self.conv2(out)) 36 | out += self.shortcut(x) 37 | out = F.relu(out) 38 | return out 39 | 40 | class Bottleneck(nn.Module): 41 | expansion = 4 42 | 43 | def __init__(self, in_planes, planes, stride=1): 44 | super(Bottleneck, self).__init__() 45 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 46 | self.bn1 = nn.BatchNorm2d(planes) 47 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 48 | self.bn2 = nn.BatchNorm2d(planes) 49 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 50 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 51 | 52 | self.shortcut = nn.Sequential() 53 | if stride != 1 or in_planes != self.expansion*planes: 54 | self.shortcut = nn.Sequential( 55 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 56 | nn.BatchNorm2d(self.expansion*planes) 57 | ) 58 | 59 | def forward(self, x): 60 | out = F.relu(self.bn1(self.conv1(x))) 61 | out = F.relu(self.bn2(self.conv2(out))) 62 | out = self.bn3(self.conv3(out)) 63 | out += self.shortcut(x) 64 | out = F.relu(out) 65 | return out 66 | 67 | 68 | class resnet_fea(nn.Module): 69 | def __init__(self, block, num_blocks): 70 | super(resnet_fea, self).__init__() 71 | self.in_planes = 16 72 | 73 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False) 74 | self.bn1 = nn.BatchNorm2d(16) 75 | self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1) 76 | self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2) 77 | self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2) 78 | self.layer4 = self._make_layer(block, 128, num_blocks[3], stride=2) 79 | 80 | def _make_layer(self, block, planes, num_blocks, stride): 81 | strides = [stride] + [1]*(num_blocks-1) 82 | layers = [] 83 | for stride in strides: 84 | layers.append(block(self.in_planes, planes, stride)) 85 | self.in_planes = planes * block.expansion 86 | return nn.Sequential(*layers) 87 | 88 | def forward(self, x, img_size, intermediate=False): 89 | out = F.relu(self.bn1(self.conv1(x))) 90 | out1 = self.layer1(out) 91 | out2 = self.layer2(out1) 92 | out3 = self.layer3(out2) 93 | out4 = self.layer4(out3) 94 | out = F.avg_pool2d(out4, 4) 95 | out = out.view(out.size(0), -1) 
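        # Note: for the 64x64 inputs this variant targets, layer4 produces an 8x8
        # feature map, so the 4x4 average pooling above leaves a 2x2 spatial grid;
        # the flattened feature is therefore 128 * block.expansion * 4 dimensional,
        # matching embDim and the classifier's Linear layer defined below
        # (models/resnet.py, by contrast, pools its map down to 1x1).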
96 | return out, [out1, out2, out3, out4] 97 | 98 | class resnet_clf(nn.Module): 99 | def __init__(self, block, n_class=10): 100 | super(resnet_clf, self).__init__() 101 | self.linear = nn.Linear(128 * block.expansion * 4, n_class) 102 | 103 | def forward(self, x): 104 | # emb = x.view(x.size(0), -1) 105 | out = self.linear(x) 106 | return out, x 107 | 108 | class resnet_dis(nn.Module): 109 | def __init__(self, embDim): 110 | super(resnet_dis, self).__init__() 111 | self.dis_fc1 = nn.Linear(embDim, 50) 112 | self.dis_fc2 = nn.Linear(50, 1) 113 | 114 | def forward(self, x): 115 | e1 = F.relu(self.dis_fc1(x)) 116 | x = F.dropout(e1, training=self.training) 117 | x = self.dis_fc2(x) 118 | x = torch.sigmoid(x) 119 | return x 120 | 121 | class ResNet(nn.Module): 122 | def __init__(self, block, num_blocks, n_class=10, bayesian=False): 123 | super(ResNet, self).__init__() 124 | # self.in_planes = 16 125 | self.embDim = 128 * block.expansion * 4 126 | # self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False) 127 | # self.bn1 = nn.BatchNorm2d(16) 128 | # self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1) 129 | # self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2) 130 | # self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2) 131 | # self.layer4 = self._make_layer(block, 128, num_blocks[3], stride=2) 132 | # self.linear = nn.Linear(128 * block.expansion, n_class) 133 | 134 | # self.dis_fc1 = nn.Linear(512, 50) 135 | # self.dis_fc2 = nn.Linear(50, 1) 136 | 137 | self.feature_extractor = resnet_fea(block, num_blocks) 138 | self.linear = resnet_clf(block, n_class) 139 | self.discriminator = resnet_dis(self.embDim) 140 | self.bayesian = bayesian 141 | 142 | # def _make_layer(self, block, planes, num_blocks, stride): 143 | # strides = [stride] + [1]*(num_blocks-1) 144 | # layers = [] 145 | # for stride in strides: 146 | # layers.append(block(self.in_planes, planes, stride)) 147 | # self.in_planes = planes * block.expansion 148 | # return nn.Sequential(*layers) 149 | 150 | # def feature_extractor(self, x): # feature extractor 151 | # out = F.relu(self.bn1(self.conv1(x))) 152 | # out = self.layer1(out) 153 | # out = self.layer2(out) 154 | # out = self.layer3(out) 155 | # out = self.layer4(out) 156 | # out = F.avg_pool2d(out, 4) 157 | # emb = out.view(out.size(0), -1) 158 | # return emb 159 | 160 | 161 | def forward(self, x, intermediate=False): 162 | out, in_values = self.feature_extractor(x, x.shape[2]) 163 | # apply dropout to approximate the bayesian networks 164 | out = F.dropout(out, p=0.2, training=self.bayesian) 165 | # emb = emb.view(emb.size(0), -1) 166 | out, emb = self.linear(out) 167 | if intermediate == True: 168 | return out, emb, in_values 169 | else: 170 | return out, emb 171 | 172 | def get_embedding_dim(self): 173 | return self.embDim 174 | 175 | 176 | def ResNet18_64(n_class, bayesian=False): 177 | return ResNet(BasicBlock, [2,2,2,2], n_class=n_class, bayesian=bayesian) 178 | 179 | def ResNet34_64(n_class, bayesian=False): 180 | return ResNet(BasicBlock, [3,4,6,3], n_class=n_class, bayesian=bayesian) 181 | 182 | def ResNet50_64(n_class, bayesian=False): 183 | return ResNet(Bottleneck, [3,4,6,3], n_class=n_class, bayesian=bayesian) 184 | 185 | def ResNet101_64(n_class, bayesian=False): 186 | return ResNet(Bottleneck, [3,4,23,3], n_class=n_class, bayesian=bayesian) 187 | 188 | def ResNet152_64(n_class, bayesian=False): 189 | return ResNet(Bottleneck, [3,8,36,3], n_class=n_class, bayesian=bayesian) 190 | 191 | def test(): 192 
| net = ResNet18() 193 | y = net(Variable(torch.randn(1,3,32,32))) 194 | print(y.size()) 195 | 196 | # test() 197 | -------------------------------------------------------------------------------- /models/shufflenet.py: -------------------------------------------------------------------------------- 1 | '''ShuffleNet in PyTorch. 2 | 3 | See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details. 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class ShuffleBlock(nn.Module): 11 | def __init__(self, groups): 12 | super(ShuffleBlock, self).__init__() 13 | self.groups = groups 14 | 15 | def forward(self, x): 16 | '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]''' 17 | N,C,H,W = x.size() 18 | g = self.groups 19 | return x.view(N,g, int(C/g),H,W).permute(0,2,1,3,4).contiguous().view(N,C,H,W) 20 | 21 | 22 | class Bottleneck(nn.Module): 23 | def __init__(self, in_planes, out_planes, stride, groups): 24 | super(Bottleneck, self).__init__() 25 | self.stride = stride 26 | 27 | mid_planes = int(out_planes/4) 28 | g = 1 if in_planes==24 else groups 29 | # print ("in plance and mid planes", in_planes, mid_planes) 30 | self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False) 31 | self.bn1 = nn.BatchNorm2d(mid_planes) 32 | self.shuffle1 = ShuffleBlock(groups=g) 33 | self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False) 34 | self.bn2 = nn.BatchNorm2d(mid_planes) 35 | self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False) 36 | self.bn3 = nn.BatchNorm2d(out_planes) 37 | 38 | self.shortcut = nn.Sequential() 39 | if stride == 2: 40 | self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1)) 41 | 42 | def forward(self, x): 43 | out = F.relu(self.bn1(self.conv1(x))) 44 | out = self.shuffle1(out) 45 | out = F.relu(self.bn2(self.conv2(out))) 46 | out = self.bn3(self.conv3(out)) 47 | res = self.shortcut(x) 48 | out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res) 49 | return out 50 | 51 | 52 | class ShuffleNet(nn.Module): 53 | def __init__(self, cfg, channels=3, num_classes=10, dropout=False): 54 | super(ShuffleNet, self).__init__() 55 | # print (cfg, channels, num_classes) 56 | out_planes = cfg['out_planes'] 57 | num_blocks = cfg['num_blocks'] 58 | groups = cfg['groups'] 59 | 60 | self.conv1 = nn.Conv2d(channels, 24, kernel_size=1, bias=False) 61 | self.bn1 = nn.BatchNorm2d(24) 62 | self.in_planes = 24 63 | self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups) 64 | self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups) 65 | self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups) 66 | self.linear = nn.Linear(out_planes[2], num_classes) 67 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 68 | self.dropout = dropout 69 | 70 | def _make_layer(self, out_planes, num_blocks, groups): 71 | layers = [] 72 | for i in range(num_blocks): 73 | stride = 2 if i == 0 else 1 74 | cat_planes = self.in_planes if i == 0 else 0 75 | layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups)) 76 | self.in_planes = out_planes 77 | return nn.Sequential(*layers) 78 | 79 | def forward(self, x, intermediate=False): 80 | out0 = F.relu(self.bn1(self.conv1(x))) 81 | out1 = self.layer1(out0) 82 | out2 = self.layer2(out1) 83 | out3 = self.layer3(out2) 84 | out = F.avg_pool2d(out3, 
4) 85 | out = self.avgpool(out) 86 | e1 = out.view(out.size(0), -1) 87 | if self.dropout: 88 | e1 = F.dropout(e1, p=0.5, training=True) 89 | out = self.linear(e1) 90 | 91 | in_values = [out0, out1, out2, out3] 92 | if intermediate==True: 93 | return out, e1, in_values 94 | else: 95 | return out, e1 96 | 97 | 98 | def ShuffleNetG2(channels=3,num_classes=10, dropout=False): 99 | cfg = { 100 | 'out_planes': [200,400,800], 101 | 'num_blocks': [4,8,4], 102 | 'groups': 2 103 | } 104 | # print (cfg) 105 | # print (channels, num_classes) 106 | return ShuffleNet(channels=channels,num_classes=num_classes, dropout=dropout, cfg=cfg) 107 | 108 | def ShuffleNetG3(channels=3, num_classes=10): 109 | cfg = { 110 | 'out_planes': [240,480,960], 111 | 'num_blocks': [4,8,4], 112 | 'groups': 3 113 | } 114 | return ShuffleNet(channels=channels, num_classes=num_classes, cfg=cfg) 115 | 116 | 117 | def test(): 118 | net = ShuffleNetG2() 119 | x = torch.randn(1,3,32,32) 120 | y, _ = net(x) 121 | print(y) 122 | 123 | # test() 124 | -------------------------------------------------------------------------------- /models/vgg.py: -------------------------------------------------------------------------------- 1 | '''VGG11/13/16/19 in Pytorch.''' 2 | import torch 3 | import torch.nn as nn 4 | 5 | cfg = { 6 | 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 7 | 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 8 | 'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 9 | 'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 10 | } 11 | 12 | 13 | class VGG(nn.Module): 14 | def __init__(self, vgg_name): 15 | super(VGG, self).__init__() 16 | self.features = self._make_layers(cfg[vgg_name]) 17 | self.classifier = nn.Linear(512, 10) 18 | 19 | def forward(self, x): 20 | out = self.features(x) 21 | emb = out.view(out.size(0), -1) 22 | out = self.classifier(emb) 23 | return out, emb 24 | 25 | def _make_layers(self, cfg): 26 | layers = [] 27 | in_channels = 3 28 | for x in cfg: 29 | if x == 'M': 30 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 31 | else: 32 | layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1), 33 | nn.BatchNorm2d(x), 34 | nn.ReLU(inplace=True)] 35 | in_channels = x 36 | layers += [nn.AvgPool2d(kernel_size=1, stride=1)] 37 | return nn.Sequential(*layers) 38 | 39 | def get_embedding_dim(self): 40 | return 512 41 | 42 | def test(): 43 | net = VGG('VGG11') 44 | x = torch.randn(2,3,32,32) 45 | y = net(x) 46 | print(y.size()) 47 | -------------------------------------------------------------------------------- /models/wa_model.py: -------------------------------------------------------------------------------- 1 | # WAAL net prepared for MNIST and Fashion_mnist 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch 5 | import torch.nn.init as init 6 | 7 | def get_wa_net(name): 8 | if name.lower() == 'fashionmnist': 9 | return Net1_fea, Net1_clf, Net1_dis 10 | # elif name.lower() == 'svhn': 11 | # return VGG_10_fea, VGG_10_clf, VGG_10_dis 12 | elif name.lower() == 'cifar10': 13 | return VGG_10_fea, VGG_10_clf, VGG_10_dis 14 | elif name.lower() == 'mnist': 15 | return Net1_fea, Net1_clf, Net1_dis 16 | 17 | 18 | 19 | # net_1 for Mnist and Fashion_mnist 20 | 21 | class Net1_fea(nn.Module): 22 | """ 23 | Feature extractor network 24 | """ 25 | 26 | def __init__(self): 27 | super(Net1_fea, self).__init__() 28 | self.conv1 
= nn.Conv2d(1, 10, kernel_size=5) 29 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 30 | self.conv2_drop = nn.Dropout2d() 31 | 32 | def forward(self,x): 33 | 34 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 35 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) 36 | # print (x.shape) 37 | x = x.view(x.shape[0], 320) 38 | 39 | return x 40 | 41 | class Net1_clf(nn.Module): 42 | """ 43 | Classifier network, also give the latent space and embedding dimension 44 | """ 45 | 46 | def __init__(self): 47 | super(Net1_clf,self).__init__() 48 | self.fc1 = nn.Linear(320, 50) 49 | self.fc2 = nn.Linear(50, 10) 50 | 51 | def forward(self,x): 52 | 53 | e1 = F.relu(self.fc1(x)) 54 | x = F.dropout(e1, training=self.training) 55 | x = self.fc2(x) 56 | 57 | return x, e1 58 | 59 | def get_embedding_dim(self): 60 | 61 | return 50 62 | 63 | class Net1_dis(nn.Module): 64 | 65 | """ 66 | Discriminator network, output with [0,1] (sigmoid function) 67 | """ 68 | def __init__(self): 69 | super(Net1_dis,self).__init__() 70 | self.fc1 = nn.Linear(320, 50) 71 | self.fc2 = nn.Linear(50, 1) 72 | 73 | def forward(self,x): 74 | e1 = F.relu(self.fc1(x)) 75 | x = F.dropout(e1, training=self.training) 76 | x = self.fc2(x) 77 | x = torch.sigmoid(x) 78 | 79 | return x 80 | 81 | 82 | 83 | 84 | # VGG_three parts 85 | cfg = { 86 | 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 87 | 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 88 | 'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 89 | 'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 90 | } 91 | 92 | 93 | ## VGG for CIFAR 10/SVHN (since they are 32 * 32) 94 | 95 | class VGG_10_fea(nn.Module): 96 | 97 | def __init__(self): 98 | 99 | super(VGG_10_fea, self).__init__() 100 | # the vgg model can be changed to vgg11/vgg16 101 | # vgg 11 for svhn 102 | # vgg 16 for cifar 10 and cifar 100 103 | self.features = self._make_layers(cfg['VGG16']) 104 | 105 | def forward(self, x): 106 | out = self.features(x) 107 | out = out.view(out.size(0), -1) 108 | 109 | return out 110 | 111 | def _make_layers(self, cfg): 112 | 113 | layers = [] 114 | in_channels = 3 115 | for x in cfg: 116 | if x == 'M': 117 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 118 | else: 119 | layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1), 120 | nn.BatchNorm2d(x), 121 | nn.ReLU(inplace=True)] 122 | in_channels = x 123 | layers += [nn.AvgPool2d(kernel_size=1, stride=1)] 124 | return nn.Sequential(*layers) 125 | 126 | 127 | class VGG_10_clf(nn.Module): 128 | 129 | def __init__(self): 130 | 131 | super(VGG_10_clf, self).__init__() 132 | self.fc1 = nn.Linear(512,50) 133 | self.fc2 = nn.Linear(50,10) 134 | 135 | def forward(self,x): 136 | e1 = F.relu(self.fc1(x)) 137 | x = F.dropout(e1, training=self.training) 138 | x = self.fc2(x) 139 | 140 | return x, e1 141 | 142 | def get_embedding_dim(self): 143 | 144 | return 50 145 | 146 | 147 | class VGG_10_dis(nn.Module): 148 | 149 | def __init__(self): 150 | 151 | super(VGG_10_dis,self).__init__() 152 | self.fc1 = nn.Linear(512, 50) 153 | self.fc2 = nn.Linear(50, 1) 154 | 155 | def weight_init(self): 156 | for block in self._modules: 157 | for m in self._modules[block]: 158 | kaiming_init(m) 159 | 160 | def forward(self,x): 161 | 162 | e1 = F.relu(self.fc1(x)) 163 | x = F.dropout(e1, training=self.training) 164 | x = self.fc2(x) 165 | x = torch.sigmoid(x) 166 | 167 | return x 168 | 
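# Illustrative sketch (not part of the original training code) of how the three
# pieces returned by get_wa_net() are composed; the actual WAAL training loop
# lives in query_strategies/wasserstein_adversarial.py:
#
#     fea_cls, clf_cls, dis_cls = get_wa_net('cifar10')
#     fea, clf, dis = fea_cls(), clf_cls(), dis_cls()
#     z = fea(images)          # shared features (512-d for the VGG nets, 320-d for MNIST)
#     logits, emb = clf(z)     # classification head and its 50-d embedding
#     score = dis(z)           # scalar in (0, 1) scoring labeled vs. unlabeled samples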
169 | 170 | def kaiming_init(m): 171 | if isinstance(m, (nn.Linear, nn.Conv2d)): 172 | init.kaiming_normal(m.weight) 173 | if m.bias is not None: 174 | m.bias.data.fill_(0) 175 | elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)): 176 | m.weight.data.fill_(1) 177 | if m.bias is not None: 178 | m.bias.data.fill_(0) -------------------------------------------------------------------------------- /models/wideresnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.init as init 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | 7 | import sys 8 | import numpy as np 9 | 10 | def conv3x3(in_planes, out_planes, stride=1): 11 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=True) 12 | 13 | def conv_init(m): 14 | classname = m.__class__.__name__ 15 | if classname.find('Conv') != -1: 16 | init.xavier_uniform_(m.weight, gain=np.sqrt(2)) 17 | init.constant_(m.bias, 0) 18 | elif classname.find('BatchNorm') != -1: 19 | init.constant_(m.weight, 1) 20 | init.constant_(m.bias, 0) 21 | 22 | class wide_basic(nn.Module): 23 | def __init__(self, in_planes, planes, dropout_rate, stride=1): 24 | super(wide_basic, self).__init__() 25 | self.bn1 = nn.BatchNorm2d(in_planes) 26 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, bias=True) 27 | self.dropout = nn.Dropout(p=dropout_rate) 28 | self.bn2 = nn.BatchNorm2d(planes) 29 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=True) 30 | 31 | self.shortcut = nn.Sequential() 32 | if stride != 1 or in_planes != planes: 33 | self.shortcut = nn.Sequential( 34 | nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=True), 35 | ) 36 | 37 | def forward(self, x): 38 | out = self.dropout(self.conv1(F.relu(self.bn1(x)))) 39 | out = self.conv2(F.relu(self.bn2(out))) 40 | out += self.shortcut(x) 41 | 42 | return out 43 | 44 | class WideResNet(nn.Module): 45 | def __init__(self, depth=10, widen_factor=8, channels=3, dropout_rate=0.3, num_classes=10): 46 | super(WideResNet, self).__init__() 47 | self.in_planes = 16 48 | # dummy variable, to compatible with resnet dropout 49 | self.dropout = False 50 | 51 | assert ((depth-4)%6 ==0), 'Wide-resnet depth should be 6n+4' 52 | n = (depth-4)/6 53 | k = widen_factor 54 | 55 | print('| Wide-Resnet %dx%d' %(depth, k)) 56 | nStages = [16, 16*k, 32*k, 64*k] 57 | 58 | self.conv1 = conv3x3(channels,nStages[0]) 59 | self.layer1 = self._wide_layer(wide_basic, nStages[1], n, dropout_rate, stride=1) 60 | self.layer2 = self._wide_layer(wide_basic, nStages[2], n, dropout_rate, stride=2) 61 | self.layer3 = self._wide_layer(wide_basic, nStages[3], n, dropout_rate, stride=2) 62 | self.bn1 = nn.BatchNorm2d(nStages[3], momentum=0.9) 63 | self.linear = nn.Linear(nStages[3], num_classes) 64 | 65 | def _wide_layer(self, block, planes, num_blocks, dropout_rate, stride): 66 | strides = [stride] + [1]*(int(num_blocks)-1) 67 | layers = [] 68 | 69 | for stride in strides: 70 | layers.append(block(self.in_planes, planes, dropout_rate, stride)) 71 | self.in_planes = planes 72 | 73 | return nn.Sequential(*layers) 74 | 75 | def forward(self, x, intermediate=False): 76 | out_0 = self.conv1(x) 77 | out_1 = self.layer1(out_0) 78 | out_2 = self.layer2(out_1) 79 | out_3 = self.layer3(out_2) 80 | out = F.relu(self.bn1(out_3)) 81 | out = F.avg_pool2d(out, 8) 82 | e1 = out.view(out.size(0), -1) 83 | out = self.linear(e1) 84 | 85 | in_values = 
[out_0, out_1, out_2, out_3]
86 |         if intermediate:
87 |             return out, e1, in_values
88 |         else:
89 |             return out, e1
90 | 
91 | 
92 | if __name__ == '__main__':
93 |     net = WideResNet(depth=16, widen_factor=8, channels=3, dropout_rate=0.3, num_classes=10)
94 |     y = net(Variable(torch.randn(1,3,32,32)))
95 |     print(net)
96 | 
97 |     # print(y[0].size())
--------------------------------------------------------------------------------
/query_strategies/__init__.py:
--------------------------------------------------------------------------------
1 | # Supervised learning + DAL
2 | from .random_sampling import RandomSampling
3 | from .least_confidence import LeastConfidence
4 | from .margin_sampling import MarginSampling
5 | from .entropy_sampling import EntropySampling
6 | from .least_confidence_dropout import LeastConfidenceDropout
7 | from .margin_sampling_dropout import MarginSamplingDropout
8 | from .entropy_sampling_dropout import EntropySamplingDropout
9 | from .kmeans_sampling import KMeansSampling
10 | from .kcenter_greedy import KCenterGreedy
11 | from .bayesian_active_learning_disagreement_dropout import BALDDropout
12 | from .core_set import CoreSet
13 | from .adversarial_bim import AdversarialBIM
14 | from .adversarial_deepfool import AdversarialDeepFool
15 | from .active_learning_by_learning import ActiveLearningByLearning
16 | from .badge_sampling import BadgeSampling
17 | from .baseline_sampling import BaselineSampling
18 | from .wasserstein_adversarial import WAAL
19 | from .learning_loss_for_al import LearningLoss
20 | from .vaal import VAAL
21 | from .batch_active_learning_at_scale import ClusterMarginSampling
22 | from .batch_BALD import BatchBALD
23 | from .uncertainGCN import uncertainGCN
24 | from .coreGCN import coreGCN
25 | from .mcadl import MCADL
26 | 
27 | # Semi-supervised (MixMatch) + DAL
28 | from .ssl_rand import ssl_Random
29 | from .ssl_diff2augkmeans import ssl_Diff2AugKmeans
30 | from .ssl_diff2augdirect import ssl_Diff2AugDirect
31 | from .ssl_consistency import ssl_Consistency
32 | from .ssl_least_confidence import ssl_LC
33 | 
34 | # Other augmentation/semi-supervised methods + random selection
35 | from .aug_uda import uda
36 | from .semi_fixmatch import fixmatch
37 | from .semi_flexmatch import flexmatch
38 | from .semi_pseudolabel import pseudolabel
39 | 
--------------------------------------------------------------------------------
/query_strategies/active_learning_by_learning.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .strategy import Strategy
3 | 
4 | # Implementation of the paper: Active Learning by Learning
5 | # Published in AAAI'15, National Taiwan University
6 | # https://www.csie.ntu.edu.tw/~htlin/paper/doc/aaai15albl.pdf
7 | # Main Idea: Choosing and blending different AL strategies under different scenarios.
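# Usage sketch (hypothetical; the sub-strategies and query budget are placeholders, but
# the constructor signature matches the class below). ALBL keeps one multiplicative
# weight per sub-strategy, samples a strategy each round according to those weights, and
# rewards it with the importance-weighted accuracy of the newly labeled points:
#
#     strategy_list = [
#         LeastConfidence(X, Y, X_te, Y_te, idxs_lb, net, handler, args),
#         MarginSampling(X, Y, X_te, Y_te, idxs_lb, net, handler, args),
#         CoreSet(X, Y, X_te, Y_te, idxs_lb, net, handler, args),
#     ]
#     albl = ActiveLearningByLearning(X, Y, X_te, Y_te, idxs_lb, net, handler, args,
#                                     strategy_list=strategy_list, delta=0.1)
#     q_idxs = albl.query(n=1000)   # indices of the next batch to label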
8 | 9 | 10 | class ActiveLearningByLearning(Strategy): 11 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args, strategy_list, delta = 0.1): 12 | super(ActiveLearningByLearning, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 13 | self.strategy_list = strategy_list 14 | self.n_strategy = len(self.strategy_list) 15 | self.delta = delta 16 | self.w = np.ones((self.n_strategy, )) 17 | self.pmin = 1.0 / (self.n_strategy * 10.0) 18 | self.start = True 19 | self.aw = np.zeros((len(Y))) 20 | self.aw[self.idxs_lb] = 1.0 21 | 22 | def query(self, n): 23 | if not self.start: 24 | idxs_labeled = np.arange(self.n_pool)[self.idxs_lb] 25 | pred_probs = self.predict_prob(self.X[idxs_labeled], self.Y.numpy()[idxs_labeled]) 26 | pred_labels = pred_probs.max(1)[1] 27 | fn = (pred_labels.numpy() == self.Y.numpy()[idxs_labeled]).astype(float) 28 | reward = (fn / self.aw[self.idxs_lb]).mean() 29 | 30 | self.w[self.s_idx] *= np.exp(self.pmin/2.0 * (reward + 1.0 / self.last_p * np.sqrt(np.log(self.n_strategy / self.delta) / self.n_strategy))) 31 | 32 | self.start = False 33 | W = self.w.sum() 34 | p = (1.0 - self.n_strategy * self.pmin) * self.w / W + self.pmin 35 | 36 | for i, stgy in enumerate(self.strategy_list): 37 | print(' {} {}'.format(p[i], type(stgy).__name__)) 38 | 39 | self.s_idx = np.random.choice(np.arange(self.n_strategy), p=p) 40 | print(' select {}'.format(type(self.strategy_list[self.s_idx]).__name__)) 41 | self.strategy_list[self.s_idx].clf = self.clf 42 | q_idxs = self.strategy_list[self.s_idx].query(n) 43 | self.aw[q_idxs] = p[self.s_idx] 44 | self.last_p = p[self.s_idx] 45 | 46 | return q_idxs 47 | 48 | def update(self, idxs_lb): 49 | self.idxs_lb = idxs_lb 50 | for i, stgy in enumerate(self.strategy_list): 51 | stgy.update(idxs_lb) 52 | -------------------------------------------------------------------------------- /query_strategies/adversarial_bim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | from .strategy import Strategy 5 | # from https://github.com/JordanAsh/badge 6 | 7 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 8 | 9 | class AdversarialBIM(Strategy): 10 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args, eps=0.05): 11 | super(AdversarialBIM, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 12 | self.eps = eps 13 | 14 | def cal_dis(self, x): 15 | # x = x.to(device=device) 16 | nx = torch.unsqueeze(x, 0) 17 | nx.requires_grad_() 18 | eta = torch.zeros(nx.shape) 19 | 20 | out, e1 = self.clf((nx+eta).to(self.device)) 21 | py = out.max(1)[1] 22 | ny = out.max(1)[1] 23 | cnt = 0 24 | while py.item() == ny.item(): 25 | cnt += 1 26 | loss = F.cross_entropy(out, ny) 27 | loss.backward() 28 | 29 | eta += self.eps * torch.sign(nx.grad.data) 30 | nx.grad.data.zero_() 31 | 32 | out, e1 = self.clf((nx+eta).to(self.device)) 33 | py = out.max(1)[1] 34 | 35 | if cnt % 10 == 0: 36 | print (cnt) 37 | 38 | if cnt == 100: 39 | break 40 | 41 | return (eta*eta).sum() 42 | 43 | def query(self, n): 44 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 45 | 46 | # self.clf = self.clf.to('cpu') 47 | self.clf.eval() 48 | dis = np.zeros(idxs_unlabeled.shape) 49 | transform = self.args.transform_te 50 | data_pool = self.handler(self.X[idxs_unlabeled], self.Y[idxs_unlabeled], 51 | transform=transform) 52 | for i in range(len(idxs_unlabeled)): 53 | 54 | if i % 100 == 0: 55 | print('adv {}/{}'.format(i, 
len(idxs_unlabeled))) 56 | x, y, idx = data_pool[i] 57 | dis[i] = self.cal_dis(x) 58 | 59 | # self.clf = self.clf.to(self.device) 60 | 61 | return idxs_unlabeled[dis.argsort()[:n]] 62 | 63 | 64 | -------------------------------------------------------------------------------- /query_strategies/adversarial_deepfool.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | from .strategy import Strategy 5 | from torch.autograd import Variable 6 | import pdb 7 | # from https://github.com/JordanAsh/badge 8 | 9 | class AdversarialDeepFool(Strategy): 10 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args, max_iter=50): 11 | super(AdversarialDeepFool, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 12 | self.max_iter = max_iter 13 | 14 | def cal_dis(self, x): 15 | nx = Variable(torch.unsqueeze(x, 0), requires_grad=True) 16 | eta = Variable(torch.zeros(nx.shape)) 17 | 18 | out, e1 = self.clf(nx + eta) 19 | n_class = out.shape[1] 20 | py = int(out.max(1)[1]) 21 | ny = int(out.max(1)[1]) 22 | 23 | i_iter = 0 24 | 25 | while py == ny and i_iter < self.max_iter: 26 | out[0, py].backward(retain_graph=True) 27 | grad_np = nx.grad.data.clone() 28 | value_l = np.inf 29 | ri = None 30 | 31 | for i in range(n_class): 32 | if i == py: 33 | continue 34 | 35 | nx.grad.data.zero_() 36 | out[0, i].backward(retain_graph=True) 37 | grad_i = nx.grad.data.clone() 38 | 39 | wi = grad_i - grad_np 40 | fi = out[0, i] - out[0, py] 41 | value_i = np.abs(float(fi)) / np.linalg.norm(wi.numpy().flatten()) 42 | 43 | if value_i < value_l: 44 | ri = value_i/np.linalg.norm(wi.numpy().flatten()) * wi 45 | 46 | eta += Variable(ri.clone()) 47 | nx.grad.data.zero_() 48 | out, e1 = self.clf(nx + eta) 49 | py = int(out.max(1)[1]) 50 | i_iter += 1 51 | 52 | return (eta*eta).sum() 53 | 54 | def query(self, n): 55 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 56 | 57 | self.clf.cpu() 58 | self.clf.eval() 59 | dis = np.zeros(idxs_unlabeled.shape) 60 | transform = self.args.transform_te 61 | data_pool = self.handler(self.X[idxs_unlabeled], self.Y.numpy()[idxs_unlabeled], 62 | transform=transform) 63 | for i in range(len(idxs_unlabeled)): 64 | if i % 100 == 0: 65 | print('adv {}/{}'.format(i, len(idxs_unlabeled)), flush=True) 66 | x, y, idx = data_pool[i] 67 | dis[i] = self.cal_dis(x) 68 | 69 | self.clf.cuda() 70 | 71 | return idxs_unlabeled[dis.argsort()[:n]] 72 | 73 | 74 | -------------------------------------------------------------------------------- /query_strategies/augmentations.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Base augmentations operators.""" 16 | 17 | # Code from: https://github.com/google-research/augmix/blob/master/augmentations.py 18 | 19 | 20 | import numpy as np 21 | from PIL import Image, ImageOps, ImageEnhance 22 | 23 | # ImageNet code should change this value 24 | IMAGE_SIZE = 32 25 | 26 | 27 | def int_parameter(level, maxval): 28 | """Helper function to scale `val` between 0 and maxval . 29 | 30 | Args: 31 | level: Level of the operation that will be between [0, `PARAMETER_MAX`]. 32 | maxval: Maximum value that the operation can have. This will be scaled to 33 | level/PARAMETER_MAX. 34 | 35 | Returns: 36 | An int that results from scaling `maxval` according to `level`. 37 | """ 38 | return int(level * maxval / 10) 39 | 40 | 41 | def float_parameter(level, maxval): 42 | """Helper function to scale `val` between 0 and maxval. 43 | 44 | Args: 45 | level: Level of the operation that will be between [0, `PARAMETER_MAX`]. 46 | maxval: Maximum value that the operation can have. This will be scaled to 47 | level/PARAMETER_MAX. 48 | 49 | Returns: 50 | A float that results from scaling `maxval` according to `level`. 51 | """ 52 | return float(level) * maxval / 10. 53 | 54 | 55 | def sample_level(n): 56 | return np.random.uniform(low=0.1, high=n) 57 | 58 | 59 | def autocontrast(pil_img, _): 60 | return ImageOps.autocontrast(pil_img) 61 | 62 | 63 | def equalize(pil_img, _): 64 | return ImageOps.equalize(pil_img) 65 | 66 | 67 | def posterize(pil_img, level): 68 | level = int_parameter(sample_level(level), 4) 69 | return ImageOps.posterize(pil_img, 4 - level) 70 | 71 | 72 | def rotate(pil_img, level): 73 | degrees = int_parameter(sample_level(level), 30) 74 | if np.random.uniform() > 0.5: 75 | degrees = -degrees 76 | return pil_img.rotate(degrees, resample=Image.BILINEAR) 77 | 78 | 79 | def solarize(pil_img, level): 80 | level = int_parameter(sample_level(level), 256) 81 | return ImageOps.solarize(pil_img, 256 - level) 82 | 83 | 84 | def shear_x(pil_img, level): 85 | level = float_parameter(sample_level(level), 0.3) 86 | if np.random.uniform() > 0.5: 87 | level = -level 88 | return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), 89 | Image.AFFINE, (1, level, 0, 0, 1, 0), 90 | resample=Image.BILINEAR) 91 | 92 | 93 | def shear_y(pil_img, level): 94 | level = float_parameter(sample_level(level), 0.3) 95 | if np.random.uniform() > 0.5: 96 | level = -level 97 | return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), 98 | Image.AFFINE, (1, 0, 0, level, 1, 0), 99 | resample=Image.BILINEAR) 100 | 101 | 102 | def translate_x(pil_img, level): 103 | level = int_parameter(sample_level(level), IMAGE_SIZE / 3) 104 | if np.random.random() > 0.5: 105 | level = -level 106 | return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), 107 | Image.AFFINE, (1, 0, level, 0, 1, 0), 108 | resample=Image.BILINEAR) 109 | 110 | 111 | def translate_y(pil_img, level): 112 | level = int_parameter(sample_level(level), IMAGE_SIZE / 3) 113 | if np.random.random() > 0.5: 114 | level = -level 115 | return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), 116 | Image.AFFINE, (1, 0, 0, 0, 1, level), 117 | resample=Image.BILINEAR) 118 | 119 | 120 | # operation that overlaps with ImageNet-C's test set 121 | def color(pil_img, level): 122 | level = float_parameter(sample_level(level), 1.8) + 0.1 123 | return ImageEnhance.Color(pil_img).enhance(level) 124 | 125 | 126 | # operation that overlaps with ImageNet-C's test set 127 | def contrast(pil_img, level): 128 | level = 
float_parameter(sample_level(level), 1.8) + 0.1 129 | return ImageEnhance.Contrast(pil_img).enhance(level) 130 | 131 | 132 | # operation that overlaps with ImageNet-C's test set 133 | def brightness(pil_img, level): 134 | level = float_parameter(sample_level(level), 1.8) + 0.1 135 | return ImageEnhance.Brightness(pil_img).enhance(level) 136 | 137 | 138 | # operation that overlaps with ImageNet-C's test set 139 | def sharpness(pil_img, level): 140 | level = float_parameter(sample_level(level), 1.8) + 0.1 141 | return ImageEnhance.Sharpness(pil_img).enhance(level) 142 | 143 | 144 | augmentations = [ 145 | autocontrast, equalize, posterize, rotate, solarize, shear_x, shear_y, 146 | translate_x, translate_y 147 | ] 148 | 149 | augmentations_all = [ 150 | autocontrast, equalize, posterize, rotate, solarize, shear_x, shear_y, 151 | translate_x, translate_y, color, contrast, brightness, sharpness 152 | ] -------------------------------------------------------------------------------- /query_strategies/badge_sampling.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data import DataLoader 3 | from .strategy import Strategy 4 | # import pickle 5 | from scipy.spatial.distance import cosine 6 | # import sys 7 | # import gc 8 | # from scipy.linalg import det 9 | # from scipy.linalg import pinv as inv 10 | from copy import copy as copy 11 | from copy import deepcopy as deepcopy 12 | # import torch 13 | # from torch import nn 14 | # import torchfile 15 | # from torch.autograd import Variable 16 | # import torch.optim as optim 17 | import pdb 18 | # from torch.nn import functional as F 19 | # import argparse 20 | # import torch.nn as nn 21 | # from collections import OrderedDict 22 | from scipy import stats 23 | # import time 24 | import numpy as np 25 | # import scipy.sparse as sp 26 | # from itertools import product 27 | # from sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin 28 | # from sklearn.metrics.pairwise import euclidean_distances 29 | # from sklearn.metrics.pairwise import pairwise_distances_argmin_min 30 | # from sklearn.utils.extmath import row_norms, squared_norm, stable_cumsum 31 | # from sklearn.utils.sparsefuncs_fast import assign_rows_csr 32 | # from sklearn.utils.sparsefuncs import mean_variance_axis 33 | # from sklearn.utils.validation import _num_samples 34 | # from sklearn.utils import check_array 35 | # from sklearn.utils import gen_batches 36 | # from sklearn.utils import check_random_state 37 | # from sklearn.utils.validation import check_is_fitted 38 | # from sklearn.utils.validation import FLOAT_DTYPES 39 | # from sklearn.metrics.pairwise import rbf_kernel as rbf 40 | # from sklearn.externals.six import string_types 41 | # from sklearn.exceptions import ConvergenceWarning 42 | from sklearn.metrics import pairwise_distances 43 | # from https://github.com/JordanAsh/badge 44 | # kmeans ++ initialization 45 | 46 | def init_centers(X, K): 47 | ind = np.argmax([np.linalg.norm(s, 2) for s in X]) 48 | mu = [X[ind]] 49 | indsAll = [ind] 50 | centInds = [0.] 
* len(X) 51 | cent = 0 52 | print('#Samps\tTotal Distance') 53 | while len(mu) < K: 54 | if len(mu) == 1: 55 | D2 = pairwise_distances(X, mu).ravel().astype(float) 56 | else: 57 | newD = pairwise_distances(X, [mu[-1]]).ravel().astype(float) 58 | for i in range(len(X)): 59 | if D2[i] > newD[i]: 60 | centInds[i] = cent 61 | D2[i] = newD[i] 62 | print(str(len(mu)) + '\t' + str(sum(D2)), flush=True) 63 | if sum(D2) == 0.0: pdb.set_trace() 64 | D2 = D2.ravel().astype(float) 65 | Ddist = (D2 ** 2)/ sum(D2 ** 2) 66 | customDist = stats.rv_discrete(name='custm', values=(np.arange(len(D2)), Ddist)) 67 | ind = customDist.rvs(size=1)[0] 68 | mu.append(X[ind]) 69 | indsAll.append(ind) 70 | cent += 1 71 | gram = np.matmul(X[indsAll], X[indsAll].T) 72 | val, _ = np.linalg.eig(gram) 73 | val = np.abs(val) 74 | vgt = val[val > 1e-2] 75 | return indsAll 76 | 77 | class BadgeSampling(Strategy): 78 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 79 | super(BadgeSampling, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 80 | 81 | def query(self, n): 82 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 83 | gradEmbedding = self.get_grad_embedding(self.X[idxs_unlabeled], self.Y.numpy()[idxs_unlabeled]).numpy() 84 | chosen = init_centers(gradEmbedding, n) 85 | return idxs_unlabeled[chosen] 86 | -------------------------------------------------------------------------------- /query_strategies/baseline_sampling.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data import DataLoader 3 | from .strategy import Strategy 4 | import pickle 5 | from scipy.spatial.distance import cosine 6 | import sys 7 | import gc 8 | from scipy.linalg import det 9 | from scipy.linalg import pinv as inv 10 | from copy import copy as copy 11 | from copy import deepcopy as deepcopy 12 | import torch 13 | from torch import nn 14 | import torchfile 15 | from torch.autograd import Variable 16 | import torch.optim as optim 17 | import pdb 18 | from torch.nn import functional as F 19 | import argparse 20 | import torch.nn as nn 21 | from collections import OrderedDict 22 | from scipy import stats 23 | import time 24 | import numpy as np 25 | import scipy.sparse as sp 26 | from itertools import product 27 | import sklearn 28 | from sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin 29 | from sklearn.metrics.pairwise import euclidean_distances 30 | from sklearn.metrics.pairwise import pairwise_distances_argmin_min 31 | from sklearn.utils.extmath import row_norms, squared_norm, stable_cumsum 32 | from sklearn.utils.sparsefuncs_fast import assign_rows_csr 33 | from sklearn.utils.sparsefuncs import mean_variance_axis 34 | from sklearn.utils.validation import _num_samples 35 | from sklearn.utils import check_array 36 | from sklearn.utils import gen_batches 37 | from sklearn.utils import check_random_state 38 | from sklearn.utils.validation import check_is_fitted 39 | from sklearn.utils.validation import FLOAT_DTYPES 40 | from sklearn.metrics.pairwise import rbf_kernel as rbf 41 | # from sklearn.externals.six import string_types 42 | from sklearn.exceptions import ConvergenceWarning 43 | 44 | def gram_red(L, L_inv, u_loc): 45 | n = np.shape(L_inv)[0] 46 | ms = np.array([False for i in range(n)]) 47 | ms[u_loc] = True 48 | 49 | L_red = L[~ms][:, ~ms] 50 | 51 | D = L_inv[~ms][:, ~ms] 52 | e = L_inv[~ms][:, ms] 53 | f = L_inv[ms][:, ms] 54 | 55 | L_red_inv = D - e.dot(e.T) / f 56 | return L_red, L_red_inv 57 | 58 | def 
gram_aug(L_Y, L_Y_inv, b_u, c_u): 59 | d_u = c_u - b_u.T.dot(L_Y_inv.dot(b_u)) 60 | g_u = L_Y_inv.dot(b_u) 61 | 62 | L_aug = np.block([[L_Y, b_u],[b_u.T, c_u]]) 63 | L_aug_inv = np.block([[L_Y_inv + g_u.dot(g_u.T/d_u), -g_u/d_u], [-g_u.T/d_u, 1.0/d_u]]) 64 | 65 | return L_aug, L_aug_inv 66 | 67 | def sample_k_imp(Phi, k, max_iter, rng=np.random): 68 | n = np.shape(Phi)[0] 69 | Ind = rng.choice(range(n), size=k, replace=False) 70 | 71 | if n == k: 72 | return Ind 73 | 74 | X = [False] * n 75 | for i in Ind: 76 | X[i] = True 77 | X = np.array(X) 78 | 79 | L_X = Phi[Ind, :].dot(Phi[Ind, :].T) 80 | 81 | L_X_inv = np.linalg.pinv(L_X) 82 | 83 | for i in range(1, max_iter): 84 | 85 | u = rng.choice(np.arange(n)[X]) 86 | v = rng.choice(np.arange(n)[~X]) 87 | 88 | for j in range(len(Ind)): 89 | if Ind[j] == u: 90 | u_loc = j 91 | 92 | L_Y, L_Y_inv = gram_red(L_X, L_X_inv, u_loc) 93 | 94 | Ind_red = [i for i in Ind if i != u] 95 | 96 | b_u = Phi[Ind_red, :].dot(Phi[[u], :].T) 97 | c_u = Phi[[u], :].dot(Phi[[u], :].T) 98 | b_v = Phi[Ind_red, :].dot(Phi[[v], :].T) 99 | c_v = Phi[[v], :].dot(Phi[[v], :].T) 100 | 101 | p = min(1, (c_v - b_v.T.dot(L_Y_inv.dot(b_v))) / (c_u - b_u.T.dot(L_Y_inv.dot(b_u))) ) 102 | 103 | if rng.uniform() <= p: 104 | X[u] = False 105 | X[v] = True 106 | Ind = Ind_red + [v] 107 | L_X, L_X_inv = gram_aug(L_Y, L_Y_inv, b_v, c_v) 108 | 109 | if i % k == 0: 110 | print('Iter ', i) 111 | 112 | return Ind 113 | 114 | class BaselineSampling(Strategy): 115 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 116 | super(BaselineSampling, self).__init__(X, Y,X_te, Y_te, idxs_lb, net, handler, args) 117 | 118 | def query(self, n): 119 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 120 | gradEmbedding = self.get_grad_embedding(self.X[idxs_unlabeled], self.Y.numpy()[idxs_unlabeled]).numpy() 121 | chosen = sample_k_imp(gradEmbedding, n, max_iter= int(5 * n * np.log(n))) 122 | return idxs_unlabeled[chosen] 123 | -------------------------------------------------------------------------------- /query_strategies/batch_BALD.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .strategy import Strategy 3 | import dataclasses 4 | import typing 5 | from batchbald_redux.batchbald import get_batchbald_batch # pip install batchbald_redux 6 | from torch.utils.data import DataLoader 7 | import torch 8 | from torch.autograd import Variable 9 | import torch.nn.functional as F 10 | """" 11 | BatchBALD: Efficient and Diverse Batch Acquisition for Deep Bayesian Active Learning 12 | Reproduce through API 13 | """ 14 | 15 | @dataclasses.dataclass 16 | class AcquisitionBatch: 17 | indices: typing.List[int] 18 | scores: typing.List[float] 19 | orignal_scores: typing.Optional[typing.List[float]] 20 | 21 | class BatchBALD(Strategy): 22 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 23 | super(BatchBALD, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 24 | self.net = net 25 | self.args = args 26 | 27 | def compute_NKC(self, X,Y): 28 | loader_te = DataLoader(self.handler(X, Y, transform=self.args['transformTest']), 29 | shuffle=False, **self.args['loader_te_args']) 30 | K = 10 # MC 31 | self.clf.train() 32 | probs = torch.zeros([K, len(Y), len(np.unique(Y))]) 33 | with torch.no_grad(): 34 | for i in range(K): 35 | for x, y, idxs in loader_te: 36 | x, y = Variable(x.to(self.device)), Variable(y.to(self.device)) 37 | out, e1 = self.clf(x) 38 | 39 | probs[i][idxs] += F.softmax(out, 
dim=1).cpu().data 40 | 41 | return probs.permute(1,0,2) 42 | 43 | def query(self, n): 44 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 45 | prob_NKC = self.compute_NKC(self.X[idxs_unlabeled], self.Y[idxs_unlabeled]) 46 | with torch.no_grad(): 47 | batch = get_batchbald_batch(prob_NKC, n, 10000000) # Don't know the meaning of the third argument 48 | return idxs_unlabeled[batch.indices] 49 | -------------------------------------------------------------------------------- /query_strategies/batch_active_learning_at_scale.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .strategy import Strategy 3 | import torch.nn.functional as F 4 | from torch.utils.data import DataLoader 5 | import torch 6 | from torch.autograd import Variable 7 | from sklearn.cluster import AgglomerativeClustering 8 | distance_threshold = 8 9 | # (ClusterMargin, 2021) Batch Active Learning at Scale 10 | # Original code is not open source. Reproduced by muxi 11 | 12 | class Cluster(): 13 | def __init__(self, init_points, cluster_id): 14 | self.points = init_points 15 | self.cluster_id = cluster_id 16 | self.merged = False 17 | 18 | def inverse(self, cluster_dict): 19 | for point in self.points: 20 | cluster_dict[str(point)] = self.cluster_id 21 | return cluster_dict 22 | 23 | def update_id(self, cluster_id): 24 | self.cluster_id = cluster_id 25 | 26 | def average_linkage(cluster_1, cluster_2): 27 | len_1 = len(cluster_1.points) 28 | len_2 = len(cluster_2.points) 29 | cluster_distance = 0 30 | for point_1 in cluster_1.points: 31 | for point_2 in cluster_2.points: 32 | cluster_distance += distance(point_1,point_2) 33 | cluster_distance/=(len_1*len_2) 34 | return cluster_distance 35 | 36 | shortcut = {} 37 | distance_dict = {} 38 | 39 | def distance(point_1,point_2): 40 | global distance_dict, shortcut 41 | p1 = shortcut[str(point_1)] 42 | p2 = shortcut[str(point_2)] 43 | if p2 not in distance_dict[p1].keys(): 44 | dist = round(np.linalg.norm(point_1 - point_2),10) 45 | distance_dict[p1][p2] = dist 46 | # distance_dict[p2][p1] = dist 47 | return distance_dict[p1][p2] 48 | 49 | def HAC(points_set): 50 | global distance_dict, shortcut 51 | cluster_list = [] 52 | for idex, point in enumerate(points_set): 53 | distance_dict[idex] = {} 54 | shortcut[str(point)] = idex 55 | cluster_list.append(Cluster([point,], idex)) 56 | update_flag = True 57 | round = 0 58 | 59 | while(update_flag) and len(cluster_list) > 5000: 60 | new_cluster_list = [] 61 | round += 1 62 | print("HAC round ", round, "Length ",len(cluster_list)) 63 | for index_1, cluster_1 in enumerate(cluster_list): 64 | for index_2, cluster_2 in enumerate(cluster_list): 65 | if index_2 <= index_1: 66 | continue 67 | if cluster_1.merged or cluster_2.merged: 68 | continue 69 | avg_link = average_linkage(cluster_1,cluster_2) 70 | if avg_link < distance_threshold: 71 | new_cluster_list.append(Cluster(cluster_1.points + cluster_2.points, cluster_1.cluster_id)) 72 | cluster_1.merged = True 73 | cluster_2.merged = True 74 | # print(avg_link) 75 | 76 | for cluster in cluster_list: 77 | if not cluster.merged: 78 | new_cluster_list.append(cluster) 79 | 80 | if len(cluster_list) == len(new_cluster_list): 81 | update_flag = False 82 | cluster_list= new_cluster_list 83 | for index, cluster in enumerate(cluster_list): 84 | cluster.update_id(index) 85 | cluster.merged = False 86 | # print(len(new_cluster_list)) 87 | cluster_dict = {} 88 | for index, cluster in enumerate(cluster_list): 89 | cluster_dict = 
cluster.inverse(cluster_dict) 90 | # print(cluster.points) 91 | return cluster_dict 92 | 93 | class ClusterMarginSampling(Strategy): 94 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 95 | super(ClusterMarginSampling, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 96 | self.one_sample_step = True 97 | 98 | def prepare_emb(self): 99 | loader_te = DataLoader(self.handler(self.X, self.Y, transform=self.args.transform_te), shuffle=False, **self.args.loader_te_args) 100 | self.clf.eval() 101 | create = True 102 | with torch.no_grad(): 103 | for x, y, idxs in loader_te: 104 | x, y = Variable(x.to(self.device)), Variable(y.to(self.device)) 105 | out, emb, feature = self.clf(x,intermediate = True) # resnet emb from last avg pool 106 | if create: 107 | create = False 108 | emb_list = torch.zeros([len(self.Y), len(feature[-1].view(out.size(0), -1)[1])]) 109 | emb_list[idxs] = feature[-1].view(out.size(0), -1)[1].cpu().data 110 | return np.array(emb_list) 111 | 112 | def margin_data(self, n): 113 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 114 | # 仅获得未标注数据 prob 115 | probs = self.predict_prob(self.X[idxs_unlabeled], self.Y.numpy()[idxs_unlabeled]) 116 | probs_sorted, idxs = probs.sort(descending=True) 117 | U = probs_sorted[:, 0] - probs_sorted[:,1] 118 | # emb_list = self.emb_list 119 | # emb_select_list = emb_list[idxs_unlabeled[U.sort()[1].numpy()[:n]]] 120 | # sort函数默认 dim = -1, [1] 表示的是 index 121 | return idxs_unlabeled[U.sort()[1].numpy()[:n]] 122 | 123 | def query(self, k): 124 | if self.one_sample_step: 125 | self.one_sample_step = False 126 | self.emb_list = self.prepare_emb() 127 | # print(self.emb_list[0]) 128 | # self.HAC_dict = HAC(self.emb_list) 129 | # self.HAC_list = AgglomerativeClustering(n_clusters=None,distance_threshold = distance_threshold,linkage = 'average').fit(self.emb_list) 130 | # 全部数据 建立 cluster 131 | self.HAC_list = AgglomerativeClustering(n_clusters=20, linkage = 'average').fit(self.emb_list) 132 | 133 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 134 | n = min(k*10,len(self.Y[idxs_unlabeled])) 135 | index = self.margin_data(n) 136 | index = self.round_robin(index, self.HAC_list.labels_, k) 137 | # print(len(index),len([i for i in index if i in idxs_unlabeled])) 138 | return index 139 | 140 | def round_robin(self, unlabeled_index, hac_list, k): 141 | cluster_list = [] 142 | # print("Round Robin") 143 | for i in range(len(self.Y)): 144 | cluster = [] 145 | cluster_list.append(cluster) 146 | for real_idx in unlabeled_index: 147 | i = hac_list[real_idx] 148 | cluster_list[i].append(real_idx) 149 | cluster_list.sort(key=lambda x:len(x)) 150 | index_select = [] 151 | cluster_index = 0 152 | # print("Select cluster",len(set(hac_list))) 153 | while k > 0: 154 | if len(cluster_list[cluster_index]) > 0: 155 | index_select.append(cluster_list[cluster_index].pop(0)) 156 | k -= 1 157 | if cluster_index < len(cluster_list) - 1: 158 | cluster_index += 1 159 | else: 160 | cluster_index = 0 161 | 162 | return index_select 163 | -------------------------------------------------------------------------------- /query_strategies/bayesian_active_learning_disagreement_dropout.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from .strategy import Strategy 4 | 5 | class BALDDropout(Strategy): 6 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args, n_drop=10): 7 | super(BALDDropout, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 
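        # BALD acquisition (Houlsby et al.): score each unlabeled point by the mutual
        # information I(y; w | x) = H[E_w p(y|x,w)] - E_w H[p(y|x,w)], estimated from
        # n_drop stochastic forward passes with dropout kept active; query() below keeps
        # the n points with the largest estimated information gain.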
8 | self.n_drop = n_drop 9 | 10 | def query(self, n): 11 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 12 | probs = self.predict_prob_dropout_split( 13 | self.X[idxs_unlabeled], self.Y.numpy()[idxs_unlabeled], self.n_drop) 14 | pb = probs.mean(0) 15 | entropy1 = (-pb*torch.log(pb)).sum(1) 16 | entropy2 = (-probs*torch.log(probs)).sum(2).mean(0) 17 | U = entropy2 - entropy1 18 | return idxs_unlabeled[U.sort()[1][:n]] 19 | -------------------------------------------------------------------------------- /query_strategies/coreGCN.py: -------------------------------------------------------------------------------- 1 | # This is an implementation of the paper: Sequential GCN for Active Learning 2 | # Implemented by Yu LI, based on the code: https://github.com/razvancaramalau/Sequential-GCN-for-Active-Learning 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import numpy as np 8 | from .strategy import Strategy 9 | import pdb 10 | from torch.nn import functional 11 | import math 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.init as init 15 | import torch.nn.functional as F 16 | from torch.nn.parameter import Parameter 17 | from torch.nn.modules.module import Module 18 | from models.gcn import GCN 19 | import torch.optim as optim 20 | 21 | 22 | 23 | from sklearn.metrics import pairwise_distances 24 | from scipy.spatial import distance 25 | 26 | import abc 27 | 28 | 29 | EPOCH_GCN = 200 30 | LR_GCN = 1e-3 31 | SUBSET = 10000 # M 32 | 33 | def aff_to_adj(x, y=None): 34 | x = x.detach().cpu().numpy() 35 | adj = np.matmul(x, x.transpose()) 36 | adj += -1.0*np.eye(adj.shape[0]) 37 | adj_diag = np.sum(adj, axis=0) #rowise sum 38 | adj = np.matmul(adj, np.diag(1/adj_diag)) 39 | adj = adj + np.eye(adj.shape[0]) 40 | adj = torch.Tensor(adj) 41 | 42 | return adj 43 | 44 | 45 | def BCEAdjLoss(scores, lbl, nlbl, l_adj): 46 | lnl = torch.log(scores[lbl]) 47 | lnu = torch.log(1 - scores[nlbl]) 48 | labeled_score = torch.mean(lnl) 49 | unlabeled_score = torch.mean(lnu) 50 | bce_adj_loss = -labeled_score - l_adj*unlabeled_score 51 | return bce_adj_loss 52 | 53 | 54 | 55 | class SamplingMethod(object): 56 | __metaclass__ = abc.ABCMeta 57 | 58 | @abc.abstractmethod 59 | def __init__(self, X, y, seed, **kwargs): 60 | self.X = X 61 | self.y = y 62 | self.seed = seed 63 | 64 | def flatten_X(self): 65 | shape = self.X.shape 66 | flat_X = self.X 67 | if len(shape) > 2: 68 | flat_X = np.reshape(self.X, (shape[0],np.product(shape[1:]))) 69 | return flat_X 70 | 71 | 72 | @abc.abstractmethod 73 | def select_batch_(self): 74 | return 75 | 76 | def select_batch(self, **kwargs): 77 | return self.select_batch_(**kwargs) 78 | 79 | def select_batch_unc_(self, **kwargs): 80 | return self.select_batch_unc_(**kwargs) 81 | 82 | def to_dict(self): 83 | return None 84 | 85 | 86 | 87 | class kCenterGreedy(SamplingMethod): 88 | 89 | def __init__(self, X, metric='euclidean'): 90 | self.X = X 91 | # self.y = y 92 | self.flat_X = self.flatten_X() 93 | self.name = 'kcenter' 94 | self.features = self.flat_X 95 | self.metric = metric 96 | self.min_distances = None 97 | self.max_distances = None 98 | self.n_obs = self.X.shape[0] 99 | self.already_selected = [] 100 | 101 | def update_distances(self, cluster_centers, only_new=True, reset_dist=False): 102 | """Update min distances given cluster centers. 
103 | Args: 104 | cluster_centers: indices of cluster centers 105 | only_new: only calculate distance for newly selected points and update 106 | min_distances. 107 | rest_dist: whether to reset min_distances. 108 | """ 109 | 110 | if reset_dist: 111 | self.min_distances = None 112 | if only_new: 113 | cluster_centers = [d for d in cluster_centers 114 | if d not in self.already_selected] 115 | if cluster_centers: 116 | x = self.features[cluster_centers] 117 | # Update min_distances for all examples given new cluster center. 118 | dist = pairwise_distances(self.features, x, metric=self.metric)#,n_jobs=4) 119 | 120 | if self.min_distances is None: 121 | self.min_distances = np.min(dist, axis=1).reshape(-1,1) 122 | else: 123 | self.min_distances = np.minimum(self.min_distances, dist) 124 | 125 | def select_batch_(self, already_selected, N, **kwargs): 126 | """ 127 | Diversity promoting active learning method that greedily forms a batch 128 | to minimize the maximum distance to a cluster center among all unlabeled 129 | datapoints. 130 | Args: 131 | model: model with scikit-like API with decision_function implemented 132 | already_selected: index of datapoints already selected 133 | N: batch size 134 | Returns: 135 | indices of points selected to minimize distance to cluster centers 136 | """ 137 | 138 | try: 139 | # Assumes that the transform function takes in original data and not 140 | # flattened data. 141 | print('Getting transformed features...') 142 | # self.features = model.transform(self.X) 143 | print('Calculating distances...') 144 | self.update_distances(already_selected, only_new=False, reset_dist=True) 145 | except: 146 | print('Using flat_X as features.') 147 | self.update_distances(already_selected, only_new=True, reset_dist=False) 148 | 149 | new_batch = [] 150 | 151 | for _ in range(N): 152 | if self.already_selected is None: 153 | # Initialize centers with a randomly selected datapoint 154 | ind = np.random.choice(np.arange(self.n_obs)) 155 | else: 156 | ind = np.argmax(self.min_distances) 157 | # New examples should not be in already selected since those points 158 | # should have min_distance of zero to a cluster center. 
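            # Greedy k-center step: `ind` is the point farthest from its nearest
            # already-chosen center (argmax over min_distances), so adding it greedily
            # shrinks the covering radius of the selected batch.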
159 | assert ind not in already_selected 160 | 161 | self.update_distances([ind], only_new=True, reset_dist=False) 162 | new_batch.append(ind) 163 | print('Maximum distance from cluster centers is %0.2f' 164 | % max(self.min_distances)) 165 | 166 | 167 | self.already_selected = already_selected 168 | 169 | return new_batch 170 | 171 | 172 | 173 | class coreGCN(Strategy): 174 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 175 | super(coreGCN, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 176 | 177 | def query(self, n): 178 | # get the features of all data (labeled + unlabeled) 179 | subset = list(np.nonzero(~self.idxs_lb)[0][:SUBSET]) 180 | ind_idxs_lb = list(np.nonzero(self.idxs_lb)[0]) 181 | 182 | features = self.get_embedding(self.X[subset+ind_idxs_lb], self.Y[subset+ind_idxs_lb]) 183 | features = functional.normalize(features).to(self.device) 184 | adj = aff_to_adj(features).to(self.device) 185 | 186 | binary_labels = torch.cat((torch.zeros([SUBSET, 1]),(torch.ones([len(ind_idxs_lb),1]))),0) 187 | 188 | gcn_module = GCN(nfeat=features.shape[1], 189 | nhid=self.args.hidden_units, 190 | nclass=1, 191 | dropout=self.args.dropout_rate).to(self.device) 192 | models = {'gcn_module': gcn_module} 193 | 194 | optim_backbone = optim.Adam(models['gcn_module'].parameters(), lr=1e-3, 195 | weight_decay=5e-4) 196 | optimizers = {'gcn_module': optim_backbone} 197 | 198 | lbl = np.arange(SUBSET, SUBSET+len(ind_idxs_lb), 1) # temp labeled index 199 | nlbl = np.arange(0, SUBSET, 1) # temp unlabled index 200 | 201 | # train the gcn model 202 | for _ in range(200): 203 | optimizers['gcn_module'].zero_grad() 204 | outputs, _, _ = models['gcn_module'](features, adj) 205 | lamda = self.args.lambda_loss 206 | loss = BCEAdjLoss(outputs, lbl, nlbl, lamda) 207 | loss.backward() 208 | optimizers['gcn_module'].step() 209 | 210 | models['gcn_module'].eval() 211 | with torch.no_grad(): 212 | inputs = features.to(self.device) 213 | labels = binary_labels.to(self.device) 214 | scores, _, feat = models['gcn_module'](inputs, adj) 215 | 216 | feat = feat.detach().cpu().numpy() 217 | new_av_idx = np.arange(SUBSET,(SUBSET + len(ind_idxs_lb))) 218 | sampling2 = kCenterGreedy(feat) 219 | batch2 = sampling2.select_batch_(new_av_idx, n) 220 | other_idx = [x for x in range(SUBSET) if x not in batch2] 221 | arg = other_idx + batch2 222 | 223 | subset = np.array(subset) 224 | inds = subset[arg][-n:] 225 | 226 | print("Max confidence value: ",torch.max(scores.data)) 227 | print("Mean confidence value: ",torch.mean(scores.data)) 228 | preds = torch.round(scores) 229 | correct_labeled = (preds[SUBSET:,0] == labels[SUBSET:,0]).sum().item() / len(ind_idxs_lb) 230 | correct_unlabeled = (preds[:SUBSET,0] == labels[:SUBSET,0]).sum().item() / SUBSET 231 | correct = (preds[:,0] == labels[:,0]).sum().item() / (SUBSET + len(ind_idxs_lb)) 232 | print("Labeled classified: ", correct_labeled) 233 | print("Unlabeled classified: ", correct_unlabeled) 234 | print("Total classified: ", correct) 235 | 236 | return inds 237 | 238 | 239 | 240 | -------------------------------------------------------------------------------- /query_strategies/core_set.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pdb 3 | from .strategy import Strategy 4 | from sklearn.neighbors import NearestNeighbors 5 | import pickle 6 | from datetime import datetime 7 | from sklearn.metrics import pairwise_distances 8 | 9 | class CoreSet(Strategy): 10 | def __init__(self, X, 
Y, X_te, Y_te, idxs_lb, net, handler, args, tor=1e-4): 11 | super(CoreSet, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 12 | self.tor = tor 13 | 14 | def furthest_first(self, X, X_set, n): 15 | m = np.shape(X)[0] 16 | if np.shape(X_set)[0] == 0: 17 | min_dist = np.tile(float("inf"), m) 18 | else: 19 | dist_ctr = pairwise_distances(X, X_set) 20 | min_dist = np.amin(dist_ctr, axis=1) 21 | 22 | idxs = [] 23 | 24 | for i in range(n): 25 | idx = min_dist.argmax() 26 | idxs.append(idx) 27 | dist_new_ctr = pairwise_distances(X, X[[idx], :]) 28 | for j in range(m): 29 | min_dist[j] = min(min_dist[j], dist_new_ctr[j, 0]) 30 | 31 | return idxs 32 | 33 | def query(self, n): 34 | t_start = datetime.now() 35 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 36 | lb_flag = self.idxs_lb.copy() 37 | embedding = self.get_embedding(self.X, self.Y) 38 | embedding = embedding.numpy() 39 | 40 | chosen = self.furthest_first(embedding[idxs_unlabeled, :], embedding[lb_flag, :], n) 41 | 42 | return idxs_unlabeled[chosen] 43 | 44 | 45 | def query_old(self, n): 46 | lb_flag = self.idxs_lb.copy() 47 | embedding = self.get_embedding(self.X, self.Y) 48 | embedding = embedding.numpy() 49 | 50 | print('calculate distance matrix') 51 | t_start = datetime.now() 52 | dist_mat = np.matmul(embedding, embedding.transpose()) 53 | sq = np.array(dist_mat.diagonal()).reshape(len(self.X), 1) 54 | dist_mat *= -2 55 | dist_mat += sq 56 | dist_mat += sq.transpose() 57 | dist_mat = np.sqrt(dist_mat) 58 | print(datetime.now() - t_start) 59 | print('calculate greedy solution') 60 | t_start = datetime.now() 61 | mat = dist_mat[~lb_flag, :][:, lb_flag] 62 | 63 | for i in range(n): 64 | if i % 10 == 0: 65 | print('greedy solution {}/{}'.format(i, n)) 66 | mat_min = mat.min(axis=1) 67 | q_idx_ = mat_min.argmax() 68 | q_idx = np.arange(self.n_pool)[~lb_flag][q_idx_] 69 | lb_flag[q_idx] = True 70 | mat = np.delete(mat, q_idx_, 0) 71 | mat = np.append(mat, dist_mat[~lb_flag, q_idx][:, None], axis=1) 72 | 73 | print(datetime.now() - t_start) 74 | opt = mat.min(axis=1).max() 75 | 76 | bound_u = opt 77 | bound_l = opt/2.0 78 | delta = opt 79 | 80 | xx, yy = np.where(dist_mat <= opt) 81 | dd = dist_mat[xx, yy] 82 | 83 | lb_flag_ = self.idxs_lb.copy() 84 | subset = np.where(lb_flag_==True)[0].tolist() 85 | 86 | SEED = 5 87 | sols = None 88 | 89 | if sols is None: 90 | q_idxs = lb_flag 91 | else: 92 | lb_flag_[sols] = True 93 | q_idxs = lb_flag_ 94 | print('sum q_idxs = {}'.format(q_idxs.sum())) 95 | 96 | return np.arange(self.n_pool)[(self.idxs_lb ^ q_idxs)] 97 | -------------------------------------------------------------------------------- /query_strategies/cpu_dist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.linalg.blas import dgemm, sgemm 3 | 4 | def ext_arrs(A,B, precision="float64"): 5 | """ 6 | Create extended version of arrays for matrix-multiplication based squared 7 | euclidean distance between two 2D arrays representing n-dimensional points. 8 | 9 | Parameters 10 | ---------- 11 | A : ndarray 12 | 2D NumPy array of float dtype representing n-dimensional points, with 13 | each row being one point. 14 | B : ndarray 15 | 2D NumPy array of float dtype representing n-dimensional points, with 16 | each row being one point. 17 | precision : str, optional 18 | Selects the precision type for creating extended arrays. 19 | 20 | Returns 21 | ------- 22 | A_ext : ndarray 23 | Extended version of A. 
The shape of A_ext is such that it has 3 times 24 | the number of columns in A. The arrangement is described below : 25 | The first block of dim columns has all 1s. 26 | The second block of dim columns has A. 27 | The third block of dim columns has squared elements of A. 28 | 29 | B_ext : ndarray 30 | Extended version of B. The shape of B_ext is such the the number of rows 31 | is 3 times the number of columns in B and the number of columns 32 | is same as the number of rows in B. The arrangement is described below : 33 | The first block of dim rows has squared B values, but transposed. 34 | The second block of dim rows has B values scaled by -2 and transposed. 35 | The third block of dim rows is all 1s. 36 | 37 | """ 38 | 39 | nA,dim = A.shape 40 | A_ext = np.ones((nA,dim*3),dtype=precision) 41 | A_ext[:,dim:2*dim] = A 42 | A_ext[:,2*dim:] = A**2 43 | 44 | nB = B.shape[0] 45 | B_ext = np.ones((dim*3,nB),dtype=precision) 46 | B_ext[:dim] = (B**2).T 47 | B_ext[dim:2*dim] = -2.0*B.T 48 | return A_ext, B_ext 49 | 50 | def auto_dtype(A, B): 51 | """ 52 | Get promoted datatype for A and B combined. 53 | 54 | Parameters 55 | ---------- 56 | A : ndarray 57 | B : ndarray 58 | 59 | Returns 60 | ------- 61 | precision : dtype 62 | Datatype that would be used after appplying NumPy type promotion rules. 63 | If its not float dtype, e.g. int dtype, output is `float32` dtype. 64 | 65 | """ 66 | 67 | # Datatype that would be used after appplying NumPy type promotion rules 68 | precision = np.result_type(A.dtype, B.dtype) 69 | 70 | # Cast to float32 dtype for dtypes that are not float 71 | if np.issubdtype(precision, float)==0: 72 | precision = np.float32 73 | 74 | return precision 75 | 76 | def output_dtype(A,B, precision): 77 | """ 78 | Get promoted datatype for A and B combined alongwith consideration 79 | for another input datatype. 80 | 81 | Parameters 82 | ---------- 83 | A : ndarray 84 | 85 | B : ndarray 86 | 87 | precision : dtype 88 | This decides whether promoted datatype for A and B combined would be 89 | outputted or float32. 90 | 91 | Returns 92 | ------- 93 | out_dtype : dtype 94 | Datatype that would be used after appplying NumPy type promotion rules. 95 | If its not float dtype, e.g. int dtype, output is `float32` dtype. 96 | 97 | """ 98 | # Get output dtype 99 | if precision=="auto": 100 | out_dtype = auto_dtype(A, B) 101 | else: 102 | out_dtype = np.float32 103 | 104 | return out_dtype 105 | 106 | 107 | def gemm_func(precision): 108 | """ 109 | Get appropriate blas function 110 | 111 | Parameters 112 | ---------- 113 | precision : dtype or str 114 | dtype or string signifying the datatype for which we need an appropriate 115 | blas function for matrix-multiplication 116 | 117 | Returns 118 | ------- 119 | gemm_func : function 120 | Appropriate blas function 121 | 122 | """ 123 | 124 | if (precision=="float64") | (precision==np.float64): 125 | gemm_func = dgemm 126 | else: 127 | gemm_func = sgemm 128 | return gemm_func 129 | 130 | 131 | def dist_ext(A,B, matmul="dot", precision="auto"): 132 | """ 133 | Compute squared euclidean distance between two 2D arrays representing 134 | n-dimensional points using extended arrays based approach. 135 | For more info on rest of the input parameters and output, please refer to 136 | function 'dist'. 
137 | 138 | """ 139 | 140 | # Get output dtype 141 | out_dtype = output_dtype(A,B, precision) 142 | 143 | # Get extended arrays and then use matrix-multiplication to get distances 144 | A_ext, B_ext = ext_arrs(A,B, precision=out_dtype) 145 | 146 | if matmul=="dot": 147 | gemm_function = gemm_func(out_dtype) 148 | return gemm_function(alpha=1.0, a=A_ext, b=B_ext) 149 | elif matmul=="gemm": 150 | return A_ext.dot(B_ext) 151 | 152 | 153 | def dist_accum(A,B, matmul="dot", precision="auto"): 154 | """ 155 | Compute squared euclidean distance between two 2D arrays representing 156 | n-dimensional points using accumulation based approach. 157 | For more info on rest of the input parameters and output, please refer to 158 | function 'dist'. 159 | 160 | """ 161 | 162 | 163 | # Get matrix-multiplication between A and transposed B. 164 | # Then, accumulate squared row summations of A and B into it along the 165 | # appropriate axes of the matrix-multiplication result. 166 | out_dtype = output_dtype(A,B, precision) 167 | 168 | Af = A 169 | Bf = B 170 | if matmul=="dot": 171 | if np.issubdtype(A.dtype, int): 172 | Af = A.astype('float32') 173 | 174 | if np.issubdtype(B.dtype, int): 175 | Bf = B.astype('float32') 176 | 177 | out = Af.dot(-2*Bf.T) 178 | 179 | elif matmul=="gemm": 180 | # Get output dtype and appropriate gemm function for matrix-multiplication 181 | gemm_function = gemm_func(out_dtype) 182 | out = gemm_function(alpha=-2, a=Af, b=Bf,trans_b=True) 183 | 184 | out += np.einsum('ij,ij->i',Af,Af)[:,None] 185 | out += np.einsum('ij,ij->i',Bf,Bf) 186 | return out 187 | 188 | 189 | def dist(A,B, matmul="dot", method="ext", precision="auto"): 190 | """ 191 | Compute squared euclidean distance between two 2D arrays representing 192 | n-dimensional points. 193 | 194 | Parameters 195 | ---------- 196 | A : ndarray 197 | 2D NumPy array of float dtype representing n-dimensional points, with 198 | each row being one point. 199 | B : ndarray 200 | 2D NumPy array of float dtype representing n-dimensional points, with 201 | each row being one point. 202 | matmul : str, optional 203 | Selects the method for matrix-multiplication. It can be 'dot' or 'gemm' 204 | indicating the use of `numpy.dot` and `Scipy's` BLAS based wrapper 205 | functions- `sgemm/dgemm` respectively. 206 | method : str, optional 207 | Selects the method for sum-reductions needed to get those distances. 208 | It can be 'ext' or 'acc'. 209 | precision : str, optional 210 | Selects the precision type for computing distances. It can be 'auto' or 211 | 'float32'. 212 | 213 | Returns 214 | ------- 215 | out : ndarray 216 | Squared euclidean distance between two 2D arrays representing 217 | n-dimensional points. Basically there are two ways - 218 | First one involves creating extended versions of the input arrays and 219 | then using matrix-multiplication to get the final distances. 220 | Second one involves starting off with matrix-multiplication and then 221 | summing over row-wise squared summations of the input arrays into it 222 | along the rows and columns respectively. 
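        Both code paths rely on the same identity,
        ||a - b||^2 = ||a||^2 - 2 * a.b + ||b||^2,
        so the full pairwise distance matrix reduces to a single matrix
        multiplication plus row-wise/column-wise sums of squares.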
223 | 224 | Example(s) 225 | ------- 226 | Find the pairwise euclidean distances between three 2-D coordinates: 227 | 228 | >>> from from eucl_dist.cpu_dist import dist 229 | >>> coords = np.array([[2,3],[3,4],[2,5]]) 230 | >>> dist(coords, coords) 231 | array([[ 0., 2., 4.], 232 | [ 2., 0., 2.], 233 | [ 4., 2., 0.]], dtype=float32) 234 | 235 | """ 236 | 237 | if method=="ext": 238 | return dist_ext(A,B, matmul=matmul, precision=precision) 239 | elif method=="accum": 240 | return dist_accum(A,B, matmul=matmul, precision=precision) 241 | else: 242 | raise Exception("Invalid method") 243 | -------------------------------------------------------------------------------- /query_strategies/entropy_sampling.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from .strategy import Strategy 4 | 5 | class EntropySampling(Strategy): 6 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 7 | super(EntropySampling, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 8 | 9 | def query(self, n): 10 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 11 | probs = self.predict_prob(self.X[idxs_unlabeled], self.Y.numpy()[idxs_unlabeled]) 12 | log_probs = torch.log(probs) 13 | U = (probs*log_probs).sum(1) 14 | return idxs_unlabeled[U.sort()[1][:n]] 15 | -------------------------------------------------------------------------------- /query_strategies/entropy_sampling_dropout.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from .strategy import Strategy 4 | 5 | class EntropySamplingDropout(Strategy): 6 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args, n_drop=10): 7 | super(EntropySamplingDropout, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 8 | self.n_drop = n_drop 9 | 10 | def query(self, n): 11 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 12 | probs = self.predict_prob_dropout(self.X[idxs_unlabeled], self.Y[idxs_unlabeled], self.n_drop) 13 | log_probs = torch.log(probs) 14 | U = (probs*log_probs).sum(1) 15 | return idxs_unlabeled[U.sort()[1][:n]] 16 | -------------------------------------------------------------------------------- /query_strategies/kcenter_greedy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .strategy import Strategy 3 | from sklearn.neighbors import NearestNeighbors 4 | 5 | class KCenterGreedy(Strategy): 6 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 7 | super(KCenterGreedy, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 8 | 9 | def query(self, n): 10 | lb_flag = self.idxs_lb.copy() 11 | embedding = self.get_embedding(self.X, self.Y) 12 | embedding = embedding.numpy() 13 | 14 | from datetime import datetime 15 | 16 | print('calculate distance matrix') 17 | t_start = datetime.now() 18 | dist_mat = np.matmul(embedding, embedding.transpose()) 19 | sq = np.array(dist_mat.diagonal()).reshape(len(self.X), 1) 20 | dist_mat *= -2 21 | dist_mat += sq 22 | dist_mat += sq.transpose() 23 | dist_mat = np.sqrt(dist_mat) 24 | print(datetime.now() - t_start) 25 | 26 | mat = dist_mat[~lb_flag, :][:, lb_flag] 27 | 28 | for i in range(n): 29 | if i%10 == 0: 30 | print('greedy solution {}/{}'.format(i, n)) 31 | mat_min = mat.min(axis=1) 32 | q_idx_ = mat_min.argmax() 33 | q_idx = np.arange(self.n_pool)[~lb_flag][q_idx_] 34 | lb_flag[q_idx] = True 35 | mat = 
np.delete(mat, q_idx_, 0) 36 | mat = np.append(mat, dist_mat[~lb_flag, q_idx][:, None], axis=1) 37 | 38 | return np.arange(self.n_pool)[(self.idxs_lb ^ lb_flag)] 39 | -------------------------------------------------------------------------------- /query_strategies/kmeans_sampling.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .strategy import Strategy 3 | from sklearn.cluster import KMeans 4 | 5 | class KMeansSampling(Strategy): 6 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 7 | super(KMeansSampling, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 8 | 9 | def query(self, n): 10 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 11 | embedding = self.get_embedding(self.X[idxs_unlabeled], self.Y.numpy()[idxs_unlabeled]) 12 | embedding = embedding.numpy() 13 | cluster_learner = KMeans(n_clusters=n) 14 | cluster_learner.fit(embedding) 15 | 16 | cluster_idxs = cluster_learner.predict(embedding) 17 | centers = cluster_learner.cluster_centers_[cluster_idxs] 18 | dis = (embedding - centers)**2 19 | dis = dis.sum(axis=1) 20 | q_idxs = np.array([np.arange(embedding.shape[0])[cluster_idxs==i][dis[cluster_idxs==i].argmin()] for i in range(n)]) 21 | 22 | return idxs_unlabeled[q_idxs] 23 | -------------------------------------------------------------------------------- /query_strategies/lal.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | from .strategy import Strategy 4 | import pdb 5 | import math 6 | from sklearn.ensemble import RandomForestRegressor 7 | from utils import time_string, AverageMeter, RecorderMeter, convert_secs2time, adjust_learning_rate 8 | from torch.utils.data import DataLoader 9 | from torch import nn 10 | import torch.optim as optim 11 | import torch 12 | import time 13 | 14 | # Implementation of the paper: 15 | # Code for paper Ksenia Konyushkova, Raphael Sznitman, Pascal Fua 'Learning Active Learning from Data', NIPS 2017 16 | # Based on https://github.com/ksenia-konyushkova/LAL 17 | # Note: this method is only suitable for binary classification problem and random forest model 18 | # and hence not used in our experiment 19 | 20 | class LALmodel: 21 | ''' Class for the regressor that predicts the expected error reduction caused by adding datapoints''' 22 | 23 | def __init__(self, all_data_for_lal, all_labels_for_lal): 24 | 25 | self.all_data_for_lal = all_data_for_lal 26 | self.all_labels_for_lal = all_labels_for_lal 27 | 28 | def crossValidateLALmodel(self, possible_estimators, possible_depth, possible_features): 29 | ''' Cross-validate the regressor model. 
30 | input: possible_estimators -- list of possible number of estimators (trees) in Random Forest regression 31 | possible_depth -- list of possible maximum depth of the tree in RF regressor 32 | possible_features -- list of possible maximum number of features in a split of tree in RF regressor''' 33 | 34 | best_score = -math.inf 35 | 36 | self.best_est = 0 37 | self.best_depth = 0 38 | self.best_feat = 0 39 | 40 | print('start cross-validating..') 41 | for est in possible_estimators: 42 | for depth in possible_depth: 43 | for feat in possible_features: 44 | model = RandomForestRegressor(n_estimators = est, max_depth=depth, max_features=feat, oob_score=True, n_jobs=8) 45 | model.fit(self.all_data_for_lal[:,:], np.ravel(self.all_labels_for_lal)) 46 | if model.oob_score_>best_score: 47 | self.best_est = est 48 | self.best_depth = depth 49 | self.best_feat = feat 50 | self.model = model 51 | best_score = model.oob_score_ 52 | print('parameters tested = ', est, ', ', depth, ', ', feat, ', with the score = ', model.oob_score_) 53 | # now train with the best parameters 54 | print('best parameters = ', self.best_est, ', ', self.best_depth, ', ', self.best_feat, ', with the best score = ', best_score) 55 | return best_score 56 | 57 | 58 | def builtModel(self, est, depth, feat): 59 | ''' Fits the regressor with the parameters identifier as an input ''' 60 | 61 | self.model = RandomForestRegressor(n_estimators = est, max_depth=depth, max_features=feat, oob_score=True, n_jobs=8) 62 | self.model.fit(self.all_data_for_lal, np.ravel(self.all_labels_for_lal)) 63 | print('oob score = ', self.model.oob_score_) 64 | 65 | 66 | class LearningAL(Strategy): 67 | '''Points are sampled according to a method described in K. Konyushkova, R. Sznitman, P. Fua 'Learning Active Learning from data' ''' 68 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 69 | super(LearningAL, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 70 | self.n_estimators = args.n_estimators 71 | self.X_te = X_te 72 | self.Y_te = Y_te 73 | all_data_for_lal, all_labels_for_lal = self.get_features(net, ) 74 | self.lalModel = self.train_lal_model(self, all_data_for_lal, all_labels_for_lal) 75 | 76 | def getLALdatapoints(self, n_points_per_experiment): 77 | # train the model based on the labeled data 78 | self.train(n_epoch=self.args.n_epoch) 79 | nFeatures = 8 80 | # get my features 81 | # now we need the number of training datapoints as a feature 82 | known_data = np.arange(self.n_pool)[self.idxs_lb] 83 | unknown_data = np.arange(self.n_pool)[~self.idxs_lb] 84 | known_labels = self.Y[np.arange(self.n_pool)[self.idxs_lb]] 85 | unknown_labels = self.Y[np.arange(self.n_pool)[~self.idxs_lb]] 86 | feature_vector = self._getFeaturevector4LAL(self.clf, unknown_data[0:n_points_per_experiment,:], known_labels, nFeatures) 87 | 88 | # predict on test data to evaluate the classifier quality 89 | test_0_acc = self.predict(self.X_te, self.Y_te) 90 | # sample n_points_per_experiment samples that we will add to the training dataset and check the change in error 91 | gains_quality = np.zeros((n_points_per_experiment)) 92 | 93 | for i in range(n_points_per_experiment): 94 | # try to add it to the labelled data 95 | new_known_data = np.concatenate((known_data,[unknown_data[i,:]])) 96 | new_known_labels = np.concatenate((known_labels,unknown_labels[i])) 97 | 98 | # train updated model - model_i 99 | new_known_labels = np.ravel(new_known_labels) 100 | self.train(self.args.n_epoch, self.X[new_known_data], self.Y[new_known_labels]) 101 | 
102 | # predict on test data 103 | test_i_acc = self.clf.predict(self.X_te, self.Y_te) 104 | # how much the quality has changed 105 | gains_quality[i]=(test_i_acc - test_0_acc) 106 | 107 | 108 | return feature_vector, gains_quality 109 | 110 | 111 | def _getFeaturevector4LAL(self, unknown_data, known_labels, nFeatures): 112 | 113 | # - predicted mean (but only for n_points_per_experiment datapoints) 114 | prediction_unknown = self.clf.predict_proba(unknown_data) 115 | 116 | # features are in the following order: 117 | # 1: prediction probability 118 | # 2: prediction variance 119 | # 3: proportion of positive class 120 | # 4: oob score 121 | # 5: coeficiant of variance of feature importance 122 | # 6: variance of forest 123 | # 7: average depth of trees 124 | # 8: number of datapoints in training 125 | 126 | f_1 = prediction_unknown[:,0] 127 | # - predicted standard deviation 128 | # need to call each tree of a forest separately to get a prediction because it is not possible to get them all immediately 129 | # f_2 = np.std(np.array([tree.predict_proba(unknown_data)[:,0] for tree in model.estimators_]), axis=0) 130 | # - proportion of positive points 131 | # check np.size(self.indecesKnown) 132 | f_3 = (sum(known_labels>0)/np.size(self.indecesKnown))*np.ones_like(f_1) 133 | # the score estimated on out of bag estimate 134 | # f_4 = model.oob_score_*np.ones_like(f_1) 135 | # - coeficient of variance of feature importance 136 | # check if this is the number of features! 137 | # f_5 = np.std(model.feature_importances_/self.dataset.trainData.shape[1])*np.ones_like(f_1) 138 | # - estimate variance of forest by looking at avergae of variance of some predictions 139 | # f_6 = np.mean(np.std(np.array([tree.predict_proba(unknown_data)[:,0] for tree in model.estimators_]), axis=0))*np.ones_like(f_1) 140 | # - compute the average depth of the trees in the forest 141 | # f_7 = np.mean(np.array([tree.tree_.max_depth for tree in model.estimators_]))*np.ones_like(f_1) 142 | # LALfeatures = np.concatenate(([f_1], [f_2], [f_3], [f_4], [f_5], [f_6], [f_7]), axis=0) 143 | LALfeatures = np.concatenate(([f_1], [f_3]), axis=0) 144 | 145 | if nFeatures>7: 146 | # the same as f_3, check np.size(self.indecesKnown) 147 | f_8 = np.size(self.indecesKnown)*np.ones_like(f_1) 148 | # LALfeatures = np.concatenate(([f_1], [f_2], [f_3], [f_4], [f_5], [f_6], [f_7], [f_8]), axis=0) 149 | LALfeatures = np.concatenate(([f_1], [f_3]), axis=0) 150 | 151 | LALfeatures = np.transpose(LALfeatures) 152 | 153 | return LALfeatures 154 | 155 | 156 | def train_lal_model(self, all_data_for_lal, all_labels_for_lal): 157 | # build the lal model 158 | 159 | lalModel = self.train_lal_model(all_data_for_lal, all_labels_for_lal) 160 | 161 | parameters = {'est': 2000, 'depth': 40, 'feat': 6 } 162 | # the regression model to predict the error of an unlabeled image 163 | lal_model = LALmodel(all_data_for_lal, all_labels_for_lal) 164 | 165 | lal_model.builtModel(est=parameters['est'], 166 | depth=parameters['depth'], 167 | feat=parameters['feat']) 168 | lal_model.crossValidateLALmodel() 169 | print('Train Regressor Done!') 170 | print('Oob score = ', lal_model.model.oob_score_) 171 | return lal_model.model 172 | 173 | 174 | def query(self, n): 175 | # features are in the following order: 176 | # 1: prediction probability 177 | # 2: prediction variance 178 | # 3: proportion of positive class 179 | # 4: oob score 180 | # 5: coeficiant of variance of feature importance 181 | # 6: variance of forest 182 | # 7: average depth of trees 183 | # 8: number 
of datapoints in training 184 | 185 | 186 | # data 187 | known_labels = self.Y[self.idxs_lb] 188 | 189 | n_lablled = np.sum(self.idxs_lb) 190 | n_dim = np.shape(self.X)[1] 191 | 192 | # predict probabilities for the unlabeled data 193 | temp = self.predict_prob(self.X[~self.idxs_lb], self.Y[~self.idxs_lb]) 194 | 195 | # - average and standard deviation of the predicted scores 196 | f_1 = np.mean(temp, axis=0) 197 | f_2 = np.std(temp, axis=0) 198 | # - proportion of positive points 199 | f_3 = (sum(known_labels>0)/n_lablled)*np.ones_like(f_1) 200 | # the score estimated on out of bag estimate 201 | f_4 = self.model.oob_score_*np.ones_like(f_1) 202 | # - coeficient of variance of feature importance 203 | f_5 = np.std(self.model.feature_importances_/n_dim)*np.ones_like(f_1) 204 | # - estimate variance of forest by looking at avergae of variance of some predictions 205 | f_6 = np.mean(f_2, axis=0)*np.ones_like(f_1) 206 | # - compute the average depth of the trees in the forest 207 | f_7 = np.mean(np.array([tree.tree_.max_depth for tree in self.model.estimators_]))*np.ones_like(f_1) 208 | # - number of already labelled datapoints 209 | f_8 = n_lablled*np.ones_like(f_1) 210 | 211 | # all the featrues put together for regressor 212 | LALfeatures = np.concatenate(([f_1], [f_2], [f_3], [f_4], [f_5], [f_6], [f_7], [f_8]), axis=0) 213 | LALfeatures = np.transpose(LALfeatures) 214 | 215 | # predict the expercted reduction in the error by adding the point 216 | LALprediction = self.lalModel.predict(LALfeatures) 217 | 218 | # select the datapoint with the biggest reduction in the error 219 | selectedIndex1toN = np.argsort(LALprediction)[::-1][:n] 220 | 221 | # retrieve the real index of the selected datapoint 222 | indicesUnknown = np.nonzero(~self.idxs_lb)[0] 223 | selectedIndex = indicesUnknown[selectedIndex1toN] 224 | 225 | return selectedIndex 226 | 227 | def train(self, n_epoch=10, X=None, Y=None): 228 | def weight_reset(m): 229 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): 230 | m.reset_parameters() 231 | 232 | self.clf = self.clf.apply(weight_reset) 233 | self.clf = nn.DataParallel(self.clf).to(self.device) 234 | parameters = self.clf.parameters() 235 | optimizer = optim.SGD(parameters, lr = self.args.lr, 236 | weight_decay=5e-4, momentum=self.args.momentum) 237 | 238 | idxs_train = np.arange(self.n_pool)[self.idxs_lb] 239 | 240 | 241 | epoch_time = AverageMeter() 242 | recorder = RecorderMeter(n_epoch) 243 | epoch = 0 244 | train_acc = 0. 245 | previous_loss = 0. 
246 | if idxs_train.shape[0] != 0: 247 | transform = self.args.transform_tr 248 | 249 | if X is None and Y is None: 250 | X_train = self.X[idxs_train] 251 | Y_train = torch.Tensor(self.Y.numpy()[idxs_train]).long() 252 | else: 253 | X_train = X 254 | Y_train = Y 255 | 256 | loader_tr = DataLoader(self.handler(X_train, Y_train, 257 | transform=transform), shuffle=True, 258 | **self.args.loader_tr_args) 259 | 260 | for epoch in range(n_epoch): 261 | ts = time.time() 262 | current_learning_rate, _ = adjust_learning_rate(optimizer, epoch, self.args.gammas, self.args.schedule, self.args) 263 | 264 | # Display simulation time 265 | need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (n_epoch - epoch)) 266 | need_time = '[{} Need: {:02d}:{:02d}:{:02d}]'.format(self.args.strategy, need_hour, need_mins, need_secs) 267 | 268 | # train one epoch 269 | train_acc, train_los = self._train(epoch, loader_tr, optimizer) 270 | 271 | # measure elapsed time 272 | epoch_time.update(time.time() - ts) 273 | 274 | print('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f}]'.format(time_string(), epoch, n_epoch, 275 | need_time, current_learning_rate 276 | ) \ 277 | + ' [Best : Train Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(True), 278 | 1. - recorder.max_accuracy(True))) 279 | 280 | 281 | recorder.update(epoch, train_los, train_acc, 0, 0) 282 | 283 | # The converge condition 284 | if abs(previous_loss - train_los) < 0.0001: 285 | break 286 | else: 287 | previous_loss = train_los 288 | 289 | self.clf = self.clf.module 290 | best_train_acc = recorder.max_accuracy(istrain=True) 291 | return best_train_acc 292 | -------------------------------------------------------------------------------- /query_strategies/learning_loss_for_al.py: -------------------------------------------------------------------------------- 1 | '''Active Learning Procedure in PyTorch. 2 | 3 | Reference: 4 | [Yoo et al. 2019] Learning Loss for Active Learning (https://arxiv.org/abs/1905.03677) 5 | ''' 6 | 7 | import torch.nn.functional as F 8 | import numpy as np 9 | from .strategy import Strategy 10 | 11 | from utils import print_log, time_string, AverageMeter, RecorderMeter, convert_secs2time 12 | 13 | # Torch 14 | import torch 15 | import torch.nn as nn 16 | import torch.optim as optim 17 | from torch.utils.data import DataLoader 18 | from torch.autograd import Variable 19 | import torch.optim.lr_scheduler as lr_scheduler 20 | from torch.utils.data.sampler import SubsetRandomSampler 21 | import time 22 | # Torchvison 23 | import torchvision.transforms as T 24 | # import torchvision.models as models 25 | from torchvision.datasets import CIFAR10 26 | 27 | 28 | class SubsetSequentialSampler(torch.utils.data.Sampler): 29 | r"""Samples elements sequentially from a given list of indices, without replacement. 
30 | 31 | Arguments: 32 | indices (sequence): a sequence of indices 33 | """ 34 | def __init__(self, indices): 35 | super().__init__(indices) 36 | self.indices = indices 37 | 38 | def __iter__(self): 39 | return (self.indices[i] for i in range(len(self.indices))) 40 | 41 | def __len__(self): 42 | return len(self.indices) 43 | 44 | 45 | MARGIN = 1.0 # xi 46 | WEIGHT = 1.0 # lambda 47 | 48 | 49 | LR = 0.1 50 | MILESTONES = [160] 51 | EPOCHL = 120 # After 120 epochs, stop the gradient from the loss prediction module propagated to the target model 52 | 53 | MOMENTUM = 0.9 54 | WDECAY = 5e-4 55 | device_global = 'cuda' 56 | 57 | class Reshape(nn.Module): 58 | def __init__(self): 59 | super(Reshape, self).__init__() 60 | 61 | 62 | def forward(self, x): 63 | return x.view(x.size(0), -1) 64 | 65 | # Modified so that the LossNet is built dynamically from the number of input feature layers 66 | class LossNet(nn.Module): 67 | def __init__(self, features): 68 | # feature_sizes=[32, 16, 8, 4], num_channels=[16, 32, 64, 128] 69 | super(LossNet, self).__init__() 70 | self.num_layers = len(features) 71 | 72 | interm_dim = 128 73 | feature_sizes = [] 74 | num_channels = [] 75 | for f in features: 76 | num_channels.append(f.size(1)) 77 | feature_sizes.append(f.size(2)) 78 | 79 | self.GAP_list = nn.ModuleList() # ModuleList so the layers are registered as submodules 80 | self.FC_list = nn.ModuleList() # and their parameters are returned by loss_module.parameters() 81 | for num in range(self.num_layers): 82 | self.GAP_list.append(nn.AvgPool2d(feature_sizes[num]).to(device_global) ) 83 | self.FC_list.append(nn.Linear(num_channels[num], interm_dim).to(device_global)) 84 | 85 | self.linear = nn.Linear(self.num_layers * interm_dim, 1) 86 | 87 | 88 | def forward(self, features): 89 | out_list = [] 90 | 91 | for num in range(self.num_layers): 92 | out = self.GAP_list[num](features[num]) 93 | out = out.view(out.size(0), -1) 94 | out = self.FC_list[num](out) 95 | out = F.relu(out) 96 | out_list.append(out) 97 | # print(torch.cat(out_list, 1).size()) 98 | out = self.linear(torch.cat(out_list, 1)) 99 | return out 100 | 101 | 102 | def LossPredLoss(input, target, margin=1.0, reduction='mean'): 103 | assert len(input) % 2 == 0, 'the batch size is not even.' 104 | assert input.shape == input.flip(0).shape 105 | 106 | input = (input - input.flip(0))[ 107 | :len(input) // 2] # [l_1 - l_2B, l_2 - l_2B-1, ... , l_B - l_B+1], where batch_size = 2B 108 | target = (target - target.flip(0))[:len(target) // 2] 109 | target = target.detach() 110 | 111 | one = 2 * torch.sign(torch.clamp(target, min=0)) - 1 # the sign indicator defined by the authors 112 | 113 | if reduction == 'mean': 114 | loss = torch.sum(torch.clamp(margin - one * input, min=0)) 115 | loss = loss / input.size(0) # Note that the size of input is already halved 116 | elif reduction == 'none': 117 | loss = torch.clamp(margin - one * input, min=0) 118 | else: 119 | raise NotImplementedError() 120 | 121 | 122 | return loss 123 | 124 | class LearningLoss(Strategy): 125 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 126 | super(LearningLoss, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 127 | global device_global 128 | device_global = self.device 129 | 130 | 131 | 132 | def ll_train(self, epoch, loader_tr, optimizers,criterion): 133 | self.clf.train() 134 | self.loss_module.train() 135 | accFinal = 0. 136 | accLoss = 0. 
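# Joint training below: the backbone minimizes cross-entropy on each labeled batch, while the LossNet module minimizes the pairwise ranking loss (LossPredLoss) on the per-sample losses; after EPOCHL epochs the intermediate features are detached so the ranking loss no longer reaches the backbone.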
137 | for batch_idx, (x, y, idxs) in enumerate(loader_tr): 138 | x, y = Variable(x.to(self.device) ), Variable(y.to(self.device) ) 139 | optimizers['backbone'].zero_grad() 140 | optimizers['module'].zero_grad() 141 | scores, e1, features = self.clf(x,intermediate = True) 142 | target_loss = criterion(scores, y) 143 | if epoch > 120: 144 | # After 120 epochs, stop the gradient from the loss prediction module propagated to the target model. 145 | # rebind the list to detached copies so the ranking loss cannot back-propagate into the backbone 146 | features = [feature.detach() for feature in features] 147 | pred_loss = self.loss_module(features) 148 | pred_loss = pred_loss.view(pred_loss.size(0)) 149 | 150 | m_backbone_loss = torch.sum(target_loss) / target_loss.size(0) 151 | m_module_loss = LossPredLoss( 152 | pred_loss, target_loss, margin=MARGIN) 153 | loss = m_backbone_loss + WEIGHT * m_module_loss 154 | 155 | 156 | loss.backward() 157 | # clamp gradients, just in case 158 | for p in filter(lambda p: p.grad is not None, self.clf.parameters()): p.grad.data.clamp_(min=-.1, max=.1) 159 | for p in filter(lambda p: p.grad is not None, self.loss_module.parameters()): p.grad.data.clamp_(min=-.1, max=.1) 160 | 161 | optimizers['backbone'].step() 162 | optimizers['module'].step() 163 | 164 | 165 | 166 | accFinal += torch.sum((torch.max(scores,1)[1] == y).float()).data.item() 167 | accLoss += loss.item() 168 | if batch_idx % 10 == 0: 169 | print ("[Batch={:03d}] [Loss={:.2f}]".format(batch_idx, loss)) 170 | 171 | return accFinal / len(loader_tr.dataset.X), accLoss 172 | 173 | def train(self,alpha=0, n_epoch=80): 174 | def weight_reset(m): 175 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): 176 | m.reset_parameters() 177 | 178 | transform = self.args.transform_tr 179 | idxs_train = np.arange(self.n_pool)[self.idxs_lb] 180 | 181 | loader_tr = DataLoader(self.handler(self.X[idxs_train], torch.Tensor(self.Y.numpy()[idxs_train]).long(), 182 | transform=transform), 183 | shuffle=True, 184 | pin_memory=True, 185 | # sampler = DistributedSampler(train_data), 186 | worker_init_fn=self.seed_worker, 187 | generator=self.g, 188 | **self.args.loader_tr_args) 189 | 190 | # n_epoch = self.args.n_epoch 191 | self.clf = self.net.apply(weight_reset).to(self.device) 192 | criterion = nn.CrossEntropyLoss(reduction='none') 193 | optim_backbone = optim.SGD(self.clf.parameters(), lr = self.args.lr, weight_decay=5e-4, momentum=self.args.momentum) 194 | # sched_backbone = lr_scheduler.MultiStepLR(optim_backbone, milestones=MILESTONES) 195 | recorder = RecorderMeter(n_epoch) 196 | epoch_time = AverageMeter() 197 | print("current:",len(self.X[idxs_train])) 198 | for batch_idx, (x, y, idxs) in enumerate(loader_tr): 199 | x, y = Variable(x.to(self.device) ), Variable(y.to(self.device) ) 200 | scores, e1, features = self.clf(x,intermediate = True) 201 | break 202 | loader_tr = DataLoader(self.handler(self.X[idxs_train], torch.Tensor(self.Y.numpy()[idxs_train]).long(), 203 | transform=transform), 204 | shuffle=True, 205 | pin_memory=True, 206 | # sampler = DistributedSampler(train_data), 207 | worker_init_fn=self.seed_worker, 208 | generator=self.g, 209 | **self.args.loader_tr_args) 210 | 211 | # for f in features: 212 | # print(f.size()) 213 | self.loss_module = LossNet(features).to(self.device) 214 | optim_module = optim.SGD(self.loss_module.parameters(), lr=self.args.lr, 215 | momentum=MOMENTUM, weight_decay=WDECAY) 216 | optimizers = {'backbone': optim_backbone, 'module': optim_module} 217 | 218 | 219 | 220 | 221 | epoch = 0 222 | accCurrent = 0. 
223 | while epoch < n_epoch: 224 | ts = time.time() 225 | # schedulers['backbone'].step() 226 | need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (n_epoch - epoch)) 227 | need_time = '[{} Need: {:02d}:{:02d}:{:02d}]'.format(self.args.strategy, need_hour, need_mins, need_secs) 228 | accCurrent,accLoss = self.ll_train(epoch, loader_tr, optimizers, criterion) 229 | test_acc= self.predict(self.X_te, self.Y_te) 230 | recorder.update(epoch, accLoss, accCurrent, 0, test_acc) 231 | epoch += 1 232 | current_learning_rate, _ = adjust_learning_rate(optimizers['backbone'], epoch, self.args.gammas, self.args.schedule, self.args) 233 | adjust_learning_rate(optimizers['module'], epoch, self.args.gammas, self.args.schedule, self.args) 234 | epoch_time.update(time.time() - ts) 235 | print('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f}]'.format(time_string(), epoch, n_epoch, 236 | need_time, current_learning_rate 237 | ) \ 238 | + ' [Best : Test Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 239 | 1. - recorder.max_accuracy(False))) 240 | # print(str(epoch) + ' training accuracy: ' + str(accCurrent),'lr',current_learning_rate, flush=True) 241 | # if (epoch % 50 == 0) and (accCurrent < 0.2): # reset if not converging 242 | # self.clf = self.net.apply(weight_reset) 243 | 244 | if self.args.save_model: 245 | self.save_model() 246 | 247 | return recorder.max_accuracy(istrain=False) 248 | 249 | def get_uncertainty(self,models, unlabeled_loader): 250 | models['backbone'].eval() 251 | models['module'].eval() 252 | uncertainty = torch.tensor([]).to(self.device) 253 | 254 | with torch.no_grad(): 255 | for (inputs, labels,idx) in unlabeled_loader: 256 | inputs = inputs.to(self.device) 257 | # labels = labels.to(self.device) 258 | scores, e1, features = models['backbone'](inputs,intermediate = True) 259 | pred_loss = models['module'](features) # pred_loss = criterion(scores, labels) # ground truth loss 260 | pred_loss = pred_loss.view(pred_loss.size(0)) 261 | 262 | uncertainty = torch.cat((uncertainty, pred_loss), 0) 263 | 264 | return uncertainty.cpu() 265 | 266 | def query(self, n): 267 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 268 | # idxs_unlabeled = idxs_unlabeled[:int(len(idxs_unlabeled)/5)] 269 | np.random.shuffle(idxs_unlabeled) 270 | idxs_unlabeled = idxs_unlabeled[:min(10000,len(idxs_unlabeled))] 271 | unlabeled_loader = DataLoader( 272 | self.handler(self.X[idxs_unlabeled], torch.Tensor(self.Y.numpy()[idxs_unlabeled]).long(), 273 | transform=self.args.transform_te), shuffle=True, 274 | **self.args.loader_tr_args) 275 | models = {'backbone': self.clf, 'module': self.loss_module} 276 | uncertainty = self.get_uncertainty(models, unlabeled_loader) 277 | 278 | # Index in ascending order 279 | arg = np.argsort(uncertainty) 280 | 281 | return idxs_unlabeled[arg[:n]] 282 | 283 | def adjust_learning_rate(optimizer, epoch, gammas, schedule, args): 284 | """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" 285 | "Add by YU" 286 | lr = args.lr 287 | mu = args.momentum 288 | 289 | if args.optimizer != "YF": 290 | assert len(gammas) == len( 291 | schedule), "length of gammas and schedule should be equal" 292 | for (gamma, step) in zip(gammas, schedule): 293 | if (epoch >= step): 294 | lr = lr * gamma 295 | else: 296 | break 297 | for param_group in optimizer.param_groups: 298 | param_group['lr'] = lr 299 | 300 | elif args.optimizer == "YF": 301 | lr = optimizer._lr 302 | mu = optimizer._mu 303 | 304 | return lr, mu 305 | 
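As a quick illustration (not part of the repository), the pairwise ranking loss above compares predicted losses in pairs formed by flipping the batch, and penalizes a pair when the predicted ordering disagrees with the true-loss ordering or falls short of the margin. A minimal sketch, assuming `LossPredLoss` as defined in `learning_loss_for_al.py` above and hypothetical loss values (the margin 1.0 matches the file's `MARGIN`):

```python
import torch

# Hypothetical LossNet outputs and detached per-sample cross-entropy losses for a batch of 4.
pred_loss = torch.tensor([0.9, 0.2, 0.4, 0.1])
target_loss = torch.tensor([1.5, 0.3, 0.2, 0.8])

# Pairs are (0, 3) and (1, 2). Pair (0, 3) is ordered correctly but not by the full margin,
# pair (1, 2) is ordered incorrectly, so both contribute to the hinge loss.
print(LossPredLoss(pred_loss, target_loss, margin=1.0))  # tensor(0.7000)
```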
-------------------------------------------------------------------------------- /query_strategies/least_confidence.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .strategy import Strategy 3 | import pdb 4 | class LeastConfidence(Strategy): 5 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 6 | super(LeastConfidence, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 7 | 8 | def query(self, n): 9 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 10 | probs = self.predict_prob(self.X[idxs_unlabeled], np.asarray(self.Y)[idxs_unlabeled]) 11 | U = probs.max(1)[0] 12 | return idxs_unlabeled[U.sort()[1][:n]] 13 | -------------------------------------------------------------------------------- /query_strategies/least_confidence_dropout.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from .strategy import Strategy 4 | 5 | class LeastConfidenceDropout(Strategy): 6 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args, n_drop=10): 7 | super(LeastConfidenceDropout, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 8 | self.n_drop = n_drop 9 | 10 | def query(self, n): 11 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 12 | probs = self.predict_prob_dropout(self.X[idxs_unlabeled], self.Y.numpy()[idxs_unlabeled], self.n_drop) 13 | U = probs.max(1)[0] 14 | return idxs_unlabeled[U.sort()[1][:n]] 15 | -------------------------------------------------------------------------------- /query_strategies/margin_sampling.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .strategy import Strategy 3 | import pdb 4 | 5 | class MarginSampling(Strategy): 6 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 7 | super(MarginSampling, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 8 | 9 | def query(self, n): 10 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 11 | probs = self.predict_prob(self.X[idxs_unlabeled], self.Y.numpy()[idxs_unlabeled]) 12 | probs_sorted, idxs = probs.sort(descending=True) 13 | U = probs_sorted[:, 0] - probs_sorted[:,1] 14 | return idxs_unlabeled[U.sort()[1].numpy()[:n]] 15 | -------------------------------------------------------------------------------- /query_strategies/margin_sampling_dropout.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from .strategy import Strategy 4 | 5 | class MarginSamplingDropout(Strategy): 6 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args, n_drop=10): 7 | super(MarginSamplingDropout, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 8 | self.n_drop = n_drop 9 | 10 | def query(self, n): 11 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 12 | 13 | probs = self.predict_prob_dropout(self.X[idxs_unlabeled], self.Y[idxs_unlabeled], self.n_drop) 14 | probs_sorted, idxs = probs.sort(descending=True) 15 | U = probs_sorted[:, 0] - probs_sorted[:,1] 16 | return idxs_unlabeled[U.sort()[1][:n]] 17 | -------------------------------------------------------------------------------- /query_strategies/mcadl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .strategy import Strategy 3 | import pdb 4 | import heapq 5 | import copy 6 | import torch 7 | from 
torch.utils.data import DataLoader 8 | from sklearn.metrics import pairwise 9 | 10 | # An implementation of the paper: 11 | # Multi-criteria active deep learning for image classification 12 | # Published in Knowledge-Based System, 2019 13 | # Hunan University 14 | def _resize(X,length): 15 | # Smooth augmentation, for robust performance 16 | import PIL 17 | # print(X.shape) 18 | X = PIL.Image.fromarray(X) 19 | X = X.resize((length,length)) 20 | return np.array(X) 21 | 22 | class MCADL(Strategy): 23 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 24 | super(MCADL, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 25 | self.alpha_init = 0.9 26 | self.beta_init = 0.9 27 | self.last_acc = [0.0]*self.args.n_class 28 | X_sim = [] 29 | # print(type(self.X_te[0])) 30 | # exit() 31 | for x in X: 32 | X_sim.append(_resize(x,self.args.img_size)) 33 | X_sim = np.array(X_sim) 34 | self.similarity = pairwise.cosine_similarity(X_sim.reshape([len(X), -1]), X_sim.reshape([len(X), -1])) 35 | 36 | def uncertainty(self, proba, flag): 37 | ''' 38 | Input: 39 | @proba: probability for all samples, n_sample x nb_class 40 | @flag: a mark for samples, n_sample x 1, 0 represents unselected 41 | return: 42 | @BvSB: the uncertainty measure 43 | @pse_index: the index of sample to be psesudo labeled 44 | ''' 45 | n = proba.shape[0] 46 | P_index = np.where(np.array(flag)==0) #1xN array 47 | plist = P_index[0] 48 | uncert = -1.0*np.ones((n,),dtype='float64') 49 | for d in plist: 50 | D = proba[d,:] 51 | # decide k 52 | k = 0 53 | while True: 54 | k += 1 55 | lgst = heapq.nlargest(k, D) 56 | if sum(lgst) > 0.5: 57 | break 58 | 59 | Z = np.array(heapq.nlargest(k, D)) 60 | v = np.absolute(Z - Z.mean()).mean() 61 | uncert[d] = 1-v 62 | 63 | return uncert 64 | 65 | def evaluate_each_class(self, X, Y): 66 | ''' 67 | Input: 68 | @X: data 69 | @Y: label 70 | Return: 71 | @Acc: the accuracy of all classes 72 | ''' 73 | Acc = [] 74 | for i in range(self.args.n_class): 75 | # obtain the accuracy on each class 76 | if len(Y[Y==i]) == 0: 77 | acc = 0 78 | else: 79 | acc = self.predict(X[Y==i], Y[Y==i]) 80 | Acc.append(acc) 81 | return Acc 82 | 83 | 84 | def getID(self, pred_label, listID): 85 | ''' 86 | Input: 87 | @pred: the predicted label for the all samples 88 | @listID: the index of k neighbor to current sample 89 | Return: 90 | @maxID: the psesudo label 91 | The psesudo label is decided by the surrounding labels 92 | ''' 93 | pclass=[] 94 | for i in listID: 95 | pclass.append(pred_label[i]) 96 | setClass = set(pclass) 97 | maxID = -1 98 | maxCount =-1 99 | for i in setClass: 100 | if pclass.count(i)>maxCount: 101 | maxID =i 102 | maxCount = pclass.count(i) 103 | # print("knn class:",pclass,"final ",maxID) 104 | return maxID 105 | 106 | def avg_cosine_distance(self, idx, listed): 107 | ''' 108 | Input: 109 | @idx: the index of the sample to be measured density 110 | @listed: the index of the labeled samples 111 | Return: 112 | @The average value of diversity 113 | @m: the index of k neighbor to the sample to be measured density 114 | ''' 115 | 116 | n = len(listed) 117 | sumD = sum(1-self.similarity[idx, listed]) 118 | 119 | #modified 0805 120 | list_value = list(self.similarity[idx, listed]) 121 | kvalue = heapq.nlargest(5, list_value) 122 | m = [] 123 | for i in range(len(kvalue)): 124 | ids = list_value.index(kvalue[i]) 125 | v = listed[ids] 126 | m.append(v) 127 | return 1.0*sumD/n, m 128 | 129 | def getWeightM(self, last_acc, now_acc): 130 | ''' 131 | Input: 132 | @last_acc: the accuracy of 
last round 133 | @now_acc: the accuracy of current round 134 | Return: 135 | @q: the weights of all classes 136 | ''' 137 | b = 0.5 138 | q=[] 139 | # in the begining, the approach pays more attention to 140 | # samples from classes that have fast performance enhancement 141 | if min(now_acc) < b: 142 | p=copy.deepcopy(now_acc) 143 | s=0 144 | print("first ....") 145 | for i in range(self.args.n_class): 146 | t=p[i]-last_acc[i] 147 | if t<0: 148 | t=0 149 | s+=t 150 | q.append(t) 151 | print("improvement:", q) 152 | if s!=0.0: 153 | for i in range(self.args.n_class): 154 | q[i]/=s 155 | 156 | else: 157 | q=[0.1]*10 158 | else: 159 | # as the performance continues to improve, 160 | # they tends to select samples from the clases with low performance 161 | # to balance the performance among classes 162 | q = [1./i for i in list(now_acc)] 163 | q = [i/sum(q) for i in q] 164 | 165 | print("last_acc:", last_acc) 166 | print("acc:", now_acc) 167 | print("final weight:", q) 168 | return q 169 | 170 | def query(self, n): 171 | class_accs = self.evaluate_each_class(self.X[self.idxs_lb], self.Y[self.idxs_lb]) 172 | weight_classes = self.getWeightM(self.last_acc, class_accs) 173 | self.last_acc = class_accs 174 | 175 | # calculate alpha and beta 176 | AR_t = self.predict(self.X[self.idxs_lb], self.Y[self.idxs_lb]) # average acc on training data 177 | alpha = self.alpha_init * np.exp(-AR_t) 178 | beta = self.beta_init * np.exp(-AR_t) 179 | 180 | proba = self.predict_prob(self.X, self.Y) 181 | 182 | idxs_unlabeled = np.nonzero(~self.idxs_lb)[0] 183 | idxs_labeled = np.nonzero(self.idxs_lb)[0] 184 | # for each sample, calculate its informativeness 185 | # density and similarity 186 | density = [-1.0]*len(self.idxs_lb) 187 | similarity = [-1.0]*len(self.idxs_lb) 188 | for i in list(idxs_unlabeled): 189 | # for an unlabeled sample x_i, we determine its most similar samples x_s 190 | distance, kId = self.avg_cosine_distance(i, idxs_labeled) 191 | # The pesudo label of x_i is deciding by x_s 192 | pse_class = self.getID(proba.max(1)[1], kId) 193 | # cosine distance to the labeled samples belongs to the pesudo label 194 | sample_idxs = idxs_labeled[self.Y[self.idxs_lb] == pse_class] 195 | cosine_distance = self.similarity[i, sample_idxs] 196 | 197 | density[i] = 1 - cosine_distance.mean() 198 | similarity[i] = 1 - cosine_distance.max() 199 | 200 | 201 | # uncertainty of all samples 202 | uncertainty_score = self.uncertainty(proba=proba, flag=self.idxs_lb) 203 | 204 | # label-based measure 205 | label_based_score = [-1.0]*len(self.idxs_lb) 206 | 207 | for i in list(idxs_unlabeled): 208 | # for an unlabeled sample x_i, we determine its most similar samples x_s 209 | distance, kId = self.avg_cosine_distance(i, idxs_labeled) 210 | # The pesudo label of x_i is deciding by x_s 211 | pse_class = self.getID(proba.max(1)[1], kId) 212 | # finally, the info-label value of x_i is equal to w_s 213 | w_classes = weight_classes[pse_class] 214 | # score[i] = BvSB[i,] 215 | label_based_score[i] = w_classes 216 | 217 | 218 | info_data = [0.5*density[i] + 0.5*similarity[i] for i in range(len(density))] 219 | info_model = [beta*uncertainty_score[i] + (1-beta)*label_based_score[i] for i in range(len(label_based_score))] 220 | infomativeness = [alpha*info_data[i] + (1-alpha)*info_model[i] for i in range(len(info_model))] 221 | infomativeness = np.array(infomativeness)[idxs_unlabeled] 222 | 223 | # select the top-n samples 224 | ranked_idx = infomativeness.argsort()[::-1][:n] 225 | 226 | return idxs_unlabeled[ranked_idx] 227 | 228 
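A small sketch (not from the repository) of the class-weighting rule in `getWeightM` above, with hypothetical per-class accuracies for a 4-class problem: while any class is still below 0.5 accuracy the weights follow the clipped, normalized per-class improvement over the previous round, and once every class passes 0.5 they switch to normalized inverse accuracy so the weakest classes are favoured. The uniform fallback when no class improved is an assumption of this sketch:

```python
import numpy as np

last_acc = np.array([0.20, 0.35, 0.60, 0.55])  # hypothetical previous-round accuracies
now_acc = np.array([0.40, 0.45, 0.62, 0.58])   # hypothetical current accuracies

if now_acc.min() < 0.5:
    # early phase: reward classes whose accuracy improved the fastest
    gain = np.clip(now_acc - last_acc, 0.0, None)
    weights = gain / gain.sum() if gain.sum() > 0 else np.full(len(now_acc), 1.0 / len(now_acc))
else:
    # late phase: favour the currently weakest classes
    inv = 1.0 / now_acc
    weights = inv / inv.sum()

print(weights)  # sums to 1; the fastest-improving class gets the largest weight
```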
| -------------------------------------------------------------------------------- /query_strategies/random_sampling.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .strategy import Strategy 3 | import pdb 4 | 5 | class RandomSampling(Strategy): 6 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 7 | super(RandomSampling, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 8 | 9 | def query(self, n): 10 | inds = np.where(self.idxs_lb==0)[0] 11 | return inds[np.random.permutation(len(inds))][:n] 12 | -------------------------------------------------------------------------------- /query_strategies/selection_via_proxy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from .strategy import Strategy 4 | import mymodels 5 | from sklearn import preprocessing 6 | from torch import nn 7 | import sys 8 | import torch 9 | import torch.nn.functional as F 10 | import torch.optim as optim 11 | from torch.autograd import Variable 12 | from torch.utils.data import DataLoader 13 | from copy import deepcopy 14 | from utils import time_string, AverageMeter, RecorderMeter, convert_secs2time, adjust_learning_rate 15 | import time 16 | 17 | # Implementation of the paper: Selection via Proxy: Efficient Data Selection for Deep Learning 18 | # Published in ICLR'2020 19 | # Code: https://github.com/stanford-futuredata/selection-via-proxy/blob/master/svp/cifar/active.py 20 | 21 | 22 | 23 | class Proxy(Strategy): 24 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 25 | super(Proxy, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 26 | self.proxy_model = mymodels.__dict__[args.proxy_model](n_class=args.n_class) 27 | 28 | def _train(self, epoch, loader_tr, optimizer): 29 | model = self.proxy_model if self.args.proxy_model is not None else self.clf 30 | 31 | model.train() 32 | 33 | accFinal = 0. 34 | train_loss = 0. 
35 | for batch_idx, (x, y, idxs) in enumerate(loader_tr): 36 | x, y = x.to(self.device), y.to(self.device) 37 | nan_mask = torch.isnan(x) 38 | if nan_mask.any(): 39 | raise RuntimeError(f"Found NaN in input at indices: {nan_mask.nonzero()}") 40 | 41 | # exit() 42 | optimizer.zero_grad() 43 | 44 | out, e1 = model(x) 45 | nan_mask_out = torch.isnan(y) 46 | if nan_mask_out.any(): 47 | raise RuntimeError(f"Found NaN in target at indices: {nan_mask_out.nonzero()}") 48 | 49 | loss = F.cross_entropy(out, y) 50 | 51 | train_loss += loss.item() 52 | accFinal += torch.sum((torch.max(out,1)[1] == y).float()).data.item() 53 | 54 | loss.backward() 55 | 56 | # clamp gradients, just in case 57 | for p in filter(lambda p: p.grad is not None, model.parameters()): p.grad.data.clamp_(min=-.1, max=.1) 58 | 59 | optimizer.step() 60 | 61 | if batch_idx % 10 == 0: 62 | print ("[Batch={:03d}] [Loss={:.2f}]".format(batch_idx, loss)) 63 | 64 | return accFinal / len(loader_tr.dataset.X), train_loss 65 | 66 | 67 | def train(self, alpha=0.1, n_epoch=10): 68 | n_query = int(self.args.nQuery*len(self.Y)/100) 69 | if self.idxs_lb.sum() + n_query > (self.args.nEnd*len(self.Y)/100): 70 | # the last round, we use the original model for training 71 | self.args.proxy_model = None 72 | 73 | model = self.proxy_model if self.args.proxy_model is not None else self.clf 74 | 75 | # train the proxy model for query 76 | def weight_reset(m): 77 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): 78 | m.reset_parameters() 79 | 80 | model = model.apply(weight_reset) 81 | model = nn.DataParallel(model).to(self.device) 82 | parameters = model.parameters() 83 | optimizer = optim.SGD(parameters, lr = self.args.lr, weight_decay=5e-4, momentum=self.args.momentum) 84 | 85 | idxs_train = np.arange(self.n_pool)[self.idxs_lb] 86 | 87 | 88 | epoch_time = AverageMeter() 89 | recorder = RecorderMeter(n_epoch) 90 | epoch = 0 91 | train_acc = 0. 92 | previous_loss = 0. 93 | if idxs_train.shape[0] != 0: 94 | transform = self.args.transform_tr 95 | 96 | loader_tr = DataLoader(self.handler(self.X[idxs_train], 97 | torch.Tensor(self.Y.numpy()[idxs_train]).long(), 98 | transform=transform), shuffle=True, 99 | **self.args.loader_tr_args) 100 | 101 | for epoch in range(n_epoch): 102 | ts = time.time() 103 | current_learning_rate, _ = adjust_learning_rate(optimizer, epoch, self.args.gammas, self.args.schedule, self.args) 104 | 105 | # Display simulation time 106 | need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (n_epoch - epoch)) 107 | need_time = '[{} Need: {:02d}:{:02d}:{:02d}]'.format(self.args.strategy, need_hour, need_mins, need_secs) 108 | 109 | # train one epoch 110 | train_acc, train_los = self._train(epoch, loader_tr, optimizer) 111 | test_acc = self.predict(self.X_te, self.Y_te) 112 | 113 | # measure elapsed time 114 | epoch_time.update(time.time() - ts) 115 | 116 | print('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f}]'.format(time_string(), epoch, n_epoch, 117 | need_time, current_learning_rate 118 | ) \ 119 | + ' [Best : Test Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 120 | 1. 
- recorder.max_accuracy(False))) 121 | 122 | 123 | recorder.update(epoch, train_los, train_acc, 0, test_acc) 124 | 125 | # The converge condition 126 | if abs(previous_loss - train_los) < 0.0001: 127 | break 128 | else: 129 | previous_loss = train_los 130 | 131 | model = model.module 132 | best_train_acc = recorder.max_accuracy(istrain=True) 133 | return best_train_acc 134 | 135 | 136 | def predict(self, X, Y): 137 | model = self.proxy_model if self.args.proxy_model is not None else self.clf 138 | transform=self.args.transform_te 139 | if type(X) is np.ndarray: 140 | loader_te = DataLoader(self.handler(X, Y, transform=transform), 141 | shuffle=False, **self.args.loader_te_args) 142 | else: 143 | loader_te = DataLoader(self.handler(X.numpy(), Y, transform=transform), 144 | shuffle=False, **self.args.loader_te_args) 145 | 146 | model.eval() 147 | 148 | correct = 0 149 | with torch.no_grad(): 150 | for x, y, idxs in loader_te: 151 | x, y = x.to(self.device), y.to(self.device) 152 | out, e1 = model(x) 153 | pred = out.max(1)[1] 154 | correct += (y == pred).sum().item() 155 | 156 | test_acc = 1. * correct / len(Y) 157 | 158 | return test_acc 159 | 160 | def predict_prob(self, X, Y): 161 | model = self.proxy_model 162 | 163 | transform = self.args.transform_te 164 | loader_te = DataLoader(self.handler(X, Y, 165 | transform=transform), shuffle=False, **self.args.loader_te_args) 166 | model.eval() 167 | 168 | probs = torch.zeros([len(Y), len(np.unique(self.Y))]) 169 | with torch.no_grad(): 170 | for x, y, idxs in loader_te: 171 | x, y = x.to(self.device), y.to(self.device) 172 | out, e1 = model(x) 173 | prob = F.softmax(out, dim=1) 174 | probs[idxs] = prob.cpu().data 175 | 176 | return probs 177 | 178 | def query(self, n): 179 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 180 | probs = self.predict_prob(self.X[idxs_unlabeled], self.Y.numpy()[idxs_unlabeled]) 181 | log_probs = torch.log(probs) 182 | U = (probs*log_probs).sum(1) 183 | return idxs_unlabeled[U.sort()[1][:n]] 184 | 185 | 186 | -------------------------------------------------------------------------------- /query_strategies/semi_pseudolabel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.optim as optim 4 | from torch.utils.data import DataLoader 5 | from torchvision import transforms 6 | import torch.nn.functional as F 7 | import os 8 | import numpy as np 9 | import time 10 | from .strategy import Strategy 11 | from utils import time_string, AverageMeter, RecorderMeter, convert_secs2time, adjust_learning_rate 12 | from .aug_uda import TransformUDA 13 | from copy import deepcopy 14 | 15 | unsup_ratio = 7 16 | p_cutoff = 0.95 17 | 18 | 19 | class TransformWeak(object): 20 | def __init__(self, mean, std, size): 21 | self.weak = transforms.Compose([ 22 | transforms.RandomHorizontalFlip(), 23 | transforms.RandomCrop(size=size, 24 | padding=int(size * 0.125), 25 | padding_mode='reflect')]) 26 | self.normalize = transforms.Compose([ 27 | transforms.ToTensor(), 28 | transforms.Normalize(mean=mean, std=std)]) 29 | 30 | def __call__(self, x): 31 | weak = self.weak(x) 32 | return self.normalize(weak) 33 | 34 | 35 | class pseudolabel(Strategy): 36 | """ 37 | Our implementation of the paper: Unsupervised Data Augmentation for Consistency Training 38 | https://arxiv.org/pdf/1904.12848.pdf 39 | Google Research, Brain Team; Carnegie Mellon University 40 | """ 41 | 42 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 43 
| super(pseudolabel, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 44 | 45 | def query(self, n): 46 | """ 47 | n: number of data to query 48 | return the index of the selected data 49 | """ 50 | # Randomly query from the unlabeled data 51 | inds = np.where(self.idxs_lb == 0)[0] 52 | # Notice: the returned index should be referenced to the whole training set 53 | return inds[np.random.permutation(len(inds))][:n] 54 | 55 | def _train(self, epoch, loader_tr_labeled, loader_tr_unlabeled, optimizer): 56 | self.clf.train() 57 | accFinal = 0. 58 | train_loss = 0. 59 | iter_unlabeled = iter(loader_tr_unlabeled) 60 | for batch_idx, (x, y, idxs) in enumerate(loader_tr_labeled): 61 | x, y = x.to(self.device), y.to(self.device) 62 | try: 63 | (inputs_u), _, idx = next(iter_unlabeled) 64 | except StopIteration: 65 | iter_unlabeled = iter(loader_tr_unlabeled) 66 | (inputs_u), _, idx = next(iter_unlabeled) 67 | 68 | inputs_u = inputs_u.to(self.device) 69 | logits_x_lb, _ = self.clf(x) 70 | logits_x_ulb_w, _ = self.clf(inputs_u) 71 | loss = F.nll_loss(F.log_softmax(logits_x_lb, dim=-1), y, reduction='mean') # loss for supervised learning 72 | 73 | pseudo_label = torch.softmax(logits_x_ulb_w, dim=-1) 74 | max_probs, max_idx = torch.max(pseudo_label, dim=-1) 75 | mask = max_probs.ge(p_cutoff).float() 76 | unsup_loss = (F.nll_loss(F.log_softmax(logits_x_ulb_w, dim=-1), max_idx.detach(), reduction='none') * mask).mean() 77 | 78 | loss += unsup_loss 79 | train_loss += loss.item() 80 | accFinal += torch.sum((torch.max(logits_x_lb, 1)[1] == y).float()).data.item() 81 | 82 | # exit() 83 | optimizer.zero_grad() 84 | loss.backward() 85 | 86 | # clamp gradients, just in case 87 | for p in filter(lambda p: p.grad is not None, self.clf.parameters()): p.grad.data.clamp_(min=-.1, max=.1) 88 | 89 | optimizer.step() 90 | 91 | if batch_idx % 10 == 0: 92 | print("[Batch={:03d}] [Loss={:.2f}]".format(batch_idx, loss)) 93 | 94 | return accFinal / len(loader_tr_labeled.dataset.X), train_loss 95 | 96 | def train(self, alpha=0.1, n_epoch=10): 97 | self.clf = deepcopy(self.net) 98 | print("Let's use", torch.cuda.device_count(), "GPUs!") 99 | self.clf = nn.DataParallel(self.clf).to(self.device) 100 | parameters = self.clf.parameters() 101 | optimizer = optim.SGD(parameters, lr=self.args.lr, weight_decay=5e-4, momentum=self.args.momentum) 102 | 103 | idxs_train = np.arange(self.n_pool)[self.idxs_lb] 104 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 105 | 106 | epoch_time = AverageMeter() 107 | recorder = RecorderMeter(n_epoch) 108 | previous_loss = 0. 
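# The labeled loader below uses a batch size of 250, while the unlabeled loader draws unsup_ratio (7) times as many samples per step, so every iteration sees far more unlabeled than labeled data.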
109 | 110 | if idxs_train.shape[0] != 0: 111 | transform = self.args.transform_tr 112 | 113 | train_data_labeled = self.handler(self.X[idxs_train], 114 | torch.Tensor(self.Y.numpy()[idxs_train]).long(), 115 | transform=transform) 116 | loader_tr_labeled = DataLoader(train_data_labeled, 117 | shuffle=True, 118 | pin_memory=True, 119 | # sampler = DistributedSampler(train_data), 120 | worker_init_fn=self.seed_worker, 121 | generator=self.g, 122 | **{'batch_size': 250, 'num_workers': 1}) 123 | if idxs_unlabeled.shape[0] != 0: 124 | mean = self.args.normalize['mean'] 125 | std = self.args.normalize['std'] 126 | train_data_unlabeled = self.handler(self.X[idxs_unlabeled], 127 | torch.Tensor(self.Y.numpy()[idxs_unlabeled]).long(), 128 | transform=TransformWeak(mean=mean, std=std, size=self.args.img_size)) 129 | loader_tr_unlabeled = DataLoader(train_data_unlabeled, 130 | shuffle=True, 131 | pin_memory=True, 132 | # sampler = DistributedSampler(train_data), 133 | worker_init_fn=self.seed_worker, 134 | generator=self.g, 135 | **{'batch_size': int(250 * unsup_ratio), 'num_workers': 1}) 136 | for epoch in range(n_epoch): 137 | ts = time.time() 138 | current_learning_rate, _ = adjust_learning_rate(optimizer, epoch, self.args.gammas, self.args.schedule, 139 | self.args) 140 | 141 | # Display simulation time 142 | need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (n_epoch - epoch)) 143 | need_time = '[{} Need: {:02d}:{:02d}:{:02d}]'.format(self.args.strategy, need_hour, need_mins, 144 | need_secs) 145 | 146 | # train one epoch 147 | train_acc, train_los = self._train(epoch, loader_tr_labeled, loader_tr_unlabeled, optimizer) 148 | test_acc = self.predict(self.X_te, self.Y_te) 149 | # measure elapsed time 150 | epoch_time.update(time.time() - ts) 151 | print('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f}]'.format(time_string(), epoch, n_epoch, 152 | need_time, current_learning_rate 153 | ) \ 154 | + ' [Best : Test Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 155 | 1. 
- recorder.max_accuracy(False))) 156 | 157 | recorder.update(epoch, train_los, train_acc, 0, test_acc) 158 | 159 | # The converge condition 160 | if abs(previous_loss - train_los) < 0.0005: 161 | break 162 | else: 163 | previous_loss = train_los 164 | if self.args.save_model: 165 | self.save_model() 166 | recorder.plot_curve(os.path.join(self.args.save_path, self.args.dataset)) 167 | self.clf = self.clf.module 168 | # self.save_tta_values(self.get_tta_values()) 169 | 170 | 171 | best_test_acc = recorder.max_accuracy(istrain=False) 172 | return best_test_acc 173 | -------------------------------------------------------------------------------- /query_strategies/ssl_consistency.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .semi_strategy import semi_Strategy 3 | import torch.nn.functional as F 4 | from torch.utils.data import DataLoader 5 | import torch 6 | from torch.autograd import Variable 7 | 8 | class TransformFive: 9 | def __init__(self, transform): 10 | self.transform = transform 11 | 12 | def __call__(self, inp): 13 | transform_list = [] 14 | for i in range(50): 15 | transform_list.append(self.transform(inp)) 16 | return transform_list 17 | 18 | class TransformFifty: 19 | def __init__(self, transform): 20 | self.transform = transform 21 | 22 | def __call__(self, inp): 23 | transform_list = [] 24 | for i in range(50): 25 | transform_list.append(self.transform(inp)) 26 | return transform_list 27 | 28 | class ssl_Consistency(semi_Strategy): 29 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 30 | super(ssl_Consistency, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 31 | 32 | def predict_consistency_aug(self, X, Y): 33 | 34 | loader_consistency = DataLoader(self.handler(X, Y, transform=TransformFive(self.args.transform_tr)), shuffle=False, **self.args.loader_te_args) 35 | 36 | self.ema_model.eval() 37 | 38 | consistency = np.zeros([len(Y)]) 39 | with torch.no_grad(): 40 | for x, y, idxs in loader_consistency: 41 | probs = np.zeros([len(x), y.size(0),len(np.unique(self.Y))]) 42 | for i, xi in enumerate(x): 43 | out1, e1 = self.ema_model(xi.to(self.device)) 44 | prob = torch.softmax(out1, dim=1).cpu() 45 | probs[i] = prob 46 | 47 | consistency[idxs] = probs.var(0).sum(1) 48 | 49 | return consistency 50 | 51 | def query(self,k): 52 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 53 | # print(print(len(set(idxs_unlabeled))), idxs_unlabeled) 54 | consistency = self.predict_consistency_aug(self.X[idxs_unlabeled], self.Y.numpy()[idxs_unlabeled]) 55 | idxs = consistency.argsort() 56 | # print(idxs) 57 | # print(len(set(idxs[:k])),len(idxs[:k]),len(set(idxs_unlabeled)), idxs_unlabeled[idxs[:k]]) 58 | return idxs_unlabeled[idxs[-k:]] 59 | 60 | 61 | -------------------------------------------------------------------------------- /query_strategies/ssl_diff2augdirect.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .semi_strategy import semi_Strategy 3 | import torch.nn.functional as F 4 | from torch.utils.data import DataLoader 5 | import torch 6 | from torch.autograd import Variable 7 | 8 | class TransformTwice: 9 | def __init__(self, transform1,transform2): 10 | self.transform1 = transform1 11 | self.transform2 = transform2 12 | 13 | def __call__(self, inp): 14 | out1 = self.transform1(inp) 15 | out2 = self.transform2(inp) 16 | return out1, out2 17 | 18 | class ssl_Diff2AugDirect(semi_Strategy): 19 | def 
__init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 20 | super(ssl_Diff2AugDirect, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 21 | 22 | def predict_prob_aug(self, X, Y): 23 | 24 | loader_te = DataLoader(self.handler(X, Y, transform=TransformTwice(self.args.transform_te, self.args.transform_tr)), shuffle=False, **self.args.loader_te_args) 25 | 26 | self.ema_model.eval() 27 | 28 | probs = torch.zeros([len(Y), len(np.unique(self.Y))]).to(self.device) 29 | with torch.no_grad(): 30 | for x, y, idxs in loader_te: 31 | x1, x2, y = x[0].to(self.device), x[1].to(self.device), y.to(self.device) 32 | out1, e1 = self.ema_model(x1) 33 | out2, e1 = self.ema_model(x2) 34 | probs[idxs] = (torch.softmax(out1, dim=1) + torch.softmax(out2, dim=1)) / 2 35 | 36 | return probs.cpu() 37 | 38 | def margin_data(self,k): 39 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 40 | probs = self.predict_prob_aug(self.X[idxs_unlabeled], self.Y.numpy()[idxs_unlabeled]) 41 | probs_sorted, idxs = probs.sort(descending=True) 42 | U = probs_sorted[:, 0] - probs_sorted[:,1] 43 | return idxs_unlabeled[U.sort()[1].numpy()[:k]] 44 | 45 | def query(self, k): 46 | index = self.margin_data(k) 47 | return index 48 | 49 | -------------------------------------------------------------------------------- /query_strategies/ssl_diff2augkmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .semi_strategy import semi_Strategy 3 | import torch.nn.functional as F 4 | from torch.utils.data import DataLoader 5 | import torch 6 | from torch.autograd import Variable 7 | from sklearn.cluster import KMeans 8 | 9 | class TransformTwice: 10 | def __init__(self, transform1,transform2): 11 | self.transform1 = transform1 12 | self.transform2 = transform2 13 | 14 | def __call__(self, inp): 15 | out1 = self.transform1(inp) 16 | out2 = self.transform2(inp) 17 | return out1, out2 18 | 19 | class ssl_Diff2AugKmeans(semi_Strategy): 20 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 21 | super(ssl_Diff2AugKmeans, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 22 | 23 | def prepare_emb(self): 24 | loader_te = DataLoader(self.handler(self.X, self.Y, transform=TransformTwice(self.args.transform_te, self.args.transform_tr )), shuffle=False, **self.args.loader_te_args) 25 | self.ema_model.eval() 26 | create = True 27 | with torch.no_grad(): 28 | for x, y, idxs in loader_te: 29 | x, y = Variable(x[0].to(self.device)), Variable(y.to(self.device)) 30 | out, emb, feature = self.ema_model(x,intermediate = True) # resnet emb from last avg pool 31 | if create: 32 | create = False 33 | emb_list = torch.zeros([len(self.Y), len(feature[-1].view(out.size(0), -1)[1])]) 34 | emb_list[idxs] = feature[-1].view(out.size(0), -1)[1].cpu().data 35 | return np.array(emb_list) 36 | 37 | def predict_prob_aug(self, X, Y): 38 | 39 | loader_te = DataLoader(self.handler(X, Y, transform=TransformTwice(self.args.transform_te, self.args.transform_tr)), shuffle=False, **self.args.loader_te_args) 40 | 41 | self.ema_model.eval() 42 | 43 | probs = torch.zeros([len(Y), len(np.unique(self.Y))]).to(self.device) 44 | with torch.no_grad(): 45 | for x, y, idxs in loader_te: 46 | x1, x2, y = x[0].to(self.device), x[1].to(self.device), y.to(self.device) 47 | out1, e1 = self.ema_model(x1) 48 | out2, e1 = self.ema_model(x2) 49 | probs[idxs] = (torch.softmax(out1, dim=1) + torch.softmax(out2, dim=1)) / 2 50 | 51 | return probs.cpu() 52 | 53 | def margin_data(self): 54 
| idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 55 | probs = self.predict_prob_aug(self.X[idxs_unlabeled], self.Y.numpy()[idxs_unlabeled]) 56 | probs_sorted, idxs = probs.sort(descending=True) 57 | U = probs_sorted[:, 0] - probs_sorted[:,1] 58 | return idxs_unlabeled[U.sort()[1].numpy()] 59 | 60 | def query(self, k): 61 | self.emb_list = self.prepare_emb() 62 | self.Kmeans_list = KMeans(n_clusters=20).fit(self.emb_list) 63 | margin_sorted_index = self.margin_data() 64 | index = self.diff2_aug_kmeans(margin_sorted_index, self.Kmeans_list.labels_, k) 65 | 66 | return index 67 | 68 | def diff2_aug_kmeans(self, unlabeled_index, Kmeans_list, k): 69 | cluster_list = [] 70 | for i in range(20): 71 | cluster = [] 72 | cluster_list.append(cluster) 73 | 74 | # unlabeled_index is sorted by margin score, pop(0) is the most uncertain one 75 | 76 | for real_idx in unlabeled_index: 77 | cluster_list[Kmeans_list[real_idx]].append(real_idx) 78 | 79 | index_select = [] 80 | total_selected = k 81 | 82 | for cluster_index in range(len(cluster_list)): 83 | num_select_by_propotion = total_selected * len(cluster_list[cluster_index]) / len(unlabeled_index) 84 | for i in range(int(num_select_by_propotion)): 85 | index_select.append(cluster_list[cluster_index].pop(0)) 86 | k -= 1 87 | 88 | cluster_index = 0 89 | 90 | # int后可能不足k个,补足 91 | while k > 0: 92 | if len(cluster_list[cluster_index]) > 0: 93 | index_select.append(cluster_list[cluster_index].pop(0)) 94 | k -= 1 95 | if cluster_index < len(cluster_list) - 1: 96 | cluster_index += 1 97 | else: 98 | cluster_index = 0 99 | 100 | return index_select 101 | -------------------------------------------------------------------------------- /query_strategies/ssl_least_confidence.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .semi_strategy import semi_Strategy 3 | import pdb 4 | class ssl_LC(semi_Strategy): 5 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 6 | super(ssl_LC, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 7 | 8 | def query(self, n): 9 | idxs_unlabeled = np.arange(self.n_pool)[~self.idxs_lb] 10 | probs = self.predict_prob(self.X[idxs_unlabeled], np.asarray(self.Y)[idxs_unlabeled]) 11 | U = probs.max(1)[0] 12 | return idxs_unlabeled[U.sort()[1][:n]] 13 | -------------------------------------------------------------------------------- /query_strategies/ssl_rand.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .semi_strategy import semi_Strategy 3 | 4 | class ssl_Random(semi_Strategy): 5 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 6 | super(ssl_Random, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 7 | 8 | def query(self, n): 9 | inds = np.where(self.idxs_lb==0)[0] 10 | return inds[np.random.permutation(len(inds))][:n] 11 | -------------------------------------------------------------------------------- /query_strategies/strategy.py: -------------------------------------------------------------------------------- 1 | from joblib.externals.cloudpickle.cloudpickle import instance 2 | import numpy as np 3 | import random 4 | from sklearn import preprocessing 5 | from torch import nn 6 | import sys, os 7 | import torch 8 | import torch.nn.functional as F 9 | import torch.optim as optim 10 | from torch.autograd import Variable 11 | from torch.utils.data import DataLoader 12 | from copy import deepcopy 13 | from utils import time_string, 
AverageMeter, RecorderMeter, convert_secs2time, adjust_learning_rate 14 | import time 15 | from torchvision.utils import save_image 16 | from tqdm import tqdm 17 | from .util import AugMixDataset 18 | from sklearn.metrics import pairwise_distances 19 | class Strategy: 20 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 21 | self.X = X # vector 22 | self.Y = Y 23 | self.X_te = X_te 24 | self.Y_te = Y_te 25 | 26 | self.idxs_lb = idxs_lb # bool type 27 | self.handler = handler 28 | self.args = args 29 | self.n_pool = len(Y) 30 | self.device = 'cuda' if torch.cuda.is_available() else 'cpu' 31 | 32 | self.net = net.to(self.device) 33 | self.clf = deepcopy(net.to(self.device)) 34 | 35 | # for reproducibility 36 | self.g = torch.Generator() 37 | self.g.manual_seed(0) 38 | 39 | def seed_worker(self, worker_id): 40 | """ 41 | To preserve reproducibility when num_workers > 1 42 | """ 43 | # https://pytorch.org/docs/stable/notes/randomness.html 44 | worker_seed = torch.initial_seed() % 2**32 45 | np.random.seed(worker_seed) 46 | random.seed(worker_seed) 47 | 48 | 49 | def query(self, n): 50 | pass 51 | 52 | def update(self, idxs_lb): 53 | self.idxs_lb = idxs_lb 54 | 55 | def _train(self, epoch, loader_tr, optimizer): 56 | self.clf.train() 57 | 58 | accFinal = 0. 59 | train_loss = 0. 60 | for batch_idx, (x, y, idxs) in enumerate(loader_tr): 61 | x, y = x.to(self.device), y.to(self.device) 62 | nan_mask = torch.isnan(x) 63 | if nan_mask.any(): 64 | raise RuntimeError(f"Found NAN in input indices: ", nan_mask.nonzero()) 65 | 66 | # exit() 67 | optimizer.zero_grad() 68 | 69 | out, e1 = self.clf(x) 70 | nan_mask_out = torch.isnan(y) 71 | if nan_mask_out.any(): 72 | raise RuntimeError(f"Found NAN in output indices: ", nan_mask.nonzero()) 73 | 74 | loss = F.cross_entropy(out, y) 75 | 76 | train_loss += loss.item() 77 | accFinal += torch.sum((torch.max(out,1)[1] == y).float()).data.item() 78 | 79 | loss.backward() 80 | 81 | # clamp gradients, just in case 82 | for p in filter(lambda p: p.grad is not None, self.clf.parameters()): p.grad.data.clamp_(min=-.1, max=.1) 83 | 84 | optimizer.step() 85 | 86 | if batch_idx % 10 == 0: 87 | print ("[Batch={:03d}] [Loss={:.2f}]".format(batch_idx, loss)) 88 | 89 | return accFinal / len(loader_tr.dataset.X), train_loss 90 | 91 | 92 | def train(self, alpha=0.1, n_epoch=10): 93 | self.clf = deepcopy(self.net) 94 | # if torch.cuda.device_count() > 1: 95 | print("Let's use", torch.cuda.device_count(), "GPUs!") 96 | # self.clf = nn.parallel.DistributedDataParallel(self.clf, 97 | # find_unused_parameters=True, 98 | # ) 99 | self.clf = nn.DataParallel(self.clf).to(self.device) 100 | parameters = self.clf.parameters() 101 | optimizer = optim.SGD(parameters, lr = self.args.lr, weight_decay=5e-4, momentum=self.args.momentum) 102 | 103 | idxs_train = np.arange(self.n_pool)[self.idxs_lb] 104 | 105 | 106 | epoch_time = AverageMeter() 107 | recorder = RecorderMeter(n_epoch) 108 | epoch = 0 109 | train_acc = 0. 110 | best_test_acc = 0. 
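# Descriptive note: when at least one sample is labeled, the block below builds a DataLoader
# over the labeled pool (self.idxs_lb) with the training transform, steps the learning rate
# via adjust_learning_rate(args.gammas, args.schedule), runs one epoch of _train(), evaluates
# on (X_te, Y_te), logs the results with RecorderMeter, and checkpoints the best-performing
# model when --save_model is set.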
111 | if idxs_train.shape[0] != 0: 112 | transform = self.args.transform_tr 113 | 114 | train_data = self.handler(self.X[idxs_train], 115 | torch.Tensor(self.Y[idxs_train]).long() if type(self.Y) is np.ndarray else torch.Tensor(self.Y.numpy()[idxs_train]).long(), 116 | transform=transform) 117 | 118 | loader_tr = DataLoader(train_data, 119 | shuffle=True, 120 | pin_memory=True, 121 | # sampler = DistributedSampler(train_data), 122 | worker_init_fn=self.seed_worker, 123 | generator=self.g, 124 | **self.args.loader_tr_args) 125 | for epoch in range(n_epoch): 126 | ts = time.time() 127 | current_learning_rate, _ = adjust_learning_rate(optimizer, epoch, self.args.gammas, self.args.schedule, self.args) 128 | 129 | # Display simulation time 130 | need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (n_epoch - epoch)) 131 | need_time = '[{} Need: {:02d}:{:02d}:{:02d}]'.format(self.args.strategy, need_hour, need_mins, need_secs) 132 | 133 | # train one epoch 134 | train_acc, train_los = self._train(epoch, loader_tr, optimizer) 135 | test_acc = self.predict(self.X_te, self.Y_te) 136 | # measure elapsed time 137 | epoch_time.update(time.time() - ts) 138 | print('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f}]'.format(time_string(), epoch, n_epoch, 139 | need_time, current_learning_rate 140 | ) \ 141 | + ' [Best : Test Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 142 | 1. - recorder.max_accuracy(False))) 143 | recorder.update(epoch, train_los, train_acc, 0, test_acc) 144 | 145 | if self.args.save_model and test_acc > best_test_acc: 146 | best_test_acc = test_acc 147 | self.save_model() 148 | recorder.plot_curve(os.path.join(self.args.save_path, self.args.dataset)) 149 | self.clf = self.clf.module 150 | 151 | best_test_acc = recorder.max_accuracy(istrain=False) 152 | return best_test_acc 153 | 154 | 155 | def predict(self, X, Y): 156 | transform=self.args.transform_te 157 | loader_te = DataLoader(self.handler(X, Y, transform=transform), pin_memory=True, 158 | shuffle=False, **self.args.loader_te_args) 159 | 160 | self.clf.eval() 161 | 162 | correct = 0 163 | with torch.no_grad(): 164 | for x, y, idxs in loader_te: 165 | x, y = x.to(self.device), y.to(self.device) 166 | out, e1 = self.clf(x) 167 | pred = out.max(1)[1] 168 | correct += (y == pred).sum().item() 169 | 170 | test_acc = 1. 
* correct / len(Y) 171 | 172 | return test_acc 173 | 174 | def get_prediction(self, X, Y): 175 | transform=self.args.transform_te 176 | loader_te = DataLoader(self.handler(X, Y, transform=transform), pin_memory=True, 177 | shuffle=False, **self.args.loader_te_args) 178 | 179 | P = torch.zeros(len(X)).long().to(self.device) 180 | 181 | self.clf.eval() 182 | 183 | 184 | correct = 0 185 | with torch.no_grad(): 186 | for x, y, idxs in loader_te: 187 | x, y = x.to(self.device), y.to(self.device) 188 | out, e1 = self.clf(x) 189 | pred = out.max(1)[1] 190 | P[idxs] = pred 191 | correct += (y == pred).sum().item() 192 | 193 | return P 194 | 195 | def predict_prob(self, X, Y): 196 | transform = self.args.transform_te 197 | loader_te = DataLoader(self.handler(X, Y, 198 | transform=transform), shuffle=False, pin_memory=True, **self.args.loader_te_args) 199 | 200 | self.clf.eval() 201 | 202 | probs = torch.zeros([len(Y), len(np.unique(self.Y))]) 203 | with torch.no_grad(): 204 | for x, y, idxs in loader_te: 205 | x, y = x.to(self.device), y.to(self.device) 206 | out, e1 = self.clf(x) 207 | prob = F.softmax(out, dim=1) 208 | probs[idxs] = prob.cpu().data 209 | 210 | return probs 211 | 212 | def predict_prob_dropout(self, X, Y, n_drop): 213 | transform = self.args.transform_te 214 | loader_te = DataLoader(self.handler(X, Y, transform=transform), pin_memory=True, 215 | shuffle=False, **self.args.loader_te_args) 216 | 217 | self.clf.train() 218 | 219 | probs = torch.zeros([len(Y), len(np.unique(Y))]) 220 | with torch.no_grad(): 221 | for i in range(n_drop): 222 | print('n_drop {}/{}'.format(i+1, n_drop)) 223 | for x, y, idxs in loader_te: 224 | x, y = x.to(self.device), y.to(self.device) 225 | out, e1 = self.clf(x) 226 | prob = F.softmax(out, dim=1) 227 | probs[idxs] += prob.cpu().data 228 | probs /= n_drop 229 | 230 | return probs 231 | 232 | def predict_prob_dropout_split(self, X, Y, n_drop): 233 | transform = self.args.transform_te 234 | loader_te = DataLoader(self.handler(X, Y, transform=transform), pin_memory=True, 235 | shuffle=False, **self.args.loader_te_args) 236 | 237 | self.clf.train() 238 | 239 | probs = torch.zeros([n_drop, len(Y), len(np.unique(Y))]) 240 | with torch.no_grad(): 241 | for i in range(n_drop): 242 | print('n_drop {}/{}'.format(i+1, n_drop)) 243 | for x, y, idxs in loader_te: 244 | x, y = x.to(self.device), y.to(self.device) 245 | out, e1 = self.clf(x) 246 | probs[i][idxs] += F.softmax(out, dim=1).cpu().data 247 | return probs 248 | 249 | def get_embedding(self, X, Y): 250 | """ get last layer embedding from current model""" 251 | transform = self.args.transform_te 252 | loader_te = DataLoader(self.handler(X, Y, transform=transform), pin_memory=True, 253 | shuffle=False, **self.args.loader_te_args) 254 | 255 | self.clf.eval() 256 | 257 | embedding = torch.zeros([len(Y), 258 | self.clf.module.get_embedding_dim() if isinstance(self.clf, nn.DataParallel) 259 | else self.clf.get_embedding_dim()]) 260 | with torch.no_grad(): 261 | for x, y, idxs in loader_te: 262 | x, y = x.to(self.device), y.to(self.device) 263 | out, e1 = self.clf(x) 264 | embedding[idxs] = e1.data.cpu().float() 265 | 266 | return embedding 267 | 268 | 269 | def get_grad_embedding(self, X, Y): 270 | """ gradient embedding (assumes cross-entropy loss) of the last layer""" 271 | transform = self.args.transform_te 272 | 273 | model = self.clf 274 | if isinstance(model, nn.DataParallel): 275 | model = model.module 276 | embDim = model.get_embedding_dim() 277 | model.eval() 278 | nLab = len(np.unique(Y)) 279 | embedding 
= np.zeros([len(Y), embDim * nLab]) 280 | loader_te = DataLoader(self.handler(X, Y, transform=transform), pin_memory=True, 281 | shuffle=False, **self.args.loader_te_args) 282 | with torch.no_grad(): 283 | for x, y, idxs in loader_te: 284 | x, y = x.to(self.device), y.to(self.device) 285 | cout, out = self.clf(x) 286 | out = out.data.cpu().numpy() 287 | batchProbs = F.softmax(cout, dim=1).data.cpu().numpy() 288 | maxInds = np.argmax(batchProbs,1) 289 | for j in range(len(y)): 290 | for c in range(nLab): 291 | if c == maxInds[j]: 292 | embedding[idxs[j]][embDim * c : embDim * (c+1)] = deepcopy(out[j]) * (1 - batchProbs[j][c]) 293 | else: 294 | embedding[idxs[j]][embDim * c : embDim * (c+1)] = deepcopy(out[j]) * (-1 * batchProbs[j][c]) 295 | return torch.Tensor(embedding) 296 | 297 | def save_model(self): 298 | # save model and selected index 299 | save_path = os.path.join(self.args.save_path,self.args.dataset+'_checkpoint') 300 | if not os.path.isdir(save_path): 301 | os.makedirs(save_path) 302 | labeled = len(np.arange(self.n_pool)[self.idxs_lb]) 303 | labeled_percentage = '%.1f'%float(100*labeled/len(self.X)) 304 | torch.save(self.clf, os.path.join(save_path, self.args.strategy+'_'+self.args.model+'_'+labeled_percentage+'_'+str(self.args.seed)+'.pkl')) 305 | print('save to ',os.path.join(save_path, self.args.strategy+'_'+self.args.model+'_'+labeled_percentage+'_'+str(self.args.seed)+'.pkl')) 306 | path = os.path.join(save_path, self.args.strategy+'_'+self.args.model+'_'+labeled_percentage+'_'+str(self.args.seed)+'.npy') 307 | np.save(path,self.idxs_lb) 308 | 309 | def load_model(self): 310 | labeled = len(np.arange(self.n_pool)[self.idxs_lb]) 311 | labeled_percentage = '%.1f'%float(100*labeled/len(self.X)) 312 | save_path = os.path.join(self.args.save_path,self.args.dataset+'_checkpoint') 313 | self.clf = torch.load(os.path.join(save_path, self.args.strategy+'_'+self.args.model+'_'+labeled_percentage+'_'+str(self.args.seed)+'.pkl')) 314 | self.idxs_lb = np.load(os.path.join(save_path, self.args.strategy+'_'+self.args.model+'_'+labeled_percentage+'_'+str(self.args.seed)+'.npy')) 315 | 316 | 317 | -------------------------------------------------------------------------------- /query_strategies/uncertainGCN.py: -------------------------------------------------------------------------------- 1 | # This is an implementation of the paper: Sequential GCN for Active Learning 2 | 3 | # Implemented by Yu LI, based on the code: https://github.com/razvancaramalau/Sequential-GCN-for-Active-Learning 4 | 5 | import numpy as np 6 | from .strategy import Strategy 7 | import pdb 8 | from torch.nn import functional 9 | import math 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.init as init 13 | import torch.nn.functional as F 14 | from torch.nn.parameter import Parameter 15 | from torch.nn.modules.module import Module 16 | from models.gcn import GCN 17 | import torch.optim as optim 18 | from tqdm import tqdm 19 | 20 | EPOCH_GCN = 200 21 | LR_GCN = 1e-3 22 | SUBSET = 10000 # M 23 | 24 | def aff_to_adj(x, y=None): 25 | x = x.detach().cpu().numpy() 26 | adj = np.matmul(x, x.transpose()) 27 | adj += -1.0*np.eye(adj.shape[0]) 28 | adj_diag = np.sum(adj, axis=0) #rowise sum 29 | adj = np.matmul(adj, np.diag(1/adj_diag)) 30 | adj = adj + np.eye(adj.shape[0]) 31 | adj = torch.Tensor(adj) 32 | 33 | return adj 34 | 35 | 36 | def BCEAdjLoss(scores, lbl, nlbl, l_adj): 37 | lnl = torch.log(scores[lbl]) 38 | lnu = torch.log(1 - scores[nlbl]) 39 | labeled_score = torch.mean(lnl) 40 | unlabeled_score = 
torch.mean(lnu) 41 | bce_adj_loss = -labeled_score - l_adj*unlabeled_score 42 | return bce_adj_loss 43 | 44 | 45 | class uncertainGCN(Strategy): 46 | def __init__(self, X, Y, X_te, Y_te, idxs_lb, net, handler, args): 47 | super(uncertainGCN, self).__init__(X, Y, X_te, Y_te, idxs_lb, net, handler, args) 48 | 49 | def query(self, n): 50 | # get the features of all data (labeled + unlabeled) 51 | subset = list(np.nonzero(~self.idxs_lb)[0][:SUBSET]) 52 | ind_idxs_lb = list(np.nonzero(self.idxs_lb)[0]) 53 | 54 | features = self.get_embedding(self.X[subset+ind_idxs_lb], self.Y[subset+ind_idxs_lb]) 55 | features = functional.normalize(features).to(self.device) 56 | adj = aff_to_adj(features).to(self.device) 57 | 58 | binary_labels = torch.cat((torch.zeros([SUBSET, 1]),(torch.ones([len(ind_idxs_lb),1]))),0) 59 | 60 | gcn_module = GCN(nfeat=features.shape[1], 61 | nhid=self.args.hidden_units, 62 | nclass=1, 63 | dropout=self.args.dropout_rate).to(self.device) 64 | models = {'gcn_module': gcn_module} 65 | 66 | optim_backbone = optim.Adam(models['gcn_module'].parameters(), lr=1e-3, 67 | weight_decay=5e-4) 68 | optimizers = {'gcn_module': optim_backbone} 69 | 70 | lbl = np.arange(SUBSET, SUBSET+len(ind_idxs_lb), 1) # temp labeled index 71 | nlbl = np.arange(0, SUBSET, 1) # temp unlabled index 72 | 73 | # train the gcn model 74 | for _ in tqdm(range(200)): 75 | optimizers['gcn_module'].zero_grad() 76 | outputs, _, _ = models['gcn_module'](features, adj) 77 | lamda = self.args.lambda_loss 78 | loss = BCEAdjLoss(outputs, lbl, nlbl, lamda) 79 | loss.backward() 80 | optimizers['gcn_module'].step() 81 | 82 | models['gcn_module'].eval() 83 | with torch.no_grad(): 84 | inputs = features.to(self.device) 85 | labels = binary_labels.to(self.device) 86 | scores, _, feat = models['gcn_module'](inputs, adj) 87 | 88 | s_margin = self.args.s_margin 89 | scores_median = np.squeeze(torch.abs(scores[:SUBSET] - s_margin).detach().cpu().numpy()) 90 | arg = np.argsort(-(scores_median)) 91 | 92 | 93 | 94 | print("Max confidence value: ",torch.max(scores.data)) 95 | print("Mean confidence value: ",torch.mean(scores.data)) 96 | preds = torch.round(scores) 97 | correct_labeled = (preds[SUBSET:,0] == labels[SUBSET:,0]).sum().item() / len(ind_idxs_lb) 98 | correct_unlabeled = (preds[:SUBSET,0] == labels[:SUBSET,0]).sum().item() / SUBSET 99 | correct = (preds[:,0] == labels[:,0]).sum().item() / (SUBSET + len(ind_idxs_lb)) 100 | print("Labeled classified: ", correct_labeled) 101 | print("Unlabeled classified: ", correct_unlabeled) 102 | print("Total classified: ", correct) 103 | 104 | subset = np.array(subset) 105 | inds = subset[arg][-n:] 106 | 107 | return inds 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /query_strategies/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import torch 4 | 5 | def save_df_as_npy(path, df): 6 | """ 7 | Save pandas dataframe (multi-index or non multi-index) as an NPY file 8 | for later retrieval. It gets a list of input dataframe's index levels, 9 | column levels and underlying array data and saves it as an NPY file. 10 | 11 | Parameters 12 | ---------- 13 | path : str 14 | Path for saving the dataframe. 15 | df : pandas dataframe 16 | Input dataframe's index, column and underlying array data are gathered 17 | in a nested list and saved as an NPY file. 18 | This is capable of handling multi-index dataframes. 
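A typical round trip (the path here is purely illustrative):
save_df_as_npy('./save/metrics.npy', df), and later
df = load_df_from_npy('./save/metrics.npy').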
19 | 20 | Returns 21 | ------- 22 | out : None 23 | 24 | """ 25 | 26 | if df.index.nlevels>1: 27 | lvls = [list(i) for i in df.index.levels] 28 | lbls = [list(i) for i in df.index.labels] 29 | indx = [lvls, lbls] 30 | else: 31 | indx = list(df.index) 32 | 33 | if df.columns.nlevels>1: 34 | lvls = [list(i) for i in df.columns.levels] 35 | lbls = [list(i) for i in df.columns.labels] 36 | cols = [lvls, lbls] 37 | else: 38 | cols = list(df.columns) 39 | 40 | data_flat = df.values.ravel() 41 | df_all = [indx, cols, data_flat] 42 | np.save(path, df_all) 43 | 44 | def load_df_from_npy(path): 45 | """ 46 | Load pandas dataframe (multi-index or regular one) from NPY file. 47 | 48 | Parameters 49 | ---------- 50 | path : str 51 | Path to the NPY file containing the saved pandas dataframe data. 52 | 53 | Returns 54 | ------- 55 | df : Pandas dataframe 56 | Pandas dataframe that's retrieved back saved earlier as an NPY file. 57 | 58 | """ 59 | 60 | df_all = np.load(path) 61 | if isinstance(df_all[0][0], list): 62 | indx = pd.MultiIndex(levels=df_all[0][0], labels=df_all[0][1]) 63 | else: 64 | indx = df_all[0] 65 | 66 | if isinstance(df_all[1][0], list): 67 | cols = pd.MultiIndex(levels=df_all[1][0], labels=df_all[1][1]) 68 | else: 69 | cols = df_all[1] 70 | 71 | df0 = pd.DataFrame(index=indx, columns=cols) 72 | df0[:] = df_all[2].reshape(df0.shape) 73 | return df0 74 | 75 | def max_columns(df0, cols=''): 76 | """ 77 | Get dataframe with best configurations 78 | 79 | Parameters 80 | ---------- 81 | df0 : pandas dataframe 82 | Input pandas dataframe, which could be a multi-index or a regular one. 83 | cols : list, optional 84 | List of strings that would be used as the column IDs for 85 | output pandas dataframe. 86 | 87 | Returns 88 | ------- 89 | df : Pandas dataframe 90 | Pandas dataframe with best configurations for each row of the input 91 | dataframe for maximum value, where configurations refer to the column 92 | IDs of the input dataframe. 
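Example sketch (illustrative values): for a single-level-column df0 with one row
and columns ['lr0.1', 'lr0.01'] holding [0.92, 0.95], the returned row contains
'lr0.01' together with Out = 0.95.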
93 | 94 | """ 95 | 96 | df = df0.reindex_axis(sorted(df0.columns), axis=1) 97 | if df.columns.nlevels==1: 98 | idx = df.values.argmax(-1) 99 | max_vals = df.values[range(len(idx)), idx] 100 | max_df = pd.DataFrame({'':df.columns[idx], 'Out':max_vals}) 101 | max_df.index = df.index 102 | else: 103 | input_args = [list(i) for i in df.columns.levels] 104 | input_arg_lens = [len(i) for i in input_args] 105 | 106 | shp = [len(list(i)) for i in df.index.levels] + input_arg_lens 107 | speedups = df.values.reshape(shp) 108 | 109 | idx = speedups.reshape(speedups.shape[:2] + (-1,)).argmax(-1) 110 | argmax_idx = np.dstack((np.unravel_index(idx, input_arg_lens))) 111 | best_args = np.array(input_args)[np.arange(argmax_idx.shape[-1]), argmax_idx] 112 | 113 | N = len(input_arg_lens) 114 | max_df = pd.DataFrame(best_args.reshape(-1,N), index=df.index) 115 | max_vals = speedups.max(axis=tuple(-np.arange(len(input_arg_lens))-1)).ravel() 116 | max_df['Out'] = max_vals 117 | if cols!='': 118 | max_df.columns = cols 119 | return max_df 120 | 121 | # For Semi-Supervised 122 | class TransformTwice: 123 | def __init__(self, transform): 124 | self.transform = transform 125 | 126 | def __call__(self, inp): 127 | out1 = self.transform(inp) 128 | out2 = self.transform(inp) 129 | return out1, out2 130 | 131 | def linear_rampup(current, rampup_length=200): 132 | if rampup_length == 0: 133 | return 1.0 134 | else: 135 | current = np.clip(current / rampup_length, 0.0, 1.0) 136 | return float(current) 137 | 138 | def interleave_offsets(batch, nu): 139 | groups = [batch // (nu + 1)] * (nu + 1) 140 | for x in range(batch - sum(groups)): 141 | groups[-x - 1] += 1 142 | offsets = [0] 143 | for g in groups: 144 | offsets.append(offsets[-1] + g) 145 | assert offsets[-1] == batch 146 | return offsets 147 | 148 | def interleave(xy, batch): 149 | nu = len(xy) - 1 150 | offsets = interleave_offsets(batch, nu) 151 | xy = [[v[offsets[p]:offsets[p + 1]] for p in range(nu + 1)] for v in xy] 152 | for i in range(1, nu + 1): 153 | xy[0][i], xy[i][i] = xy[i][i], xy[0][i] 154 | return [torch.cat(v, dim=0) for v in xy] 155 | 156 | class WeightEMA(object): 157 | def __init__(self, model, ema_model, alpha ,lr): 158 | self.model = model 159 | self.ema_model = ema_model 160 | self.alpha = alpha 161 | self.params = list(model.state_dict().values()) 162 | self.ema_params = list(ema_model.state_dict().values()) 163 | self.wd = 0.02 * lr 164 | 165 | for param, ema_param in zip(self.params, self.ema_params): 166 | param.data.copy_(ema_param.data) 167 | 168 | def step(self): 169 | one_minus_alpha = 1.0 - self.alpha 170 | for param, ema_param in zip(self.params, self.ema_params): 171 | if ema_param.dtype==torch.float32: 172 | ema_param.mul_(self.alpha) 173 | ema_param.add_(param * one_minus_alpha) 174 | # customized weight decay 175 | param.mul_(1 - self.wd) 176 | 177 | def set_wd(self,lr): 178 | self.wd = 0.02 * lr 179 | 180 | # For analysis 181 | class AugMixDataset(torch.utils.data.Dataset): 182 | """Dataset wrapper to perform AugMix augmentation.""" 183 | def __init__(self, dataset, preprocess, n_iters, no_jsd=False): 184 | self.dataset = dataset 185 | self.preprocess = preprocess 186 | self.no_jsd = no_jsd 187 | self.n_iters = n_iters 188 | 189 | def __getitem__(self, i): 190 | x = self.dataset[i] 191 | if self.no_jsd: 192 | return aug(x, self.preprocess) 193 | else: 194 | aug_image = [aug(x, self.preprocess) for i in range(self.n_iters)] 195 | im_tuple = [self.preprocess(x)] + aug_image 196 | return im_tuple, i 197 | 198 | def __len__(self): 
199 | return len(self.dataset) 200 | 201 | def aug(image, preprocess): 202 | """Perform AugMix augmentations and compute mixture. 203 | Args: 204 | image: PIL.Image input image 205 | preprocess: Preprocessing function which should return a torch tensor. 206 | Returns: 207 | mixed: Augmented and mixed image. 208 | """ 209 | from . import augmentations 210 | from PIL import Image 211 | image = Image.fromarray(image) 212 | aug_list = augmentations.augmentations 213 | # if args.all_ops: 214 | # aug_list = augmentations.augmentations_all 215 | mixture_width, mixture_depth = 1, -1 216 | aug_severity = 3 217 | ws = np.float32(np.random.dirichlet([1] * mixture_width)) 218 | m = np.float32(np.random.beta(1, 1)) 219 | 220 | mix = torch.zeros_like(preprocess(image)) 221 | for i in range(mixture_width): 222 | image_aug = image.copy() 223 | depth = mixture_depth if mixture_depth > 0 else np.random.randint( 224 | 1, 4) 225 | for _ in range(depth): 226 | op = np.random.choice(aug_list) 227 | image_aug = op(image_aug, aug_severity) 228 | # Preprocessing commutes since all coefficients are convex 229 | mix += ws[i] * preprocess(image_aug) 230 | 231 | mixed = (1 - m) * preprocess(image) + m * mix 232 | return np.array(mixed) 233 | 234 | 235 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | batchbald_redux==2.0.5 2 | gurobipy==9.5.1 3 | joblib==1.1.0 4 | matplotlib==3.1.3 5 | numpy==1.21 6 | pandas==1.0.3 7 | Pillow==9.1.0 8 | scikit_learn==1.0.2 9 | scipy==1.4.1 10 | torch==1.7.1 11 | torchfile==0.1.0 12 | torchvision==0.8.2 13 | tqdm==4.55.0 14 | uda==0.0.3 15 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | HOST=$(hostname) 2 | echo "Current host is: $HOST" 3 | DATE=`date +%Y-%m-%d` 4 | echo $DATE 5 | 6 | if [ ! -d "./save/" ]; then 7 | mkdir ./save/ 8 | fi 9 | 10 | DIRECTORY=./save/${DATE}/ 11 | if [ ! -d "$DIRECTORY" ]; then 12 | mkdir ./save/${DATE}/ 13 | fi 14 | 15 | 16 | # For reproducibility in a eGPU environment 17 | export CUBLAS_WORKSPACE_CONFIG=:16:8 18 | 19 | ########### RUN MAIN.py ############### 20 | dataset=mnist 21 | model=LeNet 22 | start=2 23 | end=20 24 | step=2 25 | n_epoch=50 26 | 27 | 28 | # dataset=cifar10 29 | # model=ResNet18 30 | # start=0.5 31 | # end=4 32 | # step=0.5 33 | # n_epoch=100 34 | 35 | # dataset=gtsrb 36 | # model=ResNet18 37 | # start=0.5 38 | # end=4 39 | # step=0.5 40 | # n_epoch=100 41 | 42 | 43 | strategy='RandomSampling' 44 | 45 | #('RandomSampling' 46 | # 'CoreSet' \ 47 | # 'BadgeSampling' \ 48 | # 'BALDDropout' \ 49 | # 'LeastConfidence' \ 50 | # 'KMeansSampling' \ 51 | # 'AdversarialBIM' \ 52 | # 'ActiveLearningByLearning' \ 53 | # 'LearningLoss' \ 54 | # 'ClusterMarginSampling' \ 55 | # 'uncertainGCN' \ 56 | # 'coreGCN' \ 57 | # 'MCADL' \ 58 | # 'WAAL' \ 59 | # 'VAAL' \ 60 | # 'ssl_Random' \ 61 | # 'ssl_Diff2AugKmeans' \ 62 | # 'ssl_Diff2AugDirect' \ 63 | # 'ssl_Consistency') 64 | 65 | 66 | save_file=$dataset'_result.csv' 67 | data_path='./dataset' 68 | 69 | for random_seed in 1 70 | do 71 | save_path=save/${DATE}/$strategy 72 | if [ ! 
-d "$save_path" ]; then 73 | mkdir ./save/${DATE}/$strategy 74 | fi 75 | echo $strategy 76 | echo $dataset 77 | python main.py --model $model \ 78 | --nStart $start \ 79 | --nEnd $end \ 80 | --nQuery $step \ 81 | --n_epoch $n_epoch \ 82 | --dataset $dataset \ 83 | --strategy $strategy \ 84 | --save_path $save_path \ 85 | --save_file $save_file \ 86 | --data_path $data_path \ 87 | --save_model \ 88 | --seed $random_seed 89 | # --lr 0.1 \ # 0.01 for ssl 90 | done 91 | 92 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | 2 | import os, sys, time, random 3 | import numpy as np 4 | import matplotlib 5 | matplotlib.use('agg') 6 | import matplotlib.pyplot as plt 7 | from torch import nn 8 | 9 | 10 | def print_log(string, log): 11 | print (string) 12 | with open(log, 'w+') as f: 13 | f.write(string) 14 | 15 | 16 | def time_string(): 17 | ISOTIMEFORMAT = '%Y-%m-%d %X' 18 | string = '[{}]'.format( 19 | time.strftime(ISOTIMEFORMAT, time.gmtime(time.time()))) 20 | return string 21 | 22 | 23 | class AverageMeter(object): 24 | """Computes and stores the average and current value""" 25 | 26 | def __init__(self): 27 | self.reset() 28 | 29 | def reset(self): 30 | self.val = 0 31 | self.avg = 0 32 | self.sum = 0 33 | self.count = 0 34 | 35 | def update(self, val, n=1): 36 | self.val = val 37 | self.sum += val * n 38 | self.count += n 39 | self.avg = self.sum / self.count 40 | 41 | 42 | 43 | class RecorderMeter(object): 44 | """Computes and stores the minimum loss value and its epoch index""" 45 | 46 | def __init__(self, total_epoch): 47 | self.reset(total_epoch) 48 | 49 | def reset(self, total_epoch): 50 | assert total_epoch > 0 51 | self.total_epoch = total_epoch 52 | self.current_epoch = 0 53 | self.epoch_losses = np.zeros((self.total_epoch, 2), 54 | dtype=np.float32) # [epoch, train/val] 55 | self.epoch_losses = self.epoch_losses - 1 56 | 57 | self.epoch_accuracy = np.zeros((self.total_epoch, 2), 58 | dtype=np.float32) # [epoch, train/val] 59 | self.epoch_accuracy = self.epoch_accuracy 60 | 61 | def update(self, idx, train_loss, train_acc, val_loss, val_acc): 62 | assert idx >= 0 and idx < self.total_epoch, 'total_epoch : {} , but update with the {} index'.format( 63 | self.total_epoch, idx) 64 | self.epoch_losses[idx, 0] = train_loss 65 | self.epoch_losses[idx, 1] = val_loss 66 | self.epoch_accuracy[idx, 0] = train_acc 67 | self.epoch_accuracy[idx, 1] = val_acc 68 | self.current_epoch = idx + 1 69 | # return self.max_accuracy(False) == val_acc 70 | 71 | def max_accuracy(self, istrain): 72 | if self.current_epoch <= 0: return 0 73 | if istrain: return self.epoch_accuracy[:self.current_epoch, 0].max() 74 | else: return self.epoch_accuracy[:self.current_epoch, 1].max() 75 | 76 | def plot_curve(self, save_path): 77 | title = 'the accuracy/loss curve of train/val' 78 | dpi = 80 79 | width, height = 1200, 800 80 | legend_fontsize = 10 81 | scale_distance = 48.8 82 | figsize = width / float(dpi), height / float(dpi) 83 | 84 | fig = plt.figure(figsize=figsize) 85 | x_axis = np.array([i for i in range(self.total_epoch)]) # epochs 86 | y_axis = np.zeros(self.total_epoch) 87 | 88 | plt.xlim(0, self.total_epoch) 89 | plt.ylim(0, 1) 90 | interval_y = 0.05 91 | interval_x = 5 92 | plt.xticks(np.arange(0, self.total_epoch + interval_x, interval_x)) 93 | plt.yticks(np.arange(0, 1 + interval_y, interval_y)) 94 | plt.grid() 95 | plt.title(title, fontsize=20) 96 | plt.xlabel('the training 
epoch', fontsize=16) 97 | plt.ylabel('accuracy', fontsize=16) 98 | 99 | y_axis[:] = self.epoch_accuracy[:, 0] 100 | plt.plot(x_axis, 101 | y_axis, 102 | color='g', 103 | linestyle='-', 104 | label='train-accuracy', 105 | lw=2) 106 | plt.legend(loc=4, fontsize=legend_fontsize) 107 | 108 | y_axis[:] = self.epoch_accuracy[:, 1] 109 | plt.plot(x_axis, 110 | y_axis, 111 | color='y', 112 | linestyle='-', 113 | label='valid-accuracy', 114 | lw=2) 115 | plt.legend(loc=4, fontsize=legend_fontsize) 116 | 117 | y_axis[:] = self.epoch_losses[:, 0] 118 | plt.plot(x_axis, 119 | y_axis * 50, 120 | color='g', 121 | linestyle=':', 122 | label='train-loss-x50', 123 | lw=2) 124 | plt.legend(loc=4, fontsize=legend_fontsize) 125 | 126 | y_axis[:] = self.epoch_losses[:, 1] 127 | plt.plot(x_axis, 128 | y_axis * 50, 129 | color='y', 130 | linestyle=':', 131 | label='valid-loss-x50', 132 | lw=2) 133 | plt.legend(loc=4, fontsize=legend_fontsize) 134 | 135 | 136 | if save_path is not None: 137 | fig.savefig(save_path, dpi=dpi, bbox_inches='tight') 138 | print('---- save figure {} into {}'.format(title, save_path)) 139 | plt.close(fig) 140 | 141 | 142 | def convert_secs2time(epoch_time): 143 | need_hour = int(epoch_time / 3600) 144 | need_mins = int((epoch_time - 3600 * need_hour) / 60) 145 | need_secs = int(epoch_time - 3600 * need_hour - 60 * need_mins) 146 | return need_hour, need_mins, need_secs 147 | 148 | 149 | def time_file_str(): 150 | ISOTIMEFORMAT = '%Y-%m-%d' 151 | string = '{}'.format(time.strftime(ISOTIMEFORMAT, 152 | time.gmtime(time.time()))) 153 | return string + '-{}'.format(random.randint(1, 10000)) 154 | 155 | 156 | def adjust_learning_rate(optimizer, epoch, gammas, schedule, args): 157 | """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" 158 | "Add by YU" 159 | lr = args.lr 160 | mu = args.momentum 161 | 162 | if args.optimizer != "YF": 163 | assert len(gammas) == len( 164 | schedule), "length of gammas and schedule should be equal" 165 | for (gamma, step) in zip(gammas, schedule): 166 | if (epoch >= step): 167 | lr = lr * gamma 168 | else: 169 | break 170 | for param_group in optimizer.param_groups: 171 | param_group['lr'] = lr 172 | 173 | elif args.optimizer == "YF": 174 | lr = optimizer._lr 175 | mu = optimizer._mu 176 | 177 | return lr, mu 178 | --------------------------------------------------------------------------------
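As a quick reference, adjust_learning_rate above applies a plain step decay driven by the
--gammas and --schedule arguments (its docstring's "decayed by 10 every 30 epochs" is only the
generic default behaviour). Below is a minimal, self-contained sketch of that rule; the gamma
and milestone values are illustrative, not the repository defaults passed through main.py.

```python
# Minimal sketch of the step decay performed by adjust_learning_rate
# (illustrative gammas/schedule; the real values come from the main.py arguments).
def step_decay(base_lr, epoch, gammas=(0.1, 0.1), schedule=(80, 120)):
    lr = base_lr
    for gamma, step in zip(gammas, schedule):
        if epoch >= step:   # decay once for every milestone already reached
            lr *= gamma
        else:
            break
    return lr

assert abs(step_decay(0.1, epoch=100) - 0.01) < 1e-9   # past the first milestone
assert abs(step_decay(0.1, epoch=130) - 0.001) < 1e-9  # past both milestones
```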