├── LICENSE.md ├── README.md ├── deepcore ├── __init__.py ├── datasets │ ├── __init__.py │ ├── cifar10.py │ ├── cifar100.py │ ├── fashionmnist.py │ ├── imagenet.py │ ├── mnist.py │ ├── qmnist.py │ ├── svhn.py │ └── tinyimagenet.py ├── methods │ ├── __init__.py │ ├── cal.py │ ├── contextualdiversity.py │ ├── coresetmethod.py │ ├── craig.py │ ├── deepfool.py │ ├── earlytrain.py │ ├── forgetting.py │ ├── full.py │ ├── glister.py │ ├── gradmatch.py │ ├── grand.py │ ├── herding.py │ ├── kcentergreedy.py │ ├── methods_utils │ │ ├── __init__.py │ │ ├── cossim.py │ │ ├── euclidean.py │ │ ├── submodular_function.py │ │ └── submodular_optimizer.py │ ├── submodular.py │ ├── uncertainty.py │ └── uniform.py └── nets │ ├── __init__.py │ ├── alexnet.py │ ├── inceptionv3.py │ ├── lenet.py │ ├── mlp.py │ ├── mobilenetv3.py │ ├── nets_utils │ ├── __init__.py │ ├── parallel.py │ └── recorder.py │ ├── resnet.py │ ├── vgg.py │ └── wideresnet.py ├── main.py ├── requirements.txt └── utils.py /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 ZHAO, BO 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepCore: A Comprehensive Library for Coreset Selection in Deep Learning [PDF](https://arxiv.org/pdf/2204.08499.pdf) 2 | 3 | 4 | ### Introduction 5 | To advance research on coreset selection in deep learning, we contribute **DeepCore**, an extensive and extensible code library that reproduces dozens of popular and advanced coreset selection methods and enables fair comparison of different methods under the same experimental settings. **DeepCore** is highly modular, making it easy to add new architectures, datasets, methods and learning scenarios. It is built on PyTorch.
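Dependencies are listed in ```requirements.txt```. A minimal setup sketch (assuming Python 3 with pip, run from the repository root):
```sh
pip install -r requirements.txt
```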
6 | 7 | ### Coreset Methods 8 | We list the methods in DeepCore according to the categories in our original paper: 1) geometry-based methods Contextual Diversity (CD), Herding and k-Center Greedy; 2) uncertainty-based methods; 3) error-based methods Forgetting and GraNd score; 4) decision-boundary-based methods Cal and DeepFool; 5) gradient-matching-based methods Craig and GradMatch; 6) bilevel-optimization methods Glister; and 7) submodularity-based methods with Graph Cut (GC) and Facility Location (FL) functions. We also include random selection as a baseline. 9 | 10 | ### Datasets 11 | DeepCore supports a series of popular computer vision datasets, namely MNIST, QMNIST, FashionMNIST, SVHN, CIFAR10, CIFAR100, TinyImageNet and ImageNet. 12 | 13 | ### Models 14 | Supported architectures are a two-layer fully connected MLP, LeNet, AlexNet, VGG, Inception-v3, ResNet, WideResNet and MobileNet-v3. 15 | 16 | ### Example 17 | Selecting with Glister and training on the coreset with fraction 0.1: 18 | ```sh 19 | CUDA_VISIBLE_DEVICES=0 python -u main.py --fraction 0.1 --dataset CIFAR10 --data_path ~/datasets --num_exp 5 --workers 10 --optimizer SGD -se 10 --selection Glister --model InceptionV3 --lr 0.1 -sp ./result --batch 128 20 | ``` 21 | 22 | Resume interrupted training with the argument ```--resume```: 23 | ```sh 24 | CUDA_VISIBLE_DEVICES=0 python -u main.py --fraction 0.1 --dataset CIFAR10 --data_path ~/datasets --num_exp 5 --workers 10 --optimizer SGD -se 10 --selection Glister --model InceptionV3 --lr 0.1 -sp ./result --batch 128 --resume "CIFAR10_InceptionV3_Glister_exp0_epoch200_2022-02-05 21:31:53.762903_0.1_unknown.ckpt" 25 | ``` 26 | 27 | Batch sizes can be assigned separately for selection (```-sb```) and training (```-tb```): 28 | ```sh 29 | CUDA_VISIBLE_DEVICES=0 python -u main.py --fraction 0.5 --dataset ImageNet --data_path ~/datasets --num_exp 5 --workers 10 --optimizer SGD -se 10 --selection Cal --model MobileNetV3Large --lr 0.1 -sp ./result -tb 256 -sb 128 30 | ``` 31 | 32 | Use the argument ```--uncertainty``` to choose the uncertainty score: 33 | ```sh 34 | CUDA_VISIBLE_DEVICES=0 python -u main.py --fraction 0.1 --dataset CIFAR10 --data_path ~/datasets --num_exp 5 --workers 10 --optimizer SGD -se 10 --selection Uncertainty --model ResNet18 --lr 0.1 -sp ./result --batch 128 --uncertainty Entropy 35 | ``` 36 | 37 | 38 | Use the argument ```--submodular``` to choose the submodular function, e.g. ```GraphCut```, ```FacilityLocation``` or ```LogDeterminant```. You may also specify the greedy algorithm used to maximize the function with the argument ```--submodular_greedy```, for example ```NaiveGreedy```, ```LazyGreedy```, ```StochasticGreedy```, etc.: 39 | ```sh 40 | CUDA_VISIBLE_DEVICES=0 python -u main.py --fraction 0.1 --dataset CIFAR10 --data_path ~/datasets --num_exp 5 --workers 10 --optimizer SGD -se 10 --selection Submodular --model ResNet18 --lr 0.1 -sp ./result --batch 128 --submodular GraphCut --submodular_greedy NaiveGreedy 41 | ``` 42 | 43 | ### Extend 44 | 45 | DeepCore is highly modular and scalable. New architectures, datasets and selection methods can be added easily, so that coreset methods can be evaluated in a richer set of scenarios and new methods can be compared fairly. Here is an example for datasets. To add a new dataset, you need to implement a function whose input is the data path and whose outputs are the number of channels, image size, number of classes, class names, mean, std, and the training and testing datasets inherited from ```torch.utils.data.Dataset```.
46 | 47 | 48 | ```python 49 | from torchvision import datasets, transforms 50 | 51 | 52 | def MNIST(data_path): 53 | channel = 1 54 | im_size = (28, 28) 55 | num_classes = 10 56 | mean = [0.1307] 57 | std = [0.3081] 58 | transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) 59 | dst_train = datasets.MNIST(data_path, train=True, download=True, transform=transform) 60 | dst_test = datasets.MNIST(data_path, train=False, download=True, transform=transform) 61 | class_names = [str(c) for c in range(num_classes)] 62 | return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test 63 | ``` 64 | Here is an example of implementing a network architecture. 65 | ```python 66 | import torch.nn as nn 67 | import torch.nn.functional as F 68 | from torch import set_grad_enabled 69 | from .nets_utils import EmbeddingRecorder 70 | 71 | 72 | class MLP(nn.Module): 73 | def __init__(self, channel, num_classes, im_size, record_embedding: bool = False, no_grad: bool = False, 74 | pretrained: bool = False): 75 | if pretrained: 76 | raise NotImplementedError("torchvision pretrained models not available.") 77 | super(MLP, self).__init__() 78 | self.fc_1 = nn.Linear(im_size[0] * im_size[1] * channel, 128) 79 | self.fc_2 = nn.Linear(128, 128) 80 | self.fc_3 = nn.Linear(128, num_classes) 81 | 82 | self.embedding_recorder = EmbeddingRecorder(record_embedding) 83 | self.no_grad = no_grad 84 | 85 | def get_last_layer(self): 86 | return self.fc_3 87 | 88 | def forward(self, x): 89 | with set_grad_enabled(not self.no_grad): 90 | out = x.view(x.size(0), -1) 91 | out = F.relu(self.fc_1(out)) 92 | out = F.relu(self.fc_2(out)) 93 | out = self.embedding_recorder(out) 94 | out = self.fc_3(out) 95 | return out 96 | ``` 97 | 98 | To implement a new coreset method, inherit it from the ```CoresetMethod``` class and return the selected indices via the ```select``` method. 99 | 100 | ```python 101 | class CoresetMethod(object): 102 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, **kwargs): 103 | if fraction <= 0.0 or fraction > 1.0: 104 | raise ValueError("Illegal Coreset Size.") 105 | self.dst_train = dst_train 106 | self.num_classes = len(dst_train.classes) 107 | self.fraction = fraction 108 | self.random_seed = random_seed 109 | self.index = [] 110 | self.args = args 111 | 112 | self.n_train = len(dst_train) 113 | self.coreset_size = round(self.n_train * fraction) 114 | 115 | def select(self, **kwargs): 116 | return 117 | ``` 118 | 119 | ### References 120 | 121 | 1. Agarwal, S., Arora, H., Anand, S., Arora, C.: Contextual diversity for active learning. In: ECCV. pp. 137–153. Springer (2020) 122 | 2. Coleman, C., Yeh, C., Mussmann, S., Mirzasoleiman, B., Bailis, P., Liang, P., Leskovec, J., Zaharia, M.: Selection via proxy: Efficient data selection for deep learning. In: ICLR (2020) 123 | 3. Ducoffe, M., Precioso, F.: Adversarial active learning for deep networks: a margin based approach. arXiv preprint arXiv:1802.09841 (2018) 124 | 4. Iyer, R., Khargonkar, N., Bilmes, J., Asnani, H.: Submodular combinatorial information measures with applications in machine learning. In: Algorithmic Learning Theory. pp. 722–754. PMLR (2021) 125 | 5. Killamsetty, K., Durga, S., Ramakrishnan, G., De, A., Iyer, R.: Grad-match: Gradient matching based data subset selection for efficient deep model training. In: ICML. pp. 5464–5474 (2021) 126 | 6.
Killamsetty, K., Sivasubramanian, D., Ramakrishnan, G., Iyer, R.: Glister: Generalization based data subset selection for efficient and robust learning. In: Proceedings of the AAAI Conference on Artificial Intelligence (2021) 127 | 7. Margatina, K., Vernikos, G., Barrault, L., Aletras, N.: Active learning by acquiring contrastive examples. arXiv preprint arXiv:2109.03764 (2021) 128 | 8. Mirzasoleiman, B., Bilmes, J., Leskovec, J.: Coresets for data-efficient training of machine learning models. In: ICML. PMLR (2020) 129 | 9. Paul, M., Ganguli, S., Dziugaite, G.K.: Deep learning on a data diet: Finding important examples early in training. arXiv preprint arXiv:2107.07075 (2021) 130 | 10. Sener, O., Savarese, S.: Active learning for convolutional neural networks: A coreset approach. In: ICLR (2018) 131 | 11. Toneva, M., Sordoni, A., des Combes, R.T., Trischler, A., Bengio, Y., Gordon, G.J.: An empirical study of example forgetting during deep neural network learning. In: ICLR (2019) 132 | 12. Welling, M.: Herding dynamical weights to learn. In: Proceedings of the 26th Annual International Conference on Machine Learning. pp. 1121–1128 (2009) 133 | 134 | 135 | -------------------------------------------------------------------------------- /deepcore/__init__.py: -------------------------------------------------------------------------------- 1 | # __init__.py -------------------------------------------------------------------------------- /deepcore/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .cifar10 import * 2 | from .cifar100 import * 3 | from .fashionmnist import * 4 | from .imagenet import * 5 | from .mnist import * 6 | from .qmnist import * 7 | from .svhn import * 8 | from .tinyimagenet import * 9 | -------------------------------------------------------------------------------- /deepcore/datasets/cifar10.py: -------------------------------------------------------------------------------- 1 | from torchvision import datasets, transforms 2 | from torch import tensor, long 3 | 4 | 5 | def CIFAR10(data_path): 6 | channel = 3 7 | im_size = (32, 32) 8 | num_classes = 10 9 | mean = [0.4914, 0.4822, 0.4465] 10 | std = [0.2470, 0.2435, 0.2616] 11 | 12 | transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) 13 | dst_train = datasets.CIFAR10(data_path, train=True, download=True, transform=transform) 14 | dst_test = datasets.CIFAR10(data_path, train=False, download=True, transform=transform) 15 | class_names = dst_train.classes 16 | dst_train.targets = tensor(dst_train.targets, dtype=long) 17 | dst_test.targets = tensor(dst_test.targets, dtype=long) 18 | return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test 19 | -------------------------------------------------------------------------------- /deepcore/datasets/cifar100.py: -------------------------------------------------------------------------------- 1 | from torchvision import datasets, transforms 2 | from torch import tensor, long 3 | 4 | 5 | def CIFAR100(data_path): 6 | channel = 3 7 | im_size = (32, 32) 8 | num_classes = 100 9 | mean = [0.5071, 0.4865, 0.4409] 10 | std = [0.2673, 0.2564, 0.2762] 11 | transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) 12 | dst_train = datasets.CIFAR100(data_path, train=True, download=True, transform=transform) 13 | dst_test = datasets.CIFAR100(data_path, train=False, download=True, transform=transform) 14 | class_names =
dst_train.classes 15 | dst_train.targets = tensor(dst_train.targets, dtype=long) 16 | dst_test.targets = tensor(dst_test.targets, dtype=long) 17 | return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test 18 | -------------------------------------------------------------------------------- /deepcore/datasets/fashionmnist.py: -------------------------------------------------------------------------------- 1 | from torchvision import datasets, transforms 2 | 3 | 4 | def FashionMNIST(data_path): 5 | channel = 1 6 | im_size = (28, 28) 7 | num_classes = 10 8 | mean = [0.2861] 9 | std = [0.3530] 10 | transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) 11 | dst_train = datasets.FashionMNIST(data_path, train=True, download=True, transform=transform) 12 | dst_test = datasets.FashionMNIST(data_path, train=False, download=True, transform=transform) 13 | class_names = dst_train.classes 14 | return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test 15 | -------------------------------------------------------------------------------- /deepcore/datasets/imagenet.py: -------------------------------------------------------------------------------- 1 | from torchvision import datasets, transforms 2 | from torch import tensor, long 3 | 4 | 5 | def ImageNet(data_path): 6 | channel = 3 7 | im_size = (224, 224) 8 | num_classes = 1000 9 | mean = [0.485, 0.456, 0.406] 10 | std = [0.229, 0.224, 0.225] 11 | normalize = transforms.Normalize(mean, std) 12 | dst_train = datasets.ImageNet(data_path, split="train", transform=transforms.Compose([ 13 | transforms.Resize(256), 14 | transforms.CenterCrop(224), 15 | transforms.ToTensor(), 16 | normalize, 17 | ])) 18 | dst_test = datasets.ImageNet(data_path, split="val", transform=transforms.Compose([ 19 | transforms.Resize(256), 20 | transforms.CenterCrop(224), 21 | transforms.ToTensor(), 22 | normalize, 23 | ])) 24 | class_names = dst_train.classes 25 | dst_train.targets = tensor(dst_train.targets, dtype=long) 26 | dst_test.targets = tensor(dst_test.targets, dtype=long) 27 | return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test 28 | -------------------------------------------------------------------------------- /deepcore/datasets/mnist.py: -------------------------------------------------------------------------------- 1 | from torchvision import datasets, transforms 2 | import numpy as np 3 | 4 | 5 | def MNIST(data_path, permuted=False, permutation_seed=None): 6 | channel = 1 7 | im_size = (28, 28) 8 | num_classes = 10 9 | mean = [0.1307] 10 | std = [0.3081] 11 | transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) 12 | if permuted: 13 | np.random.seed(permutation_seed) 14 | pixel_permutation = np.random.permutation(28 * 28) 15 | transform = transforms.Compose( 16 | [transform, transforms.Lambda(lambda x: x.view(-1, 1)[pixel_permutation].view(1, 28, 28))]) 17 | 18 | dst_train = datasets.MNIST(data_path, train=True, download=True, transform=transform) 19 | dst_test = datasets.MNIST(data_path, train=False, download=True, transform=transform) 20 | class_names = [str(c) for c in range(num_classes)] 21 | return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test 22 | 23 | 24 | def permutedMNIST(data_path, permutation_seed=None): 25 | return MNIST(data_path, True, permutation_seed) 26 | -------------------------------------------------------------------------------- 
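All dataset loaders above share the same return signature. The following is a minimal consumption sketch (assuming PyTorch and torchvision are installed and the data path is writable; the variable names are illustrative, not part of the library):
```python
import os
import torch
from deepcore.datasets import MNIST

# Each loader returns dataset metadata plus ready-made torchvision datasets.
data_path = os.path.expanduser("~/datasets")
channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test = MNIST(data_path)

# The returned datasets plug directly into a standard DataLoader.
train_loader = torch.utils.data.DataLoader(dst_train, batch_size=128, shuffle=True)
images, targets = next(iter(train_loader))
print(images.shape)  # torch.Size([128, 1, 28, 28])
```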
/deepcore/datasets/qmnist.py: -------------------------------------------------------------------------------- 1 | from torchvision import datasets, transforms 2 | 3 | 4 | def QMNIST(data_path): 5 | channel = 1 6 | im_size = (28, 28) 7 | num_classes = 10 8 | mean = [0.1308] 9 | std = [0.3088] 10 | transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) 11 | dst_train = datasets.QMNIST(data_path, train=True, download=True, transform=transform) 12 | dst_test = datasets.QMNIST(data_path, train=False, download=True, transform=transform) 13 | class_names = [str(c) for c in range(num_classes)] 14 | dst_train.targets = dst_train.targets[:, 0] 15 | dst_test.targets = dst_test.targets[:, 0] 16 | dst_train.compat = False 17 | dst_test.compat = False 18 | return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test 19 | -------------------------------------------------------------------------------- /deepcore/datasets/svhn.py: -------------------------------------------------------------------------------- 1 | from torchvision import datasets, transforms 2 | from torch import tensor, long 3 | 4 | 5 | def SVHN(data_path): 6 | channel = 3 7 | im_size = (32, 32) 8 | num_classes = 10 9 | mean = [0.4377, 0.4438, 0.4728] 10 | std = [0.1980, 0.2010, 0.1970] 11 | transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) 12 | dst_train = datasets.SVHN(data_path, split='train', download=True, transform=transform) 13 | dst_test = datasets.SVHN(data_path, split='test', download=True, transform=transform) 14 | class_names = [str(c) for c in range(num_classes)] 15 | dst_train.classes = list(class_names) 16 | dst_test.classes = list(class_names) 17 | dst_train.targets = tensor(dst_train.labels, dtype=long) 18 | dst_test.targets = tensor(dst_test.labels, dtype=long) 19 | return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test 20 | -------------------------------------------------------------------------------- /deepcore/datasets/tinyimagenet.py: -------------------------------------------------------------------------------- 1 | from torchvision import datasets, transforms 2 | import os 3 | import requests 4 | import zipfile 5 | 6 | 7 | def TinyImageNet(data_path, downsize=True): 8 | if not os.path.exists(os.path.join(data_path, "tiny-imagenet-200")): 9 | url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip" # 248MB 10 | print("Downloading Tiny-ImageNet") 11 | r = requests.get(url, stream=True) 12 | with open(os.path.join(data_path, "tiny-imagenet-200.zip"), "wb") as f: 13 | for chunk in r.iter_content(chunk_size=1024): 14 | if chunk: 15 | f.write(chunk) 16 | 17 | print("Unzipping Tiny-ImageNet") 18 | with zipfile.ZipFile(os.path.join(data_path, "tiny-imagenet-200.zip")) as zf: 19 | zf.extractall(path=data_path) 20 | 21 | channel = 3 22 | im_size = (32, 32) if downsize else (64, 64) 23 | num_classes = 200 24 | mean = (0.4802, 0.4481, 0.3975) 25 | std = (0.2770, 0.2691, 0.2821) 26 | 27 | transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) 28 | if downsize: 29 | transform = transforms.Compose([transforms.Resize(32), transform]) 30 | 31 | dst_train = datasets.ImageFolder(root=os.path.join(data_path, 'tiny-imagenet-200/train'), transform=transform) 32 | dst_test = datasets.ImageFolder(root=os.path.join(data_path, 'tiny-imagenet-200/test'), transform=transform) 33 | 34 | class_names = dst_train.classes 35 | return channel, im_size, num_classes,
class_names, mean, std, dst_train, dst_test 36 | -------------------------------------------------------------------------------- /deepcore/methods/__init__.py: -------------------------------------------------------------------------------- 1 | from .cal import * 2 | from .contextualdiversity import * 3 | from .coresetmethod import * 4 | from .craig import * 5 | from .deepfool import * 6 | from .earlytrain import * 7 | from .forgetting import * 8 | from .full import * 9 | from .glister import * 10 | from .grand import * 11 | from .gradmatch import * 12 | from .herding import * 13 | from .kcentergreedy import * 14 | from .submodular import * 15 | from .uncertainty import * 16 | from .uniform import * 17 | 18 | -------------------------------------------------------------------------------- /deepcore/methods/cal.py: -------------------------------------------------------------------------------- 1 | from .earlytrain import EarlyTrain 2 | from .methods_utils.euclidean import euclidean_dist_pair_np 3 | from .methods_utils.cossim import cossim_pair_np 4 | import numpy as np 5 | import torch 6 | from .. import nets 7 | from copy import deepcopy 8 | from torchvision import transforms 9 | 10 | 11 | class Cal(EarlyTrain): 12 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, 13 | balance=True, metric="euclidean", neighbors: int = 10, pretrain_model: str = "ResNet18", **kwargs): 14 | super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs) 15 | 16 | self.balance = balance 17 | 18 | assert neighbors > 0 and neighbors < 100 19 | self.neighbors = neighbors 20 | 21 | if metric == "euclidean": 22 | self.metric = euclidean_dist_pair_np 23 | elif metric == "cossim": 24 | self.metric = lambda a, b: -1. 
* cossim_pair_np(a, b) 25 | elif callable(metric): 26 | self.metric = metric 27 | else: 28 | self.metric = euclidean_dist_pair_np 29 | 30 | self.pretrain_model = pretrain_model 31 | 32 | def num_classes_mismatch(self): 33 | raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.") 34 | 35 | def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): 36 | if batch_idx % self.args.print_freq == 0: 37 | print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( 38 | epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) 39 | 40 | def find_knn(self): 41 | """ 42 | Find k-nearest-neighbor data points with the pretrained embedding model 43 | :return: knn matrix 44 | """ 45 | 46 | # Initialize pretrained model 47 | model = nets.__dict__[self.pretrain_model](channel=self.args.channel, num_classes=self.args.num_classes, 48 | im_size=(224, 224), record_embedding=True, no_grad=True, 49 | pretrained=True).to(self.args.device) 50 | model.eval() 51 | 52 | # Resize dst_train to 224*224 53 | if self.args.im_size[0] != 224 or self.args.im_size[1] != 224: 54 | dst_train = deepcopy(self.dst_train) 55 | dst_train.transform = transforms.Compose([dst_train.transform, transforms.Resize(224)]) 56 | else: 57 | dst_train = self.dst_train 58 | 59 | # Calculate the distance matrix and return knn results 60 | if self.balance: 61 | knn = [] 62 | for c in range(self.args.num_classes): 63 | class_index = np.arange(self.n_train)[self.dst_train.targets == c] 64 | 65 | # Start recording embedding vectors 66 | embeddings = [] 67 | batch_loader = torch.utils.data.DataLoader(torch.utils.data.Subset(dst_train, class_index), 68 | batch_size=self.args.selection_batch, 69 | num_workers=self.args.workers) 70 | batch_num = len(batch_loader) 71 | for i, (inputs, _) in enumerate(batch_loader): 72 | if i % self.args.print_freq == 0: 73 | print("| Calculating embeddings for batch [%3d/%3d]" % (i + 1, batch_num)) 74 | model(inputs.to(self.args.device)) 75 | embeddings.append(model.embedding_recorder.embedding.flatten(1).cpu().numpy()) 76 | 77 | embeddings = np.concatenate(embeddings, axis=0) 78 | 79 | knn.append(np.argsort(self.metric(embeddings), axis=1)[:, 1:(self.neighbors + 1)]) 80 | return knn 81 | else: 82 | # Start recording embedding vectors 83 | embeddings = [] 84 | batch_loader = torch.utils.data.DataLoader(dst_train, batch_size=self.args.selection_batch 85 | ,num_workers=self.args.workers) 86 | batch_num = len(batch_loader) 87 | 88 | for i, (inputs, _) in enumerate(batch_loader): 89 | if i % self.args.print_freq == 0: 90 | print("| Calculating embeddings for batch [%3d/%3d]" % (i + 1, batch_num)) 91 | model(inputs.to(self.args.device)) 92 | embeddings.append(model.embedding_recorder.embedding.flatten(1).cpu().numpy()) 93 | embeddings = np.concatenate(embeddings, axis=0) 94 | 95 | return np.argsort(self.metric(embeddings), axis=1)[:, 1:(self.neighbors + 1)] 96 | 97 | def calc_kl(self, knn, index=None): 98 | self.model.eval() 99 | self.model.no_grad = True 100 | sample_num = self.n_train if index is None else len(index) 101 | probs = np.zeros([sample_num, self.args.num_classes]) 102 | 103 | batch_loader = torch.utils.data.DataLoader( 104 | self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), 105 | batch_size=self.args.selection_batch, num_workers=self.args.workers) 106 | batch_num = len(batch_loader) 107 | 108 | for i, (inputs, _) in enumerate(batch_loader): 109 | probs[i * self.args.selection_batch:(i + 1) *
self.args.selection_batch] = torch.nn.functional.softmax( 110 | self.model(inputs.to(self.args.device)), dim=1).detach().cpu() 111 | 112 | s = np.zeros(sample_num) 113 | for i in range(0, sample_num, self.args.selection_batch): 114 | if i % self.args.print_freq == 0: 115 | print("| Calculating KL-divergence for batch [%3d/%3d]" % (i // self.args.selection_batch + 1, batch_num)) 116 | aa = np.expand_dims(probs[i:(i + self.args.selection_batch)], 1).repeat(self.neighbors, 1) 117 | bb = probs[knn[i:(i + self.args.selection_batch)], :] 118 | s[i:(i + self.args.selection_batch)] = np.mean( 119 | np.sum(0.5 * aa * np.log(aa / bb) + 0.5 * bb * np.log(bb / aa), axis=2), axis=1) 120 | self.model.no_grad = False 121 | return s 122 | 123 | def finish_run(self): 124 | scores = [] 125 | if self.balance: 126 | selection_result = np.array([], dtype=np.int32) 127 | for c, knn in zip(range(self.args.num_classes), self.knn): 128 | class_index = np.arange(self.n_train)[self.dst_train.targets == c] 129 | scores.append(self.calc_kl(knn, class_index)) 130 | selection_result = np.append(selection_result, class_index[np.argsort( 131 | # Sort by KL-divergence score (ascending) and keep the selected fraction per class. 132 | scores[-1])[::1][:round(self.fraction * len(class_index))]]) 133 | else: 134 | selection_result = np.argsort(self.calc_kl(self.knn))[::1][:self.coreset_size] 135 | return {"indices": selection_result, "scores": scores} 136 | 137 | def select(self, **kwargs): 138 | self.knn = self.find_knn() 139 | selection_result = self.run() 140 | return selection_result -------------------------------------------------------------------------------- /deepcore/methods/contextualdiversity.py: -------------------------------------------------------------------------------- 1 | from .kcentergreedy import kCenterGreedy 2 | import torch 3 | 4 | 5 | # Acknowledgement to: 6 | # https://github.com/sharat29ag/CDAL 7 | 8 | 9 | class ContextualDiversity(kCenterGreedy): 10 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, 11 | specific_model=None, balance=True, already_selected=[], torchvision_pretrain: bool = False, **kwargs): 12 | super(ContextualDiversity, self).__init__(dst_train, args, fraction, random_seed, epochs=epochs, specific_model=specific_model, balance=balance, already_selected=already_selected, torchvision_pretrain=torchvision_pretrain, **kwargs) 13 | self.metric = self._metric 14 | 15 | def _metric(self, a_output, b_output): 16 | with torch.no_grad(): 17 | # Overload self.metric function for kCenterGreedy Algorithm 18 | aa = a_output.view(a_output.shape[0], 1, a_output.shape[1]).repeat(1, b_output.shape[0], 1) 19 | bb = b_output.view(1, b_output.shape[0], b_output.shape[1]).repeat(a_output.shape[0], 1, 1) 20 | return torch.sum(0.5 * aa * torch.log(aa / bb) + 0.5 * bb * torch.log(bb / aa), dim=2) 21 | 22 | def construct_matrix(self, index=None): 23 | self.model.eval() 24 | self.model.no_grad = True 25 | sample_num = self.n_train if index is None else len(index) 26 | matrix = torch.zeros([sample_num, self.args.num_classes], requires_grad=False).to(self.args.device) 27 | batch_loader = torch.utils.data.DataLoader(self.dst_train if index is None else 28 | torch.utils.data.Subset(self.dst_train, index), batch_size=self.args.selection_batch 29 | ,num_workers=self.args.workers) 30 | for i, (inputs, _) in enumerate(batch_loader): 31 | matrix[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num)] =
torch.nn.functional.softmax(self.model(inputs.to(self.args.device)), dim=1) 32 | self.model.no_grad = False 33 | return matrix 34 | -------------------------------------------------------------------------------- /deepcore/methods/coresetmethod.py: -------------------------------------------------------------------------------- 1 | class CoresetMethod(object): 2 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, **kwargs): 3 | if fraction <= 0.0 or fraction > 1.0: 4 | raise ValueError("Illegal Coreset Size.") 5 | self.dst_train = dst_train 6 | self.num_classes = len(dst_train.classes) 7 | self.fraction = fraction 8 | self.random_seed = random_seed 9 | self.index = [] 10 | self.args = args 11 | 12 | self.n_train = len(dst_train) 13 | self.coreset_size = round(self.n_train * fraction) 14 | 15 | def select(self, **kwargs): 16 | return 17 | 18 | -------------------------------------------------------------------------------- /deepcore/methods/craig.py: -------------------------------------------------------------------------------- 1 | from .earlytrain import EarlyTrain 2 | import torch 3 | from .methods_utils import FacilityLocation, submodular_optimizer 4 | import numpy as np 5 | from .methods_utils.euclidean import euclidean_dist_pair_np 6 | from ..nets.nets_utils import MyDataParallel 7 | 8 | 9 | class Craig(EarlyTrain): 10 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, 11 | balance=True, greedy="LazyGreedy", **kwargs): 12 | super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs) 13 | 14 | if greedy not in submodular_optimizer.optimizer_choices: 15 | raise ModuleNotFoundError("Greedy optimizer not found.") 16 | self._greedy = greedy 17 | self.balance = balance 18 | 19 | def before_train(self): 20 | pass 21 | 22 | def after_loss(self, outputs, loss, targets, batch_inds, epoch): 23 | pass 24 | 25 | def before_epoch(self): 26 | pass 27 | 28 | def after_epoch(self): 29 | pass 30 | 31 | def before_run(self): 32 | pass 33 | 34 | def num_classes_mismatch(self): 35 | raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.") 36 | 37 | def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): 38 | if batch_idx % self.args.print_freq == 0: 39 | print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( 40 | epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) 41 | 42 | def calc_gradient(self, index=None): 43 | self.model.eval() 44 | 45 | batch_loader = torch.utils.data.DataLoader( 46 | self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), 47 | batch_size=self.args.selection_batch, num_workers=self.args.workers) 48 | sample_num = len(self.dst_val.targets) if index is None else len(index) 49 | self.embedding_dim = self.model.get_last_layer().in_features 50 | 51 | gradients = [] 52 | 53 | for i, (input, targets) in enumerate(batch_loader): 54 | self.model_optimizer.zero_grad() 55 | outputs = self.model(input.to(self.args.device)) 56 | loss = self.criterion(outputs.requires_grad_(True), 57 | targets.to(self.args.device)).sum() 58 | batch_num = targets.shape[0] 59 | with torch.no_grad(): 60 | bias_parameters_grads = torch.autograd.grad(loss, outputs)[0] 61 | weight_parameters_grads = self.model.embedding_recorder.embedding.view(batch_num, 1, 62 | self.embedding_dim).repeat(1, 63 | self.args.num_classes, 64 | 1) * bias_parameters_grads.view( 65 | batch_num, 
self.args.num_classes, 1).repeat(1, 1, self.embedding_dim) 66 | gradients.append( 67 | torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1).cpu().numpy()) 68 | 69 | gradients = np.concatenate(gradients, axis=0) 70 | 71 | self.model.train() 72 | return euclidean_dist_pair_np(gradients) 73 | 74 | def calc_weights(self, matrix, result): 75 | min_sample = np.argmax(matrix[result], axis=0) 76 | weights = np.ones(np.sum(result) if result.dtype == bool else len(result)) 77 | for i in min_sample: 78 | weights[i] = weights[i] + 1 79 | return weights 80 | 81 | def finish_run(self): 82 | if isinstance(self.model, MyDataParallel): 83 | self.model = self.model.module 84 | 85 | self.model.no_grad = True 86 | with self.model.embedding_recorder: 87 | if self.balance: 88 | # Do selection by class 89 | selection_result = np.array([], dtype=np.int32) 90 | weights = np.array([]) 91 | for c in range(self.args.num_classes): 92 | class_index = np.arange(self.n_train)[self.dst_train.targets == c] 93 | matrix = -1. * self.calc_gradient(class_index) 94 | matrix -= np.min(matrix) - 1e-3 95 | submod_function = FacilityLocation(index=class_index, similarity_matrix=matrix) 96 | submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=class_index, 97 | budget=round(self.fraction * len( 98 | class_index))) 99 | class_result = submod_optimizer.select(gain_function=submod_function.calc_gain, 100 | update_state=submod_function.update_state) 101 | selection_result = np.append(selection_result, class_result) 102 | weights = np.append(weights, self.calc_weights(matrix, np.isin(class_index, class_result))) 103 | else: 104 | matrix = np.zeros([self.n_train, self.n_train]) 105 | all_index = np.arange(self.n_train) 106 | for c in range(self.args.num_classes): # Sparse Matrix 107 | class_index = np.arange(self.n_train)[self.dst_train.targets == c] 108 | matrix[np.ix_(class_index, class_index)] = -1. 
* self.calc_gradient(class_index) 109 | matrix[np.ix_(class_index, class_index)] -= np.min(matrix[np.ix_(class_index, class_index)]) - 1e-3 110 | submod_function = FacilityLocation(index=all_index, similarity_matrix=matrix) 111 | submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=all_index, 112 | budget=self.coreset_size) 113 | selection_result = submod_optimizer.select(gain_function=submod_function.calc_gain_batch, 114 | update_state=submod_function.update_state, 115 | batch=self.args.selection_batch) 116 | weights = self.calc_weights(matrix, selection_result) 117 | self.model.no_grad = False 118 | return {"indices": selection_result, "weights": weights} 119 | 120 | def select(self, **kwargs): 121 | selection_result = self.run() 122 | return selection_result 123 | -------------------------------------------------------------------------------- /deepcore/methods/deepfool.py: -------------------------------------------------------------------------------- 1 | from .earlytrain import EarlyTrain 2 | import torch 3 | import numpy as np 4 | 5 | 6 | class DeepFool(EarlyTrain): 7 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, 8 | specific_model=None, balance: bool = False, max_iter: int = 50, **kwargs): 9 | super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs) 10 | 11 | self.balance = balance 12 | self.max_iter = max_iter 13 | 14 | def num_classes_mismatch(self): 15 | raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.") 16 | 17 | def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): 18 | if batch_idx % self.args.print_freq == 0: 19 | print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( 20 | epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) 21 | 22 | def finish_run(self): 23 | self.model.no_grad = False 24 | 25 | # Create a data loader for self.dst_train with batch size self.args.selection_batch 26 | batch_loader = torch.utils.data.DataLoader(self.dst_train, batch_size=self.args.selection_batch 27 | , num_workers=self.args.workers) 28 | 29 | r = np.zeros(self.n_train, dtype=np.float32) 30 | batch_num = len(batch_loader) 31 | for i, (inputs, targets) in enumerate(batch_loader): 32 | if i % self.args.print_freq == 0: 33 | print('| Selecting Batch [%3d/%3d]' % (i + 1, batch_num)) 34 | r[(i * self.args.selection_batch):(i * self.args.selection_batch + targets.shape[0])] = self.deep_fool( 35 | inputs) 36 | 37 | if self.balance: 38 | selection_result = np.array([], dtype=np.int64) 39 | for c in range(self.args.num_classes): 40 | class_index = np.arange(self.n_train)[self.dst_train.targets == c] 41 | selection_result = np.append(selection_result, class_index[ 42 | r[class_index].argsort()[:round(len(class_index) * self.fraction)]]) 43 | else: 44 | selection_result = r.argsort()[:self.coreset_size] 45 | return {"indices": selection_result, "scores": r} 46 | 47 | def deep_fool(self, inputs): 48 | # Run the DeepFool algorithm. 49 | self.model.eval() 50 | 51 | # Initialize a boolean mask indicating if selection has been stopped at corresponding positions. 52 | sample_size = inputs.shape[0] 53 | boolean_mask = np.ones(sample_size, dtype=bool) 54 | all_idx = np.arange(sample_size) 55 | 56 | # A matrix to store total perturbations. 57 | r_tot = np.zeros([sample_size, inputs.shape[1] * inputs.shape[2] * inputs.shape[3]]) 58 | 59 | # Set requires_grad for inputs.
60 | cur_inputs = inputs.requires_grad_(True).to(self.args.device) 61 | 62 | original_shape = inputs.shape[1:] 63 | 64 | # Set requires_grad to False for all network parameters to accelerate autograd 65 | for p in self.model.parameters(): 66 | p.requires_grad_(False) 67 | 68 | self.model.no_grad = True 69 | first_preds = self.model(cur_inputs).argmax(dim=1) 70 | self.model.no_grad = False 71 | 72 | for i in range(self.max_iter): 73 | f_all = self.model(cur_inputs) 74 | 75 | w_k = [] 76 | for c in range(self.args.num_classes): 77 | w_k.append(torch.autograd.grad(f_all[:, c].sum(), cur_inputs, 78 | retain_graph=False if c + 1 == self.args.num_classes else True)[ 79 | 0].flatten(1)) 80 | w_k = torch.stack(w_k, dim=0) 81 | w_k = w_k - w_k[first_preds, boolean_mask[boolean_mask]].unsqueeze(0) 82 | w_k_norm = w_k.norm(dim=2) 83 | 84 | w_k_norm[first_preds, boolean_mask[ 85 | boolean_mask]] = 1. # Set w_k_norm for preds positions to 1. to avoid division by zero. 86 | 87 | l_all = (f_all - f_all[boolean_mask[boolean_mask], first_preds].unsqueeze(1)).detach().abs() / w_k_norm.T 88 | l_all[boolean_mask[ 89 | boolean_mask], first_preds] = np.inf # Set l_k for preds positions to inf, as the argmin for each 90 | # row will be calculated soon. 91 | 92 | l_hat = l_all.argmin(dim=1) 93 | r_i = l_all[boolean_mask[boolean_mask], l_hat].unsqueeze(1) / w_k_norm[ 94 | l_hat, boolean_mask[boolean_mask]].T.unsqueeze(1) * w_k[l_hat, boolean_mask[boolean_mask]] 95 | 96 | # Update r_tot values. 97 | r_tot[boolean_mask] += r_i.cpu().numpy() 98 | 99 | cur_inputs += r_i.reshape([r_i.shape[0]] + list(original_shape)) 100 | 101 | # Re-input the updated sample into the network and get new predictions. 102 | self.model.no_grad = True 103 | preds = self.model(cur_inputs).argmax(dim=1) 104 | self.model.no_grad = False 105 | 106 | # In DeepFool algorithm, the iteration stops when the updated sample produces a different prediction 107 | # in the model. 108 | index_unfinished = (preds == first_preds) 109 | if torch.all(~index_unfinished): 110 | break 111 | 112 | cur_inputs = cur_inputs[index_unfinished] 113 | first_preds = first_preds[index_unfinished] 114 | boolean_mask[all_idx[boolean_mask][~index_unfinished.cpu().numpy()]] = False 115 | 116 | return (r_tot * r_tot).sum(axis=1) 117 | 118 | def select(self, **kwargs): 119 | selection_result = self.run() 120 | return selection_result 121 | -------------------------------------------------------------------------------- /deepcore/methods/earlytrain.py: -------------------------------------------------------------------------------- 1 | from .coresetmethod import CoresetMethod 2 | import torch, time 3 | from torch import nn 4 | import numpy as np 5 | from copy import deepcopy 6 | from .. import nets 7 | from torchvision import transforms 8 | 9 | 10 | class EarlyTrain(CoresetMethod): 11 | ''' 12 | Core code for training related to coreset selection methods when pre-training is required. 13 | ''' 14 | 15 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, 16 | torchvision_pretrain: bool = False, dst_pretrain_dict: dict = {}, fraction_pretrain=1., dst_test=None, 17 | **kwargs): 18 | super().__init__(dst_train, args, fraction, random_seed) 19 | self.epochs = epochs 20 | self.n_train = len(dst_train) 21 | self.coreset_size = round(self.n_train * fraction) 22 | self.specific_model = specific_model 23 | 24 | if fraction_pretrain <= 0.
or fraction_pretrain > 1.: 25 | raise ValueError("Illegal pretrain fraction value.") 26 | self.fraction_pretrain = fraction_pretrain 27 | 28 | if dst_pretrain_dict.__len__() != 0: 29 | dict_keys = dst_pretrain_dict.keys() 30 | if 'im_size' not in dict_keys or 'channel' not in dict_keys or 'dst_train' not in dict_keys or \ 31 | 'num_classes' not in dict_keys: 32 | raise AttributeError( 33 | 'Argument dst_pretrain_dict must contain im_size, channel, dst_train and num_classes.') 34 | if dst_pretrain_dict['im_size'][0] != args.im_size[0] or dst_pretrain_dict['im_size'][1] != args.im_size[1]: 35 | raise ValueError("im_size of pretrain dataset does not match that of the training dataset.") 36 | if dst_pretrain_dict['channel'] != args.channel: 37 | raise ValueError("channel of pretrain dataset does not match that of the training dataset.") 38 | if dst_pretrain_dict['num_classes'] != args.num_classes: 39 | self.num_classes_mismatch() 40 | 41 | self.dst_pretrain_dict = dst_pretrain_dict 42 | self.torchvision_pretrain = torchvision_pretrain 43 | self.if_dst_pretrain = (len(self.dst_pretrain_dict) != 0) 44 | 45 | if torchvision_pretrain: 46 | # Pretrained models in torchvision only accept 224*224 inputs, therefore we resize current 47 | # datasets to 224*224. 48 | if args.im_size[0] != 224 or args.im_size[1] != 224: 49 | self.dst_train = deepcopy(dst_train) 50 | self.dst_train.transform = transforms.Compose([self.dst_train.transform, transforms.Resize(224)]) 51 | if self.if_dst_pretrain: 52 | self.dst_pretrain_dict['dst_train'] = deepcopy(dst_pretrain_dict['dst_train']) 53 | self.dst_pretrain_dict['dst_train'].transform = transforms.Compose( 54 | [self.dst_pretrain_dict['dst_train'].transform, transforms.Resize(224)]) 55 | if self.if_dst_pretrain: 56 | self.n_pretrain = len(self.dst_pretrain_dict['dst_train']) 57 | self.n_pretrain_size = round( 58 | self.fraction_pretrain * (self.n_pretrain if self.if_dst_pretrain else self.n_train)) 59 | self.dst_test = dst_test 60 | 61 | def train(self, epoch, list_of_train_idx, **kwargs): 62 | """ Train model for one epoch """ 63 | 64 | self.before_train() 65 | self.model.train() 66 | 67 | print('\n=> Training Epoch #%d' % epoch) 68 | trainset_permutation_inds = np.random.permutation(list_of_train_idx) 69 | batch_sampler = torch.utils.data.BatchSampler(trainset_permutation_inds, batch_size=self.args.selection_batch, 70 | drop_last=False) 71 | trainset_permutation_inds = list(batch_sampler) 72 | 73 | train_loader = torch.utils.data.DataLoader(self.dst_pretrain_dict['dst_train'] if self.if_dst_pretrain 74 | else self.dst_train, shuffle=False, batch_sampler=batch_sampler, 75 | num_workers=self.args.workers, pin_memory=True) 76 | 77 | for i, (inputs, targets) in enumerate(train_loader): 78 | inputs, targets = inputs.to(self.args.device), targets.to(self.args.device) 79 | 80 | # Forward propagation, compute loss, get predictions 81 | self.model_optimizer.zero_grad() 82 | outputs = self.model(inputs) 83 | loss = self.criterion(outputs, targets) 84 | 85 | self.after_loss(outputs, loss, targets, trainset_permutation_inds[i], epoch) 86 | 87 | # Update loss, backward propagate, update optimizer 88 | loss = loss.mean() 89 | 90 | self.while_update(outputs, loss, targets, epoch, i, self.args.selection_batch) 91 | 92 | loss.backward() 93 | self.model_optimizer.step() 94 | return self.finish_train() 95 | 96 | def run(self): 97 | torch.manual_seed(self.random_seed) 98 | np.random.seed(self.random_seed) 99 | self.train_indx = np.arange(self.n_train) 100 | 101 | # Setup model and
loss 102 | self.model = nets.__dict__[self.args.model if self.specific_model is None else self.specific_model]( 103 | self.args.channel, self.dst_pretrain_dict["num_classes"] if self.if_dst_pretrain else self.num_classes, 104 | pretrained=self.torchvision_pretrain, 105 | im_size=(224, 224) if self.torchvision_pretrain else self.args.im_size).to(self.args.device) 106 | 107 | if self.args.device == "cpu": 108 | print("Using CPU.") 109 | elif self.args.gpu is not None: 110 | torch.cuda.set_device(self.args.gpu[0]) 111 | self.model = nets.nets_utils.MyDataParallel(self.model, device_ids=self.args.gpu) 112 | elif torch.cuda.device_count() > 1: 113 | self.model = nets.nets_utils.MyDataParallel(self.model).cuda() 114 | 115 | self.criterion = nn.CrossEntropyLoss().to(self.args.device) 116 | self.criterion.__init__() 117 | 118 | # Setup optimizer 119 | if self.args.selection_optimizer == "SGD": 120 | self.model_optimizer = torch.optim.SGD(self.model.parameters(), lr=self.args.selection_lr, 121 | momentum=self.args.selection_momentum, 122 | weight_decay=self.args.selection_weight_decay, 123 | nesterov=self.args.selection_nesterov) 124 | elif self.args.selection_optimizer == "Adam": 125 | self.model_optimizer = torch.optim.Adam(self.model.parameters(), lr=self.args.selection_lr, 126 | weight_decay=self.args.selection_weight_decay) 127 | else: 128 | self.model_optimizer = torch.optim.__dict__[self.args.selection_optimizer](self.model.parameters(), 129 | lr=self.args.selection_lr, 130 | momentum=self.args.selection_momentum, 131 | weight_decay=self.args.selection_weight_decay, 132 | nesterov=self.args.selection_nesterov) 133 | 134 | self.before_run() 135 | 136 | for epoch in range(self.epochs): 137 | list_of_train_idx = np.random.choice(np.arange(self.n_pretrain if self.if_dst_pretrain else self.n_train), 138 | self.n_pretrain_size, replace=False) 139 | self.before_epoch() 140 | self.train(epoch, list_of_train_idx) 141 | if self.dst_test is not None and self.args.selection_test_interval > 0 and ( 142 | epoch + 1) % self.args.selection_test_interval == 0: 143 | self.test(epoch) 144 | self.after_epoch() 145 | 146 | return self.finish_run() 147 | 148 | def test(self, epoch): 149 | self.model.no_grad = True 150 | self.model.eval() 151 | 152 | test_loader = torch.utils.data.DataLoader(self.dst_test if self.args.selection_test_fraction == 1. else 153 | torch.utils.data.Subset(self.dst_test, np.random.choice( 154 | np.arange(len(self.dst_test)), 155 | round(len(self.dst_test) * self.args.selection_test_fraction), 156 | replace=False)), 157 | batch_size=self.args.selection_batch, shuffle=False, 158 | num_workers=self.args.workers, pin_memory=True) 159 | correct = 0. 160 | total = 0. 161 | 162 | print('\n=> Testing Epoch #%d' % epoch) 163 | 164 | for batch_idx, (input, target) in enumerate(test_loader): 165 | output = self.model(input.to(self.args.device)) 166 | loss = self.criterion(output, target.to(self.args.device)).sum() 167 | 168 | predicted = torch.max(output.data, 1).indices.cpu() 169 | correct += predicted.eq(target).sum().item() 170 | total += target.size(0) 171 | 172 | if batch_idx % self.args.print_freq == 0: 173 | print('| Test Epoch [%3d/%3d] Iter[%3d/%3d]\t\tTest Loss: %.4f Test Acc: %.3f%%' % ( 174 | epoch, self.epochs, batch_idx + 1, (round(len(self.dst_test) * self.args.selection_test_fraction) // 175 | self.args.selection_batch) + 1, loss.item(), 176 | 100. 
* correct / total)) 177 | 178 | self.model.no_grad = False 179 | 180 | def num_classes_mismatch(self): 181 | pass 182 | 183 | def before_train(self): 184 | pass 185 | 186 | def after_loss(self, outputs, loss, targets, batch_inds, epoch): 187 | pass 188 | 189 | def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): 190 | pass 191 | 192 | def finish_train(self): 193 | pass 194 | 195 | def before_epoch(self): 196 | pass 197 | 198 | def after_epoch(self): 199 | pass 200 | 201 | def before_run(self): 202 | pass 203 | 204 | def finish_run(self): 205 | pass 206 | 207 | def select(self, **kwargs): 208 | selection_result = self.run() 209 | return selection_result 210 | -------------------------------------------------------------------------------- /deepcore/methods/forgetting.py: -------------------------------------------------------------------------------- 1 | from .earlytrain import EarlyTrain 2 | import torch, time 3 | from torch import nn 4 | import numpy as np 5 | 6 | 7 | # Acknowledgement to 8 | # https://github.com/mtoneva/example_forgetting 9 | 10 | class Forgetting(EarlyTrain): 11 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, balance=True, 12 | dst_test=None, **kwargs): 13 | super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model=specific_model, 14 | dst_test=dst_test) 15 | 16 | self.balance = balance 17 | 18 | def get_hms(self, seconds): 19 | # Format time for printing purposes 20 | 21 | m, s = divmod(seconds, 60) 22 | h, m = divmod(m, 60) 23 | 24 | return h, m, s 25 | 26 | def before_train(self): 27 | self.train_loss = 0. 28 | self.correct = 0. 29 | self.total = 0. 30 | 31 | def after_loss(self, outputs, loss, targets, batch_inds, epoch): 32 | with torch.no_grad(): 33 | _, predicted = torch.max(outputs.data, 1) 34 | 35 | cur_acc = (predicted == targets).clone().detach().requires_grad_(False).type(torch.float32) 36 | self.forgetting_events[torch.tensor(batch_inds)[(self.last_acc[batch_inds]-cur_acc)>0.01]]+=1. 37 | self.last_acc[batch_inds] = cur_acc 38 | 39 | def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): 40 | self.train_loss += loss.item() 41 | self.total += targets.size(0) 42 | _, predicted = torch.max(outputs.data, 1) 43 | self.correct += predicted.eq(targets.data).cpu().sum() 44 | 45 | if batch_idx % self.args.print_freq == 0: 46 | print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%' % ( 47 | epoch, self.epochs, batch_idx + 1, (self.n_train // batch_size) + 1, loss.item(), 48 | 100. 
* self.correct.item() / self.total)) 49 | 50 | def before_epoch(self): 51 | self.start_time = time.time() 52 | 53 | def after_epoch(self): 54 | epoch_time = time.time() - self.start_time 55 | self.elapsed_time += epoch_time 56 | print('| Elapsed time : %d:%02d:%02d' % (self.get_hms(self.elapsed_time))) 57 | 58 | def before_run(self): 59 | self.elapsed_time = 0 60 | 61 | self.forgetting_events = torch.zeros(self.n_train, requires_grad=False).to(self.args.device) 62 | self.last_acc = torch.zeros(self.n_train, requires_grad=False).to(self.args.device) 63 | 64 | def finish_run(self): 65 | pass 66 | 67 | def select(self, **kwargs): 68 | self.run() 69 | 70 | if not self.balance: 71 | top_examples = self.train_indx[np.argsort(self.forgetting_events.cpu().numpy())][::-1][:self.coreset_size] 72 | else: 73 | top_examples = np.array([], dtype=np.int64) 74 | for c in range(self.num_classes): 75 | c_indx = self.train_indx[self.dst_train.targets == c] 76 | budget = round(self.fraction * len(c_indx)) 77 | top_examples = np.append(top_examples, 78 | c_indx[np.argsort(self.forgetting_events[c_indx].cpu().numpy())[::-1][:budget]]) 79 | 80 | return {"indices": top_examples, "scores": self.forgetting_events} 81 | -------------------------------------------------------------------------------- /deepcore/methods/full.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .coresetmethod import CoresetMethod 3 | 4 | 5 | class Full(CoresetMethod): 6 | def __init__(self, dst_train, args, fraction, random_seed, **kwargs): 7 | self.n_train = len(dst_train) 8 | 9 | def select(self, **kwargs): 10 | return {"indices": np.arange(self.n_train)} 11 | -------------------------------------------------------------------------------- /deepcore/methods/glister.py: -------------------------------------------------------------------------------- 1 | from .earlytrain import EarlyTrain 2 | from .methods_utils import submodular_optimizer 3 | import torch 4 | import numpy as np 5 | from ..nets.nets_utils import MyDataParallel 6 | 7 | 8 | class Glister(EarlyTrain): 9 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, 10 | balance: bool = True, greedy="LazyGreedy", eta=None, dst_val=None, **kwargs): 11 | super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs) 12 | 13 | self.balance = balance 14 | self.eta = args.lr if eta is None else eta 15 | 16 | self.dst_val = dst_train if dst_val is None else dst_val 17 | self.n_val = len(self.dst_val) 18 | 19 | if greedy not in submodular_optimizer.optimizer_choices: 20 | raise ModuleNotFoundError("Greedy optimizer not found.") 21 | self._greedy = greedy 22 | 23 | def calc_gradient(self, index=None, val=False, record_val_detail=False): 24 | ''' 25 | Calculate gradients matrix on current network for training or validation dataset. 
26 | ''' 27 | 28 | self.model.eval() 29 | 30 | if val: 31 | batch_loader = torch.utils.data.DataLoader( 32 | self.dst_val if index is None else torch.utils.data.Subset(self.dst_val, index), 33 | batch_size=self.args.selection_batch, num_workers=self.args.workers) 34 | else: 35 | batch_loader = torch.utils.data.DataLoader( 36 | self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), 37 | batch_size=self.args.selection_batch, num_workers=self.args.workers) 38 | 39 | self.embedding_dim = self.model.get_last_layer().in_features 40 | gradients = [] 41 | if val and record_val_detail: 42 | self.init_out = [] 43 | self.init_emb = [] 44 | self.init_y = [] 45 | 46 | for i, (input, targets) in enumerate(batch_loader): 47 | self.model_optimizer.zero_grad() 48 | outputs = self.model(input.to(self.args.device)) 49 | loss = self.criterion(outputs.requires_grad_(True), targets.to(self.args.device)).sum() 50 | batch_num = targets.shape[0] 51 | with torch.no_grad(): 52 | bias_parameters_grads = torch.autograd.grad(loss, outputs)[0] 53 | weight_parameters_grads = self.model.embedding_recorder.embedding.view(batch_num, 1, 54 | self.embedding_dim).repeat(1, self.args.num_classes, 1) *\ 55 | bias_parameters_grads.view( 56 | batch_num, self.args.num_classes, 1).repeat(1, 1, self.embedding_dim) 57 | gradients.append(torch.cat( 58 | [bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1).cpu()) 59 | 60 | if val and record_val_detail: 61 | self.init_out.append(outputs.cpu()) 62 | self.init_emb.append(self.model.embedding_recorder.embedding.cpu()) 63 | self.init_y.append(targets) 64 | 65 | gradients = torch.cat(gradients, dim=0) 66 | if val: 67 | self.val_grads = torch.mean(gradients, dim=0) 68 | if self.dst_val == self.dst_train: 69 | # No validation set was provided while instantiating Glister, so self.dst_val == self.dst_train 70 | self.train_grads = gradients 71 | else: 72 | self.train_grads = gradients 73 | if val and record_val_detail: 74 | with torch.no_grad(): 75 | self.init_out = torch.cat(self.init_out, dim=0) 76 | self.init_emb = torch.cat(self.init_emb, dim=0) 77 | self.init_y = torch.cat(self.init_y) 78 | 79 | self.model.train() 80 | 81 | def update_val_gradients(self, new_selection, selected_for_train): 82 | 83 | sum_selected_train_gradients = torch.mean(self.train_grads[selected_for_train], dim=0) 84 | 85 | new_outputs = self.init_out - self.eta * sum_selected_train_gradients[:self.args.num_classes].view(1, 86 | -1).repeat(self.init_out.shape[0], 1) - self.eta * torch.matmul(self.init_emb, 87 | sum_selected_train_gradients[self.args.num_classes:].view(self.args.num_classes, -1).T) 88 | 89 | sample_num = new_outputs.shape[0] 90 | gradients = torch.zeros([sample_num, self.args.num_classes * (self.embedding_dim + 1)], requires_grad=False) 91 | i = 0 92 | while i * self.args.selection_batch < sample_num: 93 | batch_indx = np.arange(sample_num)[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, 94 | sample_num)] 95 | new_out_puts_batch = new_outputs[batch_indx].clone().detach().requires_grad_(True) 96 | loss = self.criterion(new_out_puts_batch, self.init_y[batch_indx]) 97 | batch_num = len(batch_indx) 98 | bias_parameters_grads = torch.autograd.grad(loss.sum(), new_out_puts_batch, retain_graph=True)[0] 99 | 100 | weight_parameters_grads = self.init_emb[batch_indx].view(batch_num, 1, self.embedding_dim).repeat(1, 101 | self.args.num_classes, 1) * bias_parameters_grads.view(batch_num, 102 | self.args.num_classes, 1).repeat(1, 1, self.embedding_dim) 
103 | gradients[batch_indx] = torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1).cpu() 104 | i += 1 105 | 106 | self.val_grads = torch.mean(gradients, dim=0) 107 | 108 | def finish_run(self): 109 | if isinstance(self.model, MyDataParallel): 110 | self.model = self.model.module 111 | 112 | self.model.embedding_recorder.record_embedding = True 113 | self.model.no_grad = True 114 | 115 | self.train_indx = np.arange(self.n_train) 116 | self.val_indx = np.arange(self.n_val) 117 | if self.balance: 118 | selection_result = np.array([], dtype=np.int64) 119 | #weights = np.array([], dtype=np.float32) 120 | for c in range(self.num_classes): 121 | c_indx = self.train_indx[self.dst_train.targets == c] 122 | c_val_inx = self.val_indx[self.dst_val.targets == c] 123 | self.calc_gradient(index=c_val_inx, val=True, record_val_detail=True) 124 | if self.dst_val != self.dst_train: 125 | self.calc_gradient(index=c_indx) 126 | submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=c_indx, 127 | budget=round(self.fraction * len(c_indx))) 128 | c_selection_result = submod_optimizer.select(gain_function=lambda idx_gain, selected, 129 | **kwargs: torch.matmul(self.train_grads[idx_gain], 130 | self.val_grads.view(-1, 1)).detach().cpu().numpy(). 131 | flatten(), update_state=self.update_val_gradients) 132 | selection_result = np.append(selection_result, c_selection_result) 133 | 134 | else: 135 | self.calc_gradient(val=True, record_val_detail=True) 136 | if self.dst_val != self.dst_train: 137 | self.calc_gradient() 138 | 139 | submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, 140 | index=np.arange(self.n_train), budget=self.coreset_size) 141 | selection_result = submod_optimizer.select(gain_function=lambda idx_gain, selected, 142 | **kwargs: torch.matmul(self.train_grads[idx_gain], 143 | self.val_grads.view(-1, 1)).detach().cpu().numpy().flatten(), 144 | update_state=self.update_val_gradients) 145 | 146 | self.model.embedding_recorder.record_embedding = False 147 | self.model.no_grad = False 148 | return {"indices": selection_result} 149 | 150 | def num_classes_mismatch(self): 151 | raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.") 152 | 153 | def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): 154 | if batch_idx % self.args.print_freq == 0: 155 | print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( 156 | epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) 157 | 158 | -------------------------------------------------------------------------------- /deepcore/methods/gradmatch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from scipy.linalg import lstsq 4 | from scipy.optimize import nnls 5 | from .earlytrain import EarlyTrain 6 | from ..nets.nets_utils import MyDataParallel 7 | 8 | 9 | # https://github.com/krishnatejakk/GradMatch 10 | 11 | class GradMatch(EarlyTrain): 12 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, 13 | balance=True, dst_val=None, lam: float = 1., **kwargs): 14 | super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs) 15 | self.balance = balance 16 | self.dst_val = dst_val 17 | 18 | def num_classes_mismatch(self): 19 | raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.") 20 | 21 | def 
while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): 22 | if batch_idx % self.args.print_freq == 0: 23 | print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( 24 | epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) 25 | 26 | def orthogonal_matching_pursuit(self, A, b, budget: int, lam: float = 1.): 27 | '''approximately solves min_x |x|_0 s.t. Ax=b using Orthogonal Matching Pursuit 28 | Acknowledgement to: 29 | https://github.com/krishnatejakk/GradMatch/blob/main/GradMatch/selectionstrategies/helpers/omp_solvers.py 30 | Args: 31 | A: design matrix of size (d, n) 32 | b: measurement vector of length d 33 | budget: selection budget 34 | lam: regularization coef. for the final output vector 35 | Returns: 36 | vector of length n 37 | ''' 38 | with torch.no_grad(): 39 | d, n = A.shape 40 | if budget <= 0: 41 | budget = 0 42 | elif budget > n: 43 | budget = n 44 | 45 | x = np.zeros(n, dtype=np.float32) 46 | resid = b.clone() 47 | indices = [] 48 | boolean_mask = torch.ones(n, dtype=bool, device="cuda") 49 | all_idx = torch.arange(n, device='cuda') 50 | 51 | for i in range(budget): 52 | if i % self.args.print_freq == 0: 53 | print("| Selecting [%3d/%3d]" % (i + 1, budget)) 54 | projections = torch.matmul(A.T, resid) 55 | index = torch.argmax(projections[boolean_mask]) 56 | index = all_idx[boolean_mask][index] 57 | 58 | indices.append(index.item()) 59 | boolean_mask[index] = False 60 | 61 | if indices.__len__() == 1: 62 | A_i = A[:, index] 63 | x_i = projections[index] / torch.dot(A_i, A_i).view(-1) 64 | A_i = A[:, index].view(1, -1) 65 | else: 66 | A_i = torch.cat((A_i, A[:, index].view(1, -1)), dim=0) 67 | temp = torch.matmul(A_i, torch.transpose(A_i, 0, 1)) + lam * torch.eye(A_i.shape[0], device="cuda") 68 | x_i, _ = torch.lstsq(torch.matmul(A_i, b).view(-1, 1), temp) 69 | resid = b - torch.matmul(torch.transpose(A_i, 0, 1), x_i).view(-1) 70 | if budget > 1: 71 | x_i = nnls(temp.cpu().numpy(), torch.matmul(A_i, b).view(-1).cpu().numpy())[0] 72 | x[indices] = x_i 73 | elif budget == 1: 74 | x[indices[0]] = 1. 75 | return x 76 | 77 | def orthogonal_matching_pursuit_np(self, A, b, budget: int, lam: float = 1.): 78 | '''approximately solves min_x |x|_0 s.t. Ax=b using Orthogonal Matching Pursuit 79 | Acknowledgement to: 80 | https://github.com/krishnatejakk/GradMatch/blob/main/GradMatch/selectionstrategies/helpers/omp_solvers.py 81 | Args: 82 | A: design matrix of size (d, n) 83 | b: measurement vector of length d 84 | budget: selection budget 85 | lam: regularization coef. 
for the final output vector 86 | Returns: 87 | vector of length n 88 | ''' 89 | d, n = A.shape 90 | if budget <= 0: 91 | budget = 0 92 | elif budget > n: 93 | budget = n 94 | 95 | x = np.zeros(n, dtype=np.float32) 96 | resid = np.copy(b) 97 | indices = [] 98 | boolean_mask = np.ones(n, dtype=bool) 99 | all_idx = np.arange(n) 100 | 101 | for i in range(budget): 102 | if i % self.args.print_freq == 0: 103 | print("| Selecting [%3d/%3d]" % (i + 1, budget)) 104 | projections = A.T.dot(resid) 105 | index = np.argmax(projections[boolean_mask]) 106 | index = all_idx[boolean_mask][index] 107 | 108 | indices.append(index.item()) 109 | boolean_mask[index] = False 110 | 111 | if indices.__len__() == 1: 112 | A_i = A[:, index] 113 | x_i = projections[index] / A_i.T.dot(A_i) 114 | else: 115 | A_i = np.vstack([A_i, A[:, index]]) 116 | x_i = lstsq(A_i.dot(A_i.T) + lam * np.identity(A_i.shape[0]), A_i.dot(b))[0] 117 | resid = b - A_i.T.dot(x_i) 118 | if budget > 1: 119 | x_i = nnls(A_i.dot(A_i.T) + lam * np.identity(A_i.shape[0]), A_i.dot(b))[0] 120 | x[indices] = x_i 121 | elif budget == 1: 122 | x[indices[0]] = 1. 123 | return x 124 | 125 | def calc_gradient(self, index=None, val=False): 126 | self.model.eval() 127 | if val: 128 | batch_loader = torch.utils.data.DataLoader( 129 | self.dst_val if index is None else torch.utils.data.Subset(self.dst_val, index), 130 | batch_size=self.args.selection_batch, num_workers=self.args.workers) 131 | sample_num = len(self.dst_val.targets) if index is None else len(index) 132 | else: 133 | batch_loader = torch.utils.data.DataLoader( 134 | self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), 135 | batch_size=self.args.selection_batch, num_workers=self.args.workers) 136 | sample_num = self.n_train if index is None else len(index) 137 | 138 | self.embedding_dim = self.model.get_last_layer().in_features 139 | gradients = torch.zeros([sample_num, self.args.num_classes * (self.embedding_dim + 1)], 140 | requires_grad=False, device=self.args.device) 141 | 142 | for i, (input, targets) in enumerate(batch_loader): 143 | self.model_optimizer.zero_grad() 144 | outputs = self.model(input.to(self.args.device)).requires_grad_(True) 145 | loss = self.criterion(outputs, targets.to(self.args.device)).sum() 146 | batch_num = targets.shape[0] 147 | with torch.no_grad(): 148 | bias_parameters_grads = torch.autograd.grad(loss, outputs, retain_graph=True)[0].cpu() 149 | weight_parameters_grads = self.model.embedding_recorder.embedding.cpu().view(batch_num, 1, 150 | self.embedding_dim).repeat(1,self.args.num_classes,1) *\ 151 | bias_parameters_grads.view(batch_num, self.args.num_classes, 152 | 1).repeat(1, 1, self.embedding_dim) 153 | gradients[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num)] =\ 154 | torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1) 155 | 156 | return gradients 157 | 158 | def finish_run(self): 159 | if isinstance(self.model, MyDataParallel): 160 | self.model = self.model.module 161 | 162 | self.model.no_grad = True 163 | with self.model.embedding_recorder: 164 | if self.dst_val is not None: 165 | val_num = len(self.dst_val.targets) 166 | 167 | if self.balance: 168 | selection_result = np.array([], dtype=np.int64) 169 | weights = np.array([], dtype=np.float32) 170 | for c in range(self.args.num_classes): 171 | class_index = np.arange(self.n_train)[self.dst_train.targets == c] 172 | cur_gradients = self.calc_gradient(class_index) 173 | if self.dst_val is not None: 174 | # Also 
calculate gradients of the validation set. 175 | val_class_index = np.arange(val_num)[self.dst_val.targets == c] 176 | cur_val_gradients = torch.mean(self.calc_gradient(val_class_index, val=True), dim=0) 177 | else: 178 | cur_val_gradients = torch.mean(cur_gradients, dim=0) 179 | if self.args.device == "cpu": 180 | # Compute OMP on numpy 181 | cur_weights = self.orthogonal_matching_pursuit_np(cur_gradients.numpy().T, 182 | cur_val_gradients.numpy(), 183 | budget=round(len(class_index) * self.fraction)) 184 | else: 185 | cur_weights = self.orthogonal_matching_pursuit(cur_gradients.to(self.args.device).T, 186 | cur_val_gradients.to(self.args.device), 187 | budget=round(len(class_index) * self.fraction)) 188 | selection_result = np.append(selection_result, class_index[np.nonzero(cur_weights)[0]]) 189 | weights = np.append(weights, cur_weights[np.nonzero(cur_weights)[0]]) 190 | else: 191 | cur_gradients = self.calc_gradient() 192 | if self.dst_val is not None: 193 | # Also calculate gradients of the validation set. 194 | cur_val_gradients = torch.mean(self.calc_gradient(val=True), dim=0) 195 | else: 196 | cur_val_gradients = torch.mean(cur_gradients, dim=0) 197 | if self.args.device == "cpu": 198 | # Compute OMP on numpy 199 | cur_weights = self.orthogonal_matching_pursuit_np(cur_gradients.numpy().T, 200 | cur_val_gradients.numpy(), 201 | budget=self.coreset_size) 202 | else: 203 | cur_weights = self.orthogonal_matching_pursuit(cur_gradients.T, cur_val_gradients, 204 | budget=self.coreset_size) 205 | selection_result = np.nonzero(cur_weights)[0] 206 | weights = cur_weights[selection_result] 207 | self.model.no_grad = False 208 | return {"indices": selection_result, "weights": weights} 209 | 210 | def select(self, **kwargs): 211 | selection_result = self.run() 212 | return selection_result 213 | 214 | -------------------------------------------------------------------------------- /deepcore/methods/grand.py: -------------------------------------------------------------------------------- 1 | from .earlytrain import EarlyTrain 2 | import torch, time 3 | import numpy as np 4 | from ..nets.nets_utils import MyDataParallel 5 | 6 | 7 | class GraNd(EarlyTrain): 8 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, repeat=10, 9 | specific_model=None, balance=False, **kwargs): 10 | super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model) 11 | self.epochs = epochs 12 | self.n_train = len(dst_train) 13 | self.coreset_size = round(self.n_train * fraction) 14 | self.specific_model = specific_model 15 | self.repeat = repeat 16 | 17 | self.balance = balance 18 | 19 | def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): 20 | if batch_idx % self.args.print_freq == 0: 21 | print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( 22 | epoch, self.epochs, batch_idx + 1, (self.n_train // batch_size) + 1, loss.item())) 23 | 24 | def before_run(self): 25 | if isinstance(self.model, MyDataParallel): 26 | self.model = self.model.module 27 | 28 | def finish_run(self): 29 | self.model.embedding_recorder.record_embedding = True # recording embedding vector 30 | 31 | self.model.eval() 32 | 33 | embedding_dim = self.model.get_last_layer().in_features 34 | batch_loader = torch.utils.data.DataLoader( 35 | self.dst_train, batch_size=self.args.selection_batch, num_workers=self.args.workers) 36 | sample_num = self.n_train 37 | 38 | for i, (input, targets) in enumerate(batch_loader): 39 | self.model_optimizer.zero_grad() 40 | outputs = 
self.model(input.to(self.args.device)) 41 | loss = self.criterion(outputs.requires_grad_(True), 42 | targets.to(self.args.device)).sum() 43 | batch_num = targets.shape[0] 44 | with torch.no_grad(): 45 | bias_parameters_grads = torch.autograd.grad(loss, outputs)[0] 46 | self.norm_matrix[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num), 47 | self.cur_repeat] = torch.norm(torch.cat([bias_parameters_grads, ( 48 | self.model.embedding_recorder.embedding.view(batch_num, 1, embedding_dim).repeat(1, 49 | self.args.num_classes, 1) * bias_parameters_grads.view( 50 | batch_num, self.args.num_classes, 1).repeat(1, 1, embedding_dim)). 51 | view(batch_num, -1)], dim=1), dim=1, p=2) 52 | 53 | self.model.train() 54 | 55 | self.model.embedding_recorder.record_embedding = False 56 | 57 | def select(self, **kwargs): 58 | # Initialize a matrix to save norms of each sample on independent runs 59 | self.norm_matrix = torch.zeros([self.n_train, self.repeat], requires_grad=False).to(self.args.device) 60 | 61 | for self.cur_repeat in range(self.repeat): 62 | self.run() 63 | self.random_seed = self.random_seed + 5 64 | 65 | self.norm_mean = torch.mean(self.norm_matrix, dim=1).cpu().detach().numpy() 66 | if not self.balance: 67 | top_examples = self.train_indx[np.argsort(self.norm_mean)][::-1][:self.coreset_size] 68 | else: 69 | top_examples = np.array([], dtype=np.int64) 70 | for c in range(self.num_classes): 71 | c_indx = self.train_indx[self.dst_train.targets == c] 72 | budget = round(self.fraction * len(c_indx)) 73 | top_examples = np.append(top_examples, c_indx[np.argsort(self.norm_mean[c_indx])[::-1][:budget]]) 74 | 75 | return {"indices": top_examples, "scores": self.norm_mean} 76 | -------------------------------------------------------------------------------- /deepcore/methods/herding.py: -------------------------------------------------------------------------------- 1 | from .earlytrain import EarlyTrain 2 | import torch 3 | import numpy as np 4 | from .methods_utils import euclidean_dist 5 | from ..nets.nets_utils import MyDataParallel 6 | 7 | 8 | class Herding(EarlyTrain): 9 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, 10 | specific_model="ResNet18", balance: bool = False, metric="euclidean", **kwargs): 11 | super().__init__(dst_train, args, fraction, random_seed, epochs=epochs, specific_model=specific_model, **kwargs) 12 | 13 | if metric == "euclidean": 14 | self.metric = euclidean_dist 15 | elif callable(metric): 16 | self.metric = metric 17 | else: 18 | self.metric = euclidean_dist 19 | self.run = lambda: self.finish_run() 20 | 21 | def _construct_matrix(index=None): 22 | data_loader = torch.utils.data.DataLoader( 23 | self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), 24 | batch_size=self.n_train if index is None else len(index), num_workers=self.args.workers) 25 | inputs, _ = next(iter(data_loader)) 26 | return inputs.flatten(1).requires_grad_(False).to(self.args.device) 27 | 28 | self.construct_matrix = _construct_matrix 29 | 30 | self.balance = balance 31 | 32 | def num_classes_mismatch(self): 33 | raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.") 34 | 35 | def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): 36 | if batch_idx % self.args.print_freq == 0: 37 | print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( 38 | epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) 39 | 40 | 
def construct_matrix(self, index=None): 41 | self.model.eval() 42 | self.model.no_grad = True 43 | with torch.no_grad(): 44 | with self.model.embedding_recorder: 45 | sample_num = self.n_train if index is None else len(index) 46 | matrix = torch.zeros([sample_num, self.emb_dim], requires_grad=False).to(self.args.device) 47 | 48 | data_loader = torch.utils.data.DataLoader(self.dst_train if index is None else 49 | torch.utils.data.Subset(self.dst_train, index), 50 | batch_size=self.args.selection_batch, 51 | num_workers=self.args.workers) 52 | 53 | for i, (inputs, _) in enumerate(data_loader): 54 | self.model(inputs.to(self.args.device)) 55 | matrix[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num)] = self.model.embedding_recorder.embedding 56 | 57 | self.model.no_grad = False 58 | return matrix 59 | 60 | def before_run(self): 61 | self.emb_dim = self.model.get_last_layer().in_features 62 | 63 | def herding(self, matrix, budget: int, index=None): 64 | 65 | sample_num = matrix.shape[0] 66 | 67 | if budget < 0: 68 | raise ValueError("Illegal budget size.") 69 | elif budget > sample_num: 70 | budget = sample_num 71 | 72 | indices = np.arange(sample_num) 73 | with torch.no_grad(): 74 | mu = torch.mean(matrix, dim=0) 75 | select_result = np.zeros(sample_num, dtype=bool) 76 | 77 | for i in range(budget): 78 | if i % self.args.print_freq == 0: 79 | print("| Selecting [%3d/%3d]" % (i + 1, budget)) 80 | dist = self.metric(((i + 1) * mu - torch.sum(matrix[select_result], dim=0)).view(1, -1), 81 | matrix[~select_result]) 82 | p = torch.argmax(dist).item() 83 | p = indices[~select_result][p] 84 | select_result[p] = True 85 | if index is None: 86 | index = indices 87 | return index[select_result] 88 | 89 | def finish_run(self): 90 | if isinstance(self.model, MyDataParallel): 91 | self.model = self.model.module 92 | 93 | if self.balance: 94 | selection_result = np.array([], dtype=np.int32) 95 | for c in range(self.args.num_classes): 96 | class_index = np.arange(self.n_train)[self.dst_train.targets == c] 97 | 98 | selection_result = np.append(selection_result, self.herding(self.construct_matrix(class_index), 99 | budget=round(self.fraction * len(class_index)), index=class_index)) 100 | else: 101 | selection_result = self.herding(self.construct_matrix(), budget=self.coreset_size) 102 | return {"indices": selection_result} 103 | 104 | def select(self, **kwargs): 105 | selection_result = self.run() 106 | return selection_result 107 | 108 | -------------------------------------------------------------------------------- /deepcore/methods/kcentergreedy.py: -------------------------------------------------------------------------------- 1 | from .earlytrain import EarlyTrain 2 | import torch 3 | import numpy as np 4 | from .methods_utils import euclidean_dist 5 | from ..nets.nets_utils import MyDataParallel 6 | 7 | 8 | def k_center_greedy(matrix, budget: int, metric, device, random_seed=None, index=None, already_selected=None, 9 | print_freq: int = 20): 10 | if type(matrix) == torch.Tensor: 11 | assert matrix.dim() == 2 12 | elif type(matrix) == np.ndarray: 13 | assert matrix.ndim == 2 14 | matrix = torch.from_numpy(matrix).requires_grad_(False).to(device) 15 | 16 | sample_num = matrix.shape[0] 17 | assert sample_num >= 1 18 | 19 | if budget < 0: 20 | raise ValueError("Illegal budget size.") 21 | elif budget > sample_num: 22 | budget = sample_num 23 | 24 | if index is not None: 25 | assert matrix.shape[0] == len(index) 26 | else: 27 | index = np.arange(sample_num) 28 | 29 | 
assert callable(metric) 30 | 31 | already_selected = np.array([] if already_selected is None else already_selected)  # handle the None default safely 32 | 33 | with torch.no_grad(): 34 | np.random.seed(random_seed) 35 | if already_selected.__len__() == 0: 36 | select_result = np.zeros(sample_num, dtype=bool) 37 | # Randomly select one initial point. 38 | already_selected = [np.random.randint(0, sample_num)] 39 | budget -= 1 40 | select_result[already_selected] = True 41 | else: 42 | select_result = np.in1d(index, already_selected) 43 | 44 | num_of_already_selected = np.sum(select_result) 45 | 46 | # Initialize a (num_of_already_selected+budget-1)*sample_num matrix storing distances of pool points from 47 | # each clustering center. 48 | dis_matrix = -1 * torch.ones([num_of_already_selected + budget - 1, sample_num], requires_grad=False).to(device) 49 | 50 | dis_matrix[:num_of_already_selected, ~select_result] = metric(matrix[select_result], matrix[~select_result]) 51 | 52 | mins = torch.min(dis_matrix[:num_of_already_selected, :], dim=0).values 53 | 54 | for i in range(budget): 55 | if i % print_freq == 0: 56 | print("| Selecting [%3d/%3d]" % (i + 1, budget)) 57 | p = torch.argmax(mins).item() 58 | select_result[p] = True 59 | 60 | if i == budget - 1: 61 | break 62 | mins[p] = -1 63 | dis_matrix[num_of_already_selected + i, ~select_result] = metric(matrix[[p]], matrix[~select_result]) 64 | mins = torch.min(mins, dis_matrix[num_of_already_selected + i]) 65 | return index[select_result] 66 | 67 | 68 | class kCenterGreedy(EarlyTrain): 69 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=0, 70 | specific_model="ResNet18", balance: bool = False, already_selected=[], metric="euclidean", 71 | torchvision_pretrain: bool = True, **kwargs): 72 | super().__init__(dst_train, args, fraction, random_seed, epochs=epochs, specific_model=specific_model, 73 | torchvision_pretrain=torchvision_pretrain, **kwargs) 74 | 75 | if already_selected.__len__() != 0: 76 | if min(already_selected) < 0 or max(already_selected) >= self.n_train: 77 | raise ValueError("List of already selected points is out of bounds.") 78 | self.already_selected = np.array(already_selected) 79 | 80 | self.min_distances = None 81 | 82 | if metric == "euclidean": 83 | self.metric = euclidean_dist 84 | elif callable(metric): 85 | self.metric = metric 86 | else: 87 | self.metric = euclidean_dist 88 | self.run = lambda : self.finish_run() 89 | def _construct_matrix(index=None): 90 | data_loader = torch.utils.data.DataLoader( 91 | self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), 92 | batch_size=self.n_train if index is None else len(index), 93 | num_workers=self.args.workers) 94 | inputs, _ = next(iter(data_loader)) 95 | return inputs.flatten(1).requires_grad_(False).to(self.args.device) 96 | self.construct_matrix = _construct_matrix 97 | 98 | self.balance = balance 99 | 100 | def num_classes_mismatch(self): 101 | raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.") 102 | 103 | def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): 104 | if batch_idx % self.args.print_freq == 0: 105 | print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( 106 | epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) 107 | 108 | def old_construct_matrix(self, index=None): 109 | self.model.eval() 110 | self.model.no_grad = True 111 | with torch.no_grad(): 112 | with self.model.embedding_recorder: 113 | sample_num = self.n_train if index is None else len(index) 114 
| matrix = torch.zeros([sample_num, self.emb_dim], requires_grad=False).to(self.args.device) 115 | 116 | data_loader = torch.utils.data.DataLoader(self.dst_train if index is None else 117 | torch.utils.data.Subset(self.dst_train, index), 118 | batch_size=self.args.selection_batch, 119 | num_workers=self.args.workers) 120 | 121 | for i, (inputs, _) in enumerate(data_loader): 122 | self.model(inputs.to(self.args.device)) 123 | matrix[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, 124 | sample_num)] = self.model.embedding_recorder.embedding 125 | 126 | self.model.no_grad = False 127 | return matrix 128 | 129 | def construct_matrix(self, index=None): 130 | self.model.eval() 131 | self.model.no_grad = True 132 | with torch.no_grad(): 133 | with self.model.embedding_recorder: 134 | sample_num = self.n_train if index is None else len(index) 135 | matrix = [] 136 | 137 | data_loader = torch.utils.data.DataLoader(self.dst_train if index is None else 138 | torch.utils.data.Subset(self.dst_train, index), 139 | batch_size=self.args.selection_batch, 140 | num_workers=self.args.workers) 141 | 142 | for i, (inputs, _) in enumerate(data_loader): 143 | self.model(inputs.to(self.args.device)) 144 | matrix.append(self.model.embedding_recorder.embedding) 145 | 146 | self.model.no_grad = False 147 | return torch.cat(matrix, dim=0) 148 | 149 | def before_run(self): 150 | self.emb_dim = self.model.get_last_layer().in_features 151 | 152 | def finish_run(self): 153 | if isinstance(self.model, MyDataParallel): 154 | self.model = self.model.module 155 | 156 | def select(self, **kwargs): 157 | self.run() 158 | if self.balance: 159 | selection_result = np.array([], dtype=np.int32) 160 | for c in range(self.args.num_classes): 161 | class_index = np.arange(self.n_train)[self.dst_train.targets == c] 162 | 163 | selection_result = np.append(selection_result, k_center_greedy(self.construct_matrix(class_index), 164 | budget=round( 165 | self.fraction * len(class_index)), 166 | metric=self.metric, 167 | device=self.args.device, 168 | random_seed=self.random_seed, 169 | index=class_index, 170 | already_selected=self.already_selected[ 171 | np.in1d(self.already_selected, 172 | class_index)], 173 | print_freq=self.args.print_freq)) 174 | else: 175 | matrix = self.construct_matrix() 176 | del self.model_optimizer 177 | del self.model 178 | selection_result = k_center_greedy(matrix, budget=self.coreset_size, 179 | metric=self.metric, device=self.args.device, 180 | random_seed=self.random_seed, 181 | already_selected=self.already_selected, print_freq=self.args.print_freq) 182 | return {"indices": selection_result} 183 | -------------------------------------------------------------------------------- /deepcore/methods/methods_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .euclidean import * 2 | from .cossim import * 3 | from .submodular_function import * 4 | from .submodular_optimizer import * 5 | -------------------------------------------------------------------------------- /deepcore/methods/methods_utils/cossim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def cossim_np(v1, v2): 6 | num = np.dot(v1, v2.T) 7 | denom = np.linalg.norm(v1, axis=1).reshape(-1, 1) * np.linalg.norm(v2, axis=1) 8 | res = num / denom 9 | res[np.isneginf(res)] = 0. 
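# Map cosine similarity affinely from [-1, 1] to [0, 1] so that downstream kernels are non-negative.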
10 | return 0.5 + 0.5 * res 11 | 12 | def cossim_pair_np(v1): 13 | num = np.dot(v1, v1.T) 14 | norm = np.linalg.norm(v1, axis=1) 15 | denom = norm.reshape(-1, 1) * norm 16 | res = num / denom 17 | res[np.isneginf(res)] = 0. 18 | return 0.5 + 0.5 * res 19 | 20 | def cossim(v1, v2): 21 | num = torch.matmul(v1, v2.T) 22 | denom = torch.norm(v1, dim=1).view(-1, 1) * torch.norm(v2, dim=1) 23 | res = num / denom 24 | res[torch.isneginf(res)] = 0. 25 | return 0.5 + 0.5 * res 26 | 27 | def cossim_pair(v1): 28 | num = torch.matmul(v1, v1.T) 29 | norm = torch.norm(v1, dim=1) 30 | denom = norm.view(-1, 1) * norm 31 | res = num / denom 32 | res[torch.isneginf(res)] = 0. 33 | return 0.5 + 0.5 * res -------------------------------------------------------------------------------- /deepcore/methods/methods_utils/euclidean.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def euclidean_dist(x, y): 6 | m, n = x.size(0), y.size(0) 7 | xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n) 8 | yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t() 9 | dist = xx + yy 10 | dist.addmm_(1, -2, x, y.t()) 11 | dist = dist.clamp(min=1e-12).sqrt() 12 | return dist 13 | 14 | 15 | def euclidean_dist_pair(x): 16 | m = x.size(0) 17 | xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, m) 18 | dist = xx + xx.t() 19 | dist.addmm_(1, -2, x, x.t()) 20 | dist = dist.clamp(min=1e-12).sqrt() 21 | return dist 22 | 23 | def euclidean_dist_np(x, y): 24 | (rowx, colx) = x.shape 25 | (rowy, coly) = y.shape 26 | xy = np.dot(x, y.T) 27 | x2 = np.repeat(np.reshape(np.sum(np.multiply(x, x), axis=1), (rowx, 1)), repeats=rowy, axis=1) 28 | y2 = np.repeat(np.reshape(np.sum(np.multiply(y, y), axis=1), (rowy, 1)), repeats=rowx, axis=1).T 29 | return np.sqrt(np.clip(x2 + y2 - 2. * xy, 1e-12, None)) 30 | 31 | def euclidean_dist_pair_np(x): 32 | (rowx, colx) = x.shape 33 | xy = np.dot(x, x.T) 34 | x2 = np.repeat(np.reshape(np.sum(np.multiply(x, x), axis=1), (rowx, 1)), repeats=rowx, axis=1) 35 | return np.sqrt(np.clip(x2 + x2.T - 2. * xy, 1e-12, None)) 36 | -------------------------------------------------------------------------------- /deepcore/methods/methods_utils/submodular_function.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class SubmodularFunction(object): 5 | def __init__(self, index, similarity_kernel=None, similarity_matrix=None, already_selected=[]): 6 | self.index = index 7 | self.n = len(index) 8 | 9 | self.already_selected = already_selected 10 | 11 | assert similarity_kernel is not None or similarity_matrix is not None 12 | 13 | # For the sample similarity matrix, the method supports two input modes, one is to input a pairwise similarity 14 | # matrix for the whole sample, and the other case allows the input of a similarity kernel to be used to 15 | # calculate similarities incrementally at a later time if required. 
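# For example, submodular.py passes similarity_kernel=lambda a, b: cossim_np(gradients[a], gradients[b]);
# alternatively, a precomputed n x n array can be passed as similarity_matrix.
# Minimal usage sketch (hypothetical n, sim, args and budget k; pairs with the optimizers in submodular_optimizer.py):
#   fl = FacilityLocation(index=np.arange(n), similarity_matrix=sim)
#   opt = submodular_optimizer.NaiveGreedy(args=args, index=np.arange(n), budget=k)
#   chosen = opt.select(gain_function=fl.calc_gain, update_state=fl.update_state)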
16 | if similarity_kernel is not None: 17 | assert callable(similarity_kernel) 18 | self.similarity_kernel = self._similarity_kernel(similarity_kernel) 19 | else: 20 | assert similarity_matrix.shape[0] == self.n and similarity_matrix.shape[1] == self.n 21 | self.similarity_matrix = similarity_matrix 22 | self.similarity_kernel = lambda a, b: self.similarity_matrix[np.ix_(a, b)] 23 | 24 | def _similarity_kernel(self, similarity_kernel): 25 | return similarity_kernel 26 | 27 | 28 | class FacilityLocation(SubmodularFunction): 29 | def __init__(self, **kwargs): 30 | super().__init__(**kwargs) 31 | 32 | if self.already_selected.__len__()==0: 33 | self.cur_max = np.zeros(self.n, dtype=np.float32) 34 | else: 35 | self.cur_max = np.max(self.similarity_kernel(np.arange(self.n), self.already_selected), axis=1) 36 | 37 | self.all_idx = np.ones(self.n, dtype=bool) 38 | 39 | def _similarity_kernel(self, similarity_kernel): 40 | # Initialize a matrix to store similarity values of sample points. 41 | self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32) 42 | self.if_columns_calculated = np.zeros(self.n, dtype=bool) 43 | 44 | def _func(a, b): 45 | if not np.all(self.if_columns_calculated[b]): 46 | if b.dtype != bool: 47 | temp = ~self.all_idx 48 | temp[b] = True 49 | b = temp 50 | not_calculated = b & ~self.if_columns_calculated 51 | self.sim_matrix[:, not_calculated] = similarity_kernel(self.all_idx, not_calculated) 52 | self.if_columns_calculated[not_calculated] = True 53 | return self.sim_matrix[np.ix_(a, b)] 54 | return _func 55 | 56 | def calc_gain(self, idx_gain, selected, **kwargs): 57 | gains = np.maximum(0., self.similarity_kernel(self.all_idx, idx_gain) - self.cur_max.reshape(-1, 1)).sum(axis=0) 58 | return gains 59 | 60 | def calc_gain_batch(self, idx_gain, selected, **kwargs): 61 | batch_idx = ~self.all_idx 62 | batch_idx[0:kwargs["batch"]] = True 63 | gains = np.maximum(0., self.similarity_kernel(batch_idx, idx_gain) - self.cur_max[batch_idx].reshape(-1, 1)).sum(axis=0) 64 | for i in range(kwargs["batch"], self.n, kwargs["batch"]): 65 | batch_idx = ~self.all_idx 66 | batch_idx[i:i + kwargs["batch"]] = True  # i already advances in steps of kwargs["batch"] 67 | gains += np.maximum(0., self.similarity_kernel(batch_idx, idx_gain) - self.cur_max[batch_idx].reshape(-1,1)).sum(axis=0) 68 | return gains 69 | 70 | def update_state(self, new_selection, total_selected, **kwargs): 71 | self.cur_max = np.maximum(self.cur_max, np.max(self.similarity_kernel(self.all_idx, new_selection), axis=1)) 72 | #self.cur_max = np.max(np.append(self.cur_max.reshape(-1, 1), self.similarity_kernel(self.all_idx, new_selection), axis=1), axis=1) 73 | 74 | 75 | class GraphCut(SubmodularFunction): 76 | def __init__(self, lam: float = 1., **kwargs): 77 | super().__init__(**kwargs) 78 | self.lam = lam 79 | 80 | if 'similarity_matrix' in kwargs: 81 | self.sim_matrix_cols_sum = np.sum(self.similarity_matrix, axis=0) 82 | self.all_idx = np.ones(self.n, dtype=bool) 83 | 84 | def _similarity_kernel(self, similarity_kernel): 85 | # Initialize a matrix to store similarity values of sample points. 
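# As in FacilityLocation above, similarity columns are filled in lazily: a column is only computed
# the first time _func is asked for it, and its column sum is cached for the gain computation.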
86 | self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32) 87 | self.sim_matrix_cols_sum = np.zeros(self.n, dtype=np.float32) 88 | self.if_columns_calculated = np.zeros(self.n, dtype=bool) 89 | 90 | def _func(a, b): 91 | if not np.all(self.if_columns_calculated[b]): 92 | if b.dtype != bool: 93 | temp = ~self.all_idx 94 | temp[b] = True 95 | b = temp 96 | not_calculated = b & ~self.if_columns_calculated 97 | self.sim_matrix[:, not_calculated] = similarity_kernel(self.all_idx, not_calculated) 98 | self.sim_matrix_cols_sum[not_calculated] = np.sum(self.sim_matrix[:, not_calculated], axis=0) 99 | self.if_columns_calculated[not_calculated] = True 100 | return self.sim_matrix[np.ix_(a, b)] 101 | return _func 102 | 103 | def calc_gain(self, idx_gain, selected, **kwargs): 104 | 105 | gain = -2. * np.sum(self.similarity_kernel(selected, idx_gain), axis=0) + self.lam * self.sim_matrix_cols_sum[idx_gain] 106 | 107 | return gain 108 | 109 | def update_state(self, new_selection, total_selected, **kwargs): 110 | pass 111 | 112 | 113 | class LogDeterminant(SubmodularFunction): 114 | def __init__(self, **kwargs): 115 | super().__init__(**kwargs) 116 | 117 | self.all_idx = np.ones(self.n, dtype=bool) 118 | 119 | def _similarity_kernel(self, similarity_kernel): 120 | # Initialize a matrix to store similarity values of sample points. 121 | self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32) 122 | self.if_columns_calculated = np.zeros(self.n, dtype=bool) 123 | 124 | def _func(a, b): 125 | if not np.all(self.if_columns_calculated[b]): 126 | if b.dtype != bool: 127 | temp = ~self.all_idx 128 | temp[b] = True 129 | b = temp 130 | not_calculated = b & ~self.if_columns_calculated 131 | self.sim_matrix[:, not_calculated] = similarity_kernel(self.all_idx, not_calculated) 132 | self.if_columns_calculated[not_calculated] = True 133 | return self.sim_matrix[np.ix_(a, b)] 134 | return _func 135 | 136 | def calc_gain(self, idx_gain, selected, **kwargs): 137 | # Gain for LogDeterminant can be written as $f(x \mid A) = \log\det(S_{x} - S_{x,A}S_{A}^{-1}S_{x,A}^T)$. 
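# Note that the quantity returned below is the correction term S_{x,A} S_A^{+} S_{A,x} (computed
# with a pseudo-inverse for numerical stability) rather than the full log-determinant expression above.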
138 | sim_idx_gain = self.similarity_kernel(selected, idx_gain).T 139 | sim_selected = self.similarity_kernel(selected, selected) 140 | return (np.dot(sim_idx_gain, np.linalg.pinv(sim_selected)) * sim_idx_gain).sum(-1) 141 | 142 | def update_state(self, new_selection, total_selected, **kwargs): 143 | pass 144 | -------------------------------------------------------------------------------- /deepcore/methods/methods_utils/submodular_optimizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | optimizer_choices = ["NaiveGreedy", "LazyGreedy", "StochasticGreedy", "ApproximateLazyGreedy"] 5 | 6 | class optimizer(object): 7 | def __init__(self, args, index, budget:int, already_selected=[]): 8 | self.args = args 9 | self.index = index 10 | 11 | if budget <= 0 or budget > index.__len__(): 12 | raise ValueError("Illegal budget for optimizer.") 13 | 14 | self.n = len(index) 15 | self.budget = budget 16 | self.already_selected = already_selected 17 | 18 | 19 | class NaiveGreedy(optimizer): 20 | def __init__(self, args, index, budget:int, already_selected=[]): 21 | super(NaiveGreedy, self).__init__(args, index, budget, already_selected) 22 | 23 | def select(self, gain_function, update_state=None, **kwargs): 24 | assert callable(gain_function) 25 | if update_state is not None: 26 | assert callable(update_state) 27 | selected = np.zeros(self.n, dtype=bool) 28 | selected[self.already_selected] = True 29 | 30 | greedy_gain = np.zeros(len(self.index)) 31 | for i in range(sum(selected), self.budget): 32 | if i % self.args.print_freq == 0: 33 | print("| Selecting [%3d/%3d]" % (i + 1, self.budget)) 34 | greedy_gain[~selected] = gain_function(~selected, selected, **kwargs) 35 | current_selection = greedy_gain.argmax() 36 | selected[current_selection] = True 37 | greedy_gain[current_selection] = -np.inf 38 | if update_state is not None: 39 | update_state(np.array([current_selection]), selected, **kwargs) 40 | return self.index[selected] 41 | 42 | 43 | class LazyGreedy(optimizer): 44 | def __init__(self, args, index, budget:int, already_selected=[]): 45 | super(LazyGreedy, self).__init__(args, index, budget, already_selected) 46 | 47 | def select(self, gain_function, update_state=None, **kwargs): 48 | assert callable(gain_function) 49 | if update_state is not None: 50 | assert callable(update_state) 51 | selected = np.zeros(self.n, dtype=bool) 52 | selected[self.already_selected] = True 53 | 54 | greedy_gain = np.zeros(len(self.index)) 55 | greedy_gain[~selected] = gain_function(~selected, selected, **kwargs) 56 | greedy_gain[selected] = -np.inf 57 | 58 | for i in range(sum(selected), self.budget): 59 | if i % self.args.print_freq == 0: 60 | print("| Selecting [%3d/%3d]" % (i + 1, self.budget)) 61 | best_gain = -np.inf 62 | last_max_element = -1 63 | while True: 64 | cur_max_element = greedy_gain.argmax() 65 | if last_max_element == cur_max_element: 66 | # Select cur_max_element into the current subset 67 | selected[cur_max_element] = True 68 | greedy_gain[cur_max_element] = -np.inf 69 | 70 | if update_state is not None: 71 | update_state(np.array([cur_max_element]), selected, **kwargs) 72 | break 73 | new_gain = gain_function(np.array([cur_max_element]), selected, **kwargs)[0] 74 | greedy_gain[cur_max_element] = new_gain 75 | if new_gain >= best_gain: 76 | best_gain = new_gain 77 | last_max_element = cur_max_element 78 | return self.index[selected] 79 | 80 | 81 | class StochasticGreedy(optimizer): 82 | def __init__(self, args, index, 
budget:int, already_selected=[], epsilon: float=0.9): 83 | super(StochasticGreedy, self).__init__(args, index, budget, already_selected) 84 | self.epsilon = epsilon 85 | 86 | def select(self, gain_function, update_state=None, **kwargs): 87 | assert callable(gain_function) 88 | if update_state is not None: 89 | assert callable(update_state) 90 | selected = np.zeros(self.n, dtype=bool) 91 | selected[self.already_selected] = True 92 | 93 | sample_size = max(round(-np.log(self.epsilon) * self.n / self.budget), 1) 94 | 95 | greedy_gain = np.zeros(len(self.index)) 96 | all_idx = np.arange(self.n) 97 | for i in range(sum(selected), self.budget): 98 | if i % self.args.print_freq == 0: 99 | print("| Selecting [%3d/%3d]" % (i + 1, self.budget)) 100 | 101 | # Uniformly select a subset from unselected samples with size sample_size 102 | subset = np.random.choice(all_idx[~selected], replace=False, size=min(sample_size, self.n - i)) 103 | 104 | if subset.__len__() == 0: 105 | break 106 | 107 | greedy_gain[subset] = gain_function(subset, selected, **kwargs) 108 | current_selection = greedy_gain[subset].argmax() 109 | selected[subset[current_selection]] = True 110 | greedy_gain[subset[current_selection]] = -np.inf 111 | if update_state is not None: 112 | update_state(np.array([subset[current_selection]]), selected, **kwargs) 113 | return self.index[selected] 114 | 115 | 116 | class ApproximateLazyGreedy(optimizer): 117 | def __init__(self, args, index, budget:int, already_selected=[], beta: float=0.9): 118 | super(ApproximateLazyGreedy, self).__init__(args, index, budget, already_selected) 119 | self.beta = beta 120 | 121 | def select(self, gain_function, update_state=None, **kwargs): 122 | assert callable(gain_function) 123 | if update_state is not None: 124 | assert callable(update_state) 125 | selected = np.zeros(self.n, dtype=bool) 126 | selected[self.already_selected] = True 127 | 128 | greedy_gain = np.zeros(len(self.index)) 129 | greedy_gain[~selected] = gain_function(~selected, selected, **kwargs) 130 | greedy_gain[selected] = -np.inf 131 | 132 | for i in range(sum(selected), self.budget): 133 | if i % self.args.print_freq == 0: 134 | print("| Selecting [%3d/%3d]" % (i + 1, self.budget)) 135 | while True: 136 | cur_max_element = greedy_gain.argmax() 137 | max_gain = greedy_gain[cur_max_element] 138 | 139 | new_gain = gain_function(np.array([cur_max_element]), selected, **kwargs)[0] 140 | 141 | if new_gain >= self.beta * max_gain: 142 | # Select cur_max_element into the current subset 143 | selected[cur_max_element] = True 144 | greedy_gain[cur_max_element] = -np.inf 145 | 146 | if update_state is not None: 147 | update_state(np.array([cur_max_element]), selected, **kwargs) 148 | break 149 | else: 150 | greedy_gain[cur_max_element] = new_gain 151 | return self.index[selected] 152 | 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /deepcore/methods/submodular.py: -------------------------------------------------------------------------------- 1 | from .earlytrain import EarlyTrain 2 | import numpy as np 3 | import torch 4 | from .methods_utils import cossim_np, submodular_function, submodular_optimizer 5 | from ..nets.nets_utils import MyDataParallel 6 | 7 | 8 | class Submodular(EarlyTrain): 9 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, balance=False, 10 | function="LogDeterminant", greedy="ApproximateLazyGreedy", metric="cossim", **kwargs): 11 | super(Submodular, 
self).__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs) 12 | 13 | if greedy not in submodular_optimizer.optimizer_choices: 14 | raise ModuleNotFoundError("Greedy optimizer not found.") 15 | self._greedy = greedy 16 | self._metric = metric 17 | self._function = function 18 | 19 | self.balance = balance 20 | 21 | def before_train(self): 22 | pass 23 | 24 | def after_loss(self, outputs, loss, targets, batch_inds, epoch): 25 | pass 26 | 27 | def before_epoch(self): 28 | pass 29 | 30 | def after_epoch(self): 31 | pass 32 | 33 | def before_run(self): 34 | pass 35 | 36 | def num_classes_mismatch(self): 37 | raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.") 38 | 39 | def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): 40 | if batch_idx % self.args.print_freq == 0: 41 | print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( 42 | epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) 43 | 44 | def calc_gradient(self, index=None): 45 | ''' 46 | Calculate gradients matrix on current network for specified training dataset. 47 | ''' 48 | self.model.eval() 49 | 50 | batch_loader = torch.utils.data.DataLoader( 51 | self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), 52 | batch_size=self.args.selection_batch, 53 | num_workers=self.args.workers) 54 | sample_num = self.n_train if index is None else len(index) 55 | 56 | self.embedding_dim = self.model.get_last_layer().in_features 57 | 58 | # Initialize a matrix to save gradients. 59 | # (on cpu) 60 | gradients = [] 61 | 62 | for i, (input, targets) in enumerate(batch_loader): 63 | self.model_optimizer.zero_grad() 64 | outputs = self.model(input.to(self.args.device)) 65 | loss = self.criterion(outputs.requires_grad_(True), 66 | targets.to(self.args.device)).sum() 67 | batch_num = targets.shape[0] 68 | with torch.no_grad(): 69 | bias_parameters_grads = torch.autograd.grad(loss, outputs)[0] 70 | weight_parameters_grads = self.model.embedding_recorder.embedding.view(batch_num, 1, 71 | self.embedding_dim).repeat(1, self.args.num_classes, 1) *\ 72 | bias_parameters_grads.view(batch_num, self.args.num_classes, 73 | 1).repeat(1, 1, self.embedding_dim) 74 | gradients.append(torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)], 75 | dim=1).cpu().numpy()) 76 | 77 | gradients = np.concatenate(gradients, axis=0) 78 | return gradients 79 | 80 | def finish_run(self): 81 | if isinstance(self.model, MyDataParallel): 82 | self.model = self.model.module 83 | 84 | # Turn on the embedding recorder and the no_grad flag 85 | with self.model.embedding_recorder: 86 | self.model.no_grad = True 87 | self.train_indx = np.arange(self.n_train) 88 | 89 | if self.balance: 90 | selection_result = np.array([], dtype=np.int64) 91 | for c in range(self.num_classes): 92 | c_indx = self.train_indx[self.dst_train.targets == c] 93 | # Calculate gradients into a matrix 94 | gradients = self.calc_gradient(index=c_indx) 95 | # Instantiate a submodular function 96 | submod_function = submodular_function.__dict__[self._function](index=c_indx, 97 | similarity_kernel=lambda a, b:cossim_np(gradients[a], gradients[b])) 98 | submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, 99 | index=c_indx, budget=round(self.fraction * len(c_indx)), already_selected=[]) 100 | 101 | c_selection_result = submod_optimizer.select(gain_function=submod_function.calc_gain, 102 | 
update_state=submod_function.update_state) 103 | selection_result = np.append(selection_result, c_selection_result) 104 | else: 105 | # Calculate gradients into a matrix 106 | gradients = self.calc_gradient() 107 | # Instantiate a submodular function 108 | submod_function = submodular_function.__dict__[self._function](index=self.train_indx, 109 | similarity_kernel=lambda a, b: cossim_np(gradients[a], gradients[b])) 110 | submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=self.train_indx, 111 | budget=self.coreset_size) 112 | selection_result = submod_optimizer.select(gain_function=submod_function.calc_gain, 113 | update_state=submod_function.update_state) 114 | 115 | self.model.no_grad = False 116 | return {"indices": selection_result} 117 | 118 | def select(self, **kwargs): 119 | selection_result = self.run() 120 | return selection_result 121 | 122 | 123 | -------------------------------------------------------------------------------- /deepcore/methods/uncertainty.py: -------------------------------------------------------------------------------- 1 | from .earlytrain import EarlyTrain 2 | import torch 3 | import numpy as np 4 | 5 | 6 | class Uncertainty(EarlyTrain): 7 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, selection_method="LeastConfidence", 8 | specific_model=None, balance=False, **kwargs): 9 | super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs) 10 | 11 | selection_choices = ["LeastConfidence", 12 | "Entropy", 13 | "Margin"] 14 | if selection_method not in selection_choices: 15 | raise NotImplementedError("Selection algorithm unavailable.") 16 | self.selection_method = selection_method 17 | 18 | self.epochs = epochs 19 | self.balance = balance 20 | 21 | def before_train(self): 22 | pass 23 | 24 | def after_loss(self, outputs, loss, targets, batch_inds, epoch): 25 | pass 26 | 27 | def before_epoch(self): 28 | pass 29 | 30 | def after_epoch(self): 31 | pass 32 | 33 | def before_run(self): 34 | pass 35 | 36 | def num_classes_mismatch(self): 37 | raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.") 38 | 39 | def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): 40 | if batch_idx % self.args.print_freq == 0: 41 | print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( 42 | epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) 43 | 44 | def finish_run(self): 45 | if self.balance: 46 | selection_result = np.array([], dtype=np.int64) 47 | scores = [] 48 | for c in range(self.args.num_classes): 49 | class_index = np.arange(self.n_train)[self.dst_train.targets == c] 50 | scores.append(self.rank_uncertainty(class_index)) 51 | selection_result = np.append(selection_result, class_index[np.argsort(scores[-1])[ 52 | :round(len(class_index) * self.fraction)]]) 53 | else: 54 | scores = self.rank_uncertainty() 55 | selection_result = np.argsort(scores)[::-1][:self.coreset_size] 56 | return {"indices": selection_result, "scores": scores} 57 | 58 | def rank_uncertainty(self, index=None): 59 | self.model.eval() 60 | with torch.no_grad(): 61 | train_loader = torch.utils.data.DataLoader( 62 | self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), 63 | batch_size=self.args.selection_batch, 64 | num_workers=self.args.workers) 65 | 66 | scores = np.array([]) 67 | batch_num = len(train_loader) 68 | 69 | for i, (input, _) in enumerate(train_loader): 70 | if i % 
self.args.print_freq == 0: 71 | print("| Selecting for batch [%3d/%3d]" % (i + 1, batch_num)) 72 | if self.selection_method == "LeastConfidence": 73 | scores = np.append(scores, self.model(input.to(self.args.device)).max(axis=1).values.cpu().numpy()) 74 | elif self.selection_method == "Entropy": 75 | preds = torch.nn.functional.softmax(self.model(input.to(self.args.device)), dim=1).cpu().numpy() 76 | scores = np.append(scores, (np.log(preds + 1e-6) * preds).sum(axis=1)) 77 | elif self.selection_method == 'Margin': 78 | preds = torch.nn.functional.softmax(self.model(input.to(self.args.device)), dim=1) 79 | preds_argmax = torch.argmax(preds, dim=1) 80 | max_preds = preds[torch.ones(preds.shape[0], dtype=bool), preds_argmax].clone() 81 | preds[torch.ones(preds.shape[0], dtype=bool), preds_argmax] = -1.0 82 | preds_sub_argmax = torch.argmax(preds, dim=1) 83 | scores = np.append(scores, (max_preds - preds[ 84 | torch.ones(preds.shape[0], dtype=bool), preds_sub_argmax]).cpu().numpy()) 85 | return scores 86 | 87 | def select(self, **kwargs): 88 | selection_result = self.run() 89 | return selection_result 90 | -------------------------------------------------------------------------------- /deepcore/methods/uniform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .coresetmethod import CoresetMethod 3 | 4 | 5 | class Uniform(CoresetMethod): 6 | def __init__(self, dst_train, args, fraction=0.5, random_seed=None, balance=False, replace=False, **kwargs): 7 | super().__init__(dst_train, args, fraction, random_seed) 8 | self.balance = balance 9 | self.replace = replace 10 | self.n_train = len(dst_train) 11 | 12 | def select_balance(self): 13 | """The same sampling proportion is used in each class separately.""" 14 | np.random.seed(self.random_seed) 15 | self.index = np.array([], dtype=np.int64) 16 | all_index = np.arange(self.n_train) 17 | for c in range(self.num_classes): 18 | c_index = (self.dst_train.targets == c) 19 | self.index = np.append(self.index, 20 | np.random.choice(all_index[c_index], round(self.fraction * c_index.sum().item()), 21 | replace=self.replace)) 22 | return self.index 23 | 24 | def select_no_balance(self): 25 | np.random.seed(self.random_seed) 26 | self.index = np.random.choice(np.arange(self.n_train), round(self.n_train * self.fraction), 27 | replace=self.replace) 28 | 29 | return self.index 30 | 31 | def select(self, **kwargs): 32 | return {"indices": self.select_balance() if self.balance else self.select_no_balance()} 33 | -------------------------------------------------------------------------------- /deepcore/nets/__init__.py: -------------------------------------------------------------------------------- 1 | from .alexnet import * 2 | from .inceptionv3 import * 3 | from .lenet import * 4 | from .mlp import * 5 | from .mobilenetv3 import * 6 | from .resnet import * 7 | from .vgg import * 8 | from .wideresnet import * 9 | -------------------------------------------------------------------------------- /deepcore/nets/alexnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch import set_grad_enabled 3 | from torchvision import models 4 | import torch 5 | from .nets_utils import EmbeddingRecorder 6 | 7 | 8 | # Acknowledgement to 9 | # https://github.com/kuangliu/pytorch-cifar, 10 | # https://github.com/BIGBALLON/CIFAR-ZOO, 11 | 12 | class AlexNet_32x32(nn.Module): 13 | def __init__(self, channel, num_classes, record_embedding=False, 
no_grad=False): 14 | super().__init__() 15 | self.features = nn.Sequential( 16 | nn.Conv2d(channel, 128, kernel_size=5, stride=1, padding=4 if channel == 1 else 2), 17 | nn.ReLU(inplace=True), 18 | nn.MaxPool2d(kernel_size=2, stride=2), 19 | nn.Conv2d(128, 192, kernel_size=5, padding=2), 20 | nn.ReLU(inplace=True), 21 | nn.MaxPool2d(kernel_size=2, stride=2), 22 | nn.Conv2d(192, 256, kernel_size=3, padding=1), 23 | nn.ReLU(inplace=True), 24 | nn.Conv2d(256, 192, kernel_size=3, padding=1), 25 | nn.ReLU(inplace=True), 26 | nn.Conv2d(192, 192, kernel_size=3, padding=1), 27 | nn.ReLU(inplace=True), 28 | nn.MaxPool2d(kernel_size=2, stride=2), 29 | ) 30 | self.fc = nn.Linear(192 * 4 * 4, num_classes) 31 | 32 | self.embedding_recorder = EmbeddingRecorder(record_embedding) 33 | self.no_grad = no_grad 34 | 35 | def get_last_layer(self): 36 | return self.fc 37 | 38 | def forward(self, x): 39 | with set_grad_enabled(not self.no_grad): 40 | x = self.features(x) 41 | x = x.view(x.size(0), -1) 42 | x = self.embedding_recorder(x) 43 | x = self.fc(x) 44 | return x 45 | 46 | 47 | class AlexNet_224x224(models.AlexNet): 48 | def __init__(self, channel: int, num_classes: int, record_embedding: bool = False, 49 | no_grad: bool = False, **kwargs): 50 | super().__init__(num_classes, **kwargs) 51 | self.embedding_recorder = EmbeddingRecorder(record_embedding) 52 | if channel != 3: 53 | self.features[0] = nn.Conv2d(channel, 64, kernel_size=11, stride=4, padding=2) 54 | self.fc = self.classifier[-1] 55 | self.classifier[-1] = self.embedding_recorder 56 | self.classifier.add_module("fc", self.fc) 57 | 58 | self.no_grad = no_grad 59 | 60 | def get_last_layer(self): 61 | return self.fc 62 | 63 | def forward(self, x: torch.Tensor) -> torch.Tensor: 64 | with set_grad_enabled(not self.no_grad): 65 | x = self.features(x) 66 | x = self.avgpool(x) 67 | x = torch.flatten(x, 1) 68 | x = self.classifier(x) 69 | return x 70 | 71 | 72 | def AlexNet(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 73 | pretrained: bool = False): 74 | if pretrained: 75 | if im_size[0] != 224 or im_size[1] != 224: 76 | raise NotImplementedError("torchvision pretrained models only accept inputs with size of 224*224") 77 | net = AlexNet_224x224(channel=3, num_classes=1000, record_embedding=record_embedding, no_grad=no_grad) 78 | 79 | from torch.hub import load_state_dict_from_url 80 | state_dict = load_state_dict_from_url('https://download.pytorch.org/models/alexnet-owt-7be5be79.pth' 81 | , progress=True) 82 | net.load_state_dict(state_dict) 83 | 84 | if channel != 3: 85 | net.features[0] = nn.Conv2d(channel, 64, kernel_size=11, stride=4, padding=2) 86 | if num_classes != 1000: 87 | net.fc = nn.Linear(4096, num_classes) 88 | net.classifier[-1] = net.fc 89 | 90 | elif im_size[0] == 224 and im_size[1] == 224: 91 | net = AlexNet_224x224(channel=channel, num_classes=num_classes, record_embedding=record_embedding, 92 | no_grad=no_grad) 93 | 94 | elif (channel == 1 and im_size[0] == 28 and im_size[1] == 28) or ( 95 | channel == 3 and im_size[0] == 32 and im_size[1] == 32): 96 | net = AlexNet_32x32(channel=channel, num_classes=num_classes, record_embedding=record_embedding, 97 | no_grad=no_grad) 98 | else: 99 | raise NotImplementedError("Network Architecture for current dataset has not been implemented.") 100 | return net 101 | -------------------------------------------------------------------------------- /deepcore/nets/inceptionv3.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torchvision.models import inception 4 | from .nets_utils import EmbeddingRecorder 5 | 6 | 7 | class BasicConv2d(nn.Module): 8 | 9 | def __init__(self, input_channels, output_channels, **kwargs): 10 | super().__init__() 11 | self.conv = nn.Conv2d(input_channels, output_channels, bias=False, **kwargs) 12 | self.bn = nn.BatchNorm2d(output_channels) 13 | self.relu = nn.ReLU(inplace=True) 14 | 15 | def forward(self, x): 16 | x = self.conv(x) 17 | x = self.bn(x) 18 | x = self.relu(x) 19 | 20 | return x 21 | 22 | 23 | # same naive inception module 24 | class InceptionA(nn.Module): 25 | 26 | def __init__(self, input_channels, pool_features): 27 | super().__init__() 28 | self.branch1x1 = BasicConv2d(input_channels, 64, kernel_size=1) 29 | 30 | self.branch5x5 = nn.Sequential( 31 | BasicConv2d(input_channels, 48, kernel_size=1), 32 | BasicConv2d(48, 64, kernel_size=5, padding=2) 33 | ) 34 | 35 | self.branch3x3 = nn.Sequential( 36 | BasicConv2d(input_channels, 64, kernel_size=1), 37 | BasicConv2d(64, 96, kernel_size=3, padding=1), 38 | BasicConv2d(96, 96, kernel_size=3, padding=1) 39 | ) 40 | 41 | self.branchpool = nn.Sequential( 42 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 43 | BasicConv2d(input_channels, pool_features, kernel_size=3, padding=1) 44 | ) 45 | 46 | def forward(self, x): 47 | # x -> 1x1(same) 48 | branch1x1 = self.branch1x1(x) 49 | 50 | # x -> 1x1 -> 5x5(same) 51 | branch5x5 = self.branch5x5(x) 52 | # branch5x5 = self.branch5x5_2(branch5x5) 53 | 54 | # x -> 1x1 -> 3x3 -> 3x3(same) 55 | branch3x3 = self.branch3x3(x) 56 | 57 | # x -> pool -> 1x1(same) 58 | branchpool = self.branchpool(x) 59 | 60 | outputs = [branch1x1, branch5x5, branch3x3, branchpool] 61 | 62 | return torch.cat(outputs, 1) 63 | 64 | 65 | # downsample 66 | # Factorization into smaller convolutions 67 | class InceptionB(nn.Module): 68 | 69 | def __init__(self, input_channels): 70 | super().__init__() 71 | 72 | self.branch3x3 = BasicConv2d(input_channels, 384, kernel_size=3, stride=2) 73 | 74 | self.branch3x3stack = nn.Sequential( 75 | BasicConv2d(input_channels, 64, kernel_size=1), 76 | BasicConv2d(64, 96, kernel_size=3, padding=1), 77 | BasicConv2d(96, 96, kernel_size=3, stride=2) 78 | ) 79 | 80 | self.branchpool = nn.MaxPool2d(kernel_size=3, stride=2) 81 | 82 | def forward(self, x): 83 | # x - > 3x3(downsample) 84 | branch3x3 = self.branch3x3(x) 85 | 86 | # x -> 3x3 -> 3x3(downsample) 87 | branch3x3stack = self.branch3x3stack(x) 88 | 89 | # x -> avgpool(downsample) 90 | branchpool = self.branchpool(x) 91 | 92 | # """We can use two parallel stride 2 blocks: P and C. P is a pooling 93 | # layer (either average or maximum pooling) the activation, both of 94 | # them are stride 2 the filter banks of which are concatenated as in 95 | # figure 10.""" 96 | outputs = [branch3x3, branch3x3stack, branchpool] 97 | 98 | return torch.cat(outputs, 1) 99 | 100 | 101 | # Factorizing Convolutions with Large Filter Size 102 | class InceptionC(nn.Module): 103 | def __init__(self, input_channels, channels_7x7): 104 | super().__init__() 105 | self.branch1x1 = BasicConv2d(input_channels, 192, kernel_size=1) 106 | 107 | c7 = channels_7x7 108 | 109 | # In theory, we could go even further and argue that one can replace any n × n 110 | # convolution by a 1 × n convolution followed by a n × 1 convolution and the 111 | # computational cost saving increases dramatically as n grows (see figure 6). 
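# branch7x7 below applies exactly this factorization: a 1x1 reduction, then a 7x1 convolution
# followed by a 1x7 convolution, each padded so that the spatial size is preserved.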
112 | self.branch7x7 = nn.Sequential( 113 | BasicConv2d(input_channels, c7, kernel_size=1), 114 | BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)), 115 | BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)) 116 | ) 117 | 118 | self.branch7x7stack = nn.Sequential( 119 | BasicConv2d(input_channels, c7, kernel_size=1), 120 | BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)), 121 | BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)), 122 | BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)), 123 | BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)) 124 | ) 125 | 126 | self.branch_pool = nn.Sequential( 127 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 128 | BasicConv2d(input_channels, 192, kernel_size=1), 129 | ) 130 | 131 | def forward(self, x): 132 | # x -> 1x1(same) 133 | branch1x1 = self.branch1x1(x) 134 | 135 | # x -> 1layer 1*7 and 7*1 (same) 136 | branch7x7 = self.branch7x7(x) 137 | 138 | # x-> 2layer 1*7 and 7*1(same) 139 | branch7x7stack = self.branch7x7stack(x) 140 | 141 | # x-> avgpool (same) 142 | branchpool = self.branch_pool(x) 143 | 144 | outputs = [branch1x1, branch7x7, branch7x7stack, branchpool] 145 | 146 | return torch.cat(outputs, 1) 147 | 148 | 149 | class InceptionD(nn.Module): 150 | 151 | def __init__(self, input_channels): 152 | super().__init__() 153 | 154 | self.branch3x3 = nn.Sequential( 155 | BasicConv2d(input_channels, 192, kernel_size=1), 156 | BasicConv2d(192, 320, kernel_size=3, stride=2) 157 | ) 158 | 159 | self.branch7x7 = nn.Sequential( 160 | BasicConv2d(input_channels, 192, kernel_size=1), 161 | BasicConv2d(192, 192, kernel_size=(1, 7), padding=(0, 3)), 162 | BasicConv2d(192, 192, kernel_size=(7, 1), padding=(3, 0)), 163 | BasicConv2d(192, 192, kernel_size=3, stride=2) 164 | ) 165 | 166 | self.branchpool = nn.AvgPool2d(kernel_size=3, stride=2) 167 | 168 | def forward(self, x): 169 | # x -> 1x1 -> 3x3(downsample) 170 | branch3x3 = self.branch3x3(x) 171 | 172 | # x -> 1x1 -> 1x7 -> 7x1 -> 3x3 (downsample) 173 | branch7x7 = self.branch7x7(x) 174 | 175 | # x -> avgpool (downsample) 176 | branchpool = self.branchpool(x) 177 | 178 | outputs = [branch3x3, branch7x7, branchpool] 179 | 180 | return torch.cat(outputs, 1) 181 | 182 | 183 | # same 184 | class InceptionE(nn.Module): 185 | def __init__(self, input_channels): 186 | super().__init__() 187 | self.branch1x1 = BasicConv2d(input_channels, 320, kernel_size=1) 188 | 189 | self.branch3x3_1 = BasicConv2d(input_channels, 384, kernel_size=1) 190 | self.branch3x3_2a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) 191 | self.branch3x3_2b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) 192 | 193 | self.branch3x3stack_1 = BasicConv2d(input_channels, 448, kernel_size=1) 194 | self.branch3x3stack_2 = BasicConv2d(448, 384, kernel_size=3, padding=1) 195 | self.branch3x3stack_3a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) 196 | self.branch3x3stack_3b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) 197 | 198 | self.branch_pool = nn.Sequential( 199 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 200 | BasicConv2d(input_channels, 192, kernel_size=1) 201 | ) 202 | 203 | def forward(self, x): 204 | # x -> 1x1 (same) 205 | branch1x1 = self.branch1x1(x) 206 | 207 | # x -> 1x1 -> 3x1 208 | # x -> 1x1 -> 1x3 209 | # concatenate(3x1, 1x3) 210 | # """7. Inception modules with expanded the filter bank outputs. 
211 | # This architecture is used on the coarsest (8 × 8) grids to promote 212 | # high dimensional representations, as suggested by principle 213 | # 2 of Section 2.""" 214 | branch3x3 = self.branch3x3_1(x) 215 | branch3x3 = [ 216 | self.branch3x3_2a(branch3x3), 217 | self.branch3x3_2b(branch3x3) 218 | ] 219 | branch3x3 = torch.cat(branch3x3, 1) 220 | 221 | # x -> 1x1 -> 3x3 -> 1x3 222 | # x -> 1x1 -> 3x3 -> 3x1 223 | # concatenate(1x3, 3x1) 224 | branch3x3stack = self.branch3x3stack_1(x) 225 | branch3x3stack = self.branch3x3stack_2(branch3x3stack) 226 | branch3x3stack = [ 227 | self.branch3x3stack_3a(branch3x3stack), 228 | self.branch3x3stack_3b(branch3x3stack) 229 | ] 230 | branch3x3stack = torch.cat(branch3x3stack, 1) 231 | 232 | branchpool = self.branch_pool(x) 233 | 234 | outputs = [branch1x1, branch3x3, branch3x3stack, branchpool] 235 | 236 | return torch.cat(outputs, 1) 237 | 238 | 239 | class InceptionV3_32x32(nn.Module): 240 | 241 | def __init__(self, channel, num_classes, record_embedding=False, no_grad=False): 242 | super().__init__() 243 | self.Conv2d_1a_3x3 = BasicConv2d(channel, 32, kernel_size=3, padding=3 if channel == 1 else 1) 244 | self.Conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3, padding=1) 245 | self.Conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1) 246 | self.Conv2d_3b_1x1 = BasicConv2d(64, 80, kernel_size=1) 247 | self.Conv2d_4a_3x3 = BasicConv2d(80, 192, kernel_size=3) 248 | 249 | # naive inception module 250 | self.Mixed_5b = InceptionA(192, pool_features=32) 251 | self.Mixed_5c = InceptionA(256, pool_features=64) 252 | self.Mixed_5d = InceptionA(288, pool_features=64) 253 | 254 | # downsample 255 | self.Mixed_6a = InceptionB(288) 256 | 257 | self.Mixed_6b = InceptionC(768, channels_7x7=128) 258 | self.Mixed_6c = InceptionC(768, channels_7x7=160) 259 | self.Mixed_6d = InceptionC(768, channels_7x7=160) 260 | self.Mixed_6e = InceptionC(768, channels_7x7=192) 261 | 262 | # downsample 263 | self.Mixed_7a = InceptionD(768) 264 | 265 | self.Mixed_7b = InceptionE(1280) 266 | self.Mixed_7c = InceptionE(2048) 267 | 268 | # 6*6 feature size 269 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 270 | self.dropout = nn.Dropout2d() 271 | self.linear = nn.Linear(2048, num_classes) 272 | 273 | self.embedding_recorder = EmbeddingRecorder(record_embedding) 274 | self.no_grad = no_grad 275 | 276 | def get_last_layer(self): 277 | return self.linear 278 | 279 | def forward(self, x): 280 | with torch.set_grad_enabled(not self.no_grad): 281 | # 32 -> 30 282 | x = self.Conv2d_1a_3x3(x) 283 | x = self.Conv2d_2a_3x3(x) 284 | x = self.Conv2d_2b_3x3(x) 285 | x = self.Conv2d_3b_1x1(x) 286 | x = self.Conv2d_4a_3x3(x) 287 | 288 | # 30 -> 30 289 | x = self.Mixed_5b(x) 290 | x = self.Mixed_5c(x) 291 | x = self.Mixed_5d(x) 292 | 293 | # 30 -> 14 294 | # Efficient Grid Size Reduction to avoid representation 295 | # bottleneck 296 | x = self.Mixed_6a(x) 297 | 298 | # 14 -> 14 299 | # """In practice, we have found that employing this factorization does not 300 | # work well on early layers, but it gives very good results on medium 301 | # grid-sizes (On m × m feature maps, where m ranges between 12 and 20). 
302 | # On that level, very good results can be achieved by using 1 × 7 convolutions 303 | # followed by 7 × 1 convolutions.""" 304 | x = self.Mixed_6b(x) 305 | x = self.Mixed_6c(x) 306 | x = self.Mixed_6d(x) 307 | x = self.Mixed_6e(x) 308 | 309 | # 14 -> 6 310 | # Efficient Grid Size Reduction 311 | x = self.Mixed_7a(x) 312 | 313 | # 6 -> 6 314 | # We are using this solution only on the coarsest grid, 315 | # since that is the place where producing high dimensional 316 | # sparse representation is the most critical as the ratio of 317 | # local processing (by 1 × 1 convolutions) is increased compared 318 | # to the spatial aggregation.""" 319 | x = self.Mixed_7b(x) 320 | x = self.Mixed_7c(x) 321 | 322 | # 6 -> 1 323 | x = self.avgpool(x) 324 | x = self.dropout(x) 325 | x = x.view(x.size(0), -1) 326 | x = self.embedding_recorder(x) 327 | x = self.linear(x) 328 | return x 329 | 330 | 331 | class InceptionV3_224x224(inception.Inception3): 332 | def __init__(self, channel: int, num_classes: int, record_embedding: bool = False, 333 | no_grad: bool = False, **kwargs): 334 | super().__init__(num_classes=num_classes, **kwargs) 335 | self.embedding_recorder = EmbeddingRecorder(record_embedding) 336 | if channel != 3: 337 | self.Conv2d_1a_3x3 = inception.conv_block(channel, 32, kernel_size=3, stride=2) 338 | self.no_grad = no_grad 339 | 340 | def get_last_layer(self): 341 | return self.fc 342 | 343 | def _forward(self, x): 344 | with torch.set_grad_enabled(not self.no_grad): 345 | # N x 3 x 299 x 299 346 | x = self.Conv2d_1a_3x3(x) 347 | # N x 32 x 149 x 149 348 | x = self.Conv2d_2a_3x3(x) 349 | # N x 32 x 147 x 147 350 | x = self.Conv2d_2b_3x3(x) 351 | # N x 64 x 147 x 147 352 | x = self.maxpool1(x) 353 | # N x 64 x 73 x 73 354 | x = self.Conv2d_3b_1x1(x) 355 | # N x 80 x 73 x 73 356 | x = self.Conv2d_4a_3x3(x) 357 | # N x 192 x 71 x 71 358 | x = self.maxpool2(x) 359 | # N x 192 x 35 x 35 360 | x = self.Mixed_5b(x) 361 | # N x 256 x 35 x 35 362 | x = self.Mixed_5c(x) 363 | # N x 288 x 35 x 35 364 | x = self.Mixed_5d(x) 365 | # N x 288 x 35 x 35 366 | x = self.Mixed_6a(x) 367 | # N x 768 x 17 x 17 368 | x = self.Mixed_6b(x) 369 | # N x 768 x 17 x 17 370 | x = self.Mixed_6c(x) 371 | # N x 768 x 17 x 17 372 | x = self.Mixed_6d(x) 373 | # N x 768 x 17 x 17 374 | x = self.Mixed_6e(x) 375 | # N x 768 x 17 x 17 376 | aux = None 377 | if self.AuxLogits is not None: 378 | if self.training: 379 | aux = self.AuxLogits(x) 380 | # N x 768 x 17 x 17 381 | x = self.Mixed_7a(x) 382 | # N x 1280 x 8 x 8 383 | x = self.Mixed_7b(x) 384 | # N x 2048 x 8 x 8 385 | x = self.Mixed_7c(x) 386 | # N x 2048 x 8 x 8 387 | # Adaptive average pooling 388 | x = self.avgpool(x) 389 | # N x 2048 x 1 x 1 390 | x = self.dropout(x) 391 | # N x 2048 x 1 x 1 392 | x = torch.flatten(x, 1) 393 | # N x 2048 394 | x = self.embedding_recorder(x) 395 | x = self.fc(x) 396 | # N x 1000 (num_classes) 397 | return x, aux 398 | 399 | 400 | def InceptionV3(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 401 | pretrained: bool = False): 402 | if pretrained: 403 | if im_size[0] != 224 or im_size[1] != 224: 404 | raise NotImplementedError("torchvision pretrained models only accept inputs of size 224*224") 405 | net = InceptionV3_224x224(channel=3, num_classes=1000, record_embedding=record_embedding, no_grad=no_grad) 406 | 407 | from torch.hub import load_state_dict_from_url 408 | state_dict = load_state_dict_from_url(inception.model_urls["inception_v3_google"], progress=True) 409 | 
net.load_state_dict(state_dict) 410 | 411 | if channel != 3: 412 | net.Conv2d_1a_3x3 = inception.conv_block(channel, 32, kernel_size=3, stride=2) 413 | if num_classes != 1000: 414 | net.fc = nn.Linear(net.fc.in_features, num_classes) 415 | 416 | elif im_size[0] == 224 and im_size[1] == 224: 417 | net = InceptionV3_224x224(channel=channel, num_classes=num_classes, record_embedding=record_embedding, 418 | no_grad=no_grad) 419 | elif (channel == 1 and im_size[0] == 28 and im_size[1] == 28) or ( 420 | channel == 3 and im_size[0] == 32 and im_size[1] == 32): 421 | net = InceptionV3_32x32(channel=channel, num_classes=num_classes, record_embedding=record_embedding, 422 | no_grad=no_grad) 423 | else: 424 | raise NotImplementedError("Network Architecture for current dataset has not been implemented.") 425 | 426 | return net 427 | -------------------------------------------------------------------------------- /deepcore/nets/lenet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from torch import set_grad_enabled 4 | from .nets_utils import EmbeddingRecorder 5 | 6 | 7 | # Acknowledgement to 8 | # https://github.com/kuangliu/pytorch-cifar, 9 | # https://github.com/BIGBALLON/CIFAR-ZOO, 10 | 11 | class LeNet(nn.Module): 12 | def __init__(self, channel, num_classes, im_size, record_embedding: bool = False, no_grad: bool = False, 13 | pretrained: bool = False): 14 | if pretrained: 15 | raise NotImplementedError("torchvision pretrained models not available.") 16 | super(LeNet, self).__init__() 17 | self.features = nn.Sequential( 18 | nn.Conv2d(channel, 6, kernel_size=5, padding=2 if channel == 1 else 0), 19 | nn.ReLU(inplace=True), 20 | nn.MaxPool2d(kernel_size=2, stride=2), 21 | nn.Conv2d(6, 16, kernel_size=5), 22 | nn.ReLU(inplace=True), 23 | nn.MaxPool2d(kernel_size=2, stride=2), 24 | ) 25 | self.fc_1 = nn.Linear(16 * 53 * 53 if im_size[0] == im_size[1] == 224 else 16 * 5 * 5, 120) 26 | self.fc_2 = nn.Linear(120, 84) 27 | self.fc_3 = nn.Linear(84, num_classes) 28 | 29 | self.embedding_recorder = EmbeddingRecorder(record_embedding) 30 | self.no_grad = no_grad 31 | 32 | def get_last_layer(self): 33 | return self.fc_3 34 | 35 | def forward(self, x): 36 | with set_grad_enabled(not self.no_grad): 37 | x = self.features(x) 38 | x = x.view(x.size(0), -1) 39 | x = F.relu(self.fc_1(x)) 40 | x = F.relu(self.fc_2(x)) 41 | x = self.embedding_recorder(x) 42 | x = self.fc_3(x) 43 | return x 44 | -------------------------------------------------------------------------------- /deepcore/nets/mlp.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from torch import set_grad_enabled 4 | from .nets_utils import EmbeddingRecorder 5 | 6 | # Acknowledgement to 7 | # https://github.com/kuangliu/pytorch-cifar, 8 | # https://github.com/BIGBALLON/CIFAR-ZOO, 9 | 10 | 11 | ''' MLP ''' 12 | 13 | 14 | class MLP(nn.Module): 15 | def __init__(self, channel, num_classes, im_size, record_embedding: bool = False, no_grad: bool = False, 16 | pretrained: bool = False): 17 | if pretrained: 18 | raise NotImplementedError("torchvision pretrained models not available.") 19 | super(MLP, self).__init__() 20 | self.fc_1 = nn.Linear(im_size[0] * im_size[1] * channel, 128) 21 | self.fc_2 = nn.Linear(128, 128) 22 | self.fc_3 = nn.Linear(128, num_classes) 23 | 24 | self.embedding_recorder = EmbeddingRecorder(record_embedding) 25 | self.no_grad = no_grad 
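# Every net in deepcore.nets follows the same contract as this MLP: the penultimate activation is
# routed through self.embedding_recorder right before the classification head, self.no_grad wraps
# the forward pass in set_grad_enabled, and get_last_layer() exposes the final nn.Linear. A minimal
# usage sketch (assuming `net` is any model built from deepcore.nets and `x` is an input batch):
#     with net.embedding_recorder:  # temporarily enables recording (see nets_utils/recorder.py)
#         logits = net(x)
#         features = net.embedding_recorder.embedding  # penultimate-layer activations
#     head = net.get_last_layer()  # e.g. for methods that need last-layer gradients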
26 | 27 | def get_last_layer(self): 28 | return self.fc_3 29 | 30 | def forward(self, x): 31 | with set_grad_enabled(not self.no_grad): 32 | out = x.view(x.size(0), -1) 33 | out = F.relu(self.fc_1(out)) 34 | out = F.relu(self.fc_2(out)) 35 | out = self.embedding_recorder(out) 36 | out = self.fc_3(out) 37 | return out 38 | -------------------------------------------------------------------------------- /deepcore/nets/mobilenetv3.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch import set_grad_enabled, flatten, Tensor 3 | from torchvision.models import mobilenetv3 4 | from .nets_utils import EmbeddingRecorder 5 | import math 6 | 7 | '''MobileNetV3 in PyTorch. 8 | Paper: "Searching for MobileNetV3" 9 | 10 | Acknowledgement to: 11 | https://github.com/d-li14/mobilenetv3.pytorch/blob/master/mobilenetv3.py 12 | ''' 13 | 14 | 15 | def _make_divisible(v, divisor, min_value=None): 16 | """ 17 | This function is taken from the original tf repo. 18 | It ensures that all layers have a channel number that is divisible by 8 19 | It can be seen here: 20 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 21 | """ 22 | if min_value is None: 23 | min_value = divisor 24 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 25 | # Make sure that round down does not go down by more than 10%. 26 | if new_v < 0.9 * v: 27 | new_v += divisor 28 | return new_v 29 | 30 | 31 | class h_sigmoid(nn.Module): 32 | def __init__(self, inplace=True): 33 | super(h_sigmoid, self).__init__() 34 | self.relu = nn.ReLU6(inplace=inplace) 35 | 36 | def forward(self, x): 37 | return self.relu(x + 3) / 6 38 | 39 | 40 | class h_swish(nn.Module): 41 | def __init__(self, inplace=True): 42 | super(h_swish, self).__init__() 43 | self.sigmoid = h_sigmoid(inplace=inplace) 44 | 45 | def forward(self, x): 46 | return x * self.sigmoid(x) 47 | 48 | 49 | class SELayer(nn.Module): 50 | def __init__(self, channel, reduction=4): 51 | super(SELayer, self).__init__() 52 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 53 | self.fc = nn.Sequential( 54 | nn.Linear(channel, _make_divisible(channel // reduction, 8)), 55 | nn.ReLU(inplace=True), 56 | nn.Linear(_make_divisible(channel // reduction, 8), channel), 57 | h_sigmoid() 58 | ) 59 | 60 | def forward(self, x): 61 | b, c, _, _ = x.size() 62 | y = self.avg_pool(x).view(b, c) 63 | y = self.fc(y).view(b, c, 1, 1) 64 | return x * y 65 | 66 | 67 | def conv_3x3_bn(inp, oup, stride, padding=1): 68 | return nn.Sequential( 69 | nn.Conv2d(inp, oup, 3, stride, padding, bias=False), 70 | nn.BatchNorm2d(oup), 71 | h_swish() 72 | ) 73 | 74 | 75 | def conv_1x1_bn(inp, oup): 76 | return nn.Sequential( 77 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 78 | nn.BatchNorm2d(oup), 79 | h_swish() 80 | ) 81 | 82 | 83 | class InvertedResidual(nn.Module): 84 | def __init__(self, inp, hidden_dim, oup, kernel_size, stride, use_se, use_hs): 85 | super(InvertedResidual, self).__init__() 86 | assert stride in [1, 2] 87 | 88 | self.identity = stride == 1 and inp == oup 89 | 90 | if inp == hidden_dim: 91 | self.conv = nn.Sequential( 92 | # dw 93 | nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, 94 | bias=False), 95 | nn.BatchNorm2d(hidden_dim), 96 | h_swish() if use_hs else nn.ReLU(inplace=True), 97 | # Squeeze-and-Excite 98 | SELayer(hidden_dim) if use_se else nn.Identity(), 99 | # 
pw-linear 100 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 101 | nn.BatchNorm2d(oup), 102 | ) 103 | else: 104 | self.conv = nn.Sequential( 105 | # pw 106 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 107 | nn.BatchNorm2d(hidden_dim), 108 | h_swish() if use_hs else nn.ReLU(inplace=True), 109 | # dw 110 | nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, 111 | bias=False), 112 | nn.BatchNorm2d(hidden_dim), 113 | # Squeeze-and-Excite 114 | SELayer(hidden_dim) if use_se else nn.Identity(), 115 | h_swish() if use_hs else nn.ReLU(inplace=True), 116 | # pw-linear 117 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 118 | nn.BatchNorm2d(oup), 119 | ) 120 | 121 | def forward(self, x): 122 | if self.identity: 123 | return x + self.conv(x) 124 | else: 125 | return self.conv(x) 126 | 127 | 128 | class MobileNetV3_32x32(nn.Module): 129 | def __init__(self, cfgs, mode, channel=3, num_classes=1000, record_embedding=False, 130 | no_grad=False, width_mult=1.): 131 | super(MobileNetV3_32x32, self).__init__() 132 | # setting of inverted residual blocks 133 | self.cfgs = cfgs 134 | assert mode in ['mobilenet_v3_large', 'mobilenet_v3_small'] 135 | 136 | self.embedding_recorder = EmbeddingRecorder(record_embedding) 137 | self.no_grad = no_grad 138 | 139 | # building first layer 140 | input_channel = _make_divisible(16 * width_mult, 8) 141 | layers = [conv_3x3_bn(channel, input_channel, 2, padding=3 if channel == 1 else 1)] 142 | # building inverted residual blocks 143 | block = InvertedResidual 144 | for k, t, c, use_se, use_hs, s in self.cfgs: 145 | output_channel = _make_divisible(c * width_mult, 8) 146 | exp_size = _make_divisible(input_channel * t, 8) 147 | layers.append(block(input_channel, exp_size, output_channel, k, s, use_se, use_hs)) 148 | input_channel = output_channel 149 | self.features = nn.Sequential(*layers) 150 | # building last several layers 151 | self.conv = conv_1x1_bn(input_channel, exp_size) 152 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 153 | output_channel = {'mobilenet_v3_large': 1280, 'mobilenet_v3_small': 1024} 154 | output_channel = _make_divisible(output_channel[mode] * width_mult, 8) if width_mult > 1.0 else output_channel[ 155 | mode] 156 | self.classifier = nn.Sequential( 157 | nn.Linear(exp_size, output_channel), 158 | h_swish(), 159 | nn.Dropout(0.2), 160 | self.embedding_recorder, 161 | nn.Linear(output_channel, num_classes), 162 | ) 163 | 164 | self._initialize_weights() 165 | 166 | def forward(self, x): 167 | with set_grad_enabled(not self.no_grad): 168 | x = self.features(x) 169 | x = self.conv(x) 170 | x = self.avgpool(x) 171 | x = x.view(x.size(0), -1) 172 | x = self.classifier(x) 173 | return x 174 | 175 | def _initialize_weights(self): 176 | for m in self.modules(): 177 | if isinstance(m, nn.Conv2d): 178 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 179 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 180 | if m.bias is not None: 181 | m.bias.data.zero_() 182 | elif isinstance(m, nn.BatchNorm2d): 183 | m.weight.data.fill_(1) 184 | m.bias.data.zero_() 185 | elif isinstance(m, nn.Linear): 186 | m.weight.data.normal_(0, 0.01) 187 | m.bias.data.zero_() 188 | 189 | def get_last_layer(self): 190 | return self.classifier[-1] 191 | 192 | 193 | class MobileNetV3_224x224(mobilenetv3.MobileNetV3): 194 | def __init__(self, inverted_residual_setting, last_channel, 195 | channel=3, num_classes=1000, record_embedding=False, no_grad=False, **kwargs): 196 | super(MobileNetV3_224x224, self).__init__(inverted_residual_setting, last_channel, 197 | num_classes=num_classes, **kwargs) 198 | 199 | self.embedding_recorder = EmbeddingRecorder(record_embedding) 200 | 201 | self.fc = self.classifier[-1] 202 | self.classifier[-1] = self.embedding_recorder 203 | self.classifier.add_module("fc", self.fc) 204 | 205 | self.no_grad = no_grad 206 | 207 | def get_last_layer(self): 208 | return self.fc 209 | 210 | def _forward_impl(self, x: Tensor) -> Tensor: 211 | with set_grad_enabled(not self.no_grad): 212 | x = self.features(x) 213 | x = self.avgpool(x) 214 | x = flatten(x, 1) 215 | x = self.classifier(x) 216 | return x 217 | 218 | 219 | def MobileNetV3(arch: str, channel: int, num_classes: int, im_size, record_embedding: bool = False, 220 | no_grad: bool = False, 221 | pretrained: bool = False, **kwargs): 222 | arch = arch.lower() 223 | if pretrained: 224 | if channel != 3: 225 | raise NotImplementedError("Network Architecture for current dataset has not been implemented.") 226 | 227 | inverted_residual_setting, last_channel = mobilenetv3._mobilenet_v3_conf(arch) 228 | net = MobileNetV3_224x224(inverted_residual_setting=inverted_residual_setting, last_channel=last_channel, 229 | channel=3, num_classes=1000, record_embedding=record_embedding, no_grad=no_grad, 230 | **kwargs) 231 | 232 | from torch.hub import load_state_dict_from_url 233 | state_dict = load_state_dict_from_url(mobilenetv3.model_urls[arch], progress=True) 234 | net.load_state_dict(state_dict) 235 | 236 | if num_classes != 1000: 237 | net.fc = nn.Linear(last_channel, num_classes) 238 | net.classifier[-1] = net.fc 239 | 240 | elif im_size[0] == 224 and im_size[1] == 224: 241 | if channel != 3: 242 | raise NotImplementedError("Network Architecture for current dataset has not been implemented.") 243 | inverted_residual_setting, last_channel = mobilenetv3._mobilenet_v3_conf(arch) 244 | net = MobileNetV3_224x224(inverted_residual_setting=inverted_residual_setting, last_channel=last_channel, 245 | channel=channel, num_classes=num_classes, record_embedding=record_embedding, 246 | no_grad=no_grad, **kwargs) 247 | 248 | elif (channel == 1 and im_size[0] == 28 and im_size[1] == 28) or ( 249 | channel == 3 and im_size[0] == 32 and im_size[1] == 32): 250 | if arch == "mobilenet_v3_large": 251 | cfgs = [ 252 | # k, t, c, SE, HS, s 253 | [3, 1, 16, 0, 0, 1], 254 | [3, 4, 24, 0, 0, 2], 255 | [3, 3, 24, 0, 0, 1], 256 | [5, 3, 40, 1, 0, 2], 257 | [5, 3, 40, 1, 0, 1], 258 | [5, 3, 40, 1, 0, 1], 259 | [3, 6, 80, 0, 1, 2], 260 | [3, 2.5, 80, 0, 1, 1], 261 | [3, 2.3, 80, 0, 1, 1], 262 | [3, 2.3, 80, 0, 1, 1], 263 | [3, 6, 112, 1, 1, 1], 264 | [3, 6, 112, 1, 1, 1], 265 | [5, 6, 160, 1, 1, 2], 266 | [5, 6, 160, 1, 1, 1], 267 | [5, 6, 160, 1, 1, 1] 268 | ] 269 | net = MobileNetV3_32x32(cfgs, arch, channel=channel, num_classes=num_classes, 270 | record_embedding=record_embedding, no_grad=no_grad) 271 | elif arch == "mobilenet_v3_small": 272 | cfgs = [ 273 | # k, t, c, SE, 
HS, s 274 | [3, 1, 16, 1, 0, 2], 275 | [3, 4.5, 24, 0, 0, 2], 276 | [3, 3.67, 24, 0, 0, 1], 277 | [5, 4, 40, 1, 1, 2], 278 | [5, 6, 40, 1, 1, 1], 279 | [5, 6, 40, 1, 1, 1], 280 | [5, 3, 48, 1, 1, 1], 281 | [5, 3, 48, 1, 1, 1], 282 | [5, 6, 96, 1, 1, 2], 283 | [5, 6, 96, 1, 1, 1], 284 | [5, 6, 96, 1, 1, 1], 285 | ] 286 | net = MobileNetV3_32x32(cfgs, arch, channel=channel, num_classes=num_classes, 287 | record_embedding=record_embedding, no_grad=no_grad) 288 | else: 289 | raise ValueError("Model architecture not found.") 290 | else: 291 | raise NotImplementedError("Network Architecture for current dataset has not been implemented.") 292 | return net 293 | 294 | 295 | def MobileNetV3Large(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 296 | pretrained: bool = False, **kwargs): 297 | return MobileNetV3("mobilenet_v3_large", channel, num_classes, im_size, record_embedding, no_grad, 298 | pretrained, **kwargs) 299 | 300 | 301 | def MobileNetV3Small(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 302 | pretrained: bool = False, **kwargs): 303 | return MobileNetV3("mobilenet_v3_small", channel, num_classes, im_size, record_embedding, no_grad, 304 | pretrained, **kwargs) 305 | -------------------------------------------------------------------------------- /deepcore/nets/nets_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .parallel import * 2 | from .recorder import * -------------------------------------------------------------------------------- /deepcore/nets/nets_utils/parallel.py: -------------------------------------------------------------------------------- 1 | from torch.nn import DataParallel 2 | 3 | 4 | class MyDataParallel(DataParallel): 5 | def __getattr__(self, name): 6 | try: 7 | return super().__getattr__(name) 8 | except AttributeError: 9 | return getattr(self.module, name) 10 | def __setattr__(self, name, value): 11 | try: 12 | if name == "no_grad": 13 | return setattr(self.module, name, value) 14 | return super().__setattr__(name, value) 15 | except AttributeError: 16 | return setattr(self.module, name, value) 17 | -------------------------------------------------------------------------------- /deepcore/nets/nets_utils/recorder.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class EmbeddingRecorder(nn.Module): 5 | def __init__(self, record_embedding: bool = False): 6 | super().__init__() 7 | self.record_embedding = record_embedding 8 | 9 | def forward(self, x): 10 | if self.record_embedding: 11 | self.embedding = x 12 | return x 13 | 14 | def __enter__(self): 15 | self.record_embedding = True 16 | 17 | def __exit__(self, exc_type, exc_val, exc_tb): 18 | self.record_embedding = False -------------------------------------------------------------------------------- /deepcore/nets/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from torch import set_grad_enabled, flatten, Tensor 4 | from .nets_utils import EmbeddingRecorder 5 | from torchvision.models import resnet 6 | 7 | 8 | # Acknowledgement to 9 | # https://github.com/kuangliu/pytorch-cifar, 10 | # https://github.com/BIGBALLON/CIFAR-ZOO, 11 | 12 | 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, 
bias=False) 15 | 16 | 17 | class BasicBlock(nn.Module): 18 | expansion = 1 19 | 20 | def __init__(self, in_planes, planes, stride=1): 21 | super(BasicBlock, self).__init__() 22 | self.conv1 = conv3x3(in_planes, planes, stride) 23 | self.bn1 = nn.BatchNorm2d(planes) 24 | self.conv2 = conv3x3(planes, planes) 25 | self.bn2 = nn.BatchNorm2d(planes) 26 | 27 | self.shortcut = nn.Sequential() 28 | if stride != 1 or in_planes != self.expansion * planes: 29 | self.shortcut = nn.Sequential( 30 | nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False), 31 | nn.BatchNorm2d(self.expansion * planes) 32 | ) 33 | 34 | def forward(self, x): 35 | out = F.relu(self.bn1(self.conv1(x))) 36 | out = self.bn2(self.conv2(out)) 37 | out += self.shortcut(x) 38 | out = F.relu(out) 39 | return out 40 | 41 | 42 | class Bottleneck(nn.Module): 43 | expansion = 4 44 | 45 | def __init__(self, in_planes, planes, stride=1): 46 | super(Bottleneck, self).__init__() 47 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 48 | self.bn1 = nn.BatchNorm2d(planes) 49 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 50 | self.bn2 = nn.BatchNorm2d(planes) 51 | self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, bias=False) 52 | self.bn3 = nn.BatchNorm2d(self.expansion * planes) 53 | 54 | self.shortcut = nn.Sequential() 55 | if stride != 1 or in_planes != self.expansion * planes: 56 | self.shortcut = nn.Sequential( 57 | nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False), 58 | nn.BatchNorm2d(self.expansion * planes) 59 | ) 60 | 61 | def forward(self, x): 62 | out = F.relu(self.bn1(self.conv1(x))) 63 | out = F.relu(self.bn2(self.conv2(out))) 64 | out = self.bn3(self.conv3(out)) 65 | out += self.shortcut(x) 66 | out = F.relu(out) 67 | return out 68 | 69 | 70 | class ResNet_32x32(nn.Module): 71 | def __init__(self, block, num_blocks, channel=3, num_classes=10, record_embedding: bool = False, 72 | no_grad: bool = False): 73 | super().__init__() 74 | self.in_planes = 64 75 | 76 | self.conv1 = conv3x3(channel, 64) 77 | self.bn1 = nn.BatchNorm2d(64) 78 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 79 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 80 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 81 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 82 | self.linear = nn.Linear(512 * block.expansion, num_classes) 83 | 84 | self.embedding_recorder = EmbeddingRecorder(record_embedding) 85 | self.no_grad = no_grad 86 | 87 | def get_last_layer(self): 88 | return self.linear 89 | 90 | def _make_layer(self, block, planes, num_blocks, stride): 91 | strides = [stride] + [1] * (num_blocks - 1) 92 | layers = [] 93 | for stride in strides: 94 | layers.append(block(self.in_planes, planes, stride)) 95 | self.in_planes = planes * block.expansion 96 | return nn.Sequential(*layers) 97 | 98 | def forward(self, x): 99 | with set_grad_enabled(not self.no_grad): 100 | out = F.relu(self.bn1(self.conv1(x))) 101 | out = self.layer1(out) 102 | out = self.layer2(out) 103 | out = self.layer3(out) 104 | out = self.layer4(out) 105 | out = F.avg_pool2d(out, 4) 106 | out = out.view(out.size(0), -1) 107 | out = self.embedding_recorder(out) 108 | out = self.linear(out) 109 | return out 110 | 111 | 112 | class ResNet_224x224(resnet.ResNet): 113 | def __init__(self, block, layers, channel: int, num_classes: int, record_embedding: bool = False, 
114 | no_grad: bool = False, **kwargs): 115 | super().__init__(block, layers, **kwargs) 116 | self.embedding_recorder = EmbeddingRecorder(record_embedding) 117 | if channel != 3: 118 | self.conv1 = nn.Conv2d(channel, 64, kernel_size=7, stride=2, padding=3, bias=False) 119 | if num_classes != 1000: 120 | self.fc = nn.Linear(self.fc.in_features, num_classes) 121 | self.no_grad = no_grad 122 | 123 | def get_last_layer(self): 124 | return self.fc 125 | 126 | def _forward_impl(self, x: Tensor) -> Tensor: 127 | # See note [TorchScript super()] 128 | with set_grad_enabled(not self.no_grad): 129 | x = self.conv1(x) 130 | x = self.bn1(x) 131 | x = self.relu(x) 132 | x = self.maxpool(x) 133 | 134 | x = self.layer1(x) 135 | x = self.layer2(x) 136 | x = self.layer3(x) 137 | x = self.layer4(x) 138 | 139 | x = self.avgpool(x) 140 | x = flatten(x, 1) 141 | x = self.embedding_recorder(x) 142 | x = self.fc(x) 143 | 144 | return x 145 | 146 | 147 | def ResNet(arch: str, channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 148 | pretrained: bool = False): 149 | arch = arch.lower() 150 | if pretrained: 151 | if arch == "resnet18": 152 | net = ResNet_224x224(resnet.BasicBlock, [2, 2, 2, 2], channel=3, num_classes=1000, 153 | record_embedding=record_embedding, no_grad=no_grad) 154 | elif arch == "resnet34": 155 | net = ResNet_224x224(resnet.BasicBlock, [3, 4, 6, 3], channel=3, num_classes=1000, 156 | record_embedding=record_embedding, no_grad=no_grad) 157 | elif arch == "resnet50": 158 | net = ResNet_224x224(resnet.Bottleneck, [3, 4, 6, 3], channel=3, num_classes=1000, 159 | record_embedding=record_embedding, no_grad=no_grad) 160 | elif arch == "resnet101": 161 | net = ResNet_224x224(resnet.Bottleneck, [3, 4, 23, 3], channel=3, num_classes=1000, 162 | record_embedding=record_embedding, no_grad=no_grad) 163 | elif arch == "resnet152": 164 | net = ResNet_224x224(resnet.Bottleneck, [3, 8, 36, 3], channel=3, num_classes=1000, 165 | record_embedding=record_embedding, no_grad=no_grad) 166 | else: 167 | raise ValueError("Model architecture not found.") 168 | from torch.hub import load_state_dict_from_url 169 | state_dict = load_state_dict_from_url(resnet.model_urls[arch], progress=True) 170 | net.load_state_dict(state_dict) 171 | 172 | if channel != 3: 173 | net.conv1 = nn.Conv2d(channel, 64, kernel_size=7, stride=2, padding=3, bias=False) 174 | if num_classes != 1000: 175 | net.fc = nn.Linear(net.fc.in_features, num_classes) 176 | 177 | elif im_size[0] == 224 and im_size[1] == 224: 178 | if arch == "resnet18": 179 | net = ResNet_224x224(resnet.BasicBlock, [2, 2, 2, 2], channel=channel, num_classes=num_classes, 180 | record_embedding=record_embedding, no_grad=no_grad) 181 | elif arch == "resnet34": 182 | net = ResNet_224x224(resnet.BasicBlock, [3, 4, 6, 3], channel=channel, num_classes=num_classes, 183 | record_embedding=record_embedding, no_grad=no_grad) 184 | elif arch == "resnet50": 185 | net = ResNet_224x224(resnet.Bottleneck, [3, 4, 6, 3], channel=channel, num_classes=num_classes, 186 | record_embedding=record_embedding, no_grad=no_grad) 187 | elif arch == "resnet101": 188 | net = ResNet_224x224(resnet.Bottleneck, [3, 4, 23, 3], channel=channel, num_classes=num_classes, 189 | record_embedding=record_embedding, no_grad=no_grad) 190 | elif arch == "resnet152": 191 | net = ResNet_224x224(resnet.Bottleneck, [3, 8, 36, 3], channel=channel, num_classes=num_classes, 192 | record_embedding=record_embedding, no_grad=no_grad) 193 | else: 194 | raise ValueError("Model architecture 
not found.") 195 | elif (channel == 1 and im_size[0] == 28 and im_size[1] == 28) or ( 196 | channel == 3 and im_size[0] == 32 and im_size[1] == 32): 197 | if arch == "resnet18": 198 | net = ResNet_32x32(BasicBlock, [2, 2, 2, 2], channel=channel, num_classes=num_classes, 199 | record_embedding=record_embedding, no_grad=no_grad) 200 | elif arch == "resnet34": 201 | net = ResNet_32x32(BasicBlock, [3, 4, 6, 3], channel=channel, num_classes=num_classes, 202 | record_embedding=record_embedding, no_grad=no_grad) 203 | elif arch == "resnet50": 204 | net = ResNet_32x32(Bottleneck, [3, 4, 6, 3], channel=channel, num_classes=num_classes, 205 | record_embedding=record_embedding, no_grad=no_grad) 206 | elif arch == "resnet101": 207 | net = ResNet_32x32(Bottleneck, [3, 4, 23, 3], channel=channel, num_classes=num_classes, 208 | record_embedding=record_embedding, no_grad=no_grad) 209 | elif arch == "resnet152": 210 | net = ResNet_32x32(Bottleneck, [3, 8, 36, 3], channel=channel, num_classes=num_classes, 211 | record_embedding=record_embedding, no_grad=no_grad) 212 | else: 213 | raise ValueError("Model architecture not found.") 214 | else: 215 | raise NotImplementedError("Network Architecture for current dataset has not been implemented.") 216 | return net 217 | 218 | 219 | def ResNet18(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 220 | pretrained: bool = False): 221 | return ResNet("resnet18", channel, num_classes, im_size, record_embedding, no_grad, pretrained) 222 | 223 | 224 | def ResNet34(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 225 | pretrained: bool = False): 226 | return ResNet("resnet34", channel, num_classes, im_size, record_embedding, no_grad, pretrained) 227 | 228 | 229 | def ResNet50(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 230 | pretrained: bool = False): 231 | return ResNet("resnet50", channel, num_classes, im_size, record_embedding, no_grad, pretrained) 232 | 233 | 234 | def ResNet101(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 235 | pretrained: bool = False): 236 | return ResNet("resnet101", channel, num_classes, im_size, record_embedding, no_grad, pretrained) 237 | 238 | 239 | def ResNet152(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 240 | pretrained: bool = False): 241 | return ResNet("resnet152", channel, num_classes, im_size, record_embedding, no_grad, pretrained) 242 | -------------------------------------------------------------------------------- /deepcore/nets/vgg.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch import set_grad_enabled, flatten, Tensor 3 | from .nets_utils import EmbeddingRecorder 4 | from torchvision.models import vgg 5 | 6 | # Acknowledgement to 7 | # https://github.com/kuangliu/pytorch-cifar, 8 | # https://github.com/BIGBALLON/CIFAR-ZOO, 9 | 10 | cfg_vgg = { 11 | 'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 12 | 'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 13 | 'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 14 | 'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 15 | } 16 | 17 | 18 | class VGG_32x32(nn.Module): 19 | def __init__(self, vgg_name, 
channel, num_classes, record_embedding=False, no_grad=False): 20 | super(VGG_32x32, self).__init__() 21 | self.channel = channel 22 | self.features = self._make_layers(cfg_vgg[vgg_name]) 23 | self.classifier = nn.Linear(512 if vgg_name != 'VGGS' else 128, num_classes) 24 | 25 | self.embedding_recorder = EmbeddingRecorder(record_embedding) 26 | self.no_grad = no_grad 27 | 28 | def forward(self, x): 29 | with set_grad_enabled(not self.no_grad): 30 | x = self.features(x) 31 | x = x.view(x.size(0), -1) 32 | x = self.embedding_recorder(x) 33 | x = self.classifier(x) 34 | return x 35 | 36 | def get_last_layer(self): 37 | return self.classifier 38 | 39 | def _make_layers(self, cfg): 40 | layers = [] 41 | in_channels = self.channel 42 | for ic, x in enumerate(cfg): 43 | if x == 'M': 44 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 45 | else: 46 | layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=3 if self.channel == 1 and ic == 0 else 1), 47 | nn.BatchNorm2d(x), 48 | nn.ReLU(inplace=True)] 49 | in_channels = x 50 | layers += [nn.AvgPool2d(kernel_size=1, stride=1)] 51 | return nn.Sequential(*layers) 52 | 53 | 54 | class VGG_224x224(vgg.VGG): 55 | def __init__(self, features: nn.Module, channel: int, num_classes: int, record_embedding: bool = False, 56 | no_grad: bool = False, **kwargs): 57 | super(VGG_224x224, self).__init__(features, num_classes, **kwargs) 58 | self.embedding_recorder = EmbeddingRecorder(record_embedding) 59 | if channel != 3: 60 | self.features[0] = nn.Conv2d(channel, 64, kernel_size=3, padding=1) 61 | self.fc = self.classifier[-1] 62 | self.classifier[-1] = self.embedding_recorder 63 | self.classifier.add_module("fc", self.fc) 64 | 65 | self.no_grad = no_grad 66 | 67 | def get_last_layer(self): 68 | return self.fc 69 | 70 | def forward(self, x: Tensor) -> Tensor: 71 | with set_grad_enabled(not self.no_grad): 72 | x = self.features(x) 73 | x = self.avgpool(x) 74 | x = flatten(x, 1) 75 | x = self.classifier(x) 76 | return x 77 | 78 | 79 | def VGG(arch: str, channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 80 | pretrained: bool = False): 81 | arch = arch.lower() 82 | if pretrained: 83 | if im_size[0] != 224 or im_size[1] != 224: 84 | raise NotImplementedError("torchvision pretrained models only accept inputs of size 224*224") 85 | net = VGG_224x224(features=vgg.make_layers(cfg_vgg[arch], True), channel=3, num_classes=1000, 86 | record_embedding=record_embedding, no_grad=no_grad) 87 | 88 | from torch.hub import load_state_dict_from_url 89 | state_dict = load_state_dict_from_url(vgg.model_urls[arch], progress=True) 90 | net.load_state_dict(state_dict) 91 | 92 | if channel != 3: 93 | net.features[0] = nn.Conv2d(channel, 64, kernel_size=3, padding=1) 94 | 95 | if num_classes != 1000: 96 | net.fc = nn.Linear(4096, num_classes) 97 | net.classifier[-1] = net.fc 98 | 99 | elif im_size[0] == 224 and im_size[1] == 224: 100 | net = VGG_224x224(features=vgg.make_layers(cfg_vgg[arch], True), channel=channel, num_classes=num_classes, 101 | record_embedding=record_embedding, no_grad=no_grad) 102 | 103 | elif (channel == 1 and im_size[0] == 28 and im_size[1] == 28) or ( 104 | channel == 3 and im_size[0] == 32 and im_size[1] == 32): 105 | net = VGG_32x32(arch, channel, num_classes=num_classes, record_embedding=record_embedding, no_grad=no_grad) 106 | else: 107 | raise NotImplementedError("Network Architecture for current dataset has not been implemented.") 108 | return net 109 | 110 | 111 | def VGG11(channel: int, num_classes: int, 
im_size, record_embedding: bool = False, no_grad: bool = False, 112 | pretrained: bool = False): 113 | return VGG("vgg11", channel, num_classes, im_size, record_embedding, no_grad, pretrained) 114 | 115 | 116 | def VGG13(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 117 | pretrained: bool = False): 118 | return VGG('vgg13', channel, num_classes, im_size, record_embedding, no_grad, pretrained) 119 | 120 | 121 | def VGG16(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 122 | pretrained: bool = False): 123 | return VGG('vgg16', channel, num_classes, im_size, record_embedding, no_grad, pretrained) 124 | 125 | 126 | def VGG19(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 127 | pretrained: bool = False): 128 | return VGG('vgg19', channel, num_classes, im_size, record_embedding, no_grad, pretrained) 129 | -------------------------------------------------------------------------------- /deepcore/nets/wideresnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .nets_utils import EmbeddingRecorder 5 | from torchvision.models import resnet 6 | from .resnet import ResNet_224x224 7 | 8 | 9 | # Acknowledgement to 10 | # https://github.com/xternalz/WideResNet-pytorch 11 | 12 | class BasicBlock(nn.Module): 13 | def __init__(self, in_planes, out_planes, stride, dropRate=0.0): 14 | super(BasicBlock, self).__init__() 15 | self.bn1 = nn.BatchNorm2d(in_planes) 16 | self.relu1 = nn.ReLU(inplace=True) 17 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 18 | padding=1, bias=False) 19 | self.bn2 = nn.BatchNorm2d(out_planes) 20 | self.relu2 = nn.ReLU(inplace=True) 21 | self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, 22 | padding=1, bias=False) 23 | self.droprate = dropRate 24 | self.equalInOut = (in_planes == out_planes) 25 | self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, 26 | padding=0, bias=False) or None 27 | 28 | def forward(self, x): 29 | if not self.equalInOut: 30 | x = self.relu1(self.bn1(x)) 31 | else: 32 | out = self.relu1(self.bn1(x)) 33 | out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x))) 34 | if self.droprate > 0: 35 | out = F.dropout(out, p=self.droprate, training=self.training) 36 | out = self.conv2(out) 37 | return torch.add(x if self.equalInOut else self.convShortcut(x), out) 38 | 39 | 40 | class NetworkBlock(nn.Module): 41 | def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0): 42 | super(NetworkBlock, self).__init__() 43 | self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate) 44 | 45 | def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate): 46 | layers = [] 47 | for i in range(int(nb_layers)): 48 | layers.append(block(i == 0 and in_planes or out_planes, out_planes, i == 0 and stride or 1, dropRate)) 49 | return nn.Sequential(*layers) 50 | 51 | def forward(self, x): 52 | return self.layer(x) 53 | 54 | 55 | class WideResNet_32x32(nn.Module): 56 | def __init__(self, depth, num_classes, channel=3, widen_factor=1, drop_rate=0.0, record_embedding=False, 57 | no_grad=False): 58 | super(WideResNet_32x32, self).__init__() 59 | nChannels = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor] 60 | assert ((depth 
- 4) % 6 == 0) 61 | n = (depth - 4) / 6 62 | block = BasicBlock 63 | # 1st conv before any network block 64 | self.conv1 = nn.Conv2d(channel, nChannels[0], kernel_size=3, stride=1, 65 | padding=3 if channel == 1 else 1, bias=False) 66 | # 1st block 67 | self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, drop_rate) 68 | # 2nd block 69 | self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, drop_rate) 70 | # 3rd block 71 | self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, drop_rate) 72 | # global average pooling and classifier 73 | self.bn1 = nn.BatchNorm2d(nChannels[3]) 74 | self.relu = nn.ReLU(inplace=True) 75 | self.fc = nn.Linear(nChannels[3], num_classes) 76 | self.nChannels = nChannels[3] 77 | 78 | for m in self.modules(): 79 | if isinstance(m, nn.Conv2d): 80 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 81 | elif isinstance(m, nn.BatchNorm2d): 82 | m.weight.data.fill_(1) 83 | m.bias.data.zero_() 84 | elif isinstance(m, nn.Linear): 85 | m.bias.data.zero_() 86 | 87 | self.embedding_recorder = EmbeddingRecorder(record_embedding) 88 | self.no_grad = no_grad 89 | 90 | def get_last_layer(self): 91 | return self.fc 92 | 93 | def forward(self, x): 94 | with torch.set_grad_enabled(not self.no_grad): 95 | out = self.conv1(x) 96 | out = self.block1(out) 97 | out = self.block2(out) 98 | out = self.block3(out) 99 | out = self.relu(self.bn1(out)) 100 | out = F.avg_pool2d(out, 8) 101 | out = out.view(-1, self.nChannels) 102 | out = self.embedding_recorder(out) 103 | return self.fc(out) 104 | 105 | 106 | def WideResNet(arch: str, channel: int, num_classes: int, im_size, record_embedding: bool = False, 107 | no_grad: bool = False, pretrained: bool = False): 108 | arch = arch.lower() 109 | if pretrained: 110 | if im_size[0] != 224 or im_size[1] != 224: 111 | raise NotImplementedError("torchvision pretrained models only accept inputs of size 224*224") 112 | if arch == "wrn502": 113 | arch = "wide_resnet50_2" 114 | net = ResNet_224x224(resnet.Bottleneck, [3, 4, 6, 3], channel=3, num_classes=1000, 115 | record_embedding=record_embedding, no_grad=no_grad, width_per_group=64 * 2) 116 | elif arch == "wrn1012": 117 | arch = "wide_resnet101_2" 118 | net = ResNet_224x224(resnet.Bottleneck, [3, 4, 23, 3], channel=3, num_classes=1000, 119 | record_embedding=record_embedding, no_grad=no_grad, width_per_group=64 * 2) 120 | else: 121 | raise ValueError("Model architecture not found.") 122 | from torch.hub import load_state_dict_from_url 123 | state_dict = load_state_dict_from_url(resnet.model_urls[arch], progress=True) 124 | net.load_state_dict(state_dict) 125 | 126 | if channel != 3: 127 | net.conv1 = nn.Conv2d(channel, 64, kernel_size=7, stride=2, padding=3, bias=False) 128 | if num_classes != 1000: 129 | net.fc = nn.Linear(net.fc.in_features, num_classes) 130 | 131 | elif im_size[0] == 224 and im_size[1] == 224: 132 | # Use torchvision models without pretrained parameters 133 | if arch == "wrn502": 134 | arch = "wide_resnet50_2" 135 | net = ResNet_224x224(resnet.Bottleneck, [3, 4, 6, 3], channel=channel, num_classes=num_classes, 136 | record_embedding=record_embedding, no_grad=no_grad, width_per_group=64 * 2) 137 | elif arch == "wrn1012": 138 | arch = "wide_resnet101_2" 139 | net = ResNet_224x224(resnet.Bottleneck, [3, 4, 23, 3], channel=channel, num_classes=num_classes, 140 | record_embedding=record_embedding, no_grad=no_grad, width_per_group=64 * 2) 141 | else: 142 | raise ValueError("Model architecture not found.") 143 | 144 | elif 
(channel == 1 and im_size[0] == 28 and im_size[1] == 28) or ( 145 | channel == 3 and im_size[0] == 32 and im_size[1] == 32): 146 | if arch == "wrn168": 147 | net = WideResNet_32x32(16, num_classes, channel, 8) 148 | elif arch == "wrn2810": 149 | net = WideResNet_32x32(28, num_classes, channel, 10) 150 | elif arch == "wrn282": 151 | net = WideResNet_32x32(28, num_classes, channel, 2) 152 | else: 153 | raise ValueError("Model architecture not found.") 154 | else: 155 | raise NotImplementedError("Network Architecture for current dataset has not been implemented.") 156 | return net 157 | 158 | 159 | def WRN168(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 160 | pretrained: bool = False): 161 | return WideResNet("wrn168", channel, num_classes, im_size, record_embedding, no_grad, pretrained) 162 | 163 | 164 | def WRN2810(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 165 | pretrained: bool = False): 166 | return WideResNet("wrn2810", channel, num_classes, im_size, record_embedding, no_grad, pretrained) 167 | 168 | 169 | def WRN282(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 170 | pretrained: bool = False): 171 | return WideResNet('wrn282', channel, num_classes, im_size, record_embedding, no_grad, pretrained) 172 | 173 | 174 | def WRN502(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 175 | pretrained: bool = False): 176 | return WideResNet("wrn502", channel, num_classes, im_size, record_embedding, no_grad, pretrained) 177 | 178 | 179 | def WRN1012(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False, 180 | pretrained: bool = False): 181 | return WideResNet("wrn1012", channel, num_classes, im_size, record_embedding, no_grad, pretrained) 182 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch.nn as nn 3 | import argparse 4 | import deepcore.nets as nets 5 | import deepcore.datasets as datasets 6 | import deepcore.methods as methods 7 | from torchvision import transforms 8 | from utils import * 9 | from datetime import datetime 10 | from time import sleep 11 | 12 | 13 | def main(): 14 | parser = argparse.ArgumentParser(description='Parameter Processing') 15 | 16 | # Basic arguments 17 | parser.add_argument('--dataset', type=str, default='CIFAR10', help='dataset') 18 | parser.add_argument('--model', type=str, default='ResNet18', help='model') 19 | parser.add_argument('--selection', type=str, default="uniform", help="selection method") 20 | parser.add_argument('--num_exp', type=int, default=5, help='the number of experiments') 21 | parser.add_argument('--num_eval', type=int, default=10, help='the number of randomly initialized models to evaluate') 22 | parser.add_argument('--epochs', default=200, type=int, help='number of total epochs to run') 23 | parser.add_argument('--data_path', type=str, default='data', help='dataset path') 24 | parser.add_argument('--gpu', default=None, nargs="+", type=int, help='GPU id to use') 25 | parser.add_argument('--print_freq', '-p', default=20, type=int, help='print frequency (default: 20)') 26 | parser.add_argument('--fraction', default=0.1, type=float, help='fraction of data to be selected (default: 0.1)') 27 | parser.add_argument('--seed', default=int(time.time() * 1000) % 100000, 
type=int, help="random seed") 28 | parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', 29 | help='number of data loading workers (default: 4)') 30 | parser.add_argument("--cross", type=str, nargs="+", default=None, help="models for cross-architecture experiments") 31 | 32 | # Optimizer and scheduler 33 | parser.add_argument('--optimizer', default="SGD", help='optimizer to use, e.g. SGD, Adam') 34 | parser.add_argument('--lr', type=float, default=0.1, help='learning rate for updating network parameters') 35 | parser.add_argument('--min_lr', type=float, default=1e-4, help='minimum learning rate') 36 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 37 | help='momentum (default: 0.9)') 38 | parser.add_argument('-wd', '--weight_decay', default=5e-4, type=float, 39 | metavar='W', help='weight decay (default: 5e-4)', 40 | dest='weight_decay') 41 | parser.add_argument("--nesterov", default=True, type=str_to_bool, help="whether to use Nesterov momentum") 42 | parser.add_argument("--scheduler", default="CosineAnnealingLR", type=str, help= 43 | "Learning rate scheduler") 44 | parser.add_argument("--gamma", type=float, default=.5, help="Gamma value for StepLR") 45 | parser.add_argument("--step_size", type=float, default=50, help="Step size for StepLR") 46 | 47 | # Training 48 | parser.add_argument('--batch', '--batch-size', "-b", default=256, type=int, metavar='N', 49 | help='mini-batch size (default: 256)') 50 | parser.add_argument("--train_batch", "-tb", default=None, type=int, 51 | help="batch size for training; if not specified, it equals the batch size in argument --batch") 52 | parser.add_argument("--selection_batch", "-sb", default=None, type=int, 53 | help="batch size for selection; if not specified, it equals the batch size in argument --batch") 54 | 55 | # Testing 56 | parser.add_argument("--test_interval", '-ti', default=1, type=int, help= 57 | "the number of training epochs to be performed between two test epochs; a value of 0 means no test will be run (default: 1)") 58 | parser.add_argument("--test_fraction", '-tf', type=float, default=1., 59 | help="proportion of test dataset used for evaluating the model (default: 1.)") 60 | 61 | # Selecting 62 | parser.add_argument("--selection_epochs", "-se", default=40, type=int, 63 | help="number of epochs while performing selection on the full dataset") 64 | parser.add_argument('--selection_momentum', '-sm', default=0.9, type=float, metavar='M', 65 | help='momentum while performing selection (default: 0.9)') 66 | parser.add_argument('--selection_weight_decay', '-swd', default=5e-4, type=float, 67 | metavar='W', help='weight decay while performing selection (default: 5e-4)', 68 | dest='selection_weight_decay') 69 | parser.add_argument('--selection_optimizer', "-so", default="SGD", 70 | help='optimizer to use while performing selection, e.g. 
SGD, Adam') 71 | parser.add_argument("--selection_nesterov", "-sn", default=True, type=str_to_bool, 72 | help="whether to use Nesterov momentum while performing selection") 73 | parser.add_argument('--selection_lr', '-slr', type=float, default=0.1, help='learning rate for selection') 74 | parser.add_argument("--selection_test_interval", '-sti', default=1, type=int, help= 75 | "the number of training epochs to be performed between two test epochs during selection (default: 1)") 76 | parser.add_argument("--selection_test_fraction", '-stf', type=float, default=1., 77 | help="proportion of test dataset used for evaluating the model while performing selection (default: 1.)") 78 | parser.add_argument('--balance', default=True, type=str_to_bool, 79 | help="whether to balance the selection per class") 80 | 81 | # Algorithm 82 | parser.add_argument('--submodular', default="GraphCut", help="specify the submodular function to use") 83 | parser.add_argument('--submodular_greedy', default="LazyGreedy", help="specify the greedy algorithm for submodular optimization") 84 | parser.add_argument('--uncertainty', default="Entropy", help="specify the uncertainty score to use") 85 | 86 | # Checkpoint and resumption 87 | parser.add_argument('--save_path', "-sp", type=str, default='', help='path to save results (default: do not save)') 88 | parser.add_argument('--resume', '-r', type=str, default='', help="path to latest checkpoint (default: do not load)") 89 | 90 | args = parser.parse_args() 91 | args.device = 'cuda' if torch.cuda.is_available() else 'cpu' 92 | 93 | if args.train_batch is None: 94 | args.train_batch = args.batch 95 | if args.selection_batch is None: 96 | args.selection_batch = args.batch 97 | if args.save_path != "" and not os.path.exists(args.save_path): 98 | os.mkdir(args.save_path) 99 | if not os.path.exists(args.data_path): 100 | os.mkdir(args.data_path) 101 | 102 | if args.resume != "": 103 | # Load checkpoint 104 | try: 105 | print("=> Loading checkpoint '{}'".format(args.resume)) 106 | checkpoint = torch.load(args.resume, map_location=args.device) 107 | assert {"exp", "epoch", "state_dict", "opt_dict", "best_acc1", "rec", "subset", "sel_args"} <= set( 108 | checkpoint.keys()) 109 | assert 'indices' in checkpoint["subset"].keys() 110 | start_exp = checkpoint['exp'] 111 | start_epoch = checkpoint["epoch"] 112 | except AssertionError: 113 | try: 114 | assert {"exp", "subset", "sel_args"} <= set(checkpoint.keys()) 115 | assert 'indices' in checkpoint["subset"].keys() 116 | print("=> The checkpoint only contains the subset, training will start from the beginning") 117 | start_exp = checkpoint['exp'] 118 | start_epoch = 0 119 | except AssertionError: 120 | print("=> Failed to load the checkpoint, an empty one will be created") 121 | checkpoint = {} 122 | start_exp = 0 123 | start_epoch = 0 124 | else: 125 | checkpoint = {} 126 | start_exp = 0 127 | start_epoch = 0 128 | 129 | for exp in range(start_exp, args.num_exp): 130 | if args.save_path != "": 131 | checkpoint_name = "{dst}_{net}_{mtd}_exp{exp}_epoch{epc}_{dat}_{fr}_".format(dst=args.dataset, 132 | net=args.model, 133 | mtd=args.selection, 134 | dat=datetime.now(), 135 | exp=exp, 136 | epc=args.epochs, 137 | fr=args.fraction) 138 | 139 | print('\n================== Exp %d ==================\n' % exp) 140 | print("dataset: ", args.dataset, ", model: ", args.model, ", selection: ", args.selection, ", num_ex: ", 141 | args.num_exp, ", epochs: ", args.epochs, ", fraction: ", args.fraction, ", seed: ", args.seed, 142 | ", lr: ", args.lr, ", save_path: ", 
args.save_path, ", resume: ", args.resume, ", device: ", args.device, 143 | ", checkpoint_name: " + checkpoint_name if args.save_path != "" else "", "\n", sep="") 144 | 145 | channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test = datasets.__dict__[args.dataset] \ 146 | (args.data_path) 147 | args.channel, args.im_size, args.num_classes, args.class_names = channel, im_size, num_classes, class_names 148 | 149 | torch.random.manual_seed(args.seed) 150 | 151 | if "subset" in checkpoint.keys(): 152 | subset = checkpoint['subset'] 153 | selection_args = checkpoint["sel_args"] 154 | else: 155 | selection_args = dict(epochs=args.selection_epochs, 156 | selection_method=args.uncertainty, 157 | balance=args.balance, 158 | greedy=args.submodular_greedy, 159 | function=args.submodular 160 | ) 161 | method = methods.__dict__[args.selection](dst_train, args, args.fraction, args.seed, **selection_args) 162 | subset = method.select() 163 | print(len(subset["indices"])) 164 | 165 | # Augmentation 166 | if args.dataset == "CIFAR10" or args.dataset == "CIFAR100": 167 | dst_train.transform = transforms.Compose( 168 | [transforms.RandomCrop(args.im_size, padding=4, padding_mode="reflect"), 169 | transforms.RandomHorizontalFlip(), dst_train.transform]) 170 | elif args.dataset == "ImageNet": 171 | dst_train.transform = transforms.Compose([ 172 | transforms.RandomResizedCrop(224), 173 | transforms.RandomHorizontalFlip(), 174 | transforms.ToTensor(), 175 | transforms.Normalize(mean, std) 176 | ]) 177 | 178 | # Handle weighted subset 179 | if_weighted = "weights" in subset.keys() 180 | if if_weighted: 181 | dst_subset = WeightedSubset(dst_train, subset["indices"], subset["weights"]) 182 | else: 183 | dst_subset = torch.utils.data.Subset(dst_train, subset["indices"]) 184 | 185 | # BackgroundGenerator for ImageNet to speed up dataloaders 186 | if args.dataset == "ImageNet": 187 | train_loader = DataLoaderX(dst_subset, batch_size=args.train_batch, shuffle=True, 188 | num_workers=args.workers, pin_memory=True) 189 | test_loader = DataLoaderX(dst_test, batch_size=args.train_batch, shuffle=False, 190 | num_workers=args.workers, pin_memory=True) 191 | else: 192 | train_loader = torch.utils.data.DataLoader(dst_subset, batch_size=args.train_batch, shuffle=True, 193 | num_workers=args.workers, pin_memory=True) 194 | test_loader = torch.utils.data.DataLoader(dst_test, batch_size=args.train_batch, shuffle=False, 195 | num_workers=args.workers, pin_memory=True) 196 | 197 | # Listing cross-architecture experiment settings if specified. 
198 |         models = [args.model]
199 |         if isinstance(args.cross, list):
200 |             for model in args.cross:
201 |                 if model != args.model:
202 |                     models.append(model)
203 | 
204 |         for model in models:
205 |             if len(models) > 1:
206 |                 print("| Training on model %s" % model)
207 | 
208 |             network = nets.__dict__[model](channel, num_classes, im_size).to(args.device)
209 | 
210 |             if args.device == "cpu":
211 |                 print("Using CPU.")
212 |             elif args.gpu is not None:
213 |                 torch.cuda.set_device(args.gpu[0])
214 |                 network = nets.nets_utils.MyDataParallel(network, device_ids=args.gpu)
215 |             elif torch.cuda.device_count() > 1:
216 |                 network = nets.nets_utils.MyDataParallel(network).cuda()
217 | 
218 |             if "state_dict" in checkpoint.keys():
219 |                 # Load model state_dict from the checkpoint
220 |                 network.load_state_dict(checkpoint["state_dict"])
221 | 
222 |             criterion = nn.CrossEntropyLoss(reduction='none').to(args.device)
223 | 
224 |             # Optimizer
225 |             if args.optimizer == "SGD":
226 |                 optimizer = torch.optim.SGD(network.parameters(), args.lr, momentum=args.momentum,
227 |                                             weight_decay=args.weight_decay, nesterov=args.nesterov)
228 |             elif args.optimizer == "Adam":
229 |                 optimizer = torch.optim.Adam(network.parameters(), args.lr, weight_decay=args.weight_decay)
230 |             else:
231 |                 optimizer = torch.optim.__dict__[args.optimizer](network.parameters(), args.lr, momentum=args.momentum,
232 |                                                                  weight_decay=args.weight_decay, nesterov=args.nesterov)
233 | 
234 |             # LR scheduler
235 |             if args.scheduler == "CosineAnnealingLR":
236 |                 scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader) * args.epochs,
237 |                                                                        eta_min=args.min_lr)
238 |             elif args.scheduler == "StepLR":
239 |                 scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=len(train_loader) * args.step_size,
240 |                                                             gamma=args.gamma)
241 |             else:
242 |                 scheduler = torch.optim.lr_scheduler.__dict__[args.scheduler](optimizer)
243 |             scheduler.last_epoch = (start_epoch - 1) * len(train_loader)
244 | 
245 |             if "opt_dict" in checkpoint.keys():
246 |                 optimizer.load_state_dict(checkpoint["opt_dict"])
247 | 
248 |             # Log recorder
249 |             if "rec" in checkpoint.keys():
250 |                 rec = checkpoint["rec"]
251 |             else:
252 |                 rec = init_recorder()
253 | 
254 |             best_prec1 = checkpoint["best_acc1"] if "best_acc1" in checkpoint.keys() else 0.0
255 | 
256 |             # Save the checkpoint with only the subset.
257 |             if args.save_path != "" and args.resume == "":
258 |                 save_checkpoint({"exp": exp,
259 |                                  "subset": subset,
260 |                                  "sel_args": selection_args},
261 |                                 os.path.join(args.save_path, checkpoint_name + ("" if model == args.model else model
262 |                                              + "_") + "unknown.ckpt"), 0, 0.)
263 | 
264 |             for epoch in range(start_epoch, args.epochs):
265 |                 # Train for one epoch
266 |                 train(train_loader, network, criterion, optimizer, scheduler, epoch, args, rec, if_weighted=if_weighted)
267 | 
268 |                 # Evaluate on the test set
269 |                 if args.test_interval > 0 and (epoch + 1) % args.test_interval == 0:
270 |                     prec1 = test(test_loader, network, criterion, epoch, args, rec)
271 | 
272 |                     # Remember best prec@1 and save checkpoint
273 |                     is_best = prec1 > best_prec1
274 | 
275 |                     if is_best:
276 |                         best_prec1 = prec1
277 |                         if args.save_path != "":
278 |                             rec = record_ckpt(rec, epoch)
279 |                             save_checkpoint({"exp": exp,
280 |                                              "epoch": epoch + 1,
281 |                                              "state_dict": network.state_dict(),
282 |                                              "opt_dict": optimizer.state_dict(),
283 |                                              "best_acc1": best_prec1,
284 |                                              "rec": rec,
285 |                                              "subset": subset,
286 |                                              "sel_args": selection_args},
287 |                                             os.path.join(args.save_path, checkpoint_name + (
288 |                                                 "" if model == args.model else model + "_") + "unknown.ckpt"),
289 |                                             epoch=epoch, prec=best_prec1)
290 | 
291 |             # Prepare for the next checkpoint
292 |             if args.save_path != "":
293 |                 try:
294 |                     os.rename(
295 |                         os.path.join(args.save_path, checkpoint_name + ("" if model == args.model else model + "_") +
296 |                                      "unknown.ckpt"), os.path.join(args.save_path, checkpoint_name +
297 |                                      ("" if model == args.model else model + "_") + "%f.ckpt" % best_prec1))
298 |                 except OSError:  # no intermediate checkpoint was written, so save the final state directly
299 |                     save_checkpoint({"exp": exp,
300 |                                      "epoch": args.epochs,
301 |                                      "state_dict": network.state_dict(),
302 |                                      "opt_dict": optimizer.state_dict(),
303 |                                      "best_acc1": best_prec1,
304 |                                      "rec": rec,
305 |                                      "subset": subset,
306 |                                      "sel_args": selection_args},
307 |                                     os.path.join(args.save_path, checkpoint_name +
308 |                                                  ("" if model == args.model else model + "_") + "%f.ckpt" % best_prec1),
309 |                                     epoch=args.epochs - 1,
310 |                                     prec=best_prec1)
311 | 
312 |             print('| Best accuracy: ', best_prec1, ", on model " + model if len(models) > 1 else "", end="\n\n")
313 |         start_epoch = 0
314 |         checkpoint = {}
315 |         sleep(2)
316 | 
317 | 
318 | if __name__ == '__main__':
319 |     main()
320 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.22
2 | prefetch_generator==1.0.1
3 | requests==2.25.1
4 | scipy==1.5.3
5 | torch==1.10.1
6 | torchvision==0.11.2
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import time, torch
2 | from argparse import ArgumentTypeError
3 | from prefetch_generator import BackgroundGenerator
4 | 
5 | 
6 | class WeightedSubset(torch.utils.data.Subset):
7 |     def __init__(self, dataset, indices, weights) -> None:
8 |         self.dataset = dataset
9 |         assert len(indices) == len(weights)
10 |         self.indices = indices
11 |         self.weights = weights
12 | 
13 |     def __getitem__(self, idx):
14 |         if isinstance(idx, list):
15 |             return self.dataset[[self.indices[i] for i in idx]], self.weights[[i for i in idx]]
16 |         return self.dataset[self.indices[idx]], self.weights[idx]
17 | 
18 | 
19 | def train(train_loader, network, criterion, optimizer, scheduler, epoch, args, rec, if_weighted: bool = False):
20 |     """Train for one epoch on the training set"""
21 |     batch_time = AverageMeter('Time', ':6.3f')
22 |     losses = AverageMeter('Loss', ':.4e')
23 |     top1 = AverageMeter('Acc@1', ':6.2f')
24 | 
25 |     # Switch to train mode
26 |     network.train()
27 | 
28 |     end = time.time()
29 |     for i, contents in enumerate(train_loader):
30 |         optimizer.zero_grad()
31 |         if if_weighted:
32 |             target = contents[0][1].to(args.device)
33 |             input = contents[0][0].to(args.device)
34 | 
35 |             # Compute output
36 |             output = network(input)
37 |             weights = contents[1].to(args.device).requires_grad_(False)
38 |             loss = torch.sum(criterion(output, target) * weights) / torch.sum(weights)
39 |         else:
40 |             target = contents[1].to(args.device)
41 |             input = contents[0].to(args.device)
42 | 
43 |             # Compute output
44 |             output = network(input)
45 |             loss = criterion(output, target).mean()
46 | 
47 |         # Measure accuracy and record loss
48 |         prec1 = accuracy(output.data, target, topk=(1,))[0]
49 |         losses.update(loss.data.item(), input.size(0))
50 |         top1.update(prec1.item(), input.size(0))
51 | 
52 |         # Compute gradient and do SGD step
53 |         loss.backward()
54 |         optimizer.step()
55 |         scheduler.step()
56 | 
57 |         # Measure elapsed time
58 |         batch_time.update(time.time() - end)
59 |         end = time.time()
60 | 
61 |         if i % args.print_freq == 0:
62 |             print('Epoch: [{0}][{1}/{2}]\t'
63 |                   'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
64 |                   'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
65 |                   'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
66 |                       epoch, i, len(train_loader), batch_time=batch_time,
67 |                       loss=losses, top1=top1))
68 | 
69 |     record_train_stats(rec, epoch, losses.avg, top1.avg, optimizer.state_dict()['param_groups'][0]['lr'])
70 | 
71 | 
72 | def test(test_loader, network, criterion, epoch, args, rec):
73 |     batch_time = AverageMeter('Time', ':6.3f')
74 |     losses = AverageMeter('Loss', ':.4e')
75 |     top1 = AverageMeter('Acc@1', ':6.2f')
76 | 
77 |     # Switch to evaluate mode
78 |     network.eval()
79 |     network.no_grad = True
80 | 
81 |     end = time.time()
82 |     for i, (input, target) in enumerate(test_loader):
83 |         target = target.to(args.device)
84 |         input = input.to(args.device)
85 | 
86 |         # Compute output
87 |         with torch.no_grad():
88 |             output = network(input)
89 | 
90 |             loss = criterion(output, target).mean()
91 | 
92 |         # Measure accuracy and record loss
93 |         prec1 = accuracy(output.data, target, topk=(1,))[0]
94 |         losses.update(loss.data.item(), input.size(0))
95 |         top1.update(prec1.item(), input.size(0))
96 | 
97 |         # Measure elapsed time
98 |         batch_time.update(time.time() - end)
99 |         end = time.time()
100 | 
101 |         if i % args.print_freq == 0:
102 |             print('Test: [{0}/{1}]\t'
103 |                   'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
104 |                   'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
105 |                   'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
106 |                       i, len(test_loader), batch_time=batch_time, loss=losses,
107 |                       top1=top1))
108 | 
109 |     print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1))
110 | 
111 |     network.no_grad = False
112 | 
113 |     record_test_stats(rec, epoch, losses.avg, top1.avg)
114 |     return top1.avg
115 | 
116 | 
117 | class AverageMeter(object):
118 |     """Computes and stores the average and current value"""
119 | 
120 |     def __init__(self, name, fmt=':f'):
121 |         self.name = name
122 |         self.fmt = fmt
123 |         self.reset()
124 | 
125 |     def reset(self):
126 |         self.val = 0
127 |         self.avg = 0
128 |         self.sum = 0
129 |         self.count = 0
130 | 
131 |     def update(self, val, n=1):
132 |         self.val = val
133 |         self.sum += val * n
134 |         self.count += n
135 |         self.avg = self.sum / self.count
136 | 
137 |     def __str__(self):
138 |         fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
139 |         return fmtstr.format(**self.__dict__)
140 | 
141 | 
142 | def accuracy(output, target, topk=(1,)):
143 |     """Computes the accuracy over the k top predictions for the specified values of k"""
144 |     with torch.no_grad():
145 |         maxk = max(topk)
146 |         batch_size = target.size(0)
147 | 
148 |         _, pred = output.topk(maxk, 1, True, True)
149 |         pred = pred.t()
150 |         correct = pred.eq(target.view(1, -1).expand_as(pred))
151 | 
152 |         res = []
153 |         for k in topk:
154 |             correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
155 |             res.append(correct_k.mul_(100.0 / batch_size))
156 |         return res
157 | 
158 | 
159 | def str_to_bool(v):
160 |     # Handle boolean type in arguments.
161 |     if isinstance(v, bool):
162 |         return v
163 |     if v.lower() in ('yes', 'true', 't', 'y', '1'):
164 |         return True
165 |     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
166 |         return False
167 |     else:
168 |         raise ArgumentTypeError('Boolean value expected.')
169 | 
170 | 
171 | def save_checkpoint(state, path, epoch, prec):
172 |     print("=> Saving checkpoint for epoch %d, with Prec@1 %f." % (epoch, prec))
173 |     torch.save(state, path)
174 | 
175 | 
176 | def init_recorder():
177 |     from types import SimpleNamespace
178 |     rec = SimpleNamespace()
179 |     rec.train_step = []
180 |     rec.train_loss = []
181 |     rec.train_acc = []
182 |     rec.lr = []
183 |     rec.test_step = []
184 |     rec.test_loss = []
185 |     rec.test_acc = []
186 |     rec.ckpts = []
187 |     return rec
188 | 
189 | 
190 | def record_train_stats(rec, step, loss, acc, lr):
191 |     rec.train_step.append(step)
192 |     rec.train_loss.append(loss)
193 |     rec.train_acc.append(acc)
194 |     rec.lr.append(lr)
195 |     return rec
196 | 
197 | 
198 | def record_test_stats(rec, step, loss, acc):
199 |     rec.test_step.append(step)
200 |     rec.test_loss.append(loss)
201 |     rec.test_acc.append(acc)
202 |     return rec
203 | 
204 | 
205 | def record_ckpt(rec, step):
206 |     rec.ckpts.append(step)
207 |     return rec
208 | 
209 | 
210 | class DataLoaderX(torch.utils.data.DataLoader):  # prefetch batches in a background thread
211 |     def __iter__(self):
212 |         return BackgroundGenerator(super().__iter__())
213 | 
--------------------------------------------------------------------------------
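The weighted-subset path deserves a closer look: methods such as Craig and GradMatch return per-example weights alongside the selected indices, `WeightedSubset` pairs each retained example with its weight, and `train()` computes the normalized weighted loss sum(w_i * l_i) / sum(w_i). Below is a minimal, self-contained sketch of that pattern; the dummy dataset, toy linear model, and the example indices and weights are placeholders, not part of the repository.

```python
import torch
from torch.utils.data import TensorDataset, DataLoader

# Dummy data standing in for a real dataset: 100 samples, 8 features, 2 classes.
base = TensorDataset(torch.randn(100, 8), torch.randint(0, 2, (100,)))
indices = [0, 5, 42, 77]                      # indices a coreset method might select
weights = torch.tensor([0.5, 1.0, 2.0, 1.5])  # matching per-example weights


class WeightedSubset(torch.utils.data.Subset):
    """Same idea as utils.py: each item comes back as ((input, target), weight)."""
    def __init__(self, dataset, indices, weights):
        super().__init__(dataset, indices)
        self.weights = weights

    def __getitem__(self, idx):
        return self.dataset[self.indices[idx]], self.weights[idx]


loader = DataLoader(WeightedSubset(base, indices, weights), batch_size=2)
criterion = torch.nn.CrossEntropyLoss(reduction='none')  # keep per-example losses
model = torch.nn.Linear(8, 2)                            # toy stand-in for a real network

for (x, y), w in loader:
    logits = model(x)
    # Normalized weighted loss, as in train(): sum(w_i * l_i) / sum(w_i)
    loss = torch.sum(criterion(logits, y) * w) / torch.sum(w)
    loss.backward()
```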
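Similarly, the resume logic in main.py accepts two checkpoint shapes: a full training checkpoint, or a subset-only one that must contain at least the keys `exp`, `subset` and `sel_args`, with `indices` inside `subset`. A hedged sketch of composing a subset-only checkpoint by hand follows; the file name, indices and `sel_args` contents are illustrative assumptions.

```python
import torch

# A subset-only checkpoint: main.py asserts {"exp", "subset", "sel_args"} is
# present and that "indices" exists inside "subset".
checkpoint = {
    "exp": 0,                                   # experiment index to resume from
    "subset": {"indices": [3, 17, 256, 1024]},  # indices from any selection method
    "sel_args": {},                             # arguments the method was run with
}
torch.save(checkpoint, "CIFAR10_ResNet18_Uniform_exp0_subset.ckpt")  # hypothetical name
```

Passed via `--resume`, such a file makes main.py skip selection and train from epoch 0 on the stored subset; a full checkpoint additionally carries `epoch`, `state_dict`, `opt_dict`, `best_acc1` and `rec`, so training continues where it stopped.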