├── .DS_Store ├── README.md ├── baselines ├── baseline.py ├── conse.py ├── costa.py ├── vgse.py └── wDAEGNN │ ├── .gitignore │ ├── Dockerfile │ ├── LICENCE │ ├── README.md │ ├── config │ ├── imagenet_ResNet10CosineClassifier.py │ └── imagenet_wDAE │ │ └── imagenet_ResNet10CosineClassifier_wDAE_GNN.py │ ├── data │ ├── IMAGENET_LOWSHOT_BENCHMARK_CATEGORY_SPLITS.json │ └── mini_imagenet_split │ │ ├── test.csv │ │ ├── train.csv │ │ ├── train_val_test_classnames.csv │ │ └── val.csv │ ├── low_shot_learning │ ├── __init__.py │ ├── algorithms │ │ ├── __init__.py │ │ ├── algorithm.py │ │ ├── classification │ │ │ ├── __init__.py │ │ │ ├── classification.py │ │ │ └── utils.py │ │ ├── fewshot │ │ │ ├── __init__.py │ │ │ ├── fewshot.py │ │ │ ├── imagenet_lowshot.py │ │ │ └── utils.py │ │ └── utils │ │ │ └── save_features.py │ ├── architectures │ │ ├── __init__.py │ │ ├── classifiers │ │ │ ├── __init__.py │ │ │ ├── cosine_classifier_with_DAE_weight_generator.py │ │ │ ├── cosine_classifier_with_weight_generator.py │ │ │ ├── few_shot_classification_with_prototypes.py │ │ │ ├── matching_network_head.py │ │ │ ├── prototypical_network_head.py │ │ │ ├── utils.py │ │ │ └── weights_denoising_autoencoder.py │ │ ├── feature_extractors │ │ │ ├── __init__.py │ │ │ ├── dumb_feat.py │ │ │ ├── resnet_feat.py │ │ │ ├── utils.py │ │ │ └── wide_resnet.py │ │ └── tools.py │ ├── dataloaders │ │ ├── __init__.py │ │ ├── basic_dataloaders.py │ │ └── dataloader_fewshot.py │ ├── datasets │ │ ├── __init__.py │ │ ├── imagenet_dataset.py │ │ └── mini_imagenet_dataset.py │ └── utils.py │ ├── scripts │ ├── __init__.py │ ├── lowshot_evaluate.py │ ├── lowshot_train_stage1.py │ ├── lowshot_train_stage2.py │ └── save_features.py │ └── setup.py ├── embeddings ├── AWA2_classnames.npy ├── CUB_classnames.npy ├── ImageNet1K_classnames.txt ├── SUN_classnames.npy ├── conceptnet │ ├── AWA2_cn_sum_list.npy │ ├── CUB_cn_sum_list.npy │ ├── SUN_cn_sum_list.npy │ └── imgnet_cn_list.npy └── wiki2vec │ ├── AWA2_wiki_sum_list.npy │ ├── CUB_wiki_sum_list.npy │ ├── SUN_wiki_sum_list.npy │ └── imgnet_wiki_list.npy ├── environment.yml ├── figs ├── icis-framework.png └── model-fig.png ├── joint_latent.py ├── main.py ├── regressor.py ├── scripts ├── ablation_data_efficieny_CUB_full.sh ├── table1_awa2.sh ├── table1_cub.sh ├── table1_cub_5seeds.sh ├── table1_sun.sh ├── table2.sh ├── table3_cub.sh ├── table4.sh └── table5_left.sh └── utility ├── ImageNet1K_classnames.txt ├── eval_imagenet.py ├── feature_extraction ├── extract_util.py └── feature_extract.py ├── load_wordembeddings.py ├── model_bases.py ├── plot_data_ablation.py ├── plot_prediction_bins.py ├── train_base.py └── util.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/.DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Image-free Zero-Shot Learning 3 | This is the official repository for [Image-free Classifier Injection for Zero-Shot Classification](https://arxiv.org/abs/2308.10599), published at ICCV 2023. 4 | 5 |

6 | 7 |

8 | 
9 | 
10 | ## Prerequisites
11 | 
12 | 1. Extracted fine-tuned CUB, AWA2, and SUN features are available [here](https://github.com/uqzhichen/SDGZSL) (clicking on "datasets" under the "Usage" section will lead you to [this zip file on Google Drive](https://drive.google.com/file/d/1KxFC6T_kGKCNx1JyX2FOaSimA0DOcU_I/view)). Features are originally from [this paper](https://github.com/akshitac8/tfvaegan).
13 | 
14 | 2. Pre-trained ResNet101 features for the CUB, AWA2, and SUN datasets are available [here](https://drive.google.com/drive/folders/18egafUzqWp7kavtBSk78O0R2L1mx0dLX?usp=sharing). We extract the features using the current version (2) of ResNet101 available via `torchvision`. See `/utility/feature_extraction/feature_extract.py`. We use the [proposed datasplits](https://www.mpi-inf.mpg.de/departments/computer-vision-and-machine-learning/research/zero-shot-learning/zero-shot-learning-the-good-the-bad-and-the-ugly) ("Proposed Split Version 2.0"). Image datasets are available here for [CUB](https://www.vision.caltech.edu/datasets/cub_200_2011/), [AWA2](https://cvml.ista.ac.at/AwA2/), and [SUN](https://groups.csail.mit.edu/vision/SUN/hierarchy.html).
15 | 
16 | 3. Modify the default "dataroot" and "rootpath" in main.py (to point to your data path and to where outputs should be saved, respectively).
17 | 
18 | 
19 | ## Usage
20 | 
21 | Scripts are supplied (in the scripts folder) for the experiments behind the various tables. By default, these scripts run a single random seed and include training/loading the base classification model and evaluating the specified I-(G)ZSL method. The number of seeds can be increased either with the --numSeeds arg, or by supplying fixed seeds with the --manualSeed arg (the latter will also speed up experiments, as base classification models can be reused). A sketch of a full run is included at the end of this README.
22 | 
23 | A .yml file with the environment is supplied. (Replace the `cudatoolkit` version with the CUDA version appropriate for your system. Our experiments were done using CUDA/11.1 and CUDNN/8.1.)
24 | 
25 | Classnames for ImageNet1K are available in `/utility/`.
26 | 
27 | Example usage apart from the supplied scripts:
28 | 
29 | ```bash
30 | $ python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope
31 | ```
32 | 
33 | ## Citation
34 | 
35 | If you use this code, please cite:
36 | ```
37 | @InProceedings{Christensen_2023_ICCV,
38 |     author    = {Christensen, Anders and Mancini, Massimiliano and Koepke, A. Sophia and Winther, Ole and Akata, Zeynep},
39 |     title     = {Image-Free Classifier Injection for Zero-Shot Classification},
40 |     booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
41 |     month     = {October},
42 |     year      = {2023},
43 |     pages     = {19072-19081}
44 | }
45 | ```
46 | 
47 | **Note**: The wDAE-GNN, Sub.Reg. and wAVG/SMO implementations are imported from their respective repositories and adapted to our use case. If you find those parts useful, please consider citing them.
48 | 
49 | ## Contact
50 | 
51 | The code will receive minor updates. Questions etc. can be sent by email to:
52 | 
53 | Anders Christensen
54 | 
55 | andchri@dtu.dk
56 | 
57 | Technical University of Denmark & University of Tübingen
58 | 
59 | 

60 | 61 |

62 | -------------------------------------------------------------------------------- /baselines/baseline.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import utility.util as util 3 | from regressor import REGRESSOR 4 | import torchvision 5 | import torch.nn as nn 6 | import os 7 | 8 | class Baseline(REGRESSOR): 9 | def __init__(self, opt, **kwargs): 10 | super().__init__(opt=opt, **kwargs) 11 | 12 | def evaluate_weights(self, pred_weights): 13 | self.unseen_model.fc.weight.data[:, :] = pred_weights[:, :self.input_dim] 14 | self.unseen_model.fc.bias.data[:] = pred_weights[:, self.input_dim] 15 | 16 | self.ext_model.fc.weight.data[len(self.seenclasses):, :] = pred_weights[:, :self.input_dim] 17 | self.ext_model.fc.bias.data[len(self.seenclasses):] = pred_weights[:, self.input_dim] 18 | 19 | if self.opt.zst: 20 | self.acc_target, self.acc_zst_unseen = self.val_zst() 21 | else: 22 | self.acc_gzsl, self.acc_seen, self.acc_unseen, self.H, self.acc_unseen_zsl = self.val_gzsl() -------------------------------------------------------------------------------- /baselines/conse.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | import utility.util as util 6 | import copy 7 | import random 8 | import sys 9 | import os 10 | import math 11 | import torchvision 12 | from torch.utils.data import Dataset, DataLoader 13 | import utility.model_bases as model 14 | from regressor import REGRESSOR 15 | 16 | class ConSE(REGRESSOR): 17 | def __init__(self, opt, **kwargs): 18 | super().__init__(opt=opt, **kwargs) 19 | self.opt = opt 20 | if self.cuda: 21 | self.model.cuda() 22 | 23 | if self.opt.zst: 24 | data = self.test_unseen_feature 25 | target = self.test_unseen_label 26 | 27 | self.acc_target = self.conse_val(self.model, data, 28 | util.map_label(target, self.unseenclasses-len(self.seenclasses)), 29 | util.map_label(self.unseenclasses-len(self.seenclasses), self.unseenclasses-len(self.seenclasses)), 30 | train_attributes=self.attribute[self.seenclasses], test_attributes=self.attribute[self.unseenclasses]) 31 | 32 | self.acc_zst_unseen = self.conse_val(self.model, data, 33 | util.map_label(target, self.unseenclasses-len(self.seenclasses)) + len(self.seenclasses), 34 | util.map_label_extend(self.unseenclasses, self.unseenclasses, self.seenclasses), 35 | train_attributes=self.attribute[self.seenclasses], test_attributes=torch.cat((self.attribute[self.seenclasses], self.attribute[self.unseenclasses]))) 36 | 37 | else: 38 | # GZSL 39 | self.acc_gzsl = self.conse_val(self.model, torch.cat((self.test_seen_feature, self.test_unseen_feature), 0), 40 | torch.cat((util.map_label(self.test_seen_label, self.seenclasses), util.map_label_extend(self.test_unseen_label, self.unseenclasses, self.seenclasses)), 0), 41 | torch.cat((util.map_label(self.seenclasses, self.seenclasses) , util.map_label_extend(self.unseenclasses, self.unseenclasses, self.seenclasses)), 0), 42 | train_attributes=self.attribute[self.seenclasses], test_attributes=torch.cat((self.attribute[self.seenclasses], self.attribute[self.unseenclasses]))) 43 | self.acc_seen = self.conse_val(self.model, self.test_seen_feature, util.map_label(self.test_seen_label, self.seenclasses), util.map_label(self.seenclasses, self.seenclasses), train_attributes=self.attribute[self.seenclasses], test_attributes=torch.cat((self.attribute[self.seenclasses], 
self.attribute[self.unseenclasses]))) 44 | self.acc_unseen = self.conse_val(self.model, self.test_unseen_feature, util.map_label(self.test_unseen_label, self.unseenclasses), util.map_label(self.unseenclasses, self.unseenclasses), train_attributes=self.attribute[self.seenclasses], test_attributes=torch.cat((self.attribute[self.seenclasses], self.attribute[self.unseenclasses]))) 45 | self.H = 2*self.acc_seen*self.acc_unseen / (self.acc_seen+self.acc_unseen) 46 | # ZSL 47 | self.acc_unseen_zsl = self.conse_val(self.model, self.test_unseen_feature, util.map_label(self.test_unseen_label, self.unseenclasses), util.map_label(self.unseenclasses, self.unseenclasses), train_attributes=self.attribute[self.seenclasses], test_attributes=self.attribute[self.unseenclasses]) 48 | 49 | def conse_val(self, model, test_X, test_label, target_classes, train_attributes, test_attributes): 50 | """ Predict semantic embedding for input, then compare to class embeddings (attributes) """ 51 | cos = nn.CosineSimilarity(dim=1, eps=1e-8) 52 | soft = torch.nn.Softmax(dim=1) 53 | if self.cuda: 54 | train_attributes = train_attributes.cuda() 55 | test_attributes = test_attributes.cuda() 56 | start = 0 57 | ntest = test_X.size()[0] 58 | predicted_label = torch.LongTensor(test_label.size()) 59 | for i in range(0, ntest, self.batch_size): 60 | end = min(ntest, start+self.batch_size) 61 | if self.cuda: 62 | logits = model(Variable(test_X[start:end].cuda())) 63 | else: 64 | logits = model(Variable(test_X[start:end])) 65 | 66 | if self.opt.class_reduction_ablation: 67 | probs = soft(logits[:, self.perm]) 68 | pred_embeds = torch.sum(train_attributes[self.perm] * probs.unsqueeze(-1), dim=1) 69 | else: 70 | probs = soft(logits) 71 | pred_embeds = torch.sum(train_attributes * probs.unsqueeze(-1), dim=1) 72 | 73 | output = [] 74 | for pred_embed in pred_embeds: 75 | sims = cos(pred_embed[None, :], test_attributes) 76 | _, idx = torch.max(sims, dim=0) 77 | output.append(idx) 78 | 79 | output = torch.stack(output) 80 | predicted_label[start:end] = output 81 | start = end 82 | 83 | acc, acc_per_class, prediction_matrix = self.compute_per_class_acc_gzsl(test_label, predicted_label, target_classes) 84 | if self.opt.save_pred_matrix: 85 | torch.save(acc_per_class, opt.rootpath + '/outputs/' + self.opt.dataset + self.opt.image_embedding + '_len_test_' + str(len(test_X)) + '_len_tar_' + str(len(target_classes)) + '.pt') 86 | torch.save(prediction_matrix, opt.rootpath + '/outputs/' + self.opt.dataset + self.opt.image_embedding + '_len_test_' + str(len(test_X)) + '_len_tar_' + str(len(target_classes)) + '.pt') 87 | 88 | return acc 89 | -------------------------------------------------------------------------------- /baselines/costa.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | import utility.util as util 6 | import copy 7 | import random 8 | import sys 9 | import os 10 | import math 11 | import torchvision 12 | from torch.utils.data import Dataset, DataLoader 13 | import utility.model_bases as model 14 | from regressor import REGRESSOR 15 | 16 | class COSTA(REGRESSOR): 17 | def __init__(self, opt, **kwargs): 18 | super().__init__(opt=opt, **kwargs) 19 | self.opt = opt 20 | self.unseen_model = model.LINEAR(self.input_dim, len(self.unseenclasses)) 21 | self.ext_model = model.LINEAR(self.input_dim, self.nclass) 22 | if self.cuda: 23 | self.unseen_model.cuda() 24 | self.ext_model.cuda() 25 | 26 | 
self.ext_model.fc.weight.data[:len(self.seenclasses), :] = self.target_weights[:, :2048] 27 | self.ext_model.fc.bias.data[:len(self.seenclasses)] = self.target_weights[:, 2048] 28 | for n, unseen_att in enumerate(self.attribute[self.unseenclasses]): 29 | cooccs = unseen_att.unsqueeze(0) * self.attribute[self.seenclasses] 30 | norm_coocs = torch.sum(cooccs, dim=-1) / (cooccs.sum() + 10e-5) 31 | if self.opt.cuda: 32 | norm_coocs = norm_coocs.cuda() 33 | pred_weights = torch.sum(norm_coocs[:, None]*self.target_weights, dim=0) 34 | 35 | self.unseen_model.fc.weight.data[n, :] = pred_weights[:-1] 36 | self.unseen_model.fc.bias.data[n] = pred_weights[-1] 37 | 38 | self.ext_model.fc.weight.data[len(self.seenclasses) + n, :] = pred_weights[:-1] 39 | self.ext_model.fc.bias.data[len(self.seenclasses) + n] = pred_weights[-1] 40 | 41 | # GZSL 42 | if self.opt.zst: 43 | self.acc_target, self.acc_zst_unseen = self.val_zst() 44 | 45 | else: 46 | self.acc_gzsl, self.acc_seen, self.acc_unseen, self.H, self.acc_unseen_zsl = self.val_gzsl() 47 | -------------------------------------------------------------------------------- /baselines/vgse.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torchvision 4 | 5 | from baselines.baseline import Baseline 6 | from utility.model_bases import LINEAR 7 | import utility.util as util 8 | 9 | class VGSE_CRM(Baseline): 10 | """ Baseline inspired by the Class Relation Module (CRM) from 11 | VGSE: Visually-Grounded Semantic Embeddings for Zero-Shot Learning 12 | by Xu et al. Contains implementation of both WAvg and SMO CRM. """ 13 | def __init__(self, opt, **kwargs): 14 | super().__init__(opt=opt, **kwargs) 15 | self.opt = opt 16 | 17 | if opt.vgse_baseline == 'wavg': 18 | pred_weights = self.WAvg(opt.vgse_nbs, opt.vgse_eta) 19 | elif opt.vgse_baseline == 'smo': 20 | pred_weights = self.SMO(alpha=opt.vgse_alpha) 21 | 22 | self.unseen_model = LINEAR(self.input_dim, len(self.unseenclasses)) 23 | self.ext_model = LINEAR(self.input_dim, self.nclass) 24 | if self.cuda: 25 | self.unseen_model.cuda() 26 | self.ext_model.cuda() 27 | 28 | self.ext_model.fc.weight.data[:len(self.seenclasses), :] = self.target_weights[:, :2048] 29 | self.ext_model.fc.bias.data[:len(self.seenclasses)] = self.target_weights[:, 2048] 30 | 31 | self.evaluate_weights(pred_weights) 32 | 33 | def WAvg(self, num_neighbours=5, eta=5): 34 | """ Implementation of Weighted Average (WAvg) CRM. 35 | Hyperparameters (num_neighbours and eta) taken from paper. 
""" 36 | 37 | unseen_att_sims = np.zeros((len(self.unseenclasses), len(self.seenclasses))) 38 | for i in range(len(self.unseenclasses)): 39 | for j in range(len(self.seenclasses)): 40 | unseen_att_sims[i, j] = torch.exp(-eta*torch.dist(self.attribute[self.unseenclasses[i]], self.attribute[self.seenclasses[j]])) 41 | unseen_att_sims = torch.from_numpy(unseen_att_sims).float() 42 | 43 | if self.opt.cuda: 44 | unseen_att_sims = unseen_att_sims.cuda() 45 | 46 | pred_weights = torch.matmul(unseen_att_sims, self.target_weights) 47 | 48 | return pred_weights 49 | 50 | def SMO(self, alpha=0, eps=10e-8): 51 | """ Implementation of Similarity Matrix Optimization (SMO) CRM """ 52 | assert alpha in [-1, 0] 53 | 54 | loss_fnc = torch.nn.MSELoss() 55 | reg = torch.nn.L1Loss() 56 | sum_constraint = torch.ones(1)[0] 57 | 58 | if alpha == 0: 59 | lr = 1000 60 | domain_fnc = torch.nn.Softmax(dim=0) 61 | else: # alpha = -1 62 | lr = 10e-6 63 | domain_fnc = torch.nn.Tanh() 64 | 65 | all_pred_weights = torch.zeros(len(self.unseenclasses), self.target_weights.size(1)) 66 | if self.cuda: 67 | all_pred_weights = all_pred_weights.cuda() 68 | sum_constraint = sum_constraint.cuda() 69 | self.attribute = self.attribute.cuda() 70 | 71 | for i in range(len(self.unseenclasses)): 72 | converged = False 73 | best_loss = 1000 74 | prev_loss = 1000 75 | counter = 0 76 | 77 | smo = SMOModel(domain_fnc=domain_fnc, dim=len(self.seenclasses)) 78 | if self.cuda: 79 | smo.cuda() 80 | 81 | optim = torch.optim.SGD(smo.parameters(), lr=lr) 82 | 83 | while not converged: 84 | optim.zero_grad() 85 | pred_att = smo(self.attribute[self.seenclasses]) 86 | loss = loss_fnc(pred_att, self.attribute[self.unseenclasses[i]]) - alpha * reg(torch.sum(smo.domain_fnc(smo.r)), sum_constraint) 87 | loss.backward() 88 | optim.step() 89 | 90 | if loss < best_loss: 91 | best_loss = loss 92 | best_r = smo.r 93 | if torch.abs(prev_loss - loss) < eps: 94 | counter += 1 95 | else: 96 | counter = 0 97 | else: 98 | counter += 1 99 | 100 | if counter > 10: 101 | converged = True 102 | 103 | prev_loss = loss 104 | 105 | pred_weights = torch.sum(domain_fnc(best_r)[:, None] * self.target_weights, dim=0) 106 | all_pred_weights[i,:] = pred_weights 107 | 108 | return all_pred_weights 109 | 110 | 111 | class SMOModel(torch.nn.Module): 112 | def __init__(self, domain_fnc, dim): 113 | super().__init__() 114 | self.domain_fnc = domain_fnc 115 | self.dim = dim 116 | self.r = torch.nn.parameter.Parameter(data=torch.normal(mean=torch.zeros(dim), std=2/dim), requires_grad=True) 117 | 118 | def forward(self, data): 119 | pred_att = torch.sum(self.domain_fnc(self.r)[:, None] * data, dim=0) 120 | return pred_att 121 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/.gitignore: -------------------------------------------------------------------------------- 1 | ./experiments 2 | ./experiments/* 3 | ./datasets 4 | *~ 5 | *.pyc 6 | *.pkl 7 | data/IMAGENET 8 | .remote-sync.json 9 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-devel-ubuntu18.04 2 | 3 | RUN yes | unminimize 4 | 5 | RUN apt-get update && apt-get install -y wget bzip2 6 | RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 7 | RUN bash miniconda.sh -b -p /opt/conda && \ 8 | rm miniconda.sh 9 | ENV PATH="/opt/conda/bin:${PATH}" 10 | RUN conda config 
--set always_yes yes 11 | 12 | RUN pip install https://download.pytorch.org/whl/cu100/torch-1.1.0-cp37-cp37m-linux_x86_64.whl 13 | RUN pip install https://download.pytorch.org/whl/cu100/torchvision-0.3.0-cp37-cp37m-linux_x86_64.whl 14 | 15 | RUN pip install tensorboardX scikit-image tqdm pyyaml easydict future h5py torchnet pip 16 | RUN apt-get install unzip 17 | 18 | COPY ./ ./wDAE_GNN_FewShot 19 | RUN pip install -e ./wDAE_GNN_FewShot 20 | 21 | WORKDIR ./wDAE_GNN_FewShot 22 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/LICENCE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Spyridon Gidaris 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/README.md: -------------------------------------------------------------------------------- 1 | # *Generating Classification Weights with GNN Denoising Autoencoders for Few-Shot Learning* 2 | 3 | The current project page provides [pytorch](http://pytorch.org/) code that implements the following CVPR2019 paper (accepted as oral): 4 | **Title:** "Generating Classification Weights with GNN Denoising Autoencoders for Few-Shot Learning" 5 | **Authors:** Spyros Gidaris, Nikos Komodakis 6 | **Code:** https://github.com/gidariss/wDAE_GNN_FewShot 7 | 8 | **Abstract:** 9 | Given an initial recognition model already trained on a set of base classes, the goal of this work is to develop a meta-model for few-shot learning. The meta-model, given as input some novel classes with few training examples per class, must properly adapt the existing recognition model into a new model that can correctly classify in a unified way both the novel and the base classes. To accomplish this goal it must learn to output the appropriate classification weight vectors for those two types of classes. To build our meta-model we make use of two main innovations: we propose the use of a Denoising Autoencoder network (DAE) that (during training) takes as input a set of classification weights corrupted with Gaussian noise and learns to reconstruct the target-discriminative classification weights. In this case, the injected noise on the classification weights serves the role of regularizing the weight generating meta-model. 
Furthermore, in order to capture the co-dependencies between different classes in a given task instance of our meta-model, we propose to implement the DAE model as a Graph Neural Network (GNN). In order to verify the efficacy of our approach, we extensively evaluate it on ImageNet based few-shot benchmarks and we report strong results that surpass prior approaches. 10 | 11 | 12 | ### License 13 | This code is released under the MIT License (refer to the LICENSE file for details). 14 | 15 | ## Contents: 16 | **(1)** Code for running the ImageNet-based experiments with the wDAE-GNN-based few-shot model. 17 | 18 | **(2)** Code for running the MiniImageNet-based experiments: would be ready soon. 19 | 20 | ## Preparation 21 | 22 | ### Pre-requisites 23 | * Python 3.7 24 | * Pytorch >= 1.0.0 25 | * CUDA 10.0 or higher 26 | 27 | ### Installation 28 | 29 | **(1)** Clone the repo: 30 | ```bash 31 | $ git clone https://github.com/gidariss/wDAE_GNN_FewShot 32 | ``` 33 | 34 | **(2)** Install this repository and the dependencies using pip: 35 | ```bash 36 | $ pip install -e ./wDAE_GNN_FewShot 37 | ``` 38 | 39 | With this, you can edit the wDAE_GNN_FewShot code on the fly and import function 40 | and classes of wDAE_GNN_FewShot in other project as well. 41 | 42 | **(3)** Optional. To uninstall this package, run: 43 | ```bash 44 | $ pip uninstall wDAE_GNN_FewShot 45 | ``` 46 | 47 | **(4)** Create *dataset* and *experiment* directories: 48 | ```bash 49 | $ cd wDAE_GNN_FewShot 50 | $ mkdir ./datasets 51 | $ mkdir ./experiments 52 | ``` 53 | 54 | You can take a look at the [Dockerfile](./Dockerfile) if you are uncertain about steps to install this project. 55 | 56 | ## Running experiments on the ImageNet based few-shot benchmark 57 | 58 | Here I provide instructions for training and evaluating our method on the ImageNet based low-shot benchmark proposed by Bharath and Girshick [1]. 59 | 60 | **(1)** Download the ImageNet dataset and set in [imagenet_dataset.py](https://github.com/gidariss/wDAE_GNN_FewShot/blob/master/low_shot_learning/datasets/imagenet_dataset.py#L19) the path to where the dataset resides in your machine. 61 | 62 | **(2)** Train a ResNet10 based recognition model with cosine similarity-based classifier [3]: 63 | ```bash 64 | $ cd wDAE_GNN_FewShot # enter the wDAE_GNN_FewShot directory. 65 | $ python scripts/lowshot_train_stage1.py --config=imagenet_ResNet10CosineClassifier 66 | ``` 67 | You can download the already trained by us recognition model from [here](https://github.com/gidariss/wDAE_GNN_FewShot/releases/download/0.1/imagenet_ResNet10CosineClassifier.zip). In that case, place the model inside the './experiments' directory with the name './experiments/imagenet_ResNet10CosineClassifier'. 68 | ```bash 69 | # Run from the wDAE_GNN_FewShot directory 70 | $ cd ./experiments 71 | $ wget https://github.com/gidariss/wDAE_GNN_FewShot/releases/download/0.1/imagenet_ResNet10CosineClassifier.zip 72 | $ unzip imagenet_ResNet10CosineClassifier.zip 73 | $ cd .. 74 | ``` 75 | 76 | **(3)** Extract and save the ResNet10 features (with the above model; see step (2)) from images of the ImageNet dataset: 77 | ```bash 78 | # Run from the wDAE_GNN_FewShot directory 79 | # Extract features from the validation image split of the Imagenet. 80 | $ python scripts/save_features.py --config=imagenet_ResNet10CosineClassifier --split='val' 81 | # Extract features from the training image split of the Imagenet. 
82 | $ python scripts/save_features.py --config=imagenet_ResNet10CosineClassifier --split='train' 83 | ``` 84 | The features will be saved on './datasets/feature_datasets/imagenet_ResNet10CosineClassifier'. 85 | You can download the pre-computed features from [here](https://mega.nz/#!bsVlzQBR!MNADfBM4JX2KgWG13oL0pXhHCQqvkPRD4MfP_aUOtXg). In that case, place the downloaded features in './datasets/' with the following structure: 86 | ``` 87 | # Features of the validation images of ImageNet. 88 | ./datasets/feature_datasets/imagenet_ResNet10CosineClassifier/ImageNet_val.h5 89 | # Features of the training images of ImageNet. 90 | ./datasets/feature_datasets/imagenet_ResNet10CosineClassifier/ImageNet_train.h5 91 | ``` 92 | 93 | 94 | **(4)** Train the Graph Neural Network Denoising AutoEncoder few-shot model (wDAE_GNN): 95 | ```bash 96 | # Run from the wDAE_GNN_FewShot directory 97 | # Training the wDAE-GNN few-shot model. 98 | $ python scripts/lowshot_train_stage2.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN 99 | ``` 100 | The model will be saved on 'wDAE_GNN_FewShot/experiments/imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN'. 101 | Otherwise, you can download the pre-trained few-shot model from 102 | [here](https://github.com/gidariss/wDAE_GNN_FewShot/releases/download/0.1/imagenet_ResNet10CosineClassifier_wDAE_GNN.zip). 103 | In that case, place the downloaded model in 104 | 'wDAE_GNN_FewShot/experiments/imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN'. 105 | ```bash 106 | # Run from the wDAE_GNN_FewShot directory 107 | $ cd experiments # enter the wDAE_GNN_FewShot directory. 108 | $ mkdir imagenet_wDAE 109 | $ cd imagenet_wDAE 110 | $ wget https://github.com/gidariss/wDAE_GNN_FewShot/releases/download/0.1/imagenet_ResNet10CosineClassifier_wDAE_GNN.zip 111 | $ unzip imagenet_ResNet10CosineClassifier_wDAE_GNN.zip 112 | $ cd ../../ 113 | ``` 114 | 115 | 116 | **(5)** Evaluate the above trained model: 117 | ```bash 118 | # Run from the wDAE_GNN_FewShot directory 119 | # Evaluate the model on the 1-shot setting. 120 | $ python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=1 --step_size=1.0 121 | # Expected output: 122 | # ==> Top 5 Accuracies: [Novel: 47.99 | Base: 93.39 | All 59.02 ] 123 | 124 | # Evaluate the model on the 2-shot setting. 125 | $ python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=2 --step_size=1.0 126 | # Expected output: 127 | # ==> Top 5 Accuracies: [Novel: 59.54 | Base: 93.39 | All 66.22 ] 128 | 129 | # Evaluate the model on the 5-shot setting. 130 | $ python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=5 --step_size=0.6 131 | # Expected output: 132 | # ==> Top 5 Accuracies: [Novel: 70.23 | Base: 93.44 | All 73.20 ] 133 | 134 | # Evaluate the model on the 10-shot setting. 135 | $ python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=10 --step_size=0.4 136 | # Expected output: 137 | # ==> Top 5 Accuracies: [Novel: 74.95 | Base: 93.37 | All 76.09 ] 138 | 139 | # Evaluate the model on the 20-shot setting. 
140 | $ python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=20 --step_size=0.2 141 | # Expected output: 142 | # ==> Top 5 Accuracies: [Novel: 77.77 | Base: 93.33 | All 77.54 ] 143 | ``` 144 | 145 | ## Experimental results on the ImageNet based Low-shot benchmark 146 | 147 | Here I provide the experiment results of the few-shot model trained with this code on the ImageNet-based low-shot [1] using the evaluation metrics proposed by [2]. 148 | Note that after cleaning and refactoring the implementation code of the paper 149 | and re-running the experiments, the results that we got are slightly different. 150 | 151 | ### Top-5 classification accuracy of wDAE-GNN model. 152 | | wDAE-GNN | Novel | All | 153 | | ------------------------------------ | ---------------:|----------------:| 154 | | 1-shot results | 47.99% | 59.02% | 155 | | 2-shot results | 59.54% | 66.22% | 156 | | 5-shot results | 70.23% | 73.20% | 157 | | 10-shot results | 74.95% | 76.09% | 158 | | 20-shot results | 77.77% | 77.54% | 159 | 160 | ### References 161 | ``` 162 | [1] B. Hariharan and R. Girshick. Low-shot visual recognition by shrinking and hallucinating features. 163 | [2] Y.-X. Wang and R. Girshick, M. Hebert, B. Hariharan. Low-shot learning from imaginary data. 164 | [3] S. Gidaris and N. Komodakis. Dynamic few-shot visual learning without forgetting. 165 | ``` 166 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/config/imagenet_ResNet10CosineClassifier.py: -------------------------------------------------------------------------------- 1 | config = {} 2 | # set the parameters related to the training and testing set 3 | 4 | nKbase = 389 5 | 6 | data_train_opt = {} 7 | data_train_opt['nKnovel'] = 0 8 | data_train_opt['nKbase'] = nKbase 9 | data_train_opt['nExemplars'] = 0 10 | data_train_opt['nTestNovel'] = 0 11 | data_train_opt['nTestBase'] = 400 12 | data_train_opt['batch_size'] = 1 13 | data_train_opt['epoch_size'] = 4000 14 | config['data_train_opt'] = data_train_opt 15 | 16 | config['max_num_epochs'] = 100 17 | 18 | networks = {} 19 | net_optim_paramsF = { 20 | 'optim_type': 'sgd', 'lr': 0.1, 'momentum':0.9, 'weight_decay': 5e-4, 21 | 'nesterov': True, 22 | 'LUT_lr':[(30, 0.1), (60, 0.01), (90, 0.001), (100, 0.0001)]} 23 | networks['feature_extractor'] = { 24 | 'def_file': 'feature_extractors.resnet_feat.py', 'pretrained': None, 25 | 'opt': {'userelu': False, 'restype': 'ResNet10'}, 26 | 'optim_params': net_optim_paramsF} 27 | 28 | net_optim_paramsC = { 29 | 'optim_type': 'sgd', 'lr': 0.1, 'momentum':0.9, 'weight_decay': 5e-4, 30 | 'nesterov': True, 31 | 'LUT_lr':[(30, 0.1), (60, 0.01), (90, 0.001), (100, 0.0001)]} 32 | net_optionsC = { 33 | 'num_features':512, 34 | 'num_classes': 1000, 35 | 'global_pooling': False, 36 | 'scale_cls': 10, 37 | 'learn_scale': True} 38 | networks['classifier'] = { 39 | 'def_file': 'classifiers.cosine_classifier_with_weight_generator.py', 40 | 'pretrained': None, 'opt': net_optionsC, 'optim_params': net_optim_paramsC} 41 | 42 | config['networks'] = networks 43 | 44 | criterions = {} 45 | criterions['loss'] = {'ctype':'CrossEntropyLoss', 'opt':None} 46 | config['criterions'] = criterions 47 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/config/imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN.py: -------------------------------------------------------------------------------- 1 | config = {} 2 | # 
set the parameters related to the training and testing set 3 | 4 | nKbase = 389 5 | nKnovel = 200 6 | nExemplars = 1 7 | 8 | data_train_opt = {} 9 | data_train_opt['nKnovel'] = nKnovel 10 | data_train_opt['nKbase'] = nKbase 11 | data_train_opt['nExemplars'] = nExemplars 12 | data_train_opt['nTestNovel'] = nKnovel 13 | data_train_opt['nTestBase'] = nKnovel 14 | data_train_opt['batch_size'] = 4 15 | data_train_opt['epoch_size'] = 4000 16 | data_train_opt['data_dir'] = './datasets/feature_datasets/imagenet_ResNet10CosineClassifier' 17 | 18 | config['data_train_opt'] = data_train_opt 19 | config['max_num_epochs'] = 15 20 | 21 | num_features = 512 22 | 23 | networks = {} 24 | networks['feature_extractor'] = { 25 | 'def_file': 'feature_extractors.dumb_feat', 'pretrained': None, 26 | 'opt': {'dropout': 0}, 'optim_params': None } 27 | 28 | net_optim_paramsC = { 29 | 'optim_type': 'sgd', 'lr': 0.1, 'momentum':0.9, 'weight_decay': 5e-4, 30 | 'nesterov': True, 'LUT_lr':[(10, 0.01), (15, 0.001)]} 31 | pretrainedC = './experiments/imagenet_ResNet10CosineClassifier/classifier_net_epoch100' 32 | 33 | net_optionsC = { 34 | 'num_features': num_features, 35 | 'num_classes': 1000, 36 | 'global_pooling': False, 37 | 'scale_cls': 10.0, 38 | 'learn_scale': True, 39 | 'dae_config': { 40 | 'gaussian_noise': 0.08, 41 | 'comp_reconstruction_loss': True, 42 | 'targets_as_input': False, 43 | 'dae_type': 'RelationNetBasedGNN', 44 | 'num_layers': 2, 45 | 'num_features_input': num_features, 46 | 'num_features_output': 2 * num_features, 47 | 'num_features_hidden': 3 * num_features, 48 | 'update_dropout': 0.7, 49 | 50 | 'nun_features_msg': 3 * num_features, 51 | 'aggregation_dropout': 0.7, 52 | 'topK_neighbors': 10, 53 | 'temperature': 5.0, 54 | 'learn_temperature': False, 55 | }, 56 | } 57 | networks['classifier'] = { 58 | 'def_file': 'classifiers.cosine_classifier_with_DAE_weight_generator', 59 | 'pretrained': pretrainedC, 'opt': net_optionsC, 60 | 'optim_params': net_optim_paramsC} 61 | config['networks'] = networks 62 | 63 | config['criterions'] = {} 64 | 65 | config['reconstruction_loss_coef'] = 1.0 66 | config['classification_loss_coef'] = 1.0 67 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/data/mini_imagenet_split/train_val_test_classnames.csv: -------------------------------------------------------------------------------- 1 | n01532829 2 | n01558993 3 | n01704323 4 | n01749939 5 | n01770081 6 | n01843383 7 | n01910747 8 | n02074367 9 | n02089867 10 | n02091831 11 | n02101006 12 | n02105505 13 | n02108089 14 | n02108551 15 | n02108915 16 | n02111277 17 | n02113712 18 | n02120079 19 | n02165456 20 | n02457408 21 | n02606052 22 | n02687172 23 | n02747177 24 | n02795169 25 | n02823428 26 | n02966193 27 | n03017168 28 | n03047690 29 | n03062245 30 | n03207743 31 | n03220513 32 | n03337140 33 | n03347037 34 | n03400231 35 | n03476684 36 | n03527444 37 | n03676483 38 | n03838899 39 | n03854065 40 | n03888605 41 | n03908618 42 | n03924679 43 | n03998194 44 | n04067472 45 | n04243546 46 | n04251144 47 | n04258138 48 | n04275548 49 | n04296562 50 | n04389033 51 | n04435653 52 | n04443257 53 | n04509417 54 | n04515003 55 | n04596742 56 | n04604644 57 | n04612504 58 | n06794110 59 | n07584110 60 | n07697537 61 | n07747607 62 | n09246464 63 | n13054560 64 | n13133613 65 | n03535780 66 | n03075370 67 | n02981792 68 | n03980874 69 | n03770439 70 | n02091244 71 | n02114548 72 | n02174001 73 | n03417042 74 | n02971356 75 | n03584254 76 | n02138441 77 | n03773504 78 | 
n02950826 79 | n01855672 80 | n09256479 81 | n02110341 82 | n01930112 83 | n02219486 84 | n02443484 85 | n01981276 86 | n02129165 87 | n04522168 88 | n02099601 89 | n03775546 90 | n02110063 91 | n02116738 92 | n03146219 93 | n02871525 94 | n03127925 95 | n03544143 96 | n03272010 97 | n07613480 98 | n04146614 99 | n04418357 100 | n04149813 101 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/__init__.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | 4 | project_root = pathlib.Path(__file__).resolve().parents[1] 5 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/baselines/wDAEGNN/low_shot_learning/algorithms/__init__.py -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/algorithms/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/baselines/wDAEGNN/low_shot_learning/algorithms/classification/__init__.py -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/algorithms/classification/classification.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import torch 4 | 5 | import low_shot_learning.algorithms.algorithm as algorithm 6 | import low_shot_learning.algorithms.fewshot.utils as fs_utils 7 | import low_shot_learning.algorithms.classification.utils as utils 8 | 9 | 10 | class Classification(algorithm.Algorithm): 11 | def __init__(self, opt, _run=None, _log=None): 12 | super().__init__(opt, _run, _log) 13 | feature_name = opt['feature_name'] if ('feature_name' in opt) else None 14 | 15 | if feature_name: 16 | assert isinstance(feature_name, (list, tuple)) 17 | 18 | self.feature_name = feature_name 19 | 20 | def allocate_tensors(self): 21 | self.tensors = {} 22 | self.tensors['images'] = torch.FloatTensor() 23 | self.tensors['labels'] = torch.LongTensor() 24 | 25 | def set_tensors(self, batch): 26 | assert len(batch) == 2 27 | images, labels = batch 28 | self.tensors['images'].resize_(images.size()).copy_(images) 29 | self.tensors['labels'].resize_(labels.size()).copy_(labels) 30 | 31 | return 'classification' 32 | 33 | def train_step(self, batch): 34 | return self.process_batch_classification_task(batch, is_train=True) 35 | 36 | def evaluation_step(self, batch): 37 | return self.process_batch_classification_task(batch, is_train=False) 38 | 39 | def process_batch_classification_task(self, batch, is_train): 40 | self.set_tensors(batch) 41 | 42 | if is_train and (self.optimizers.get('feature_extractor') is None): 43 | self.networks['feature_extractor'].eval() 44 | 45 | record = utils.object_classification( 46 | feature_extractor=self.networks['feature_extractor'], 47 | feature_extractor_optimizer=self.optimizers.get('feature_extractor'), 48 | classifier=self.networks['classifier'], 49 | classifier_optimizer=self.optimizers.get('classifier'), 50 | images=self.tensors['images'], 51 | labels=self.tensors['labels'], 52 | 
is_train=is_train, 53 | base_ids=None, 54 | feature_name=self.feature_name) 55 | 56 | return record 57 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/algorithms/classification/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | import numpy as np 6 | 7 | import low_shot_learning.utils as utils 8 | 9 | 10 | def compute_top1_and_top5_accuracy(scores, labels): 11 | topk_scores, topk_labels = scores.topk(5, 1, True, True) 12 | label_ind = labels.cpu().numpy() 13 | topk_ind = topk_labels.cpu().numpy() 14 | top1_correct = topk_ind[:,0] == label_ind 15 | top5_correct = np.sum(topk_ind == label_ind.reshape((-1,1)), axis=1) 16 | return top1_correct.astype(float).mean() * 100, top5_correct.astype(float).mean() * 100 17 | 18 | 19 | def extract_features(feature_extractor, images, feature_name=None): 20 | if feature_name: 21 | if isinstance(feature_name, str): 22 | feature_name = [feature_name,] 23 | assert isinstance(feature_name, (list, tuple)) 24 | 25 | features = feature_extractor(images, out_feat_keys=feature_name) 26 | else: 27 | features = feature_extractor(images) 28 | 29 | return features 30 | 31 | 32 | def classification_task(classifier, features, labels, base_ids=None): 33 | if base_ids is not None: 34 | assert(base_ids.dim() == 2) 35 | batch_size = features.size(0) 36 | meta_batch_size = base_ids.size(0) 37 | features = utils.add_dimension(features, dim_size=meta_batch_size) 38 | scores = classifier(features_test=features, base_ids=base_ids) 39 | scores = scores.view(batch_size, -1) 40 | else: 41 | scores = classifier(features) 42 | 43 | loss = F.cross_entropy(scores, labels) 44 | 45 | return scores, loss 46 | 47 | 48 | def object_classification( 49 | feature_extractor, 50 | feature_extractor_optimizer, 51 | classifier, 52 | classifier_optimizer, 53 | images, 54 | labels, 55 | is_train, 56 | base_ids=None, 57 | feature_name=None): 58 | 59 | if isinstance(feature_name, (list, tuple)) and len(feature_name) > 1: 60 | assert base_ids is None 61 | return object_classification_multiple_features( 62 | feature_extractor=feature_extractor, 63 | feature_extractor_optimizer=feature_extractor_optimizer, 64 | classifier=classifier, 65 | classifier_optimizer=classifier_optimizer, 66 | images=images, 67 | labels=labels, 68 | is_train=is_train, 69 | feature_name=feature_name) 70 | 71 | assert images.dim() == 4 72 | assert labels.dim() == 1 73 | assert images.size(0) == labels.size(0) 74 | 75 | if is_train: # Zero gradients. 76 | if feature_extractor_optimizer: 77 | feature_extractor_optimizer.zero_grad() 78 | classifier_optimizer.zero_grad() 79 | 80 | record = {} 81 | train_feature_extractor = ( 82 | is_train and (feature_extractor_optimizer is not None)) 83 | with torch.set_grad_enabled(train_feature_extractor): 84 | # Extract features from the images. 85 | features = extract_features( 86 | feature_extractor, images, feature_name=feature_name) 87 | 88 | if not train_feature_extractor: 89 | # Make sure that no gradients are backproagated to the feature 90 | # extractor when the feature extraction model is freezed. 91 | features = features.detach() 92 | 93 | with torch.set_grad_enabled(is_train): 94 | # Perform the object classification task. 
95 | scores_classification, loss_classsification = classification_task( 96 | classifier, features, labels, base_ids) 97 | loss_total = loss_classsification 98 | record['loss'] = loss_total.item() 99 | 100 | with torch.no_grad(): # Compute accuracies. 101 | AccuracyTop1, AccuracyTop5 = compute_top1_and_top5_accuracy( 102 | scores_classification, labels) 103 | record['AccuracyTop1'] = AccuracyTop1 104 | record['AccuracyTop5'] = AccuracyTop5 105 | #record['Accuracy'] = utils.top1accuracy(scores_classification, labels) 106 | 107 | if is_train: # Backward loss and apply gradient steps. 108 | loss_total.backward() 109 | if feature_extractor_optimizer: 110 | feature_extractor_optimizer.step() 111 | classifier_optimizer.step() 112 | 113 | return record 114 | 115 | 116 | def object_classification_multiple_features( 117 | feature_extractor, 118 | feature_extractor_optimizer, 119 | classifier, 120 | classifier_optimizer, 121 | images, 122 | labels, 123 | is_train, 124 | feature_name): 125 | 126 | assert isinstance(feature_name, (list, tuple)) and len(feature_name) > 1 127 | assert images.dim() == 4 128 | assert labels.dim() == 1 129 | assert images.size(0) == labels.size(0) 130 | 131 | if is_train: # Zero gradients. 132 | if feature_extractor_optimizer: 133 | feature_extractor_optimizer.zero_grad() 134 | classifier_optimizer.zero_grad() 135 | 136 | record = {} 137 | train_feature_extractor = ( 138 | is_train and (feature_extractor_optimizer is not None)) 139 | with torch.set_grad_enabled(train_feature_extractor): 140 | # Extract features from the images. 141 | features = extract_features( 142 | feature_extractor, images, feature_name=feature_name) 143 | assert len(features) == len(feature_name) 144 | 145 | if not train_feature_extractor: 146 | # Make sure that no gradients are backproagated to the feature 147 | # extractor when the feature extraction model is freezed. 148 | for i in range(len(features)): 149 | features[i] = features[i].detach() 150 | 151 | with torch.set_grad_enabled(is_train): 152 | # Perform the object classification task. 153 | scores = classifier(features) 154 | assert len(scores) == len(feature_name) 155 | 156 | losses = [] 157 | for i in range(len(scores)): 158 | losses.append(F.cross_entropy(scores[i], labels)) 159 | record['loss_' + feature_name[i]] = losses[i].item() 160 | 161 | with torch.no_grad(): # Compute accuracies. 162 | AccuracyTop1, AccuracyTop5 = compute_top1_and_top5_accuracy( 163 | scores[i], labels) 164 | record['AccuracyTop1_' + feature_name[i]] = AccuracyTop1 165 | record['AccuracyTop5_' + feature_name[i]] = AccuracyTop5 166 | 167 | loss_total = torch.stack(losses).sum() 168 | 169 | if is_train: # Backward loss and apply gradient steps. 
170 | loss_total.backward() 171 | if feature_extractor_optimizer: 172 | feature_extractor_optimizer.step() 173 | classifier_optimizer.step() 174 | 175 | return record 176 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/algorithms/fewshot/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/baselines/wDAEGNN/low_shot_learning/algorithms/fewshot/__init__.py -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/algorithms/fewshot/fewshot.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import torch 4 | 5 | import low_shot_learning.algorithms.algorithm as algorithm 6 | import low_shot_learning.algorithms.classification.utils as cls_utils 7 | import low_shot_learning.algorithms.fewshot.utils as fs_utils 8 | import low_shot_learning.utils as utils 9 | 10 | 11 | class FewShot(algorithm.Algorithm): 12 | def __init__(self, opt): 13 | super().__init__(opt) 14 | self.keep_best_model_metric_name = 'AccuracyNovel' 15 | self.nKbase = torch.LongTensor() 16 | 17 | self.activate_dropout = ( 18 | opt['activate_dropout'] if ('activate_dropout' in opt) else False) 19 | 20 | self.all_base_cats = ( 21 | opt['all_base_cats'] if ('all_base_cats' in opt) else False) 22 | 23 | self.classification_loss_coef = ( 24 | opt['classification_loss_coef'] 25 | if ('classification_loss_coef' in opt) else 1.0) 26 | 27 | self.reconstruction_loss_coef = ( 28 | opt['reconstruction_loss_coef'] 29 | if ('reconstruction_loss_coef' in opt) else 0.0) 30 | 31 | self.only_novel = opt['only_novel'] if ('only_novel' in opt) else False 32 | 33 | self.accuracies = {} 34 | 35 | def allocate_tensors(self): 36 | self.tensors = {} 37 | self.tensors['images_train'] = torch.FloatTensor() 38 | self.tensors['labels_train'] = torch.LongTensor() 39 | self.tensors['labels_train_1hot'] = torch.FloatTensor() 40 | self.tensors['images_test'] = torch.FloatTensor() 41 | self.tensors['labels_test'] = torch.LongTensor() 42 | self.tensors['Kids'] = torch.LongTensor() 43 | 44 | def set_tensors(self, batch): 45 | self.nKbase = self.dloader.nKbase 46 | self.nKnovel = self.dloader.nKnovel 47 | 48 | if self.nKnovel > 0: 49 | train_test_stage = 'fewshot' 50 | assert(len(batch) == 6) 51 | images_train, labels_train, images_test, labels_test, K, nKbase = batch 52 | self.nKbase = nKbase[0].item() 53 | self.tensors['images_train'].resize_( 54 | images_train.size()).copy_(images_train) 55 | self.tensors['labels_train'].resize_( 56 | labels_train.size()).copy_(labels_train) 57 | labels_train = self.tensors['labels_train'] 58 | 59 | nKnovel = 1 + labels_train.max().item() - self.nKbase 60 | 61 | labels_train_1hot_size = list(labels_train.size()) + [nKnovel,] 62 | labels_train_unsqueeze = labels_train.unsqueeze(dim=labels_train.dim()) 63 | self.tensors['labels_train_1hot'].resize_(labels_train_1hot_size).fill_(0).scatter_( 64 | len(labels_train_1hot_size) - 1, labels_train_unsqueeze - self.nKbase, 1) 65 | self.tensors['images_test'].resize_(images_test.size()).copy_(images_test) 66 | self.tensors['labels_test'].resize_(labels_test.size()).copy_(labels_test) 67 | self.tensors['Kids'].resize_(K.size()).copy_(K) 68 | else: 69 | train_test_stage = 'base_classification' 70 | assert(len(batch) == 4) 71 | images_test, 
labels_test, K, nKbase = batch 72 | self.nKbase = nKbase.squeeze().item() 73 | self.tensors['images_test'].resize_(images_test.size()).copy_(images_test) 74 | self.tensors['labels_test'].resize_(labels_test.size()).copy_(labels_test) 75 | self.tensors['Kids'].resize_(K.size()).copy_(K) 76 | 77 | return train_test_stage 78 | 79 | def train_step(self, batch): 80 | return self.process_batch(batch, is_train=True) 81 | 82 | def evaluation_step(self, batch): 83 | return self.process_batch(batch, is_train=False) 84 | 85 | def process_batch(self, batch, is_train): 86 | process_type = self.set_tensors(batch) 87 | if process_type=='fewshot': 88 | return self.process_batch_fewshot_classification_task(is_train) 89 | elif process_type=='base_classification': 90 | return self.process_batch_base_class_classification_task(is_train) 91 | else: 92 | raise ValueError('Unexpected process type {0}'.format(process_type)) 93 | 94 | def process_batch_base_class_classification_task(self, is_train): 95 | images = self.tensors['images_test'] 96 | labels = self.tensors['labels_test'] 97 | Kids = self.tensors['Kids'] 98 | base_ids = None if (self.nKbase==0) else Kids[:,:self.nKbase].contiguous() 99 | 100 | assert(images.dim() == 5 and labels.dim() == 2) 101 | images = utils.convert_from_5d_to_4d(images) 102 | labels = labels.view(-1) 103 | 104 | if self.optimizers.get('feature_extractor') is None: 105 | self.networks['feature_extractor'].eval() 106 | if is_train and self.activate_dropout: 107 | utils.activate_dropout_units(feature_extractor) 108 | 109 | record = cls_utils.object_classification( 110 | feature_extractor=self.networks['feature_extractor'], 111 | feature_extractor_optimizer=self.optimizers['feature_extractor'], 112 | classifier=self.networks['classifier'], 113 | classifier_optimizer=self.optimizers['classifier'], 114 | images=images, 115 | labels=labels, 116 | is_train=is_train, 117 | base_ids=base_ids) 118 | 119 | return record 120 | 121 | def process_batch_fewshot_classification_task(self, is_train): 122 | if self.only_novel: 123 | raise ValueError('Not implemented yet.') 124 | 125 | Kids = self.tensors['Kids'] 126 | nKbase = self.nKbase 127 | if is_train and self.all_base_cats: 128 | assert(nKbase==0) 129 | base_ids = Kids 130 | else: 131 | base_ids = None if (self.nKbase==0) else Kids[:,:nKbase].contiguous() 132 | 133 | self.networks['classifier']._novel_ids = Kids[:,nKbase:].contiguous() 134 | if self.optimizers.get('feature_extractor') is None: 135 | self.networks['feature_extractor'].eval() 136 | if is_train and self.activate_dropout: 137 | utils.activate_dropout_units(self.networks['feature_extractor']) 138 | 139 | record = fs_utils.fewshot_classification( 140 | feature_extractor=self.networks['feature_extractor'], 141 | feature_extractor_optimizer=self.optimizers.get('feature_extractor'), 142 | classifier=self.networks['classifier'], 143 | classifier_optimizer=self.optimizers['classifier'], 144 | images_train=self.tensors['images_train'], 145 | labels_train=self.tensors['labels_train'], 146 | labels_train_1hot=self.tensors['labels_train_1hot'], 147 | images_test=self.tensors['images_test'], 148 | labels_test=self.tensors['labels_test'], 149 | is_train=is_train, 150 | base_ids=base_ids, 151 | classification_coef=self.classification_loss_coef, 152 | reconstruction_coef=self.reconstruction_loss_coef) 153 | 154 | if not is_train: 155 | metrics = ['AccuracyNovel',] 156 | if 'AccuracyBoth' in record: 157 | metrics.append('AccuracyBoth') 158 | record, self.accuracies = 
fs_utils.compute_95confidence_intervals( 159 | record, episode=self.biter, num_episodes=self.bnumber, 160 | store_accuracies=self.accuracies, metrics=metrics) 161 | 162 | return record 163 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/algorithms/fewshot/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn.functional as F 6 | 7 | import low_shot_learning.algorithms.classification.utils as cls_utils 8 | import low_shot_learning.utils as utils 9 | 10 | 11 | def few_shot_feature_classification( 12 | classifier, features_test, features_train, labels_train_1hot, labels_test, 13 | base_ids=None): 14 | if base_ids is not None: 15 | classification_scores = classifier( 16 | features_test=features_test, 17 | features_train=features_train, 18 | labels_train=labels_train_1hot, 19 | base_ids=base_ids) 20 | else: 21 | classification_scores = classifier( 22 | features_test=features_test, 23 | features_train=features_train, 24 | labels_train=labels_train_1hot) 25 | 26 | assert(classification_scores.dim() == 3) 27 | 28 | classification_scores = classification_scores.view( 29 | classification_scores.size(0) * classification_scores.size(1), -1) 30 | labels_test = labels_test.view(-1) 31 | assert(classification_scores.size(0) == labels_test.size(0)) 32 | 33 | loss = F.cross_entropy(classification_scores, labels_test) 34 | 35 | return classification_scores, loss 36 | 37 | 38 | def compute_accuracy_metrics(scores, labels, num_base, record={}, string_id=''): 39 | assert(isinstance(record, dict)) 40 | 41 | if string_id != '': 42 | string_id = '_' + string_id 43 | 44 | if labels.dim() > 1: 45 | labels = labels.view(scores.size(0)) 46 | 47 | if num_base > 0: 48 | record['AccuracyBoth' + string_id] = utils.top1accuracy(scores, labels) 49 | # scores = scores.cpu() 50 | # labels = labels.cpu() 51 | 52 | base_indices = torch.nonzero(labels < num_base).view(-1) 53 | novel_indices = torch.nonzero(labels >= num_base).view(-1) 54 | if base_indices.dim() != 0 and base_indices.size(0) > 0: 55 | scores_base = scores[base_indices][:, :num_base] 56 | labels_base = labels[base_indices] 57 | record['AccuracyBase' + string_id] = utils.top1accuracy( 58 | scores_base, labels_base) 59 | 60 | scores_novel = scores[novel_indices,:][:, num_base:] 61 | labels_novel = labels[novel_indices] - num_base 62 | record['AccuracyNovel' + string_id] = utils.top1accuracy( 63 | scores_novel, labels_novel) 64 | else: 65 | record['AccuracyNovel' + string_id] = utils.top1accuracy(scores, labels) 66 | 67 | return record 68 | 69 | 70 | def fewshot_classification( 71 | feature_extractor, 72 | feature_extractor_optimizer, 73 | classifier, 74 | classifier_optimizer, 75 | images_train, 76 | labels_train, 77 | labels_train_1hot, 78 | images_test, 79 | labels_test, 80 | is_train, 81 | base_ids=None, 82 | feature_name=None, 83 | classification_coef=1.0, 84 | reconstruction_coef=0.0): 85 | 86 | assert(images_train.dim() == 5) 87 | assert(images_test.dim() == 5) 88 | assert(images_train.size(0) == images_test.size(0)) 89 | assert(images_train.size(2) == images_test.size(2)) 90 | assert(images_train.size(3) == images_test.size(3)) 91 | assert(images_train.size(4) == images_test.size(4)) 92 | assert(labels_train.dim() == 2) 93 | assert(labels_test.dim() == 2) 94 | assert(labels_train.size(0) == labels_test.size(0)) 95 | assert(labels_train.size(0) == 
images_train.size(0)) 96 | 97 | if (feature_name and 98 | isinstance(feature_name, (list, tuple)) and 99 | len(feature_name) > 1): 100 | assert is_train is False 101 | assert reconstruction_coef == 0.0 102 | assert classification_coef == 1.0 103 | return fewshot_classification_multiple_features( 104 | feature_extractor=feature_extractor, 105 | feature_extractor_optimizer=feature_extractor_optimizer, 106 | classifier=classifier, 107 | classifier_optimizer=classifier_optimizer, 108 | images_train=images_train, 109 | labels_train=labels_train, 110 | labels_train_1hot=labels_train_1hot, 111 | images_test=images_test, 112 | labels_test=labels_test, 113 | is_train=is_train, 114 | base_ids=base_ids, 115 | feature_name=feature_name) 116 | 117 | meta_batch_size = images_train.size(0) 118 | 119 | if is_train: # zero the gradients 120 | if feature_extractor_optimizer: 121 | feature_extractor_optimizer.zero_grad() 122 | classifier_optimizer.zero_grad() 123 | 124 | record = {} 125 | with torch.no_grad(): 126 | images_train = utils.convert_from_5d_to_4d(images_train) 127 | images_test = utils.convert_from_5d_to_4d(images_test) 128 | labels_test = labels_test.view(-1) 129 | batch_size_train = images_train.size(0) 130 | # batch_size_test = images_test.size(0) 131 | images = torch.cat([images_train, images_test], dim=0) 132 | 133 | train_feature_extractor = ( 134 | is_train and (feature_extractor_optimizer is not None)) 135 | with torch.set_grad_enabled(train_feature_extractor): 136 | # Extract features from the train and test images. 137 | features = cls_utils.extract_features( 138 | feature_extractor, images, feature_name=feature_name) 139 | 140 | if not train_feature_extractor: 141 | # Make sure that no gradients are backproagated to the feature 142 | # extractor when the feature extraction model is freezed. 
143 | features = features.detach() 144 | 145 | with torch.set_grad_enabled(is_train): 146 | features_train = features[:batch_size_train] 147 | features_test = features[batch_size_train:] 148 | features_train = utils.add_dimension(features_train, meta_batch_size) 149 | features_test = utils.add_dimension(features_test, meta_batch_size) 150 | 151 | classification_scores, loss = few_shot_feature_classification( 152 | classifier, features_test, features_train, labels_train_1hot, 153 | labels_test, base_ids) 154 | record['loss'] = loss.item() 155 | loss_total = loss * classification_coef 156 | 157 | if is_train and (reconstruction_coef > 0.0): 158 | rec_loss = classifier.reconstruction_loss 159 | assert(rec_loss is not None) 160 | loss_total = loss_total + reconstruction_coef * rec_loss 161 | record['rec_loss'] = rec_loss.item() 162 | record['tot_loss'] = loss_total.item() 163 | #******************************************************************* 164 | 165 | with torch.no_grad(): 166 | num_base = base_ids.size(1) if (base_ids is not None) else 0 167 | record = compute_accuracy_metrics( 168 | classification_scores, labels_test, num_base, record) 169 | 170 | if is_train: 171 | loss_total.backward() 172 | if feature_extractor_optimizer: 173 | feature_extractor_optimizer.step() 174 | classifier_optimizer.step() 175 | 176 | return record 177 | 178 | 179 | def fewshot_classification_multiple_features( 180 | feature_extractor, 181 | feature_extractor_optimizer, 182 | classifier, 183 | classifier_optimizer, 184 | images_train, 185 | labels_train, 186 | labels_train_1hot, 187 | images_test, 188 | labels_test, 189 | is_train, 190 | feature_name, 191 | base_ids=None): 192 | 193 | assert is_train is False 194 | assert feature_name and isinstance(feature_name, (list, tuple)) 195 | 196 | meta_batch_size = images_train.size(0) 197 | num_base = base_ids.size(1) if (base_ids is not None) else 0 198 | 199 | record = {} 200 | with torch.no_grad(): 201 | images_train = utils.convert_from_5d_to_4d(images_train) 202 | images_test = utils.convert_from_5d_to_4d(images_test) 203 | labels_test = labels_test.view(-1) 204 | batch_size_train = images_train.size(0) 205 | images = torch.cat([images_train, images_test], dim=0) 206 | 207 | with torch.set_grad_enabled(is_train): 208 | # Extract features from the train and test images. 
209 | features = cls_utils.extract_features( 210 | feature_extractor, images, feature_name=feature_name) 211 | assert len(features) == len(feature_name) 212 | 213 | for i, feature_name_i in enumerate(feature_name): 214 | features_train = features[i][:batch_size_train] 215 | features_test = features[i][batch_size_train:] 216 | features_train = utils.add_dimension( 217 | features_train, meta_batch_size) 218 | features_test = utils.add_dimension( 219 | features_test, meta_batch_size) 220 | 221 | if isinstance(classifier, (list, tuple)): 222 | assert len(classifier) == len(feature_name) 223 | classifier_this = classifier[i] 224 | else: 225 | classifier_this = classifier 226 | 227 | classification_scores, loss = few_shot_feature_classification( 228 | classifier_this, features_test, features_train, 229 | labels_train_1hot, labels_test, base_ids) 230 | record['loss_'+feature_name_i] = loss.item() 231 | 232 | with torch.no_grad(): 233 | record = compute_accuracy_metrics( 234 | classification_scores, labels_test, num_base, record, 235 | string_id=feature_name_i) 236 | 237 | return record 238 | 239 | 240 | def compute_95confidence_intervals( 241 | record, 242 | episode, 243 | num_episodes, 244 | store_accuracies, 245 | metrics=['AccuracyNovel',]): 246 | 247 | if episode==0: 248 | store_accuracies = {metric: [] for metric in metrics} 249 | 250 | for metric in metrics: 251 | store_accuracies[metric].append(record[metric]) 252 | if episode == (num_episodes - 1): 253 | # Compute std and confidence interval of the 'metric' accuracies. 254 | accuracies = np.array(store_accuracies[metric]) 255 | stds = np.std(accuracies, 0) 256 | record[metric + '_std'] = stds 257 | record[metric + '_cnf'] = 1.96*stds/np.sqrt(num_episodes) 258 | 259 | return record, store_accuracies 260 | 261 | 262 | def compute_weight_orthogonality_loss(cls_weights): 263 | 264 | nKall = cls_weights.size(1) 265 | device = 'cuda' if cls_weights.is_cuda else 'cpu' 266 | orthogonality_loss = torch.add( 267 | torch.bmm(cls_weights, cls_weights.transpose(1,2)), 268 | -torch.eye(nKall).to(device).view(1, nKall, nKall)).abs().mean() 269 | 270 | return orthogonality_loss 271 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/algorithms/utils/save_features.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | 5 | import h5py 6 | import numpy as np 7 | import torch 8 | from tqdm import tqdm 9 | 10 | import low_shot_learning.utils as utils 11 | import low_shot_learning.architectures.tools as tools 12 | import low_shot_learning.algorithms.algorithm as algorithm 13 | import low_shot_learning.algorithms.classification.utils as cls_utils 14 | 15 | 16 | class SaveFeatures(algorithm.Algorithm): 17 | def __init__(self, opt): 18 | if 'classifier' in opt['networks']: 19 | del opt['networks']['classifier'] 20 | 21 | super().__init__(opt) 22 | 23 | def allocate_tensors(self): 24 | self.tensors = {} 25 | self.tensors['images'] = torch.FloatTensor() 26 | self.tensors['labels'] = torch.LongTensor() 27 | 28 | def set_tensors(self, batch): 29 | assert len(batch) == 2 30 | images, labels = batch 31 | self.tensors['images'].resize_(images.size()).copy_(images) 32 | self.tensors['labels'].resize_(labels.size()).copy_(labels) 33 | 34 | def save_features( 35 | self, 36 | dataloader, 37 | filename, 38 | feature_name=None, 39 | global_pooling=True): 40 | """Saves features and labels for each image in the 
dataloader. 41 | 42 | This routines uses the trained feature model (i.e., 43 | self.networks['feature_extractor']) in order to extract a feature for each 44 | image in the dataloader. The extracted features along with the labels 45 | of the images that they come from are saved in a h5py file. 46 | 47 | Args: 48 | dataloader: A dataloader that feeds images and labels. 49 | filename: The file name where the features and the labels of each 50 | images in the dataloader are saved. 51 | """ 52 | 53 | if isinstance(feature_name, (list, tuple)): 54 | assert len(feature_name) == 1 55 | 56 | feature_extractor = self.networks['feature_extractor'] 57 | feature_extractor.eval() 58 | 59 | self.dloader = dataloader 60 | dataloader_iterator = dataloader.get_iterator() 61 | 62 | self.logger.info( 63 | 'Destination filename for features: {0}'.format(filename)) 64 | 65 | data_file = h5py.File(filename, 'w') 66 | max_count = len(dataloader_iterator) * dataloader_iterator.batch_size 67 | all_labels = data_file.create_dataset( 68 | 'all_labels', (max_count,), dtype='i') 69 | all_features = None 70 | 71 | count = 0 72 | for i, batch in enumerate(tqdm(dataloader_iterator)): 73 | with torch.no_grad(): 74 | self.set_tensors(batch) 75 | images = self.tensors['images'].detach() 76 | labels = self.tensors['labels'].detach() 77 | assert images.dim()==4 78 | assert labels.dim()==1 79 | 80 | features = cls_utils.extract_features( 81 | feature_extractor, images, feature_name=feature_name) 82 | 83 | if global_pooling and features.dim() == 4: 84 | features = tools.global_pooling(features, pool_type='avg') 85 | features = features.view(features.size(0), -1) 86 | assert(features.dim()==2) 87 | 88 | if all_features is None: 89 | self.logger.info('Image size: {0}'.format(images.size())) 90 | self.logger.info('Feature size: {0}'.format(features.size())) 91 | self.logger.info('Max_count: {0}'.format(max_count)) 92 | all_features = data_file.create_dataset( 93 | 'all_features', (max_count, features.size(1)), dtype='f') 94 | self.logger.info('Number of feature channels: {0}'.format( 95 | features.size(1))) 96 | 97 | all_features[count:(count + features.size(0)), :] = ( 98 | features.cpu().numpy()) 99 | all_labels[count:(count + features.size(0))] = labels.cpu().numpy() 100 | count = count + features.size(0) 101 | 102 | self.logger.info('Number of processed primages: {0}'.format(count)) 103 | 104 | count_var = data_file.create_dataset('count', (1,), dtype='i') 105 | count_var[0] = count 106 | data_file.close() 107 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/architectures/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib import import_module 2 | 3 | 4 | 5 | def factory(architecture_name, *args, **kwargs): 6 | architecture_module = import_module( 7 | '.architectures.' 
+ architecture_name, package='low_shot_learning') 8 | create_model = getattr(architecture_module, 'create_model') 9 | return create_model(*args, **kwargs) 10 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/architectures/classifiers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/baselines/wDAEGNN/low_shot_learning/architectures/classifiers/__init__.py -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/architectures/classifiers/cosine_classifier_with_DAE_weight_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | import low_shot_learning.architectures.classifiers.utils as cutils 5 | from low_shot_learning.architectures.classifiers.cosine_classifier_with_weight_generator import \ 6 | CosineClassifierWithWeightGeneration 7 | from low_shot_learning.architectures.classifiers.weights_denoising_autoencoder import WeightsDAE 8 | 9 | 10 | def reconstruction_loss(outputs, targets): 11 | # Both outputs and targets have shape: 12 | # [batch_size x num_nodes x num_features] 13 | assert outputs.dim() == 3 14 | assert targets.dim() == 3 15 | assert outputs.size() == targets.size() 16 | 17 | # Since we use cosine classifier the weights must be L_2 normalized. 18 | targets = F.normalize(targets, p=2, dim=targets.dim()-1, eps=1e-12) 19 | outputs = F.normalize(outputs, p=2, dim=outputs.dim()-1, eps=1e-12) 20 | # return the L2 squared loss (averaged over the first 2 dimensions, i.e., 21 | # batch_size and num_nodes). 22 | return (targets - outputs).pow(2).mean() * outputs.size(2) 23 | 24 | 25 | class CosineClassifierWithDAEWeightGeneration(CosineClassifierWithWeightGeneration): 26 | def __init__( 27 | self, 28 | dae_config, 29 | num_features, 30 | num_classes, 31 | global_pooling, 32 | scale_cls=10.0, 33 | learn_scale=True): 34 | 35 | super(CosineClassifierWithDAEWeightGeneration, self).__init__( 36 | num_features, num_classes, global_pooling, scale_cls, learn_scale) 37 | 38 | self.targets_as_input = ( 39 | dae_config['targets_as_input'] 40 | if ('targets_as_input' in dae_config) else False) 41 | 42 | self.comp_reconstruction_loss = ( 43 | dae_config['comp_reconstruction_loss'] 44 | if ('comp_reconstruction_loss' in dae_config) else True) 45 | 46 | self.weights_dae_generator = WeightsDAE(dae_config) 47 | 48 | def get_classification_weights( 49 | self, base_ids, features_train=None, labels_train=None): 50 | """Gets the classification weights of the base and novel categories. 51 | 52 | This routine returns the classification weight of the base and novel 53 | classes. The latter are returned only if the input arguments 54 | features_train and labels_train are not None. 55 | 56 | Args: 57 | base_ids: A 2D tensor with shape [meta_batch_size x num_base] that 58 | for each training episode in the the batch it includes the 59 | indices of the base categories that are being used. 60 | `meta_batch_size` is the number of training episodes in the 61 | batch and `num_base` is the number of base classes. 62 | features_train: A 3D tensor with shape 63 | [meta_batch_size x num_train_examples x num_channels] that 64 | represents the `num_channels`-dimensional feature vectors of the 65 | training examples of each training episode in the batch. 
66 | `num_train_examples` is the number of train examples in each 67 | training episode. Those training examples are from the novel 68 | classes. 69 | labels_train: A 3D tensor with shape 70 | [meta_batch_size x num_train_examples x num_novel] that 71 | represents the labels (encoded as 1-hot vectors of lenght 72 | num_novel) of the training examples of each training episode in 73 | the batch. `num_novel` is the number of novel classes. 74 | 75 | Returns: 76 | classification_weights: A 3D tensor of shape 77 | [meta_batch_size x num_classes x num_channels] 78 | that includes the `num_channels`-dimensional classification 79 | weight vectors of the classes involved on each training episode 80 | in the batch. If the training data for the novel classes are not 81 | provided (i.e., features_train or labels_train are None) then 82 | classification_weights includes only the classification 83 | weights of the base classes; in this case num_channels is equal 84 | to `num_base`. Otherwise, classification_weights includes the 85 | classification weight vectors of both base and novel classses; 86 | in this case `num_classes` is equal to `num_base` + `num_novel`. 87 | """ 88 | 89 | #*********************************************************************** 90 | #******** Get the classification weights for the base categories ******* 91 | meta_batch_size, num_base = base_ids.size() 92 | weight_base = self.weight_base[base_ids.view(-1)] 93 | weight_base = weight_base.view(meta_batch_size, num_base, -1) 94 | self.num_base = num_base 95 | 96 | #*********************************************************************** 97 | if features_train is None or labels_train is None: 98 | # If training data for the novel categories are not provided then 99 | # return only the classification weights of the base categories. 100 | return weight_base 101 | 102 | num_novel = labels_train.size(2) 103 | 104 | if features_train.dim() == 5: 105 | features_train = cutils.preprocess_5D_features( 106 | features_train, self.global_pooling) 107 | assert features_train.dim() == 3 108 | assert features_train.size(2) == self.num_features 109 | features_train = F.normalize(features_train, p=2, dim=2, eps=1e-12) 110 | 111 | #*********************************************************************** 112 | #******* Generate classification weights for base & novel classes ****** 113 | weight_base = weight_base.detach() 114 | 115 | if ((self.targets_as_input or self.comp_reconstruction_loss) and 116 | self.training): 117 | novel_ids = self._novel_ids 118 | assert novel_ids.size(1) == num_novel 119 | weight_novel_target = self.weight_base[novel_ids.view(-1)].detach() 120 | weight_novel_target = weight_novel_target.view( 121 | meta_batch_size, num_novel, self.num_features) 122 | 123 | if self.targets_as_input and self.training: 124 | weight_novel = weight_novel_target 125 | else: 126 | # Estimate the initial classification weights for the novel classes 127 | # by computing the average of the feature vectors of their training 128 | # examples. 129 | weight_novel = cutils.average_train_features( 130 | features_train, labels_train) 131 | 132 | input_weights = torch.cat([weight_base, weight_novel], dim=1) 133 | # Since we use cosine classifier the weights must be L_2 normalized. 
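# Note on shapes (inferred from the code above, added for clarity): input_weights is a
# 3D tensor of size [meta_batch_size x (num_base + num_novel) x num_features], i.e. one
# node per class, with the base classes first and the novel classes after them. The
# WeightsDAE below refines all class nodes jointly and returns a tensor of the same
# shape (cf. reconstruction_loss at the top of this file).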
134 | # input_weights = F.normalize(input_weights, p=2, dim=2, eps=1e-12) 135 | 136 | output_weights = self.weights_dae_generator(input_weights) 137 | #*********************************************************************** 138 | 139 | if self.training and self.comp_reconstruction_loss: 140 | targets_weights = torch.cat([weight_base, weight_novel_target], 1) 141 | self.reconstruction_loss = reconstruction_loss( 142 | output_weights, targets_weights) 143 | else: 144 | self.reconstruction_loss = None 145 | 146 | return output_weights 147 | 148 | 149 | def create_model(opt): 150 | return CosineClassifierWithDAEWeightGeneration( 151 | dae_config=opt['dae_config'], 152 | num_features=opt['num_features'], 153 | num_classes=opt['num_classes'], 154 | global_pooling=opt['global_pooling'], 155 | scale_cls=opt['scale_cls'], 156 | learn_scale=(opt['learn_scale'] if ('learn_scale' in opt) else True)) 157 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/architectures/classifiers/few_shot_classification_with_prototypes.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | import low_shot_learning.architectures.classifiers.utils as cutils 6 | import low_shot_learning.architectures.tools as tools 7 | 8 | 9 | class FewShotClassifierWithPrototypes(nn.Module): 10 | def __init__(self, global_pooling, scale_cls=10.0, learn_scale=True): 11 | super(FewShotClassifierWithPrototypes, self).__init__() 12 | 13 | self.global_pooling = global_pooling 14 | self.scale_cls = nn.Parameter( 15 | torch.FloatTensor(1).fill_(scale_cls), requires_grad=learn_scale) 16 | 17 | def forward(self, features_test, features_train, labels_train): 18 | 19 | #******* Generate classification weights for the novel categories ****** 20 | if features_train.dim() == 5: 21 | features_train = cutils.preprocess_5D_features( 22 | features_train, self.global_pooling) 23 | assert(features_train.dim() == 3) 24 | 25 | meta_batch_size = features_train.size(0) 26 | num_novel = labels_train.size(2) 27 | features_train = F.normalize(features_train, p=2, dim=2, eps=1e-12) 28 | classification_weights = cutils.average_train_features( 29 | features_train, labels_train) 30 | classification_weights = classification_weights.view( 31 | meta_batch_size, num_novel, -1) 32 | #*********************************************************************** 33 | 34 | if features_test.dim() == 5: 35 | features_test = cutils.preprocess_5D_features( 36 | features_test, self.global_pooling) 37 | assert(features_test.dim() == 3) 38 | 39 | classification_scores = tools.batch_cosine_fully_connected_layer( 40 | features_test, classification_weights.transpose(1,2), 41 | scale=self.scale_cls) 42 | 43 | return classification_scores 44 | 45 | 46 | def create_model(opt): 47 | return FewShotClassifierWithPrototypes( 48 | global_pooling=opt['global_pooling'], 49 | scale_cls=opt['scale_cls'], 50 | learn_scale=opt['learn_scale']) 51 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/architectures/classifiers/matching_network_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class MatchingNetworkHead(nn.Module): 7 | def __init__(self, opt): 8 | super(MatchingNetworkHead, self).__init__() 9 | scale_cls = 
opt['scale_cls'] if ('scale_cls' in opt) else 10.0 10 | self.scale_cls = nn.Parameter( 11 | torch.FloatTensor(1).fill_(scale_cls), requires_grad=True) 12 | 13 | def forward(self, features_test, features_train, labels_train): 14 | """Recognize novel categories based on the Matching Nets approach. 15 | 16 | Classify the test examples (i.e., `features_test`) using the available 17 | training examples (i.e., `features_test` and `labels_train`) using the 18 | Matching Nets approach. 19 | 20 | Args: 21 | features_test: A 3D tensor with shape 22 | [batch_size x num_test_examples x num_channels] that represents 23 | the test features of each training episode in the batch. 24 | features_train: A 3D tensor with shape 25 | [batch_size x num_train_examples x num_channels] that represents 26 | the train features of each training episode in the batch. 27 | labels_train: A 3D tensor with shape 28 | [batch_size x num_train_examples x nKnovel] that represents 29 | the train labels (encoded as 1-hot vectors) of each training 30 | episode in the batch. 31 | 32 | Return: 33 | scores_cls: A 3D tensor with shape 34 | [batch_size x num_test_examples x nKnovel] that represents the 35 | classification scores of the test feature vectors for the 36 | nKnovel novel categories. 37 | """ 38 | assert features_train.dim() == 3 39 | assert labels_train.dim() == 3 40 | assert features_test.dim() == 3 41 | assert features_train.size(0) == labels_train.size(0) 42 | assert features_train.size(0) == features_test.size(0) 43 | assert features_train.size(1) == labels_train.size(1) 44 | assert features_train.size(2) == features_test.size(2) 45 | 46 | batch_size, num_test_examples, num_channels = features_test.size() 47 | num_train_examples = features_train.size(1) 48 | nKnovel = labels_train.size(2) 49 | 50 | # L2 normalize the feature vectors. 51 | features_test = F.normalize( 52 | features_test, p=2, dim=features_test.dim()-1, eps=1e-12) 53 | features_train = F.normalize( 54 | features_train, p=2, dim=features_train.dim()-1, eps=1e-12) 55 | 56 | # Compute the cosine similrity of the test features with the train 57 | # features. The shape of the cosine similarities tensor is: 58 | # [batch_size x num_test_examples x num_train_examples] 59 | cosine_similarities = self.scale_cls * torch.bmm( 60 | features_test, features_train.transpose(1,2)) 61 | # Apply the softmax operator over the images. 62 | cosine_scores = F.softmax(cosine_similarities, dim=2) 63 | 64 | # Accumulate cosine_scores accross images of the same novel category and 65 | # compute the final classification scores for all the novel categories. 
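# Illustrative example (made-up numbers): with a single episode, one test image and four
# support images whose softmaxed scores are [0.5, 0.3, 0.1, 0.1] and whose one-hot labels
# mark them as classes [A, A, B, B], the bmm below yields per-class scores [0.8, 0.2];
# each novel class simply accumulates the similarity mass of its own support images
# before the log is taken.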
66 | scores_cls = torch.bmm(cosine_scores, labels_train) 67 | scores_cls = torch.log(torch.clamp(scores_cls, min=1e-7)) 68 | 69 | return scores_cls 70 | 71 | def create_model(opt): 72 | return MatchingNetworkHead(opt) 73 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/architectures/classifiers/prototypical_network_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def L2SquareDist(A, B, average=True): 7 | # input A must be: [nB x Na x nC] 8 | # input B must be: [nB x Nb x nC] 9 | # output C will be: [nB x Na x Nb] 10 | assert A.dim()==3 11 | assert B.dim()==3 12 | assert A.size(0)==B.size(0) and A.size(2)==B.size(2) 13 | nB = A.size(0) 14 | Na = A.size(1) 15 | Nb = B.size(1) 16 | nC = A.size(2) 17 | 18 | # AB = A * B = [nB x Na x nC] * [nB x nC x Nb] = [nB x Na x Nb] 19 | AB = torch.bmm(A, B.transpose(1,2)) 20 | 21 | AA = (A * A).sum(dim=2,keepdim=True).view(nB, Na, 1) # [nB x Na x 1] 22 | BB = (B * B).sum(dim=2,keepdim=True).view(nB, 1, Nb) # [nB x 1 x Nb] 23 | # l2squaredist = A*A + B*B - 2 * A * B 24 | dist = AA.expand_as(AB) + BB.expand_as(AB) - 2 * AB 25 | if average: 26 | dist = dist / nC 27 | 28 | return dist 29 | 30 | 31 | def cosine_similarity(A, B): 32 | # input A must be: [nB x Na x nC] 33 | # input B must be: [nB x Nb x nC] 34 | # output C will be: [nB x Na x Nb] 35 | return torch.bmm(A, B.transpose(1,2)) 36 | 37 | 38 | class PrototypicalNetworkHead(nn.Module): 39 | def __init__(self, opt): 40 | super(PrototypicalNetworkHead, self).__init__() 41 | scale_cls = opt['scale_cls'] if ('scale_cls' in opt) else 1.0 42 | tune_scale = opt['tune_scale'] if ('tune_scale' in opt) else True 43 | self.type = opt['type'] if ('type' in opt) else 'euclidean' 44 | assert self.type in ('euclidean', 'cosine') 45 | self.scale_cls = nn.Parameter( 46 | torch.FloatTensor(1).fill_(scale_cls), requires_grad=tune_scale) 47 | 48 | def forward(self, features_test, features_train, labels_train): 49 | """Recognize novel categories based on the Prototypical Nets approach. 50 | 51 | Classify the test examples (i.e., `features_test`) using the available 52 | training examples (i.e., `features_test` and `labels_train`) using the 53 | Prototypical Nets approach. 54 | 55 | Args: 56 | features_test: A 3D tensor with shape 57 | [batch_size x num_test_examples x num_channels] that represents 58 | the test features of each training episode in the batch. 59 | features_train: A 3D tensor with shape 60 | [batch_size x num_train_examples x num_channels] that represents 61 | the train features of each training episode in the batch. 62 | labels_train: A 3D tensor with shape 63 | [batch_size x num_train_examples x nKnovel] that represents 64 | the train labels (encoded as 1-hot vectors) of each training 65 | episode in the batch. 66 | 67 | Return: 68 | scores_cls: A 3D tensor with shape 69 | [batch_size x num_test_examples x nKnovel] that represents the 70 | classification scores of the test feature vectors for the 71 | nKnovel novel categories. 
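Example (an illustrative sketch only, not part of the original code; it assumes
a recent PyTorch with `torch.nn.functional.one_hot`):
    head = PrototypicalNetworkHead({'scale_cls': 1.0, 'type': 'euclidean'})
    features_test = torch.randn(1, 4, 64)    # [batch x num_test x channels]
    features_train = torch.randn(1, 10, 64)  # [batch x num_train x channels]
    labels_train = F.one_hot(torch.arange(10).view(1, 10) % 5, 5).float()
    scores = head(features_test, features_train, labels_train)
    # scores.shape == torch.Size([1, 4, 5]): one score per test example and
    # per novel class (here 5 classes with 2 support examples each).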
72 | """ 73 | assert features_train.dim() == 3 74 | assert labels_train.dim() == 3 75 | assert features_test.dim() == 3 76 | assert features_train.size(0) == labels_train.size(0) 77 | assert features_train.size(0) == features_test.size(0) 78 | assert features_train.size(1) == labels_train.size(1) 79 | assert features_train.size(2) == features_test.size(2) 80 | 81 | #************************* Compute Prototypes ************************** 82 | labels_train_transposed = labels_train.transpose(1,2) 83 | # Batch matrix multiplication: 84 | # prototypes = labels_train_transposed * features_train ==> 85 | # [batch_size x nKnovel x num_channels] = 86 | # [batch_size x nKnovel x num_train_examples] * [batch_size * num_train_examples * num_channels] 87 | if self.type == 'cosine': 88 | features_test = F.normalize( 89 | features_test, p=2, dim=features_test.dim()-1, eps=1e-12) 90 | features_train = F.normalize( 91 | features_train, p=2, dim=features_train.dim()-1, eps=1e-12) 92 | 93 | prototypes = torch.bmm(labels_train_transposed, features_train) 94 | # Divide with the number of examples per novel category. 95 | prototypes = prototypes.div( 96 | labels_train_transposed.sum(dim=2, keepdim=True).expand_as( 97 | prototypes)) 98 | #*********************************************************************** 99 | if self.type == 'cosine': 100 | prototypes = F.normalize( 101 | prototypes, p=2, dim=prototypes.dim()-1, eps=1e-12) 102 | scores_cls = self.scale_cls * cosine_similarity( 103 | features_test, prototypes) 104 | else: 105 | scores_cls = -self.scale_cls * L2SquareDist( 106 | features_test, prototypes) 107 | 108 | return scores_cls 109 | 110 | 111 | def create_model(opt): 112 | return PrototypicalNetworkHead(opt) 113 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/architectures/classifiers/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | import low_shot_learning.architectures.tools as tools 6 | 7 | 8 | class CosineClassifier(nn.Module): 9 | def __init__( 10 | self, 11 | num_channels, 12 | num_classes, 13 | scale=20.0, 14 | learn_scale=False, 15 | bias=False): 16 | super(CosineClassifier, self).__init__() 17 | 18 | self.num_channels = num_channels 19 | self.num_classes = num_classes 20 | 21 | weight = torch.FloatTensor(num_classes, num_channels).normal_( 22 | 0.0, np.sqrt(2.0/num_channels)) 23 | self.weight = nn.Parameter(weight, requires_grad=True) 24 | 25 | if bias: 26 | bias = torch.FloatTensor(num_classes).fill_(0.0) 27 | self.bias = nn.Parameter(bias, requires_grad=True) 28 | else: 29 | self.bias = None 30 | 31 | scale_cls = torch.FloatTensor(1).fill_(scale) 32 | self.scale_cls = nn.Parameter(scale_cls, requires_grad=learn_scale) 33 | 34 | def forward(self, x_in): 35 | assert x_in.dim() == 2 36 | return tools.cosine_fully_connected_layer( 37 | x_in, self.weight.t(), scale=self.scale_cls, bias=self.bias) 38 | 39 | def extra_repr(self): 40 | s = ( 41 | 'num_channels={0}, num_classes={1}, scale_cls={2} (learnable={3})' 42 | .format(self.num_channels, self.num_classes, self.scale_cls.item(), 43 | self.scale_cls.requires_grad)) 44 | 45 | if self.bias is None: 46 | s += ', bias=False' 47 | return s 48 | 49 | 50 | def average_train_features(features_train, labels_train): 51 | labels_train_transposed = labels_train.transpose(1,2) 52 | weight_novel = torch.bmm(labels_train_transposed, features_train) 53 | weight_novel = 
weight_novel.div( 54 | labels_train_transposed.sum(dim=2, keepdim=True).expand_as( 55 | weight_novel)) 56 | 57 | return weight_novel 58 | 59 | 60 | class FeatExemplarAvgBlock(nn.Module): 61 | def __init__(self): 62 | super(FeatExemplarAvgBlock, self).__init__() 63 | 64 | def forward(self, features_train, labels_train): 65 | return average_train_features(features_train, labels_train) 66 | 67 | 68 | def preprocess_5D_features(features, global_pooling): 69 | meta_batch_size, num_examples, channels, height, width = features.size() 70 | features = features.view( 71 | meta_batch_size * num_examples, channels, height, width) 72 | 73 | if global_pooling: 74 | features = tools.global_pooling(features, pool_type='avg') 75 | 76 | features = features.view(meta_batch_size, num_examples, -1) 77 | 78 | return features 79 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/architectures/feature_extractors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/baselines/wDAEGNN/low_shot_learning/architectures/feature_extractors/__init__.py -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/architectures/feature_extractors/dumb_feat.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class DumbFeat(nn.Module): 6 | def __init__(self, dropout): 7 | super(DumbFeat, self).__init__() 8 | 9 | if dropout > 0.0: 10 | self.dropout = torch.nn.Dropout(p=dropout, inplace=False) 11 | else: 12 | self.dropout = None 13 | 14 | def forward(self, x): 15 | 16 | if x.dim() > 2: 17 | x = x.view(x.size(0), -1) 18 | assert(x.dim()==2) 19 | 20 | if self.dropout is not None: 21 | x = self.dropout(x) 22 | 23 | return x 24 | 25 | 26 | def create_model(opt): 27 | dropout = opt['dropout'] if ('dropout' in opt) else 0.0 28 | return DumbFeat(dropout=dropout) 29 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/architectures/feature_extractors/resnet_feat.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | 5 | 6 | def init_layer(L): 7 | # Initialization using fan-in 8 | if isinstance(L, nn.Conv2d): 9 | n = L.kernel_size[0]*L.kernel_size[1]*L.out_channels 10 | L.weight.data.normal_(0,math.sqrt(2.0/float(n))) 11 | elif isinstance(L, nn.BatchNorm2d): 12 | L.weight.data.fill_(1) 13 | L.bias.data.fill_(0) 14 | 15 | 16 | # Simple ResNet Block 17 | class SimpleBlock(nn.Module): 18 | def __init__(self, indim, outdim, half_res, userelu=True): 19 | super(SimpleBlock, self).__init__() 20 | self.indim = indim 21 | self.outdim = outdim 22 | self.C1 = nn.Conv2d(indim, outdim, kernel_size=3, stride=2 if half_res else 1, padding=1, bias=False) 23 | self.relu1 = nn.ReLU(inplace=True) 24 | self.userelu = userelu 25 | self.relu2 = nn.ReLU(inplace=True) if userelu else None 26 | self.BN1 = nn.BatchNorm2d(outdim) 27 | self.C2 = nn.Conv2d(outdim, outdim,kernel_size=3, padding=1,bias=False) 28 | self.BN2 = nn.BatchNorm2d(outdim) 29 | 30 | self.parametrized_layers = [self.C1, self.C2, self.BN1, self.BN2] 31 | 32 | self.half_res = half_res 33 | 34 | # if the input number of channels is not equal to the output, then need a 1x1 convolution 35 | 
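# For instance, in the ResNet10 configuration built by create_model at the bottom of this
# file, the first block of the second stage maps 64 -> 128 channels with half_res=True, so
# the shortcut becomes a stride-2 1x1 convolution followed by BatchNorm, while blocks whose
# input and output widths match keep the plain identity shortcut.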
if indim!=outdim: 36 | self.shortcut = nn.Conv2d(indim, outdim, 1, 2 if half_res else 1, bias=False) 37 | self.parametrized_layers.append(self.shortcut) 38 | self.BNshortcut = nn.BatchNorm2d(outdim) 39 | self.parametrized_layers.append(self.BNshortcut) 40 | self.shortcut_type = '1x1' 41 | else: 42 | self.shortcut_type = 'identity' 43 | 44 | for layer in self.parametrized_layers: 45 | init_layer(layer) 46 | 47 | def forward(self, x): 48 | out = self.C1(x) 49 | out = self.BN1(out) 50 | out = self.relu1(out) 51 | out = self.C2(out) 52 | out = self.BN2(out) 53 | short_out = x if self.shortcut_type == 'identity' else self.BNshortcut(self.shortcut(x)) 54 | out = out + short_out 55 | if self.userelu: out = self.relu2(out) 56 | return out 57 | 58 | 59 | 60 | # Bottleneck block 61 | class BottleneckBlock(nn.Module): 62 | def __init__(self, indim, outdim, half_res, userelu=True): 63 | super(BottleneckBlock, self).__init__() 64 | bottleneckdim = int(outdim/4) 65 | self.indim = indim 66 | self.outdim = outdim 67 | self.C1 = nn.Conv2d(indim, bottleneckdim, kernel_size=1, bias=False) 68 | self.relu = nn.ReLU() 69 | self.BN1 = nn.BatchNorm2d(bottleneckdim) 70 | self.C2 = nn.Conv2d(bottleneckdim, bottleneckdim, kernel_size=3, stride=2 if half_res else 1,padding=1) 71 | self.BN2 = nn.BatchNorm2d(bottleneckdim) 72 | self.C3 = nn.Conv2d(bottleneckdim, outdim, kernel_size=1, bias=False) 73 | self.BN3 = nn.BatchNorm2d(outdim) 74 | 75 | self.parametrized_layers = [self.C1, self.BN1, self.C2, self.BN2, self.C3, self.BN3] 76 | self.half_res = half_res 77 | 78 | self.userelu = userelu 79 | 80 | # if the input number of channels is not equal to the output, then need a 1x1 convolution 81 | if indim!=outdim: 82 | self.shortcut = nn.Conv2d(indim, outdim, 1, stride=2 if half_res else 1, bias=False) 83 | self.parametrized_layers.append(self.shortcut) 84 | self.shortcut_type = '1x1' 85 | else: 86 | self.shortcut_type = 'identity' 87 | 88 | for layer in self.parametrized_layers: 89 | init_layer(layer) 90 | 91 | 92 | def forward(self, x): 93 | short_out = x if self.shortcut_type == 'identity' else self.shortcut(x) 94 | out = self.C1(x) 95 | out = self.BN1(out) 96 | out = self.relu(out) 97 | out = self.C2(out) 98 | out = self.BN2(out) 99 | out = self.relu(out) 100 | out = self.C3(out) 101 | out = self.BN3(out) 102 | out = out + short_out 103 | 104 | if self.userelu: out = self.relu(out) 105 | return out 106 | 107 | 108 | class ResNet(nn.Module): 109 | def __init__(self,block,list_of_num_layers, list_of_out_dims, userelu=True): 110 | # list_of_num_layers specifies number of layers in each stage 111 | # list_of_out_dims specifies number of output channel for each stage 112 | super(ResNet,self).__init__() 113 | self.grads = [] 114 | self.fmaps = [] 115 | assert len(list_of_num_layers)==4, 'Can have only four stages' 116 | conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 117 | bias=False) 118 | bn1 = nn.BatchNorm2d(64) 119 | relu = nn.ReLU() 120 | pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 121 | 122 | init_layer(conv1) 123 | init_layer(bn1) 124 | 125 | 126 | trunk = [conv1, bn1, relu, pool1] 127 | indim = 64 128 | for i in range(4): 129 | 130 | for j in range(list_of_num_layers[i]): 131 | half_res = (i>=1) and (j==0) 132 | is_last_layer = (i==3) and (j==list_of_num_layers[i]-1) 133 | userelu_here = userelu if is_last_layer else True 134 | B = block(indim, list_of_out_dims[i], half_res, userelu=userelu_here) 135 | trunk.append(B) 136 | indim = list_of_out_dims[i] 137 | trunk.append(nn.AvgPool2d(7)) 138 
| self.trunk = nn.Sequential(*trunk) 139 | 140 | self.final_feat_dim = indim 141 | 142 | def forward(self,x): 143 | out = self.trunk(x) 144 | out = out.view(out.size(0),-1) 145 | return out 146 | 147 | 148 | def create_model(opt): 149 | restype = opt['restype'] 150 | assert(restype=='ResNet10') 151 | userelu = opt['userelu'] 152 | return ResNet(SimpleBlock, [1,1,1,1], [64,128,256,512], userelu=userelu) 153 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/architectures/feature_extractors/utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class SequentialFeatureExtractorAbstractClass(nn.Module): 5 | def __init__(self, all_feat_names, feature_blocks): 6 | super(SequentialFeatureExtractorAbstractClass, self).__init__() 7 | 8 | assert(isinstance(feature_blocks, list)) 9 | assert(isinstance(all_feat_names, list)) 10 | assert(len(all_feat_names) == len(feature_blocks)) 11 | 12 | self.all_feat_names = all_feat_names 13 | self._feature_blocks = nn.ModuleList(feature_blocks) 14 | 15 | 16 | def _parse_out_keys_arg(self, out_feat_keys): 17 | # By default return the features of the last layer / module. 18 | out_feat_keys = ( 19 | [self.all_feat_names[-1],] if out_feat_keys is None else 20 | out_feat_keys) 21 | 22 | if len(out_feat_keys) == 0: 23 | raise ValueError('Empty list of output feature keys.') 24 | 25 | for f, key in enumerate(out_feat_keys): 26 | if key not in self.all_feat_names: 27 | raise ValueError( 28 | 'Feature with name {0} does not exist. ' 29 | 'Existing features: {1}.'.format(key, self.all_feat_names)) 30 | elif key in out_feat_keys[:f]: 31 | raise ValueError( 32 | 'Duplicate output feature key: {0}.'.format(key)) 33 | 34 | # Find the highest output feature in `out_feat_keys 35 | max_out_feat = max( 36 | [self.all_feat_names.index(key) for key in out_feat_keys]) 37 | 38 | return out_feat_keys, max_out_feat 39 | 40 | def forward(self, x, out_feat_keys=None): 41 | """Forward the image `x` through the network and output the asked features. 42 | Args: 43 | x: input image. 44 | out_feat_keys: a list/tuple with the feature names of the features 45 | that the function should return. If out_feat_keys is None ( 46 | DEFAULT) then the last feature of the network is returned. 47 | 48 | Return: 49 | out_feats: If multiple output features were asked then `out_feats` 50 | is a list with the asked output features placed in the same 51 | order as in `out_feat_keys`. If a single output feature was 52 | asked then `out_feats` is that output feature (and not a list). 
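Example (an illustrative sketch, not part of the original file; it assumes the
WideResnet model defined in wide_resnet.py with its default feature names):
    net = wide_resnet.create_model({'depth': 28, 'widen_Factor': 10, 'pool': 'none'})
    x = torch.randn(2, 3, 80, 80)
    last = net(x)                                        # single tensor: the last feature ('block3')
    both = net(x, out_feat_keys=['block1', 'block3'])    # list of two tensors, in the given order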
53 | """ 54 | out_feat_keys, max_out_feat = self._parse_out_keys_arg(out_feat_keys) 55 | out_feats = [None] * len(out_feat_keys) 56 | 57 | feat = x 58 | for f in range(max_out_feat+1): 59 | feat = self._feature_blocks[f](feat) 60 | key = self.all_feat_names[f] 61 | if key in out_feat_keys: 62 | out_feats[out_feat_keys.index(key)] = feat 63 | 64 | out_feats = (out_feats[0] if len(out_feats) == 1 else out_feats) 65 | 66 | return out_feats 67 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/architectures/feature_extractors/wide_resnet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | 5 | import low_shot_learning.architectures.feature_extractors.utils as utils 6 | import low_shot_learning.architectures.tools as tools 7 | 8 | 9 | class BasicBlock(nn.Module): 10 | def __init__(self, in_planes, out_planes, stride, dropRate=0.0): 11 | super(BasicBlock, self).__init__() 12 | 13 | self.equalInOut = (in_planes == out_planes and stride == 1) 14 | 15 | self.convResidual = nn.Sequential() 16 | 17 | if self.equalInOut: 18 | self.convResidual.add_module('bn1', nn.BatchNorm2d(in_planes)) 19 | self.convResidual.add_module('relu1', nn.ReLU(inplace=True)) 20 | self.convResidual.add_module( 21 | 'conv1', 22 | nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 23 | padding=1, bias=False)) 24 | 25 | self.convResidual.add_module('bn2', nn.BatchNorm2d(out_planes)) 26 | self.convResidual.add_module('relu2', nn.ReLU(inplace=True)) 27 | self.convResidual.add_module( 28 | 'conv2', 29 | nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, 30 | padding=1, bias=False)) 31 | 32 | if dropRate > 0: 33 | self.convResidual.add_module('dropout', nn.Dropout(p=dropRate)) 34 | 35 | if self.equalInOut: 36 | self.convShortcut = nn.Sequential() 37 | else: 38 | self.convShortcut = nn.Conv2d( 39 | in_planes, out_planes, kernel_size=1, stride=stride, 40 | padding=0, bias=False) 41 | 42 | def forward(self, x): 43 | return self.convShortcut(x) + self.convResidual(x) 44 | 45 | 46 | class NetworkBlock(nn.Module): 47 | def __init__( 48 | self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0): 49 | super(NetworkBlock, self).__init__() 50 | 51 | self.layer = self._make_layer( 52 | block, in_planes, out_planes, nb_layers, stride, dropRate) 53 | 54 | def _make_layer( 55 | self, block, in_planes, out_planes, nb_layers, stride, dropRate): 56 | 57 | layers = [] 58 | for i in range(nb_layers): 59 | in_planes_arg = i == 0 and in_planes or out_planes 60 | stride_arg = i == 0 and stride or 1 61 | layers.append( 62 | block(in_planes_arg, out_planes, stride_arg, dropRate)) 63 | 64 | return nn.Sequential(*layers) 65 | 66 | def forward(self, x): 67 | return self.layer(x) 68 | 69 | 70 | class WideResnet(utils.SequentialFeatureExtractorAbstractClass): 71 | def __init__( 72 | self, 73 | depth, 74 | widen_factor=1, 75 | dropRate=0.0, 76 | pool='avg', 77 | extra_block=False, 78 | block_strides=[2, 2, 2, 2]): 79 | nChannels = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor] 80 | assert((depth - 4) % 6 == 0) 81 | n = int((depth - 4) / 6) 82 | block = BasicBlock 83 | 84 | all_feat_names = [] 85 | feature_blocks = [] 86 | 87 | # 1st conv before any network block 88 | conv1 = nn.Sequential() 89 | conv1.add_module( 90 | 'Conv', 91 | nn.Conv2d(3, nChannels[0], kernel_size=3, padding=1, bias=False)) 92 | conv1.add_module('BN', nn.BatchNorm2d(nChannels[0])) 93 | 
conv1.add_module('ReLU', nn.ReLU(inplace=True)) 94 | feature_blocks.append(conv1) 95 | all_feat_names.append('conv1') 96 | 97 | # 1st block. 98 | block1 = nn.Sequential() 99 | block1.add_module( 100 | 'Block', 101 | NetworkBlock( 102 | n, nChannels[0], nChannels[1], block, 103 | block_strides[0], dropRate)) 104 | block1.add_module('BN', nn.BatchNorm2d(nChannels[1])) 105 | block1.add_module('ReLU', nn.ReLU(inplace=True)) 106 | feature_blocks.append(block1) 107 | all_feat_names.append('block1') 108 | 109 | # 2nd block. 110 | block2 = nn.Sequential() 111 | block2.add_module( 112 | 'Block', 113 | NetworkBlock( 114 | n, nChannels[1], nChannels[2], block, 115 | block_strides[1], dropRate)) 116 | block2.add_module('BN', nn.BatchNorm2d(nChannels[2])) 117 | block2.add_module('ReLU', nn.ReLU(inplace=True)) 118 | feature_blocks.append(block2) 119 | all_feat_names.append('block2') 120 | 121 | # 3rd block. 122 | block3 = nn.Sequential() 123 | block3.add_module( 124 | 'Block', 125 | NetworkBlock( 126 | n, nChannels[2], nChannels[3], block, 127 | block_strides[2], dropRate)) 128 | block3.add_module('BN', nn.BatchNorm2d(nChannels[3])) 129 | block3.add_module('ReLU', nn.ReLU(inplace=True)) 130 | feature_blocks.append(block3) 131 | all_feat_names.append('block3') 132 | 133 | # extra block. 134 | if extra_block: 135 | block4 = nn.Sequential() 136 | block4.add_module( 137 | 'Block', 138 | NetworkBlock( 139 | n, nChannels[3], nChannels[3], block, 140 | block_strides[3], dropRate)) 141 | block4.add_module('BN', nn.BatchNorm2d(nChannels[3])) 142 | block4.add_module('ReLU', nn.ReLU(inplace=True)) 143 | feature_blocks.append(block4) 144 | all_feat_names.append('block4') 145 | 146 | # global average pooling and classifier_type 147 | assert(pool == 'none' or pool == 'avg' or pool == 'max') 148 | if pool == 'max' or pool == 'avg': 149 | feature_blocks.append(tools.GlobalPooling(pool_type=pool)) 150 | all_feat_names.append('GlobalPooling') 151 | 152 | super(WideResnet, self).__init__(all_feat_names, feature_blocks) 153 | 154 | for m in self.modules(): 155 | if isinstance(m, nn.Conv2d): 156 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 157 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 158 | elif isinstance(m, nn.BatchNorm2d): 159 | m.weight.data.fill_(1) 160 | m.bias.data.zero_() 161 | 162 | 163 | def create_model(opt): 164 | depth = opt['depth'] 165 | widen_factor = opt['widen_Factor'] 166 | dropRate = opt['dropRate'] if ('dropRate' in opt) else 0.0 167 | pool = opt['pool'] if ('pool' in opt) else 'avg' 168 | extra_block = opt['extra_block'] if ('extra_block' in opt) else False 169 | block_strides = opt['strides'] if ('strides' in opt) else None 170 | 171 | if block_strides is None: 172 | block_strides = [2] * 4 173 | 174 | return WideResnet( 175 | depth, widen_factor, dropRate, pool, extra_block, block_strides) 176 | 177 | 178 | if __name__ == '__main__': 179 | opt = {} 180 | opt['depth'] = 28 181 | opt['widen_Factor'] = 10 182 | opt['dropRate'] = 0.0 183 | opt['extra_block'] = False 184 | opt['pool'] = 'none' 185 | model = create_model(opt) 186 | print(model) 187 | 188 | batch_size = 1 189 | image_size = 80 190 | img = torch.FloatTensor(batch_size, 3, image_size, image_size).normal_() 191 | features = model(img, model.all_feat_names) 192 | for feature, feature_name in zip(features, model.all_feat_names): 193 | print('Feature {0}: size {1}, mean {2}, std {3}'.format( 194 | feature_name, feature.size(), feature.mean().item(), 195 | feature.std().item())) 196 | 197 | count = 0 198 | for parameter in model.parameters(): 199 | if parameter.requires_grad: 200 | count += parameter.numel() 201 | 202 | print(count) 203 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/architectures/tools.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class LinearDiag(nn.Module): 8 | def __init__(self, num_features, bias=False): 9 | super(LinearDiag, self).__init__() 10 | # initialize to the identity transform 11 | weight = torch.FloatTensor(num_features).fill_(1) 12 | self.weight = nn.Parameter(weight, requires_grad=True) 13 | 14 | if bias: 15 | bias = torch.FloatTensor(num_features).fill_(0) 16 | self.bias = nn.Parameter(bias, requires_grad=True) 17 | else: 18 | self.register_parameter('bias', None) 19 | 20 | def forward(self, X): 21 | assert(X.dim()==2 and X.size(1)==self.weight.size(0)) 22 | out = X * self.weight.expand_as(X) 23 | if self.bias is not None: 24 | out = out + self.bias.expand_as(out) 25 | return out 26 | 27 | 28 | def cosine_fully_connected_layer(x_in, weight, scale=None, bias=None): 29 | assert(x_in.dim() == 2) 30 | assert(weight.dim() == 2) 31 | assert(x_in.size(1) == weight.size(0)) 32 | 33 | x_in = F.normalize(x_in, p=2, dim=1, eps=1e-12) 34 | weight = F.normalize(weight, p=2, dim=0, eps=1e-12) 35 | 36 | x_out = torch.mm(x_in, weight) 37 | 38 | if scale is not None: 39 | x_out = x_out * scale.view(1, -1) 40 | 41 | if bias is not None: 42 | x_out = x_out + bias.view(1, -1) 43 | 44 | return x_out 45 | 46 | 47 | def batch_cosine_fully_connected_layer(x_in, weight, scale=None, bias=None): 48 | """ 49 | Args: 50 | x_in: a 3D tensor with shape 51 | [meta_batch_size x num_examples x num_features_in] 52 | weight: a 3D tensor with shape 53 | [meta_batch_size x num_features_in x num_features_out] 54 | scale: (optional) a scalar value 55 | bias: (optional) a 1D tensor with shape [num_features_out] 56 | 57 | Returns: 58 | x_out: a 3D tensor with shape 59 | [meta_batch_size x num_examples x num_features_out] 60 | """ 61 | 62 | assert(x_in.dim() == 3) 63 | 
assert(weight.dim() == 3) 64 | assert(x_in.size(0) == weight.size(0)) 65 | assert(x_in.size(2) == weight.size(1)) 66 | 67 | x_in = F.normalize(x_in, p=2, dim=2, eps=1e-12) 68 | weight = F.normalize(weight, p=2, dim=1, eps=1e-12) 69 | 70 | x_out = torch.bmm(x_in, weight) 71 | 72 | if scale is not None: 73 | x_out = x_out * scale 74 | 75 | if bias is not None: 76 | x_out = x_out + bias 77 | 78 | return x_out 79 | 80 | 81 | class CosineFullyConnectedLayer(nn.Module): 82 | def __init__( 83 | self, 84 | num_inputs, 85 | num_outputs, 86 | scale=20.0, 87 | per_plane=False, 88 | learn_scale=True, 89 | bias=False): 90 | super(CosineFullyConnectedLayer, self).__init__() 91 | 92 | self.num_inputs = num_inputs 93 | self.num_outputs = num_outputs 94 | self.learn_scale = learn_scale 95 | self.per_plane = per_plane 96 | 97 | weight = torch.FloatTensor(num_inputs, num_outputs).normal_( 98 | 0.0, np.sqrt(2.0/num_inputs)) 99 | self.weight = nn.Parameter(weight, requires_grad=True) 100 | 101 | if bias: 102 | bias = torch.FloatTensor(num_outputs).fill_(0.0) 103 | self.bias = nn.Parameter(bias, requires_grad=True) 104 | else: 105 | self.bias = None 106 | 107 | if scale: 108 | num_scale_values = num_outputs if per_plane else 1 109 | scale = torch.FloatTensor(num_scale_values).fill_(scale) 110 | self.scale = nn.Parameter(scale, requires_grad=learn_scale) 111 | else: 112 | self.scale = None 113 | 114 | def forward(self, x_in): 115 | assert(x_in.dim() == 2) 116 | return cosine_fully_connected_layer( 117 | x_in, self.weight, scale=self.scale, bias=self.bias) 118 | 119 | def extra_repr(self): 120 | s = 'num_inputs={0}, num_classes={1}'.format( 121 | self.num_inputs, self.num_outputs) 122 | 123 | if self.scale is not None: 124 | if self.per_plane: 125 | s += 'num_scales={0} (learnable={1})'.format( 126 | self.num_outputs, self.learn_scale) 127 | else: 128 | s += 'num_scales={0} (value={1} learnable={2})'.format( 129 | 1, self.scale.item(), self.learn_scale) 130 | 131 | if self.bias is None: 132 | s += ', bias=False' 133 | 134 | return s 135 | 136 | 137 | def global_pooling(x, pool_type): 138 | assert(x.dim() == 4) 139 | if pool_type == 'max': 140 | return F.max_pool2d(x, (x.size(2), x.size(3))) 141 | elif pool_type == 'avg': 142 | return F.avg_pool2d(x, (x.size(2), x.size(3))) 143 | else: 144 | raise ValueError('Unknown pooling type.') 145 | 146 | 147 | class GlobalPooling(nn.Module): 148 | def __init__(self, pool_type): 149 | super(GlobalPooling, self).__init__() 150 | assert(pool_type == 'avg' or pool_type == 'max') 151 | self.pool_type = pool_type 152 | 153 | def forward(self, x): 154 | return global_pooling(x, pool_type=self.pool_type) 155 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/dataloaders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/baselines/wDAEGNN/low_shot_learning/dataloaders/__init__.py -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/dataloaders/basic_dataloaders.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import torchnet as tnt 5 | 6 | 7 | def generate_element_list(list_size, dataset_size): 8 | if list_size == dataset_size: 9 | return list(range(dataset_size)) 10 | elif list_size < dataset_size: 11 
| return np.random.choice( 12 | dataset_size, list_size, replace=False).tolist() 13 | else: # list_size > list_size 14 | num_times = list_size // dataset_size 15 | residual = list_size % dataset_size 16 | assert((num_times * dataset_size + residual) == list_size) 17 | elem_list = list(range(dataset_size)) * num_times 18 | if residual: 19 | elem_list += np.random.choice( 20 | dataset_size, residual, replace=False).tolist() 21 | 22 | return elem_list 23 | 24 | 25 | class SimpleDataloader: 26 | def __init__( 27 | self, dataset, batch_size, train, num_workers=4, epoch_size=None): 28 | self.dataset = dataset 29 | self.batch_size = batch_size 30 | self.num_workers = num_workers 31 | self.dataset_size = len(dataset) 32 | self.epoch_size = epoch_size if epoch_size else self.dataset_size 33 | self.train = train 34 | 35 | def get_iterator(self, epoch=0): 36 | def load_fun_(idx): 37 | #idx0 = idx 38 | img, label = self.dataset[idx % len(self.dataset)] 39 | #print('idx0={0}, len(d)={1}, idx={2}, label={3}'.format( 40 | # idx0, len(self.dataset), idx % len(self.dataset), label)) 41 | return img, label 42 | 43 | elem_list = generate_element_list(self.epoch_size, self.dataset_size) 44 | 45 | tnt_dataset = tnt.dataset.ListDataset( 46 | elem_list=elem_list, load=load_fun_) 47 | 48 | data_loader = tnt_dataset.parallel( 49 | batch_size=self.batch_size, 50 | num_workers=self.num_workers, 51 | shuffle=self.train, 52 | drop_last=self.train) 53 | 54 | return data_loader 55 | 56 | def __call__(self, epoch=0): 57 | return self.get_iterator() 58 | 59 | def __len__(self): 60 | return self.epoch_size // self.batch_size 61 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from low_shot_learning.datasets.imagenet_dataset import ImageNet 2 | from low_shot_learning.datasets.imagenet_dataset import ImageNetLowShot 3 | from low_shot_learning.datasets.imagenet_dataset import ImageNetFeatures 4 | from low_shot_learning.datasets.imagenet_dataset import ImageNetLowShotFeatures 5 | from low_shot_learning.datasets.mini_imagenet_dataset import MiniImageNet 6 | from low_shot_learning.datasets.mini_imagenet_dataset import MiniImageNet80x80 7 | from low_shot_learning.datasets.mini_imagenet_dataset import MiniImageNetFeatures 8 | 9 | 10 | def dataset_factory(dataset_name, *args, **kwargs): 11 | datasets_collection = {} 12 | datasets_collection['MiniImageNet'] = MiniImageNet 13 | datasets_collection['MiniImageNet80x80'] = MiniImageNet80x80 14 | datasets_collection['MiniImageNetFeatures'] = MiniImageNetFeatures 15 | datasets_collection['ImageNet'] = ImageNet 16 | datasets_collection['ImageNetLowShot'] = ImageNetLowShot 17 | datasets_collection['ImageNetFeatures'] = ImageNetFeatures 18 | datasets_collection['ImageNetLowShotFeatures'] = ImageNetLowShotFeatures 19 | 20 | return datasets_collection[dataset_name](*args, **kwargs) 21 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/low_shot_learning/datasets/imagenet_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import json 4 | import os 5 | import os.path 6 | 7 | import h5py 8 | import numpy as np 9 | import random 10 | import torch 11 | import torch.utils.data as data 12 | import torchvision.datasets as datasets 13 | import torchvision.transforms as transforms 14 | 
from PIL import Image 15 | from PIL import ImageEnhance 16 | 17 | import low_shot_learning.utils as utils 18 | 19 | # Set the appropriate paths of the datasets here. 20 | _IMAGENET_DATASET_DIR = '/datasets_local/ImageNet/' 21 | _IMAGENET_LOWSHOT_BENCHMARK_CATEGORY_SPLITS_PATH = './data/IMAGENET_LOWSHOT_BENCHMARK_CATEGORY_SPLITS.json' 22 | _MEAN_PIXEL = [0.485, 0.456, 0.406] 23 | _STD_PIXEL = [0.229, 0.224, 0.225] 24 | 25 | 26 | class ImageJitter: 27 | def __init__(self, transformdict): 28 | transformtypedict=dict( 29 | Brightness=ImageEnhance.Brightness, Contrast=ImageEnhance.Contrast, 30 | Sharpness=ImageEnhance.Sharpness, Color=ImageEnhance.Color 31 | ) 32 | self.transforms = [ 33 | (transformtypedict[k], transformdict[k]) for k in transformdict] 34 | 35 | def __call__(self, img): 36 | out = img 37 | randtensor = torch.rand(len(self.transforms)) 38 | 39 | for i, (transformer, alpha) in enumerate(self.transforms): 40 | r = alpha*(randtensor[i]*2.0 -1.0) + 1 41 | out = transformer(out).enhance(r).convert('RGB') 42 | 43 | return out 44 | 45 | 46 | class ImageNet(data.Dataset): 47 | def __init__( 48 | self, 49 | split='train', 50 | use_geometric_aug=True, 51 | use_simple_geometric_aug=False, 52 | use_color_aug=True): 53 | # use_geometric_aug: If True geometric augmentations are used for the 54 | # images of the training split. 55 | # use_color_aug: if True color augmentations are used for the images 56 | # of the test/val split. 57 | 58 | self.split = split 59 | assert split in ('train', 'val') 60 | self.name = 'ImageNet_Split_' + split 61 | 62 | data_dir = _IMAGENET_DATASET_DIR 63 | print('==> Loading ImageNet dataset - split {0}'.format(split)) 64 | print('==> ImageNet directory: {0}'.format(data_dir)) 65 | 66 | transform_train = [] 67 | assert not (use_simple_geometric_aug and use_geometric_aug) 68 | if use_geometric_aug: 69 | transform_train.append(transforms.RandomResizedCrop(224)) 70 | transform_train.append(transforms.RandomHorizontalFlip()) 71 | elif use_simple_geometric_aug: 72 | transform_train.append(transforms.Resize(256)) 73 | transform_train.append(transforms.RandomCrop(224)) 74 | transform_train.append(transforms.RandomHorizontalFlip()) 75 | else: 76 | transform_train.append(transforms.Resize(256)) 77 | transform_train.append(transforms.CenterCrop(224)) 78 | 79 | if use_color_aug: 80 | jitter_params = {'Brightness': 0.4, 'Contrast': 0.4, 'Color': 0.4} 81 | transform_train.append(ImageJitter(jitter_params)) 82 | 83 | transform_train.append(lambda x: np.asarray(x)) 84 | transform_train.append(transforms.ToTensor()) 85 | transform_train.append( 86 | transforms.Normalize(mean=_MEAN_PIXEL, std=_STD_PIXEL)) 87 | 88 | self.trainsform_train = transform_train 89 | 90 | transform_train = transforms.Compose(transform_train) 91 | 92 | transform_test = transforms.Compose([ 93 | transforms.Resize(256), 94 | transforms.CenterCrop(224), 95 | lambda x: np.asarray(x), 96 | transforms.ToTensor(), 97 | transforms.Normalize(mean=_MEAN_PIXEL, std=_STD_PIXEL), 98 | ]) 99 | 100 | self.transform = transform_train if split=='train' else transform_test 101 | print('==> transform: {0}'.format(self.transform)) 102 | train_dir = os.path.join(data_dir, 'train') 103 | val_dir = os.path.join(data_dir, 'val') 104 | split_dir = train_dir if split=='train' else val_dir 105 | self.data = datasets.ImageFolder(split_dir, self.transform) 106 | self.labels = [item[1] for item in self.data.imgs] 107 | 108 | #@profile 109 | def __getitem__(self, index): 110 | img, label = self.data[index] 111 | return img, label 112 | 
113 | def __len__(self): 114 | return len(self.data) 115 | 116 | 117 | class ImageNetLowShot(ImageNet): 118 | def __init__( 119 | self, 120 | phase='train', 121 | split='train', 122 | do_not_use_random_transf=False): 123 | 124 | assert phase in ('train', 'test', 'val') 125 | assert split in ('train', 'val') 126 | 127 | use_aug = (phase=='train') and (do_not_use_random_transf==False) 128 | 129 | ImageNet.__init__( 130 | self, split=split, use_geometric_aug=use_aug, use_color_aug=use_aug) 131 | 132 | self.phase = phase 133 | self.split = split 134 | self.name = 'ImageNetLowShot_Phase_' + phase + '_Split_' + split 135 | print('==> Loading ImageNet dataset (for few-shot benchmark) - phase {0}'. 136 | format(phase)) 137 | 138 | #*********************************************************************** 139 | with open(_IMAGENET_LOWSHOT_BENCHMARK_CATEGORY_SPLITS_PATH, 'r') as f: 140 | label_idx = json.load(f) 141 | base_classes = label_idx['base_classes'] 142 | novel_classes_val_phase = label_idx['novel_classes_1'] 143 | novel_classes_test_phase = label_idx['novel_classes_2'] 144 | #*********************************************************************** 145 | 146 | self.label2ind = utils.buildLabelIndex(self.labels) 147 | self.labelIds = sorted(self.label2ind.keys()) 148 | self.num_cats = len(self.labelIds) 149 | assert self.num_cats==1000 150 | 151 | self.labelIds_base = base_classes 152 | self.num_cats_base = len(self.labelIds_base) 153 | if self.phase=='val' or self.phase=='test': 154 | self.labelIds_novel = ( 155 | novel_classes_val_phase if (self.phase=='val') else 156 | novel_classes_test_phase) 157 | self.num_cats_novel = len(self.labelIds_novel) 158 | 159 | intersection = set(self.labelIds_base) & set(self.labelIds_novel) 160 | assert len(intersection) == 0 161 | 162 | 163 | class ImageNetLowShotFeatures: 164 | def __init__( 165 | self, 166 | data_dir, # path to the directory with the saved ImageNet features. 167 | image_split='train', # the image split of the ImageNet that will be loaded. 168 | phase='train', # whether the dataset will be used for training, validating, or testing a model. 169 | ): 170 | assert image_split in ('train', 'val') 171 | assert phase in ('train', 'val', 'test') 172 | 173 | self.phase = phase 174 | self.image_split = image_split 175 | self.name = (f'ImageNetLowShotFeatures_ImageSplit_{self.image_split}' 176 | f'_Phase_{self.phase}') 177 | 178 | dataset_file = os.path.join( 179 | data_dir, 'ImageNet_' + self.image_split + '.h5') 180 | self.data_file = h5py.File(dataset_file, 'r') 181 | self.count = self.data_file['count'][0] 182 | self.features = self.data_file['all_features'][...] 
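        # The .h5 file (written by scripts/save_features.py) is expected to expose
        # three datasets: 'count' (number of valid rows), 'all_features'
        # (N x feature_dim) and 'all_labels' (N,); they are read fully into memory
        # here. A quick sketch for inspecting such a file (path is illustrative):
        #
        #   import h5py
        #   with h5py.File('ImageNet_train.h5', 'r') as f:
        #       print(f['count'][0], f['all_features'].shape, f['all_labels'].shape)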
183 | self.labels = self.data_file['all_labels'][:self.count].tolist() 184 | 185 | #*********************************************************************** 186 | with open(_IMAGENET_LOWSHOT_BENCHMARK_CATEGORY_SPLITS_PATH, 'r') as f: 187 | label_idx = json.load(f) 188 | base_classes = label_idx['base_classes'] 189 | base_classes_val_split = label_idx['base_classes_1'] 190 | base_classes_test_split = label_idx['base_classes_2'] 191 | novel_classes_val_split = label_idx['novel_classes_1'] 192 | novel_classes_test_split = label_idx['novel_classes_2'] 193 | #*********************************************************************** 194 | 195 | self.label2ind = utils.buildLabelIndex(self.labels) 196 | self.labelIds = sorted(self.label2ind.keys()) 197 | self.num_cats = len(self.labelIds) 198 | assert self.num_cats==1000 199 | 200 | self.labelIds_base = base_classes 201 | self.num_cats_base = len(self.labelIds_base) 202 | 203 | if self.phase=='val' or self.phase=='test': 204 | self.labelIds_novel = ( 205 | novel_classes_val_split if (self.phase=='val') else 206 | novel_classes_test_split) 207 | self.num_cats_novel = len(self.labelIds_novel) 208 | 209 | intersection = set(self.labelIds_base) & set(self.labelIds_novel) 210 | assert(len(intersection) == 0) 211 | self.base_classes_eval_split = ( 212 | base_classes_val_split if (self.phase=='val') else 213 | base_classes_test_split) 214 | self.base_classes_subset = self.base_classes_eval_split 215 | 216 | 217 | def __getitem__(self, index): 218 | features_this = torch.Tensor(self.features[index]).view(-1,1,1) 219 | label_this = self.labels[index] 220 | return features_this, label_this 221 | 222 | def __len__(self): 223 | return int(self.count) 224 | 225 | 226 | class ImageNetFeatures: 227 | def __init__( 228 | self, 229 | data_dir, # path to the directory with the saved ImageNet features. 230 | split='train', # the image split of the ImageNet that will be loaded. 231 | ): 232 | assert split in ('train', 'val') 233 | 234 | self.split = split 235 | self.name = (f'ImageNetFeatures_ImageSplit_{self.split}') 236 | 237 | dataset_file = os.path.join( 238 | data_dir, 'ImageNet_' + self.split + '.h5') 239 | self.data_file = h5py.File(dataset_file, 'r') 240 | self.count = self.data_file['count'][0] 241 | self.features = self.data_file['all_features'][...] 242 | self.labels = self.data_file['all_labels'][:self.count].tolist() 243 | 244 | self.label2ind = utils.buildLabelIndex(self.labels) 245 | self.labelIds = sorted(self.label2ind.keys()) 246 | self.num_cats = len(self.labelIds) 247 | assert self.num_cats == 1000 248 | 249 | def __getitem__(self, index): 250 | features_this = torch.Tensor(self.features[index]).view(-1,1,1) 251 | label_this = self.labels[index] 252 | return features_this, label_this 253 | 254 | def __len__(self): 255 | return int(self.count) 256 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/baselines/wDAEGNN/scripts/__init__.py -------------------------------------------------------------------------------- /baselines/wDAEGNN/scripts/lowshot_evaluate.py: -------------------------------------------------------------------------------- 1 | """Evaluates a fewshot recognition models on the low-shot Imagenet dataset[*] 2 | using the improved evaluation metrics proposed by Wang et al [**]. 
3 | 4 | Example of usage: 5 | # Evaluate the model on the 1-shot setting. 6 | python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=1 --step_size=1.0 7 | ==> Top 5 Accuracies: [Novel: 47.98 | Base: 93.40 | All 58.99 | Novel vs All 41.16 | Base vs All 87.28 | All prior 57.84] 8 | 9 | # Evaluate the model on the 2-shot setting. 10 | python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=2 --step_size=1.0 11 | ==> Top 5 Accuracies: [Novel: 59.52 | Base: 93.41 | All 66.20 | Novel vs All 53.40 | Base vs All 86.51 | All prior 64.87] 12 | 13 | # Evaluate the model on the 5-shot setting. 14 | python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=5 --step_size=0.6 15 | ==> Top 5 Accuracies: [Novel: 70.21 | Base: 93.41 | All 73.20 | Novel vs All 65.84 | Base vs All 84.87 | All prior 71.87] 16 | 17 | # Evaluate the model on the 10-shot setting. 18 | python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=10 --step_size=0.4 19 | ==> Top 5 Accuracies: [Novel: 74.94 | Base: 93.36 | All 76.08 | Novel vs All 71.74 | Base vs All 82.97 | All prior 75.13] 20 | 21 | # Evaluate the model on the 20-shot setting. 22 | python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=20 --step_size=0.2 23 | ==> Top 5 Accuracies: [Novel: 77.77 | Base: 93.33 | All 77.53 | Novel vs All 75.36 | Base vs All 80.98 | All prior 77.11] 24 | 25 | The config argument specifies the model that will be evaluated. 26 | 27 | [*] B. Hariharan and R. Girshick. Low-shot visual recognition by shrinking and hallucinating features. 28 | [**] Y.-X. Wang and R. Girshick, M. Hebert, B. Hariharan. Low-shot learning from imaginary data. 29 | """ 30 | 31 | from __future__ import print_function 32 | 33 | import argparse 34 | import os 35 | 36 | from low_shot_learning.algorithms.fewshot.imagenet_lowshot import ImageNetLowShot 37 | from low_shot_learning.dataloaders.dataloader_fewshot import LowShotDataloader 38 | from low_shot_learning.datasets.imagenet_dataset import ImageNetLowShotFeatures 39 | from low_shot_learning import project_root 40 | 41 | 42 | parser = argparse.ArgumentParser() 43 | parser.add_argument('--config', type=str, required=True, default='', 44 | help='config file with parameters of the experiment') 45 | parser.add_argument('--checkpoint', type=int, default=-1, 46 | help='checkpoint (epoch id) that will be loaded. If a negative value is ' 47 | 'given then the latest existing checkpoint is loaded.') 48 | parser.add_argument('--cuda', type=bool, default=True, help='enables cuda') 49 | parser.add_argument('--testset', default=False, action='store_true', 50 | help='If True, the model is evaluated on the test set of ImageNetLowShot. 
' 51 | 'Otherwise, the validation set is used for evaluation.') 52 | parser.add_argument('--nepisodes', type=int, default=100, 53 | help='the number of evaluation experiments that will run before computing ' 54 | 'the average performance.') 55 | parser.add_argument('--prior', type=float, default=0.7) 56 | parser.add_argument('--nexemplars', type=int, default=-1) 57 | parser.add_argument('--last', default=False, action='store_true') 58 | parser.add_argument('--workspace', default=False, action='store_true') 59 | parser.add_argument('--step_size', default=1.0, type=float) 60 | args_opt = parser.parse_args() 61 | #args_opt.testset = True 62 | 63 | exp_config_file = os.path.join(project_root, 'config', args_opt.config + '.py') 64 | exp_base_directory = os.path.join(project_root, 'experiments') 65 | exp_directory = os.path.join(exp_base_directory, args_opt.config) 66 | 67 | # Load the configuration params of the experiment 68 | exp_config_file = 'config.' + args_opt.config.replace('/', '.') 69 | #print(f'Launching experiment: {exp_config_file}') 70 | config = __import__(exp_config_file, fromlist=['']).config 71 | config['exp_dir'] = exp_directory # where logs, models, etc will be stored. 72 | print(f'Loading experiment {args_opt.config}') 73 | print(f'Generated logs, snapshots, and model files will be stored on {exp_directory}') 74 | 75 | if args_opt.step_size != 1.0: 76 | config['networks']['classifier']['opt']['dae_config']['step_size'] = args_opt.step_size 77 | 78 | algorithm = ImageNetLowShot(config) 79 | if args_opt.cuda: # enable cuda. 80 | algorithm.load_to_gpu() 81 | 82 | if args_opt.checkpoint != 0: # load checkpoint. 83 | algorithm.load_checkpoint( 84 | epoch=args_opt.checkpoint if (args_opt.checkpoint > 0) else '*', 85 | train=False, 86 | suffix=('' if args_opt.last else '.best')) 87 | 88 | # Prepare the datasets and the dataloader. 89 | nExemplars = config['data_train_opt']['nExemplars'] 90 | if args_opt.nexemplars > 0: 91 | nExemplars = args_opt.nexemplars 92 | 93 | eval_phase = 'test' if args_opt.testset else 'val' 94 | data_train_opt = config['data_train_opt'] 95 | feat_data_train = ImageNetLowShotFeatures( 96 | data_dir=data_train_opt['data_dir'], image_split='train', phase=eval_phase) 97 | feat_data_test = ImageNetLowShotFeatures( 98 | data_dir=data_train_opt['data_dir'], image_split='val', phase=eval_phase) 99 | data_loader = LowShotDataloader( 100 | feat_data_train, feat_data_test, 101 | nExemplars=nExemplars, batch_size=1000, num_workers=1) 102 | 103 | results = algorithm.evaluate( 104 | data_loader, 105 | num_eval_exp=args_opt.nepisodes, 106 | prior=args_opt.prior, 107 | suffix='best') 108 | 109 | algorithm.logger.info('==> algorithm_type: {0}'.format('ImageNetLowShot')) 110 | algorithm.logger.info('==> nExemplars: {0}'.format(nExemplars)) 111 | algorithm.logger.info('==> num episodes: {0}'.format(args_opt.nepisodes)) 112 | algorithm.logger.info('==> eval_phase: {0}'.format(eval_phase)) 113 | algorithm.logger.info('==> step_size: {0}'.format(args_opt.step_size)) 114 | algorithm.logger.info('==> results: {0}'.format(results)) 115 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/scripts/lowshot_train_stage1.py: -------------------------------------------------------------------------------- 1 | """Applies the 1st training stage of our approach on the low-shot Imagenet dataset[*]. 
2 | 3 | Example of usage - train a cosine-similarity based recognition model with a ResNet10 feature extractor: 4 | python scripts/lowshot_train_stage1.py --config=imagenet_ResNet10CosineClassifier 5 | 6 | The configuration file imagenet_ResNet10CosineClassifier.py used on the above experiment is placed on 7 | the directory ./config . 8 | 9 | [*] B. Hariharan and R. Girshick. Low-shot visual recognition by shrinking and hallucinating features. 10 | """ 11 | 12 | 13 | from __future__ import print_function 14 | 15 | import argparse 16 | import os 17 | 18 | from low_shot_learning.algorithms.fewshot.fewshot import FewShot 19 | from low_shot_learning.dataloaders.dataloader_fewshot import FewShotDataloader 20 | from low_shot_learning.datasets.imagenet_dataset import ImageNetLowShot 21 | from low_shot_learning import project_root 22 | 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument('--config', type=str, required=True, default='', 25 | help='config file with parameters of the experiment.') 26 | parser.add_argument('--checkpoint', type=int, default=0, 27 | help='checkpoint (epoch id) that will be loaded. If a negative value is ' 28 | 'given then the latest existing checkpoint is loaded.') 29 | parser.add_argument('--num_workers', type=int, default=4, 30 | help='number of data loading workers') 31 | parser.add_argument('--cuda', type=bool, default=True, help='enables cuda') 32 | parser.add_argument('--disp_step', type=int, default=200, 33 | help='display step during training') 34 | args_opt = parser.parse_args() 35 | 36 | 37 | exp_config_file = os.path.join(project_root, 'config', args_opt.config + '.py') 38 | exp_base_directory = os.path.join(project_root, 'experiments') 39 | exp_directory = os.path.join(exp_base_directory, args_opt.config) 40 | 41 | # Load the configuration params of the experiment 42 | exp_config_file = 'config.' + args_opt.config.replace('/', '.') 43 | #print(f'Launching experiment: {exp_config_file}') 44 | config = __import__(exp_config_file, fromlist=['']).config 45 | config['exp_dir'] = exp_directory # where logs, models, etc will be stored. 46 | print(f'Loading experiment {args_opt.config}') 47 | print(f'Generated logs, snapshots, and model files will be stored on {exp_directory}') 48 | 49 | 50 | # Set the train dataset and the corresponding data loader. 
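# Each batch sampled below is a few-shot training episode: nKnovel "novel" categories
# with nExemplars support images each, plus nTestNovel query images from those
# categories and nTestBase query images drawn from nKbase "base" categories (see the
# keyword comments below). Illustrative arithmetic only; the actual values come from
# config['data_train_opt']:
#
#   n_support = nKnovel * nExemplars    # e.g. 5 * 1  = 5 support images per episode
#   n_query   = nTestNovel + nTestBase  # e.g. 15 + 15 = 30 query images per episode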
51 | data_train_opt = config['data_train_opt'] 52 | dataset_train = ImageNetLowShot(phase='train') 53 | dloader_train = FewShotDataloader( 54 | dataset=dataset_train, 55 | nKnovel=data_train_opt['nKnovel'], 56 | nKbase=data_train_opt['nKbase'], 57 | nExemplars=data_train_opt['nExemplars'], # num training examples per novel category 58 | nTestNovel=data_train_opt['nTestNovel'], # num test examples for all the novel categories 59 | nTestBase=data_train_opt['nTestBase'], # num test examples for all the base categories 60 | batch_size=data_train_opt['batch_size'], 61 | num_workers=args_opt.num_workers, 62 | epoch_size=data_train_opt['epoch_size'], # num of batches per epoch 63 | ) 64 | 65 | config['disp_step'] = args_opt.disp_step 66 | algorithm = FewShot(config) 67 | if args_opt.cuda: # enable cuda 68 | algorithm.load_to_gpu() 69 | 70 | if args_opt.checkpoint != 0: # load checkpoint 71 | algorithm.load_checkpoint( 72 | epoch=args_opt.checkpoint if (args_opt.checkpoint > 0) else '*', 73 | train=True) 74 | 75 | algorithm.solve(dloader_train) 76 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/scripts/lowshot_train_stage2.py: -------------------------------------------------------------------------------- 1 | """Train the wDAE-GNN few-shot model on the the low-shot Imagenet dataset[*]. 2 | 3 | Example of usage: 4 | python scripts/lowshot_train_stage2.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN 5 | imagenet_ResNet10CosineClassifier_wDAE_GNN 6 | 7 | All the configuration files above (i.e., specified by the --config argument) are 8 | placed on the directory ./config . 9 | 10 | [*] B. Hariharan and R. Girshick. Low-shot visual recognition by shrinking and hallucinating features. 11 | """ 12 | 13 | from __future__ import print_function 14 | 15 | import argparse 16 | import os 17 | 18 | from low_shot_learning.algorithms.fewshot.imagenet_lowshot import ImageNetLowShot 19 | from low_shot_learning.dataloaders.dataloader_fewshot import FewShotDataloader, LowShotDataloader 20 | from low_shot_learning.datasets.imagenet_dataset import ImageNetLowShotFeatures 21 | from low_shot_learning import project_root 22 | 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument('--config', type=str, required=True, default='', 25 | help='config file with parameters of the experiment') 26 | parser.add_argument('--checkpoint', type=int, default=0, 27 | help='checkpoint (epoch id) that will be loaded. If a negative value is ' 28 | 'given then the latest existing checkpoint is loaded.') 29 | parser.add_argument('--num_workers', type=int, default=0, 30 | help='number of data loading workers') 31 | parser.add_argument('--cuda', type=bool, default=True, help='enables cuda') 32 | parser.add_argument('--disp_step', type=int, default=200, 33 | help='display step during training') 34 | args_opt = parser.parse_args() 35 | 36 | exp_config_file = os.path.join(project_root, 'config', args_opt.config + '.py') 37 | exp_base_directory = os.path.join(project_root, 'experiments') 38 | exp_directory = os.path.join(exp_base_directory, args_opt.config) 39 | 40 | # Load the configuration params of the experiment 41 | exp_config_file = 'config.' + args_opt.config.replace('/', '.') 42 | #print(f'Launching experiment: {exp_config_file}') 43 | config = __import__(exp_config_file, fromlist=['']).config 44 | config['exp_dir'] = exp_directory # where logs, models, etc will be stored. 
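# The dynamic __import__ above is just a programmatic module import; for the config
# shipped with this baseline it resolves (roughly) to:
#
#   from config.imagenet_wDAE.imagenet_ResNet10CosineClassifier_wDAE_GNN import config
#
# so a config file only needs to define a top-level dict named `config`.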
45 | print(f'Loading experiment {args_opt.config}') 46 | print(f'Generated logs, snapshots, and model files will be stored on {exp_directory}') 47 | 48 | config['disp_step'] = args_opt.disp_step 49 | algorithm = ImageNetLowShot(config) 50 | if args_opt.cuda: # enable cuda 51 | algorithm.load_to_gpu() 52 | 53 | if args_opt.checkpoint != 0: # load checkpoint 54 | algorithm.load_checkpoint( 55 | epoch=args_opt.checkpoint if (args_opt.checkpoint > 0) else '*', 56 | train=True) 57 | 58 | # Set the train dataset and the corresponding data loader. 59 | data_train_opt = config['data_train_opt'] 60 | feat_dataset_train = ImageNetLowShotFeatures( 61 | data_dir=data_train_opt['data_dir'], 62 | image_split='train', 63 | phase='train') 64 | dloader_train = FewShotDataloader( 65 | dataset=feat_dataset_train, 66 | nKnovel=data_train_opt['nKnovel'], 67 | nKbase=data_train_opt['nKbase'], 68 | nExemplars=data_train_opt['nExemplars'], # num training examples per novel category 69 | nTestNovel=data_train_opt['nTestNovel'], # num test examples for all the novel categories 70 | nTestBase=data_train_opt['nTestBase'], # num test examples for all the base categories 71 | batch_size=data_train_opt['batch_size'], 72 | num_workers=args_opt.num_workers, 73 | epoch_size=data_train_opt['epoch_size'], # num of batches per epoch 74 | ) 75 | 76 | feat_data_train = ImageNetLowShotFeatures( 77 | data_dir=data_train_opt['data_dir'], image_split='train', phase='val') 78 | feat_data_test = ImageNetLowShotFeatures( 79 | data_dir=data_train_opt['data_dir'], image_split='val', phase='val') 80 | dloader_test = LowShotDataloader( 81 | feat_data_train, feat_data_test, 82 | nExemplars=data_train_opt['nExemplars'], 83 | batch_size=200, 84 | num_workers=0) 85 | 86 | algorithm.solve(dloader_train, dloader_test) 87 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/scripts/save_features.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extracts and saves features (with a model trained by the lowshot_train_stage1.py 3 | routine) from the images of the ImageNet dataset. 4 | 5 | Example of usage: 6 | # Extract features from the validation image split of the Imagenet. 7 | python scripts/save_features.py --config=imagenet_ResNet10CosineClassifier --split='val' 8 | # Extract features from the training image split of the Imagenet. 9 | python scripts/save_features.py --config=imagenet_ResNet10CosineClassifier --split='train' 10 | 11 | The config argument specifies the model that will be used. 12 | """ 13 | 14 | from __future__ import print_function 15 | 16 | import argparse 17 | import os 18 | 19 | from low_shot_learning.algorithms.utils.save_features import SaveFeatures 20 | from low_shot_learning.dataloaders.basic_dataloaders import SimpleDataloader 21 | from low_shot_learning.datasets.imagenet_dataset import ImageNet 22 | from low_shot_learning import project_root 23 | 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument('--config', type=str, required=True, default='', 26 | help='config file with hyper-parameters of the model that we will use for ' 27 | 'extracting features from ImageNet dataset.') 28 | parser.add_argument('--checkpoint', type=int, default=-1, 29 | help='checkpoint (epoch id) that will be loaded. 
If a negative value is' 30 | ' given then the latest existing checkpoint is loaded.') 31 | parser.add_argument('--cuda', type=bool, default=True, help='enables cuda') 32 | parser.add_argument('--split', type=str, default='val') 33 | parser.add_argument('--num_workers', type=int, default=4) 34 | parser.add_argument('--batch_size', type=int, default=128) 35 | parser.add_argument('--save2exp', default=False, action='store_true') 36 | parser.add_argument('--feature_name', type=str, default='') 37 | parser.add_argument('--global_pooling', default=False, action='store_true') 38 | args_opt = parser.parse_args() 39 | 40 | 41 | exp_base_directory = os.path.join(project_root, 'experiments') 42 | exp_directory = os.path.join(exp_base_directory, args_opt.config) 43 | 44 | # Load the configuration params of the experiment 45 | exp_config_file = 'config.' + args_opt.config.replace('/', '.') 46 | #print(f'Launching experiment: {exp_config_file}') 47 | config = __import__(exp_config_file, fromlist=['']).config 48 | config['exp_dir'] = exp_directory # where logs, models, etc will be stored. 49 | print(f'Loading experiment {args_opt.config}') 50 | print(f'Generated logs, snapshots, and model files will be stored on {exp_directory}') 51 | 52 | if (args_opt.split != 'train') and (args_opt.split != 'val'): 53 | raise ValueError('Not valid split {0}'.format(args_opt.split)) 54 | 55 | dataset = ImageNet( 56 | split=args_opt.split, use_geometric_aug=False, use_color_aug=False) 57 | dloader = SimpleDataloader( 58 | dataset, 59 | batch_size=args_opt.batch_size, 60 | train=False, 61 | num_workers=args_opt.num_workers) 62 | 63 | algorithm = SaveFeatures(config) 64 | if args_opt.cuda: # enable cuda 65 | algorithm.load_to_gpu() 66 | 67 | if args_opt.checkpoint != 0: # load checkpoint 68 | algorithm.load_checkpoint( 69 | epoch=args_opt.checkpoint if (args_opt.checkpoint > 0) else '*', 70 | train=False) 71 | 72 | if args_opt.save2exp: 73 | dst_directory = os.path.join(exp_directory, 'feature_datasets') 74 | else: 75 | dst_directory = os.path.join( 76 | project_root, 'datasets', 'feature_datasets', args_opt.config) 77 | 78 | if args_opt.feature_name == '': 79 | args_opt.feature_name = None 80 | else: 81 | dst_directory = dst_directory + '_' + args_opt.feature_name 82 | 83 | algorithm.logger.info(f"==> Destination directory: {dst_directory}") 84 | if (not os.path.isdir(dst_directory)): 85 | os.makedirs(dst_directory) 86 | 87 | dst_filename = os.path.join( 88 | dst_directory, 'ImageNet_' + args_opt.split + '.h5') 89 | 90 | algorithm.logger.info(f"==> dst_filename: {dst_filename}") 91 | algorithm.logger.info(f"==> args_opt.feature_name: {args_opt.feature_name}") 92 | algorithm.logger.info(f"==> args_opt.global_pooling: {args_opt.global_pooling}") 93 | 94 | algorithm.save_features( 95 | dataloader=dloader, 96 | filename=dst_filename, 97 | feature_name=args_opt.feature_name, 98 | global_pooling=args_opt.global_pooling) 99 | -------------------------------------------------------------------------------- /baselines/wDAEGNN/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools import find_packages 3 | 4 | setup( 5 | name='wDAE_GNN_FewShot', 6 | version='0.0.1', 7 | description='Generating Classification Weights with GNN Denoising Autoencoders for Few-Shot Learning', 8 | author='Spyros Gidaris', 9 | packages=find_packages()) 10 | -------------------------------------------------------------------------------- /embeddings/AWA2_classnames.npy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/AWA2_classnames.npy -------------------------------------------------------------------------------- /embeddings/CUB_classnames.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/CUB_classnames.npy -------------------------------------------------------------------------------- /embeddings/SUN_classnames.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/SUN_classnames.npy -------------------------------------------------------------------------------- /embeddings/conceptnet/AWA2_cn_sum_list.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/conceptnet/AWA2_cn_sum_list.npy -------------------------------------------------------------------------------- /embeddings/conceptnet/CUB_cn_sum_list.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/conceptnet/CUB_cn_sum_list.npy -------------------------------------------------------------------------------- /embeddings/conceptnet/SUN_cn_sum_list.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/conceptnet/SUN_cn_sum_list.npy -------------------------------------------------------------------------------- /embeddings/conceptnet/imgnet_cn_list.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/conceptnet/imgnet_cn_list.npy -------------------------------------------------------------------------------- /embeddings/wiki2vec/AWA2_wiki_sum_list.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/wiki2vec/AWA2_wiki_sum_list.npy -------------------------------------------------------------------------------- /embeddings/wiki2vec/CUB_wiki_sum_list.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/wiki2vec/CUB_wiki_sum_list.npy -------------------------------------------------------------------------------- /embeddings/wiki2vec/SUN_wiki_sum_list.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/wiki2vec/SUN_wiki_sum_list.npy -------------------------------------------------------------------------------- /embeddings/wiki2vec/imgnet_wiki_list.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/wiki2vec/imgnet_wiki_list.npy -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: izsl 2 | channels: 3 | - nvidia 4 | - anaconda 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - _libgcc_mutex=0.1=conda_forge 9 | - _openmp_mutex=4.5=2_gnu 10 | - blas=1.0=mkl 11 | - bzip2=1.0.8=h7f98852_4 12 | - ca-certificates=2023.7.22=hbcca054_0 13 | - cudatoolkit=11.1.74=h6bb024c_0 14 | - freetype=2.12.1=h4a9f257_0 15 | - intel-openmp=2021.4.0=h06a4308_3561 16 | - joblib=1.3.2=pyhd8ed1ab_0 17 | - jpeg=9e=h5eee18b_1 18 | - lcms2=2.12=h3be6417_0 19 | - ld_impl_linux-64=2.40=h41732ed_0 20 | - libblas=3.9.0=12_linux64_mkl 21 | - libcblas=3.9.0=12_linux64_mkl 22 | - libffi=3.4.2=h7f98852_5 23 | - libgcc-ng=13.2.0=h807b86a_2 24 | - libgfortran-ng=7.5.0=ha8ba4b0_17 25 | - libgfortran4=7.5.0=ha8ba4b0_17 26 | - libgomp=13.2.0=h807b86a_2 27 | - libnsl=2.0.1=hd590300_0 28 | - libpng=1.6.39=h5eee18b_0 29 | - libsqlite=3.43.2=h2797004_0 30 | - libstdcxx-ng=11.2.0=h1234567_1 31 | - libtiff=4.2.0=hecacb30_2 32 | - libuuid=2.38.1=h0b41bf4_0 33 | - libwebp-base=1.3.2=h5eee18b_0 34 | - libzlib=1.2.13=hd590300_5 35 | - lz4-c=1.9.4=h6a678d5_0 36 | - mkl=2021.4.0=h06a4308_640 37 | - mkl-service=2.4.0=py39h7f8727e_0 38 | - mkl_fft=1.3.1=py39hd3c417c_0 39 | - mkl_random=1.2.2=py39h51133e4_0 40 | - ncurses=6.4=h59595ed_2 41 | - olefile=0.46=pyhd3eb1b0_0 42 | - openjpeg=2.4.0=h3ad879b_0 43 | - openssl=3.1.4=hd590300_0 44 | - pillow=8.3.1=py39h2c7a002_0 45 | - pip=21.2.4=py39h06a4308_0 46 | - python=3.9.18=h0755675_0_cpython 47 | - python_abi=3.9=4_cp39 48 | - readline=8.2=h8228510_1 49 | - scikit-learn=1.0.1=py39h4dfa638_3 50 | - scipy=1.7.1=py39h292c36d_2 51 | - setuptools=68.2.2=pyhd8ed1ab_0 52 | - six=1.16.0=pyhd3eb1b0_1 53 | - threadpoolctl=3.2.0=pyha21a80b_0 54 | - tk=8.6.13=h2797004_0 55 | - tzdata=2023c=h71feb2d_0 56 | - wheel=0.41.3=pyhd8ed1ab_0 57 | - xz=5.2.6=h166bdaf_0 58 | - zlib=1.2.13=hd590300_5 59 | - zstd=1.5.2=ha4553b6_0 60 | - pip: 61 | - ftfy==6.1.1 62 | - numpy==1.22.4 63 | - regex==2023.10.3 64 | - torch==1.10.1 65 | - torchvision==0.11.2 66 | - tqdm==4.66.1 67 | - typing-extensions==4.4.0 68 | - wcwidth==0.2.9 69 | - git+https://github.com/openai/CLIP.git 70 | -------------------------------------------------------------------------------- /figs/icis-framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/figs/icis-framework.png -------------------------------------------------------------------------------- /figs/model-fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/figs/model-fig.png -------------------------------------------------------------------------------- /scripts/table1_awa2.sh: -------------------------------------------------------------------------------- 1 | # Table 1, AWA2 dataset 2 | # Ours 3 | echo "ICIS:" 4 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 
--strict_eval --early_stopping_slope --calc_entropy 5 | # ConSE 6 | echo "ConSE:" 7 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --conse_benchmark 8 | # COSTA 9 | echo "COSTA:" 10 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --costa_benchmark 11 | # Sub. Reg. 12 | echo "Sub. Reg.:" 13 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj 14 | # wDAE 15 | echo "wDAE:" 16 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn 17 | # WAvg 18 | echo "WAvg:" 19 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --vgse_baseline=wavg --class_embedding=att --norm_scale_heuristic 20 | # SMO 21 | echo "SMO:" 22 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --vgse_baseline=smo --class_embedding=att --vgse_alpha=0 --norm_scale_heuristic -------------------------------------------------------------------------------- /scripts/table1_cub.sh: -------------------------------------------------------------------------------- 1 | # Table 1, CUB dataset 2 | # Ours 3 | echo "ICIS:" 4 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy 5 | # ConSE 6 | echo "ConSE:" 7 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --conse_benchmark 8 | # COSTA 9 | echo "COSTA:" 10 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --costa_benchmark 11 | # Sub. Reg. 12 | echo "Sub. 
Reg.:" 13 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj 14 | # wDAE 15 | echo "wDAE:" 16 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn 17 | # WAvg 18 | echo "WAvg:" 19 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --vgse_baseline=wavg --class_embedding=att --norm_scale_heuristic 20 | # SMO 21 | echo "SMO:" 22 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --vgse_baseline=smo --class_embedding=att --vgse_alpha=0 --norm_scale_heuristic -------------------------------------------------------------------------------- /scripts/table1_cub_5seeds.sh: -------------------------------------------------------------------------------- 1 | # Table 1, CUB dataset 2 | # Ours 3 | echo "ICIS:" 4 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --numSeeds 5 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy 5 | # ConSE 6 | echo "ConSE:" 7 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --numSeeds 5 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --conse_benchmark 8 | # COSTA 9 | echo "COSTA:" 10 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --numSeeds 5 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --costa_benchmark 11 | # Sub. Reg. 12 | echo "Sub. 
Reg.:" 13 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --numSeeds 5 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj 14 | # wDAE 15 | echo "wDAE:" 16 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --numSeeds 5 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn 17 | # WAvg 18 | echo "WAvg:" 19 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --numSeeds 5 --dataset=CUB --image_embedding=res101_finetuned --vgse_baseline=wavg --class_embedding=att --norm_scale_heuristic 20 | # SMO 21 | echo "SMO:" 22 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --numSeeds 5 --dataset=CUB --image_embedding=res101_finetuned --vgse_baseline=smo --class_embedding=att --vgse_alpha=0 --norm_scale_heuristic -------------------------------------------------------------------------------- /scripts/table1_sun.sh: -------------------------------------------------------------------------------- 1 | # Table 1, SUN dataset 2 | # Ours 3 | echo "ICIS:" 4 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy 5 | # ConSE 6 | echo "ConSE:" 7 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --conse_benchmark 8 | # COSTA 9 | echo "COSTA:" 10 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --costa_benchmark 11 | # Sub. Reg. 12 | echo "Sub. 
Reg.:" 13 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj 14 | # wDAE 15 | echo "wDAE:" 16 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn 17 | # WAvg 18 | echo "WAvg:" 19 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --vgse_baseline=wavg --class_embedding=att --norm_scale_heuristic 20 | # SMO 21 | echo "SMO:" 22 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --vgse_baseline=smo --class_embedding=att --vgse_alpha=0 --norm_scale_heuristic -------------------------------------------------------------------------------- /scripts/table2.sh: -------------------------------------------------------------------------------- 1 | ### Recreates the ablation results of our ICIS model (Table 2) 2 | 3 | ### CUB 4 | # Baseline MLP 5 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy 6 | # Cosine loss 7 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy 8 | # Single-modal 9 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --single_modal_ablation --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy 10 | # Cross-modal 11 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy 12 | # Full 13 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy 14 | 15 | ### AWA2 16 | # Baseline MLP 17 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy 18 | # Cosine loss 19 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy 20 | # Single-modal 21 | 
CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --single_modal_ablation --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy 22 | # Cross-modal 23 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy 24 | # Full 25 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy 26 | 27 | ### SUN 28 | # Baseline MLP 29 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 4096 --strict_eval --early_stopping_slope --calc_entropy 30 | # Cosine loss 31 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 4096 --strict_eval --early_stopping_slope --calc_entropy 32 | # Single-modal 33 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --single_modal_ablation --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 4096 --strict_eval --early_stopping_slope --calc_entropy 34 | # Cross-modal 35 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 4096 --strict_eval --early_stopping_slope --calc_entropy 36 | # Full 37 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 4096 --strict_eval --early_stopping_slope --calc_entropy 38 | -------------------------------------------------------------------------------- /scripts/table3_cub.sh: -------------------------------------------------------------------------------- 1 | # Table 3, CUB dataset 2 | echo "-- Using Wiki2Vec class label embeddings --" 3 | # Ours 4 | echo "ICIS:" 5 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=wiki2vec --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy --norm_scale_heuristic --zst --zstfrom=imagenet 6 | # ConSE 7 | echo "ConSE:" 8 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=wiki2vec --conse_benchmark --norm_scale_heuristic --zst --zstfrom=imagenet 9 | # COSTA 10 | echo "COSTA:" 11 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=wiki2vec 
--costa_benchmark --norm_scale_heuristic --zst --zstfrom=imagenet 12 | # Sub. Reg. 13 | echo "Sub. Reg.:" 14 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=wiki2vec --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj --norm_scale_heuristic --zst --zstfrom=imagenet 15 | # wDAE 16 | echo "wDAE:" 17 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=wiki2vec --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn --norm_scale_heuristic --zst --zstfrom=imagenet 18 | # WAvg 19 | echo "WAvg:" 20 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=wavg --class_embedding=wiki2vec --norm_scale_heuristic --zst --zstfrom=imagenet 21 | # SMO 22 | echo "SMO:" 23 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=smo --class_embedding=wiki2vec --vgse_alpha=0 --norm_scale_heuristic --zst --zstfrom=imagenet 24 | 25 | echo "-- Using ConceptNet class label embeddings --" 26 | # Ours 27 | echo "ICIS:" 28 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=cn --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy --norm_scale_heuristic --zst --zstfrom=imagenet 29 | # ConSE 30 | echo "ConSE:" 31 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=cn --conse_benchmark --norm_scale_heuristic --zst --zstfrom=imagenet 32 | # COSTA 33 | echo "COSTA:" 34 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=cn --costa_benchmark --norm_scale_heuristic --zst --zstfrom=imagenet 35 | # Sub. Reg. 36 | echo "Sub. 
Reg.:" 37 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=cn --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj --norm_scale_heuristic --zst --zstfrom=imagenet 38 | # wDAE 39 | echo "wDAE:" 40 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=cn --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn --norm_scale_heuristic --zst --zstfrom=imagenet 41 | # WAvg 42 | echo "WAvg:" 43 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=wavg --class_embedding=cn --norm_scale_heuristic --zst --zstfrom=imagenet 44 | # SMO 45 | echo "SMO:" 46 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=smo --class_embedding=cn --vgse_alpha=0 --norm_scale --norm_scale_heuristic --zst --zstfrom=imagenet 47 | 48 | echo "-- Using CLIP class label embeddings --" 49 | # Ours 50 | echo "ICIS:" 51 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=clip --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy --norm_scale_heuristic --zst --zstfrom=imagenet 52 | # ConSE 53 | echo "ConSE:" 54 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=clip --conse_benchmark --norm_scale_heuristic --zst --zstfrom=imagenet 55 | # COSTA 56 | echo "COSTA:" 57 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=clip --costa_benchmark --norm_scale_heuristic --zst --zstfrom=imagenet 58 | # Sub. Reg. 59 | echo "Sub. 
Reg.:" 60 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=clip --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj --norm_scale_heuristic --zst --zstfrom=imagenet 61 | # wDAE 62 | echo "wDAE:" 63 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=clip --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn --norm_scale_heuristic --zst --zstfrom=imagenet 64 | # WAvg 65 | echo "WAvg:" 66 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=wavg --class_embedding=clip --norm_scale_heuristic --zst --zstfrom=imagenet 67 | # SMO 68 | echo "SMO:" 69 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=smo --class_embedding=clip --vgse_alpha=0 --norm_scale --norm_scale_heuristic --zst --zstfrom=imagenet -------------------------------------------------------------------------------- /scripts/table4.sh: -------------------------------------------------------------------------------- 1 | echo "Sub. Reg., CUB:" 2 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj 3 | 4 | echo "Sub. Reg. + ICIS, CUB:" 5 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn 6 | -------------------------------------------------------------------------------- /scripts/table5_left.sh: -------------------------------------------------------------------------------- 1 | # Table 5 Left 2 | # Ours 3 | echo "ICIS:" 4 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope 5 | # ConSE 6 | echo "ConSE:" 7 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=att --conse_benchmark 8 | # COSTA 9 | echo "COSTA:" 10 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=att --costa_benchmark 11 | # Sub. Reg. 12 | echo "Sub. 
Reg.:" 13 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj 14 | # wDAE 15 | echo "wDAE:" 16 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn 17 | # WAvg 18 | echo "WAvg:" 19 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=wavg --class_embedding=att --norm_scale_heuristic 20 | # SMO 21 | echo "SMO:" 22 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=smo --class_embedding=att --vgse_alpha=0 --norm_scale_heuristic -------------------------------------------------------------------------------- /utility/eval_imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision.models as models 4 | import torchvision.datasets as datasets 5 | import time 6 | 7 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 8 | parser.add_argument('data', metavar='DIR', nargs='?', default='imagenet', 9 | help='path to dataset (default: imagenet)') 10 | parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18', 11 | choices=model_names, 12 | help='model architecture: ' + 13 | ' | '.join(model_names) + 14 | ' (default: resnet18)') 15 | parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', 16 | help='number of data loading workers (default: 4)') 17 | parser.add_argument('--epochs', default=90, type=int, metavar='N', 18 | help='number of total epochs to run') 19 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 20 | help='manual epoch number (useful on restarts)') 21 | parser.add_argument('-b', '--batch-size', default=256, type=int, 22 | metavar='N', 23 | help='mini-batch size (default: 256), this is the total ' 24 | 'batch size of all GPUs on the current node when ' 25 | 'using Data Parallel or Distributed Data Parallel') 26 | parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, 27 | metavar='LR', help='initial learning rate', dest='lr') 28 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 29 | help='momentum') 30 | parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, 31 | metavar='W', help='weight decay (default: 1e-4)', 32 | dest='weight_decay') 33 | parser.add_argument('-p', '--print-freq', default=10, type=int, 34 | metavar='N', help='print frequency (default: 10)') 35 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 36 | help='path to latest checkpoint (default: none)') 37 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', 38 | help='evaluate model on validation set') 39 | parser.add_argument('--pretrained', dest='pretrained', action='store_true', 40 | help='use pre-trained model') 41 | parser.add_argument('--world-size', default=-1, type=int, 42 | help='number of nodes for distributed training') 43 | parser.add_argument('--rank', default=-1, type=int, 44 | help='node rank for distributed 
training') 45 | parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, 46 | help='url used to set up distributed training') 47 | parser.add_argument('--dist-backend', default='nccl', type=str, 48 | help='distributed backend') 49 | parser.add_argument('--seed', default=None, type=int, 50 | help='seed for initializing training. ') 51 | parser.add_argument('--gpu', default=None, type=int, 52 | help='GPU id to use.') 53 | parser.add_argument('--multiprocessing-distributed', action='store_true', 54 | help='Use multi-processing distributed training to launch ' 55 | 'N processes per node, which has N GPUs. This is the ' 56 | 'fastest way to use PyTorch for either single node or ' 57 | 'multi node data parallel training') 58 | parser.add_argument('--dummy', action='store_true', help="use fake data to benchmark") 59 | 60 | args = parser.parse_args() 61 | 62 | class LINEAR(nn.Module): 63 | def __init__(self, input_dim, nclass, bias=True): 64 | super(LINEAR, self).__init__() 65 | self.fc = nn.Linear(input_dim, nclass, bias) 66 | def forward(self, x): 67 | o = self.fc(x) 68 | return o 69 | 70 | # Create model 71 | # Load ImageNet Pre-trained ResNet model 72 | resnet = models.resnet101(pretrained=True) 73 | resnet.eval() 74 | 75 | # Load predicted CUB/SUN/AWA2 classifiers 76 | predicted_classifiers = LINEAR(input_dim=2048, nclass=50) 77 | predicted_classifiers.load_state_dict(torch.load('/home/andchri/APZSL-clean/zst-models/CUB_clip')) 78 | predicted_classifiers.eval() 79 | 80 | # Append predicted classifier to Resnet 81 | resnet.fc.weight = nn.Parameter(torch.cat((resnet.fc.weight, predicted_classifiers.fc.weight))) 82 | resnet.fc.bias = nn.Parameter(torch.cat((resnet.fc.bias, predicted_classifiers.fc.bias))) 83 | 84 | # Create ImageNet dataloader 85 | valdir = os.path.join(args.data, 'val') 86 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 87 | std=[0.229, 0.224, 0.225]) 88 | 89 | val_dataset = datasets.ImageFolder( 90 | valdir, 91 | transforms.Compose([ 92 | transforms.Resize(256), 93 | transforms.CenterCrop(224), 94 | transforms.ToTensor(), 95 | normalize, 96 | ])) 97 | 98 | def validate(val_loader, model, criterion, args): 99 | def run_validate(loader, base_progress=0): 100 | with torch.no_grad(): 101 | end = time.time() 102 | for i, (images, target) in enumerate(loader): 103 | i = base_progress + i 104 | if args.gpu is not None and torch.cuda.is_available(): 105 | images = images.cuda(args.gpu, non_blocking=True) 106 | if torch.backends.mps.is_available(): 107 | images = images.to('mps') 108 | target = target.to('mps') 109 | if torch.cuda.is_available(): 110 | target = target.cuda(args.gpu, non_blocking=True) 111 | 112 | # compute output 113 | output = model(images) 114 | loss = criterion(output, target) 115 | 116 | # measure accuracy and record loss 117 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 118 | losses.update(loss.item(), images.size(0)) 119 | top1.update(acc1[0], images.size(0)) 120 | top5.update(acc5[0], images.size(0)) 121 | 122 | # measure elapsed time 123 | batch_time.update(time.time() - end) 124 | end = time.time() 125 | 126 | if i % args.print_freq == 0: 127 | progress.display(i + 1) 128 | 129 | batch_time = AverageMeter('Time', ':6.3f', Summary.NONE) 130 | losses = AverageMeter('Loss', ':.4e', Summary.NONE) 131 | top1 = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE) 132 | top5 = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE) 133 | progress = ProgressMeter( 134 | len(val_loader) + (args.distributed and (len(val_loader.sampler) * 
args.world_size < len(val_loader.dataset))),
135 |         [batch_time, losses, top1, top5],
136 |         prefix='Test: ')
137 | 
138 |     # switch to evaluate mode
139 |     model.eval()
140 | 
141 |     run_validate(val_loader)
142 |     if args.distributed:
143 |         top1.all_reduce()
144 |         top5.all_reduce()
145 | 
146 |     if args.distributed and (len(val_loader.sampler) * args.world_size < len(val_loader.dataset)):
147 |         aux_val_dataset = Subset(val_loader.dataset,
148 |                                  range(len(val_loader.sampler) * args.world_size, len(val_loader.dataset)))
149 |         aux_val_loader = torch.utils.data.DataLoader(
150 |             aux_val_dataset, batch_size=args.batch_size, shuffle=False,
151 |             num_workers=args.workers, pin_memory=True)
152 |         run_validate(aux_val_loader, len(val_loader))
153 | 
154 |     progress.display_summary()
155 | 
156 |     return top1.avg
157 | 
158 | 
159 | # The validation dataloader, loss and evaluation call are set up at the bottom of
160 | # this file, after the Summary and AverageMeter helpers used by validate().
161 | 
162 | class Summary(Enum):
163 |     NONE = 0
164 |     AVERAGE = 1
165 |     SUM = 2
166 |     COUNT = 3
167 | 
168 | class AverageMeter(object):
169 |     """Computes and stores the average and current value"""
170 |     def __init__(self, name, fmt=':f', summary_type=Summary.AVERAGE):
171 |         self.name = name
172 |         self.fmt = fmt
173 |         self.summary_type = summary_type
174 |         self.reset()
175 | 
176 |     def reset(self):
177 |         self.val = 0
178 |         self.avg = 0
179 |         self.sum = 0
180 |         self.count = 0
181 | 
182 |     def update(self, val, n=1):
183 |         self.val = val
184 |         self.sum += val * n
185 |         self.count += n
186 |         self.avg = self.sum / self.count
187 | 
188 |     def all_reduce(self):
189 |         if torch.cuda.is_available():
190 |             device = torch.device("cuda")
191 |         elif torch.backends.mps.is_available():
192 |             device = torch.device("mps")
193 |         else:
194 |             device = torch.device("cpu")
195 |         total = torch.tensor([self.sum, self.count], dtype=torch.float32, device=device)
196 |         dist.all_reduce(total, dist.ReduceOp.SUM, async_op=False)
197 |         self.sum, self.count = total.tolist()
198 |         self.avg = self.sum / self.count
199 | 
200 |     def __str__(self):
201 |         fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
202 |         return fmtstr.format(**self.__dict__)
203 | 
204 |     def summary(self):
205 |         fmtstr = ''
206 |         if self.summary_type is Summary.NONE:
207 |             fmtstr = ''
208 |         elif self.summary_type is Summary.AVERAGE:
209 |             fmtstr = '{name} {avg:.3f}'
210 |         elif self.summary_type is Summary.SUM:
211 |             fmtstr = '{name} {sum:.3f}'
212 |         elif self.summary_type is Summary.COUNT:
213 |             fmtstr = '{name} {count:.3f}'
214 |         else:
215 |             raise ValueError('invalid summary type %r' % self.summary_type)
216 | 
217 |         return fmtstr.format(**self.__dict__)
218 | 
219 | # Build the validation dataloader, define the loss and run the evaluation.
220 | val_loader = torch.utils.data.DataLoader(
221 |     val_dataset, batch_size=args.batch_size, shuffle=False,
222 |     num_workers=args.workers, pin_memory=True)
223 | criterion = nn.CrossEntropyLoss()
224 | 
225 | acc1 = validate(val_loader=val_loader, model=resnet, criterion=criterion, args=args)
226 | print("Top 1 accuracy Validation set:", acc1)
227 | 
--------------------------------------------------------------------------------
/utility/feature_extraction/feature_extract.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import torch.optim as optim
4 | import torch.backends.cudnn as cudnn
5 | from torch.autograd import Variable
6 | import sys
7 | import random
8 | import numpy as np
9 | from tqdm import tqdm
10 | import torch
11 | import torch.nn as nn
12 | import torch.utils.data
13 | from utility.feature_extraction.extract_util import get_loader, prepare_attri_label, DATA_LOADER, map_label
14 | from opt import get_opt
15 | import torchvision
16 | import timm
17 | import scipy.io
18 | 
19 | cudnn.benchmark = True
20 | 
21 | opt = get_opt()
22 | # set random seed
23 | if opt.manualSeed is None:
24 |     opt.manualSeed = 
random.randint(1, 10000) 25 | print("Random Seed: ", opt.manualSeed) 26 | random.seed(opt.manualSeed) 27 | torch.manual_seed(opt.manualSeed) 28 | np.random.seed(opt.manualSeed) 29 | if opt.cuda: 30 | torch.cuda.manual_seed_all(opt.manualSeed) 31 | 32 | 33 | 34 | def main(): 35 | # load data 36 | data = DATA_LOADER(opt) 37 | opt.test_seen_label = data.test_seen_label # weird 38 | 39 | class_attribute = data.attribute 40 | attribute_zsl = prepare_attri_label(class_attribute, data.unseenclasses).cuda() 41 | attribute_seen = prepare_attri_label(class_attribute, data.seenclasses).cuda() 42 | attribute_gzsl = torch.transpose(class_attribute, 1, 0).cuda() 43 | 44 | 45 | # define test_classes 46 | if opt.image_type not in ['test_unseen_small_loc', 'test_unseen_loc', 'test_seen_loc']: 47 | try: 48 | sys.exit(0) 49 | except: 50 | print("choose the image_type in ImageFileList") 51 | 52 | 53 | # Dataloader for train, test, visual 54 | trainloader, testloader_unseen, testloader_seen, visloader = get_loader(opt, data) 55 | 56 | # define attribute groups 57 | if opt.dataset == 'CUB': 58 | # Change layer 59 | num_classes = 150 60 | elif opt.dataset == 'AWA2': 61 | # Change layer 62 | num_classes = 40 63 | elif opt.dataset == 'SUN': 64 | # Change layer 65 | num_classes = 645 66 | 67 | if 'vit' in opt.backbone: 68 | model = timm.create_model(opt.backbone,pretrained=True,num_classes=num_classes) 69 | if opt.save_features: 70 | model.head = nn.Identity() 71 | else: 72 | ####### load our network, any from here: https://pytorch.org/vision/0.11/models ####### 73 | if opt.backbone == 'resnet101_old': 74 | model = torchvision.models.resnet101(weights=torchvision.models.ResNet101_Weights.IMAGENET1K_V1) 75 | else: 76 | if opt.resnet_path is not None: 77 | model = torchvision.models.__dict__[opt.backbone](pretrained=False) 78 | model.load_state_dict(torch.load(opt.resnet_path)) 79 | else: 80 | model = torchvision.models.__dict__[opt.backbone](pretrained=True) 81 | model.fc = nn.Linear(opt.feature_size, num_classes) 82 | if opt.save_features: 83 | model.fc = nn.Identity() 84 | print(model) 85 | 86 | 87 | 88 | criterion = nn.CrossEntropyLoss() 89 | 90 | if torch.cuda.is_available(): 91 | model.cuda() 92 | 93 | if opt.save_features: 94 | name=opt.dataset+'_'+opt.backbone+'_fix'+'.mat' 95 | model.eval() 96 | img_files = [] 97 | features = [] 98 | labels = [] 99 | with torch.no_grad(): 100 | loaders = [trainloader] 101 | for loader in loaders: 102 | for i, (batch_input, batch_target, impath) in enumerate(loader): 103 | input_v = Variable(batch_input) 104 | if opt.cuda: 105 | input_v = input_v.cuda() 106 | output = model(input_v).to('cpu') 107 | for j in range(len(batch_target)): 108 | img_files.append([np.array([impath[j].squeeze().replace(' ','')])]) 109 | labels.append(np.array([batch_target[j].item()+1],dtype=np.int16)) 110 | features.append(output[j].numpy()) 111 | scipy.io.savemat(name, mdict={'image_files': img_files, 'features': features, 'labels': np.array(labels)}) 112 | 113 | exit(0) 114 | 115 | 116 | print('Train and test...') 117 | for epoch in range(opt.nepoch): 118 | model.train() 119 | current_lr = opt.classifier_lr * (0.8 ** (epoch // 10)) 120 | optimizer = optim.Adam(params=filter(lambda p: p.requires_grad, model.parameters()), 121 | lr=current_lr, betas=(opt.beta1, 0.999)) 122 | # loss for print 123 | loss_log = {'ave_loss': 0} 124 | 125 | batch = len(trainloader) 126 | for i, (batch_input, batch_target, impath) in enumerate(trainloader): 127 | model.zero_grad() 128 | # map target labels 129 | 
batch_target = map_label(batch_target, data.seenclasses) 130 | input_v = Variable(batch_input) 131 | label_v = Variable(batch_target) 132 | if opt.cuda: 133 | input_v = input_v.cuda() 134 | label_v = label_v.cuda() 135 | output = model(input_v) 136 | 137 | loss = criterion(output, label_v) 138 | loss_log['ave_loss'] += loss.item() 139 | loss.backward() 140 | optimizer.step() 141 | 142 | print('\n[Epoch %d, Batch %5d] Train loss: %.3f ' 143 | % (epoch+1, batch, loss_log['ave_loss'] / batch)) 144 | 145 | 146 | 147 | if __name__ == '__main__': 148 | main() -------------------------------------------------------------------------------- /utility/load_wordembeddings.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import clip 4 | import os 5 | 6 | def prepare_vocab(opt, matcontent, zst_mode=False): 7 | vocab = [] 8 | if zst_mode: 9 | dataset = opt.zstfrom 10 | else: 11 | dataset = opt.dataset 12 | for cls_name in matcontent['allclasses_names']: 13 | if dataset == 'CUB': 14 | vocab.append(cls_name[0][0][4:]) 15 | elif dataset == 'SUN': 16 | vocab.append(cls_name[0][0]) 17 | elif dataset == 'AWA2': 18 | vocab.append(cls_name[0][0].replace('+', '_')) 19 | else: 20 | raise NotImplementedError 21 | 22 | return vocab 23 | 24 | def prep_imagenet_vocab(imagenet_vocab): 25 | pruned_vocab = [] 26 | for label in imagenet_vocab: 27 | first_label = label.split(",")[0] 28 | pruned_vocab.append(first_label) 29 | return pruned_vocab 30 | 31 | def get_clip_embeddings(opt, vocab): 32 | device = "cuda" if torch.cuda.is_available() else "cpu" 33 | model, preprocess = clip.load("RN101", device=device) 34 | 35 | clip_embeddings = [] 36 | input_text = [] 37 | #prompt = 'an image of a' 38 | prompt = 'a photo of a' 39 | templates80 = [ 40 | 'a bad photo of a {}.', 41 | 'a photo of many {}.', 42 | 'a sculpture of a {}.', 43 | 'a photo of the hard to see {}.', 44 | 'a low resolution photo of the {}.', 45 | 'a rendering of a {}.', 46 | 'graffiti of a {}.', 47 | 'a bad photo of the {}.', 48 | 'a cropped photo of the {}.', 49 | 'a tattoo of a {}.', 50 | 'the embroidered {}.', 51 | 'a photo of a hard to see {}.', 52 | 'a bright photo of a {}.', 53 | 'a photo of a clean {}.', 54 | 'a photo of a dirty {}.', 55 | 'a dark photo of the {}.', 56 | 'a drawing of a {}.', 57 | 'a photo of my {}.', 58 | 'the plastic {}.', 59 | 'a photo of the cool {}.', 60 | 'a close-up photo of a {}.', 61 | 'a black and white photo of the {}.', 62 | 'a painting of the {}.', 63 | 'a painting of a {}.', 64 | 'a pixelated photo of the {}.', 65 | 'a sculpture of the {}.', 66 | 'a bright photo of the {}.', 67 | 'a cropped photo of a {}.', 68 | 'a plastic {}.', 69 | 'a photo of the dirty {}.', 70 | 'a jpeg corrupted photo of a {}.', 71 | 'a blurry photo of the {}.', 72 | 'a photo of the {}.', 73 | 'a good photo of the {}.', 74 | 'a rendering of the {}.', 75 | 'a {} in a video game.', 76 | 'a photo of one {}.', 77 | 'a doodle of a {}.', 78 | 'a close-up photo of the {}.', 79 | 'a photo of a {}.', 80 | 'the origami {}.', 81 | 'the {} in a video game.', 82 | 'a sketch of a {}.', 83 | 'a doodle of the {}.', 84 | 'a origami {}.', 85 | 'a low resolution photo of a {}.', 86 | 'the toy {}.', 87 | 'a rendition of the {}.', 88 | 'a photo of the clean {}.', 89 | 'a photo of a large {}.', 90 | 'a rendition of a {}.', 91 | 'a photo of a nice {}.', 92 | 'a photo of a weird {}.', 93 | 'a blurry photo of a {}.', 94 | 'a cartoon {}.', 95 | 'art of a {}.', 96 | 'a sketch of the {}.', 97 | 'a 
embroidered {}.',
98 |         'a pixelated photo of a {}.',
99 |         'itap of the {}.',
100 |         'a jpeg corrupted photo of the {}.',
101 |         'a good photo of a {}.',
102 |         'a plushie {}.',
103 |         'a photo of the nice {}.',
104 |         'a photo of the small {}.',
105 |         'a photo of the weird {}.',
106 |         'the cartoon {}.',
107 |         'art of the {}.',
108 |         'a drawing of the {}.',
109 |         'a photo of the large {}.',
110 |         'a black and white photo of a {}.',
111 |         'the plushie {}.',
112 |         'a dark photo of a {}.',
113 |         'itap of a {}.',
114 |         'graffiti of the {}.',
115 |         'a toy {}.',
116 |         'itap of my {}.',
117 |         'a photo of a cool {}.',
118 |         'a photo of a small {}.',
119 |         'a tattoo of the {}.',
120 |     ]
121 | 
122 |     for word in vocab:
123 |         word = word.replace('_', ' ')
124 |         word = word.lower()
125 |         if word[0] in ['a', 'e', 'i', 'o', 'A', 'E', 'I', 'O']:
126 |             input_text.append(prompt + 'n ' + word)
127 |         else:
128 |             input_text.append(prompt + ' ' + word)
129 |     text = clip.tokenize(input_text).to(device)
130 | 
131 |     with torch.no_grad():
132 |         text_features = model.encode_text(text)
133 | 
134 |     embeddings = text_features / torch.norm(text_features.float(), dim=-1).unsqueeze(-1)
135 | 
136 |     return embeddings
137 | 
--------------------------------------------------------------------------------
/utility/model_bases.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 | 
4 | class LINEAR(nn.Module):
5 |     def __init__(self, input_dim, nclass, bias=True):
6 |         super(LINEAR, self).__init__()
7 |         self.fc = nn.Linear(input_dim, nclass, bias)
8 |     def forward(self, x):
9 |         o = self.fc(x)
10 |         return o
11 | 
12 | class LINEAR_TO_COS_SIM(nn.Module):
13 |     def __init__(self, weights):
14 |         super(LINEAR_TO_COS_SIM, self).__init__()
15 |         self.weights = weights
16 |         # module form of cosine similarity between two 1-D vectors
17 |         self.cos = nn.CosineSimilarity(dim=0)
18 |     def forward(self, x):
19 |         out = []
20 |         for sample in x:
21 |             temp = []
22 |             for weight in self.weights:
23 |                 temp.append(self.cos(weight, sample))
24 |             out.append(torch.stack(temp))
25 |         o = torch.stack(out)
26 |         return o
27 | 
28 | class WEIGHT_PREDICTOR(nn.Module):
29 |     def __init__(self, input_dim, embed_dim, output_dim, num_layers=3):
30 |         super(WEIGHT_PREDICTOR, self).__init__()
31 |         assert num_layers in [1, 2, 3, 4]
32 |         self.num_layers = num_layers
33 |         if num_layers == 4:
34 |             self.fc1 = nn.Linear(input_dim, embed_dim)
35 |             self.fc2 = nn.Linear(embed_dim, embed_dim)
36 |             self.fc3 = nn.Linear(embed_dim, embed_dim)
37 |             self.fc4 = nn.Linear(embed_dim, output_dim)
38 |         elif num_layers == 3:
39 |             self.fc1 = nn.Linear(input_dim, embed_dim)
40 |             self.fc2 = nn.Linear(embed_dim, embed_dim)
41 |             self.fc3 = nn.Linear(embed_dim, output_dim)
42 |         elif num_layers == 2:
43 |             self.fc1 = nn.Linear(input_dim, embed_dim)
44 |             self.fc2 = nn.Linear(embed_dim, output_dim)
45 |         else:
46 |             self.fc1 = nn.Linear(input_dim, output_dim)
47 |         self.relu = nn.ReLU()
48 |         self.soft = nn.Softmax(dim=1)
49 | 
50 |     def forward(self, x):
51 |         if self.num_layers == 4:
52 |             o = self.fc4(self.relu(self.fc3(self.relu(self.fc2(self.relu(self.fc1(x)))))))
53 |         elif self.num_layers == 3:
54 |             o = self.fc3(self.relu(self.fc2(self.relu(self.fc1(x)))))
55 |         elif self.num_layers == 2:
56 |             o = self.fc2(self.relu(self.fc1(x)))
57 |         else:
58 |             o = self.fc1(x)
59 |         return o
60 | 
61 | 
62 | class AUTOENCODER(nn.Module):
63 |     def __init__(self, opt, input_dim, embed_dim, output_dim=None, num_layers=3, vae=False, bias=True):
64 |         super(AUTOENCODER, self).__init__()
65 |         self.opt = opt
66 |         self.input_dim = 
input_dim
67 |         self.output_dim = output_dim
68 |         if output_dim is None:
69 |             self.output_dim = input_dim
70 |         if vae:
71 |             self.embed_dim = [2 * embed_dim, embed_dim]
72 |         else:
73 |             self.embed_dim = [embed_dim, embed_dim]
74 |         if num_layers == 2:
75 |             self.encoder = nn.Sequential(
76 |                 nn.Linear(self.input_dim, self.embed_dim[0]),
77 |                 nn.ReLU(inplace=True) if not vae else nn.Identity(inplace=True)
78 |             )
79 | 
80 |             self.decoder = nn.Sequential(
81 |                 nn.Linear(self.embed_dim[1], self.output_dim)
82 |             )
83 |         if num_layers == 3:
84 |             self.encoder = nn.Sequential(
85 |                 nn.Linear(self.input_dim, self.embed_dim[0]),
86 |                 nn.ReLU(inplace=True) if not vae else nn.Identity(inplace=True)
87 |             )
88 | 
89 |             self.decoder = nn.Sequential(
90 |                 nn.Linear(self.embed_dim[1], 1000),
91 |                 nn.ReLU(inplace=True),
92 |                 nn.Linear(1000, self.output_dim)
93 |             )
94 |         if num_layers == 4:
95 |             self.encoder = nn.Sequential(
96 |                 nn.Linear(self.input_dim, self.embed_dim[0]),
97 |                 nn.ReLU(inplace=True),
98 |                 nn.Linear(self.embed_dim[0], self.embed_dim[0]),
99 |                 nn.ReLU(inplace=True) if not vae else nn.Identity(inplace=True)
100 |             )
101 | 
102 |             self.decoder = nn.Sequential(
103 |                 nn.Linear(self.embed_dim[1], 1000),
104 |                 nn.ReLU(inplace=True),
105 |                 nn.Linear(1000, self.output_dim)
106 |             )
107 | 
108 |     def encode(self, x):
109 |         return self.encoder(x)
110 | 
111 |     def decode(self, x):
112 |         return self.decoder(x)
113 | 
114 |     def forward(self, x):
115 |         z = self.encode(x)
116 |         return self.decoder(z)
117 | 
118 | 
119 | class ATT_AUTOENCODER(AUTOENCODER):
120 |     def __init__(self, *args, **kwargs):
121 |         super().__init__(*args, **kwargs)
122 |         self.encoder = nn.Sequential(
123 |             nn.Linear(self.input_dim, 1450),
124 |             nn.ReLU(inplace=True),
125 |             nn.Linear(1450, self.embed_dim[0]),
126 |             nn.ReLU(inplace=True)
127 |         )
128 | 
129 |         self.decoder = nn.Sequential(
130 |             nn.Linear(self.embed_dim[1], 660),
131 |             nn.ReLU(inplace=True),
132 |             nn.Linear(660, self.output_dim)
133 |         )
134 | 
135 | class WEIGHT_AUTOENCODER(AUTOENCODER):
136 |     def __init__(self, *args, **kwargs):
137 |         super().__init__(*args, **kwargs)
138 |         self.encoder = nn.Sequential(
139 |             nn.Linear(self.input_dim, 1560),
140 |             nn.ReLU(inplace=True),
141 |             nn.Linear(1560, self.embed_dim[0]),
142 |             nn.ReLU(inplace=True)
143 |         )
144 | 
145 |         self.decoder = nn.Sequential(
146 |             nn.Linear(self.embed_dim[1], 1660),
147 |             nn.ReLU(inplace=True),
148 |             nn.Linear(1660, self.output_dim)
149 |         )
150 | 
151 | 
152 | class JOINT_AUTOENCODER(nn.Module):
153 |     def __init__(self, opt, autoencoder1, autoencoder2):
154 |         super(JOINT_AUTOENCODER, self).__init__()
155 |         self.ae1 = autoencoder1
156 |         self.ae2 = autoencoder2
157 | 
158 |     def encode1(self, x):
159 |         return self.ae1.encode(x)
160 | 
161 |     def encode2(self, x):
162 |         return self.ae2.encode(x)
163 | 
164 |     def decode1(self, x):
165 |         return self.ae1.decode(x)
166 | 
167 |     def decode2(self, x):
168 |         return self.ae2.decode(x)
169 | 
170 |     def forward(self, x):
171 |         att_in, weight_in = x
172 |         latent_att = self.encode1(att_in)
173 |         latent_weight = self.encode2(weight_in)
174 | 
175 |         att_from_att = self.decode1(latent_att)
176 |         att_from_weight = self.decode1(latent_weight)
177 |         weight_from_weight = self.decode2(latent_weight)
178 |         weight_from_att = self.decode2(latent_att)
179 | 
180 |         return att_from_att, att_from_weight, weight_from_weight, weight_from_att, latent_att, latent_weight
181 | 
182 |     def predict(self, x):
183 |         # Given attributes, predict weights
184 |         latent_att = self.encode1(x)
185 |         return self.decode2(latent_att)
186 | 
187 | 
188 | 
class VAE(AUTOENCODER): 188 | def __init__(self, *args, **kwargs): 189 | super().__init__(vae=True, *args, **kwargs) 190 | 191 | def reparameterize(self, mu, logvar, noise=True): 192 | if noise: 193 | sigma = torch.exp(logvar) 194 | eps = torch.FloatTensor(logvar.size()[0], 1).normal_(0, 1) 195 | if self.opt.cuda: 196 | eps = eps.cuda() 197 | eps = eps.expand(sigma.size()) 198 | return mu + sigma * eps 199 | else: 200 | return mu 201 | 202 | def encode(self, x): 203 | out = self.encoder(x) 204 | out = torch.split(out,out.shape[1]//2, dim=1) 205 | mu_batch, logvar_batch = out[0], out[1] 206 | kl_div = (0.5 * torch.sum(1 + logvar_batch - mu_batch.pow(2) - logvar_batch.exp())) 207 | return self.reparameterize(mu_batch, logvar_batch), kl_div, mu_batch, logvar_batch 208 | 209 | def decode(self, x): 210 | return self.decoder(x) 211 | 212 | def forward(self, x): 213 | z, kl_div, mu, logvar, = self.encode(x) 214 | return self.decoder(z), kl_div, mu, logvar 215 | 216 | class JOINT_VAE(JOINT_AUTOENCODER): 217 | def __init__(self, *args, **kwargs): 218 | super().__init__(*args, **kwargs) 219 | 220 | def forward(self, x): 221 | att_in, weight_in = x 222 | (z_att, kl_div_att, mu_att, logvar_att) = self.encode1(att_in) 223 | (z_weight, kl_div_weight, mu_weight, logvar_weight) = self.encode2(weight_in) 224 | att_from_att = self.decode1(z_att) 225 | att_from_weight = self.decode1(z_weight) 226 | weight_from_weight = self.decode2(z_weight) 227 | weight_from_att = self.decode2(z_att) 228 | 229 | return att_from_att, att_from_weight, weight_from_weight, weight_from_att, z_att, z_weight, kl_div_att, kl_div_weight, mu_att, logvar_att, mu_weight, logvar_weight 230 | 231 | def predict(self, x): 232 | # Given attributes, predict weights 233 | (z_att, kl_div_att, mu_att, logvar_att) = self.encode1(x) 234 | return self.decode2(z_att) 235 | -------------------------------------------------------------------------------- /utility/plot_data_ablation.py: -------------------------------------------------------------------------------- 1 | import matplotlib as mpl 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | from matplotlib.legend_handler import HandlerLine2D, HandlerTuple 5 | 6 | plt.rcParams.update({ "text.usetex":True, "font.family": "serif"}) 7 | 8 | # CUB 9 | axx = [150/150*100, 125/150*100, 100/150*100, 75/150*100, 50/150*100, 37/150*100, 25/150*100, 12/150*100] 10 | 11 | fs = 14 12 | zslc = 'mediumblue' # 'chocolate' 13 | gzslc ='mediumblue' 14 | tickcl=12 15 | 16 | 17 | ### ZSL 18 | fig,ax = plt.subplots() 19 | fig.set_figheight(3) 20 | fig.set_figwidth(6) 21 | 22 | a1, = ax.plot(axx, 23 | [60.9, 57.4, 51.0, 44.5, 37.4, 31.5, 29.5, 14.3], 24 | color=zslc, 25 | marker="*", 26 | label="Full ICIS (+ include unseen)") 27 | 28 | b1, = ax.plot(axx, 29 | [60.2, 57.8, 50.4, 44.7, 34.8, 29.3, 22.8, 11.1], 30 | color=zslc, 31 | marker="x", 32 | linestyle="dashed", 33 | label="+ Cross-modal") 34 | 35 | c1, = ax.plot(axx, 36 | [58.1, 56.0, 48.7, 44.3, 24.4, 27.6, 21.0, 9.9], 37 | color=zslc, 38 | marker="d", 39 | linestyle="dashdot", 40 | label="+ Single-modal") 41 | 42 | d1, = ax.plot(axx, 43 | [54.1, 49.9, 42.8, 34.3, 5.5, 21.8, 20.2, 8.9], 44 | color=zslc, 45 | marker="s", 46 | linestyle=(0, (5, 10)), 47 | label="+ Cosine loss") 48 | 49 | e1, = ax.plot(axx, 50 | [41.5, 38.8, 32.6, 28.1, 5.2, 8.0, 13.0, 8.5], 51 | color=zslc, 52 | marker="o", 53 | linestyle="dotted", 54 | label="MLP base model") 55 | 56 | # set x-axis label 57 | ax.set_xlabel("\% of seen classes", fontsize = fs) 58 | # set y-axis label 59 
| ax.set_ylim(bottom=0, top=65) 60 | ax.yaxis.label.set_color(zslc) 61 | ax.set_ylabel("I-ZSL, Acc\%", 62 | color=zslc, 63 | fontsize=fs) 64 | ax.tick_params(axis='y', colors=zslc,labelsize=tickcl) 65 | ax.set_xlim(left=axx[-1]+1, right=axx[0]+1) 66 | ax.tick_params(axis='x', labelsize=tickcl) 67 | plt.xticks(np.arange(10, 100, 10)) 68 | 69 | l = plt.legend([e1, d1, c1, b1, a1], ['MLP base model', '+ Cosine loss', '+ Single-modal', '+ Cross-modal', 'ICIS (full)'], 70 | handlelength=3, borderpad=0.7, labelspacing=0.7, loc='lower right', fontsize=8) # 'upper left' 71 | 72 | #save the plot as a file 73 | fig.savefig('numsamples_ablation_zsl.pdf', 74 | format='pdf', 75 | dpi=1200, 76 | bbox_inches='tight') 77 | 78 | ### GZSL 79 | fig,ax = plt.subplots() 80 | fig.set_figheight(3) 81 | fig.set_figwidth(6) 82 | # make a plot 83 | a1, = ax.plot(axx, 84 | [56.7, 54.8, 50.4, 46.1, 38.4, 35.3, 29.5, 17.1], 85 | color=gzslc, 86 | marker="*", 87 | label="Full ICIS (+ include unseen)") 88 | 89 | b1, = ax.plot(axx, 90 | [56.0, 53.8, 47.7, 44.2, 30.9, 29.7, 23.2, 12.1], 91 | color=gzslc, 92 | marker="x", 93 | linestyle="dashed", 94 | label="+ Cross-modal") 95 | 96 | c1, = ax.plot(axx, 97 | [52.7, 51.6, 44.9, 42.6, 14.8, 25.4, 19.7, 10.1], 98 | color=gzslc, 99 | marker="d", 100 | linestyle="dashdot", 101 | label="+ Single-modal") 102 | 103 | d1, = ax.plot(axx, 104 | [50.9, 48.3, 42.8, 36.2, 0.0, 23.5, 23.3, 9.8], 105 | color=gzslc, 106 | marker="s", 107 | linestyle=(0, (5, 10)), 108 | label="+ Cosine loss") 109 | 110 | e1, = ax.plot(axx, 111 | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 112 | color=gzslc, 113 | marker="o", 114 | linestyle="dotted", 115 | label="MLP base model") 116 | 117 | # set x-axis label 118 | ax.set_xlabel("\% of seen classes", fontsize = fs) 119 | # set y-axis label 120 | ax.set_ylim(bottom=0, top=65) 121 | ax.yaxis.label.set_color(gzslc) 122 | ax.set_ylabel("I-GZSL, H", 123 | color=gzslc, 124 | fontsize=fs) 125 | ax.tick_params(axis='y', colors=gzslc,labelsize=tickcl) 126 | ax.set_xlim(left=axx[-1]+1, right=axx[0]+1) 127 | ax.tick_params(axis='x', labelsize=tickcl) 128 | plt.xticks(np.arange(10, 100, 10)) 129 | 130 | l = plt.legend([e1, d1, c1, b1, a1], ['MLP base model', '+ Cosine loss', '+ Single-modal', '+ Cross-modal', 'ICIS (full)'], 131 | handlelength=3, borderpad=0.7, labelspacing=0.7, loc='lower right', fontsize=8) 132 | 133 | #save the plot as a file 134 | fig.savefig('numsamples_ablation_gzsl.pdf', 135 | format='pdf', 136 | dpi=1200, 137 | bbox_inches='tight') 138 | -------------------------------------------------------------------------------- /utility/plot_prediction_bins.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import matplotlib.pyplot as plt 3 | from matplotlib.legend_handler import HandlerLine2D, HandlerTuple 4 | import torch 5 | import torch.nn as nn 6 | import numpy as np 7 | import scipy.io as sio 8 | 9 | plt.rcParams.update({ "text.usetex":True, "font.family": "serif"}) 10 | 11 | def create_dict_foldernum_to_label(matcontent_splits): 12 | cls_names = matcontent_splits['allclasses_names'] 13 | folder_to_label_dict = {} 14 | for n, name in enumerate(cls_names): 15 | folder_to_label_dict[name[0][0]] = n 16 | return folder_to_label_dict 17 | 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('--dataset', default='CUB', help='FLO') 20 | parser.add_argument('--validation', action='store_true', default=False, help='enable cross validation mode (Deprecated)') 21 | 
parser.add_argument('--dataroot', default='/home/shared/iccv-apzsl/Feature-Generation-datasets', help='path to dataset') 22 | parser.add_argument('--matdataset', default=True, help='Data in matlab format') 23 | parser.add_argument('--image_embedding', default='tf_finetune') 24 | parser.add_argument('--class_embedding', default='att') 25 | 26 | opt = parser.parse_args() 27 | opt.zst = False 28 | assert opt.baseline in ['SMO', 'ConSE'] 29 | 30 | matcontent = sio.loadmat(opt.dataroot + "/" + opt.dataset + "/" + opt.image_embedding + ".mat") 31 | print("using the matcontent:", opt.dataroot + "/" + opt.dataset + "/" + opt.image_embedding + ".mat") 32 | 33 | feature = matcontent['features'].T 34 | if 'CUB_' in opt.image_embedding: 35 | feature = feature.T 36 | label = matcontent['labels'].astype(int).squeeze() - 1 37 | mat_path = opt.dataroot + "/" + opt.dataset + "/" + opt.class_embedding + "_splits.mat" 38 | matcontent = sio.loadmat(mat_path) 39 | print("using the matcontent:", mat_path) 40 | 41 | trainval_loc = matcontent['trainval_loc'].squeeze() - 1 42 | train_loc = matcontent['train_loc'].squeeze() - 1 43 | val_unseen_loc = matcontent['val_loc'].squeeze() - 1 44 | test_seen_loc = matcontent['test_seen_loc'].squeeze() - 1 45 | test_unseen_loc = matcontent['test_unseen_loc'].squeeze() - 1 46 | 47 | attribute = torch.from_numpy(matcontent['att'].T).float() 48 | 49 | folder_to_label_dict = create_dict_foldernum_to_label(matcontent) 50 | label_to_folder_dict = {v: k for k, v in folder_to_label_dict.items()} 51 | 52 | train_feature = torch.from_numpy(feature[trainval_loc]).float() 53 | train_label = torch.from_numpy(label[trainval_loc]).long() 54 | test_unseen_feature = torch.from_numpy(feature[test_unseen_loc]).float() 55 | test_unseen_label = torch.from_numpy(label[test_unseen_loc]).long() 56 | test_seen_feature = torch.from_numpy(feature[test_seen_loc]).float() 57 | test_seen_label = torch.from_numpy(label[test_seen_loc]).long() 58 | 59 | seenclasses = torch.from_numpy(np.unique(train_label.numpy())) 60 | unseenclasses = torch.from_numpy(np.unique(test_unseen_label.numpy())) 61 | ntrain = train_feature.size()[0] 62 | ntrain_class = seenclasses.size(0) 63 | ntest_class = unseenclasses.size(0) 64 | train_class = seenclasses.clone() 65 | allclasses = torch.arange(0, ntrain_class+ntest_class).long() 66 | 67 | per_cls_acc_gzsl = torch.load('nounseen_percls_acc_CUBtf_finetune_len_test_4731_len_tar_200.pt') 68 | per_cls_acc_unseen_zsl = torch.load('nounseen_percls_acc_CUBtf_finetune_len_test_2967_len_tar_50.pt') 69 | pred_matrix_gzsl = torch.load('nounseen_pred_matrixCUBtf_finetune_len_test_4731_len_tar_200.pt') 70 | pred_matrix_unseen_zsl = torch.load('nounseen_pred_matrixCUBtf_finetune_len_test_2967_len_tar_50.pt') 71 | 72 | # GZSL plot 73 | min_acc, min_idx = torch.min(per_cls_acc_gzsl[-len(unseenclasses):], dim=0) 74 | min_idx = min_idx + len(seenclasses) 75 | low_row = pred_matrix_gzsl[min_idx, :] 76 | low_confuse = low_row.nonzero().squeeze() 77 | low_confuse = low_confuse[low_confuse != min_idx] 78 | labels = torch.cat((seenclasses, unseenclasses)) 79 | low_confuse_labels = [] 80 | for idx in low_confuse: 81 | low_confuse_labels.append(labels[idx].numpy()) 82 | low_confuse_labels = np.array(low_confuse_labels) 83 | 84 | low_acc_cls = str(label_to_folder_dict[int(labels[min_idx].numpy())])[4:].replace("_", " ") 85 | print("Ours, GZSL, Low acc class:", low_acc_cls, "with probability:", min_acc) 86 | for label in low_confuse_labels: 87 | print(label_to_folder_dict[int(label)][4:].replace("_", " 
")) 88 | 89 | # Ordering based on attribute cosine similarity 90 | min_att = attribute[labels[min_idx]] 91 | cosine_sims = [] 92 | cos = nn.CosineSimilarity(dim=0, eps=1e-8) 93 | attributes = torch.cat((attribute[seenclasses], attribute[unseenclasses])) 94 | for att in attributes: 95 | cosine_sims.append(cos(min_att, att)) 96 | cosine_sims = torch.stack(cosine_sims) 97 | cosine_sims_ordered, cosine_sim_indeces = torch.topk(cosine_sims, k=len(attributes)) 98 | 99 | ordered_probs = low_row[cosine_sim_indeces] 100 | ordered_labels = labels[cosine_sim_indeces] 101 | ordered_cls_names = [] 102 | bar_colors = [] 103 | line_styles = [] 104 | for label in ordered_labels: 105 | cls_name = label_to_folder_dict[int(label)][4:].replace("_", " ") 106 | if label in unseenclasses: 107 | bar_colors.append('mediumblue') 108 | line_styles.append('--') 109 | else: 110 | bar_colors.append('chocolate') 111 | line_styles.append('-') 112 | ordered_cls_names.append(cls_name) 113 | 114 | ordered_cls_names = np.array(ordered_cls_names) 115 | 116 | # Bar plot of bins of similar classes (course) 117 | fig = plt.figure(figsize = (10, 5)) 118 | bin_size = 10 119 | bin_labels = [f'Rank {int(x-bin_size) + 1} to {int(x)}' for x in bin_size * np.arange(start=1, stop=len(ordered_probs)/bin_size+1)] 120 | binned_probs = np.squeeze(np.sum(np.reshape(ordered_probs.numpy(), (len(ordered_probs)//bin_size, bin_size)), axis=1)) 121 | barlist = plt.bar(bin_labels, binned_probs, color ='mediumblue', 122 | width = 0.4) 123 | 124 | plt.xlabel("CUB classes ordered by similarity with " + low_acc_cls, fontsize = 16) 125 | plt.xticks(rotation=45, ha='right') 126 | 127 | plt.ylabel("Fraction of predictions", fontsize = 16) 128 | plt.savefig('overview_low_acc_class.png', 129 | format='png', 130 | dpi=1600, 131 | bbox_inches='tight') 132 | 133 | 134 | # Bar plot of n most similar classes (finegrained) 135 | fig = plt.figure(figsize = (10, 5)) 136 | nonzero_idxs = np.array(ordered_probs.nonzero().squeeze()) 137 | barlist = plt.bar(ordered_cls_names[nonzero_idxs], ordered_probs.numpy()[nonzero_idxs], color ='mediumblue', width = 0.4) 138 | 139 | bar_colors = np.array(bar_colors)[nonzero_idxs] 140 | line_styles = np.array(line_styles)[nonzero_idxs] 141 | for n, ls in enumerate(line_styles): 142 | if ls == '--': 143 | barlist[n].set_color('w') 144 | barlist[n].set_linewidth(4) 145 | barlist[n].set_linestyle(ls) 146 | barlist[n].set_edgecolor('mediumblue') 147 | 148 | plt.xlabel("Classes ordered by similarity to " + low_acc_cls, fontsize = 16) 149 | plt.xticks(rotation=45, ha='right') 150 | 151 | colors = {'Seen class':'mediumblue', 'Unseen class':'w'} 152 | linestyles = {'Seen class':'-', 'Unseen class':'--'} 153 | edgecolors = {'Seen class':'mediumblue', 'Unseen class':'mediumblue'} 154 | labels = list(colors.keys()) 155 | handles = [plt.Rectangle((0,0),1,1, facecolor=colors[label], linewidth=1, linestyle=linestyles[label], edgecolor=edgecolors[label]) for label in labels] 156 | plt.legend(handles, labels) 157 | 158 | plt.ylabel("Fraction of predictions", fontsize = 16) 159 | plt.savefig('low_acc_class.png', 160 | format='png', 161 | dpi=1600, 162 | bbox_inches='tight') 163 | -------------------------------------------------------------------------------- /utility/train_base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import torch.optim as optim 5 | import utility.util as util 6 | import os 7 | import copy 8 | import 
torchvision 9 | import utility.model_bases as model 10 | from regressor import REGRESSOR 11 | 12 | class BASECLASSIFIER(REGRESSOR): 13 | def __init__(self, _train_X, _train_Y, data_loader, _nclass, _cuda, seedinfo, train_base=True, _lr=0.001, _beta1=0.5, _nepoch=20, _batch_size=100, _embed_dim=1000, _num_layers=3, opt=None): 14 | super().__init__(_train_X, _train_Y, data_loader, _nclass, _cuda, seedinfo, train_base, _lr, _beta1, _nepoch, _batch_size, _embed_dim, _num_layers, opt) 15 | self.opt = opt 16 | self.train_base = train_base 17 | 18 | self.nepoch = _nepoch 19 | self.model = model.LINEAR(self.input_dim, len(self.seenclasses)) 20 | self.model.apply(util.weights_init) 21 | self.criterion = nn.CrossEntropyLoss() 22 | self.optimizer_classifier = optim.Adam(self.model.parameters(), lr=self.opt.classifier_lr, betas=(self.opt.classifier_beta1, 0.999)) 23 | self.input = torch.FloatTensor(_batch_size, self.input_dim) 24 | self.label = torch.LongTensor(_batch_size) 25 | 26 | if self.cuda: 27 | self.model.cuda() 28 | self.criterion.cuda() 29 | self.input = self.input.cuda() 30 | self.label = self.label.cuda() 31 | 32 | self.index_in_epoch = 0 33 | self.epochs_completed = 0 34 | self.ntrain = self.train_X.size()[0] 35 | 36 | def fit(self): 37 | best_H = 0 38 | best_seen = 0 39 | best_unseen = 0 40 | 41 | if self.train_base: 42 | for epoch in range(self.nepoch): 43 | for i in range(0, self.ntrain, self.batch_size): 44 | self.model.zero_grad() 45 | batch_input, batch_label = self.next_batch(self.batch_size) 46 | self.input.copy_(batch_input) 47 | self.label.copy_(batch_label) 48 | 49 | inputv = Variable(self.input) 50 | labelv = Variable(self.label) 51 | output = self.model(inputv) 52 | loss = self.criterion(output, labelv) 53 | loss.backward() 54 | self.optimizer_classifier.step() 55 | 56 | acc_val_seen = 0 57 | acc_train = self.val_model(self.model, self.train_X, self.train_Y, util.map_label(self.seenclasses, self.seenclasses)) 58 | acc_val_seen = self.val_model(self.model, self.test_seen_feature, util.map_label(self.test_seen_label, self.seenclasses), util.map_label(self.seenclasses, self.seenclasses)) 59 | if acc_val_seen > best_seen: 60 | print(f'New best validation seen class accuracy={acc_val_seen*100:.4f}% (train seen class accuracy={acc_train*100:.4f}%)') 61 | best_seen = acc_val_seen 62 | best_model = copy.deepcopy(self.model) 63 | else: 64 | best_model = torch.load(self.opt.rootpath + '/models/base-classifiers/' + self.opt.dataset + self.opt.image_embedding + f'_seed{self.seedinfo}_clr{self.opt.classifier_lr}_nep{self.nepoch}') 65 | 66 | return best_model 67 | 68 | def next_batch(self, batch_size): 69 | start = self.index_in_epoch 70 | # shuffle the data at the first epoch 71 | if self.epochs_completed == 0 and start == 0: 72 | perm = torch.randperm(self.ntrain) 73 | self.train_X = self.train_X[perm] 74 | self.train_Y = self.train_Y[perm] 75 | # the last batch 76 | if start + batch_size > self.ntrain: 77 | self.epochs_completed += 1 78 | rest_num_examples = self.ntrain - start 79 | if rest_num_examples > 0: 80 | X_rest_part = self.train_X[start:self.ntrain] 81 | Y_rest_part = self.train_Y[start:self.ntrain] 82 | # shuffle the data 83 | perm = torch.randperm(self.ntrain) 84 | self.train_X = self.train_X[perm] 85 | self.train_Y = self.train_Y[perm] 86 | # start next epoch 87 | start = 0 88 | self.index_in_epoch = batch_size - rest_num_examples 89 | end = self.index_in_epoch 90 | X_new_part = self.train_X[start:end] 91 | Y_new_part = self.train_Y[start:end] 92 | if rest_num_examples > 0: 
93 | return torch.cat((X_rest_part, X_new_part), 0) , torch.cat((Y_rest_part, Y_new_part), 0) 94 | else: 95 | return X_new_part, Y_new_part 96 | else: 97 | self.index_in_epoch += batch_size 98 | end = self.index_in_epoch 99 | # from index start to index end-1 100 | return self.train_X[start:end], self.train_Y[start:end] 101 | --------------------------------------------------------------------------------
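
For reference, the classifier-injection pattern used throughout `utility/` (a seen-class `LINEAR` head from `utility/model_bases.py` extended with predicted unseen-class weights, mirroring the weight/bias concatenation in `utility/eval_imagenet.py`) can be sketched in a few lines. This is an illustrative sketch only: the tensor names `predicted_W` / `predicted_b` and the CUB-style dimensions (2048-d features, 150 seen + 50 unseen classes) are placeholder assumptions, not code from the repository.

```python
import torch
import torch.nn as nn
from utility.model_bases import LINEAR

# Hypothetical shapes: 2048-d ResNet101 features, 150 seen + 50 unseen classes (CUB-like split).
base = LINEAR(input_dim=2048, nclass=150)    # seen-class head, e.g. returned by BASECLASSIFIER.fit()
predicted_W = torch.randn(50, 2048)          # placeholder for unseen-class weights predicted from class embeddings
predicted_b = torch.zeros(50)                # placeholder biases for the unseen classes

# Inject the predicted classifiers by concatenating them onto the seen-class head.
gzsl_head = LINEAR(input_dim=2048, nclass=200)
gzsl_head.fc.weight = nn.Parameter(torch.cat((base.fc.weight.data, predicted_W)))
gzsl_head.fc.bias = nn.Parameter(torch.cat((base.fc.bias.data, predicted_b)))

features = torch.randn(4, 2048)              # dummy image features
logits = gzsl_head(features)                 # scores over all 200 (seen + unseen) classes
```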