├── .DS_Store
├── README.md
├── baselines
│   ├── baseline.py
│   ├── conse.py
│   ├── costa.py
│   ├── vgse.py
│   └── wDAEGNN
│       ├── .gitignore
│       ├── Dockerfile
│       ├── LICENCE
│       ├── README.md
│       ├── config
│       │   ├── imagenet_ResNet10CosineClassifier.py
│       │   └── imagenet_wDAE
│       │       └── imagenet_ResNet10CosineClassifier_wDAE_GNN.py
│       ├── data
│       │   ├── IMAGENET_LOWSHOT_BENCHMARK_CATEGORY_SPLITS.json
│       │   └── mini_imagenet_split
│       │       ├── test.csv
│       │       ├── train.csv
│       │       ├── train_val_test_classnames.csv
│       │       └── val.csv
│       ├── low_shot_learning
│       │   ├── __init__.py
│       │   ├── algorithms
│       │   │   ├── __init__.py
│       │   │   ├── algorithm.py
│       │   │   ├── classification
│       │   │   │   ├── __init__.py
│       │   │   │   ├── classification.py
│       │   │   │   └── utils.py
│       │   │   ├── fewshot
│       │   │   │   ├── __init__.py
│       │   │   │   ├── fewshot.py
│       │   │   │   ├── imagenet_lowshot.py
│       │   │   │   └── utils.py
│       │   │   └── utils
│       │   │       └── save_features.py
│       │   ├── architectures
│       │   │   ├── __init__.py
│       │   │   ├── classifiers
│       │   │   │   ├── __init__.py
│       │   │   │   ├── cosine_classifier_with_DAE_weight_generator.py
│       │   │   │   ├── cosine_classifier_with_weight_generator.py
│       │   │   │   ├── few_shot_classification_with_prototypes.py
│       │   │   │   ├── matching_network_head.py
│       │   │   │   ├── prototypical_network_head.py
│       │   │   │   ├── utils.py
│       │   │   │   └── weights_denoising_autoencoder.py
│       │   │   ├── feature_extractors
│       │   │   │   ├── __init__.py
│       │   │   │   ├── dumb_feat.py
│       │   │   │   ├── resnet_feat.py
│       │   │   │   ├── utils.py
│       │   │   │   └── wide_resnet.py
│       │   │   └── tools.py
│       │   ├── dataloaders
│       │   │   ├── __init__.py
│       │   │   ├── basic_dataloaders.py
│       │   │   └── dataloader_fewshot.py
│       │   ├── datasets
│       │   │   ├── __init__.py
│       │   │   ├── imagenet_dataset.py
│       │   │   └── mini_imagenet_dataset.py
│       │   └── utils.py
│       ├── scripts
│       │   ├── __init__.py
│       │   ├── lowshot_evaluate.py
│       │   ├── lowshot_train_stage1.py
│       │   ├── lowshot_train_stage2.py
│       │   └── save_features.py
│       └── setup.py
├── embeddings
│   ├── AWA2_classnames.npy
│   ├── CUB_classnames.npy
│   ├── ImageNet1K_classnames.txt
│   ├── SUN_classnames.npy
│   ├── conceptnet
│   │   ├── AWA2_cn_sum_list.npy
│   │   ├── CUB_cn_sum_list.npy
│   │   ├── SUN_cn_sum_list.npy
│   │   └── imgnet_cn_list.npy
│   └── wiki2vec
│       ├── AWA2_wiki_sum_list.npy
│       ├── CUB_wiki_sum_list.npy
│       ├── SUN_wiki_sum_list.npy
│       └── imgnet_wiki_list.npy
├── environment.yml
├── figs
│   ├── icis-framework.png
│   └── model-fig.png
├── joint_latent.py
├── main.py
├── regressor.py
├── scripts
│   ├── ablation_data_efficieny_CUB_full.sh
│   ├── table1_awa2.sh
│   ├── table1_cub.sh
│   ├── table1_cub_5seeds.sh
│   ├── table1_sun.sh
│   ├── table2.sh
│   ├── table3_cub.sh
│   ├── table4.sh
│   └── table5_left.sh
└── utility
    ├── ImageNet1K_classnames.txt
    ├── eval_imagenet.py
    ├── feature_extraction
    │   ├── extract_util.py
    │   └── feature_extract.py
    ├── load_wordembeddings.py
    ├── model_bases.py
    ├── plot_data_ablation.py
    ├── plot_prediction_bins.py
    ├── train_base.py
    └── util.py
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/.DS_Store
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Image-free Zero-Shot Learning
3 | This is the official repository for [Image-free Classifier Injection for Zero-Shot Classification](https://arxiv.org/abs/2308.10599), published at ICCV 2023.
4 |
5 |
6 |
7 |
8 |
9 |
10 | ## Prerequisites
11 |
12 | 1. Extracted fine-tuned CUB, AWA2, and SUN features are available [here](https://github.com/uqzhichen/SDGZSL) (clicking on "datasets" under the "Usage" section will lead you to [this zip file on Google Drive](https://drive.google.com/file/d/1KxFC6T_kGKCNx1JyX2FOaSimA0DOcU_I/view)). The features are originally from [this paper](https://github.com/akshitac8/tfvaegan).
13 |
14 | 2. Pre-trained ResNet101 features for the CUB, AWA2, and SUN datasets are available [here](https://drive.google.com/drive/folders/18egafUzqWp7kavtBSk78O0R2L1mx0dLX?usp=sharing). We extract the features using the current version (2) of the ResNet101 weights available via `torchvision`; see `/utility/feature_extraction/feature_extract.py` (a minimal sketch of this step is shown below this list). We use the [proposed datasplits](https://www.mpi-inf.mpg.de/departments/computer-vision-and-machine-learning/research/zero-shot-learning/zero-shot-learning-the-good-the-bad-and-the-ugly) ("Proposed Split Version 2.0"). Image datasets are available for [CUB](https://www.vision.caltech.edu/datasets/cub_200_2011/), [AWA2](https://cvml.ista.ac.at/AwA2/), and [SUN](https://groups.csail.mit.edu/vision/SUN/hierarchy.html).
15 |
16 | 3. Modify the default "dataroot" and "rootpath" in main.py (to point to your data path and to where outputs should be saved, respectively).
17 |
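As a reference for point 2, here is a minimal sketch of extracting 2048-d ResNet101 features with the version-2 `torchvision` weights. It only illustrates the idea; the actual pipeline (preprocessing, batching, and data splits) lives in `/utility/feature_extraction/feature_extract.py`, and the helper name `extract_feature` below is hypothetical:

```python
import torch
from torchvision.models import resnet101, ResNet101_Weights
from PIL import Image

# ResNet101 with the version-2 ImageNet weights; replace the classification head
# with an identity so the model returns the 2048-d pooled features.
weights = ResNet101_Weights.IMAGENET1K_V2
model = resnet101(weights=weights)
model.fc = torch.nn.Identity()
model.eval()

preprocess = weights.transforms()  # resize/crop/normalize matching these weights

@torch.no_grad()
def extract_feature(image_path):
    image = preprocess(Image.open(image_path).convert("RGB")).unsqueeze(0)
    return model(image).squeeze(0)  # 2048-d feature vector
```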
18 |
19 | ## Usage
20 |
21 | Scripts for the experiments behind the various tables are supplied in the scripts folder. By default, each script runs a single random seed and includes training/loading the base classification model and evaluating the specified I-(G)ZSL method; an example invocation is shown below. The number of seeds can be increased either with the --numSeeds arg, or by supplying fixed seeds with the --manualSeed arg (the latter also speeds up experiments, as base classification models can be reused).
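For instance, the CUB experiment from Table 1 can be launched as follows (assuming the scripts are invoked with bash from the repository root):

```bash
$ bash scripts/table1_cub.sh
```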
22 |
23 | A .yml file specifying the environment is supplied. (Replace the `cudatoolkit` version with the CUDA version appropriate for your system. Our experiments were done using CUDA/11.1 and CUDNN/8.1.)
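One typical way to create and activate the environment from the supplied file, assuming conda is installed (the environment name is whatever is set inside `environment.yml`):

```bash
$ conda env create -f environment.yml
$ conda activate <name-from-environment.yml>
```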
24 |
25 | Classnames for ImageNet1K are available in `/utility/`.
26 |
27 | Example usage apart from the scripts includes:
28 |
29 | ```bash
30 | $ python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope
31 | ```
32 |
33 | ## Citation
34 |
35 | If you use this code, please cite
36 | ```
37 | @InProceedings{Christensen_2023_ICCV,
38 | author = {Christensen, Anders and Mancini, Massimiliano and Koepke, A. Sophia and Winther, Ole and Akata, Zeynep},
39 | title = {Image-Free Classifier Injection for Zero-Shot Classification},
40 | booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
41 | month = {October},
42 | year = {2023},
43 | pages = {19072-19081}
44 | }
45 | ```
46 |
47 | **Note**: The wDAE-GNN, Sub.Reg., and wAVG/SMO implementations are imported from their respective repositories and adapted to our use case. If you find those parts useful, please consider citing the original works.
48 |
49 | ## Contact
50 |
51 | The code will receive minor updates. Questions etc. can be sent by email to
52 |
53 | Anders Christensen
54 |
55 | andchri@dtu.dk
56 |
57 | Technical University of Denmark & University of Tübingen
58 |
59 |
60 |
61 |
62 |
--------------------------------------------------------------------------------
/baselines/baseline.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import utility.util as util
3 | from regressor import REGRESSOR
4 | import torchvision
5 | import torch.nn as nn
6 | import os
7 |
8 | class Baseline(REGRESSOR):
9 | def __init__(self, opt, **kwargs):
10 | super().__init__(opt=opt, **kwargs)
11 |
12 | def evaluate_weights(self, pred_weights):
13 | self.unseen_model.fc.weight.data[:, :] = pred_weights[:, :self.input_dim]
14 | self.unseen_model.fc.bias.data[:] = pred_weights[:, self.input_dim]
15 |
16 | self.ext_model.fc.weight.data[len(self.seenclasses):, :] = pred_weights[:, :self.input_dim]
17 | self.ext_model.fc.bias.data[len(self.seenclasses):] = pred_weights[:, self.input_dim]
18 |
19 | if self.opt.zst:
20 | self.acc_target, self.acc_zst_unseen = self.val_zst()
21 | else:
22 | self.acc_gzsl, self.acc_seen, self.acc_unseen, self.H, self.acc_unseen_zsl = self.val_gzsl()
--------------------------------------------------------------------------------
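For orientation, `evaluate_weights` above assumes that each row of the predicted weights stores the linear-classifier weights for one unseen class followed by its bias in the last column. A small hypothetical sketch of that layout:

```python
import torch
import torch.nn as nn

# Toy example: 5 unseen classes, 2048-d features; the bias is appended as the last column.
pred_weights = torch.randn(5, 2049)
fc = nn.Linear(2048, 5)
with torch.no_grad():
    fc.weight.copy_(pred_weights[:, :2048])  # per-class weight vectors
    fc.bias.copy_(pred_weights[:, 2048])     # per-class biases
```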
/baselines/conse.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 | import numpy as np
5 | import utility.util as util
6 | import copy
7 | import random
8 | import sys
9 | import os
10 | import math
11 | import torchvision
12 | from torch.utils.data import Dataset, DataLoader
13 | import utility.model_bases as model
14 | from regressor import REGRESSOR
15 |
16 | class ConSE(REGRESSOR):
17 | def __init__(self, opt, **kwargs):
18 | super().__init__(opt=opt, **kwargs)
19 | self.opt = opt
20 | if self.cuda:
21 | self.model.cuda()
22 |
23 | if self.opt.zst:
24 | data = self.test_unseen_feature
25 | target = self.test_unseen_label
26 |
27 | self.acc_target = self.conse_val(self.model, data,
28 | util.map_label(target, self.unseenclasses-len(self.seenclasses)),
29 | util.map_label(self.unseenclasses-len(self.seenclasses), self.unseenclasses-len(self.seenclasses)),
30 | train_attributes=self.attribute[self.seenclasses], test_attributes=self.attribute[self.unseenclasses])
31 |
32 | self.acc_zst_unseen = self.conse_val(self.model, data,
33 | util.map_label(target, self.unseenclasses-len(self.seenclasses)) + len(self.seenclasses),
34 | util.map_label_extend(self.unseenclasses, self.unseenclasses, self.seenclasses),
35 | train_attributes=self.attribute[self.seenclasses], test_attributes=torch.cat((self.attribute[self.seenclasses], self.attribute[self.unseenclasses])))
36 |
37 | else:
38 | # GZSL
39 | self.acc_gzsl = self.conse_val(self.model, torch.cat((self.test_seen_feature, self.test_unseen_feature), 0),
40 | torch.cat((util.map_label(self.test_seen_label, self.seenclasses), util.map_label_extend(self.test_unseen_label, self.unseenclasses, self.seenclasses)), 0),
41 | torch.cat((util.map_label(self.seenclasses, self.seenclasses) , util.map_label_extend(self.unseenclasses, self.unseenclasses, self.seenclasses)), 0),
42 | train_attributes=self.attribute[self.seenclasses], test_attributes=torch.cat((self.attribute[self.seenclasses], self.attribute[self.unseenclasses])))
43 | self.acc_seen = self.conse_val(self.model, self.test_seen_feature, util.map_label(self.test_seen_label, self.seenclasses), util.map_label(self.seenclasses, self.seenclasses), train_attributes=self.attribute[self.seenclasses], test_attributes=torch.cat((self.attribute[self.seenclasses], self.attribute[self.unseenclasses])))
44 | self.acc_unseen = self.conse_val(self.model, self.test_unseen_feature, util.map_label(self.test_unseen_label, self.unseenclasses), util.map_label(self.unseenclasses, self.unseenclasses), train_attributes=self.attribute[self.seenclasses], test_attributes=torch.cat((self.attribute[self.seenclasses], self.attribute[self.unseenclasses])))
45 | self.H = 2*self.acc_seen*self.acc_unseen / (self.acc_seen+self.acc_unseen)
46 | # ZSL
47 | self.acc_unseen_zsl = self.conse_val(self.model, self.test_unseen_feature, util.map_label(self.test_unseen_label, self.unseenclasses), util.map_label(self.unseenclasses, self.unseenclasses), train_attributes=self.attribute[self.seenclasses], test_attributes=self.attribute[self.unseenclasses])
48 |
49 | def conse_val(self, model, test_X, test_label, target_classes, train_attributes, test_attributes):
50 | """ Predict semantic embedding for input, then compare to class embeddings (attributes) """
51 | cos = nn.CosineSimilarity(dim=1, eps=1e-8)
52 | soft = torch.nn.Softmax(dim=1)
53 | if self.cuda:
54 | train_attributes = train_attributes.cuda()
55 | test_attributes = test_attributes.cuda()
56 | start = 0
57 | ntest = test_X.size()[0]
58 | predicted_label = torch.LongTensor(test_label.size())
59 | for i in range(0, ntest, self.batch_size):
60 | end = min(ntest, start+self.batch_size)
61 | if self.cuda:
62 | logits = model(Variable(test_X[start:end].cuda()))
63 | else:
64 | logits = model(Variable(test_X[start:end]))
65 |
66 | if self.opt.class_reduction_ablation:
67 | probs = soft(logits[:, self.perm])
68 | pred_embeds = torch.sum(train_attributes[self.perm] * probs.unsqueeze(-1), dim=1)
69 | else:
70 | probs = soft(logits)
71 | pred_embeds = torch.sum(train_attributes * probs.unsqueeze(-1), dim=1)
72 |
73 | output = []
74 | for pred_embed in pred_embeds:
75 | sims = cos(pred_embed[None, :], test_attributes)
76 | _, idx = torch.max(sims, dim=0)
77 | output.append(idx)
78 |
79 | output = torch.stack(output)
80 | predicted_label[start:end] = output
81 | start = end
82 |
83 | acc, acc_per_class, prediction_matrix = self.compute_per_class_acc_gzsl(test_label, predicted_label, target_classes)
84 | if self.opt.save_pred_matrix:
 85 |             torch.save(acc_per_class, self.opt.rootpath + '/outputs/' + self.opt.dataset + self.opt.image_embedding + '_len_test_' + str(len(test_X)) + '_len_tar_' + str(len(target_classes)) + '_acc_per_class.pt')  # use self.opt (opt is undefined here)
 86 |             torch.save(prediction_matrix, self.opt.rootpath + '/outputs/' + self.opt.dataset + self.opt.image_embedding + '_len_test_' + str(len(test_X)) + '_len_tar_' + str(len(target_classes)) + '_pred_matrix.pt')  # distinct filename so the two tensors do not overwrite each other
87 |
88 | return acc
89 |
--------------------------------------------------------------------------------
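The prediction rule implemented in `conse_val` above can be summarized in a few lines. A minimal, self-contained sketch with hypothetical toy tensors (the real code batches the inputs and relies on the repository's label-mapping utilities; `conse_predict` is a name introduced only for illustration):

```python
import torch
import torch.nn.functional as F

def conse_predict(logits, seen_attributes, candidate_attributes):
    """ConSE: softmax over seen-class logits, convex combination of seen-class
    attributes, then nearest candidate class by cosine similarity."""
    probs = F.softmax(logits, dim=1)                      # (B, n_seen)
    pred_embed = probs @ seen_attributes                  # (B, attr_dim)
    sims = F.cosine_similarity(pred_embed.unsqueeze(1),
                               candidate_attributes.unsqueeze(0), dim=-1)  # (B, n_cand)
    return sims.argmax(dim=1)

# Toy example: 4 images, 10 seen classes, 5 candidate (unseen) classes, 85-d attributes.
logits = torch.randn(4, 10)
seen_atts, unseen_atts = torch.rand(10, 85), torch.rand(5, 85)
print(conse_predict(logits, seen_atts, unseen_atts))
```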
/baselines/costa.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 | import numpy as np
5 | import utility.util as util
6 | import copy
7 | import random
8 | import sys
9 | import os
10 | import math
11 | import torchvision
12 | from torch.utils.data import Dataset, DataLoader
13 | import utility.model_bases as model
14 | from regressor import REGRESSOR
15 |
16 | class COSTA(REGRESSOR):
17 | def __init__(self, opt, **kwargs):
18 | super().__init__(opt=opt, **kwargs)
19 | self.opt = opt
20 | self.unseen_model = model.LINEAR(self.input_dim, len(self.unseenclasses))
21 | self.ext_model = model.LINEAR(self.input_dim, self.nclass)
22 | if self.cuda:
23 | self.unseen_model.cuda()
24 | self.ext_model.cuda()
25 |
26 | self.ext_model.fc.weight.data[:len(self.seenclasses), :] = self.target_weights[:, :2048]
27 | self.ext_model.fc.bias.data[:len(self.seenclasses)] = self.target_weights[:, 2048]
28 | for n, unseen_att in enumerate(self.attribute[self.unseenclasses]):
29 | cooccs = unseen_att.unsqueeze(0) * self.attribute[self.seenclasses]
30 | norm_coocs = torch.sum(cooccs, dim=-1) / (cooccs.sum() + 10e-5)
31 | if self.opt.cuda:
32 | norm_coocs = norm_coocs.cuda()
33 | pred_weights = torch.sum(norm_coocs[:, None]*self.target_weights, dim=0)
34 |
35 | self.unseen_model.fc.weight.data[n, :] = pred_weights[:-1]
36 | self.unseen_model.fc.bias.data[n] = pred_weights[-1]
37 |
38 | self.ext_model.fc.weight.data[len(self.seenclasses) + n, :] = pred_weights[:-1]
39 | self.ext_model.fc.bias.data[len(self.seenclasses) + n] = pred_weights[-1]
40 |
41 | # GZSL
42 | if self.opt.zst:
43 | self.acc_target, self.acc_zst_unseen = self.val_zst()
44 |
45 | else:
46 | self.acc_gzsl, self.acc_seen, self.acc_unseen, self.H, self.acc_unseen_zsl = self.val_gzsl()
47 |
--------------------------------------------------------------------------------
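The per-class loop in `COSTA.__init__` above can be written equivalently in matrix form. A minimal sketch with hypothetical toy tensors:

```python
import torch

# Toy shapes: 10 seen classes, 5 unseen classes, 85-d attributes,
# and 2048-d classifier weights with the bias appended as a 2049th column.
seen_atts, unseen_atts = torch.rand(10, 85), torch.rand(5, 85)
seen_weights = torch.randn(10, 2049)

# COSTA: the co-occurrence of an unseen class with each seen class is the inner product
# of their attribute vectors; normalize per unseen class and mix the seen classifiers.
cooccs = unseen_atts @ seen_atts.t()                           # (n_unseen, n_seen)
norm_coocs = cooccs / (cooccs.sum(dim=1, keepdim=True) + 1e-5)
pred_weights = norm_coocs @ seen_weights                       # (n_unseen, 2049)
```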
/baselines/vgse.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import torchvision
4 |
5 | from baselines.baseline import Baseline
6 | from utility.model_bases import LINEAR
7 | import utility.util as util
8 |
9 | class VGSE_CRM(Baseline):
10 | """ Baseline inspired by the Class Relation Module (CRM) from
11 | VGSE: Visually-Grounded Semantic Embeddings for Zero-Shot Learning
12 | by Xu et al. Contains implementation of both WAvg and SMO CRM. """
13 | def __init__(self, opt, **kwargs):
14 | super().__init__(opt=opt, **kwargs)
15 | self.opt = opt
16 |
17 | if opt.vgse_baseline == 'wavg':
18 | pred_weights = self.WAvg(opt.vgse_nbs, opt.vgse_eta)
19 | elif opt.vgse_baseline == 'smo':
20 | pred_weights = self.SMO(alpha=opt.vgse_alpha)
21 |
22 | self.unseen_model = LINEAR(self.input_dim, len(self.unseenclasses))
23 | self.ext_model = LINEAR(self.input_dim, self.nclass)
24 | if self.cuda:
25 | self.unseen_model.cuda()
26 | self.ext_model.cuda()
27 |
28 | self.ext_model.fc.weight.data[:len(self.seenclasses), :] = self.target_weights[:, :2048]
29 | self.ext_model.fc.bias.data[:len(self.seenclasses)] = self.target_weights[:, 2048]
30 |
31 | self.evaluate_weights(pred_weights)
32 |
33 | def WAvg(self, num_neighbours=5, eta=5):
34 | """ Implementation of Weighted Average (WAvg) CRM.
35 | Hyperparameters (num_neighbours and eta) taken from paper. """
36 |
37 | unseen_att_sims = np.zeros((len(self.unseenclasses), len(self.seenclasses)))
38 | for i in range(len(self.unseenclasses)):
39 | for j in range(len(self.seenclasses)):
40 | unseen_att_sims[i, j] = torch.exp(-eta*torch.dist(self.attribute[self.unseenclasses[i]], self.attribute[self.seenclasses[j]]))
41 | unseen_att_sims = torch.from_numpy(unseen_att_sims).float()
42 |
43 | if self.opt.cuda:
44 | unseen_att_sims = unseen_att_sims.cuda()
45 |
46 | pred_weights = torch.matmul(unseen_att_sims, self.target_weights)
47 |
48 | return pred_weights
49 |
50 | def SMO(self, alpha=0, eps=10e-8):
51 | """ Implementation of Similarity Matrix Optimization (SMO) CRM """
52 | assert alpha in [-1, 0]
53 |
54 | loss_fnc = torch.nn.MSELoss()
55 | reg = torch.nn.L1Loss()
56 | sum_constraint = torch.ones(1)[0]
57 |
58 | if alpha == 0:
59 | lr = 1000
60 | domain_fnc = torch.nn.Softmax(dim=0)
61 | else: # alpha = -1
62 | lr = 10e-6
63 | domain_fnc = torch.nn.Tanh()
64 |
65 | all_pred_weights = torch.zeros(len(self.unseenclasses), self.target_weights.size(1))
66 | if self.cuda:
67 | all_pred_weights = all_pred_weights.cuda()
68 | sum_constraint = sum_constraint.cuda()
69 | self.attribute = self.attribute.cuda()
70 |
71 | for i in range(len(self.unseenclasses)):
72 | converged = False
73 | best_loss = 1000
74 | prev_loss = 1000
75 | counter = 0
76 |
77 | smo = SMOModel(domain_fnc=domain_fnc, dim=len(self.seenclasses))
78 | if self.cuda:
79 | smo.cuda()
80 |
81 | optim = torch.optim.SGD(smo.parameters(), lr=lr)
82 |
83 | while not converged:
84 | optim.zero_grad()
85 | pred_att = smo(self.attribute[self.seenclasses])
86 | loss = loss_fnc(pred_att, self.attribute[self.unseenclasses[i]]) - alpha * reg(torch.sum(smo.domain_fnc(smo.r)), sum_constraint)
87 | loss.backward()
88 | optim.step()
89 |
90 | if loss < best_loss:
91 | best_loss = loss
92 | best_r = smo.r
93 | if torch.abs(prev_loss - loss) < eps:
94 | counter += 1
95 | else:
96 | counter = 0
97 | else:
98 | counter += 1
99 |
100 | if counter > 10:
101 | converged = True
102 |
103 | prev_loss = loss
104 |
105 | pred_weights = torch.sum(domain_fnc(best_r)[:, None] * self.target_weights, dim=0)
106 | all_pred_weights[i,:] = pred_weights
107 |
108 | return all_pred_weights
109 |
110 |
111 | class SMOModel(torch.nn.Module):
112 | def __init__(self, domain_fnc, dim):
113 | super().__init__()
114 | self.domain_fnc = domain_fnc
115 | self.dim = dim
116 | self.r = torch.nn.parameter.Parameter(data=torch.normal(mean=torch.zeros(dim), std=2/dim), requires_grad=True)
117 |
118 | def forward(self, data):
119 | pred_att = torch.sum(self.domain_fnc(self.r)[:, None] * data, dim=0)
120 | return pred_att
121 |
--------------------------------------------------------------------------------
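The double loop in `WAvg` above can also be vectorized. A minimal sketch with hypothetical toy tensors (`torch.cdist` computes the same pairwise Euclidean distances as the `torch.dist` calls in the loop):

```python
import torch

# Toy shapes: 10 seen classes, 5 unseen classes, 85-d attributes,
# and 2048-d classifier weights with the bias appended as a 2049th column.
seen_atts, unseen_atts = torch.rand(10, 85), torch.rand(5, 85)
seen_weights = torch.randn(10, 2049)
eta = 5.0

# WAvg: the similarity between an unseen and a seen class is exp(-eta * ||a_u - a_s||);
# unseen classifier weights are the similarity-weighted sum of the seen classifiers.
sims = torch.exp(-eta * torch.cdist(unseen_atts, seen_atts))   # (n_unseen, n_seen)
pred_weights = sims @ seen_weights                             # (n_unseen, 2049)
```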
/baselines/wDAEGNN/.gitignore:
--------------------------------------------------------------------------------
1 | ./experiments
2 | ./experiments/*
3 | ./datasets
4 | *~
5 | *.pyc
6 | *.pkl
7 | data/IMAGENET
8 | .remote-sync.json
9 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:10.0-devel-ubuntu18.04
2 |
3 | RUN yes | unminimize
4 |
5 | RUN apt-get update && apt-get install -y wget bzip2
6 | RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
7 | RUN bash miniconda.sh -b -p /opt/conda && \
8 | rm miniconda.sh
9 | ENV PATH="/opt/conda/bin:${PATH}"
10 | RUN conda config --set always_yes yes
11 |
12 | RUN pip install https://download.pytorch.org/whl/cu100/torch-1.1.0-cp37-cp37m-linux_x86_64.whl
13 | RUN pip install https://download.pytorch.org/whl/cu100/torchvision-0.3.0-cp37-cp37m-linux_x86_64.whl
14 |
15 | RUN pip install tensorboardX scikit-image tqdm pyyaml easydict future h5py torchnet pip
16 | RUN apt-get install unzip
17 |
18 | COPY ./ ./wDAE_GNN_FewShot
19 | RUN pip install -e ./wDAE_GNN_FewShot
20 |
21 | WORKDIR ./wDAE_GNN_FewShot
22 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/LICENCE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Spyridon Gidaris
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/README.md:
--------------------------------------------------------------------------------
1 | # *Generating Classification Weights with GNN Denoising Autoencoders for Few-Shot Learning*
2 |
3 | The current project page provides [pytorch](http://pytorch.org/) code that implements the following CVPR2019 paper (accepted as oral):
4 | **Title:** "Generating Classification Weights with GNN Denoising Autoencoders for Few-Shot Learning"
5 | **Authors:** Spyros Gidaris, Nikos Komodakis
6 | **Code:** https://github.com/gidariss/wDAE_GNN_FewShot
7 |
8 | **Abstract:**
9 | Given an initial recognition model already trained on a set of base classes, the goal of this work is to develop a meta-model for few-shot learning. The meta-model, given as input some novel classes with few training examples per class, must properly adapt the existing recognition model into a new model that can correctly classify in a unified way both the novel and the base classes. To accomplish this goal it must learn to output the appropriate classification weight vectors for those two types of classes. To build our meta-model we make use of two main innovations: we propose the use of a Denoising Autoencoder network (DAE) that (during training) takes as input a set of classification weights corrupted with Gaussian noise and learns to reconstruct the target-discriminative classification weights. In this case, the injected noise on the classification weights serves the role of regularizing the weight generating meta-model. Furthermore, in order to capture the co-dependencies between different classes in a given task instance of our meta-model, we propose to implement the DAE model as a Graph Neural Network (GNN). In order to verify the efficacy of our approach, we extensively evaluate it on ImageNet based few-shot benchmarks and we report strong results that surpass prior approaches.
10 |
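To make the core idea above concrete, here is a minimal sketch of a denoising autoencoder over classification-weight vectors: a plain MLP applied to individually corrupted weights, whereas the model used in this repository is a GNN that also passes messages between classes (see `low_shot_learning/architectures/classifiers/weights_denoising_autoencoder.py`). The class name and toy tensors below are hypothetical:

```python
import torch
import torch.nn as nn

class WeightDenoiser(nn.Module):
    """Toy stand-in for the wDAE idea: reconstruct target classification weights
    from Gaussian-noise-corrupted inputs; the injected noise regularizes the generator."""
    def __init__(self, num_features=512, hidden=1024):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(num_features, hidden), nn.ReLU(),
            nn.Linear(hidden, num_features))

    def forward(self, weights, noise_std=0.08):
        noisy = weights + noise_std * torch.randn_like(weights)  # corrupt the weights
        return self.net(noisy)

# One training step: regress the denoised weights onto the clean target weights.
target_weights = torch.randn(200, 512)  # e.g. classification weights of 200 classes
denoiser = WeightDenoiser()
optimizer = torch.optim.SGD(denoiser.parameters(), lr=0.1)
loss = nn.functional.mse_loss(denoiser(target_weights), target_weights)
loss.backward()
optimizer.step()
```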
11 |
12 | ### License
13 | This code is released under the MIT License (refer to the LICENCE file for details).
14 |
15 | ## Contents:
16 | **(1)** Code for running the ImageNet-based experiments with the wDAE-GNN-based few-shot model.
17 |
18 | **(2)** Code for running the MiniImageNet-based experiments will be ready soon.
19 |
20 | ## Preparation
21 |
22 | ### Pre-requisites
23 | * Python 3.7
24 | * Pytorch >= 1.0.0
25 | * CUDA 10.0 or higher
26 |
27 | ### Installation
28 |
29 | **(1)** Clone the repo:
30 | ```bash
31 | $ git clone https://github.com/gidariss/wDAE_GNN_FewShot
32 | ```
33 |
34 | **(2)** Install this repository and the dependencies using pip:
35 | ```bash
36 | $ pip install -e ./wDAE_GNN_FewShot
37 | ```
38 |
39 | With this, you can edit the wDAE_GNN_FewShot code on the fly and import functions
40 | and classes of wDAE_GNN_FewShot in other projects as well.
41 |
42 | **(3)** Optional. To uninstall this package, run:
43 | ```bash
44 | $ pip uninstall wDAE_GNN_FewShot
45 | ```
46 |
47 | **(4)** Create the *datasets* and *experiments* directories:
48 | ```bash
49 | $ cd wDAE_GNN_FewShot
50 | $ mkdir ./datasets
51 | $ mkdir ./experiments
52 | ```
53 |
54 | You can take a look at the [Dockerfile](./Dockerfile) if you are uncertain about the steps needed to install this project.
55 |
56 | ## Running experiments on the ImageNet based few-shot benchmark
57 |
58 | Here I provide instructions for training and evaluating our method on the ImageNet-based low-shot benchmark proposed by Hariharan and Girshick [1].
59 |
60 | **(1)** Download the ImageNet dataset and, in [imagenet_dataset.py](https://github.com/gidariss/wDAE_GNN_FewShot/blob/master/low_shot_learning/datasets/imagenet_dataset.py#L19), set the path to where the dataset resides on your machine.
61 |
62 | **(2)** Train a ResNet10 based recognition model with cosine similarity-based classifier [3]:
63 | ```bash
64 | $ cd wDAE_GNN_FewShot # enter the wDAE_GNN_FewShot directory.
65 | $ python scripts/lowshot_train_stage1.py --config=imagenet_ResNet10CosineClassifier
66 | ```
67 | You can download our already trained recognition model from [here](https://github.com/gidariss/wDAE_GNN_FewShot/releases/download/0.1/imagenet_ResNet10CosineClassifier.zip). In that case, place the model inside the './experiments' directory under the name './experiments/imagenet_ResNet10CosineClassifier'.
68 | ```bash
69 | # Run from the wDAE_GNN_FewShot directory
70 | $ cd ./experiments
71 | $ wget https://github.com/gidariss/wDAE_GNN_FewShot/releases/download/0.1/imagenet_ResNet10CosineClassifier.zip
72 | $ unzip imagenet_ResNet10CosineClassifier.zip
73 | $ cd ..
74 | ```
75 |
76 | **(3)** Extract and save the ResNet10 features (with the above model; see step (2)) from images of the ImageNet dataset:
77 | ```bash
78 | # Run from the wDAE_GNN_FewShot directory
79 | # Extract features from the validation image split of the Imagenet.
80 | $ python scripts/save_features.py --config=imagenet_ResNet10CosineClassifier --split='val'
81 | # Extract features from the training image split of the Imagenet.
82 | $ python scripts/save_features.py --config=imagenet_ResNet10CosineClassifier --split='train'
83 | ```
84 | The features will be saved in './datasets/feature_datasets/imagenet_ResNet10CosineClassifier'.
85 | You can download the pre-computed features from [here](https://mega.nz/#!bsVlzQBR!MNADfBM4JX2KgWG13oL0pXhHCQqvkPRD4MfP_aUOtXg). In that case, place the downloaded features in './datasets/' with the following structure:
86 | ```
87 | # Features of the validation images of ImageNet.
88 | ./datasets/feature_datasets/imagenet_ResNet10CosineClassifier/ImageNet_val.h5
89 | # Features of the training images of ImageNet.
90 | ./datasets/feature_datasets/imagenet_ResNet10CosineClassifier/ImageNet_train.h5
91 | ```
92 |
93 |
94 | **(4)** Train the Graph Neural Network Denoising AutoEncoder few-shot model (wDAE_GNN):
95 | ```bash
96 | # Run from the wDAE_GNN_FewShot directory
97 | # Training the wDAE-GNN few-shot model.
98 | $ python scripts/lowshot_train_stage2.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN
99 | ```
100 | The model will be saved in 'wDAE_GNN_FewShot/experiments/imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN'.
101 | Otherwise, you can download the pre-trained few-shot model from
102 | [here](https://github.com/gidariss/wDAE_GNN_FewShot/releases/download/0.1/imagenet_ResNet10CosineClassifier_wDAE_GNN.zip).
103 | In that case, place the downloaded model in
104 | 'wDAE_GNN_FewShot/experiments/imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN'.
105 | ```bash
106 | # Run from the wDAE_GNN_FewShot directory
107 | $ cd experiments # enter the experiments directory.
108 | $ mkdir imagenet_wDAE
109 | $ cd imagenet_wDAE
110 | $ wget https://github.com/gidariss/wDAE_GNN_FewShot/releases/download/0.1/imagenet_ResNet10CosineClassifier_wDAE_GNN.zip
111 | $ unzip imagenet_ResNet10CosineClassifier_wDAE_GNN.zip
112 | $ cd ../../
113 | ```
114 |
115 |
116 | **(5)** Evaluate the above trained model:
117 | ```bash
118 | # Run from the wDAE_GNN_FewShot directory
119 | # Evaluate the model on the 1-shot setting.
120 | $ python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=1 --step_size=1.0
121 | # Expected output:
122 | # ==> Top 5 Accuracies: [Novel: 47.99 | Base: 93.39 | All 59.02 ]
123 |
124 | # Evaluate the model on the 2-shot setting.
125 | $ python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=2 --step_size=1.0
126 | # Expected output:
127 | # ==> Top 5 Accuracies: [Novel: 59.54 | Base: 93.39 | All 66.22 ]
128 |
129 | # Evaluate the model on the 5-shot setting.
130 | $ python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=5 --step_size=0.6
131 | # Expected output:
132 | # ==> Top 5 Accuracies: [Novel: 70.23 | Base: 93.44 | All 73.20 ]
133 |
134 | # Evaluate the model on the 10-shot setting.
135 | $ python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=10 --step_size=0.4
136 | # Expected output:
137 | # ==> Top 5 Accuracies: [Novel: 74.95 | Base: 93.37 | All 76.09 ]
138 |
139 | # Evaluate the model on the 20-shot setting.
140 | $ python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=20 --step_size=0.2
141 | # Expected output:
142 | # ==> Top 5 Accuracies: [Novel: 77.77 | Base: 93.33 | All 77.54 ]
143 | ```
144 |
145 | ## Experimental results on the ImageNet based Low-shot benchmark
146 |
147 | Here I provide the experimental results of the few-shot model trained with this code on the ImageNet-based low-shot benchmark [1], using the evaluation metrics proposed by [2].
148 | Note that after cleaning and refactoring the implementation code of the paper
149 | and re-running the experiments, the results we obtained are slightly different.
150 |
151 | ### Top-5 classification accuracy of wDAE-GNN model.
152 | | wDAE-GNN | Novel | All |
153 | | ------------------------------------ | ---------------:|----------------:|
154 | | 1-shot results | 47.99% | 59.02% |
155 | | 2-shot results | 59.54% | 66.22% |
156 | | 5-shot results | 70.23% | 73.20% |
157 | | 10-shot results | 74.95% | 76.09% |
158 | | 20-shot results | 77.77% | 77.54% |
159 |
160 | ### References
161 | ```
162 | [1] B. Hariharan and R. Girshick. Low-shot visual recognition by shrinking and hallucinating features.
163 | [2] Y.-X. Wang and R. Girshick, M. Hebert, B. Hariharan. Low-shot learning from imaginary data.
164 | [3] S. Gidaris and N. Komodakis. Dynamic few-shot visual learning without forgetting.
165 | ```
166 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/config/imagenet_ResNet10CosineClassifier.py:
--------------------------------------------------------------------------------
1 | config = {}
2 | # set the parameters related to the training and testing set
3 |
4 | nKbase = 389
5 |
6 | data_train_opt = {}
7 | data_train_opt['nKnovel'] = 0
8 | data_train_opt['nKbase'] = nKbase
9 | data_train_opt['nExemplars'] = 0
10 | data_train_opt['nTestNovel'] = 0
11 | data_train_opt['nTestBase'] = 400
12 | data_train_opt['batch_size'] = 1
13 | data_train_opt['epoch_size'] = 4000
14 | config['data_train_opt'] = data_train_opt
15 |
16 | config['max_num_epochs'] = 100
17 |
18 | networks = {}
19 | net_optim_paramsF = {
20 | 'optim_type': 'sgd', 'lr': 0.1, 'momentum':0.9, 'weight_decay': 5e-4,
21 | 'nesterov': True,
22 | 'LUT_lr':[(30, 0.1), (60, 0.01), (90, 0.001), (100, 0.0001)]}
23 | networks['feature_extractor'] = {
24 | 'def_file': 'feature_extractors.resnet_feat.py', 'pretrained': None,
25 | 'opt': {'userelu': False, 'restype': 'ResNet10'},
26 | 'optim_params': net_optim_paramsF}
27 |
28 | net_optim_paramsC = {
29 | 'optim_type': 'sgd', 'lr': 0.1, 'momentum':0.9, 'weight_decay': 5e-4,
30 | 'nesterov': True,
31 | 'LUT_lr':[(30, 0.1), (60, 0.01), (90, 0.001), (100, 0.0001)]}
32 | net_optionsC = {
33 | 'num_features':512,
34 | 'num_classes': 1000,
35 | 'global_pooling': False,
36 | 'scale_cls': 10,
37 | 'learn_scale': True}
38 | networks['classifier'] = {
39 | 'def_file': 'classifiers.cosine_classifier_with_weight_generator.py',
40 | 'pretrained': None, 'opt': net_optionsC, 'optim_params': net_optim_paramsC}
41 |
42 | config['networks'] = networks
43 |
44 | criterions = {}
45 | criterions['loss'] = {'ctype':'CrossEntropyLoss', 'opt':None}
46 | config['criterions'] = criterions
47 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/config/imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN.py:
--------------------------------------------------------------------------------
1 | config = {}
2 | # set the parameters related to the training and testing set
3 |
4 | nKbase = 389
5 | nKnovel = 200
6 | nExemplars = 1
7 |
8 | data_train_opt = {}
9 | data_train_opt['nKnovel'] = nKnovel
10 | data_train_opt['nKbase'] = nKbase
11 | data_train_opt['nExemplars'] = nExemplars
12 | data_train_opt['nTestNovel'] = nKnovel
13 | data_train_opt['nTestBase'] = nKnovel
14 | data_train_opt['batch_size'] = 4
15 | data_train_opt['epoch_size'] = 4000
16 | data_train_opt['data_dir'] = './datasets/feature_datasets/imagenet_ResNet10CosineClassifier'
17 |
18 | config['data_train_opt'] = data_train_opt
19 | config['max_num_epochs'] = 15
20 |
21 | num_features = 512
22 |
23 | networks = {}
24 | networks['feature_extractor'] = {
25 | 'def_file': 'feature_extractors.dumb_feat', 'pretrained': None,
26 | 'opt': {'dropout': 0}, 'optim_params': None }
27 |
28 | net_optim_paramsC = {
29 | 'optim_type': 'sgd', 'lr': 0.1, 'momentum':0.9, 'weight_decay': 5e-4,
30 | 'nesterov': True, 'LUT_lr':[(10, 0.01), (15, 0.001)]}
31 | pretrainedC = './experiments/imagenet_ResNet10CosineClassifier/classifier_net_epoch100'
32 |
33 | net_optionsC = {
34 | 'num_features': num_features,
35 | 'num_classes': 1000,
36 | 'global_pooling': False,
37 | 'scale_cls': 10.0,
38 | 'learn_scale': True,
39 | 'dae_config': {
40 | 'gaussian_noise': 0.08,
41 | 'comp_reconstruction_loss': True,
42 | 'targets_as_input': False,
43 | 'dae_type': 'RelationNetBasedGNN',
44 | 'num_layers': 2,
45 | 'num_features_input': num_features,
46 | 'num_features_output': 2 * num_features,
47 | 'num_features_hidden': 3 * num_features,
48 | 'update_dropout': 0.7,
49 |
50 | 'nun_features_msg': 3 * num_features,
51 | 'aggregation_dropout': 0.7,
52 | 'topK_neighbors': 10,
53 | 'temperature': 5.0,
54 | 'learn_temperature': False,
55 | },
56 | }
57 | networks['classifier'] = {
58 | 'def_file': 'classifiers.cosine_classifier_with_DAE_weight_generator',
59 | 'pretrained': pretrainedC, 'opt': net_optionsC,
60 | 'optim_params': net_optim_paramsC}
61 | config['networks'] = networks
62 |
63 | config['criterions'] = {}
64 |
65 | config['reconstruction_loss_coef'] = 1.0
66 | config['classification_loss_coef'] = 1.0
67 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/data/mini_imagenet_split/train_val_test_classnames.csv:
--------------------------------------------------------------------------------
1 | n01532829
2 | n01558993
3 | n01704323
4 | n01749939
5 | n01770081
6 | n01843383
7 | n01910747
8 | n02074367
9 | n02089867
10 | n02091831
11 | n02101006
12 | n02105505
13 | n02108089
14 | n02108551
15 | n02108915
16 | n02111277
17 | n02113712
18 | n02120079
19 | n02165456
20 | n02457408
21 | n02606052
22 | n02687172
23 | n02747177
24 | n02795169
25 | n02823428
26 | n02966193
27 | n03017168
28 | n03047690
29 | n03062245
30 | n03207743
31 | n03220513
32 | n03337140
33 | n03347037
34 | n03400231
35 | n03476684
36 | n03527444
37 | n03676483
38 | n03838899
39 | n03854065
40 | n03888605
41 | n03908618
42 | n03924679
43 | n03998194
44 | n04067472
45 | n04243546
46 | n04251144
47 | n04258138
48 | n04275548
49 | n04296562
50 | n04389033
51 | n04435653
52 | n04443257
53 | n04509417
54 | n04515003
55 | n04596742
56 | n04604644
57 | n04612504
58 | n06794110
59 | n07584110
60 | n07697537
61 | n07747607
62 | n09246464
63 | n13054560
64 | n13133613
65 | n03535780
66 | n03075370
67 | n02981792
68 | n03980874
69 | n03770439
70 | n02091244
71 | n02114548
72 | n02174001
73 | n03417042
74 | n02971356
75 | n03584254
76 | n02138441
77 | n03773504
78 | n02950826
79 | n01855672
80 | n09256479
81 | n02110341
82 | n01930112
83 | n02219486
84 | n02443484
85 | n01981276
86 | n02129165
87 | n04522168
88 | n02099601
89 | n03775546
90 | n02110063
91 | n02116738
92 | n03146219
93 | n02871525
94 | n03127925
95 | n03544143
96 | n03272010
97 | n07613480
98 | n04146614
99 | n04418357
100 | n04149813
101 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/__init__.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 |
3 |
4 | project_root = pathlib.Path(__file__).resolve().parents[1]
5 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/algorithms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/baselines/wDAEGNN/low_shot_learning/algorithms/__init__.py
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/algorithms/classification/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/baselines/wDAEGNN/low_shot_learning/algorithms/classification/__init__.py
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/algorithms/classification/classification.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import torch
4 |
5 | import low_shot_learning.algorithms.algorithm as algorithm
6 | import low_shot_learning.algorithms.fewshot.utils as fs_utils
7 | import low_shot_learning.algorithms.classification.utils as utils
8 |
9 |
10 | class Classification(algorithm.Algorithm):
11 | def __init__(self, opt, _run=None, _log=None):
12 | super().__init__(opt, _run, _log)
13 | feature_name = opt['feature_name'] if ('feature_name' in opt) else None
14 |
15 | if feature_name:
16 | assert isinstance(feature_name, (list, tuple))
17 |
18 | self.feature_name = feature_name
19 |
20 | def allocate_tensors(self):
21 | self.tensors = {}
22 | self.tensors['images'] = torch.FloatTensor()
23 | self.tensors['labels'] = torch.LongTensor()
24 |
25 | def set_tensors(self, batch):
26 | assert len(batch) == 2
27 | images, labels = batch
28 | self.tensors['images'].resize_(images.size()).copy_(images)
29 | self.tensors['labels'].resize_(labels.size()).copy_(labels)
30 |
31 | return 'classification'
32 |
33 | def train_step(self, batch):
34 | return self.process_batch_classification_task(batch, is_train=True)
35 |
36 | def evaluation_step(self, batch):
37 | return self.process_batch_classification_task(batch, is_train=False)
38 |
39 | def process_batch_classification_task(self, batch, is_train):
40 | self.set_tensors(batch)
41 |
42 | if is_train and (self.optimizers.get('feature_extractor') is None):
43 | self.networks['feature_extractor'].eval()
44 |
45 | record = utils.object_classification(
46 | feature_extractor=self.networks['feature_extractor'],
47 | feature_extractor_optimizer=self.optimizers.get('feature_extractor'),
48 | classifier=self.networks['classifier'],
49 | classifier_optimizer=self.optimizers.get('classifier'),
50 | images=self.tensors['images'],
51 | labels=self.tensors['labels'],
52 | is_train=is_train,
53 | base_ids=None,
54 | feature_name=self.feature_name)
55 |
56 | return record
57 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/algorithms/classification/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import torch
4 | import torch.nn.functional as F
5 | import numpy as np
6 |
7 | import low_shot_learning.utils as utils
8 |
9 |
10 | def compute_top1_and_top5_accuracy(scores, labels):
11 | topk_scores, topk_labels = scores.topk(5, 1, True, True)
12 | label_ind = labels.cpu().numpy()
13 | topk_ind = topk_labels.cpu().numpy()
14 | top1_correct = topk_ind[:,0] == label_ind
15 | top5_correct = np.sum(topk_ind == label_ind.reshape((-1,1)), axis=1)
16 | return top1_correct.astype(float).mean() * 100, top5_correct.astype(float).mean() * 100
17 |
18 |
19 | def extract_features(feature_extractor, images, feature_name=None):
20 | if feature_name:
21 | if isinstance(feature_name, str):
22 | feature_name = [feature_name,]
23 | assert isinstance(feature_name, (list, tuple))
24 |
25 | features = feature_extractor(images, out_feat_keys=feature_name)
26 | else:
27 | features = feature_extractor(images)
28 |
29 | return features
30 |
31 |
32 | def classification_task(classifier, features, labels, base_ids=None):
33 | if base_ids is not None:
34 | assert(base_ids.dim() == 2)
35 | batch_size = features.size(0)
36 | meta_batch_size = base_ids.size(0)
37 | features = utils.add_dimension(features, dim_size=meta_batch_size)
38 | scores = classifier(features_test=features, base_ids=base_ids)
39 | scores = scores.view(batch_size, -1)
40 | else:
41 | scores = classifier(features)
42 |
43 | loss = F.cross_entropy(scores, labels)
44 |
45 | return scores, loss
46 |
47 |
48 | def object_classification(
49 | feature_extractor,
50 | feature_extractor_optimizer,
51 | classifier,
52 | classifier_optimizer,
53 | images,
54 | labels,
55 | is_train,
56 | base_ids=None,
57 | feature_name=None):
58 |
59 | if isinstance(feature_name, (list, tuple)) and len(feature_name) > 1:
60 | assert base_ids is None
61 | return object_classification_multiple_features(
62 | feature_extractor=feature_extractor,
63 | feature_extractor_optimizer=feature_extractor_optimizer,
64 | classifier=classifier,
65 | classifier_optimizer=classifier_optimizer,
66 | images=images,
67 | labels=labels,
68 | is_train=is_train,
69 | feature_name=feature_name)
70 |
71 | assert images.dim() == 4
72 | assert labels.dim() == 1
73 | assert images.size(0) == labels.size(0)
74 |
75 | if is_train: # Zero gradients.
76 | if feature_extractor_optimizer:
77 | feature_extractor_optimizer.zero_grad()
78 | classifier_optimizer.zero_grad()
79 |
80 | record = {}
81 | train_feature_extractor = (
82 | is_train and (feature_extractor_optimizer is not None))
83 | with torch.set_grad_enabled(train_feature_extractor):
84 | # Extract features from the images.
85 | features = extract_features(
86 | feature_extractor, images, feature_name=feature_name)
87 |
88 | if not train_feature_extractor:
 89 |             # Make sure that no gradients are backpropagated to the feature
 90 |             # extractor when the feature extraction model is frozen.
91 | features = features.detach()
92 |
93 | with torch.set_grad_enabled(is_train):
94 | # Perform the object classification task.
 95 |         scores_classification, loss_classification = classification_task(
 96 |             classifier, features, labels, base_ids)
 97 |         loss_total = loss_classification
98 | record['loss'] = loss_total.item()
99 |
100 | with torch.no_grad(): # Compute accuracies.
101 | AccuracyTop1, AccuracyTop5 = compute_top1_and_top5_accuracy(
102 | scores_classification, labels)
103 | record['AccuracyTop1'] = AccuracyTop1
104 | record['AccuracyTop5'] = AccuracyTop5
105 | #record['Accuracy'] = utils.top1accuracy(scores_classification, labels)
106 |
107 | if is_train: # Backward loss and apply gradient steps.
108 | loss_total.backward()
109 | if feature_extractor_optimizer:
110 | feature_extractor_optimizer.step()
111 | classifier_optimizer.step()
112 |
113 | return record
114 |
115 |
116 | def object_classification_multiple_features(
117 | feature_extractor,
118 | feature_extractor_optimizer,
119 | classifier,
120 | classifier_optimizer,
121 | images,
122 | labels,
123 | is_train,
124 | feature_name):
125 |
126 | assert isinstance(feature_name, (list, tuple)) and len(feature_name) > 1
127 | assert images.dim() == 4
128 | assert labels.dim() == 1
129 | assert images.size(0) == labels.size(0)
130 |
131 | if is_train: # Zero gradients.
132 | if feature_extractor_optimizer:
133 | feature_extractor_optimizer.zero_grad()
134 | classifier_optimizer.zero_grad()
135 |
136 | record = {}
137 | train_feature_extractor = (
138 | is_train and (feature_extractor_optimizer is not None))
139 | with torch.set_grad_enabled(train_feature_extractor):
140 | # Extract features from the images.
141 | features = extract_features(
142 | feature_extractor, images, feature_name=feature_name)
143 | assert len(features) == len(feature_name)
144 |
145 | if not train_feature_extractor:
146 |             # Make sure that no gradients are backpropagated to the feature
147 |             # extractor when the feature extraction model is frozen.
148 | for i in range(len(features)):
149 | features[i] = features[i].detach()
150 |
151 | with torch.set_grad_enabled(is_train):
152 | # Perform the object classification task.
153 | scores = classifier(features)
154 | assert len(scores) == len(feature_name)
155 |
156 | losses = []
157 | for i in range(len(scores)):
158 | losses.append(F.cross_entropy(scores[i], labels))
159 | record['loss_' + feature_name[i]] = losses[i].item()
160 |
161 | with torch.no_grad(): # Compute accuracies.
162 | AccuracyTop1, AccuracyTop5 = compute_top1_and_top5_accuracy(
163 | scores[i], labels)
164 | record['AccuracyTop1_' + feature_name[i]] = AccuracyTop1
165 | record['AccuracyTop5_' + feature_name[i]] = AccuracyTop5
166 |
167 | loss_total = torch.stack(losses).sum()
168 |
169 | if is_train: # Backward loss and apply gradient steps.
170 | loss_total.backward()
171 | if feature_extractor_optimizer:
172 | feature_extractor_optimizer.step()
173 | classifier_optimizer.step()
174 |
175 | return record
176 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/algorithms/fewshot/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/baselines/wDAEGNN/low_shot_learning/algorithms/fewshot/__init__.py
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/algorithms/fewshot/fewshot.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import torch
4 |
5 | import low_shot_learning.algorithms.algorithm as algorithm
6 | import low_shot_learning.algorithms.classification.utils as cls_utils
7 | import low_shot_learning.algorithms.fewshot.utils as fs_utils
8 | import low_shot_learning.utils as utils
9 |
10 |
11 | class FewShot(algorithm.Algorithm):
12 | def __init__(self, opt):
13 | super().__init__(opt)
14 | self.keep_best_model_metric_name = 'AccuracyNovel'
15 | self.nKbase = torch.LongTensor()
16 |
17 | self.activate_dropout = (
18 | opt['activate_dropout'] if ('activate_dropout' in opt) else False)
19 |
20 | self.all_base_cats = (
21 | opt['all_base_cats'] if ('all_base_cats' in opt) else False)
22 |
23 | self.classification_loss_coef = (
24 | opt['classification_loss_coef']
25 | if ('classification_loss_coef' in opt) else 1.0)
26 |
27 | self.reconstruction_loss_coef = (
28 | opt['reconstruction_loss_coef']
29 | if ('reconstruction_loss_coef' in opt) else 0.0)
30 |
31 | self.only_novel = opt['only_novel'] if ('only_novel' in opt) else False
32 |
33 | self.accuracies = {}
34 |
35 | def allocate_tensors(self):
36 | self.tensors = {}
37 | self.tensors['images_train'] = torch.FloatTensor()
38 | self.tensors['labels_train'] = torch.LongTensor()
39 | self.tensors['labels_train_1hot'] = torch.FloatTensor()
40 | self.tensors['images_test'] = torch.FloatTensor()
41 | self.tensors['labels_test'] = torch.LongTensor()
42 | self.tensors['Kids'] = torch.LongTensor()
43 |
44 | def set_tensors(self, batch):
45 | self.nKbase = self.dloader.nKbase
46 | self.nKnovel = self.dloader.nKnovel
47 |
48 | if self.nKnovel > 0:
49 | train_test_stage = 'fewshot'
50 | assert(len(batch) == 6)
51 | images_train, labels_train, images_test, labels_test, K, nKbase = batch
52 | self.nKbase = nKbase[0].item()
53 | self.tensors['images_train'].resize_(
54 | images_train.size()).copy_(images_train)
55 | self.tensors['labels_train'].resize_(
56 | labels_train.size()).copy_(labels_train)
57 | labels_train = self.tensors['labels_train']
58 |
59 | nKnovel = 1 + labels_train.max().item() - self.nKbase
60 |
61 | labels_train_1hot_size = list(labels_train.size()) + [nKnovel,]
62 | labels_train_unsqueeze = labels_train.unsqueeze(dim=labels_train.dim())
63 | self.tensors['labels_train_1hot'].resize_(labels_train_1hot_size).fill_(0).scatter_(
64 | len(labels_train_1hot_size) - 1, labels_train_unsqueeze - self.nKbase, 1)
65 | self.tensors['images_test'].resize_(images_test.size()).copy_(images_test)
66 | self.tensors['labels_test'].resize_(labels_test.size()).copy_(labels_test)
67 | self.tensors['Kids'].resize_(K.size()).copy_(K)
68 | else:
69 | train_test_stage = 'base_classification'
70 | assert(len(batch) == 4)
71 | images_test, labels_test, K, nKbase = batch
72 | self.nKbase = nKbase.squeeze().item()
73 | self.tensors['images_test'].resize_(images_test.size()).copy_(images_test)
74 | self.tensors['labels_test'].resize_(labels_test.size()).copy_(labels_test)
75 | self.tensors['Kids'].resize_(K.size()).copy_(K)
76 |
77 | return train_test_stage
78 |
79 | def train_step(self, batch):
80 | return self.process_batch(batch, is_train=True)
81 |
82 | def evaluation_step(self, batch):
83 | return self.process_batch(batch, is_train=False)
84 |
85 | def process_batch(self, batch, is_train):
86 | process_type = self.set_tensors(batch)
87 | if process_type=='fewshot':
88 | return self.process_batch_fewshot_classification_task(is_train)
89 | elif process_type=='base_classification':
90 | return self.process_batch_base_class_classification_task(is_train)
91 | else:
92 | raise ValueError('Unexpected process type {0}'.format(process_type))
93 |
94 | def process_batch_base_class_classification_task(self, is_train):
95 | images = self.tensors['images_test']
96 | labels = self.tensors['labels_test']
97 | Kids = self.tensors['Kids']
98 | base_ids = None if (self.nKbase==0) else Kids[:,:self.nKbase].contiguous()
99 |
100 | assert(images.dim() == 5 and labels.dim() == 2)
101 | images = utils.convert_from_5d_to_4d(images)
102 | labels = labels.view(-1)
103 |
104 | if self.optimizers.get('feature_extractor') is None:
105 | self.networks['feature_extractor'].eval()
106 | if is_train and self.activate_dropout:
107 |             utils.activate_dropout_units(self.networks['feature_extractor'])
108 |
109 | record = cls_utils.object_classification(
110 | feature_extractor=self.networks['feature_extractor'],
111 | feature_extractor_optimizer=self.optimizers['feature_extractor'],
112 | classifier=self.networks['classifier'],
113 | classifier_optimizer=self.optimizers['classifier'],
114 | images=images,
115 | labels=labels,
116 | is_train=is_train,
117 | base_ids=base_ids)
118 |
119 | return record
120 |
121 | def process_batch_fewshot_classification_task(self, is_train):
122 | if self.only_novel:
123 | raise ValueError('Not implemented yet.')
124 |
125 | Kids = self.tensors['Kids']
126 | nKbase = self.nKbase
127 | if is_train and self.all_base_cats:
128 | assert(nKbase==0)
129 | base_ids = Kids
130 | else:
131 | base_ids = None if (self.nKbase==0) else Kids[:,:nKbase].contiguous()
132 |
133 | self.networks['classifier']._novel_ids = Kids[:,nKbase:].contiguous()
134 | if self.optimizers.get('feature_extractor') is None:
135 | self.networks['feature_extractor'].eval()
136 | if is_train and self.activate_dropout:
137 | utils.activate_dropout_units(self.networks['feature_extractor'])
138 |
139 | record = fs_utils.fewshot_classification(
140 | feature_extractor=self.networks['feature_extractor'],
141 | feature_extractor_optimizer=self.optimizers.get('feature_extractor'),
142 | classifier=self.networks['classifier'],
143 | classifier_optimizer=self.optimizers['classifier'],
144 | images_train=self.tensors['images_train'],
145 | labels_train=self.tensors['labels_train'],
146 | labels_train_1hot=self.tensors['labels_train_1hot'],
147 | images_test=self.tensors['images_test'],
148 | labels_test=self.tensors['labels_test'],
149 | is_train=is_train,
150 | base_ids=base_ids,
151 | classification_coef=self.classification_loss_coef,
152 | reconstruction_coef=self.reconstruction_loss_coef)
153 |
154 | if not is_train:
155 | metrics = ['AccuracyNovel',]
156 | if 'AccuracyBoth' in record:
157 | metrics.append('AccuracyBoth')
158 | record, self.accuracies = fs_utils.compute_95confidence_intervals(
159 | record, episode=self.biter, num_episodes=self.bnumber,
160 | store_accuracies=self.accuracies, metrics=metrics)
161 |
162 | return record
163 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/algorithms/fewshot/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import numpy as np
4 | import torch
5 | import torch.nn.functional as F
6 |
7 | import low_shot_learning.algorithms.classification.utils as cls_utils
8 | import low_shot_learning.utils as utils
9 |
10 |
11 | def few_shot_feature_classification(
12 | classifier, features_test, features_train, labels_train_1hot, labels_test,
13 | base_ids=None):
14 | if base_ids is not None:
15 | classification_scores = classifier(
16 | features_test=features_test,
17 | features_train=features_train,
18 | labels_train=labels_train_1hot,
19 | base_ids=base_ids)
20 | else:
21 | classification_scores = classifier(
22 | features_test=features_test,
23 | features_train=features_train,
24 | labels_train=labels_train_1hot)
25 |
26 | assert(classification_scores.dim() == 3)
27 |
28 | classification_scores = classification_scores.view(
29 | classification_scores.size(0) * classification_scores.size(1), -1)
30 | labels_test = labels_test.view(-1)
31 | assert(classification_scores.size(0) == labels_test.size(0))
32 |
33 | loss = F.cross_entropy(classification_scores, labels_test)
34 |
35 | return classification_scores, loss
36 |
37 |
 38 | def compute_accuracy_metrics(scores, labels, num_base, record=None, string_id=''):
 39 |     record = {} if record is None else record  # avoid sharing a mutable default dict across calls
 40 |     assert(isinstance(record, dict))
41 | if string_id != '':
42 | string_id = '_' + string_id
43 |
44 | if labels.dim() > 1:
45 | labels = labels.view(scores.size(0))
46 |
47 | if num_base > 0:
48 | record['AccuracyBoth' + string_id] = utils.top1accuracy(scores, labels)
49 | # scores = scores.cpu()
50 | # labels = labels.cpu()
51 |
52 | base_indices = torch.nonzero(labels < num_base).view(-1)
53 | novel_indices = torch.nonzero(labels >= num_base).view(-1)
54 | if base_indices.dim() != 0 and base_indices.size(0) > 0:
55 | scores_base = scores[base_indices][:, :num_base]
56 | labels_base = labels[base_indices]
57 | record['AccuracyBase' + string_id] = utils.top1accuracy(
58 | scores_base, labels_base)
59 |
60 | scores_novel = scores[novel_indices,:][:, num_base:]
61 | labels_novel = labels[novel_indices] - num_base
62 | record['AccuracyNovel' + string_id] = utils.top1accuracy(
63 | scores_novel, labels_novel)
64 | else:
65 | record['AccuracyNovel' + string_id] = utils.top1accuracy(scores, labels)
66 |
67 | return record
68 |
69 |
70 | def fewshot_classification(
71 | feature_extractor,
72 | feature_extractor_optimizer,
73 | classifier,
74 | classifier_optimizer,
75 | images_train,
76 | labels_train,
77 | labels_train_1hot,
78 | images_test,
79 | labels_test,
80 | is_train,
81 | base_ids=None,
82 | feature_name=None,
83 | classification_coef=1.0,
84 | reconstruction_coef=0.0):
85 |
86 | assert(images_train.dim() == 5)
87 | assert(images_test.dim() == 5)
88 | assert(images_train.size(0) == images_test.size(0))
89 | assert(images_train.size(2) == images_test.size(2))
90 | assert(images_train.size(3) == images_test.size(3))
91 | assert(images_train.size(4) == images_test.size(4))
92 | assert(labels_train.dim() == 2)
93 | assert(labels_test.dim() == 2)
94 | assert(labels_train.size(0) == labels_test.size(0))
95 | assert(labels_train.size(0) == images_train.size(0))
96 |
97 | if (feature_name and
98 | isinstance(feature_name, (list, tuple)) and
99 | len(feature_name) > 1):
100 | assert is_train is False
101 | assert reconstruction_coef == 0.0
102 | assert classification_coef == 1.0
103 | return fewshot_classification_multiple_features(
104 | feature_extractor=feature_extractor,
105 | feature_extractor_optimizer=feature_extractor_optimizer,
106 | classifier=classifier,
107 | classifier_optimizer=classifier_optimizer,
108 | images_train=images_train,
109 | labels_train=labels_train,
110 | labels_train_1hot=labels_train_1hot,
111 | images_test=images_test,
112 | labels_test=labels_test,
113 | is_train=is_train,
114 | base_ids=base_ids,
115 | feature_name=feature_name)
116 |
117 | meta_batch_size = images_train.size(0)
118 |
119 | if is_train: # zero the gradients
120 | if feature_extractor_optimizer:
121 | feature_extractor_optimizer.zero_grad()
122 | classifier_optimizer.zero_grad()
123 |
124 | record = {}
125 | with torch.no_grad():
126 | images_train = utils.convert_from_5d_to_4d(images_train)
127 | images_test = utils.convert_from_5d_to_4d(images_test)
128 | labels_test = labels_test.view(-1)
129 | batch_size_train = images_train.size(0)
130 | # batch_size_test = images_test.size(0)
131 | images = torch.cat([images_train, images_test], dim=0)
132 |
133 | train_feature_extractor = (
134 | is_train and (feature_extractor_optimizer is not None))
135 | with torch.set_grad_enabled(train_feature_extractor):
136 | # Extract features from the train and test images.
137 | features = cls_utils.extract_features(
138 | feature_extractor, images, feature_name=feature_name)
139 |
140 | if not train_feature_extractor:
141 |                 # Make sure that no gradients are backpropagated to the feature
142 |                 # extractor when the feature extraction model is frozen.
143 | features = features.detach()
144 |
145 | with torch.set_grad_enabled(is_train):
146 | features_train = features[:batch_size_train]
147 | features_test = features[batch_size_train:]
148 | features_train = utils.add_dimension(features_train, meta_batch_size)
149 | features_test = utils.add_dimension(features_test, meta_batch_size)
150 |
151 | classification_scores, loss = few_shot_feature_classification(
152 | classifier, features_test, features_train, labels_train_1hot,
153 | labels_test, base_ids)
154 | record['loss'] = loss.item()
155 | loss_total = loss * classification_coef
156 |
157 | if is_train and (reconstruction_coef > 0.0):
158 | rec_loss = classifier.reconstruction_loss
159 | assert(rec_loss is not None)
160 | loss_total = loss_total + reconstruction_coef * rec_loss
161 | record['rec_loss'] = rec_loss.item()
162 | record['tot_loss'] = loss_total.item()
163 | #*******************************************************************
164 |
165 | with torch.no_grad():
166 | num_base = base_ids.size(1) if (base_ids is not None) else 0
167 | record = compute_accuracy_metrics(
168 | classification_scores, labels_test, num_base, record)
169 |
170 | if is_train:
171 | loss_total.backward()
172 | if feature_extractor_optimizer:
173 | feature_extractor_optimizer.step()
174 | classifier_optimizer.step()
175 |
176 | return record
177 |
178 |
179 | def fewshot_classification_multiple_features(
180 | feature_extractor,
181 | feature_extractor_optimizer,
182 | classifier,
183 | classifier_optimizer,
184 | images_train,
185 | labels_train,
186 | labels_train_1hot,
187 | images_test,
188 | labels_test,
189 | is_train,
190 | feature_name,
191 | base_ids=None):
192 |
193 | assert is_train is False
194 | assert feature_name and isinstance(feature_name, (list, tuple))
195 |
196 | meta_batch_size = images_train.size(0)
197 | num_base = base_ids.size(1) if (base_ids is not None) else 0
198 |
199 | record = {}
200 | with torch.no_grad():
201 | images_train = utils.convert_from_5d_to_4d(images_train)
202 | images_test = utils.convert_from_5d_to_4d(images_test)
203 | labels_test = labels_test.view(-1)
204 | batch_size_train = images_train.size(0)
205 | images = torch.cat([images_train, images_test], dim=0)
206 |
207 | with torch.set_grad_enabled(is_train):
208 | # Extract features from the train and test images.
209 | features = cls_utils.extract_features(
210 | feature_extractor, images, feature_name=feature_name)
211 | assert len(features) == len(feature_name)
212 |
213 | for i, feature_name_i in enumerate(feature_name):
214 | features_train = features[i][:batch_size_train]
215 | features_test = features[i][batch_size_train:]
216 | features_train = utils.add_dimension(
217 | features_train, meta_batch_size)
218 | features_test = utils.add_dimension(
219 | features_test, meta_batch_size)
220 |
221 | if isinstance(classifier, (list, tuple)):
222 | assert len(classifier) == len(feature_name)
223 | classifier_this = classifier[i]
224 | else:
225 | classifier_this = classifier
226 |
227 | classification_scores, loss = few_shot_feature_classification(
228 | classifier_this, features_test, features_train,
229 | labels_train_1hot, labels_test, base_ids)
230 | record['loss_'+feature_name_i] = loss.item()
231 |
232 | with torch.no_grad():
233 | record = compute_accuracy_metrics(
234 | classification_scores, labels_test, num_base, record,
235 | string_id=feature_name_i)
236 |
237 | return record
238 |
239 |
240 | def compute_95confidence_intervals(
241 | record,
242 | episode,
243 | num_episodes,
244 | store_accuracies,
245 | metrics=['AccuracyNovel',]):
246 |
247 | if episode==0:
248 | store_accuracies = {metric: [] for metric in metrics}
249 |
250 | for metric in metrics:
251 | store_accuracies[metric].append(record[metric])
252 | if episode == (num_episodes - 1):
253 | # Compute std and confidence interval of the 'metric' accuracies.
254 | accuracies = np.array(store_accuracies[metric])
255 | stds = np.std(accuracies, 0)
256 | record[metric + '_std'] = stds
257 | record[metric + '_cnf'] = 1.96*stds/np.sqrt(num_episodes)
258 |
259 | return record, store_accuracies
260 |
261 |
262 | def compute_weight_orthogonality_loss(cls_weights):
263 |
264 | nKall = cls_weights.size(1)
265 | device = 'cuda' if cls_weights.is_cuda else 'cpu'
266 | orthogonality_loss = torch.add(
267 | torch.bmm(cls_weights, cls_weights.transpose(1,2)),
268 | -torch.eye(nKall).to(device).view(1, nKall, nKall)).abs().mean()
269 |
270 | return orthogonality_loss
271 |
--------------------------------------------------------------------------------
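For reference, a minimal standalone sketch of the 95% confidence-interval computation performed by `compute_95confidence_intervals` above (the per-episode accuracy values are illustrative, not taken from any run):

    import numpy as np

    num_episodes = 5
    accuracies = []
    for episode in range(num_episodes):
        record = {'AccuracyNovel': 50.0 + episode}   # hypothetical per-episode accuracy
        accuracies.append(record['AccuracyNovel'])

    std = np.std(np.array(accuracies), 0)
    conf95 = 1.96 * std / np.sqrt(num_episodes)      # stored as 'AccuracyNovel_cnf' above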
/baselines/wDAEGNN/low_shot_learning/algorithms/utils/save_features.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import os
4 |
5 | import h5py
6 | import numpy as np
7 | import torch
8 | from tqdm import tqdm
9 |
10 | import low_shot_learning.utils as utils
11 | import low_shot_learning.architectures.tools as tools
12 | import low_shot_learning.algorithms.algorithm as algorithm
13 | import low_shot_learning.algorithms.classification.utils as cls_utils
14 |
15 |
16 | class SaveFeatures(algorithm.Algorithm):
17 | def __init__(self, opt):
18 | if 'classifier' in opt['networks']:
19 | del opt['networks']['classifier']
20 |
21 | super().__init__(opt)
22 |
23 | def allocate_tensors(self):
24 | self.tensors = {}
25 | self.tensors['images'] = torch.FloatTensor()
26 | self.tensors['labels'] = torch.LongTensor()
27 |
28 | def set_tensors(self, batch):
29 | assert len(batch) == 2
30 | images, labels = batch
31 | self.tensors['images'].resize_(images.size()).copy_(images)
32 | self.tensors['labels'].resize_(labels.size()).copy_(labels)
33 |
34 | def save_features(
35 | self,
36 | dataloader,
37 | filename,
38 | feature_name=None,
39 | global_pooling=True):
40 | """Saves features and labels for each image in the dataloader.
41 |
42 |         This routine uses the trained feature model (i.e.,
43 | self.networks['feature_extractor']) in order to extract a feature for each
44 | image in the dataloader. The extracted features along with the labels
45 |         of the images that they come from are saved in an h5py file.
46 |
47 | Args:
48 | dataloader: A dataloader that feeds images and labels.
49 | filename: The file name where the features and the labels of each
50 |                 image in the dataloader are saved.
51 | """
52 |
53 | if isinstance(feature_name, (list, tuple)):
54 | assert len(feature_name) == 1
55 |
56 | feature_extractor = self.networks['feature_extractor']
57 | feature_extractor.eval()
58 |
59 | self.dloader = dataloader
60 | dataloader_iterator = dataloader.get_iterator()
61 |
62 | self.logger.info(
63 | 'Destination filename for features: {0}'.format(filename))
64 |
65 | data_file = h5py.File(filename, 'w')
66 | max_count = len(dataloader_iterator) * dataloader_iterator.batch_size
67 | all_labels = data_file.create_dataset(
68 | 'all_labels', (max_count,), dtype='i')
69 | all_features = None
70 |
71 | count = 0
72 | for i, batch in enumerate(tqdm(dataloader_iterator)):
73 | with torch.no_grad():
74 | self.set_tensors(batch)
75 | images = self.tensors['images'].detach()
76 | labels = self.tensors['labels'].detach()
77 | assert images.dim()==4
78 | assert labels.dim()==1
79 |
80 | features = cls_utils.extract_features(
81 | feature_extractor, images, feature_name=feature_name)
82 |
83 | if global_pooling and features.dim() == 4:
84 | features = tools.global_pooling(features, pool_type='avg')
85 | features = features.view(features.size(0), -1)
86 | assert(features.dim()==2)
87 |
88 | if all_features is None:
89 | self.logger.info('Image size: {0}'.format(images.size()))
90 | self.logger.info('Feature size: {0}'.format(features.size()))
91 | self.logger.info('Max_count: {0}'.format(max_count))
92 | all_features = data_file.create_dataset(
93 | 'all_features', (max_count, features.size(1)), dtype='f')
94 | self.logger.info('Number of feature channels: {0}'.format(
95 | features.size(1)))
96 |
97 | all_features[count:(count + features.size(0)), :] = (
98 | features.cpu().numpy())
99 | all_labels[count:(count + features.size(0))] = labels.cpu().numpy()
100 | count = count + features.size(0)
101 |
102 |         self.logger.info('Number of processed images: {0}'.format(count))
103 |
104 | count_var = data_file.create_dataset('count', (1,), dtype='i')
105 | count_var[0] = count
106 | data_file.close()
107 |
--------------------------------------------------------------------------------
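A minimal sketch of reading back a feature file written by `SaveFeatures.save_features` above; the filename is hypothetical, and only the first `count` rows of each dataset are valid:

    import h5py

    with h5py.File('features_train.h5', 'r') as f:    # hypothetical output file
        count = int(f['count'][0])
        features = f['all_features'][:count]          # shape: [count, num_channels]
        labels = f['all_labels'][:count]              # shape: [count]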
/baselines/wDAEGNN/low_shot_learning/architectures/__init__.py:
--------------------------------------------------------------------------------
1 | from importlib import import_module
2 |
3 |
4 |
5 | def factory(architecture_name, *args, **kwargs):
6 | architecture_module = import_module(
7 | '.architectures.' + architecture_name, package='low_shot_learning')
8 | create_model = getattr(architecture_module, 'create_model')
9 | return create_model(*args, **kwargs)
10 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/architectures/classifiers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/baselines/wDAEGNN/low_shot_learning/architectures/classifiers/__init__.py
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/architectures/classifiers/cosine_classifier_with_DAE_weight_generator.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 |
4 | import low_shot_learning.architectures.classifiers.utils as cutils
5 | from low_shot_learning.architectures.classifiers.cosine_classifier_with_weight_generator import \
6 | CosineClassifierWithWeightGeneration
7 | from low_shot_learning.architectures.classifiers.weights_denoising_autoencoder import WeightsDAE
8 |
9 |
10 | def reconstruction_loss(outputs, targets):
11 | # Both outputs and targets have shape:
12 | # [batch_size x num_nodes x num_features]
13 | assert outputs.dim() == 3
14 | assert targets.dim() == 3
15 | assert outputs.size() == targets.size()
16 |
17 | # Since we use cosine classifier the weights must be L_2 normalized.
18 | targets = F.normalize(targets, p=2, dim=targets.dim()-1, eps=1e-12)
19 | outputs = F.normalize(outputs, p=2, dim=outputs.dim()-1, eps=1e-12)
20 | # return the L2 squared loss (averaged over the first 2 dimensions, i.e.,
21 | # batch_size and num_nodes).
22 | return (targets - outputs).pow(2).mean() * outputs.size(2)
23 |
24 |
25 | class CosineClassifierWithDAEWeightGeneration(CosineClassifierWithWeightGeneration):
26 | def __init__(
27 | self,
28 | dae_config,
29 | num_features,
30 | num_classes,
31 | global_pooling,
32 | scale_cls=10.0,
33 | learn_scale=True):
34 |
35 | super(CosineClassifierWithDAEWeightGeneration, self).__init__(
36 | num_features, num_classes, global_pooling, scale_cls, learn_scale)
37 |
38 | self.targets_as_input = (
39 | dae_config['targets_as_input']
40 | if ('targets_as_input' in dae_config) else False)
41 |
42 | self.comp_reconstruction_loss = (
43 | dae_config['comp_reconstruction_loss']
44 | if ('comp_reconstruction_loss' in dae_config) else True)
45 |
46 | self.weights_dae_generator = WeightsDAE(dae_config)
47 |
48 | def get_classification_weights(
49 | self, base_ids, features_train=None, labels_train=None):
50 | """Gets the classification weights of the base and novel categories.
51 |
52 |         This routine returns the classification weights of the base and novel
53 | classes. The latter are returned only if the input arguments
54 | features_train and labels_train are not None.
55 |
56 | Args:
57 | base_ids: A 2D tensor with shape [meta_batch_size x num_base] that
58 |                 for each training episode in the batch it includes the
59 | indices of the base categories that are being used.
60 | `meta_batch_size` is the number of training episodes in the
61 | batch and `num_base` is the number of base classes.
62 | features_train: A 3D tensor with shape
63 | [meta_batch_size x num_train_examples x num_channels] that
64 | represents the `num_channels`-dimensional feature vectors of the
65 | training examples of each training episode in the batch.
66 | `num_train_examples` is the number of train examples in each
67 | training episode. Those training examples are from the novel
68 | classes.
69 | labels_train: A 3D tensor with shape
70 | [meta_batch_size x num_train_examples x num_novel] that
71 |                 represents the labels (encoded as 1-hot vectors of length
72 | num_novel) of the training examples of each training episode in
73 | the batch. `num_novel` is the number of novel classes.
74 |
75 | Returns:
76 | classification_weights: A 3D tensor of shape
77 | [meta_batch_size x num_classes x num_channels]
78 | that includes the `num_channels`-dimensional classification
79 | weight vectors of the classes involved on each training episode
80 | in the batch. If the training data for the novel classes are not
81 | provided (i.e., features_train or labels_train are None) then
82 | classification_weights includes only the classification
83 |                 weights of the base classes; in this case `num_classes` is equal
84 |                 to `num_base`. Otherwise, classification_weights includes the
85 |                 classification weight vectors of both base and novel classes;
86 | in this case `num_classes` is equal to `num_base` + `num_novel`.
87 | """
88 |
89 | #***********************************************************************
90 | #******** Get the classification weights for the base categories *******
91 | meta_batch_size, num_base = base_ids.size()
92 | weight_base = self.weight_base[base_ids.view(-1)]
93 | weight_base = weight_base.view(meta_batch_size, num_base, -1)
94 | self.num_base = num_base
95 |
96 | #***********************************************************************
97 | if features_train is None or labels_train is None:
98 | # If training data for the novel categories are not provided then
99 | # return only the classification weights of the base categories.
100 | return weight_base
101 |
102 | num_novel = labels_train.size(2)
103 |
104 | if features_train.dim() == 5:
105 | features_train = cutils.preprocess_5D_features(
106 | features_train, self.global_pooling)
107 | assert features_train.dim() == 3
108 | assert features_train.size(2) == self.num_features
109 | features_train = F.normalize(features_train, p=2, dim=2, eps=1e-12)
110 |
111 | #***********************************************************************
112 | #******* Generate classification weights for base & novel classes ******
113 | weight_base = weight_base.detach()
114 |
115 | if ((self.targets_as_input or self.comp_reconstruction_loss) and
116 | self.training):
117 | novel_ids = self._novel_ids
118 | assert novel_ids.size(1) == num_novel
119 | weight_novel_target = self.weight_base[novel_ids.view(-1)].detach()
120 | weight_novel_target = weight_novel_target.view(
121 | meta_batch_size, num_novel, self.num_features)
122 |
123 | if self.targets_as_input and self.training:
124 | weight_novel = weight_novel_target
125 | else:
126 | # Estimate the initial classification weights for the novel classes
127 | # by computing the average of the feature vectors of their training
128 | # examples.
129 | weight_novel = cutils.average_train_features(
130 | features_train, labels_train)
131 |
132 | input_weights = torch.cat([weight_base, weight_novel], dim=1)
133 | # Since we use cosine classifier the weights must be L_2 normalized.
134 | # input_weights = F.normalize(input_weights, p=2, dim=2, eps=1e-12)
135 |
136 | output_weights = self.weights_dae_generator(input_weights)
137 | #***********************************************************************
138 |
139 | if self.training and self.comp_reconstruction_loss:
140 | targets_weights = torch.cat([weight_base, weight_novel_target], 1)
141 | self.reconstruction_loss = reconstruction_loss(
142 | output_weights, targets_weights)
143 | else:
144 | self.reconstruction_loss = None
145 |
146 | return output_weights
147 |
148 |
149 | def create_model(opt):
150 | return CosineClassifierWithDAEWeightGeneration(
151 | dae_config=opt['dae_config'],
152 | num_features=opt['num_features'],
153 | num_classes=opt['num_classes'],
154 | global_pooling=opt['global_pooling'],
155 | scale_cls=opt['scale_cls'],
156 | learn_scale=(opt['learn_scale'] if ('learn_scale' in opt) else True))
157 |
--------------------------------------------------------------------------------
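A minimal standalone sketch of the `reconstruction_loss` defined above: both tensors are L2-normalized per node, so the loss is insensitive to the raw magnitude of the generated weights (shapes are illustrative):

    import torch
    import torch.nn.functional as F

    meta_batch_size, num_nodes, num_features = 2, 10, 512        # illustrative shapes
    outputs = torch.randn(meta_batch_size, num_nodes, num_features)
    targets = torch.randn(meta_batch_size, num_nodes, num_features)

    targets_n = F.normalize(targets, p=2, dim=2, eps=1e-12)
    outputs_n = F.normalize(outputs, p=2, dim=2, eps=1e-12)
    # Same value as reconstruction_loss(outputs, targets) above.
    loss = (targets_n - outputs_n).pow(2).mean() * num_features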
/baselines/wDAEGNN/low_shot_learning/architectures/classifiers/few_shot_classification_with_prototypes.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | import low_shot_learning.architectures.classifiers.utils as cutils
6 | import low_shot_learning.architectures.tools as tools
7 |
8 |
9 | class FewShotClassifierWithPrototypes(nn.Module):
10 | def __init__(self, global_pooling, scale_cls=10.0, learn_scale=True):
11 | super(FewShotClassifierWithPrototypes, self).__init__()
12 |
13 | self.global_pooling = global_pooling
14 | self.scale_cls = nn.Parameter(
15 | torch.FloatTensor(1).fill_(scale_cls), requires_grad=learn_scale)
16 |
17 | def forward(self, features_test, features_train, labels_train):
18 |
19 | #******* Generate classification weights for the novel categories ******
20 | if features_train.dim() == 5:
21 | features_train = cutils.preprocess_5D_features(
22 | features_train, self.global_pooling)
23 | assert(features_train.dim() == 3)
24 |
25 | meta_batch_size = features_train.size(0)
26 | num_novel = labels_train.size(2)
27 | features_train = F.normalize(features_train, p=2, dim=2, eps=1e-12)
28 | classification_weights = cutils.average_train_features(
29 | features_train, labels_train)
30 | classification_weights = classification_weights.view(
31 | meta_batch_size, num_novel, -1)
32 | #***********************************************************************
33 |
34 | if features_test.dim() == 5:
35 | features_test = cutils.preprocess_5D_features(
36 | features_test, self.global_pooling)
37 | assert(features_test.dim() == 3)
38 |
39 | classification_scores = tools.batch_cosine_fully_connected_layer(
40 | features_test, classification_weights.transpose(1,2),
41 | scale=self.scale_cls)
42 |
43 | return classification_scores
44 |
45 |
46 | def create_model(opt):
47 | return FewShotClassifierWithPrototypes(
48 | global_pooling=opt['global_pooling'],
49 | scale_cls=opt['scale_cls'],
50 | learn_scale=opt['learn_scale'])
51 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/architectures/classifiers/matching_network_head.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class MatchingNetworkHead(nn.Module):
7 | def __init__(self, opt):
8 | super(MatchingNetworkHead, self).__init__()
9 | scale_cls = opt['scale_cls'] if ('scale_cls' in opt) else 10.0
10 | self.scale_cls = nn.Parameter(
11 | torch.FloatTensor(1).fill_(scale_cls), requires_grad=True)
12 |
13 | def forward(self, features_test, features_train, labels_train):
14 | """Recognize novel categories based on the Matching Nets approach.
15 |
16 |         Classify the test examples (i.e., `features_test`) based on the
17 |         available training examples (i.e., `features_train` and `labels_train`),
18 |         following the Matching Nets approach.
19 |
20 | Args:
21 | features_test: A 3D tensor with shape
22 | [batch_size x num_test_examples x num_channels] that represents
23 | the test features of each training episode in the batch.
24 | features_train: A 3D tensor with shape
25 | [batch_size x num_train_examples x num_channels] that represents
26 | the train features of each training episode in the batch.
27 | labels_train: A 3D tensor with shape
28 | [batch_size x num_train_examples x nKnovel] that represents
29 | the train labels (encoded as 1-hot vectors) of each training
30 | episode in the batch.
31 |
32 | Return:
33 | scores_cls: A 3D tensor with shape
34 | [batch_size x num_test_examples x nKnovel] that represents the
35 | classification scores of the test feature vectors for the
36 | nKnovel novel categories.
37 | """
38 | assert features_train.dim() == 3
39 | assert labels_train.dim() == 3
40 | assert features_test.dim() == 3
41 | assert features_train.size(0) == labels_train.size(0)
42 | assert features_train.size(0) == features_test.size(0)
43 | assert features_train.size(1) == labels_train.size(1)
44 | assert features_train.size(2) == features_test.size(2)
45 |
46 | batch_size, num_test_examples, num_channels = features_test.size()
47 | num_train_examples = features_train.size(1)
48 | nKnovel = labels_train.size(2)
49 |
50 | # L2 normalize the feature vectors.
51 | features_test = F.normalize(
52 | features_test, p=2, dim=features_test.dim()-1, eps=1e-12)
53 | features_train = F.normalize(
54 | features_train, p=2, dim=features_train.dim()-1, eps=1e-12)
55 |
56 |         # Compute the cosine similarity of the test features with the train
57 | # features. The shape of the cosine similarities tensor is:
58 | # [batch_size x num_test_examples x num_train_examples]
59 | cosine_similarities = self.scale_cls * torch.bmm(
60 | features_test, features_train.transpose(1,2))
61 | # Apply the softmax operator over the images.
62 | cosine_scores = F.softmax(cosine_similarities, dim=2)
63 |
64 |         # Accumulate cosine_scores across images of the same novel category and
65 | # compute the final classification scores for all the novel categories.
66 | scores_cls = torch.bmm(cosine_scores, labels_train)
67 | scores_cls = torch.log(torch.clamp(scores_cls, min=1e-7))
68 |
69 | return scores_cls
70 |
71 | def create_model(opt):
72 | return MatchingNetworkHead(opt)
73 |
--------------------------------------------------------------------------------
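A minimal standalone sketch of the Matching Nets scoring implemented in `MatchingNetworkHead.forward` above: scaled cosine similarities to the support examples, a softmax over them, and per-class accumulation via the 1-hot labels (shapes and the scale value are illustrative):

    import torch
    import torch.nn.functional as F

    batch_size, n_test, n_train, n_channels, n_novel = 2, 15, 25, 512, 5
    features_test = F.normalize(torch.randn(batch_size, n_test, n_channels), p=2, dim=2)
    features_train = F.normalize(torch.randn(batch_size, n_train, n_channels), p=2, dim=2)
    labels = (torch.arange(n_train) % n_novel).expand(batch_size, n_train)
    labels_train_1hot = F.one_hot(labels, n_novel).float()

    similarities = 10.0 * torch.bmm(features_test, features_train.transpose(1, 2))
    scores = torch.bmm(F.softmax(similarities, dim=2), labels_train_1hot)
    log_scores = torch.log(torch.clamp(scores, min=1e-7))   # [batch_size, n_test, n_novel]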
/baselines/wDAEGNN/low_shot_learning/architectures/classifiers/prototypical_network_head.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | def L2SquareDist(A, B, average=True):
7 | # input A must be: [nB x Na x nC]
8 | # input B must be: [nB x Nb x nC]
9 | # output C will be: [nB x Na x Nb]
10 | assert A.dim()==3
11 | assert B.dim()==3
12 | assert A.size(0)==B.size(0) and A.size(2)==B.size(2)
13 | nB = A.size(0)
14 | Na = A.size(1)
15 | Nb = B.size(1)
16 | nC = A.size(2)
17 |
18 | # AB = A * B = [nB x Na x nC] * [nB x nC x Nb] = [nB x Na x Nb]
19 | AB = torch.bmm(A, B.transpose(1,2))
20 |
21 | AA = (A * A).sum(dim=2,keepdim=True).view(nB, Na, 1) # [nB x Na x 1]
22 | BB = (B * B).sum(dim=2,keepdim=True).view(nB, 1, Nb) # [nB x 1 x Nb]
23 | # l2squaredist = A*A + B*B - 2 * A * B
24 | dist = AA.expand_as(AB) + BB.expand_as(AB) - 2 * AB
25 | if average:
26 | dist = dist / nC
27 |
28 | return dist
29 |
30 |
31 | def cosine_similarity(A, B):
32 | # input A must be: [nB x Na x nC]
33 | # input B must be: [nB x Nb x nC]
34 | # output C will be: [nB x Na x Nb]
35 | return torch.bmm(A, B.transpose(1,2))
36 |
37 |
38 | class PrototypicalNetworkHead(nn.Module):
39 | def __init__(self, opt):
40 | super(PrototypicalNetworkHead, self).__init__()
41 | scale_cls = opt['scale_cls'] if ('scale_cls' in opt) else 1.0
42 | tune_scale = opt['tune_scale'] if ('tune_scale' in opt) else True
43 | self.type = opt['type'] if ('type' in opt) else 'euclidean'
44 | assert self.type in ('euclidean', 'cosine')
45 | self.scale_cls = nn.Parameter(
46 | torch.FloatTensor(1).fill_(scale_cls), requires_grad=tune_scale)
47 |
48 | def forward(self, features_test, features_train, labels_train):
49 | """Recognize novel categories based on the Prototypical Nets approach.
50 |
51 |         Classify the test examples (i.e., `features_test`) based on the
52 |         available training examples (i.e., `features_train` and `labels_train`),
53 |         following the Prototypical Nets approach.
54 |
55 | Args:
56 | features_test: A 3D tensor with shape
57 | [batch_size x num_test_examples x num_channels] that represents
58 | the test features of each training episode in the batch.
59 | features_train: A 3D tensor with shape
60 | [batch_size x num_train_examples x num_channels] that represents
61 | the train features of each training episode in the batch.
62 | labels_train: A 3D tensor with shape
63 | [batch_size x num_train_examples x nKnovel] that represents
64 | the train labels (encoded as 1-hot vectors) of each training
65 | episode in the batch.
66 |
67 | Return:
68 | scores_cls: A 3D tensor with shape
69 | [batch_size x num_test_examples x nKnovel] that represents the
70 | classification scores of the test feature vectors for the
71 | nKnovel novel categories.
72 | """
73 | assert features_train.dim() == 3
74 | assert labels_train.dim() == 3
75 | assert features_test.dim() == 3
76 | assert features_train.size(0) == labels_train.size(0)
77 | assert features_train.size(0) == features_test.size(0)
78 | assert features_train.size(1) == labels_train.size(1)
79 | assert features_train.size(2) == features_test.size(2)
80 |
81 | #************************* Compute Prototypes **************************
82 | labels_train_transposed = labels_train.transpose(1,2)
83 | # Batch matrix multiplication:
84 | # prototypes = labels_train_transposed * features_train ==>
85 | # [batch_size x nKnovel x num_channels] =
86 |         #   [batch_size x nKnovel x num_train_examples] * [batch_size x num_train_examples x num_channels]
87 | if self.type == 'cosine':
88 | features_test = F.normalize(
89 | features_test, p=2, dim=features_test.dim()-1, eps=1e-12)
90 | features_train = F.normalize(
91 | features_train, p=2, dim=features_train.dim()-1, eps=1e-12)
92 |
93 | prototypes = torch.bmm(labels_train_transposed, features_train)
94 | # Divide with the number of examples per novel category.
95 | prototypes = prototypes.div(
96 | labels_train_transposed.sum(dim=2, keepdim=True).expand_as(
97 | prototypes))
98 | #***********************************************************************
99 | if self.type == 'cosine':
100 | prototypes = F.normalize(
101 | prototypes, p=2, dim=prototypes.dim()-1, eps=1e-12)
102 | scores_cls = self.scale_cls * cosine_similarity(
103 | features_test, prototypes)
104 | else:
105 | scores_cls = -self.scale_cls * L2SquareDist(
106 | features_test, prototypes)
107 |
108 | return scores_cls
109 |
110 |
111 | def create_model(opt):
112 | return PrototypicalNetworkHead(opt)
113 |
--------------------------------------------------------------------------------
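A minimal standalone sketch of the prototype computation in `PrototypicalNetworkHead.forward` above: class prototypes are the per-class means of the support features, obtained with a batched matrix product against the 1-hot labels (shapes are illustrative):

    import torch
    import torch.nn.functional as F

    batch_size, n_train, n_channels, n_novel = 2, 25, 512, 5
    features_train = torch.randn(batch_size, n_train, n_channels)
    labels = (torch.arange(n_train) % n_novel).expand(batch_size, n_train)
    labels_train_1hot = F.one_hot(labels, n_novel).float()

    labels_t = labels_train_1hot.transpose(1, 2)                  # [batch, n_novel, n_train]
    prototypes = torch.bmm(labels_t, features_train)              # per-class feature sums
    prototypes = prototypes / labels_t.sum(dim=2, keepdim=True)   # per-class means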
/baselines/wDAEGNN/low_shot_learning/architectures/classifiers/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 |
5 | import low_shot_learning.architectures.tools as tools
6 |
7 |
8 | class CosineClassifier(nn.Module):
9 | def __init__(
10 | self,
11 | num_channels,
12 | num_classes,
13 | scale=20.0,
14 | learn_scale=False,
15 | bias=False):
16 | super(CosineClassifier, self).__init__()
17 |
18 | self.num_channels = num_channels
19 | self.num_classes = num_classes
20 |
21 | weight = torch.FloatTensor(num_classes, num_channels).normal_(
22 | 0.0, np.sqrt(2.0/num_channels))
23 | self.weight = nn.Parameter(weight, requires_grad=True)
24 |
25 | if bias:
26 | bias = torch.FloatTensor(num_classes).fill_(0.0)
27 | self.bias = nn.Parameter(bias, requires_grad=True)
28 | else:
29 | self.bias = None
30 |
31 | scale_cls = torch.FloatTensor(1).fill_(scale)
32 | self.scale_cls = nn.Parameter(scale_cls, requires_grad=learn_scale)
33 |
34 | def forward(self, x_in):
35 | assert x_in.dim() == 2
36 | return tools.cosine_fully_connected_layer(
37 | x_in, self.weight.t(), scale=self.scale_cls, bias=self.bias)
38 |
39 | def extra_repr(self):
40 | s = (
41 | 'num_channels={0}, num_classes={1}, scale_cls={2} (learnable={3})'
42 | .format(self.num_channels, self.num_classes, self.scale_cls.item(),
43 | self.scale_cls.requires_grad))
44 |
45 | if self.bias is None:
46 | s += ', bias=False'
47 | return s
48 |
49 |
50 | def average_train_features(features_train, labels_train):
51 | labels_train_transposed = labels_train.transpose(1,2)
52 | weight_novel = torch.bmm(labels_train_transposed, features_train)
53 | weight_novel = weight_novel.div(
54 | labels_train_transposed.sum(dim=2, keepdim=True).expand_as(
55 | weight_novel))
56 |
57 | return weight_novel
58 |
59 |
60 | class FeatExemplarAvgBlock(nn.Module):
61 | def __init__(self):
62 | super(FeatExemplarAvgBlock, self).__init__()
63 |
64 | def forward(self, features_train, labels_train):
65 | return average_train_features(features_train, labels_train)
66 |
67 |
68 | def preprocess_5D_features(features, global_pooling):
69 | meta_batch_size, num_examples, channels, height, width = features.size()
70 | features = features.view(
71 | meta_batch_size * num_examples, channels, height, width)
72 |
73 | if global_pooling:
74 | features = tools.global_pooling(features, pool_type='avg')
75 |
76 | features = features.view(meta_batch_size, num_examples, -1)
77 |
78 | return features
79 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/architectures/feature_extractors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/baselines/wDAEGNN/low_shot_learning/architectures/feature_extractors/__init__.py
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/architectures/feature_extractors/dumb_feat.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class DumbFeat(nn.Module):
6 | def __init__(self, dropout):
7 | super(DumbFeat, self).__init__()
8 |
9 | if dropout > 0.0:
10 | self.dropout = torch.nn.Dropout(p=dropout, inplace=False)
11 | else:
12 | self.dropout = None
13 |
14 | def forward(self, x):
15 |
16 | if x.dim() > 2:
17 | x = x.view(x.size(0), -1)
18 | assert(x.dim()==2)
19 |
20 | if self.dropout is not None:
21 | x = self.dropout(x)
22 |
23 | return x
24 |
25 |
26 | def create_model(opt):
27 | dropout = opt['dropout'] if ('dropout' in opt) else 0.0
28 | return DumbFeat(dropout=dropout)
29 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/architectures/feature_extractors/resnet_feat.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import math
4 |
5 |
6 | def init_layer(L):
7 | # Initialization using fan-in
8 | if isinstance(L, nn.Conv2d):
9 | n = L.kernel_size[0]*L.kernel_size[1]*L.out_channels
10 | L.weight.data.normal_(0,math.sqrt(2.0/float(n)))
11 | elif isinstance(L, nn.BatchNorm2d):
12 | L.weight.data.fill_(1)
13 | L.bias.data.fill_(0)
14 |
15 |
16 | # Simple ResNet Block
17 | class SimpleBlock(nn.Module):
18 | def __init__(self, indim, outdim, half_res, userelu=True):
19 | super(SimpleBlock, self).__init__()
20 | self.indim = indim
21 | self.outdim = outdim
22 | self.C1 = nn.Conv2d(indim, outdim, kernel_size=3, stride=2 if half_res else 1, padding=1, bias=False)
23 | self.relu1 = nn.ReLU(inplace=True)
24 | self.userelu = userelu
25 | self.relu2 = nn.ReLU(inplace=True) if userelu else None
26 | self.BN1 = nn.BatchNorm2d(outdim)
27 | self.C2 = nn.Conv2d(outdim, outdim,kernel_size=3, padding=1,bias=False)
28 | self.BN2 = nn.BatchNorm2d(outdim)
29 |
30 | self.parametrized_layers = [self.C1, self.C2, self.BN1, self.BN2]
31 |
32 | self.half_res = half_res
33 |
34 | # if the input number of channels is not equal to the output, then need a 1x1 convolution
35 | if indim!=outdim:
36 | self.shortcut = nn.Conv2d(indim, outdim, 1, 2 if half_res else 1, bias=False)
37 | self.parametrized_layers.append(self.shortcut)
38 | self.BNshortcut = nn.BatchNorm2d(outdim)
39 | self.parametrized_layers.append(self.BNshortcut)
40 | self.shortcut_type = '1x1'
41 | else:
42 | self.shortcut_type = 'identity'
43 |
44 | for layer in self.parametrized_layers:
45 | init_layer(layer)
46 |
47 | def forward(self, x):
48 | out = self.C1(x)
49 | out = self.BN1(out)
50 | out = self.relu1(out)
51 | out = self.C2(out)
52 | out = self.BN2(out)
53 | short_out = x if self.shortcut_type == 'identity' else self.BNshortcut(self.shortcut(x))
54 | out = out + short_out
55 | if self.userelu: out = self.relu2(out)
56 | return out
57 |
58 |
59 |
60 | # Bottleneck block
61 | class BottleneckBlock(nn.Module):
62 | def __init__(self, indim, outdim, half_res, userelu=True):
63 | super(BottleneckBlock, self).__init__()
64 | bottleneckdim = int(outdim/4)
65 | self.indim = indim
66 | self.outdim = outdim
67 | self.C1 = nn.Conv2d(indim, bottleneckdim, kernel_size=1, bias=False)
68 | self.relu = nn.ReLU()
69 | self.BN1 = nn.BatchNorm2d(bottleneckdim)
70 | self.C2 = nn.Conv2d(bottleneckdim, bottleneckdim, kernel_size=3, stride=2 if half_res else 1,padding=1)
71 | self.BN2 = nn.BatchNorm2d(bottleneckdim)
72 | self.C3 = nn.Conv2d(bottleneckdim, outdim, kernel_size=1, bias=False)
73 | self.BN3 = nn.BatchNorm2d(outdim)
74 |
75 | self.parametrized_layers = [self.C1, self.BN1, self.C2, self.BN2, self.C3, self.BN3]
76 | self.half_res = half_res
77 |
78 | self.userelu = userelu
79 |
80 | # if the input number of channels is not equal to the output, then need a 1x1 convolution
81 | if indim!=outdim:
82 | self.shortcut = nn.Conv2d(indim, outdim, 1, stride=2 if half_res else 1, bias=False)
83 | self.parametrized_layers.append(self.shortcut)
84 | self.shortcut_type = '1x1'
85 | else:
86 | self.shortcut_type = 'identity'
87 |
88 | for layer in self.parametrized_layers:
89 | init_layer(layer)
90 |
91 |
92 | def forward(self, x):
93 | short_out = x if self.shortcut_type == 'identity' else self.shortcut(x)
94 | out = self.C1(x)
95 | out = self.BN1(out)
96 | out = self.relu(out)
97 | out = self.C2(out)
98 | out = self.BN2(out)
99 | out = self.relu(out)
100 | out = self.C3(out)
101 | out = self.BN3(out)
102 | out = out + short_out
103 |
104 | if self.userelu: out = self.relu(out)
105 | return out
106 |
107 |
108 | class ResNet(nn.Module):
109 | def __init__(self,block,list_of_num_layers, list_of_out_dims, userelu=True):
110 | # list_of_num_layers specifies number of layers in each stage
111 | # list_of_out_dims specifies number of output channel for each stage
112 | super(ResNet,self).__init__()
113 | self.grads = []
114 | self.fmaps = []
115 | assert len(list_of_num_layers)==4, 'Can have only four stages'
116 | conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
117 | bias=False)
118 | bn1 = nn.BatchNorm2d(64)
119 | relu = nn.ReLU()
120 | pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
121 |
122 | init_layer(conv1)
123 | init_layer(bn1)
124 |
125 |
126 | trunk = [conv1, bn1, relu, pool1]
127 | indim = 64
128 | for i in range(4):
129 |
130 | for j in range(list_of_num_layers[i]):
131 | half_res = (i>=1) and (j==0)
132 | is_last_layer = (i==3) and (j==list_of_num_layers[i]-1)
133 | userelu_here = userelu if is_last_layer else True
134 | B = block(indim, list_of_out_dims[i], half_res, userelu=userelu_here)
135 | trunk.append(B)
136 | indim = list_of_out_dims[i]
137 | trunk.append(nn.AvgPool2d(7))
138 | self.trunk = nn.Sequential(*trunk)
139 |
140 | self.final_feat_dim = indim
141 |
142 | def forward(self,x):
143 | out = self.trunk(x)
144 | out = out.view(out.size(0),-1)
145 | return out
146 |
147 |
148 | def create_model(opt):
149 | restype = opt['restype']
150 | assert(restype=='ResNet10')
151 | userelu = opt['userelu']
152 | return ResNet(SimpleBlock, [1,1,1,1], [64,128,256,512], userelu=userelu)
153 |
--------------------------------------------------------------------------------
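A minimal usage sketch of the ResNet10 backbone defined above; the 224x224 input resolution is an assumption, chosen so that the final 7x7 average pooling produces a flat 512-dimensional feature:

    import torch
    from low_shot_learning.architectures.feature_extractors.resnet_feat import create_model

    model = create_model({'restype': 'ResNet10', 'userelu': True})
    img = torch.randn(2, 3, 224, 224)    # 224x224 inputs are assumed here
    features = model(img)
    print(features.shape)                # expected: torch.Size([2, 512])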
/baselines/wDAEGNN/low_shot_learning/architectures/feature_extractors/utils.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 |
4 | class SequentialFeatureExtractorAbstractClass(nn.Module):
5 | def __init__(self, all_feat_names, feature_blocks):
6 | super(SequentialFeatureExtractorAbstractClass, self).__init__()
7 |
8 | assert(isinstance(feature_blocks, list))
9 | assert(isinstance(all_feat_names, list))
10 | assert(len(all_feat_names) == len(feature_blocks))
11 |
12 | self.all_feat_names = all_feat_names
13 | self._feature_blocks = nn.ModuleList(feature_blocks)
14 |
15 |
16 | def _parse_out_keys_arg(self, out_feat_keys):
17 | # By default return the features of the last layer / module.
18 | out_feat_keys = (
19 | [self.all_feat_names[-1],] if out_feat_keys is None else
20 | out_feat_keys)
21 |
22 | if len(out_feat_keys) == 0:
23 | raise ValueError('Empty list of output feature keys.')
24 |
25 | for f, key in enumerate(out_feat_keys):
26 | if key not in self.all_feat_names:
27 | raise ValueError(
28 | 'Feature with name {0} does not exist. '
29 | 'Existing features: {1}.'.format(key, self.all_feat_names))
30 | elif key in out_feat_keys[:f]:
31 | raise ValueError(
32 | 'Duplicate output feature key: {0}.'.format(key))
33 |
34 |         # Find the highest output feature in `out_feat_keys`.
35 | max_out_feat = max(
36 | [self.all_feat_names.index(key) for key in out_feat_keys])
37 |
38 | return out_feat_keys, max_out_feat
39 |
40 | def forward(self, x, out_feat_keys=None):
41 | """Forward the image `x` through the network and output the asked features.
42 | Args:
43 | x: input image.
44 | out_feat_keys: a list/tuple with the feature names of the features
45 | that the function should return. If out_feat_keys is None (
46 | DEFAULT) then the last feature of the network is returned.
47 |
48 | Return:
49 | out_feats: If multiple output features were asked then `out_feats`
50 | is a list with the asked output features placed in the same
51 | order as in `out_feat_keys`. If a single output feature was
52 | asked then `out_feats` is that output feature (and not a list).
53 | """
54 | out_feat_keys, max_out_feat = self._parse_out_keys_arg(out_feat_keys)
55 | out_feats = [None] * len(out_feat_keys)
56 |
57 | feat = x
58 | for f in range(max_out_feat+1):
59 | feat = self._feature_blocks[f](feat)
60 | key = self.all_feat_names[f]
61 | if key in out_feat_keys:
62 | out_feats[out_feat_keys.index(key)] = feat
63 |
64 | out_feats = (out_feats[0] if len(out_feats) == 1 else out_feats)
65 |
66 | return out_feats
67 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/architectures/feature_extractors/wide_resnet.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import torch.nn as nn
4 |
5 | import low_shot_learning.architectures.feature_extractors.utils as utils
6 | import low_shot_learning.architectures.tools as tools
7 |
8 |
9 | class BasicBlock(nn.Module):
10 | def __init__(self, in_planes, out_planes, stride, dropRate=0.0):
11 | super(BasicBlock, self).__init__()
12 |
13 | self.equalInOut = (in_planes == out_planes and stride == 1)
14 |
15 | self.convResidual = nn.Sequential()
16 |
17 | if self.equalInOut:
18 | self.convResidual.add_module('bn1', nn.BatchNorm2d(in_planes))
19 | self.convResidual.add_module('relu1', nn.ReLU(inplace=True))
20 | self.convResidual.add_module(
21 | 'conv1',
22 | nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
23 | padding=1, bias=False))
24 |
25 | self.convResidual.add_module('bn2', nn.BatchNorm2d(out_planes))
26 | self.convResidual.add_module('relu2', nn.ReLU(inplace=True))
27 | self.convResidual.add_module(
28 | 'conv2',
29 | nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1,
30 | padding=1, bias=False))
31 |
32 | if dropRate > 0:
33 | self.convResidual.add_module('dropout', nn.Dropout(p=dropRate))
34 |
35 | if self.equalInOut:
36 | self.convShortcut = nn.Sequential()
37 | else:
38 | self.convShortcut = nn.Conv2d(
39 | in_planes, out_planes, kernel_size=1, stride=stride,
40 | padding=0, bias=False)
41 |
42 | def forward(self, x):
43 | return self.convShortcut(x) + self.convResidual(x)
44 |
45 |
46 | class NetworkBlock(nn.Module):
47 | def __init__(
48 | self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0):
49 | super(NetworkBlock, self).__init__()
50 |
51 | self.layer = self._make_layer(
52 | block, in_planes, out_planes, nb_layers, stride, dropRate)
53 |
54 | def _make_layer(
55 | self, block, in_planes, out_planes, nb_layers, stride, dropRate):
56 |
57 | layers = []
58 | for i in range(nb_layers):
59 |             in_planes_arg = in_planes if i == 0 else out_planes
60 |             stride_arg = stride if i == 0 else 1
61 | layers.append(
62 | block(in_planes_arg, out_planes, stride_arg, dropRate))
63 |
64 | return nn.Sequential(*layers)
65 |
66 | def forward(self, x):
67 | return self.layer(x)
68 |
69 |
70 | class WideResnet(utils.SequentialFeatureExtractorAbstractClass):
71 | def __init__(
72 | self,
73 | depth,
74 | widen_factor=1,
75 | dropRate=0.0,
76 | pool='avg',
77 | extra_block=False,
78 | block_strides=[2, 2, 2, 2]):
79 | nChannels = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor]
80 | assert((depth - 4) % 6 == 0)
81 | n = int((depth - 4) / 6)
82 | block = BasicBlock
83 |
84 | all_feat_names = []
85 | feature_blocks = []
86 |
87 | # 1st conv before any network block
88 | conv1 = nn.Sequential()
89 | conv1.add_module(
90 | 'Conv',
91 | nn.Conv2d(3, nChannels[0], kernel_size=3, padding=1, bias=False))
92 | conv1.add_module('BN', nn.BatchNorm2d(nChannels[0]))
93 | conv1.add_module('ReLU', nn.ReLU(inplace=True))
94 | feature_blocks.append(conv1)
95 | all_feat_names.append('conv1')
96 |
97 | # 1st block.
98 | block1 = nn.Sequential()
99 | block1.add_module(
100 | 'Block',
101 | NetworkBlock(
102 | n, nChannels[0], nChannels[1], block,
103 | block_strides[0], dropRate))
104 | block1.add_module('BN', nn.BatchNorm2d(nChannels[1]))
105 | block1.add_module('ReLU', nn.ReLU(inplace=True))
106 | feature_blocks.append(block1)
107 | all_feat_names.append('block1')
108 |
109 | # 2nd block.
110 | block2 = nn.Sequential()
111 | block2.add_module(
112 | 'Block',
113 | NetworkBlock(
114 | n, nChannels[1], nChannels[2], block,
115 | block_strides[1], dropRate))
116 | block2.add_module('BN', nn.BatchNorm2d(nChannels[2]))
117 | block2.add_module('ReLU', nn.ReLU(inplace=True))
118 | feature_blocks.append(block2)
119 | all_feat_names.append('block2')
120 |
121 | # 3rd block.
122 | block3 = nn.Sequential()
123 | block3.add_module(
124 | 'Block',
125 | NetworkBlock(
126 | n, nChannels[2], nChannels[3], block,
127 | block_strides[2], dropRate))
128 | block3.add_module('BN', nn.BatchNorm2d(nChannels[3]))
129 | block3.add_module('ReLU', nn.ReLU(inplace=True))
130 | feature_blocks.append(block3)
131 | all_feat_names.append('block3')
132 |
133 | # extra block.
134 | if extra_block:
135 | block4 = nn.Sequential()
136 | block4.add_module(
137 | 'Block',
138 | NetworkBlock(
139 | n, nChannels[3], nChannels[3], block,
140 | block_strides[3], dropRate))
141 | block4.add_module('BN', nn.BatchNorm2d(nChannels[3]))
142 | block4.add_module('ReLU', nn.ReLU(inplace=True))
143 | feature_blocks.append(block4)
144 | all_feat_names.append('block4')
145 |
146 | # global average pooling and classifier_type
147 | assert(pool == 'none' or pool == 'avg' or pool == 'max')
148 | if pool == 'max' or pool == 'avg':
149 | feature_blocks.append(tools.GlobalPooling(pool_type=pool))
150 | all_feat_names.append('GlobalPooling')
151 |
152 | super(WideResnet, self).__init__(all_feat_names, feature_blocks)
153 |
154 | for m in self.modules():
155 | if isinstance(m, nn.Conv2d):
156 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
157 | m.weight.data.normal_(0, math.sqrt(2. / n))
158 | elif isinstance(m, nn.BatchNorm2d):
159 | m.weight.data.fill_(1)
160 | m.bias.data.zero_()
161 |
162 |
163 | def create_model(opt):
164 | depth = opt['depth']
165 | widen_factor = opt['widen_Factor']
166 | dropRate = opt['dropRate'] if ('dropRate' in opt) else 0.0
167 | pool = opt['pool'] if ('pool' in opt) else 'avg'
168 | extra_block = opt['extra_block'] if ('extra_block' in opt) else False
169 | block_strides = opt['strides'] if ('strides' in opt) else None
170 |
171 | if block_strides is None:
172 | block_strides = [2] * 4
173 |
174 | return WideResnet(
175 | depth, widen_factor, dropRate, pool, extra_block, block_strides)
176 |
177 |
178 | if __name__ == '__main__':
179 | opt = {}
180 | opt['depth'] = 28
181 | opt['widen_Factor'] = 10
182 | opt['dropRate'] = 0.0
183 | opt['extra_block'] = False
184 | opt['pool'] = 'none'
185 | model = create_model(opt)
186 | print(model)
187 |
188 | batch_size = 1
189 | image_size = 80
190 | img = torch.FloatTensor(batch_size, 3, image_size, image_size).normal_()
191 | features = model(img, model.all_feat_names)
192 | for feature, feature_name in zip(features, model.all_feat_names):
193 | print('Feature {0}: size {1}, mean {2}, std {3}'.format(
194 | feature_name, feature.size(), feature.mean().item(),
195 | feature.std().item()))
196 |
197 | count = 0
198 | for parameter in model.parameters():
199 | if parameter.requires_grad:
200 | count += parameter.numel()
201 |
202 | print(count)
203 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/architectures/tools.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 |
7 | class LinearDiag(nn.Module):
8 | def __init__(self, num_features, bias=False):
9 | super(LinearDiag, self).__init__()
10 | # initialize to the identity transform
11 | weight = torch.FloatTensor(num_features).fill_(1)
12 | self.weight = nn.Parameter(weight, requires_grad=True)
13 |
14 | if bias:
15 | bias = torch.FloatTensor(num_features).fill_(0)
16 | self.bias = nn.Parameter(bias, requires_grad=True)
17 | else:
18 | self.register_parameter('bias', None)
19 |
20 | def forward(self, X):
21 | assert(X.dim()==2 and X.size(1)==self.weight.size(0))
22 | out = X * self.weight.expand_as(X)
23 | if self.bias is not None:
24 | out = out + self.bias.expand_as(out)
25 | return out
26 |
27 |
28 | def cosine_fully_connected_layer(x_in, weight, scale=None, bias=None):
29 | assert(x_in.dim() == 2)
30 | assert(weight.dim() == 2)
31 | assert(x_in.size(1) == weight.size(0))
32 |
33 | x_in = F.normalize(x_in, p=2, dim=1, eps=1e-12)
34 | weight = F.normalize(weight, p=2, dim=0, eps=1e-12)
35 |
36 | x_out = torch.mm(x_in, weight)
37 |
38 | if scale is not None:
39 | x_out = x_out * scale.view(1, -1)
40 |
41 | if bias is not None:
42 | x_out = x_out + bias.view(1, -1)
43 |
44 | return x_out
45 |
46 |
47 | def batch_cosine_fully_connected_layer(x_in, weight, scale=None, bias=None):
48 | """
49 | Args:
50 | x_in: a 3D tensor with shape
51 | [meta_batch_size x num_examples x num_features_in]
52 | weight: a 3D tensor with shape
53 | [meta_batch_size x num_features_in x num_features_out]
54 | scale: (optional) a scalar value
55 | bias: (optional) a 1D tensor with shape [num_features_out]
56 |
57 | Returns:
58 | x_out: a 3D tensor with shape
59 | [meta_batch_size x num_examples x num_features_out]
60 | """
61 |
62 | assert(x_in.dim() == 3)
63 | assert(weight.dim() == 3)
64 | assert(x_in.size(0) == weight.size(0))
65 | assert(x_in.size(2) == weight.size(1))
66 |
67 | x_in = F.normalize(x_in, p=2, dim=2, eps=1e-12)
68 | weight = F.normalize(weight, p=2, dim=1, eps=1e-12)
69 |
70 | x_out = torch.bmm(x_in, weight)
71 |
72 | if scale is not None:
73 | x_out = x_out * scale
74 |
75 | if bias is not None:
76 | x_out = x_out + bias
77 |
78 | return x_out
79 |
80 |
81 | class CosineFullyConnectedLayer(nn.Module):
82 | def __init__(
83 | self,
84 | num_inputs,
85 | num_outputs,
86 | scale=20.0,
87 | per_plane=False,
88 | learn_scale=True,
89 | bias=False):
90 | super(CosineFullyConnectedLayer, self).__init__()
91 |
92 | self.num_inputs = num_inputs
93 | self.num_outputs = num_outputs
94 | self.learn_scale = learn_scale
95 | self.per_plane = per_plane
96 |
97 | weight = torch.FloatTensor(num_inputs, num_outputs).normal_(
98 | 0.0, np.sqrt(2.0/num_inputs))
99 | self.weight = nn.Parameter(weight, requires_grad=True)
100 |
101 | if bias:
102 | bias = torch.FloatTensor(num_outputs).fill_(0.0)
103 | self.bias = nn.Parameter(bias, requires_grad=True)
104 | else:
105 | self.bias = None
106 |
107 | if scale:
108 | num_scale_values = num_outputs if per_plane else 1
109 | scale = torch.FloatTensor(num_scale_values).fill_(scale)
110 | self.scale = nn.Parameter(scale, requires_grad=learn_scale)
111 | else:
112 | self.scale = None
113 |
114 | def forward(self, x_in):
115 | assert(x_in.dim() == 2)
116 | return cosine_fully_connected_layer(
117 | x_in, self.weight, scale=self.scale, bias=self.bias)
118 |
119 | def extra_repr(self):
120 | s = 'num_inputs={0}, num_classes={1}'.format(
121 | self.num_inputs, self.num_outputs)
122 |
123 | if self.scale is not None:
124 | if self.per_plane:
125 | s += 'num_scales={0} (learnable={1})'.format(
126 | self.num_outputs, self.learn_scale)
127 | else:
128 | s += 'num_scales={0} (value={1} learnable={2})'.format(
129 | 1, self.scale.item(), self.learn_scale)
130 |
131 | if self.bias is None:
132 | s += ', bias=False'
133 |
134 | return s
135 |
136 |
137 | def global_pooling(x, pool_type):
138 | assert(x.dim() == 4)
139 | if pool_type == 'max':
140 | return F.max_pool2d(x, (x.size(2), x.size(3)))
141 | elif pool_type == 'avg':
142 | return F.avg_pool2d(x, (x.size(2), x.size(3)))
143 | else:
144 | raise ValueError('Unknown pooling type.')
145 |
146 |
147 | class GlobalPooling(nn.Module):
148 | def __init__(self, pool_type):
149 | super(GlobalPooling, self).__init__()
150 | assert(pool_type == 'avg' or pool_type == 'max')
151 | self.pool_type = pool_type
152 |
153 | def forward(self, x):
154 | return global_pooling(x, pool_type=self.pool_type)
155 |
--------------------------------------------------------------------------------
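A minimal standalone sketch of `cosine_fully_connected_layer` above: the output is the scaled cosine similarity between each input row and each weight column (shapes and the scale value are illustrative):

    import torch
    import torch.nn.functional as F

    x_in = torch.randn(4, 128)                       # [num_examples, num_features_in]
    weight = torch.randn(128, 10)                    # [num_features_in, num_features_out]
    scale = torch.tensor([20.0])

    out = torch.mm(F.normalize(x_in, p=2, dim=1),
                   F.normalize(weight, p=2, dim=0)) * scale.view(1, -1)
    # Same result as cosine_fully_connected_layer(x_in, weight, scale=scale) above.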
/baselines/wDAEGNN/low_shot_learning/dataloaders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/baselines/wDAEGNN/low_shot_learning/dataloaders/__init__.py
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/dataloaders/basic_dataloaders.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import numpy as np
4 | import torchnet as tnt
5 |
6 |
7 | def generate_element_list(list_size, dataset_size):
8 | if list_size == dataset_size:
9 | return list(range(dataset_size))
10 | elif list_size < dataset_size:
11 | return np.random.choice(
12 | dataset_size, list_size, replace=False).tolist()
13 |     else: # list_size > dataset_size
14 | num_times = list_size // dataset_size
15 | residual = list_size % dataset_size
16 | assert((num_times * dataset_size + residual) == list_size)
17 | elem_list = list(range(dataset_size)) * num_times
18 | if residual:
19 | elem_list += np.random.choice(
20 | dataset_size, residual, replace=False).tolist()
21 |
22 | return elem_list
23 |
24 |
25 | class SimpleDataloader:
26 | def __init__(
27 | self, dataset, batch_size, train, num_workers=4, epoch_size=None):
28 | self.dataset = dataset
29 | self.batch_size = batch_size
30 | self.num_workers = num_workers
31 | self.dataset_size = len(dataset)
32 | self.epoch_size = epoch_size if epoch_size else self.dataset_size
33 | self.train = train
34 |
35 | def get_iterator(self, epoch=0):
36 | def load_fun_(idx):
37 | #idx0 = idx
38 | img, label = self.dataset[idx % len(self.dataset)]
39 | #print('idx0={0}, len(d)={1}, idx={2}, label={3}'.format(
40 | # idx0, len(self.dataset), idx % len(self.dataset), label))
41 | return img, label
42 |
43 | elem_list = generate_element_list(self.epoch_size, self.dataset_size)
44 |
45 | tnt_dataset = tnt.dataset.ListDataset(
46 | elem_list=elem_list, load=load_fun_)
47 |
48 | data_loader = tnt_dataset.parallel(
49 | batch_size=self.batch_size,
50 | num_workers=self.num_workers,
51 | shuffle=self.train,
52 | drop_last=self.train)
53 |
54 | return data_loader
55 |
56 | def __call__(self, epoch=0):
57 | return self.get_iterator()
58 |
59 | def __len__(self):
60 | return self.epoch_size // self.batch_size
61 |
--------------------------------------------------------------------------------
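A minimal standalone sketch of how `generate_element_list` above builds an epoch when the requested epoch size exceeds the dataset size: whole passes over the dataset plus a remainder sampled without replacement (sizes are illustrative):

    import numpy as np

    dataset_size, list_size = 10, 25
    num_times, residual = divmod(list_size, dataset_size)    # 2 full passes, 5 leftover slots
    elem_list = list(range(dataset_size)) * num_times
    elem_list += np.random.choice(dataset_size, residual, replace=False).tolist()
    assert len(elem_list) == list_size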
/baselines/wDAEGNN/low_shot_learning/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from low_shot_learning.datasets.imagenet_dataset import ImageNet
2 | from low_shot_learning.datasets.imagenet_dataset import ImageNetLowShot
3 | from low_shot_learning.datasets.imagenet_dataset import ImageNetFeatures
4 | from low_shot_learning.datasets.imagenet_dataset import ImageNetLowShotFeatures
5 | from low_shot_learning.datasets.mini_imagenet_dataset import MiniImageNet
6 | from low_shot_learning.datasets.mini_imagenet_dataset import MiniImageNet80x80
7 | from low_shot_learning.datasets.mini_imagenet_dataset import MiniImageNetFeatures
8 |
9 |
10 | def dataset_factory(dataset_name, *args, **kwargs):
11 | datasets_collection = {}
12 | datasets_collection['MiniImageNet'] = MiniImageNet
13 | datasets_collection['MiniImageNet80x80'] = MiniImageNet80x80
14 | datasets_collection['MiniImageNetFeatures'] = MiniImageNetFeatures
15 | datasets_collection['ImageNet'] = ImageNet
16 | datasets_collection['ImageNetLowShot'] = ImageNetLowShot
17 | datasets_collection['ImageNetFeatures'] = ImageNetFeatures
18 | datasets_collection['ImageNetLowShotFeatures'] = ImageNetLowShotFeatures
19 |
20 | return datasets_collection[dataset_name](*args, **kwargs)
21 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/low_shot_learning/datasets/imagenet_dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import json
4 | import os
5 | import os.path
6 |
7 | import h5py
8 | import numpy as np
9 | import random
10 | import torch
11 | import torch.utils.data as data
12 | import torchvision.datasets as datasets
13 | import torchvision.transforms as transforms
14 | from PIL import Image
15 | from PIL import ImageEnhance
16 |
17 | import low_shot_learning.utils as utils
18 |
19 | # Set the appropriate paths of the datasets here.
20 | _IMAGENET_DATASET_DIR = '/datasets_local/ImageNet/'
21 | _IMAGENET_LOWSHOT_BENCHMARK_CATEGORY_SPLITS_PATH = './data/IMAGENET_LOWSHOT_BENCHMARK_CATEGORY_SPLITS.json'
22 | _MEAN_PIXEL = [0.485, 0.456, 0.406]
23 | _STD_PIXEL = [0.229, 0.224, 0.225]
24 |
25 |
26 | class ImageJitter:
27 | def __init__(self, transformdict):
28 | transformtypedict=dict(
29 | Brightness=ImageEnhance.Brightness, Contrast=ImageEnhance.Contrast,
30 | Sharpness=ImageEnhance.Sharpness, Color=ImageEnhance.Color
31 | )
32 | self.transforms = [
33 | (transformtypedict[k], transformdict[k]) for k in transformdict]
34 |
35 | def __call__(self, img):
36 | out = img
37 | randtensor = torch.rand(len(self.transforms))
38 |
39 | for i, (transformer, alpha) in enumerate(self.transforms):
40 | r = alpha*(randtensor[i]*2.0 -1.0) + 1
41 | out = transformer(out).enhance(r).convert('RGB')
42 |
43 | return out
44 |
45 |
46 | class ImageNet(data.Dataset):
47 | def __init__(
48 | self,
49 | split='train',
50 | use_geometric_aug=True,
51 | use_simple_geometric_aug=False,
52 | use_color_aug=True):
53 | # use_geometric_aug: If True geometric augmentations are used for the
54 | # images of the training split.
55 | # use_color_aug: if True, color augmentations are used for the images
56 | # of the training split.
57 |
58 | self.split = split
59 | assert split in ('train', 'val')
60 | self.name = 'ImageNet_Split_' + split
61 |
62 | data_dir = _IMAGENET_DATASET_DIR
63 | print('==> Loading ImageNet dataset - split {0}'.format(split))
64 | print('==> ImageNet directory: {0}'.format(data_dir))
65 |
66 | transform_train = []
67 | assert not (use_simple_geometric_aug and use_geometric_aug)
68 | if use_geometric_aug:
69 | transform_train.append(transforms.RandomResizedCrop(224))
70 | transform_train.append(transforms.RandomHorizontalFlip())
71 | elif use_simple_geometric_aug:
72 | transform_train.append(transforms.Resize(256))
73 | transform_train.append(transforms.RandomCrop(224))
74 | transform_train.append(transforms.RandomHorizontalFlip())
75 | else:
76 | transform_train.append(transforms.Resize(256))
77 | transform_train.append(transforms.CenterCrop(224))
78 |
79 | if use_color_aug:
80 | jitter_params = {'Brightness': 0.4, 'Contrast': 0.4, 'Color': 0.4}
81 | transform_train.append(ImageJitter(jitter_params))
82 |
83 | transform_train.append(lambda x: np.asarray(x))
84 | transform_train.append(transforms.ToTensor())
85 | transform_train.append(
86 | transforms.Normalize(mean=_MEAN_PIXEL, std=_STD_PIXEL))
87 |
88 | self.transform_train = transform_train
89 |
90 | transform_train = transforms.Compose(transform_train)
91 |
92 | transform_test = transforms.Compose([
93 | transforms.Resize(256),
94 | transforms.CenterCrop(224),
95 | lambda x: np.asarray(x),
96 | transforms.ToTensor(),
97 | transforms.Normalize(mean=_MEAN_PIXEL, std=_STD_PIXEL),
98 | ])
99 |
100 | self.transform = transform_train if split=='train' else transform_test
101 | print('==> transform: {0}'.format(self.transform))
102 | train_dir = os.path.join(data_dir, 'train')
103 | val_dir = os.path.join(data_dir, 'val')
104 | split_dir = train_dir if split=='train' else val_dir
105 | self.data = datasets.ImageFolder(split_dir, self.transform)
106 | self.labels = [item[1] for item in self.data.imgs]
107 |
108 | #@profile
109 | def __getitem__(self, index):
110 | img, label = self.data[index]
111 | return img, label
112 |
113 | def __len__(self):
114 | return len(self.data)
115 |
116 |
117 | class ImageNetLowShot(ImageNet):
118 | def __init__(
119 | self,
120 | phase='train',
121 | split='train',
122 | do_not_use_random_transf=False):
123 |
124 | assert phase in ('train', 'test', 'val')
125 | assert split in ('train', 'val')
126 |
127 | use_aug = (phase=='train') and (do_not_use_random_transf==False)
128 |
129 | ImageNet.__init__(
130 | self, split=split, use_geometric_aug=use_aug, use_color_aug=use_aug)
131 |
132 | self.phase = phase
133 | self.split = split
134 | self.name = 'ImageNetLowShot_Phase_' + phase + '_Split_' + split
135 | print('==> Loading ImageNet dataset (for few-shot benchmark) - phase {0}'.
136 | format(phase))
137 |
138 | #***********************************************************************
139 | with open(_IMAGENET_LOWSHOT_BENCHMARK_CATEGORY_SPLITS_PATH, 'r') as f:
140 | label_idx = json.load(f)
141 | base_classes = label_idx['base_classes']
142 | novel_classes_val_phase = label_idx['novel_classes_1']
143 | novel_classes_test_phase = label_idx['novel_classes_2']
144 | #***********************************************************************
145 |
146 | self.label2ind = utils.buildLabelIndex(self.labels)
147 | self.labelIds = sorted(self.label2ind.keys())
148 | self.num_cats = len(self.labelIds)
149 | assert self.num_cats==1000
150 |
151 | self.labelIds_base = base_classes
152 | self.num_cats_base = len(self.labelIds_base)
153 | if self.phase=='val' or self.phase=='test':
154 | self.labelIds_novel = (
155 | novel_classes_val_phase if (self.phase=='val') else
156 | novel_classes_test_phase)
157 | self.num_cats_novel = len(self.labelIds_novel)
158 |
159 | intersection = set(self.labelIds_base) & set(self.labelIds_novel)
160 | assert len(intersection) == 0
161 |
162 |
163 | class ImageNetLowShotFeatures:
164 | def __init__(
165 | self,
166 | data_dir, # path to the directory with the saved ImageNet features.
167 | image_split='train', # the image split of the ImageNet that will be loaded.
168 | phase='train', # whether the dataset will be used for training, validating, or testing a model.
169 | ):
170 | assert image_split in ('train', 'val')
171 | assert phase in ('train', 'val', 'test')
172 |
173 | self.phase = phase
174 | self.image_split = image_split
175 | self.name = (f'ImageNetLowShotFeatures_ImageSplit_{self.image_split}'
176 | f'_Phase_{self.phase}')
177 |
178 | dataset_file = os.path.join(
179 | data_dir, 'ImageNet_' + self.image_split + '.h5')
180 | self.data_file = h5py.File(dataset_file, 'r')
181 | self.count = self.data_file['count'][0]
182 | self.features = self.data_file['all_features'][...]
183 | self.labels = self.data_file['all_labels'][:self.count].tolist()
184 |
185 | #***********************************************************************
186 | with open(_IMAGENET_LOWSHOT_BENCHMARK_CATEGORY_SPLITS_PATH, 'r') as f:
187 | label_idx = json.load(f)
188 | base_classes = label_idx['base_classes']
189 | base_classes_val_split = label_idx['base_classes_1']
190 | base_classes_test_split = label_idx['base_classes_2']
191 | novel_classes_val_split = label_idx['novel_classes_1']
192 | novel_classes_test_split = label_idx['novel_classes_2']
193 | #***********************************************************************
194 |
195 | self.label2ind = utils.buildLabelIndex(self.labels)
196 | self.labelIds = sorted(self.label2ind.keys())
197 | self.num_cats = len(self.labelIds)
198 | assert self.num_cats==1000
199 |
200 | self.labelIds_base = base_classes
201 | self.num_cats_base = len(self.labelIds_base)
202 |
203 | if self.phase=='val' or self.phase=='test':
204 | self.labelIds_novel = (
205 | novel_classes_val_split if (self.phase=='val') else
206 | novel_classes_test_split)
207 | self.num_cats_novel = len(self.labelIds_novel)
208 |
209 | intersection = set(self.labelIds_base) & set(self.labelIds_novel)
210 | assert(len(intersection) == 0)
211 | self.base_classes_eval_split = (
212 | base_classes_val_split if (self.phase=='val') else
213 | base_classes_test_split)
214 | self.base_classes_subset = self.base_classes_eval_split
215 |
216 |
217 | def __getitem__(self, index):
218 | features_this = torch.Tensor(self.features[index]).view(-1,1,1)
219 | label_this = self.labels[index]
220 | return features_this, label_this
221 |
222 | def __len__(self):
223 | return int(self.count)
224 |
225 |
226 | class ImageNetFeatures:
227 | def __init__(
228 | self,
229 | data_dir, # path to the directory with the saved ImageNet features.
230 | split='train', # the image split of the ImageNet that will be loaded.
231 | ):
232 | assert split in ('train', 'val')
233 |
234 | self.split = split
235 | self.name = (f'ImageNetFeatures_ImageSplit_{self.split}')
236 |
237 | dataset_file = os.path.join(
238 | data_dir, 'ImageNet_' + self.split + '.h5')
239 | self.data_file = h5py.File(dataset_file, 'r')
240 | self.count = self.data_file['count'][0]
241 | self.features = self.data_file['all_features'][...]
242 | self.labels = self.data_file['all_labels'][:self.count].tolist()
243 |
244 | self.label2ind = utils.buildLabelIndex(self.labels)
245 | self.labelIds = sorted(self.label2ind.keys())
246 | self.num_cats = len(self.labelIds)
247 | assert self.num_cats == 1000
248 |
249 | def __getitem__(self, index):
250 | features_this = torch.Tensor(self.features[index]).view(-1,1,1)
251 | label_this = self.labels[index]
252 | return features_this, label_this
253 |
254 | def __len__(self):
255 | return int(self.count)
256 |
--------------------------------------------------------------------------------
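Both feature-dataset classes above read a fixed HDF5 layout ('count', 'all_features', 'all_labels') written by scripts/save_features.py. A minimal inspection sketch (the path is a placeholder):

import h5py

h5_path = './datasets/feature_datasets/imagenet_ResNet10CosineClassifier/ImageNet_val.h5'  # placeholder
with h5py.File(h5_path, 'r') as f:
    count = int(f['count'][0])          # number of valid rows
    feats = f['all_features'][:count]   # (count, feat_dim) feature vectors
    labels = f['all_labels'][:count]    # (count,) integer class labels
print(feats.shape, labels[:5])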
/baselines/wDAEGNN/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/baselines/wDAEGNN/scripts/__init__.py
--------------------------------------------------------------------------------
/baselines/wDAEGNN/scripts/lowshot_evaluate.py:
--------------------------------------------------------------------------------
1 | """Evaluates a fewshot recognition models on the low-shot Imagenet dataset[*]
2 | using the improved evaluation metrics proposed by Wang et al [**].
3 |
4 | Example of usage:
5 | # Evaluate the model on the 1-shot setting.
6 | python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=1 --step_size=1.0
7 | ==> Top 5 Accuracies: [Novel: 47.98 | Base: 93.40 | All 58.99 | Novel vs All 41.16 | Base vs All 87.28 | All prior 57.84]
8 |
9 | # Evaluate the model on the 2-shot setting.
10 | python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=2 --step_size=1.0
11 | ==> Top 5 Accuracies: [Novel: 59.52 | Base: 93.41 | All 66.20 | Novel vs All 53.40 | Base vs All 86.51 | All prior 64.87]
12 |
13 | # Evaluate the model on the 5-shot setting.
14 | python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=5 --step_size=0.6
15 | ==> Top 5 Accuracies: [Novel: 70.21 | Base: 93.41 | All 73.20 | Novel vs All 65.84 | Base vs All 84.87 | All prior 71.87]
16 |
17 | # Evaluate the model on the 10-shot setting.
18 | python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=10 --step_size=0.4
19 | ==> Top 5 Accuracies: [Novel: 74.94 | Base: 93.36 | All 76.08 | Novel vs All 71.74 | Base vs All 82.97 | All prior 75.13]
20 |
21 | # Evaluate the model on the 20-shot setting.
22 | python scripts/lowshot_evaluate.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN --testset --nexemplars=20 --step_size=0.2
23 | ==> Top 5 Accuracies: [Novel: 77.77 | Base: 93.33 | All 77.53 | Novel vs All 75.36 | Base vs All 80.98 | All prior 77.11]
24 |
25 | The config argument specifies the model that will be evaluated.
26 |
27 | [*] B. Hariharan and R. Girshick. Low-shot visual recognition by shrinking and hallucinating features.
28 | [**] Y.-X. Wang and R. Girshick, M. Hebert, B. Hariharan. Low-shot learning from imaginary data.
29 | """
30 |
31 | from __future__ import print_function
32 |
33 | import argparse
34 | import os
35 |
36 | from low_shot_learning.algorithms.fewshot.imagenet_lowshot import ImageNetLowShot
37 | from low_shot_learning.dataloaders.dataloader_fewshot import LowShotDataloader
38 | from low_shot_learning.datasets.imagenet_dataset import ImageNetLowShotFeatures
39 | from low_shot_learning import project_root
40 |
41 |
42 | parser = argparse.ArgumentParser()
43 | parser.add_argument('--config', type=str, required=True, default='',
44 | help='config file with parameters of the experiment')
45 | parser.add_argument('--checkpoint', type=int, default=-1,
46 | help='checkpoint (epoch id) that will be loaded. If a negative value is '
47 | 'given then the latest existing checkpoint is loaded.')
48 | parser.add_argument('--cuda', type=bool, default=True, help='enables cuda')
49 | parser.add_argument('--testset', default=False, action='store_true',
50 | help='If True, the model is evaluated on the test set of ImageNetLowShot. '
51 | 'Otherwise, the validation set is used for evaluation.')
52 | parser.add_argument('--nepisodes', type=int, default=100,
53 | help='the number of evaluation experiments that will run before computing '
54 | 'the average performance.')
55 | parser.add_argument('--prior', type=float, default=0.7)
56 | parser.add_argument('--nexemplars', type=int, default=-1)
57 | parser.add_argument('--last', default=False, action='store_true')
58 | parser.add_argument('--workspace', default=False, action='store_true')
59 | parser.add_argument('--step_size', default=1.0, type=float)
60 | args_opt = parser.parse_args()
61 | #args_opt.testset = True
62 |
63 | exp_config_file = os.path.join(project_root, 'config', args_opt.config + '.py')
64 | exp_base_directory = os.path.join(project_root, 'experiments')
65 | exp_directory = os.path.join(exp_base_directory, args_opt.config)
66 |
67 | # Load the configuration params of the experiment
68 | exp_config_file = 'config.' + args_opt.config.replace('/', '.')
69 | #print(f'Launching experiment: {exp_config_file}')
70 | config = __import__(exp_config_file, fromlist=['']).config
71 | config['exp_dir'] = exp_directory # where logs, models, etc will be stored.
72 | print(f'Loading experiment {args_opt.config}')
73 | print(f'Generated logs, snapshots, and model files will be stored on {exp_directory}')
74 |
75 | if args_opt.step_size != 1.0:
76 | config['networks']['classifier']['opt']['dae_config']['step_size'] = args_opt.step_size
77 |
78 | algorithm = ImageNetLowShot(config)
79 | if args_opt.cuda: # enable cuda.
80 | algorithm.load_to_gpu()
81 |
82 | if args_opt.checkpoint != 0: # load checkpoint.
83 | algorithm.load_checkpoint(
84 | epoch=args_opt.checkpoint if (args_opt.checkpoint > 0) else '*',
85 | train=False,
86 | suffix=('' if args_opt.last else '.best'))
87 |
88 | # Prepare the datasets and the dataloader.
89 | nExemplars = config['data_train_opt']['nExemplars']
90 | if args_opt.nexemplars > 0:
91 | nExemplars = args_opt.nexemplars
92 |
93 | eval_phase = 'test' if args_opt.testset else 'val'
94 | data_train_opt = config['data_train_opt']
95 | feat_data_train = ImageNetLowShotFeatures(
96 | data_dir=data_train_opt['data_dir'], image_split='train', phase=eval_phase)
97 | feat_data_test = ImageNetLowShotFeatures(
98 | data_dir=data_train_opt['data_dir'], image_split='val', phase=eval_phase)
99 | data_loader = LowShotDataloader(
100 | feat_data_train, feat_data_test,
101 | nExemplars=nExemplars, batch_size=1000, num_workers=1)
102 |
103 | results = algorithm.evaluate(
104 | data_loader,
105 | num_eval_exp=args_opt.nepisodes,
106 | prior=args_opt.prior,
107 | suffix='best')
108 |
109 | algorithm.logger.info('==> algorithm_type: {0}'.format('ImageNetLowShot'))
110 | algorithm.logger.info('==> nExemplars: {0}'.format(nExemplars))
111 | algorithm.logger.info('==> num episodes: {0}'.format(args_opt.nepisodes))
112 | algorithm.logger.info('==> eval_phase: {0}'.format(eval_phase))
113 | algorithm.logger.info('==> step_size: {0}'.format(args_opt.step_size))
114 | algorithm.logger.info('==> results: {0}'.format(results))
115 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/scripts/lowshot_train_stage1.py:
--------------------------------------------------------------------------------
1 | """Applies the 1st training stage of our approach on the low-shot Imagenet dataset[*].
2 |
3 | Example of usage - train a cosine-similarity based recognition model with a ResNet10 feature extractor:
4 | python scripts/lowshot_train_stage1.py --config=imagenet_ResNet10CosineClassifier
5 |
6 | The configuration file imagenet_ResNet10CosineClassifier.py used in the above experiment is placed in
7 | the directory ./config .
8 |
9 | [*] B. Hariharan and R. Girshick. Low-shot visual recognition by shrinking and hallucinating features.
10 | """
11 |
12 |
13 | from __future__ import print_function
14 |
15 | import argparse
16 | import os
17 |
18 | from low_shot_learning.algorithms.fewshot.fewshot import FewShot
19 | from low_shot_learning.dataloaders.dataloader_fewshot import FewShotDataloader
20 | from low_shot_learning.datasets.imagenet_dataset import ImageNetLowShot
21 | from low_shot_learning import project_root
22 |
23 | parser = argparse.ArgumentParser()
24 | parser.add_argument('--config', type=str, required=True, default='',
25 | help='config file with parameters of the experiment.')
26 | parser.add_argument('--checkpoint', type=int, default=0,
27 | help='checkpoint (epoch id) that will be loaded. If a negative value is '
28 | 'given then the latest existing checkpoint is loaded.')
29 | parser.add_argument('--num_workers', type=int, default=4,
30 | help='number of data loading workers')
31 | parser.add_argument('--cuda', type=bool, default=True, help='enables cuda')
32 | parser.add_argument('--disp_step', type=int, default=200,
33 | help='display step during training')
34 | args_opt = parser.parse_args()
35 |
36 |
37 | exp_config_file = os.path.join(project_root, 'config', args_opt.config + '.py')
38 | exp_base_directory = os.path.join(project_root, 'experiments')
39 | exp_directory = os.path.join(exp_base_directory, args_opt.config)
40 |
41 | # Load the configuration params of the experiment
42 | exp_config_file = 'config.' + args_opt.config.replace('/', '.')
43 | #print(f'Launching experiment: {exp_config_file}')
44 | config = __import__(exp_config_file, fromlist=['']).config
45 | config['exp_dir'] = exp_directory # where logs, models, etc will be stored.
46 | print(f'Loading experiment {args_opt.config}')
47 | print(f'Generated logs, snapshots, and model files will be stored on {exp_directory}')
48 |
49 |
50 | # Set the train dataset and the corresponding data loader.
51 | data_train_opt = config['data_train_opt']
52 | dataset_train = ImageNetLowShot(phase='train')
53 | dloader_train = FewShotDataloader(
54 | dataset=dataset_train,
55 | nKnovel=data_train_opt['nKnovel'],
56 | nKbase=data_train_opt['nKbase'],
57 | nExemplars=data_train_opt['nExemplars'], # num training examples per novel category
58 | nTestNovel=data_train_opt['nTestNovel'], # num test examples for all the novel categories
59 | nTestBase=data_train_opt['nTestBase'], # num test examples for all the base categories
60 | batch_size=data_train_opt['batch_size'],
61 | num_workers=args_opt.num_workers,
62 | epoch_size=data_train_opt['epoch_size'], # num of batches per epoch
63 | )
64 |
65 | config['disp_step'] = args_opt.disp_step
66 | algorithm = FewShot(config)
67 | if args_opt.cuda: # enable cuda
68 | algorithm.load_to_gpu()
69 |
70 | if args_opt.checkpoint != 0: # load checkpoint
71 | algorithm.load_checkpoint(
72 | epoch=args_opt.checkpoint if (args_opt.checkpoint > 0) else '*',
73 | train=True)
74 |
75 | algorithm.solve(dloader_train)
76 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/scripts/lowshot_train_stage2.py:
--------------------------------------------------------------------------------
1 | """Train the wDAE-GNN few-shot model on the the low-shot Imagenet dataset[*].
2 |
3 | Example of usage:
4 | python scripts/lowshot_train_stage2.py --config=imagenet_wDAE/imagenet_ResNet10CosineClassifier_wDAE_GNN
5 |
6 |
7 | All the configuration files above (i.e., specified by the --config argument) are
8 | placed in the directory ./config .
9 |
10 | [*] B. Hariharan and R. Girshick. Low-shot visual recognition by shrinking and hallucinating features.
11 | """
12 |
13 | from __future__ import print_function
14 |
15 | import argparse
16 | import os
17 |
18 | from low_shot_learning.algorithms.fewshot.imagenet_lowshot import ImageNetLowShot
19 | from low_shot_learning.dataloaders.dataloader_fewshot import FewShotDataloader, LowShotDataloader
20 | from low_shot_learning.datasets.imagenet_dataset import ImageNetLowShotFeatures
21 | from low_shot_learning import project_root
22 |
23 | parser = argparse.ArgumentParser()
24 | parser.add_argument('--config', type=str, required=True, default='',
25 | help='config file with parameters of the experiment')
26 | parser.add_argument('--checkpoint', type=int, default=0,
27 | help='checkpoint (epoch id) that will be loaded. If a negative value is '
28 | 'given then the latest existing checkpoint is loaded.')
29 | parser.add_argument('--num_workers', type=int, default=0,
30 | help='number of data loading workers')
31 | parser.add_argument('--cuda', type=bool, default=True, help='enables cuda')
32 | parser.add_argument('--disp_step', type=int, default=200,
33 | help='display step during training')
34 | args_opt = parser.parse_args()
35 |
36 | exp_config_file = os.path.join(project_root, 'config', args_opt.config + '.py')
37 | exp_base_directory = os.path.join(project_root, 'experiments')
38 | exp_directory = os.path.join(exp_base_directory, args_opt.config)
39 |
40 | # Load the configuration params of the experiment
41 | exp_config_file = 'config.' + args_opt.config.replace('/', '.')
42 | #print(f'Launching experiment: {exp_config_file}')
43 | config = __import__(exp_config_file, fromlist=['']).config
44 | config['exp_dir'] = exp_directory # where logs, models, etc will be stored.
45 | print(f'Loading experiment {args_opt.config}')
46 | print(f'Generated logs, snapshots, and model files will be stored on {exp_directory}')
47 |
48 | config['disp_step'] = args_opt.disp_step
49 | algorithm = ImageNetLowShot(config)
50 | if args_opt.cuda: # enable cuda
51 | algorithm.load_to_gpu()
52 |
53 | if args_opt.checkpoint != 0: # load checkpoint
54 | algorithm.load_checkpoint(
55 | epoch=args_opt.checkpoint if (args_opt.checkpoint > 0) else '*',
56 | train=True)
57 |
58 | # Set the train dataset and the corresponding data loader.
59 | data_train_opt = config['data_train_opt']
60 | feat_dataset_train = ImageNetLowShotFeatures(
61 | data_dir=data_train_opt['data_dir'],
62 | image_split='train',
63 | phase='train')
64 | dloader_train = FewShotDataloader(
65 | dataset=feat_dataset_train,
66 | nKnovel=data_train_opt['nKnovel'],
67 | nKbase=data_train_opt['nKbase'],
68 | nExemplars=data_train_opt['nExemplars'], # num training examples per novel category
69 | nTestNovel=data_train_opt['nTestNovel'], # num test examples for all the novel categories
70 | nTestBase=data_train_opt['nTestBase'], # num test examples for all the base categories
71 | batch_size=data_train_opt['batch_size'],
72 | num_workers=args_opt.num_workers,
73 | epoch_size=data_train_opt['epoch_size'], # num of batches per epoch
74 | )
75 |
76 | feat_data_train = ImageNetLowShotFeatures(
77 | data_dir=data_train_opt['data_dir'], image_split='train', phase='val')
78 | feat_data_test = ImageNetLowShotFeatures(
79 | data_dir=data_train_opt['data_dir'], image_split='val', phase='val')
80 | dloader_test = LowShotDataloader(
81 | feat_data_train, feat_data_test,
82 | nExemplars=data_train_opt['nExemplars'],
83 | batch_size=200,
84 | num_workers=0)
85 |
86 | algorithm.solve(dloader_train, dloader_test)
87 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/scripts/save_features.py:
--------------------------------------------------------------------------------
1 | """
2 | Extracts and saves features (with a model trained by the lowshot_train_stage1.py
3 | routine) from the images of the ImageNet dataset.
4 |
5 | Example of usage:
6 | # Extract features from the validation image split of the Imagenet.
7 | python scripts/save_features.py --config=imagenet_ResNet10CosineClassifier --split='val'
8 | # Extract features from the training image split of the Imagenet.
9 | python scripts/save_features.py --config=imagenet_ResNet10CosineClassifier --split='train'
10 |
11 | The config argument specifies the model that will be used.
12 | """
13 |
14 | from __future__ import print_function
15 |
16 | import argparse
17 | import os
18 |
19 | from low_shot_learning.algorithms.utils.save_features import SaveFeatures
20 | from low_shot_learning.dataloaders.basic_dataloaders import SimpleDataloader
21 | from low_shot_learning.datasets.imagenet_dataset import ImageNet
22 | from low_shot_learning import project_root
23 |
24 | parser = argparse.ArgumentParser()
25 | parser.add_argument('--config', type=str, required=True, default='',
26 | help='config file with hyper-parameters of the model that we will use for '
27 | 'extracting features from ImageNet dataset.')
28 | parser.add_argument('--checkpoint', type=int, default=-1,
29 | help='checkpoint (epoch id) that will be loaded. If a negative value is'
30 | ' given then the latest existing checkpoint is loaded.')
31 | parser.add_argument('--cuda', type=bool, default=True, help='enables cuda')
32 | parser.add_argument('--split', type=str, default='val')
33 | parser.add_argument('--num_workers', type=int, default=4)
34 | parser.add_argument('--batch_size', type=int, default=128)
35 | parser.add_argument('--save2exp', default=False, action='store_true')
36 | parser.add_argument('--feature_name', type=str, default='')
37 | parser.add_argument('--global_pooling', default=False, action='store_true')
38 | args_opt = parser.parse_args()
39 |
40 |
41 | exp_base_directory = os.path.join(project_root, 'experiments')
42 | exp_directory = os.path.join(exp_base_directory, args_opt.config)
43 |
44 | # Load the configuration params of the experiment
45 | exp_config_file = 'config.' + args_opt.config.replace('/', '.')
46 | #print(f'Launching experiment: {exp_config_file}')
47 | config = __import__(exp_config_file, fromlist=['']).config
48 | config['exp_dir'] = exp_directory # where logs, models, etc will be stored.
49 | print(f'Loading experiment {args_opt.config}')
50 | print(f'Generated logs, snapshots, and model files will be stored on {exp_directory}')
51 |
52 | if (args_opt.split != 'train') and (args_opt.split != 'val'):
53 | raise ValueError('Not valid split {0}'.format(args_opt.split))
54 |
55 | dataset = ImageNet(
56 | split=args_opt.split, use_geometric_aug=False, use_color_aug=False)
57 | dloader = SimpleDataloader(
58 | dataset,
59 | batch_size=args_opt.batch_size,
60 | train=False,
61 | num_workers=args_opt.num_workers)
62 |
63 | algorithm = SaveFeatures(config)
64 | if args_opt.cuda: # enable cuda
65 | algorithm.load_to_gpu()
66 |
67 | if args_opt.checkpoint != 0: # load checkpoint
68 | algorithm.load_checkpoint(
69 | epoch=args_opt.checkpoint if (args_opt.checkpoint > 0) else '*',
70 | train=False)
71 |
72 | if args_opt.save2exp:
73 | dst_directory = os.path.join(exp_directory, 'feature_datasets')
74 | else:
75 | dst_directory = os.path.join(
76 | project_root, 'datasets', 'feature_datasets', args_opt.config)
77 |
78 | if args_opt.feature_name == '':
79 | args_opt.feature_name = None
80 | else:
81 | dst_directory = dst_directory + '_' + args_opt.feature_name
82 |
83 | algorithm.logger.info(f"==> Destination directory: {dst_directory}")
84 | if (not os.path.isdir(dst_directory)):
85 | os.makedirs(dst_directory)
86 |
87 | dst_filename = os.path.join(
88 | dst_directory, 'ImageNet_' + args_opt.split + '.h5')
89 |
90 | algorithm.logger.info(f"==> dst_filename: {dst_filename}")
91 | algorithm.logger.info(f"==> args_opt.feature_name: {args_opt.feature_name}")
92 | algorithm.logger.info(f"==> args_opt.global_pooling: {args_opt.global_pooling}")
93 |
94 | algorithm.save_features(
95 | dataloader=dloader,
96 | filename=dst_filename,
97 | feature_name=args_opt.feature_name,
98 | global_pooling=args_opt.global_pooling)
99 |
--------------------------------------------------------------------------------
/baselines/wDAEGNN/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from setuptools import find_packages
3 |
4 | setup(
5 | name='wDAE_GNN_FewShot',
6 | version='0.0.1',
7 | description='Generating Classification Weights with GNN Denoising Autoencoders for Few-Shot Learning',
8 | author='Spyros Gidaris',
9 | packages=find_packages())
10 |
--------------------------------------------------------------------------------
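Note: the scripts under baselines/wDAEGNN expect `low_shot_learning` to be importable as an installed package (see the `from low_shot_learning import project_root` imports above); installing it in editable mode from baselines/wDAEGNN, e.g. with `pip install -e .`, is one way to make those imports resolve.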
/embeddings/AWA2_classnames.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/AWA2_classnames.npy
--------------------------------------------------------------------------------
/embeddings/CUB_classnames.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/CUB_classnames.npy
--------------------------------------------------------------------------------
/embeddings/SUN_classnames.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/SUN_classnames.npy
--------------------------------------------------------------------------------
/embeddings/conceptnet/AWA2_cn_sum_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/conceptnet/AWA2_cn_sum_list.npy
--------------------------------------------------------------------------------
/embeddings/conceptnet/CUB_cn_sum_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/conceptnet/CUB_cn_sum_list.npy
--------------------------------------------------------------------------------
/embeddings/conceptnet/SUN_cn_sum_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/conceptnet/SUN_cn_sum_list.npy
--------------------------------------------------------------------------------
/embeddings/conceptnet/imgnet_cn_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/conceptnet/imgnet_cn_list.npy
--------------------------------------------------------------------------------
/embeddings/wiki2vec/AWA2_wiki_sum_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/wiki2vec/AWA2_wiki_sum_list.npy
--------------------------------------------------------------------------------
/embeddings/wiki2vec/CUB_wiki_sum_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/wiki2vec/CUB_wiki_sum_list.npy
--------------------------------------------------------------------------------
/embeddings/wiki2vec/SUN_wiki_sum_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/wiki2vec/SUN_wiki_sum_list.npy
--------------------------------------------------------------------------------
/embeddings/wiki2vec/imgnet_wiki_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/embeddings/wiki2vec/imgnet_wiki_list.npy
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: izsl
2 | channels:
3 | - nvidia
4 | - anaconda
5 | - conda-forge
6 | - defaults
7 | dependencies:
8 | - _libgcc_mutex=0.1=conda_forge
9 | - _openmp_mutex=4.5=2_gnu
10 | - blas=1.0=mkl
11 | - bzip2=1.0.8=h7f98852_4
12 | - ca-certificates=2023.7.22=hbcca054_0
13 | - cudatoolkit=11.1.74=h6bb024c_0
14 | - freetype=2.12.1=h4a9f257_0
15 | - intel-openmp=2021.4.0=h06a4308_3561
16 | - joblib=1.3.2=pyhd8ed1ab_0
17 | - jpeg=9e=h5eee18b_1
18 | - lcms2=2.12=h3be6417_0
19 | - ld_impl_linux-64=2.40=h41732ed_0
20 | - libblas=3.9.0=12_linux64_mkl
21 | - libcblas=3.9.0=12_linux64_mkl
22 | - libffi=3.4.2=h7f98852_5
23 | - libgcc-ng=13.2.0=h807b86a_2
24 | - libgfortran-ng=7.5.0=ha8ba4b0_17
25 | - libgfortran4=7.5.0=ha8ba4b0_17
26 | - libgomp=13.2.0=h807b86a_2
27 | - libnsl=2.0.1=hd590300_0
28 | - libpng=1.6.39=h5eee18b_0
29 | - libsqlite=3.43.2=h2797004_0
30 | - libstdcxx-ng=11.2.0=h1234567_1
31 | - libtiff=4.2.0=hecacb30_2
32 | - libuuid=2.38.1=h0b41bf4_0
33 | - libwebp-base=1.3.2=h5eee18b_0
34 | - libzlib=1.2.13=hd590300_5
35 | - lz4-c=1.9.4=h6a678d5_0
36 | - mkl=2021.4.0=h06a4308_640
37 | - mkl-service=2.4.0=py39h7f8727e_0
38 | - mkl_fft=1.3.1=py39hd3c417c_0
39 | - mkl_random=1.2.2=py39h51133e4_0
40 | - ncurses=6.4=h59595ed_2
41 | - olefile=0.46=pyhd3eb1b0_0
42 | - openjpeg=2.4.0=h3ad879b_0
43 | - openssl=3.1.4=hd590300_0
44 | - pillow=8.3.1=py39h2c7a002_0
45 | - pip=21.2.4=py39h06a4308_0
46 | - python=3.9.18=h0755675_0_cpython
47 | - python_abi=3.9=4_cp39
48 | - readline=8.2=h8228510_1
49 | - scikit-learn=1.0.1=py39h4dfa638_3
50 | - scipy=1.7.1=py39h292c36d_2
51 | - setuptools=68.2.2=pyhd8ed1ab_0
52 | - six=1.16.0=pyhd3eb1b0_1
53 | - threadpoolctl=3.2.0=pyha21a80b_0
54 | - tk=8.6.13=h2797004_0
55 | - tzdata=2023c=h71feb2d_0
56 | - wheel=0.41.3=pyhd8ed1ab_0
57 | - xz=5.2.6=h166bdaf_0
58 | - zlib=1.2.13=hd590300_5
59 | - zstd=1.5.2=ha4553b6_0
60 | - pip:
61 | - ftfy==6.1.1
62 | - numpy==1.22.4
63 | - regex==2023.10.3
64 | - torch==1.10.1
65 | - torchvision==0.11.2
66 | - tqdm==4.66.1
67 | - typing-extensions==4.4.0
68 | - wcwidth==0.2.9
69 | - git+https://github.com/openai/CLIP.git
70 |
--------------------------------------------------------------------------------
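The environment above can be recreated with the standard conda workflow, e.g. `conda env create -f environment.yml` followed by `conda activate izsl` (the environment name comes from the first line of the file).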
/figs/icis-framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/figs/icis-framework.png
--------------------------------------------------------------------------------
/figs/model-fig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExplainableML/ImageFreeZSL/f009293a2886e0123ac938b6b0df8c16d8c2328d/figs/model-fig.png
--------------------------------------------------------------------------------
/scripts/table1_awa2.sh:
--------------------------------------------------------------------------------
1 | # Table 1, AWA2 dataset
2 | # Ours
3 | echo "ICIS:"
4 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy
5 | # ConSE
6 | echo "ConSE:"
7 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --conse_benchmark
8 | # COSTA
9 | echo "COSTA:"
10 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --costa_benchmark
11 | # Sub. Reg.
12 | echo "Sub. Reg.:"
13 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj
14 | # wDAE
15 | echo "wDAE:"
16 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn
17 | # WAvg
18 | echo "WAvg:"
19 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --vgse_baseline=wavg --class_embedding=att --norm_scale_heuristic
20 | # SMO
21 | echo "SMO:"
22 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --vgse_baseline=smo --class_embedding=att --vgse_alpha=0 --norm_scale_heuristic
--------------------------------------------------------------------------------
/scripts/table1_cub.sh:
--------------------------------------------------------------------------------
1 | # Table 1, CUB dataset
2 | # Ours
3 | echo "ICIS:"
4 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy
5 | # ConSE
6 | echo "ConSE:"
7 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --conse_benchmark
8 | # COSTA
9 | echo "COSTA:"
10 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --costa_benchmark
11 | # Sub. Reg.
12 | echo "Sub. Reg.:"
13 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj
14 | # wDAE
15 | echo "wDAE:"
16 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn
17 | # WAvg
18 | echo "WAvg:"
19 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --vgse_baseline=wavg --class_embedding=att --norm_scale_heuristic
20 | # SMO
21 | echo "SMO:"
22 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --vgse_baseline=smo --class_embedding=att --vgse_alpha=0 --norm_scale_heuristic
--------------------------------------------------------------------------------
/scripts/table1_cub_5seeds.sh:
--------------------------------------------------------------------------------
1 | # Table 1, CUB dataset
2 | # Ours
3 | echo "ICIS:"
4 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --numSeeds 5 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy
5 | # ConSE
6 | echo "ConSE:"
7 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --numSeeds 5 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --conse_benchmark
8 | # COSTA
9 | echo "COSTA:"
10 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --numSeeds 5 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --costa_benchmark
11 | # Sub. Reg.
12 | echo "Sub. Reg.:"
13 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --numSeeds 5 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj
14 | # wDAE
15 | echo "wDAE:"
16 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --numSeeds 5 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn
17 | # WAvg
18 | echo "WAvg:"
19 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --numSeeds 5 --dataset=CUB --image_embedding=res101_finetuned --vgse_baseline=wavg --class_embedding=att --norm_scale_heuristic
20 | # SMO
21 | echo "SMO:"
22 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --numSeeds 5 --dataset=CUB --image_embedding=res101_finetuned --vgse_baseline=smo --class_embedding=att --vgse_alpha=0 --norm_scale_heuristic
--------------------------------------------------------------------------------
/scripts/table1_sun.sh:
--------------------------------------------------------------------------------
1 | # Table 1, SUN dataset
2 | # Ours
3 | echo "ICIS:"
4 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy
5 | # ConSE
6 | echo "ConSE:"
7 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --conse_benchmark
8 | # COSTA
9 | echo "COSTA:"
10 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --costa_benchmark
11 | # Sub. Reg.
12 | echo "Sub. Reg.:"
13 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj
14 | # wDAE
15 | echo "wDAE:"
16 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn
17 | # WAvg
18 | echo "WAvg:"
19 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --vgse_baseline=wavg --class_embedding=att --norm_scale_heuristic
20 | # SMO
21 | echo "SMO:"
22 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --vgse_baseline=smo --class_embedding=att --vgse_alpha=0 --norm_scale_heuristic
--------------------------------------------------------------------------------
/scripts/table2.sh:
--------------------------------------------------------------------------------
1 | ### Recreates the ablation results of our ICIS model (Table 2)
2 |
3 | ### CUB
4 | # Baseline MLP
5 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy
6 | # Cosine loss
7 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy
8 | # Single-modal
9 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --single_modal_ablation --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy
10 | # Cross-modal
11 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy
12 | # Full
13 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy
14 |
15 | ### AWA2
16 | # Baseline MLP
17 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy
18 | # Cosine loss
19 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy
20 | # Single-modal
21 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --single_modal_ablation --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy
22 | # Cross-modal
23 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy
24 | # Full
25 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=AWA2 --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 20 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy
26 |
27 | ### SUN
28 | # Baseline MLP
29 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 4096 --strict_eval --early_stopping_slope --calc_entropy
30 | # Cosine loss
31 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --single_autoencoder_baseline --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 4096 --strict_eval --early_stopping_slope --calc_entropy
32 | # Single-modal
33 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --single_modal_ablation --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 4096 --strict_eval --early_stopping_slope --calc_entropy
34 | # Cross-modal
35 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 4096 --strict_eval --early_stopping_slope --calc_entropy
36 | # Full
37 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=SUN --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 4096 --strict_eval --early_stopping_slope --calc_entropy
38 |
--------------------------------------------------------------------------------
/scripts/table3_cub.sh:
--------------------------------------------------------------------------------
1 | # Table 3, CUB dataset
2 | echo "-- Using Wiki2Vec class label embeddings --"
3 | # Ours
4 | echo "ICIS:"
5 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=wiki2vec --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy --norm_scale_heuristic --zst --zstfrom=imagenet
6 | # ConSE
7 | echo "ConSE:"
8 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=wiki2vec --conse_benchmark --norm_scale_heuristic --zst --zstfrom=imagenet
9 | # COSTA
10 | echo "COSTA:"
11 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=wiki2vec --costa_benchmark --norm_scale_heuristic --zst --zstfrom=imagenet
12 | # Sub. Reg.
13 | echo "Sub. Reg.:"
14 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=wiki2vec --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj --norm_scale_heuristic --zst --zstfrom=imagenet
15 | # wDAE
16 | echo "wDAE:"
17 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=wiki2vec --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn --norm_scale_heuristic --zst --zstfrom=imagenet
18 | # WAvg
19 | echo "WAvg:"
20 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=wavg --class_embedding=wiki2vec --norm_scale_heuristic --zst --zstfrom=imagenet
21 | # SMO
22 | echo "SMO:"
23 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=smo --class_embedding=wiki2vec --vgse_alpha=0 --norm_scale_heuristic --zst --zstfrom=imagenet
24 |
25 | echo "-- Using ConceptNet class label embeddings --"
26 | # Ours
27 | echo "ICIS:"
28 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=cn --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy --norm_scale_heuristic --zst --zstfrom=imagenet
29 | # ConSE
30 | echo "ConSE:"
31 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=cn --conse_benchmark --norm_scale_heuristic --zst --zstfrom=imagenet
32 | # COSTA
33 | echo "COSTA:"
34 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=cn --costa_benchmark --norm_scale_heuristic --zst --zstfrom=imagenet
35 | # Sub. Reg.
36 | echo "Sub. Reg.:"
37 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=cn --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj --norm_scale_heuristic --zst --zstfrom=imagenet
38 | # wDAE
39 | echo "wDAE:"
40 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=cn --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn --norm_scale_heuristic --zst --zstfrom=imagenet
41 | # WAvg
42 | echo "WAvg:"
43 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=wavg --class_embedding=cn --norm_scale_heuristic --zst --zstfrom=imagenet
44 | # SMO
45 | echo "SMO:"
46 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=smo --class_embedding=cn --vgse_alpha=0 --norm_scale --norm_scale_heuristic --zst --zstfrom=imagenet
47 |
48 | echo "-- Using CLIP class label embeddings --"
49 | # Ours
50 | echo "ICIS:"
51 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=clip --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --calc_entropy --norm_scale_heuristic --zst --zstfrom=imagenet
52 | # ConSE
53 | echo "ConSE:"
54 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=clip --conse_benchmark --norm_scale_heuristic --zst --zstfrom=imagenet
55 | # COSTA
56 | echo "COSTA:"
57 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=clip --costa_benchmark --norm_scale_heuristic --zst --zstfrom=imagenet
58 | # Sub. Reg.
59 | echo "Sub. Reg.:"
60 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=clip --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj --norm_scale_heuristic --zst --zstfrom=imagenet
61 | # wDAE
62 | echo "wDAE:"
63 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=clip --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn --norm_scale_heuristic --zst --zstfrom=imagenet
64 | # WAvg
65 | echo "WAvg:"
66 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=wavg --class_embedding=clip --norm_scale_heuristic --zst --zstfrom=imagenet
67 | # SMO
68 | echo "SMO:"
69 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=smo --class_embedding=clip --vgse_alpha=0 --norm_scale --norm_scale_heuristic --zst --zstfrom=imagenet
--------------------------------------------------------------------------------
/scripts/table4.sh:
--------------------------------------------------------------------------------
1 | echo "Sub. Reg., CUB:"
2 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj
3 |
4 | echo "Sub. Reg. + ICIS, CUB:"
5 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=res101_finetuned --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn
6 |
--------------------------------------------------------------------------------
/scripts/table5_left.sh:
--------------------------------------------------------------------------------
1 | # Table 5 Left
2 | # Ours
3 | echo "ICIS:"
4 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=att --cos_sim_loss --include_unseen --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope
5 | # ConSE
6 | echo "ConSE:"
7 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=att --conse_benchmark
8 | # COSTA
9 | echo "COSTA:"
10 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=att --costa_benchmark
11 | # Sub. Reg.
12 | echo "Sub. Reg.:"
13 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --subspace_proj
14 | # wDAE
15 | echo "wDAE:"
16 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --class_embedding=att --single_autoencoder_baseline --num_layers 2 --beta1 0.9 --lr 0.00001 --batch_size 16 --embed_dim 2048 --strict_eval --early_stopping_slope --daegnn
17 | # WAvg
18 | echo "WAvg:"
19 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=wavg --class_embedding=att --norm_scale_heuristic
20 | # SMO
21 | echo "SMO:"
22 | CUDA_VISIBLE_DEVICES=0 python main.py --cuda --manualSeed 0 --dataset=CUB --image_embedding=pretrained_resnet101 --vgse_baseline=smo --class_embedding=att --vgse_alpha=0 --norm_scale_heuristic
--------------------------------------------------------------------------------
/utility/eval_imagenet.py:
--------------------------------------------------------------------------------
1 | import argparse, os, time
2 | from enum import Enum
3 | import torch
4 | import torch.nn as nn, torch.distributed as dist
5 | from torch.utils.data import Subset
6 | import torchvision.models as models, torchvision.datasets as datasets, torchvision.transforms as transforms
7 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
8 | parser.add_argument('data', metavar='DIR', nargs='?', default='imagenet',
9 | help='path to dataset (default: imagenet)')
10 | model_names = sorted(name for name in models.__dict__
11 |                      if name.islower() and not name.startswith("__") and callable(models.__dict__[name]))
12 | parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
13 |                     choices=model_names,
14 |                     help='model architecture: ' + ' | '.join(model_names) + ' (default: resnet18)')
15 | parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
16 | help='number of data loading workers (default: 4)')
17 | parser.add_argument('--epochs', default=90, type=int, metavar='N',
18 | help='number of total epochs to run')
19 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
20 | help='manual epoch number (useful on restarts)')
21 | parser.add_argument('-b', '--batch-size', default=256, type=int,
22 | metavar='N',
23 | help='mini-batch size (default: 256), this is the total '
24 | 'batch size of all GPUs on the current node when '
25 | 'using Data Parallel or Distributed Data Parallel')
26 | parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
27 | metavar='LR', help='initial learning rate', dest='lr')
28 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
29 | help='momentum')
30 | parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
31 | metavar='W', help='weight decay (default: 1e-4)',
32 | dest='weight_decay')
33 | parser.add_argument('-p', '--print-freq', default=10, type=int,
34 | metavar='N', help='print frequency (default: 10)')
35 | parser.add_argument('--resume', default='', type=str, metavar='PATH',
36 | help='path to latest checkpoint (default: none)')
37 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
38 | help='evaluate model on validation set')
39 | parser.add_argument('--pretrained', dest='pretrained', action='store_true',
40 | help='use pre-trained model')
41 | parser.add_argument('--world-size', default=-1, type=int,
42 | help='number of nodes for distributed training')
43 | parser.add_argument('--rank', default=-1, type=int,
44 | help='node rank for distributed training')
45 | parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
46 | help='url used to set up distributed training')
47 | parser.add_argument('--dist-backend', default='nccl', type=str,
48 | help='distributed backend')
49 | parser.add_argument('--seed', default=None, type=int,
50 | help='seed for initializing training. ')
51 | parser.add_argument('--gpu', default=None, type=int,
52 | help='GPU id to use.')
53 | parser.add_argument('--multiprocessing-distributed', action='store_true',
54 | help='Use multi-processing distributed training to launch '
55 | 'N processes per node, which has N GPUs. This is the '
56 | 'fastest way to use PyTorch for either single node or '
57 | 'multi node data parallel training')
58 | parser.add_argument('--dummy', action='store_true', help="use fake data to benchmark")
59 |
60 | args = parser.parse_args()
61 | args.distributed = args.world_size > 1 or args.multiprocessing_distributed
62 | class LINEAR(nn.Module):
63 | def __init__(self, input_dim, nclass, bias=True):
64 | super(LINEAR, self).__init__()
65 | self.fc = nn.Linear(input_dim, nclass, bias)
66 | def forward(self, x):
67 | o = self.fc(x)
68 | return o
69 |
70 | # Create model
71 | # Load ImageNet Pre-trained ResNet model
72 | resnet = models.resnet101(pretrained=True)
73 | resnet.eval()
74 |
75 | # Load predicted CUB/SUN/AWA2 classifiers
76 | predicted_classifiers = LINEAR(input_dim=2048, nclass=50)
77 | predicted_classifiers.load_state_dict(torch.load('/home/andchri/APZSL-clean/zst-models/CUB_clip'))
78 | predicted_classifiers.eval()
79 |
80 | # Append predicted classifier to Resnet
81 | resnet.fc.weight = nn.Parameter(torch.cat((resnet.fc.weight, predicted_classifiers.fc.weight)))
82 | resnet.fc.bias = nn.Parameter(torch.cat((resnet.fc.bias, predicted_classifiers.fc.bias)))
83 |
84 | # Create ImageNet dataloader
85 | valdir = os.path.join(args.data, 'val')
86 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
87 | std=[0.229, 0.224, 0.225])
88 |
89 | val_dataset = datasets.ImageFolder(
90 | valdir,
91 | transforms.Compose([
92 | transforms.Resize(256),
93 | transforms.CenterCrop(224),
94 | transforms.ToTensor(),
95 | normalize,
96 | ]))
97 | val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True)
98 | def validate(val_loader, model, criterion, args):
99 | def run_validate(loader, base_progress=0):
100 | with torch.no_grad():
101 | end = time.time()
102 | for i, (images, target) in enumerate(loader):
103 | i = base_progress + i
104 | if args.gpu is not None and torch.cuda.is_available():
105 | images = images.cuda(args.gpu, non_blocking=True)
106 | if torch.backends.mps.is_available():
107 | images = images.to('mps')
108 | target = target.to('mps')
109 | if torch.cuda.is_available():
110 | target = target.cuda(args.gpu, non_blocking=True)
111 |
112 | # compute output
113 | output = model(images)
114 | loss = criterion(output, target)
115 |
116 | # measure accuracy and record loss
117 | acc1, acc5 = accuracy(output, target, topk=(1, 5))
118 | losses.update(loss.item(), images.size(0))
119 | top1.update(acc1[0], images.size(0))
120 | top5.update(acc5[0], images.size(0))
121 |
122 | # measure elapsed time
123 | batch_time.update(time.time() - end)
124 | end = time.time()
125 |
126 | if i % args.print_freq == 0:
127 | progress.display(i + 1)
128 |
129 | batch_time = AverageMeter('Time', ':6.3f', Summary.NONE)
130 | losses = AverageMeter('Loss', ':.4e', Summary.NONE)
131 | top1 = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE)
132 | top5 = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE)
133 | progress = ProgressMeter(
134 | len(val_loader) + (args.distributed and (len(val_loader.sampler) * args.world_size < len(val_loader.dataset))),
135 | [batch_time, losses, top1, top5],
136 | prefix='Test: ')
137 |
138 | # switch to evaluate mode
139 | model.eval()
140 |
141 | run_validate(val_loader)
142 | if args.distributed:
143 | top1.all_reduce()
144 | top5.all_reduce()
145 |
146 | if args.distributed and (len(val_loader.sampler) * args.world_size < len(val_loader.dataset)):
147 | aux_val_dataset = Subset(val_loader.dataset,
148 | range(len(val_loader.sampler) * args.world_size, len(val_loader.dataset)))
149 | aux_val_loader = torch.utils.data.DataLoader(
150 | aux_val_dataset, batch_size=args.batch_size, shuffle=False,
151 | num_workers=args.workers, pin_memory=True)
152 | run_validate(aux_val_loader, len(val_loader))
153 |
154 | progress.display_summary()
155 |
156 | return top1.avg
157 |
158 | criterion = nn.CrossEntropyLoss()
159 | # The validation run is invoked at the bottom of this file, after the helper
160 | # classes and functions that validate() depends on have been defined.
161 |
162 | class Summary(Enum):
163 | NONE = 0
164 | AVERAGE = 1
165 | SUM = 2
166 | COUNT = 3
167 |
168 | class AverageMeter(object):
169 | """Computes and stores the average and current value"""
170 | def __init__(self, name, fmt=':f', summary_type=Summary.AVERAGE):
171 | self.name = name
172 | self.fmt = fmt
173 | self.summary_type = summary_type
174 | self.reset()
175 |
176 | def reset(self):
177 | self.val = 0
178 | self.avg = 0
179 | self.sum = 0
180 | self.count = 0
181 |
182 | def update(self, val, n=1):
183 | self.val = val
184 | self.sum += val * n
185 | self.count += n
186 | self.avg = self.sum / self.count
187 |
188 | def all_reduce(self):
189 | if torch.cuda.is_available():
190 | device = torch.device("cuda")
191 | elif torch.backends.mps.is_available():
192 | device = torch.device("mps")
193 | else:
194 | device = torch.device("cpu")
195 | total = torch.tensor([self.sum, self.count], dtype=torch.float32, device=device)
196 | dist.all_reduce(total, dist.ReduceOp.SUM, async_op=False)
197 | self.sum, self.count = total.tolist()
198 | self.avg = self.sum / self.count
199 |
200 | def __str__(self):
201 | fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
202 | return fmtstr.format(**self.__dict__)
203 |
204 | def summary(self):
205 | fmtstr = ''
206 | if self.summary_type is Summary.NONE:
207 | fmtstr = ''
208 | elif self.summary_type is Summary.AVERAGE:
209 | fmtstr = '{name} {avg:.3f}'
210 | elif self.summary_type is Summary.SUM:
211 | fmtstr = '{name} {sum:.3f}'
212 | elif self.summary_type is Summary.COUNT:
213 | fmtstr = '{name} {count:.3f}'
214 | else:
215 | raise ValueError('invalid summary type %r' % self.summary_type)
216 |
217 | return fmtstr.format(**self.__dict__)
218 | 
219 | # Helper utilities required by validate(); these follow the reference PyTorch
220 | # ImageNet example that this script is adapted from.
221 | class ProgressMeter(object):
222 |     def __init__(self, num_batches, meters, prefix=""):
223 |         self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
224 |         self.meters = meters
225 |         self.prefix = prefix
226 | 
227 |     def display(self, batch):
228 |         entries = [self.prefix + self.batch_fmtstr.format(batch)]
229 |         entries += [str(meter) for meter in self.meters]
230 |         print('\t'.join(entries))
231 | 
232 |     def display_summary(self):
233 |         entries = [" *"]
234 |         entries += [meter.summary() for meter in self.meters]
235 |         print(' '.join(entries))
236 | 
237 |     def _get_batch_fmtstr(self, num_batches):
238 |         num_digits = len(str(num_batches // 1))
239 |         fmt = '{:' + str(num_digits) + 'd}'
240 |         return '[' + fmt + '/' + fmt.format(num_batches) + ']'
241 | 
242 | def accuracy(output, target, topk=(1,)):
243 |     """Computes the accuracy over the k top predictions for the specified values of k"""
244 |     with torch.no_grad():
245 |         maxk = max(topk)
246 |         batch_size = target.size(0)
247 |         _, pred = output.topk(maxk, 1, True, True)
248 |         pred = pred.t()
249 |         correct = pred.eq(target.view(1, -1).expand_as(pred))
250 |         res = []
251 |         for k in topk:
252 |             correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
253 |             res.append(correct_k.mul_(100.0 / batch_size))
254 |         return res
255 | 
256 | # Evaluate the extended (ImageNet + injected) classifier on the validation set
257 | acc1 = validate(val_loader=val_loader, model=resnet, criterion=criterion, args=args)
258 | print("Top 1 accuracy Validation set:", acc1)
259 | 
--------------------------------------------------------------------------------
/utility/feature_extraction/feature_extract.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import torch.optim as optim
4 | import torch.backends.cudnn as cudnn
5 | from torch.autograd import Variable
6 | import sys
7 | import random
8 | import numpy as np
9 | from tqdm import tqdm
10 | import torch
11 | import torch.nn as nn
12 | import torch.utils.data
13 | from utility.feature_extraction.extract_util import get_loader, prepare_attri_label, DATA_LOADER, map_label
14 | from opt import get_opt
15 | import torchvision
16 | import timm
17 | import scipy.io
18 |
19 | cudnn.benchmark = True
20 |
21 | opt = get_opt()
22 | # set random seed
23 | if opt.manualSeed is None:
24 | opt.manualSeed = random.randint(1, 10000)
25 | print("Random Seed: ", opt.manualSeed)
26 | random.seed(opt.manualSeed)
27 | torch.manual_seed(opt.manualSeed)
28 | np.random.seed(opt.manualSeed)
29 | if opt.cuda:
30 | torch.cuda.manual_seed_all(opt.manualSeed)
31 |
32 |
33 |
34 | def main():
35 | # load data
36 | data = DATA_LOADER(opt)
37 | opt.test_seen_label = data.test_seen_label # weird
38 |
39 | class_attribute = data.attribute
40 | attribute_zsl = prepare_attri_label(class_attribute, data.unseenclasses).cuda()
41 | attribute_seen = prepare_attri_label(class_attribute, data.seenclasses).cuda()
42 | attribute_gzsl = torch.transpose(class_attribute, 1, 0).cuda()
43 |
44 |
45 | # define test_classes
46 |     if opt.image_type not in ['test_unseen_small_loc', 'test_unseen_loc', 'test_seen_loc']:
47 |         # abort with a message instead of silently swallowing SystemExit
48 |         print("choose the image_type in ImageFileList")
49 |         sys.exit(0)
50 | 
51 |
52 |
53 | # Dataloader for train, test, visual
54 | trainloader, testloader_unseen, testloader_seen, visloader = get_loader(opt, data)
55 |
56 | # define attribute groups
57 | if opt.dataset == 'CUB':
58 | # Change layer
59 | num_classes = 150
60 | elif opt.dataset == 'AWA2':
61 | # Change layer
62 | num_classes = 40
63 | elif opt.dataset == 'SUN':
64 | # Change layer
65 | num_classes = 645
66 |
67 | if 'vit' in opt.backbone:
68 | model = timm.create_model(opt.backbone,pretrained=True,num_classes=num_classes)
69 | if opt.save_features:
70 | model.head = nn.Identity()
71 | else:
72 | ####### load our network, any from here: https://pytorch.org/vision/0.11/models #######
73 | if opt.backbone == 'resnet101_old':
74 | model = torchvision.models.resnet101(weights=torchvision.models.ResNet101_Weights.IMAGENET1K_V1)
75 | else:
76 | if opt.resnet_path is not None:
77 | model = torchvision.models.__dict__[opt.backbone](pretrained=False)
78 | model.load_state_dict(torch.load(opt.resnet_path))
79 | else:
80 | model = torchvision.models.__dict__[opt.backbone](pretrained=True)
81 | model.fc = nn.Linear(opt.feature_size, num_classes)
82 | if opt.save_features:
83 | model.fc = nn.Identity()
84 | print(model)
85 |
86 |
87 |
88 | criterion = nn.CrossEntropyLoss()
89 |
90 | if torch.cuda.is_available():
91 | model.cuda()
92 |
93 | if opt.save_features:
94 | name=opt.dataset+'_'+opt.backbone+'_fix'+'.mat'
95 | model.eval()
96 | img_files = []
97 | features = []
98 | labels = []
99 | with torch.no_grad():
100 | loaders = [trainloader]
101 | for loader in loaders:
102 | for i, (batch_input, batch_target, impath) in enumerate(loader):
103 | input_v = Variable(batch_input)
104 | if opt.cuda:
105 | input_v = input_v.cuda()
106 | output = model(input_v).to('cpu')
107 | for j in range(len(batch_target)):
108 | img_files.append([np.array([impath[j].squeeze().replace(' ','')])])
109 | labels.append(np.array([batch_target[j].item()+1],dtype=np.int16))
110 | features.append(output[j].numpy())
111 | scipy.io.savemat(name, mdict={'image_files': img_files, 'features': features, 'labels': np.array(labels)})
112 |
113 | exit(0)
114 |
115 |
116 | print('Train and test...')
117 | for epoch in range(opt.nepoch):
118 | model.train()
119 | current_lr = opt.classifier_lr * (0.8 ** (epoch // 10))
120 | optimizer = optim.Adam(params=filter(lambda p: p.requires_grad, model.parameters()),
121 | lr=current_lr, betas=(opt.beta1, 0.999))
122 | # loss for print
123 | loss_log = {'ave_loss': 0}
124 |
125 | batch = len(trainloader)
126 | for i, (batch_input, batch_target, impath) in enumerate(trainloader):
127 | model.zero_grad()
128 | # map target labels
129 | batch_target = map_label(batch_target, data.seenclasses)
130 | input_v = Variable(batch_input)
131 | label_v = Variable(batch_target)
132 | if opt.cuda:
133 | input_v = input_v.cuda()
134 | label_v = label_v.cuda()
135 | output = model(input_v)
136 |
137 | loss = criterion(output, label_v)
138 | loss_log['ave_loss'] += loss.item()
139 | loss.backward()
140 | optimizer.step()
141 |
142 | print('\n[Epoch %d, Batch %5d] Train loss: %.3f '
143 | % (epoch+1, batch, loss_log['ave_loss'] / batch))
144 |
145 |
146 |
147 | if __name__ == '__main__':
148 | main()
--------------------------------------------------------------------------------
/utility/load_wordembeddings.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import clip
4 | import os
5 |
6 | def prepare_vocab(opt, matcontent, zst_mode=False):
7 | vocab = []
8 | if zst_mode:
9 | dataset = opt.zstfrom
10 | else:
11 | dataset = opt.dataset
12 | for cls_name in matcontent['allclasses_names']:
13 | if dataset == 'CUB':
14 | vocab.append(cls_name[0][0][4:])
15 | elif dataset == 'SUN':
16 | vocab.append(cls_name[0][0])
17 | elif dataset == 'AWA2':
18 | vocab.append(cls_name[0][0].replace('+', '_'))
19 | else:
20 | raise NotImplementedError
21 |
22 | return vocab
23 |
24 | def prep_imagenet_vocab(imagenet_vocab):
25 | pruned_vocab = []
26 | for label in imagenet_vocab:
27 | first_label = label.split(",")[0]
28 | pruned_vocab.append(first_label)
29 | return pruned_vocab
30 |
31 | def get_clip_embeddings(opt, vocab):
32 | device = "cuda" if torch.cuda.is_available() else "cpu"
33 | model, preprocess = clip.load("RN101", device=device)
34 |
35 | clip_embeddings = []
36 | input_text = []
37 | #prompt = 'an image of a'
38 | prompt = 'a photo of a'
39 | templates80 = [
40 | 'a bad photo of a {}.',
41 | 'a photo of many {}.',
42 | 'a sculpture of a {}.',
43 | 'a photo of the hard to see {}.',
44 | 'a low resolution photo of the {}.',
45 | 'a rendering of a {}.',
46 | 'graffiti of a {}.',
47 | 'a bad photo of the {}.',
48 | 'a cropped photo of the {}.',
49 | 'a tattoo of a {}.',
50 | 'the embroidered {}.',
51 | 'a photo of a hard to see {}.',
52 | 'a bright photo of a {}.',
53 | 'a photo of a clean {}.',
54 | 'a photo of a dirty {}.',
55 | 'a dark photo of the {}.',
56 | 'a drawing of a {}.',
57 | 'a photo of my {}.',
58 | 'the plastic {}.',
59 | 'a photo of the cool {}.',
60 | 'a close-up photo of a {}.',
61 | 'a black and white photo of the {}.',
62 | 'a painting of the {}.',
63 | 'a painting of a {}.',
64 | 'a pixelated photo of the {}.',
65 | 'a sculpture of the {}.',
66 | 'a bright photo of the {}.',
67 | 'a cropped photo of a {}.',
68 | 'a plastic {}.',
69 | 'a photo of the dirty {}.',
70 | 'a jpeg corrupted photo of a {}.',
71 | 'a blurry photo of the {}.',
72 | 'a photo of the {}.',
73 | 'a good photo of the {}.',
74 | 'a rendering of the {}.',
75 | 'a {} in a video game.',
76 | 'a photo of one {}.',
77 | 'a doodle of a {}.',
78 | 'a close-up photo of the {}.',
79 | 'a photo of a {}.',
80 | 'the origami {}.',
81 | 'the {} in a video game.',
82 | 'a sketch of a {}.',
83 | 'a doodle of the {}.',
84 | 'a origami {}.',
85 | 'a low resolution photo of a {}.',
86 | 'the toy {}.',
87 | 'a rendition of the {}.',
88 | 'a photo of the clean {}.',
89 | 'a photo of a large {}.',
90 | 'a rendition of a {}.',
91 | 'a photo of a nice {}.',
92 | 'a photo of a weird {}.',
93 | 'a blurry photo of a {}.',
94 | 'a cartoon {}.',
95 | 'art of a {}.',
96 | 'a sketch of the {}.',
97 | 'a embroidered {}.',
98 | 'a pixelated photo of a {}.',
99 | 'itap of the {}.',
100 | 'a jpeg corrupted photo of the {}.',
101 | 'a good photo of a {}.',
102 | 'a plushie {}.',
103 | 'a photo of the nice {}.',
104 | 'a photo of the small {}.',
105 | 'a photo of the weird {}.',
106 | 'the cartoon {}.',
107 | 'art of the {}.',
108 | 'a drawing of the {}.',
109 | 'a photo of the large {}.',
110 | 'a black and white photo of a {}.',
111 | 'the plushie {}.',
112 | 'a dark photo of a {}.',
113 | 'itap of a {}.',
114 | 'graffiti of the {}.',
115 | 'a toy {}.',
116 | 'itap of my {}.',
117 | 'a photo of a cool {}.',
118 | 'a photo of a small {}.',
119 | 'a tattoo of the {}.',
120 | ]
121 |
122 | for word in vocab:
123 | word = word.replace('_', ' ')
124 | word = word.lower()
125 | if word[0] in ['a', 'e', 'i', 'o', 'A', 'E', 'I', 'O']:
126 | input_text.append(prompt + 'n ' + word)
127 | else:
128 | input_text.append(prompt + ' ' + word)
129 | text = clip.tokenize(input_text).to(device)
130 |
131 | with torch.no_grad():
132 | text_features = model.encode_text(text)
133 |
134 | embeddings = text_features / torch.norm(text_features.float(), dim=-1).unsqueeze(-1)
135 |
136 | return embeddings
137 |
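138 | # --- Illustrative usage sketch (not part of the original file) ---
139 | # Minimal example of how the helpers above compose: class names are read from an
140 | # attribute-splits .mat file, turned into a vocabulary, and embedded with CLIP.
141 | # The .mat path and the option fields below are assumptions for the example.
142 | if __name__ == '__main__':
143 |     import argparse
144 |     import scipy.io as sio
145 |     opt = argparse.Namespace(dataset='CUB', zstfrom='CUB')
146 |     matcontent = sio.loadmat('att_splits.mat')  # hypothetical path to the split file
147 |     vocab = prepare_vocab(opt, matcontent)
148 |     embeddings = get_clip_embeddings(opt, vocab)
149 |     print(embeddings.shape)  # (num_classes, 512) for the CLIP RN101 text encoder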
--------------------------------------------------------------------------------
/utility/model_bases.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 |
4 | class LINEAR(nn.Module):
5 | def __init__(self, input_dim, nclass, bias=True):
6 | super(LINEAR, self).__init__()
7 | self.fc = nn.Linear(input_dim, nclass, bias)
8 | def forward(self, x):
9 | o = self.fc(x)
10 | return o
11 |
12 | class LINEAR_TO_COS_SIM(nn.Module):
13 | def __init__(self, weights):
14 |         super(LINEAR_TO_COS_SIM, self).__init__()
15 |         self.weights = weights
16 |         self.cos = nn.CosineSimilarity(dim=0)
17 |     def forward(self, x):
18 |         out = []
19 |         for sample in x:
20 |             temp = []
21 |             for weight in self.weights:
22 |                 temp.append(self.cos(weight, sample))
23 |             out.append(torch.stack(temp))
24 |         o = torch.stack(out)
25 | return o
26 |
27 | class WEIGHT_PREDICTOR(nn.Module):
28 | def __init__(self, input_dim, embed_dim, output_dim, num_layers=3):
29 | super(WEIGHT_PREDICTOR, self).__init__()
30 |         assert num_layers in [1, 2, 3, 4]
31 | self.num_layers = num_layers
32 | if num_layers == 4:
33 | self.fc1 = nn.Linear(input_dim, embed_dim)
34 | self.fc2 = nn.Linear(embed_dim, embed_dim)
35 | self.fc3 = nn.Linear(embed_dim, embed_dim)
36 | self.fc4 = nn.Linear(embed_dim, output_dim)
37 | if num_layers == 3:
38 | self.fc1 = nn.Linear(input_dim, embed_dim)
39 | self.fc2 = nn.Linear(embed_dim, embed_dim)
40 | self.fc3 = nn.Linear(embed_dim, output_dim)
41 | elif num_layers == 2:
42 | self.fc1 = nn.Linear(input_dim, embed_dim)
43 | self.fc2 = nn.Linear(embed_dim, output_dim)
44 | else:
45 | self.fc1 = nn.Linear(input_dim, output_dim)
46 | self.relu = nn.ReLU()
47 | self.soft = nn.Softmax(dim=1)
48 |
49 | def forward(self, x):
50 | if self.num_layers == 4:
51 | o = self.fc4(self.relu(self.fc3(self.relu(self.fc2(self.relu(self.fc1(x)))))))
52 | elif self.num_layers == 3:
53 | o = self.fc3(self.relu(self.fc2(self.relu(self.fc1(x)))))
54 | elif self.num_layers == 2:
55 | o = self.fc2(self.relu(self.fc1(x)))
56 | else:
57 | o = self.fc1(x)
58 | return o
59 |
60 |
61 | class AUTOENCODER(nn.Module):
62 | def __init__(self, opt, input_dim, embed_dim, output_dim=None, num_layers=3, vae=False, bias=True):
63 | super(AUTOENCODER, self).__init__()
64 | self.opt = opt
65 | self.input_dim = input_dim
66 | self.output_dim = output_dim
67 | if output_dim is None:
68 | self.output_dim = input_dim
69 | if vae:
70 | self.embed_dim = [2 * embed_dim, embed_dim]
71 | else:
72 | self.embed_dim = [embed_dim, embed_dim]
73 | if num_layers == 2:
74 | self.encoder = nn.Sequential(
75 | nn.Linear(self.input_dim, self.embed_dim[0]),
76 | nn.ReLU(inplace=True) if not vae else nn.Identity(inplace=True)
77 | )
78 |
79 | self.decoder = nn.Sequential(
80 | nn.Linear(self.embed_dim[1], self.output_dim)
81 | )
82 | if num_layers == 3:
83 | self.encoder = nn.Sequential(
84 | nn.Linear(self.input_dim, self.embed_dim[0]),
85 | nn.ReLU(inplace=True) if not vae else nn.Identity(inplace=True)
86 | )
87 |
88 | self.decoder = nn.Sequential(
89 | nn.Linear(self.embed_dim[1], 1000),
90 | nn.ReLU(inplace=True),
91 | nn.Linear(1000, self.output_dim)
92 | )
93 | if num_layers == 4:
94 | self.encoder = nn.Sequential(
95 | nn.Linear(self.input_dim, self.embed_dim[0]),
96 | nn.ReLU(inplace=True),
97 | nn.Linear(self.embed_dim[0], self.embed_dim[0]),
98 | nn.ReLU(inplace=True) if not vae else nn.Identity(inplace=True)
99 | )
100 |
101 | self.decoder = nn.Sequential(
102 | nn.Linear(self.embed_dim[1], 1000),
103 | nn.ReLU(inplace=True),
104 | nn.Linear(1000, self.output_dim)
105 | )
106 |
107 | def encode(self, x):
108 | return self.encoder(x)
109 |
110 | def decode(self, x):
111 | return self.decoder(x)
112 |
113 | def forward(self, x):
114 | z = self.encode(x)
115 | return self.decoder(z)
116 |
117 |
118 | class ATT_AUTOENCODER(AUTOENCODER):
119 | def __init__(self, *args, **kwargs):
120 | super().__init__(*args, **kwargs)
121 | self.encoder = nn.Sequential(
122 | nn.Linear(self.input_dim, 1450),
123 | nn.ReLU(inplace=True),
124 |             nn.Linear(1450, self.embed_dim[0]),
125 | nn.ReLU(inplace=True)
126 | )
127 |
128 | self.decoder = nn.Sequential(
129 |             nn.Linear(self.embed_dim[1], 660),
130 | nn.ReLU(inplace=True),
131 | nn.Linear(660, self.output_dim)
132 | )
133 |
134 | class WEIGHT_AUTOENCODER(AUTOENCODER):
135 | def __init__(self, *args, **kwargs):
136 | super().__init__(*args, **kwargs)
137 | self.encoder = nn.Sequential(
138 | nn.Linear(self.input_dim, 1560),
139 | nn.ReLU(inplace=True),
140 |             nn.Linear(1560, self.embed_dim[0]),
141 | nn.ReLU(inplace=True)
142 | )
143 |
144 | self.decoder = nn.Sequential(
145 |             nn.Linear(self.embed_dim[1], 1660),
146 | nn.ReLU(inplace=True),
147 | nn.Linear(1660, self.output_dim)
148 | )
149 |
150 |
151 | class JOINT_AUTOENCODER(nn.Module):
152 | def __init__(self, opt, autoencoder1, autoencoder2):
153 | super(JOINT_AUTOENCODER, self).__init__()
154 | self.ae1 = autoencoder1
155 | self.ae2 = autoencoder2
156 |
157 | def encode1(self, x):
158 | return self.ae1.encode(x)
159 |
160 | def encode2(self, x):
161 | return self.ae2.encode(x)
162 |
163 | def decode1(self, x):
164 | return self.ae1.decode(x)
165 |
166 | def decode2(self, x):
167 | return self.ae2.decode(x)
168 |
169 | def forward(self, x):
170 | att_in, weight_in = x
171 | latent_att = self.encode1(att_in)
172 | latent_weight = self.encode2(weight_in)
173 |
174 | att_from_att = self.decode1(latent_att)
175 | att_from_weight = self.decode1(latent_weight)
176 | weight_from_weight = self.decode2(latent_weight)
177 | weight_from_att = self.decode2(latent_att)
178 |
179 | return att_from_att, att_from_weight, weight_from_weight, weight_from_att, latent_att, latent_weight
180 |
181 | def predict(self, x):
182 | # Given attributes, predict weights
183 | latent_att = self.encode1(x)
184 | return self.decode2(latent_att)
185 |
186 |
187 | class VAE(AUTOENCODER):
188 | def __init__(self, *args, **kwargs):
189 | super().__init__(vae=True, *args, **kwargs)
190 |
191 | def reparameterize(self, mu, logvar, noise=True):
192 | if noise:
193 | sigma = torch.exp(logvar)
194 | eps = torch.FloatTensor(logvar.size()[0], 1).normal_(0, 1)
195 | if self.opt.cuda:
196 | eps = eps.cuda()
197 | eps = eps.expand(sigma.size())
198 | return mu + sigma * eps
199 | else:
200 | return mu
201 |
202 | def encode(self, x):
203 | out = self.encoder(x)
204 | out = torch.split(out,out.shape[1]//2, dim=1)
205 | mu_batch, logvar_batch = out[0], out[1]
206 | kl_div = (0.5 * torch.sum(1 + logvar_batch - mu_batch.pow(2) - logvar_batch.exp()))
207 | return self.reparameterize(mu_batch, logvar_batch), kl_div, mu_batch, logvar_batch
208 |
209 | def decode(self, x):
210 | return self.decoder(x)
211 |
212 | def forward(self, x):
213 | z, kl_div, mu, logvar, = self.encode(x)
214 | return self.decoder(z), kl_div, mu, logvar
215 |
216 | class JOINT_VAE(JOINT_AUTOENCODER):
217 | def __init__(self, *args, **kwargs):
218 | super().__init__(*args, **kwargs)
219 |
220 | def forward(self, x):
221 | att_in, weight_in = x
222 | (z_att, kl_div_att, mu_att, logvar_att) = self.encode1(att_in)
223 | (z_weight, kl_div_weight, mu_weight, logvar_weight) = self.encode2(weight_in)
224 | att_from_att = self.decode1(z_att)
225 | att_from_weight = self.decode1(z_weight)
226 | weight_from_weight = self.decode2(z_weight)
227 | weight_from_att = self.decode2(z_att)
228 |
229 | return att_from_att, att_from_weight, weight_from_weight, weight_from_att, z_att, z_weight, kl_div_att, kl_div_weight, mu_att, logvar_att, mu_weight, logvar_weight
230 |
231 | def predict(self, x):
232 | # Given attributes, predict weights
233 | (z_att, kl_div_att, mu_att, logvar_att) = self.encode1(x)
234 | return self.decode2(z_att)
235 |
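236 | # --- Illustrative usage sketch (not part of the original file) ---
237 | # Shows how JOINT_AUTOENCODER.predict() maps class attributes to classifier
238 | # weights. The dimensions (312-d CUB attributes, 2048-d ResNet classifier
239 | # weights, 1024-d latent) are assumptions chosen for the example.
240 | if __name__ == '__main__':
241 |     import argparse
242 |     opt = argparse.Namespace(cuda=False)
243 |     att_ae = AUTOENCODER(opt, input_dim=312, embed_dim=1024, num_layers=2)
244 |     weight_ae = AUTOENCODER(opt, input_dim=2048, embed_dim=1024, num_layers=2)
245 |     joint = JOINT_AUTOENCODER(opt, att_ae, weight_ae)
246 |     attributes = torch.randn(50, 312)              # one attribute vector per class
247 |     predicted_weights = joint.predict(attributes)  # encode attributes, decode as weights
248 |     print(predicted_weights.shape)                 # torch.Size([50, 2048])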
--------------------------------------------------------------------------------
/utility/plot_data_ablation.py:
--------------------------------------------------------------------------------
1 | import matplotlib as mpl
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 | from matplotlib.legend_handler import HandlerLine2D, HandlerTuple
5 |
6 | plt.rcParams.update({ "text.usetex":True, "font.family": "serif"})
7 |
8 | # CUB
9 | axx = [150/150*100, 125/150*100, 100/150*100, 75/150*100, 50/150*100, 37/150*100, 25/150*100, 12/150*100]
10 |
11 | fs = 14
12 | zslc = 'mediumblue' # 'chocolate'
13 | gzslc ='mediumblue'
14 | tickcl=12
15 |
16 |
17 | ### ZSL
18 | fig,ax = plt.subplots()
19 | fig.set_figheight(3)
20 | fig.set_figwidth(6)
21 |
22 | a1, = ax.plot(axx,
23 | [60.9, 57.4, 51.0, 44.5, 37.4, 31.5, 29.5, 14.3],
24 | color=zslc,
25 | marker="*",
26 | label="Full ICIS (+ include unseen)")
27 |
28 | b1, = ax.plot(axx,
29 | [60.2, 57.8, 50.4, 44.7, 34.8, 29.3, 22.8, 11.1],
30 | color=zslc,
31 | marker="x",
32 | linestyle="dashed",
33 | label="+ Cross-modal")
34 |
35 | c1, = ax.plot(axx,
36 | [58.1, 56.0, 48.7, 44.3, 24.4, 27.6, 21.0, 9.9],
37 | color=zslc,
38 | marker="d",
39 | linestyle="dashdot",
40 | label="+ Single-modal")
41 |
42 | d1, = ax.plot(axx,
43 | [54.1, 49.9, 42.8, 34.3, 5.5, 21.8, 20.2, 8.9],
44 | color=zslc,
45 | marker="s",
46 | linestyle=(0, (5, 10)),
47 | label="+ Cosine loss")
48 |
49 | e1, = ax.plot(axx,
50 | [41.5, 38.8, 32.6, 28.1, 5.2, 8.0, 13.0, 8.5],
51 | color=zslc,
52 | marker="o",
53 | linestyle="dotted",
54 | label="MLP base model")
55 |
56 | # set x-axis label
57 | ax.set_xlabel("\% of seen classes", fontsize = fs)
58 | # set y-axis label
59 | ax.set_ylim(bottom=0, top=65)
60 | ax.yaxis.label.set_color(zslc)
61 | ax.set_ylabel("I-ZSL, Acc\%",
62 | color=zslc,
63 | fontsize=fs)
64 | ax.tick_params(axis='y', colors=zslc,labelsize=tickcl)
65 | ax.set_xlim(left=axx[-1]+1, right=axx[0]+1)
66 | ax.tick_params(axis='x', labelsize=tickcl)
67 | plt.xticks(np.arange(10, 100, 10))
68 |
69 | l = plt.legend([e1, d1, c1, b1, a1], ['MLP base model', '+ Cosine loss', '+ Single-modal', '+ Cross-modal', 'ICIS (full)'],
70 | handlelength=3, borderpad=0.7, labelspacing=0.7, loc='lower right', fontsize=8) # 'upper left'
71 |
72 | #save the plot as a file
73 | fig.savefig('numsamples_ablation_zsl.pdf',
74 | format='pdf',
75 | dpi=1200,
76 | bbox_inches='tight')
77 |
78 | ### GZSL
79 | fig,ax = plt.subplots()
80 | fig.set_figheight(3)
81 | fig.set_figwidth(6)
82 | # make a plot
83 | a1, = ax.plot(axx,
84 | [56.7, 54.8, 50.4, 46.1, 38.4, 35.3, 29.5, 17.1],
85 | color=gzslc,
86 | marker="*",
87 | label="Full ICIS (+ include unseen)")
88 |
89 | b1, = ax.plot(axx,
90 | [56.0, 53.8, 47.7, 44.2, 30.9, 29.7, 23.2, 12.1],
91 | color=gzslc,
92 | marker="x",
93 | linestyle="dashed",
94 | label="+ Cross-modal")
95 |
96 | c1, = ax.plot(axx,
97 | [52.7, 51.6, 44.9, 42.6, 14.8, 25.4, 19.7, 10.1],
98 | color=gzslc,
99 | marker="d",
100 | linestyle="dashdot",
101 | label="+ Single-modal")
102 |
103 | d1, = ax.plot(axx,
104 | [50.9, 48.3, 42.8, 36.2, 0.0, 23.5, 23.3, 9.8],
105 | color=gzslc,
106 | marker="s",
107 | linestyle=(0, (5, 10)),
108 | label="+ Cosine loss")
109 |
110 | e1, = ax.plot(axx,
111 | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
112 | color=gzslc,
113 | marker="o",
114 | linestyle="dotted",
115 | label="MLP base model")
116 |
117 | # set x-axis label
118 | ax.set_xlabel("\% of seen classes", fontsize = fs)
119 | # set y-axis label
120 | ax.set_ylim(bottom=0, top=65)
121 | ax.yaxis.label.set_color(gzslc)
122 | ax.set_ylabel("I-GZSL, H",
123 | color=gzslc,
124 | fontsize=fs)
125 | ax.tick_params(axis='y', colors=gzslc,labelsize=tickcl)
126 | ax.set_xlim(left=axx[-1]+1, right=axx[0]+1)
127 | ax.tick_params(axis='x', labelsize=tickcl)
128 | plt.xticks(np.arange(10, 100, 10))
129 |
130 | l = plt.legend([e1, d1, c1, b1, a1], ['MLP base model', '+ Cosine loss', '+ Single-modal', '+ Cross-modal', 'ICIS (full)'],
131 | handlelength=3, borderpad=0.7, labelspacing=0.7, loc='lower right', fontsize=8)
132 |
133 | #save the plot as a file
134 | fig.savefig('numsamples_ablation_gzsl.pdf',
135 | format='pdf',
136 | dpi=1200,
137 | bbox_inches='tight')
138 |
--------------------------------------------------------------------------------
/utility/plot_prediction_bins.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import matplotlib.pyplot as plt
3 | from matplotlib.legend_handler import HandlerLine2D, HandlerTuple
4 | import torch
5 | import torch.nn as nn
6 | import numpy as np
7 | import scipy.io as sio
8 |
9 | plt.rcParams.update({ "text.usetex":True, "font.family": "serif"})
10 |
11 | def create_dict_foldernum_to_label(matcontent_splits):
12 | cls_names = matcontent_splits['allclasses_names']
13 | folder_to_label_dict = {}
14 | for n, name in enumerate(cls_names):
15 | folder_to_label_dict[name[0][0]] = n
16 | return folder_to_label_dict
17 |
18 | parser = argparse.ArgumentParser()
19 | parser.add_argument('--dataset', default='CUB', help='FLO')
20 | parser.add_argument('--validation', action='store_true', default=False, help='enable cross validation mode (Deprecated)')
21 | parser.add_argument('--dataroot', default='/home/shared/iccv-apzsl/Feature-Generation-datasets', help='path to dataset')
22 | parser.add_argument('--matdataset', default=True, help='Data in matlab format')
23 | parser.add_argument('--image_embedding', default='tf_finetune')
24 | parser.add_argument('--class_embedding', default='att')
25 | parser.add_argument('--baseline', default='SMO', choices=['SMO', 'ConSE'], help='which baseline predictions to plot')
26 | opt = parser.parse_args()
27 | opt.zst = False
28 | assert opt.baseline in ['SMO', 'ConSE']
29 |
30 | matcontent = sio.loadmat(opt.dataroot + "/" + opt.dataset + "/" + opt.image_embedding + ".mat")
31 | print("using the matcontent:", opt.dataroot + "/" + opt.dataset + "/" + opt.image_embedding + ".mat")
32 |
33 | feature = matcontent['features'].T
34 | if 'CUB_' in opt.image_embedding:
35 | feature = feature.T
36 | label = matcontent['labels'].astype(int).squeeze() - 1
37 | mat_path = opt.dataroot + "/" + opt.dataset + "/" + opt.class_embedding + "_splits.mat"
38 | matcontent = sio.loadmat(mat_path)
39 | print("using the matcontent:", mat_path)
40 |
41 | trainval_loc = matcontent['trainval_loc'].squeeze() - 1
42 | train_loc = matcontent['train_loc'].squeeze() - 1
43 | val_unseen_loc = matcontent['val_loc'].squeeze() - 1
44 | test_seen_loc = matcontent['test_seen_loc'].squeeze() - 1
45 | test_unseen_loc = matcontent['test_unseen_loc'].squeeze() - 1
46 |
47 | attribute = torch.from_numpy(matcontent['att'].T).float()
48 |
49 | folder_to_label_dict = create_dict_foldernum_to_label(matcontent)
50 | label_to_folder_dict = {v: k for k, v in folder_to_label_dict.items()}
51 |
52 | train_feature = torch.from_numpy(feature[trainval_loc]).float()
53 | train_label = torch.from_numpy(label[trainval_loc]).long()
54 | test_unseen_feature = torch.from_numpy(feature[test_unseen_loc]).float()
55 | test_unseen_label = torch.from_numpy(label[test_unseen_loc]).long()
56 | test_seen_feature = torch.from_numpy(feature[test_seen_loc]).float()
57 | test_seen_label = torch.from_numpy(label[test_seen_loc]).long()
58 |
59 | seenclasses = torch.from_numpy(np.unique(train_label.numpy()))
60 | unseenclasses = torch.from_numpy(np.unique(test_unseen_label.numpy()))
61 | ntrain = train_feature.size()[0]
62 | ntrain_class = seenclasses.size(0)
63 | ntest_class = unseenclasses.size(0)
64 | train_class = seenclasses.clone()
65 | allclasses = torch.arange(0, ntrain_class+ntest_class).long()
66 |
67 | per_cls_acc_gzsl = torch.load('nounseen_percls_acc_CUBtf_finetune_len_test_4731_len_tar_200.pt')
68 | per_cls_acc_unseen_zsl = torch.load('nounseen_percls_acc_CUBtf_finetune_len_test_2967_len_tar_50.pt')
69 | pred_matrix_gzsl = torch.load('nounseen_pred_matrixCUBtf_finetune_len_test_4731_len_tar_200.pt')
70 | pred_matrix_unseen_zsl = torch.load('nounseen_pred_matrixCUBtf_finetune_len_test_2967_len_tar_50.pt')
71 |
72 | # GZSL plot
73 | min_acc, min_idx = torch.min(per_cls_acc_gzsl[-len(unseenclasses):], dim=0)
74 | min_idx = min_idx + len(seenclasses)
75 | low_row = pred_matrix_gzsl[min_idx, :]
76 | low_confuse = low_row.nonzero().squeeze()
77 | low_confuse = low_confuse[low_confuse != min_idx]
78 | labels = torch.cat((seenclasses, unseenclasses))
79 | low_confuse_labels = []
80 | for idx in low_confuse:
81 | low_confuse_labels.append(labels[idx].numpy())
82 | low_confuse_labels = np.array(low_confuse_labels)
83 |
84 | low_acc_cls = str(label_to_folder_dict[int(labels[min_idx].numpy())])[4:].replace("_", " ")
85 | print("Ours, GZSL, Low acc class:", low_acc_cls, "with probability:", min_acc)
86 | for label in low_confuse_labels:
87 | print(label_to_folder_dict[int(label)][4:].replace("_", " "))
88 |
89 | # Ordering based on attribute cosine similarity
90 | min_att = attribute[labels[min_idx]]
91 | cosine_sims = []
92 | cos = nn.CosineSimilarity(dim=0, eps=1e-8)
93 | attributes = torch.cat((attribute[seenclasses], attribute[unseenclasses]))
94 | for att in attributes:
95 | cosine_sims.append(cos(min_att, att))
96 | cosine_sims = torch.stack(cosine_sims)
97 | cosine_sims_ordered, cosine_sim_indeces = torch.topk(cosine_sims, k=len(attributes))
98 |
99 | ordered_probs = low_row[cosine_sim_indeces]
100 | ordered_labels = labels[cosine_sim_indeces]
101 | ordered_cls_names = []
102 | bar_colors = []
103 | line_styles = []
104 | for label in ordered_labels:
105 | cls_name = label_to_folder_dict[int(label)][4:].replace("_", " ")
106 | if label in unseenclasses:
107 | bar_colors.append('mediumblue')
108 | line_styles.append('--')
109 | else:
110 | bar_colors.append('chocolate')
111 | line_styles.append('-')
112 | ordered_cls_names.append(cls_name)
113 |
114 | ordered_cls_names = np.array(ordered_cls_names)
115 |
116 | # Bar plot of bins of similar classes (coarse)
117 | fig = plt.figure(figsize = (10, 5))
118 | bin_size = 10
119 | bin_labels = [f'Rank {int(x-bin_size) + 1} to {int(x)}' for x in bin_size * np.arange(start=1, stop=len(ordered_probs)/bin_size+1)]
120 | binned_probs = np.squeeze(np.sum(np.reshape(ordered_probs.numpy(), (len(ordered_probs)//bin_size, bin_size)), axis=1))
121 | barlist = plt.bar(bin_labels, binned_probs, color ='mediumblue',
122 | width = 0.4)
123 |
124 | plt.xlabel("CUB classes ordered by similarity with " + low_acc_cls, fontsize = 16)
125 | plt.xticks(rotation=45, ha='right')
126 |
127 | plt.ylabel("Fraction of predictions", fontsize = 16)
128 | plt.savefig('overview_low_acc_class.png',
129 | format='png',
130 | dpi=1600,
131 | bbox_inches='tight')
132 |
133 |
134 | # Bar plot of n most similar classes (finegrained)
135 | fig = plt.figure(figsize = (10, 5))
136 | nonzero_idxs = np.array(ordered_probs.nonzero().squeeze())
137 | barlist = plt.bar(ordered_cls_names[nonzero_idxs], ordered_probs.numpy()[nonzero_idxs], color ='mediumblue', width = 0.4)
138 |
139 | bar_colors = np.array(bar_colors)[nonzero_idxs]
140 | line_styles = np.array(line_styles)[nonzero_idxs]
141 | for n, ls in enumerate(line_styles):
142 | if ls == '--':
143 | barlist[n].set_color('w')
144 | barlist[n].set_linewidth(4)
145 | barlist[n].set_linestyle(ls)
146 | barlist[n].set_edgecolor('mediumblue')
147 |
148 | plt.xlabel("Classes ordered by similarity to " + low_acc_cls, fontsize = 16)
149 | plt.xticks(rotation=45, ha='right')
150 |
151 | colors = {'Seen class':'mediumblue', 'Unseen class':'w'}
152 | linestyles = {'Seen class':'-', 'Unseen class':'--'}
153 | edgecolors = {'Seen class':'mediumblue', 'Unseen class':'mediumblue'}
154 | labels = list(colors.keys())
155 | handles = [plt.Rectangle((0,0),1,1, facecolor=colors[label], linewidth=1, linestyle=linestyles[label], edgecolor=edgecolors[label]) for label in labels]
156 | plt.legend(handles, labels)
157 |
158 | plt.ylabel("Fraction of predictions", fontsize = 16)
159 | plt.savefig('low_acc_class.png',
160 | format='png',
161 | dpi=1600,
162 | bbox_inches='tight')
163 |
--------------------------------------------------------------------------------
/utility/train_base.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 | import torch.optim as optim
5 | import utility.util as util
6 | import os
7 | import copy
8 | import torchvision
9 | import utility.model_bases as model
10 | from regressor import REGRESSOR
11 |
12 | class BASECLASSIFIER(REGRESSOR):
13 | def __init__(self, _train_X, _train_Y, data_loader, _nclass, _cuda, seedinfo, train_base=True, _lr=0.001, _beta1=0.5, _nepoch=20, _batch_size=100, _embed_dim=1000, _num_layers=3, opt=None):
14 | super().__init__(_train_X, _train_Y, data_loader, _nclass, _cuda, seedinfo, train_base, _lr, _beta1, _nepoch, _batch_size, _embed_dim, _num_layers, opt)
15 | self.opt = opt
16 | self.train_base = train_base
17 |
18 | self.nepoch = _nepoch
19 | self.model = model.LINEAR(self.input_dim, len(self.seenclasses))
20 | self.model.apply(util.weights_init)
21 | self.criterion = nn.CrossEntropyLoss()
22 | self.optimizer_classifier = optim.Adam(self.model.parameters(), lr=self.opt.classifier_lr, betas=(self.opt.classifier_beta1, 0.999))
23 | self.input = torch.FloatTensor(_batch_size, self.input_dim)
24 | self.label = torch.LongTensor(_batch_size)
25 |
26 | if self.cuda:
27 | self.model.cuda()
28 | self.criterion.cuda()
29 | self.input = self.input.cuda()
30 | self.label = self.label.cuda()
31 |
32 | self.index_in_epoch = 0
33 | self.epochs_completed = 0
34 | self.ntrain = self.train_X.size()[0]
35 |
36 | def fit(self):
37 | best_H = 0
38 | best_seen = 0
39 | best_unseen = 0
40 |
41 | if self.train_base:
42 | for epoch in range(self.nepoch):
43 | for i in range(0, self.ntrain, self.batch_size):
44 | self.model.zero_grad()
45 | batch_input, batch_label = self.next_batch(self.batch_size)
46 | self.input.copy_(batch_input)
47 | self.label.copy_(batch_label)
48 |
49 | inputv = Variable(self.input)
50 | labelv = Variable(self.label)
51 | output = self.model(inputv)
52 | loss = self.criterion(output, labelv)
53 | loss.backward()
54 | self.optimizer_classifier.step()
55 |
56 | acc_val_seen = 0
57 | acc_train = self.val_model(self.model, self.train_X, self.train_Y, util.map_label(self.seenclasses, self.seenclasses))
58 | acc_val_seen = self.val_model(self.model, self.test_seen_feature, util.map_label(self.test_seen_label, self.seenclasses), util.map_label(self.seenclasses, self.seenclasses))
59 | if acc_val_seen > best_seen:
60 | print(f'New best validation seen class accuracy={acc_val_seen*100:.4f}% (train seen class accuracy={acc_train*100:.4f}%)')
61 | best_seen = acc_val_seen
62 | best_model = copy.deepcopy(self.model)
63 | else:
64 | best_model = torch.load(self.opt.rootpath + '/models/base-classifiers/' + self.opt.dataset + self.opt.image_embedding + f'_seed{self.seedinfo}_clr{self.opt.classifier_lr}_nep{self.nepoch}')
65 |
66 | return best_model
67 |
68 | def next_batch(self, batch_size):
69 | start = self.index_in_epoch
70 | # shuffle the data at the first epoch
71 | if self.epochs_completed == 0 and start == 0:
72 | perm = torch.randperm(self.ntrain)
73 | self.train_X = self.train_X[perm]
74 | self.train_Y = self.train_Y[perm]
75 | # the last batch
76 | if start + batch_size > self.ntrain:
77 | self.epochs_completed += 1
78 | rest_num_examples = self.ntrain - start
79 | if rest_num_examples > 0:
80 | X_rest_part = self.train_X[start:self.ntrain]
81 | Y_rest_part = self.train_Y[start:self.ntrain]
82 | # shuffle the data
83 | perm = torch.randperm(self.ntrain)
84 | self.train_X = self.train_X[perm]
85 | self.train_Y = self.train_Y[perm]
86 | # start next epoch
87 | start = 0
88 | self.index_in_epoch = batch_size - rest_num_examples
89 | end = self.index_in_epoch
90 | X_new_part = self.train_X[start:end]
91 | Y_new_part = self.train_Y[start:end]
92 | if rest_num_examples > 0:
93 | return torch.cat((X_rest_part, X_new_part), 0) , torch.cat((Y_rest_part, Y_new_part), 0)
94 | else:
95 | return X_new_part, Y_new_part
96 | else:
97 | self.index_in_epoch += batch_size
98 | end = self.index_in_epoch
99 | # from index start to index end-1
100 | return self.train_X[start:end], self.train_Y[start:end]
101 |
--------------------------------------------------------------------------------