├── .gitignore
├── README.md
├── constants.py
├── datasets.py
├── demo.py
├── evaluate.py
├── methods.py
├── metrics.py
├── oracles
│   ├── all.csv
│   └── controlled.csv
├── requirements.txt
└── utils.py

/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .ipynb_checkpoints/
3 | 
4 | *.pyd
5 | *.pyc
6 | 
7 | cache/
8 | data/
9 | models/
10 | plots/
11 | results/
12 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PARC for Scalable Diverse Model Selection
2 | 
3 | Here, we present a set of benchmarks for Scalable Diverse Model Selection.
4 | We also include our method, PARC, as a good baseline on this benchmark.
5 | 
6 | This is the code for our NeurIPS 2021 paper, available [here](https://arxiv.org/abs/2111.06977).
7 | 
8 | 
9 | ## Installing the Benchmark
10 | To install, first clone this repo:
11 | ```
12 | git clone https://github.com/dbolya/parc.git
13 | cd parc
14 | ```
15 | 
16 | Make sure your Python version is at least 3.7.
17 | Then install the requirements:
18 | ```
19 | pip install -r requirements.txt
20 | ```
21 | 
22 | Finally, download the cached probe sets from here: [500 Image Probe Sets](https://www.dropbox.com/s/l08n4ejuip2b1h6/probes.zip?dl=0)
23 | 
24 | Then extract the probes into `./cache/` (or symlink it there):
25 | ```
26 | unzip probes.zip -d ./cache/
27 | ```
28 | 
29 | Verify that the probe sets exist:
30 | ```
31 | ls ./cache/probes/fixed_budget_500 | head
32 | ```
33 | You should see several probe set `.pkl` files.
34 | 
35 | And you're done! If you want to create your own probe sets, see the `Advanced` section below.
36 | 
37 | 
38 | 
39 | ## Evaluation
40 | See `demo.py` for an example of how to perform evaluation:
41 | ```py
42 | from evaluate import Experiment
43 | from metrics import MetricEval
44 | 
45 | from methods import PARC, kNN
46 | 
47 | 
48 | # Set up the methods to use.
49 | # To define your own method, inherit methods.TransferabilityMethod. See the methods in methods.py for more details.
50 | my_methods = {
51 |     'PARC f=32': PARC(n_dims=32),
52 |     '1-NN CV'  : kNN(k=1)
53 | }
54 | 
55 | experiment = Experiment(my_methods, name='test', append=False) # Set up an experiment with those methods named "test".
56 | # append=True skips evaluations that have already happened; setting it to False will overwrite.
57 | experiment.run() # Run the experiment and save results to ./results/{name}.csv
58 | 
59 | metric = MetricEval(experiment.out_file) # Load the experiment file we just created with the default oracle
60 | metric.add_plasticity() # Adds the "capacity to learn" heuristic defined in the paper
61 | mean, variance, _all = metric.aggregate() # Compute metrics and aggregate them
62 | 
63 | # Prints {'PARC f=32': 70.27800205353863, '1-NN CV': 68.01407390300884}. Same as Table 4 in the paper.
64 | print(mean)
65 | ```
66 | 
67 | 
68 | 
69 | If you wish to use the extended set of transfers (using crowd-sourced models), pass `model_bank='all'` to the experiment and pass `oracle_path='./oracles/all.csv'` when creating the metric evaluation object.
70 | 
71 | 
72 | 
73 | ## PARC
74 | If you wish to use PARC to recommend models for you, PARC is defined in `methods.py`. We don't have a well-supported way for you to pass arbitrary data in yet, but as long as you pass in everything required by `TransferabilityMethod`, you should be fine; a minimal sketch follows below.
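For example, here is a minimal sketch of scoring one source model on your own probe data. The arrays below are random placeholders (swap in your real features, source-task probabilities, and target labels); `target_dataset` must be one of the datasets listed in `constants.py`:
```py
import numpy as np
from methods import PARC

n = 500                                  # Probe set size
features = np.random.randn(n, 2048)      # [n, d] penultimate-layer features from the source model
probs    = np.random.rand(n, 1000)       # [n, n_z] source-task probabilities (unused by PARC itself)
labels   = np.random.randint(0, 10, n)   # [n] target labels in {0, ..., n_y - 1}

parc = PARC(n_dims=32)
score = parc(features=features, probs=probs, y=labels,
             source_dataset='imagenet', target_dataset='cifar10',
             architecture='resnet50', cache_path_fn=None)  # cache_path_fn is only needed by RSA / DDS

print(score)  # Higher is better; compare scores across candidate source models
```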
75 | 
76 | 
77 | ## Advanced
78 | If you want the trained models, they are available here:
79 | [All Trained Models](https://www.dropbox.com/s/gk32wdqmf19lnmt/models.zip?dl=0). Note that this only includes the models we trained from scratch, not the crowd-sourced models.
80 | 
81 | If you want to create the probe sets yourself, place (or symlink) the datasets at `./data/{dataset}/`. Then put the models above in `./models/`. This is not necessary if you use the pre-extracted probe sets instead.
82 | 
83 | 
84 | ## Citation
85 | If you use PARC, this benchmark, or this code in your work, please cite:
86 | ```
87 | @inproceedings{parc-neurips2021,
88 |   author    = {Daniel Bolya and Rohit Mittapalli and Judy Hoffman},
89 |   title     = {Scalable Diverse Model Selection for Accessible Transfer Learning},
90 |   booktitle = {NeurIPS},
91 |   year      = {2021},
92 | }
93 | ```
94 | 
--------------------------------------------------------------------------------
/constants.py:
--------------------------------------------------------------------------------
1 | variables = {
2 |     'Source Dataset' : [
3 |         'nabird',
4 |         'oxford_pets',
5 |         'cub200',
6 |         'caltech101',
7 |         'stanford_dogs',
8 |         'voc2007',
9 |         'cifar10',
10 |         'imagenet'
11 |     ],
12 | 
13 |     'Target Dataset' : [
14 |         'cifar10',
15 |         'oxford_pets',
16 |         'cub200',
17 |         'caltech101',
18 |         'stanford_dogs',
19 |         'nabird',
20 |         # 'voc2007' # Only some methods support multi-label
21 |     ],
22 | 
23 |     'Architecture': [
24 |         'resnet50',
25 |         'resnet18',
26 |         'googlenet',
27 |         'alexnet',
28 |     ]
29 | }
30 | 
31 | num_classes = {
32 |     'nabird': 555,
33 |     'oxford_pets': 37,
34 |     'cub200': 200,
35 |     'caltech101': 101,
36 |     'stanford_dogs': 120,
37 |     'voc2007': 21,
38 |     'cifar10': 10,
39 |     'imagenet': 1000
40 | }
41 | 
42 | dataset_images = {
43 |     'nabird': 24633,
44 |     'oxford_pets': 3669,
45 |     'cub200': 5794,
46 |     'caltech101': 2630,
47 |     'stanford_dogs': 8580,
48 |     'voc2007': 2501,
49 |     'cifar10': 50000,
50 |     'imagenet': 150000
51 | }
52 | 
53 | model_layers = {
54 |     'resnet50': 50,
55 |     'resnet18': 18,
56 |     'googlenet': 22,
57 |     'alexnet': 8,
58 |     'resnet101': 101
59 | }
60 | 
61 | model_banks = [
62 |     'controlled',
63 |     'all'
64 | ]
65 | 
66 | external = {
67 |     ('resnet101', 'faster-rcnn_c4_138204752'),
68 |     ('resnet101', 'faster-rcnn_fpn_137851257'),
69 |     ('resnet101', 'keypoint-rcnn_fpn_138363331'),
70 |     ('resnet101', 'mask-rcnn_c4_138363239'),
71 |     ('resnet101', 'mask-rcnn_fpn_138205316'),
72 |     ('resnet101', 'mask-rcnn_fpn_lvis_144219035'),
73 |     ('resnet101', 'panoptic-rcnn_fpn_139514519'),
74 |     ('resnet101', 'panoptic-rcnn_fpn_139797668'),
75 |     ('resnet101', 'retinanet_190397697'),
76 |     ('resnet101', 'simclr_imagenet'),
77 |     ('resnet50', 'clusterfit_imagenet'),
78 |     ('resnet50', 'deepclusterv2_imagenet'),
79 |     ('resnet50', 'faster-rcnn_c4_137257644'),
80 |     ('resnet50', 'faster-rcnn_c4_137849393'),
81 |     ('resnet50', 'faster-rcnn_c4_voc_142202221'),
82 |     ('resnet50', 'faster-rcnn_fpn_137257794'),
83 |     ('resnet50', 'faster-rcnn_fpn_137849458'),
84 |     ('resnet50', 'jigsaw_imagenet22k'),
85 |     ('resnet50', 'keypoint-rcnn_fpn_137261548'),
86 |     ('resnet50', 'keypoint-rcnn_fpn_137849621'),
87 |     ('resnet50', 'mask-rcnn_c4_137259246'),
88 |     ('resnet50', 'mask-rcnn_c4_137849525'),
89 |     ('resnet50', 'mask-rcnn_fpn_137260431'),
90 |     ('resnet50', 'mask-rcnn_fpn_137849600'),
91 |     ('resnet50', 'mask-rcnn_fpn_cityscapes_142423278'),
92 |     ('resnet50', 'mask-rcnn_fpn_lvis_144219072'),
93 |     ('resnet50', 'moco_imagenet'),
94 |     ('resnet50', 'npid_imagenet'),
95 |     ('resnet50', 'panoptic-rcnn_fpn_139514544'),
96 | ('resnet50', 'panoptic-rcnn_fpn_139514569'), 97 | ('resnet50', 'pirl_imagenet'), 98 | ('resnet50', 'retinanet_190397773'), 99 | ('resnet50', 'retinanet_190397829'), 100 | ('resnet50', 'rotnet_imagenet22k'), 101 | ('resnet50', 'semisup_instagram'), 102 | ('resnet50', 'semisup_yfcc100m'), 103 | ('resnet50', 'simclr_imagenet'), 104 | ('resnet50', 'supervised_places205'), 105 | ('resnet50', 'swav_imagenet'), 106 | } 107 | -------------------------------------------------------------------------------- /datasets.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | import logging 3 | import os 4 | import sys 5 | import xml.etree.ElementTree as ET 6 | from pathlib import Path 7 | import pickle 8 | from collections import defaultdict 9 | import random 10 | import glob 11 | 12 | import numpy as np 13 | import pandas as pd 14 | import scipy.io 15 | import torch 16 | import torch.nn as nn 17 | import torchvision.datasets as datasets 18 | import torchvision.models as models 19 | import torchvision.transforms as transforms 20 | from PIL import Image 21 | from torch.utils.data import Dataset 22 | from torchvision.datasets.folder import default_loader 23 | from torchvision.datasets.utils import download_url 24 | from tqdm import tqdm 25 | 26 | import constants 27 | import utils 28 | 29 | dataset_objs = {} 30 | test_transforms = {} 31 | train_transforms = {} 32 | 33 | 34 | ##################### 35 | # CIFAR 10 Dataset 36 | ##################### 37 | 38 | class CIFAR10_base(datasets.CIFAR10): 39 | """`CIFAR100 `_ Dataset. 40 | This is a subclass of the `CIFAR10` Dataset. 41 | """ 42 | base_folder = 'cifar-10' 43 | url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" 44 | filename = "cifar-10-python.tar.gz" 45 | tgz_md5 = 'c58f30108f718f92721af3b95e74349a' 46 | train_list = [ 47 | ['data_batch_1', 'c99cafc152244af753f735de768cd75f'], 48 | ['data_batch_2', 'd4bba439e000b95fd0a9bffe97cbabec'], 49 | ['data_batch_3', '54ebc095f3ab1f0389bbae665268c751'], 50 | ['data_batch_4', '634d18415352ddfa80567beed471001a'], 51 | ['data_batch_5', '482c414d41f54cd18b22e5b47cb7c3cb'], 52 | ] 53 | 54 | test_list = [ 55 | ['test_batch', '40351d587109b95175f43aff81a1287e'], 56 | ] 57 | meta = { 58 | 'filename': 'batches.meta', 59 | 'key': 'label_names', 60 | 'md5': '5ff9c542aee3614f3951f8cda6e48888', 61 | } 62 | 63 | class CIFAR10(Dataset): 64 | def __init__(self, root, train, transform, download=False): 65 | self.cifar10_base = CIFAR10_base(root=root, 66 | train=train, 67 | download=download, 68 | transform=transform) 69 | 70 | def __getitem__(self, index): 71 | data, target = self.cifar10_base[index] 72 | return data, target, index 73 | 74 | def __len__(self): 75 | return len(self.cifar10_base) 76 | 77 | dataset_objs['cifar10'] = CIFAR10 78 | 79 | train_transforms['cifar10'] = transforms.Compose([ 80 | transforms.Resize(224), 81 | transforms.ToTensor(), 82 | transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), 83 | ]) 84 | 85 | test_transforms['cifar10'] = transforms.Compose([ 86 | transforms.Resize(224), 87 | transforms.ToTensor(), 88 | transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), 89 | ]) 90 | 91 | ################## 92 | # CUB 200 Dataset 93 | ################## 94 | 95 | class CUB2011(Dataset): 96 | base_folder = 'CUB_200_2011/images' 97 | url = 'http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/CUB_200_2011.tgz' 98 | filename = 'CUB_200_2011.tgz' 99 | tgz_md5 = 
'97eceeb196236b17998738112f37df78' 100 | 101 | def __init__(self, root, train=True, transform=None, loader=default_loader): 102 | self.root = os.path.expanduser(root) 103 | self.transform = transform 104 | self.loader = default_loader 105 | self.train = train 106 | self._load_metadata() 107 | 108 | def _load_metadata(self): 109 | images = pd.read_csv(os.path.join(self.root, 'CUB_200_2011', 'images.txt'), sep=' ', 110 | names=['img_id', 'filepath']) 111 | image_class_labels = pd.read_csv(os.path.join(self.root, 'CUB_200_2011', 'image_class_labels.txt'), 112 | sep=' ', names=['img_id', 'target']) 113 | train_test_split = pd.read_csv(os.path.join(self.root, 'CUB_200_2011', 'train_test_split.txt'), 114 | sep=' ', names=['img_id', 'is_training_img']) 115 | 116 | data = images.merge(image_class_labels, on='img_id') 117 | self.data = data.merge(train_test_split, on='img_id') 118 | 119 | if self.train: 120 | self.data = self.data[self.data.is_training_img == 1] 121 | else: 122 | self.data = self.data[self.data.is_training_img == 0] 123 | 124 | def __len__(self): 125 | return len(self.data) 126 | 127 | def __getitem__(self, idx): 128 | sample = self.data.iloc[idx] 129 | path = os.path.join(self.root, self.base_folder, sample.filepath) 130 | target = sample.target - 1 # Targets start at 1 by default, so shift to 0 131 | img = self.loader(path) 132 | 133 | if self.transform is not None: 134 | img = self.transform(img) 135 | 136 | return img, target, idx 137 | 138 | dataset_objs['cub200'] = CUB2011 139 | 140 | train_transforms['cub200'] = transforms.Compose([ 141 | transforms.Resize(256), 142 | transforms.RandomResizedCrop(224), 143 | transforms.ToTensor(), 144 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 145 | ]) 146 | 147 | test_transforms['cub200'] = transforms.Compose([ 148 | transforms.Resize(256), 149 | transforms.CenterCrop(224), 150 | transforms.ToTensor(), 151 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 152 | ]) 153 | 154 | 155 | 156 | #################### 157 | # NA Bird Dataset 158 | #################### 159 | 160 | class NABird(Dataset): 161 | base_folder = 'images' 162 | tgz_md5 = '97eceeb196236b17998738112f37df78' 163 | 164 | def __init__(self, root, train=True, transform=None, loader=default_loader): 165 | self.root = os.path.expanduser(root) 166 | self.transform = transform 167 | self.loader = default_loader 168 | self.train = train 169 | self._load_metadata() 170 | 171 | def _load_metadata(self): 172 | images = pd.read_csv(os.path.join(self.root, 'images.txt'), sep=' ', 173 | names=['img_id', 'filepath']) 174 | image_class_labels = pd.read_csv(os.path.join(os.path.join(self.root, 'nabird_image_class.txt')), 175 | sep=' ', names=['img_id', 'target']) 176 | train_test_split = pd.read_csv(os.path.join(self.root, 'train_test_split.txt'), 177 | sep=' ', names=['img_id', 'is_training_img']) 178 | 179 | data = images.merge(image_class_labels, on='img_id') 180 | self.data = data.merge(train_test_split, on='img_id') 181 | 182 | if self.train: 183 | self.data = self.data[self.data.is_training_img == 1] 184 | else: 185 | self.data = self.data[self.data.is_training_img == 0] 186 | 187 | def __len__(self): 188 | return len(self.data) 189 | 190 | def __getitem__(self, idx): 191 | sample = self.data.iloc[idx] 192 | path = os.path.join(self.root, self.base_folder, sample.filepath) 193 | target = sample.target - 1 # Targets start at 1 by default, so shift to 0 194 | img = self.loader(path) 195 | 196 | if self.transform is not None: 197 | img = 
self.transform(img) 198 | 199 | return img, target, idx 200 | 201 | 202 | dataset_objs['nabird'] = NABird 203 | 204 | train_transforms['nabird'] = transforms.Compose([ 205 | transforms.Resize(256), 206 | transforms.RandomResizedCrop(224), 207 | transforms.ToTensor(), 208 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 209 | ]) 210 | 211 | test_transforms['nabird'] = transforms.Compose([ 212 | transforms.Resize(256), 213 | transforms.CenterCrop(224), 214 | transforms.ToTensor(), 215 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 216 | ]) 217 | 218 | 219 | #################### 220 | # VOC2007 Dataset 221 | #################### 222 | 223 | """ 224 | adapted from Biagio Brattoli 225 | """ 226 | 227 | class VOC2007(Dataset): 228 | def __init__(self,data_path, train:bool, transform, loader=default_loader, random_crops=0): 229 | self.data_path = data_path 230 | self.transform = transform 231 | self.random_crops = random_crops 232 | self.trainval = 'train' if train else 'test' 233 | self.loader = default_loader 234 | self.__init_classes() 235 | self.names, self.labels = self.__dataset_info() 236 | 237 | def __getitem__(self, index): 238 | path = self.data_path + '/JPEGImages/'+self.names[index] + '.jpg' 239 | img = self.loader(path) 240 | 241 | if self.transform is not None: 242 | img = self.transform(img) 243 | 244 | y = self.labels[index] 245 | return img, y, index 246 | 247 | def __len__(self): 248 | return len(self.names) 249 | 250 | def __dataset_info(self): 251 | 252 | with open(self.data_path+'/ImageSets/Main/'+self.trainval+'.txt') as f: 253 | annotations = f.readlines() 254 | 255 | annotations = [n[:-1] for n in annotations] 256 | 257 | names = [] 258 | labels = [] 259 | for af in annotations: 260 | if len(af)!=6: 261 | continue 262 | filename = os.path.join(self.data_path,'Annotations',af) 263 | tree = ET.parse(filename+'.xml') 264 | objs = tree.findall('object') 265 | num_objs = len(objs) 266 | 267 | boxes = np.zeros((num_objs, 4), dtype=np.uint16) 268 | boxes_cl = np.zeros((num_objs), dtype=np.int32) 269 | 270 | for ix, obj in enumerate(objs): 271 | cls = self.class_to_ind[obj.find('name').text.lower().strip()] 272 | boxes_cl[ix] = cls 273 | 274 | lbl = np.zeros(self.num_classes) 275 | lbl[boxes_cl] = 1 276 | labels.append(lbl) 277 | names.append(af) 278 | 279 | return np.array(names), np.array(labels)#.astype(np.int_) 280 | 281 | def __init_classes(self): 282 | self.classes = ('__background__','aeroplane', 'bicycle', 'bird', 'boat', 283 | 'bottle', 'bus', 'car', 'cat', 'chair', 284 | 'cow', 'diningtable', 'dog', 'horse', 285 | 'motorbike', 'person', 'pottedplant', 286 | 'sheep', 'sofa', 'train', 'tvmonitor') 287 | self.num_classes = len(self.classes) 288 | self.class_to_ind = dict(zip(self.classes, range(self.num_classes))) 289 | 290 | dataset_objs['voc2007'] = VOC2007 291 | 292 | train_transforms['voc2007'] = transforms.Compose([ 293 | transforms.RandomResizedCrop(size=256), 294 | transforms.CenterCrop(size=224), 295 | transforms.ToTensor(), 296 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 297 | ]) 298 | 299 | test_transforms['voc2007'] = transforms.Compose([ 300 | transforms.Resize(size=256), 301 | transforms.CenterCrop(size=224), 302 | transforms.ToTensor(), 303 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 304 | ]) 305 | 306 | ######################## 307 | # Caltech 101 Dataset 308 | ######################## 309 | 310 | class Caltech101(Dataset): 311 | base_folder = '101_ObjectCategories' 
312 | tgz_md5 = '97eceeb196236b17998738112f37df78' 313 | 314 | def __init__(self, root, train=True, transform=None, loader=default_loader): 315 | self.root = os.path.expanduser(root) 316 | self.transform = transform 317 | self.loader = default_loader 318 | self.train = train 319 | self._load_metadata() 320 | 321 | def _load_metadata(self): 322 | images = pd.read_csv(os.path.join(self.root, 'images.txt'), sep=' ', 323 | names=['img_id', 'filepath']) 324 | image_class_labels = pd.read_csv(os.path.join(self.root, 'image_class_labels.txt'), 325 | sep=' ', names=['img_id', 'target']) 326 | train_test_split = pd.read_csv(os.path.join(self.root, 'train_test_split.txt'), 327 | sep=' ', names=['img_id', 'is_training_img']) 328 | 329 | data = images.merge(image_class_labels, on='img_id') 330 | self.data = data.merge(train_test_split, on='img_id') 331 | 332 | if self.train: 333 | self.data = self.data[self.data.is_training_img == 1] 334 | else: 335 | self.data = self.data[self.data.is_training_img == 0] 336 | 337 | def __len__(self): 338 | return len(self.data) 339 | 340 | def __getitem__(self, idx): 341 | sample = self.data.iloc[idx] 342 | path = os.path.join(self.root, self.base_folder, sample.filepath) 343 | target = sample.target - 1 # Targets start at 1 by default, so shift to 0 344 | img = self.loader(path) 345 | 346 | if self.transform is not None: 347 | img = self.transform(img) 348 | 349 | return img, target, idx 350 | 351 | dataset_objs['caltech101'] = Caltech101 352 | 353 | train_transforms['caltech101'] = transforms.Compose([ 354 | transforms.RandomResizedCrop(size=256), 355 | transforms.CenterCrop(size=224), 356 | transforms.ToTensor(), 357 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 358 | ]) 359 | 360 | test_transforms['caltech101'] = transforms.Compose([ 361 | transforms.Resize(size=256), 362 | transforms.CenterCrop(size=224), 363 | transforms.ToTensor(), 364 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 365 | ]) 366 | 367 | ################## 368 | # Stanford Dogs 369 | ################## 370 | 371 | class StanfordDogs(Dataset): 372 | """`Stanford Dogs `_ Dataset. 373 | Args: 374 | root (string): Root directory of dataset where directory 375 | ``omniglot-py`` exists. 376 | transform (callable, optional): A function/transform that takes in an PIL image 377 | and returns a transformed version. E.g, ``transforms.RandomCrop`` 378 | download (bool, optional): If true, downloads the dataset tar files from the internet and 379 | puts it in root directory. If the tar files are already downloaded, they are not 380 | downloaded again. 381 | """ 382 | folder = 'StanfordDogs' 383 | download_url_prefix = 'http://vision.stanford.edu/aditya86/ImageNetDogs' 384 | 385 | def __init__(self, 386 | root, 387 | train=True, 388 | transform=None, 389 | download=False): 390 | 391 | self.root = os.path.join(os.path.expanduser(root), self.folder) 392 | self.train = train 393 | self.transform = transform 394 | if download: 395 | self.download() 396 | 397 | split = self.load_split() 398 | self.images_folder = os.path.join(self.root, 'Images') 399 | 400 | self._breed_images = [(annotation+'.jpg', idx) for annotation, idx in split] 401 | self._flat_breed_images = self._breed_images 402 | 403 | def __len__(self): 404 | return len(self._flat_breed_images) 405 | 406 | def __getitem__(self, index): 407 | """ 408 | Args: 409 | index (int): Index 410 | Returns: 411 | tuple: (image, target) where target is index of the target character class. 
412 | """ 413 | image_name, target_class = self._flat_breed_images[index] 414 | image_path = os.path.join(self.images_folder, image_name) 415 | image = Image.open(image_path).convert('RGB') 416 | 417 | if self.transform: 418 | image = self.transform(image) 419 | 420 | return image, target_class, index 421 | 422 | def download(self): 423 | import tarfile 424 | 425 | if os.path.exists(os.path.join(self.root, 'Images')) and os.path.exists(os.path.join(self.root, 'Annotation')): 426 | if len(os.listdir(os.path.join(self.root, 'Images'))) == len(os.listdir(os.path.join(self.root, 'Annotation'))) == 120: 427 | return 428 | 429 | for filename in ['images', 'annotation', 'lists']: 430 | tar_filename = filename + '.tar' 431 | url = self.download_url_prefix + '/' + tar_filename 432 | download_url(url, self.root, tar_filename, None) 433 | print('Extracting downloaded file: ' + os.path.join(self.root, tar_filename)) 434 | with tarfile.open(os.path.join(self.root, tar_filename), 'r') as tar_file: 435 | tar_file.extractall(self.root) 436 | os.remove(os.path.join(self.root, tar_filename)) 437 | 438 | def load_split(self): 439 | if self.train: 440 | split = scipy.io.loadmat(os.path.join(self.root, 'train_list.mat'))['annotation_list'] 441 | labels = scipy.io.loadmat(os.path.join(self.root, 'train_list.mat'))['labels'] 442 | else: 443 | split = scipy.io.loadmat(os.path.join(self.root, 'test_list.mat'))['annotation_list'] 444 | labels = scipy.io.loadmat(os.path.join(self.root, 'test_list.mat'))['labels'] 445 | 446 | split = [item[0][0] for item in split] 447 | labels = [item[0]-1 for item in labels] 448 | return list(zip(split, labels)) 449 | 450 | dataset_objs['stanford_dogs'] = StanfordDogs 451 | 452 | train_transforms['stanford_dogs'] = transforms.Compose([ 453 | transforms.Resize(256), 454 | transforms.RandomResizedCrop(224), 455 | transforms.ToTensor(), 456 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 457 | ]) 458 | 459 | test_transforms['stanford_dogs'] = transforms.Compose([ 460 | transforms.Resize(256), 461 | transforms.CenterCrop(224), 462 | transforms.ToTensor(), 463 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 464 | ]) 465 | 466 | 467 | ################## 468 | # Oxford IIIT Dogs 469 | ################## 470 | 471 | class OxfordPets(Dataset): 472 | """`Oxford Pets `_ Dataset. 473 | Args: 474 | root (string): Root directory of dataset where directory 475 | ``omniglot-py`` exists. 476 | transform (callable, optional): A function/transform that takes in an PIL image 477 | and returns a transformed version. E.g, ``transforms.RandomCrop`` 478 | download (bool, optional): If true, downloads the dataset tar files from the internet and 479 | puts it in root directory. If the tar files are already downloaded, they are not 480 | downloaded again. 
481 |     """
482 |     folder = 'oxford_pets'
483 | 
484 |     def __init__(self,
485 |                  root,
486 |                  train=True,
487 |                  transform=None,
488 |                  loader=default_loader):
489 | 
490 |         self.root = os.path.join(os.path.expanduser(root), self.folder)
491 |         self.train = train
492 |         self.transform = transform
493 |         self.loader = loader
494 |         self._load_metadata()
495 | 
496 |     def __getitem__(self, idx):
497 | 
498 |         sample = self.data.iloc[idx]
499 |         path = os.path.join(self.root, 'images', sample.img_id) + '.jpg'
500 | 
501 |         target = sample.class_id - 1  # Targets start at 1 by default, so shift to 0
502 |         img = self.loader(path)
503 |         if self.transform is not None:
504 |             img = self.transform(img)
505 | 
506 |         return img, target, idx
507 | 
508 |     def _load_metadata(self):
509 |         if self.train:
510 |             train_file = os.path.join(self.root, 'annotations', 'trainval.txt')
511 |             self.data = pd.read_csv(train_file, sep=' ', names=['img_id', 'class_id', 'species', 'breed_id'])
512 |         else:
513 |             test_file = os.path.join(self.root, 'annotations', 'test.txt')
514 |             self.data = pd.read_csv(test_file, sep=' ', names=['img_id', 'class_id', 'species', 'breed_id'])
515 | 
516 |     def __len__(self):
517 |         return len(self.data)
518 | 
519 | dataset_objs['oxford_pets'] = OxfordPets
520 | 
521 | train_transforms['oxford_pets'] = transforms.Compose([
522 |     transforms.Resize(256),
523 |     transforms.RandomResizedCrop(224),
524 |     transforms.ToTensor(),
525 |     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
526 | ])
527 | 
528 | test_transforms['oxford_pets'] = transforms.Compose([
529 |     transforms.Resize(256),
530 |     transforms.CenterCrop(224),
531 |     transforms.ToTensor(),
532 |     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
533 | ])
534 | 
535 | 
536 | ####################
537 | # Dataset Loader
538 | ####################
539 | 
540 | 
541 | 
542 | def construct_dataset(dataset:str, path:str, train:bool=False, **kwdargs) -> torch.utils.data.Dataset:
543 |     # transform = (train_transforms[dataset] if train else test_transforms[dataset])
544 |     transform = test_transforms[dataset]  # Note: for training, use the above line. We're using the train set as the probe set, so use the test transform.
545 |     return dataset_objs[dataset](path, train, transform=transform, **kwdargs)
546 | 
547 | def get_dataset_path(dataset:str) -> str:
548 |     return f'./data/{dataset}/'
549 | 
550 | 
551 | class ClassMapCache:
552 |     """ Constructs and stores a cache of which instances map to which classes for each dataset.
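    The cache exposes idx_to_class (a list mapping instance index -> class) and class_to_idx (a dict mapping class -> list of instance indices), pickled under ./cache/class_map/.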
""" 553 | 554 | def __init__(self, dataset:str, train:bool): 555 | self.dataset = dataset 556 | self.train = train 557 | 558 | if not os.path.exists(self.cache_path): 559 | self.construct_cache() 560 | else: 561 | with open(self.cache_path, 'rb') as f: 562 | self.idx_to_class, self.class_to_idx = pickle.load(f) 563 | 564 | 565 | def construct_cache(self): 566 | print(f'Constructing class map for {self.dataset}...') 567 | dataset = construct_dataset(self.dataset, get_dataset_path(self.dataset), self.train) 568 | dataloader = torch.utils.data.DataLoader(dataset, 32, shuffle=False) 569 | 570 | self.idx_to_class = [] 571 | self.class_to_idx = defaultdict(list) 572 | 573 | idx = 0 574 | 575 | for batch in tqdm(dataloader): 576 | y = batch[1] 577 | single_class = (y.ndim == 1) 578 | 579 | for _cls in y: 580 | if single_class: 581 | _cls = _cls.item() 582 | 583 | self.idx_to_class.append(_cls) 584 | 585 | if single_class: 586 | self.class_to_idx[_cls].append(idx) 587 | 588 | idx += 1 589 | 590 | self.class_to_idx = dict(self.class_to_idx) 591 | 592 | utils.make_dirs(self.cache_path) 593 | with open(self.cache_path, 'wb') as f: 594 | pickle.dump((self.idx_to_class, self.class_to_idx), f) 595 | 596 | 597 | 598 | @property 599 | def cache_path(self): 600 | return f'./cache/class_map/{self.dataset}_{"train" if self.train else "test"}.pkl' 601 | 602 | 603 | class DatasetCache(torch.utils.data.Dataset): 604 | """ Constructs and stores a cache for the dataset post-transform. """ 605 | 606 | def __init__(self, dataset:str, train:bool): 607 | self.dataset = dataset 608 | self.train = train 609 | 610 | self.cache_folder = os.path.split(self.cache_path(0))[0] 611 | 612 | if not os.path.exists(self.cache_path(0)): 613 | os.makedirs(self.cache_folder, exist_ok=True) 614 | self.construct_cache() 615 | 616 | self.length = len(glob.glob(self.glob_path())) 617 | self.class_map = ClassMapCache(dataset, train) 618 | 619 | super().__init__() 620 | 621 | def cache_path(self, idx:int) -> str: 622 | return f'./cache/datasets/{self.dataset}/{"train" if self.train else "test"}_{idx}.npy' 623 | 624 | def glob_path(self) -> str: 625 | return f'./cache/datasets/{self.dataset}/{"train" if self.train else "test"}_*' 626 | 627 | def construct_cache(self): 628 | print(f'Constructing dataset cache for {self.dataset}...') 629 | dataset = construct_dataset(self.dataset, get_dataset_path(self.dataset), self.train) 630 | dataloader = torch.utils.data.DataLoader(dataset, 32, shuffle=False) 631 | 632 | idx = 0 633 | 634 | for batch in tqdm(dataloader): 635 | x = batch[0] 636 | 637 | for i in range(x.shape[0]): 638 | np.save(self.cache_path(idx), x[i].numpy().astype(np.float16)) 639 | idx += 1 640 | 641 | def __getitem__(self, idx:int) -> tuple: 642 | x = torch.from_numpy(np.load(self.cache_path(idx)).astype(np.float32)) 643 | y = self.class_map.idx_to_class[idx] 644 | return x, y 645 | 646 | def __len__(self): 647 | return self.length 648 | 649 | 650 | class BalancedClassSampler(torch.utils.data.DataLoader): 651 | """ Samples from a dataloader such that there's an equal number of instances per class. 
""" 652 | 653 | def __init__(self, dataset:str, batch_size:int, instances_per_class:int, train:bool=True, **kwdargs): 654 | num_classes = constants.num_classes[dataset] 655 | dataset_obj = DatasetCache(dataset, train) 656 | map_cache = ClassMapCache(dataset, train) 657 | 658 | sampler_list = [] 659 | 660 | for _, v in map_cache.class_to_idx.items(): 661 | random.shuffle(v) 662 | 663 | for _ in range(instances_per_class): 664 | for i in range(num_classes): 665 | if i in map_cache.class_to_idx: 666 | idx_list = map_cache.class_to_idx[i] 667 | 668 | if len(idx_list) > 0: 669 | sampler_list.append(idx_list.pop()) 670 | 671 | super().__init__(dataset_obj, batch_size, sampler=sampler_list, **kwdargs) 672 | 673 | 674 | class FixedBudgetSampler(torch.utils.data.DataLoader): 675 | """ Samples from a dataloader such that there's a fixed number of samples. Classes are distributed evenly. """ 676 | 677 | def __init__(self, dataset:str, batch_size:int, probe_size:int, train:bool=True, min_instances_per_class:int=2, **kwdargs): 678 | num_classes = constants.num_classes[dataset] 679 | dataset_obj = DatasetCache(dataset, train) 680 | map_cache = ClassMapCache(dataset, train) 681 | 682 | # VOC is multiclass so just sample a random subset 683 | if dataset == 'voc2007': 684 | samples = list(range(len(dataset_obj))) 685 | random.shuffle(samples) 686 | 687 | super().__init__(dataset_obj, batch_size, sampler=samples[:probe_size], **kwdargs) 688 | return 689 | 690 | sampler_list = [] 691 | last_len = None 692 | 693 | for _, v in map_cache.class_to_idx.items(): 694 | random.shuffle(v) 695 | 696 | class_indices = list(range(num_classes)) 697 | class_indices = [i for i in class_indices if i in map_cache.class_to_idx] # Ensure that i exists 698 | 699 | # Whether or not to subsample the classes to meet the min_instances and probe_size quotas 700 | if num_classes * min_instances_per_class > probe_size: 701 | # Randomly shuffle the classes so if we need to subsample the classes, it's random. 702 | random.shuffle(class_indices) 703 | # Select a subset of the classes to evaluate on. 704 | class_indices = class_indices[:probe_size // min_instances_per_class] 705 | 706 | # Updated the list of samples (sampler_list) each iteration with 1 image for each class 707 | # We stop when we're finished or there's a class we didn't add an image for (i.e., out of images). 708 | while last_len != len(sampler_list) and len(sampler_list) < probe_size: 709 | # This is to ensure we don't infinitely loop if we run out of images 710 | last_len = len(sampler_list) 711 | 712 | for i in class_indices: 713 | idx_list = map_cache.class_to_idx[i] 714 | 715 | # If we still have images left of this class 716 | if len(idx_list) > 0: 717 | # Add it to the list of samples 718 | sampler_list.append(idx_list.pop()) 719 | 720 | if len(sampler_list) >= probe_size: 721 | break 722 | 723 | super().__init__(dataset_obj, batch_size, sampler=sampler_list, **kwdargs) 724 | 725 | 726 | if __name__ == '__main__': 727 | FixedBudgetSampler('voc2007', 128, 500, train=True) 728 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | from evaluate import Experiment 2 | from metrics import MetricEval 3 | 4 | from methods import PARC, kNN 5 | 6 | 7 | # Set up the methods to use. 8 | # To define your own method, inherit methods.TransferabilityMethod. See the methods in methods.py for more details. 
9 | my_methods = {
10 |     'PARC f=32': PARC(n_dims=32),
11 |     '1-NN CV'  : kNN(k=1)
12 | }
13 | 
14 | experiment = Experiment(my_methods, name='test', append=False) # Set up an experiment with those methods named "test".
15 | # append=True skips evaluations that have already happened; setting it to False will overwrite.
16 | experiment.run() # Run the experiment and save results to ./results/{name}.csv
17 | 
18 | metric = MetricEval(experiment.out_file) # Load the experiment file we just created with the default oracle
19 | metric.add_plasticity() # Adds the "capacity to learn" heuristic defined in the paper
20 | mean, variance, _all = metric.aggregate() # Compute metrics and aggregate them
21 | 
22 | # Prints {'PARC f=32': 70.27800205353863, '1-NN CV': 68.01407390300884}
23 | print(mean)
24 | 
--------------------------------------------------------------------------------
/evaluate.py:
--------------------------------------------------------------------------------
1 | import torchvision
2 | import torch
3 | import torch.nn as nn
4 | import numpy as np
5 | from tqdm import tqdm
6 | import os
7 | import pickle
8 | import itertools
9 | import time
10 | import gc
11 | import csv
12 | from collections import defaultdict
13 | import argparse
14 | from typing import Dict
15 | 
16 | from constants import variables, num_classes, external
17 | from datasets import BalancedClassSampler, FixedBudgetSampler
18 | import utils
19 | from methods import TransferabilityMethod
20 | 
21 | 
22 | class ClassBalancedExperimentParams:
23 |     """ Using a fixed number of instances per class. """
24 | 
25 |     def __init__(self, instances_per_class:int):
26 |         self.instances_per_class = instances_per_class
27 |         self.experiment_name = f'class_balanced_{self.instances_per_class}'
28 | 
29 |     def create_dataloader(self, dataset:str, batch_size:int, **kwdargs):
30 |         return BalancedClassSampler(dataset, batch_size=batch_size, instances_per_class=self.instances_per_class, **kwdargs)
31 | 
32 | 
33 | class FixedBudgetExperimentParams:
34 |     """ Using a fixed budget probe size with classes distributed as evenly as possible. """
35 | 
36 |     def __init__(self, budget:int):
37 |         self.budget = budget
38 |         self.experiment_name = f'fixed_budget_{self.budget}'
39 | 
40 |     def create_dataloader(self, dataset:str, batch_size:int, **kwdargs):
41 |         return FixedBudgetSampler(dataset, batch_size=batch_size, probe_size=self.budget, **kwdargs)
42 | 
43 | 
44 | class Experiment:
45 | 
46 |     """
47 |     Runs the given methods on each probe set and outputs score and timing information into ./results/.
48 |     To evaluate the results, see metrics.py.
49 | 
50 |     Params:
51 |     - methods: A dictionary of methods to use.
52 |     - budget: The number of images in each probe set. Leave as default unless you want to extract your own probe sets.
53 |     - runs: The number of different probes sampled per transfer. Leave as default unless you want to extract your own probe sets.
54 |     - probe_only: If True, skips doing method computation and instead only extracts the probe sets.
55 |     - model_bank: Which model bank to use. Options are: "controlled" (default) and "all" (includes crowd-sourced).
56 |     - append: If False (default), the output file will be overwritten. Otherwise, it will resume from where it left off. When resuming, timing information will be lost.
57 |     - name: The name of the experiment. Defaults to the name of the probe set.
58 | """ 59 | 60 | def __init__(self, methods:Dict[str, TransferabilityMethod], budget:int=500, runs:int=5, probe_only:bool=False, model_bank:str='controlled', append:bool=False, name:str=None): 61 | self.params = FixedBudgetExperimentParams(budget) 62 | self.runs = runs 63 | self.probe_only = probe_only 64 | self.model_bank = model_bank 65 | self.name = name if name is not None else self.params.experiment_name 66 | self.methods = methods 67 | 68 | self.dataloaders = {} 69 | 70 | key = ['Run', 'Architecture', 'Source Dataset', 'Target Dataset'] 71 | headers = key + list(self.methods.keys()) 72 | 73 | self.out_cache = utils.CSVCache(self.out_file, headers, key=key, append=append) 74 | 75 | self.times = defaultdict(list) 76 | 77 | def cache_path(self, architecture:str, source_dataset:str, target_dataset:str, run:int): 78 | return f'./cache/probes/{self.params.experiment_name}/{architecture}_{source_dataset}_{target_dataset}_{run}.pkl' 79 | 80 | @property 81 | def cur_cache_path(self): 82 | return self.cache_path(self.architecture, self.source_dataset, self.target_dataset, self.run) 83 | 84 | @property 85 | def out_file(self): 86 | return f'./results/{self.name}.csv' 87 | 88 | @property 89 | def timing_file(self): 90 | return f'./results/{self.name}_timing.pkl' 91 | 92 | def prep_model(self): 93 | model = utils.load_source_model(self.architecture, self.source_dataset) 94 | model.cuda() 95 | model.eval() 96 | 97 | def extract_feats(self, args): 98 | x = args[0] 99 | model._extracted_feats[x.get_device()] = x 100 | 101 | for name, module in model.named_modules(): 102 | if isinstance(module, nn.Linear): 103 | module.register_forward_pre_hook(extract_feats) 104 | 105 | return model 106 | 107 | def probe(self): 108 | """ Returns (and creates if necessary) probe data for the current run. 
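        The returned dict holds everything a TransferabilityMethod needs: 'features' [n, d], 'probs' [n, n_z], and 'y' [n] numpy arrays, plus the 'source_dataset', 'target_dataset', and 'architecture' strings.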
""" 109 | cache_path = self.cur_cache_path 110 | 111 | if os.path.exists(cache_path): 112 | with open(cache_path, 'rb') as f: 113 | return pickle.load(f) 114 | 115 | if self.model == None: 116 | self.model = self.prep_model() 117 | 118 | dataloader_key = (self.target_dataset, self.run) 119 | 120 | if dataloader_key not in self.dataloaders: 121 | utils.seed_all(2020 + self.run * 3037) 122 | dataloader = self.params.create_dataloader(self.target_dataset, batch_size=128, train=True, pin_memory=True) 123 | self.dataloaders[dataloader_key] = dataloader 124 | dataloader = self.dataloaders[dataloader_key] 125 | 126 | with torch.no_grad(): 127 | all_y = [] 128 | all_feats = [] 129 | all_probs = [] 130 | 131 | for x, y in dataloader: 132 | # Support for using multiple GPUs 133 | self.model._extracted_feats = [None] * torch.cuda.device_count() 134 | 135 | x = x.cuda() 136 | preds = self.model(x) 137 | 138 | all_y.append(y.cpu()) 139 | all_probs.append(torch.nn.functional.softmax(preds, dim=-1).cpu()) 140 | all_feats.append(torch.cat([x.cpu() for x in self.model._extracted_feats], dim=0)) 141 | 142 | all_y = torch.cat(all_y , dim=0).numpy() 143 | all_feats = torch.cat(all_feats, dim=0).numpy() 144 | all_probs = torch.cat(all_probs, dim=0).numpy() 145 | 146 | params = { 147 | 'features': all_feats, 148 | 'probs': all_probs, 149 | 'y': all_y, 150 | 'source_dataset': self.source_dataset, 151 | 'target_dataset': self.target_dataset, 152 | 'architecture': self.architecture 153 | } 154 | 155 | utils.make_dirs(cache_path) 156 | with open(cache_path, 'wb') as f: 157 | pickle.dump(params, f) 158 | 159 | return params 160 | 161 | def evaluate(self): 162 | params = self.probe() 163 | 164 | if self.probe_only: 165 | return 166 | 167 | if self.source_dataset == self.target_dataset: 168 | return 169 | 170 | params['cache_path_fn'] = lambda architecture, source, target: self.cache_path(architecture, source, target, self.run) 171 | 172 | scores = [self.run, self.architecture, self.source_dataset, self.target_dataset] 173 | 174 | for idx, (name, method) in enumerate(self.methods.items()): 175 | utils.seed_all(1010 + self.run * 2131) 176 | last_time = time.time() 177 | scores.append(method(**params)) 178 | self.times[name].append(time.time() - last_time) 179 | 180 | self.out_cache.write_row(scores) 181 | 182 | 183 | 184 | 185 | def run(self): 186 | """ Run the methods on the data and then saves it to out_path. 
""" 187 | last_model = None 188 | 189 | factors = [variables['Architecture'], variables['Source Dataset'], variables['Target Dataset'], list(range(self.runs))] 190 | 191 | iter_obj = [] 192 | 193 | if self.model_bank == 'all': 194 | for arch, source in external: 195 | for target in variables['Target Dataset']: 196 | for run in range(self.runs): 197 | iter_obj.append((arch, source, target, run)) 198 | 199 | iter_obj += list(itertools.product(*factors)) 200 | 201 | for arch, source, target, run in tqdm(iter_obj): 202 | # RSA requires source-source extraction, so keep this out 203 | # if source == target: 204 | # continue 205 | 206 | if self.out_cache.exists(run, arch, source, target): 207 | continue 208 | 209 | cur_model = (arch, source) 210 | if cur_model != last_model: 211 | self.model = None 212 | 213 | self.architecture = arch 214 | self.source_dataset = source 215 | self.target_dataset = target 216 | self.run = run 217 | 218 | self.evaluate() 219 | 220 | gc.collect() 221 | 222 | for name, times in self.times.items(): 223 | print(f'{name:20s}: {sum(times) / len(times): .3f}s average') 224 | 225 | with open(self.timing_file, 'wb') as f: 226 | pickle.dump(dict(self.times), f) 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | if __name__ == '__main__': 235 | 236 | parser = argparse.ArgumentParser() 237 | parser.add_argument('--budget' , help='Number of image in the probe set. Default is 500.', default=500, type=int) 238 | parser.add_argument('--runs' , help='Number of probe sets sampled per transfer. Default is 5.', default=5, type=int) 239 | parser.add_argument('--probe_only', help='Set this flag if you only want to generate probe sets.', action='store_true') 240 | parser.add_argument('--model_bank', help='Which model bank to use. Options are "controlled" and "all". 
Default is "controlled".', default='controlled', type=str)
241 |     args = parser.parse_args()
242 | 
243 |     Experiment({}, budget=args.budget, runs=args.runs, probe_only=args.probe_only, model_bank=args.model_bank).run() # Pass an empty methods dict; this entry point is mainly for extracting probe sets (--probe_only)
244 | 
245 | 
246 | 
--------------------------------------------------------------------------------
/methods.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pickle
3 | import pandas as pd
4 | 
5 | import constants
6 | import datasets
7 | 
8 | import scipy.stats
9 | import sklearn.neighbors
10 | import sklearn.linear_model
11 | import sklearn.preprocessing
12 | import sklearn.metrics
13 | import sklearn.decomposition
14 | import sklearn.exceptions
15 | # This is for Logistic so it doesn't complain that it didn't converge
16 | import warnings
17 | warnings.filterwarnings("ignore", category=sklearn.exceptions.ConvergenceWarning)
18 | 
19 | 
20 | def split_data(data:np.ndarray, percent_train:float):
21 |     split = data.shape[0] - int(percent_train * data.shape[0])
22 |     return data[:split], data[split:]
23 | 
24 | class TransferabilityMethod:
25 |     def __call__(self,
26 |         features:np.ndarray, probs:np.ndarray, y:np.ndarray,
27 |         source_dataset:str, target_dataset:str, architecture:str,
28 |         cache_path_fn) -> float:
29 | 
30 |         self.features = features
31 |         self.probs = probs
32 |         self.y = y
33 | 
34 |         self.source_dataset = source_dataset
35 |         self.target_dataset = target_dataset
36 |         self.architecture = architecture
37 | 
38 |         self.cache_path_fn = cache_path_fn
39 | 
40 |         # self.features = sklearn.preprocessing.StandardScaler().fit_transform(self.features)
41 | 
42 |         return self.forward()
43 | 
44 |     def forward(self) -> float:
45 |         raise NotImplementedError
46 | 
47 | 
48 | 
49 | 
50 | def feature_reduce(features:np.ndarray, f:int=None) -> np.ndarray:
51 |     """
52 |     Use PCA to reduce the dimensionality of the features.
53 | 
54 |     If f is None, return the original features.
55 |     If f is larger than the number of samples, clamp it to features.shape[0] (PCA cannot produce more components than samples).
56 |     """
57 |     if f is None:
58 |         return features
59 | 
60 |     if f > features.shape[0]:
61 |         f = features.shape[0]
62 | 
63 |     return sklearn.decomposition.PCA(
64 |         n_components=f,
65 |         svd_solver='randomized',
66 |         random_state=1919,
67 |         iterated_power=1).fit_transform(features)
68 | 
69 | 
70 | 
71 | 
72 | class LEEP(TransferabilityMethod):
73 |     """
74 |     LEEP: https://arxiv.org/abs/2002.12462
75 | 
76 |     src ('probs', 'features') denotes what to use for LEEP.
77 | 
78 |     normalization ('l1', 'softmax'). The normalization strategy to get everything to sum to 1.
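    Concretely, forward() below computes Eq. 2 of the paper: LEEP = (1/n) * sum_i log( sum_z P(y_i | z) * theta_i[z] ), where theta are the source-task predictions on the n probe images and P(y|z) is the empirical source-to-target conditional distribution.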
79 | """ 80 | 81 | def __init__(self, n_dims:int=None, src:str='probs', normalization:str=None, use_sigmoid:bool=False): 82 | self.n_dims = n_dims 83 | self.src = src 84 | self.normalization = normalization 85 | self.use_sigmoid = use_sigmoid 86 | 87 | def forward(self) -> float: 88 | theta = getattr(self, self.src) 89 | y = self.y 90 | 91 | n = theta.shape[0] 92 | n_y = constants.num_classes[self.target_dataset] 93 | 94 | # n : Number of target data images 95 | # n_z : Number of source classes 96 | # n_y : Number of target classes 97 | # theta [n, n_z]: The source task probabilities on the target images 98 | # y [n] : The target dataset label indices {0, ..., n_y-1} for each target image 99 | 100 | unnorm_prob_joint = np.eye(n_y)[y, :].T @ theta # P(y, z): [n_y, n_z] 101 | unnorm_prob_marginal = theta.sum(axis=0) # P(z) : [n_z] 102 | prob_conditional = unnorm_prob_joint / unnorm_prob_marginal[None, :] # P(y|z) : [n_y, n_z] 103 | 104 | leep = np.log((prob_conditional[y] * theta).sum(axis=-1)).sum() / n # Eq. 2 105 | 106 | return leep 107 | 108 | 109 | class NegativeCrossEntropy(TransferabilityMethod): 110 | """ NCE: https://arxiv.org/pdf/1908.08142.pdf """ 111 | 112 | def forward(self, eps=1e-5) -> float: 113 | z = self.probs.argmax(axis=-1) 114 | 115 | n = self.y.shape[0] 116 | n_y = constants.num_classes[self.target_dataset] 117 | n_z = constants.num_classes[self.source_dataset] 118 | 119 | prob_joint = (np.eye(n_y)[self.y, :].T @ np.eye(n_z)[z, :]) / n + eps 120 | prob_marginal = np.eye(n_z)[z, :].sum(axis=0) / n + eps 121 | 122 | NCE = (prob_joint * np.log(prob_joint / prob_marginal[None, :])).sum() 123 | 124 | return NCE 125 | 126 | 127 | class HScore(TransferabilityMethod): 128 | """ HScore from https://ieeexplore.ieee.org/document/8803726 """ 129 | 130 | def __init__(self, n_dims:int=None, use_published_implementation:bool=False): 131 | self.use_published_implementation = use_published_implementation 132 | self.n_dims = n_dims 133 | 134 | def getCov(self, X): 135 | X_mean= X - np.mean(X,axis=0,keepdims=True) 136 | cov = np.divide(np.dot(X_mean.T, X_mean), len(X)-1) 137 | return cov 138 | 139 | def getHscore(self, f,Z): 140 | Covf = self.getCov(f) 141 | g = np.zeros_like(f) 142 | for z in range(constants.num_classes[self.target_dataset]): 143 | idx = (Z == z) 144 | if idx.any(): 145 | Ef_z=np.mean(f[idx, :], axis=0) 146 | g[idx]=Ef_z 147 | 148 | Covg=self.getCov(g) 149 | score=np.trace(np.dot(np.linalg.pinv(Covf,rcond=1e-15), Covg)) 150 | 151 | return score 152 | 153 | def get_hscore_fast(self, eps=1e-8): 154 | # The original implementation of HScore isn't properly vectorized, so do that here 155 | cov_f = self.getCov(self.features) 156 | n_y = constants.num_classes[self.target_dataset] 157 | 158 | # Vectorize the inner loop over each class 159 | one_hot_class = np.eye(n_y)[self.y, :] # [#probe, #classes] 160 | class_counts = one_hot_class.sum(axis=0) # [#classes] 161 | 162 | # Compute the mean feature per class 163 | mean_features = (one_hot_class.T @ self.features) / (class_counts[:, None] + eps) # [#classes, #features] 164 | 165 | # Redistribute that into the original features' locations 166 | g = one_hot_class @ mean_features # [#probe, #features] 167 | cov_g = self.getCov(g) 168 | 169 | score = np.trace(np.linalg.pinv(cov_f, rcond=1e-15) @ cov_g) 170 | 171 | return score 172 | 173 | 174 | def forward(self): 175 | self.features = feature_reduce(self.features, self.n_dims) 176 | 177 | scaler = sklearn.preprocessing.StandardScaler() 178 | self.features = 
scaler.fit_transform(self.features) 179 | 180 | if self.use_published_implementation: 181 | return self.getHscore(self.features, self.y) 182 | else: 183 | return self.get_hscore_fast() 184 | 185 | 186 | 187 | class kNN(TransferabilityMethod): 188 | """ 189 | k Nearest Neighbors with hold-one-out cross-validation. 190 | 191 | Metric can be one of (euclidean, cosine, cityblock) 192 | 193 | This method supports VOC2007. 194 | """ 195 | 196 | def __init__(self, k:int=1, metric:str='l2', n_dims:int=None): 197 | self.k = k 198 | self.metric = metric 199 | self.n_dims = n_dims 200 | 201 | def forward(self) -> float: 202 | self.features = feature_reduce(self.features, self.n_dims) 203 | 204 | dist = sklearn.metrics.pairwise_distances(self.features, metric=self.metric) 205 | idx = np.argsort(dist, axis=-1) 206 | 207 | # After sorting, the first index will always be the same element (distance = 0), so choose the k after 208 | idx = idx[:, 1:self.k+1] 209 | 210 | votes = self.y[idx] 211 | preds, counts = scipy.stats.mode(votes, axis=1) 212 | 213 | n_data = self.features.shape[0] 214 | 215 | preds = preds.reshape(n_data, -1) 216 | counts = counts.reshape(n_data, -1) 217 | votes = votes.reshape(n_data, -1) 218 | 219 | preds = np.where(counts == 1, votes, preds) 220 | 221 | return 100*(preds == self.y.reshape(n_data, -1)).mean() 222 | # return -np.abs(preds - self.y).sum(axis=-1).mean() # For object detection 223 | 224 | class SplitkNN(TransferabilityMethod): 225 | """ k Nearest Neighbors using a train-val split using sklearn. Only supports l2 distance. """ 226 | 227 | def __init__(self, percent_train:float=0.5, k:int=1, n_dims:int=None): 228 | self.percent_train = percent_train 229 | self.k = k 230 | self.n_dims = n_dims 231 | 232 | def forward(self) -> float: 233 | self.features = feature_reduce(self.features, self.n_dims) 234 | 235 | train_x, test_x = split_data(self.features, self.percent_train) 236 | train_y, test_y = split_data(self.y , self.percent_train) 237 | 238 | nn = sklearn.neighbors.KNeighborsClassifier(n_neighbors=self.k).fit(train_x, train_y) 239 | return 100*(nn.predict(test_x) == test_y).mean() 240 | 241 | 242 | class SplitLogistic(TransferabilityMethod): 243 | """ Logistic classifier using a train-val split using sklearn. """ 244 | 245 | def __init__(self, percent_train:float=0.5, n_dims:int=None): 246 | self.percent_train = percent_train 247 | self.n_dims = n_dims 248 | 249 | def forward(self) -> float: 250 | self.features = feature_reduce(self.features, self.n_dims) 251 | 252 | train_x, test_x = split_data(self.features, self.percent_train) 253 | train_y, test_y = split_data(self.y , self.percent_train) 254 | 255 | logistic = sklearn.linear_model.LogisticRegression(random_state=0, multi_class='multinomial', solver='lbfgs', max_iter=20, tol=1e-1).fit(train_x, train_y) 256 | return 100*(logistic.predict(test_x) == test_y).mean() 257 | 258 | 259 | class RSA(TransferabilityMethod): 260 | """ 261 | Computes the RSA similarity metric proposed in https://arxiv.org/abs/1904.11740. 262 | 263 | Note that this requires the probes to be fully extracted before running. 264 | 265 | This method supports VOC2007. 
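    The score is the Spearman correlation (x100) between the lower triangles of two RDMs (1 - Pearson correlation of standardized features): one from this model's probe features and one from a reference model trained on the target dataset.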
266 | """ 267 | def __init__(self, reference_architecture:str=None, n_dims:int=None): 268 | self.reference_architecture = reference_architecture 269 | self.n_dims = n_dims 270 | 271 | def forward(self): 272 | self.features = feature_reduce(self.features, self.n_dims) 273 | 274 | reference_architecture = self.reference_architecture if self.reference_architecture is not None else self.architecture 275 | 276 | with open(self.cache_path_fn(reference_architecture, self.target_dataset, self.target_dataset), 'rb') as f: 277 | reference_params = pickle.load(f) 278 | 279 | reference_features = reference_params['features'] 280 | reference_features = feature_reduce(reference_features, self.n_dims) 281 | 282 | return self.get_rsa_correlation(self.features, reference_features) 283 | 284 | def get_rsa_correlation(self, feats1:np.ndarray, feats2:np.ndarray) -> float: 285 | scaler = sklearn.preprocessing.StandardScaler() 286 | 287 | feats1 = scaler.fit_transform(feats1) 288 | feats2 = scaler.fit_transform(feats2) 289 | 290 | rdm1 = 1 - np.corrcoef(feats1) 291 | rdm2 = 1 - np.corrcoef(feats2) 292 | 293 | lt_rdm1 = self.get_lowertri(rdm1) 294 | lt_rdm2 = self.get_lowertri(rdm2) 295 | 296 | return scipy.stats.spearmanr(lt_rdm1, lt_rdm2)[0] * 100 297 | 298 | def get_lowertri(self, rdm): 299 | num_conditions = rdm.shape[0] 300 | return rdm[np.triu_indices(num_conditions,1)] 301 | 302 | 303 | 304 | 305 | class PARC(TransferabilityMethod): 306 | """ 307 | Computes PARC, a variation of RSA that uses target labels instead of target features to cut down on training time. 308 | This was presented in this paper. 309 | 310 | This method supports VOC2007. 311 | """ 312 | 313 | def __init__(self, n_dims:int=None, fmt:str=''): 314 | self.n_dims = n_dims 315 | self.fmt = fmt 316 | 317 | def forward(self): 318 | self.features = feature_reduce(self.features, self.n_dims) 319 | 320 | num_classes = constants.num_classes[self.target_dataset] 321 | labels = np.eye(num_classes)[self.y] if self.y.ndim == 1 else self.y 322 | 323 | return self.get_parc_correlation(self.features, labels) 324 | 325 | def get_parc_correlation(self, feats1, labels2): 326 | scaler = sklearn.preprocessing.StandardScaler() 327 | 328 | feats1 = scaler.fit_transform(feats1) 329 | 330 | rdm1 = 1 - np.corrcoef(feats1) 331 | rdm2 = 1 - np.corrcoef(labels2) 332 | 333 | lt_rdm1 = self.get_lowertri(rdm1) 334 | lt_rdm2 = self.get_lowertri(rdm2) 335 | 336 | return scipy.stats.spearmanr(lt_rdm1, lt_rdm2)[0] * 100 337 | 338 | def get_lowertri(self, rdm): 339 | num_conditions = rdm.shape[0] 340 | return rdm[np.triu_indices(num_conditions,1)] 341 | 342 | 343 | 344 | 345 | class DDS(TransferabilityMethod): 346 | """ 347 | DDS from https://github.com/cvai-repo/duality-diagram-similarity/ 348 | 349 | This method supports VOC2007. 
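    The score is the CKA similarity (x100) between cosine-distance RDMs of this model's probe features and those of a target-trained reference model.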
350 |     """
351 | 
352 |     def __init__(self, reference_architecture:str=None, n_dims:int=None):
353 |         self.reference_architecture = reference_architecture
354 |         self.n_dims = n_dims
355 | 
356 |     def forward(self):
357 |         self.features = feature_reduce(self.features, self.n_dims)
358 | 
359 |         reference_architecture = self.reference_architecture if self.reference_architecture is not None else self.architecture
360 | 
361 |         with open(self.cache_path_fn(reference_architecture, self.target_dataset, self.target_dataset), 'rb') as f:
362 |             reference_params = pickle.load(f)
363 | 
364 |         reference_features = reference_params['features']
365 |         reference_features = feature_reduce(reference_features, self.n_dims)
366 | 
367 |         return self.get_similarity_from_rdms(self.features, reference_features)
368 | 
369 | 
370 |     def rdm(self, activations_value, dist):
371 |         """
372 |         Parameters
373 |         ----------
374 |         activations_value : numpy matrix with dimensions n x p
375 |             task 1 features (n = number of images, p = feature dimensions)
376 |         dist : string
377 |             distance function to compute dissimilarity matrix
378 |         Returns
379 |         -------
380 |         RDM : numpy matrix with dimensions n x n
381 |             dissimilarity matrix
382 |         """
383 |         if dist == 'pearson':
384 |             RDM = 1 - np.corrcoef(activations_value)
385 |         elif dist == 'cosine':
386 |             RDM = 1 - sklearn.metrics.pairwise.cosine_similarity(activations_value)
387 |         return RDM
388 | 
389 | 
390 |     def get_similarity_from_rdms(self, x, y, debiased=True, centered=True):
391 |         """
392 |         Parameters
393 |         ----------
394 |         x : numpy matrix with dimensions n x p
395 |             task 1 features (n = number of images, p = feature dimensions)
396 |         y : numpy matrix with dimensions n x p
397 |             task 2 features (n = number of images, p = feature dimensions)
398 |         dist : string
399 |             distance function to compute dissimilarity matrices
400 |         feature_norm : string
401 |             feature normalization type
402 |         debiased : bool, optional
403 |             set True to perform unbiased centering
404 |         centered : bool, optional
405 |             set True to center the Gram matrices before comparing them
406 |         Returns
407 |         -------
408 |         DDS: float
409 |             DDS between task1 and task2
410 |         """
411 |         x = sklearn.preprocessing.StandardScaler().fit_transform(x)
412 |         y = sklearn.preprocessing.StandardScaler().fit_transform(y)
413 | 
414 |         return self.cka(self.rdm(x, 'cosine'), self.rdm(y, 'cosine'), debiased=debiased, centered=centered) * 100
415 | 
416 |     def center_gram(self, gram, unbiased=False):
417 |         """
418 |         Center a symmetric Gram matrix.
419 | 
420 |         This is equivalent to centering the (possibly infinite-dimensional) features
421 |         induced by the kernel before computing the Gram matrix.
422 | 
423 |         Args:
424 |             gram: A num_examples x num_examples symmetric matrix.
425 |             unbiased: Whether to adjust the Gram matrix in order to compute an unbiased
426 |                 estimate of HSIC. Note that this estimator may be negative.
427 |         Returns:
428 |             A symmetric matrix with centered columns and rows.
429 | 
430 |         P.S. Function from Kornblith et al., ICML 2019
431 |         """
432 |         if not np.allclose(gram, gram.T):
433 |             raise ValueError('Input must be a symmetric matrix.')
434 |         gram = gram.copy()
435 | 
436 |         if unbiased:
437 |             # This formulation of the U-statistic, from Szekely, G. J., & Rizzo, M.
438 |             # L. (2014). Partial distance correlation with methods for dissimilarities.
439 |             # The Annals of Statistics, 42(6), 2382-2412, seems to be more numerically
440 |             # stable than the alternative from Song et al. (2007).
441 | n = gram.shape[0] 442 | np.fill_diagonal(gram, 0) 443 | means = np.sum(gram, 0, dtype=np.float64) / (n - 2) 444 | means -= np.sum(means) / (2 * (n - 1)) 445 | gram -= means[:, None] 446 | gram -= means[None, :] 447 | np.fill_diagonal(gram, 0) 448 | else: 449 | means = np.mean(gram, 0, dtype=np.float64) 450 | means -= np.mean(means) / 2 451 | gram -= means[:, None] 452 | gram -= means[None, :] 453 | 454 | return gram 455 | 456 | 457 | def cka(self, gram_x, gram_y, debiased=False,centered=True): 458 | """ 459 | Compute CKA. 460 | Args: 461 | gram_x: A num_examples x num_examples Gram matrix. 462 | gram_y: A num_examples x num_examples Gram matrix. 463 | debiased: Use unbiased estimator of HSIC. CKA may still be biased. 464 | Returns: 465 | The value of CKA between X and Y. 466 | P.S. Function from Kornblith et al., ICML 2019 467 | """ 468 | if centered: 469 | gram_x = self.center_gram(gram_x, unbiased=debiased) 470 | gram_y = self.center_gram(gram_y, unbiased=debiased) 471 | 472 | # Note: To obtain HSIC, this should be divided by (n-1)**2 (biased variant) or 473 | # n*(n-3) (unbiased variant), but this cancels for CKA. 474 | scaled_hsic = gram_x.ravel().dot(gram_y.ravel()) 475 | 476 | normalization_x = np.linalg.norm(gram_x) 477 | normalization_y = np.linalg.norm(gram_y) 478 | 479 | return scaled_hsic / (normalization_x * normalization_y) 480 | 481 | 482 | 483 | class LearnedHeuristic(): 484 | 485 | def __init__(self, cache_file:str='./cache/learned_heuristic.pkl'): 486 | self.cache_file = cache_file 487 | 488 | def predict(self, x:list) -> float: 489 | return sum([a * x_i for a, x_i in zip(self.coeffs, x)]) 490 | 491 | def make_feature(self, arch:str, source:str, target:str) -> list: 492 | feats = [ 493 | constants.num_classes[source], 494 | constants.num_classes[target], 495 | constants.dataset_images[source], 496 | constants.dataset_images[target], 497 | constants.model_layers[arch] 498 | ] 499 | 500 | return feats + [np.log(x) for x in feats] 501 | 502 | def fit(self, oracle_path:str, percent_train:float=0.5): 503 | oracle = pd.read_csv(oracle_path) 504 | 505 | x = [] 506 | y = [] 507 | 508 | for idx, row in oracle.iterrows(): 509 | arch = row['Architecture'] 510 | source = row['Source Dataset'] 511 | target = row['Target Dataset'] 512 | 513 | x.append(self.make_feature(arch, source, target)) 514 | y.append(row['Oracle']) 515 | 516 | x = np.array(x) 517 | y = np.array(y) 518 | 519 | regr = sklearn.linear_model.LinearRegression() 520 | regr.fit(x, y) 521 | 522 | self.coeffs = list(regr.coef_) 523 | 524 | with open(self.cache_file, 'wb') as f: 525 | pickle.dump(self.coeffs, f) 526 | 527 | def load(self): 528 | with open(self.cache_file, 'rb') as f: 529 | self.coeffs = pickle.load(f) 530 | 531 | 532 | -------------------------------------------------------------------------------- /metrics.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import scipy.stats as stats 5 | import itertools 6 | import pickle 7 | from collections import defaultdict 8 | 9 | from methods import LearnedHeuristic 10 | import constants 11 | 12 | def add_oracle(results:pd.DataFrame, oracle_path:str): 13 | oracle_scores = [] 14 | oracle_csv = pd.read_csv(oracle_path) 15 | 16 | for idx, row in results.iterrows(): 17 | x = oracle_csv 18 | x = x[x['Architecture'] == row['Architecture']] 19 | x = x[x['Source Dataset'] == row['Source Dataset']] 20 | x = x[x['Target Dataset'] == row['Target 
Dataset']] 21 | x = x['Oracle'] 22 | 23 | if row['Source Dataset'] in ('swav_imagenet', 'deepclusterv2_imagenet', 'npid_imagenet', 'moco_imagenet', 'simclr_imagenet'): 24 | x = -1 # mark self-supervised sources as invalid; pearson() below filters out scores <= 0 25 | oracle_scores.append(float(x)) 26 | 27 | results.insert(len(results.columns), 'Oracle', oracle_scores) 28 | return results 29 | 30 | def add_heuristic(results:pd.DataFrame): 31 | intuition_scores = [] 32 | lh_scores = [] 33 | 34 | lh = LearnedHeuristic() 35 | lh.load() 36 | 37 | for idx, row in results.iterrows(): 38 | model = row['Architecture'] 39 | dataset = row['Source Dataset'] 40 | target = row['Target Dataset'] 41 | 42 | intuition_score = constants.model_layers[model] + np.log(constants.dataset_images[dataset]) - np.log(constants.dataset_images[target]) 43 | intuition_scores.append(intuition_score) 44 | 45 | lh_score = lh.predict(lh.make_feature(model, dataset, target)) 46 | lh_scores.append(lh_score) 47 | 48 | results.insert(len(results.columns), 'Heuristic', intuition_scores) 49 | results.insert(len(results.columns), 'Learned Heuristic', lh_scores) 50 | return results 51 | 52 | def add_plasticity(results:pd.DataFrame, methods:list, weight:float=1) -> pd.DataFrame: 53 | """ 54 | Accounts for model plasticity by ensembling each method with 55 | a heuristic based on the # of layers in the source model. 56 | 57 | Scores for each method are normalized first so that the weight 58 | on the heuristic is consistent. 59 | """ 60 | stats = {} 61 | 62 | for method in methods: 63 | col = results[method] 64 | stats[method] = (col.mean(), col.min(), col.max(), col.std()) 65 | 66 | for idx, row in results.iterrows(): 67 | layers = constants.model_layers[row['Architecture']] 68 | 69 | for method in methods: 70 | val = results.at[idx, method] 71 | mean, cmin, cmax, std = stats[method] 72 | 73 | val = (val - mean) / std # z-score normalize the method's scores 74 | # val = (val - cmin) / (cmax - cmin) 75 | 76 | results.at[idx, method] = val + weight * (layers / 50) 77 | 78 | return results 79 | 80 | 81 | def pearson(method:np.ndarray, gt:np.ndarray): 82 | idx = (gt > 0) # ignore transfers without a valid oracle score (marked -1 above) 83 | method = method[idx] 84 | gt = gt[idx] 85 | 86 | # import matplotlib.pyplot as plt 87 | # plt.figure() 88 | # plt.plot(method, gt, 'o') 89 | # plt.show() 90 | 91 | return stats.pearsonr(method, gt)[0] * 100 92 | 93 | 94 | 95 | class MetricEval: 96 | """ 97 | Evaluates the csv created by evaluate.py against the oracle transfer performance. 98 | 99 | Params: 100 | - path: The path to the csv you want to evaluate. 101 | - oracle_path: The path to the oracle file. This depends on which benchmark you are using. 
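Use './oracles/controlled.csv' (the default) for the controlled model bank, or './oracles/all.csv' for the extended 'all' model bank.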
102 | """ 103 | 104 | def __init__(self, path:str, oracle_path:str='./oracles/controlled.csv'): 105 | self.results = pd.read_csv(path) 106 | self.methods = list(self.results.columns)[4:] 107 | 108 | # self.results = add_heuristic(self.results) 109 | self.results = add_oracle(self.results, oracle_path) 110 | 111 | self.results = self.results.fillna(0) 112 | 113 | def all_methods(self, ignore_methods:set=None): 114 | methods = self.methods # + ['Heuristic', 'Learned Heuristic'] 115 | if ignore_methods is not None: 116 | for method in ignore_methods: 117 | methods.remove(methods.index(method)) 118 | return methods 119 | 120 | def add_plasticity(self, methods:list=None, weight:int=1): 121 | if methods is None: 122 | methods = self.all_methods() 123 | 124 | add_plasticity(self.results, methods, weight) 125 | 126 | def aggregate(self, constants:list=['Target Dataset'], methods:list=None, variance_over:str='Run', metric=pearson, aggregate:bool=True): 127 | """ 128 | Aggregates the results for the given methods (default all) and parameters. 129 | 130 | Params: 131 | - constants: A list of variables names to keep constant when evaluating. Constants will be averaged over, while correlation is computed over everything not constant. 132 | - methods: The subset of methods to evaluate. This defaults to all methods. 133 | - variance_over: The csv column to compute variance over. Probably don't want to change this. 134 | - metric: The metric function to use. See the definition of pearson for more details. 135 | - aggregate: Whether or not to average over all costants. If False, this will return a separate result for every constant. 136 | 137 | Returns mean, variance, all_runs. 138 | """ 139 | all_runs = [] 140 | 141 | constant_values = { 142 | k: list(self.results[k].unique()) 143 | for k in constants 144 | } 145 | 146 | if methods is None: 147 | methods = self.all_methods() 148 | 149 | for run in self.results[variance_over].unique(): 150 | # Select the current run 151 | run_results = self.results[self.results[variance_over] == run] 152 | run_metrics = {} 153 | 154 | # Set up the iterables for filtering 155 | if len(constants) == 0: 156 | iter_obj = [None] 157 | else: 158 | iter_obj = itertools.product(*[constant_values[k] for k in constants]) 159 | 160 | # Now iterate through all the possible combination of constant values 161 | for cur_values in iter_obj: 162 | cur_results = run_results 163 | cur_metrics = {} 164 | 165 | # Do the filtering 166 | if cur_values != None: 167 | for constant, value in zip(constants, cur_values): 168 | cur_results = cur_results[cur_results[constant] == value] 169 | 170 | cur_values = tuple(cur_values) 171 | 172 | if len(cur_results) < 2: 173 | continue 174 | 175 | # Do evaluation 176 | gt_results = cur_results['Oracle'].to_numpy() 177 | 178 | # if cur_values[0] != 'voc2007': 179 | # continue 180 | # print(cur_results[cur_results['Oracle'] > 96]) 181 | 182 | for method in methods: 183 | # print(cur_values, method) 184 | method_results = cur_results[method].to_numpy() 185 | method_score = metric(method_results, gt_results) 186 | cur_metrics[method] = method_score 187 | 188 | run_metrics[cur_values] = cur_metrics 189 | 190 | 191 | # Aggregate the results 192 | if aggregate: 193 | aggregated_run_metrics = {k: 0 for k in methods} 194 | total = 0 195 | 196 | # Sum all of the metric results for this run over all possibilities for constant values 197 | for _, cur_metrics in run_metrics.items(): 198 | total += 1 199 | for k, v in cur_metrics.items(): 200 | aggregated_run_metrics[k] += 
v 201 | 202 | # Turn the sums into means 203 | for method in methods: 204 | aggregated_run_metrics[method] /= total 205 | 206 | all_runs.append(aggregated_run_metrics) 207 | else: 208 | all_runs.append(run_metrics) 209 | 210 | 211 | 212 | def compute_variance(run_data:list, mean:float): 213 | return sum([(x - mean) ** 2 for x in run_data]) / len(run_data) 214 | 215 | 216 | # Compute variance 217 | if aggregate: 218 | run_data = {method: [run[method] for run in all_runs] for method in methods} 219 | 220 | mean = {method: sum(run_data[method]) / len(all_runs) for method in methods} 221 | variance = {method: compute_variance(run_data[method], mean[method]) for method in methods} 222 | 223 | return mean, variance, all_runs 224 | else: 225 | run_data = { 226 | possibility: { 227 | method: [run[possibility][method] for run in all_runs] 228 | for method in methods 229 | } for possibility in all_runs[0] 230 | } 231 | 232 | mean = { 233 | possibility: { 234 | method: sum(run_data[possibility][method]) / len(all_runs) 235 | for method in methods 236 | } for possibility in all_runs[0] 237 | } 238 | 239 | variance = { 240 | possibility: { 241 | method: compute_variance(run_data[possibility][method], mean[possibility][method]) 242 | for method in methods 243 | } for possibility in all_runs[0] 244 | } 245 | 246 | return mean, variance, all_runs 247 | 248 | 249 | 250 | 251 | if __name__ == '__main__': 252 | # path = './results/data_sweep/fixed_budget_500.csv' 253 | path = './results/fixed_budget_500.csv' 254 | eval_obj = MetricEval(path) 255 | eval_obj.add_plasticity() 256 | constants = ['Target Dataset'] # ['Source Dataset', 'Architecture'] # 257 | 258 | with open(path.replace('.csv', '_timing.pkl'), 'rb') as f: 259 | timing = pickle.load(f) 260 | 261 | # mean, var, _ = eval_obj.aggregate(constants=constants, aggregate=False) 262 | # for possibility in mean: 263 | # print(f' --- {possibility} --- ') 264 | # for method in mean[possibility]: 265 | # print(f'{method:20s}: {mean[possibility][method]:6.2f}% +/- {np.sqrt(var[possibility][method]):4.2f}') 266 | # print() 267 | 268 | mean, var, _ = eval_obj.aggregate(constants=constants, aggregate=True) 269 | print(' --- TOTAL --- ') 270 | for method in mean: 271 | avg_time = sum(timing[method]) / len(timing[method]) if method in timing else 0 272 | print(f'{method:20s}: {mean[method]:6.2f}% +/- {np.sqrt(var[method]):4.2f} ({avg_time*1000:.1f} ms +/- {np.std(timing[method])*1000:.1f})') 273 | 274 | -------------------------------------------------------------------------------- /oracles/all.csv: -------------------------------------------------------------------------------- 1 | Architecture,Source Dataset,Target Dataset,Oracle 2 | resnet50,panoptic-rcnn_fpn_139514544,cifar10,89.1 3 | resnet101,mask-rcnn_fpn_138205316,cifar10,92.85 4 | resnet101,faster-rcnn_fpn_137851257,cifar10,92.55 5 | resnet101,panoptic-rcnn_fpn_139797668,cifar10,86.7 6 | resnet50,mask-rcnn_c4_137259246,cifar10,90.45 7 | resnet50,pirl_imagenet,cifar10,94.71000000000001 8 | resnet50,jigsaw_imagenet22k,cifar10,86.18 9 | resnet50,keypoint-rcnn_fpn_137261548,cifar10,80.53 10 | resnet101,simclr_imagenet,cifar10,-1.0 11 | resnet50,moco_imagenet,cifar10,66.11 12 | resnet50,simclr_imagenet,cifar10,58.24 13 | resnet50,semisup_yfcc100m,cifar10,95.88 14 | resnet101,keypoint-rcnn_fpn_138363331,cifar10,87.58 15 | resnet50,faster-rcnn_c4_voc_142202221,cifar10,90.59 16 | resnet50,retinanet_190397773,cifar10,91.25 17 | resnet50,semisup_instagram,cifar10,96.17999999999999 18 | 
resnet50,mask-rcnn_fpn_cityscapes_142423278,cifar10,88.24 19 | resnet50,mask-rcnn_fpn_lvis_144219072,cifar10,89.25 20 | resnet50,mask-rcnn_fpn_137260431,cifar10,90.83 21 | resnet50,supervised_places205,cifar10,89.34 22 | resnet50,retinanet_190397829,cifar10,90.94 23 | resnet101,mask-rcnn_c4_138363239,cifar10,92.32000000000001 24 | resnet50,deepclusterv2_imagenet,cifar10,64.19 25 | resnet50,keypoint-rcnn_fpn_137849621,cifar10,83.87 26 | resnet50,faster-rcnn_c4_137849393,cifar10,90.53 27 | resnet50,rotnet_imagenet22k,cifar10,92.03 28 | resnet50,swav_imagenet,cifar10,60.129999999999995 29 | resnet50,faster-rcnn_fpn_137257794,cifar10,90.61 30 | resnet50,faster-rcnn_fpn_137849458,cifar10,90.42 31 | resnet50,faster-rcnn_c4_137257644,cifar10,90.38000000000001 32 | resnet50,npid_imagenet,cifar10,57.13 33 | resnet101,retinanet_190397697,cifar10,92.53 34 | resnet101,panoptic-rcnn_fpn_139514519,cifar10,92.04 35 | resnet50,mask-rcnn_c4_137849525,cifar10,90.49000000000001 36 | resnet101,faster-rcnn_c4_138204752,cifar10,92.35 37 | resnet50,mask-rcnn_fpn_137849600,cifar10,90.38000000000001 38 | resnet101,mask-rcnn_fpn_lvis_144219035,cifar10,91.85 39 | resnet50,clusterfit_imagenet,cifar10,90.24 40 | resnet50,panoptic-rcnn_fpn_139514569,cifar10,87.92 41 | resnet50,mask-rcnn_c4_137259246,oxford_pets,60.86127010084492 42 | resnet50,pirl_imagenet,oxford_pets,86.09975470155355 43 | resnet50,jigsaw_imagenet22k,oxford_pets,16.843826655764513 44 | resnet50,keypoint-rcnn_fpn_137261548,oxford_pets,32.57018261106568 45 | resnet101,simclr_imagenet,oxford_pets,-1.0 46 | resnet50,moco_imagenet,oxford_pets,8.77623330607795 47 | resnet50,simclr_imagenet,oxford_pets,18.097574270918507 48 | resnet50,semisup_yfcc100m,oxford_pets,92.259471245571 49 | resnet101,keypoint-rcnn_fpn_138363331,oxford_pets,69.28318342872717 50 | resnet50,faster-rcnn_c4_voc_142202221,oxford_pets,74.48896156991006 51 | resnet50,retinanet_190397773,oxford_pets,74.78877078222949 52 | resnet50,semisup_instagram,oxford_pets,93.56772962660126 53 | resnet50,mask-rcnn_fpn_cityscapes_142423278,oxford_pets,49.46852003270646 54 | resnet50,mask-rcnn_fpn_lvis_144219072,oxford_pets,61.215590079040616 55 | resnet50,mask-rcnn_fpn_137260431,oxford_pets,64.26819296811121 56 | resnet101,faster-rcnn_fpn_137851257,oxford_pets,73.48051240119923 57 | resnet50,supervised_places205,oxford_pets,25.45652766421368 58 | resnet50,retinanet_190397829,oxford_pets,64.45898064867811 59 | resnet101,mask-rcnn_c4_138363239,oxford_pets,65.57645134914145 60 | resnet50,deepclusterv2_imagenet,oxford_pets,49.65930771327337 61 | resnet50,keypoint-rcnn_fpn_137849621,oxford_pets,44.99863723085309 62 | resnet50,faster-rcnn_c4_137849393,oxford_pets,57.645134914145544 63 | resnet50,rotnet_imagenet22k,oxford_pets,14.363586808394658 64 | resnet50,swav_imagenet,oxford_pets,37.31261924230036 65 | resnet50,faster-rcnn_fpn_137257794,oxford_pets,65.68547288089398 66 | resnet50,faster-rcnn_fpn_137849458,oxford_pets,59.28045789043336 67 | resnet50,faster-rcnn_c4_137257644,oxford_pets,62.878168438266556 68 | resnet50,npid_imagenet,oxford_pets,8.69446715726356 69 | resnet101,retinanet_190397697,oxford_pets,76.9692014172799 70 | resnet101,panoptic-rcnn_fpn_139514519,oxford_pets,62.71463614063778 71 | resnet50,mask-rcnn_c4_137849525,oxford_pets,54.67429817388935 72 | resnet101,mask-rcnn_fpn_138205316,oxford_pets,69.14690651403652 73 | resnet101,faster-rcnn_c4_138204752,oxford_pets,70.94576178795312 74 | resnet50,mask-rcnn_fpn_137849600,oxford_pets,56.33687653311529 75 | 
resnet101,mask-rcnn_fpn_lvis_144219035,oxford_pets,75.55192150449714 76 | resnet50,clusterfit_imagenet,oxford_pets,57.99945489234124 77 | resnet50,panoptic-rcnn_fpn_139514569,oxford_pets,48.896156991005725 78 | resnet101,panoptic-rcnn_fpn_139797668,oxford_pets,21.72254020168983 79 | resnet50,panoptic-rcnn_fpn_139514544,oxford_pets,56.20059961842464 80 | resnet50,mask-rcnn_c4_137259246,cub200,69.72730410769762 81 | resnet50,pirl_imagenet,cub200,72.143596824301 82 | resnet50,jigsaw_imagenet22k,cub200,58.215395236451506 83 | resnet50,keypoint-rcnn_fpn_137261548,cub200,60.73524335519503 84 | resnet101,simclr_imagenet,cub200,-1.0 85 | resnet50,moco_imagenet,cub200,8.24991370383155 86 | resnet50,simclr_imagenet,cub200,10.545391784604764 87 | resnet50,semisup_yfcc100m,cub200,83.39661719019675 88 | resnet101,keypoint-rcnn_fpn_138363331,cub200,62.25405591991715 89 | resnet50,faster-rcnn_c4_voc_142202221,cub200,72.73041076976182 90 | resnet50,retinanet_190397773,cub200,72.90300310666207 91 | resnet50,semisup_instagram,cub200,83.10321021746634 92 | resnet50,mask-rcnn_fpn_cityscapes_142423278,cub200,65.10182947877115 93 | resnet50,mask-rcnn_fpn_lvis_144219072,cub200,68.95063859164652 94 | resnet50,mask-rcnn_fpn_137260431,cub200,71.52226441146013 95 | resnet101,faster-rcnn_fpn_137851257,cub200,68.01863997238523 96 | resnet50,supervised_places205,cub200,61.23576113220574 97 | resnet50,retinanet_190397829,cub200,70.38315498791854 98 | resnet101,mask-rcnn_c4_138363239,cub200,66.17190196755264 99 | resnet50,deepclusterv2_imagenet,cub200,33.18950638591647 100 | resnet50,keypoint-rcnn_fpn_137849621,cub200,55.79910251984812 101 | resnet50,faster-rcnn_c4_137849393,cub200,67.863306869175 102 | resnet50,rotnet_imagenet22k,cub200,63.80738695201933 103 | resnet50,swav_imagenet,cub200,22.2126337590611 104 | resnet50,faster-rcnn_fpn_137257794,cub200,71.10804280289955 105 | resnet50,faster-rcnn_fpn_137849458,cub200,68.31204694511564 106 | resnet50,faster-rcnn_c4_137257644,cub200,69.05419399378667 107 | resnet50,npid_imagenet,cub200,10.338280980324473 108 | resnet101,retinanet_190397697,cub200,70.6420434932689 109 | resnet101,panoptic-rcnn_fpn_139514519,cub200,66.53434587504314 110 | resnet50,mask-rcnn_c4_137849525,cub200,65.41249568519157 111 | resnet101,mask-rcnn_fpn_138205316,cub200,67.82878840179495 112 | resnet101,faster-rcnn_c4_138204752,cub200,66.96582671729375 113 | resnet50,mask-rcnn_fpn_137849600,cub200,67.3627890921643 114 | resnet101,mask-rcnn_fpn_lvis_144219035,cub200,70.24508111839835 115 | resnet50,clusterfit_imagenet,cub200,58.19813600276148 116 | resnet50,panoptic-rcnn_fpn_139514569,cub200,66.05108733172247 117 | resnet101,panoptic-rcnn_fpn_139797668,cub200,49.34414911977908 118 | resnet50,panoptic-rcnn_fpn_139514544,cub200,68.10493614083535 119 | resnet50,mask-rcnn_c4_137259246,caltech101,91.63498098859316 120 | resnet50,pirl_imagenet,caltech101,93.68821292775665 121 | resnet50,jigsaw_imagenet22k,caltech101,89.27756653992395 122 | resnet50,keypoint-rcnn_fpn_137261548,caltech101,87.71863117870723 123 | resnet101,simclr_imagenet,caltech101,-1.0 124 | resnet50,moco_imagenet,caltech101,45.70342205323194 125 | resnet50,simclr_imagenet,caltech101,53.269961977186306 126 | resnet50,semisup_yfcc100m,caltech101,96.04562737642586 127 | resnet101,keypoint-rcnn_fpn_138363331,caltech101,89.9619771863118 128 | resnet50,faster-rcnn_c4_voc_142202221,caltech101,92.16730038022814 129 | resnet50,retinanet_190397773,caltech101,90.91254752851711 130 | resnet50,semisup_instagram,caltech101,96.57794676806084 131 | 
resnet50,mask-rcnn_fpn_cityscapes_142423278,caltech101,88.44106463878327 132 | resnet50,mask-rcnn_fpn_lvis_144219072,caltech101,90.38022813688214 133 | resnet50,mask-rcnn_fpn_137260431,caltech101,91.1787072243346 134 | resnet101,faster-rcnn_fpn_137851257,caltech101,92.16730038022814 135 | resnet50,supervised_places205,caltech101,88.06083650190114 136 | resnet50,retinanet_190397829,caltech101,91.55893536121673 137 | resnet101,mask-rcnn_c4_138363239,caltech101,92.16730038022814 138 | resnet50,deepclusterv2_imagenet,caltech101,71.40684410646388 139 | resnet50,keypoint-rcnn_fpn_137849621,caltech101,86.61596958174906 140 | resnet50,faster-rcnn_c4_137849393,caltech101,91.63498098859316 141 | resnet50,rotnet_imagenet22k,caltech101,91.10266159695819 142 | resnet50,swav_imagenet,caltech101,63.65019011406844 143 | resnet50,faster-rcnn_fpn_137257794,caltech101,90.91254752851711 144 | resnet50,faster-rcnn_fpn_137849458,caltech101,90.30418250950571 145 | resnet50,faster-rcnn_c4_137257644,caltech101,91.40684410646388 146 | resnet50,npid_imagenet,caltech101,39.353612167300376 147 | resnet101,retinanet_190397697,caltech101,91.93916349809886 148 | resnet101,panoptic-rcnn_fpn_139514519,caltech101,91.33079847908745 149 | resnet50,mask-rcnn_c4_137849525,caltech101,90.53231939163499 150 | resnet101,mask-rcnn_fpn_138205316,caltech101,91.55893536121673 151 | resnet101,faster-rcnn_c4_138204752,caltech101,92.9277566539924 152 | resnet50,mask-rcnn_fpn_137849600,caltech101,90.26615969581749 153 | resnet101,mask-rcnn_fpn_lvis_144219035,caltech101,93.07984790874525 154 | resnet50,clusterfit_imagenet,caltech101,90.41825095057034 155 | resnet50,panoptic-rcnn_fpn_139514569,caltech101,89.04942965779468 156 | resnet101,panoptic-rcnn_fpn_139797668,caltech101,83.30798479087453 157 | resnet50,panoptic-rcnn_fpn_139514544,caltech101,90.79847908745246 158 | resnet50,mask-rcnn_c4_137259246,stanford_dogs,43.07692307692308 159 | resnet50,pirl_imagenet,stanford_dogs,68.71794871794872 160 | resnet50,jigsaw_imagenet22k,stanford_dogs,17.494172494172496 161 | resnet50,keypoint-rcnn_fpn_137261548,stanford_dogs,20.92074592074592 162 | resnet101,simclr_imagenet,stanford_dogs,-1.0 163 | resnet50,moco_imagenet,stanford_dogs,5.419580419580419 164 | resnet50,simclr_imagenet,stanford_dogs,7.913752913752914 165 | resnet50,semisup_yfcc100m,stanford_dogs,84.05594405594405 166 | resnet101,keypoint-rcnn_fpn_138363331,stanford_dogs,42.13286713286713 167 | resnet50,faster-rcnn_c4_voc_142202221,stanford_dogs,58.5081585081585 168 | resnet50,retinanet_190397773,stanford_dogs,56.98135198135198 169 | resnet50,semisup_instagram,stanford_dogs,85.82750582750582 170 | resnet50,mask-rcnn_fpn_cityscapes_142423278,stanford_dogs,33.36829836829837 171 | resnet50,mask-rcnn_fpn_lvis_144219072,stanford_dogs,45.3030303030303 172 | resnet50,mask-rcnn_fpn_137260431,stanford_dogs,45.629370629370634 173 | resnet101,faster-rcnn_fpn_137851257,stanford_dogs,58.63636363636363 174 | resnet50,supervised_places205,stanford_dogs,15.932400932400933 175 | resnet50,retinanet_190397829,stanford_dogs,44.592074592074596 176 | resnet101,mask-rcnn_c4_138363239,stanford_dogs,52.9020979020979 177 | resnet50,deepclusterv2_imagenet,stanford_dogs,31.258741258741257 178 | resnet50,keypoint-rcnn_fpn_137849621,stanford_dogs,22.26107226107226 179 | resnet50,faster-rcnn_c4_137849393,stanford_dogs,37.23776223776224 180 | resnet50,rotnet_imagenet22k,stanford_dogs,11.398601398601398 181 | resnet50,swav_imagenet,stanford_dogs,18.496503496503497 182 | 
resnet50,faster-rcnn_fpn_137257794,stanford_dogs,48.27505827505828 183 | resnet50,faster-rcnn_fpn_137849458,stanford_dogs,39.4988344988345 184 | resnet50,faster-rcnn_c4_137257644,stanford_dogs,45.45454545454545 185 | resnet50,npid_imagenet,stanford_dogs,1.7599067599067602 186 | resnet101,retinanet_190397697,stanford_dogs,63.56643356643357 187 | resnet101,panoptic-rcnn_fpn_139514519,stanford_dogs,49.16083916083916 188 | resnet50,mask-rcnn_c4_137849525,stanford_dogs,33.869463869463864 189 | resnet101,mask-rcnn_fpn_138205316,stanford_dogs,55.46620046620047 190 | resnet101,faster-rcnn_c4_138204752,stanford_dogs,54.06759906759907 191 | resnet50,mask-rcnn_fpn_137849600,stanford_dogs,36.81818181818181 192 | resnet101,mask-rcnn_fpn_lvis_144219035,stanford_dogs,61.759906759906755 193 | resnet50,clusterfit_imagenet,stanford_dogs,47.66899766899767 194 | resnet50,panoptic-rcnn_fpn_139514569,stanford_dogs,29.242424242424242 195 | resnet101,panoptic-rcnn_fpn_139797668,stanford_dogs,8.7995337995338 196 | resnet50,panoptic-rcnn_fpn_139514544,stanford_dogs,38.72960372960373 197 | resnet50,mask-rcnn_c4_137259246,nabird,71.63561076604555 198 | resnet50,pirl_imagenet,nabird,71.99285511305972 199 | resnet50,jigsaw_imagenet22k,nabird,67.17817561807333 200 | resnet50,keypoint-rcnn_fpn_137261548,nabird,67.87236633783948 201 | resnet101,simclr_imagenet,nabird,-1.0 202 | resnet50,moco_imagenet,nabird,67.70998254374213 203 | resnet50,simclr_imagenet,nabird,63.41087159501482 204 | resnet50,semisup_yfcc100m,nabird,77.83867170056429 205 | resnet101,keypoint-rcnn_fpn_138363331,nabird,70.08484553241587 206 | resnet50,faster-rcnn_c4_voc_142202221,nabird,72.7601185401697 207 | resnet50,retinanet_190397773,nabird,72.46376811594203 208 | resnet50,semisup_instagram,nabird,76.75881946981691 209 | resnet50,mask-rcnn_fpn_cityscapes_142423278,nabird,68.87914586124305 210 | resnet50,mask-rcnn_fpn_lvis_144219072,nabird,70.8358705801161 211 | resnet50,mask-rcnn_fpn_137260431,nabird,71.1728169528681 212 | resnet101,faster-rcnn_fpn_137851257,nabird,72.97121747249624 213 | resnet50,supervised_places205,nabird,71.39203507489952 214 | resnet50,retinanet_190397829,nabird,71.40421385945682 215 | resnet101,mask-rcnn_c4_138363239,nabird,72.60991353062964 216 | resnet50,deepclusterv2_imagenet,nabird,70.08890512726829 217 | resnet50,keypoint-rcnn_fpn_137849621,nabird,67.21471197174522 218 | resnet50,faster-rcnn_c4_137849393,nabird,70.56387772500304 219 | resnet50,rotnet_imagenet22k,nabird,70.00365363536719 220 | resnet50,swav_imagenet,nabird,68.94815897373442 221 | resnet50,faster-rcnn_fpn_137257794,nabird,71.60313400722607 222 | resnet50,faster-rcnn_fpn_137849458,nabird,70.3893151463484 223 | resnet50,faster-rcnn_c4_137257644,nabird,71.42857142857143 224 | resnet50,npid_imagenet,nabird,59.78971298664393 225 | resnet101,retinanet_190397697,nabird,-1.0 226 | resnet101,panoptic-rcnn_fpn_139514519,nabird,-1.0 227 | resnet50,mask-rcnn_c4_137849525,nabird,70.50704339706897 228 | resnet101,mask-rcnn_fpn_138205316,nabird,-1.0 229 | resnet101,faster-rcnn_c4_138204752,nabird,73.34875979377257 230 | resnet50,mask-rcnn_fpn_137849600,nabird,70.05642836844883 231 | resnet101,mask-rcnn_fpn_lvis_144219035,nabird,74.02671213412901 232 | resnet50,clusterfit_imagenet,nabird,64.12536028904316 233 | resnet50,panoptic-rcnn_fpn_139514569,nabird,69.58145577071409 234 | resnet101,panoptic-rcnn_fpn_139797668,nabird,68.58279543701539 235 | resnet50,panoptic-rcnn_fpn_139514544,nabird,70.8845857183453 236 | resnet50,mask-rcnn_c4_137259246,voc2007,97.0074621124702 237 | 
resnet50,pirl_imagenet,voc2007,96.96418955304254 238 | resnet50,jigsaw_imagenet22k,voc2007,96.02469420724671 239 | resnet50,keypoint-rcnn_fpn_137261548,voc2007,95.04192630202323 240 | resnet101,simclr_imagenet,voc2007,-1.0 241 | resnet50,moco_imagenet,voc2007,94.2380183091007 242 | resnet50,simclr_imagenet,voc2007,93.9476113547196 243 | resnet50,semisup_yfcc100m,voc2007,97.97099776905915 244 | resnet101,keypoint-rcnn_fpn_138363331,voc2007,95.92372490191553 245 | resnet50,faster-rcnn_c4_voc_142202221,voc2007,97.1507423647973 246 | resnet50,retinanet_190397773,voc2007,97.23632587122086 247 | resnet50,semisup_instagram,voc2007,97.98061389337641 248 | resnet50,mask-rcnn_fpn_cityscapes_142423278,voc2007,96.58435264251096 249 | resnet50,mask-rcnn_fpn_lvis_144219072,voc2007,96.85168089853066 250 | resnet50,mask-rcnn_fpn_137260431,voc2007,97.11612431725517 251 | resnet101,faster-rcnn_fpn_137851257,voc2007,97.21420878529118 252 | resnet50,supervised_places205,voc2007,96.50453881067774 253 | resnet50,retinanet_190397829,voc2007,97.14689591507039 254 | resnet101,mask-rcnn_c4_138363239,voc2007,97.1920916993615 255 | resnet50,deepclusterv2_imagenet,voc2007,94.12647126702053 256 | resnet50,keypoint-rcnn_fpn_137849621,voc2007,95.30348488345257 257 | resnet50,faster-rcnn_c4_137849393,voc2007,96.98726825140396 258 | resnet50,rotnet_imagenet22k,voc2007,96.03334871913223 259 | resnet50,swav_imagenet,voc2007,94.13512577890607 260 | resnet50,faster-rcnn_fpn_137257794,voc2007,97.15458881452419 261 | resnet50,faster-rcnn_fpn_137849458,voc2007,97.10362335564274 262 | resnet50,faster-rcnn_c4_137257644,voc2007,97.064197245942 263 | resnet50,npid_imagenet,voc2007,93.52546349719209 264 | resnet101,retinanet_190397697,voc2007,97.28056004308023 265 | resnet101,panoptic-rcnn_fpn_139514519,voc2007,97.09208400646203 266 | resnet50,mask-rcnn_c4_137849525,voc2007,96.96515116547427 267 | resnet101,mask-rcnn_fpn_138205316,voc2007,97.22767135933533 268 | resnet101,faster-rcnn_c4_138204752,voc2007,97.1584352642511 269 | resnet50,mask-rcnn_fpn_137849600,voc2007,97.08054465728134 270 | resnet101,mask-rcnn_fpn_lvis_144219035,voc2007,97.11227786752828 271 | resnet50,clusterfit_imagenet,voc2007,96.21509346872837 272 | resnet50,panoptic-rcnn_fpn_139514569,voc2007,96.81513962612509 273 | resnet101,panoptic-rcnn_fpn_139797668,voc2007,94.88422186322025 274 | resnet50,panoptic-rcnn_fpn_139514544,voc2007,96.93726440495422 275 | 276 | resnet50,nabird,cifar10,89.97 277 | resnet50,oxford_pets,cifar10,94.57 278 | resnet50,cub200,cifar10,93.08999999999999 279 | resnet50,caltech101,cifar10,94.42 280 | resnet50,stanford_dogs,cifar10,94.16 281 | resnet50,voc2007,cifar10,94.45 282 | resnet50,imagenet,cifar10,94.53 283 | resnet18,nabird,cifar10,94.35 284 | resnet18,oxford_pets,cifar10,95.58 285 | resnet18,cub200,cifar10,93.25 286 | resnet18,caltech101,cifar10,95.63000000000001 287 | resnet18,stanford_dogs,cifar10,95.41 288 | resnet18,voc2007,cifar10,95.65 289 | resnet18,imagenet,cifar10,95.86 290 | googlenet,nabird,cifar10,86.16 291 | googlenet,oxford_pets,cifar10,88.72 292 | googlenet,cub200,cifar10,88.47 293 | googlenet,caltech101,cifar10,87.63 294 | googlenet,stanford_dogs,cifar10,89.44 295 | googlenet,voc2007,cifar10,87.82 296 | googlenet,imagenet,cifar10,88.7 297 | alexnet,nabird,cifar10,92.31 298 | alexnet,oxford_pets,cifar10,93.05 299 | alexnet,cub200,cifar10,92.86 300 | alexnet,caltech101,cifar10,92.86999999999999 301 | alexnet,stanford_dogs,cifar10,92.61 302 | alexnet,voc2007,cifar10,92.75999999999999 303 | alexnet,imagenet,cifar10,92.89 
304 | resnet50,nabird,oxford_pets,72.47206323248841 305 | resnet50,cub200,oxford_pets,85.74543472335786 306 | resnet50,caltech101,oxford_pets,88.8252929953666 307 | resnet50,stanford_dogs,oxford_pets,91.2782774597983 308 | resnet50,voc2007,oxford_pets,89.26137912237667 309 | resnet50,cifar10,oxford_pets,90.02452984464432 310 | resnet50,imagenet,oxford_pets,90.86944671572635 311 | resnet18,nabird,oxford_pets,65.0858544562551 312 | resnet18,cub200,oxford_pets,39.73834832379395 313 | resnet18,caltech101,oxford_pets,88.30744071954211 314 | resnet18,stanford_dogs,oxford_pets,89.17961297356229 315 | resnet18,voc2007,oxford_pets,88.25292995366584 316 | resnet18,cifar10,oxford_pets,87.08094848732625 317 | resnet18,imagenet,oxford_pets,88.68901608067593 318 | googlenet,nabird,oxford_pets,51.158353774870534 319 | googlenet,cub200,oxford_pets,68.87435268465522 320 | googlenet,caltech101,oxford_pets,69.96456800218043 321 | googlenet,stanford_dogs,oxford_pets,82.88361951485417 322 | googlenet,voc2007,oxford_pets,72.7173616789316 323 | googlenet,cifar10,oxford_pets,67.21177432542927 324 | googlenet,imagenet,oxford_pets,75.30662305805397 325 | alexnet,nabird,oxford_pets,76.45134914145544 326 | alexnet,cub200,oxford_pets,77.5960752248569 327 | alexnet,caltech101,oxford_pets,76.64213682202234 328 | alexnet,stanford_dogs,oxford_pets,79.09512128645407 329 | alexnet,voc2007,oxford_pets,75.98800763150723 330 | alexnet,cifar10,oxford_pets,76.69664758789861 331 | alexnet,imagenet,oxford_pets,78.33197056418642 332 | resnet50,nabird,cub200,84.74283741801864 333 | resnet50,oxford_pets,cub200,78.42595788746979 334 | resnet50,caltech101,cub200,78.20158784949949 335 | resnet50,stanford_dogs,cub200,78.9955125992406 336 | resnet50,voc2007,cub200,78.90921643079048 337 | resnet50,cifar10,cub200,79.47877114256127 338 | resnet50,imagenet,cub200,79.66862271315154 339 | resnet18,nabird,cub200,71.02174663444943 340 | resnet18,oxford_pets,cub200,69.19226786330687 341 | resnet18,caltech101,cub200,68.7780462547463 342 | resnet18,stanford_dogs,cub200,67.69071453227477 343 | resnet18,voc2007,cub200,68.58819468415602 344 | resnet18,cifar10,cub200,67.27649292371419 345 | resnet18,imagenet,cub200,68.98515705902658 346 | googlenet,nabird,cub200,70.45219192267864 347 | googlenet,oxford_pets,cub200,47.03141180531584 348 | googlenet,caltech101,cub200,44.75319295823266 349 | googlenet,stanford_dogs,cub200,45.391784604763544 350 | googlenet,voc2007,cub200,43.54504659993096 351 | googlenet,cifar10,cub200,45.32274767000345 352 | googlenet,imagenet,cub200,49.44770452191923 353 | alexnet,nabird,cub200,61.201242664825685 354 | alexnet,oxford_pets,cub200,53.84880911287539 355 | alexnet,caltech101,cub200,51.242664825681736 356 | alexnet,stanford_dogs,cub200,53.365550569554706 357 | alexnet,voc2007,cub200,53.55540214014498 358 | alexnet,cifar10,cub200,50.6385916465309 359 | alexnet,imagenet,cub200,53.98688298239558 360 | resnet50,nabird,caltech101,91.52091254752852 361 | resnet50,oxford_pets,caltech101,95.55133079847909 362 | resnet50,cub200,caltech101,94.52471482889734 363 | resnet50,stanford_dogs,caltech101,94.90494296577947 364 | resnet50,voc2007,caltech101,95.55133079847909 365 | resnet50,cifar10,caltech101,95.74144486692016 366 | resnet50,imagenet,caltech101,95.2851711026616 367 | resnet18,nabird,caltech101,61.634980988593156 368 | resnet18,oxford_pets,caltech101,87.41444866920152 369 | resnet18,cub200,caltech101,43.2319391634981 370 | resnet18,stanford_dogs,caltech101,83.8022813688213 371 | resnet18,voc2007,caltech101,87.18631178707224 372 | 
resnet18,cifar10,caltech101,82.69961977186313 373 | resnet18,imagenet,caltech101,87.68060836501901 374 | googlenet,nabird,caltech101,81.4448669201521 375 | googlenet,oxford_pets,caltech101,86.23574144486692 376 | googlenet,cub200,caltech101,84.03041825095056 377 | googlenet,stanford_dogs,caltech101,86.61596958174906 378 | googlenet,voc2007,caltech101,87.3384030418251 379 | googlenet,cifar10,caltech101,88.2509505703422 380 | googlenet,imagenet,caltech101,86.53992395437263 381 | alexnet,nabird,caltech101,80.6083650190114 382 | alexnet,oxford_pets,caltech101,85.39923954372624 383 | alexnet,cub200,caltech101,82.47148288973384 384 | alexnet,stanford_dogs,caltech101,83.65019011406845 385 | alexnet,voc2007,caltech101,85.05703422053233 386 | alexnet,cifar10,caltech101,83.57414448669202 387 | alexnet,imagenet,caltech101,85.55133079847909 388 | resnet50,nabird,stanford_dogs,58.84615384615385 389 | resnet50,oxford_pets,stanford_dogs,83.01864801864802 390 | resnet50,cub200,stanford_dogs,78.54312354312354 391 | resnet50,caltech101,stanford_dogs,80.94405594405595 392 | resnet50,voc2007,stanford_dogs,83.22843822843822 393 | resnet50,cifar10,stanford_dogs,83.14685314685315 394 | resnet50,imagenet,stanford_dogs,84.28904428904428 395 | resnet18,nabird,stanford_dogs,66.37529137529138 396 | resnet18,oxford_pets,stanford_dogs,79.07925407925408 397 | resnet18,cub200,stanford_dogs,59.044289044289044 398 | resnet18,caltech101,stanford_dogs,78.85780885780885 399 | resnet18,voc2007,stanford_dogs,79.8018648018648 400 | resnet18,cifar10,stanford_dogs,77.16783216783217 401 | resnet18,imagenet,stanford_dogs,79.74358974358975 402 | googlenet,nabird,stanford_dogs,56.433566433566426 403 | googlenet,oxford_pets,stanford_dogs,71.27039627039628 404 | googlenet,cub200,stanford_dogs,68.86946386946387 405 | googlenet,caltech101,stanford_dogs,70.1981351981352 406 | googlenet,voc2007,stanford_dogs,72.4009324009324 407 | googlenet,cifar10,stanford_dogs,70.5944055944056 408 | googlenet,imagenet,stanford_dogs,71.56177156177156 409 | alexnet,nabird,stanford_dogs,59.61538461538461 410 | alexnet,oxford_pets,stanford_dogs,61.81818181818181 411 | alexnet,cub200,stanford_dogs,60.641025641025635 412 | alexnet,caltech101,stanford_dogs,61.421911421911425 413 | alexnet,voc2007,stanford_dogs,61.7016317016317 414 | alexnet,cifar10,stanford_dogs,59.84848484848485 415 | alexnet,imagenet,stanford_dogs,62.074592074592076 416 | resnet50,oxford_pets,nabird,77.02675273007755 417 | resnet50,cub200,nabird,76.92932245361914 418 | resnet50,caltech101,nabird,76.57207810660496 419 | resnet50,stanford_dogs,nabird,76.92120326391426 420 | resnet50,voc2007,nabird,77.04299110948728 421 | resnet50,cifar10,nabird,77.12012341168352 422 | resnet50,imagenet,nabird,77.19319611902732 423 | resnet18,oxford_pets,nabird,69.89404457435148 424 | resnet18,cub200,nabird,67.7789956562335 425 | resnet18,caltech101,nabird,70.0523687735964 426 | resnet18,stanford_dogs,nabird,70.11732229123534 427 | resnet18,voc2007,nabird,70.28782527503755 428 | resnet18,cifar10,nabird,69.23639020825722 429 | resnet18,imagenet,nabird,70.01177282507206 430 | googlenet,oxford_pets,nabird,53.805870174156624 431 | googlenet,cub200,nabird,48.95465432549832 432 | googlenet,caltech101,nabird,54.808590102707754 433 | googlenet,stanford_dogs,nabird,52.66512402062274 434 | googlenet,voc2007,nabird,53.87488328664799 435 | googlenet,cifar10,nabird,47.97223237120936 436 | googlenet,imagenet,nabird,52.83156740957252 437 | alexnet,oxford_pets,nabird,46.27938131774449 438 | 
alexnet,cub200,nabird,47.38359111760646 439 | alexnet,caltech101,nabird,46.12105711849958 440 | alexnet,stanford_dogs,nabird,47.16437299557504 441 | alexnet,voc2007,nabird,46.68128120813543 442 | alexnet,cifar10,nabird,46.31185807656396 443 | alexnet,imagenet,nabird,46.470182275808874 444 | resnet50,nabird,voc2007,96.06988999153782 445 | resnet50,oxford_pets,voc2007,97.4680744672667 446 | resnet50,cub200,voc2007,97.17285945072697 447 | resnet50,caltech101,voc2007,97.29209939226095 448 | resnet50,stanford_dogs,voc2007,97.43153319486115 449 | resnet50,cifar10,voc2007,97.45653511808601 450 | resnet50,imagenet,voc2007,97.47192091699361 451 | resnet18,nabird,voc2007,94.1630125394261 452 | resnet18,oxford_pets,voc2007,96.62474036464343 453 | resnet18,cub200,voc2007,93.65624278790676 454 | resnet18,caltech101,voc2007,96.56415878144473 455 | resnet18,stanford_dogs,voc2007,96.170859296869 456 | resnet18,cifar10,voc2007,96.38049080698515 457 | resnet18,imagenet,voc2007,96.7439803061774 458 | googlenet,nabird,voc2007,94.78229094545735 459 | googlenet,oxford_pets,voc2007,95.97180552350181 460 | googlenet,cub200,voc2007,95.79775367335948 461 | googlenet,caltech101,voc2007,95.88718362950996 462 | googlenet,stanford_dogs,voc2007,95.78429109931534 463 | googlenet,cifar10,voc2007,96.21701669359182 464 | googlenet,imagenet,voc2007,95.95449649973075 465 | alexnet,nabird,voc2007,95.50350026925149 466 | alexnet,oxford_pets,voc2007,95.95449649973075 467 | alexnet,cub200,voc2007,95.77467497499808 468 | alexnet,caltech101,voc2007,95.92372490191553 469 | alexnet,stanford_dogs,voc2007,95.77659819986152 470 | alexnet,cifar10,voc2007,95.81217785983537 471 | alexnet,imagenet,voc2007,95.93622586352797 472 | -------------------------------------------------------------------------------- /oracles/controlled.csv: -------------------------------------------------------------------------------- 1 | Architecture,Source Dataset,Target Dataset,Oracle 2 | resnet50,nabird,cifar10,89.97 3 | resnet50,oxford_pets,cifar10,94.57 4 | resnet50,cub200,cifar10,93.08999999999999 5 | resnet50,caltech101,cifar10,94.42 6 | resnet50,stanford_dogs,cifar10,94.16 7 | resnet50,voc2007,cifar10,94.45 8 | resnet50,imagenet,cifar10,94.53 9 | resnet18,nabird,cifar10,94.35 10 | resnet18,oxford_pets,cifar10,95.58 11 | resnet18,cub200,cifar10,93.25 12 | resnet18,caltech101,cifar10,95.63000000000001 13 | resnet18,stanford_dogs,cifar10,95.41 14 | resnet18,voc2007,cifar10,95.65 15 | resnet18,imagenet,cifar10,95.86 16 | googlenet,nabird,cifar10,86.16 17 | googlenet,oxford_pets,cifar10,88.72 18 | googlenet,cub200,cifar10,88.47 19 | googlenet,caltech101,cifar10,87.63 20 | googlenet,stanford_dogs,cifar10,89.44 21 | googlenet,voc2007,cifar10,87.82 22 | googlenet,imagenet,cifar10,88.7 23 | alexnet,nabird,cifar10,92.31 24 | alexnet,oxford_pets,cifar10,93.05 25 | alexnet,cub200,cifar10,92.86 26 | alexnet,caltech101,cifar10,92.86999999999999 27 | alexnet,stanford_dogs,cifar10,92.61 28 | alexnet,voc2007,cifar10,92.75999999999999 29 | alexnet,imagenet,cifar10,92.89 30 | resnet50,nabird,oxford_pets,72.47206323248841 31 | resnet50,cub200,oxford_pets,85.74543472335786 32 | resnet50,caltech101,oxford_pets,88.8252929953666 33 | resnet50,stanford_dogs,oxford_pets,91.2782774597983 34 | resnet50,voc2007,oxford_pets,89.26137912237667 35 | resnet50,cifar10,oxford_pets,90.02452984464432 36 | resnet50,imagenet,oxford_pets,90.86944671572635 37 | resnet18,nabird,oxford_pets,65.0858544562551 38 | resnet18,cub200,oxford_pets,39.73834832379395 39 | 
resnet18,caltech101,oxford_pets,88.30744071954211 40 | resnet18,stanford_dogs,oxford_pets,89.17961297356229 41 | resnet18,voc2007,oxford_pets,88.25292995366584 42 | resnet18,cifar10,oxford_pets,87.08094848732625 43 | resnet18,imagenet,oxford_pets,88.68901608067593 44 | googlenet,nabird,oxford_pets,51.158353774870534 45 | googlenet,cub200,oxford_pets,68.87435268465522 46 | googlenet,caltech101,oxford_pets,69.96456800218043 47 | googlenet,stanford_dogs,oxford_pets,82.88361951485417 48 | googlenet,voc2007,oxford_pets,72.7173616789316 49 | googlenet,cifar10,oxford_pets,67.21177432542927 50 | googlenet,imagenet,oxford_pets,75.30662305805397 51 | alexnet,nabird,oxford_pets,76.45134914145544 52 | alexnet,cub200,oxford_pets,77.5960752248569 53 | alexnet,caltech101,oxford_pets,76.64213682202234 54 | alexnet,stanford_dogs,oxford_pets,79.09512128645407 55 | alexnet,voc2007,oxford_pets,75.98800763150723 56 | alexnet,cifar10,oxford_pets,76.69664758789861 57 | alexnet,imagenet,oxford_pets,78.33197056418642 58 | resnet50,nabird,cub200,84.74283741801864 59 | resnet50,oxford_pets,cub200,78.42595788746979 60 | resnet50,caltech101,cub200,78.20158784949949 61 | resnet50,stanford_dogs,cub200,78.9955125992406 62 | resnet50,voc2007,cub200,78.90921643079048 63 | resnet50,cifar10,cub200,79.47877114256127 64 | resnet50,imagenet,cub200,79.66862271315154 65 | resnet18,nabird,cub200,71.02174663444943 66 | resnet18,oxford_pets,cub200,69.19226786330687 67 | resnet18,caltech101,cub200,68.7780462547463 68 | resnet18,stanford_dogs,cub200,67.69071453227477 69 | resnet18,voc2007,cub200,68.58819468415602 70 | resnet18,cifar10,cub200,67.27649292371419 71 | resnet18,imagenet,cub200,68.98515705902658 72 | googlenet,nabird,cub200,70.45219192267864 73 | googlenet,oxford_pets,cub200,47.03141180531584 74 | googlenet,caltech101,cub200,44.75319295823266 75 | googlenet,stanford_dogs,cub200,45.391784604763544 76 | googlenet,voc2007,cub200,43.54504659993096 77 | googlenet,cifar10,cub200,45.32274767000345 78 | googlenet,imagenet,cub200,49.44770452191923 79 | alexnet,nabird,cub200,61.201242664825685 80 | alexnet,oxford_pets,cub200,53.84880911287539 81 | alexnet,caltech101,cub200,51.242664825681736 82 | alexnet,stanford_dogs,cub200,53.365550569554706 83 | alexnet,voc2007,cub200,53.55540214014498 84 | alexnet,cifar10,cub200,50.6385916465309 85 | alexnet,imagenet,cub200,53.98688298239558 86 | resnet50,nabird,caltech101,91.52091254752852 87 | resnet50,oxford_pets,caltech101,95.55133079847909 88 | resnet50,cub200,caltech101,94.52471482889734 89 | resnet50,stanford_dogs,caltech101,94.90494296577947 90 | resnet50,voc2007,caltech101,95.55133079847909 91 | resnet50,cifar10,caltech101,95.74144486692016 92 | resnet50,imagenet,caltech101,95.2851711026616 93 | resnet18,nabird,caltech101,61.634980988593156 94 | resnet18,oxford_pets,caltech101,87.41444866920152 95 | resnet18,cub200,caltech101,43.2319391634981 96 | resnet18,stanford_dogs,caltech101,83.8022813688213 97 | resnet18,voc2007,caltech101,87.18631178707224 98 | resnet18,cifar10,caltech101,82.69961977186313 99 | resnet18,imagenet,caltech101,87.68060836501901 100 | googlenet,nabird,caltech101,81.4448669201521 101 | googlenet,oxford_pets,caltech101,86.23574144486692 102 | googlenet,cub200,caltech101,84.03041825095056 103 | googlenet,stanford_dogs,caltech101,86.61596958174906 104 | googlenet,voc2007,caltech101,87.3384030418251 105 | googlenet,cifar10,caltech101,88.2509505703422 106 | googlenet,imagenet,caltech101,86.53992395437263 107 | alexnet,nabird,caltech101,80.6083650190114 108 | 
alexnet,oxford_pets,caltech101,85.39923954372624 109 | alexnet,cub200,caltech101,82.47148288973384 110 | alexnet,stanford_dogs,caltech101,83.65019011406845 111 | alexnet,voc2007,caltech101,85.05703422053233 112 | alexnet,cifar10,caltech101,83.57414448669202 113 | alexnet,imagenet,caltech101,85.55133079847909 114 | resnet50,nabird,stanford_dogs,58.84615384615385 115 | resnet50,oxford_pets,stanford_dogs,83.01864801864802 116 | resnet50,cub200,stanford_dogs,78.54312354312354 117 | resnet50,caltech101,stanford_dogs,80.94405594405595 118 | resnet50,voc2007,stanford_dogs,83.22843822843822 119 | resnet50,cifar10,stanford_dogs,83.14685314685315 120 | resnet50,imagenet,stanford_dogs,84.28904428904428 121 | resnet18,nabird,stanford_dogs,66.37529137529138 122 | resnet18,oxford_pets,stanford_dogs,79.07925407925408 123 | resnet18,cub200,stanford_dogs,59.044289044289044 124 | resnet18,caltech101,stanford_dogs,78.85780885780885 125 | resnet18,voc2007,stanford_dogs,79.8018648018648 126 | resnet18,cifar10,stanford_dogs,77.16783216783217 127 | resnet18,imagenet,stanford_dogs,79.74358974358975 128 | googlenet,nabird,stanford_dogs,56.433566433566426 129 | googlenet,oxford_pets,stanford_dogs,71.27039627039628 130 | googlenet,cub200,stanford_dogs,68.86946386946387 131 | googlenet,caltech101,stanford_dogs,70.1981351981352 132 | googlenet,voc2007,stanford_dogs,72.4009324009324 133 | googlenet,cifar10,stanford_dogs,70.5944055944056 134 | googlenet,imagenet,stanford_dogs,71.56177156177156 135 | alexnet,nabird,stanford_dogs,59.61538461538461 136 | alexnet,oxford_pets,stanford_dogs,61.81818181818181 137 | alexnet,cub200,stanford_dogs,60.641025641025635 138 | alexnet,caltech101,stanford_dogs,61.421911421911425 139 | alexnet,voc2007,stanford_dogs,61.7016317016317 140 | alexnet,cifar10,stanford_dogs,59.84848484848485 141 | alexnet,imagenet,stanford_dogs,62.074592074592076 142 | resnet50,oxford_pets,nabird,77.02675273007755 143 | resnet50,cub200,nabird,76.92932245361914 144 | resnet50,caltech101,nabird,76.57207810660496 145 | resnet50,stanford_dogs,nabird,76.92120326391426 146 | resnet50,voc2007,nabird,77.04299110948728 147 | resnet50,cifar10,nabird,77.12012341168352 148 | resnet50,imagenet,nabird,77.19319611902732 149 | resnet18,oxford_pets,nabird,69.89404457435148 150 | resnet18,cub200,nabird,67.7789956562335 151 | resnet18,caltech101,nabird,70.0523687735964 152 | resnet18,stanford_dogs,nabird,70.11732229123534 153 | resnet18,voc2007,nabird,70.28782527503755 154 | resnet18,cifar10,nabird,69.23639020825722 155 | resnet18,imagenet,nabird,70.01177282507206 156 | googlenet,oxford_pets,nabird,53.805870174156624 157 | googlenet,cub200,nabird,48.95465432549832 158 | googlenet,caltech101,nabird,54.808590102707754 159 | googlenet,stanford_dogs,nabird,52.66512402062274 160 | googlenet,voc2007,nabird,53.87488328664799 161 | googlenet,cifar10,nabird,47.97223237120936 162 | googlenet,imagenet,nabird,52.83156740957252 163 | alexnet,oxford_pets,nabird,46.27938131774449 164 | alexnet,cub200,nabird,47.38359111760646 165 | alexnet,caltech101,nabird,46.12105711849958 166 | alexnet,stanford_dogs,nabird,47.16437299557504 167 | alexnet,voc2007,nabird,46.68128120813543 168 | alexnet,cifar10,nabird,46.31185807656396 169 | alexnet,imagenet,nabird,46.470182275808874 170 | resnet50,nabird,voc2007,96.06988999153782 171 | resnet50,oxford_pets,voc2007,97.4680744672667 172 | resnet50,cub200,voc2007,97.17285945072697 173 | resnet50,caltech101,voc2007,97.29209939226095 174 | resnet50,stanford_dogs,voc2007,97.43153319486115 175 | 
resnet50,cifar10,voc2007,97.45653511808601 176 | resnet50,imagenet,voc2007,97.47192091699361 177 | resnet18,nabird,voc2007,94.1630125394261 178 | resnet18,oxford_pets,voc2007,96.62474036464343 179 | resnet18,cub200,voc2007,93.65624278790676 180 | resnet18,caltech101,voc2007,96.56415878144473 181 | resnet18,stanford_dogs,voc2007,96.170859296869 182 | resnet18,cifar10,voc2007,96.38049080698515 183 | resnet18,imagenet,voc2007,96.7439803061774 184 | googlenet,nabird,voc2007,94.78229094545735 185 | googlenet,oxford_pets,voc2007,95.97180552350181 186 | googlenet,cub200,voc2007,95.79775367335948 187 | googlenet,caltech101,voc2007,95.88718362950996 188 | googlenet,stanford_dogs,voc2007,95.78429109931534 189 | googlenet,cifar10,voc2007,96.21701669359182 190 | googlenet,imagenet,voc2007,95.95449649973075 191 | alexnet,nabird,voc2007,95.50350026925149 192 | alexnet,oxford_pets,voc2007,95.95449649973075 193 | alexnet,cub200,voc2007,95.77467497499808 194 | alexnet,caltech101,voc2007,95.92372490191553 195 | alexnet,stanford_dogs,voc2007,95.77659819986152 196 | alexnet,cifar10,voc2007,95.81217785983537 197 | alexnet,imagenet,voc2007,95.93622586352797 198 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn 2 | numpy 3 | pandas 4 | scipy 5 | matplotlib 6 | torch 7 | torchvision 8 | tqdm 9 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | 4 | import random 5 | import numpy as np 6 | 7 | import os 8 | import csv 9 | 10 | import constants 11 | 12 | def get_source_model_path(architecture:str, source_dataset:str) -> str: 13 | return f'./models/{architecture}/{architecture}_{source_dataset}.pth' 14 | 15 | def get_transfer_model_path(architecture:str, source_dataset:str, target_dataset:str) -> str: 16 | return f'./models/{architecture}/{architecture}_{target_dataset}_from_{source_dataset}.pth' 17 | 18 | def load_source_model(architecture:str, source_dataset:str) -> torch.nn.DataParallel: 19 | kwdargs = {} 20 | if architecture == 'googlenet': 21 | kwdargs = {'aux_logits': False, 'init_weights': False} 22 | 23 | if source_dataset in constants.num_classes: 24 | num_classes = constants.num_classes[source_dataset] 25 | else: 26 | num_classes = 1000 27 | 28 | net = torch.nn.DataParallel(getattr(torchvision.models, architecture)(pretrained=False, num_classes=num_classes, **kwdargs)) 29 | net.load_state_dict(torch.load(get_source_model_path(architecture, source_dataset))) 30 | return net 31 | 32 | def load_transfer_model(architecture:str, source_dataset:str, target_dataset:str) -> torch.nn.DataParallel: 33 | kwdargs = {} 34 | if architecture == 'googlenet': 35 | kwdargs = {'aux_logits': False, 'init_weights': False} 36 | 37 | net = torch.nn.DataParallel(getattr(torchvision.models, architecture)(pretrained=False, num_classes=constants.num_classes[target_dataset], **kwdargs)) 38 | net.load_state_dict(torch.load(get_transfer_model_path(architecture, source_dataset, target_dataset))) 39 | return net 40 | 41 | 42 | def seed_all(seed:int): 43 | random.seed(seed) 44 | torch.manual_seed(seed) 45 | torch.cuda.manual_seed(seed) 46 | np.random.seed(seed) 47 | 48 | def make_dirs(path: str): 49 | """ Why is this not how the standard library works? 
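(Creates every missing parent directory of the given file path.)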
""" 50 | path = os.path.split(path)[0] 51 | if path != "": 52 | if not os.path.exists(path): 53 | os.makedirs(path, exist_ok=True) 54 | 55 | 56 | 57 | 58 | class CSVCache: 59 | def __init__(self, path:str, header:list, key:list, append:bool=True): 60 | self.path = path 61 | self.cache = {} 62 | self.key_fmt = [header.index(k) for k in key] 63 | self.header = header 64 | self.header_idx = {k: idx for idx, k in enumerate(header)} 65 | 66 | if not append and os.path.exists(path): 67 | os.remove(path) 68 | 69 | 70 | if os.path.exists(path): 71 | with open(path, 'r') as f: 72 | reader = csv.reader(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) 73 | 74 | for idx, row in enumerate(reader): 75 | if idx > 0 and len(row) > 0: 76 | self.add_to_cache(row) 77 | 78 | else: 79 | make_dirs(path) 80 | self.write_row(header) 81 | 82 | def add_to_cache(self, row:list): 83 | key = tuple([row[i] for i in self.key_fmt]) 84 | self.cache[key] = row 85 | 86 | def write_row(self, row:list): 87 | with open(self.path, 'a') as f: 88 | writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) 89 | writer.writerow(row) 90 | self.add_to_cache(row) 91 | 92 | def exists(self, *args): 93 | return tuple([str(x) for x in args]) in self.cache 94 | 95 | def rewrite(self, path:str=None): 96 | """ Saves the current cache over the source file. """ 97 | if path is None: 98 | path = self.path 99 | 100 | with open(path, 'w') as f: 101 | writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) 102 | writer.writerow(self.header) 103 | 104 | for k, row in self.cache.items(): 105 | writer.writerow(row) 106 | --------------------------------------------------------------------------------