├── CLIP_based
│   ├── OOD
│   │   ├── 10_val.txt
│   │   ├── CLIP
│   │   │   ├── CLIP_data.py
│   │   │   ├── CLIP_ft.py
│   │   │   ├── clip_feature_dataset.py
│   │   │   ├── clip_feature_file.py
│   │   │   ├── image_folder.py
│   │   │   └── logitnorm_loss.py
│   │   ├── KNN.py
│   │   ├── __init__.py
│   │   ├── calculate_log.py
│   │   ├── data.py
│   │   ├── feature_extraction_im100.py
│   │   ├── feature_extraction_im1k.py
│   │   ├── gram_matrics.py
│   │   ├── test_npos.py
│   │   ├── test_npos_imagenet_100.sh
│   │   ├── test_npos_imagenet_1k.sh
│   │   ├── train_npos.py
│   │   ├── train_npos_imagenet_100.sh
│   │   ├── train_npos_imagenet_1k.sh
│   │   └── utils
│   │       ├── log.py
│   │       ├── mahalanobis_lib.py
│   │       └── test_utils.py
│   └── utils
│       ├── __pycache__
│       │   ├── display_results.cpython-37.pyc
│       │   ├── score_calculation.cpython-37.pyc
│       │   └── validation_dataset.cpython-37.pyc
│       ├── calibration_tools.py
│       ├── cifar_resnet.py
│       ├── display_results.py
│       ├── lsun_loader.py
│       ├── score_calculation.py
│       ├── svhn_loader.py
│       ├── tiny_resnet.py
│       ├── tinyimages_80mn_loader.py
│       └── validation_dataset.py
├── README.md
├── npos.png
└── training_from_scratch
    ├── KNN.py
    ├── __pycache__
    │   ├── KNN.cpython-37.pyc
    │   ├── cifar.cpython-37.pyc
    │   ├── image_folder.cpython-37.pyc
    │   ├── losses.cpython-37.pyc
    │   └── util.cpython-37.pyc
    ├── cifar.py
    ├── compute_metrics.py
    ├── eval_ood_detection.py
    ├── eval_ood_detection_in100.py
    ├── evaluation
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-37.pyc
    │   │   ├── cal_metric.cpython-37.pyc
    │   │   ├── display_results.cpython-37.pyc
    │   │   ├── eval_utils.cpython-37.pyc
    │   │   ├── imagenet_loader.cpython-37.pyc
    │   │   └── svhn_loader.cpython-37.pyc
    │   ├── cal_metric.py
    │   ├── display_results.py
    │   ├── eval_utils.py
    │   ├── image_folder.py
    │   ├── imagenet_loader.py
    │   └── svhn_loader.py
    ├── image_folder.py
    ├── losses.py
    ├── models
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-37.pyc
    │   │   ├── densenet.cpython-37.pyc
    │   │   ├── resnet.cpython-37.pyc
    │   │   ├── resnet_im100.cpython-37.pyc
    │   │   ├── resnet_outliers.cpython-37.pyc
    │   │   └── wrn.cpython-37.pyc
    │   ├── densenet.py
    │   ├── fine_tuning_layer.py
    │   ├── gmm.py
    │   ├── layers.py
    │   ├── resnet.py
    │   ├── resnet_im100.py
    │   ├── resnet_outliers.py
    │   └── wrn.py
    ├── scripts
    │   ├── test_npos_imagenet_100.sh
    │   ├── test_npos_imagenet_1k.sh
    │   ├── train_npos_imagenet_100.sh
    │   └── train_npos_imagenet_1k.sh
    ├── test_npos_cifar10.sh
    ├── test_npos_cifar100.sh
    ├── test_npos_imagenet100.sh
    ├── train_CIFAR10.py
    ├── train_CIFAR100.py
    ├── train_im100.py
    ├── train_imagenet.py
    ├── train_npos_cifar10.sh
    ├── train_npos_cifar100.sh
    ├── train_npos_imagenet100.sh
    ├── util.py
    └── utils
        ├── __init__.py
        ├── anom_utils.py
        ├── cal_metric.py
        ├── display_results.py
        ├── file_copy.py
        ├── imagenet_loader.py
        ├── losses.py
        ├── mahalanobis_lib.py
        ├── mahalanobis_lib_clf.py
        ├── score_calculation.py
        ├── score_calculation_eccv.py
        ├── svhn_loader.py
        ├── tinyimages_80mn_loader.py
        ├── transform.py
        └── vmf_lib.py

/CLIP_based/OOD/CLIP/clip_feature_dataset.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.utils.data import Dataset
3 | 
4 | 
5 | class clip_feature(Dataset):
6 |     def __init__(self, path='/afs/cs.wisc.edu/u/t/a/taoleitian/private/code/dataset/ImageNet-100/'):
7 |         super().__init__()
8 |         self.features = torch.load(path+'feature.pt')
9 |         self.targets = torch.load(path+'target.pt')
10 | 
11 |     def __len__(self):
12 |         return len(self.features)
13 | 
14 |     def __getitem__(self, idx):
15 |         return self.features[idx], self.targets[idx]
--------------------------------------------------------------------------------
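A minimal usage sketch for the clip_feature dataset above (not part of the repo; the path is a placeholder): it simply wraps the two cached tensors written by the feature-extraction scripts further below.

from torch.utils.data import DataLoader

# Hypothetical directory; clip_feature expects it to contain feature.pt and target.pt.
train_set = clip_feature(path='/path/to/ImageNet-100/')
train_loader = DataLoader(train_set, batch_size=512, shuffle=True, num_workers=4)
features, targets = next(iter(train_loader))  # cached CLIP embeddings and class ids

/CLIP_based/OOD/CLIP/clip_feature_file.py:
--------------------------------------------------------------------------------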
1 | import torch 2 | from torch.utils.data import Dataset 3 | 4 | 5 | class clip_feature_file_dataset(Dataset): 6 | def __init__(self, path='/afs/cs.wisc.edu/u/t/a/taoleitian/private/code/dataset/ImageNet-100/'): 7 | super().__init__() 8 | file = open(path, 'r') 9 | path_list = file.read() 10 | self.path_list = eval(path_list) 11 | self.target_list = [torch.tensor(int(i.split('/')[-2])).long() for i in self.path_list] 12 | #for i in self.target_list: 13 | #print(i) 14 | 15 | 16 | def __len__(self): 17 | return len(self.target_list) 18 | 19 | def __getitem__(self, idx): 20 | feature = torch.load(self.path_list[idx]).squeeze() 21 | return feature, self.target_list[idx] -------------------------------------------------------------------------------- /CLIP_based/OOD/CLIP/image_folder.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import Any, Callable, cast, Dict, List, Optional, Tuple 3 | from torchvision.datasets import DatasetFolder 4 | from torchvision.datasets.folder import default_loader, IMG_EXTENSIONS 5 | 6 | 7 | class ImageSubfolder(DatasetFolder): 8 | """Extend ImageFolder to support fold subsets 9 | This class inherits from :class:`~torchvision.datasets.DatasetFolder` so 10 | the same methods can be overridden to customize the dataset. 11 | Args: 12 | root (string): Root directory path. 13 | transform (callable, optional): A function/transform that takes in an PIL image 14 | and returns a transformed version. E.g, ``transforms.RandomCrop`` 15 | target_transform (callable, optional): A function/transform that takes in the 16 | target and transforms it. 17 | loader (callable, optional): A function to load an image given its path. 18 | is_valid_file (callable, optional): A function that takes path of an Image file 19 | and check if the file is a valid file (used to check of corrupt files) 20 | class_to_idx (dict): Dict with items (class_name, class_index). 21 | Attributes: 22 | classes (list): List of the class names sorted alphabetically. 23 | class_to_idx (dict): Dict with items (class_name, class_index). 
24 | imgs (list): List of (image path, class_index) tuples 25 | """ 26 | 27 | def __init__( 28 | self, 29 | root: str, 30 | transform: Optional[Callable] = None, 31 | target_transform: Optional[Callable] = None, 32 | loader: Callable[[str], Any] = default_loader, 33 | is_valid_file: Optional[Callable[[str], bool]] = None, 34 | class_to_idx: Optional[Dict] = None, 35 | ): 36 | super(DatasetFolder, self).__init__(root, transform=transform, target_transform=target_transform) 37 | if class_to_idx is not None: 38 | classes = class_to_idx.keys() 39 | else: 40 | classes, class_to_idx = self.find_classes(self.root) 41 | extensions = IMG_EXTENSIONS if is_valid_file is None else None, 42 | samples = self.make_dataset(self.root, class_to_idx, extensions[0], is_valid_file) 43 | 44 | self.loader = loader 45 | self.extensions = extensions 46 | 47 | self.classes = classes 48 | self.class_to_idx = class_to_idx 49 | self.samples = samples 50 | self.targets = [s[1] for s in samples] 51 | self.imgs = self.samples -------------------------------------------------------------------------------- /CLIP_based/OOD/CLIP/logitnorm_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | class LogitNormLoss(nn.Module): 6 | 7 | def __init__(self, device='cuda', t=8.): 8 | super(LogitNormLoss, self).__init__() 9 | self.device = device 10 | self.t = t 11 | 12 | def forward(self, x, target): 13 | norms = torch.norm(x, p=2, dim=-1, keepdim=True) + 1e-7 14 | logit_norm = torch.div(x, norms) / self.t 15 | return F.cross_entropy(logit_norm, target) -------------------------------------------------------------------------------- /CLIP_based/OOD/KNN.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import faiss 4 | import umap 5 | import time 6 | #import matplotlib.pyplot as plt 7 | import faiss.contrib.torch_utils 8 | from sklearn import manifold, datasets 9 | from torch.distributions import MultivariateNormal 10 | import torch.nn.functional as F 11 | 12 | def KNN_dis_search_decrease(target, index, K=50, select=1,): 13 | ''' 14 | data_point: Queue for searching k-th points 15 | target: the target of the search 16 | K 17 | ''' 18 | #Normalize the features 19 | 20 | target_norm = torch.norm(target, p=2, dim=1, keepdim=True) 21 | normed_target = target / target_norm 22 | #start_time = time.time() 23 | 24 | distance, output_index = index.search(normed_target, K) 25 | k_th_distance = distance[:, -1] 26 | #k_th_output_index = output_index[:, -1] 27 | k_th_distance, minD_idx = torch.topk(k_th_distance, select) 28 | #k_th_index = k_th_output_index[minD_idx] 29 | return minD_idx, k_th_distance 30 | 31 | def KNN_dis_search_distance(target, index, K=50, num_points=10, length=2000,depth=342): 32 | ''' 33 | data_point: Queue for searching k-th points 34 | target: the target of the search 35 | K 36 | ''' 37 | #Normalize the features 38 | 39 | target_norm = torch.norm(target, p=2, dim=1, keepdim=True) 40 | normed_target = target / target_norm 41 | #start_time = time.time() 42 | 43 | distance, output_index = index.search(normed_target, K) 44 | k_th_distance = distance[:, -1] 45 | k_th = k_th_distance.view(length, -1) 46 | target_new = target.view(length, -1, depth) 47 | #k_th_output_index = output_index[:, -1] 48 | k_th_distance, minD_idx = torch.topk(k_th, num_points, dim=0) 49 | minD_idx = minD_idx.squeeze() 50 | point_list = [] 51 | for i in 
range(minD_idx.shape[1]):
52 |         point_list.append(i*length + minD_idx[:,i])
53 |     #return torch.cat(point_list, dim=0)
54 |     return target[torch.cat(point_list)]
55 | 
56 | def generate_outliers(ID, input_index, negative_samples, ID_points_num=2, K=20, select=1, cov_mat=0.1, sampling_ratio=1.0, pic_nums=30, depth=342):
57 |     length = negative_samples.shape[0]
58 |     data_norm = torch.norm(ID, p=2, dim=1, keepdim=True)
59 |     normed_data = ID / data_norm  # L2-normalize the ID features before indexing
60 |     rand_ind = np.random.choice(normed_data.shape[0], int(normed_data.shape[0] * sampling_ratio), replace=False)
61 |     index = input_index
62 |     index.add(normed_data[rand_ind])
63 |     minD_idx, k_th = KNN_dis_search_decrease(ID, index, K, select)  # boundary ID points: largest k-th-NN distance
64 |     minD_idx = minD_idx[np.random.choice(select, int(pic_nums), replace=False)]
65 |     data_point_list = torch.cat([ID[i:i+1].repeat(length,1) for i in minD_idx])
66 |     #negative_sample_cov = (torch.mm(negative_samples.cuda(), cov)*cov_mat).repeat(pic_nums,1)
67 |     negative_sample_cov = cov_mat*negative_samples.cuda().repeat(pic_nums,1)
68 |     #negative_sample_cov = (negative_samples.cuda()*cov_mat).repeat(select,1)
69 |     negative_sample_list = negative_sample_cov + data_point_list  # perturb boundary points with scaled Gaussian noise
70 |     point = KNN_dis_search_distance(negative_sample_list, index, K, ID_points_num, length, depth)  # keep lowest-density candidates
71 | 
72 |     index.reset()
73 | 
74 |     #return ID[minD_idx]
75 |     return point
76 | 
77 | def generate_outliers_OOD(ID, input_index, negative_samples, K=100, select=100, sampling_ratio=1.0):
78 |     data_norm = torch.norm(ID, p=2, dim=1, keepdim=True)
79 |     normed_data = ID / data_norm
80 |     rand_ind = np.random.choice(normed_data.shape[0], int(normed_data.shape[0] * sampling_ratio), replace=False)  # subsample rows of the ID feature bank
81 |     index = input_index
82 |     index.add(normed_data[rand_ind])
83 |     minD_idx, k_th = KNN_dis_search_decrease(negative_samples, index, K, select)
84 | 
85 |     return negative_samples[minD_idx]
86 | 
87 | 
88 | 
89 | def generate_outliers_rand(ID, input_index,
90 |                            negative_samples, ID_points_num=2, K=20, select=1,
91 |                            cov_mat=0.1, sampling_ratio=1.0, pic_nums=10,
92 |                            repeat_times=30, depth=342):
93 |     length = negative_samples.shape[0]
94 |     data_norm = torch.norm(ID, p=2, dim=1, keepdim=True)
95 |     normed_data = ID / data_norm
96 |     rand_ind = np.random.choice(normed_data.shape[0], int(normed_data.shape[0] * sampling_ratio), replace=False)  # subsample rows of the ID feature bank
97 |     index = input_index
98 |     index.add(normed_data[rand_ind])
99 |     minD_idx, k_th = KNN_dis_search_decrease(ID, index, K, select)
100 |     ID_boundary = ID[minD_idx]
101 |     negative_sample_list = []
102 |     for i in range(repeat_times):
103 |         select_idx = np.random.choice(select, int(pic_nums), replace=False)
104 |         sample_list = ID_boundary[select_idx]
105 |         mean = sample_list.mean(0)
106 |         var = torch.cov(sample_list.T)
107 |         var = torch.mm(negative_samples, var)
108 |         trans_samples = mean + var
109 |         negative_sample_list.append(trans_samples)
110 |     negative_sample_list = torch.cat(negative_sample_list, dim=0)
111 |     point = KNN_dis_search_distance(negative_sample_list, index, K, ID_points_num, length, depth)
112 | 
113 |     index.reset()
114 | 
115 |     #return ID[minD_idx]
116 |     return point
117 | 
118 | 
--------------------------------------------------------------------------------
/CLIP_based/OOD/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/CLIP_based/OOD/__init__.py
--------------------------------------------------------------------------------
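The sketch below shows one way to drive generate_outliers from KNN.py above. It is illustrative only: the feature dimension, index type, and tensor sizes are assumptions rather than values fixed by the repo, and it assumes it is run from CLIP_based/OOD/ so that KNN.py is importable.

import faiss
import faiss.contrib.torch_utils  # lets the faiss index consume torch tensors directly
import torch
from KNN import generate_outliers

feat_dim = 512                                   # assumed embedding width
res = faiss.StandardGpuResources()
knn_index = faiss.GpuIndexFlatL2(res, feat_dim)  # index over the L2-normalized ID features

id_feats = torch.randn(1000, feat_dim).cuda()    # stand-in for one class's feature queue
noise = torch.randn(600, feat_dim).cuda()        # Gaussian proposals (cf. --sample_from)

outliers = generate_outliers(id_feats, knn_index, noise,
                             ID_points_num=2, K=400, select=300,
                             cov_mat=0.1, sampling_ratio=1.0,
                             pic_nums=3, depth=feat_dim)
print(outliers.shape)  # ID_points_num * pic_nums synthetic boundary outliers

/CLIP_based/OOD/calculate_log.py: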
-------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import torch 3 | from torch.autograd import Variable 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import numpy as np 7 | import torch.optim as optim 8 | import torchvision 9 | import torchvision.transforms as transforms 10 | import numpy as np 11 | import time 12 | from scipy import misc 13 | 14 | import matplotlib 15 | # matplotlib.use('Agg') 16 | import matplotlib.pyplot as plt 17 | 18 | 19 | def compute_metric(known, novel): 20 | stype = "" 21 | 22 | tp, fp = dict(), dict() 23 | tnr_at_tpr95 = dict() 24 | 25 | known.sort() 26 | novel.sort() 27 | end = np.max([np.max(known), np.max(novel)]) 28 | start = np.min([np.min(known), np.min(novel)]) 29 | num_k = known.shape[0] 30 | num_n = novel.shape[0] 31 | tp[stype] = -np.ones([num_k + num_n + 1], dtype=int) 32 | fp[stype] = -np.ones([num_k + num_n + 1], dtype=int) 33 | tp[stype][0], fp[stype][0] = num_k, num_n 34 | k, n = 0, 0 35 | for l in range(num_k + num_n): 36 | if k == num_k: 37 | tp[stype][l + 1:] = tp[stype][l] 38 | fp[stype][l + 1:] = np.arange(fp[stype][l] - 1, -1, -1) 39 | break 40 | elif n == num_n: 41 | tp[stype][l + 1:] = np.arange(tp[stype][l] - 1, -1, -1) 42 | fp[stype][l + 1:] = fp[stype][l] 43 | break 44 | else: 45 | if novel[n] < known[k]: 46 | n += 1 47 | tp[stype][l + 1] = tp[stype][l] 48 | fp[stype][l + 1] = fp[stype][l] - 1 49 | else: 50 | k += 1 51 | tp[stype][l + 1] = tp[stype][l] - 1 52 | fp[stype][l + 1] = fp[stype][l] 53 | tpr95_pos = np.abs(tp[stype] / num_k - .95).argmin() 54 | tnr_at_tpr95[stype] = 1. - fp[stype][tpr95_pos] / num_n 55 | mtypes = ['TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT'] 56 | results = dict() 57 | results[stype] = dict() 58 | 59 | # TNR 60 | mtype = 'TNR' 61 | results[stype][mtype] = tnr_at_tpr95[stype] 62 | 63 | # AUROC 64 | mtype = 'AUROC' 65 | tpr = np.concatenate([[1.], tp[stype] / tp[stype][0], [0.]]) 66 | fpr = np.concatenate([[1.], fp[stype] / fp[stype][0], [0.]]) 67 | results[stype][mtype] = -np.trapz(1. - fpr, tpr) 68 | 69 | # DTACC 70 | mtype = 'DTACC' 71 | results[stype][mtype] = .5 * (tp[stype] / tp[stype][0] + 1. - fp[stype] / fp[stype][0]).max() 72 | 73 | # AUIN 74 | mtype = 'AUIN' 75 | denom = tp[stype] + fp[stype] 76 | denom[denom == 0.] = -1. 77 | pin_ind = np.concatenate([[True], denom > 0., [True]]) 78 | pin = np.concatenate([[.5], tp[stype] / denom, [0.]]) 79 | results[stype][mtype] = -np.trapz(pin[pin_ind], tpr[pin_ind]) 80 | 81 | # AUOUT 82 | mtype = 'AUOUT' 83 | denom = tp[stype][0] - tp[stype] + fp[stype][0] - fp[stype] 84 | denom[denom == 0.] = -1. 85 | pout_ind = np.concatenate([[True], denom > 0., [True]]) 86 | pout = np.concatenate([[0.], (fp[stype][0] - fp[stype]) / denom, [.5]]) 87 | results[stype][mtype] = np.trapz(pout[pout_ind], 1. - fpr[pout_ind]) 88 | 89 | return results[stype] 90 | 91 | 92 | def print_results(results): 93 | mtypes = ['TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT'] 94 | for mtype in mtypes: 95 | print(' {mtype:6s}'.format(mtype=mtype), end='') 96 | print('') 97 | for mtype in mtypes: 98 | print(' {val:6.3f}'.format(val=100. 
* results[mtype]), end='') 99 | print('') 100 | 101 | 102 | def get_curve(dir_name, stypes=['Baseline', 'Gaussian_LDA']): 103 | tp, fp = dict(), dict() 104 | tnr_at_tpr95 = dict() 105 | for stype in stypes: 106 | known = np.loadtxt('{}/confidence_{}_In.txt'.format(dir_name, stype), delimiter='\n') 107 | novel = np.loadtxt('{}/confidence_{}_Out.txt'.format(dir_name, stype), delimiter='\n') 108 | known.sort() 109 | novel.sort() 110 | end = np.max([np.max(known), np.max(novel)]) 111 | start = np.min([np.min(known), np.min(novel)]) 112 | num_k = known.shape[0] 113 | num_n = novel.shape[0] 114 | tp[stype] = -np.ones([num_k + num_n + 1], dtype=int) 115 | fp[stype] = -np.ones([num_k + num_n + 1], dtype=int) 116 | tp[stype][0], fp[stype][0] = num_k, num_n 117 | k, n = 0, 0 118 | for l in range(num_k + num_n): 119 | if k == num_k: 120 | tp[stype][l + 1:] = tp[stype][l] 121 | fp[stype][l + 1:] = np.arange(fp[stype][l] - 1, -1, -1) 122 | break 123 | elif n == num_n: 124 | tp[stype][l + 1:] = np.arange(tp[stype][l] - 1, -1, -1) 125 | fp[stype][l + 1:] = fp[stype][l] 126 | break 127 | else: 128 | if novel[n] < known[k]: 129 | n += 1 130 | tp[stype][l + 1] = tp[stype][l] 131 | fp[stype][l + 1] = fp[stype][l] - 1 132 | else: 133 | k += 1 134 | tp[stype][l + 1] = tp[stype][l] - 1 135 | fp[stype][l + 1] = fp[stype][l] 136 | tpr95_pos = np.abs(tp[stype] / num_k - .95).argmin() 137 | tnr_at_tpr95[stype] = 1. - fp[stype][tpr95_pos] / num_n 138 | 139 | return tp, fp, tnr_at_tpr95 140 | 141 | 142 | def metric(dir_name, stypes=['Bas', 'Gau'], verbose=False): 143 | tp, fp, tnr_at_tpr95 = get_curve(dir_name, stypes) 144 | results = dict() 145 | mtypes = ['TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT'] 146 | if verbose: 147 | print(' ', end='') 148 | for mtype in mtypes: 149 | print(' {mtype:6s}'.format(mtype=mtype), end='') 150 | print('') 151 | 152 | for stype in stypes: 153 | if verbose: 154 | print('{stype:5s} '.format(stype=stype), end='') 155 | results[stype] = dict() 156 | 157 | # TNR 158 | mtype = 'TNR' 159 | results[stype][mtype] = tnr_at_tpr95[stype] 160 | if verbose: 161 | print(' {val:6.3f}'.format(val=100. * results[stype][mtype]), end='') 162 | 163 | # AUROC 164 | mtype = 'AUROC' 165 | tpr = np.concatenate([[1.], tp[stype] / tp[stype][0], [0.]]) 166 | fpr = np.concatenate([[1.], fp[stype] / fp[stype][0], [0.]]) 167 | results[stype][mtype] = -np.trapz(1. - fpr, tpr) 168 | if verbose: 169 | print(' {val:6.3f}'.format(val=100. * results[stype][mtype]), end='') 170 | 171 | # DTACC 172 | mtype = 'DTACC' 173 | results[stype][mtype] = .5 * (tp[stype] / tp[stype][0] + 1. - fp[stype] / fp[stype][0]).max() 174 | if verbose: 175 | print(' {val:6.3f}'.format(val=100. * results[stype][mtype]), end='') 176 | 177 | # AUIN 178 | mtype = 'AUIN' 179 | denom = tp[stype] + fp[stype] 180 | denom[denom == 0.] = -1. 181 | pin_ind = np.concatenate([[True], denom > 0., [True]]) 182 | pin = np.concatenate([[.5], tp[stype] / denom, [0.]]) 183 | results[stype][mtype] = -np.trapz(pin[pin_ind], tpr[pin_ind]) 184 | if verbose: 185 | print(' {val:6.3f}'.format(val=100. * results[stype][mtype]), end='') 186 | 187 | # AUOUT 188 | mtype = 'AUOUT' 189 | denom = tp[stype][0] - tp[stype] + fp[stype][0] - fp[stype] 190 | denom[denom == 0.] = -1. 191 | pout_ind = np.concatenate([[True], denom > 0., [True]]) 192 | pout = np.concatenate([[0.], (fp[stype][0] - fp[stype]) / denom, [.5]]) 193 | results[stype][mtype] = np.trapz(pout[pout_ind], 1. - fpr[pout_ind]) 194 | if verbose: 195 | print(' {val:6.3f}'.format(val=100. 
* results[stype][mtype]), end='') 196 | print('') 197 | 198 | return results -------------------------------------------------------------------------------- /CLIP_based/OOD/data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | idx = np.load('/afs/cs.wisc.edu/u/t/a/taoleitian/private/code/KNN-OOD-Tao/OOD/ind.npy') 5 | path = '/nobackup-fast/ImageNet-1k_CLIP/path/10_val.txt' 6 | file = open(path, 'r') 7 | path_list = file.read() 8 | path_list = eval(path_list) 9 | cc = list(zip(idx, path_list)) 10 | random.shuffle(cc) 11 | idx[:], path_list[:] = zip(*cc) 12 | path_save = path_list[1700:] 13 | for i in range(1700): 14 | if idx[i] == True: 15 | path_save.append(path_list[i]) 16 | print(len(path_save)) 17 | file = open('10_val.txt', 'w') 18 | file.write(str(path_save)) 19 | file.close() 20 | -------------------------------------------------------------------------------- /CLIP_based/OOD/feature_extraction_im100.py: -------------------------------------------------------------------------------- 1 | from CLIP.CLIP_ft import CLIP_ft 2 | import torch 3 | import torchvision.datasets as dset 4 | import os 5 | import torch 6 | import torchvision.transforms as trn 7 | from CLIP.image_folder import ImageSubfolder 8 | 9 | test_transform = trn.Compose([ 10 | trn.Resize(size=224, interpolation=trn.InterpolationMode.BICUBIC), 11 | trn.CenterCrop(size=(224, 224)), 12 | trn.ToTensor(), 13 | trn.Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711)) 14 | ]) 15 | path = '/nobackup-slow/taoleitian/CLIP_feature/' 16 | if not os.path.exists(path): 17 | os.makedirs(path) 18 | model = CLIP_ft(num_classes=100, layers=8) 19 | net = model 20 | net = torch.nn.DataParallel(model, device_ids=list(range(1))) 21 | root_dir = '/nobackup-slow/dataset/ILSVRC-2012/' 22 | train_dir = root_dir + 'val' 23 | classes, _ = dset.folder.find_classes(train_dir) 24 | index = [125, 788, 630, 535, 474, 694, 146, 914, 447, 208, 182, 621, 271, 646, 328, 119, 772, 928, 610, 891, 340, 25 | 890, 589, 524, 172, 453, 869, 556, 168, 982, 942, 874, 787, 320, 457, 127, 814, 358, 604, 634, 898, 388, 26 | 618, 306, 150, 508, 702, 323, 822, 63, 445, 927, 266, 298, 255, 44, 207, 151, 666, 868, 992, 843, 436, 131, 27 | 384, 908, 278, 169, 294, 428, 60, 472, 778, 304, 76, 289, 199, 152, 584, 510, 825, 236, 395, 762, 917, 573, 28 | 949, 696, 977, 401, 583, 10, 562, 738, 416, 637, 973, 359, 52, 708] 29 | 30 | num_classes = 100 31 | classes = [classes[i] for i in index] 32 | class_to_idx = {c: i for i, c in enumerate(classes)} 33 | train_data_in = ImageSubfolder(root_dir + 'val', transform=test_transform, class_to_idx=class_to_idx) 34 | 35 | train_loader_in = torch.utils.data.DataLoader( 36 | train_data_in, 37 | batch_size=1024, shuffle=False, 38 | num_workers=8, pin_memory=True) 39 | image_tensor_list = [] 40 | image_target_tensor_list = [] 41 | i = 0 42 | for idx, data in enumerate(train_loader_in): 43 | print(idx) 44 | with torch.no_grad(): 45 | embedding = net(input=data[0].cuda()) 46 | image_tensor_list.append(embedding.half().cpu()) 47 | image_target_tensor_list.append(data[1]) 48 | 49 | image_tensor = torch.cat(image_tensor_list, dim=0) 50 | image_target_tensor = torch.cat(image_target_tensor_list, dim=0) 51 | torch.save(image_tensor, path +"feature.pt") 52 | torch.save(image_target_tensor, path +"target.pt") 53 | -------------------------------------------------------------------------------- 
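After feature_extraction_im100.py above has run, a quick sanity check (illustrative, reusing the script's hard-coded output directory) is to reload the two cached tensors and confirm they stay aligned:

import torch

feats = torch.load('/nobackup-slow/taoleitian/CLIP_feature/feature.pt')
targets = torch.load('/nobackup-slow/taoleitian/CLIP_feature/target.pt')
assert feats.shape[0] == targets.shape[0], 'expect one embedding per label'
print(feats.shape, feats.dtype)      # (N, embed_dim), torch.float16
print(targets.min(), targets.max())  # class ids should span 0..99 for ImageNet-100
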
/CLIP_based/OOD/feature_extraction_im1k.py:
--------------------------------------------------------------------------------
1 | from CLIP.CLIP_ft import CLIP_ft
2 | import torch
3 | import torchvision.datasets as dset
4 | import os
5 | from torchvision.datasets import ImageFolder
6 | import torchvision.transforms as trn
7 | 
8 | test_transform = trn.Compose([
9 |     trn.Resize(size=224, interpolation=trn.InterpolationMode.BICUBIC),
10 |     trn.CenterCrop(size=(224, 224)),
11 |     trn.ToTensor(),
12 |     trn.Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
13 | ])
14 | path = '/nobackup-slow/taoleitian/CLIP_feature/'
15 | if not os.path.exists(path):
16 |     os.makedirs(path)
17 | model = CLIP_ft(num_classes=100, layers=8)
18 | net = model
19 | net = torch.nn.DataParallel(model, device_ids=list(range(1)))
20 | root_dir = '/nobackup-slow/dataset/ILSVRC-2012/'
21 | train_dir = root_dir + 'train'
22 | train_data_in = ImageFolder(train_dir, transform=test_transform)
23 | 
24 | train_loader_in = torch.utils.data.DataLoader(
25 |     train_data_in,
26 |     batch_size=1024, shuffle=False,
27 |     num_workers=8, pin_memory=True)
28 | image_tensor_list = []
29 | image_target_tensor_list = []
30 | i = 0
31 | for idx, data in enumerate(train_loader_in):
32 |     print(idx)
33 |     with torch.no_grad():
34 |         embedding = net(input=data[0].cuda())
35 |     image_tensor_list.append(embedding.half().cpu())
36 |     image_target_tensor_list.append(data[1])
37 | 
38 | image_tensor = torch.cat(image_tensor_list, dim=0)
39 | image_target_tensor = torch.cat(image_target_tensor_list, dim=0)
40 | torch.save(image_tensor, path + "feature.pt")
41 | torch.save(image_target_tensor, path + "target.pt")
42 | 
--------------------------------------------------------------------------------
/CLIP_based/OOD/test_npos_imagenet_100.sh:
--------------------------------------------------------------------------------
1 | python test_npos.py --dataset ImageNet-100 --score MSP --load /nobackup-fast/taoleitian/test/clip_100.pt --T 1
2 | 
--------------------------------------------------------------------------------
/CLIP_based/OOD/test_npos_imagenet_1k.sh:
--------------------------------------------------------------------------------
1 | python test_npos.py --dataset ImageNet-1000 --score MSP --load /nobackup-fast/taoleitian/ImageNet-1k_CLIP.pt --T 1
2 | 
--------------------------------------------------------------------------------
/CLIP_based/OOD/train_npos_imagenet_100.sh:
--------------------------------------------------------------------------------
1 | python train_npos.py --ngpu 4 --start_epoch 10 --sample_number 1000 --epochs 20 --sample_from 1500 --select 300 --loss_weight 0.1 --dataset ImageNet-100 --pick_nums 3 --cov_mat 0.1 --K 400 --save /nobackup-slow/taoleitian/model/ImageNet-100/npos/1 --batch_size 800 --learning_rate 0.1 --decay_rate 0.1
--------------------------------------------------------------------------------
/CLIP_based/OOD/train_npos_imagenet_1k.sh:
--------------------------------------------------------------------------------
1 | python train_npos.py --ngpu 8 --start_epoch 40 --sample_number 1000 --epochs 100 --sample_from 1500 --select 300 --loss_weight 0.1 --dataset ImageNet-1k --pick_nums 1 --cov_mat 0.1 --K 400 --save /nobackup-slow/taoleitian/model/ImageNet-100/npos/1 --batch_size 1000 --learning_rate 0.1 --decay_rate 0.1
2 | 
--------------------------------------------------------------------------------
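The test scripts above score with MSP (--score MSP, temperature --T). For reference, a minimal sketch of that score — the standard maximum-softmax-probability definition, not code lifted from test_npos.py:

import torch
import torch.nn.functional as F

def msp_score(logits: torch.Tensor, T: float = 1.0) -> torch.Tensor:
    """Maximum softmax probability; higher values indicate in-distribution inputs."""
    return F.softmax(logits / T, dim=1).max(dim=1).values

/CLIP_based/OOD/utils/log.py: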
-------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import logging.config 4 | 5 | def setup_logger(args): 6 | """Creates and returns a fancy logger.""" 7 | # return logging.basicConfig(level=logging.INFO, format="[%(asctime)s] %(message)s") 8 | # Why is setting up proper logging so !@?#! ugly? 9 | os.makedirs(os.path.join(args.logdir, args.name), exist_ok=True) 10 | logging.config.dictConfig({ 11 | "version": 1, 12 | "disable_existing_loggers": False, 13 | "formatters": { 14 | "standard": { 15 | "format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s" 16 | }, 17 | }, 18 | "handlers": { 19 | "stderr": { 20 | "level": "INFO", 21 | "formatter": "standard", 22 | "class": "logging.StreamHandler", 23 | "stream": "ext://sys.stderr", 24 | }, 25 | "logfile": { 26 | "level": "DEBUG", 27 | "formatter": "standard", 28 | "class": "logging.FileHandler", 29 | "filename": os.path.join(args.logdir, args.name, "log.txt"), 30 | "mode": "a", 31 | } 32 | }, 33 | "loggers": { 34 | "": { 35 | "handlers": ["stderr", "logfile"], 36 | "level": "DEBUG", 37 | "propagate": True 38 | }, 39 | } 40 | }) 41 | logger = logging.getLogger(__name__) 42 | logger.flush = lambda: [h.flush() for h in logger.handlers] 43 | logger.info(args) 44 | return logger 45 | -------------------------------------------------------------------------------- /CLIP_based/OOD/utils/mahalanobis_lib.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import numpy as np 4 | 5 | from torch.autograd import Variable 6 | 7 | 8 | def sample_estimator(model, num_classes, feature_list, train_loader): 9 | """ 10 | compute sample mean and precision (inverse of covariance) 11 | return: sample_class_mean: list of class mean 12 | precision: list of precisions 13 | """ 14 | import sklearn.covariance 15 | 16 | model.eval() 17 | group_lasso = sklearn.covariance.EmpiricalCovariance(assume_centered=False) 18 | correct, total = 0, 0 19 | num_output = len(feature_list) 20 | num_sample_per_class = np.empty(num_classes) 21 | num_sample_per_class.fill(0) 22 | list_features = [] 23 | for i in range(num_output): 24 | temp_list = [] 25 | for j in range(num_classes): 26 | temp_list.append(0) 27 | list_features.append(temp_list) 28 | 29 | for num_batch, (data, target) in enumerate(train_loader): 30 | if num_batch % 100 == 0: 31 | print('{} batches processed...'.format(num_batch)) 32 | total += data.size(0) 33 | # data = data.cuda() 34 | with torch.no_grad(): 35 | data = Variable(data) 36 | data = data.cuda() 37 | output, out_features = model(x=data, layer_index='all') 38 | 39 | # get hidden features 40 | for i in range(num_output): 41 | out_features[i] = out_features[i].view(out_features[i].size(0), out_features[i].size(1), -1) 42 | out_features[i] = torch.mean(out_features[i].data, 2).data.cpu() 43 | 44 | # compute the accuracy 45 | pred = output.data.max(1)[1] 46 | equal_flag = pred.eq(target.cuda()).cpu() 47 | correct += equal_flag.sum() 48 | 49 | # construct the sample matrix 50 | for i in range(data.size(0)): 51 | label = target[i] 52 | if num_sample_per_class[label] == 0: 53 | out_count = 0 54 | for out in out_features: 55 | list_features[out_count][label] = out[i].view(1, -1) 56 | out_count += 1 57 | else: 58 | out_count = 0 59 | for out in out_features: 60 | list_features[out_count][label] \ 61 | = torch.cat((list_features[out_count][label], out[i].view(1, -1)), 0) 62 | out_count += 1 63 | 
num_sample_per_class[label] += 1 64 | 65 | sample_class_mean = [] 66 | out_count = 0 67 | for num_feature in feature_list: 68 | temp_list = torch.Tensor(num_classes, int(num_feature)).cpu() 69 | for j in range(num_classes): 70 | temp_list[j] = torch.mean(list_features[out_count][j], 0) 71 | sample_class_mean.append(temp_list) 72 | out_count += 1 73 | 74 | precision = [] 75 | for k in range(num_output): 76 | X = 0 77 | for i in range(num_classes): 78 | if i == 0: 79 | X = list_features[k][i] - sample_class_mean[k][i] 80 | else: 81 | X = torch.cat((X, list_features[k][i] - sample_class_mean[k][i]), 0) 82 | 83 | # find inverse 84 | group_lasso.fit(X.cpu().numpy()) 85 | temp_precision = group_lasso.precision_ 86 | temp_precision = torch.from_numpy(temp_precision).float().cuda() 87 | precision.append(temp_precision) 88 | 89 | print('\n Training Accuracy:({:.2f}%)\n'.format(100. * correct / total)) 90 | 91 | return sample_class_mean, precision 92 | 93 | 94 | def get_Mahalanobis_score(inputs, model, num_classes, sample_mean, precision, num_output, magnitude): 95 | 96 | for layer_index in range(num_output): 97 | 98 | data = Variable(inputs, requires_grad=True) 99 | data = data.cuda() 100 | 101 | out_features = model(x=data, layer_index=layer_index) 102 | out_features = out_features.view(out_features.size(0), out_features.size(1), -1) 103 | out_features = torch.mean(out_features, 2) 104 | 105 | gaussian_score = 0 106 | for i in range(num_classes): 107 | batch_sample_mean = sample_mean[layer_index][i] 108 | zero_f = out_features.data - batch_sample_mean 109 | term_gau = -0.5*torch.mm(torch.mm(zero_f, precision[layer_index]), zero_f.t()).diag() 110 | if i == 0: 111 | gaussian_score = term_gau.view(-1,1) 112 | else: 113 | gaussian_score = torch.cat((gaussian_score, term_gau.view(-1,1)), 1) 114 | 115 | # Input_processing 116 | sample_pred = gaussian_score.max(1)[1] 117 | batch_sample_mean = sample_mean[layer_index].index_select(0, sample_pred) 118 | zero_f = out_features - Variable(batch_sample_mean) 119 | pure_gau = -0.5*torch.mm(torch.mm(zero_f, Variable(precision[layer_index])), zero_f.t()).diag() 120 | loss = torch.mean(-pure_gau) 121 | loss.backward() 122 | 123 | gradient = torch.ge(data.grad.data, 0) 124 | gradient = (gradient.float() - 0.5) * 2 125 | 126 | tempInputs = torch.add(data.data, -magnitude, gradient) 127 | 128 | noise_out_features = model(x=Variable(tempInputs), layer_index=layer_index) 129 | noise_out_features = noise_out_features.view(noise_out_features.size(0), noise_out_features.size(1), -1) 130 | noise_out_features = torch.mean(noise_out_features, 2) 131 | noise_gaussian_score = 0 132 | for i in range(num_classes): 133 | batch_sample_mean = sample_mean[layer_index][i] 134 | zero_f = noise_out_features.data - batch_sample_mean 135 | term_gau = -0.5*torch.mm(torch.mm(zero_f, precision[layer_index]), zero_f.t()).diag() 136 | if i == 0: 137 | noise_gaussian_score = term_gau.view(-1,1) 138 | else: 139 | noise_gaussian_score = torch.cat((noise_gaussian_score, term_gau.view(-1,1)), 1) 140 | 141 | noise_gaussian_score, _ = torch.max(noise_gaussian_score, dim=1) 142 | 143 | noise_gaussian_score = np.asarray(noise_gaussian_score.cpu().numpy(), dtype=np.float32) 144 | if layer_index == 0: 145 | Mahalanobis_scores = noise_gaussian_score.reshape((noise_gaussian_score.shape[0], -1)) 146 | else: 147 | Mahalanobis_scores = np.concatenate((Mahalanobis_scores, noise_gaussian_score.reshape((noise_gaussian_score.shape[0], -1))), axis=1) 148 | 149 | return Mahalanobis_scores 150 | 
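# Usage sketch (comments only, since the exact model API varies): `model` must accept
# model(x=..., layer_index='all' or int) as assumed above. A hypothetical call order,
# with `magnitude` chosen by validation rather than fixed by this file:
#   sample_mean, precision = sample_estimator(model, num_classes, feature_list, train_loader)
#   scores = get_Mahalanobis_score(inputs, model, num_classes, sample_mean, precision,
#                                  num_output=len(feature_list), magnitude=0.0024)
# Each column of `scores` is the (input-perturbed) maximum class-conditional Gaussian
# log-likelihood at one feature layer.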
-------------------------------------------------------------------------------- /CLIP_based/OOD/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import sklearn.metrics as sk 4 | 5 | 6 | def arg_parser(): 7 | parser = argparse.ArgumentParser() 8 | 9 | parser.add_argument("--workers", type=int, default=8, 10 | help="Number of background threads used to load data.") 11 | 12 | parser.add_argument("--logdir", required=True, 13 | help="Where to log test info (small).") 14 | parser.add_argument("--batch", type=int, default=256, 15 | help="Batch size.") 16 | parser.add_argument("--name", required=True, 17 | help="Name of this run. Used for monitoring and checkpointing.") 18 | 19 | parser.add_argument("--model", default="BiT-S-R101x1", help="Which variant to use") 20 | parser.add_argument("--model_path", type=str, help="Path to the model you want to test") 21 | 22 | return parser 23 | 24 | 25 | def stable_cumsum(arr, rtol=1e-05, atol=1e-08): 26 | """Use high precision for cumsum and check that final value matches sum 27 | Parameters 28 | ---------- 29 | arr : array-like 30 | To be cumulatively summed as flat 31 | rtol : float 32 | Relative tolerance, see ``np.allclose`` 33 | atol : float 34 | Absolute tolerance, see ``np.allclose`` 35 | """ 36 | out = np.cumsum(arr, dtype=np.float64) 37 | expected = np.sum(arr, dtype=np.float64) 38 | if not np.allclose(out[-1], expected, rtol=rtol, atol=atol): 39 | raise RuntimeError('cumsum was found to be unstable: ' 40 | 'its last element does not correspond to sum') 41 | return out 42 | 43 | 44 | def fpr_and_fdr_at_recall(y_true, y_score, recall_level, pos_label=1.): 45 | # make y_true a boolean vector 46 | y_true = (y_true == pos_label) 47 | 48 | # sort scores and corresponding truth values 49 | desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1] 50 | y_score = y_score[desc_score_indices] 51 | y_true = y_true[desc_score_indices] 52 | 53 | # y_score typically has many tied values. Here we extract 54 | # the indices associated with the distinct values. We also 55 | # concatenate a value for the end of the curve. 
56 | distinct_value_indices = np.where(np.diff(y_score))[0] 57 | threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1] 58 | 59 | # accumulate the true positives with decreasing threshold 60 | tps = stable_cumsum(y_true)[threshold_idxs] 61 | fps = 1 + threshold_idxs - tps # add one because of zero-based indexing 62 | 63 | thresholds = y_score[threshold_idxs] 64 | 65 | recall = tps / tps[-1] 66 | 67 | last_ind = tps.searchsorted(tps[-1]) 68 | sl = slice(last_ind, None, -1) # [last_ind::-1] 69 | recall, fps, tps, thresholds = np.r_[recall[sl], 1], np.r_[fps[sl], 0], np.r_[tps[sl], 0], thresholds[sl] 70 | 71 | cutoff = np.argmin(np.abs(recall - recall_level)) 72 | 73 | return fps[cutoff] / (np.sum(np.logical_not(y_true))) # , fps[cutoff]/(fps[cutoff] + tps[cutoff]) 74 | 75 | 76 | def get_measures(in_examples, out_examples): 77 | num_in = in_examples.shape[0] 78 | num_out = out_examples.shape[0] 79 | 80 | # logger.info("# in example is: {}".format(num_in)) 81 | # logger.info("# out example is: {}".format(num_out)) 82 | 83 | labels = np.zeros(num_in + num_out, dtype=np.int32) 84 | labels[:num_in] += 1 85 | 86 | examples = np.squeeze(np.vstack((in_examples, out_examples))) 87 | aupr_in = sk.average_precision_score(labels, examples) 88 | auroc = sk.roc_auc_score(labels, examples) 89 | 90 | recall_level = 0.95 91 | fpr = fpr_and_fdr_at_recall(labels, examples, recall_level) 92 | 93 | labels_rev = np.zeros(num_in + num_out, dtype=np.int32) 94 | labels_rev[num_in:] += 1 95 | examples = np.squeeze(-np.vstack((in_examples, out_examples))) 96 | aupr_out = sk.average_precision_score(labels_rev, examples) 97 | return auroc, aupr_in, aupr_out, fpr 98 | -------------------------------------------------------------------------------- /CLIP_based/utils/__pycache__/display_results.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/CLIP_based/utils/__pycache__/display_results.cpython-37.pyc -------------------------------------------------------------------------------- /CLIP_based/utils/__pycache__/score_calculation.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/CLIP_based/utils/__pycache__/score_calculation.cpython-37.pyc -------------------------------------------------------------------------------- /CLIP_based/utils/__pycache__/validation_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/CLIP_based/utils/__pycache__/validation_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /CLIP_based/utils/calibration_tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def calib_err(confidence, correct, p='2', beta=100): 5 | # beta is target bin size 6 | idxs = np.argsort(confidence) 7 | confidence = confidence[idxs] 8 | correct = correct[idxs] 9 | bins = [[i * beta, (i + 1) * beta] for i in range(len(confidence) // beta)] 10 | bins[-1] = [bins[-1][0], len(confidence)] 11 | 12 | cerr = 0 13 | total_examples = len(confidence) 14 | for i in range(len(bins) - 1): 15 | bin_confidence = confidence[bins[i][0]:bins[i][1]] 16 | bin_correct = 
correct[bins[i][0]:bins[i][1]] 17 | num_examples_in_bin = len(bin_confidence) 18 | 19 | if num_examples_in_bin > 0: 20 | difference = np.abs(np.nanmean(bin_confidence) - np.nanmean(bin_correct)) 21 | 22 | if p == '2': 23 | cerr += num_examples_in_bin / total_examples * np.square(difference) 24 | elif p == '1': 25 | cerr += num_examples_in_bin / total_examples * difference 26 | elif p == 'infty' or p == 'infinity' or p == 'max': 27 | cerr = np.maximum(cerr, difference) 28 | else: 29 | assert False, "p must be '1', '2', or 'infty'" 30 | 31 | if p == '2': 32 | cerr = np.sqrt(cerr) 33 | 34 | return cerr 35 | 36 | 37 | def soft_f1(confidence, correct): 38 | wrong = 1 - correct 39 | 40 | # # the incorrectly classified samples are our interest 41 | # # so they make the positive class 42 | # tp_soft = np.sum((1 - confidence) * wrong) 43 | # fp_soft = np.sum((1 - confidence) * correct) 44 | # fn_soft = np.sum(confidence * wrong) 45 | 46 | # return 2 * tp_soft / (2 * tp_soft + fn_soft + fp_soft) 47 | return 2 * ((1 - confidence) * wrong).sum()/(1 - confidence + wrong).sum() 48 | 49 | 50 | def tune_temp(logits, labels, binary_search=True, lower=0.2, upper=5.0, eps=0.0001): 51 | logits = np.array(logits) 52 | 53 | if binary_search: 54 | import torch 55 | import torch.nn.functional as F 56 | 57 | logits = torch.FloatTensor(logits) 58 | labels = torch.LongTensor(labels) 59 | t_guess = torch.FloatTensor([0.5*(lower + upper)]).requires_grad_() 60 | 61 | while upper - lower > eps: 62 | if torch.autograd.grad(F.cross_entropy(logits / t_guess, labels), t_guess)[0] > 0: 63 | upper = 0.5 * (lower + upper) 64 | else: 65 | lower = 0.5 * (lower + upper) 66 | t_guess = t_guess * 0 + 0.5 * (lower + upper) 67 | 68 | t = min([lower, 0.5 * (lower + upper), upper], key=lambda x: float(F.cross_entropy(logits / x, labels))) 69 | else: 70 | import cvxpy as cx 71 | 72 | set_size = np.array(logits).shape[0] 73 | 74 | t = cx.Variable() 75 | 76 | expr = sum((cx.Minimize(cx.log_sum_exp(logits[i, :] * t) - logits[i, labels[i]] * t) 77 | for i in range(set_size))) 78 | p = cx.Problem(expr, [lower <= t, t <= upper]) 79 | 80 | p.solve() # p.solve(solver=cx.SCS) 81 | t = 1 / t.value 82 | 83 | return t 84 | 85 | 86 | def get_measures(confidence, correct): 87 | rms = calib_err(confidence, correct, p='2') 88 | mad = calib_err(confidence, correct, p='1') 89 | sf1 = soft_f1(confidence, correct) 90 | 91 | return rms, mad, sf1 92 | 93 | 94 | def print_measures(rms, mad, sf1, method_name='Baseline'): 95 | print('\t\t\t\t\t\t\t' + method_name) 96 | print('RMS Calib Error (%): \t\t{:.2f}'.format(100 * rms)) 97 | print('MAD Calib Error (%): \t\t{:.2f}'.format(100 * mad)) 98 | print('Soft F1 Score (%): \t\t{:.2f}'.format(100 * sf1)) 99 | 100 | 101 | def print_measures_with_std(rmss, mads, sf1s, method_name='Baseline'): 102 | print('\t\t\t\t\t\t\t' + method_name) 103 | print('RMS Calib Error (%): \t\t{:.2f}\t+/- {:.2f}'.format(100 * np.mean(rmss), 100 * np.std(rmss))) 104 | print('MAD Calib Error (%): \t\t{:.2f}\t+/- {:.2f}'.format(100 * np.mean(mads), 100 * np.std(mads))) 105 | print('Soft F1 Score (%): \t\t{:.2f}\t+/- {:.2f}'.format(100 * np.mean(sf1s), 100 * np.std(sf1s))) 106 | 107 | 108 | def show_calibration_results(confidence, correct, method_name='Baseline'): 109 | 110 | print('\t\t\t\t' + method_name) 111 | print('RMS Calib Error (%): \t\t{:.2f}'.format( 112 | 100 * calib_err(confidence, correct, p='2'))) 113 | 114 | print('MAD Calib Error (%): \t\t{:.2f}'.format( 115 | 100 * calib_err(confidence, correct, p='1'))) 116 | 117 | # 
print('Max Calib Error (%): \t\t{:.2f}'.format( 118 | # 100 * calib_err(confidence, correct, p='infty'))) 119 | 120 | print('Soft F1-Score (%): \t\t{:.2f}'.format( 121 | 100 * soft_f1(confidence, correct))) 122 | 123 | # add error detection measures? 124 | -------------------------------------------------------------------------------- /CLIP_based/utils/cifar_resnet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class BasicBlock(nn.Module): 8 | def __init__(self, in_planes, out_planes, stride, dropRate=0.0): 9 | super(BasicBlock, self).__init__() 10 | self.bn1 = nn.BatchNorm2d(in_planes) 11 | self.relu1 = nn.ReLU(inplace=True) 12 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 13 | padding=1, bias=False) 14 | self.bn2 = nn.BatchNorm2d(out_planes) 15 | self.relu2 = nn.ReLU(inplace=True) 16 | self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, 17 | padding=1, bias=False) 18 | self.droprate = dropRate 19 | self.equalInOut = (in_planes == out_planes) 20 | self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, 21 | padding=0, bias=False) or None 22 | 23 | def forward(self, x): 24 | if not self.equalInOut: 25 | x = self.relu1(self.bn1(x)) 26 | else: 27 | out = self.relu1(self.bn1(x)) 28 | if self.equalInOut: 29 | out = self.relu2(self.bn2(self.conv1(out))) 30 | else: 31 | out = self.relu2(self.bn2(self.conv1(x))) 32 | if self.droprate > 0: 33 | out = F.dropout(out, p=self.droprate, training=self.training) 34 | out = self.conv2(out) 35 | if not self.equalInOut: 36 | return torch.add(self.convShortcut(x), out) 37 | else: 38 | return torch.add(x, out) 39 | 40 | 41 | class NetworkBlock(nn.Module): 42 | def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0): 43 | super(NetworkBlock, self).__init__() 44 | self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate) 45 | 46 | def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate): 47 | layers = [] 48 | for i in range(nb_layers): 49 | layers.append(block(i == 0 and in_planes or out_planes, out_planes, i == 0 and stride or 1, dropRate)) 50 | return nn.Sequential(*layers) 51 | 52 | def forward(self, x): 53 | return self.layer(x) 54 | 55 | 56 | class WideResNet(nn.Module): 57 | def __init__(self, depth, num_classes, widen_factor=1, dropRate=0.0): 58 | super(WideResNet, self).__init__() 59 | nChannels = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor] 60 | assert ((depth - 4) % 6 == 0) 61 | n = (depth - 4) // 6 62 | block = BasicBlock 63 | # 1st conv before any network block 64 | self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1, 65 | padding=1, bias=False) 66 | # 1st block 67 | self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropRate) 68 | # 2nd block 69 | self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropRate) 70 | # 3rd block 71 | self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropRate) 72 | # global average pooling and classifier 73 | self.bn1 = nn.BatchNorm2d(nChannels[3]) 74 | self.relu = nn.ReLU(inplace=True) 75 | self.fc = nn.Linear(nChannels[3], num_classes) 76 | self.nChannels = nChannels[3] 77 | 78 | for m in self.modules(): 79 | if isinstance(m, nn.Conv2d): 80 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 81 | 
m.weight.data.normal_(0, math.sqrt(2. / n)) 82 | elif isinstance(m, nn.BatchNorm2d): 83 | m.weight.data.fill_(1) 84 | m.bias.data.zero_() 85 | elif isinstance(m, nn.Linear): 86 | m.bias.data.zero_() 87 | 88 | def forward(self, x): 89 | out = self.conv1(x) 90 | out = self.block1(out) 91 | out = self.block2(out) 92 | out = self.block3(out) 93 | out = self.relu(self.bn1(out)) 94 | out = F.avg_pool2d(out, 8) 95 | out = out.view(-1, self.nChannels) 96 | return self.fc(out) 97 | -------------------------------------------------------------------------------- /CLIP_based/utils/lsun_loader.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from PIL import Image 3 | import os 4 | import os.path 5 | import six 6 | import string 7 | import sys 8 | 9 | if sys.version_info[0] == 2: 10 | import cPickle as pickle 11 | else: 12 | import pickle 13 | 14 | 15 | class LSUNClass(data.Dataset): 16 | def __init__(self, db_path, transform=None, target_transform=None): 17 | import lmdb 18 | self.db_path = db_path 19 | self.env = lmdb.open(db_path, max_readers=1, readonly=True, lock=False, 20 | readahead=False, meminit=False) 21 | with self.env.begin(write=False) as txn: 22 | self.length = txn.stat()['entries'] 23 | cache_file = '_cache_' + db_path.replace('/', '_') 24 | if os.path.isfile(cache_file): 25 | self.keys = pickle.load(open(cache_file, "rb")) 26 | else: 27 | with self.env.begin(write=False) as txn: 28 | self.keys = [key for key, _ in txn.cursor()] 29 | pickle.dump(self.keys, open(cache_file, "wb")) 30 | self.transform = transform 31 | self.target_transform = target_transform 32 | 33 | def __getitem__(self, index): 34 | img, target = None, None 35 | env = self.env 36 | with env.begin(write=False) as txn: 37 | imgbuf = txn.get(self.keys[index]) 38 | 39 | buf = six.BytesIO() 40 | buf.write(imgbuf) 41 | buf.seek(0) 42 | img = Image.open(buf).convert('RGB') 43 | 44 | if self.transform is not None: 45 | img = self.transform(img) 46 | 47 | if self.target_transform is not None: 48 | target = self.target_transform(target) 49 | 50 | return img, target 51 | 52 | def __len__(self): 53 | return self.length 54 | 55 | def __repr__(self): 56 | return self.__class__.__name__ + ' (' + self.db_path + ')' 57 | 58 | 59 | class LSUN(data.Dataset): 60 | """ 61 | `LSUN `_ dataset. 62 | 63 | Args: 64 | db_path (string): Root directory for the database files. 65 | classes (string or list): One of {'train', 'val', 'test'} or a list of 66 | categories to load. e,g. ['bedroom_train', 'church_train']. 67 | transform (callable, optional): A function/transform that takes in an PIL image 68 | and returns a transformed version. E.g, ``transforms.RandomCrop`` 69 | target_transform (callable, optional): A function/transform that takes in the 70 | target and transforms it. 
71 | """ 72 | 73 | def __init__(self, db_path, classes='train', 74 | transform=None, target_transform=None): 75 | categories = ['bedroom', 'bridge', 'church_outdoor', 'classroom', 76 | 'conference_room', 'dining_room', 'kitchen', 77 | 'living_room', 'restaurant', 'tower'] 78 | dset_opts = ['train', 'val', 'test'] 79 | self.db_path = db_path 80 | if type(classes) == str and classes in dset_opts: 81 | if classes == 'test': 82 | classes = [classes] 83 | else: 84 | classes = [c + '_' + classes for c in categories] 85 | self.classes = classes 86 | 87 | # for each class, create an LSUNClassDataset 88 | self.dbs = [] 89 | for c in self.classes: 90 | self.dbs.append(LSUNClass( 91 | db_path=db_path + '/' + c + '_lmdb', 92 | transform=transform)) 93 | 94 | self.indices = [] 95 | count = 0 96 | for db in self.dbs: 97 | count += len(db) 98 | self.indices.append(count) 99 | 100 | self.length = count 101 | self.target_transform = target_transform 102 | 103 | def __getitem__(self, index): 104 | """ 105 | Args: 106 | index (int): Index 107 | 108 | Returns: 109 | tuple: Tuple (image, target) where target is the index of the target category. 110 | """ 111 | target = 0 112 | sub = 0 113 | for ind in self.indices: 114 | if index < ind: 115 | break 116 | target += 1 117 | sub = ind 118 | 119 | db = self.dbs[target] 120 | index = index - sub 121 | 122 | if self.target_transform is not None: 123 | target = self.target_transform(target) 124 | 125 | img, _ = db[index] 126 | return img, target 127 | 128 | def __len__(self): 129 | return self.length 130 | 131 | def __repr__(self): 132 | return self.__class__.__name__ + ' (' + self.db_path + ')' 133 | -------------------------------------------------------------------------------- /CLIP_based/utils/svhn_loader.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from PIL import Image 3 | import os 4 | import os.path 5 | import numpy as np 6 | 7 | 8 | class SVHN(data.Dataset): 9 | url = "" 10 | filename = "" 11 | file_md5 = "" 12 | 13 | split_list = { 14 | 'train': ["http://ufldl.stanford.edu/housenumbers/train_32x32.mat", 15 | "train_32x32.mat", "e26dedcc434d2e4c54c9b2d4a06d8373"], 16 | 'test': ["http://ufldl.stanford.edu/housenumbers/test_32x32.mat", 17 | "test_32x32.mat", "eb5a983be6a315427106f1b164d9cef3"], 18 | 'extra': ["http://ufldl.stanford.edu/housenumbers/extra_32x32.mat", 19 | "extra_32x32.mat", "a93ce644f1a588dc4d68dda5feec44a7"], 20 | 'train_and_extra': [ 21 | ["http://ufldl.stanford.edu/housenumbers/train_32x32.mat", 22 | "train_32x32.mat", "e26dedcc434d2e4c54c9b2d4a06d8373"], 23 | ["http://ufldl.stanford.edu/housenumbers/extra_32x32.mat", 24 | "extra_32x32.mat", "a93ce644f1a588dc4d68dda5feec44a7"]]} 25 | 26 | def __init__(self, root, split='train', 27 | transform=None, target_transform=None, download=False): 28 | self.root = root 29 | self.transform = transform 30 | self.target_transform = target_transform 31 | self.split = split # training set or test set or extra set 32 | 33 | if self.split not in self.split_list: 34 | raise ValueError('Wrong split entered! 
Please use split="train" ' 35 | 'or split="extra" or split="test" ' 36 | 'or split="train_and_extra" ') 37 | 38 | if self.split == "train_and_extra": 39 | self.url = self.split_list[split][0][0] 40 | self.filename = self.split_list[split][0][1] 41 | self.file_md5 = self.split_list[split][0][2] 42 | else: 43 | self.url = self.split_list[split][0] 44 | self.filename = self.split_list[split][1] 45 | self.file_md5 = self.split_list[split][2] 46 | 47 | # import here rather than at top of file because this is 48 | # an optional dependency for torchvision 49 | import scipy.io as sio 50 | 51 | # reading(loading) mat file as array 52 | loaded_mat = sio.loadmat(os.path.join(root, self.filename)) 53 | 54 | if self.split == "test": 55 | self.data = loaded_mat['X'] 56 | self.targets = loaded_mat['y'] 57 | # Note label 10 == 0 so modulo operator required 58 | self.targets = (self.targets % 10).squeeze() # convert to zero-based indexing 59 | self.data = np.transpose(self.data, (3, 2, 0, 1)) 60 | else: 61 | self.data = loaded_mat['X'] 62 | self.targets = loaded_mat['y'] 63 | 64 | if self.split == "train_and_extra": 65 | extra_filename = self.split_list[split][1][1] 66 | loaded_mat = sio.loadmat(os.path.join(root, extra_filename)) 67 | self.data = np.concatenate([self.data, 68 | loaded_mat['X']], axis=3) 69 | self.targets = np.vstack((self.targets, 70 | loaded_mat['y'])) 71 | # Note label 10 == 0 so modulo operator required 72 | self.targets = (self.targets % 10).squeeze() # convert to zero-based indexing 73 | self.data = np.transpose(self.data, (3, 2, 0, 1)) 74 | 75 | def __getitem__(self, index): 76 | if self.split == "test": 77 | img, target = self.data[index], self.targets[index] 78 | else: 79 | img, target = self.data[index], self.targets[index] 80 | 81 | # doing this so that it is consistent with all other datasets 82 | # to return a PIL Image 83 | img = Image.fromarray(np.transpose(img, (1, 2, 0))) 84 | 85 | if self.transform is not None: 86 | img = self.transform(img) 87 | 88 | if self.target_transform is not None: 89 | target = self.target_transform(target) 90 | 91 | return img, target 92 | 93 | def __len__(self): 94 | if self.split == "test": 95 | return len(self.data) 96 | else: 97 | return len(self.data) 98 | 99 | def _check_integrity(self): 100 | root = self.root 101 | if self.split == "train_and_extra": 102 | md5 = self.split_list[self.split][0][2] 103 | fpath = os.path.join(root, self.filename) 104 | train_integrity = check_integrity(fpath, md5) 105 | extra_filename = self.split_list[self.split][1][1] 106 | md5 = self.split_list[self.split][1][2] 107 | fpath = os.path.join(root, extra_filename) 108 | return check_integrity(fpath, md5) and train_integrity 109 | else: 110 | md5 = self.split_list[self.split][2] 111 | fpath = os.path.join(root, self.filename) 112 | return check_integrity(fpath, md5) 113 | 114 | def download(self): 115 | if self.split == "train_and_extra": 116 | md5 = self.split_list[self.split][0][2] 117 | download_url(self.url, self.root, self.filename, md5) 118 | extra_filename = self.split_list[self.split][1][1] 119 | md5 = self.split_list[self.split][1][2] 120 | download_url(self.url, self.root, extra_filename, md5) 121 | else: 122 | md5 = self.split_list[self.split][2] 123 | download_url(self.url, self.root, self.filename, md5) 124 | -------------------------------------------------------------------------------- /CLIP_based/utils/tiny_resnet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import 
torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class BasicBlock(nn.Module): 8 | def __init__(self, in_planes, out_planes, stride, dropRate=0.0): 9 | super(BasicBlock, self).__init__() 10 | self.bn1 = nn.BatchNorm2d(in_planes) 11 | self.relu1 = nn.ReLU(inplace=True) 12 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 13 | padding=1, bias=False) 14 | self.bn2 = nn.BatchNorm2d(out_planes) 15 | self.relu2 = nn.ReLU(inplace=True) 16 | self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, 17 | padding=1, bias=False) 18 | self.droprate = dropRate 19 | self.equalInOut = (in_planes == out_planes) 20 | self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, 21 | padding=0, bias=False) or None 22 | 23 | def forward(self, x): 24 | if not self.equalInOut: 25 | x = self.relu1(self.bn1(x)) 26 | else: 27 | out = self.relu1(self.bn1(x)) 28 | if self.equalInOut: 29 | out = self.relu2(self.bn2(self.conv1(out))) 30 | else: 31 | out = self.relu2(self.bn2(self.conv1(x))) 32 | if self.droprate > 0: 33 | out = F.dropout(out, p=self.droprate, training=self.training) 34 | out = self.conv2(out) 35 | if not self.equalInOut: 36 | return torch.add(self.convShortcut(x), out) 37 | else: 38 | return torch.add(x, out) 39 | 40 | 41 | class NetworkBlock(nn.Module): 42 | def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0): 43 | super(NetworkBlock, self).__init__() 44 | self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate) 45 | 46 | def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate): 47 | layers = [] 48 | for i in range(nb_layers): 49 | layers.append(block(i == 0 and in_planes or out_planes, out_planes, i == 0 and stride or 1, dropRate)) 50 | return nn.Sequential(*layers) 51 | 52 | def forward(self, x): 53 | return self.layer(x) 54 | 55 | 56 | class WideResNet(nn.Module): 57 | def __init__(self, depth, num_classes, widen_factor=1, dropRate=0.0): 58 | super(WideResNet, self).__init__() 59 | nChannels = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor] 60 | assert ((depth - 4) % 6 == 0) 61 | n = (depth - 4) // 6 62 | block = BasicBlock 63 | # 1st conv before any network block 64 | self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1, 65 | padding=1, bias=False) 66 | # 1st block 67 | self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropRate) 68 | # 2nd block 69 | self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropRate) 70 | # 3rd block 71 | self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropRate) 72 | # global average pooling and classifier 73 | self.bn1 = nn.BatchNorm2d(nChannels[3]) 74 | self.relu = nn.ReLU(inplace=True) 75 | self.fc = nn.Linear(nChannels[3], num_classes) 76 | self.nChannels = nChannels[3] 77 | 78 | for m in self.modules(): 79 | if isinstance(m, nn.Conv2d): 80 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 81 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 82 | elif isinstance(m, nn.BatchNorm2d): 83 | m.weight.data.fill_(1) 84 | m.bias.data.zero_() 85 | elif isinstance(m, nn.Linear): 86 | m.bias.data.zero_() 87 | 88 | def forward(self, x): 89 | out = self.conv1(x) 90 | out = self.block1(out) 91 | out = self.block2(out) 92 | out = self.block3(out) 93 | out = self.relu(self.bn1(out)) 94 | out = F.avg_pool2d(out, 16) 95 | out = out.view(-1, self.nChannels) 96 | return self.fc(out) 97 | -------------------------------------------------------------------------------- /CLIP_based/utils/tinyimages_80mn_loader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from bisect import bisect_left 4 | 5 | 6 | class TinyImages(torch.utils.data.Dataset): 7 | 8 | def __init__(self, transform=None, exclude_cifar=True): 9 | 10 | data_file = open('../data/80million/tiny_images.bin', "rb") 11 | 12 | def load_image(idx): 13 | data_file.seek(idx * 3072) 14 | data = data_file.read(3072) 15 | return np.frombuffer(data, dtype='uint8').reshape(32, 32, 3, order="F") # frombuffer replaces the deprecated np.fromstring for raw bytes 16 | 17 | self.load_image = load_image 18 | self.offset = 0 # offset index 19 | 20 | self.transform = transform 21 | self.exclude_cifar = exclude_cifar 22 | 23 | if exclude_cifar: 24 | self.cifar_idxs = [] 25 | with open('../utils/80mn_cifar_idxs.txt', 'r') as idxs: 26 | for idx in idxs: 27 | # indices in file take the 80mn database to start at 1, hence "- 1" 28 | self.cifar_idxs.append(int(idx) - 1) 29 | 30 | # hash table option 31 | self.cifar_idxs = set(self.cifar_idxs) 32 | self.in_cifar = lambda x: x in self.cifar_idxs 33 | 34 | # bisection search option 35 | # self.cifar_idxs = tuple(sorted(self.cifar_idxs)) 36 | # 37 | # def binary_search(x, hi=len(self.cifar_idxs)): 38 | # pos = bisect_left(self.cifar_idxs, x, 0, hi) # find insertion position 39 | # return True if pos != hi and self.cifar_idxs[pos] == x else False 40 | # 41 | # self.in_cifar = binary_search 42 | 43 | def __getitem__(self, index): 44 | index = (index + self.offset) % 79302017 # keep index within [0, 79302017), matching __len__ 45 | 46 | if self.exclude_cifar: 47 | while self.in_cifar(index): 48 | index = np.random.randint(79302017) 49 | 50 | img = self.load_image(index) 51 | if self.transform is not None: 52 | img = self.transform(img) 53 | 54 | return img, 0 # 0 is the class 55 | 56 | def __len__(self): 57 | return 79302017 58 | -------------------------------------------------------------------------------- /CLIP_based/utils/validation_dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class PartialDataset(torch.utils.data.Dataset): 6 | def __init__(self, parent_ds, offset, length): 7 | self.parent_ds = parent_ds 8 | self.offset = offset 9 | self.length = length 10 | assert len(parent_ds) >= offset + length, Exception("Parent Dataset not long enough") 11 | super(PartialDataset, self).__init__() 12 | 13 | def __len__(self): 14 | return self.length 15 | 16 | def __getitem__(self, i): 17 | return self.parent_ds[i + self.offset] 18 | 19 | 20 | def validation_split(dataset, val_share=0.1): 21 | """ 22 | Split a (training and validation combined) dataset into training and validation. 23 | Note that to be statistically sound, the items in the dataset should be statistically 24 | independent (e.g. not sorted by class, not several instances of the same dataset that 25 | could end up in either set).
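(For example, a dataset still sorted by class would place whole classes entirely inside one of the two resulting subsets, since the split via PartialDataset above is a contiguous cut.)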
26 | 27 | inputs: 28 | dataset: ("training") dataset to split into training and validation 29 | val_share: fraction of validation data (should be 00: 129 | self.shrink_data(indexs) 130 | print(len(self.data), len(self.targets)) 131 | 132 | def __getitem__(self, index): 133 | img, target = self.data[index], self.targets[index] 134 | img = Image.fromarray(img) 135 | if self.transform is not None: 136 | img = self.transform(img) 137 | if self.target_transform is not None: 138 | target = self.target_transform(target) 139 | 140 | return img, target 141 | 142 | def shrink_data(self, idxs): 143 | targets = np.array(self.targets) 144 | self.targets = torch.from_numpy(targets[idxs]) 145 | self.data = self.data[idxs, ...] 146 | 147 | class CIFAR100SSL(datasets.CIFAR100): 148 | def __init__(self, root, indexs, train=True, 149 | transform=None, target_transform=None, 150 | download=False): 151 | super().__init__(root, train=train, 152 | transform=transform, 153 | target_transform=target_transform, 154 | download=download) 155 | if indexs is not None and len(indexs)>0: 156 | self.shrink_data(indexs) 157 | print(len(self.data), len(self.targets)) 158 | 159 | def __getitem__(self, index): 160 | img, target = self.data[index], self.targets[index] 161 | img = Image.fromarray(img) 162 | if self.transform is not None: 163 | img = self.transform(img) 164 | if self.target_transform is not None: 165 | target = self.target_transform(target) 166 | 167 | return img, target 168 | 169 | def shrink_data(self, idxs): 170 | targets = np.array(self.targets) 171 | self.targets = torch.from_numpy(targets[idxs]) 172 | self.data = self.data[idxs, ...] 173 | 174 | CIFAR_GETTERS = {'CIFAR-10': get_cifar10, 175 | 'CIFAR-100': get_cifar100} -------------------------------------------------------------------------------- /training_from_scratch/compute_metrics.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | from scipy import misc 5 | import numpy as np 6 | from collections import defaultdict 7 | from utils import anom_utils 8 | 9 | parser = argparse.ArgumentParser(description='Compute and present the OOD detection metrics based on scores') 10 | 11 | parser.add_argument('--in-dataset', default="CIFAR-10", type=str, help='in-distribution dataset') 12 | parser.add_argument('--name', default = '04_10 01:09 [NoNorm]SupCE_resnet34_lr_0.1_bsz_256_proxy anchor_w_0.2_trial_0', type=str, 13 | help='name of the instance') 14 | parser.add_argument('--method', default='mahalanobis', type=str, help='ood detection method') 15 | parser.add_argument('--base-dir', default='output/ood_scores', type=str, help='result directory') 16 | parser.add_argument('--epochs', default ="200", type=str, 17 | help='specify at which epoch the test is conducted') 18 | 19 | parser.set_defaults(argument=True) 20 | args = parser.parse_args() 21 | np.random.seed(1) 22 | 23 | def cal_metric(known, novel, method): 24 | tp, fp, fpr_at_tpr95 = get_curve(known, novel, method) 25 | results = dict() 26 | 27 | # FP 28 | mtype = 'FPR' 29 | results[mtype] = fpr_at_tpr95 30 | 31 | # AUROC 32 | mtype = 'AUROC' 33 | tpr = np.concatenate([[1.], tp/tp[0], [0.]]) 34 | fpr = np.concatenate([[1.], fp/fp[0], [0.]]) 35 | results[mtype] = -np.trapz(1.-fpr, tpr) 36 | 37 | # DTERR 38 | mtype = 'DTERR' 39 | results[mtype] = ((tp[0] - tp + fp) / (tp[0] + fp[0])).min() 40 | 41 | # AUIN 42 | mtype = 'AUIN' 43 | denom = tp+fp 44 | denom[denom == 0.] = -1. 
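# A note on the sentinel just set: past the last threshold nothing is
# predicted positive, so tp + fp == 0 there; writing -1 into denom avoids a
# divide-by-zero in tp/denom, and pin_ind below masks those points out of the
# trapezoidal integration.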
45 | pin_ind = np.concatenate([[True], denom > 0., [True]]) 46 | pin = np.concatenate([[.5], tp/denom, [0.]]) 47 | results[mtype] = -np.trapz(pin[pin_ind], tpr[pin_ind]) 48 | 49 | # AUOUT 50 | mtype = 'AUOUT' 51 | denom = tp[0]-tp+fp[0]-fp 52 | denom[denom == 0.] = -1. 53 | pout_ind = np.concatenate([[True], denom > 0., [True]]) 54 | pout = np.concatenate([[0.], (fp[0]-fp)/denom, [.5]]) 55 | results[mtype] = np.trapz(pout[pout_ind], 1.-fpr[pout_ind]) 56 | 57 | return results 58 | 59 | def get_curve(known, novel, method): 60 | tp, fp = dict(), dict() 61 | fpr_at_tpr95 = dict() 62 | 63 | known.sort() 64 | novel.sort() 65 | 66 | end = np.max([np.max(known), np.max(novel)]) 67 | start = np.min([np.min(known),np.min(novel)]) 68 | 69 | all = np.concatenate((known, novel)) 70 | all.sort() 71 | 72 | num_k = known.shape[0] 73 | num_n = novel.shape[0] 74 | 75 | if method == 'rowl': 76 | threshold = -0.5 77 | else: 78 | threshold = known[round(0.05 * num_k)] 79 | 80 | tp = -np.ones([num_k+num_n+1], dtype=int) 81 | fp = -np.ones([num_k+num_n+1], dtype=int) 82 | tp[0], fp[0] = num_k, num_n 83 | k, n = 0, 0 84 | for l in range(num_k+num_n): 85 | if k == num_k: 86 | tp[l+1:] = tp[l] 87 | fp[l+1:] = np.arange(fp[l]-1, -1, -1) 88 | break 89 | elif n == num_n: 90 | tp[l+1:] = np.arange(tp[l]-1, -1, -1) 91 | fp[l+1:] = fp[l] 92 | break 93 | else: 94 | if novel[n] < known[k]: 95 | n += 1 96 | tp[l+1] = tp[l] 97 | fp[l+1] = fp[l] - 1 98 | else: 99 | k += 1 100 | tp[l+1] = tp[l] - 1 101 | fp[l+1] = fp[l] 102 | 103 | j = num_k+num_n-1 104 | for l in range(num_k+num_n-1): 105 | if all[j] == all[j-1]: 106 | tp[j] = tp[j+1] 107 | fp[j] = fp[j+1] 108 | j -= 1 109 | 110 | fpr_at_tpr95 = np.sum(novel > threshold) / float(num_n) 111 | 112 | return tp, fp, fpr_at_tpr95 113 | 114 | def print_results(results, in_dataset, out_dataset, name, method): 115 | mtypes = ['FPR', 'DTERR', 'AUROC', 'AUIN', 'AUOUT'] 116 | 117 | print('in_distribution: ' + in_dataset) 118 | print('out_distribution: '+ out_dataset) 119 | print('Model Name: ' + name) 120 | print('') 121 | 122 | print(' OOD detection method: ' + method) 123 | for mtype in mtypes: 124 | print(' {mtype:6s}'.format(mtype=mtype), end='') 125 | print('\n{val:6.2f}'.format(val=100.*results['FPR']), end='') 126 | print(' {val:6.2f}'.format(val=100.*results['DTERR']), end='') 127 | print(' {val:6.2f}'.format(val=100.*results['AUROC']), end='') 128 | print(' {val:6.2f}'.format(val=100.*results['AUIN']), end='') 129 | print(' {val:6.2f}\n'.format(val=100.*results['AUOUT']), end='') 130 | print('') 131 | 132 | def compute_average_results(all_results): 133 | mtypes = ['FPR', 'DTERR', 'AUROC', 'AUIN', 'AUOUT'] 134 | avg_results = dict() 135 | 136 | for mtype in mtypes: 137 | avg_results[mtype] = 0.0 138 | 139 | for results in all_results: 140 | for mtype in mtypes: 141 | avg_results[mtype] += results[mtype] 142 | 143 | for mtype in mtypes: 144 | avg_results[mtype] /= float(len(all_results)) 145 | 146 | return avg_results 147 | 148 | def compute_traditional_ood(base_dir, in_dataset, out_datasets, method, name, epochs): 149 | print('Natural OOD') 150 | print('nat_in vs. 
nat_out') 151 | 152 | known = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/{epochs}_nat/in_scores.txt'.format( 153 | base_dir=base_dir, in_dataset=in_dataset, method=method, name=name, epochs = epochs), delimiter='\n') 154 | 155 | known_sorted = np.sort(known) 156 | num_k = known.shape[0] 157 | 158 | if method == 'rowl': 159 | threshold = -0.5 160 | else: 161 | threshold = known_sorted[round(0.05 * num_k)] 162 | 163 | total = 0.0 164 | print("*" * 30 + f"at epoch {epochs}" + "*" * 30 ) 165 | all_results_e = defaultdict(int) 166 | all_results = [] 167 | for out_dataset in out_datasets: 168 | novel = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/{epochs}_nat/{out_dataset}/out_scores.txt'.format( 169 | base_dir=base_dir, in_dataset=in_dataset, method=method, name=name, epochs = epochs, out_dataset=out_dataset), delimiter='\n') 170 | 171 | total += novel.shape[0] 172 | auroc, aupr, fpr = anom_utils.get_and_print_results(known, novel, f"{out_dataset}", method) 173 | results = cal_metric(known, novel, method) 174 | 175 | all_results.append(results) 176 | all_results_e["AUROC"] += auroc 177 | all_results_e["AUPR"] += aupr 178 | all_results_e["FPR95"] += fpr 179 | # print_results(results, in_dataset, out_dataset, name, method) 180 | 181 | avg_results = compute_average_results(all_results) 182 | print_results(avg_results, in_dataset, "All", name, method) 183 | print("Avg FPR95: ", round(100 * all_results_e["FPR95"]/len(out_datasets),2)) 184 | print("Avg AUROC: ", round(all_results_e["AUROC"]/len(out_datasets),4)) 185 | print("Avg AUPR: ", round(all_results_e["AUPR"]/len(out_datasets),4)) 186 | 187 | return 100.*avg_results['FPR'] 188 | 189 | def compute_in(base_dir, in_dataset, method, name, epochs): 190 | 191 | known_nat = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/{epochs}_nat/in_scores.txt'.format( 192 | base_dir=base_dir, in_dataset=in_dataset, method=method, name=name, epochs = epochs), delimiter='\n') 193 | known_nat_sorted = np.sort(known_nat) 194 | num_k = known_nat.shape[0] 195 | 196 | if method == 'rowl': 197 | threshold = -0.5 198 | else: 199 | threshold = known_nat_sorted[round(0.05 * num_k)] 200 | 201 | known_nat_label = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/{epochs}_nat/in_labels.txt'.format( 202 | base_dir=base_dir, in_dataset=in_dataset, method=method, name=name, epochs = epochs)) 203 | 204 | nat_in_cond = (known_nat>threshold).astype(np.float32) 205 | nat_correct = (known_nat_label[:,0] == known_nat_label[:,1]).astype(np.float32) 206 | known_nat_acc = np.mean(nat_correct) 207 | known_nat_fnr = np.mean((1.0 - nat_in_cond)) 208 | known_nat_eteacc = np.mean(nat_correct * nat_in_cond) 209 | 210 | print(f'In-distribution performance: at epoch {epochs}') 211 | print('FNR: {fnr:6.2f}, Acc: {acc:6.2f}, End-to-end Acc: {eteacc:6.2f}'.format(fnr=known_nat_fnr*100,acc=known_nat_acc*100,eteacc=known_nat_eteacc*100)) 212 | 213 | return 214 | 215 | 216 | 217 | if __name__ == '__main__': 218 | if args.in_dataset in ('cifar10', "CIFAR-10"): 219 | out_datasets = ['LSUN', 'LSUN_resize', 'iSUN', 'dtd', 'SVHN', "cifar100"] 220 | elif args.in_dataset == "SVHN": 221 | out_datasets = ['LSUN', 'LSUN_resize', 'iSUN', 'dtd'] 222 | all_fprs = dict() 223 | for epochs in args.epochs.split(): 224 | all_fprs[epochs] = compute_traditional_ood(args.base_dir, args.in_dataset, out_datasets, args.method, args.name, epochs) 225 | compute_in(args.base_dir, args.in_dataset, args.method, args.name, epochs) 226 | print("FPR95 at different epochs: ", all_fprs) 227 | 
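For orientation, compute_traditional_ood and compute_in above do not run a model; they only parse plain-text score files. The sketch below shows the directory layout they expect and fabricates compatible dummy inputs -- the run name "demo" and the random scores are illustrative stand-ins, not files shipped with the repo (a real run also needs an out_scores.txt for every dataset in the out_datasets list):

import os
import numpy as np

# Layout read by compute_metrics.py:
#   {base_dir}/{in_dataset}/{method}/{name}/{epochs}_nat/in_scores.txt
#   {base_dir}/{in_dataset}/{method}/{name}/{epochs}_nat/in_labels.txt  (one "prediction label" pair per row)
#   {base_dir}/{in_dataset}/{method}/{name}/{epochs}_nat/{out_dataset}/out_scores.txt
run_dir = 'output/ood_scores/CIFAR-10/knn/demo/200_nat'  # hypothetical run directory
os.makedirs(os.path.join(run_dir, 'SVHN'), exist_ok=True)
np.savetxt(os.path.join(run_dir, 'in_scores.txt'), -np.random.rand(1000))
np.savetxt(os.path.join(run_dir, 'in_labels.txt'), np.random.randint(0, 10, (1000, 2)))
np.savetxt(os.path.join(run_dir, 'SVHN', 'out_scores.txt'), -1.0 - np.random.rand(1000))

With all expected files in place, "python compute_metrics.py --in-dataset CIFAR-10 --method knn --name demo --epochs 200" prints the per-dataset and averaged FPR95/AUROC/AUPR numbers.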
-------------------------------------------------------------------------------- /training_from_scratch/eval_ood_detection.py: -------------------------------------------------------------------------------- 1 | import torchvision 2 | from torchvision.transforms import transforms 3 | import numpy as np 4 | import sys 5 | import pdb 6 | import logging 7 | import os 8 | import argparse 9 | import torch 10 | import torch.nn as nn 11 | import torch.backends.cudnn as cudnn 12 | import torchvision.transforms as trn 13 | import torchvision.datasets as dset 14 | import torch.nn.functional as F 15 | from models.resnet_outliers import * 16 | from skimage.filters import gaussian as gblur 17 | from PIL import Image as PILImage 18 | import seaborn as sns 19 | import matplotlib.pyplot as plt 20 | import faiss 21 | from tqdm import tqdm 22 | from evaluation.eval_utils import * 23 | from evaluation.display_results import show_performance, get_measures, print_measures, print_measures_with_std 24 | import evaluation.svhn_loader as svhn 25 | 26 | def process_args(): 27 | parser = argparse.ArgumentParser(description='Evaluates a CIFAR OOD Detector', 28 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 29 | parser.add_argument('--in_dataset', default="CIFAR-100", type=str, help='in-distribution dataset') 30 | parser.add_argument('-b', '--batch-size', default=64, type=int, help='mini-batch size') 31 | parser.add_argument('--name', default = "pretrained") 32 | parser.add_argument('--feat_dim', default=128, type=int, help='feature dim') 33 | parser.add_argument('--model', default='resnet34', type=str, help='model architecture') 34 | parser.add_argument('--epoch',default=500,type=int) 35 | parser.add_argument('--K',default=100,type=int) 36 | parser.add_argument('--gpus', default=[0], nargs='*', type=int,help='List of GPU indices to use, e.g., --gpus 0 1 2 3') 37 | parser.add_argument('--method_name', '-test', type=str, default='test', help='Method name.') 38 | parser.add_argument('--ckpt', type=str, default= 39 | '/nobackup-slow/taoleitian/CIDER/paper_results/CIFAR-100_ckpt_500.pt', 40 | #'/nobackup-slow/taoleitian/CIDER/10_02_16:25_SupCon_resnet34_lr_0.5_cosine_True_supcon_ws_1_500_128_trial_0_linear_temp_0.1_CIFAR-100/checkpoint_500.pth.tar', 41 | help='Method name.') 42 | 43 | args = parser.parse_args() 44 | 45 | # use 512 46 | args.name = '29_06_21:14_SupCon_resnet34_lr_0.05_warm_True_cosine_True_bsz_512_ws_1.0_wu_1.0_128_temp_0.1_CIFAR-100_pm_0.95' 47 | # args.name = '24_06_23:40_SupCon_resnet34_lr_0.5_cosine_True_bsz_512_triple_ws_0_wu_0.5_wp_1_500_128_trial_0_linear_temp_0.1_CIFAR-100_pm_0.95_momentum_norm' 48 | #args.ckpt = f"./checkpoints_save/{args.in_dataset}/{args.name}/checkpoint_{args.epoch}.pth.tar" 49 | #args.ckpt = f"./checkpoints_save/{args.in_dataset}/{args.name}/checkpoint_{args.epoch}.pth.tar" 50 | args.gpus = list(map(lambda x: torch.device('cuda', x), args.gpus)) 51 | if args.in_dataset == "CIFAR-10": 52 | args.num_classes = 10 53 | elif args.in_dataset == "CIFAR-100": 54 | args.num_classes = 100 55 | return args 56 | 57 | def set_model(args): 58 | model = SupCEHeadResNet(name=args.model, feat_dim=args.feat_dim, num_classes=args.num_classes) 59 | if torch.cuda.is_available(): 60 | model = model.cuda() 61 | cudnn.benchmark = True 62 | return model 63 | 64 | def knn(layer_idx=0, num_classes=100): 65 | args = process_args() 66 | args.log_directory = f"results/{args.in_dataset}/{args.name}/knn_{args.K}" 67 | if not os.path.exists(args.log_directory): 68 | os.makedirs(args.log_directory) 
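# The rest of this function implements the KNN OOD score: ID training
# features are indexed with faiss.IndexFlatL2 (exact nearest-neighbor search),
# and every sample is scored by the negated distance to its K-th nearest ID
# training feature, so smaller distances (larger scores) mean "more
# in-distribution".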
69 | 70 | # setup model 71 | train_loader, test_loader = set_loader(args) 72 | print(args.ckpt) 73 | pretrained_dict = torch.load(args.ckpt, map_location='cpu') 74 | print("Keys:", pretrained_dict.keys()) 75 | pretrained_dict = {key.replace("module.", ""): value for key, value in pretrained_dict.items()} 76 | net = set_model(args) 77 | net.load_state_dict(pretrained_dict, strict=False) 78 | net.eval() 79 | if layer_idx == 1: 80 | embedding_dim = 128 81 | elif layer_idx == 0: 82 | embedding_dim = 512 83 | 84 | # extract features 85 | ftrain = obtain_feature_from_loader(net, train_loader, layer_idx, embedding_dim, num_batches=None) 86 | ftest = obtain_feature_from_loader(net, test_loader, layer_idx, embedding_dim, num_batches=None) 87 | print('ID finished') 88 | out_datasets = ['LSUN_C', 'iSUN', 'SVHN', 'Places', 'Textures'] 89 | #out_datasets = ['LSUN_C', 'iSUN', 'SVHN', 'places365'] 90 | # out_datasets = ['LSUN', 'isun', 'SVHN', 'places365', 'texture', 'Imagenet', 'tin'] 91 | food_all = {} 92 | ood_num_examples = len(test_loader.dataset) 93 | num_batches = ood_num_examples // args.batch_size 94 | for out_dataset in out_datasets: 95 | ood_loader = set_ood_loader(args, out_dataset) 96 | ood_feat = obtain_feature_from_loader(net, ood_loader, layer_idx, embedding_dim, num_batches) 97 | food_all[out_dataset] = ood_feat 98 | print(f'OOD {out_dataset} finished') 99 | 100 | # initialization 101 | auroc_list, aupr_list, fpr_list = [], [], [] 102 | index = faiss.IndexFlatL2(ftrain.shape[1]) 103 | index.add(ftrain.cpu().numpy()) 104 | index_bad = index 105 | ################### Using KNN distance Directly ################### 106 | D, _ = index_bad.search(ftest.cpu().numpy(), args.K) 107 | scores_in = -D[:,-1] 108 | for ood_dataset, food in food_all.items(): 109 | print(f"Evaluating OOD dataset {ood_dataset}") 110 | D, _ = index_bad.search(food.cpu().numpy(), args.K) 111 | scores_ood_test = -D[:,-1] 112 | aurocs, auprs, fprs = [], [], [] 113 | print(scores_in, scores_ood_test) 114 | print(scores_in[:3], scores_ood_test[:3]) 115 | torch.save(net.state_dict(), '/nobackup-fast/taoleitian/CIFAR_100.pt') # note: re-saves the backbone on every iteration of this loop 116 | measures = get_measures(scores_in, scores_ood_test) 117 | aurocs.append(measures[0]); auprs.append(measures[1]); fprs.append(measures[2]) 118 | auroc = np.mean(aurocs); aupr = np.mean(auprs); fpr = np.mean(fprs) 119 | auroc_list.append(auroc); aupr_list.append(aupr); fpr_list.append(fpr) 120 | print_measures(None, auroc, aupr, fpr, args.method_name) 121 | #plot_distribution(args, scores_in, scores_ood_test, ood_dataset) 122 | print("AVG") 123 | print_measures(None, np.mean(auroc_list), np.mean(aupr_list), np.mean(fpr_list), method_name=args.method_name) 124 | save_as_dataframe(args, out_datasets, fpr_list, auroc_list, aupr_list) 125 | 126 | if __name__ == '__main__': 127 | knn(layer_idx=0, num_classes=10) 128 | -------------------------------------------------------------------------------- /training_from_scratch/eval_ood_detection_in100.py: -------------------------------------------------------------------------------- 1 | import torchvision 2 | from torchvision.transforms import transforms 3 | import numpy as np 4 | import sys 5 | import pdb 6 | import logging 7 | import os 8 | import argparse 9 | import torch 10 | import torch.nn as nn 11 | import torch.backends.cudnn as cudnn 12 | import torchvision.transforms as trn 13 | import torchvision.datasets as dset 14 | import torch.nn.functional as F 15 | from models.resnet_im100 import * 16 | from skimage.filters import gaussian as gblur 17 | from PIL
import Image as PILImage 18 | import seaborn as sns 19 | import matplotlib.pyplot as plt 20 | import faiss 21 | from tqdm import tqdm 22 | from evaluation.eval_utils import * 23 | from evaluation.display_results import show_performance, get_measures, print_measures, print_measures_with_std 24 | import evaluation.svhn_loader as svhn 25 | 26 | def process_args(): 27 | parser = argparse.ArgumentParser(description='Evaluates a CIFAR OOD Detector', 28 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 29 | parser.add_argument('--in_dataset', default="CIFAR-100", type=str, help='in-distribution dataset') 30 | parser.add_argument('-b', '--batch-size', default=128, type=int, help='mini-batch size') 31 | parser.add_argument('--name', default = "pretrained") 32 | parser.add_argument('--feat_dim', default=128, type=int, help='feature dim') 33 | parser.add_argument('--model', default='resnet34', type=str, help='model architecture') 34 | parser.add_argument('--epoch',default=500,type=int) 35 | parser.add_argument('--K',default=100,type=int) 36 | parser.add_argument('--gpus', default=[0], nargs='*', type=int,help='List of GPU indices to use, e.g., --gpus 0 1 2 3') 37 | parser.add_argument('--method_name', '-test', type=str, default='test', help='Method name.') 38 | parser.add_argument('--ckpt', type=str, default= 39 | '/nobackup/leitain/npos/IN-100/checkpoints/ImageNet-100/11_05_22:38_SupCon_resnet101_lr_0.1_warm_False_cosine_True_bsz_256_disp_0.0_comp_1_512_temp_0.1_ImageNet-100_pm_0.95/checkpoint_90.pth.tar', 40 | #'/nobackup-slow/taoleitian/CIDER/10_02_16:25_SupCon_resnet34_lr_0.5_cosine_True_supcon_ws_1_500_128_trial_0_linear_temp_0.1_CIFAR-100/checkpoint_500.pth.tar', 41 | help='Method name.') 42 | 43 | args = parser.parse_args() 44 | 45 | # use 512 46 | args.name = '29_06_21:14_SupCon_resnet34_lr_0.05_warm_True_cosine_True_bsz_512_ws_1.0_wu_1.0_128_temp_0.1_CIFAR-100_pm_0.95' 47 | # args.name = '24_06_23:40_SupCon_resnet34_lr_0.5_cosine_True_bsz_512_triple_ws_0_wu_0.5_wp_1_500_128_trial_0_linear_temp_0.1_CIFAR-100_pm_0.95_momentum_norm' 48 | #args.ckpt = f"./checkpoints_save/{args.in_dataset}/{args.name}/checkpoint_{args.epoch}.pth.tar" 49 | #args.ckpt = f"./checkpoints_save/{args.in_dataset}/{args.name}/checkpoint_{args.epoch}.pth.tar" 50 | args.gpus = list(map(lambda x: torch.device('cuda', x), args.gpus)) 51 | 52 | args.num_classes = 100 53 | return args 54 | 55 | def set_model(args): 56 | model = resnet101(num_class=args.num_classes) 57 | if torch.cuda.is_available(): 58 | model = model.cuda() 59 | cudnn.benchmark = True 60 | return model 61 | 62 | def knn(layer_idx=0, num_classes=100): 63 | args = process_args() 64 | args.log_directory = f"results/{args.in_dataset}/{args.name}/knn_{args.K}" 65 | if not os.path.exists(args.log_directory): 66 | os.makedirs(args.log_directory) 67 | 68 | # setup model 69 | train_loader, test_loader = set_loader_in100(args) 70 | print(args.ckpt) 71 | pretrained_dict= torch.load(args.ckpt, map_location='cpu') 72 | print("Keys:", pretrained_dict.keys()) 73 | pretrained_dict = {key.replace("module.", ""): value for key, value in pretrained_dict.items()} 74 | net = set_model(args) 75 | 76 | net.load_state_dict(pretrained_dict, strict=False) 77 | net.eval() 78 | 79 | embedding_dim = 512 80 | 81 | # extract features 82 | ftrain = obtain_feature_from_loader(net, train_loader, layer_idx, embedding_dim, num_batches=None, cifar_dataset=False) 83 | ftest = obtain_feature_from_loader(net, test_loader, layer_idx, embedding_dim, num_batches=None, cifar_dataset=False) 84 
| print('ID finished') 85 | out_datasets = ['inat', 'Places', 'Sun', 'Textures'] 86 | 87 | food_all = {} 88 | ood_num_examples = len(test_loader.dataset) 89 | num_batches = ood_num_examples // args.batch_size 90 | for out_dataset in out_datasets: 91 | ood_loader = set_ood_loader_in100(args, out_dataset) 92 | ood_feat = obtain_feature_from_loader(net, ood_loader, layer_idx, embedding_dim, num_batches, cifar_dataset=False) 93 | food_all[out_dataset] = ood_feat 94 | print(f'OOD {out_dataset} finished') 95 | 96 | # initialization 97 | auroc_list, aupr_list, fpr_list = [], [], [] 98 | index = faiss.IndexFlatL2(ftrain.shape[1]) 99 | index.add(ftrain.cpu().numpy()) 100 | index_bad = index 101 | ################### Using KNN distance Directly ################### 102 | D, _ = index_bad.search(ftest.cpu().numpy(), args.K) 103 | scores_in = -D[:,-1] 104 | for ood_dataset, food in food_all.items(): 105 | print(f"Evaluating OOD dataset {ood_dataset}") 106 | D, _ = index_bad.search(food.cpu().numpy(), args.K) 107 | scores_ood_test = -D[:,-1] 108 | aurocs, auprs, fprs = [], [], [] 109 | print(scores_in, scores_ood_test) 110 | print(scores_in[:3], scores_ood_test[:3]) 111 | 112 | measures = get_measures(scores_in, scores_ood_test) 113 | aurocs.append(measures[0]); auprs.append(measures[1]); fprs.append(measures[2]) 114 | auroc = np.mean(aurocs); aupr = np.mean(auprs); fpr = np.mean(fprs) 115 | auroc_list.append(auroc); aupr_list.append(aupr); fpr_list.append(fpr) 116 | print_measures(None, auroc, aupr, fpr, args.method_name) 117 | #plot_distribution(args, scores_in, scores_ood_test, ood_dataset) 118 | print("AVG") 119 | print_measures(None, np.mean(auroc_list), np.mean(aupr_list), np.mean(fpr_list), method_name=args.method_name) 120 | save_as_dataframe(args, out_datasets, fpr_list, auroc_list, aupr_list) 121 | 122 | if __name__ == '__main__': 123 | knn(layer_idx=0, num_classes=10) 124 | -------------------------------------------------------------------------------- /training_from_scratch/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .imagenet_loader import * 2 | from .svhn_loader import * 3 | from .cal_metric import * 4 | from .display_results import * 5 | -------------------------------------------------------------------------------- /training_from_scratch/evaluation/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/training_from_scratch/evaluation/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /training_from_scratch/evaluation/__pycache__/cal_metric.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/training_from_scratch/evaluation/__pycache__/cal_metric.cpython-37.pyc -------------------------------------------------------------------------------- /training_from_scratch/evaluation/__pycache__/display_results.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/training_from_scratch/evaluation/__pycache__/display_results.cpython-37.pyc --------------------------------------------------------------------------------
/training_from_scratch/evaluation/__pycache__/eval_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/training_from_scratch/evaluation/__pycache__/eval_utils.cpython-37.pyc -------------------------------------------------------------------------------- /training_from_scratch/evaluation/__pycache__/imagenet_loader.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/training_from_scratch/evaluation/__pycache__/imagenet_loader.cpython-37.pyc -------------------------------------------------------------------------------- /training_from_scratch/evaluation/__pycache__/svhn_loader.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/training_from_scratch/evaluation/__pycache__/svhn_loader.cpython-37.pyc -------------------------------------------------------------------------------- /training_from_scratch/evaluation/cal_metric.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | from torch.autograd import Variable 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import numpy as np 7 | import torch.optim as optim 8 | import torchvision 9 | import os 10 | import torchvision.transforms as transforms 11 | import numpy as np 12 | import time 13 | from scipy import misc 14 | 15 | def get_curve(dir_name, stypes = ['MSP', 'ODIN']): 16 | tp, fp = dict(), dict() 17 | fpr_at_tpr95 = dict() 18 | for stype in stypes: 19 | known = np.loadtxt('{}/confidence_{}_In.txt'.format(dir_name, stype), delimiter='\n') 20 | novel = np.loadtxt('{}/confidence_{}_Out.txt'.format(dir_name, stype), delimiter='\n') 21 | known.sort() 22 | novel.sort() 23 | 24 | end = np.max([np.max(known), np.max(novel)]) 25 | start = np.min([np.min(known),np.min(novel)]) 26 | num_k = known.shape[0] 27 | num_n = novel.shape[0] 28 | 29 | threshold = known[round(0.05 * num_k)] 30 | 31 | tp[stype] = -np.ones([num_k+num_n+1], dtype=int) 32 | fp[stype] = -np.ones([num_k+num_n+1], dtype=int) 33 | tp[stype][0], fp[stype][0] = num_k, num_n 34 | k, n = 0, 0 35 | for l in range(num_k+num_n): 36 | if k == num_k: 37 | tp[stype][l+1:] = tp[stype][l] 38 | fp[stype][l+1:] = np.arange(fp[stype][l]-1, -1, -1) 39 | break 40 | elif n == num_n: 41 | tp[stype][l+1:] = np.arange(tp[stype][l]-1, -1, -1) 42 | fp[stype][l+1:] = fp[stype][l] 43 | break 44 | else: 45 | if novel[n] < known[k]: 46 | n += 1 47 | tp[stype][l+1] = tp[stype][l] 48 | fp[stype][l+1] = fp[stype][l] - 1 49 | else: 50 | k += 1 51 | tp[stype][l+1] = tp[stype][l] - 1 52 | fp[stype][l+1] = fp[stype][l] 53 | 54 | fpr_at_tpr95[stype] = np.sum(novel > threshold) / float(num_n) 55 | 56 | return tp, fp, fpr_at_tpr95 57 | 58 | def metric(dir_name, stypes = ['MSP', 'ODIN'], verbose=False): 59 | tp, fp, fpr_at_tpr95 = get_curve(dir_name, stypes) 60 | results = dict() 61 | mtypes = ['FPR', 'AUROC', 'DTERR', 'AUIN', 'AUOUT'] 62 | if verbose: 63 | print(' ', end='') 64 | for mtype in mtypes: 65 | print(' {mtype:6s}'.format(mtype=mtype), end='') 66 | print('') 67 | 68 | for stype in stypes: 69 | if verbose: 70 | print('{stype:5s} '.format(stype=stype), end='') 71 | 
results[stype] = dict() 72 | 73 | # FPR 74 | mtype = 'FPR' 75 | results[stype][mtype] = fpr_at_tpr95[stype] 76 | if verbose: 77 | print(' {val:6.3f}'.format(val=100.*results[stype][mtype]), end='') 78 | 79 | # AUROC 80 | mtype = 'AUROC' 81 | tpr = np.concatenate([[1.], tp[stype]/tp[stype][0], [0.]]) 82 | fpr = np.concatenate([[1.], fp[stype]/fp[stype][0], [0.]]) 83 | results[stype][mtype] = -np.trapz(1.-fpr, tpr) 84 | if verbose: 85 | print(' {val:6.3f}'.format(val=100.*results[stype][mtype]), end='') 86 | 87 | # DTERR 88 | mtype = 'DTERR' 89 | results[stype][mtype] = ((tp[stype][0] - tp[stype] + fp[stype]) / (tp[stype][0] + fp[stype][0])).min() 90 | if verbose: 91 | print(' {val:6.3f}'.format(val=100.*results[stype][mtype]), end='') 92 | 93 | # AUIN 94 | mtype = 'AUIN' 95 | denom = tp[stype]+fp[stype] 96 | denom[denom == 0.] = -1. 97 | pin_ind = np.concatenate([[True], denom > 0., [True]]) 98 | pin = np.concatenate([[.5], tp[stype]/denom, [0.]]) 99 | results[stype][mtype] = -np.trapz(pin[pin_ind], tpr[pin_ind]) 100 | if verbose: 101 | print(' {val:6.3f}'.format(val=100.*results[stype][mtype]), end='') 102 | 103 | # AUOUT 104 | mtype = 'AUOUT' 105 | denom = tp[stype][0]-tp[stype]+fp[stype][0]-fp[stype] 106 | denom[denom == 0.] = -1. 107 | pout_ind = np.concatenate([[True], denom > 0., [True]]) 108 | pout = np.concatenate([[0.], (fp[stype][0]-fp[stype])/denom, [.5]]) 109 | results[stype][mtype] = np.trapz(pout[pout_ind], 1.-fpr[pout_ind]) 110 | if verbose: 111 | print(' {val:6.3f}'.format(val=100.*results[stype][mtype]), end='') 112 | print('') 113 | 114 | return results 115 | -------------------------------------------------------------------------------- /training_from_scratch/evaluation/display_results.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sklearn.metrics as sk 3 | 4 | recall_level_default = 0.95 5 | 6 | 7 | def stable_cumsum(arr, rtol=1e-05, atol=1e-08): 8 | """Use high precision for cumsum and check that final value matches sum 9 | Parameters 10 | ---------- 11 | arr : array-like 12 | To be cumulatively summed as flat 13 | rtol : float 14 | Relative tolerance, see ``np.allclose`` 15 | atol : float 16 | Absolute tolerance, see ``np.allclose`` 17 | """ 18 | out = np.cumsum(arr, dtype=np.float64) 19 | expected = np.sum(arr, dtype=np.float64) 20 | if not np.allclose(out[-1], expected, rtol=rtol, atol=atol): 21 | raise RuntimeError('cumsum was found to be unstable: ' 22 | 'its last element does not correspond to sum') 23 | return out 24 | 25 | 26 | def fpr_and_fdr_at_recall(y_true, y_score, recall_level=recall_level_default, pos_label=None): 27 | classes = np.unique(y_true) 28 | if (pos_label is None and 29 | not (np.array_equal(classes, [0, 1]) or 30 | np.array_equal(classes, [-1, 1]) or 31 | np.array_equal(classes, [0]) or 32 | np.array_equal(classes, [-1]) or 33 | np.array_equal(classes, [1]))): 34 | raise ValueError("Data is not binary and pos_label is not specified") 35 | elif pos_label is None: 36 | pos_label = 1. 37 | 38 | # make y_true a boolean vector 39 | y_true = (y_true == pos_label) 40 | 41 | # sort scores and corresponding truth values 42 | desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1] 43 | y_score = y_score[desc_score_indices] 44 | y_true = y_true[desc_score_indices] 45 | 46 | # y_score typically has many tied values. Here we extract 47 | # the indices associated with the distinct values. We also 48 | # concatenate a value for the end of the curve. 
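# (y_score is sorted in descending order, so np.diff is nonzero exactly at
# boundaries between runs of tied scores; each retained index is therefore
# the last occurrence of a distinct score value.)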
49 | distinct_value_indices = np.where(np.diff(y_score))[0] 50 | threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1] 51 | 52 | # accumulate the true positives with decreasing threshold 53 | tps = stable_cumsum(y_true)[threshold_idxs] 54 | fps = 1 + threshold_idxs - tps # add one because of zero-based indexing 55 | 56 | thresholds = y_score[threshold_idxs] 57 | 58 | recall = tps / tps[-1] 59 | 60 | last_ind = tps.searchsorted(tps[-1]) 61 | sl = slice(last_ind, None, -1) # [last_ind::-1] 62 | recall, fps, tps, thresholds = np.r_[recall[sl], 1], np.r_[fps[sl], 0], np.r_[tps[sl], 0], thresholds[sl] 63 | 64 | cutoff = np.argmin(np.abs(recall - recall_level)) 65 | 66 | return fps[cutoff] / (np.sum(np.logical_not(y_true))) # , fps[cutoff]/(fps[cutoff] + tps[cutoff]) 67 | 68 | 69 | def get_measures(_pos, _neg, recall_level=recall_level_default): 70 | pos = np.array(_pos[:]).reshape((-1, 1)) 71 | neg = np.array(_neg[:]).reshape((-1, 1)) 72 | examples = np.squeeze(np.vstack((pos, neg))) 73 | labels = np.zeros(len(examples), dtype=np.int32) 74 | labels[:len(pos)] += 1 75 | 76 | auroc = sk.roc_auc_score(labels, examples) 77 | aupr = sk.average_precision_score(labels, examples) 78 | fpr = fpr_and_fdr_at_recall(labels, examples, recall_level) 79 | 80 | return auroc, aupr, fpr 81 | 82 | 83 | def show_performance(pos, neg, method_name='Ours', recall_level=recall_level_default): 84 | ''' 85 | :param pos: 1's class, class to detect, outliers, or wrongly predicted 86 | example scores 87 | :param neg: 0's class scores 88 | ''' 89 | 90 | auroc, aupr, fpr = get_measures(pos[:], neg[:], recall_level) 91 | 92 | print(f'\t\t\t {method_name}') 93 | print('FPR{:d}:\t\t\t{:.2f}'.format(int(100 * recall_level), 100 * fpr)) 94 | print('AUROC:\t\t\t{:.2f}'.format(100 * auroc)) 95 | print('AUPR:\t\t\t{:.2f}'.format(100 * aupr)) 96 | # print('FDR{:d}:\t\t\t{:.2f}'.format(int(100 * recall_level), 100 * fdr)) 97 | 98 | 99 | def print_measures(log, auroc, aupr, fpr, method_name='Ours', recall_level=recall_level_default): 100 | if log == None: 101 | print('FPR{:d}:\t\t\t{:.2f}'.format(int(100 * recall_level), 100 * fpr)) 102 | print('AUROC: \t\t\t{:.2f}'.format(100 * auroc)) 103 | print('AUPR: \t\t\t{:.2f}'.format(100 * aupr)) 104 | else: 105 | log.debug('\t\t\t\t' + method_name) 106 | log.debug(' FPR{:d} AUROC AUPR'.format(int(100*recall_level))) 107 | log.debug('& {:.2f} & {:.2f} & {:.2f}'.format(100*fpr, 100*auroc, 100*aupr)) 108 | 109 | 110 | 111 | def print_measures_with_std(log, aurocs, auprs, fprs, method_name='Ours', recall_level=recall_level_default): 112 | log.debug('\t\t\t\t' + method_name) 113 | log.debug(' FPR{:d} AUROC AUPR'.format(int(100*recall_level))) 114 | log.debug('& {:.2f} & {:.2f} & {:.2f}'.format(100*np.mean(fprs), 100*np.mean(aurocs), 100*np.mean(auprs))) 115 | log.debug('& {:.2f} & {:.2f} & {:.2f}'.format(100*np.std(fprs), 100*np.std(aurocs), 100*np.std(auprs))) 116 | #print('FPR{:d}:\t\t\t{:.2f}\t+/- {:.2f}'.format(int(100 * recall_level), 100 * np.mean(fprs), 100 * np.std(fprs))) 117 | #print('AUROC: \t\t\t{:.2f}\t+/- {:.2f}'.format(100 * np.mean(aurocs), 100 * np.std(aurocs))) 118 | #print('AUPR: \t\t\t{:.2f}\t+/- {:.2f}'.format(100 * np.mean(auprs), 100 * np.std(auprs))) 119 | 120 | 121 | def show_performance_comparison(pos_base, neg_base, pos_ours, neg_ours, baseline_name='Baseline', 122 | method_name='Ours', recall_level=recall_level_default): 123 | ''' 124 | :param pos_base: 1's class, class to detect, outliers, or wrongly predicted 125 | example scores from the baseline 126 | 
:param neg_base: 0's class scores generated by the baseline 127 | ''' 128 | auroc_base, aupr_base, fpr_base = get_measures(pos_base[:], neg_base[:], recall_level) 129 | auroc_ours, aupr_ours, fpr_ours = get_measures(pos_ours[:], neg_ours[:], recall_level) 130 | 131 | print('\t\t\t' + baseline_name + '\t' + method_name) 132 | print('FPR{:d}:\t\t\t{:.2f}\t\t{:.2f}'.format( 133 | int(100 * recall_level), 100 * fpr_base, 100 * fpr_ours)) 134 | print('AUROC:\t\t\t{:.2f}\t\t{:.2f}'.format( 135 | 100 * auroc_base, 100 * auroc_ours)) 136 | print('AUPR:\t\t\t{:.2f}\t\t{:.2f}'.format( 137 | 100 * aupr_base, 100 * aupr_ours)) 138 | # print('FDR{:d}:\t\t\t{:.2f}\t\t{:.2f}'.format( 139 | # int(100 * recall_level), 100 * fdr_base, 100 * fdr_ours)) -------------------------------------------------------------------------------- /training_from_scratch/evaluation/image_folder.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import Any, Callable, cast, Dict, List, Optional, Tuple 3 | from torchvision.datasets import DatasetFolder 4 | from torchvision.datasets.folder import default_loader, IMG_EXTENSIONS 5 | 6 | 7 | class ImageSubfolder(DatasetFolder): 8 | """Extend ImageFolder to support folder subsets 9 | This class inherits from :class:`~torchvision.datasets.DatasetFolder` so 10 | the same methods can be overridden to customize the dataset. 11 | Args: 12 | root (string): Root directory path. 13 | transform (callable, optional): A function/transform that takes in a PIL image 14 | and returns a transformed version. E.g., ``transforms.RandomCrop`` 15 | target_transform (callable, optional): A function/transform that takes in the 16 | target and transforms it. 17 | loader (callable, optional): A function to load an image given its path. 18 | is_valid_file (callable, optional): A function that takes the path of an image file 19 | and checks if the file is a valid file (used to check for corrupt files) 20 | class_to_idx (dict): Dict with items (class_name, class_index). 21 | Attributes: 22 | classes (list): List of the class names sorted alphabetically. 23 | class_to_idx (dict): Dict with items (class_name, class_index).
24 | imgs (list): List of (image path, class_index) tuples 25 | """ 26 | 27 | def __init__( 28 | self, 29 | root: str, 30 | transform: Optional[Callable] = None, 31 | target_transform: Optional[Callable] = None, 32 | loader: Callable[[str], Any] = default_loader, 33 | is_valid_file: Optional[Callable[[str], bool]] = None, 34 | class_to_idx: Optional[Dict] = None, 35 | ): 36 | super(DatasetFolder, self).__init__(root, transform=transform, target_transform=target_transform) 37 | if class_to_idx is not None: 38 | classes = class_to_idx.keys() 39 | else: 40 | classes, class_to_idx = self.find_classes(self.root) 41 | extensions = IMG_EXTENSIONS if is_valid_file is None else None, 42 | samples = self.make_dataset(self.root, class_to_idx, extensions[0], is_valid_file) 43 | 44 | self.loader = loader 45 | self.extensions = extensions 46 | 47 | self.classes = classes 48 | self.class_to_idx = class_to_idx 49 | self.samples = samples 50 | self.targets = [s[1] for s in samples] 51 | self.imgs = self.samples 52 | -------------------------------------------------------------------------------- /training_from_scratch/evaluation/imagenet_loader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import os 4 | import pickle 5 | 6 | def unpickle(file): 7 | with open(file, 'rb') as fo: 8 | dict = pickle.load(fo) 9 | return dict 10 | 11 | class ImageNet(torch.utils.data.Dataset): 12 | 13 | def __init__(self, transform=None, img_size=64): 14 | 15 | self.S = np.zeros(11, dtype=np.int32) 16 | self.img_size = img_size 17 | self.labels = [] 18 | for idx in range(1, 11): 19 | # data_file = os.path.join('/nobackup/ImageNet64/', 'train_data_batch_{}'.format(idx)) 20 | data_file = os.path.join('/nobackup-slow/dataset/ImageNet/', 'train_data_batch_{}'.format(idx)) 21 | d = unpickle(data_file) 22 | y = d['labels'] 23 | y = [i-1 for i in y] 24 | self.labels.extend(y) 25 | self.S[idx] = self.S[idx-1] + len(y) 26 | 27 | self.labels = np.array(self.labels) 28 | self.N = len(self.labels) 29 | self.curr_batch = -1 30 | 31 | self.offset = 0 # offset index 32 | self.transform = transform 33 | 34 | def load_image_batch(self, batch_index): 35 | # data_file = os.path.join('/nobackup/ImageNet64/', 'train_data_batch_{}'.format(batch_index)) 36 | data_file = os.path.join('/nobackup-slow/dataset/ImageNet/', 'train_data_batch_{}'.format(batch_index)) 37 | d = unpickle(data_file) 38 | x = d['data'] 39 | 40 | img_size = self.img_size 41 | img_size2 = img_size * img_size 42 | x = np.dstack((x[:, :img_size2], x[:, img_size2:2*img_size2], x[:, 2*img_size2:])) 43 | x = x.reshape((x.shape[0], img_size, img_size, 3)) 44 | 45 | self.batch_images = x 46 | self.curr_batch = batch_index 47 | 48 | def get_batch_index(self, index): 49 | j = 1 50 | while index >= self.S[j]: 51 | j += 1 52 | return j 53 | 54 | def load_image(self, index): 55 | batch_index = self.get_batch_index(index) 56 | if self.curr_batch != batch_index: 57 | self.load_image_batch(batch_index) 58 | 59 | return self.batch_images[index-self.S[batch_index-1]] 60 | 61 | def __getitem__(self, index): 62 | index = (index + self.offset) % self.N 63 | 64 | img = self.load_image(index) 65 | if self.transform is not None: 66 | img = self.transform(img) 67 | 68 | return img, self.labels[index] 69 | 70 | def __len__(self): 71 | return self.N -------------------------------------------------------------------------------- /training_from_scratch/evaluation/svhn_loader.py: 
-------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from PIL import Image 3 | import os 4 | import os.path 5 | import numpy as np 6 | 7 | 8 | class SVHN(data.Dataset): 9 | url = "" 10 | filename = "" 11 | file_md5 = "" 12 | split_list = { 13 | 'train': ["http://ufldl.stanford.edu/housenumbers/train_32x32.mat", 14 | "train_32x32.mat", "e26dedcc434d2e4c54c9b2d4a06d8373"], 15 | 'test': ["http://ufldl.stanford.edu/housenumbers/test_32x32.mat", 16 | "test_32x32.mat", "eb5a983be6a315427106f1b164d9cef3"], 17 | 'extra': ["http://ufldl.stanford.edu/housenumbers/extra_32x32.mat", 18 | "extra_32x32.mat", "a93ce644f1a588dc4d68dda5feec44a7"], 19 | 'train_and_extra': [ 20 | ["http://ufldl.stanford.edu/housenumbers/train_32x32.mat", 21 | "train_32x32.mat", "e26dedcc434d2e4c54c9b2d4a06d8373"], 22 | ["http://ufldl.stanford.edu/housenumbers/extra_32x32.mat", 23 | "extra_32x32.mat", "a93ce644f1a588dc4d68dda5feec44a7"]]} 24 | 25 | def __init__(self, root, split='train', 26 | transform=None, target_transform=None, download=False): 27 | self.root = root 28 | self.transform = transform 29 | self.target_transform = target_transform 30 | self.split = split # training set or test set or extra set 31 | 32 | if self.split not in self.split_list: 33 | raise ValueError('Wrong split entered! Please use split="train" ' 34 | 'or split="extra" or split="test" ' 35 | 'or split="train_and_extra" ') 36 | 37 | if self.split == "train_and_extra": 38 | self.url = self.split_list[split][0][0] 39 | self.filename = self.split_list[split][0][1] 40 | self.file_md5 = self.split_list[split][0][2] 41 | else: 42 | self.url = self.split_list[split][0] 43 | self.filename = self.split_list[split][1] 44 | self.file_md5 = self.split_list[split][2] 45 | 46 | # import here rather than at top of file because this is 47 | # an optional dependency for torchvision 48 | import scipy.io as sio 49 | 50 | # reading(loading) mat file as array 51 | loaded_mat = sio.loadmat(os.path.join(root, self.filename)) 52 | 53 | if self.split == "test": 54 | self.data = loaded_mat['X'] 55 | self.targets = loaded_mat['y'] 56 | # Note label 10 == 0 so modulo operator required 57 | self.targets = (self.targets % 10).squeeze() # convert to zero-based indexing 58 | self.data = np.transpose(self.data, (3, 2, 0, 1)) 59 | else: 60 | self.data = loaded_mat['X'] 61 | self.targets = loaded_mat['y'] 62 | 63 | if self.split == "train_and_extra": 64 | extra_filename = self.split_list[split][1][1] 65 | loaded_mat = sio.loadmat(os.path.join(root, extra_filename)) 66 | self.data = np.concatenate([self.data, 67 | loaded_mat['X']], axis=3) 68 | self.targets = np.vstack((self.targets, 69 | loaded_mat['y'])) 70 | # Note label 10 == 0 so modulo operator required 71 | self.targets = (self.targets % 10).squeeze() # convert to zero-based indexing 72 | self.data = np.transpose(self.data, (3, 2, 0, 1)) 73 | 74 | def __getitem__(self, index): 75 | if self.split == "test": 76 | img, target = self.data[index], self.targets[index] 77 | else: 78 | img, target = self.data[index], self.targets[index] 79 | 80 | # doing this so that it is consistent with all other datasets 81 | # to return a PIL Image 82 | img = Image.fromarray(np.transpose(img, (1, 2, 0))) 83 | 84 | if self.transform is not None: 85 | img = self.transform(img) 86 | 87 | if self.target_transform is not None: 88 | target = self.target_transform(target) 89 | 90 | return img, target.astype(np.long) 91 | 92 | def __len__(self): 93 | if self.split == "test": 94 | return 
len(self.data) 95 | else: 96 | return len(self.data) 97 | 98 | def _check_integrity(self): 99 | root = self.root 100 | if self.split == "train_and_extra": 101 | md5 = self.split_list[self.split][0][2] 102 | fpath = os.path.join(root, self.filename) 103 | train_integrity = check_integrity(fpath, md5) 104 | extra_filename = self.split_list[self.split][1][1] 105 | md5 = self.split_list[self.split][1][2] 106 | fpath = os.path.join(root, extra_filename) 107 | return check_integrity(fpath, md5) and train_integrity 108 | else: 109 | md5 = self.split_list[self.split][2] 110 | fpath = os.path.join(root, self.filename) 111 | return check_integrity(fpath, md5) 112 | 113 | def download(self): 114 | if self.split == "train_and_extra": 115 | md5 = self.split_list[self.split][0][2] 116 | download_url(self.url, self.root, self.filename, md5) 117 | extra_filename = self.split_list[self.split][1][1] 118 | md5 = self.split_list[self.split][1][2] 119 | download_url(self.url, self.root, extra_filename, md5) 120 | else: 121 | md5 = self.split_list[self.split][2] 122 | download_url(self.url, self.root, self.filename, md5) 123 | -------------------------------------------------------------------------------- /training_from_scratch/image_folder.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import Any, Callable, cast, Dict, List, Optional, Tuple 3 | from torchvision.datasets import DatasetFolder 4 | from torchvision.datasets.folder import default_loader, IMG_EXTENSIONS 5 | 6 | 7 | class ImageSubfolder(DatasetFolder): 8 | """Extend ImageFolder to support folder subsets 9 | This class inherits from :class:`~torchvision.datasets.DatasetFolder` so 10 | the same methods can be overridden to customize the dataset. 11 | Args: 12 | root (string): Root directory path. 13 | transform (callable, optional): A function/transform that takes in a PIL image 14 | and returns a transformed version. E.g., ``transforms.RandomCrop`` 15 | target_transform (callable, optional): A function/transform that takes in the 16 | target and transforms it. 17 | loader (callable, optional): A function to load an image given its path. 18 | is_valid_file (callable, optional): A function that takes the path of an image file 19 | and checks if the file is a valid file (used to check for corrupt files) 20 | class_to_idx (dict): Dict with items (class_name, class_index). 21 | Attributes: 22 | classes (list): List of the class names sorted alphabetically. 23 | class_to_idx (dict): Dict with items (class_name, class_index).
24 | imgs (list): List of (image path, class_index) tuples 25 | """ 26 | 27 | def __init__( 28 | self, 29 | root: str, 30 | transform: Optional[Callable] = None, 31 | target_transform: Optional[Callable] = None, 32 | loader: Callable[[str], Any] = default_loader, 33 | is_valid_file: Optional[Callable[[str], bool]] = None, 34 | class_to_idx: Optional[Dict] = None, 35 | ): 36 | super(DatasetFolder, self).__init__(root, transform=transform, target_transform=target_transform) 37 | if class_to_idx is not None: 38 | classes = class_to_idx.keys() 39 | else: 40 | classes, class_to_idx = self.find_classes(self.root) 41 | extensions = IMG_EXTENSIONS if is_valid_file is None else None, 42 | samples = self.make_dataset(self.root, class_to_idx, extensions[0], is_valid_file) 43 | 44 | self.loader = loader 45 | self.extensions = extensions 46 | 47 | self.classes = classes 48 | self.class_to_idx = class_to_idx 49 | self.samples = samples 50 | self.targets = [s[1] for s in samples] 51 | self.imgs = self.samples -------------------------------------------------------------------------------- /training_from_scratch/losses.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import time 6 | from copy import deepcopy 7 | 8 | class CompLoss(nn.Module): 9 | def __init__(self, args, temperature=0.07, base_temperature=0.07): 10 | super(CompLoss, self).__init__() 11 | self.args = args 12 | self.temperature = temperature 13 | self.base_temperature = base_temperature 14 | 15 | def forward(self, features, prototypes, labels): 16 | device = torch.device('cuda') 17 | 18 | proxy_labels = torch.arange(0, self.args.n_cls).to(device) 19 | batch_size = features.shape[0] 20 | labels = labels.contiguous().view(-1, 1) 21 | if labels.shape[0] != batch_size: 22 | raise ValueError('Num of labels does not match num of features') 23 | mask = torch.eq(labels, proxy_labels.T).float().to(device) 24 | 25 | # compute logits 26 | anchor_feature = features 27 | contrast_feature = prototypes / prototypes.norm(dim=-1, keepdim=True) 28 | anchor_dot_contrast = torch.div( 29 | torch.matmul(anchor_feature, contrast_feature.T), 30 | self.temperature) 31 | # for numerical stability 32 | logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True) 33 | logits = anchor_dot_contrast - logits_max.detach() 34 | 35 | # compute log_prob 36 | exp_logits = torch.exp(logits) 37 | log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True)) 38 | # compute mean of log-likelihood over positive 39 | mean_log_prob_pos = (mask * log_prob).sum(1) 40 | loss = - (self.temperature / self.base_temperature) * mean_log_prob_pos.mean() 41 | return loss 42 | 43 | class DispLoss(nn.Module): 44 | def __init__(self, args, model, loader, temperature= 0.1, base_temperature=0.1, cifar=True): 45 | super(DispLoss, self).__init__() 46 | self.args = args 47 | self.temperature = temperature 48 | self.base_temperature = base_temperature 49 | self.register_buffer("prototypes", torch.zeros(self.args.n_cls,self.args.feat_dim)) 50 | self.model = model 51 | self.loader = loader 52 | self.init_class_prototypes(if_cifar=cifar) 53 | 54 | def forward(self, features, labels): 55 | 56 | prototypes = self.prototypes 57 | num_cls = self.args.n_cls 58 | for j in range(len(features)): 59 | prototypes[labels[j].item()] = F.normalize(prototypes[labels[j].item()] *self.args.proto_m + features[j]*(1-self.args.proto_m), dim=0) 60 | 
self.prototypes = prototypes.detach() 61 | labels = torch.arange(0, num_cls).cuda() 62 | labels = labels.contiguous().view(-1, 1) 63 | labels = labels.contiguous().view(-1, 1) 64 | 65 | mask = (1- torch.eq(labels, labels.T).float()).cuda() 66 | 67 | 68 | logits = torch.div( 69 | torch.matmul(prototypes, prototypes.T), 70 | self.temperature) 71 | 72 | logits_mask = torch.scatter( 73 | torch.ones_like(mask), 74 | 1, 75 | torch.arange(num_cls).view(-1, 1).cuda(), 76 | 0 77 | ) 78 | mask = mask * logits_mask 79 | mean_prob_neg = torch.log((mask * torch.exp(logits)).sum(1) / mask.sum(1)) 80 | mean_prob_neg = mean_prob_neg[~torch.isnan(mean_prob_neg)] 81 | loss = self.temperature / self.base_temperature * mean_prob_neg.mean() 82 | return loss 83 | 84 | def init_class_prototypes(self, if_cifar): 85 | """Initialize class prototypes""" 86 | self.model.eval() 87 | start = time.time() 88 | prototype_counts = [0]*self.args.n_cls 89 | with torch.no_grad(): 90 | prototypes = torch.zeros(self.args.n_cls,self.args.feat_dim).cuda() 91 | for i, (input, target) in enumerate(self.loader): 92 | input, target = input.cuda(), target.cuda() 93 | if if_cifar: 94 | features = self.model(input) 95 | else: 96 | _, _, features = self.model(input) 97 | for j, feature in enumerate(features): 98 | prototypes[target[j].item()] += feature 99 | prototype_counts[target[j].item()] += 1 100 | for cls in range(self.args.n_cls): 101 | prototypes[cls] /= prototype_counts[cls] 102 | # measure elapsed time 103 | duration = time.time() - start 104 | print(f'Time to initialize prototypes: {duration:.3f}') 105 | prototypes = F.normalize(prototypes, dim=1) 106 | self.prototypes = prototypes 107 | 108 | ##### 109 | class SupConLoss(nn.Module): 110 | """Supervised Contrastive Learning: https://arxiv.org/pdf/2004.11362.pdf. 111 | It also supports the unsupervised contrastive loss in SimCLR""" 112 | def __init__(self, temperature=0.07, contrast_mode='all', 113 | base_temperature=0.07): 114 | super(SupConLoss, self).__init__() 115 | self.temperature = temperature 116 | self.contrast_mode = contrast_mode 117 | self.base_temperature = base_temperature 118 | 119 | def forward(self, features, labels=None, mask=None): 120 | """Compute loss for model. If both `labels` and `mask` are None, 121 | it degenerates to SimCLR unsupervised loss: 122 | https://arxiv.org/pdf/2002.05709.pdf 123 | 124 | Args: 125 | features: hidden vector of shape [bsz, n_views, ...]. 126 | labels: ground truth of shape [bsz]. 127 | mask: contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if sample j 128 | has the same class as sample i. Can be asymmetric. 129 | Returns: 130 | A loss scalar. 
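        Example (a minimal sketch; shapes and class count are illustrative):
            >>> criterion = SupConLoss(temperature=0.07)
            >>> feats = F.normalize(torch.randn(8, 2, 128), dim=-1)  # [bsz, n_views, feat_dim]
            >>> labels = torch.randint(0, 10, (8,))
            >>> loss = criterion(feats, labels)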
131 | """ 132 | device = (torch.device('cuda') 133 | if features.is_cuda 134 | else torch.device('cpu')) 135 | 136 | if len(features.shape) < 3: 137 | raise ValueError('`features` needs to be [bsz, n_views, ...],' 138 | 'at least 3 dimensions are required') 139 | if len(features.shape) > 3: 140 | features = features.view(features.shape[0], features.shape[1], -1) 141 | 142 | batch_size = features.shape[0] 143 | if labels is not None and mask is not None: 144 | raise ValueError('Cannot define both `labels` and `mask`') 145 | elif labels is None and mask is None: 146 | mask = torch.eye(batch_size, dtype=torch.float32).to(device) 147 | elif labels is not None: 148 | labels = labels.contiguous().view(-1, 1) 149 | if labels.shape[0] != batch_size: 150 | raise ValueError('Num of labels does not match num of features') 151 | mask = torch.eq(labels, labels.T).float().to(device) 152 | else: 153 | mask = mask.float().to(device) 154 | 155 | contrast_count = features.shape[1] 156 | contrast_feature = torch.cat(torch.unbind(features, dim=1), dim=0) 157 | if self.contrast_mode == 'one': 158 | anchor_feature = features[:, 0] 159 | anchor_count = 1 160 | elif self.contrast_mode == 'all': 161 | anchor_feature = contrast_feature 162 | anchor_count = contrast_count 163 | else: 164 | raise ValueError('Unknown mode: {}'.format(self.contrast_mode)) 165 | 166 | # compute logits 167 | anchor_dot_contrast = torch.div( 168 | torch.matmul(anchor_feature, contrast_feature.T), 169 | self.temperature) 170 | # for numerical stability 171 | logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True) 172 | logits = anchor_dot_contrast - logits_max.detach() 173 | 174 | # tile mask 175 | mask = mask.repeat(anchor_count, contrast_count) 176 | # mask-out self-contrast cases 177 | logits_mask = torch.scatter( 178 | torch.ones_like(mask), 179 | 1, 180 | torch.arange(batch_size * anchor_count).view(-1, 1).to(device), 181 | 0 182 | ) 183 | mask = mask * logits_mask 184 | 185 | # compute log_prob 186 | exp_logits = torch.exp(logits) * logits_mask 187 | log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True)) 188 | 189 | # compute mean of log-likelihood over positive 190 | mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1) 191 | 192 | # loss 193 | loss = - (self.temperature / self.base_temperature) * mean_log_prob_pos 194 | loss = loss.view(anchor_count, batch_size).mean() 195 | 196 | return loss 197 | -------------------------------------------------------------------------------- /training_from_scratch/models/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .densenet import * 4 | from .resnet import * -------------------------------------------------------------------------------- /training_from_scratch/models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/training_from_scratch/models/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /training_from_scratch/models/__pycache__/densenet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/training_from_scratch/models/__pycache__/densenet.cpython-37.pyc 
-------------------------------------------------------------------------------- /training_from_scratch/models/__pycache__/resnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/training_from_scratch/models/__pycache__/resnet.cpython-37.pyc -------------------------------------------------------------------------------- /training_from_scratch/models/__pycache__/resnet_im100.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/training_from_scratch/models/__pycache__/resnet_im100.cpython-37.pyc -------------------------------------------------------------------------------- /training_from_scratch/models/__pycache__/resnet_outliers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/training_from_scratch/models/__pycache__/resnet_outliers.cpython-37.pyc -------------------------------------------------------------------------------- /training_from_scratch/models/__pycache__/wrn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeplearning-wisc/npos/583c06db0876c3d1c4e5a9a5371cc3a5cb916255/training_from_scratch/models/__pycache__/wrn.cpython-37.pyc -------------------------------------------------------------------------------- /training_from_scratch/models/densenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | 9 | import torchvision.datasets as dset 10 | import torchvision.transforms as transforms 11 | from torch.utils.data import DataLoader 12 | 13 | import torchvision.models as models 14 | 15 | import sys 16 | import math 17 | 18 | class Bottleneck(nn.Module): 19 | def __init__(self, nChannels, growthRate): 20 | super(Bottleneck, self).__init__() 21 | interChannels = 4*growthRate 22 | self.bn1 = nn.BatchNorm2d(nChannels) 23 | self.conv1 = nn.Conv2d(nChannels, interChannels, kernel_size=1, 24 | bias=False) 25 | self.bn2 = nn.BatchNorm2d(interChannels) 26 | self.conv2 = nn.Conv2d(interChannels, growthRate, kernel_size=3, 27 | padding=1, bias=False) 28 | 29 | def forward(self, x): 30 | out = self.conv1(F.relu(self.bn1(x))) 31 | out = self.conv2(F.relu(self.bn2(out))) 32 | out = torch.cat((x, out), 1) 33 | return out 34 | 35 | class SingleLayer(nn.Module): 36 | def __init__(self, nChannels, growthRate): 37 | super(SingleLayer, self).__init__() 38 | self.bn1 = nn.BatchNorm2d(nChannels) 39 | self.conv1 = nn.Conv2d(nChannels, growthRate, kernel_size=3, 40 | padding=1, bias=False) 41 | 42 | def forward(self, x): 43 | out = self.conv1(F.relu(self.bn1(x))) 44 | out = torch.cat((x, out), 1) 45 | return out 46 | 47 | class Transition(nn.Module): 48 | def __init__(self, nChannels, nOutChannels): 49 | super(Transition, self).__init__() 50 | self.bn1 = nn.BatchNorm2d(nChannels) 51 | self.conv1 = nn.Conv2d(nChannels, nOutChannels, kernel_size=1, 52 | bias=False) 53 | 54 | def forward(self, x): 55 | out = self.conv1(F.relu(self.bn1(x))) 56 | out = F.avg_pool2d(out, 2) 57 | return out 58 | 59 | class 
DenseNet(nn.Module): 60 | def __init__(self, growthRate = 12, depth = 100, reduction = 0.5, bottleneck = True): 61 | super(DenseNet, self).__init__() 62 | 63 | nDenseBlocks = (depth-4) // 3 64 | if bottleneck: 65 | nDenseBlocks //= 2 66 | 67 | nChannels = 2*growthRate 68 | self.conv1 = nn.Conv2d(3, nChannels, kernel_size=3, padding=1, 69 | bias=False) 70 | self.dense1 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck) 71 | nChannels += nDenseBlocks*growthRate 72 | nOutChannels = int(math.floor(nChannels*reduction)) 73 | self.trans1 = Transition(nChannels, nOutChannels) 74 | 75 | nChannels = nOutChannels 76 | self.dense2 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck) 77 | nChannels += nDenseBlocks*growthRate 78 | nOutChannels = int(math.floor(nChannels*reduction)) 79 | self.trans2 = Transition(nChannels, nOutChannels) 80 | 81 | nChannels = nOutChannels 82 | self.dense3 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck) 83 | nChannels += nDenseBlocks*growthRate 84 | 85 | self.bn1 = nn.BatchNorm2d(nChannels) 86 | # self.fc = nn.Linear(nChannels, nClasses) 87 | 88 | for m in self.modules(): 89 | if isinstance(m, nn.Conv2d): 90 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 91 | m.weight.data.normal_(0, math.sqrt(2. / n)) 92 | elif isinstance(m, nn.BatchNorm2d): 93 | m.weight.data.fill_(1) 94 | m.bias.data.zero_() 95 | elif isinstance(m, nn.Linear): 96 | m.bias.data.zero_() 97 | 98 | def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck): 99 | layers = [] 100 | for i in range(int(nDenseBlocks)): 101 | if bottleneck: 102 | layers.append(Bottleneck(nChannels, growthRate)) 103 | else: 104 | layers.append(SingleLayer(nChannels, growthRate)) 105 | nChannels += growthRate 106 | return nn.Sequential(*layers) 107 | 108 | def forward(self, x): 109 | out = self.conv1(x) 110 | out = self.trans1(self.dense1(out)) 111 | out = self.trans2(self.dense2(out)) 112 | out = self.dense3(out) 113 | out = torch.squeeze(F.avg_pool2d(F.relu(self.bn1(out)), 8)) 114 | # out = F.log_softmax(self.fc(out)) 115 | return out 116 | 117 | def intermediate_forward(self, x, layer_index): 118 | out = self.conv1(x) 119 | out = self.trans1(self.dense1(out)) 120 | out = self.trans2(self.dense2(out)) 121 | out = self.dense3(out) 122 | out = F.relu(self.bn1(out)) 123 | # out = torch.squeeze(F.avg_pool2d(F.relu(self.bn1(out)), 8)) 124 | # out = F.log_softmax(self.fc(out)) 125 | return out 126 | 127 | def feature_list(self, x): 128 | out_list = [] 129 | out = self.conv1(x) 130 | out = self.trans1(self.dense1(out)) 131 | out = self.trans2(self.dense2(out)) 132 | out = self.dense3(out) 133 | out = F.relu(self.bn1(out)) 134 | out_list.append(out) 135 | # out = torch.squeeze(F.avg_pool2d(F.relu(self.bn1(out)), 8)) 136 | # out = F.log_softmax(self.fc(out)) 137 | return out_list 138 | 139 | model_dict = { 140 | 'densenet100': [DenseNet, 342], 141 | } 142 | 143 | class SupCEHeadDenseNet(nn.Module): 144 | """encoder + classifier""" 145 | def __init__(self, name='densenet100', head='linear', feat_dim = 128, num_classes=100, multiplier = 1): 146 | super(SupCEHeadDenseNet, self).__init__() 147 | model_fun, dim_in = model_dict[name] 148 | self.encoder = model_fun() 149 | self.fc = nn.Linear(dim_in, num_classes) 150 | self.multiplier = multiplier 151 | 152 | if head == 'linear': 153 | self.head = nn.Linear(dim_in, feat_dim) 154 | elif head == 'mlp': 155 | self.head = nn.Sequential( 156 | nn.Linear(dim_in, dim_in), 157 | nn.ReLU(inplace=True), 158 | nn.Linear(dim_in, 
feat_dim) 159 | ) 160 | 161 | 162 | def forward(self, x): 163 | features = self.encoder(x) 164 | return self.fc(features) 165 | 166 | def intermediate_forward(self, x, layer_index): 167 | if layer_index == 0: 168 | return self.encoder.intermediate_forward(x, layer_index) 169 | elif layer_index == 1: 170 | feat = self.encoder(x) 171 | feat = self.multiplier * F.normalize(self.head(feat), dim=1) 172 | return feat -------------------------------------------------------------------------------- /training_from_scratch/models/fine_tuning_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | class clssimp(nn.Module): 6 | def __init__(self, ch=2880, num_classes = 80): 7 | super(clssimp, self).__init__() 8 | # self.way1 = nn.Sequential( 9 | # nn.Linear(ch, 1024, bias=True), 10 | # nn.GroupNorm(num_channels=1024, num_groups=32), 11 | # # nn.BatchNorm1d(1024), 12 | # nn.ReLU(inplace=True), 13 | # ) 14 | # self.cls= nn.Linear(2048, num_classes,bias=True) 15 | 16 | # self.conv = nn.Conv2d(in_channels=2048, out_channels=num_classes, kernel_size=1) 17 | self.conv = nn.Conv2d(in_channels=ch, out_channels=num_classes, kernel_size=1) 18 | self.pool = nn.AdaptiveAvgPool2d(output_size=(1, 1)) 19 | 20 | def forward(self, x): 21 | # beta = 0 22 | x = self.conv(x) # 64x9x7x7 23 | # max_x = F.adaptive_max_pool2d(x, (1,1)) 24 | # x = x * (x > beta*max_x) 25 | x = self.pool(x) # 64x9x1x1 26 | logits = x.reshape(x.size(0), -1) #64x9 27 | return logits 28 | 29 | def intermediate_forward(self, x): 30 | x = self.pool(x) 31 | x = x.reshape(x.size(0), -1) 32 | # `self.way1` is commented out in __init__, so calling it here would raise AttributeError; return the pooled features directly 33 | return x 34 | 35 | -------------------------------------------------------------------------------- /training_from_scratch/models/layers.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn.parameter import Parameter 5 | from torch.nn import functional as F 6 | 7 | 8 | class Conv2d(nn.Conv2d): 9 | 10 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 11 | padding=0, dilation=1, groups=1, bias=True): 12 | super(Conv2d, self).__init__(in_channels, out_channels, kernel_size, stride, 13 | padding, dilation, groups, bias) 14 | 15 | def forward(self, x): 16 | # return super(Conv2d, self).forward(x) 17 | weight = self.weight 18 | weight_mean = weight.mean(dim=1, keepdim=True).mean(dim=2, 19 | keepdim=True).mean(dim=3, keepdim=True) 20 | weight = weight - weight_mean 21 | std = weight.view(weight.size(0), -1).std(dim=1).view(-1, 1, 1, 1) + 1e-5 22 | weight = weight / std.expand_as(weight) 23 | return F.conv2d(x, weight, self.bias, self.stride, 24 | self.padding, self.dilation, self.groups) 25 | 26 | 27 | def BatchNorm2d(num_features): 28 | return nn.GroupNorm(num_channels=num_features, num_groups=32) -------------------------------------------------------------------------------- /training_from_scratch/models/resnet_im100.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | from torchvision import models 4 | class resnet101(nn.Module): 5 | def __init__(self, num_class=100): 6 | super(resnet101, self).__init__() 7 | self.model = models.resnet101(pretrained=False) 8 | self.model.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 9 | self.proj = nn.Sequential( 10 | nn.Linear(2048, 512), 11 | nn.ReLU(), 12 | ) 13 | self.outlier_MLP = nn.Sequential( 14 
| nn.Linear(512, 32), 15 | nn.ReLU(), 16 | nn.Linear(32, 1), 17 | ) 18 | self.fc = nn.Linear(512, num_class) 19 | 20 | 21 | def forward(self, x, fc=True, mlp=False): 22 | if mlp==True: 23 | return self.outlier_MLP(x) 24 | if fc==False: 25 | batch = x.size(0) 26 | x = self.model.conv1(x) 27 | x = self.model.bn1(x) 28 | x = self.model.relu(x) 29 | x = self.model.maxpool(x) 30 | x = self.model.layer1(x) 31 | x = self.model.layer2(x) 32 | x = self.model.layer3(x) 33 | x = self.model.layer4(x) 34 | x = self.model.avgpool(x) 35 | # print(x.shape) 36 | #feature = x.view(batch, -1) 37 | #feature = self.proj(feature) 38 | #logit = self.fc(feature) 39 | return x 40 | batch = x.size(0) 41 | x = self.model.conv1(x) 42 | x = self.model.bn1(x) 43 | x = self.model.relu(x) 44 | x = self.model.maxpool(x) 45 | x = self.model.layer1(x) 46 | x = self.model.layer2(x) 47 | x = self.model.layer3(x) 48 | x = self.model.layer4(x) 49 | x = self.model.avgpool(x) 50 | #print(x.shape) 51 | feature = x.view(batch, -1) 52 | feature = self.proj(feature) 53 | logit = self.fc(feature) 54 | return logit, x, feature 55 | -------------------------------------------------------------------------------- /training_from_scratch/models/wrn.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class BasicBlock(nn.Module): 8 | def __init__(self, in_planes, out_planes, stride, dropRate=0.0): 9 | super(BasicBlock, self).__init__() 10 | self.bn1 = nn.BatchNorm2d(in_planes) 11 | self.relu1 = nn.ReLU(inplace=True) 12 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 13 | padding=1, bias=False) 14 | self.bn2 = nn.BatchNorm2d(out_planes) 15 | self.relu2 = nn.ReLU(inplace=True) 16 | self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, 17 | padding=1, bias=False) 18 | self.droprate = dropRate 19 | self.equalInOut = (in_planes == out_planes) 20 | self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, 21 | padding=0, bias=False) or None 22 | 23 | def forward(self, x): 24 | if not self.equalInOut: 25 | x = self.relu1(self.bn1(x)) 26 | else: 27 | out = self.relu1(self.bn1(x)) 28 | if self.equalInOut: 29 | out = self.relu2(self.bn2(self.conv1(out))) 30 | else: 31 | out = self.relu2(self.bn2(self.conv1(x))) 32 | if self.droprate > 0: 33 | out = F.dropout(out, p=self.droprate, training=self.training) 34 | out = self.conv2(out) 35 | if not self.equalInOut: 36 | return torch.add(self.convShortcut(x), out) 37 | else: 38 | return torch.add(x, out) 39 | 40 | 41 | class NetworkBlock(nn.Module): 42 | def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0): 43 | super(NetworkBlock, self).__init__() 44 | self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate) 45 | 46 | def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate): 47 | layers = [] 48 | for i in range(nb_layers): 49 | layers.append(block(i == 0 and in_planes or out_planes, out_planes, i == 0 and stride or 1, dropRate)) 50 | return nn.Sequential(*layers) 51 | 52 | def forward(self, x): 53 | return self.layer(x) 54 | 55 | 56 | class WideResNet(nn.Module): 57 | def __init__(self, depth, num_classes, widen_factor=2, dropRate=0.0): 58 | super(WideResNet, self).__init__() 59 | nChannels = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor] 60 | assert ((depth - 4) % 6 == 0) 61 
| n = (depth - 4) // 6 62 | block = BasicBlock 63 | # 1st conv before any network block 64 | self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1, 65 | padding=1, bias=False) 66 | # 1st block 67 | self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropRate) 68 | # 2nd block 69 | self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropRate) 70 | # 3rd block 71 | self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropRate) 72 | # global average pooling and classifier 73 | self.bn1 = nn.BatchNorm2d(nChannels[3]) 74 | self.relu = nn.ReLU(inplace=True) 75 | # self.fc = nn.Linear(nChannels[3], num_classes) 76 | self.nChannels = nChannels[3] 77 | 78 | for m in self.modules(): 79 | if isinstance(m, nn.Conv2d): 80 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 81 | m.weight.data.normal_(0, math.sqrt(2. / n)) 82 | elif isinstance(m, nn.BatchNorm2d): 83 | m.weight.data.fill_(1) 84 | m.bias.data.zero_() 85 | elif isinstance(m, nn.Linear): 86 | m.bias.data.zero_() 87 | 88 | def forward(self, x): 89 | out = self.conv1(x) 90 | out = self.block1(out) 91 | out = self.block2(out) 92 | out = self.block3(out) 93 | out = self.relu(self.bn1(out)) 94 | out = F.avg_pool2d(out, 8) 95 | out = out.view(-1, self.nChannels) 96 | # return self.fc(out)\ 97 | return out 98 | 99 | def intermediate_forward(self, x, layer_index): 100 | out = self.conv1(x) 101 | out = self.block1(out) 102 | out = self.block2(out) 103 | out = self.block3(out) 104 | out = self.relu(self.bn1(out)) 105 | return out 106 | 107 | def feature_list(self, x): 108 | out_list = [] 109 | out = self.conv1(x) 110 | out = self.block1(out) 111 | out = self.block2(out) 112 | out = self.block3(out) 113 | out = self.relu(self.bn1(out)) 114 | out_list.append(out) 115 | # out = F.avg_pool2d(out, 8) 116 | # out = out.view(-1, self.nChannels) 117 | return out_list 118 | 119 | class SupCEHeadWideResNet(nn.Module): 120 | """encoder + classifier""" 121 | def __init__(self, depth, num_classes = 10, widen_factor=2, dropRate=0.0, head='linear', feat_dim = 128, multiplier = 1): 122 | super(SupCEHeadWideResNet, self).__init__() 123 | self.encoder = WideResNet(depth, num_classes = num_classes, widen_factor= widen_factor, dropRate=dropRate) 124 | dim_in = self.encoder.nChannels 125 | self.fc = nn.Linear(dim_in, num_classes) 126 | self.multiplier = multiplier 127 | print(f"representation dim: {dim_in}") 128 | #FOR ABLATION b, c 129 | # self.fc = nn.Linear(feat_dim, num_classes) 130 | # #END 131 | 132 | if head == 'linear': 133 | self.head = nn.Linear(dim_in, feat_dim) 134 | elif head == 'mlp': 135 | self.head = nn.Sequential( 136 | nn.Linear(dim_in, dim_in), 137 | nn.ReLU(inplace=True), 138 | nn.Linear(dim_in, feat_dim) 139 | ) 140 | 141 | def forward(self, x): 142 | features = self.encoder(x) 143 | #FOR ABLATION a 144 | # features = F.normalize(features, dim=1) 145 | #END 146 | #FOR ABLATION b 147 | # features = F.normalize(self.head(features), dim=1) 148 | #END 149 | #FOR ABLATION c 150 | # features = self.head(features) 151 | #END 152 | 153 | return self.fc(features) 154 | 155 | def intermediate_forward(self, x, layer_index): 156 | if layer_index == 0: #use penultimate layer 157 | return self.encoder.intermediate_forward(x, layer_index) 158 | elif layer_index == 1: #use proj head 159 | feat = self.encoder(x) 160 | #FOR ABLATION a 161 | # feat = F.normalize(feat, dim=1) 162 | # feat = F.normalize(self.head(feat), dim=1) 163 | feat = self.multiplier * F.normalize(self.head(feat), dim=1) 164 | return 
feat -------------------------------------------------------------------------------- /training_from_scratch/scripts/test_npos_imagenet_100.sh: -------------------------------------------------------------------------------- 1 | python test_npos.py --dataset ImageNet-100 --score MSP --load ImageNet-100_CLIP.pt --T 1 2 | -------------------------------------------------------------------------------- /training_from_scratch/scripts/test_npos_imagenet_1k.sh: -------------------------------------------------------------------------------- 1 | python test_npos.py --dataset ImageNet-1000 --score MSP --load ImageNet-1k_CLIP.pt --T 1 2 | -------------------------------------------------------------------------------- /training_from_scratch/scripts/train_npos_imagenet_100.sh: -------------------------------------------------------------------------------- 1 | python train_imagenet.py -- -------------------------------------------------------------------------------- /training_from_scratch/scripts/train_npos_imagenet_1k.sh: -------------------------------------------------------------------------------- 1 | python train_npos.py --ngpu 8 --start_epoch 40 --sample_number 1000 --epochs 100 --sample_from 1500 --select 300 --loss_weight 0.1 --dataset ImageNet-100 --pick_nums 1 --cov_mat 0.1 --K 400 --save /nobackup-slow/taoleitian/model/ImageNet-100/npos/1 --batch_size 1000 --learning_rate 0.1 --decay_rate 0.1 -------------------------------------------------------------------------------- /training_from_scratch/test_npos_cifar10.sh: -------------------------------------------------------------------------------- 1 | python eval_ood_detection.py --in_dataset CIFAR-10 --model resnet18 --ckpt /nobackup-fast/taoleitian/test/CIFAR100.pt --K 300 2 | -------------------------------------------------------------------------------- /training_from_scratch/test_npos_cifar100.sh: -------------------------------------------------------------------------------- 1 | python eval_ood_detection.py --in_dataset CIFAR-100 --model resnet34 --ckpt /nobackup-fast/taoleitian/test/CIFAR100.pt --K 300 2 | -------------------------------------------------------------------------------- /training_from_scratch/test_npos_imagenet100.sh: -------------------------------------------------------------------------------- 1 | python eval_ood_detection_in100.py --in_dataset ImageNet-100 --model resnet101 --ckpt /nobackup-fast/taoleitian/test/CIFAR100.pt --K 300 --feat_dim 512 2 | -------------------------------------------------------------------------------- /training_from_scratch/train_npos_cifar10.sh: -------------------------------------------------------------------------------- 1 | python train_CIFAR10.py --in-dataset CIFAR-10 --model resnet18 --temp 0.1 --sample_from 600 --K 300 --learning_rate 0.5 --batch-size 256 --cov_mat 0.1 --start_epoch_KNN 200 --ID_points_num 200 -------------------------------------------------------------------------------- /training_from_scratch/train_npos_cifar100.sh: -------------------------------------------------------------------------------- 1 | python train_CIFAR100.py --in-dataset CIFAR-100 --model resnet34 --temp 0.1 --sample_from 600 --K 300 --learning_rate 0.5 --batch-size 256 --cov_mat 0.1 --start_epoch_KNN 200 --ID_points_num 200 -------------------------------------------------------------------------------- /training_from_scratch/train_npos_imagenet100.sh: -------------------------------------------------------------------------------- 1 | python train_im100.py --in-dataset ImageNet-100 
--model resnet101 --temp 0.1 --sample_from 1000 --K 400 --learning_rate 0.01 --batch-size 512 --cov_mat 0.1 --start_epoch_KNN 200 --ID_points_num 300 2 | -------------------------------------------------------------------------------- /training_from_scratch/util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import math 3 | from models.wrn import SupCEHeadWideResNet 4 | import os 5 | from torch import nn 6 | from models.resnet import SupCEHeadResNet 7 | import numpy as np 8 | import torch 9 | import torch.optim as optim 10 | import torch.nn.functional as F 11 | from torchvision import datasets, transforms 12 | import torchvision.transforms as transforms 13 | import torch.backends.cudnn as cudnn 14 | 15 | class AverageMeter(object): 16 | """Computes and stores the average and current value""" 17 | def __init__(self): 18 | self.reset() 19 | 20 | def reset(self): 21 | self.val = 0 22 | self.avg = 0 23 | self.sum = 0 24 | self.count = 0 25 | 26 | def update(self, val, n=1): 27 | self.val = val 28 | self.sum += val * n 29 | self.count += n 30 | self.avg = self.sum / self.count 31 | 32 | 33 | def accuracy(output, target, topk=(1,)): 34 | """Computes the accuracy over the k top predictions for the specified values of k""" 35 | with torch.no_grad(): 36 | maxk = max(topk) 37 | batch_size = target.size(0) 38 | 39 | _, pred = output.topk(maxk, 1, True, True) 40 | pred = pred.t() 41 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 42 | 43 | res = [] 44 | for k in topk: 45 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 46 | res.append(correct_k.mul_(100.0 / batch_size)) 47 | return res 48 | 49 | def adjust_learning_rate(args, optimizer, epoch): 50 | lr = args.learning_rate 51 | if args.cosine: 52 | eta_min = lr * (args.lr_decay_rate ** 3) 53 | lr = eta_min + (lr - eta_min) * ( 54 | 1 + math.cos(math.pi * epoch / args.epochs)) / 2 55 | else: 56 | steps = np.sum(epoch > np.asarray(args.lr_decay_epochs)) 57 | if steps > 0: 58 | lr = lr * (args.lr_decay_rate ** steps) 59 | 60 | for param_group in optimizer.param_groups: 61 | param_group['lr'] = lr 62 | 63 | def warmup_learning_rate(args, epoch, batch_id, total_batches, optimizer): 64 | if args.warm and epoch <= args.warm_epochs: 65 | p = (batch_id + (epoch - 1) * total_batches) / \ 66 | (args.warm_epochs * total_batches) 67 | lr = args.warmup_from + p * (args.warmup_to - args.warmup_from) 68 | 69 | for param_group in optimizer.param_groups: 70 | param_group['lr'] = lr 71 | 72 | def sample_estimator(model, classifier, num_classes, feature_list, train_loader): 73 | """ 74 | compute sample mean and precision (inverse of covariance) 75 | return: sample_class_mean: list of class mean 76 | precision: list of precisions 77 | """ 78 | import sklearn.covariance 79 | 80 | model.eval() 81 | group_lasso = sklearn.covariance.EmpiricalCovariance(assume_centered=False) 82 | correct, total = 0, 0 83 | num_output = len(feature_list) 84 | num_sample_per_class = np.empty(num_classes) 85 | num_sample_per_class.fill(0) 86 | list_features = [] 87 | for i in range(num_output): 88 | temp_list = [] 89 | for j in range(num_classes): 90 | temp_list.append(0) 91 | list_features.append(temp_list) 92 | 93 | for data, target in train_loader: 94 | total += data.size(0) 95 | data = data.cuda() 96 | penultimate, out_features = model.encoder.feature_list(data) 97 | output = classifier(penultimate) 98 | # output, out_features = model.module.feature_list(data) 99 | 100 | # get hidden 
features 101 | for i in range(num_output): 102 | out_features[i] = out_features[i].view(out_features[i].size(0), out_features[i].size(1), -1) 103 | out_features[i] = torch.mean(out_features[i].data, 2) 104 | #TEMP 105 | # out_features[-1] = out_features[i] / out_features[i].norm(p=2, dim=1, keepdim=True) 106 | out_features[-1] = F.normalize(out_features[-1], dim=1) 107 | # compute the accuracy 108 | pred = output.data.max(1)[1] 109 | equal_flag = pred.eq(target.cuda()).cpu() 110 | correct += equal_flag.sum() 111 | 112 | # construct the sample matrix 113 | for i in range(data.size(0)): 114 | label = target[i] 115 | if num_sample_per_class[label] == 0: 116 | out_count = 0 117 | for out in out_features: 118 | list_features[out_count][label] = out[i].view(1, -1) 119 | out_count += 1 120 | else: 121 | out_count = 0 122 | for out in out_features: 123 | list_features[out_count][label] \ 124 | = torch.cat((list_features[out_count][label], out[i].view(1, -1)), 0) 125 | out_count += 1 126 | num_sample_per_class[label] += 1 127 | 128 | sample_class_mean = [] 129 | out_count = 0 130 | for num_feature in feature_list: 131 | temp_list = torch.Tensor(num_classes, int(num_feature)).cuda() 132 | for j in range(num_classes): 133 | temp_list[j] = torch.mean(list_features[out_count][j], 0) 134 | sample_class_mean.append(temp_list) 135 | out_count += 1 136 | 137 | precision = [] 138 | for k in range(num_output): 139 | X = 0 140 | for i in range(num_classes): 141 | if i == 0: 142 | X = list_features[k][i] - sample_class_mean[k][i] 143 | else: 144 | X = torch.cat((X, list_features[k][i] - sample_class_mean[k][i]), 0) 145 | 146 | # find inverse 147 | group_lasso.fit(X.cpu().numpy()) 148 | temp_precision = group_lasso.precision_ 149 | temp_precision = torch.from_numpy(temp_precision).float().cuda() 150 | precision.append(temp_precision) 151 | 152 | print('\n Training Accuracy:({:.2f}%)\n'.format(100. * correct / total)) 153 | 154 | return sample_class_mean, precision 155 | 156 | 157 | def estimate_dataset_mean_std(name = 'cifar10'): 158 | data = datasets.CIFAR10(root='./datasets/cifar10', train=True, download=True, 159 | transform=transforms.ToTensor()).data 160 | data = data.astype(np.float32)/255. 
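    # `data` here is the full CIFAR-10 training array of shape (50000, 32, 32, 3),
    # scaled to [0, 1]; each per-channel mean/std below is computed over all
    # 50000 * 32 * 32 pixel values of that channel.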
161 | 162 | means = [] 163 | stdevs = [] 164 | for i in range(3): 165 | pixels = data[:,:,:,i].ravel() 166 | means.append(np.mean(pixels)) 167 | stdevs.append(np.std(pixels)) 168 | 169 | print("means: {}".format(means)) 170 | print("stdevs: {}".format(stdevs)) 171 | print('transforms.Normalize(mean = {}, std = {})'.format(means, stdevs)) 172 | 173 | if __name__ == '__main__': 174 | estimate_dataset_mean_std() -------------------------------------------------------------------------------- /training_from_scratch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .tinyimages_80mn_loader import * 4 | from .imagenet_loader import * 5 | # from .mahalanobis_lib import * 6 | # from .gen_corruption_image import * 7 | from .cal_metric import * 8 | from .losses import * 9 | from .svhn_loader import SVHN 10 | from .display_results import * 11 | -------------------------------------------------------------------------------- /training_from_scratch/utils/anom_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | import sklearn.metrics as sk 4 | import time 5 | import torch 6 | from torch.autograd import Variable 7 | import os.path 8 | 9 | recall_level_default = 0.95 10 | 11 | def stable_cumsum(arr, rtol=1e-05, atol=1e-08): 12 | """Use high precision for cumsum and check that final value matches sum 13 | Parameters 14 | ---------- 15 | arr : array-like 16 | To be cumulatively summed as flat 17 | rtol : float 18 | Relative tolerance, see ``np.allclose`` 19 | atol : float 20 | Absolute tolerance, see ``np.allclose`` 21 | """ 22 | out = np.cumsum(arr, dtype=np.float64) 23 | expected = np.sum(arr, dtype=np.float64) 24 | if not np.allclose(out[-1], expected, rtol=rtol, atol=atol): 25 | raise RuntimeError('cumsum was found to be unstable: ' 26 | 'its last element does not correspond to sum') 27 | return out 28 | 29 | def fpr_and_fdr_at_recall(y_true, y_score, recall_level=recall_level_default, pos_label=None): 30 | classes = np.unique(y_true) 31 | if (pos_label is None and 32 | not (np.array_equal(classes, [0, 1]) or 33 | np.array_equal(classes, [-1, 1]) or 34 | np.array_equal(classes, [0]) or 35 | np.array_equal(classes, [-1]) or 36 | np.array_equal(classes, [1]))): 37 | raise ValueError("Data is not binary and pos_label is not specified") 38 | elif pos_label is None: 39 | pos_label = 1. 40 | 41 | # make y_true a boolean vector 42 | y_true = (y_true == pos_label) 43 | 44 | # sort scores and corresponding truth values 45 | desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1] 46 | y_score = y_score[desc_score_indices] 47 | y_true = y_true[desc_score_indices] 48 | 49 | # y_score typically has many tied values. Here we extract 50 | # the indices associated with the distinct values. We also 51 | # concatenate a value for the end of the curve. 
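    # `np.diff(y_score)` is non-zero exactly where consecutive sorted scores
    # differ, so each retained index corresponds to one distinct decision
    # threshold on the OOD score.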
52 | distinct_value_indices = np.where(np.diff(y_score))[0] 53 | threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1] 54 | 55 | # accumulate the true positives with decreasing threshold 56 | tps = stable_cumsum(y_true)[threshold_idxs] 57 | fps = 1 + threshold_idxs - tps # add one because of zero-based indexing 58 | 59 | thresholds = y_score[threshold_idxs] 60 | 61 | recall = tps / tps[-1] 62 | 63 | last_ind = tps.searchsorted(tps[-1]) 64 | sl = slice(last_ind, None, -1) # [last_ind::-1] 65 | recall, fps, tps, thresholds = np.r_[recall[sl], 1], np.r_[fps[sl], 0], np.r_[tps[sl], 0], thresholds[sl] 66 | 67 | cutoff = np.argmin(np.abs(recall - recall_level)) 68 | return fps[cutoff] / (np.sum(np.logical_not(y_true))), fps[cutoff]/(fps[cutoff] + tps[cutoff]) 69 | 70 | def get_measures(_pos, _neg, recall_level=recall_level_default): 71 | pos = np.array(_pos[:]).reshape((-1, 1)) 72 | neg = np.array(_neg[:]).reshape((-1, 1)) 73 | examples = np.squeeze(np.vstack((pos, neg))) 74 | labels = np.zeros(len(examples), dtype=np.int32) 75 | labels[:len(pos)] += 1 76 | 77 | auroc = sk.roc_auc_score(labels, examples) 78 | aupr = sk.average_precision_score(labels, examples) 79 | fpr, threshould = fpr_and_fdr_at_recall(labels, examples, recall_level) 80 | 81 | return auroc, aupr, fpr, threshould 82 | 83 | 84 | def print_measures(auroc, aupr, fpr, ood, method, recall_level=recall_level_default): 85 | print('\t\t\t' + ood+'_'+method) 86 | print('FPR{:d}:\t\t\t{:.2f}'.format(int(100 * recall_level), 100 * fpr)) 87 | print('AUROC: \t\t\t{:.2f}'.format(100 * auroc)) 88 | print('AUPR: \t\t\t{:.2f}'.format(100 * aupr)) 89 | 90 | def get_and_print_results(out_score, in_score, ood, method): 91 | 92 | aurocs, auprs, fprs = [], [], [] 93 | measures = get_measures(out_score, in_score) 94 | aurocs.append(measures[0]); auprs.append(measures[1]); fprs.append(measures[2]) 95 | 96 | auroc = np.mean(aurocs); aupr = np.mean(auprs); fpr = np.mean(fprs) 97 | print_measures(auroc, aupr, fpr, ood, method) 98 | return auroc, aupr, fpr -------------------------------------------------------------------------------- /training_from_scratch/utils/cal_metric.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | from torch.autograd import Variable 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import numpy as np 7 | import torch.optim as optim 8 | import torchvision 9 | import os 10 | import torchvision.transforms as transforms 11 | import numpy as np 12 | import time 13 | from scipy import misc 14 | 15 | def get_curve(dir_name, stypes = ['MSP', 'ODIN']): 16 | tp, fp = dict(), dict() 17 | fpr_at_tpr95 = dict() 18 | for stype in stypes: 19 | known = np.loadtxt('{}/confidence_{}_In.txt'.format(dir_name, stype), delimiter='\n') 20 | novel = np.loadtxt('{}/confidence_{}_Out.txt'.format(dir_name, stype), delimiter='\n') 21 | known.sort() 22 | novel.sort() 23 | 24 | end = np.max([np.max(known), np.max(novel)]) 25 | start = np.min([np.min(known),np.min(novel)]) 26 | num_k = known.shape[0] 27 | num_n = novel.shape[0] 28 | 29 | threshold = known[round(0.05 * num_k)] 30 | 31 | tp[stype] = -np.ones([num_k+num_n+1], dtype=int) 32 | fp[stype] = -np.ones([num_k+num_n+1], dtype=int) 33 | tp[stype][0], fp[stype][0] = num_k, num_n 34 | k, n = 0, 0 35 | for l in range(num_k+num_n): 36 | if k == num_k: 37 | tp[stype][l+1:] = tp[stype][l] 38 | fp[stype][l+1:] = np.arange(fp[stype][l]-1, -1, -1) 39 | break 40 | elif n == num_n: 41 | 
tp[stype][l+1:] = np.arange(tp[stype][l]-1, -1, -1) 42 | fp[stype][l+1:] = fp[stype][l] 43 | break 44 | else: 45 | if novel[n] < known[k]: 46 | n += 1 47 | tp[stype][l+1] = tp[stype][l] 48 | fp[stype][l+1] = fp[stype][l] - 1 49 | else: 50 | k += 1 51 | tp[stype][l+1] = tp[stype][l] - 1 52 | fp[stype][l+1] = fp[stype][l] 53 | 54 | fpr_at_tpr95[stype] = np.sum(novel > threshold) / float(num_n) 55 | 56 | return tp, fp, fpr_at_tpr95 57 | 58 | def metric(dir_name, stypes = ['MSP', 'ODIN'], verbose=False): 59 | tp, fp, fpr_at_tpr95 = get_curve(dir_name, stypes) 60 | results = dict() 61 | mtypes = ['FPR', 'AUROC', 'DTERR', 'AUIN', 'AUOUT'] 62 | if verbose: 63 | print(' ', end='') 64 | for mtype in mtypes: 65 | print(' {mtype:6s}'.format(mtype=mtype), end='') 66 | print('') 67 | 68 | for stype in stypes: 69 | if verbose: 70 | print('{stype:5s} '.format(stype=stype), end='') 71 | results[stype] = dict() 72 | 73 | # FPR 74 | mtype = 'FPR' 75 | results[stype][mtype] = fpr_at_tpr95[stype] 76 | if verbose: 77 | print(' {val:6.3f}'.format(val=100.*results[stype][mtype]), end='') 78 | 79 | # AUROC 80 | mtype = 'AUROC' 81 | tpr = np.concatenate([[1.], tp[stype]/tp[stype][0], [0.]]) 82 | fpr = np.concatenate([[1.], fp[stype]/fp[stype][0], [0.]]) 83 | results[stype][mtype] = -np.trapz(1.-fpr, tpr) 84 | if verbose: 85 | print(' {val:6.3f}'.format(val=100.*results[stype][mtype]), end='') 86 | 87 | # DTERR 88 | mtype = 'DTERR' 89 | results[stype][mtype] = ((tp[stype][0] - tp[stype] + fp[stype]) / (tp[stype][0] + fp[stype][0])).min() 90 | if verbose: 91 | print(' {val:6.3f}'.format(val=100.*results[stype][mtype]), end='') 92 | 93 | # AUIN 94 | mtype = 'AUIN' 95 | denom = tp[stype]+fp[stype] 96 | denom[denom == 0.] = -1. 97 | pin_ind = np.concatenate([[True], denom > 0., [True]]) 98 | pin = np.concatenate([[.5], tp[stype]/denom, [0.]]) 99 | results[stype][mtype] = -np.trapz(pin[pin_ind], tpr[pin_ind]) 100 | if verbose: 101 | print(' {val:6.3f}'.format(val=100.*results[stype][mtype]), end='') 102 | 103 | # AUOUT 104 | mtype = 'AUOUT' 105 | denom = tp[stype][0]-tp[stype]+fp[stype][0]-fp[stype] 106 | denom[denom == 0.] = -1. 
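        # At each threshold this denominator is (#rejected positives + #rejected
        # negatives), i.e. FN + TN; zero entries are set to -1 and then masked
        # out of the trapezoidal integral via `pout_ind` below.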
107 | pout_ind = np.concatenate([[True], denom > 0., [True]]) 108 | pout = np.concatenate([[0.], (fp[stype][0]-fp[stype])/denom, [.5]]) 109 | results[stype][mtype] = np.trapz(pout[pout_ind], 1.-fpr[pout_ind]) 110 | if verbose: 111 | print(' {val:6.3f}'.format(val=100.*results[stype][mtype]), end='') 112 | print('') 113 | 114 | return results 115 | -------------------------------------------------------------------------------- /training_from_scratch/utils/display_results.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sklearn.metrics as sk 3 | 4 | recall_level_default = 0.95 5 | 6 | 7 | def stable_cumsum(arr, rtol=1e-05, atol=1e-08): 8 | """Use high precision for cumsum and check that final value matches sum 9 | Parameters 10 | ---------- 11 | arr : array-like 12 | To be cumulatively summed as flat 13 | rtol : float 14 | Relative tolerance, see ``np.allclose`` 15 | atol : float 16 | Absolute tolerance, see ``np.allclose`` 17 | """ 18 | out = np.cumsum(arr, dtype=np.float64) 19 | expected = np.sum(arr, dtype=np.float64) 20 | if not np.allclose(out[-1], expected, rtol=rtol, atol=atol): 21 | raise RuntimeError('cumsum was found to be unstable: ' 22 | 'its last element does not correspond to sum') 23 | return out 24 | 25 | 26 | def fpr_and_fdr_at_recall(y_true, y_score, recall_level=recall_level_default, pos_label=None): 27 | classes = np.unique(y_true) 28 | if (pos_label is None and 29 | not (np.array_equal(classes, [0, 1]) or 30 | np.array_equal(classes, [-1, 1]) or 31 | np.array_equal(classes, [0]) or 32 | np.array_equal(classes, [-1]) or 33 | np.array_equal(classes, [1]))): 34 | raise ValueError("Data is not binary and pos_label is not specified") 35 | elif pos_label is None: 36 | pos_label = 1. 37 | 38 | # make y_true a boolean vector 39 | y_true = (y_true == pos_label) 40 | 41 | # sort scores and corresponding truth values 42 | desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1] 43 | y_score = y_score[desc_score_indices] 44 | y_true = y_true[desc_score_indices] 45 | 46 | # y_score typically has many tied values. Here we extract 47 | # the indices associated with the distinct values. We also 48 | # concatenate a value for the end of the curve. 
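    # Same threshold-extraction logic as in utils/anom_utils.py: appending the
    # final index below lets the curve reach recall 1 before FPR is read off
    # at the requested recall level.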
49 | distinct_value_indices = np.where(np.diff(y_score))[0] 50 | threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1] 51 | 52 | # accumulate the true positives with decreasing threshold 53 | tps = stable_cumsum(y_true)[threshold_idxs] 54 | fps = 1 + threshold_idxs - tps # add one because of zero-based indexing 55 | 56 | thresholds = y_score[threshold_idxs] 57 | 58 | recall = tps / tps[-1] 59 | 60 | last_ind = tps.searchsorted(tps[-1]) 61 | sl = slice(last_ind, None, -1) # [last_ind::-1] 62 | recall, fps, tps, thresholds = np.r_[recall[sl], 1], np.r_[fps[sl], 0], np.r_[tps[sl], 0], thresholds[sl] 63 | 64 | cutoff = np.argmin(np.abs(recall - recall_level)) 65 | 66 | return fps[cutoff] / (np.sum(np.logical_not(y_true))) # , fps[cutoff]/(fps[cutoff] + tps[cutoff]) 67 | 68 | 69 | def get_measures(_pos, _neg, recall_level=recall_level_default): 70 | pos = np.array(_pos[:]).reshape((-1, 1)) 71 | neg = np.array(_neg[:]).reshape((-1, 1)) 72 | examples = np.squeeze(np.vstack((pos, neg))) 73 | labels = np.zeros(len(examples), dtype=np.int32) 74 | labels[:len(pos)] += 1 75 | 76 | auroc = sk.roc_auc_score(labels, examples) 77 | aupr = sk.average_precision_score(labels, examples) 78 | fpr = fpr_and_fdr_at_recall(labels, examples, recall_level) 79 | 80 | return auroc, aupr, fpr 81 | 82 | 83 | def show_performance(pos, neg, method_name='Ours', recall_level=recall_level_default): 84 | ''' 85 | :param pos: 1's class, class to detect, outliers, or wrongly predicted 86 | example scores 87 | :param neg: 0's class scores 88 | ''' 89 | 90 | auroc, aupr, fpr = get_measures(pos[:], neg[:], recall_level) 91 | 92 | print(f'\t\t\t {method_name}') 93 | print('FPR{:d}:\t\t\t{:.2f}'.format(int(100 * recall_level), 100 * fpr)) 94 | print('AUROC:\t\t\t{:.2f}'.format(100 * auroc)) 95 | print('AUPR:\t\t\t{:.2f}'.format(100 * aupr)) 96 | # print('FDR{:d}:\t\t\t{:.2f}'.format(int(100 * recall_level), 100 * fdr)) 97 | 98 | 99 | def print_measures(log, auroc, aupr, fpr, method_name='Ours', recall_level=recall_level_default): 100 | if log == None: 101 | print('FPR{:d}:\t\t\t{:.2f}'.format(int(100 * recall_level), 100 * fpr)) 102 | print('AUROC: \t\t\t{:.2f}'.format(100 * auroc)) 103 | print('AUPR: \t\t\t{:.2f}'.format(100 * aupr)) 104 | else: 105 | log.debug('\t\t\t\t' + method_name) 106 | log.debug(' FPR{:d} AUROC AUPR'.format(int(100*recall_level))) 107 | log.debug('& {:.2f} & {:.2f} & {:.2f}'.format(100*fpr, 100*auroc, 100*aupr)) 108 | 109 | 110 | 111 | def print_measures_with_std(log, aurocs, auprs, fprs, method_name='Ours', recall_level=recall_level_default): 112 | log.debug('\t\t\t\t' + method_name) 113 | log.debug(' FPR{:d} AUROC AUPR'.format(int(100*recall_level))) 114 | log.debug('& {:.2f} & {:.2f} & {:.2f}'.format(100*np.mean(fprs), 100*np.mean(aurocs), 100*np.mean(auprs))) 115 | log.debug('& {:.2f} & {:.2f} & {:.2f}'.format(100*np.std(fprs), 100*np.std(aurocs), 100*np.std(auprs))) 116 | #print('FPR{:d}:\t\t\t{:.2f}\t+/- {:.2f}'.format(int(100 * recall_level), 100 * np.mean(fprs), 100 * np.std(fprs))) 117 | #print('AUROC: \t\t\t{:.2f}\t+/- {:.2f}'.format(100 * np.mean(aurocs), 100 * np.std(aurocs))) 118 | #print('AUPR: \t\t\t{:.2f}\t+/- {:.2f}'.format(100 * np.mean(auprs), 100 * np.std(auprs))) 119 | 120 | 121 | def show_performance_comparison(pos_base, neg_base, pos_ours, neg_ours, baseline_name='Baseline', 122 | method_name='Ours', recall_level=recall_level_default): 123 | ''' 124 | :param pos_base: 1's class, class to detect, outliers, or wrongly predicted 125 | example scores from the baseline 126 | 
:param neg_base: 0's class scores generated by the baseline 127 | ''' 128 | auroc_base, aupr_base, fpr_base = get_measures(pos_base[:], neg_base[:], recall_level) 129 | auroc_ours, aupr_ours, fpr_ours = get_measures(pos_ours[:], neg_ours[:], recall_level) 130 | 131 | print('\t\t\t' + baseline_name + '\t' + method_name) 132 | print('FPR{:d}:\t\t\t{:.2f}\t\t{:.2f}'.format( 133 | int(100 * recall_level), 100 * fpr_base, 100 * fpr_ours)) 134 | print('AUROC:\t\t\t{:.2f}\t\t{:.2f}'.format( 135 | 100 * auroc_base, 100 * auroc_ours)) 136 | print('AUPR:\t\t\t{:.2f}\t\t{:.2f}'.format( 137 | 100 * aupr_base, 100 * aupr_ours)) 138 | # print('FDR{:d}:\t\t\t{:.2f}\t\t{:.2f}'.format( 139 | # int(100 * recall_level), 100 * fdr_base, 100 * fdr_ours)) -------------------------------------------------------------------------------- /training_from_scratch/utils/file_copy.py: -------------------------------------------------------------------------------- 1 | # all_labels = ['n01443537', 'n01484850', 'n01491361', 'n01494475', 'n01496331', 2 | # 'n01498041', 'n01629819', 'n01630670', 'n01641577', 'n01644373', 'n01644900', 'n01664065', 3 | # 'n01665541', 'n01667114', 'n01682714', 'n01685808', 'n01687978', 'n01688243', 'n01689811', 4 | # 'n01694178', 'n01695060', 'n01697457', 'n01698640', 'n01728572', 'n01728920', 'n01729322', 5 | # 'n01749939', 'n01751748', 'n01770393', 'n01773157', 'n01773549', 'n01773797', 'n01914609', 6 | # 'n01917289', 'n01924916', 'n01945685', 'n01950731', 'n01978455', 'n01980166', 'n01981276', 7 | # 'n01983481', 'n01984695', 'n01985128', 'n01986214', 'n02066245', 'n02071294', 'n02074367', 8 | # 'n02077923', 'n02056570', 'n02128385', 'n02128757', 'n02129165', 'n02129604', 'n02130308', 9 | # 'n02167151', 'n02168699', 'n02169497', 10 | # 'n02172182', 'n02190166', 'n02206856', 'n02219486', 'n02264363', 'n02268443', 'n02268853', 11 | # 'n02277742', 'n02279972', 'n02280649', 'n02281406', 'n02281787', 'n02317335', 'n02319095', 12 | # 'n02321529', 'n02325366', 'n02326432', 'n02328150', 'n02342885', 'n02346627', 'n02356798', 13 | # 'n02361337', 'n02363005', 'n02364673', 'n02397096', 'n02398521', 'n02422106', 'n02422699', 14 | # 'n02423022', 'n02437312', 'n02443114', 'n02443484', 'n02444819', 'n02445715', 'n02447366', 15 | # 'n02454379', 'n02457408', 'n02480495', 'n02480855', 'n02481823', 'n02483362', 'n02483708', 16 | # 'n02484975', 'n02486410', 'n02487347', 'n02488291', 'n02488702', 'n02489166', 'n02493793', 17 | # 'n02494079', 'n02526121', 'n02536864', 'n02606052', 'n02641379', 'n02643566', 'n02655020', 18 | # 'n02666196', 'n02676566', 'n02747177', 'n02783161', 'n02788148', 'n02794156', 19 | # 'n02795169', 'n02814860', 'n02825657', 'n02834397', 'n02840245', 'n02906734', 'n02909870', 20 | # 'n02971356', 'n02978881', 'n02979186', 'n02980441', 'n02988304', 'n03000134', 'n03014705', 21 | # 'n03016953', 'n03017168', 'n03018349', 'n03026506', 'n03047690', 'n03063689', 22 | # 'n03065424', 'n03075370', 'n03109150', 'n03124043', 'n03125729', 'n03131574', 'n03133878', 23 | # 'n03160309', 'n03207743', 'n03220513', 'n03223299', 'n03240683', 'n03271574', 'n03272010', 24 | # 'n03291819', 'n03337140', 'n03355925', 'n03388043', 'n03388183', 'n03400231', 25 | # 'n03425413', 'n03445777', 'n03447721', 'n03481172', 'n03482405', 'n03485794', 'n03492542', 26 | # 'n03494278', 'n03498962', 'n03527444', 'n03530642', 'n03532672', 'n03584829', 'n03598930', 27 | # 'n03627232', 'n03633091', 'n03637318', 'n03657121', 'n03666591', 'n03680355', 28 | # 'n03690938', 'n03691459', 'n03706229', 'n03709823', 'n03710193', 'n03717622', 
'n03729826', 29 | # 'n03743016', 'n03764736', 'n03773504', 'n03777754', 'n03786901', 'n03788195', 30 | # 'n03804744', 'n03837869', 'n03841143', 'n03854065', 'n03871628', 'n03874599', 'n03876231', 31 | # 'n03908618', 'n03908714', 'n03929660', 'n03930313', 'n03933933', 'n03935335', 'n03944341', 32 | # 'n03954731', 'n03956157', 'n03958227', 'n03976467', 'n03998194', 'n04004767', 'n04033901', 33 | # 'n04041544', 'n04044716', 'n04049303', 'n04069434', 'n04116512', 'n04118776', 'n04125021', 34 | # 'n04127249', 'n04141975', 'n04149813', 'n04153751', 'n04179913', 'n04201297', 35 | # 'n04239074', 'n04243546', 'n04254777', 'n04265275', 'n04275548', 'n04277352', 'n04326547', 36 | # 'n04355338', 'n04366367', 'n04372370', 'n04389033', 'n04418357', 'n04423845', 'n04442312', 37 | # 'n04465501', 'n04485082', 'n04523525', 'n04525038', 'n04532670', 'n04554684', 'n04599235', 38 | # 'n04604644', 'n06359193', 'n06785654', 'n07565083', 'n07684084', 'n07697313', 39 | # 'n07715103', 'n07716358', 'n07720875', 'n07730033', 'n07734744', 'n07745940', 40 | # 'n07749582', 'n07753275', 'n07802026', 'n09246464', 'n09256479', 'n09399592', 'n09468604', 41 | # 'n09472597', 'n11939491', 'n12267677', 'n12620546', 'n12768682', 'n12985857', 'n12998815', 42 | # 'n13037406', 'n13040303', 'n13044778', 'n13054560'] 43 | # from shutil import copytree 44 | # import os 45 | # train_root_dir= "/nobackup-slow/dataset/ILSVRC-2012/train" 46 | # root_dir= "/nobackup-slow/dataset/ILSVRC-2012/ood_train" 47 | # for label in all_labels: 48 | # src = os.path.join(train_root_dir, label) 49 | # dst = os.path.join(root_dir, label) 50 | # if not os.path.exists(dst): 51 | # print("copying label # ", label) 52 | # copytree(src, dst) 53 | # # else: 54 | # # print(f"{label} already exists") 55 | # print(len(all_labels)) # 277 56 | 57 | 58 | import os 59 | from shutil import copyfile 60 | 61 | src_dir = '/nobackup-slow/dataset/shape/output' 62 | type_list = [ "Triangle", "Square", "Pentagon", "Hexagon", "Heptagon", "Octagon", "Nonagon", "Circle", "Star"] 63 | for _type in type_list: 64 | dst_dir_train = f'/nobackup-slow/dataset/shape/train/{_type}' 65 | dst_dir_val = f'/nobackup-slow/dataset/shape/val/{_type}' 66 | if not os.path.exists(dst_dir_train): 67 | os.makedirs(dst_dir_train) 68 | if not os.path.exists(dst_dir_val): 69 | os.makedirs(dst_dir_val) 70 | count = 0 71 | for f in os.listdir(src_dir): 72 | if count >= 2200: 73 | break 74 | if f.startswith(_type): 75 | src = os.path.join(src_dir, f) 76 | if count < 2000: 77 | dst_train = os.path.join(dst_dir_train, f) 78 | if not os.path.exists(dst_train): 79 | print("Train copying file: ", f) 80 | copyfile(src, dst_train) 81 | else: 82 | dst_val = os.path.join(dst_dir_val, f) 83 | if not os.path.exists(dst_val): 84 | print("Val copying file: ", f) 85 | copyfile(src, dst_val) 86 | 87 | count += 1 -------------------------------------------------------------------------------- /training_from_scratch/utils/imagenet_loader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import os 4 | import pickle 5 | 6 | def unpickle(file): 7 | with open(file, 'rb') as fo: 8 | dict = pickle.load(fo) 9 | return dict 10 | 11 | class ImageNet(torch.utils.data.Dataset): 12 | 13 | def __init__(self, transform=None, img_size=64): 14 | 15 | self.S = np.zeros(11, dtype=np.int32) 16 | self.img_size = img_size 17 | self.labels = [] 18 | for idx in range(1, 11): 19 | # data_file = os.path.join('/nobackup/ImageNet64/', 'train_data_batch_{}'.format(idx)) 
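            # Each `train_data_batch_{1..10}` pickle (downsampled-ImageNet format)
            # holds flattened uint8 images under 'data' and 1-indexed class ids
            # under 'labels'; the ids are shifted to 0-indexed just below.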
20 | data_file = os.path.join('/nobackup-slow/dataset/ImageNet/', 'train_data_batch_{}'.format(idx)) 21 | d = unpickle(data_file) 22 | y = d['labels'] 23 | y = [i-1 for i in y] 24 | self.labels.extend(y) 25 | self.S[idx] = self.S[idx-1] + len(y) 26 | 27 | self.labels = np.array(self.labels) 28 | self.N = len(self.labels) 29 | self.curr_batch = -1 30 | 31 | self.offset = 0 # offset index 32 | self.transform = transform 33 | 34 | def load_image_batch(self, batch_index): 35 | # data_file = os.path.join('/nobackup/ImageNet64/', 'train_data_batch_{}'.format(batch_index)) 36 | data_file = os.path.join('/nobackup-slow/dataset/ImageNet/', 'train_data_batch_{}'.format(batch_index)) 37 | d = unpickle(data_file) 38 | x = d['data'] 39 | 40 | img_size = self.img_size 41 | img_size2 = img_size * img_size 42 | x = np.dstack((x[:, :img_size2], x[:, img_size2:2*img_size2], x[:, 2*img_size2:])) 43 | x = x.reshape((x.shape[0], img_size, img_size, 3)) 44 | 45 | self.batch_images = x 46 | self.curr_batch = batch_index 47 | 48 | def get_batch_index(self, index): 49 | j = 1 50 | while index >= self.S[j]: 51 | j += 1 52 | return j 53 | 54 | def load_image(self, index): 55 | batch_index = self.get_batch_index(index) 56 | if self.curr_batch != batch_index: 57 | self.load_image_batch(batch_index) 58 | 59 | return self.batch_images[index-self.S[batch_index-1]] 60 | 61 | def __getitem__(self, index): 62 | index = (index + self.offset) % self.N 63 | 64 | img = self.load_image(index) 65 | if self.transform is not None: 66 | img = self.transform(img) 67 | 68 | return img, self.labels[index] 69 | 70 | def __len__(self): 71 | return self.N -------------------------------------------------------------------------------- /training_from_scratch/utils/mahalanobis_lib.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import numpy as np 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from torch.autograd import Variable 8 | from scipy.spatial.distance import pdist, cdist, squareform 9 | 10 | def sample_estimator(model, num_classes, feature_list, train_loader): 11 | """ 12 | compute sample mean and precision (inverse of covariance) 13 | return: sample_class_mean: list of class mean 14 | precision: list of precisions 15 | """ 16 | import sklearn.covariance 17 | 18 | model.eval() 19 | group_lasso = sklearn.covariance.EmpiricalCovariance(assume_centered=False) 20 | correct, total = 0, 0 21 | num_output = len(feature_list) 22 | num_sample_per_class = np.empty(num_classes) 23 | num_sample_per_class.fill(0) 24 | list_features = [] 25 | for i in range(num_output): 26 | temp_list = [] 27 | for j in range(num_classes): 28 | temp_list.append(0) 29 | list_features.append(temp_list) 30 | 31 | for data, target in train_loader: 32 | total += data.size(0) 33 | # data = data.cuda() 34 | # data = Variable(data) 35 | data = data.cuda() 36 | output, out_features = model.feature_list(data) 37 | # output, out_features = model.module.feature_list(data) 38 | 39 | # get hidden features 40 | for i in range(num_output): 41 | out_features[i] = out_features[i].view(out_features[i].size(0), out_features[i].size(1), -1) 42 | out_features[i] = torch.mean(out_features[i].data, 2) 43 | #TEMP 44 | # if i == 3: 45 | # out_features[i] = out_features[i] / out_features[i].norm(p=2, dim=1, keepdim=True) 46 | 47 | 48 | # compute the accuracy 49 | pred = output.data.max(1)[1] 50 | equal_flag = pred.eq(target.cuda()).cpu() 51 | correct += 
equal_flag.sum() 52 | 53 | # construct the sample matrix 54 | for i in range(data.size(0)): 55 | label = target[i] 56 | if num_sample_per_class[label] == 0: 57 | out_count = 0 58 | for out in out_features: 59 | list_features[out_count][label] = out[i].view(1, -1) 60 | out_count += 1 61 | else: 62 | out_count = 0 63 | for out in out_features: 64 | list_features[out_count][label] \ 65 | = torch.cat((list_features[out_count][label], out[i].view(1, -1)), 0) 66 | out_count += 1 67 | num_sample_per_class[label] += 1 68 | 69 | sample_class_mean = [] 70 | out_count = 0 71 | for num_feature in feature_list: 72 | temp_list = torch.Tensor(num_classes, int(num_feature)).cuda() 73 | for j in range(num_classes): 74 | temp_list[j] = torch.mean(list_features[out_count][j], 0) 75 | sample_class_mean.append(temp_list) 76 | out_count += 1 77 | 78 | precision = [] 79 | for k in range(num_output): 80 | X = 0 81 | for i in range(num_classes): 82 | if i == 0: 83 | X = list_features[k][i] - sample_class_mean[k][i] 84 | else: 85 | X = torch.cat((X, list_features[k][i] - sample_class_mean[k][i]), 0) 86 | 87 | # find inverse 88 | group_lasso.fit(X.cpu().numpy()) 89 | temp_precision = group_lasso.precision_ 90 | temp_precision = torch.from_numpy(temp_precision).float().cuda() 91 | precision.append(temp_precision) 92 | 93 | print('\n Training Accuracy:({:.2f}%)\n'.format(100. * correct / total)) 94 | 95 | return sample_class_mean, precision 96 | 97 | def get_Mahalanobis_score(inputs, model, num_classes, sample_mean, precision, num_output, magnitude): 98 | ''' 99 | e.g. when layer_index = 0, out_features.shape: 80, 24, 32, 32 100 | -- after flatten--> 80, 24, 1024 --after taken mean --> shape: 80, 24 101 | i.e. channel info is reserved 102 | ''' 103 | for layer_index in range(num_output): 104 | data = Variable(inputs, requires_grad = True) 105 | data = data.cuda() 106 | 107 | out_features = model.intermediate_forward(data, layer_index) 108 | # out_features = model.module.intermediate_forward(data, layer_index) 109 | out_features = out_features.view(out_features.size(0), out_features.size(1), -1) 110 | out_features = torch.mean(out_features, 2) 111 | #TEMP 112 | # if layer_index == 3: 113 | # out_features = out_features / out_features.norm(p=2, dim=1, keepdim=True) 114 | 115 | gaussian_score = 0 116 | for i in range(num_classes): 117 | batch_sample_mean = sample_mean[layer_index][i] 118 | zero_f = out_features.data - batch_sample_mean 119 | term_gau = -0.5*torch.mm(torch.mm(zero_f, precision[layer_index]), zero_f.t()).diag() 120 | if i == 0: 121 | gaussian_score = term_gau.view(-1,1) 122 | else: 123 | gaussian_score = torch.cat((gaussian_score, term_gau.view(-1,1)), 1) 124 | 125 | # Input_processing 126 | sample_pred = gaussian_score.max(1)[1] 127 | batch_sample_mean = sample_mean[layer_index].index_select(0, sample_pred) 128 | zero_f = out_features - Variable(batch_sample_mean) 129 | pure_gau = -0.5*torch.mm(torch.mm(zero_f, Variable(precision[layer_index])), zero_f.t()).diag() 130 | loss = torch.mean(-pure_gau) 131 | loss.backward() 132 | 133 | gradient = torch.ge(data.grad.data, 0) 134 | gradient = (gradient.float() - 0.5) * 2 135 | 136 | tempInputs = torch.add(data.data, -magnitude, gradient) 137 | 138 | noise_out_features = model.intermediate_forward(Variable(tempInputs), layer_index) 139 | # noise_out_features = model.module.intermediate_forward(Variable(tempInputs), layer_index) 140 | noise_out_features = noise_out_features.view(noise_out_features.size(0), noise_out_features.size(1), -1) 141 | noise_out_features 
= torch.mean(noise_out_features, 2) 142 | #TEMP 143 | # if layer_index == 3: 144 | # noise_out_features = noise_out_features / noise_out_features.norm(p=2, dim=1, keepdim=True) 145 | noise_gaussian_score = 0 146 | for i in range(num_classes): 147 | batch_sample_mean = sample_mean[layer_index][i] 148 | zero_f = noise_out_features.data - batch_sample_mean 149 | term_gau = -0.5*torch.mm(torch.mm(zero_f, precision[layer_index]), zero_f.t()).diag() 150 | if i == 0: 151 | noise_gaussian_score = term_gau.view(-1,1) 152 | else: 153 | noise_gaussian_score = torch.cat((noise_gaussian_score, term_gau.view(-1,1)), 1) 154 | 155 | noise_gaussian_score, _ = torch.max(noise_gaussian_score, dim=1) 156 | 157 | noise_gaussian_score = np.asarray(noise_gaussian_score.cpu().numpy(), dtype=np.float32) 158 | if layer_index == 0: 159 | Mahalanobis_scores = noise_gaussian_score.reshape((noise_gaussian_score.shape[0], -1)) 160 | else: 161 | Mahalanobis_scores = np.concatenate((Mahalanobis_scores, noise_gaussian_score.reshape((noise_gaussian_score.shape[0], -1))), axis=1) 162 | 163 | return Mahalanobis_scores 164 | 165 | 166 | -------------------------------------------------------------------------------- /training_from_scratch/utils/mahalanobis_lib_clf.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import numpy as np 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from torch.autograd import Variable 8 | from scipy.spatial.distance import pdist, cdist, squareform 9 | 10 | def sample_estimator(model, num_classes, feature_list, train_loader): 11 | """ 12 | compute sample mean and precision (inverse of covariance) 13 | return: sample_class_mean: list of class mean 14 | precision: list of precisions 15 | """ 16 | import sklearn.covariance 17 | 18 | model.eval() 19 | group_lasso = sklearn.covariance.EmpiricalCovariance(assume_centered=False) 20 | correct, total = 0, 0 21 | num_output = len(feature_list) 22 | num_sample_per_class = np.empty(num_classes) 23 | num_sample_per_class.fill(0) 24 | list_features = [] 25 | for i in range(num_output): 26 | temp_list = [] 27 | for j in range(num_classes): 28 | temp_list.append(0) 29 | list_features.append(temp_list) 30 | 31 | for data, target in train_loader: 32 | total += data.size(0) 33 | # data = data.cuda() 34 | # data = Variable(data) 35 | data = data.cuda() 36 | out_features = model.encoder.feature_list(data) 37 | output = model(data) 38 | 39 | 40 | # get hidden features 41 | for i in range(num_output): 42 | out_features[i] = out_features[i].view(out_features[i].size(0), out_features[i].size(1), -1) 43 | out_features[i] = torch.mean(out_features[i].data, 2) 44 | #TEMP 45 | # if i == 4: 46 | # out_features[i] = out_features[i] / out_features[i].norm(p=2, dim=1, keepdim=True) 47 | 48 | 49 | # compute the accuracy 50 | pred = output.data.max(1)[1] 51 | equal_flag = pred.eq(target.cuda()).cpu() 52 | correct += equal_flag.sum() 53 | 54 | # construct the sample matrix 55 | for i in range(data.size(0)): 56 | label = target[i] 57 | if num_sample_per_class[label] == 0: 58 | out_count = 0 59 | for out in out_features: 60 | list_features[out_count][label] = out[i].view(1, -1) 61 | out_count += 1 62 | else: 63 | out_count = 0 64 | for out in out_features: 65 | list_features[out_count][label] \ 66 | = torch.cat((list_features[out_count][label], out[i].view(1, -1)), 0) 67 | out_count += 1 68 | num_sample_per_class[label] += 1 69 | 70 | sample_class_mean = [] 71 | 
/training_from_scratch/utils/mahalanobis_lib_clf.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import torch
3 | import numpy as np
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | 
7 | from torch.autograd import Variable
8 | from scipy.spatial.distance import pdist, cdist, squareform
9 | 
10 | def sample_estimator(model, num_classes, feature_list, train_loader):
11 |     """
12 |     compute sample mean and precision (inverse of covariance)
13 |     return: sample_class_mean: list of class means
14 |             precision: list of precision matrices
15 |     """
16 |     import sklearn.covariance
17 | 
18 |     model.eval()
19 |     group_lasso = sklearn.covariance.EmpiricalCovariance(assume_centered=False)
20 |     correct, total = 0, 0
21 |     num_output = len(feature_list)
22 |     num_sample_per_class = np.empty(num_classes)
23 |     num_sample_per_class.fill(0)
24 |     list_features = []
25 |     for i in range(num_output):
26 |         temp_list = []
27 |         for j in range(num_classes):
28 |             temp_list.append(0)
29 |         list_features.append(temp_list)
30 | 
31 |     for data, target in train_loader:
32 |         total += data.size(0)
33 |         # data = data.cuda()
34 |         # data = Variable(data)
35 |         data = data.cuda()
36 |         out_features = model.encoder.feature_list(data)
37 |         output = model(data)
38 | 
39 | 
40 |         # get hidden features
41 |         for i in range(num_output):
42 |             out_features[i] = out_features[i].view(out_features[i].size(0), out_features[i].size(1), -1)
43 |             out_features[i] = torch.mean(out_features[i].data, 2)
44 |             #TEMP
45 |             # if i == 4:
46 |             #     out_features[i] = out_features[i] / out_features[i].norm(p=2, dim=1, keepdim=True)
47 | 
48 | 
49 |         # compute the accuracy
50 |         pred = output.data.max(1)[1]
51 |         equal_flag = pred.eq(target.cuda()).cpu()
52 |         correct += equal_flag.sum()
53 | 
54 |         # construct the sample matrix
55 |         for i in range(data.size(0)):
56 |             label = target[i]
57 |             if num_sample_per_class[label] == 0:
58 |                 out_count = 0
59 |                 for out in out_features:
60 |                     list_features[out_count][label] = out[i].view(1, -1)
61 |                     out_count += 1
62 |             else:
63 |                 out_count = 0
64 |                 for out in out_features:
65 |                     list_features[out_count][label] \
66 |                         = torch.cat((list_features[out_count][label], out[i].view(1, -1)), 0)
67 |                     out_count += 1
68 |             num_sample_per_class[label] += 1
69 | 
70 |     sample_class_mean = []
71 |     out_count = 0
72 |     for num_feature in feature_list:
73 |         temp_list = torch.Tensor(num_classes, int(num_feature)).cuda()
74 |         for j in range(num_classes):
75 |             temp_list[j] = torch.mean(list_features[out_count][j], 0)
76 |         sample_class_mean.append(temp_list)
77 |         out_count += 1
78 | 
79 |     precision = []
80 |     for k in range(num_output):
81 |         X = 0
82 |         for i in range(num_classes):
83 |             if i == 0:
84 |                 X = list_features[k][i] - sample_class_mean[k][i]
85 |             else:
86 |                 X = torch.cat((X, list_features[k][i] - sample_class_mean[k][i]), 0)
87 | 
88 |         # find inverse
89 |         group_lasso.fit(X.cpu().numpy())
90 |         temp_precision = group_lasso.precision_
91 |         temp_precision = torch.from_numpy(temp_precision).double().cuda()
92 |         precision.append(temp_precision)
93 | 
94 |     print('\n Training Accuracy:({:.2f}%)\n'.format(100. * correct / total))
95 | 
96 |     return sample_class_mean, precision
97 | 
98 | def get_Mahalanobis_score(inputs, model, num_classes, sample_mean, precision, num_output, magnitude):
99 |     '''
100 |     e.g. when layer_index = 0, out_features.shape: 80, 24, 32, 32
101 |     -- after flatten --> 80, 24, 1024 -- after taking the mean --> shape: 80, 24
102 |     i.e. channel info is preserved
103 |     '''
104 |     for layer_index in range(num_output):
105 |         data = Variable(inputs, requires_grad = True)
106 |         data = data.cuda()  # shape 10,3,32,32
107 | 
108 |         out_features = model.encoder.intermediate_forward(data, layer_index)
109 |         # out_features = model.module.intermediate_forward(data, layer_index)
110 |         out_features = out_features.view(out_features.size(0), out_features.size(1), -1)
111 |         out_features = torch.mean(out_features, 2)
112 |         #TEMP
113 |         # if layer_index == 4:
114 |         #     out_features = out_features / out_features.norm(p=2, dim=1, keepdim=True)
115 | 
116 |         gaussian_score = 0
117 |         for i in range(num_classes):
118 |             batch_sample_mean = sample_mean[layer_index][i]
119 |             zero_f = (out_features.data - batch_sample_mean).double()
120 |             term_gau = -0.5*torch.mm(torch.mm(zero_f, precision[layer_index]), zero_f.t()).diag()
121 |             if i == 0:
122 |                 gaussian_score = term_gau.view(-1,1)
123 |             else:
124 |                 gaussian_score = torch.cat((gaussian_score, term_gau.view(-1,1)), 1)
125 | 
126 |         # Input preprocessing: perturb the input toward the closest class mean (ODIN-style)
127 |         sample_pred = gaussian_score.max(1)[1]
128 |         batch_sample_mean = sample_mean[layer_index].index_select(0, sample_pred)
129 |         zero_f = (out_features - batch_sample_mean).double()
130 |         pure_gau = -0.5*torch.mm(torch.mm(zero_f, Variable(precision[layer_index])), zero_f.t()).diag()
131 |         loss = torch.mean(-pure_gau)
132 |         loss.backward()
133 | 
134 |         gradient = torch.ge(data.grad.data, 0)
135 |         gradient = (gradient.float() - 0.5) * 2
136 | 
137 |         tempInputs = torch.add(data.data, gradient, alpha=-magnitude)
138 | 
139 |         noise_out_features = model.encoder.intermediate_forward(Variable(tempInputs), layer_index)
140 |         # noise_out_features = model.module.intermediate_forward(Variable(tempInputs), layer_index)
141 |         noise_out_features = noise_out_features.view(noise_out_features.size(0), noise_out_features.size(1), -1)
142 |         noise_out_features = torch.mean(noise_out_features, 2)
143 |         #TEMP
144 |         # if layer_index == 4:
145 |         #     noise_out_features = noise_out_features / noise_out_features.norm(p=2, dim=1, keepdim=True)
146 |         noise_gaussian_score = 0
147 |         for i in range(num_classes):
148 |             batch_sample_mean = sample_mean[layer_index][i]
149 |             zero_f = (noise_out_features.data - batch_sample_mean).double()
150 |             term_gau = -0.5*torch.mm(torch.mm(zero_f, precision[layer_index]), zero_f.t()).diag()
151 |             if i == 0:
152 |                 noise_gaussian_score = term_gau.view(-1,1)
153 |             else:
154 |                 noise_gaussian_score = torch.cat((noise_gaussian_score, term_gau.view(-1,1)), 1)
155 | 
156 |         noise_gaussian_score, _ = torch.max(noise_gaussian_score, dim=1)
157 | 
158 |         noise_gaussian_score = np.asarray(noise_gaussian_score.cpu().numpy(), dtype=np.float32)
159 |         if layer_index == 0:
160 |             Mahalanobis_scores = noise_gaussian_score.reshape((noise_gaussian_score.shape[0], -1))
161 |         else:
162 |             Mahalanobis_scores = np.concatenate((Mahalanobis_scores, noise_gaussian_score.reshape((noise_gaussian_score.shape[0], -1))), axis=1)
163 | 
164 |     return Mahalanobis_scores
--------------------------------------------------------------------------------
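The only difference from the plain library above is that this variant reaches per-layer features through model.encoder. A minimal sketch of the wrapper shape that implies; the class and attribute names below are illustrative, not defined by this repository.

import torch.nn as nn

class SupConWrapper(nn.Module):
    """Hypothetical classifier head over a contrastively trained encoder.
    mahalanobis_lib_clf.py calls model(x) for logits and reaches features
    via model.encoder.feature_list / model.encoder.intermediate_forward."""
    def __init__(self, encoder, feat_dim, num_classes):
        super().__init__()
        self.encoder = encoder  # must itself expose feature_list / intermediate_forward
        self.fc = nn.Linear(feat_dim, num_classes)

    def forward(self, x):
        # logits used by sample_estimator's accuracy check
        return self.fc(self.encoder.feature_list(x)[-1].mean(dim=(2, 3)))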
/training_from_scratch/utils/svhn_loader.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data as data
2 | from PIL import Image
3 | import os
4 | import os.path
5 | import numpy as np
6 | from torchvision.datasets.utils import download_url, check_integrity
7 | 
8 | class SVHN(data.Dataset):
9 |     url = ""
10 |     filename = ""
11 |     file_md5 = ""
12 | 
13 |     split_list = {
14 |         'train': ["http://ufldl.stanford.edu/housenumbers/train_32x32.mat",
15 |                   "train_32x32.mat", "e26dedcc434d2e4c54c9b2d4a06d8373"],
16 |         'test': ["http://ufldl.stanford.edu/housenumbers/test_32x32.mat",
17 |                  "selected_test_32x32.mat", "eb5a983be6a315427106f1b164d9cef3"],
18 |         'extra': ["http://ufldl.stanford.edu/housenumbers/extra_32x32.mat",
19 |                   "extra_32x32.mat", "a93ce644f1a588dc4d68dda5feec44a7"],
20 |         'train_and_extra': [
21 |             ["http://ufldl.stanford.edu/housenumbers/train_32x32.mat",
22 |              "train_32x32.mat", "e26dedcc434d2e4c54c9b2d4a06d8373"],
23 |             ["http://ufldl.stanford.edu/housenumbers/extra_32x32.mat",
24 |              "extra_32x32.mat", "a93ce644f1a588dc4d68dda5feec44a7"]]}
25 | 
26 |     def __init__(self, root, split='train',
27 |                  transform=None, target_transform=None, download=False):
28 |         self.root = root
29 |         self.transform = transform
30 |         self.target_transform = target_transform
31 |         self.split = split  # training set or test set or extra set
32 | 
33 |         if self.split not in self.split_list:
34 |             raise ValueError('Wrong split entered! Please use split="train" '
35 |                              'or split="extra" or split="test" '
36 |                              'or split="train_and_extra" ')
37 | 
38 |         if self.split == "train_and_extra":
39 |             self.url = self.split_list[split][0][0]
40 |             self.filename = self.split_list[split][0][1]
41 |             self.file_md5 = self.split_list[split][0][2]
42 |         else:
43 |             self.url = self.split_list[split][0]
44 |             self.filename = self.split_list[split][1]
45 |             self.file_md5 = self.split_list[split][2]
46 | 
47 |         # import here rather than at top of file because this is
48 |         # an optional dependency for torchvision
49 |         import scipy.io as sio
50 | 
51 |         # reading(loading) mat file as array
52 |         loaded_mat = sio.loadmat(os.path.join(root, self.filename))
53 | 
54 |         if self.split == "test":
55 |             self.data = loaded_mat['X']
56 |             self.targets = loaded_mat['y']
57 |             # Note label 10 == 0 so modulo operator required
58 |             self.targets = (self.targets % 10).squeeze()  # convert to zero-based indexing
59 |             self.data = np.transpose(self.data, (3, 2, 0, 1))
60 |         else:
61 |             self.data = loaded_mat['X']
62 |             self.targets = loaded_mat['y']
63 | 
64 |             if self.split == "train_and_extra":
65 |                 extra_filename = self.split_list[split][1][1]
66 |                 loaded_mat = sio.loadmat(os.path.join(root, extra_filename))
67 |                 self.data = np.concatenate([self.data,
68 |                                             loaded_mat['X']], axis=3)
69 |                 self.targets = np.vstack((self.targets,
70 |                                           loaded_mat['y']))
71 |             # Note label 10 == 0 so modulo operator required
72 |             self.targets = (self.targets % 10).squeeze()  # convert to zero-based indexing
73 |             self.data = np.transpose(self.data, (3, 2, 0, 1))
74 | 
75 |     def __getitem__(self, index):
76 |         if self.split == "test":
77 |             img, target = self.data[index], self.targets[index]
78 |         else:
79 |             img, target = self.data[index], self.targets[index]
80 | 
81 |         # doing this so that it is consistent with all other datasets
82 |         # to return a PIL Image
83 |         img = Image.fromarray(np.transpose(img, (1, 2, 0)))
84 | 
85 |         if self.transform is not None:
86 |             img = self.transform(img)
87 | 
88 |         if self.target_transform is not None:
89 |             target = self.target_transform(target)
90 | 
91 |         return img, target.astype(np.int64)
92 | 
93 |     def __len__(self):
94 |         if self.split == "test":
95 |             return len(self.data)
96 |         else:
97 |             return len(self.data)
98 | 
99 |     def _check_integrity(self):
100 |         root = self.root
101 |         if self.split == "train_and_extra":
102 |             md5 = self.split_list[self.split][0][2]
103 |             fpath = os.path.join(root, self.filename)
104 |             train_integrity = check_integrity(fpath, md5)
105 |             extra_filename = self.split_list[self.split][1][1]
106 |             md5 = self.split_list[self.split][1][2]
107 |             fpath = os.path.join(root, extra_filename)
108 |             return check_integrity(fpath, md5) and train_integrity
109 |         else:
110 |             md5 = self.split_list[self.split][2]
111 |             fpath = os.path.join(root, self.filename)
112 |             return check_integrity(fpath, md5)
113 | 
114 |     def download(self):
115 |         if self.split == "train_and_extra":
116 |             md5 = self.split_list[self.split][0][2]
117 |             download_url(self.url, self.root, self.filename, md5)
118 |             extra_filename = self.split_list[self.split][1][1]
119 |             md5 = self.split_list[self.split][1][2]
120 |             download_url(self.split_list[self.split][1][0], self.root, extra_filename, md5)
121 |         else:
122 |             md5 = self.split_list[self.split][2]
123 |             download_url(self.url, self.root, self.filename, md5)
124 | 
--------------------------------------------------------------------------------
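A minimal usage sketch for this loader; the root path is a placeholder. Note that __init__ accepts a download flag but never calls self.download(), so the .mat file named in split_list (here 'selected_test_32x32.mat') must already exist under root, or .download() must be invoked explicitly.

import torchvision.transforms as trn
from torch.utils.data import DataLoader

svhn_test = SVHN(root='/path/to/svhn', split='test', transform=trn.ToTensor())
loader = DataLoader(svhn_test, batch_size=128, shuffle=False, num_workers=2)

images, labels = next(iter(loader))  # images: (128, 3, 32, 32); labels: zero-based digits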
/training_from_scratch/utils/tinyimages_80mn_loader.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | 
4 | 
5 | class TinyImages(torch.utils.data.Dataset):
6 | 
7 |     def __init__(self, transform=None, exclude_cifar=True):
8 | 
9 |         # data_file = open('datasets/unlabeled_datasets/80M_Tiny_Images/tiny_images.bin', "rb")
10 |         data_file = open('/nobackup-slow/dataset/80million/tiny_images.bin', "rb")
11 |         def load_image(idx):
12 |             data_file.seek(idx * 3072)
13 |             data = data_file.read(3072)
14 |             return np.frombuffer(data, dtype='uint8').reshape(32, 32, 3, order="F")
15 | 
16 |         self.load_image = load_image
17 |         self.offset = 0  # offset index
18 | 
19 |         self.transform = transform
20 |         self.exclude_cifar = exclude_cifar
21 | 
22 |         if exclude_cifar:
23 |             self.cifar_idxs = []
24 |             label_path = '/u/a/l/alvinming/ood/Atom/informative-outlier-mining/datasets/unlabeled_datasets/80M_Tiny_Images/80mn_cifar_idxs.txt'
25 |             with open(label_path, 'r') as idxs:
26 |                 for idx in idxs:
27 |                     # indices in file take the 80mn database to start at 1, hence "- 1"
28 |                     self.cifar_idxs.append(int(idx) - 1)
29 | 
30 |             # hash table option
31 |             self.cifar_idxs = set(self.cifar_idxs)
32 |             self.in_cifar = lambda x: x in self.cifar_idxs
33 | 
34 |     def __getitem__(self, index):
35 |         index = (index + self.offset) % 79302017  # matches the dataset size in __len__
36 | 
37 |         if self.exclude_cifar:
38 |             while self.in_cifar(index):
39 |                 index = np.random.randint(79302017)
40 | 
41 |         img = self.load_image(index)
42 |         if self.transform is not None:
43 |             img = self.transform(img)
44 | 
45 |         return img, 0  # 0 is the class
46 | 
47 |     def __len__(self):
48 |         return 79302017
49 | 
--------------------------------------------------------------------------------
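A sketch of how this dataset is usually consumed as an auxiliary outlier pool for OOD training; the binary and index-file paths are hard-coded in __init__ above, and the transform and batch size here are placeholders.

import torchvision.transforms as trn
from torch.utils.data import DataLoader

ood_data = TinyImages(transform=trn.ToTensor(), exclude_cifar=True)
ood_loader = DataLoader(ood_data, batch_size=256, shuffle=False, num_workers=4)

images, _ = next(iter(ood_loader))  # every sample carries the dummy label 0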
/training_from_scratch/utils/transform.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from PIL import Image
4 | import collections.abc
5 | 
6 | 
7 | class Scale(object):
8 |     def __init__(self, size, interpolation=Image.BILINEAR):
9 |         assert isinstance(size, int) or (isinstance(size, collections.abc.Iterable) and len(size) == 2)
10 |         self.size = size
11 |         self.interpolation = interpolation
12 | 
13 |     def __call__(self, img):
14 |         if isinstance(self.size, int):
15 |             w, h = img.size
16 |             if (w <= h and w == self.size) or (h <= w and h == self.size):
17 |                 return img
18 |             if w < h:
19 |                 ow = self.size
20 |                 oh = int(self.size * h / w)
21 |                 return img.resize((ow, oh), self.interpolation)
22 |             else:
23 |                 oh = self.size
24 |                 ow = int(self.size * w / h)
25 |                 return img.resize((ow, oh), self.interpolation)
26 |         else:
27 |             return img.resize(self.size, self.interpolation)
28 | 
29 | 
30 | class ToParallel(object):
31 |     def __init__(self, transforms):
32 |         self.transforms = transforms
33 | 
34 |     def __call__(self, img):
35 |         yield img
36 |         for t in self.transforms:
37 |             yield t(img)
38 | 
39 | 
40 | class labelflip(object):
41 |     def __call__(self, img):
42 |         if np.random.randint(0, 2) == 1:
43 |             img = np.array(img)
44 |             labels = np.unique(img)
45 |             index = np.random.randint(0, labels.shape[0])
46 |             randlabel = np.random.randint(1, 22)
47 |             img[img == labels[index]] = randlabel
48 |             img = Image.fromarray(img)
49 |             return img
50 |         else:
51 |             return img
52 | 
53 | class labelnoise(object):
54 |     def __call__(self, img):
55 |         img = np.array(img)
56 |         size = img.shape
57 |         noise = np.random.randint(1, 22, (size[0], size[1]))
58 |         p = np.random.uniform(0, 0.8)
59 |         flipprob = np.random.binomial(1, p, size[0]*size[1])  # biased coin flips
60 |         flipprob = np.reshape(flipprob, (size[0], size[1]))
61 |         img[flipprob == 1] = noise[flipprob == 1]
62 |         img = Image.fromarray(img)
63 |         return img
64 | 
65 | 
66 | class ToLabel(object):
67 |     def __call__(self, inputs):
68 |         tensors = []
69 |         # for i in inputs:
70 |         #     tensors.append(torch.from_numpy(np.array(i)).long())
71 |         return (torch.from_numpy(np.array(inputs)).long())
72 | 
73 | 
74 | class Todepth(object):
75 |     def __call__(self, inputs):
76 |         tensors = []
77 |         # for i in inputs:
78 |         #     tensors.append(torch.from_numpy(np.array(i)).long())
79 |         return (torch.from_numpy(np.array(inputs)).float())
80 | 
81 | 
82 | class ReLabel(object):
83 |     def __init__(self, olabel, nlabel):
84 |         self.olabel = olabel
85 |         self.nlabel = nlabel
86 | 
87 |     def __call__(self, inputs):
88 |         # assert isinstance(input, torch.LongTensor), 'tensor needs to be LongTensor'
89 |         for i in inputs:
90 |             i[i == self.olabel] = self.nlabel
91 |         return inputs
92 | 
93 | 
94 | class ToSP(object):
95 |     def __init__(self, size):
96 |         self.scale2 = Scale(size // 2, Image.NEAREST)
97 |         self.scale4 = Scale(size // 4, Image.NEAREST)
98 |         self.scale8 = Scale(size // 8, Image.NEAREST)
99 |         self.scale16 = Scale(size // 16, Image.NEAREST)
100 |         self.scale32 = Scale(size // 32, Image.NEAREST)
101 | 
102 |     def __call__(self, input):
103 |         input2 = self.scale2(input)
104 |         input4 = self.scale4(input)
105 |         input8 = self.scale8(input)
106 |         input16 = self.scale16(input)
107 |         input32 = self.scale32(input)
108 |         inputs = [input, input2, input4, input8, input16, input32]
109 |         # inputs = [input]
110 | 
111 |         return inputs
112 | 
113 | 
114 | class HorizontalFlip(object):
115 |     """Horizontally flips the given PIL.Image."""
116 | 
117 |     def __call__(self, img):
118 |         return img.transpose(Image.FLIP_LEFT_RIGHT)
119 | 
120 | 
121 | class VerticalFlip(object):
122 |     def __call__(self, img):
123 |         return img.transpose(Image.FLIP_TOP_BOTTOM)
124 | 
125 | def uint82bin(n, count=8):
126 |     """returns the binary of integer n, count refers to amount of bits"""
127 |     return ''.join([str((n >> y) & 1) for y in range(count-1, -1, -1)])
128 | 
129 | def labelcolormap(N):
130 |     cmap = np.zeros((N, 3), dtype=np.uint8)
131 |     for i in range(N):
132 |         r = 0
133 |         g = 0
134 |         b = 0
135 |         id = i
136 |         for j in range(7):
137 |             str_id = uint82bin(id)
138 |             r = r ^ (np.uint8(str_id[-1]) << (7-j))
139 |             g = g ^ (np.uint8(str_id[-2]) << (7-j))
140 |             b = b ^ (np.uint8(str_id[-3]) << (7-j))
141 |             id = id >> 3
142 |         cmap[i, 0] = r
143 |         cmap[i, 1] = g
144 |         cmap[i, 2] = b
145 |     return cmap
146 | 
147 | def colormap(n):
148 |     cmap = np.zeros([n, 3]).astype(np.uint8)
149 | 
150 |     for i in np.arange(n):
151 |         r, g, b = np.zeros(3)
152 | 
153 |         for j in np.arange(8):
154 |             r = r + (1 << (7-j))*((i & (1 << (3*j))) >> (3*j))
155 |             g = g + (1 << (7-j))*((i & (1 << (3*j+1))) >> (3*j+1))
156 |             b = b + (1 << (7-j))*((i & (1 << (3*j+2))) >> (3*j+2))
157 | 
158 |         cmap[i, :] = np.array([r, g, b])
159 | 
160 |     return cmap
161 | 
162 | 
163 | class Colorize(object):
164 |     def __init__(self, n=22):
165 |         self.cmap = labelcolormap(22)
166 |         self.cmap = torch.from_numpy(self.cmap[:n])
167 | 
168 |     def __call__(self, gray_image):
169 |         size = gray_image.size()
170 |         color_image = torch.ByteTensor(3, size[1], size[2]).fill_(0)
171 | 
172 |         for label in range(0, len(self.cmap)):
173 |             mask = (label == gray_image[0]).cpu()
174 |             color_image[0][mask] = self.cmap[label][0]
175 |             color_image[1][mask] = self.cmap[label][1]
176 |             color_image[2][mask] = self.cmap[label][2]
177 | 
178 |         return color_image
179 | 
--------------------------------------------------------------------------------
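A short sketch of how the segmentation-style helpers in this file compose; the 22-class label map is synthetic. ToSP hands Scale integer sizes, which is why the integer division above matters.

from PIL import Image
import numpy as np

label_map = Image.fromarray(np.random.randint(0, 22, (64, 64), dtype=np.uint8))

pyramid = ToSP(64)(label_map)                    # label map at full, 1/2, ..., 1/32 resolution
tensors = [ToLabel()(p) for p in pyramid]        # HxW LongTensors
color = Colorize(n=22)(tensors[0].unsqueeze(0))  # 3xHxW ByteTensor for visualization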
/training_from_scratch/utils/vmf_lib.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import torch
3 | import torch.nn.functional as F
4 | import numpy as np
5 | import argparse
6 | import pandas as pd
7 | import seaborn as sns
8 | import matplotlib
9 | matplotlib.use('AGG')
10 | import matplotlib.pyplot as plt
11 | import sklearn
12 | import mpmath
13 | from sklearn import covariance
14 | 
15 | recall_level_default = 0.95
16 | 
17 | 
18 | parser = argparse.ArgumentParser(description='Evaluates an OOD Detector',
19 |                                  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
20 | parser.add_argument('--T', default=1., type=float, help='temperature: energy|Odin')
21 | parser.add_argument('--name', default='1.', type=str)
22 | parser.add_argument('--use_es', default=0, type=int)
23 | 
24 | args = parser.parse_args()
25 | 
26 | 
27 | 
28 | 
29 | 
30 | class vMFLogPartition(torch.autograd.Function):
31 |     '''
32 |     Evaluates log C_d(kappa) for vMF density
33 |     Allows autograd wrt kappa
34 |     '''
35 | 
36 |     besseli = np.vectorize(mpmath.besseli)
37 |     log = np.vectorize(mpmath.log)
38 |     nhlog2pi = -0.5 * np.log(2 * np.pi)
39 | 
40 |     @staticmethod
41 |     def forward(ctx, *args):
42 | 
43 |         '''
44 |         Args:
45 |             args[0] = d; scalar (> 0)
46 |             args[1] = kappa; (> 0) torch tensor of any shape
47 | 
48 |         Returns:
49 |             logC = log C_d(kappa); torch tensor of the same shape as kappa
50 |         '''
51 | 
52 |         d = args[0]
53 |         kappa = args[1]
54 | 
55 |         s = 0.5 * d - 1
56 | 
57 |         # log I_s(kappa)
58 |         mp_kappa = mpmath.mpf(1.0) * kappa.detach().cpu().numpy()
59 |         mp_logI = vMFLogPartition.log(vMFLogPartition.besseli(s, mp_kappa))
60 |         logI = torch.from_numpy(np.array(mp_logI.tolist(), dtype=float)).to(kappa)
61 | 
62 |         if (logI != logI).sum().item() > 0:  # there is nan
63 |             raise ValueError('NaN is detected from the output of log-besseli()')
64 | 
65 |         logC = d * vMFLogPartition.nhlog2pi + s * kappa.log() - logI
66 | 
67 |         # save for backward()
68 |         ctx.s, ctx.mp_kappa, ctx.logI = s, mp_kappa, logI
69 | 
70 |         return logC
71 | 
72 |     @staticmethod
73 |     def backward(ctx, *grad_output):
74 | 
75 |         s, mp_kappa, logI = ctx.s, ctx.mp_kappa, ctx.logI
76 | 
77 |         # log I_{s+1}(kappa)
78 |         mp_logI2 = vMFLogPartition.log(vMFLogPartition.besseli(s + 1, mp_kappa))
79 |         logI2 = torch.from_numpy(np.array(mp_logI2.tolist(), dtype=float)).to(logI)
80 | 
81 |         if (logI2 != logI2).sum().item() > 0:  # there is nan
82 |             raise ValueError('NaN is detected from the output of log-besseli()')
83 | 
84 |         dlogC_dkappa = -(logI2 - logI).exp()
85 | 
86 |         return None, grad_output[0] * dlogC_dkappa
87 | 
88 | 
89 | 
90 | # def density(mu, kappa, samples):
91 | #     mu = torch.from_numpy(mu)
92 | #     kappa = torch.from_numpy(np.asarray(kappa))
93 | #     samples = torch.from_numpy(samples)
94 | #     dotp = (mu.unsqueeze(0) * samples).sum(1)
95 | #     # breakpoint()
96 | #     logC = vMFLogPartition.apply(len(mu), kappa.float())
97 | #     logliks = kappa * dotp + logC
98 | 
99 | #     return logliks
100 | 
--------------------------------------------------------------------------------
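The commented-out density() above indicates the intended use of vMFLogPartition. A runnable sketch under that reading, with synthetic unit-norm data (mu on the sphere, kappa > 0); this is an illustration, not code shipped by the repository.

import torch

d = 8
mu = torch.randn(d)
mu = mu / mu.norm()                                    # unit mean direction
kappa = torch.tensor([5.0], requires_grad=True)        # concentration
samples = torch.randn(16, d)
samples = samples / samples.norm(dim=1, keepdim=True)  # points on the unit sphere

# vMF log-likelihood: log p(x) = kappa * <mu, x> + log C_d(kappa)
dotp = (mu.unsqueeze(0) * samples).sum(1)
logC = vMFLogPartition.apply(d, kappa)                 # autograd-aware log normalizer
logliks = kappa * dotp + logC

logliks.sum().backward()                               # gradient flows into kappa via backward() above
print(logliks.shape, kappa.grad)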