def compress(train, test, model, classes=10):
    """Encode the retrieval (train) and query (test) loaders into hash codes.

    Args:
        train: DataLoader over the retrieval/database images.
        test: DataLoader over the query images.
        model: network whose forward returns a 3-tuple whose THIRD element
            is the binary code (matches the CNN classes in this repo).
        classes: number of classes, used to one-hot encode the labels.

    Returns:
        (retrievalB, retrievalL, queryB, queryL): {-1,+1} code matrices and
        {0,1} one-hot label matrices for database and query sets.
    """
    retrievalB = []
    retrievalL = []
    for batch_step, (data, target) in enumerate(train):
        # FIX: volatile=True -- this is inference only; without it PyTorch 0.3
        # keeps the whole autograd graph alive for every batch, so memory
        # grows over the full database pass.  Matches the evaluation loops
        # in the training scripts.
        var_data = Variable(data.cuda(), volatile=True)
        _, _, code = model(var_data)
        retrievalB.extend(code.cpu().data.numpy())
        retrievalL.extend(target)

    queryB = []
    queryL = []
    for batch_step, (data, target) in enumerate(test):
        var_data = Variable(data.cuda(), volatile=True)
        _, _, code = model(var_data)
        queryB.extend(code.cpu().data.numpy())
        queryL.extend(target)

    retrievalB = np.array(retrievalB)
    # one-hot encode integer labels via identity-matrix row lookup
    retrievalL = np.eye(classes)[np.array(retrievalL)]

    queryB = np.array(queryB)
    queryL = np.eye(classes)[np.array(queryL)]
    return retrievalB, retrievalL, queryB, queryL
def calculate_hamming(B1, B2):
    """Hamming distance between one code and a matrix of codes.

    :param B1: {-1,+1} vector of length q (one query code)
    :param B2: {-1,+1} matrix of shape (r, q) (retrieval codes)
    :return: vector of r Hamming distances
    """
    q = B2.shape[1]  # code length == max possible inner product value
    # <b1, b2> = q - 2 * hamming  =>  hamming = (q - <b1, b2>) / 2
    distH = 0.5 * (q - np.dot(B1, B2.transpose()))
    return distH


def calculate_map(qB, rB, queryL, retrievalL):
    """Mean average precision over the full retrieval set.

    :param qB: {-1,+1}^{mxq} query bits
    :param rB: {-1,+1}^{nxq} retrieval bits
    :param queryL: {0,1}^{mxl} query label
    :param retrievalL: {0,1}^{nxl} retrieval label
    :return: mAP in [0, 1]
    """
    num_query = queryL.shape[0]
    mean_ap = 0  # renamed from `map` -- do not shadow the builtin
    for i in range(num_query):  # renamed from `iter` -- do not shadow the builtin
        # gnd[j] == 1 iff retrieval item j shares at least one label with query i
        gnd = (np.dot(queryL[i, :], retrievalL.transpose()) > 0).astype(np.float32)
        # FIX: cast to int -- np.sum returns float32 and modern NumPy rejects
        # a float `num` in np.linspace below (TypeError).
        tsum = int(np.sum(gnd))
        if tsum == 0:
            # query has no relevant item: contributes 0 to the mean
            continue
        # rank ground truth by Hamming distance to the query
        hamm = calculate_hamming(qB[i, :], rB)
        ind = np.argsort(hamm)
        gnd = gnd[ind]

        count = np.linspace(1, tsum, tsum)  # [1, 2, ..., tsum]
        tindex = np.asarray(np.where(gnd == 1)) + 1.0  # 1-based ranks of the hits
        mean_ap = mean_ap + np.mean(count / tindex)  # average precision of query i
    mean_ap = mean_ap / num_query
    return mean_ap


def calculate_top_map(qB, rB, queryL, retrievalL, topk):
    """Mean average precision restricted to the top-k ranked items.

    :param qB: {-1,+1}^{mxq} query bits
    :param rB: {-1,+1}^{nxq} retrieval bits
    :param queryL: {0,1}^{mxl} query label
    :param retrievalL: {0,1}^{nxl} retrieval label
    :param topk: only the topk nearest codes count towards precision
    :return: mAP@topk in [0, 1]
    """
    num_query = queryL.shape[0]
    topkmap = 0
    for i in range(num_query):
        gnd = (np.dot(queryL[i, :], retrievalL.transpose()) > 0).astype(np.float32)
        hamm = calculate_hamming(qB[i, :], rB)
        ind = np.argsort(hamm)
        gnd = gnd[ind]

        tgnd = gnd[0:topk]
        # FIX: int cast for the same np.linspace reason as in calculate_map
        tsum = int(np.sum(tgnd))
        if tsum == 0:
            continue
        count = np.linspace(1, tsum, tsum)

        tindex = np.asarray(np.where(tgnd == 1)) + 1.0
        topkmap = topkmap + np.mean(count / tindex)
    topkmap = topkmap / num_query
    return topkmap
# cifar2.py -- supervised GreedyHash on CIFAR-10 (AlexNet backbone, 16 bits).
#
# A sign() hashing layer binarizes AlexNet features (gradients pass straight
# through); a bias-free linear classifier is trained on the codes.  After
# training, retrieval mAP is computed with the training set as the database.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
import torchvision.datasets as dsets
from torchvision import transforms
from torch.autograd import Variable
import torchvision
import math
import numpy as np
from cal_map import calculate_map, compress


# Hyper Parameters
num_epochs = 40
batch_size = 32
epoch_lr_decrease = 20
learning_rate = 0.001
encode_length = 16
num_classes = 10


train_transform = transforms.Compose([
    transforms.Scale(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Scale(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Dataset
train_dataset = dsets.CIFAR10(root='data/',
                              train=True,
                              transform=train_transform,
                              download=True)

test_dataset = dsets.CIFAR10(root='data/',
                             train=False,
                             transform=test_transform)


train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          num_workers=4)


# new layer: sign() forward, identity backward (straight-through estimator).
# NOTE(review): torch.sign(0) == 0, so an exactly-zero activation yields a
# 0 bit rather than +/-1 -- confirm this is acceptable for the metric code.
class hash(Function):
    @staticmethod
    def forward(ctx, input):
        return torch.sign(input)

    @staticmethod
    def backward(ctx, grad_output):
        # pass the incoming gradient through the sign() unchanged
        return grad_output


def hash_layer(input):
    return hash.apply(input)


class CNN(nn.Module):
    """AlexNet trunk + hashing layer + linear classifier over the codes."""

    def __init__(self, encode_length, num_classes):
        super(CNN, self).__init__()
        self.alex = torchvision.models.alexnet(pretrained=True)
        # drop AlexNet's final classification layer, keep 4096-d features
        self.alex.classifier = nn.Sequential(*list(self.alex.classifier.children())[:6])
        self.fc_plus = nn.Linear(4096, encode_length)
        self.fc = nn.Linear(encode_length, num_classes, bias=False)

    def forward(self, x):
        x = self.alex.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.alex.classifier(x)
        x = self.fc_plus(x)
        code = hash_layer(x)
        output = self.fc(code)
        # (logits, pre-binarization feature, binary code)
        return output, x, code


cnn = CNN(encode_length=encode_length, num_classes=num_classes)
#cnn.load_state_dict(torch.load('temp.pkl'))


# Loss and Optimizer
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(cnn.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)


def adjust_learning_rate(optimizer, epoch):
    # step decay: x0.1 every epoch_lr_decrease epochs
    lr = learning_rate * (0.1 ** (epoch // epoch_lr_decrease))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


best = 0.0

# Train the Model
for epoch in range(num_epochs):
    cnn.cuda().train()
    adjust_learning_rate(optimizer, epoch)
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs, feature, _ = cnn(images)
        loss1 = criterion(outputs, labels)
        # quantization penalty: cubic pull of |feature| towards 1
        loss2 = torch.mean(torch.abs(torch.pow(torch.abs(feature) - Variable(torch.ones(feature.size()).cuda()), 3)))
        loss = loss1 + 0.1 * loss2
        loss.backward()
        optimizer.step()

        # FIX: floor division -- "/ 2" yields a float under Python 3, making
        # the modulus a float and the log condition (nearly) never true.
        if (i + 1) % (len(train_dataset) // batch_size // 2) == 0:
            print('Epoch [%d/%d], Iter [%d/%d] Loss1: %.4f Loss2: %.4f'
                  % (epoch + 1, num_epochs, i + 1, len(train_dataset) // batch_size,
                     loss1.data[0], loss2.data[0]))

    # Test the Model
    cnn.eval()  # Change model to 'eval' mode
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = Variable(images.cuda(), volatile=True)
        outputs, _, _ = cnn(images)
        _, predicted = torch.max(outputs.cpu().data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()

    print('Test Accuracy of the model: %.2f %%' % (100.0 * correct / total))

    # checkpoint the best classifier so far
    if 1.0 * correct / total > best:
        best = 1.0 * correct / total
        torch.save(cnn.state_dict(), 'temp.pkl')

    print('best: %.2f %%' % (best * 100.0))


# Save the Trained Model
torch.save(cnn.state_dict(), 'cifar2.pkl')


# Calculate MAP (training set is the retrieval database, test set the queries)
#cnn.load_state_dict(torch.load('temp.pkl'))
cnn.eval()
retrievalB, retrievalL, queryB, queryL = compress(train_loader, test_loader, cnn)
print(np.shape(retrievalB))
print(np.shape(retrievalL))
print(np.shape(queryB))
print(np.shape(queryL))

print('---calculate map---')
result = calculate_map(qB=queryB, rB=retrievalB, queryL=queryL, retrievalL=retrievalL)
print(result)
# unsupervised_vgg.py -- unsupervised GreedyHash on CIFAR-10.
#
# The VGG-16 backbone is frozen; only the hashing layer (fc_encode) is
# trained, so that the pairwise cosine similarity of the binary codes
# mimics the cosine similarity of the deep features.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
import torchvision.datasets as dsets
from torchvision import transforms
from torch.autograd import Variable
import torchvision
import math
import numpy as np
from cal_map import calculate_top_map, compress


# Hyper Parameters
num_epochs = 60
batch_size = 32
learning_rate = 0.0001
encode_length = 64

# shorter codes converge more slowly: train longer
if encode_length == 16:
    num_epochs = 300


train_transform = transforms.Compose([
    transforms.Scale(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Scale(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Dataset (database = the training images re-read with the test transform)
train_dataset = dsets.CIFAR10(root='data/',
                              train=True,
                              transform=train_transform,
                              download=True)

test_dataset = dsets.CIFAR10(root='data/',
                             train=False,
                             transform=test_transform)

database_dataset = dsets.CIFAR10(root='data/',
                                 train=True,
                                 transform=test_transform)

# Data Loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          num_workers=4)

database_loader = torch.utils.data.DataLoader(dataset=database_dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=4)


# new layer: sign() forward, identity backward (straight-through estimator)
class hash(Function):
    @staticmethod
    def forward(ctx, input):
        return torch.sign(input)

    @staticmethod
    def backward(ctx, grad_output):
        # pass the incoming gradient through the sign() unchanged
        return grad_output


def hash_layer(input):
    return hash.apply(input)


class CNN(nn.Module):
    """Frozen VGG-16 trunk + trainable hashing layer."""

    def __init__(self, encode_length):
        super(CNN, self).__init__()
        self.vgg = torchvision.models.vgg16(pretrained=True)
        # drop VGG's final classification layer, keep 4096-d features
        self.vgg.classifier = nn.Sequential(*list(self.vgg.classifier.children())[:6])
        # freeze the backbone: only fc_encode is optimized below
        for param in self.vgg.parameters():
            param.requires_grad = False
        self.fc_encode = nn.Linear(4096, encode_length)

    def forward(self, x):
        x = self.vgg.features(x)
        x = x.view(x.size(0), -1)
        x = self.vgg.classifier(x)
        h = self.fc_encode(x)
        b = hash_layer(h)
        # (deep feature, pre-binarization activation, binary code)
        return x, h, b


cnn = CNN(encode_length=encode_length)
#cnn.load_state_dict(torch.load('vgg.pkl'))


# only the hashing layer's parameters are optimized (backbone is frozen)
optimizer = torch.optim.SGD(cnn.fc_encode.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)

best = 0.0

# Train the Model
for epoch in range(num_epochs):
    cnn.cuda().train()
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        x, h, b = cnn(images)

        # FIX: floor division -- "size(0) / 2" is a float under Python 3,
        # and an odd last batch gave the two halves different lengths,
        # crashing cosine_similarity.  [half:2 * half] keeps both slices
        # exactly `half` rows; for even batches it equals the original [half:].
        half = labels.size(0) // 2
        target_b = F.cosine_similarity(b[:half], b[half:2 * half])
        target_x = F.cosine_similarity(x[:half], x[half:2 * half])
        loss1 = F.mse_loss(target_b, target_x)
        # quantization penalty: cubic pull of |h| towards 1
        loss2 = torch.mean(torch.abs(torch.pow(torch.abs(h) - Variable(torch.ones(h.size()).cuda()), 3)))
        loss = loss1 + 0.1 * loss2
        loss.backward()
        optimizer.step()

        # log once per epoch, at the last full batch
        if (i + 1) % (len(train_dataset) // batch_size) == 0:
            print('Epoch [%d/%d], Iter [%d/%d] Loss1: %.4f Loss2: %.4f'
                  % (epoch + 1, num_epochs, i + 1, len(train_dataset) // batch_size,
                     loss1.data[0], loss2.data[0]))

    # Save the Trained Model
    torch.save(cnn.state_dict(), 'vgg.pkl')

    # Test the Model every 5 epochs: mAP@1000 with training set as database
    if (epoch + 1) % 5 == 0:
        cnn.eval()
        retrievalB, retrievalL, queryB, queryL = compress(train_loader, test_loader, cnn)

        print('---calculate top map---')
        result = calculate_top_map(qB=queryB, rB=retrievalB, queryL=queryL, retrievalL=retrievalL, topk=1000)
        print(result)

        # checkpoint the best retrieval model so far
        if result > best:
            best = result
            torch.save(cnn.state_dict(), 'temp.pkl')

        print('best: %.6f' % (best))
# cifar1.py -- supervised GreedyHash on CIFAR-10, paper protocol "cifar-1":
# per class, 100 random images form the query set, the next 5,900 form the
# retrieval database, and the first 500 of those 5,900 form the training set
# (so train is a subset of the database, as in the paper's protocol).
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
import torchvision.datasets as dsets
from torchvision import transforms
from torch.autograd import Variable
import torchvision
import math
import numpy as np
from cal_map import calculate_map, compress


# Hyper Parameters
num_epochs = 50
batch_size = 32
epoch_lr_decrease = 30
learning_rate = 0.001
encode_length = 12
num_classes = 10


train_transform = transforms.Compose([
    transforms.Scale(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Scale(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Dataset
train_dataset = dsets.CIFAR10(root='data/',
                              train=True,
                              transform=train_transform,
                              download=True)

test_dataset = dsets.CIFAR10(root='data/',
                             train=False,
                             transform=test_transform)

database_dataset = dsets.CIFAR10(root='data/',
                                 train=False,
                                 transform=test_transform)


# Construct training, query and database set from the pooled 60,000 images
X = train_dataset.train_data
L = np.array(train_dataset.train_labels)

X = np.concatenate((X, test_dataset.test_data))
L = np.concatenate((L, np.array(test_dataset.test_labels)))

first = True

for label in range(10):
    index = np.where(L == label)[0]

    # shuffle this class's indices so the split is random
    N = index.shape[0]
    perm = np.random.permutation(N)
    index = index[perm]

    # first 100 shuffled images of the class -> query set
    data = X[index[0:100]]
    labels = L[index[0:100]]
    if first:
        test_L = labels
        test_data = data
    else:
        test_L = np.concatenate((test_L, labels))
        test_data = np.concatenate((test_data, data))

    # next 5,900 -> retrieval database
    data = X[index[100:6000]]
    labels = L[index[100:6000]]
    if first:
        dataset_L = labels
        data_set = data
    else:
        dataset_L = np.concatenate((dataset_L, labels))
        data_set = np.concatenate((data_set, data))

    # first 500 of the database slice -> training set (train is a database subset)
    data = X[index[100:600]]
    labels = L[index[100:600]]
    if first:
        train_L = labels
        train_data = data
    else:
        train_L = np.concatenate((train_L, labels))
        train_data = np.concatenate((train_data, data))

    first = False

# overwrite the torchvision datasets' backing arrays with our splits
train_dataset.train_data = train_data
train_dataset.train_labels = train_L
test_dataset.test_data = test_data
test_dataset.test_labels = test_L
database_dataset.test_data = data_set
database_dataset.test_labels = dataset_L


# Data Loader (shuffle on test/database is harmless: labels travel with
# the images through compress())
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=4)

database_loader = torch.utils.data.DataLoader(dataset=database_dataset,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=4)


# new layer: sign() forward, identity backward (straight-through estimator)
class hash(Function):
    @staticmethod
    def forward(ctx, input):
        return torch.sign(input)

    @staticmethod
    def backward(ctx, grad_output):
        # pass the incoming gradient through the sign() unchanged
        return grad_output


def hash_layer(input):
    return hash.apply(input)


class CNN(nn.Module):
    """AlexNet trunk + hashing layer + linear classifier over the codes."""

    def __init__(self, encode_length, num_classes):
        super(CNN, self).__init__()
        self.alex = torchvision.models.alexnet(pretrained=True)
        # drop AlexNet's final classification layer, keep 4096-d features
        self.alex.classifier = nn.Sequential(*list(self.alex.classifier.children())[:6])
        self.fc_plus = nn.Linear(4096, encode_length)
        self.fc = nn.Linear(encode_length, num_classes, bias=False)

    def forward(self, x):
        x = self.alex.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.alex.classifier(x)
        x = self.fc_plus(x)
        code = hash_layer(x)
        output = self.fc(code)
        # (logits, pre-binarization feature, binary code)
        return output, x, code


cnn = CNN(encode_length=encode_length, num_classes=num_classes)
#cnn.load_state_dict(torch.load('temp.pkl'))


# Loss and Optimizer
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(cnn.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)


def adjust_learning_rate(optimizer, epoch):
    # step decay: x0.1 every epoch_lr_decrease epochs
    lr = learning_rate * (0.1 ** (epoch // epoch_lr_decrease))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


best = 0.0

# Train the Model
for epoch in range(num_epochs):
    cnn.cuda().train()
    adjust_learning_rate(optimizer, epoch)
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs, feature, _ = cnn(images)
        loss1 = criterion(outputs, labels)
        # quantization penalty: cubic pull of |feature| towards 1
        loss2 = torch.mean(torch.abs(torch.pow(torch.abs(feature) - Variable(torch.ones(feature.size()).cuda()), 3)))
        loss = loss1 + 0.1 * loss2
        loss.backward()
        optimizer.step()

        # FIX: floor division -- "/ 2" yields a float under Python 3, making
        # the modulus a float and the log condition (nearly) never true.
        if (i + 1) % (len(train_dataset) // batch_size // 2) == 0:
            print('Epoch [%d/%d], Iter [%d/%d] Loss1: %.4f Loss2: %.4f'
                  % (epoch + 1, num_epochs, i + 1, len(train_dataset) // batch_size,
                     loss1.data[0], loss2.data[0]))

    # Test the Model
    cnn.eval()  # Change model to 'eval' mode
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = Variable(images.cuda(), volatile=True)
        outputs, _, _ = cnn(images)
        _, predicted = torch.max(outputs.cpu().data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()

    print('Test Accuracy of the model: %.2f %%' % (100.0 * correct / total))

    # checkpoint the best classifier so far
    if 1.0 * correct / total > best:
        best = 1.0 * correct / total
        torch.save(cnn.state_dict(), 'temp.pkl')

    print('best: %.2f %%' % (best * 100.0))


# Save the Trained Model
torch.save(cnn.state_dict(), 'cifar1.pkl')


# Calculate MAP over the 59,000-image database
#cnn.load_state_dict(torch.load('temp.pkl'))
cnn.eval()
retrievalB, retrievalL, queryB, queryL = compress(database_loader, test_loader, cnn)
print(np.shape(retrievalB))
print(np.shape(retrievalL))
print(np.shape(queryB))
print(np.shape(queryL))

print('---calculate map---')
result = calculate_map(qB=queryB, rB=retrievalB, queryL=queryL, retrievalL=retrievalL)
print(result)
# imagenet.py -- supervised GreedyHash on a 100-class ImageNet subset
# (AlexNet backbone).  Image lists come from train.txt / test.txt /
# database.txt under data/imagenet; mAP@1000 is reported at the end.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
import torchvision.datasets as dsets
from torchvision import transforms
from torch.autograd import Variable
import torchvision
import math
import numpy as np
from PIL import Image
import os
import os.path
import matplotlib.image as mpimg
from cal_map import calculate_top_map, compress


# Hyper Parameters
num_epochs = 100
batch_size = 32
epoch_lr_decrease = 80
learning_rate = 0.001
encode_length = 16
num_classes = 100


def pil_loader(path):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')


def accimage_loader(path):
    import accimage
    try:
        return accimage.Image(path)
    except IOError:
        # Potentially a decoding problem, fall back to PIL.Image
        return pil_loader(path)


def default_loader(path):
    from torchvision import get_image_backend
    if get_image_backend() == 'accimage':
        return accimage_loader(path)
    else:
        return pil_loader(path)


class IMAGENET(torch.utils.data.Dataset):
    """ImageNet subset described by a "<path> <one-hot label>" list file.

    train=True reads train.txt, database_bool=True reads database.txt,
    otherwise test.txt.  Images are loaded lazily in __getitem__.
    """

    def __init__(self, root,
                 transform=None, target_transform=None, train=True, database_bool=False):
        self.loader = default_loader
        self.root = os.path.expanduser(root)
        self.transform = transform
        self.target_transform = target_transform
        if train:
            self.base_folder = 'train.txt'
        elif database_bool:
            self.base_folder = 'database.txt'
        else:
            self.base_folder = 'test.txt'

        self.train_data = []    # image paths
        self.train_labels = []  # one-hot label rows

        filename = os.path.join(self.root, self.base_folder)

        with open(filename, 'r') as file_to_read:
            while True:
                lines = file_to_read.readline()
                if not lines:
                    break
                # each line: "<relative path> <l_0> <l_1> ... <l_99>"
                pos_tmp = lines.split()[0]
                pos_tmp = os.path.join(self.root, pos_tmp)
                label_tmp = lines.split()[1:]
                self.train_data.append(pos_tmp)
                self.train_labels.append(label_tmp)
        self.train_data = np.array(self.train_data)
        # FIX: np.float was removed from NumPy (>=1.24); float64 is what the
        # old alias meant.
        self.train_labels = np.array(self.train_labels, dtype=np.float64)
        # FIX: ndarray.reshape returns a NEW array; the original discarded
        # the result, making the statement a no-op.  Assign it.
        self.train_labels = self.train_labels.reshape((-1, num_classes))

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (image, target) where target is index of the target class.
        """

        img, target = self.train_data[index], self.train_labels[index]
        # one-hot row -> integer class index for CrossEntropyLoss
        target = int(np.where(target == 1)[0])

        img = self.loader(img)

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def __len__(self):
        return len(self.train_data)


train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.Scale(256),
    transforms.RandomCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Scale(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


# Dataset
train_dataset = IMAGENET(root='data/imagenet',
                         train=True,
                         transform=train_transform)

test_dataset = IMAGENET(root='data/imagenet',
                        train=False,
                        transform=test_transform)

database_dataset = IMAGENET(root='data/imagenet',
                            train=False,
                            transform=test_transform,
                            database_bool=True)


# Data Loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=4)

database_loader = torch.utils.data.DataLoader(dataset=database_dataset,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=4)


# new layer: sign() forward, identity backward (straight-through estimator)
class hash(Function):
    @staticmethod
    def forward(ctx, input):
        return torch.sign(input)

    @staticmethod
    def backward(ctx, grad_output):
        # pass the incoming gradient through the sign() unchanged
        return grad_output


def hash_layer(input):
    return hash.apply(input)


class CNN(nn.Module):
    """AlexNet trunk + hashing layer + linear classifier over the codes."""

    def __init__(self, encode_length, num_classes):
        super(CNN, self).__init__()
        self.alex = torchvision.models.alexnet(pretrained=True)
        # drop AlexNet's final classification layer, keep 4096-d features
        self.alex.classifier = nn.Sequential(*list(self.alex.classifier.children())[:6])
        self.fc_plus = nn.Linear(4096, encode_length)
        self.fc = nn.Linear(encode_length, num_classes, bias=False)

    def forward(self, x):
        x = self.alex.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.alex.classifier(x)
        x = self.fc_plus(x)
        code = hash_layer(x)
        output = self.fc(code)
        # (logits, pre-binarization feature, binary code)
        return output, x, code


cnn = CNN(encode_length=encode_length, num_classes=num_classes)
#cnn.load_state_dict(torch.load('temp.pkl'))


# Loss and Optimizer
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(cnn.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)


def adjust_learning_rate(optimizer, epoch):
    # step decay: x0.1 every epoch_lr_decrease epochs
    lr = learning_rate * (0.1 ** (epoch // epoch_lr_decrease))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


best = 0.0

# Train the Model
for epoch in range(num_epochs):
    cnn.cuda().train()
    adjust_learning_rate(optimizer, epoch)
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs, feature, _ = cnn(images)
        loss1 = criterion(outputs, labels)
        # quantization penalty: cubic pull of |feature| towards 1
        # (weight 1 here, vs 0.1 on CIFAR)
        loss2 = torch.mean(torch.abs(torch.pow(torch.abs(feature) - Variable(torch.ones(feature.size()).cuda()), 3)))
        loss = loss1 + 1 * loss2
        loss.backward()
        optimizer.step()

        # FIX: floor division -- "/ 2" yields a float under Python 3, making
        # the modulus a float and the log condition (nearly) never true.
        if (i + 1) % (len(train_dataset) // batch_size // 2) == 0:
            print('Epoch [%d/%d], Iter [%d/%d] Loss1: %.4f Loss2: %.4f'
                  % (epoch + 1, num_epochs, i + 1, len(train_dataset) // batch_size,
                     loss1.data[0], loss2.data[0]))

    # Test the Model
    cnn.eval()  # Change model to 'eval' mode
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = Variable(images.cuda(), volatile=True)
        outputs, _, _ = cnn(images)
        _, predicted = torch.max(outputs.cpu().data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()

    print('Test Accuracy of the model: %.2f %%' % (100.0 * correct / total))

    # checkpoint the best classifier so far
    if 1.0 * correct / total > best:
        best = 1.0 * correct / total
        torch.save(cnn.state_dict(), 'temp.pkl')

    print('best: %.2f %%' % (best * 100.0))


# Save the Trained Model
torch.save(cnn.state_dict(), 'imagenet.pkl')


# Calculate MAP@1000 over the database split
#cnn.load_state_dict(torch.load('temp.pkl'))
cnn.eval()
retrievalB, retrievalL, queryB, queryL = compress(database_loader, test_loader, cnn, classes=num_classes)
print(np.shape(retrievalB))
print(np.shape(retrievalL))
print(np.shape(queryB))
print(np.shape(queryL))

print('---calculate top map---')
result = calculate_top_map(qB=queryB, rB=retrievalB, queryL=queryL, retrievalL=retrievalL, topk=1000)
print(result)