"""Zero-shot learning on CUB: embed class attributes into visual feature space.

A two-layer ReLU network maps 312-d attribute vectors to 1024-d visual
features.  Test images are labelled by 1-nearest-neighbour search against the
projected test-class prototypes.  Running this file trains the network and
periodically prints the test accuracy.
"""
from scipy import io
import numpy as np
import torch
from torch.autograd import Variable
from torch.nn import functional as F
import kNN


def compute_accuracy(test_att, test_visual, test_id, test_label):
    """Return the 1-NN classification accuracy over the test images.

    Args:
        test_att: attribute prototypes, one row per test class.  Must have
            312 columns to match ``w1``.  (The original notes give 50 test
            classes -- TODO confirm against the data files.)
        test_visual: visual features, one row per test image.
        test_id: class label for each row of ``test_att``.
        test_label: ground-truth class label for each row of ``test_visual``.

    Returns:
        The fraction of test images whose predicted label equals
        ``test_label``.
    """
    test_att = Variable(torch.from_numpy(test_att).float().cuda())
    # Project the prototypes once and move them to NumPy once; the result
    # does not depend on the test image, so keep it out of the loop.
    att_pred = forward(test_att).cpu().data.numpy()

    # Derive the image count from the data instead of hard-coding 2933.
    num_samples = test_visual.shape[0]
    outpred = np.array([
        kNN.kNNClassify(test_visual[i, :], att_pred, test_id, 1)
        for i in range(num_samples)
    ])

    test_label = test_label.astype("float32")
    acc = np.equal(outpred, test_label).mean()

    return acc


def data_iterator(batch_size=100):
    """Yield ``(att_batch, visual_batch)`` training mini-batches forever.

    Reads the module-level ``train_x`` and ``train_att`` arrays.  Each pass
    over the data uses a fresh random permutation applied to both arrays, so
    rows stay paired.  The last batch of a pass is shorter when the number of
    training rows is not a multiple of ``batch_size``.

    Args:
        batch_size: maximum number of rows per batch (default 100, the value
            the script originally hard-coded).

    Yields:
        A pair of CUDA float ``Variable`` objects: attributes, then visual
        features.
    """
    while True:
        # shuffle labels and features with the same permutation
        idxs = np.arange(0, len(train_x))
        np.random.shuffle(idxs)
        shuf_visual = train_x[idxs]
        shuf_att = train_att[idxs]

        for batch_idx in range(0, len(train_x), batch_size):
            visual_batch = shuf_visual[batch_idx:batch_idx + batch_size]
            visual_batch = visual_batch.astype("float32")
            att_batch = shuf_att[batch_idx:batch_idx + batch_size]

            att_batch = Variable(torch.from_numpy(att_batch).float().cuda())
            visual_batch = Variable(torch.from_numpy(visual_batch).float().cuda())
            yield att_batch, visual_batch


f = io.loadmat('CUB_data/train_attr.mat')
train_att = np.array(f['train_attr'])
print('train attr:', train_att.shape)

f = io.loadmat('CUB_data/train_cub_googlenet_bn.mat')
train_x = np.array(f['train_cub_googlenet_bn'])
print('train x:', train_x.shape)

f = io.loadmat('CUB_data/test_cub_googlenet_bn.mat')
test_x = np.array(f['test_cub_googlenet_bn'])
print('test x:', test_x.shape)

f = io.loadmat('CUB_data/test_proto.mat')
test_att = np.array(f['test_proto'])
print('test att:', test_att.shape)

f = io.loadmat('CUB_data/test_labels_cub.mat')
test_x2label = np.squeeze(np.array(f['test_labels_cub']))
print('test x2label:', test_x2label)

f = io.loadmat('CUB_data/testclasses_id.mat')
test_att2label = np.squeeze(np.array(f['testclasses_id']))
print('test att2label:', test_att2label)

# Network parameters: 312 -> 700 -> 1024.
w1 = Variable(torch.FloatTensor(312, 700).cuda(), requires_grad=True)
b1 = Variable(torch.FloatTensor(700).cuda(), requires_grad=True)
w2 = Variable(torch.FloatTensor(700, 1024).cuda(), requires_grad=True)
b2 = Variable(torch.FloatTensor(1024).cuda(), requires_grad=True)

# must initialize! torch.FloatTensor(...) allocates uninitialized memory.
w1.data.normal_(0, 0.02)
w2.data.normal_(0, 0.02)
b1.data.fill_(0)
b2.data.fill_(0)


def forward(att):
    """Map a batch of attribute vectors to visual space.

    Applies two affine layers, each followed by ReLU, using the module-level
    parameters ``w1``, ``b1``, ``w2`` and ``b2``.
    """
    a1 = F.relu(torch.mm(att, w1) + b1)
    a2 = F.relu(torch.mm(a1, w2) + b2)

    return a2


def getloss(pred, x):
    """Return the squared error between ``x`` and ``pred``.

    The error is summed over all elements and divided by the number of rows
    in ``x``.
    """
    loss = torch.pow(x - pred, 2).sum()
    loss /= x.size(0)
    return loss


optimizer = torch.optim.Adam([w1, b1, w2, b2], lr=1e-5, weight_decay=1e-2)

# PyTorch 0.4 renamed clip_grad_norm to clip_grad_norm_ and deprecated the old
# name.  Use the new one when it exists so the script runs warning-free on
# recent versions while still working on 0.2/0.3.
_clip_grad_norm = (getattr(torch.nn.utils, "clip_grad_norm_", None)
                   or torch.nn.utils.clip_grad_norm)

# # Run
iter_ = data_iterator()
for i in range(1000000):
    att_batch_val, visual_batch_val = next(iter_)

    pred = forward(att_batch_val)
    loss = getloss(pred, visual_batch_val)

    optimizer.zero_grad()
    loss.backward()
    # gradient clip makes it converge much faster!
    _clip_grad_norm([w1, b1, w2, b2], 1)
    optimizer.step()

    if i % 1000 == 0:
        print(compute_accuracy(test_att, test_x, test_att2label, test_x2label))
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dragen1860/DeepEmbeddingModel_ZSL-Pytorch/56d7d64b62e0174a4d2b948f34d17a732fb67604/CUB_data/train_cub_googlenet_bn.mat -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepEmbeddingModel_ZSL-Pytorch 2 | 3 | PyTorch implementation of the CVPR 2017 paper: Learning a Deep Embedding Model for Zero-Shot Learning 4 | 5 | # How to run 6 | 7 | `python CUB_attribute.py` 8 | 9 | Runs with Python 3.x and PyTorch 0.2/0.3. 10 | 11 | # TODO: 12 | - AWA dataset 13 | 14 | # TensorFlow Implementation 15 | [DeepEmbeddingModel_ZSL](https://github.com/lzrobots/DeepEmbeddingModel_ZSL) 16 | -------------------------------------------------------------------------------- /__pycache__/kNN.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dragen1860/DeepEmbeddingModel_ZSL-Pytorch/56d7d64b62e0174a4d2b948f34d17a732fb67604/__pycache__/kNN.cpython-36.pyc -------------------------------------------------------------------------------- /kNN.py: -------------------------------------------------------------------------------- 1 | ######################################### 2 | # kNN: k Nearest Neighbors 3 | 4 | # Input: newInput: vector to compare to existing dataset (1xN) 5 | # dataSet: size m data set of known vectors (NxM) 6 | # labels: data set labels (1xM vector) 7 | # k: number of neighbors to use for comparison 8 | 9 | # Output: the most popular class label 10 | ######################################### 11 | 12 | from numpy import * 13 | import operator 14 | 15 | # create a dataset which contains 4 samples with 2 classes 16 | def createDataSet(): 17 | # create a matrix: each row as a sample 18 | group = array([[1.0, 0.9], [1.0, 1.0], [0.1, 0.2], [0.0, 0.1]]) 19 | labels = ['A', 'A', 'B', 
# create a dataset which contains 4 samples with 2 classes
def createDataSet():
    """Return a toy data set: a 4x2 sample matrix and its four labels."""
    # create a matrix: each row as a sample
    group = array([[1.0, 0.9], [1.0, 1.0], [0.1, 0.2], [0.0, 0.1]])
    labels = ['A', 'A', 'B', 'B']  # four samples and two classes
    return group, labels


# classify using kNN
def kNNClassify(newInput, dataSet, labels, k):
    """Classify ``newInput`` by majority vote of its k nearest neighbours.

    Neighbours are ranked by Euclidean distance.

    Args:
        newInput: the vector to classify.  Its length must equal the number
            of columns of ``dataSet``.
        dataSet: known samples, one sample per row.
        labels: ``labels[i]`` is the class label of ``dataSet[i]``.
        k: number of neighbours that vote.  Values larger than the number of
            samples are treated as "all samples".

    Returns:
        The label with the most votes among the k nearest samples.  When
        several labels tie, the one met first while walking the neighbours
        nearest-first wins.

    Raises:
        ValueError: if ``dataSet`` has no rows or ``k`` is smaller than 1.
    """
    # Imported locally: this module star-imports numpy, so new top-level
    # names are kept to a minimum.
    from collections import Counter

    dataSet = asarray(dataSet)
    numSamples = dataSet.shape[0]  # shape[0] stands for the num of row
    if numSamples == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be a positive integer")
    # Written as a comparison, not min(): ``from numpy import *`` may shadow
    # the built-in min/max depending on the NumPy version.
    if k > numSamples:
        k = numSamples

    ## step 1: calculate Euclidean distance
    # Broadcasting subtracts newInput from every row; no tile() copy needed.
    diff = asarray(newInput) - dataSet
    distance = (diff ** 2).sum(axis=1) ** 0.5

    ## step 2: sort the distance (ascending)
    sortedDistIndices = argsort(distance)

    ## steps 3-5: let the k nearest samples vote and return the winner.
    # Counter keeps first-seen order, so most_common() breaks ties in favour
    # of the label that appeared nearest.
    votes = Counter(labels[idx] for idx in sortedDistIndices[:k])
    return votes.most_common(1)[0][0]
from numpy import *
import operator
import math
from collections import Counter

try:
    # Nothing in the visible module uses TensorFlow.  The import is kept so
    # ``tf`` still resolves where TensorFlow is installed, but its absence no
    # longer prevents this module from loading.
    import tensorflow as tf
except ImportError:
    tf = None
import numpy as np


# create a dataset which contains 4 samples with 2 classes
def createDataSet():
    """Return a toy data set: a 4x2 sample matrix and its four labels."""
    # create a matrix: each row as a sample
    group = array([[1.0, 0.9], [1.0, 1.0], [0.1, 0.2], [0.0, 0.1]])
    labels = ['A', 'A', 'B', 'B']  # four samples and two classes
    return group, labels


def cosine_distance(v1, v2):
    """Return the cosine distance ``1 - (v1 . v2) / (||v1|| * ||v2||)``.

    The result is 0 for vectors pointing the same way, 1 for orthogonal
    vectors and 2 for opposite vectors.  If either vector has zero norm the
    division is 0/0 and the result is NaN under NumPy's division semantics.
    """
    v1_sq = np.inner(v1, v1)
    v2_sq = np.inner(v2, v2)
    dis = 1 - np.inner(v1, v2) / math.sqrt(v1_sq * v2_sq)
    return dis


# classify using kNN
def kNNClassify(newInput, dataSet, labels, k):
    """Classify ``newInput`` by majority vote of its k nearest neighbours.

    Neighbours are ranked by cosine distance (see ``cosine_distance``).

    Args:
        newInput: the vector to classify.  Its length must equal the number
            of columns of ``dataSet``.
        dataSet: known samples, one sample per row.
        labels: ``labels[i]`` is the class label of ``dataSet[i]``.
        k: number of neighbours that vote.  Values larger than the number of
            samples are treated as "all samples".

    Returns:
        The label with the most votes among the k nearest samples.  When
        several labels tie, the one met first while walking the neighbours
        nearest-first wins.

    Raises:
        ValueError: if ``dataSet`` has no rows or ``k`` is smaller than 1.

    Side effects:
        Rebinds the module-level name ``distance`` to the list of distances
        computed for this call.  The original code did this, so it is kept
        for backward compatibility.
    """
    global distance

    dataSet = np.asarray(dataSet)
    numSamples = dataSet.shape[0]
    if numSamples == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be a positive integer")
    # Written as a comparison, not min(): ``from numpy import *`` may shadow
    # the built-in min/max depending on the NumPy version.
    if k > numSamples:
        k = numSamples

    ## step 1: cosine distance from newInput to every known sample
    distance = [cosine_distance(newInput, sample) for sample in dataSet]

    ## step 2: sort the distance (ascending)
    sortedDistIndices = np.argsort(distance)

    ## steps 3-5: let the k nearest samples vote and return the winner.
    # Counter keeps first-seen order, so most_common() breaks ties in favour
    # of the label that appeared nearest.
    votes = Counter(labels[idx] for idx in sortedDistIndices[:k])
    return votes.most_common(1)[0][0]