├── kNN.py ├── kNN_cosine.py ├── AwA_wordvector.py ├── CUB_attribute.py ├── AwA_attribute.py ├── AwA2_GBU.py ├── aPY_GBU.py ├── SUN_GBU.py ├── AwA_fusion.py ├── AwA1_GBU.py ├── CUB1_GBU.py └── README.md /kNN.py: -------------------------------------------------------------------------------- 1 | ######################################### 2 | # kNN: k Nearest Neighbors 3 | 4 | # Input: newInput: vector to compare to existing dataset (1xN) 5 | # dataSet: size m data set of known vectors (NxM) 6 | # labels: data set labels (1xM vector) 7 | # k: number of neighbors to use for comparison 8 | 9 | # Output: the most popular class label 10 | ######################################### 11 | 12 | from numpy import * 13 | import operator 14 | 15 | # create a dataset which contains 4 samples with 2 classes 16 | def createDataSet(): 17 | # create a matrix: each row as a sample 18 | group = array([[1.0, 0.9], [1.0, 1.0], [0.1, 0.2], [0.0, 0.1]]) 19 | labels = ['A', 'A', 'B', 'B'] # four samples and two classes 20 | return group, labels 21 | 22 | 23 | # classify using kNN 24 | def kNNClassify(newInput, dataSet, labels, k): 25 | numSamples = dataSet.shape[0] # shape[0] stands for the num of row 26 | 27 | ## step 1: calculate Euclidean distance 28 | # tile(A, reps): Construct an array by repeating A reps times 29 | # the following copy numSamples rows for dataSet 30 | diff = tile(newInput, (numSamples, 1)) - dataSet # Subtract element-wise 31 | squaredDiff = diff ** 2 # squared for the subtract 32 | squaredDist = sum(squaredDiff, axis = 1) # sum is performed by row 33 | distance = squaredDist ** 0.5 34 | 35 | ## step 2: sort the distance 36 | # argsort() returns the indices that would sort an array in a ascending order 37 | sortedDistIndices = argsort(distance) 38 | 39 | classCount = {} # define a dictionary (can be append element) 40 | for i in xrange(k): 41 | ## step 3: choose the min k distance 42 | voteLabel = labels[sortedDistIndices[i]] 43 | 44 | ## step 4: count the times labels occur 45 | # when the key voteLabel is not in dictionary classCount, get() 46 | # will return 0 47 | classCount[voteLabel] = classCount.get(voteLabel, 0) + 1 48 | 49 | ## step 5: the max voted class will return 50 | maxCount = 0 51 | for key, value in classCount.items(): 52 | if value > maxCount: 53 | maxCount = value 54 | maxIndex = key 55 | 56 | return maxIndex 57 | #return sortedDistIndices 58 | -------------------------------------------------------------------------------- /kNN_cosine.py: -------------------------------------------------------------------------------- 1 | ######################################### 2 | # kNN: k Nearest Neighbors 3 | 4 | # Input: newInput: vector to compare to existing dataset (1xN) 5 | # dataSet: size m data set of known vectors (NxM) 6 | # labels: data set labels (1xM vector) 7 | # k: number of neighbors to use for comparison 8 | 9 | # Output: the most popular class label 10 | ######################################### 11 | 12 | from numpy import * 13 | import operator 14 | import math 15 | import tensorflow as tf 16 | import numpy as np 17 | 18 | # create a dataset which contains 4 samples with 2 classes 19 | def createDataSet(): 20 | # create a matrix: each row as a sample 21 | group = array([[1.0, 0.9], [1.0, 1.0], [0.1, 0.2], [0.0, 0.1]]) 22 | labels = ['A', 'A', 'B', 'B'] # four samples and two classes 23 | return group, labels 24 | 25 | def cosine_distance(v1,v2): 26 | "compute cosine similarity of v1 to v2: (v1 dot v2)/{||v1||*||v2||)" 27 | 28 | v1_sq = np.inner(v1,v1) 29 | v2_sq = np.inner(v2,v2) 30 | dis = 1 - np.inner(v1,v2) / math.sqrt(v1_sq * v2_sq) 31 | return dis 32 | 33 | 34 | # classify using kNN 35 | def kNNClassify(newInput, dataSet, labels, k): 36 | global distance 37 | distance = [0]* dataSet.shape[0] 38 | for i in range(dataSet.shape[0]): 39 | distance[i] = cosine_distance(newInput, dataSet[i]) 40 | 41 | 42 | ## step 2: sort the distance 43 | # argsort() returns the indices that would sort an array in a ascending order 44 | sortedDistIndices = argsort(distance) 45 | 46 | classCount = {} # define a dictionary (can be append element) 47 | for i in xrange(k): 48 | ## step 3: choose the min k distance 49 | voteLabel = labels[sortedDistIndices[i]] 50 | 51 | ## step 4: count the times labels occur 52 | # when the key voteLabel is not in dictionary classCount, get() 53 | # will return 0 54 | classCount[voteLabel] = classCount.get(voteLabel, 0) + 1 55 | 56 | ## step 5: the max voted class will return 57 | maxCount = 0 58 | for key, value in classCount.items(): 59 | if value > maxCount: 60 | maxCount = value 61 | maxIndex = key 62 | 63 | return maxIndex 64 | #return sortedDistIndices 65 | -------------------------------------------------------------------------------- /AwA_wordvector.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np, h5py 3 | import scipy.io as sio 4 | import sys 5 | import random 6 | import kNN_cosine 7 | import re 8 | from numpy import * 9 | 10 | def weight_variable(shape): 11 | initial = tf.truncated_normal(shape, stddev=0.1) 12 | return tf.Variable(initial) 13 | 14 | def bias_variable(shape): 15 | initial = tf.constant(0.1, shape=shape) 16 | return tf.Variable(initial) 17 | 18 | 19 | def compute_accuracy(test_word, test_visual, test_id, test_label): 20 | global left_w1 21 | word_pre = sess.run(left_w1, feed_dict={word_features: test_word}) 22 | test_id = np.squeeze(np.asarray(test_id)) 23 | outpre = [0]*6180 24 | test_label = np.squeeze(np.asarray(test_label)) 25 | test_label = test_label.astype("float32") 26 | for i in range(6180): 27 | outputLabel = kNN_cosine.kNNClassify(test_visual[i,:], word_pre, test_id, 1) 28 | outpre[i] = outputLabel 29 | correct_prediction = tf.equal(outpre, test_label) 30 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 31 | result = sess.run(accuracy, feed_dict={ 32 | word_features: test_word, visual_features: test_visual}) 33 | return result 34 | 35 | 36 | # # data 37 | 38 | f=sio.loadmat('./data/AwA_data/wordvector/train_word.mat') 39 | word=np.array(f['train_word']) 40 | word.shape 41 | 42 | f=sio.loadmat('./data/AwA_data/train_googlenet_bn.mat') 43 | x=np.array(f['train_googlenet_bn']) 44 | x.shape 45 | 46 | f=sio.loadmat('./data/AwA_data/test_googlenet_bn.mat') 47 | x_test=np.array(f['test_googlenet_bn']) 48 | x_test.shape 49 | 50 | f=sio.loadmat('./data/AwA_data/test_labels.mat') 51 | test_label=np.array(f['test_labels']) 52 | test_label.shape 53 | 54 | f=sio.loadmat('./data/AwA_data/testclasses_id.mat') 55 | test_id=np.array(f['testclasses_id']) 56 | test_id.shape 57 | 58 | f=sio.loadmat('./data/AwA_data/wordvector/test_vectors.mat') 59 | word_pro=np.array(f['test_vectors']) 60 | word_pro.shape 61 | 62 | 63 | 64 | # # data shuffle 65 | def data_iterator(): 66 | """ A simple data iterator """ 67 | batch_idx = 0 68 | while True: 69 | # shuffle labels and features 70 | idxs = np.arange(0, len(x)) 71 | np.random.shuffle(idxs) 72 | shuf_visual = x[idxs] 73 | shuf_word = word[idxs] 74 | batch_size = 64 75 | for batch_idx in range(0, len(x), batch_size): 76 | visual_batch = shuf_visual[batch_idx:batch_idx+batch_size] 77 | visual_batch = visual_batch.astype("float32") 78 | word_batch = shuf_word[batch_idx:batch_idx+batch_size] 79 | yield word_batch, visual_batch 80 | 81 | 82 | 83 | 84 | # # Placeholder 85 | # define placeholder for inputs to network 86 | word_features = tf.placeholder(tf.float32, [None, 1000]) 87 | visual_features = tf.placeholder(tf.float32, [None, 1024]) 88 | 89 | 90 | # # Network 91 | # AwA 1000 1024 ReLu, 1e-3 * regularisers, 64 batch, 0.0001 Adam 92 | W_left_w1 = weight_variable([1000, 1024]) 93 | b_left_w1 = bias_variable([1024]) 94 | left_w1 = tf.nn.relu(tf.matmul(word_features, W_left_w1) + b_left_w1) 95 | 96 | 97 | # # loss 98 | loss_w = tf.reduce_mean(tf.square(left_w1 - visual_features)) 99 | 100 | # L2 regularisation for the fully connected parameters. 101 | regularisers_w = (tf.nn.l2_loss(W_left_w1) + tf.nn.l2_loss(b_left_w1)) 102 | 103 | 104 | # Add the regularisation term to the loss. 105 | loss_w += 1e-3 * regularisers_w 106 | 107 | 108 | train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_w) 109 | 110 | 111 | 112 | sess = tf.Session() 113 | sess.run(tf.global_variables_initializer()) 114 | 115 | 116 | # # Run 117 | iter_ = data_iterator() 118 | for i in range(1000000): 119 | word_batch_val, visual_batch_val = iter_.next() 120 | sess.run(train_step, feed_dict={word_features: word_batch_val, visual_features: visual_batch_val}) 121 | if i % 1000 == 0: 122 | print(compute_accuracy(word_pro, x_test, test_id, test_label)) 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /CUB_attribute.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np, h5py 3 | import scipy.io as sio 4 | import sys 5 | import random 6 | import kNN 7 | import re 8 | import os 9 | from numpy import * 10 | 11 | 12 | def weight_variable(shape): 13 | initial = tf.truncated_normal(shape, stddev=0.1) 14 | return tf.Variable(initial) 15 | 16 | def bias_variable(shape): 17 | initial = tf.constant(0.1, shape=shape) 18 | return tf.Variable(initial) 19 | 20 | 21 | def compute_accuracy(test_att, test_visual, test_id, test_label): 22 | global left_a2 23 | att_pre = sess.run(left_a2, feed_dict={att_features: test_att}) 24 | test_id = np.squeeze(np.asarray(test_id)) 25 | outpre = [0]*2933 26 | test_label = np.squeeze(np.asarray(test_label)) 27 | test_label = test_label.astype("float32") 28 | for i in range(2933): 29 | outputLabel = kNN.kNNClassify(test_visual[i,:], att_pre, test_id, 1) 30 | outpre[i] = outputLabel 31 | correct_prediction = tf.equal(outpre, test_label) 32 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 33 | result = sess.run(accuracy, feed_dict={att_features: test_att, visual_features: test_visual}) 34 | return result 35 | 36 | 37 | f=sio.loadmat('./data/CUB_data/train_attr.mat') 38 | att=np.array(f['train_attr']) 39 | att.shape 40 | 41 | f=sio.loadmat('./data/CUB_data/train_cub_googlenet_bn.mat') 42 | x=np.array(f['train_cub_googlenet_bn']) 43 | x.shape 44 | 45 | f=sio.loadmat('./data/CUB_data/test_cub_googlenet_bn.mat') 46 | x_test=np.array(f['test_cub_googlenet_bn']) 47 | x_test.shape 48 | 49 | f=sio.loadmat('./data/CUB_data/test_labels_cub.mat') 50 | test_label=np.array(f['test_labels_cub']) 51 | test_label.shape 52 | 53 | f=sio.loadmat('./data/CUB_data/testclasses_id.mat') 54 | test_id=np.array(f['testclasses_id']) 55 | 56 | f=sio.loadmat('./data/CUB_data/test_proto.mat') 57 | att_pro=np.array(f['test_proto']) 58 | 59 | 60 | # # data shuffle 61 | def data_iterator(): 62 | """ A simple data iterator """ 63 | batch_idx = 0 64 | while True: 65 | # shuffle labels and features 66 | idxs = np.arange(0, len(x)) 67 | np.random.shuffle(idxs) 68 | shuf_visual = x[idxs] 69 | shuf_att = att[idxs] 70 | batch_size = 100 71 | for batch_idx in range(0, len(x), batch_size): 72 | visual_batch = shuf_visual[batch_idx:batch_idx+batch_size] 73 | visual_batch = visual_batch.astype("float32") 74 | att_batch = shuf_att[batch_idx:batch_idx+batch_size] 75 | yield att_batch, visual_batch 76 | 77 | 78 | 79 | 80 | # # Placeholder 81 | # define placeholder for inputs to network 82 | att_features = tf.placeholder(tf.float32, [None, 312]) 83 | visual_features = tf.placeholder(tf.float32, [None, 1024]) 84 | 85 | 86 | # # Network 87 | 88 | # CUB 312 700 1024 ReLu, 1e-2 * regularisers, 100 batch, 0.00001 Adam 89 | W_left_a1 = weight_variable([312, 700]) 90 | b_left_a1 = bias_variable([700]) 91 | left_a1 = tf.nn.relu(tf.matmul(att_features, W_left_a1) + b_left_a1) 92 | 93 | W_left_a2 = weight_variable([700, 1024]) 94 | b_left_a2 = bias_variable([1024]) 95 | left_a2 = tf.nn.relu(tf.matmul(left_a1, W_left_a2) + b_left_a2) 96 | 97 | 98 | # # loss 99 | 100 | loss_a = tf.reduce_mean(tf.square(left_a2 - visual_features)) 101 | 102 | # L2 regularisation for the fully connected parameters. 103 | regularizers_a = (tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1) 104 | + tf.nn.l2_loss(W_left_a2) + tf.nn.l2_loss(b_left_a2)) 105 | 106 | 107 | 108 | # Add the regularization term to the loss. 109 | loss_a += 1e-2 * regularizers_a 110 | 111 | 112 | 113 | train_step = tf.train.AdamOptimizer(0.00001).minimize(loss_a) 114 | 115 | sess = tf.Session() 116 | sess.run(tf.global_variables_initializer()) 117 | 118 | 119 | # # Run 120 | iter_ = data_iterator() 121 | for i in range(1000000): 122 | att_batch_val, visual_batch_val = iter_.next() 123 | sess.run(train_step, feed_dict={att_features: att_batch_val, visual_features: visual_batch_val}) 124 | if i % 1000 == 0: 125 | print(compute_accuracy(att_pro, x_test, test_id, test_label)) 126 | 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /AwA_attribute.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np, h5py 3 | import scipy.io as sio 4 | import sys 5 | import random 6 | import kNN 7 | import re 8 | from numpy import * 9 | 10 | def weight_variable(shape): 11 | initial = tf.truncated_normal(shape, stddev=0.1) 12 | return tf.Variable(initial) 13 | 14 | def bias_variable(shape): 15 | initial = tf.constant(0.1, shape=shape) 16 | return tf.Variable(initial) 17 | 18 | 19 | def compute_accuracy(test_att, test_visual, test_id, test_label): 20 | global left_a2 21 | att_pre = sess.run(left_a2, feed_dict={att_features: test_att}) 22 | test_id = np.squeeze(np.asarray(test_id)) 23 | outpre = [0]*6180 # CUB 2933 24 | test_label = np.squeeze(np.asarray(test_label)) 25 | test_label = test_label.astype("float32") 26 | for i in range(6180): # CUB 2933 27 | outputLabel = kNN.kNNClassify(test_visual[i,:], att_pre, test_id, 1) 28 | outpre[i] = outputLabel 29 | correct_prediction = tf.equal(outpre, test_label) 30 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 31 | result = sess.run(accuracy, feed_dict={ 32 | att_features: test_att, visual_features: test_visual}) 33 | return result 34 | 35 | 36 | # # data 37 | 38 | f=h5py.File('./data/AwA_data/attribute/Z_s_con.mat','r') 39 | att=np.array(f['Z_s_con']) 40 | att.shape 41 | 42 | f=sio.loadmat('./data/AwA_data/train_googlenet_bn.mat') 43 | x=np.array(f['train_googlenet_bn']) 44 | x.shape 45 | 46 | f=sio.loadmat('./data/AwA_data/test_googlenet_bn.mat') 47 | x_test=np.array(f['test_googlenet_bn']) 48 | x_test.shape 49 | 50 | f=sio.loadmat('./data/AwA_data/test_labels.mat') 51 | test_label=np.array(f['test_labels']) 52 | test_label.shape 53 | 54 | f=sio.loadmat('./data/AwA_data/testclasses_id.mat') 55 | test_id=np.array(f['testclasses_id']) 56 | test_id.shape 57 | 58 | f=sio.loadmat('./data/AwA_data/attribute/pca_te_con_10x85.mat') 59 | att_pro=np.array(f['pca_te_con_10x85']) 60 | att_pro.shape 61 | 62 | 63 | 64 | # # data shuffle 65 | def data_iterator(): 66 | """ A simple data iterator """ 67 | batch_idx = 0 68 | while True: 69 | # shuffle labels and features 70 | idxs = np.arange(0, len(x)) 71 | np.random.shuffle(idxs) 72 | shuf_visual = x[idxs] 73 | shuf_att = att[idxs] 74 | batch_size = 64 75 | for batch_idx in range(0, len(x), batch_size): 76 | visual_batch = shuf_visual[batch_idx:batch_idx+batch_size] 77 | visual_batch = visual_batch.astype("float32") 78 | att_batch = shuf_att[batch_idx:batch_idx+batch_size] 79 | yield att_batch, visual_batch 80 | 81 | 82 | 83 | 84 | # # Placeholder 85 | # define placeholder for inputs to network 86 | att_features = tf.placeholder(tf.float32, [None, 85]) 87 | visual_features = tf.placeholder(tf.float32, [None, 1024]) 88 | 89 | 90 | # # Network 91 | # AwA 85 300 1024 ReLu, 1e-2 * regularisers, 64 batch, 0.0001 Adam 92 | W_left_a1 = weight_variable([85, 300]) 93 | b_left_a1 = bias_variable([300]) 94 | left_a1 = tf.nn.relu(tf.matmul(att_features, W_left_a1) + b_left_a1) 95 | 96 | 97 | W_left_a2 = weight_variable([300, 1024]) 98 | b_left_a2 = bias_variable([1024]) 99 | left_a2 = tf.nn.relu(tf.matmul(left_a1, W_left_a2) + b_left_a2) 100 | 101 | 102 | # # loss 103 | loss_a = tf.reduce_mean(tf.square(left_a2 - visual_features)) 104 | 105 | # L2 regularisation for the fully connected parameters. 106 | regularisers_a = (tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1) 107 | + tf.nn.l2_loss(W_left_a2) + tf.nn.l2_loss(b_left_a2)) 108 | 109 | # Add the regularisation term to the loss. 110 | loss_a += 1e-2 * regularisers_a 111 | 112 | 113 | 114 | train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_a) 115 | 116 | 117 | 118 | sess = tf.Session() 119 | sess.run(tf.global_variables_initializer()) 120 | 121 | 122 | # # Run 123 | iter_ = data_iterator() 124 | for i in range(1000000): 125 | att_batch_val, visual_batch_val = iter_.next() 126 | sess.run(train_step, feed_dict={att_features: att_batch_val, visual_features: visual_batch_val}) 127 | if i % 1000 == 0: 128 | print(compute_accuracy(att_pro, x_test, test_id, test_label)) 129 | 130 | 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /AwA2_GBU.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import scipy.io as sio 4 | import kNN 5 | import kNN_cosine 6 | from numpy import * 7 | from sklearn.metrics import accuracy_score 8 | 9 | 10 | def weight_variable(shape): 11 | initial = tf.truncated_normal(shape, stddev=0.1) 12 | return tf.Variable(initial) 13 | 14 | 15 | def bias_variable(shape): 16 | initial = tf.constant(0.1, shape=shape) 17 | return tf.Variable(initial) 18 | 19 | 20 | def compute_accuracy(test_att, test_visual, test_id, test_label): 21 | global left_a2 22 | att_pre = sess.run(left_a2, feed_dict={att_features: test_att}) 23 | test_id = np.squeeze(np.asarray(test_id)) 24 | outpre = [0] * test_visual.shape[0] # CUB 2933 25 | test_label = np.squeeze(np.asarray(test_label)) 26 | test_label = test_label.astype("float32") 27 | for i in range(test_visual.shape[0]): # CUB 2933 28 | outputLabel = kNN.kNNClassify(test_visual[i, :], att_pre, test_id, 1) 29 | outpre[i] = outputLabel 30 | # compute averaged per class accuracy 31 | outpre = np.array(outpre, dtype='int') 32 | unique_labels = np.unique(test_label) 33 | acc = 0 34 | for l in unique_labels: 35 | idx = np.nonzero(test_label == l)[0] 36 | acc += accuracy_score(test_label[idx], outpre[idx]) 37 | acc = acc / unique_labels.shape[0] 38 | return acc 39 | 40 | 41 | dataroot = './data/' 42 | dataset = 'AwA2_data' 43 | image_embedding = 'res101' 44 | class_embedding = 'att' # original_att 45 | 46 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + image_embedding + ".mat") 47 | feature = matcontent['features'].T 48 | label = matcontent['labels'].astype(int).squeeze() - 1 49 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + class_embedding + "_splits.mat") 50 | # numpy array index starts from 0, matlab starts from 1 51 | trainval_loc = matcontent['trainval_loc'].squeeze() - 1 52 | test_seen_loc = matcontent['test_seen_loc'].squeeze() - 1 53 | test_unseen_loc = matcontent['test_unseen_loc'].squeeze() - 1 54 | 55 | attribute = matcontent['original_att'].T # att 56 | 57 | x = feature[trainval_loc] 58 | train_label = label[trainval_loc].astype(int) 59 | att = attribute[train_label] 60 | print(att.shape) 61 | x_test = feature[test_unseen_loc] 62 | test_label = label[test_unseen_loc].astype(int) 63 | x_test_seen = feature[test_seen_loc] 64 | test_label_seen = label[test_seen_loc].astype(int) 65 | test_id = np.unique(test_label) 66 | att_pro = attribute[test_id] 67 | 68 | 69 | def data_iterator(): 70 | """ A simple data iterator """ 71 | batch_idx = 0 72 | while True: 73 | # shuffle labels and features 74 | idxs = np.arange(0, len(x)) 75 | np.random.shuffle(idxs) 76 | shuf_visual = x[idxs] 77 | shuf_att = att[idxs] 78 | batch_size = 64 79 | for batch_idx in range(0, len(x), batch_size): 80 | visual_batch = shuf_visual[batch_idx:batch_idx + batch_size] 81 | visual_batch = visual_batch.astype("float32") 82 | att_batch = shuf_att[batch_idx:batch_idx + batch_size] 83 | yield att_batch, visual_batch 84 | 85 | 86 | # # Placeholder 87 | # define placeholder for inputs to network 88 | att_features = tf.placeholder(tf.float32, [None, 85]) 89 | visual_features = tf.placeholder(tf.float32, [None, 2048]) 90 | 91 | # # Network 92 | # AwA 85 1600 2048 ReLu, 1e-3 * regularisers, 64 batch, 0.0001 Adam 93 | W_left_a1 = weight_variable([85, 1600]) 94 | b_left_a1 = bias_variable([1600]) 95 | left_a1 = tf.nn.relu(tf.matmul(att_features, W_left_a1) + b_left_a1) 96 | 97 | W_left_a2 = weight_variable([1600, 2048]) 98 | b_left_a2 = bias_variable([2048]) 99 | left_a2 = tf.nn.relu(tf.matmul(left_a1, W_left_a2) + b_left_a2) 100 | 101 | # # loss 102 | loss_a = tf.reduce_mean(tf.square(left_a2 - visual_features)) 103 | 104 | # L2 regularisation for the fully connected parameters. 105 | regularisers_a = (tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1) 106 | + tf.nn.l2_loss(W_left_a2) + tf.nn.l2_loss(b_left_a2)) 107 | 108 | loss_a += 1e-3 * regularisers_a 109 | 110 | train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_a) 111 | 112 | sess = tf.Session() 113 | sess.run(tf.global_variables_initializer()) 114 | 115 | # # Run 116 | iter_ = data_iterator() 117 | for i in range(1000000): 118 | att_batch_val, visual_batch_val = iter_.next() 119 | sess.run(train_step, feed_dict={att_features: att_batch_val, visual_features: visual_batch_val}) 120 | if i % 1000 == 0: 121 | acc_zsl = compute_accuracy(att_pro, x_test, test_id, test_label) 122 | acc_seen_gzsl = compute_accuracy(attribute, x_test_seen, np.arange(50), test_label_seen) 123 | acc_unseen_gzsl = compute_accuracy(attribute, x_test, np.arange(50), test_label) 124 | H = 2 * acc_seen_gzsl * acc_unseen_gzsl / (acc_seen_gzsl + acc_unseen_gzsl) 125 | print('zsl:', acc_zsl) 126 | print('gzsl: seen=%.4f, unseen=%.4f, h=%.4f' % (acc_seen_gzsl, acc_unseen_gzsl, H)) 127 | 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /aPY_GBU.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import scipy.io as sio 4 | import kNN 5 | import kNN_cosine 6 | from numpy import * 7 | from sklearn.metrics import accuracy_score 8 | 9 | 10 | def weight_variable(shape): 11 | initial = tf.truncated_normal(shape, stddev=0.1) 12 | return tf.Variable(initial) 13 | 14 | 15 | def bias_variable(shape): 16 | initial = tf.constant(0.1, shape=shape) 17 | return tf.Variable(initial) 18 | 19 | 20 | def compute_accuracy(test_att, test_visual, test_id, test_label): 21 | global left_a2 22 | att_pre = sess.run(left_a2, feed_dict={att_features: test_att}) 23 | test_id = np.squeeze(np.asarray(test_id)) 24 | outpre = [0] * test_visual.shape[0] # CUB 2933 25 | test_label = np.squeeze(np.asarray(test_label)) 26 | test_label = test_label.astype("float32") 27 | for i in range(test_visual.shape[0]): # CUB 2933 28 | outputLabel = kNN.kNNClassify(test_visual[i, :], att_pre, test_id, 1) 29 | outpre[i] = outputLabel 30 | # compute averaged per class accuracy 31 | outpre = np.array(outpre, dtype='int') 32 | unique_labels = np.unique(test_label) 33 | acc = 0 34 | for l in unique_labels: 35 | idx = np.nonzero(test_label == l)[0] 36 | acc += accuracy_score(test_label[idx], outpre[idx]) 37 | acc = acc / unique_labels.shape[0] 38 | return acc 39 | 40 | 41 | dataroot = './data/' 42 | dataset = 'APY_data' 43 | image_embedding = 'res101' 44 | class_embedding = 'att' # original_att 45 | 46 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + image_embedding + ".mat") 47 | feature = matcontent['features'].T 48 | label = matcontent['labels'].astype(int).squeeze() - 1 49 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + class_embedding + "_splits.mat") 50 | # numpy array index starts from 0, matlab starts from 1 51 | trainval_loc = matcontent['trainval_loc'].squeeze() - 1 52 | test_seen_loc = matcontent['test_seen_loc'].squeeze() - 1 53 | test_unseen_loc = matcontent['test_unseen_loc'].squeeze() - 1 54 | 55 | attribute = matcontent['original_att'].T # att 56 | 57 | x = feature[trainval_loc] 58 | train_label = label[trainval_loc].astype(int) 59 | att = attribute[train_label] 60 | print(att.shape) 61 | x_test = feature[test_unseen_loc] 62 | test_label = label[test_unseen_loc].astype(int) 63 | x_test_seen = feature[test_seen_loc] 64 | test_label_seen = label[test_seen_loc].astype(int) 65 | test_id = np.unique(test_label) 66 | att_pro = attribute[test_id] 67 | 68 | 69 | def data_iterator(): 70 | """ A simple data iterator """ 71 | batch_idx = 0 72 | while True: 73 | # shuffle labels and features 74 | idxs = np.arange(0, len(x)) 75 | np.random.shuffle(idxs) 76 | shuf_visual = x[idxs] 77 | shuf_att = att[idxs] 78 | batch_size = 64 79 | for batch_idx in range(0, len(x), batch_size): 80 | visual_batch = shuf_visual[batch_idx:batch_idx + batch_size] 81 | visual_batch = visual_batch.astype("float32") 82 | att_batch = shuf_att[batch_idx:batch_idx + batch_size] 83 | yield att_batch, visual_batch 84 | 85 | 86 | # # Placeholder 87 | # define placeholder for inputs to network 88 | att_features = tf.placeholder(tf.float32, [None, 64]) 89 | visual_features = tf.placeholder(tf.float32, [None, 2048]) 90 | 91 | # # Network 92 | # AwA 85 1600 2048 ReLu, 1e-3 * regularisers, 64 batch, 0.0001 Adam 93 | W_left_a1 = weight_variable([64, 1600]) 94 | b_left_a1 = bias_variable([1600]) 95 | left_a1 = tf.nn.relu(tf.matmul(att_features, W_left_a1) + b_left_a1) 96 | 97 | W_left_a2 = weight_variable([1600, 2048]) 98 | b_left_a2 = bias_variable([2048]) 99 | left_a2 = tf.nn.relu(tf.matmul(left_a1, W_left_a2) + b_left_a2) 100 | 101 | # # loss 102 | loss_a = tf.reduce_mean(tf.square(left_a2 - visual_features)) 103 | 104 | # L2 regularisation for the fully connected parameters. 105 | regularisers_a = (tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1) 106 | + tf.nn.l2_loss(W_left_a2) + tf.nn.l2_loss(b_left_a2)) 107 | 108 | loss_a += 1e-4 * regularisers_a 109 | 110 | train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_a) 111 | 112 | sess = tf.Session() 113 | sess.run(tf.global_variables_initializer()) 114 | 115 | # # Run 116 | iter_ = data_iterator() 117 | for i in range(1000000): 118 | att_batch_val, visual_batch_val = iter_.next() 119 | sess.run(train_step, feed_dict={att_features: att_batch_val, visual_features: visual_batch_val}) 120 | if i % 1000 == 0: 121 | acc_zsl = compute_accuracy(att_pro, x_test, test_id, test_label) 122 | acc_seen_gzsl = compute_accuracy(attribute, x_test_seen, np.arange(32), test_label_seen) 123 | acc_unseen_gzsl = compute_accuracy(attribute, x_test, np.arange(32), test_label) 124 | H = 2 * acc_seen_gzsl * acc_unseen_gzsl / (acc_seen_gzsl + acc_unseen_gzsl) 125 | print('zsl:', acc_zsl) 126 | print('gzsl: seen=%.4f, unseen=%.4f, h=%.4f' % (acc_seen_gzsl, acc_unseen_gzsl, H)) 127 | 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /SUN_GBU.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import scipy.io as sio 4 | import kNN 5 | import kNN_cosine 6 | from numpy import * 7 | from sklearn.metrics import accuracy_score 8 | 9 | 10 | def weight_variable(shape): 11 | initial = tf.truncated_normal(shape, stddev=0.1) 12 | return tf.Variable(initial) 13 | 14 | 15 | def bias_variable(shape): 16 | initial = tf.constant(0.1, shape=shape) 17 | return tf.Variable(initial) 18 | 19 | 20 | def compute_accuracy(test_att, test_visual, test_id, test_label): 21 | global left_a2 22 | att_pre = sess.run(left_a2, feed_dict={att_features: test_att}) 23 | test_id = np.squeeze(np.asarray(test_id)) 24 | outpre = [0] * test_visual.shape[0] # CUB 2933 25 | test_label = np.squeeze(np.asarray(test_label)) 26 | test_label = test_label.astype("float32") 27 | for i in range(test_visual.shape[0]): # CUB 2933 28 | outputLabel = kNN.kNNClassify(test_visual[i, :], att_pre, test_id, 1) 29 | outpre[i] = outputLabel 30 | # compute averaged per class accuracy 31 | outpre = np.array(outpre, dtype='int') 32 | unique_labels = np.unique(test_label) 33 | acc = 0 34 | for l in unique_labels: 35 | idx = np.nonzero(test_label == l)[0] 36 | acc += accuracy_score(test_label[idx], outpre[idx]) 37 | acc = acc / unique_labels.shape[0] 38 | return acc 39 | 40 | 41 | dataroot = './data/' 42 | dataset = 'SUN_data' 43 | image_embedding = 'res101' 44 | class_embedding = 'att' # original_att 45 | 46 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + image_embedding + ".mat") 47 | feature = matcontent['features'].T 48 | label = matcontent['labels'].astype(int).squeeze() - 1 49 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + class_embedding + "_splits.mat") 50 | # numpy array index starts from 0, matlab starts from 1 51 | trainval_loc = matcontent['trainval_loc'].squeeze() - 1 52 | test_seen_loc = matcontent['test_seen_loc'].squeeze() - 1 53 | test_unseen_loc = matcontent['test_unseen_loc'].squeeze() - 1 54 | 55 | attribute = matcontent['original_att'].T # att 56 | 57 | x = feature[trainval_loc] 58 | train_label = label[trainval_loc].astype(int) 59 | att = attribute[train_label] 60 | print(att.shape) 61 | x_test = feature[test_unseen_loc] 62 | test_label = label[test_unseen_loc].astype(int) 63 | x_test_seen = feature[test_seen_loc] 64 | test_label_seen = label[test_seen_loc].astype(int) 65 | test_id = np.unique(test_label) 66 | att_pro = attribute[test_id] 67 | 68 | 69 | def data_iterator(): 70 | """ A simple data iterator """ 71 | batch_idx = 0 72 | while True: 73 | # shuffle labels and features 74 | idxs = np.arange(0, len(x)) 75 | np.random.shuffle(idxs) 76 | shuf_visual = x[idxs] 77 | shuf_att = att[idxs] 78 | batch_size = 64 79 | for batch_idx in range(0, len(x), batch_size): 80 | visual_batch = shuf_visual[batch_idx:batch_idx + batch_size] 81 | visual_batch = visual_batch.astype("float32") 82 | att_batch = shuf_att[batch_idx:batch_idx + batch_size] 83 | yield att_batch, visual_batch 84 | 85 | 86 | # # Placeholder 87 | # define placeholder for inputs to network 88 | att_features = tf.placeholder(tf.float32, [None, 102]) 89 | visual_features = tf.placeholder(tf.float32, [None, 2048]) 90 | 91 | # # Network 92 | # AwA 85 1600 2048 ReLu, 1e-3 * regularisers, 64 batch, 0.0001 Adam 93 | W_left_a1 = weight_variable([102, 1600]) 94 | b_left_a1 = bias_variable([1600]) 95 | left_a1 = tf.nn.relu(tf.matmul(att_features, W_left_a1) + b_left_a1) 96 | 97 | W_left_a2 = weight_variable([1600, 2048]) 98 | b_left_a2 = bias_variable([2048]) 99 | left_a2 = tf.nn.relu(tf.matmul(left_a1, W_left_a2) + b_left_a2) 100 | 101 | # # loss 102 | loss_a = tf.reduce_mean(tf.square(left_a2 - visual_features)) 103 | 104 | # L2 regularisation for the fully connected parameters. 105 | regularisers_a = (tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1) 106 | + tf.nn.l2_loss(W_left_a2) + tf.nn.l2_loss(b_left_a2)) 107 | 108 | loss_a += 1e-5 * regularisers_a 109 | 110 | train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_a) 111 | 112 | sess = tf.Session() 113 | sess.run(tf.global_variables_initializer()) 114 | 115 | # # Run 116 | iter_ = data_iterator() 117 | for i in range(1000000): 118 | att_batch_val, visual_batch_val = iter_.next() 119 | sess.run(train_step, feed_dict={att_features: att_batch_val, visual_features: visual_batch_val}) 120 | if i % 1000 == 0: 121 | acc_zsl = compute_accuracy(att_pro, x_test, test_id, test_label) 122 | acc_seen_gzsl = compute_accuracy(attribute, x_test_seen, np.arange(717), test_label_seen) 123 | acc_unseen_gzsl = compute_accuracy(attribute, x_test, np.arange(717), test_label) 124 | H = 2 * acc_seen_gzsl * acc_unseen_gzsl / (acc_seen_gzsl + acc_unseen_gzsl) 125 | print('zsl:', acc_zsl) 126 | print('gzsl: seen=%.4f, unseen=%.4f, h=%.4f' % (acc_seen_gzsl, acc_unseen_gzsl, H)) 127 | 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /AwA_fusion.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np, h5py 3 | import scipy.io as sio 4 | import sys 5 | import random 6 | import kNN 7 | import re 8 | from numpy import * 9 | 10 | def weight_variable(shape): 11 | initial = tf.truncated_normal(shape, stddev=0.1) 12 | return tf.Variable(initial) 13 | 14 | def bias_variable(shape): 15 | initial = tf.constant(0.1, shape=shape) 16 | return tf.Variable(initial) 17 | 18 | 19 | def compute_accuracy(test_att, test_word, test_visual, test_id, test_label): 20 | global center_1 21 | pre = sess.run(center_1, feed_dict={att_features: test_att, word_features: test_word}) 22 | test_id = np.squeeze(np.asarray(test_id)) 23 | outpre = [0]*6180 24 | test_label = np.squeeze(np.asarray(test_label)) 25 | test_label = test_label.astype("float32") 26 | for i in range(6180): 27 | outputLabel = kNN.kNNClassify(test_visual[i,:], pre, test_id, 1) 28 | outpre[i] = outputLabel 29 | correct_prediction = tf.equal(outpre, test_label) 30 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 31 | result = sess.run(accuracy, feed_dict={att_features: test_att, 32 | word_features: test_word, visual_features: test_visual}) 33 | return result 34 | 35 | 36 | # # data 37 | 38 | f=h5py.File('./data/AwA_data/attribute/Z_s_con.mat','r') 39 | att=np.array(f['Z_s_con']) 40 | att.shape 41 | 42 | f=sio.loadmat('./data/AwA_data/wordvector/train_word.mat') 43 | word=np.array(f['train_word']) 44 | word.shape 45 | 46 | f=sio.loadmat('./data/AwA_data/train_googlenet_bn.mat') 47 | x=np.array(f['train_googlenet_bn']) 48 | x.shape 49 | 50 | f=sio.loadmat('./data/AwA_data/test_googlenet_bn.mat') 51 | x_test=np.array(f['test_googlenet_bn']) 52 | x_test.shape 53 | 54 | f=sio.loadmat('./data/AwA_data/test_labels.mat') 55 | test_label=np.array(f['test_labels']) 56 | test_label.shape 57 | 58 | f=sio.loadmat('./data/AwA_data/testclasses_id.mat') 59 | test_id=np.array(f['testclasses_id']) 60 | test_id.shape 61 | 62 | f=sio.loadmat('./data/AwA_data/attribute/pca_te_con_10x85.mat') 63 | att_pro=np.array(f['pca_te_con_10x85']) 64 | att_pro.shape 65 | 66 | f=sio.loadmat('./data/AwA_data/wordvector/test_vectors.mat') 67 | word_pro=np.array(f['test_vectors']) 68 | word_pro.shape 69 | 70 | 71 | 72 | # # data shuffle 73 | def data_iterator(): 74 | """ A simple data iterator """ 75 | batch_idx = 0 76 | while True: 77 | # shuffle labels and features 78 | idxs = np.arange(0, len(x)) 79 | np.random.shuffle(idxs) 80 | shuf_visual = x[idxs] 81 | shuf_att = att[idxs] 82 | shuf_word = word[idxs] 83 | batch_size = 64 84 | for batch_idx in range(0, len(x), batch_size): 85 | visual_batch = shuf_visual[batch_idx:batch_idx+batch_size] 86 | visual_batch = visual_batch.astype("float32") 87 | att_batch = shuf_att[batch_idx:batch_idx+batch_size] 88 | word_batch = shuf_word[batch_idx:batch_idx+batch_size] 89 | yield att_batch, word_batch, visual_batch 90 | 91 | 92 | 93 | # # Placeholder 94 | 95 | # define placeholder for inputs to network 96 | att_features = tf.placeholder(tf.float32, [None, 85]) 97 | word_features = tf.placeholder(tf.float32, [None, 1000]) 98 | visual_features = tf.placeholder(tf.float32, [None, 1024]) 99 | 100 | 101 | # # Network 102 | 103 | W_left_w1 = weight_variable([1000, 900]) 104 | b_left_w1 = bias_variable([900]) 105 | left_w1 = tf.tanh(tf.matmul(word_features, W_left_w1) + b_left_w1) 106 | 107 | 108 | W_left_a1 = weight_variable([85, 900]) 109 | b_left_a1 = bias_variable([900]) 110 | left_a1 = tf.tanh(tf.matmul(att_features, W_left_a1) + b_left_a1) 111 | 112 | multimodal = left_w1 + 3 * left_a1 113 | 114 | W_center_1 = weight_variable([900, 1024]) 115 | b_center_1 = bias_variable([1024]) 116 | center_1 = tf.nn.relu((tf.matmul(multimodal, W_center_1) + b_center_1)) 117 | 118 | 119 | 120 | # # loss 121 | 122 | 123 | loss = tf.reduce_mean(tf.square(center_1 - visual_features)) 124 | 125 | # L2 regularisation for the fully connected parameters. 126 | regularisers_1 = tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1) 127 | regularisers_2 = tf.nn.l2_loss(W_left_w1) + tf.nn.l2_loss(b_left_w1) 128 | regularisers_3 = tf.nn.l2_loss(W_center_1) + tf.nn.l2_loss(b_center_1) 129 | 130 | regularisers = 1e-2 * regularisers_1 + 1e-3 * regularisers_2 + 1e-2 * regularisers_3 131 | 132 | # Add the regularization term to the loss. 133 | loss += regularisers 134 | 135 | 136 | 137 | train_step = tf.train.AdamOptimizer(0.0001).minimize(loss) 138 | 139 | 140 | sess = tf.Session() 141 | sess.run(tf.global_variables_initializer()) 142 | 143 | 144 | # # Run 145 | iter_ = data_iterator() 146 | for i in range(1000000): 147 | att_batch_val, word_batch_val, visual_batch_val = iter_.next() 148 | sess.run(train_step, feed_dict={att_features: att_batch_val, 149 | word_features: word_batch_val, visual_features: visual_batch_val}) 150 | if i % 1000 == 0: 151 | print(compute_accuracy(att_pro, word_pro, x_test, test_id, test_label)) 152 | 153 | 154 | 155 | -------------------------------------------------------------------------------- /AwA1_GBU.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import scipy.io as sio 4 | import kNN 5 | import kNN_cosine 6 | from numpy import * 7 | from sklearn.metrics import accuracy_score 8 | 9 | def weight_variable(shape): 10 | initial = tf.truncated_normal(shape, stddev=0.1) 11 | return tf.Variable(initial) 12 | 13 | def bias_variable(shape): 14 | initial = tf.constant(0.1, shape=shape) 15 | return tf.Variable(initial) 16 | 17 | def compute_accuracy(test_att, test_visual, test_id, test_label): 18 | global left_a2 19 | att_pre = sess.run(left_a2, feed_dict={att_features: test_att}) 20 | test_id = np.squeeze(np.asarray(test_id)) 21 | outpre = [0]*test_visual.shape[0] # CUB 2933 22 | test_label = np.squeeze(np.asarray(test_label)) 23 | test_label = test_label.astype("float32") 24 | for i in range(test_visual.shape[0]): # CUB 2933 25 | outputLabel = kNN.kNNClassify(test_visual[i,:], att_pre, test_id, 1) 26 | outpre[i] = outputLabel 27 | #compute averaged per class accuracy 28 | outpre = np.array(outpre, dtype='int') 29 | unique_labels = np.unique(test_label) 30 | acc = 0 31 | for l in unique_labels: 32 | idx = np.nonzero(test_label == l)[0] 33 | acc += accuracy_score(test_label[idx], outpre[idx]) 34 | acc = acc / unique_labels.shape[0] 35 | return acc 36 | 37 | dataroot = './data/' 38 | dataset = 'AwA1_data' 39 | image_embedding = 'res101' 40 | class_embedding = 'original_att' 41 | 42 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + image_embedding + ".mat") 43 | feature = matcontent['features'].T 44 | label = matcontent['labels'].astype(int).squeeze() - 1 45 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + class_embedding + "_splits.mat") 46 | # numpy array index starts from 0, matlab starts from 1 47 | trainval_loc = matcontent['trainval_loc'].squeeze() - 1 48 | test_seen_loc = matcontent['test_seen_loc'].squeeze() - 1 49 | test_unseen_loc = matcontent['test_unseen_loc'].squeeze() - 1 50 | 51 | attribute = matcontent['att'].T 52 | 53 | x = feature[trainval_loc] 54 | train_label = label[trainval_loc].astype(int) 55 | att = attribute[train_label] 56 | print(att.shape) 57 | x_test = feature[test_unseen_loc] 58 | test_label = label[test_unseen_loc].astype(int) 59 | x_test_seen = feature[test_seen_loc] 60 | test_label_seen = label[test_seen_loc].astype(int) 61 | test_id = np.unique(test_label) 62 | att_pro = attribute[test_id] 63 | 64 | 65 | def data_iterator(): 66 | """ A simple data iterator """ 67 | batch_idx = 0 68 | while True: 69 | # shuffle labels and features 70 | idxs = np.arange(0, len(x)) 71 | np.random.shuffle(idxs) 72 | shuf_visual = x[idxs] 73 | shuf_att = att[idxs] 74 | batch_size = 64 75 | for batch_idx in range(0, len(x), batch_size): 76 | visual_batch = shuf_visual[batch_idx:batch_idx+batch_size] 77 | visual_batch = visual_batch.astype("float32") 78 | att_batch = shuf_att[batch_idx:batch_idx+batch_size] 79 | yield att_batch, visual_batch 80 | 81 | 82 | # # Placeholder 83 | # define placeholder for inputs to network 84 | att_features = tf.placeholder(tf.float32, [None, 85]) 85 | visual_features = tf.placeholder(tf.float32, [None, 2048]) 86 | 87 | 88 | # # Network 89 | # AwA 85 1600 2048 ReLu, 1e-3 * regularisers, 64 batch, 0.0001 Adam 90 | W_left_a1 = weight_variable([85, 1600]) 91 | b_left_a1 = bias_variable([1600]) 92 | left_a1 = tf.nn.relu(tf.matmul(att_features, W_left_a1) + b_left_a1) 93 | 94 | 95 | W_left_a2 = weight_variable([1600, 2048]) 96 | b_left_a2 = bias_variable([2048]) 97 | left_a2 = tf.nn.relu(tf.matmul(left_a1, W_left_a2) + b_left_a2) 98 | 99 | 100 | # # loss 101 | loss_a = tf.reduce_mean(tf.square(left_a2 - visual_features)) 102 | 103 | # L2 regularisation for the fully connected parameters. 104 | regularisers_a = (tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1) 105 | + tf.nn.l2_loss(W_left_a2) + tf.nn.l2_loss(b_left_a2)) 106 | 107 | loss_a += 1e-3 * regularisers_a 108 | 109 | 110 | train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_a) 111 | 112 | 113 | sess = tf.Session() 114 | sess.run(tf.global_variables_initializer()) 115 | 116 | 117 | # # Run 118 | iter_ = data_iterator() 119 | for i in range(1000000): 120 | att_batch_val, visual_batch_val = iter_.next() 121 | sess.run(train_step, feed_dict={att_features: att_batch_val, visual_features: visual_batch_val}) 122 | if i % 1000 == 0: 123 | acc_zsl = compute_accuracy(att_pro, x_test, test_id, test_label) 124 | acc_seen_gzsl = compute_accuracy(attribute, x_test_seen, np.arange(50), test_label_seen) 125 | acc_unseen_gzsl = compute_accuracy(attribute, x_test, np.arange(50), test_label) 126 | H = 2 * acc_seen_gzsl * acc_unseen_gzsl / (acc_seen_gzsl + acc_unseen_gzsl) 127 | print('zsl:', acc_zsl) 128 | print('gzsl: seen=%.4f, unseen=%.4f, h=%.4f' % (acc_seen_gzsl, acc_unseen_gzsl, H)) 129 | 130 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /CUB1_GBU.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import scipy.io as sio 4 | import kNN 5 | import kNN_cosine 6 | from numpy import * 7 | from sklearn.metrics import accuracy_score 8 | 9 | def weight_variable(shape): 10 | initial = tf.truncated_normal(shape, stddev=0.1) 11 | return tf.Variable(initial) 12 | 13 | def bias_variable(shape): 14 | initial = tf.constant(0.1, shape=shape) 15 | return tf.Variable(initial) 16 | 17 | def compute_accuracy(test_att, test_visual, test_id, test_label): 18 | global left_a2 19 | att_pre = sess.run(left_a2, feed_dict={att_features: test_att}) 20 | test_id = np.squeeze(np.asarray(test_id)) 21 | outpre = [0]*test_visual.shape[0] # CUB 2933 22 | test_label = np.squeeze(np.asarray(test_label)) 23 | test_label = test_label.astype("float32") 24 | for i in range(test_visual.shape[0]): # CUB 2933 25 | outputLabel = kNN_cosine.kNNClassify(test_visual[i,:], att_pre, test_id, 1) 26 | outpre[i] = outputLabel 27 | #compute averaged per class accuracy 28 | outpre = np.array(outpre, dtype='int') 29 | unique_labels = np.unique(test_label) 30 | acc = 0 31 | for l in unique_labels: 32 | idx = np.nonzero(test_label == l)[0] 33 | acc += accuracy_score(test_label[idx], outpre[idx]) 34 | acc = acc / unique_labels.shape[0] 35 | return acc 36 | 37 | dataroot = './data/' 38 | dataset = 'CUB1_data' 39 | image_embedding = 'res101' 40 | class_embedding = 'original_att' 41 | 42 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + image_embedding + ".mat") 43 | feature = matcontent['features'].T 44 | label = matcontent['labels'].astype(int).squeeze() - 1 45 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + class_embedding + "_splits.mat") 46 | # numpy array index starts from 0, matlab starts from 1 47 | trainval_loc = matcontent['trainval_loc'].squeeze() - 1 48 | test_seen_loc = matcontent['test_seen_loc'].squeeze() - 1 49 | test_unseen_loc = matcontent['test_unseen_loc'].squeeze() - 1 50 | 51 | attribute = matcontent['att'].T 52 | 53 | x = feature[trainval_loc] 54 | train_label = label[trainval_loc].astype(int) 55 | att = attribute[train_label] 56 | print(att.shape) 57 | x_test = feature[test_unseen_loc] 58 | test_label = label[test_unseen_loc].astype(int) 59 | x_test_seen = feature[test_seen_loc] 60 | test_label_seen = label[test_seen_loc].astype(int) 61 | test_id = np.unique(test_label) 62 | att_pro = attribute[test_id] 63 | 64 | 65 | def data_iterator(): 66 | """ A simple data iterator """ 67 | batch_idx = 0 68 | while True: 69 | # shuffle labels and features 70 | idxs = np.arange(0, len(x)) 71 | np.random.shuffle(idxs) 72 | shuf_visual = x[idxs] 73 | shuf_att = att[idxs] 74 | batch_size = 100 75 | for batch_idx in range(0, len(x), batch_size): 76 | visual_batch = shuf_visual[batch_idx:batch_idx+batch_size] 77 | visual_batch = visual_batch.astype("float32") 78 | att_batch = shuf_att[batch_idx:batch_idx+batch_size] 79 | yield att_batch, visual_batch 80 | 81 | 82 | 83 | 84 | # # Placeholder 85 | # define placeholder for inputs to network 86 | att_features = tf.placeholder(tf.float32, [None, 312]) 87 | visual_features = tf.placeholder(tf.float32, [None, 2048]) 88 | 89 | 90 | # # Network 91 | # CUB 312 300 2048 ReLu, 1e-2 * regularisers, 64 batch, 0.00001 Adam 92 | W_left_a1 = weight_variable([312, 1200]) 93 | b_left_a1 = bias_variable([1200]) 94 | left_a1 = tf.nn.relu(tf.matmul(att_features, W_left_a1) + b_left_a1) 95 | 96 | 97 | W_left_a2 = weight_variable([1200, 2048]) 98 | b_left_a2 = bias_variable([2048]) 99 | left_a2 = tf.nn.relu(tf.matmul(left_a1, W_left_a2) + b_left_a2) 100 | 101 | 102 | # # loss 103 | loss = tf.reduce_mean(tf.square(left_a2 - visual_features)) 104 | 105 | # L2 regularisation for the fully connected parameters. 106 | regularisers = (tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1) 107 | + tf.nn.l2_loss(W_left_a2) + tf.nn.l2_loss(b_left_a2)) 108 | 109 | loss += 1e-2 * regularisers 110 | 111 | train_step = tf.train.AdamOptimizer(0.00001).minimize(loss) 112 | 113 | 114 | sess = tf.Session() 115 | sess.run(tf.global_variables_initializer()) 116 | 117 | 118 | # # Run 119 | iter_ = data_iterator() 120 | for i in range(1000000): 121 | att_batch_val, visual_batch_val = iter_.next() 122 | sess.run(train_step, feed_dict={att_features: att_batch_val, visual_features: visual_batch_val}) 123 | if i % 1000 == 0: 124 | acc_zsl = compute_accuracy(att_pro, x_test, test_id, test_label) 125 | acc_seen_gzsl = compute_accuracy(attribute, x_test_seen, np.arange(200), test_label_seen) 126 | acc_unseen_gzsl = compute_accuracy(attribute, x_test, np.arange(200), test_label) 127 | H = 2 * acc_seen_gzsl * acc_unseen_gzsl / (acc_seen_gzsl + acc_unseen_gzsl) 128 | print('zsl:', acc_zsl) 129 | print('gzsl: seen=%.4f, unseen=%.4f, h=%.4f' % (acc_seen_gzsl, acc_unseen_gzsl, H)) 130 | 131 | 132 | 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepEmbeddingModel_ZSL 2 | Tensorflow code for CVPR 2017 paper: [Learning a Deep Embedding Model for Zero-Shot Learning](https://arxiv.org/abs/1611.05088) 3 | 4 | [Li Zhang](http://www.robots.ox.ac.uk/~lz/) 5 | 6 | # Requirement 7 | Python 2.7 8 | 9 | Tensorflow > 1.0 10 | 11 | # Data 12 | Download data from [here](http://www.robots.ox.ac.uk/~lz/DEM_cvpr2017/data.zip) and unzip it `unzip data.zip`. 13 | 14 | # Run 15 | `AwA_attribute.py` will give you ZSL performance on AwA with attribute. 16 | 17 | `AwA_wordvector.py` will give you ZSL performance on AwA with wordvector. 18 | 19 | `AwA_fusion.py` will give you ZSL performance on AwA with attribute and wordvector fusion. 20 | 21 | `CUB_attribute.py`will give you ZSL performance on CUB with attribute. 22 | 23 | # GBU setting 24 | 25 | ZSL and GZSL performance evaluated under GBU setting [1]: ResNet feature, GBU split, averaged per class accuracy. 26 | 27 | `AwA1_GBU.py` will give you ZSL and GZSL performance on AwA1 with attribute under GBU setting [1]. 28 | 29 | `AwA2_GBU.py` will give you ZSL and GZSL performance on AwA2 with attribute under GBU setting [1]. 30 | 31 | `CUB1_GBU.py` will give you ZSL and GZSL performance on CUB with attribute under GBU setting [1]. 32 | 33 | `aPY_GBU.py` will give you ZSL and GZSL performance on aPY with attribute under GBU setting [1]. 34 | 35 | `SUN_GBU.py` will give you ZSL and GZSL performance on SUN with attribute under GBU setting [1]. 36 | 37 | 38 | 39 | 40 | 41 | | Model | AwA1 T1 | u | s | H | CUB T1 | u | s | H | 42 | |------------|---------|---------|---------|---------|---------|---------|---------|---------| 43 | | DAP [2] | 44.1 | 0.0 | **88.7** | 0.0 | 40.0 | 1.7 | 67.9 | 3.3 | 44 | | CONSE [3] | 45.6 | 0.4 | 88.6 | 0.8 | 34.3 | 1.6 | **72.2** | 3.1 | 45 | | SSE [4] | 60.1 | 7.0 | 80.5 | 12.9 | 43.9 | 8.5 | 46.9 | 14.4 | 46 | | DEVISE [5] | 54.2 | 13.4 | 68.7 | 22.4 | 52.0 | **23.8** | 53.0 | 32.8 | 47 | | SJE [6] | 65.6 | 11.3 | 74.6 | 19.6 | 53.9 | 23.5 | 59.2 | 33.6 | 48 | | LATEM [7] | 55.1 | 7.3 | 71.7 | 13.3 | 49.3 | 15.2 | 57.3 | 24.0 | 49 | | ESZSL [8] | 58.2 | 6.6 | 75.6 | 12.1 | 53.9 | 12.6 | 63.8 | 21.0 | 50 | | ALE [9] | 59.9 | 16.8 | 76.1 | 27.5 | 54.9 | 23.7 | 62.8 | **34.4** | 51 | | SYNC [10] | 54.0 | 8.9 | 87.3 | 16.2 | **55.6** | 11.5 | 70.9 | 19.8 | 52 | | SAE [11] | 53.0 | 1.8 | 77.1 | 3.5 | 33.3 | 7.8 | 54.0 | 13.6 | 53 | | **DEM (OURS)** | **68.4** | **32.8** | 84.7 | **47.3** | 51.7 | 19.6 | 57.9 | 29.2 | 54 | 55 | 56 | | Model | AwA2 T1 | u | s | H | aPY T1 | u | s | H | 57 | |------------|---------|---------|---------|---------|---------|---------|---------|---------| 58 | | DAP [2] | 46.1 | 0.0 | 84.7 | 0.0 | 33.8 | 4.8 | 78.3 | 9.0 | 59 | | CONSE [3] | 44.5 | 0.5 | **90.6**| 1.0 | 26.9 | 0.0 |**91.2** | 0.0 | 60 | | SSE [4] | 61.0 | 8.1 | 82.5 | 14.8 | 34.0 | 0.2 | 78.9 | 0.4 | 61 | | DEVISE [5] | 59.7 | 17.1 | 74.7 | 27.8 | 39.8 | 4.9 | 76.9 | 9.2 | 62 | | SJE [6] | 61.9 | 8.0 | 73.9 | 14.4 | 32.9 | 3.7 | 55.7 | 6.9 | 63 | | LATEM [7] | 55.8 | 11.5 | 77.3 | 20.0 | 35.2 | 0.1 | 73.0 | 0.2 | 64 | | ESZSL [8] | 58.6 | 5.9 | 77.8 | 11.0 | 38.3 | 2.4 | 70.1 | 4.6 | 65 | | ALE [9] | 62.5 | 14.0 | 81.8 | 23.9 | 39.7 | 4.6 | 73.7 | 8.7 | 66 | | SYNC [10] | 46.6 | 10.0 | 90.5 | 18.0 | 23.9 | 7.4 | 66.3 | 13.3 | 67 | | SAE [11] | 54.1 | 1.1 | 82.2 | 2.2 | 8.3 | 0.4 | 80.9 | 0.9 | 68 | | **DEM (OURS)** | **67.1** | **30.5** | 86.4 | **45.1**|   35.0 | **11.1**| 75.1 |**19.4** | 69 | 70 | 71 | 72 | 73 | | Model | SUN T1 | u | s | H | 74 | |------------|---------|---------|---------|---------| 75 | | DAP [2] | 39.9 | 4.2 | 25.1 | 7.2 | 76 | | CONSE [3] | 38.8 | 6.8 | 39.9 | 11.6 | 77 | | SSE [4] | 51.5 | 2.1 | 36.4 | 4.0 | 78 | | DEVISE [5] | 56.5 | 16.9 | 27.4 | 20.9 | 79 | | SJE [6] | 53.7 | 14.7 | 30.5 | 19.8 | 80 | | LATEM [7] | 55.3 | 14.7 | 28.8 | 19.5 | 81 | | ESZSL [8] | 54.5 | 11.0 | 27.9 | 15.8 | 82 | | ALE [9] | 58.1 | **21.8** | 33.1 | **26.3** | 83 | | SYNC [10] | 56.3 | 7.9 | **43.3** | 13.4 | 84 | | SAE [11]       |   40.3 |   8.8   |   18.0 |   11.8  | 85 | | **DEM (OURS)** | **61.9** | 20.5 | 34.3 | 25.6 | 86 | 87 | 88 | 89 | # PyTorch implementation 90 | [DeepEmbeddingModel_ZSL-Pytorch](https://github.com/dragen1860/DeepEmbeddingModel_ZSL-Pytorch) 91 | 92 | ## Citing 93 | 94 | If you use this code in your research, please use the following BibTeX entry. 95 | 96 | ``` 97 | @inproceedings{zhang2017learning, 98 | title={Learning a deep embedding model for zero-shot learning}, 99 | author={Zhang, Li and Xiang, Tao and Gong, Shaogang}, 100 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 101 | year={2017} 102 | } 103 | ``` 104 | 105 | ## References 106 | 107 | - [1] [Zero-Shot Learning - A Comprehensive Evaluation of the Good, the Bad and the Ugly](https://arxiv.org/abs/1707.00600). 108 | Yongqin Xian, Christoph H. Lampert, Bernt Schiele, Zeynep Akata. 109 | arXiv, 2017. 110 | - [2] [Attribute-Based Classification forZero-Shot Visual Object Categorization](https://cvml.ist.ac.at/papers/lampert-pami2013.pdf). 111 | Christoph H. Lampert, Hannes Nickisch and Stefan Harmeling. 112 | PAMI, 2014. 113 | - [3] [Zero-Shot Learning by Convex Combination of Semantic Embeddings](https://arxiv.org/abs/1312.5650). 114 | Mohammad Norouzi, Tomas Mikolov, Samy Bengio, Yoram Singer, Jonathon Shlens, Andrea Frome, Greg S. Corrado, Jeffrey Dean. 115 | arXiv, 2013. 116 | - [4] [Zero-Shot Learning via Semantic Similarity Embedding](https://arxiv.org/abs/1509.04767). 117 | Ziming Zhang, Venkatesh Saligrama. 118 | ICCV, 2015. 119 | - [5] [DeViSE: A Deep Visual-Semantic Embedding Model](http://papers.nips.cc/paper/5204-devise-a-deep-visual-semantic-embedding-model.pdf). 120 | Andrea Frome*, Greg S. Corrado*, Jonathon Shlens*, Samy BengioJeffrey Dean, Marc’Aurelio Ranzato, Tomas Mikolov. 121 | NIPS, 2013. 122 | - [6] [Evaluation of Output Embeddings for Fine-Grained Image Classification](https://arxiv.org/abs/1409.8403). 123 | Zeynep Akata, Scott Reed, Daniel Walter, Honglak Lee, Bernt Schiele. 124 | CVPR, 2015. 125 | - [7] [Latent Embeddings for Zero-shot Classification](https://arxiv.org/abs/1603.08895). 126 | Yongqin Xian, Zeynep Akata, Gaurav Sharma, Quynh Nguyen, Matthias Hein, Bernt Schiele 127 | CVPR, 2016. 128 | - [8] [An embarrassingly simple approach to zero-shot learning](http://proceedings.mlr.press/v37/romera-paredes15.pdf). 129 | Bernardino Romera-Paredes, Philip H. S. Torr. 130 | ICML, 2015. 131 | - [9] [Label-Embedding for Image Classification](https://arxiv.org/abs/1503.08677). 132 | Zeynep Akata, Florent Perronnin, Zaid Harchaoui, Cordelia Schmid. 133 | PAMI, 2016. 134 | - [10] [Synthesized Classifiers for Zero-Shot Learning](https://arxiv.org/abs/1603.00550). 135 | Soravit Changpinyo, Wei-Lun Chao, Boqing Gong, Fei Sha. 136 | CVPR, 2016. 137 | - [11] [Semantic Autoencoder for Zero-Shot Learning](https://arxiv.org/abs/1704.08345). 138 | Elyor Kodirov, Tao Xiang, Shaogang Gong. 139 | CVPR, 2017. 140 | --------------------------------------------------------------------------------