├── kNN.py
├── kNN_cosine.py
├── AwA_wordvector.py
├── CUB_attribute.py
├── AwA_attribute.py
├── AwA2_GBU.py
├── aPY_GBU.py
├── SUN_GBU.py
├── AwA_fusion.py
├── AwA1_GBU.py
├── CUB1_GBU.py
└── README.md


/kNN.py:
--------------------------------------------------------------------------------
 1 | #########################################  
 2 | # kNN: k Nearest Neighbors  
 3 |   
 4 | # Input:      newInput: vector to compare to existing dataset (1xN)  
#             dataSet:  M known vectors, one per row (MxN)
 6 | #             labels:   data set labels (1xM vector)  
 7 | #             k:        number of neighbors to use for comparison   
 8 |               
 9 | # Output:     the most popular class label  
10 | #########################################  
11 |   
12 | from numpy import *  
13 | import operator  
14 | 
15 | # create a dataset which contains 4 samples with 2 classes  
def createDataSet():
    """Return a toy data set: a 4x2 sample matrix and its four class labels."""
    # One sample per row; the first two rows belong to class 'A', the last
    # two to class 'B'.
    samples = array([
        [1.0, 0.9],
        [1.0, 1.0],
        [0.1, 0.2],
        [0.0, 0.1],
    ])
    classes = ['A'] * 2 + ['B'] * 2
    return samples, classes
21 | 
22 | 
23 | # classify using kNN  
def kNNClassify(newInput, dataSet, labels, k):
    """Classify ``newInput`` by majority vote of its k nearest samples.

    Nearness is the Euclidean distance between ``newInput`` and each row of
    ``dataSet``.

    Args:
        newInput: feature vector to classify (length N).
        dataSet:  known samples, one per row (M x N array).
        labels:   M labels; ``labels[i]`` is the class of ``dataSet[i]``.
        k:        number of nearest neighbours that vote; values above M are
                  clamped to M.

    Returns:
        The label with the most votes. On a tie, the label whose first vote
        came from the nearer neighbour wins.

    Raises:
        ValueError: if ``k`` is smaller than 1 or ``dataSet`` has no rows.
    """
    numSamples = dataSet.shape[0]  # shape[0] is the number of rows
    if numSamples == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))
    if k > numSamples:
        k = numSamples

    ## step 1: Euclidean distance from newInput to every row of dataSet
    # (broadcasting replaces the explicit tile() copy of newInput)
    diff = asarray(newInput) - dataSet
    distance = (diff ** 2).sum(axis=1) ** 0.5

    ## step 2: indices of the samples ordered from nearest to farthest
    sortedDistIndices = distance.argsort()

    ## steps 3-4: the k nearest samples vote; range() is used because
    # xrange() does not exist on Python 3
    classCount = {}
    firstSeen = []  # labels in the order they received their first vote
    for i in range(k):
        voteLabel = labels[sortedDistIndices[i]]
        if voteLabel not in classCount:
            classCount[voteLabel] = 0
            firstSeen.append(voteLabel)
        classCount[voteLabel] += 1

    ## step 5: return the label with the most votes; scanning in first-vote
    # order with a strict comparison makes the tie-break deterministic
    maxIndex = firstSeen[0]
    for voteLabel in firstSeen[1:]:
        if classCount[voteLabel] > classCount[maxIndex]:
            maxIndex = voteLabel
    return maxIndex
58 | 


--------------------------------------------------------------------------------
/kNN_cosine.py:
--------------------------------------------------------------------------------
 1 | #########################################  
 2 | # kNN: k Nearest Neighbors  
 3 |   
 4 | # Input:      newInput: vector to compare to existing dataset (1xN)  
#             dataSet:  M known vectors, one per row (MxN)
 6 | #             labels:   data set labels (1xM vector)  
 7 | #             k:        number of neighbors to use for comparison   
 8 |               
 9 | # Output:     the most popular class label  
10 | #########################################  
11 |   
12 | from numpy import *  
13 | import operator  
14 | import math
15 | import tensorflow as tf
16 | import numpy as np
17 | 
18 | # create a dataset which contains 4 samples with 2 classes  
def createDataSet():
    """Build the four-sample, two-class toy data set used for quick checks."""
    # Rows are samples; rows 0-1 are labelled 'A' and rows 2-3 'B'.
    rows = [[1.0, 0.9], [1.0, 1.0], [0.1, 0.2], [0.0, 0.1]]
    names = ['A', 'A', 'B', 'B']
    return array(rows), names
24 |  
def cosine_distance(v1,v2):
    """Return the cosine distance of v1 to v2: 1 - (v1 dot v2) / (||v1|| * ||v2||).

    The result is 0 for vectors pointing the same way, 1 for orthogonal
    vectors and 2 for opposite vectors.
    """
    # sqrt(|v1|^2 * |v2|^2) equals the product of the two norms.
    norm_product = math.sqrt(np.inner(v1, v1) * np.inner(v2, v2))
    similarity = np.inner(v1, v2) / norm_product
    return 1 - similarity
32 |    
33 | 
34 | # classify using kNN  
def kNNClassify(newInput, dataSet, labels, k):
    """Classify ``newInput`` by majority vote of its k nearest samples.

    Nearness is the cosine distance returned by ``cosine_distance``.

    Args:
        newInput: feature vector to classify (length N).
        dataSet:  known samples, one per row (M x N array).
        labels:   M labels; ``labels[i]`` is the class of ``dataSet[i]``.
        k:        number of nearest neighbours that vote; values above M are
                  clamped to M.

    Returns:
        The label with the most votes. On a tie, the label whose first vote
        came from the nearer neighbour wins.

    Raises:
        ValueError: if ``k`` is smaller than 1 or ``dataSet`` has no rows.
    """
    # The distance list stays a module-level global, exactly as before;
    # whether anything reads ``kNN_cosine.distance`` cannot be told from here.
    global distance
    numSamples = dataSet.shape[0]
    if numSamples == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))
    if k > numSamples:
        k = numSamples

    ## step 1: cosine distance from newInput to every row of dataSet
    distance = [cosine_distance(newInput, dataSet[i]) for i in range(numSamples)]

    ## step 2: indices of the samples ordered from nearest to farthest
    sortedDistIndices = argsort(distance)

    ## steps 3-4: the k nearest samples vote; range() is used because
    # xrange() does not exist on Python 3
    classCount = {}
    firstSeen = []  # labels in the order they received their first vote
    for i in range(k):
        voteLabel = labels[sortedDistIndices[i]]
        if voteLabel not in classCount:
            classCount[voteLabel] = 0
            firstSeen.append(voteLabel)
        classCount[voteLabel] += 1

    ## step 5: return the label with the most votes; scanning in first-vote
    # order with a strict comparison makes the tie-break deterministic
    maxIndex = firstSeen[0]
    for voteLabel in firstSeen[1:]:
        if classCount[voteLabel] > classCount[maxIndex]:
            maxIndex = voteLabel
    return maxIndex
65 | 


--------------------------------------------------------------------------------
/AwA_wordvector.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np, h5py
  3 | import scipy.io as sio
  4 | import sys
  5 | import random
  6 | import kNN_cosine
  7 | import re
  8 | from numpy import *   
  9 | 
 10 | def weight_variable(shape):
 11 |     initial = tf.truncated_normal(shape, stddev=0.1)
 12 |     return tf.Variable(initial)
 13 | 
 14 | def bias_variable(shape):
 15 |     initial = tf.constant(0.1, shape=shape)
 16 |     return tf.Variable(initial)
 17 | 
 18 | 
 19 | def compute_accuracy(test_word, test_visual, test_id, test_label):
 20 |     global left_w1     
 21 |     word_pre = sess.run(left_w1, feed_dict={word_features: test_word})
 22 |     test_id = np.squeeze(np.asarray(test_id))
 23 |     outpre = [0]*6180  
 24 |     test_label = np.squeeze(np.asarray(test_label))
 25 |     test_label = test_label.astype("float32")
 26 |     for i in range(6180):  
 27 |         outputLabel = kNN_cosine.kNNClassify(test_visual[i,:], word_pre, test_id, 1) 
 28 |         outpre[i] = outputLabel
 29 |     correct_prediction = tf.equal(outpre, test_label)
 30 |     accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
 31 |     result = sess.run(accuracy, feed_dict={
 32 |                                            word_features: test_word, visual_features: test_visual})
 33 |     return result
 34 | 
 35 | 
 36 | # # data
 37 | 
 38 | f=sio.loadmat('./data/AwA_data/wordvector/train_word.mat')
 39 | word=np.array(f['train_word'])
 40 | word.shape
 41 | 
 42 | f=sio.loadmat('./data/AwA_data/train_googlenet_bn.mat')
 43 | x=np.array(f['train_googlenet_bn'])
 44 | x.shape
 45 | 
 46 | f=sio.loadmat('./data/AwA_data/test_googlenet_bn.mat')
 47 | x_test=np.array(f['test_googlenet_bn'])
 48 | x_test.shape
 49 | 
 50 | f=sio.loadmat('./data/AwA_data/test_labels.mat')
 51 | test_label=np.array(f['test_labels'])
 52 | test_label.shape
 53 | 
 54 | f=sio.loadmat('./data/AwA_data/testclasses_id.mat')
 55 | test_id=np.array(f['testclasses_id'])
 56 | test_id.shape
 57 | 
 58 | f=sio.loadmat('./data/AwA_data/wordvector/test_vectors.mat')
 59 | word_pro=np.array(f['test_vectors'])
 60 | word_pro.shape
 61 | 
 62 | 
 63 | 
 64 | # # data shuffle
 65 | def data_iterator():
 66 |     """ A simple data iterator """
 67 |     batch_idx = 0
 68 |     while True:
 69 |         # shuffle labels and features
 70 |         idxs = np.arange(0, len(x))
 71 |         np.random.shuffle(idxs)
 72 |         shuf_visual = x[idxs]
 73 |         shuf_word = word[idxs]
 74 |         batch_size = 64
 75 |         for batch_idx in range(0, len(x), batch_size):
 76 |             visual_batch = shuf_visual[batch_idx:batch_idx+batch_size]
 77 |             visual_batch = visual_batch.astype("float32")
 78 |             word_batch = shuf_word[batch_idx:batch_idx+batch_size]
 79 |             yield word_batch, visual_batch 
 80 |             
 81 | 
 82 | 
 83 | 
 84 | # # Placeholder
 85 | # define placeholder for inputs to network
 86 | word_features = tf.placeholder(tf.float32, [None, 1000])
 87 | visual_features = tf.placeholder(tf.float32, [None, 1024])
 88 | 
 89 | 
 90 | # # Network
 91 | # AwA 1000 1024 ReLu, 1e-3 * regularisers, 64 batch, 0.0001 Adam
 92 | W_left_w1 = weight_variable([1000, 1024])
 93 | b_left_w1 = bias_variable([1024])
 94 | left_w1 = tf.nn.relu(tf.matmul(word_features, W_left_w1) + b_left_w1)
 95 | 
 96 | 
 97 | # # loss
 98 | loss_w = tf.reduce_mean(tf.square(left_w1 - visual_features))
 99 | 
100 | # L2 regularisation for the fully connected parameters.             
101 | regularisers_w = (tf.nn.l2_loss(W_left_w1) + tf.nn.l2_loss(b_left_w1))
102 |                   
103 |                   
104 | # Add the regularisation term to the loss.
105 | loss_w += 1e-3 * regularisers_w
106 | 
107 | 
108 | train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_w)
109 | 
110 | 
111 | 
112 | sess = tf.Session()
113 | sess.run(tf.global_variables_initializer())
114 | 
115 | 
116 | # # Run
117 | iter_ = data_iterator()
118 | for i in range(1000000):
119 |     word_batch_val, visual_batch_val = iter_.next()
120 |     sess.run(train_step, feed_dict={word_features: word_batch_val, visual_features: visual_batch_val})
121 |     if i % 1000 == 0:
122 |         print(compute_accuracy(word_pro, x_test, test_id, test_label))
123 | 
124 | 
125 | 
126 | 


--------------------------------------------------------------------------------
/CUB_attribute.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np, h5py
  3 | import scipy.io as sio
  4 | import sys
  5 | import random
  6 | import kNN
  7 | import re
  8 | import os
  9 | from numpy import *   
 10 | 
 11 | 
 12 | def weight_variable(shape):
 13 |     initial = tf.truncated_normal(shape, stddev=0.1)
 14 |     return tf.Variable(initial)
 15 | 
 16 | def bias_variable(shape):
 17 |     initial = tf.constant(0.1, shape=shape)
 18 |     return tf.Variable(initial)
 19 | 
 20 | 
 21 | def compute_accuracy(test_att, test_visual, test_id, test_label):
 22 |     global left_a2 
 23 |     att_pre = sess.run(left_a2, feed_dict={att_features: test_att})
 24 |     test_id = np.squeeze(np.asarray(test_id))
 25 |     outpre = [0]*2933 
 26 |     test_label = np.squeeze(np.asarray(test_label))
 27 |     test_label = test_label.astype("float32")
 28 |     for i in range(2933): 
 29 |         outputLabel = kNN.kNNClassify(test_visual[i,:], att_pre, test_id, 1) 
 30 |         outpre[i] = outputLabel
 31 |     correct_prediction = tf.equal(outpre, test_label)
 32 |     accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
 33 |     result = sess.run(accuracy, feed_dict={att_features: test_att, visual_features: test_visual})
 34 |     return result
 35 | 
 36 | 
 37 | f=sio.loadmat('./data/CUB_data/train_attr.mat')
 38 | att=np.array(f['train_attr'])
 39 | att.shape
 40 | 
 41 | f=sio.loadmat('./data/CUB_data/train_cub_googlenet_bn.mat')
 42 | x=np.array(f['train_cub_googlenet_bn'])
 43 | x.shape
 44 | 
 45 | f=sio.loadmat('./data/CUB_data/test_cub_googlenet_bn.mat')
 46 | x_test=np.array(f['test_cub_googlenet_bn'])
 47 | x_test.shape
 48 | 
 49 | f=sio.loadmat('./data/CUB_data/test_labels_cub.mat')
 50 | test_label=np.array(f['test_labels_cub'])
 51 | test_label.shape
 52 | 
 53 | f=sio.loadmat('./data/CUB_data/testclasses_id.mat')
 54 | test_id=np.array(f['testclasses_id'])
 55 | 
 56 | f=sio.loadmat('./data/CUB_data/test_proto.mat')
 57 | att_pro=np.array(f['test_proto'])
 58 | 
 59 | 
 60 | # # data shuffle
 61 | def data_iterator():
 62 |     """ A simple data iterator """
 63 |     batch_idx = 0
 64 |     while True:
 65 |         # shuffle labels and features
 66 |         idxs = np.arange(0, len(x))
 67 |         np.random.shuffle(idxs)
 68 |         shuf_visual = x[idxs]
 69 |         shuf_att = att[idxs]
 70 |         batch_size = 100
 71 |         for batch_idx in range(0, len(x), batch_size):
 72 |             visual_batch = shuf_visual[batch_idx:batch_idx+batch_size]
 73 |             visual_batch = visual_batch.astype("float32")
 74 |             att_batch = shuf_att[batch_idx:batch_idx+batch_size]
 75 |             yield att_batch, visual_batch
 76 |             
 77 | 
 78 | 
 79 | 
 80 | # # Placeholder
 81 | # define placeholder for inputs to network
 82 | att_features = tf.placeholder(tf.float32, [None, 312])
 83 | visual_features = tf.placeholder(tf.float32, [None, 1024])
 84 | 
 85 | 
 86 | # # Network
 87 | 
 88 | # CUB 312 700 1024 ReLu, 1e-2 * regularisers, 100 batch, 0.00001 Adam
 89 | W_left_a1 = weight_variable([312, 700])
 90 | b_left_a1 = bias_variable([700])
 91 | left_a1 = tf.nn.relu(tf.matmul(att_features, W_left_a1) + b_left_a1)
 92 | 
 93 | W_left_a2 = weight_variable([700, 1024])
 94 | b_left_a2 = bias_variable([1024])
 95 | left_a2 = tf.nn.relu(tf.matmul(left_a1, W_left_a2) + b_left_a2)
 96 | 
 97 | 
 98 | # # loss
 99 | 
100 | loss_a = tf.reduce_mean(tf.square(left_a2 - visual_features))
101 | 
102 | # L2 regularisation for the fully connected parameters.
103 | regularizers_a = (tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1)
104 |                 + tf.nn.l2_loss(W_left_a2) + tf.nn.l2_loss(b_left_a2))
105 |                
106 | 
107 |                   
108 | # Add the regularization term to the loss.
109 | loss_a += 1e-2 * regularizers_a
110 | 
111 | 
112 | 
113 | train_step = tf.train.AdamOptimizer(0.00001).minimize(loss_a)
114 | 
115 | sess = tf.Session()
116 | sess.run(tf.global_variables_initializer())
117 | 
118 | 
119 | # # Run
120 | iter_ = data_iterator()
121 | for i in range(1000000):
122 |     att_batch_val, visual_batch_val = iter_.next()
123 |     sess.run(train_step, feed_dict={att_features: att_batch_val, visual_features: visual_batch_val})
124 |     if i % 1000 == 0:
125 |         print(compute_accuracy(att_pro, x_test, test_id, test_label))
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 


--------------------------------------------------------------------------------
/AwA_attribute.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np, h5py
  3 | import scipy.io as sio
  4 | import sys
  5 | import random
  6 | import kNN
  7 | import re
  8 | from numpy import *   
  9 | 
 10 | def weight_variable(shape):
 11 |     initial = tf.truncated_normal(shape, stddev=0.1)
 12 |     return tf.Variable(initial)
 13 | 
 14 | def bias_variable(shape):
 15 |     initial = tf.constant(0.1, shape=shape)
 16 |     return tf.Variable(initial)
 17 | 
 18 | 
 19 | def compute_accuracy(test_att, test_visual, test_id, test_label):
 20 |     global left_a2  
 21 |     att_pre = sess.run(left_a2, feed_dict={att_features: test_att})
 22 |     test_id = np.squeeze(np.asarray(test_id))
 23 |     outpre = [0]*6180  # CUB 2933
 24 |     test_label = np.squeeze(np.asarray(test_label))
 25 |     test_label = test_label.astype("float32")
 26 |     for i in range(6180):  # CUB 2933
 27 |         outputLabel = kNN.kNNClassify(test_visual[i,:], att_pre, test_id, 1) 
 28 |         outpre[i] = outputLabel
 29 |     correct_prediction = tf.equal(outpre, test_label)
 30 |     accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
 31 |     result = sess.run(accuracy, feed_dict={
 32 |                                            att_features: test_att, visual_features: test_visual})
 33 |     return result
 34 | 
 35 | 
 36 | # # data
 37 | 
 38 | f=h5py.File('./data/AwA_data/attribute/Z_s_con.mat','r')
 39 | att=np.array(f['Z_s_con'])
 40 | att.shape
 41 | 
 42 | f=sio.loadmat('./data/AwA_data/train_googlenet_bn.mat')
 43 | x=np.array(f['train_googlenet_bn'])
 44 | x.shape
 45 | 
 46 | f=sio.loadmat('./data/AwA_data/test_googlenet_bn.mat')
 47 | x_test=np.array(f['test_googlenet_bn'])
 48 | x_test.shape
 49 | 
 50 | f=sio.loadmat('./data/AwA_data/test_labels.mat')
 51 | test_label=np.array(f['test_labels'])
 52 | test_label.shape
 53 | 
 54 | f=sio.loadmat('./data/AwA_data/testclasses_id.mat')
 55 | test_id=np.array(f['testclasses_id'])
 56 | test_id.shape
 57 | 
 58 | f=sio.loadmat('./data/AwA_data/attribute/pca_te_con_10x85.mat')
 59 | att_pro=np.array(f['pca_te_con_10x85'])
 60 | att_pro.shape
 61 | 
 62 | 
 63 | 
 64 | # # data shuffle
 65 | def data_iterator():
 66 |     """ A simple data iterator """
 67 |     batch_idx = 0
 68 |     while True:
 69 |         # shuffle labels and features
 70 |         idxs = np.arange(0, len(x))
 71 |         np.random.shuffle(idxs)
 72 |         shuf_visual = x[idxs]
 73 |         shuf_att = att[idxs]
 74 |         batch_size = 64
 75 |         for batch_idx in range(0, len(x), batch_size):
 76 |             visual_batch = shuf_visual[batch_idx:batch_idx+batch_size]
 77 |             visual_batch = visual_batch.astype("float32")
 78 |             att_batch = shuf_att[batch_idx:batch_idx+batch_size]
 79 |             yield att_batch, visual_batch
 80 |             
 81 | 
 82 | 
 83 | 
 84 | # # Placeholder
 85 | # define placeholder for inputs to network
 86 | att_features = tf.placeholder(tf.float32, [None, 85])
 87 | visual_features = tf.placeholder(tf.float32, [None, 1024])
 88 | 
 89 | 
 90 | # # Network
 91 | # AwA 85 300 1024 ReLu, 1e-2 * regularisers, 64 batch, 0.0001 Adam
 92 | W_left_a1 = weight_variable([85, 300])
 93 | b_left_a1 = bias_variable([300])
 94 | left_a1 = tf.nn.relu(tf.matmul(att_features, W_left_a1) + b_left_a1)
 95 | 
 96 | 
 97 | W_left_a2 = weight_variable([300, 1024])
 98 | b_left_a2 = bias_variable([1024])
 99 | left_a2 = tf.nn.relu(tf.matmul(left_a1, W_left_a2) + b_left_a2)
100 | 
101 | 
102 | # # loss
103 | loss_a = tf.reduce_mean(tf.square(left_a2 - visual_features))    
104 | 
105 | # L2 regularisation for the fully connected parameters.
106 | regularisers_a = (tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1)
107 |                 + tf.nn.l2_loss(W_left_a2) + tf.nn.l2_loss(b_left_a2))
108 | 
109 | # Add the regularisation term to the loss.            
110 | loss_a += 1e-2 * regularisers_a
111 | 
112 | 
113 | 
114 | train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_a)
115 | 
116 | 
117 | 
118 | sess = tf.Session()
119 | sess.run(tf.global_variables_initializer())
120 | 
121 | 
122 | # # Run
123 | iter_ = data_iterator()
124 | for i in range(1000000):
125 |     att_batch_val, visual_batch_val = iter_.next()
126 |     sess.run(train_step, feed_dict={att_features: att_batch_val, visual_features: visual_batch_val})
127 |     if i % 1000 == 0:
128 |         print(compute_accuracy(att_pro, x_test, test_id, test_label))
129 | 
130 | 
131 | 
132 | 
133 | 
134 | 


--------------------------------------------------------------------------------
/AwA2_GBU.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import scipy.io as sio
  4 | import kNN
  5 | import kNN_cosine
  6 | from numpy import *
  7 | from sklearn.metrics import accuracy_score
  8 | 
  9 | 
 10 | def weight_variable(shape):
 11 |     initial = tf.truncated_normal(shape, stddev=0.1)
 12 |     return tf.Variable(initial)
 13 | 
 14 | 
 15 | def bias_variable(shape):
 16 |     initial = tf.constant(0.1, shape=shape)
 17 |     return tf.Variable(initial)
 18 | 
 19 | 
 20 | def compute_accuracy(test_att, test_visual, test_id, test_label):
 21 |     global left_a2
 22 |     att_pre = sess.run(left_a2, feed_dict={att_features: test_att})
 23 |     test_id = np.squeeze(np.asarray(test_id))
 24 |     outpre = [0] * test_visual.shape[0]  # CUB 2933
 25 |     test_label = np.squeeze(np.asarray(test_label))
 26 |     test_label = test_label.astype("float32")
 27 |     for i in range(test_visual.shape[0]):  # CUB 2933
 28 |         outputLabel = kNN.kNNClassify(test_visual[i, :], att_pre, test_id, 1)
 29 |         outpre[i] = outputLabel
 30 |     # compute averaged per class accuracy
 31 |     outpre = np.array(outpre, dtype='int')
 32 |     unique_labels = np.unique(test_label)
 33 |     acc = 0
 34 |     for l in unique_labels:
 35 |         idx = np.nonzero(test_label == l)[0]
 36 |         acc += accuracy_score(test_label[idx], outpre[idx])
 37 |     acc = acc / unique_labels.shape[0]
 38 |     return acc
 39 | 
 40 | 
 41 | dataroot = './data/'
 42 | dataset = 'AwA2_data'
 43 | image_embedding = 'res101'
 44 | class_embedding = 'att' # original_att
 45 | 
 46 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + image_embedding + ".mat")
 47 | feature = matcontent['features'].T
 48 | label = matcontent['labels'].astype(int).squeeze() - 1
 49 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + class_embedding + "_splits.mat")
 50 | # numpy array index starts from 0, matlab starts from 1
 51 | trainval_loc = matcontent['trainval_loc'].squeeze() - 1
 52 | test_seen_loc = matcontent['test_seen_loc'].squeeze() - 1
 53 | test_unseen_loc = matcontent['test_unseen_loc'].squeeze() - 1
 54 | 
 55 | attribute = matcontent['original_att'].T # att
 56 | 
 57 | x = feature[trainval_loc]
 58 | train_label = label[trainval_loc].astype(int)
 59 | att = attribute[train_label]
 60 | print(att.shape)
 61 | x_test = feature[test_unseen_loc]
 62 | test_label = label[test_unseen_loc].astype(int)
 63 | x_test_seen = feature[test_seen_loc]
 64 | test_label_seen = label[test_seen_loc].astype(int)
 65 | test_id = np.unique(test_label)
 66 | att_pro = attribute[test_id]
 67 | 
 68 | 
 69 | def data_iterator():
 70 |     """ A simple data iterator """
 71 |     batch_idx = 0
 72 |     while True:
 73 |         # shuffle labels and features
 74 |         idxs = np.arange(0, len(x))
 75 |         np.random.shuffle(idxs)
 76 |         shuf_visual = x[idxs]
 77 |         shuf_att = att[idxs]
 78 |         batch_size = 64
 79 |         for batch_idx in range(0, len(x), batch_size):
 80 |             visual_batch = shuf_visual[batch_idx:batch_idx + batch_size]
 81 |             visual_batch = visual_batch.astype("float32")
 82 |             att_batch = shuf_att[batch_idx:batch_idx + batch_size]
 83 |             yield att_batch, visual_batch
 84 | 
 85 | 
 86 | # # Placeholder
 87 | # define placeholder for inputs to network
 88 | att_features = tf.placeholder(tf.float32, [None, 85])
 89 | visual_features = tf.placeholder(tf.float32, [None, 2048])
 90 | 
 91 | # # Network
 92 | # AwA 85 1600 2048 ReLu, 1e-3 * regularisers, 64 batch, 0.0001 Adam
 93 | W_left_a1 = weight_variable([85, 1600])
 94 | b_left_a1 = bias_variable([1600])
 95 | left_a1 = tf.nn.relu(tf.matmul(att_features, W_left_a1) + b_left_a1)
 96 | 
 97 | W_left_a2 = weight_variable([1600, 2048])
 98 | b_left_a2 = bias_variable([2048])
 99 | left_a2 = tf.nn.relu(tf.matmul(left_a1, W_left_a2) + b_left_a2)
100 | 
101 | # # loss
102 | loss_a = tf.reduce_mean(tf.square(left_a2 - visual_features))
103 | 
104 | # L2 regularisation for the fully connected parameters.
105 | regularisers_a = (tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1)
106 |                   + tf.nn.l2_loss(W_left_a2) + tf.nn.l2_loss(b_left_a2))
107 | 
108 | loss_a += 1e-3 * regularisers_a
109 | 
110 | train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_a)
111 | 
112 | sess = tf.Session()
113 | sess.run(tf.global_variables_initializer())
114 | 
115 | # # Run
116 | iter_ = data_iterator()
117 | for i in range(1000000):
118 |     att_batch_val, visual_batch_val = iter_.next()
119 |     sess.run(train_step, feed_dict={att_features: att_batch_val, visual_features: visual_batch_val})
120 |     if i % 1000 == 0:
121 |         acc_zsl = compute_accuracy(att_pro, x_test, test_id, test_label)
122 |         acc_seen_gzsl = compute_accuracy(attribute, x_test_seen, np.arange(50), test_label_seen)
123 |         acc_unseen_gzsl = compute_accuracy(attribute, x_test, np.arange(50), test_label)
124 |         H = 2 * acc_seen_gzsl * acc_unseen_gzsl / (acc_seen_gzsl + acc_unseen_gzsl)
125 |         print('zsl:', acc_zsl)
126 |         print('gzsl: seen=%.4f, unseen=%.4f, h=%.4f' % (acc_seen_gzsl, acc_unseen_gzsl, H))
127 | 
128 | 
129 | 
130 | 
131 | 
132 | 
133 | 


--------------------------------------------------------------------------------
/aPY_GBU.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import scipy.io as sio
  4 | import kNN
  5 | import kNN_cosine
  6 | from numpy import *
  7 | from sklearn.metrics import accuracy_score
  8 | 
  9 | 
 10 | def weight_variable(shape):
 11 |     initial = tf.truncated_normal(shape, stddev=0.1)
 12 |     return tf.Variable(initial)
 13 | 
 14 | 
 15 | def bias_variable(shape):
 16 |     initial = tf.constant(0.1, shape=shape)
 17 |     return tf.Variable(initial)
 18 | 
 19 | 
 20 | def compute_accuracy(test_att, test_visual, test_id, test_label):
 21 |     global left_a2
 22 |     att_pre = sess.run(left_a2, feed_dict={att_features: test_att})
 23 |     test_id = np.squeeze(np.asarray(test_id))
 24 |     outpre = [0] * test_visual.shape[0]  # CUB 2933
 25 |     test_label = np.squeeze(np.asarray(test_label))
 26 |     test_label = test_label.astype("float32")
 27 |     for i in range(test_visual.shape[0]):  # CUB 2933
 28 |         outputLabel = kNN.kNNClassify(test_visual[i, :], att_pre, test_id, 1)
 29 |         outpre[i] = outputLabel
 30 |     # compute averaged per class accuracy
 31 |     outpre = np.array(outpre, dtype='int')
 32 |     unique_labels = np.unique(test_label)
 33 |     acc = 0
 34 |     for l in unique_labels:
 35 |         idx = np.nonzero(test_label == l)[0]
 36 |         acc += accuracy_score(test_label[idx], outpre[idx])
 37 |     acc = acc / unique_labels.shape[0]
 38 |     return acc
 39 | 
 40 | 
 41 | dataroot = './data/'
 42 | dataset = 'APY_data'
 43 | image_embedding = 'res101'
 44 | class_embedding = 'att' # original_att
 45 | 
 46 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + image_embedding + ".mat")
 47 | feature = matcontent['features'].T
 48 | label = matcontent['labels'].astype(int).squeeze() - 1
 49 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + class_embedding + "_splits.mat")
 50 | # numpy array index starts from 0, matlab starts from 1
 51 | trainval_loc = matcontent['trainval_loc'].squeeze() - 1
 52 | test_seen_loc = matcontent['test_seen_loc'].squeeze() - 1
 53 | test_unseen_loc = matcontent['test_unseen_loc'].squeeze() - 1
 54 | 
 55 | attribute = matcontent['original_att'].T # att
 56 | 
 57 | x = feature[trainval_loc]
 58 | train_label = label[trainval_loc].astype(int)
 59 | att = attribute[train_label]
 60 | print(att.shape)
 61 | x_test = feature[test_unseen_loc]
 62 | test_label = label[test_unseen_loc].astype(int)
 63 | x_test_seen = feature[test_seen_loc]
 64 | test_label_seen = label[test_seen_loc].astype(int)
 65 | test_id = np.unique(test_label)
 66 | att_pro = attribute[test_id]
 67 | 
 68 | 
 69 | def data_iterator():
 70 |     """ A simple data iterator """
 71 |     batch_idx = 0
 72 |     while True:
 73 |         # shuffle labels and features
 74 |         idxs = np.arange(0, len(x))
 75 |         np.random.shuffle(idxs)
 76 |         shuf_visual = x[idxs]
 77 |         shuf_att = att[idxs]
 78 |         batch_size = 64
 79 |         for batch_idx in range(0, len(x), batch_size):
 80 |             visual_batch = shuf_visual[batch_idx:batch_idx + batch_size]
 81 |             visual_batch = visual_batch.astype("float32")
 82 |             att_batch = shuf_att[batch_idx:batch_idx + batch_size]
 83 |             yield att_batch, visual_batch
 84 | 
 85 | 
 86 | # # Placeholder
 87 | # define placeholder for inputs to network
 88 | att_features = tf.placeholder(tf.float32, [None, 64])
 89 | visual_features = tf.placeholder(tf.float32, [None, 2048])
 90 | 
 91 | # # Network
 92 | # AwA 85 1600 2048 ReLu, 1e-3 * regularisers, 64 batch, 0.0001 Adam
 93 | W_left_a1 = weight_variable([64, 1600])
 94 | b_left_a1 = bias_variable([1600])
 95 | left_a1 = tf.nn.relu(tf.matmul(att_features, W_left_a1) + b_left_a1)
 96 | 
 97 | W_left_a2 = weight_variable([1600, 2048])
 98 | b_left_a2 = bias_variable([2048])
 99 | left_a2 = tf.nn.relu(tf.matmul(left_a1, W_left_a2) + b_left_a2)
100 | 
101 | # # loss
102 | loss_a = tf.reduce_mean(tf.square(left_a2 - visual_features))
103 | 
104 | # L2 regularisation for the fully connected parameters.
105 | regularisers_a = (tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1)
106 |                   + tf.nn.l2_loss(W_left_a2) + tf.nn.l2_loss(b_left_a2))
107 | 
108 | loss_a += 1e-4 * regularisers_a
109 | 
110 | train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_a)
111 | 
112 | sess = tf.Session()
113 | sess.run(tf.global_variables_initializer())
114 | 
115 | # # Run
116 | iter_ = data_iterator()
117 | for i in range(1000000):
118 |     att_batch_val, visual_batch_val = iter_.next()
119 |     sess.run(train_step, feed_dict={att_features: att_batch_val, visual_features: visual_batch_val})
120 |     if i % 1000 == 0:
121 |         acc_zsl = compute_accuracy(att_pro, x_test, test_id, test_label)
122 |         acc_seen_gzsl = compute_accuracy(attribute, x_test_seen, np.arange(32), test_label_seen)
123 |         acc_unseen_gzsl = compute_accuracy(attribute, x_test, np.arange(32), test_label)
124 |         H = 2 * acc_seen_gzsl * acc_unseen_gzsl / (acc_seen_gzsl + acc_unseen_gzsl)
125 |         print('zsl:', acc_zsl)
126 |         print('gzsl: seen=%.4f, unseen=%.4f, h=%.4f' % (acc_seen_gzsl, acc_unseen_gzsl, H))
127 | 
128 | 
129 | 
130 | 
131 | 
132 | 
133 | 


--------------------------------------------------------------------------------
/SUN_GBU.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import scipy.io as sio
  4 | import kNN
  5 | import kNN_cosine
  6 | from numpy import *
  7 | from sklearn.metrics import accuracy_score
  8 | 
  9 | 
 10 | def weight_variable(shape):
 11 |     initial = tf.truncated_normal(shape, stddev=0.1)
 12 |     return tf.Variable(initial)
 13 | 
 14 | 
 15 | def bias_variable(shape):
 16 |     initial = tf.constant(0.1, shape=shape)
 17 |     return tf.Variable(initial)
 18 | 
 19 | 
 20 | def compute_accuracy(test_att, test_visual, test_id, test_label):
 21 |     global left_a2
 22 |     att_pre = sess.run(left_a2, feed_dict={att_features: test_att})
 23 |     test_id = np.squeeze(np.asarray(test_id))
 24 |     outpre = [0] * test_visual.shape[0]  # CUB 2933
 25 |     test_label = np.squeeze(np.asarray(test_label))
 26 |     test_label = test_label.astype("float32")
 27 |     for i in range(test_visual.shape[0]):  # CUB 2933
 28 |         outputLabel = kNN.kNNClassify(test_visual[i, :], att_pre, test_id, 1)
 29 |         outpre[i] = outputLabel
 30 |     # compute averaged per class accuracy
 31 |     outpre = np.array(outpre, dtype='int')
 32 |     unique_labels = np.unique(test_label)
 33 |     acc = 0
 34 |     for l in unique_labels:
 35 |         idx = np.nonzero(test_label == l)[0]
 36 |         acc += accuracy_score(test_label[idx], outpre[idx])
 37 |     acc = acc / unique_labels.shape[0]
 38 |     return acc
 39 | 
 40 | 
 41 | dataroot = './data/'
 42 | dataset = 'SUN_data'
 43 | image_embedding = 'res101'
 44 | class_embedding = 'att' # original_att
 45 | 
 46 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + image_embedding + ".mat")
 47 | feature = matcontent['features'].T
 48 | label = matcontent['labels'].astype(int).squeeze() - 1
 49 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + class_embedding + "_splits.mat")
 50 | # numpy array index starts from 0, matlab starts from 1
 51 | trainval_loc = matcontent['trainval_loc'].squeeze() - 1
 52 | test_seen_loc = matcontent['test_seen_loc'].squeeze() - 1
 53 | test_unseen_loc = matcontent['test_unseen_loc'].squeeze() - 1
 54 | 
 55 | attribute = matcontent['original_att'].T # att
 56 | 
 57 | x = feature[trainval_loc]
 58 | train_label = label[trainval_loc].astype(int)
 59 | att = attribute[train_label]
 60 | print(att.shape)
 61 | x_test = feature[test_unseen_loc]
 62 | test_label = label[test_unseen_loc].astype(int)
 63 | x_test_seen = feature[test_seen_loc]
 64 | test_label_seen = label[test_seen_loc].astype(int)
 65 | test_id = np.unique(test_label)
 66 | att_pro = attribute[test_id]
 67 | 
 68 | 
 69 | def data_iterator():
 70 |     """ A simple data iterator """
 71 |     batch_idx = 0
 72 |     while True:
 73 |         # shuffle labels and features
 74 |         idxs = np.arange(0, len(x))
 75 |         np.random.shuffle(idxs)
 76 |         shuf_visual = x[idxs]
 77 |         shuf_att = att[idxs]
 78 |         batch_size = 64
 79 |         for batch_idx in range(0, len(x), batch_size):
 80 |             visual_batch = shuf_visual[batch_idx:batch_idx + batch_size]
 81 |             visual_batch = visual_batch.astype("float32")
 82 |             att_batch = shuf_att[batch_idx:batch_idx + batch_size]
 83 |             yield att_batch, visual_batch
 84 | 
 85 | 
 86 | # # Placeholder
 87 | # define placeholder for inputs to network
 88 | att_features = tf.placeholder(tf.float32, [None, 102])
 89 | visual_features = tf.placeholder(tf.float32, [None, 2048])
 90 | 
 91 | # # Network
 92 | # AwA 85 1600 2048 ReLu, 1e-3 * regularisers, 64 batch, 0.0001 Adam
 93 | W_left_a1 = weight_variable([102, 1600])
 94 | b_left_a1 = bias_variable([1600])
 95 | left_a1 = tf.nn.relu(tf.matmul(att_features, W_left_a1) + b_left_a1)
 96 | 
 97 | W_left_a2 = weight_variable([1600, 2048])
 98 | b_left_a2 = bias_variable([2048])
 99 | left_a2 = tf.nn.relu(tf.matmul(left_a1, W_left_a2) + b_left_a2)
100 | 
101 | # # loss
102 | loss_a = tf.reduce_mean(tf.square(left_a2 - visual_features))
103 | 
104 | # L2 regularisation for the fully connected parameters.
105 | regularisers_a = (tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1)
106 |                   + tf.nn.l2_loss(W_left_a2) + tf.nn.l2_loss(b_left_a2))
107 | 
108 | loss_a += 1e-5 * regularisers_a
109 | 
110 | train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_a)
111 | 
112 | sess = tf.Session()
113 | sess.run(tf.global_variables_initializer())
114 | 
115 | # # Run
116 | iter_ = data_iterator()
117 | for i in range(1000000):
118 |     att_batch_val, visual_batch_val = iter_.next()
119 |     sess.run(train_step, feed_dict={att_features: att_batch_val, visual_features: visual_batch_val})
120 |     if i % 1000 == 0:
121 |         acc_zsl = compute_accuracy(att_pro, x_test, test_id, test_label)
122 |         acc_seen_gzsl = compute_accuracy(attribute, x_test_seen, np.arange(717), test_label_seen)
123 |         acc_unseen_gzsl = compute_accuracy(attribute, x_test, np.arange(717), test_label)
124 |         H = 2 * acc_seen_gzsl * acc_unseen_gzsl / (acc_seen_gzsl + acc_unseen_gzsl)
125 |         print('zsl:', acc_zsl)
126 |         print('gzsl: seen=%.4f, unseen=%.4f, h=%.4f' % (acc_seen_gzsl, acc_unseen_gzsl, H))
127 | 
128 | 
129 | 
130 | 
131 | 
132 | 
133 | 


--------------------------------------------------------------------------------
/AwA_fusion.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np, h5py
  3 | import scipy.io as sio
  4 | import sys
  5 | import random
  6 | import kNN
  7 | import re
  8 | from numpy import *   
  9 | 
 10 | def weight_variable(shape):
 11 |     initial = tf.truncated_normal(shape, stddev=0.1)
 12 |     return tf.Variable(initial)
 13 | 
 14 | def bias_variable(shape):
 15 |     initial = tf.constant(0.1, shape=shape)
 16 |     return tf.Variable(initial)
 17 | 
 18 | 
 19 | def compute_accuracy(test_att, test_word, test_visual, test_id, test_label):
 20 |     global center_1
 21 |     pre = sess.run(center_1, feed_dict={att_features: test_att, word_features: test_word})
 22 |     test_id = np.squeeze(np.asarray(test_id))
 23 |     outpre = [0]*6180  
 24 |     test_label = np.squeeze(np.asarray(test_label))
 25 |     test_label = test_label.astype("float32")
 26 |     for i in range(6180):  
 27 |         outputLabel = kNN.kNNClassify(test_visual[i,:], pre, test_id, 1) 
 28 |         outpre[i] = outputLabel
 29 |     correct_prediction = tf.equal(outpre, test_label)
 30 |     accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
 31 |     result = sess.run(accuracy, feed_dict={att_features: test_att, 
 32 |                                            word_features: test_word, visual_features: test_visual})
 33 |     return result
 34 | 
 35 | 
 36 | # # data
 37 | 
 38 | f=h5py.File('./data/AwA_data/attribute/Z_s_con.mat','r')
 39 | att=np.array(f['Z_s_con'])
 40 | att.shape
 41 | 
 42 | f=sio.loadmat('./data/AwA_data/wordvector/train_word.mat')
 43 | word=np.array(f['train_word'])
 44 | word.shape
 45 | 
 46 | f=sio.loadmat('./data/AwA_data/train_googlenet_bn.mat')
 47 | x=np.array(f['train_googlenet_bn'])
 48 | x.shape
 49 | 
 50 | f=sio.loadmat('./data/AwA_data/test_googlenet_bn.mat')
 51 | x_test=np.array(f['test_googlenet_bn'])
 52 | x_test.shape
 53 | 
 54 | f=sio.loadmat('./data/AwA_data/test_labels.mat')
 55 | test_label=np.array(f['test_labels'])
 56 | test_label.shape
 57 | 
 58 | f=sio.loadmat('./data/AwA_data/testclasses_id.mat')
 59 | test_id=np.array(f['testclasses_id'])
 60 | test_id.shape
 61 | 
 62 | f=sio.loadmat('./data/AwA_data/attribute/pca_te_con_10x85.mat')
 63 | att_pro=np.array(f['pca_te_con_10x85'])
 64 | att_pro.shape
 65 | 
 66 | f=sio.loadmat('./data/AwA_data/wordvector/test_vectors.mat')
 67 | word_pro=np.array(f['test_vectors'])
 68 | word_pro.shape
 69 | 
 70 | 
 71 | 
 72 | # # data shuffle
 73 | def data_iterator():
 74 |     """ A simple data iterator """
 75 |     batch_idx = 0
 76 |     while True:
 77 |         # shuffle labels and features
 78 |         idxs = np.arange(0, len(x))
 79 |         np.random.shuffle(idxs)
 80 |         shuf_visual = x[idxs]
 81 |         shuf_att = att[idxs]
 82 |         shuf_word = word[idxs]
 83 |         batch_size = 64
 84 |         for batch_idx in range(0, len(x), batch_size):
 85 |             visual_batch = shuf_visual[batch_idx:batch_idx+batch_size]
 86 |             visual_batch = visual_batch.astype("float32")
 87 |             att_batch = shuf_att[batch_idx:batch_idx+batch_size]
 88 |             word_batch = shuf_word[batch_idx:batch_idx+batch_size]
 89 |             yield att_batch, word_batch, visual_batch
 90 | 
 91 | 
 92 | 
 93 | # # Placeholder
 94 | 
 95 | # define placeholder for inputs to network
 96 | att_features = tf.placeholder(tf.float32, [None, 85])
 97 | word_features = tf.placeholder(tf.float32, [None, 1000])
 98 | visual_features = tf.placeholder(tf.float32, [None, 1024])
 99 | 
100 | 
101 | # # Network
102 | 
103 | W_left_w1 = weight_variable([1000, 900])
104 | b_left_w1 = bias_variable([900])
105 | left_w1 = tf.tanh(tf.matmul(word_features, W_left_w1) + b_left_w1)
106 | 
107 | 
108 | W_left_a1 = weight_variable([85, 900])
109 | b_left_a1 = bias_variable([900])
110 | left_a1 = tf.tanh(tf.matmul(att_features, W_left_a1) + b_left_a1)
111 | 
112 | multimodal =   left_w1 +  3 * left_a1
113 | 
114 | W_center_1 = weight_variable([900, 1024])
115 | b_center_1 = bias_variable([1024])
116 | center_1 = tf.nn.relu((tf.matmul(multimodal, W_center_1) + b_center_1))
117 | 
118 | 
119 | 
120 | # # loss
121 | 
122 | 
123 | loss = tf.reduce_mean(tf.square(center_1 - visual_features))
124 | 
125 | # L2 regularisation for the fully connected parameters.
126 | regularisers_1 = tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1)
127 | regularisers_2 = tf.nn.l2_loss(W_left_w1) + tf.nn.l2_loss(b_left_w1)
128 | regularisers_3 = tf.nn.l2_loss(W_center_1) + tf.nn.l2_loss(b_center_1)
129 |                   
130 | regularisers =  1e-2 * regularisers_1 + 1e-3 * regularisers_2 + 1e-2 * regularisers_3 
131 |         
132 | # Add the regularization term to the loss.
133 | loss += regularisers
134 | 
135 | 
136 | 
137 | train_step = tf.train.AdamOptimizer(0.0001).minimize(loss)
138 | 
139 | 
140 | sess = tf.Session()
141 | sess.run(tf.global_variables_initializer())
142 | 
143 | 
144 | # # Run
145 | iter_ = data_iterator()
146 | for i in range(1000000):
147 |     att_batch_val, word_batch_val, visual_batch_val = iter_.next()
148 |     sess.run(train_step, feed_dict={att_features: att_batch_val, 
149 |                                     word_features: word_batch_val, visual_features: visual_batch_val})
150 |     if i % 1000 == 0:
151 |         print(compute_accuracy(att_pro, word_pro, x_test, test_id, test_label))
152 | 
153 | 
154 | 
155 | 


--------------------------------------------------------------------------------
/AwA1_GBU.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import scipy.io as sio
  4 | import kNN
  5 | import kNN_cosine
  6 | from numpy import *   
  7 | from sklearn.metrics import accuracy_score
  8 | 
  9 | def weight_variable(shape):
 10 |     initial = tf.truncated_normal(shape, stddev=0.1)
 11 |     return tf.Variable(initial)
 12 | 
 13 | def bias_variable(shape):
 14 |     initial = tf.constant(0.1, shape=shape)
 15 |     return tf.Variable(initial)
 16 | 
 17 | def compute_accuracy(test_att, test_visual, test_id, test_label):
 18 |     global left_a2  
 19 |     att_pre = sess.run(left_a2, feed_dict={att_features: test_att})
 20 |     test_id = np.squeeze(np.asarray(test_id))
 21 |     outpre = [0]*test_visual.shape[0]  # CUB 2933
 22 |     test_label = np.squeeze(np.asarray(test_label))
 23 |     test_label = test_label.astype("float32")
 24 |     for i in range(test_visual.shape[0]):  # CUB 2933
 25 |         outputLabel = kNN.kNNClassify(test_visual[i,:], att_pre, test_id, 1)
 26 |         outpre[i] = outputLabel
 27 |     #compute averaged per class accuracy
 28 |     outpre = np.array(outpre, dtype='int')
 29 |     unique_labels = np.unique(test_label)
 30 |     acc = 0
 31 |     for l in unique_labels:
 32 |         idx = np.nonzero(test_label == l)[0]
 33 |         acc += accuracy_score(test_label[idx], outpre[idx])
 34 |     acc = acc / unique_labels.shape[0]
 35 |     return acc
 36 | 
 37 | dataroot = './data/'
 38 | dataset = 'AwA1_data'
 39 | image_embedding = 'res101' 
 40 | class_embedding = 'original_att'
 41 | 
 42 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + image_embedding + ".mat")
 43 | feature = matcontent['features'].T
 44 | label = matcontent['labels'].astype(int).squeeze() - 1
 45 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + class_embedding + "_splits.mat")
 46 | # numpy array index starts from 0, matlab starts from 1
 47 | trainval_loc = matcontent['trainval_loc'].squeeze() - 1
 48 | test_seen_loc = matcontent['test_seen_loc'].squeeze() - 1
 49 | test_unseen_loc = matcontent['test_unseen_loc'].squeeze() - 1
 50 |   
 51 | attribute = matcontent['att'].T 
 52 | 
 53 | x = feature[trainval_loc] 
 54 | train_label = label[trainval_loc].astype(int) 
 55 | att = attribute[train_label]
 56 | print(att.shape)
 57 | x_test = feature[test_unseen_loc] 
 58 | test_label = label[test_unseen_loc].astype(int)
 59 | x_test_seen = feature[test_seen_loc] 
 60 | test_label_seen = label[test_seen_loc].astype(int)
 61 | test_id = np.unique(test_label)
 62 | att_pro = attribute[test_id]
 63 | 
 64 | 
 65 | def data_iterator():
 66 |     """ A simple data iterator """
 67 |     batch_idx = 0
 68 |     while True:
 69 |         # shuffle labels and features
 70 |         idxs = np.arange(0, len(x))
 71 |         np.random.shuffle(idxs)
 72 |         shuf_visual = x[idxs]
 73 |         shuf_att = att[idxs]
 74 |         batch_size = 64
 75 |         for batch_idx in range(0, len(x), batch_size):
 76 |             visual_batch = shuf_visual[batch_idx:batch_idx+batch_size]
 77 |             visual_batch = visual_batch.astype("float32")
 78 |             att_batch = shuf_att[batch_idx:batch_idx+batch_size]
 79 |             yield att_batch, visual_batch
 80 | 
 81 | 
 82 | # # Placeholder
 83 | # define placeholder for inputs to network
 84 | att_features = tf.placeholder(tf.float32, [None, 85])
 85 | visual_features = tf.placeholder(tf.float32, [None, 2048])
 86 | 
 87 | 
 88 | # # Network
 89 | # AwA 85 1600 2048 ReLu, 1e-3 * regularisers, 64 batch, 0.0001 Adam
 90 | W_left_a1 = weight_variable([85, 1600])
 91 | b_left_a1 = bias_variable([1600])
 92 | left_a1 = tf.nn.relu(tf.matmul(att_features, W_left_a1) + b_left_a1)
 93 | 
 94 | 
 95 | W_left_a2 = weight_variable([1600, 2048])
 96 | b_left_a2 = bias_variable([2048])
 97 | left_a2 = tf.nn.relu(tf.matmul(left_a1, W_left_a2) + b_left_a2)
 98 | 
 99 | 
100 | # # loss
101 | loss_a = tf.reduce_mean(tf.square(left_a2 - visual_features))    
102 | 
103 | # L2 regularisation for the fully connected parameters.
104 | regularisers_a = (tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1)
105 |                 + tf.nn.l2_loss(W_left_a2) + tf.nn.l2_loss(b_left_a2))
106 |                
107 | loss_a += 1e-3 * regularisers_a
108 | 
109 | 
110 | train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_a)
111 | 
112 | 
113 | sess = tf.Session()
114 | sess.run(tf.global_variables_initializer())
115 | 
116 | 
117 | # # Run
118 | iter_ = data_iterator()
119 | for i in range(1000000):
120 |     att_batch_val, visual_batch_val = iter_.next()
121 |     sess.run(train_step, feed_dict={att_features: att_batch_val, visual_features: visual_batch_val})
122 |     if i % 1000 == 0:
123 |         acc_zsl = compute_accuracy(att_pro, x_test, test_id, test_label)
124 |         acc_seen_gzsl = compute_accuracy(attribute, x_test_seen, np.arange(50), test_label_seen)
125 |         acc_unseen_gzsl = compute_accuracy(attribute, x_test, np.arange(50), test_label)
126 |         H = 2 * acc_seen_gzsl * acc_unseen_gzsl / (acc_seen_gzsl + acc_unseen_gzsl)
127 |         print('zsl:', acc_zsl)
128 |         print('gzsl: seen=%.4f, unseen=%.4f, h=%.4f' % (acc_seen_gzsl, acc_unseen_gzsl, H))
129 |         
130 | 
131 | 
132 | 
133 | 
134 | 
135 | 


--------------------------------------------------------------------------------
/CUB1_GBU.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import scipy.io as sio
  4 | import kNN
  5 | import kNN_cosine
  6 | from numpy import *   
  7 | from sklearn.metrics import accuracy_score
  8 | 
  9 | def weight_variable(shape):
 10 |     initial = tf.truncated_normal(shape, stddev=0.1)
 11 |     return tf.Variable(initial)
 12 | 
 13 | def bias_variable(shape):
 14 |     initial = tf.constant(0.1, shape=shape)
 15 |     return tf.Variable(initial)
 16 | 
 17 | def compute_accuracy(test_att, test_visual, test_id, test_label):
 18 |     global left_a2  
 19 |     att_pre = sess.run(left_a2, feed_dict={att_features: test_att})
 20 |     test_id = np.squeeze(np.asarray(test_id))
 21 |     outpre = [0]*test_visual.shape[0]  # CUB 2933
 22 |     test_label = np.squeeze(np.asarray(test_label))
 23 |     test_label = test_label.astype("float32")
 24 |     for i in range(test_visual.shape[0]):  # CUB 2933
 25 |         outputLabel = kNN_cosine.kNNClassify(test_visual[i,:], att_pre, test_id, 1)
 26 |         outpre[i] = outputLabel
 27 |     #compute averaged per class accuracy
 28 |     outpre = np.array(outpre, dtype='int')
 29 |     unique_labels = np.unique(test_label)
 30 |     acc = 0
 31 |     for l in unique_labels:
 32 |         idx = np.nonzero(test_label == l)[0]
 33 |         acc += accuracy_score(test_label[idx], outpre[idx])
 34 |     acc = acc / unique_labels.shape[0]
 35 |     return acc
 36 | 
 37 | dataroot = './data/'
 38 | dataset = 'CUB1_data'
 39 | image_embedding = 'res101' 
 40 | class_embedding = 'original_att'
 41 | 
 42 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + image_embedding + ".mat")
 43 | feature = matcontent['features'].T
 44 | label = matcontent['labels'].astype(int).squeeze() - 1
 45 | matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + class_embedding + "_splits.mat")
 46 | # numpy array index starts from 0, matlab starts from 1
 47 | trainval_loc = matcontent['trainval_loc'].squeeze() - 1
 48 | test_seen_loc = matcontent['test_seen_loc'].squeeze() - 1
 49 | test_unseen_loc = matcontent['test_unseen_loc'].squeeze() - 1
 50 |   
 51 | attribute = matcontent['att'].T 
 52 | 
 53 | x = feature[trainval_loc] 
 54 | train_label = label[trainval_loc].astype(int) 
 55 | att = attribute[train_label]
 56 | print(att.shape)
 57 | x_test = feature[test_unseen_loc] 
 58 | test_label = label[test_unseen_loc].astype(int)
 59 | x_test_seen = feature[test_seen_loc] 
 60 | test_label_seen = label[test_seen_loc].astype(int)
 61 | test_id = np.unique(test_label)
 62 | att_pro = attribute[test_id]
 63 | 
 64 | 
 65 | def data_iterator():
 66 |     """ A simple data iterator """
 67 |     batch_idx = 0
 68 |     while True:
 69 |         # shuffle labels and features
 70 |         idxs = np.arange(0, len(x))
 71 |         np.random.shuffle(idxs)
 72 |         shuf_visual = x[idxs]
 73 |         shuf_att = att[idxs]
 74 |         batch_size = 100
 75 |         for batch_idx in range(0, len(x), batch_size):
 76 |             visual_batch = shuf_visual[batch_idx:batch_idx+batch_size]
 77 |             visual_batch = visual_batch.astype("float32")
 78 |             att_batch = shuf_att[batch_idx:batch_idx+batch_size]
 79 |             yield att_batch, visual_batch
 80 |             
 81 | 
 82 | 
 83 | 
 84 | # # Placeholder
 85 | # define placeholder for inputs to network
 86 | att_features = tf.placeholder(tf.float32, [None, 312])
 87 | visual_features = tf.placeholder(tf.float32, [None, 2048])
 88 | 
 89 | 
 90 | # # Network
 91 | # CUB 312 300 2048 ReLu, 1e-2 * regularisers, 64 batch, 0.00001 Adam
 92 | W_left_a1 = weight_variable([312, 1200])
 93 | b_left_a1 = bias_variable([1200])
 94 | left_a1 = tf.nn.relu(tf.matmul(att_features, W_left_a1) + b_left_a1)
 95 | 
 96 | 
 97 | W_left_a2 = weight_variable([1200, 2048])
 98 | b_left_a2 = bias_variable([2048])
 99 | left_a2 = tf.nn.relu(tf.matmul(left_a1, W_left_a2) + b_left_a2)
100 | 
101 | 
102 | # # loss
103 | loss = tf.reduce_mean(tf.square(left_a2 - visual_features))
104 | 
105 | # L2 regularisation for the fully connected parameters.
106 | regularisers = (tf.nn.l2_loss(W_left_a1) + tf.nn.l2_loss(b_left_a1)
107 |                 + tf.nn.l2_loss(W_left_a2) + tf.nn.l2_loss(b_left_a2))
108 |                
109 | loss += 1e-2 * regularisers
110 | 
111 | train_step = tf.train.AdamOptimizer(0.00001).minimize(loss)
112 | 
113 | 
114 | sess = tf.Session()
115 | sess.run(tf.global_variables_initializer())
116 | 
117 | 
118 | # # Run
119 | iter_ = data_iterator()
120 | for i in range(1000000):
121 |     att_batch_val, visual_batch_val = iter_.next()
122 |     sess.run(train_step, feed_dict={att_features: att_batch_val, visual_features: visual_batch_val})
123 |     if i % 1000 == 0:
124 |         acc_zsl = compute_accuracy(att_pro, x_test, test_id, test_label)
125 |         acc_seen_gzsl = compute_accuracy(attribute, x_test_seen, np.arange(200), test_label_seen)
126 |         acc_unseen_gzsl = compute_accuracy(attribute, x_test, np.arange(200), test_label)
127 |         H = 2 * acc_seen_gzsl * acc_unseen_gzsl / (acc_seen_gzsl + acc_unseen_gzsl)
128 |         print('zsl:', acc_zsl)
129 |         print('gzsl: seen=%.4f, unseen=%.4f, h=%.4f' % (acc_seen_gzsl, acc_unseen_gzsl, H))
130 |         
131 | 
132 | 
133 | 
134 | 
135 | 
136 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # DeepEmbeddingModel_ZSL
  2 | Tensorflow code for CVPR 2017 paper: [Learning a Deep Embedding Model for Zero-Shot Learning](https://arxiv.org/abs/1611.05088)
  3 | 
  4 | [Li Zhang](http://www.robots.ox.ac.uk/~lz/)
  5 | 
  6 | # Requirement
  7 | Python 2.7
  8 | 
  9 | Tensorflow > 1.0
 10 | 
 11 | # Data
 12 | Download data from [here](http://www.robots.ox.ac.uk/~lz/DEM_cvpr2017/data.zip) and unzip it with `unzip data.zip`.
 13 | 
 14 | # Run
 15 | `AwA_attribute.py` will give you ZSL performance on AwA with attribute.
 16 | 
 17 | `AwA_wordvector.py` will give you ZSL performance on AwA with wordvector.
 18 | 
 19 | `AwA_fusion.py` will give you ZSL performance on AwA with attribute and wordvector fusion.
 20 | 
 21 | `CUB_attribute.py` will give you ZSL performance on CUB with attribute.
 22 | 
 23 | # GBU setting
 24 | 
 25 | ZSL and GZSL performance evaluated under GBU setting [1]: ResNet feature, GBU split, averaged per class accuracy.
 26 | 
 27 | `AwA1_GBU.py` will give you ZSL and GZSL performance on AwA1 with attribute under GBU setting [1].
 28 | 
 29 | `AwA2_GBU.py` will give you ZSL and GZSL performance on AwA2 with attribute under GBU setting [1].
 30 | 
 31 | `CUB1_GBU.py` will give you ZSL and GZSL performance on CUB with attribute under GBU setting [1].
 32 | 
 33 | `aPY_GBU.py` will give you ZSL and GZSL performance on aPY with attribute under GBU setting [1].
 34 | 
 35 | `SUN_GBU.py` will give you ZSL and GZSL performance on SUN with attribute under GBU setting [1].
 36 | 
 37 | 
 38 | 
 39 | 
 40 | 
 41 | | Model      |   AwA1 T1    |    u    |    s    |    H    |   CUB T1    |    u    |    s    |    H    |
 42 | |------------|---------|---------|---------|---------|---------|---------|---------|---------|
 43 | | DAP [2]      |   44.1  |   0.0   |   **88.7**  |   0.0   |   40.0  |   1.7   |   67.9  |   3.3   |
 44 | | CONSE [3]     |   45.6  |   0.4   |   88.6  |   0.8   |   34.3  |   1.6   |   **72.2**  |   3.1   |
 45 | | SSE [4]       |   60.1  |   7.0   |   80.5  |   12.9  |   43.9  |   8.5   |   46.9  |   14.4  |
 46 | | DEVISE [5]    |   54.2  |   13.4  |   68.7  |   22.4  |   52.0  |   **23.8**  |   53.0  |   32.8  |
 47 | | SJE [6]       |   65.6  |   11.3  |   74.6  |   19.6  |   53.9  |   23.5  |   59.2  |   33.6  |
 48 | | LATEM [7]     |   55.1  |   7.3   |   71.7  |   13.3  |   49.3  |   15.2  |   57.3  |   24.0  |
 49 | | ESZSL [8]     |   58.2  |   6.6   |   75.6  |   12.1  |   53.9  |   12.6  |   63.8  |   21.0  |
 50 | | ALE [9]       |   59.9  |   16.8  |   76.1  |   27.5  |   54.9  |   23.7  |   62.8  |   **34.4**  |
 51 | | SYNC [10]      |   54.0  |   8.9   |   87.3  |   16.2  |   **55.6**  |   11.5  |   70.9  |   19.8  |
 52 | | SAE [11]       |   53.0  |   1.8   |   77.1  |   3.5   |   33.3  |   7.8   |   54.0  |   13.6  |
 53 | | **DEM (OURS)** | **68.4** | **32.8** | 84.7  |  **47.3** | 51.7  |   19.6  |  57.9  |  29.2 |
 54 | 
 55 | 
 56 | | Model      |   AwA2 T1    |    u    |    s    |    H    |   aPY T1    |    u    |    s    |    H    |
 57 | |------------|---------|---------|---------|---------|---------|---------|---------|---------|
 58 | | DAP [2]      |   46.1  |   0.0    |   84.7  |   0.0   |   33.8  |   4.8   |   78.3  |   9.0   |
 59 | | CONSE [3]     |   44.5  |   0.5   | **90.6**|   1.0   |   26.9  |   0.0   |**91.2** |   0.0   |
 60 | | SSE [4]       |   61.0  |   8.1   |   82.5  |   14.8  |   34.0  |   0.2   |   78.9  |   0.4   |
 61 | | DEVISE [5]    |   59.7  |   17.1  |   74.7  |   27.8  |   39.8  |   4.9   |   76.9  |   9.2   |
 62 | | SJE [6]       |   61.9  |   8.0   |   73.9  |   14.4  |   32.9  |   3.7   |   55.7  |   6.9   |
 63 | | LATEM [7]     |   55.8  |   11.5  |   77.3  |   20.0  |   35.2  |   0.1   |   73.0  |   0.2   |
 64 | | ESZSL [8]     |   58.6  |   5.9   |   77.8  |   11.0  |   38.3  |   2.4   |   70.1  |   4.6   |
 65 | | ALE [9]       |   62.5  |   14.0  |   81.8  |   23.9  |   39.7  |   4.6   |   73.7  |   8.7   |
 66 | | SYNC [10]     |   46.6  |   10.0  |   90.5  |   18.0  |   23.9  |   7.4   |   66.3  |   13.3  |
 67 | | SAE [11]      |   54.1  |   1.1   |   82.2  |   2.2   |   8.3   |   0.4   |   80.9  |   0.9   |
 68 | | **DEM (OURS)** | **67.1** | **30.5** | 86.4 | **45.1**|   35.0  | **11.1**|  75.1   |**19.4** |
 69 | 
 70 | 
 71 | 
 72 | 
 73 | | Model      |   SUN T1    |    u    |    s    |    H    |  
 74 | |------------|---------|---------|---------|---------|
 75 | | DAP [2]      |   39.9  |   4.2   |   25.1  |   7.2   | 
 76 | | CONSE [3]     |   38.8  |   6.8  |   39.9  |   11.6   |  
 77 | | SSE [4]       |   51.5 |   2.1  |   36.4 |   4.0  |   
 78 | | DEVISE [5]    |   56.5  |   16.9  |   27.4  |   20.9  |   
 79 | | SJE [6]       |   53.7  |   14.7  |   30.5  |   19.8  |  
 80 | | LATEM [7]     |   55.3  |   14.7  |   28.8  |   19.5  |  
 81 | | ESZSL [8]     |   54.5  |   11.0   |  27.9  |   15.8  |   
 82 | | ALE [9]       |   58.1  |   **21.8**  |   33.1  |   **26.3**  |   
 83 | | SYNC [10]      |   56.3  |   7.9   |   **43.3**  |   13.4  |  
 84 | | SAE [11]       |   40.3  |   8.8   |   18.0  |   11.8  |  
 85 | | **DEM (OURS)** | **61.9** | 20.5 | 34.3 |  25.6 | 
 86 | 
 87 | 
 88 | 
 89 | # PyTorch implementation
 90 | [DeepEmbeddingModel_ZSL-Pytorch](https://github.com/dragen1860/DeepEmbeddingModel_ZSL-Pytorch)
 91 | 
 92 | ## Citing
 93 | 
 94 | If you use this code in your research, please use the following BibTeX entry.
 95 | 
 96 | ```
 97 | @inproceedings{zhang2017learning,
 98 |   title={Learning a deep embedding model for zero-shot learning},
 99 |   author={Zhang, Li and Xiang, Tao and Gong, Shaogang},
100 |   booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
101 |   year={2017}
102 | }
103 | ```
104 | 
105 | ## References
106 | 
107 | - [1] [Zero-Shot Learning - A Comprehensive Evaluation of the Good, the Bad and the Ugly](https://arxiv.org/abs/1707.00600).
108 |   Yongqin Xian, Christoph H. Lampert, Bernt Schiele, Zeynep Akata.
109 |   arXiv, 2017.
110 | - [2] [Attribute-Based Classification for Zero-Shot Visual Object Categorization](https://cvml.ist.ac.at/papers/lampert-pami2013.pdf).
111 |   Christoph H. Lampert, Hannes Nickisch and Stefan Harmeling.
112 |   PAMI, 2014.
113 | - [3] [Zero-Shot Learning by Convex Combination of Semantic Embeddings](https://arxiv.org/abs/1312.5650).
114 |   Mohammad Norouzi, Tomas Mikolov, Samy Bengio, Yoram Singer, Jonathon Shlens, Andrea Frome, Greg S. Corrado, Jeffrey Dean.
115 |   arXiv, 2013.
116 | - [4] [Zero-Shot Learning via Semantic Similarity Embedding](https://arxiv.org/abs/1509.04767).
117 |   Ziming Zhang, Venkatesh Saligrama.
118 |   ICCV, 2015.
119 | - [5] [DeViSE: A Deep Visual-Semantic Embedding Model](http://papers.nips.cc/paper/5204-devise-a-deep-visual-semantic-embedding-model.pdf).
120 |   Andrea Frome*, Greg S. Corrado*, Jonathon Shlens*, Samy Bengio, Jeffrey Dean, Marc’Aurelio Ranzato, Tomas Mikolov.
121 |   NIPS, 2013.
122 | - [6] [Evaluation of Output Embeddings for Fine-Grained Image Classification](https://arxiv.org/abs/1409.8403).
123 |   Zeynep Akata, Scott Reed, Daniel Walter, Honglak Lee, Bernt Schiele.
124 |   CVPR, 2015.
125 | - [7] [Latent Embeddings for Zero-shot Classification](https://arxiv.org/abs/1603.08895).
126 |   Yongqin Xian, Zeynep Akata, Gaurav Sharma, Quynh Nguyen, Matthias Hein, Bernt Schiele.
127 |   CVPR, 2016.
128 | - [8] [An embarrassingly simple approach to zero-shot learning](http://proceedings.mlr.press/v37/romera-paredes15.pdf).
129 |   Bernardino Romera-Paredes, Philip H. S. Torr.
130 |   ICML, 2015.
131 | - [9] [Label-Embedding for Image Classification](https://arxiv.org/abs/1503.08677).
132 |   Zeynep Akata, Florent Perronnin, Zaid Harchaoui, Cordelia Schmid.
133 |   PAMI, 2016.
134 | - [10] [Synthesized Classifiers for Zero-Shot Learning](https://arxiv.org/abs/1603.00550).
135 |   Soravit Changpinyo, Wei-Lun Chao, Boqing Gong, Fei Sha.
136 |   CVPR, 2016.
137 | - [11] [Semantic Autoencoder for Zero-Shot Learning](https://arxiv.org/abs/1704.08345).
138 |   Elyor Kodirov, Tao Xiang, Shaogang Gong.
139 |   CVPR, 2017.
140 | 


--------------------------------------------------------------------------------