├── README.md
├── data
│   ├── test
│   │   ├── answers.txt
│   │   ├── img_ids.txt
│   │   ├── questions.txt
│   │   └── types.txt
│   └── train
│       ├── answers.txt
│       ├── img_ids.txt
│       ├── questions.txt
│       └── types.txt
├── imageQA_demo.gif
├── preprocessing
│   ├── 1_data2pickle.py
│   ├── 2_makeDict.py
│   ├── 3_modifyPkl.py
│   ├── 4_downloadImage.py
│   ├── 5_getCNNFeature.py
│   └── cnn.py
├── tensorflow_simple
│   ├── imageQA_tensorflow.py
│   └── web
│       ├── cnn.pkl
│       ├── cnn.py
│       ├── cnn.pyc
│       ├── cnn4web.py
│       ├── imageQA_tensorflow.py
│       ├── imageQA_tensorflow.pyc
│       ├── images
│       │   └── moodo.jpg
│       ├── server.py
│       └── templates
│           └── iqa.html
├── theano_attention
│   ├── cnn.py
│   ├── cnn.pyc
│   ├── main.py
│   ├── models.py
│   ├── models.pyc
│   ├── optimizer.py
│   ├── optimizer.pyc
│   └── prediction.py
└── theano_simple
    ├── cnn.py
    ├── main.py
    ├── models.py
    ├── optimizer.py
    └── prediction.py
/README.md:
--------------------------------------------------------------------------------
1 | # Image Question Answering
2 |
3 | reference paper : http://arxiv.org/abs/1505.02074
4 |
5 | dataset : http://www.cs.toronto.edu/~mren/imageqa/data/cocoqa/
6 |
7 |
8 | theano
9 | - simple : fc7 features with RNNs
10 |
11 | - attention : conv5_4 features (similar to 'show, attend and tell' http://arxiv.org/abs/1502.03044) with RNNs
12 |
13 |
14 |
15 | tensorflow
16 | - simple : fc7 features with RNNs
17 |
18 |
19 | [demo]
20 |
21 |
--------------------------------------------------------------------------------
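The two model families share one trick: the CNN feature of the image is projected into the word-embedding space and fed to the RNN as if it were the first token of the question. A minimal numpy sketch of that input construction (dimensions follow the Config1 values in tensorflow_simple/imageQA_tensorflow.py; the random tensors are illustrative stand-ins):

```python
# Sketch: image feature as the first RNN input step (numpy stand-ins only).
import numpy as np

dim_ictx, dim_iemb, dim_wemb = 4096, 1024, 1024  # fc7 size, image/word embedding sizes
vocab_size = 12047

W_iemb = 0.1 * np.random.randn(dim_ictx, dim_iemb)    # image embedding matrix
W_wemb = 0.1 * np.random.randn(vocab_size, dim_wemb)  # word embedding table

fc7 = np.random.randn(dim_ictx)     # stand-in for a VGG16 fc7 feature
question = [12, 5, 903]             # word indices from word2idx

img_token = fc7.dot(W_iemb)                           # project image into embedding space
inputs = [img_token] + [W_wemb[w] for w in question]  # image first, then the words
print('%d RNN steps of %d-d inputs' % (len(inputs), inputs[0].shape[0]))
```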
/imageQA_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seankim902/imageQA/25f70ba1dc013eac6f7c734830f04663fd7b58f2/imageQA_demo.gif
--------------------------------------------------------------------------------
/preprocessing/1_data2pickle.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import pandas as pd
4 | import os
5 |
6 | ### test data to pickle
7 | os.chdir('/Users/seonhoon/Desktop/workspace_python/ImageQA/data/test')
8 | test=pd.DataFrame()
9 |
10 | data=[]
11 | with open('img_ids.txt') as f:
12 | for line in f:
13 | data.append(line.split('\n')[0])
14 | test['img_id']=data
15 |
16 | data=[]
17 | with open('questions.txt') as f:
18 | for line in f:
19 | data.append(line.split('\n')[0])
20 | test['question']=data
21 |
22 | data=[]
23 | with open('answers.txt') as f:
24 | for line in f:
25 | data.append(line.split('\n')[0])
26 | test['answer']=data
27 |
28 | data=[]
29 | with open('types.txt') as f:
30 | for line in f:
31 | data.append(line.split('\n')[0])
32 | test['type']=data
33 |
34 | test.to_pickle('../test.pkl')
35 |
36 |
37 | ### train data to pickle
38 | os.chdir('/Users/seonhoon/Desktop/workspace_python/ImageQA/data/train')
39 | train=pd.DataFrame()
40 |
41 | data=[]
42 | with open('img_ids.txt') as f:
43 | for line in f:
44 | data.append(line.split('\n')[0])
45 | train['img_id']=data
46 |
47 | data=[]
48 | with open('questions.txt') as f:
49 | for line in f:
50 | data.append(line.split('\n')[0])
51 | train['question']=data
52 |
53 | data=[]
54 | with open('answers.txt') as f:
55 | for line in f:
56 | data.append(line.split('\n')[0])
57 | train['answer']=data
58 |
59 | data=[]
60 | with open('types.txt') as f:
61 | for line in f:
62 | data.append(line.split('\n')[0])
63 | train['type']=data
64 |
65 | train.to_pickle('../train.pkl')
66 |
--------------------------------------------------------------------------------
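The script above repeats one read-lines-into-column block eight times; a compact equivalent, as a sketch assuming the same directory layout and output pickles:

```python
# Loop form of 1_data2pickle.py (sketch; same paths, columns and pickles).
import os
import pandas as pd

def read_lines(path):
    with open(path) as f:
        return [line.rstrip('\n') for line in f]

base = '/Users/seonhoon/Desktop/workspace_python/ImageQA/data'
columns = [('img_id', 'img_ids.txt'), ('question', 'questions.txt'),
           ('answer', 'answers.txt'), ('type', 'types.txt')]

for split in ('test', 'train'):
    df = pd.DataFrame()
    for col, fn in columns:
        df[col] = read_lines(os.path.join(base, split, fn))
    df.to_pickle(os.path.join(base, split + '.pkl'))
```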
/preprocessing/2_makeDict.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import pandas as pd
3 | import os
4 | import cPickle
5 |
6 | os.chdir('/Users/seonhoon/Desktop/workspace_python/ImageQA/data/')
7 |
8 | train=pd.read_pickle('train.pkl')
9 | test=pd.read_pickle('test.pkl')
10 |
11 | questions = train['question'].tolist()+test['question'].tolist()
12 | questions = [question.split() for question in questions]
13 |
14 | tokens=[]
15 |
16 | for question in questions:
17 | for word in question:
18 | tokens.append(word)
19 |
20 | tokens = list(set(tokens))
21 | tokens.sort()
22 |
23 | idx2word = dict([(i,k) for i,k in enumerate(tokens)])
24 | word2idx = dict([(k,i) for i,k in enumerate(tokens)])
25 |
26 |
27 |
28 |
29 | answers = list(set(train['answer'].tolist()+test['answer'].tolist()))
30 | answers.sort()
31 |
32 | idx2answer = dict([(i,k) for i,k in enumerate(answers)])
33 | answer2idx = dict([(k,i) for i,k in enumerate(answers)])
34 |
35 | with open('dict.pkl', 'wb') as f:
36 | cPickle.dump([idx2word, word2idx, idx2answer, answer2idx], f)
--------------------------------------------------------------------------------
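The four dictionaries give token-to-index and index-to-token maps for question words, plus a separate pair for answers, which are treated as single class labels rather than sequences. A quick round trip on a toy vocabulary:

```python
# Round trip through the dictionaries built in 2_makeDict.py (toy vocabulary).
tokens = sorted(set('what color is the cat'.split()))
idx2word = dict(enumerate(tokens))
word2idx = dict((k, i) for i, k in enumerate(tokens))

encoded = [word2idx[w] for w in 'what is the cat'.split()]
decoded = [idx2word[i] for i in encoded]
assert decoded == ['what', 'is', 'the', 'cat']
print(encoded)
```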
/preprocessing/3_modifyPkl.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import pandas as pd
4 | import os
5 | import cPickle
6 |
7 | os.chdir('/Users/seonhoon/Desktop/workspace_python/ImageQA/data/')
8 |
9 |
10 | with open('dict.pkl', 'rb') as f:
11 |     dicts = cPickle.load(f)  # 'dicts', not 'dict', which would shadow the builtin
12 | idx2word = dicts[0]
13 | word2idx = dicts[1]
14 | idx2answer = dicts[2]
15 | answer2idx = dicts[3]
16 |
17 | train=pd.read_pickle('train.pkl')
18 | test=pd.read_pickle('test.pkl')
19 |
20 | def getIndex(x, type='question'):
21 | xs=x.split()
22 | idx=[]
23 | getIdx=word2idx
24 | if type=='answer':
25 | getIdx=answer2idx
26 | for x in xs:
27 | idx.append(getIdx[x])
28 | return idx
29 |
30 | train['q']=train['question'].apply(lambda x : getIndex(x))
31 | train['a']=train['answer'].apply(lambda x : getIndex(x,type='answer'))
32 | test['q']=test['question'].apply(lambda x : getIndex(x))
33 | test['a']=test['answer'].apply(lambda x : getIndex(x,type='answer'))
34 |
35 | train.to_pickle('train.pkl')
36 | test.to_pickle('test.pkl')
37 |
--------------------------------------------------------------------------------
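getIndex looks each token up directly, so an out-of-vocabulary word raises KeyError; that is safe here only because 2_makeDict.py builds the dictionaries over the train and test questions together. A toy run with hypothetical dictionaries:

```python
# The lookup logic of getIndex from 3_modifyPkl.py, on toy dictionaries.
word2idx = {'ball': 0, 'is': 1, 'red': 2, 'the': 3}
answer2idx = {'blue': 0, 'red': 1}

def getIndex(x, mapping):
    return [mapping[w] for w in x.split()]

print(getIndex('is the ball red', word2idx))  # [1, 3, 0, 2]
print(getIndex('red', answer2idx))            # [1]
```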
/preprocessing/4_downloadImage.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import urllib
4 | import pandas as pd
5 | import os
6 | import shutil
7 |
8 | os.chdir("/home/seonhoon/Desktop/workspace/ImageQA/data/images/")
9 |
10 |
11 |
12 | i=0
13 |
14 | def downloadImage(img_id, location):  # 'img_id' rather than 'id', which shadows the builtin
15 |     global i
16 |     i = i + 1
17 |     if i % 1000 == 0:
18 |         print 'iter : ', i
19 |     fn = img_id.zfill(12)
20 |     if os.path.isfile('/home/seonhoon/Downloads/test2014/COCO_test2014_'+fn+'.jpg'):
21 |         print 'shutil.copy2 test ', img_id
22 |         shutil.copy2('/home/seonhoon/Downloads/test2014/COCO_test2014_'+fn+'.jpg', location+'/'+img_id+'.jpg')
23 |     elif os.path.isfile('/home/seonhoon/Downloads/train2014/COCO_train2014_'+fn+'.jpg'):
24 |         print 'shutil.copy2 train ', img_id
25 |         shutil.copy2('/home/seonhoon/Downloads/train2014/COCO_train2014_'+fn+'.jpg', location+'/'+img_id+'.jpg')
26 |     elif os.path.isfile(location+'/'+img_id+'.jpg'):
27 |         print 'already downloaded ', img_id
28 |     else:
29 |         print 'download ', img_id
30 |         urllib.urlretrieve('http://mscoco.org/images/'+img_id, location+'/'+img_id+'.jpg')
31 |
32 | #train=pd.read_pickle('/home/seonhoon/Desktop/workspace/ImageQA/data/train.pkl')
33 |
34 | #train['img_id'].apply(lambda x : downloadImage(x, 'train'))
35 |
36 | test=pd.read_pickle('/home/seonhoon/Desktop/workspace/ImageQA/data/test.pkl')
37 |
38 | test['img_id'].apply(lambda x : downloadImage(x, 'test'))
--------------------------------------------------------------------------------
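urllib.urlretrieve saves whatever the server returns, including HTML error pages, so a failed download can leave a corrupt .jpg behind. A slightly more defensive variant, as a sketch (Python 2 urllib, same URL scheme as the script):

```python
# Defensive download step for 4_downloadImage.py (sketch).
import os
import urllib

def fetch_image(img_id, location):
    dest = os.path.join(location, img_id + '.jpg')
    if os.path.isfile(dest):            # already present: nothing to do
        return dest
    tmp = dest + '.part'
    urllib.urlretrieve('http://mscoco.org/images/' + img_id, tmp)
    if os.path.getsize(tmp) == 0:       # empty response: treat as a failure
        os.remove(tmp)
        raise IOError('empty download for image %s' % img_id)
    os.rename(tmp, dest)                # keep only completed files
    return dest
```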
/preprocessing/5_getCNNFeature.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 |
4 | from cnn import *
5 | import pandas as pd
6 | import os
7 |
8 |
9 | os.chdir('/home/seonhoon/Desktop/workspace/ImageQA/data/')
10 | train=pd.read_pickle('train.pkl')
11 | test=pd.read_pickle('test.pkl')
12 |
13 |
14 | os.chdir('/home/seonhoon/Desktop/workspace/ImageQA/data/caffemodel/vgg16')
15 |
16 | #/home/seonhoon/Desktop/workspace/ImageQA/data/images/train/xxx.jpg
17 | img_folder='/home/seonhoon/Desktop/workspace/ImageQA/data/images/'
18 |
19 | # train
20 | print 'train .. '
21 | imglist = train['img_id'].apply(lambda x : img_folder+'train/'+x+'.jpg')
22 | imglist = imglist.tolist()
23 | cnn = CNN()
24 | featurelist = cnn.get_features(imglist)
25 | train['cnn_feature']=0
26 | train['cnn_feature']=train['cnn_feature'].astype(object)
27 | for i in range(len(train)):
28 | train.loc[i,'cnn_feature']=featurelist[i]
29 |
30 |
31 |
32 | #test
33 | print 'test .. '
34 | imglist = test['img_id'].apply(lambda x : img_folder+'test/'+x+'.jpg')
35 | imglist = imglist.tolist()
36 | cnn = CNN()
37 | featurelist = cnn.get_features(imglist)
38 | test['cnn_feature']=0
39 | test['cnn_feature']=test['cnn_feature'].astype(object)
40 | for i in range(len(test)):
41 | test.loc[i,'cnn_feature']=featurelist[i]
42 |
43 | #modify pickle
44 | os.chdir('/home/seonhoon/Desktop/workspace/ImageQA/data/')
45 | train.to_pickle('train_vgg.pkl')
46 | test.to_pickle('test_vgg.pkl')
47 |
--------------------------------------------------------------------------------
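Assigning a 4096-d vector into a DataFrame cell only works because the column is cast to object first. For features of this size it can be simpler to keep them in a plain float32 matrix alongside the DataFrame, e.g. (a sketch with a random stand-in for the CNN output):

```python
# Alternative to the object-column assignment in 5_getCNNFeature.py (sketch).
import numpy as np

featurelist = np.random.randn(10, 4096)    # stand-in for CNN.get_features() output
features = np.asarray(featurelist, dtype=np.float32)
np.save('train_fc7.npy', features)         # row i corresponds to train.iloc[i]
features = np.load('train_fc7.npy')        # reload later without pandas overhead
```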
/preprocessing/cnn.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 |
4 | import caffe
5 | import numpy as np
6 | from scipy.misc import imread, imresize
7 |
8 | class CNN(object):
9 |
10 | def __init__(self, deploy='VGG_ILSVRC_16_layers_deploy.prototxt', model='VGG_ILSVRC_16_layers.caffemodel'):
11 | caffe.set_mode_gpu()
12 | self.net = caffe.Net(deploy, model, caffe.TEST)
13 |
14 | if model.startswith('VGG_ILSVRC_16_layers'):
15 | self.mean = np.array([103.939, 116.779, 123.68])
16 |
17 |
18 | def get_batch_features(self, in_data, net, layer):
19 | out = net.forward(blobs=[layer], **{net.inputs[0]: in_data})
20 | features = out[layer]#.squeeze(axis=(2,3))
21 | return features
22 |
23 | def get_features(self, filelist, layer='fc7'):
24 |
25 | N, channel, height, width = self.net.blobs[self.net.inputs[0]].data.shape
26 | feature = self.net.blobs[layer].data.shape[1]
27 | n_files = len(filelist)
28 |
29 | all_features = np.zeros((n_files, feature))
30 | for i in range(0, n_files, N):
31 | in_data = np.zeros((N, channel, height, width), dtype=np.float32)
32 |
33 | batch_range = range(i, min(i+N, n_files))
34 | batch_filelist = [filelist[j] for j in batch_range]
35 |
36 | batch_images = np.zeros((len(batch_range), 3, height, width))
37 | for j,file in enumerate(batch_filelist):
38 | im = imread(file)
39 | if len(im.shape) == 2:
40 | im = np.tile(im[:,:,np.newaxis], (1,1,3))
41 | im = imresize(im, (height, width), 'bicubic') # resize first: imresize returns uint8 in [0, 255]
42 | im = im[:,:,(2,1,0)].astype(np.float32) # RGB -> BGR
43 | im = im - self.mean # mean subtraction (after resizing, so imresize does not rescale it away)
44 | im = np.transpose(im, (2, 0, 1)) # channels first for caffe
45 | batch_images[j,:,:,:] = im
46 |
47 | in_data[0:len(batch_range), :, :, :] = batch_images
48 |
49 | features = self.get_batch_features(in_data, self.net, layer)
50 |
51 | for j in range(len(batch_range)):
52 | all_features[i+j,:] = features[j,:]
53 |
54 | print 'Done %d/%d files' % (i+len(batch_range), len(filelist))
55 |
56 | return all_features
57 |
--------------------------------------------------------------------------------
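The per-image preprocessing is the usual VGG16 recipe: resize to the network input size, reorder RGB to BGR, subtract the ImageNet channel means, and move channels first. A standalone sketch (scipy.misc's imread/imresize, as in the script; both are removed from modern SciPy):

```python
# VGG16-style preprocessing for one image, mirroring CNN.get_features() (sketch).
import numpy as np
from scipy.misc import imread, imresize

MEAN_BGR = np.array([103.939, 116.779, 123.68])

def preprocess(path, height=224, width=224):
    im = imread(path)
    if im.ndim == 2:                                   # grayscale -> 3 channels
        im = np.tile(im[:, :, np.newaxis], (1, 1, 3))
    im = imresize(im, (height, width), 'bicubic')      # resize first (returns uint8)
    im = im[:, :, (2, 1, 0)].astype(np.float32)        # RGB -> BGR
    im -= MEAN_BGR                                     # per-channel mean subtraction
    return np.transpose(im, (2, 0, 1))                 # HWC -> CHW for caffe
```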
/tensorflow_simple/imageQA_tensorflow.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import time
5 | import pandas as pd
6 | import numpy as np
7 |
8 | from keras.utils import np_utils
9 |
10 | #import tensorflow.python.platform
11 | import tensorflow as tf
12 | from tensorflow.models.rnn import rnn_cell
13 |
14 |
15 | # To do
16 | '''
17 | last mini-batch size issue: get_minibatch_indices drops a final batch smaller than batch_size
18 |
19 |
20 | '''
21 |
22 | def prepare_data(seqs_x, maxlen=None):
23 | lengths_x = [len(s) for s in seqs_x]
24 |
25 | n_samples = len(seqs_x)
26 | if maxlen is None:
27 | maxlen = np.max(lengths_x) + 1
28 |
29 | x = np.zeros((n_samples, maxlen)).astype('int32')
30 | x_mask = np.zeros((n_samples, maxlen)).astype('float32')
31 |
32 | for idx, s_x in enumerate(seqs_x):
33 | x[idx, :lengths_x[idx]] = s_x
34 | x_mask[idx, :lengths_x[idx]+1] = 1. # Adding 1, for image
35 |
36 | return x, x_mask
37 |
38 |
39 | def get_minibatch_indices(n, batch_size, shuffle=False):
40 |
41 | idx_list = np.arange(n, dtype="int32")
42 |
43 | if shuffle:
44 | np.random.shuffle(idx_list)
45 |
46 | minibatches = []
47 | minibatch_start = 0
48 | for i in range(n // batch_size):
49 | minibatches.append(idx_list[minibatch_start:
50 | minibatch_start + batch_size])
51 | minibatch_start += batch_size
52 | # if (minibatch_start != n): # last mini-batch issue !!!
53 | # minibatches.append(idx_list[minibatch_start:])
54 | return minibatches
55 |
56 |
57 |
58 | class ImageQA(object):
59 |
60 | def __init__(self, config):
61 |
62 | self.config = config
63 |
64 | self.vocab_size = vocab_size = config.vocab_size
65 | self.y_size = y_size = config.y_size
66 |
67 | self.batch_size = batch_size = config.batch_size
68 | self.steps = config.steps
69 |
70 | self.layers = layers = config.layers
71 |
72 | self.dim_ictx = dim_ictx = config.dim_ictx
73 | self.dim_iemb = dim_iemb = config.dim_iemb
74 | self.dim_wemb = dim_wemb = config.dim_wemb
75 | self.dim_hidden = dim_hidden = config.dim_hidden
76 |
77 | self.lr = tf.Variable(config.lr, trainable=False)
78 |
79 | rnn_type = config.rnn_type
80 | if rnn_type == 'gru':
81 | rnn_ = rnn_cell.GRUCell(dim_hidden)
82 | elif rnn_type == 'lstm':
83 | rnn_ = rnn_cell.BasicLSTMCell(dim_hidden)
84 |
85 | if layers is not None:
86 | self.my_rnn = my_rnn = rnn_cell.MultiRNNCell([rnn_] * layers)
87 | self.init_state = my_rnn.zero_state(batch_size, tf.float32)
88 | else:
89 | self.my_rnn = my_rnn = rnn_
90 | self.init_state = tf.zeros([batch_size, my_rnn.state_size])
91 |
92 | self.W_iemb = tf.get_variable("W_iemb", [dim_ictx, dim_iemb])
93 | self.b_iemb = tf.get_variable("b_iemb", [dim_iemb])
94 | with tf.device("/cpu:0"):
95 | self.W_wemb = tf.get_variable("W_wemb", [vocab_size, dim_wemb])
96 |
97 | if config.is_birnn : # concatenating the two directions seems to work better than adding them
98 | self.W_pred = tf.get_variable("W_pred", [dim_hidden * 2, y_size])
99 | else :
100 | self.W_pred = tf.get_variable("W_pred", [dim_hidden, y_size])
101 |
102 | self.b_pred = tf.get_variable("b_pred", [y_size])
103 |
104 |
105 | def build_model(self):
106 |
107 | x = tf.placeholder(tf.int32, [self.batch_size, self.steps])
108 | x_mask = tf.placeholder(tf.float32, [self.batch_size, self.steps])
109 | y = tf.placeholder(tf.float32, [self.batch_size, self.y_size])
110 | img = tf.placeholder(tf.float32, [self.batch_size, self.dim_ictx])
111 |
112 |
113 |
114 | with tf.device("/cpu:0"):
115 | inputs = tf.split(1, self.steps, tf.nn.embedding_lookup(self.W_wemb, x))
116 | # sample * steps * dim -> split -> sample * 1 * dim
117 | inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
118 | # [sample * dim, sample * dim, sample * dim, ... ]
119 | img_emb = tf.nn.xw_plus_b(img, self.W_iemb, self.b_iemb)
120 | inputs = [img_emb]+inputs[:-1] # -1 is for img
121 |
122 |
123 |
124 | hiddens = []
125 | states = []
126 |
127 |
128 | state = self.init_state
129 | with tf.variable_scope("RNN", reuse=None):
130 | for i in range(len(inputs)):
131 | if i == 0:
132 | (hidden, state) = self.my_rnn(inputs[i], state)
133 | else:
134 | m = x_mask[:, i]
135 |
136 | tf.get_variable_scope().reuse_variables()
137 | (prev_hidden, prev_state) = (hidden, state) # for masking
138 | (hidden, state) = self.my_rnn(inputs[i], state)
139 |
140 | m_1 = tf.expand_dims(m,1)
141 | m_1 = tf.tile(m_1, [1, self.dim_hidden])
142 | m_0 = tf.expand_dims(1. - m,1)
143 | m_0 = tf.tile(m_0, [1, self.dim_hidden])
144 | hidden = tf.add(tf.mul(m_1, hidden), tf.mul(m_0, prev_hidden))
145 | state = tf.add(tf.mul(m_1, state), tf.mul(m_0, prev_state))
146 | hiddens.append(hidden)
147 | states.append(state)
148 |
149 | if self.config.is_birnn :
150 | rhiddens = []
151 | rstates = []
152 | rx_mask = tf.reverse(x_mask,[False, True])
153 | rinputs = inputs[::-1]
154 | state = self.init_state
155 | with tf.variable_scope("rRNN", reuse=None):
156 | for i in range(len(rinputs)):
157 | if i == 0:
158 | (hidden, state) = self.my_rnn(rinputs[i], state) # use the reversed inputs here too
159 | else:
160 | m = rx_mask[:, i]
161 |
162 | tf.get_variable_scope().reuse_variables()
163 | (prev_hidden, prev_state) = (hidden, state) # for masking
164 | (hidden, state) = self.my_rnn(rinputs[i], state)
165 | m_1 = tf.expand_dims(m,1)
166 | m_1 = tf.tile(m_1, [1, self.dim_hidden])
167 | m_0 = tf.expand_dims(1. - m,1)
168 | m_0 = tf.tile(m_0, [1, self.dim_hidden])
169 | hidden = tf.add(tf.mul(m_1, hidden), tf.mul(m_0, prev_hidden))
170 | state = tf.add(tf.mul(m_1, state), tf.mul(m_0, prev_state))
171 | rhiddens.append(hidden)
172 | rstates.append(state)
173 |
174 |
175 | hiddens = tf.concat(2, [tf.pack(hiddens), tf.pack(rhiddens[::-1])])
176 | #hiddens = tf.add(tf.pack(hiddens), tf.pack(rhiddens[::-1]))
177 | hiddens = tf.unpack(hiddens)
178 |
179 | '''
180 | mean or last hidden -> logit_hidden : sample * dim
181 | '''
182 | # 1. last hidden
183 | #logit_hidden = hiddens[-1] #tf.reduce_mean(hiddens, 0)
184 | # 2. mean of hiddens
185 | x_mask_t = tf.transpose(x_mask)
186 | x_mask_t_denom = tf.expand_dims(tf.reduce_sum(x_mask_t, 0), 1)
187 | x_mask_t_denom = tf.tile(x_mask_t_denom, [1, self.W_pred.get_shape().dims[0].value])
188 | x_mask_t = tf.expand_dims(x_mask_t, 2)
189 | x_mask_t = tf.tile(x_mask_t, [1, 1, self.W_pred.get_shape().dims[0].value])
190 | hiddens = tf.pack(hiddens)
191 | logit_hidden = tf.reduce_sum(tf.mul(hiddens, x_mask_t), 0)
192 | logit_hidden = tf.div(logit_hidden, x_mask_t_denom)
193 |
194 |
195 | if self.config.dropout is not None:
196 | logit_hidden = tf.nn.dropout(logit_hidden, self.config.dropout)
197 |
198 |
199 | logits = tf.nn.xw_plus_b(logit_hidden, self.W_pred, self.b_pred)
200 |
201 | probs = tf.nn.softmax(logits)
202 | prediction = tf.argmax(probs, 1)
203 | correct_prediction = tf.equal(prediction, tf.argmax(y,1))
204 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
205 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, y)
206 | loss = tf.reduce_mean(cross_entropy)
207 | train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)
208 | return x, x_mask, y, img, loss, train_op, accuracy, prediction
209 |
210 |
211 | def train():
212 |
213 |
214 | config = get_config()
215 |
216 | os.chdir('/home/seonhoon/Desktop/workspace/ImageQA/data/')
217 | train = pd.read_pickle('train_vgg.pkl')
218 | train_x = [ q for q in train['q'] ]
219 | train_y = [ a[0] for a in train['a'] ]
220 | train_y = np.array(train_y)[:,None]
221 | train_y = np_utils.to_categorical(train_y, config.y_size).astype('float32')
222 | train_x , train_x_mask = prepare_data(train_x, config.steps)
223 | train_x_img = np.array([ img.tolist() for img in train['cnn_feature'] ]).astype('float32')
224 |
225 | n_train = len(train_x)
226 |
227 | print 'train_x :', train_x.shape
228 | print 'train_x_mask :', train_x_mask.shape
229 | print 'train_x_img :', train_x_img.shape
230 | print 'train_y :',train_y.shape
231 |
232 | if config.valid_epoch is not None:
233 | valid=pd.read_pickle('test_vgg.pkl')
234 | valid_x=[ q for q in valid['q'] ]
235 | valid_y=[ a[0] for a in valid['a'] ]
236 | valid_y=np.array(valid_y)[:,None]
237 | valid_y = np_utils.to_categorical(valid_y, config.y_size).astype('float32')
238 | valid_x , valid_x_mask = prepare_data(valid_x, config.steps)
239 | valid_x_img = np.array([ img.tolist() for img in valid['cnn_feature'] ]).astype('float32')
240 | n_valid = len(valid_x)
241 | valid_batch_indices=get_minibatch_indices(n_valid, config.batch_size, shuffle=False)
242 |
243 | print 'valid_x :', valid_x.shape
244 | print 'valid_x_mask :', valid_x_mask.shape
245 | print 'valid_x_img :', valid_x_img.shape
246 | print 'valid_y :',valid_y.shape
247 |
248 |
249 |
250 | with tf.Session() as sess:
251 |
252 |
253 | initializer = tf.random_normal_initializer(0, 0.1)
254 | with tf.variable_scope("model", reuse=None, initializer=initializer):
255 | model = ImageQA(config = config)
256 |
257 | x, x_mask, y, img, loss, train_op, accuracy, prediction = model.build_model()
258 |
259 | saver = tf.train.Saver()
260 | sess.run(tf.initialize_all_variables())
261 |
262 | for i in range(config.epoch):
263 | start = time.time()
264 | lr_decay = config.lr_decay ** max(i - config.decay_epoch, 0.0)
265 | sess.run(tf.assign(model.lr, config.lr * lr_decay))
266 |
267 |
268 | batch_indices=get_minibatch_indices(n_train, config.batch_size, shuffle=True)
269 |
270 | preds = []
271 | for j, indices in enumerate(batch_indices):
272 |
273 | x_ = np.array([ train_x[k,:] for k in indices])
274 | x_mask_ = np.array([ train_x_mask[k,:] for k in indices])
275 | y_ = np.array([ train_y[k,:] for k in indices])
276 | img_ = np.array([ train_x_img[k,:] for k in indices])
277 |
278 |
279 |
280 | cost, _, acc, pred = sess.run([loss, train_op, accuracy, prediction],
281 | {x: x_,
282 | x_mask: x_mask_,
283 | y: y_,
284 | img : img_})
285 | preds = preds + pred.tolist()
286 | if j % 99 == 0 :
287 | print 'cost : ', cost, ', accuracy : ', acc, ', iter : ', j+1, ' in epoch : ',i+1
288 | print 'cost : ', cost, ', accuracy : ', acc, ', iter : ', j+1, ' in epoch : ',i+1,' elapsed time : ', int(time.time()-start)
289 | if config.valid_epoch is not None: # for validation
290 |     if i == 0:
291 |         best_accuracy = 0. # initialize once so the best score persists across epochs
292 | if (i+1) % config.valid_epoch == 0:
293 | val_preds = []
294 | for j, indices in enumerate(valid_batch_indices):
295 | x_ = np.array([ valid_x[k,:] for k in indices])
296 | x_mask_ = np.array([ valid_x_mask[k,:] for k in indices])
297 | y_ = np.array([ valid_y[k,:] for k in indices])
298 | img_ = np.array([ valid_x_img[k,:] for k in indices])
299 |
300 | pred = sess.run(prediction,
301 | {x: x_,
302 | x_mask: x_mask_,
303 | y: y_,
304 | img : img_})
305 |
306 | val_preds = val_preds + pred.tolist()
307 | valid_acc = np.mean(np.equal(val_preds, np.argmax(valid_y,1)))
308 | print '##### valid accuracy : ', valid_acc, ' after epoch ', i+1
309 | if valid_acc > best_accuracy and i >= 10:
310 | best_accuracy = valid_acc
311 | saver.save(sess, config.model_ckpt_path, global_step=int(best_accuracy*100))
312 |
313 |
314 |
315 | def test():
316 |
317 | config = get_config()
318 |
319 | os.chdir('/home/seonhoon/Desktop/workspace/ImageQA/data/')
320 |
321 | test=pd.read_pickle('test_vgg.pkl')
322 | test_x=[ q for q in test['q'] ]
323 | test_y=[ a[0] for a in test['a'] ]
324 | test_y=np.array(test_y)[:,None]
325 | test_y = np_utils.to_categorical(test_y, config.y_size).astype('float32')
326 | test_x , test_x_mask = prepare_data(test_x, config.steps)
327 | test_x_img = np.array([ img.tolist() for img in test['cnn_feature'] ]).astype('float32')
328 | n_test = len(test_x)
329 | test_batch_indices=get_minibatch_indices(n_test, config.batch_size, shuffle=False)
330 |
331 |
332 |
333 | with tf.Session() as sess:
334 |
335 |
336 | with tf.variable_scope("model", reuse=None):
337 | model = ImageQA(config = config)
338 |
339 | x, x_mask, y, img, _, _, _, prediction = model.build_model()
340 | saver = tf.train.Saver()
341 | ckpt = tf.train.get_checkpoint_state(os.path.dirname(config.model_ckpt_path))
342 | sess.run(tf.initialize_all_variables())
343 | saver.restore(sess, ckpt.model_checkpoint_path)
344 | test_preds = []
345 | for j, indices in enumerate(test_batch_indices):
346 | x_ = np.array([test_x[k,:] for k in indices])
347 | x_mask_ = np.array([ test_x_mask[k,:] for k in indices])
348 | y_ = np.array([ test_y[k,:] for k in indices])
349 | img_ = np.array([ test_x_img[k,:] for k in indices])
350 |
351 | pred = sess.run(prediction,
352 | {x: x_,
353 | x_mask: x_mask_,
354 | y: y_,
355 | img : img_})
356 | test_preds = test_preds + pred.tolist()
357 |
358 |
359 | test_acc = np.mean(np.equal(test_preds, np.argmax(test_y,1)))
360 | print 'test accuracy :', test_acc
361 |
362 |
363 | def test_sample(test_x, test_x_mask, test_x_img):
364 |
365 | config = get_config()
366 | config.batch_size = 1
367 |
368 | with tf.Session() as sess:
369 | with tf.variable_scope("model", reuse=None):
370 | model = ImageQA(config = config)
371 |
372 | x, x_mask, _, img, _, _, _, prediction = model.build_model()
373 | saver = tf.train.Saver()
374 | ckpt = tf.train.get_checkpoint_state(os.path.dirname(config.model_ckpt_path))
375 | saver.restore(sess, ckpt.model_checkpoint_path)
376 |
377 | x_ = test_x
378 | x_mask_ = test_x_mask
379 | img_ = test_x_img
380 |
381 | pred = sess.run(prediction,
382 | {x: x_,
383 | x_mask: x_mask_,
384 | img : img_})
385 |
386 | return pred
387 |
388 |
389 |
390 |
391 | def get_config():
392 | class Config1(object):
393 | vocab_size = 12047
394 | y_size = 430
395 | batch_size = 364 #703 #s28
396 | steps = 60
397 |
398 | dim_ictx = 4096
399 | dim_iemb = 1024 # image embedding
400 | dim_wemb = 1024 # word embedding
401 | dim_hidden = 1024
402 | epoch = 100
403 |
404 | lr = 0.001
405 | lr_decay = 0.9
406 | decay_epoch = 333. # no decay while this exceeds the epoch count
407 |
408 | dropout = 0.4
409 |
410 | rnn_type = 'gru'
411 | layers = None # multi-layer RNN does not work yet
412 | is_birnn = True #False
413 | valid_epoch = 1 # or None
414 | model_ckpt_path = '/home/seonhoon/Desktop/workspace/ImageQA/version_tensorflow/model/model.ckpt'
415 | return Config1()
416 |
417 | def main(_):
418 |
419 |
420 | is_train = False # if False then test
421 |
422 | if is_train :
423 | train()
424 |
425 | else:
426 | test()
427 |
428 |
429 | if __name__ == "__main__":
430 | tf.app.run()
431 |
432 |
433 |
434 |
435 |
436 |
--------------------------------------------------------------------------------
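prepare_data pads each question to a fixed length and builds a float mask; the mask is one step longer than the question because the image embedding occupies the first RNN step. A toy run (the function copied verbatim, numpy only):

```python
# prepare_data from imageQA_tensorflow.py on two toy questions.
import numpy as np

def prepare_data(seqs_x, maxlen=None):
    lengths_x = [len(s) for s in seqs_x]
    n_samples = len(seqs_x)
    if maxlen is None:
        maxlen = np.max(lengths_x) + 1
    x = np.zeros((n_samples, maxlen)).astype('int32')
    x_mask = np.zeros((n_samples, maxlen)).astype('float32')
    for idx, s_x in enumerate(seqs_x):
        x[idx, :lengths_x[idx]] = s_x
        x_mask[idx, :lengths_x[idx] + 1] = 1.   # the +1 covers the image step
    return x, x_mask

x, x_mask = prepare_data([[4, 8, 15], [16, 23]])
print(x)       # [[ 4  8 15  0]  [16 23  0  0]]
print(x_mask)  # [[1. 1. 1. 1.]  [1. 1. 1. 0.]]
```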
/tensorflow_simple/web/cnn.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seankim902/imageQA/25f70ba1dc013eac6f7c734830f04663fd7b58f2/tensorflow_simple/web/cnn.pkl
--------------------------------------------------------------------------------
/tensorflow_simple/web/cnn.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import caffe
4 | import numpy as np
5 | from scipy.misc import imread, imresize
6 |
7 | class CNN(object):
8 |
9 | def __init__(self, deploy='/home/seonhoon/Desktop/caffemodel/vgg16/VGG_ILSVRC_16_layers_deploy.prototxt',
10 | model='/home/seonhoon/Desktop/caffemodel/vgg16/VGG_ILSVRC_16_layers.caffemodel'):
11 | caffe.set_mode_gpu()
12 | self.net = caffe.Net(deploy, model, caffe.TEST)
13 |
14 | #if model.startswith('VGG'):
15 | self.mean = np.array([103.939, 116.779, 123.68])
16 |
17 |
18 | def get_batch_features(self, in_data, net, layer):
19 | out = net.forward(blobs=[layer], **{net.inputs[0]: in_data})
20 | features = out[layer]#.squeeze(axis=(2,3))
21 | return features
22 |
23 | def get_features(self, filelist, layer='fc7'):
24 |
25 | N, channel, height, width = self.net.blobs[self.net.inputs[0]].data.shape
26 | n_files = len(filelist)
27 |
28 |
29 | if str(layer).startswith('fc'):
30 | feature = self.net.blobs[layer].data.shape[1]
31 | all_features = np.zeros((n_files, feature))
32 | else :
33 | feature1, feature2, feature3 = self.net.blobs[layer].data.shape[1], self.net.blobs[layer].data.shape[2], self.net.blobs[layer].data.shape[3]
34 | all_features = np.zeros((n_files, feature1, feature2, feature3))
35 |
36 |
37 | for i in range(0, n_files, N):
38 | in_data = np.zeros((N, channel, height, width), dtype=np.float32)
39 |
40 | batch_range = range(i, min(i+N, n_files))
41 | batch_filelist = [filelist[j] for j in batch_range]
42 |
43 | batch_images = np.zeros((len(batch_range), 3, height, width))
44 | for j,file in enumerate(batch_filelist):
45 | im = imread(file)
46 | if len(im.shape) == 2:
47 | im = np.tile(im[:,:,np.newaxis], (1,1,3))
48 | im = imresize(im, (height, width), 'bicubic') # resize first: imresize returns uint8 in [0, 255]
49 | im = im[:,:,(2,1,0)].astype(np.float32) # RGB -> BGR
50 | im = im - self.mean # mean subtraction (after resizing, so imresize does not rescale it away)
51 | im = np.transpose(im, (2, 0, 1)) # channels first for caffe
52 | batch_images[j,:,:,:] = im
53 |
54 | in_data[0:len(batch_range), :, :, :] = batch_images
55 |
56 | features = self.get_batch_features(in_data, self.net, layer)
57 |
58 | for j in range(len(batch_range)):
59 | all_features[i+j,:] = features[j,:]
60 |
61 | #print 'Done %d/%d files' % (i+len(batch_range), len(filelist))
62 |
63 | return all_features
64 |
--------------------------------------------------------------------------------
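Unlike the preprocessing copy, this get_features handles both layer kinds: an fc layer yields one vector per image, while a conv layer yields a channels × height × width map per image (the attention model consumes such conv5 maps). The two allocation branches amount to the following shapes (VGG16 sizes for a 224 × 224 input; numpy stand-ins):

```python
# Shapes the two branches of get_features() allocate (VGG16, 224x224 input).
import numpy as np

n_files = 8
fc7 = np.zeros((n_files, 4096))            # fc layer: one vector per image
conv5 = np.zeros((n_files, 512, 14, 14))   # conv layer: 512 maps of 14x14 cells
print(fc7.shape)
print(conv5.shape)
```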
/tensorflow_simple/web/cnn.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seankim902/imageQA/25f70ba1dc013eac6f7c734830f04663fd7b58f2/tensorflow_simple/web/cnn.pyc
--------------------------------------------------------------------------------
/tensorflow_simple/web/cnn4web.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from cnn import CNN
4 | import pandas as pd
5 |
6 |
7 | cnn = CNN()
8 | data=pd.DataFrame()
9 |
10 | data['cnn_feature']=0
11 | data['cnn_feature']=data['cnn_feature'].astype(object)
12 |
13 | featurelist = cnn.get_features(['/home/seonhoon/Desktop/workspace/ImageQA/version_tensorflow/web/images/moodo.jpg'], layer='fc7')
14 | data.loc[0,'cnn_feature']=featurelist[0].flatten()
15 |
16 |
17 | data.to_pickle('/home/seonhoon/Desktop/workspace/ImageQA/version_tensorflow/web/cnn.pkl')
18 |
--------------------------------------------------------------------------------
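cnn4web.py exists only to hand a single fc7 vector to the web server through a pickle, since running caffe and tensorflow in one process is troublesome. The matching read side, as the server uses it (same hard-coded path assumed):

```python
# Reading the single-row feature pickle written by cnn4web.py (sketch).
import numpy as np
import pandas as pd

data = pd.read_pickle('/home/seonhoon/Desktop/workspace/ImageQA/version_tensorflow/web/cnn.pkl')
x_img = np.array([data['cnn_feature'][0].tolist()])   # shape (1, 4096): a batch of one
print(x_img.shape)
```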
/tensorflow_simple/web/imageQA_tensorflow.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import time
5 | import pandas as pd
6 | import numpy as np
7 |
8 | from keras.utils import np_utils
9 |
10 | import tensorflow.python.platform
11 | import tensorflow as tf
12 | from tensorflow.models.rnn import rnn_cell
13 |
14 |
15 | # To do
16 | '''
17 | last mini-batch size issue: get_minibatch_indices drops a final batch smaller than batch_size
18 |
19 |
20 | '''
21 |
22 | def prepare_data(seqs_x, maxlen=None):
23 | lengths_x = [len(s) for s in seqs_x]
24 |
25 | n_samples = len(seqs_x)
26 | if maxlen is None:
27 | maxlen = np.max(lengths_x) + 1
28 |
29 | x = np.zeros((n_samples, maxlen)).astype('int32')
30 | x_mask = np.zeros((n_samples, maxlen)).astype('float32')
31 |
32 | for idx, s_x in enumerate(seqs_x):
33 | x[idx, :lengths_x[idx]] = s_x
34 | x_mask[idx, :lengths_x[idx]+1] = 1. # Adding 1, for image
35 |
36 | return x, x_mask
37 |
38 |
39 | def get_minibatch_indices(n, batch_size, shuffle=False):
40 |
41 | idx_list = np.arange(n, dtype="int32")
42 |
43 | if shuffle:
44 | np.random.shuffle(idx_list)
45 |
46 | minibatches = []
47 | minibatch_start = 0
48 | for i in range(n // batch_size):
49 | minibatches.append(idx_list[minibatch_start:
50 | minibatch_start + batch_size])
51 | minibatch_start += batch_size
52 | # if (minibatch_start != n): # last mini-batch issue !!!
53 | # minibatches.append(idx_list[minibatch_start:])
54 | return minibatches
55 |
56 |
57 |
58 | class ImageQA(object):
59 |
60 | def __init__(self, config):
61 |
62 | self.config = config
63 |
64 | self.vocab_size = vocab_size = config.vocab_size
65 | self.y_size = y_size = config.y_size
66 |
67 | self.batch_size = batch_size = config.batch_size
68 | self.steps = config.steps
69 |
70 | self.layers = layers = config.layers
71 |
72 | self.dim_ictx = dim_ictx = config.dim_ictx
73 | self.dim_iemb = dim_iemb = config.dim_iemb
74 | self.dim_wemb = dim_wemb = config.dim_wemb
75 | self.dim_hidden = dim_hidden = config.dim_hidden
76 |
77 | self.lr = tf.Variable(config.lr, trainable=False)
78 |
79 | rnn_type = config.rnn_type
80 | if rnn_type == 'gru':
81 | rnn_ = rnn_cell.GRUCell(dim_hidden)
82 | elif rnn_type == 'lstm':
83 | rnn_ = rnn_cell.BasicLSTMCell(dim_hidden)
84 |
85 | if layers is not None:
86 | self.my_rnn = my_rnn = rnn_cell.MultiRNNCell([rnn_] * layers)
87 | self.init_state = my_rnn.zero_state(batch_size, tf.float32)
88 | else:
89 | self.my_rnn = my_rnn = rnn_
90 | self.init_state = tf.zeros([batch_size, my_rnn.state_size])
91 |
92 | self.W_iemb = tf.get_variable("W_iemb", [dim_ictx, dim_iemb])
93 | self.b_iemb = tf.get_variable("b_iemb", [dim_iemb])
94 | with tf.device("/cpu:0"):
95 | self.W_wemb = tf.get_variable("W_wemb", [vocab_size, dim_wemb])
96 |
97 | if config.is_birnn : # concatenating the two directions seems to work better than adding them
98 | self.W_pred = tf.get_variable("W_pred", [dim_hidden * 2, y_size])
99 | else :
100 | self.W_pred = tf.get_variable("W_pred", [dim_hidden, y_size])
101 |
102 | self.b_pred = tf.get_variable("b_pred", [y_size])
103 |
104 |
105 | def build_model(self):
106 |
107 | x = tf.placeholder(tf.int32, [self.batch_size, self.steps])
108 | x_mask = tf.placeholder(tf.float32, [self.batch_size, self.steps])
109 | y = tf.placeholder(tf.float32, [self.batch_size, self.y_size])
110 | img = tf.placeholder(tf.float32, [self.batch_size, self.dim_ictx])
111 |
112 |
113 |
114 | with tf.device("/cpu:0"):
115 | inputs = tf.split(1, self.steps, tf.nn.embedding_lookup(self.W_wemb, x))
116 | # sample * steps * dim -> split -> sample * 1 * dim
117 | inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
118 | # [sample * dim, sample * dim, sample * dim, ... ]
119 | img_emb = tf.nn.xw_plus_b(img, self.W_iemb, self.b_iemb)
120 | inputs = [img_emb]+inputs[:-1] # -1 is for img
121 |
122 |
123 |
124 | hiddens = []
125 | states = []
126 |
127 |
128 | state = self.init_state
129 | with tf.variable_scope("RNN", reuse=None):
130 | for i in range(len(inputs)):
131 | if i == 0:
132 | (hidden, state) = self.my_rnn(inputs[i], state)
133 | else:
134 | m = x_mask[:, i]
135 |
136 | tf.get_variable_scope().reuse_variables()
137 | (prev_hidden, prev_state) = (hidden, state) # for masking
138 | (hidden, state) = self.my_rnn(inputs[i], state)
139 |
140 | m_1 = tf.expand_dims(m,1)
141 | m_1 = tf.tile(m_1, [1, self.dim_hidden])
142 | m_0 = tf.expand_dims(1. - m,1)
143 | m_0 = tf.tile(m_0, [1, self.dim_hidden])
144 | hidden = tf.add(tf.mul(m_1, hidden), tf.mul(m_0, prev_hidden))
145 | state = tf.add(tf.mul(m_1, state), tf.mul(m_0, prev_state))
146 | hiddens.append(hidden)
147 | states.append(state)
148 |
149 | if self.config.is_birnn :
150 | rhiddens = []
151 | rstates = []
152 | rx_mask = tf.reverse(x_mask,[False, True])
153 | rinputs = inputs[::-1]
154 | state = self.init_state
155 | with tf.variable_scope("rRNN", reuse=None):
156 | for i in range(len(rinputs)):
157 | if i == 0:
158 | (hidden, state) = self.my_rnn(rinputs[i], state) # use the reversed inputs here too
159 | else:
160 | m = rx_mask[:, i]
161 |
162 | tf.get_variable_scope().reuse_variables()
163 | (prev_hidden, prev_state) = (hidden, state) # for masking
164 | (hidden, state) = self.my_rnn(rinputs[i], state)
165 | m_1 = tf.expand_dims(m,1)
166 | m_1 = tf.tile(m_1, [1, self.dim_hidden])
167 | m_0 = tf.expand_dims(1. - m,1)
168 | m_0 = tf.tile(m_0, [1, self.dim_hidden])
169 | hidden = tf.add(tf.mul(m_1, hidden), tf.mul(m_0, prev_hidden))
170 | state = tf.add(tf.mul(m_1, state), tf.mul(m_0, prev_state))
171 | rhiddens.append(hidden)
172 | rstates.append(state)
173 |
174 |
175 | hiddens = tf.concat(2, [tf.pack(hiddens), tf.pack(rhiddens[::-1])])
176 | #hiddens = tf.add(tf.pack(hiddens), tf.pack(rhiddens[::-1]))
177 | hiddens = tf.unpack(hiddens)
178 |
179 | '''
180 | mean or last hidden -> logit_hidden : sample * dim
181 | '''
182 | # 1. last hidden
183 | #logit_hidden = hiddens[-1] #tf.reduce_mean(hiddens, 0)
184 | # 2. mean of hiddens
185 | x_mask_t = tf.transpose(x_mask)
186 | x_mask_t_denom = tf.expand_dims(tf.reduce_sum(x_mask_t, 0), 1)
187 | x_mask_t_denom = tf.tile(x_mask_t_denom, [1, self.W_pred.get_shape().dims[0].value])
188 | x_mask_t = tf.expand_dims(x_mask_t, 2)
189 | x_mask_t = tf.tile(x_mask_t, [1, 1, self.W_pred.get_shape().dims[0].value])
190 | hiddens = tf.pack(hiddens)
191 | logit_hidden = tf.reduce_sum(tf.mul(hiddens, x_mask_t), 0)
192 | logit_hidden = tf.div(logit_hidden, x_mask_t_denom)
193 |
194 |
195 | if self.config.dropout is not None:
196 | logit_hidden = tf.nn.dropout(logit_hidden, self.config.dropout)
197 |
198 |
199 | logits = tf.nn.xw_plus_b(logit_hidden, self.W_pred, self.b_pred)
200 |
201 | probs = tf.nn.softmax(logits)
202 | prediction = tf.argmax(probs, 1)
203 | correct_prediction = tf.equal(prediction, tf.argmax(y,1))
204 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
205 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, y)
206 | loss = tf.reduce_mean(cross_entropy)
207 | train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)
208 | return x, x_mask, y, img, loss, train_op, accuracy, prediction
209 |
210 |
211 | def train():
212 |
213 |
214 | config = get_config()
215 |
216 | os.chdir('/home/seonhoon/Desktop/workspace/ImageQA/data/')
217 | train = pd.read_pickle('train_vgg.pkl')
218 | train_x = [ q for q in train['q'] ]
219 | train_y = [ a[0] for a in train['a'] ]
220 | train_y = np.array(train_y)[:,None]
221 | train_y = np_utils.to_categorical(train_y, config.y_size).astype('float32')
222 | train_x , train_x_mask = prepare_data(train_x, config.steps)
223 | train_x_img = np.array([ img.tolist() for img in train['cnn_feature'] ]).astype('float32')
224 |
225 | n_train = len(train_x)
226 |
227 | print 'train_x :', train_x.shape
228 | print 'train_x_mask :', train_x_mask.shape
229 | print 'train_x_img :', train_x_img.shape
230 | print 'train_y :',train_y.shape
231 |
232 | if config.valid_epoch is not None:
233 | valid=pd.read_pickle('test_vgg.pkl')
234 | valid_x=[ q for q in valid['q'] ]
235 | valid_y=[ a[0] for a in valid['a'] ]
236 | valid_y=np.array(valid_y)[:,None]
237 | valid_y = np_utils.to_categorical(valid_y, config.y_size).astype('float32')
238 | valid_x , valid_x_mask = prepare_data(valid_x, config.steps)
239 | valid_x_img = np.array([ img.tolist() for img in valid['cnn_feature'] ]).astype('float32')
240 | n_valid = len(valid_x)
241 | valid_batch_indices=get_minibatch_indices(n_valid, config.batch_size, shuffle=False)
242 |
243 | print 'valid_x :', valid_x.shape
244 | print 'valid_x_mask :', valid_x_mask.shape
245 | print 'valid_x_img :', valid_x_img.shape
246 | print 'valid_y :',valid_y.shape
247 |
248 |
249 |
250 | with tf.Session() as sess:
251 |
252 |
253 | initializer = tf.random_normal_initializer(0, 0.1)
254 | with tf.variable_scope("model", reuse=None, initializer=initializer):
255 | model = ImageQA(config = config)
256 |
257 | x, x_mask, y, img, loss, train_op, accuracy, prediction = model.build_model()
258 |
259 | saver = tf.train.Saver()
260 | sess.run(tf.initialize_all_variables())
261 |
262 | for i in range(config.epoch):
263 | start = time.time()
264 | lr_decay = config.lr_decay ** max(i - config.decay_epoch, 0.0)
265 | sess.run(tf.assign(model.lr, config.lr * lr_decay))
266 |
267 |
268 | batch_indices=get_minibatch_indices(n_train, config.batch_size, shuffle=True)
269 |
270 | preds = []
271 | for j, indices in enumerate(batch_indices):
272 |
273 | x_ = np.array([ train_x[k,:] for k in indices])
274 | x_mask_ = np.array([ train_x_mask[k,:] for k in indices])
275 | y_ = np.array([ train_y[k,:] for k in indices])
276 | img_ = np.array([ train_x_img[k,:] for k in indices])
277 |
278 |
279 |
280 | cost, _, acc, pred = sess.run([loss, train_op, accuracy, prediction],
281 | {x: x_,
282 | x_mask: x_mask_,
283 | y: y_,
284 | img : img_})
285 | preds = preds + pred.tolist()
286 | if j % 99 == 0 :
287 | print 'cost : ', cost, ', accuracy : ', acc, ', iter : ', j+1, ' in epoch : ',i+1
288 | print 'cost : ', cost, ', accuracy : ', acc, ', iter : ', j+1, ' in epoch : ',i+1,' elapsed time : ', int(time.time()-start)
289 | if config.valid_epoch is not None: # for validation
290 |     if i == 0:
291 |         best_accuracy = 0. # initialize once so the best score persists across epochs
292 | if (i+1) % config.valid_epoch == 0:
293 | val_preds = []
294 | for j, indices in enumerate(valid_batch_indices):
295 | x_ = np.array([ valid_x[k,:] for k in indices])
296 | x_mask_ = np.array([ valid_x_mask[k,:] for k in indices])
297 | y_ = np.array([ valid_y[k,:] for k in indices])
298 | img_ = np.array([ valid_x_img[k,:] for k in indices])
299 |
300 | pred = sess.run(prediction,
301 | {x: x_,
302 | x_mask: x_mask_,
303 | y: y_,
304 | img : img_})
305 |
306 | val_preds = val_preds + pred.tolist()
307 | valid_acc = np.mean(np.equal(val_preds, np.argmax(valid_y,1)))
308 | print '##### valid accuracy : ', valid_acc, ' after epoch ', i+1
309 | if valid_acc > best_accuracy and i >= 10:
310 | best_accuracy = valid_acc
311 | saver.save(sess, config.model_ckpt_path, global_step=int(best_accuracy*100))
312 |
313 |
314 |
315 | def test():
316 |
317 | config = get_config()
318 |
319 | os.chdir('/home/seonhoon/Desktop/workspace/ImageQA/data/')
320 |
321 | test=pd.read_pickle('test_vgg.pkl')
322 | test_x=[ q for q in test['q'] ]
323 | test_y=[ a[0] for a in test['a'] ]
324 | test_y=np.array(test_y)[:,None]
325 | test_y = np_utils.to_categorical(test_y, config.y_size).astype('float32')
326 | test_x , test_x_mask = prepare_data(test_x, config.steps)
327 | test_x_img = np.array([ img.tolist() for img in test['cnn_feature'] ]).astype('float32')
328 | n_test = len(test_x)
329 | test_batch_indices=get_minibatch_indices(n_test, config.batch_size, shuffle=False)
330 |
331 |
332 |
333 | with tf.Session() as sess:
334 |
335 |
336 | with tf.variable_scope("model", reuse=None):
337 | model = ImageQA(config = config)
338 |
339 | x, x_mask, y, img, _, _, _, prediction = model.build_model()
340 | saver = tf.train.Saver()
341 | ckpt = tf.train.get_checkpoint_state(os.path.dirname(config.model_ckpt_path))
342 | sess.run(tf.initialize_all_variables())
343 | saver.restore(sess, ckpt.model_checkpoint_path)
344 | test_preds = []
345 | for j, indices in enumerate(test_batch_indices):
346 | x_ = np.array([test_x[k,:] for k in indices])
347 | x_mask_ = np.array([ test_x_mask[k,:] for k in indices])
348 | y_ = np.array([ test_y[k,:] for k in indices])
349 | img_ = np.array([ test_x_img[k,:] for k in indices])
350 |
351 | pred = sess.run(prediction,
352 | {x: x_,
353 | x_mask: x_mask_,
354 | y: y_,
355 | img : img_})
356 | test_preds = test_preds + pred.tolist()
357 |
358 |
359 | test_acc = np.mean(np.equal(test_preds, np.argmax(test_y,1)))
360 | print 'test accuracy :', test_acc
361 |
362 | def test_sample(test_x, test_x_mask, test_x_img):
363 |
364 | config = get_config()
365 | config.batch_size = 1
366 |
367 | with tf.Session() as sess:
368 | with tf.variable_scope("model", reuse=None):
369 | model = ImageQA(config = config)
370 |
371 | x, x_mask, _, img, _, _, _, prediction = model.build_model()
372 | saver = tf.train.Saver()
373 | ckpt = tf.train.get_checkpoint_state(os.path.dirname(config.model_ckpt_path))
374 | saver.restore(sess, ckpt.model_checkpoint_path)
375 |
376 | x_ = test_x
377 | x_mask_ = test_x_mask
378 | img_ = test_x_img
379 |
380 | pred = sess.run(prediction,
381 | {x: x_,
382 | x_mask: x_mask_,
383 | img : img_})
384 |
385 | return pred
386 |
387 |
388 |
389 |
390 | def get_config():
391 | class Config1(object):
392 | vocab_size = 12047
393 | y_size = 430
394 | batch_size = 364 #703 #s28
395 | steps = 60
396 |
397 | dim_ictx = 4096
398 | dim_iemb = 1024 # image embedding
399 | dim_wemb = 1024 # word embedding
400 | dim_hidden = 1024
401 | epoch = 100
402 |
403 | lr = 0.001
404 | lr_decay = 0.9
405 | decay_epoch = 333. # no decay while this exceeds the epoch count
406 |
407 | dropout = 0.4
408 |
409 | rnn_type = 'gru'
410 | layers = None # multi-layer RNN does not work yet
411 | is_birnn = True #False
412 | valid_epoch = 1 # or None
413 | model_ckpt_path = '/home/seonhoon/Desktop/workspace/ImageQA/version_tensorflow/model/model.ckpt'
414 | return Config1()
415 |
416 | def main(_):
417 |
418 |
419 | is_train = False # if False then test
420 |
421 | if is_train :
422 | train()
423 |
424 | else:
425 | test()
426 |
427 |
428 | if __name__ == "__main__":
429 | tf.app.run()
430 |
431 |
432 |
433 |
434 |
435 |
--------------------------------------------------------------------------------
/tensorflow_simple/web/imageQA_tensorflow.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seankim902/imageQA/25f70ba1dc013eac6f7c734830f04663fd7b58f2/tensorflow_simple/web/imageQA_tensorflow.pyc
--------------------------------------------------------------------------------
/tensorflow_simple/web/images/moodo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seankim902/imageQA/25f70ba1dc013eac6f7c734830f04663fd7b58f2/tensorflow_simple/web/images/moodo.jpg
--------------------------------------------------------------------------------
/tensorflow_simple/web/server.py:
--------------------------------------------------------------------------------
1 |
2 | from flask import Flask
3 | from flask import request
4 | from flask import render_template
5 |
6 | import cPickle
7 | import pandas as pd
8 | import numpy as np
9 | import sys
10 | import os
11 |
12 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
13 |
14 | from imageQA_tensorflow import *
15 |
16 | app = Flask(__name__, static_url_path = "/images", static_folder='images')
17 |
18 | config = get_config()
19 |
20 |
21 | # http://127.0.0.1:5000/
22 | @app.route('/')
23 | def my_form():
24 | return render_template('iqa.html')
25 |
26 |
27 | @app.route('/', methods=['POST'])
28 | def my_form_post(answer=None):
29 |
30 | filename='/home/seonhoon/Desktop/workspace/ImageQA/data/dict.pkl'
31 |
32 | with open(filename, 'rb') as fp:
33 | idx2word, word2idx, idx2answer, answer2idx = cPickle.load(fp)
34 |
35 | text = request.form['text']
36 | print text
37 |
38 | question=text.split()
39 |
40 | q_idx=[]
41 | for i in range(len(question)):
42 | q_idx.append(word2idx[question[i]])
43 | q_idx=np.array(q_idx)
44 |
45 | print q_idx
46 |
47 | # running caffe and tensorflow in the same process is not straightforward,
48 | # so the CNN feature is read from the pickle written beforehand by cnn4web.py
49 | x_img = np.array([pd.read_pickle('/home/seonhoon/Desktop/workspace/ImageQA_Web/cnn.pkl')['cnn_feature'][0].tolist()])
50 |
51 |
52 |
53 | x , x_mask = prepare_data([q_idx], config.steps)
54 |
55 |
56 | y = test_sample(x, x_mask, x_img)
57 |
58 | print idx2answer[y[0]]
59 |
60 | params = {'answer' : idx2answer[y[0]], 'text' : text}
61 |
62 | return render_template('iqa.html', **params)
63 |
64 |
65 | if __name__ == '__main__':
66 | app.run()
67 |
--------------------------------------------------------------------------------
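The server exposes one route: GET renders the form, POST tokenizes the question, loads the pickled fc7 feature, and runs test_sample. It can be exercised without a browser through Flask's test client (a sketch; it assumes the checkpoint, dict.pkl and cnn.pkl paths hard-coded above all exist):

```python
# Driving server.py without a browser (sketch; needs the hard-coded
# checkpoint, dict.pkl and cnn.pkl paths to exist).
from server import app

client = app.test_client()
print(client.get('/').status_code)    # 200: the bare form page
resp = client.post('/', data={'text': 'what is the color of the ball'})
print(resp.status_code)               # 200: the page with the predicted answer
```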
/tensorflow_simple/web/templates/iqa.html:
--------------------------------------------------------------------------------
1 |
2 |
13 |
14 |
26 | the answer is {{answer}}
27 | {% endif %}
28 |
29 |
30 |
27 | {% endif %} 28 | 29 | 30 |