├── ABC
│   ├── ABC.png
│   ├── README.md
│   ├── abCNN.py
│   └── localAtt.py
├── CoA
│   ├── CoA_arch.png
│   ├── README.md
│   ├── co_attention.py
│   └── selfDef.py
├── NIPS2016
│   ├── README.md
│   ├── VQA.png
│   ├── co_attention.py
│   └── selfDef.py
├── README.md
└── TAB-LSTM
    ├── README.md
    ├── TAB-LSTM.png
    ├── topicAtt.py
    └── topicLSTM.py

/ABC/ABC.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zhangtd/Models-reproducing/48cff578ef2b1a0f0a265bc7a0d10b17bd12e3fd/ABC/ABC.png
--------------------------------------------------------------------------------
/ABC/README.md:
--------------------------------------------------------------------------------
 1 | # ABC model
 2 | 
 3 | ### Proposed in [Hashtag Recommendation Using Attention-Based Convolutional Neural Network](https://www.ijcai.org/Proceedings/16/Papers/395.pdf) IJCAI 2016, by Yuyun Gong et al.
 4 | 
 5 | ## **Overview**
 6 | ### ABC adopts an attention-based CNN architecture to recommend hashtags for textual content. It models the sequential order of the text with both a local attention channel and a global attention channel.
 7 | ### Model Architecture
 8 | ![ABC model](ABC.png)
 9 | 
10 | ## **Usage**
11 | ### Source files in this repository cannot be executed as-is, because the data preprocessing and data input statements are omitted. Prepare your data according to your needs.
12 | 
13 | ## **Requirements**
14 | - Python 3.x
15 | - Tensorflow >= 1.7
16 | - Keras >= 2.1.5
17 | 
18 | 
19 | 
20 | 
--------------------------------------------------------------------------------
/ABC/abCNN.py:
--------------------------------------------------------------------------------
 1 | from keras.models import Model
 2 | from keras.layers import Input, Reshape, Dense, Embedding, Dropout, LSTM, MaxPooling1D, Lambda, Concatenate, \
 3 |     Multiply, RepeatVector, Flatten, Activation, Permute, Conv1D
 4 | import keras.backend as K
 5 | from localAtt import LocalAttention
 6 | import numpy as np
 7 | 
 8 | 
 9 | num_tags = 2207
10 | num_words = 20000
11 | index_from = 3
12 | seq_length = 30
13 | batch_size = 256
14 | embedding_size = 100
15 | drop_rate = 0.75
16 | num_epoch = 35
17 | 
18 | # prepare the following data.
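# --- added placeholder (not in the original repo) --------------------------------------
# A minimal sketch that fabricates random arrays with the shapes this script expects
# (texts: padded word-index sequences, tags: multi-hot hashtag vectors), so the model
# can at least be built and smoke-tested. The names and sizes below are assumptions;
# replace this block with your real preprocessing.
num_train, num_test = 1000, 200
texts_train = np.random.randint(1, num_words + index_from, size=(num_train, seq_length))
texts_test = np.random.randint(1, num_words + index_from, size=(num_test, seq_length))
tags_train = np.zeros((num_train, num_tags))
tags_train[np.arange(num_train), np.random.randint(0, num_tags, num_train)] = 1.0
tags_test = np.zeros((num_test, num_tags))
tags_test[np.arange(num_test), np.random.randint(0, num_tags, num_test)] = 1.0
# ----------------------------------------------------------------------------------------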
19 | # required arrays: texts_train, tags_train, texts_test, tags_test
20 | 
21 | 
22 | def myLossFunc(y_true, y_pred):
23 |     probs_log = -K.log(y_pred)
24 |     loss = K.mean(K.sum(probs_log*y_true, axis=-1))
25 |     return loss
26 | 
27 | 
28 | def modelDef():
29 |     input_text = Input(shape=(seq_length, ))
30 |     embeddings = Embedding(input_dim=num_words + index_from, output_dim=embedding_size,
31 |                            mask_zero=False, input_length=seq_length)(input_text)
32 |     # Global channel
33 |     gc1 = Conv1D(filters=embedding_size, kernel_size=1, activation="tanh", use_bias=True)(embeddings)
34 |     gc2 = Conv1D(filters=embedding_size, kernel_size=2, activation="tanh", use_bias=True)(embeddings)
35 |     gc3 = Conv1D(filters=embedding_size, kernel_size=3, activation="tanh", use_bias=True)(embeddings)
36 | 
37 |     gc1m = MaxPooling1D(pool_size=30)(gc1)
38 |     gc2m = MaxPooling1D(pool_size=29)(gc2)
39 |     gc3m = MaxPooling1D(pool_size=28)(gc3)
40 |     gc = MaxPooling1D(pool_size=3)(Concatenate(axis=1)([gc1m, gc2m, gc3m]))
41 |     gc = Lambda(lambda x: K.squeeze(x, axis=1))(gc)
42 |     # print(gc)
43 | 
44 |     # Local channel
45 |     lcs = Conv1D(filters=1, kernel_size=5, activation="tanh", use_bias=True, padding="same")(embeddings)
46 |     lcs = Lambda(lambda x: K.squeeze(x, axis=-1))(lcs)
47 |     bools = LocalAttention()(lcs)
48 |     bools = Permute([2, 1])(RepeatVector(embedding_size)(bools))
49 |     lca = Multiply()([embeddings, bools])
50 |     lcf = Activation("tanh")(Lambda(lambda x: K.sum(x, axis=1))(lca))
51 |     # print(lcf)
52 | 
53 |     gc = RepeatVector(1)(gc)
54 |     lcf = RepeatVector(1)(lcf)
55 |     h = Concatenate(axis=1)([gc, lcf])
56 |     h = Conv1D(filters=embedding_size, kernel_size=2, activation="tanh", use_bias=True)(h)
57 |     dropout = Dropout(drop_rate)(Lambda(lambda x: K.squeeze(x, axis=1))(h))
58 | 
59 |     Softmax = Dense(num_tags, activation="softmax", use_bias=True)(dropout)
60 |     model = Model(inputs=input_text, outputs=Softmax)
61 |     model.compile(optimizer="adam", loss=myLossFunc)
62 |     return model
63 | 
64 | 
65 | def evaluation(y_true, y_pred, top_K):
66 |     acc_count = 0
67 |     correct = 0
68 | 
69 |     for i in range(y_pred.shape[0]):
70 |         top_indices = y_pred[i].argsort()[-top_K:]
71 |         if np.sum(y_true[i, top_indices]) >= 1:
72 |             acc_count += 1
73 |         correct += np.sum(y_true[i, top_indices])
74 | 
75 |     acc_K = acc_count * 1.0 / y_pred.shape[0]
76 |     precision_K = correct * 1.0 / (top_K * y_pred.shape[0])
77 |     recall_K = correct * 1.0 / np.sum(y_true)
78 |     f1_K = 2 * precision_K * recall_K / (precision_K + recall_K) if (precision_K + recall_K) != 0 else 0.0
79 | 
80 |     return acc_K, precision_K, recall_K, f1_K
81 | 
82 | 
83 | if __name__ == "__main__":
84 |     myModel = modelDef()
85 |     history = myModel.fit(x=texts_train,
86 |                           y=tags_train,
87 |                           batch_size=batch_size,
88 |                           epochs=1,
89 |                           verbose=1,)
90 |     y_pred = myModel.predict(x=[texts_test])
91 |     acc, precision, recall, f1 = evaluation(tags_test, y_pred, 3)
--------------------------------------------------------------------------------
/ABC/localAtt.py:
--------------------------------------------------------------------------------
 1 | import keras.backend as K
 2 | from keras.engine.topology import Layer, InputSpec
 3 | from keras.utils import conv_utils
 4 | from keras import activations, initializers
 5 | 
 6 | theta = 0.8
 7 | 
 8 | 
 9 | class LocalAttention(Layer):
10 |     """
11 |     generate local attention maps based on the input score tensor.
12 |     input: score tensor
13 |     output: thresholded attention weights (positions scoring below the threshold are zeroed)
14 | 
15 |     input shape: (batch_size, seq_length)
16 |     output shape: (batch_size, seq_length)
17 |     """
18 |     def __init__(self, **kwargs):
19 |         super(LocalAttention, self).__init__(**kwargs)
20 |         self.input_spec = InputSpec(min_ndim=2)
21 | 
22 |     def build(self, input_shape):
23 |         super(LocalAttention, self).build(input_shape)  # Be sure to call this somewhere!
24 | 
25 |     def call(self, x, mask=None):
26 |         max_score = K.max(x)
27 |         min_score = K.min(x)
28 |         threshold = theta*min_score + (1-theta)*max_score
29 |         thresholds = threshold * K.ones_like(x)
30 |         output = K.relu(x-thresholds)
31 |         return output
32 | 
33 |     def compute_output_shape(self, input_shape):
34 |         output_shape = input_shape
35 |         return output_shape
36 | 
--------------------------------------------------------------------------------
/CoA/CoA_arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zhangtd/Models-reproducing/48cff578ef2b1a0f0a265bc7a0d10b17bd12e3fd/CoA/CoA_arch.png
--------------------------------------------------------------------------------
/CoA/README.md:
--------------------------------------------------------------------------------
 1 | # CoA model
 2 | 
 3 | ### Proposed in [Hashtag Recommendation for Multimodal Microblog Using Co-Attention Networks](https://www.ijcai.org/proceedings/2017/0478.pdf) IJCAI 2017, by Qi Zhang et al.
 4 | 
 5 | ## **Overview**
 6 | ### CoA can be used to recommend hashtags for multimodal microblogs that are composed of both textual and visual content. The main contribution of CoA lies in an alternative co-attention mechanism that extracts post features.
 7 | ### Model Architecture
 8 | ![CoA model](CoA_arch.png)
 9 | 
10 | ## **Usage**
11 | ### Source files in this repository cannot be executed as-is, because the data preprocessing and data input statements are omitted. Prepare your data according to your needs.
12 | 
13 | ## **Requirements**
14 | - Python 3.x
15 | - Tensorflow >= 1.7
16 | - Keras >= 2.1.5
17 | 
18 | 
19 | 
20 | 
--------------------------------------------------------------------------------
/CoA/co_attention.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | hierarchical co-attention model based on the IJCAI 2017 article
 3 | '''
 4 | from keras.models import Model
 5 | from keras.layers.core import Activation, Flatten, Reshape, RepeatVector
 6 | from keras.layers.recurrent import LSTM
 7 | from keras.layers.convolutional import AveragePooling1D
 8 | from keras.layers.wrappers import TimeDistributed
 9 | from keras.layers import Input, Dense, Embedding, Dropout, Lambda
10 | import keras.backend as K
11 | 
12 | from selfDef import coAttention_alt, myLossFunc
13 | import numpy as np
14 | 
15 | num_tags = 3896
16 | num_words = 212000
17 | index_from = 3
18 | seq_length = 30
19 | batch_size = 512
20 | embedding_size = 200
21 | hidden_size = 100
22 | attention_size = 200
23 | dim_k = 100
24 | num_region = 7*7
25 | drop_rate = 0.5
26 | TopK = 6
27 | 
28 | # prepare the following data.
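# --- added note (not in the original repo) ----------------------------------------------
# Expected shapes, inferred from modelDef() below; dataset sizes are assumptions:
#   img_*:  (num_samples, 7, 7, 512)   the 7x7x512 convolutional feature map produced by
#           VGG-16 for 224x224 input images
#   text_*: (num_samples, seq_length)  padded word-index sequences
#   tag_*:  (num_samples, num_tags)    multi-hot hashtag vectors
# ----------------------------------------------------------------------------------------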
img data is the output of VGG-16 29 | img_train, text_train, tag_train, img_test, text_test, tag_test 30 | 31 | 32 | def imageFeature(inputs): 33 | features = Reshape(target_shape=(num_region, 512))(inputs) 34 | features = Dense(embedding_size, activation="tanh", use_bias=False)(features) 35 | features_pooling = AveragePooling1D(pool_size=num_region, padding="same")(features) 36 | features_pooling = Lambda(lambda x: K.squeeze(x, axis=1))(features_pooling) 37 | 38 | return features, features_pooling 39 | 40 | 41 | def textFeature(X): 42 | embeddings = Embedding(input_dim=num_words + index_from, output_dim=embedding_size, 43 | mask_zero=True, input_length=seq_length)(X) 44 | tFeature = LSTM(units=embedding_size, return_sequences=True)(embeddings) 45 | 46 | return tFeature 47 | 48 | 49 | def modelDef(): 50 | inputs_img = Input(shape=(7, 7, 512)) 51 | inputs_text = Input(shape=(seq_length,)) 52 | 53 | iFeature, iFeature_pooling = imageFeature(inputs_img) 54 | tFeature = textFeature(inputs_text) 55 | co_feature = coAttention_alt(dim_k=dim_k)([iFeature, tFeature]) 56 | dropout = Dropout(drop_rate)(co_feature) 57 | Softmax = Dense(num_tags, activation="softmax", use_bias=True)(dropout) 58 | model = Model(inputs=[inputs_img, inputs_text], 59 | outputs=[Softmax]) 60 | # adam = optimizers.adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.00001) 61 | model.compile(optimizer="adam", loss=myLossFunc) 62 | return model 63 | 64 | 65 | def evaluation(y_true, y_pred, top_K): 66 | acc_count = 0 67 | precision_K = [] 68 | recall_K = [] 69 | f1_K = [] 70 | 71 | for i in range(y_pred.shape[0]): 72 | top_indices = y_pred[i].argsort()[-top_K:] 73 | if np.sum(y_true[i, top_indices]) >= 1: 74 | acc_count += 1 75 | p = np.sum(y_true[i, top_indices])/top_K 76 | r = np.sum(y_true[i, top_indices])/np.sum(y_true[i, :]) 77 | precision_K.append(p) 78 | recall_K.append(r) 79 | if p != 0 or r != 0: 80 | f1_K.append(2 * p * r / (p + r)) 81 | else: 82 | f1_K.append(0) 83 | 84 | acc_K = acc_count * 1.0 / y_pred.shape[0] 85 | 86 | return acc_K, np.mean(np.array(precision_K)), np.mean(np.array(recall_K)), np.mean(np.array(f1_K)) 87 | 88 | 89 | if __name__ == "__main__": 90 | model = modelDef() 91 | history = model.fit(x=[img_train, text_train], 92 | y=tag_train, 93 | batch_size=batch_size, 94 | epochs=1, 95 | verbose=1,) 96 | y_pred = model.predict(x=[test_img, test_text]) 97 | acc_K, precision_K, recall_K, f1_K = evaluation(test_tag, y_pred, TopK) 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /CoA/selfDef.py: -------------------------------------------------------------------------------- 1 | """ 2 | some DIY components used in co_attention.py 3 | 4 | coAttention_alt -- DIY coAttention layer using alternative mechanism 5 | 6 | myLossFunc -- DIY loss function. Loss is defined as the sum of -log(p), 7 | where p is the probability of a hashtag in a train instance 8 | 9 | """ 10 | import keras.backend as K 11 | from keras.engine.topology import Layer, InputSpec 12 | 13 | 14 | class coAttention_alt(Layer): 15 | """ 16 | self defined co-attention layer. 
17 | alternative co-attention 18 | inputs: [image feature tensor, hidden text feature tensor] 19 | output: co-Attention feature of image and text 20 | 21 | input dimensions:[(batchSize, num_region, CNN_dimension), 22 | (batchSize, seq_length, CNN_dimension)] 23 | output dimension: batch_size*CNN_dimension 24 | """ 25 | def __init__(self, dim_k, **kwargs): 26 | super(coAttention_alt, self).__init__(**kwargs) 27 | self.dim_k = dim_k # internal tensor dimension 28 | # self.input_spec = InputSpec(min_ndim=3) 29 | self.supports_masking = True 30 | 31 | def build(self, input_shape): 32 | if not isinstance(input_shape, list): 33 | raise ValueError('A Co-Attention_alt layer should be called ' 34 | 'on a list of inputs.') 35 | if len(input_shape) != 2: 36 | raise ValueError('A Co-Attention_alt layer should be called on a list of 3 inputs.' 37 | 'Got '+str(len(input_shape))+'inputs.') 38 | # print(input_shape) 39 | self.num_imgRegion = input_shape[0][1] 40 | self.seq_len = input_shape[1][1] 41 | self.output_dim = input_shape[0][2] 42 | 43 | """trainable variables naming rule: 44 | w/b + '_Dense_' + Vi/Vt + '_' + 0/1 45 | w: weight 46 | b: bias 47 | Vi: about image feature 48 | Vt: about text feature 49 | 0: phase 0 50 | 1: phase 1 51 | """ 52 | self.w_Dense_Vi_0 = self.add_weight(name='w_Dense_Vi_0', 53 | shape=(self.output_dim, self.dim_k), 54 | initializer='random_normal', 55 | trainable=True) 56 | self.w_Dense_Vt_0 = self.add_weight(name='w_Dense_Vt_0', 57 | shape=(self.output_dim, self.dim_k), 58 | initializer='random_normal', 59 | trainable=True) 60 | self.w_Dense_Pi_0 = self.add_weight(name='w_Dense_Pi_0', 61 | shape=(2*self.dim_k, 1), 62 | initializer='random_normal', 63 | trainable=True) 64 | self.b_Dense_Pi_0 = self.add_weight(name='b_Dense_Pi_0', 65 | shape=(self.num_imgRegion,), 66 | initializer='zeros', 67 | trainable=True) 68 | 69 | self.w_Dense_Vi_1 = self.add_weight(name='w_Dense_Vi_1', 70 | shape=(self.output_dim, self.dim_k), 71 | initializer='random_normal', 72 | trainable=True) 73 | self.w_Dense_Vt_1 = self.add_weight(name='w_Dense_Vt_1', 74 | shape=(self.output_dim, self.dim_k), 75 | initializer='random_normal', 76 | trainable=True) 77 | self.w_Dense_Pi_1 = self.add_weight(name='w_Dense_Pi_1', 78 | shape=(2*self.dim_k, 1), 79 | initializer='random_normal', 80 | trainable=True) 81 | self.b_Dense_Pi_1 = self.add_weight(name='b_Dense_Pi_1', 82 | shape=(self.seq_len,), 83 | initializer='zeros', 84 | trainable=True) 85 | 86 | super(coAttention_alt, self).build(input_shape) # Be sure to call this somewhere! 
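    # --- added summary (not in the original repo) ---------------------------------------
    # call() below implements the alternating co-attention in two phases:
    #   phase 0: the mean of the LSTM hidden states guides attention over image regions,
    #            producing the attended image vector Vi;
    #   phase 1: Vi in turn guides attention over the text hidden states, producing Vt.
    # The layer returns Vi + Vt as the joint post representation.
    # -------------------------------------------------------------------------------------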
87 | 88 | def call(self, x, mask=None): 89 | ifeature = x[0] 90 | tfeature_h = x[1] 91 | # tfeature = x[2] 92 | output_dim = self.output_dim 93 | num_imgRegion = self.num_imgRegion 94 | dim_k = self.dim_k 95 | seq_len = self.seq_len 96 | tfeature = K.mean(tfeature_h, axis=1) 97 | # print(tfeature_h, tfeature) 98 | 99 | # phase 0: text-guided image feature computation 100 | w_Vi_0 = K.dot(K.reshape(ifeature, [-1, output_dim]), self.w_Dense_Vi_0) 101 | # shape=((batchSize*num_imgRegion),dim_k) 102 | w_Vi_0 = K.reshape(w_Vi_0, [-1, num_imgRegion, dim_k]) # shape=(batchSize,num_imgRegion,dim_k) 103 | w_Vt_0 = K.repeat(K.dot(tfeature, self.w_Dense_Vt_0), num_imgRegion) # shape=(batchSize,num_imgRegion,dim_k) 104 | Vi_Vt_0 = K.concatenate([w_Vi_0, w_Vt_0], axis=-1) # shape=(batchSize,num_imgRegion,2*dim_k) 105 | Hi = K.tanh(Vi_Vt_0) 106 | # Hi_w = K.squeeze(K.dot(K.reshape(Hi, [-1, 2*dim_k]), self.w_Dense_Pi_0), axis=-1) 107 | # Hi_w_b = K.reshape(Hi_w, [-1, num_imgRegion]) + self.b_Dense_Pi_0 108 | Hi_w_b = K.squeeze(K.dot(Hi, self.w_Dense_Pi_0), axis=-1) + self.b_Dense_Pi_0 # shape=(batchSize,num_imgRegion) 109 | Pi = K.softmax(Hi_w_b) 110 | Pi = K.permute_dimensions(K.repeat(Pi, output_dim), (0, 2, 1)) # shape=(batchSize,num_imgRegion,output_dim) 111 | Pi_Vi = Pi*ifeature 112 | Vi = K.sum(Pi_Vi, axis=1) # shape=(batchSize,output_dim) 113 | 114 | # phase 1: image-guided text feature computation 115 | w_Vi_1 = K.repeat(K.dot(Vi, self.w_Dense_Vi_1), seq_len) # shape=(batchSize,seq_len,dim_k) 116 | w_Vt_1 = K.dot(K.reshape(tfeature_h, [-1, output_dim]), self.w_Dense_Vt_1) # shape=((batchSize*seq_len),dim_k) 117 | w_Vt_1 = K.reshape(w_Vt_1, (-1, seq_len, dim_k)) # shape= (batchSize, seq_len, dim_k) 118 | Vi_Vt_1 = K.concatenate([w_Vi_1, w_Vt_1], axis=-1) # shape=(batchSize, seq_len, 2*dim_k) 119 | Ht = K.tanh(Vi_Vt_1) 120 | Ht_b = K.squeeze(K.dot(Ht, self.w_Dense_Pi_1), axis=-1) + self.b_Dense_Pi_1 # shape=(batch_size, seq_len) 121 | Pt = K.softmax(Ht_b) 122 | Pt = K.permute_dimensions(K.repeat(Pt, output_dim), (0, 2, 1)) # shape=(batchSize, seq_len, output_dim) 123 | Pt_Vt = Pt*tfeature_h 124 | Vt = K.sum(Pt_Vt, axis=1) # shape=(batchSize, output_dim) 125 | 126 | return Vi+Vt 127 | 128 | def compute_output_shape(self, input_shape): 129 | output_shape = (input_shape[0][0], input_shape[0][-1]) 130 | return output_shape 131 | 132 | def get_config(self): 133 | return super(coAttention_alt, self).get_config() 134 | 135 | 136 | def myLossFunc(y_true, y_pred): 137 | probs_log = -K.log(y_pred) 138 | loss = K.mean(K.sum(probs_log*y_true, axis=-1)) 139 | # loss = K.mean(K.sum(K.clip(probs_log * y_true, -1e40, 100), axis=-1)) 140 | return loss 141 | 142 | 143 | if __name__ == "__main__": 144 | from keras.layers import Input 145 | i1 = Input(batch_shape=(10, 25, 100)) 146 | i2 = Input(batch_shape=(10, 36, 100)) 147 | y = coAttention_alt(100)([i1, i2]) 148 | print(y) 149 | -------------------------------------------------------------------------------- /NIPS2016/README.md: -------------------------------------------------------------------------------- 1 | # Hierarchical Co-Attention VQA 2 | 3 | Proposed in [Hierarchical Question-Image Co-Attention for Visual Question Answering](http://papers.nips.cc/paper/6202-hierarchical-question-image-co-attention-for-visual-question-answering.pdf) NIPS 2016, by Jiasen Lu et al. 4 | 5 | ## **Overview** 6 | ### Two kinds of co-attention mechanisms are proposedin this article. 
Basic idea of co-attention mechanism is that for certain target, the importance of subparts in text and image vary so they should be treated differently. Moreover, question text and image are usually semantically related and one can be used to understand the other. So they resort to co-attention mechanism and propose co-attention in alternative way and parallel way. 7 | ### Model Architecture 8 | ![VQA model](VQA.png) 9 | 10 | ## **Usage** 11 | ### Source files in this repository can not be executed immediately, as there is no data preprocessing and data inputs statements. So you should get your data prepared according to your need. 12 | 13 | ## **Requirements** 14 | - Python 3.x 15 | - Tensorflow >= 1.7 16 | - Keras >= 2.1.5 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /NIPS2016/VQA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zhangtd/Models-reproducing/48cff578ef2b1a0f0a265bc7a0d10b17bd12e3fd/NIPS2016/VQA.png -------------------------------------------------------------------------------- /NIPS2016/co_attention.py: -------------------------------------------------------------------------------- 1 | from keras.applications.inception_v3 import InceptionV3 2 | from keras.models import Model, model_from_json 3 | from keras.layers import Input, Reshape, Dense, Embedding, Bidirectional, GRU, Dropout 4 | from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard 5 | from keras import optimizers 6 | 7 | from selfDef import coAttention_alt, coAttention_para, myLossFunc, self_conv1d, \ 8 | self_maxpooling, text_attention, encoding 9 | 10 | 11 | def textFeature(X): 12 | embeddings = Embedding(input_dim=num_words + 3, output_dim=embedding_size, 13 | weights=[init_embeddings], mask_zero=True, input_length=seq_length)(X) 14 | 15 | word_level = embeddings 16 | phrase_level_1 = self_conv1d(filters=embedding_size, 17 | kernel_size=1, 18 | padding="same", 19 | activation="tanh")(word_level) 20 | phrase_level_2 = self_conv1d(filters=embedding_size, 21 | kernel_size=2, 22 | padding="same", 23 | activation="tanh")(word_level) 24 | phrase_level_3 = self_conv1d(filters=embedding_size, 25 | kernel_size=3, 26 | padding="same", 27 | activation="tanh")(word_level) 28 | 29 | phrase_level = self_maxpooling()([phrase_level_1, phrase_level_2, phrase_level_3]) 30 | 31 | text_level = Bidirectional(GRU(units=hidden_size, return_sequences=True))(phrase_level) 32 | return word_level, phrase_level, text_level 33 | 34 | 35 | def imageFeature(inputs): 36 | imageModel = InceptionV3(weights='imagenet', include_top=False, ) 37 | for layer in imageModel.layers: 38 | layer.trainable = False 39 | features = imageModel(inputs) 40 | features = Reshape(target_shape=(num_region, 2048))(features) 41 | features = Dense(hidden_size * 2, activation="tanh", use_bias=False)(features) 42 | return features 43 | 44 | 45 | def modelDef(): 46 | inputs_img = Input(shape=(299, 299, 3,)) 47 | inputs_text = Input(shape=(seq_length,)) 48 | text_mask = Input(shape=(seq_length,)) 49 | 50 | iFeature = imageFeature(inputs_img) 51 | tFeature_word, tFeature_phrase, tFeature_text = textFeature(inputs_text) 52 | 53 | sum_tFeature_word = text_attention(attention_size)(tFeature_word) 54 | sum_tFeature_phrase = text_attention(attention_size)(tFeature_phrase) 55 | sum_tFeature_text = text_attention(attention_size)(tFeature_text) 56 | 57 | co_feature_word = coAttention_alt(dim_k=dim_k)([iFeature, tFeature_word, 
sum_tFeature_word]) 58 | co_feature_phrase = coAttention_alt(dim_k=dim_k)([iFeature, tFeature_phrase, sum_tFeature_phrase]) 59 | co_feature_text = coAttention_alt(dim_k=dim_k)([iFeature, tFeature_text, sum_tFeature_text]) 60 | 61 | h = encoding()([co_feature_word, co_feature_phrase, co_feature_text]) 62 | dropout = Dropout(drop_rate)(h) 63 | 64 | Softmax = Dense(num_tags, activation="softmax", use_bias=True)(dropout) 65 | 66 | model = Model(inputs=[inputs_img, inputs_text, text_mask], 67 | outputs=[Softmax]) 68 | sgd = optimizers.SGD(lr=0.15, momentum=0.9, clipnorm=1.0) 69 | model.compile(optimizer=sgd, loss=myLossFunc, metrics=[accuracy]) 70 | # res = model.predict(x=..) 71 | return model 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /NIPS2016/selfDef.py: -------------------------------------------------------------------------------- 1 | import keras.backend as K 2 | from keras.engine.topology import Layer, InputSpec 3 | from keras.utils import conv_utils 4 | from keras import activations, initializers, regularizers, constraints 5 | 6 | top_K = 1 7 | REMOVE_FACTOR = -10000 8 | 9 | 10 | class text_attention(Layer): 11 | """ 12 | self defined text attention layer. 13 | input: hidden text feature 14 | output: summarized text feature with attention mechanism 15 | 16 | input shape: (batch_size, seq_length, embedding_size) 17 | output shape: (batch_size, embedding_size) 18 | """ 19 | def __init__(self, units, return_alphas=False, **kwargs): 20 | super(text_attention, self).__init__(**kwargs) 21 | self.units = units 22 | self.input_spec = InputSpec(min_ndim=3) 23 | self.supports_masking = True 24 | self.return_alphas = return_alphas 25 | 26 | def build(self, input_shape): 27 | input_dim = input_shape[-1] 28 | # Create a trainable weight variable for this layer. 29 | self.w_omega = self.add_weight(name='w_omega', 30 | shape=(input_dim, self.units), 31 | initializer='random_normal', 32 | trainable=True) 33 | self.b_omega = self.add_weight(name='b_omega', 34 | shape=(self.units,), 35 | initializer='zeros', 36 | trainable=True) 37 | self.u_omega = self.add_weight(name='u_omega', 38 | shape=(self.units,), 39 | initializer='random_normal', 40 | trainable=True) 41 | super(text_attention, self).build(input_shape) # Be sure to call this somewhere! 42 | 43 | def call(self, x, mask=None): 44 | input_dim = K.shape(x)[-1] 45 | v = K.tanh(K.dot(K.reshape(x, [-1, input_dim]), self.w_omega) + K.expand_dims(self.b_omega, 0)) 46 | vu = K.dot(v, K.expand_dims(self.u_omega, -1)) 47 | vu = K.reshape(vu, K.shape(x)[:2]) 48 | m = K.cast(mask, dtype='float32') 49 | m = m - 1 50 | m = m * REMOVE_FACTOR 51 | vu = vu + m 52 | alphas = K.softmax(vu) 53 | output = K.sum(x * K.expand_dims(alphas, -1), 1) 54 | if self.return_alphas: 55 | return [output] + [alphas] 56 | else: 57 | return output 58 | 59 | def compute_mask(self, inputs, mask=None): 60 | return None 61 | 62 | def compute_output_shape(self, input_shape): 63 | output_shape = (input_shape[0], input_shape[2]) 64 | if self.return_alphas: 65 | alphas_shape = [(input_shape[0], input_shape[1])] 66 | return [output_shape] + alphas_shape 67 | else: 68 | return output_shape 69 | 70 | def get_config(self): 71 | return super(text_attention, self).get_config() 72 | 73 | 74 | class coAttention_alt(Layer): 75 | """ 76 | self defined co-attention layer. 
77 | alternative co-attention 78 | inputs: [image feature tensor, hidden text feature tensor, summarized text feature tensor(after attention)] 79 | output: co-Attention feature of image and text 80 | 81 | input dimensions:[(batchSize, num_region, CNN_dimension), 82 | (batchSize, seq_length, CNN_dimension),(batchSize, CNN_dimension)] 83 | output dimension: batch_size*CNN_dimension 84 | """ 85 | def __init__(self, dim_k, **kwargs): 86 | super(coAttention_alt, self).__init__(**kwargs) 87 | self.dim_k = dim_k # internal tensor dimension 88 | # self.input_spec = InputSpec(min_ndim=3) 89 | self.supports_masking = True 90 | 91 | def build(self, input_shape): 92 | if not isinstance(input_shape, list): 93 | raise ValueError('A Co-Attention_alt layer should be called ' 94 | 'on a list of inputs.') 95 | if len(input_shape) != 3: 96 | raise ValueError('A Co-Attention_alt layer should be called on a list of 3 inputs.' 97 | 'Got '+str(len(input_shape))+'inputs.') 98 | self.num_imgRegion = input_shape[0][1] 99 | self.seq_len = input_shape[1][1] 100 | self.output_dim = input_shape[0][2] 101 | 102 | """trainable variables naming rule: 103 | w/b + '_Dense_' + Vi/Vt + '_' + 0/1 104 | w: weight 105 | b: bias 106 | Vi: about image feature 107 | Vt: about text feature 108 | 0: phase 0 109 | 1: phase 1 110 | """ 111 | self.w_Dense_Vi_0 = self.add_weight(name='w_Dense_Vi_0', 112 | shape=(self.output_dim, self.dim_k), 113 | initializer='random_normal', 114 | trainable=True) 115 | self.w_Dense_Vt_0 = self.add_weight(name='w_Dense_Vt_0', 116 | shape=(self.output_dim, self.dim_k), 117 | initializer='random_normal', 118 | trainable=True) 119 | self.w_Dense_Pi_0 = self.add_weight(name='w_Dense_Pi_0', 120 | shape=(2*self.dim_k, 1), 121 | initializer='random_normal', 122 | trainable=True) 123 | self.b_Dense_Pi_0 = self.add_weight(name='b_Dense_Pi_0', 124 | shape=(self.num_imgRegion,), 125 | initializer='zeros', 126 | trainable=True) 127 | 128 | self.w_Dense_Vi_1 = self.add_weight(name='w_Dense_Vi_1', 129 | shape=(self.output_dim, self.dim_k), 130 | initializer='random_normal', 131 | trainable=True) 132 | self.w_Dense_Vt_1 = self.add_weight(name='w_Dense_Vt_1', 133 | shape=(self.output_dim, self.dim_k), 134 | initializer='random_normal', 135 | trainable=True) 136 | self.w_Dense_Pi_1 = self.add_weight(name='w_Dense_Pi_1', 137 | shape=(2*self.dim_k, 1), 138 | initializer='random_normal', 139 | trainable=True) 140 | self.b_Dense_Pi_1 = self.add_weight(name='b_Dense_Pi_1', 141 | shape=(self.seq_len,), 142 | initializer='zeros', 143 | trainable=True) 144 | 145 | super(coAttention_alt, self).build(input_shape) # Be sure to call this somewhere! 
146 | 147 | def call(self, x, mask=None): 148 | ifeature = x[0] 149 | tfeature_h = x[1] 150 | tfeature = x[2] 151 | output_dim = self.output_dim 152 | num_imgRegion = self.num_imgRegion 153 | dim_k = self.dim_k 154 | seq_len = self.seq_len 155 | 156 | # phase 0: text-guided image feature computation 157 | w_Vi_0 = K.dot(K.reshape(ifeature, [-1, output_dim]), self.w_Dense_Vi_0) 158 | # shape=((batchSize*num_imgRegion),dim_k) 159 | w_Vi_0 = K.reshape(w_Vi_0, [-1, num_imgRegion, dim_k]) # shape=(batchSize,num_imgRegion,dim_k) 160 | w_Vt_0 = K.repeat(K.dot(tfeature, self.w_Dense_Vt_0), num_imgRegion) # shape=(batchSize,num_imgRegion,dim_k) 161 | Vi_Vt_0 = K.concatenate([w_Vi_0, w_Vt_0], axis=-1) # shape=(batchSize,num_imgRegion,2*dim_k) 162 | Hi = K.tanh(Vi_Vt_0) 163 | # Hi_w = K.squeeze(K.dot(K.reshape(Hi, [-1, 2*dim_k]), self.w_Dense_Pi_0), axis=-1) 164 | # Hi_w_b = K.reshape(Hi_w, [-1, num_imgRegion]) + self.b_Dense_Pi_0 165 | Hi_w_b = K.squeeze(K.dot(Hi, self.w_Dense_Pi_0), axis=-1) + self.b_Dense_Pi_0 # shape=(batchSize,num_imgRegion) 166 | Pi = K.softmax(Hi_w_b) 167 | Pi = K.permute_dimensions(K.repeat(Pi, output_dim), (0, 2, 1)) # shape=(batchSize,num_imgRegion,output_dim) 168 | Pi_Vi = Pi*ifeature 169 | Vi = K.sum(Pi_Vi, axis=1) # shape=(batchSize,output_dim) 170 | 171 | # phase 1: image-guided text feature computation 172 | w_Vi_1 = K.repeat(K.dot(Vi, self.w_Dense_Vi_1), seq_len) # shape=(batchSize,seq_len,dim_k) 173 | w_Vt_1 = K.dot(K.reshape(tfeature_h, [-1, output_dim]), self.w_Dense_Vt_1) # shape=((batchSize*seq_len),dim_k) 174 | w_Vt_1 = K.reshape(w_Vt_1, (-1, seq_len, dim_k)) # shape= (batchSize, seq_len, dim_k) 175 | Vi_Vt_1 = K.concatenate([w_Vi_1, w_Vt_1], axis=-1) # shape=(batchSize, seq_len, 2*dim_k) 176 | Ht = K.tanh(Vi_Vt_1) 177 | Ht_b = K.squeeze(K.dot(Ht, self.w_Dense_Pi_1), axis=-1) + self.b_Dense_Pi_1 # shape=(batch_size, seq_len) 178 | Pt = K.softmax(Ht_b) 179 | Pt = K.permute_dimensions(K.repeat(Pt, output_dim), (0, 2, 1)) # shape=(batchSize, seq_len, output_dim) 180 | Pt_Vt = Pt*tfeature_h 181 | Vt = K.sum(Pt_Vt, axis=1) # shape=(batchSize, output_dim) 182 | 183 | return Vi+Vt 184 | 185 | def compute_output_shape(self, input_shape): 186 | output_shape = (input_shape[0][0], input_shape[0][-1]) 187 | return output_shape 188 | 189 | def get_config(self): 190 | return super(coAttention_alt, self).get_config() 191 | 192 | 193 | class coAttention_para(Layer): 194 | """ 195 | self-defined parallel co-attention layer. 196 | inputs: [tFeature, iFeature] 197 | outputs: [coFeature] 198 | 199 | dimension: 200 | input dimensions: [(batch_size, seq_length, embedding_size), (batch_size, num_img_region, 2*hidden_size)] 201 | considering subsequent operation, better to set embedding_size == 2*hidden_size 202 | output dimensions:[(batch_size, 2*hidden_size)] 203 | """ 204 | def __init__(self, dim_k, **kwargs): 205 | super(coAttention_para, self).__init__(**kwargs) 206 | self.dim_k = dim_k # internal tensor dimension 207 | self.supports_masking = True 208 | 209 | def build(self, input_shape): 210 | if not isinstance(input_shape, list): 211 | raise ValueError('A Co-Attention_para layer should be called ' 212 | 'on a list of inputs.') 213 | if len(input_shape) != 2: 214 | raise ValueError('A Co-Attention_para layer should be called on a list of 2 inputs.' 
215 | 'Got '+str(len(input_shape))+'inputs.') 216 | self.embedding_size = input_shape[0][-1] 217 | self.num_region = input_shape[1][1] 218 | self.seq_len = input_shape[0][1] 219 | """ 220 | naming variables following the VQA paper 221 | """ 222 | self.Wb = self.add_weight(name="Wb", 223 | initializer="random_normal", 224 | # initializer="ones", 225 | shape=(self.embedding_size, self.embedding_size), 226 | trainable=True) 227 | self.Wq = self.add_weight(name="Wq", 228 | initializer="random_normal", 229 | # initializer="ones", 230 | shape=(self.embedding_size, self.dim_k), 231 | trainable=True) 232 | self.Wv = self.add_weight(name="Wv", 233 | initializer="random_normal", 234 | # initializer="ones", 235 | shape=(self.embedding_size, self.dim_k), 236 | trainable=True) 237 | self.Whv = self.add_weight(name="Whv", 238 | initializer="random_normal", 239 | # initializer="ones", 240 | shape=(self.dim_k, 1), 241 | trainable=True) 242 | self.Whq = self.add_weight(name="Whq", 243 | initializer="random_normal", 244 | # initializer="ones", 245 | shape=(self.dim_k, 1), 246 | trainable=True) 247 | 248 | super(coAttention_para, self).build(input_shape) # Be sure to call this somewhere! 249 | 250 | def call(self, inputs, mask=None): 251 | tFeature = inputs[0] 252 | iFeature = inputs[1] 253 | # affinity matrix C 254 | affi_mat = K.dot(tFeature, self.Wb) 255 | affi_mat = K.batch_dot(affi_mat, K.permute_dimensions(iFeature, (0, 2, 1))) # (batch_size, seq_len, num_region) 256 | # Hq, Hv, av, aq 257 | tmp_Hv = K.dot(tFeature, self.Wq) 258 | Hv = K.dot(iFeature, self.Wv) + K.batch_dot(K.permute_dimensions(affi_mat, (0, 2, 1)), tmp_Hv) 259 | Hv = K.tanh(Hv) 260 | av = K.softmax(K.squeeze(K.dot(Hv, self.Whv), axis=-1)) 261 | 262 | tmp_Hq = K.dot(iFeature, self.Wv) 263 | Hq = K.dot(tFeature, self.Wq) + K.batch_dot(affi_mat, tmp_Hq) 264 | Hq = K.tanh(Hq) 265 | aq = K.softmax(K.squeeze(K.dot(Hq, self.Whq), axis=-1)) 266 | 267 | av = K.permute_dimensions(K.repeat(av, self.embedding_size), (0, 2, 1)) 268 | aq = K.permute_dimensions(K.repeat(aq, self.embedding_size), (0, 2, 1)) 269 | 270 | tfeature = K.sum(aq * tFeature, axis=1) 271 | ifeature = K.sum(av * iFeature, axis=1) 272 | 273 | return tfeature+ifeature 274 | 275 | def get_config(self): 276 | return super(coAttention_para, self).get_config() 277 | 278 | 279 | class encoding(Layer): 280 | """ 281 | self defined encoding layer, summarize total co-feature based on three level co-features 282 | input: [co_feature_word, co_feature_phrase, co_feature_text] 283 | output: total co_feature 284 | 285 | dimension : 286 | input dimensions : [(batch_size, embedding_size)]*3 287 | output dimension: (batch_size, embedding_size) 288 | """ 289 | def __init__(self, **kwargs): 290 | super(encoding, self).__init__(**kwargs) 291 | # self.input_spec = InputSpec(min_ndim=3) 292 | self.supports_masking = True 293 | 294 | def build(self, input_shape): 295 | if not isinstance(input_shape, list): 296 | raise ValueError('A Co-Attention_alt layer should be called ' 297 | 'on a list of inputs.') 298 | if len(input_shape) != 3: 299 | raise ValueError('A Co-Attention_alt layer should be called on a list of 3 inputs.' 
300 | 'Got '+str(len(input_shape))+'inputs.') 301 | self.output_dim = input_shape[0][-1] 302 | 303 | self.w_word = self.add_weight(name='w_word', 304 | shape=(self.output_dim, self.output_dim), 305 | initializer='random_normal', 306 | trainable=True) 307 | self.w_phrase = self.add_weight(name="w_phrase", 308 | shape=(self.output_dim*2, self.output_dim), 309 | initializer="random_normal", 310 | trainable=True) 311 | self.w_text = self.add_weight(name="w_text", 312 | shape=(self.output_dim*2, self.output_dim), 313 | initializer="random_normal", 314 | trainable=True) 315 | super(encoding, self).build(input_shape) # Be sure to call this somewhere! 316 | 317 | def call(self, inputs, mask=None): 318 | feature_word = inputs[0] 319 | feature_phrase = inputs[1] 320 | feature_text = inputs[2] 321 | 322 | h_w = K.tanh(K.dot(feature_word, self.w_word)) 323 | h_p = K.tanh(K.dot(K.concatenate([feature_phrase, h_w]), self.w_phrase)) 324 | h_t = K.tanh(K.dot(K.concatenate([feature_text, h_p]), self.w_text)) 325 | 326 | return h_t 327 | 328 | def compute_output_shape(self, input_shape): 329 | output_shape = (input_shape[0][0], input_shape[0][1]) 330 | return output_shape 331 | 332 | def get_config(self): 333 | return super(encoding, self).get_config() 334 | 335 | 336 | class self_conv1d(Layer): 337 | def __init__(self, filters, 338 | kernel_size, 339 | strides=1, 340 | padding='same', 341 | data_format=None, 342 | activation='tanh', 343 | kernel_initializer='glorot_uniform', 344 | bias_initializer='zeros', 345 | use_bias=True, 346 | ** kwargs): 347 | super(self_conv1d, self).__init__(**kwargs) 348 | self.rank = 1 349 | self.filters = filters 350 | self.data_format = conv_utils.normalize_data_format(data_format) 351 | self.kernel_size = conv_utils.normalize_tuple(kernel_size, self.rank, 'kernel_size') 352 | self.strides = conv_utils.normalize_tuple(strides, self.rank, 'strides') 353 | self.padding = conv_utils.normalize_padding(padding) 354 | self.activation = activations.get(activation) 355 | self.use_bias = use_bias 356 | self.supports_masking = True 357 | self.kernel_initializer = initializers.get(kernel_initializer) 358 | self.bias_initializer = initializers.get(bias_initializer) 359 | 360 | def build(self, input_shape): 361 | if len(input_shape) != 3: 362 | raise ValueError('A Co-Attention_alt layer should be called on a tensor of 3 dims.' 363 | 'Got '+str(len(input_shape))+'dims.') 364 | if self.data_format == "channels_first": 365 | channel_axis = 1 366 | else: 367 | channel_axis = -1 368 | if input_shape[channel_axis] is None: 369 | raise ValueError('The channel dimension of the inputs ' 370 | 'should be defined. 
Found `None`.')
371 |         input_dim = input_shape[channel_axis]
372 |         kernel_shape = self.kernel_size + (input_dim, self.filters)
373 |         self.kernel = self.add_weight(shape=kernel_shape,
374 |                                       initializer=self.kernel_initializer,
375 |                                       # initializer='ones',
376 |                                       name='kernel',
377 |                                       trainable=True)
378 |         if self.use_bias:
379 |             self.bias = self.add_weight(shape=(self.filters,),
380 |                                         initializer=self.bias_initializer,
381 |                                         # initializer='zeros',
382 |                                         name='bias',
383 |                                         trainable=True)
384 |         else:
385 |             self.bias = None
386 | 
387 |         self.built = True
388 | 
389 |     def call(self, inputs, mask=None):
390 |         # print(K.get_value(self.kernel))
391 |         outputs = K.conv1d(
392 |             inputs,
393 |             self.kernel,
394 |             strides=self.strides[0],
395 |             padding=self.padding,
396 |             data_format=self.data_format)
397 | 
398 |         if self.use_bias:
399 |             outputs = K.bias_add(
400 |                 outputs,
401 |                 self.bias,
402 |                 data_format=self.data_format)
403 | 
404 |         if self.activation is not None:
405 |             return self.activation(outputs)
406 | 
407 |         return outputs
408 | 
409 |     def compute_mask(self, inputs, mask=None):
410 |         if isinstance(mask, list):
411 |             mask = mask[0]
412 |         return mask
413 | 
414 |     def compute_output_shape(self, input_shape):
415 |         output_shape = input_shape
416 |         return output_shape
417 | 
418 |     def get_config(self):
419 |         return super(self_conv1d, self).get_config()
420 | 
421 | 
422 | class self_maxpooling(Layer):
423 |     def __init__(self, **kwargs):
424 |         self.supports_masking = True
425 |         super(self_maxpooling, self).__init__(**kwargs)
426 | 
427 |     def build(self, input_shape):
428 |         if not isinstance(input_shape, list):
429 |             raise ValueError('A self_maxpooling layer should be called '
430 |                              'on a list of inputs.')
431 |         self.num_inputs = len(input_shape)
432 | 
433 |     def call(self, inputs, mask=None):
434 |         tmp = K.stack([inputs[0], inputs[1], inputs[2]], axis=1)
435 | 
436 |         outputs = K.max(tmp, axis=1)
437 | 
438 |         return outputs
439 | 
440 |     def compute_mask(self, inputs, mask=None):
441 |         if isinstance(mask, list):
442 |             mask = mask[0]
443 |         return mask
444 | 
445 |     def compute_output_shape(self, input_shape):
446 |         output_shape = input_shape[0]
447 |         return output_shape
448 | 
449 |     def get_config(self):
450 |         return super(self_maxpooling, self).get_config()
451 | 
452 | 
453 | def myLossFunc(y_true, y_pred):
454 |     probs_log = -K.log(y_pred)
455 |     loss = K.mean(K.sum(probs_log*y_true, axis=-1))
456 |     return loss
457 | 
458 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Models-reproducing
 2 | Some models from AI research papers, implemented by myself.
 3 | 
 4 | **The most useful parts are the self-defined Keras layers.**
 5 | ### 1. **CoA** from [Hashtag Recommendation for Multimodal Microblog Using Co-Attention Networks.](https://www.ijcai.org/proceedings/2017/0478.pdf)
 6 | A hashtag recommendation model using a co-attention mechanism.
 7 | 
 8 | ### 2. **TAB-LSTM** from [Hashtag recommendation with topical attention-based LSTM](http://www.aclweb.org/anthology/C16-1284).
 9 | A hashtag recommendation model for textual content using an attention mechanism and topical distributions.
10 | 
11 | ### 3. **ABC (attention-based convolutional)** from [Hashtag Recommendation Using Attention-Based Convolutional Neural Network](https://www.ijcai.org/Proceedings/16/Papers/395.pdf).
12 | A hashtag recommendation model for textual content using a two-level (local and global) CNN-based attention mechanism.
13 | 
14 | ### 4. **Hierarchical Co-Attention VQA** from [Hierarchical Question-Image Co-Attention for Visual Question Answering](http://papers.nips.cc/paper/6202-hierarchical-question-image-co-attention-for-visual-question-answering.pdf).
15 | The co-attention mechanism was first proposed in this article.
--------------------------------------------------------------------------------
/TAB-LSTM/README.md:
--------------------------------------------------------------------------------
 1 | # TAB-LSTM model
 2 | 
 3 | ### Proposed in [Hashtag recommendation with topical attention-based LSTM](http://www.aclweb.org/anthology/C16-1284) COLING 2016, by Yang Li et al.
 4 | 
 5 | ## **Overview**
 6 | ### TAB-LSTM recommends hashtags for textual content. It employs an LSTM to extract textual features and integrates an attention mechanism guided by pre-trained topical distributions.
 7 | 
 8 | ### Model Architecture
 9 | ![TAB-LSTM model](TAB-LSTM.png)
10 | 
11 | ## **Usage**
12 | ### Source files in this repository cannot be executed as-is, because the data preprocessing and data input statements are omitted. Prepare your data according to your needs.
13 | 
14 | ### Topical distributions can be pre-trained with an LDA model.
15 | 
16 | ## **Requirements**
17 | - Python 3.x
18 | - Tensorflow >= 1.7
19 | - Keras >= 2.1.5
20 | 
21 | 
22 | 
23 | 
--------------------------------------------------------------------------------
/TAB-LSTM/TAB-LSTM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zhangtd/Models-reproducing/48cff578ef2b1a0f0a265bc7a0d10b17bd12e3fd/TAB-LSTM/TAB-LSTM.png
--------------------------------------------------------------------------------
/TAB-LSTM/topicAtt.py:
--------------------------------------------------------------------------------
 1 | from keras.models import Model
 2 | from keras.layers import Input
 3 | import keras.backend as K
 4 | from keras.engine.topology import Layer, InputSpec
 5 | from keras.utils import conv_utils
 6 | from keras import activations, initializers
 7 | 
 8 | 
 9 | class topicAttention(Layer):
10 |     """
11 |     self defined topical attention layer.
12 |     input: [hiddenStates, topicDistribution]
13 |     input_shape: [(batch_size, seq_len, embedding_size), (batch_size, topic_num)]
14 |     output: topical_text_feature
15 |     output shape: (batch_size, embedding_size)
16 |     """
17 |     def __init__(self, **kwargs):
18 |         super(topicAttention, self).__init__(**kwargs)
19 |         # self.input_spec = InputSpec(min_ndim=3)
20 |         self.supports_masking = True
21 | 
22 |     def build(self, input_shape):
23 |         self.embedding_size = input_shape[0][-1]
24 |         self.topic_num = input_shape[1][-1]
25 |         self.seq_len = input_shape[0][1]
26 | 
27 |         # Create a trainable weight variable for this layer.
28 |         self.w = self.add_weight(name='w',
29 |                                  shape=(self.embedding_size, self.topic_num),
30 |                                  initializer='random_normal',
31 |                                  trainable=True)
32 |         self.v = self.add_weight(name='v',
33 |                                  shape=(self.embedding_size, 1),
34 |                                  initializer='zeros',
35 |                                  trainable=True)
36 |         self.u = self.add_weight(name='u',
37 |                                  shape=(self.embedding_size, self.embedding_size),
38 |                                  initializer='random_normal',
39 |                                  trainable=True)
40 |         super(topicAttention, self).build(input_shape)  # Be sure to call this somewhere!
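    # --- added summary (not in the original repo) ---------------------------------------
    # call() below scores each hidden state h_t against the post's topic distribution
    # theta, roughly g_t = v^T * tanh(w * theta + u * h_t), normalizes the scores with a
    # softmax over the sequence, and returns the attention-weighted sum of hidden states.
    # -------------------------------------------------------------------------------------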
41 | 42 | def call(self, x, mask=None): 43 | h = x[0] 44 | theta = x[1] 45 | 46 | theta_w = K.dot(theta, K.transpose(self.w)) 47 | theta_w = K.repeat(theta_w, self.seq_len) 48 | h_ = K.reshape(h, [-1, self.embedding_size]) 49 | h_u = K.dot(h_, self.u) 50 | h_u = K.reshape(h_u, [-1, self.seq_len, self.embedding_size]) 51 | 52 | g = K.dot(K.tanh(theta_w+h_u), self.v) 53 | weight = K.softmax(K.squeeze(g, axis=-1)) 54 | weight = K.expand_dims(weight, axis=-1) 55 | weight = K.repeat_elements(weight, self.embedding_size, axis=-1) 56 | vec = weight * h 57 | vec = K.sum(vec, axis=1) 58 | 59 | return vec 60 | 61 | def compute_mask(self, inputs, mask=None): 62 | return None 63 | 64 | def compute_output_shape(self, input_shape): 65 | output_shape = (input_shape[0][0], input_shape[0][-1]) 66 | return output_shape 67 | 68 | 69 | if __name__ == "__main__": 70 | input1 = Input(batch_shape=(10, 25, 50)) 71 | input2 = Input(batch_shape=(10, 20)) 72 | 73 | topic_h = topicAttention()([input1, input2]) 74 | print(topic_h) 75 | -------------------------------------------------------------------------------- /TAB-LSTM/topicLSTM.py: -------------------------------------------------------------------------------- 1 | 2 | from keras.models import Model 3 | from keras.layers import Input, Reshape, Dense, Embedding, Dropout, LSTM, AveragePooling1D, Lambda, Concatenate, \ 4 | Multiply, RepeatVector, Flatten, Activation, Permute, merge 5 | import keras.backend as K 6 | 7 | from topicAtt import topicAttention 8 | import numpy as np 9 | 10 | num_tags = 3896 11 | num_words = 212000 12 | index_from = 3 13 | seq_length = 30 14 | batch_size = 512 15 | embedding_size = 300 16 | attention_size = 200 17 | topic_num = 100 18 | dim_k = 100 19 | drop_rate = 0.75 20 | 21 | # prepare the following data. 
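# --- added note (not in the original repo) ----------------------------------------------
# Expected shapes, inferred from modelDef() below; dataset sizes are assumptions:
#   texts_*:  (num_samples, seq_length) padded word-index sequences
#   topics_*: (num_samples, topic_num)  document-topic distributions, e.g. from a
#             pre-trained LDA model as suggested in the README
#   tags_*:   (num_samples, num_tags)   multi-hot hashtag vectors
# ----------------------------------------------------------------------------------------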
22 | # required arrays: texts_train, topics_train, tags_train, texts_test, topics_test, tags_test
23 | 
24 | 
25 | def myLossFunc(y_true, y_pred):
26 |     probs_log = -K.log(y_pred)
27 |     loss = K.mean(K.sum(probs_log*y_true, axis=-1))
28 |     # loss = K.mean(K.sum(K.clip(probs_log * y_true, -1e40, 100), axis=-1))
29 |     return loss
30 | 
31 | 
32 | def modelDef():
33 |     input_text = Input(shape=(seq_length, ))
34 |     input_topic = Input(shape=(topic_num,))
35 | 
36 |     embeddings = Embedding(input_dim=num_words+index_from, output_dim=embedding_size,
37 |                            mask_zero=True, input_length=seq_length)(input_text)
38 |     tFeature = LSTM(units=embedding_size, return_sequences=True)(embeddings)
39 |     topic_h = topicAttention()([tFeature, input_topic])
40 |     dropout = Dropout(drop_rate)(topic_h)
41 |     Softmax = Dense(num_tags, activation="softmax", use_bias=True)(dropout)
42 | 
43 |     model = Model(inputs=[input_text, input_topic], outputs=[Softmax])
44 |     model.compile(optimizer="adam", loss=myLossFunc)
45 | 
46 |     return model
47 | 
48 | 
49 | def evaluation(y_true, y_pred, top_K):
50 |     acc_count = 0
51 |     precision_K = []
52 |     recall_K = []
53 |     f1_K = []
54 | 
55 |     for i in range(y_pred.shape[0]):
56 |         top_indices = y_pred[i].argsort()[-top_K:]
57 |         if np.sum(y_true[i, top_indices]) >= 1:
58 |             acc_count += 1
59 |         p = np.sum(y_true[i, top_indices])/top_K
60 |         r = np.sum(y_true[i, top_indices])/np.sum(y_true[i, :])
61 |         precision_K.append(p)
62 |         recall_K.append(r)
63 |         if p != 0 or r != 0:
64 |             f1_K.append(2 * p * r / (p + r))
65 |         else:
66 |             f1_K.append(0)
67 | 
68 |     acc_K = acc_count * 1.0 / y_pred.shape[0]
69 | 
70 |     return acc_K, np.mean(np.array(precision_K)), np.mean(np.array(recall_K)), np.mean(np.array(f1_K))
71 | 
72 | 
73 | if __name__ == "__main__":
74 |     top_K = 5  # number of recommended hashtags used for evaluation
75 |     myModel = modelDef()
76 |     history = myModel.fit(x=[texts_train, topics_train],
77 |                           y=tags_train,
78 |                           batch_size=batch_size,
79 |                           epochs=1,
80 |                           verbose=1, )
81 |     y_pred = myModel.predict(x=[texts_test, topics_test])
82 |     acc, precision, recall, f1 = evaluation(tags_test, y_pred, top_K)
83 | 
84 | 
--------------------------------------------------------------------------------